summary refs log tree commit diff stats
path: root/storage/innobase/dict
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/dict')
-rw-r--r-- storage/innobase/dict/dict0boot.cc 492
-rw-r--r-- storage/innobase/dict/dict0crea.cc 2237
-rw-r--r-- storage/innobase/dict/dict0defrag_bg.cc 327
-rw-r--r-- storage/innobase/dict/dict0dict.cc 5277
-rw-r--r-- storage/innobase/dict/dict0load.cc 3687
-rw-r--r-- storage/innobase/dict/dict0mem.cc 1396
-rw-r--r-- storage/innobase/dict/dict0stats.cc 4306
-rw-r--r-- storage/innobase/dict/dict0stats_bg.cc 479
8 files changed, 18201 insertions, 0 deletions
diff --git a/storage/innobase/dict/dict0boot.cc b/storage/innobase/dict/dict0boot.cc
new file mode 100644
index 00000000..bd2cf4ff
--- /dev/null
+++ b/storage/innobase/dict/dict0boot.cc
@@ -0,0 +1,492 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2016, 2020, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dict/dict0boot.cc
+Data dictionary creation and booting
+
+Created 4/18/1996 Heikki Tuuri
+*******************************************************/
+
+#include "dict0boot.h"
+#include "dict0crea.h"
+#include "btr0btr.h"
+#include "dict0load.h"
+#include "trx0trx.h"
+#include "srv0srv.h"
+#include "ibuf0ibuf.h"
+#include "buf0flu.h"
+#include "log0recv.h"
+#include "os0file.h"
+
+/** Fetch the data dictionary header page.
+@param[in,out]	mtr	mini-transaction passed on to buf_page_get()
+@return the DICT_HDR block, x-latched */
+buf_block_t *dict_hdr_get(mtr_t* mtr)
+{
+	const page_id_t hdr_page_id(DICT_HDR_SPACE, DICT_HDR_PAGE_NO);
+	buf_block_t *hdr_block= buf_page_get(hdr_page_id, 0, RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(hdr_block, SYNC_DICT_HEADER);
+	return hdr_block;
+}
+
+/**********************************************************************//**
+Hands out a new table id, index id and/or tablespace id.
+Each non-NULL argument selects one counter stored in the dictionary
+header page. The table and index id counters are incremented and written
+back; the space id is taken from fil_assign_new_space_id() and written back
+only when that call succeeds. All of it happens in one mini-transaction. */
+void
+dict_hdr_get_new_id(
+/*================*/
+	table_id_t*	table_id,	/*!< out: table id
+					(not assigned if NULL) */
+	index_id_t*	index_id,	/*!< out: index id
+					(not assigned if NULL) */
+	ulint*		space_id)	/*!< out: space id
+					(not assigned if NULL) */
+{
+	mtr_t	mtr;
+
+	mtr.start();
+	buf_block_t*	hdr = dict_hdr_get(&mtr);
+	byte* const	hdr_base = hdr->frame + DICT_HDR;
+
+	if (table_id != NULL) {
+		byte* const	field = hdr_base + DICT_HDR_TABLE_ID;
+		const ib_id_t	next_id = mach_read_from_8(field) + 1;
+
+		mtr.write<8>(*hdr, field, next_id);
+		*table_id = next_id;
+	}
+
+	if (index_id != NULL) {
+		byte* const	field = hdr_base + DICT_HDR_INDEX_ID;
+		const ib_id_t	next_id = mach_read_from_8(field) + 1;
+
+		mtr.write<8>(*hdr, field, next_id);
+		*index_id = next_id;
+	}
+
+	if (space_id != NULL) {
+		byte* const	field = hdr_base + DICT_HDR_MAX_SPACE_ID;
+
+		/* Seed with the persisted maximum; the header is updated
+		only if a new id could actually be assigned. */
+		*space_id = mach_read_from_4(field);
+
+		if (fil_assign_new_space_id(space_id)) {
+			mtr.write<4>(*hdr, field, *space_id);
+		}
+	}
+
+	mtr.commit();
+}
+
+/**********************************************************************//**
+Persists the in-memory row id counter (dict_sys.row_id) to the
+DICT_HDR_ROW_ID field of the dictionary header page.
+The caller must hold dict_sys.mutex. */
+void
+dict_hdr_flush_row_id(void)
+/*=======================*/
+{
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	const row_id_t	current_row_id = dict_sys.row_id;
+	mtr_t		mtr;
+
+	mtr.start();
+
+	buf_block_t*	hdr = dict_hdr_get(&mtr);
+	byte* const	row_id_field = hdr->frame + DICT_HDR + DICT_HDR_ROW_ID;
+
+	mtr.write<8>(*hdr, row_id_field, current_row_id);
+
+	mtr.commit();
+}
+
+/*****************************************************************//**
+Creates the file page for the dictionary header. This function is
+called only at the database creation.
+Initializes the id counters in the header and creates the B-tree roots
+of the basic system table indexes, recording each root page number in
+the header.
+@return TRUE if succeed, FALSE if a B-tree root could not be created */
+static
+ibool
+dict_hdr_create(
+/*============*/
+	mtr_t*	mtr)	/*!< in: mtr */
+{
+	/* B-tree roots to create, in creation order: index type flags,
+	index id, and the header field receiving the root page number. */
+	static const struct {
+		ulint		type;
+		index_id_t	id;
+		ulint		hdr_field;
+	} sys_roots[] = {
+		{DICT_CLUSTERED | DICT_UNIQUE, DICT_TABLES_ID,
+		 DICT_HDR_TABLES},
+		{DICT_UNIQUE, DICT_TABLE_IDS_ID,
+		 DICT_HDR_TABLE_IDS},
+		{DICT_CLUSTERED | DICT_UNIQUE, DICT_COLUMNS_ID,
+		 DICT_HDR_COLUMNS},
+		{DICT_CLUSTERED | DICT_UNIQUE, DICT_INDEXES_ID,
+		 DICT_HDR_INDEXES},
+		{DICT_CLUSTERED | DICT_UNIQUE, DICT_FIELDS_ID,
+		 DICT_HDR_FIELDS}
+	};
+
+	ut_ad(mtr);
+	compile_time_assert(DICT_HDR_SPACE == 0);
+
+	/* Create the dictionary header file block in a new, allocated file
+	segment in the system tablespace */
+	buf_block_t*	seg_block = fseg_create(
+		fil_system.sys_space, DICT_HDR + DICT_HDR_FSEG_HEADER, mtr);
+
+	ut_a(seg_block->page.id()
+	     == page_id_t(DICT_HDR_SPACE, DICT_HDR_PAGE_NO));
+
+	buf_block_t*	hdr = dict_hdr_get(mtr);
+	byte* const	hdr_base = hdr->frame + DICT_HDR;
+
+	/* Start counting row, table, index, and tree ids from
+	DICT_HDR_FIRST_ID */
+	mtr->write<8>(*hdr, hdr_base + DICT_HDR_ROW_ID, DICT_HDR_FIRST_ID);
+	mtr->write<8>(*hdr, hdr_base + DICT_HDR_TABLE_ID, DICT_HDR_FIRST_ID);
+	mtr->write<8>(*hdr, hdr_base + DICT_HDR_INDEX_ID, DICT_HDR_FIRST_ID);
+
+	ut_ad(!mach_read_from_4(hdr_base + DICT_HDR_MAX_SPACE_ID));
+
+	/* Obsolete, but we must initialize it anyway. */
+	mtr->write<4>(*hdr, hdr_base + DICT_HDR_MIX_ID_LOW,
+		      DICT_HDR_FIRST_ID);
+
+	/* Create the B-tree roots of the basic system table indexes and
+	store their page numbers; give up at the first failure. */
+	for (const auto& root : sys_roots) {
+		const ulint	root_page_no = btr_create(
+			root.type, fil_system.sys_space, root.id,
+			nullptr, mtr);
+
+		if (root_page_no == FIL_NULL) {
+			return(FALSE);
+		}
+
+		mtr->write<4>(*hdr, hdr_base + root.hdr_field,
+			      root_page_no);
+	}
+
+	return(TRUE);
+}
+
+/*****************************************************************//**
+Initializes the data dictionary memory structures when the database is
+started. This function is also called when the data dictionary is created.
+Reads the row id counter, the maximum tablespace id and the index root
+page numbers from the dictionary header page, and adds the definitions of
+SYS_TABLES, SYS_COLUMNS, SYS_INDEXES and SYS_FIELDS (with their indexes)
+to the dictionary cache. Afterwards the insert buffer is initialized and
+the remaining index definitions of the system tables are loaded.
+@return DB_SUCCESS or error code. */
+dberr_t
+dict_boot(void)
+/*===========*/
+{
+ dict_table_t* table;
+ dict_index_t* index;
+ mem_heap_t* heap;
+ mtr_t mtr;
+
+ /* Be sure these constants do not ever change. To avoid bloat,
+ only check the *NUM_FIELDS* in each table. The operands are
+ constants, so the checks are evaluated at compile time. */
+
+ compile_time_assert(DICT_NUM_COLS__SYS_TABLES == 8);
+ compile_time_assert(DICT_NUM_FIELDS__SYS_TABLES == 10);
+ compile_time_assert(DICT_NUM_FIELDS__SYS_TABLE_IDS == 2);
+ compile_time_assert(DICT_NUM_COLS__SYS_COLUMNS == 7);
+ compile_time_assert(DICT_NUM_FIELDS__SYS_COLUMNS == 9);
+ compile_time_assert(DICT_NUM_COLS__SYS_INDEXES == 8);
+ compile_time_assert(DICT_NUM_FIELDS__SYS_INDEXES == 10);
+ compile_time_assert(DICT_NUM_COLS__SYS_FIELDS == 3);
+ compile_time_assert(DICT_NUM_FIELDS__SYS_FIELDS == 5);
+ compile_time_assert(DICT_NUM_COLS__SYS_FOREIGN == 4);
+ compile_time_assert(DICT_NUM_FIELDS__SYS_FOREIGN == 6);
+ compile_time_assert(DICT_NUM_FIELDS__SYS_FOREIGN_FOR_NAME == 2);
+ compile_time_assert(DICT_NUM_COLS__SYS_FOREIGN_COLS == 4);
+ compile_time_assert(DICT_NUM_FIELDS__SYS_FOREIGN_COLS == 6);
+
+ mtr_start(&mtr);
+
+ /* Create the hash tables etc. */
+ dict_sys.create();
+
+ /* Scratch heap for the column definitions; it is emptied after
+ each table and freed after the last one. */
+ heap = mem_heap_create(450);
+
+ mutex_enter(&dict_sys.mutex);
+
+ /* Get the dictionary header */
+ const byte* dict_hdr = &dict_hdr_get(&mtr)->frame[DICT_HDR];
+
+ /* Because we only write new row ids to disk-based data structure
+ (dictionary header) when it is divisible by
+ DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
+ the latest value of the row id counter. Therefore we advance
+ the counter at the database startup to avoid overlapping values.
+ Note that when a user after database startup first time asks for
+ a new row id, then because the counter is now divisible by
+ ..._MARGIN, it will immediately be updated to the disk-based
+ header. */
+
+ dict_sys.row_id = DICT_HDR_ROW_ID_WRITE_MARGIN
+ + ut_uint64_align_up(mach_read_from_8(dict_hdr + DICT_HDR_ROW_ID),
+ DICT_HDR_ROW_ID_WRITE_MARGIN);
+ /* If a maximum tablespace id has been persisted, pass it (minus one)
+ to fil_assign_new_space_id(); the id it hands back is not used here. */
+ if (ulint max_space_id = mach_read_from_4(dict_hdr
+ + DICT_HDR_MAX_SPACE_ID)) {
+ max_space_id--;
+ fil_assign_new_space_id(&max_space_id);
+ }
+
+ /* Insert into the dictionary cache the descriptions of the basic
+ system tables */
+ /*-------------------------*/
+ table = dict_mem_table_create("SYS_TABLES", fil_system.sys_space,
+ DICT_NUM_COLS__SYS_TABLES, 0, 0, 0);
+
+ dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0,
+ MAX_FULL_NAME_LEN);
+ dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 8);
+ /* ROW_FORMAT = (N_COLS >> 31) ? COMPACT : REDUNDANT */
+ dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4);
+ /* The low order bit of TYPE is always set to 1. If ROW_FORMAT
+ is not REDUNDANT or COMPACT, this field matches table->flags. */
+ dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
+ dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
+ /* MIX_LEN may contain additional table flags when
+ ROW_FORMAT!=REDUNDANT. */
+ dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4);
+ dict_mem_table_add_col(table, heap, "CLUSTER_NAME", DATA_BINARY, 0, 0);
+ dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
+
+ table->id = DICT_TABLES_ID;
+
+ dict_table_add_system_columns(table, heap);
+ table->add_to_cache();
+ dict_sys.sys_tables = table;
+ mem_heap_empty(heap);
+
+ index = dict_mem_index_create(table, "CLUST_IND",
+ DICT_UNIQUE | DICT_CLUSTERED, 1);
+
+ dict_mem_index_add_field(index, "NAME", 0);
+
+ index->id = DICT_TABLES_ID;
+ /* The root page number comes from the dictionary header. */
+ dberr_t error = dict_index_add_to_cache(
+ index, mach_read_from_4(dict_hdr + DICT_HDR_TABLES));
+ ut_a(error == DB_SUCCESS);
+ ut_ad(!table->is_instant());
+ table->indexes.start->n_core_null_bytes = static_cast<uint8_t>(
+ UT_BITS_IN_BYTES(unsigned(table->indexes.start->n_nullable)));
+
+ /*-------------------------*/
+ index = dict_mem_index_create(table, "ID_IND", DICT_UNIQUE, 1);
+ dict_mem_index_add_field(index, "ID", 0);
+
+ index->id = DICT_TABLE_IDS_ID;
+ error = dict_index_add_to_cache(
+ index, mach_read_from_4(dict_hdr + DICT_HDR_TABLE_IDS));
+ ut_a(error == DB_SUCCESS);
+
+ /*-------------------------*/
+ table = dict_mem_table_create("SYS_COLUMNS", fil_system.sys_space,
+ DICT_NUM_COLS__SYS_COLUMNS, 0, 0, 0);
+
+ dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 8);
+ dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
+ dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
+ dict_mem_table_add_col(table, heap, "MTYPE", DATA_INT, 0, 4);
+ dict_mem_table_add_col(table, heap, "PRTYPE", DATA_INT, 0, 4);
+ dict_mem_table_add_col(table, heap, "LEN", DATA_INT, 0, 4);
+ dict_mem_table_add_col(table, heap, "PREC", DATA_INT, 0, 4);
+
+ table->id = DICT_COLUMNS_ID;
+
+ dict_table_add_system_columns(table, heap);
+ table->add_to_cache();
+ dict_sys.sys_columns = table;
+ mem_heap_empty(heap);
+
+ index = dict_mem_index_create(table, "CLUST_IND",
+ DICT_UNIQUE | DICT_CLUSTERED, 2);
+
+ dict_mem_index_add_field(index, "TABLE_ID", 0);
+ dict_mem_index_add_field(index, "POS", 0);
+
+ index->id = DICT_COLUMNS_ID;
+ error = dict_index_add_to_cache(
+ index, mach_read_from_4(dict_hdr + DICT_HDR_COLUMNS));
+ ut_a(error == DB_SUCCESS);
+ ut_ad(!table->is_instant());
+ table->indexes.start->n_core_null_bytes = static_cast<uint8_t>(
+ UT_BITS_IN_BYTES(unsigned(table->indexes.start->n_nullable)));
+
+ /*-------------------------*/
+ table = dict_mem_table_create("SYS_INDEXES", fil_system.sys_space,
+ DICT_NUM_COLS__SYS_INDEXES, 0, 0, 0);
+
+ dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 8);
+ dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 8);
+ dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
+ dict_mem_table_add_col(table, heap, "N_FIELDS", DATA_INT, 0, 4);
+ dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
+ /* SYS_INDEXES.SPACE is redundant and not being read;
+ SYS_TABLES.SPACE is being used instead. */
+ dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
+ dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4);
+ dict_mem_table_add_col(table, heap, "MERGE_THRESHOLD", DATA_INT, 0, 4);
+
+ table->id = DICT_INDEXES_ID;
+
+ dict_table_add_system_columns(table, heap);
+ /* The column SYS_INDEXES.MERGE_THRESHOLD was "instantly"
+ added in MySQL 5.7 and MariaDB 10.2.2. Assign it DEFAULT NULL.
+ Because of file format compatibility, we must treat SYS_INDEXES
+ as a special case, relaxing some debug assertions
+ for DICT_INDEXES_ID. */
+ dict_table_get_nth_col(table, DICT_COL__SYS_INDEXES__MERGE_THRESHOLD)
+ ->def_val.len = UNIV_SQL_NULL;
+ table->add_to_cache();
+ dict_sys.sys_indexes = table;
+ mem_heap_empty(heap);
+
+ index = dict_mem_index_create(table, "CLUST_IND",
+ DICT_UNIQUE | DICT_CLUSTERED, 2);
+
+ dict_mem_index_add_field(index, "TABLE_ID", 0);
+ dict_mem_index_add_field(index, "ID", 0);
+
+ index->id = DICT_INDEXES_ID;
+ error = dict_index_add_to_cache(
+ index, mach_read_from_4(dict_hdr + DICT_HDR_INDEXES));
+ ut_a(error == DB_SUCCESS);
+ ut_ad(!table->is_instant());
+ table->indexes.start->n_core_null_bytes = static_cast<uint8_t>(
+ UT_BITS_IN_BYTES(unsigned(table->indexes.start->n_nullable)));
+
+ /*-------------------------*/
+ table = dict_mem_table_create("SYS_FIELDS", fil_system.sys_space,
+ DICT_NUM_COLS__SYS_FIELDS, 0, 0, 0);
+
+ dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 8);
+ dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
+ dict_mem_table_add_col(table, heap, "COL_NAME", DATA_BINARY, 0, 0);
+
+ table->id = DICT_FIELDS_ID;
+
+ dict_table_add_system_columns(table, heap);
+ table->add_to_cache();
+ dict_sys.sys_fields = table;
+ /* Last table definition: the scratch heap is no longer needed. */
+ mem_heap_free(heap);
+
+ index = dict_mem_index_create(table, "CLUST_IND",
+ DICT_UNIQUE | DICT_CLUSTERED, 2);
+
+ dict_mem_index_add_field(index, "INDEX_ID", 0);
+ dict_mem_index_add_field(index, "POS", 0);
+
+ index->id = DICT_FIELDS_ID;
+ error = dict_index_add_to_cache(
+ index, mach_read_from_4(dict_hdr + DICT_HDR_FIELDS));
+ ut_a(error == DB_SUCCESS);
+ ut_ad(!table->is_instant());
+ table->indexes.start->n_core_null_bytes = static_cast<uint8_t>(
+ UT_BITS_IN_BYTES(unsigned(table->indexes.start->n_nullable)));
+
+ /* dict_hdr points into the header page and is not used past
+ this point. */
+ mtr_commit(&mtr);
+
+ /*-------------------------*/
+
+ /* Initialize the insert buffer table and index for each tablespace */
+
+ dberr_t err = ibuf_init_at_db_start();
+
+ if (err == DB_SUCCESS) {
+ /* Load definitions of other indexes on system tables */
+
+ dict_load_sys_table(dict_sys.sys_tables);
+ dict_load_sys_table(dict_sys.sys_columns);
+ dict_load_sys_table(dict_sys.sys_indexes);
+ dict_load_sys_table(dict_sys.sys_fields);
+ }
+
+ mutex_exit(&dict_sys.mutex);
+
+ return(err);
+}
+
+/*****************************************************************//**
+Placeholder for inserting the rows that describe the basic system tables
+into those tables themselves at database creation. Currently a no-op. */
+static
+void
+dict_insert_initial_data(void)
+/*==========================*/
+{
+	/* Intentionally empty: nothing is inserted yet. */
+}
+
+/*****************************************************************//**
+Creates and initializes the data dictionary at the server bootstrap.
+First creates the dictionary header page and the system table B-tree
+roots in one mini-transaction, then boots the dictionary from it.
+@return DB_SUCCESS or error code; DB_ERROR if the dictionary header
+could not be created. */
+dberr_t
+dict_create(void)
+/*=============*/
+{
+	mtr_t	mtr;
+
+	mtr_start(&mtr);
+
+	const ibool	hdr_created = dict_hdr_create(&mtr);
+
+	/* The mini-transaction must be committed on failure as well. */
+	mtr_commit(&mtr);
+
+	if (!hdr_created) {
+		/* Do not boot from a header whose root page numbers
+		were never written. */
+		return(DB_ERROR);
+	}
+
+	dberr_t	err = dict_boot();
+
+	if (err == DB_SUCCESS) {
+		dict_insert_initial_data();
+	}
+
+	return(err);
+}
diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc
new file mode 100644
index 00000000..55e3191c
--- /dev/null
+++ b/storage/innobase/dict/dict0crea.cc
@@ -0,0 +1,2237 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2020, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dict/dict0crea.cc
+Database object creation
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#include "dict0crea.h"
+#include "btr0pcur.h"
+#ifdef BTR_CUR_HASH_ADAPT
+# include "btr0sea.h"
+#endif /* BTR_CUR_HASH_ADAPT */
+#include "page0page.h"
+#include "mach0data.h"
+#include "dict0boot.h"
+#include "dict0dict.h"
+#include "que0que.h"
+#include "row0ins.h"
+#include "row0mysql.h"
+#include "pars0pars.h"
+#include "trx0roll.h"
+#include "trx0rseg.h"
+#include "trx0undo.h"
+#include "ut0vec.h"
+#include "dict0priv.h"
+#include "fts0priv.h"
+#include "srv0start.h"
+
+/*****************************************************************//**
+Builds, from a table object, the row that is to be inserted into the
+SYS_TABLES system table. All field buffers are allocated from heap.
+@return the tuple which should be inserted */
+static
+dtuple_t*
+dict_create_sys_tables_tuple(
+/*=========================*/
+	const dict_table_t*	table,	/*!< in: table */
+	mem_heap_t*		heap)	/*!< in: memory heap from
+					which the memory for the built
+					tuple is allocated */
+{
+	ut_ad(table);
+	ut_ad(!table->space || table->space->id == table->space_id);
+	ut_ad(heap);
+	ut_ad(table->n_cols >= DATA_N_SYS_COLS);
+
+	dtuple_t* const	tuple = dtuple_create(heap, 8 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(tuple, dict_sys.sys_tables);
+
+	/* Store a 4-byte value, encoded with mach_write_to_4(), in the
+	given field of the tuple. */
+	auto store_u32 = [tuple, heap](ulint pos, ulint value) {
+		byte*	buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+		mach_write_to_4(buf, value);
+		dfield_set_data(dtuple_get_nth_field(tuple, pos), buf, 4);
+	};
+
+	/* 0: NAME -----------------------------*/
+	dfield_set_data(
+		dtuple_get_nth_field(tuple, DICT_COL__SYS_TABLES__NAME),
+		table->name.m_name, strlen(table->name.m_name));
+
+	/* 1: DB_TRX_ID added later */
+	/* 2: DB_ROLL_PTR added later */
+	/* 3: ID -------------------------------*/
+	byte*	id_buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
+	mach_write_to_8(id_buf, table->id);
+	dfield_set_data(
+		dtuple_get_nth_field(tuple, DICT_COL__SYS_TABLES__ID),
+		id_buf, 8);
+
+	/* 4: N_COLS ---------------------------*/
+	/* If there is any virtual column, encode it in N_COLS */
+	store_u32(DICT_COL__SYS_TABLES__N_COLS,
+		  dict_table_encode_n_col(
+			  ulint(table->n_cols - DATA_N_SYS_COLS),
+			  ulint(table->n_v_def))
+		  | (ulint(table->flags & DICT_TF_COMPACT) << 31));
+
+	/* 5: TYPE (table flags) -----------------------------*/
+	/* Validate the table flags and convert them to what is saved in
+	SYS_TABLES.TYPE. Table flag values 0 and 1 are both written to
+	SYS_TABLES.TYPE as 1. */
+	store_u32(DICT_COL__SYS_TABLES__TYPE,
+		  dict_tf_to_sys_tables_type(table->flags));
+
+	/* 6: MIX_ID (obsolete) ---------------------------*/
+	byte*	mix_id_buf = static_cast<byte*>(mem_heap_zalloc(heap, 8));
+	dfield_set_data(
+		dtuple_get_nth_field(tuple, DICT_COL__SYS_TABLES__MIX_ID),
+		mix_id_buf, 8);
+
+	/* 7: MIX_LEN (additional flags) --------------------------*/
+	/* Be sure all non-used bits are zero. */
+	ut_a(!(table->flags2 & DICT_TF2_UNUSED_BIT_MASK));
+	store_u32(DICT_COL__SYS_TABLES__MIX_LEN, table->flags2);
+
+	/* 8: CLUSTER_NAME ---------------------*/
+	dfield_set_null(dtuple_get_nth_field(
+		tuple, DICT_COL__SYS_TABLES__CLUSTER_ID)); /* not supported */
+
+	/* 9: SPACE ----------------------------*/
+	store_u32(DICT_COL__SYS_TABLES__SPACE, table->space_id);
+	/*----------------------------------*/
+
+	return(tuple);
+}
+
+/*****************************************************************//**
+Builds, from a table object and a column number, the row that is to be
+inserted into the SYS_COLUMNS system table. Column numbers at or beyond
+table->n_def refer to virtual columns.
+@return the tuple which should be inserted */
+static
+dtuple_t*
+dict_create_sys_columns_tuple(
+/*==========================*/
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			i,	/*!< in: column number */
+	mem_heap_t*		heap)	/*!< in: memory heap from
+					which the memory for the built
+					tuple is allocated */
+{
+	ut_ad(table);
+	ut_ad(heap);
+
+	/* Any column beyond table->n_def would be virtual columns */
+	const bool		beyond_n_def = i >= table->n_def;
+	const dict_col_t*	col;
+	ulint			n_base = 0;
+	ulint			v_pos = ULINT_UNDEFINED;
+
+	if (beyond_n_def) {
+		dict_v_col_t*	v_col = dict_table_get_nth_v_col(
+			table, i - table->n_def);
+		col = &v_col->m_col;
+		n_base = v_col->num_base;
+		v_pos = col->ind;
+	} else {
+		col = dict_table_get_nth_col(table, i);
+		ut_ad(!col->is_virtual());
+	}
+
+	dtuple_t* const	tuple = dtuple_create(heap, 7 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(tuple, dict_sys.sys_columns);
+
+	/* Store a 4-byte value, encoded with mach_write_to_4(), in the
+	given field of the tuple. */
+	auto store_u32 = [tuple, heap](ulint pos, ulint value) {
+		byte*	buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+		mach_write_to_4(buf, value);
+		dfield_set_data(dtuple_get_nth_field(tuple, pos), buf, 4);
+	};
+
+	/* 0: TABLE_ID -----------------------*/
+	byte*	id_buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
+	mach_write_to_8(id_buf, table->id);
+	dfield_set_data(
+		dtuple_get_nth_field(tuple, DICT_COL__SYS_COLUMNS__TABLE_ID),
+		id_buf, 8);
+
+	/* 1: POS ----------------------------*/
+	/* For a virtual column, encode its position in MySQL table and
+	InnoDB table in "POS" */
+	store_u32(DICT_COL__SYS_COLUMNS__POS,
+		  v_pos != ULINT_UNDEFINED
+		  ? dict_create_v_col_pos(i - table->n_def, v_pos)
+		  : i);
+
+	/* 2: DB_TRX_ID added later */
+	/* 3: DB_ROLL_PTR added later */
+	/* 4: NAME ---------------------------*/
+	const char*	name = beyond_n_def
+		? dict_table_get_v_col_name(table, i - table->n_def)
+		: dict_table_get_col_name(table, i);
+
+	dfield_set_data(
+		dtuple_get_nth_field(tuple, DICT_COL__SYS_COLUMNS__NAME),
+		name, strlen(name));
+
+	/* 5: MTYPE --------------------------*/
+	store_u32(DICT_COL__SYS_COLUMNS__MTYPE, col->mtype);
+
+	/* 6: PRTYPE -------------------------*/
+	store_u32(DICT_COL__SYS_COLUMNS__PRTYPE, col->prtype);
+
+	/* 7: LEN ----------------------------*/
+	store_u32(DICT_COL__SYS_COLUMNS__LEN, col->len);
+
+	/* 8: PREC ---------------------------*/
+	/* PREC carries the number of base columns (0 unless the column
+	is a virtual one). */
+	store_u32(DICT_COL__SYS_COLUMNS__PREC, n_base);
+	/*---------------------------------*/
+
+	return(tuple);
+}
+
+/** Builds the row to be inserted into the SYS_VIRTUAL system table for
+one (virtual column, base column) pair of a table. Each row maps a virtual
+column to one of its base columns.
+@param[in]	table	table
+@param[in]	v_col_n	virtual column number
+@param[in]	b_col_n	base column sequence num
+@param[in]	heap	memory heap from which the tuple is allocated
+@return the tuple which should be inserted */
+static
+dtuple_t*
+dict_create_sys_virtual_tuple(
+	const dict_table_t*	table,
+	ulint			v_col_n,
+	ulint			b_col_n,
+	mem_heap_t*		heap)
+{
+	ut_ad(table);
+	ut_ad(heap);
+	ut_ad(v_col_n < table->n_v_def);
+
+	dict_v_col_t*		v_col = dict_table_get_nth_v_col(
+		table, v_col_n);
+	const dict_col_t*	base_col = v_col->base_col[b_col_n];
+
+	dtuple_t* const	tuple = dtuple_create(
+		heap, DICT_NUM_COLS__SYS_VIRTUAL + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(tuple, dict_sys.sys_virtual);
+
+	/* Store a 4-byte value, encoded with mach_write_to_4(), in the
+	given field of the tuple. */
+	auto store_u32 = [tuple, heap](ulint pos, ulint value) {
+		byte*	buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+		mach_write_to_4(buf, value);
+		dfield_set_data(dtuple_get_nth_field(tuple, pos), buf, 4);
+	};
+
+	/* 0: TABLE_ID -----------------------*/
+	byte*	id_buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
+	mach_write_to_8(id_buf, table->id);
+	dfield_set_data(
+		dtuple_get_nth_field(tuple, DICT_COL__SYS_VIRTUAL__TABLE_ID),
+		id_buf, 8);
+
+	/* 1: POS ---------------------------*/
+	store_u32(DICT_COL__SYS_VIRTUAL__POS,
+		  dict_create_v_col_pos(v_col_n, v_col->m_col.ind));
+
+	/* 2: BASE_POS ----------------------------*/
+	store_u32(DICT_COL__SYS_VIRTUAL__BASE_POS, base_col->ind);
+
+	/* 3: DB_TRX_ID added later */
+	/* 4: DB_ROLL_PTR added later */
+
+	/*---------------------------------*/
+	return(tuple);
+}
+
+/***************************************************************//**
+Builds a table definition to insert.
+Assigns a new table id to node->table and records it in the transaction.
+If DICT_TF2_USE_FILE_PER_TABLE is set, a new tablespace id is allocated
+and a single-table tablespace is created with fil_ibd_create(); otherwise
+the table is placed in the system tablespace. Finally the SYS_TABLES
+tuple of the table is set as the new row of node->tab_def.
+The caller must hold dict_sys.mutex.
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+dict_build_table_def_step(
+/*======================*/
+ que_thr_t* thr, /*!< in: query thread */
+ tab_node_t* node) /*!< in: table create node */
+{
+ ut_ad(mutex_own(&dict_sys.mutex));
+ dict_table_t* table = node->table;
+ trx_t* trx = thr_get_trx(thr);
+ ut_ad(!table->is_temporary());
+ ut_ad(!table->space);
+ ut_ad(table->space_id == ULINT_UNDEFINED);
+ /* Only a table id is requested here (index and space id are NULL). */
+ dict_hdr_get_new_id(&table->id, NULL, NULL);
+ trx->table_id = table->id;
+
+ /* Always set this bit for all new created tables */
+ DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME);
+ DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
+ DICT_TF2_FLAG_UNSET(table,
+ DICT_TF2_FTS_AUX_HEX_NAME););
+
+ if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_FILE_PER_TABLE)) {
+ /* This table will need a new tablespace. */
+
+ ut_ad(DICT_TF_GET_ZIP_SSIZE(table->flags) == 0
+ || dict_table_has_atomic_blobs(table));
+ mtr_t mtr;
+ trx_undo_t* undo = trx->rsegs.m_redo.undo;
+ if (undo && !undo->table_id
+ && trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE) {
+ /* This must be a TRUNCATE operation where
+ the empty table is created after the old table
+ was renamed. Be sure to mark the transaction
+ associated with the new empty table, so that
+ we can remove it on recovery. */
+ mtr.start();
+ undo->table_id = trx->table_id;
+ undo->dict_operation = TRUE;
+ buf_block_t* block = trx_undo_page_get(
+ page_id_t(trx->rsegs.m_redo.rseg->space->id,
+ undo->hdr_page_no),
+ &mtr);
+ /* Mirror the two in-memory undo fields set above in the
+ undo log header on the page. */
+ mtr.write<1,mtr_t::MAYBE_NOP>(
+ *block,
+ block->frame + undo->hdr_offset
+ + TRX_UNDO_DICT_TRANS, 1U);
+ mtr.write<8,mtr_t::MAYBE_NOP>(
+ *block,
+ block->frame + undo->hdr_offset
+ + TRX_UNDO_TABLE_ID, trx->table_id);
+ mtr.commit();
+ /* Write the log up to the LSN of the mini-transaction just
+ committed; NOTE(review): the second argument presumably
+ requests a flush to disk - confirm against log_write_up_to(). */
+ log_write_up_to(mtr.commit_lsn(), true);
+ }
+ /* Get a new tablespace ID */
+ ulint space_id;
+ dict_hdr_get_new_id(NULL, NULL, &space_id);
+
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_out_of_space_ids",
+ space_id = ULINT_UNDEFINED;
+ );
+
+ /* ULINT_UNDEFINED means that no tablespace id is available
+ (or that the debug injection above forced this failure). */
+ if (space_id == ULINT_UNDEFINED) {
+ return DB_ERROR;
+ }
+
+ /* Determine the tablespace flags. */
+ bool has_data_dir = DICT_TF_HAS_DATA_DIR(table->flags);
+ ulint fsp_flags = dict_tf_to_fsp_flags(table->flags);
+ ut_ad(!has_data_dir || table->data_dir_path);
+ /* Build the data file path, based on table->data_dir_path when
+ the table has the DATA DIRECTORY flag. */
+ char* filepath = has_data_dir
+ ? fil_make_filepath(table->data_dir_path,
+ table->name.m_name, IBD, true)
+ : fil_make_filepath(NULL,
+ table->name.m_name, IBD, false);
+
+ /* We create a new single-table tablespace for the table.
+ We initially let it be 4 pages:
+ - page 0 is the fsp header and an extent descriptor page,
+ - page 1 is an ibuf bitmap page,
+ - page 2 is the first inode page,
+ - page 3 will contain the root of the clustered index of
+ the table we create here. */
+
+ dberr_t err;
+ table->space = fil_ibd_create(
+ space_id, table->name.m_name, filepath, fsp_flags,
+ FIL_IBD_FILE_INITIAL_SIZE,
+ node->mode, node->key_id, &err);
+
+ /* The path is not referenced by this function any more. */
+ ut_free(filepath);
+
+ if (!table->space) {
+ ut_ad(err != DB_SUCCESS);
+ return err;
+ }
+
+ table->space_id = space_id;
+ /* Initialize the header of the new tablespace in its own
+ mini-transaction. */
+ mtr.start();
+ mtr.set_named_space(table->space);
+ fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr);
+ mtr.commit();
+ } else {
+ /* No file-per-table: the table lives in the system tablespace. */
+ ut_ad(dict_tf_get_rec_format(table->flags)
+ != REC_FORMAT_COMPRESSED);
+ table->space = fil_system.sys_space;
+ table->space_id = TRX_SYS_SPACE;
+ }
+
+ /* Hand the SYS_TABLES row for this table to the insert node. */
+ ins_node_set_new_row(node->tab_def,
+ dict_create_sys_tables_tuple(table, node->heap));
+ return DB_SUCCESS;
+}
+
+/** Builds the SYS_VIRTUAL row for the virtual column / base column pair
+currently selected in the node (node->col_no, node->base_col_no) and sets
+it as the new row of node->v_col_def.
+@param[in]	node	table create node */
+static
+void
+dict_build_v_col_def_step(
+	tab_node_t*	node)
+{
+	ins_node_set_new_row(
+		node->v_col_def,
+		dict_create_sys_virtual_tuple(node->table, node->col_no,
+					      node->base_col_no,
+					      node->heap));
+}
+
+/*****************************************************************//**
+Builds, from an index object, the row that is to be inserted into the
+SYS_INDEXES system table. PAGE_NO is written as FIL_NULL and
+MERGE_THRESHOLD as DICT_INDEX_MERGE_THRESHOLD_DEFAULT.
+The caller must hold dict_sys.mutex.
+@return the tuple which should be inserted */
+static
+dtuple_t*
+dict_create_sys_indexes_tuple(
+/*==========================*/
+	const dict_index_t*	index,	/*!< in: index */
+	mem_heap_t*		heap)	/*!< in: memory heap from
+					which the memory for the built
+					tuple is allocated */
+{
+	ut_ad(mutex_own(&dict_sys.mutex));
+	ut_ad(index);
+	ut_ad(index->table->space || index->table->file_unreadable);
+	ut_ad(!index->table->space
+	      || index->table->space->id == index->table->space_id);
+	ut_ad(heap);
+
+	dtuple_t* const	tuple = dtuple_create(
+		heap, DICT_NUM_COLS__SYS_INDEXES + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(tuple, dict_sys.sys_indexes);
+
+	/* Store an 8-byte id, encoded with mach_write_to_8(), in the
+	given field of the tuple. */
+	auto store_u64 = [tuple, heap](ulint pos, ib_id_t value) {
+		byte*	buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
+		mach_write_to_8(buf, value);
+		dfield_set_data(dtuple_get_nth_field(tuple, pos), buf, 8);
+	};
+
+	/* Store a 4-byte value, encoded with mach_write_to_4(), in the
+	given field of the tuple. */
+	auto store_u32 = [tuple, heap](ulint pos, ulint value) {
+		byte*	buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+		mach_write_to_4(buf, value);
+		dfield_set_data(dtuple_get_nth_field(tuple, pos), buf, 4);
+	};
+
+	/* 0: TABLE_ID -----------------------*/
+	store_u64(DICT_COL__SYS_INDEXES__TABLE_ID, index->table->id);
+
+	/* 1: ID ----------------------------*/
+	store_u64(DICT_COL__SYS_INDEXES__ID, index->id);
+
+	/* 2: DB_TRX_ID added later */
+	/* 3: DB_ROLL_PTR added later */
+	/* 4: NAME --------------------------*/
+	dfield_t* const	name_field = dtuple_get_nth_field(
+		tuple, DICT_COL__SYS_INDEXES__NAME);
+
+	if (index->is_committed()) {
+		dfield_set_data(name_field, index->name,
+				strlen(index->name));
+	} else {
+		/* An uncommitted index is stored with the first character
+		of TEMP_INDEX_PREFIX_STR in front of its name. */
+		ulint	prefixed_len = strlen(index->name) + 1;
+		char*	prefixed = static_cast<char*>(
+			mem_heap_alloc(heap, prefixed_len));
+		*prefixed = *TEMP_INDEX_PREFIX_STR;
+		memcpy(prefixed + 1, index->name, prefixed_len - 1);
+		dfield_set_data(name_field, prefixed, prefixed_len);
+	}
+
+	/* 5: N_FIELDS ----------------------*/
+	store_u32(DICT_COL__SYS_INDEXES__N_FIELDS, index->n_fields);
+
+	/* 6: TYPE --------------------------*/
+	store_u32(DICT_COL__SYS_INDEXES__TYPE, index->type);
+
+	/* 7: SPACE --------------------------*/
+	store_u32(DICT_COL__SYS_INDEXES__SPACE, index->table->space_id);
+
+	/* 8: PAGE_NO --------------------------*/
+	store_u32(DICT_COL__SYS_INDEXES__PAGE_NO, FIL_NULL);
+
+	/* 9: MERGE_THRESHOLD ----------------*/
+	store_u32(DICT_COL__SYS_INDEXES__MERGE_THRESHOLD,
+		  DICT_INDEX_MERGE_THRESHOLD_DEFAULT);
+
+	/*--------------------------------*/
+
+	return(tuple);
+}
+
+/*****************************************************************//**
+Builds the SYS_FIELDS row that describes one field of an index.
+DB_TRX_ID and DB_ROLL_PTR are not filled in here.
+@return the tuple which should be inserted */
+static
+dtuple_t*
+dict_create_sys_fields_tuple(
+/*=========================*/
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			fld_no,	/*!< in: field number */
+	mem_heap_t*		heap)	/*!< in: memory heap from
+					which the memory for the built
+					tuple is allocated */
+{
+	ut_ad(index);
+	ut_ad(heap);
+
+	/* The encoding of POS depends on whether any field of the index
+	(not only field fld_no) is a column prefix. */
+	bool	any_prefix = false;
+
+	for (ulint i = 0; i < index->n_fields; i++) {
+		if (dict_index_get_nth_field(index, i)->prefix_len > 0) {
+			any_prefix = true;
+			break;
+		}
+	}
+
+	dict_field_t*	fld = dict_index_get_nth_field(index, fld_no);
+	dtuple_t*	tuple = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
+
+	dict_table_copy_types(tuple, dict_sys.sys_fields);
+
+	/* INDEX_ID */
+	dfield_t*	col = dtuple_get_nth_field(
+		tuple, DICT_COL__SYS_FIELDS__INDEX_ID);
+	byte*		buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
+
+	mach_write_to_8(buf, index->id);
+	dfield_set_data(col, buf, 8);
+
+	/* POS: field number and prefix length.
+	With column prefix fields in the index, the field number goes to
+	the 2 HIGH bytes and the prefix length to the 2 low bytes.
+	Otherwise the field number goes to the 2 LOW bytes, which keeps
+	the storage format compatible with InnoDB versions < 4.0.14. */
+	col = dtuple_get_nth_field(tuple, DICT_COL__SYS_FIELDS__POS);
+	buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+
+	mach_write_to_4(buf, any_prefix
+			? (fld_no << 16) + fld->prefix_len
+			: fld_no);
+	dfield_set_data(col, buf, 4);
+
+	/* DB_TRX_ID and DB_ROLL_PTR are added later */
+
+	/* COL_NAME, stored without a terminating NUL byte */
+	col = dtuple_get_nth_field(tuple, DICT_COL__SYS_FIELDS__COL_NAME);
+	dfield_set_data(col, fld->name, strlen(fld->name));
+
+	return(tuple);
+}
+
+/*****************************************************************//**
+Creates a two-field search tuple from the first two fields of the tuple
+that was inserted into SYS_INDEXES. It is used for finding the index entry
+when the root page number of a created index tree is written.
+@return the tuple for search */
+static
+dtuple_t*
+dict_create_search_tuple(
+/*=====================*/
+	const dtuple_t*	tuple,	/*!< in: the tuple inserted in the SYS_INDEXES
+				table */
+	mem_heap_t*	heap)	/*!< in: memory heap from which the memory for
+				the built tuple is allocated */
+{
+	ut_ad(tuple && heap);
+
+	dtuple_t* const	key = dtuple_create(heap, 2);
+
+	/* Copy fields 0 and 1 of the inserted tuple. */
+	for (ulint i = 0; i < 2; i++) {
+		dfield_copy(dtuple_get_nth_field(key, i),
+			    dtuple_get_nth_field(tuple, i));
+	}
+
+	ut_ad(dtuple_validate(key));
+
+	return(key);
+}
+
+/***************************************************************//**
+Prepares the SYS_INDEXES row for the index of an index create node.
+Opens the table named by node->table_name, attaches it to the node and to
+the index, assigns a new index id and passes the built row to the node's
+SYS_INDEXES insert sub-node.
+@return DB_SUCCESS, or DB_TABLE_NOT_FOUND if the table cannot be opened */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+dict_build_index_def_step(
+/*======================*/
+	que_thr_t*	thr,	/*!< in: query thread */
+	ind_node_t*	node)	/*!< in: index create node */
+{
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	trx_t* const		trx = thr_get_trx(thr);
+	dict_index_t* const	index = node->index;
+	dict_table_t* const	table = dict_table_open_on_name(
+		node->table_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
+
+	/* Both pointers are updated even if the table was not found. */
+	node->table = table;
+	index->table = table;
+
+	if (!table) {
+		return(DB_TABLE_NOT_FOUND);
+	}
+
+	if (trx->table_id == 0) {
+		/* Only the first table id is remembered. */
+		trx->table_id = table->id;
+	}
+
+	ut_ad(dict_index_is_clust(index)
+	      || UT_LIST_GET_LEN(table->indexes) > 0);
+
+	dict_hdr_get_new_id(NULL, &index->id, NULL);
+
+	/* No index tree exists yet; the row is built with
+	PAGE_NO = FIL_NULL. */
+	node->page_no = FIL_NULL;
+
+	dtuple_t* const	ind_row = dict_create_sys_indexes_tuple(
+		index, node->heap);
+
+	node->ind_row = ind_row;
+	ins_node_set_new_row(node->ind_def, ind_row);
+
+	/* Remember that this transaction created the index and was the
+	last one to change the table definition. */
+	index->trx_id = trx->id;
+	ut_ad(table->def_trx_id <= trx->id);
+	table->def_trx_id = trx->id;
+
+	dict_table_close(table, true, false);
+
+	return(DB_SUCCESS);
+}
+
+/***************************************************************//**
+Builds an index definition without updating SYSTEM TABLES.
+Assigns a new index id to the index and records the creating transaction
+in index->trx_id. If the transaction has no table id recorded yet,
+table->id is stored in trx->table_id. The function returns nothing. */
+void
+dict_build_index_def(
+/*=================*/
+	const dict_table_t*	table,	/*!< in: table */
+	dict_index_t*		index,	/*!< in/out: index */
+	trx_t*			trx)	/*!< in/out: InnoDB transaction handle */
+{
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	if (trx->table_id == 0) {
+		/* Record only the first table id. */
+		trx->table_id = table->id;
+	}
+
+	/* The first index that is added to a table must be the
+	clustered index. */
+	ut_ad((UT_LIST_GET_LEN(table->indexes) > 0)
+	      || dict_index_is_clust(index));
+
+	dict_hdr_get_new_id(NULL, &index->id, NULL);
+
+	/* Note that the index was created by this transaction. */
+	index->trx_id = trx->id;
+}
+
+/***************************************************************//**
+Prepares the SYS_FIELDS row for field node->field_no of the index being
+created and passes it to the node's SYS_FIELDS insert sub-node. */
+static
+void
+dict_build_field_def_step(
+/*======================*/
+	ind_node_t*	node)	/*!< in: index create node */
+{
+	ins_node_set_new_row(
+		node->field_def,
+		dict_create_sys_fields_tuple(node->index, node->field_no,
+					     node->heap));
+}
+
+/***************************************************************//**
+Creates the index tree for the index of an index create node and writes
+the root page number to the PAGE_NO field of the index's SYS_INDEXES
+record. No tree is created for an index whose type is DICT_FTS (the
+function returns at once), or for an index that is not readable (FIL_NULL
+is recorded as the root page number).
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+dict_create_index_tree_step(
+/*========================*/
+ ind_node_t* node) /*!< in: index create node */
+{
+ mtr_t mtr;
+ btr_pcur_t pcur;
+ dict_index_t* index;
+ dtuple_t* search_tuple;
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ index = node->index;
+
+ if (index->type == DICT_FTS) {
+ /* FTS index does not need an index tree */
+ return(DB_SUCCESS);
+ }
+
+ /* Run a mini-transaction in which the index tree is allocated for
+ the index and its root address is written to the index entry in
+ sys_indexes */
+
+ mtr.start();
+
+ search_tuple = dict_create_search_tuple(node->ind_row, node->heap);
+
+ /* Open the cursor in PAGE_CUR_L mode on the first index of
+ SYS_INDEXES and step to the next user record; presumably this is
+ the record that was inserted for this index (the search tuple is
+ built from the first two fields of node->ind_row). */
+ btr_pcur_open(UT_LIST_GET_FIRST(dict_sys.sys_indexes->indexes),
+ search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF,
+ &pcur, &mtr);
+
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+
+
+ dberr_t err = DB_SUCCESS;
+
+ if (!index->is_readable()) {
+ node->page_no = FIL_NULL;
+ } else {
+ index->set_modified(mtr);
+
+ node->page_no = btr_create(
+ index->type, index->table->space,
+ index->id, index, &mtr);
+
+ /* btr_create() returning FIL_NULL is reported as running out
+ of file space. */
+ if (node->page_no == FIL_NULL) {
+ err = DB_OUT_OF_FILE_SPACE;
+ }
+
+ DBUG_EXECUTE_IF("ib_import_create_index_failure_1",
+ node->page_no = FIL_NULL;
+ err = DB_OUT_OF_FILE_SPACE; );
+ }
+
+ /* Write node->page_no (FIL_NULL if no tree was created) to the
+ PAGE_NO field of the record, also on the error path. */
+ ulint len;
+ byte* data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur),
+ DICT_FLD__SYS_INDEXES__PAGE_NO,
+ &len);
+ ut_ad(len == 4);
+ mtr.write<4,mtr_t::MAYBE_NOP>(*btr_pcur_get_block(&pcur), data,
+ node->page_no);
+
+ mtr.commit();
+
+ return(err);
+}
+
+/***************************************************************//**
+Creates the index tree for an index and stores the root page number in
+index->page, without updating SYSTEM TABLES. The mini-transaction runs
+in MTR_LOG_NO_REDO mode.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+dberr_t
+dict_create_index_tree_in_mem(
+/*==========================*/
+	dict_index_t*	index,	/*!< in/out: index */
+	const trx_t*	trx)	/*!< in: InnoDB transaction handle */
+{
+	ut_ad(mutex_own(&dict_sys.mutex));
+	ut_ad(!(index->type & DICT_FTS));
+
+	mtr_t	mini_trx;
+
+	mtr_start(&mini_trx);
+	mtr_set_log_mode(&mini_trx, MTR_LOG_NO_REDO);
+
+	/* Currently only temporary tables use this function. Import and
+	discard are blocked for them, hence these assertions. */
+	ut_ad(index->is_readable());
+	ut_ad(!(index->table->flags2 & DICT_TF2_DISCARDED));
+
+	index->page = btr_create(index->type, index->table->space,
+				 index->id, index, &mini_trx);
+	mtr_commit(&mini_trx);
+
+	index->trx_id = trx->id;
+
+	if (index->page == FIL_NULL) {
+		return DB_OUT_OF_FILE_SPACE;
+	}
+
+	return DB_SUCCESS;
+}
+
+/** Drop the index tree associated with a row in SYS_INDEXES table.
+The PAGE_NO field of the record is reset to FIL_NULL. The tree itself is
+freed only if the record had a root page number, the tablespace is either
+the system tablespace or is not being dropped as a whole
+(TRX_DICT_OP_TABLE), and the root page number is within the size of the
+tablespace.
+@param[in,out] pcur persistent cursor on rec
+@param[in,out] trx dictionary transaction
+@param[in,out] mtr mini-transaction */
+void dict_drop_index_tree(btr_pcur_t* pcur, trx_t* trx, mtr_t* mtr)
+{
+ rec_t* rec = btr_pcur_get_rec(pcur);
+ byte* ptr;
+ ulint len;
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+ /* The record is read with the old-style (non-compact) accessors. */
+ ut_a(!dict_table_is_comp(dict_sys.sys_indexes));
+
+ ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
+
+ ut_ad(len == 4);
+
+ /* The cursor position is stored on every path, including the early
+ returns below. */
+ btr_pcur_store_position(pcur, mtr);
+
+ const uint32_t root_page_no = mach_read_from_4(ptr);
+
+ if (root_page_no == FIL_NULL) {
+ /* The tree has already been freed */
+ return;
+ }
+
+ /* Reset PAGE_NO to FIL_NULL by filling its 4 bytes with 0xff. */
+ compile_time_assert(FIL_NULL == 0xffffffff);
+ mtr->memset(btr_pcur_get_block(pcur), page_offset(ptr), 4, 0xff);
+
+ ptr = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__SPACE, &len);
+
+ ut_ad(len == 4);
+
+ const uint32_t space_id = mach_read_from_4(ptr);
+ ut_ad(space_id < SRV_TMP_SPACE_ID);
+ if (space_id != TRX_SYS_SPACE
+ && trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE) {
+ /* We are about to delete the entire .ibd file;
+ do not bother to free pages inside it. */
+ return;
+ }
+
+ /* ptr now points to the 8-byte index ID, which is passed to
+ btr_free_if_exists() below. */
+ ptr = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__ID, &len);
+
+ ut_ad(len == 8);
+
+ /* fil_space_t::get() and s->release() are paired; nothing is freed
+ if the tablespace cannot be obtained. */
+ if (fil_space_t* s = fil_space_t::get(space_id)) {
+ /* Ensure that the tablespace file exists
+ in order to avoid a crash in buf_page_get_gen(). */
+ if (root_page_no < s->get_size()) {
+ btr_free_if_exists(page_id_t(space_id, root_page_no),
+ s->zip_size(),
+ mach_read_from_8(ptr), mtr);
+ }
+ s->release();
+ }
+}
+
+/*********************************************************************//**
+Creates a table create graph: a table create node with one insert
+sub-node each for SYS_TABLES, SYS_COLUMNS and SYS_VIRTUAL. The node and
+the sub-nodes are allocated from heap; the node also gets a private heap
+of its own (node->heap).
+@return own: table create node */
+tab_node_t*
+tab_create_graph_create(
+/*====================*/
+	dict_table_t*	table,	/*!< in: table to create, built as a memory data
+				structure */
+	mem_heap_t*	heap,	/*!< in: heap where created */
+	fil_encryption_t mode,	/*!< in: encryption mode */
+	uint32_t	key_id)	/*!< in: encryption key_id */
+{
+	tab_node_t* const	node = static_cast<tab_node_t*>(
+		mem_heap_alloc(heap, sizeof(tab_node_t)));
+
+	/* Create an insert sub-node for a system table and link it to
+	the new table create node. */
+	const auto make_ins_node = [node, heap](dict_table_t* sys_table) {
+		auto*	ins = ins_node_create(INS_DIRECT, sys_table, heap);
+		ins->common.parent = node;
+		return ins;
+	};
+
+	node->common.type = QUE_NODE_CREATE_TABLE;
+	node->table = table;
+	node->state = TABLE_BUILD_TABLE_DEF;
+	node->heap = mem_heap_create(256);
+	node->mode = mode;
+	node->key_id = key_id;
+
+	node->tab_def = make_ins_node(dict_sys.sys_tables);
+	node->col_def = make_ins_node(dict_sys.sys_columns);
+	node->v_col_def = make_ins_node(dict_sys.sys_virtual);
+
+	return(node);
+}
+
+/** Creates an index create graph: an index create node with one insert
+sub-node each for SYS_INDEXES and SYS_FIELDS. The node and the sub-nodes
+are allocated from heap; the node also gets a private heap of its own
+(node->heap).
+@param[in]	index	index to create, built as a memory data structure
+@param[in]	table	table name
+@param[in,out]	heap	heap where created
+@param[in]	add_v	new virtual columns added in the same clause with
+			add index
+@return own: index create node */
+ind_node_t*
+ind_create_graph_create(
+	dict_index_t*		index,
+	const char*		table,
+	mem_heap_t*		heap,
+	const dict_add_v_col_t*	add_v)
+{
+	ind_node_t* const	node = static_cast<ind_node_t*>(
+		mem_heap_alloc(heap, sizeof(ind_node_t)));
+
+	/* Create an insert sub-node for a system table and link it to
+	the new index create node. */
+	const auto make_ins_node = [node, heap](dict_table_t* sys_table) {
+		auto*	ins = ins_node_create(INS_DIRECT, sys_table, heap);
+		ins->common.parent = node;
+		return ins;
+	};
+
+	node->common.type = QUE_NODE_CREATE_INDEX;
+	node->index = index;
+	node->table_name = table;
+	node->add_v = add_v;
+	node->state = INDEX_BUILD_INDEX_DEF;
+	node->page_no = FIL_NULL;
+	node->heap = mem_heap_create(256);
+
+	node->ind_def = make_ins_node(dict_sys.sys_indexes);
+	node->field_def = make_ins_node(dict_sys.sys_fields);
+
+	return(node);
+}
+
+/***********************************************************//**
+Creates a table. This is a high-level function used in SQL execution graphs.
+The function is a state machine driven by node->state. Each call prepares
+at most one row (for SYS_TABLES, SYS_COLUMNS or SYS_VIRTUAL), makes the
+matching insert sub-node the next node to run and returns. When no rows
+are left, the table is added to the dictionary cache and control is
+passed to the parent node. On error, the error code is stored in
+trx->error_state and NULL is returned.
+@return query thread to run next or NULL */
+que_thr_t*
+dict_create_table_step(
+/*===================*/
+ que_thr_t* thr) /*!< in: query thread */
+{
+ tab_node_t* node;
+ dberr_t err = DB_ERROR;
+ trx_t* trx;
+
+ ut_ad(thr);
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ trx = thr_get_trx(thr);
+
+ node = static_cast<tab_node_t*>(thr->run_node);
+
+ ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_TABLE);
+
+ /* Arriving from the parent node starts the state machine from
+ the beginning. */
+ if (thr->prev_node == que_node_get_parent(node)) {
+ node->state = TABLE_BUILD_TABLE_DEF;
+ }
+
+ if (node->state == TABLE_BUILD_TABLE_DEF) {
+
+ /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
+
+ err = dict_build_table_def_step(thr, node);
+ if (err != DB_SUCCESS) {
+
+ goto function_exit;
+ }
+
+ node->state = TABLE_BUILD_COL_DEF;
+ node->col_no = 0;
+
+ thr->run_node = node->tab_def;
+
+ return(thr);
+ }
+
+ if (node->state == TABLE_BUILD_COL_DEF) {
+
+ /* One SYS_COLUMNS row is built per call; node->col_no counts
+ the rows built so far. DATA_N_SYS_COLS of the n_def columns
+ get no row. */
+ if (node->col_no + DATA_N_SYS_COLS
+ < (static_cast<ulint>(node->table->n_def)
+ + static_cast<ulint>(node->table->n_v_def))) {
+
+ ulint i = node->col_no++;
+ /* NOTE(review): presumably this skips the system columns
+ so that larger values of i address virtual columns in
+ dict_create_sys_columns_tuple() - confirm there. */
+ if (i + DATA_N_SYS_COLS >= node->table->n_def) {
+ i += DATA_N_SYS_COLS;
+ }
+
+ ins_node_set_new_row(
+ node->col_def,
+ dict_create_sys_columns_tuple(node->table, i,
+ node->heap));
+
+ thr->run_node = node->col_def;
+
+ return(thr);
+ } else {
+ /* Move on to SYS_VIRTUAL table */
+ node->col_no = 0;
+ node->base_col_no = 0;
+ node->state = TABLE_BUILD_V_COL_DEF;
+ }
+ }
+
+ if (node->state == TABLE_BUILD_V_COL_DEF) {
+
+ /* One SYS_VIRTUAL row is built per call, for the pair
+ (node->col_no, node->base_col_no). */
+ if (node->col_no < static_cast<ulint>(node->table->n_v_def)) {
+ dict_v_col_t* v_col = dict_table_get_nth_v_col(
+ node->table, node->col_no);
+
+ /* If no base column */
+ while (v_col->num_base == 0) {
+ node->col_no++;
+ if (node->col_no == static_cast<ulint>(
+ (node->table)->n_v_def)) {
+ node->state = TABLE_ADD_TO_CACHE;
+ break;
+ }
+
+ v_col = dict_table_get_nth_v_col(
+ node->table, node->col_no);
+ node->base_col_no = 0;
+ }
+
+ if (node->state != TABLE_ADD_TO_CACHE) {
+ ut_ad(node->col_no == v_col->v_pos);
+ dict_build_v_col_def_step(node);
+
+ /* Advance to the pair that the next call will
+ process. */
+ if (node->base_col_no
+ < unsigned{v_col->num_base} - 1) {
+ /* move on to next base column */
+ node->base_col_no++;
+ } else {
+ /* move on to next virtual column */
+ node->col_no++;
+ node->base_col_no = 0;
+ }
+
+ thr->run_node = node->v_col_def;
+
+ return(thr);
+ }
+ } else {
+ node->state = TABLE_ADD_TO_CACHE;
+ }
+ }
+
+ if (node->state == TABLE_ADD_TO_CACHE) {
+ DBUG_EXECUTE_IF("ib_ddl_crash_during_create", DBUG_SUICIDE(););
+
+ node->table->can_be_evicted = true;
+ node->table->add_to_cache();
+
+ err = DB_SUCCESS;
+ }
+
+function_exit:
+ trx->error_state = err;
+
+ /* Both error branches below return NULL; only success continues to
+ the parent node. */
+ if (err == DB_SUCCESS) {
+ /* Ok: do nothing */
+
+ } else if (err == DB_LOCK_WAIT) {
+
+ return(NULL);
+ } else {
+ /* SQL error detected */
+
+ return(NULL);
+ }
+
+ thr->run_node = que_node_get_parent(node);
+
+ return(thr);
+}
+
+/***********************************************************//**
+Creates an index. This is a high-level function used in SQL execution
+graphs.
+The function is a state machine driven by node->state. Each call prepares
+at most one row (for SYS_INDEXES or SYS_FIELDS), makes the matching insert
+sub-node the next node to run and returns. When no rows are left, the
+index is added to the dictionary cache and its index tree is created, and
+control is passed to the parent node. On error, the error code is stored
+in trx->error_state and NULL is returned.
+@return query thread to run next or NULL */
+que_thr_t*
+dict_create_index_step(
+/*===================*/
+ que_thr_t* thr) /*!< in: query thread */
+{
+ ind_node_t* node;
+ dberr_t err = DB_ERROR;
+ trx_t* trx;
+
+ ut_ad(thr);
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ trx = thr_get_trx(thr);
+
+ node = static_cast<ind_node_t*>(thr->run_node);
+
+ ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_INDEX);
+
+ /* Arriving from the parent node starts the state machine from
+ the beginning. */
+ if (thr->prev_node == que_node_get_parent(node)) {
+ node->state = INDEX_BUILD_INDEX_DEF;
+ }
+
+ if (node->state == INDEX_BUILD_INDEX_DEF) {
+ /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
+ err = dict_build_index_def_step(thr, node);
+
+ if (err != DB_SUCCESS) {
+
+ goto function_exit;
+ }
+
+ node->state = INDEX_BUILD_FIELD_DEF;
+ node->field_no = 0;
+
+ thr->run_node = node->ind_def;
+
+ return(thr);
+ }
+
+ if (node->state == INDEX_BUILD_FIELD_DEF) {
+
+ /* One SYS_FIELDS row is built per call. */
+ if (node->field_no < (node->index)->n_fields) {
+
+ dict_build_field_def_step(node);
+
+ node->field_no++;
+
+ thr->run_node = node->field_def;
+
+ return(thr);
+ } else {
+ node->state = INDEX_ADD_TO_CACHE;
+ }
+ }
+
+ if (node->state == INDEX_ADD_TO_CACHE) {
+ ut_ad(node->index->table == node->table);
+ /* node->index is passed by name and is expected to be NULL
+ afterwards exactly when an error is returned (see the
+ assertion below). */
+ err = dict_index_add_to_cache(node->index, FIL_NULL,
+ node->add_v);
+
+ ut_ad((node->index == NULL) == (err != DB_SUCCESS));
+
+ if (!node->index) {
+ goto function_exit;
+ }
+
+ ut_ad(!node->index->is_instant());
+ ut_ad(node->index->n_core_null_bytes
+ == ((dict_index_is_clust(node->index)
+ && node->table->supports_instant())
+ ? dict_index_t::NO_CORE_NULL_BYTES
+ : UT_BITS_IN_BYTES(
+ unsigned(node->index->n_nullable))));
+ node->index->n_core_null_bytes = static_cast<uint8_t>(
+ UT_BITS_IN_BYTES(unsigned(node->index->n_nullable)));
+ node->state = INDEX_CREATE_INDEX_TREE;
+ }
+
+ if (node->state == INDEX_CREATE_INDEX_TREE) {
+
+ err = dict_create_index_tree_step(node);
+
+ DBUG_EXECUTE_IF("ib_dict_create_index_tree_fail",
+ err = DB_OUT_OF_MEMORY;);
+
+ /* On failure, undo INDEX_ADD_TO_CACHE: remove the index from
+ the dictionary cache and clear node->index. */
+ if (err != DB_SUCCESS) {
+ /* If this is a FTS index, we will need to remove
+ it from fts->cache->indexes list as well */
+ if ((node->index->type & DICT_FTS)
+ && node->table->fts) {
+ fts_index_cache_t* index_cache;
+
+ rw_lock_x_lock(
+ &node->table->fts->cache->init_lock);
+
+ /* NOTE(review): index_cache is dereferenced without
+ a NULL check; confirm that fts_find_index_cache()
+ always finds the index here. */
+ index_cache = (fts_index_cache_t*)
+ fts_find_index_cache(
+ node->table->fts->cache,
+ node->index);
+
+ if (index_cache->words) {
+ rbt_free(index_cache->words);
+ index_cache->words = 0;
+ }
+
+ ib_vector_remove(
+ node->table->fts->cache->indexes,
+ *reinterpret_cast<void**>(index_cache));
+
+ rw_lock_x_unlock(
+ &node->table->fts->cache->init_lock);
+ }
+
+#ifdef BTR_CUR_HASH_ADAPT
+ ut_ad(!node->index->search_info->ref_count);
+#endif /* BTR_CUR_HASH_ADAPT */
+ dict_index_remove_from_cache(node->table, node->index);
+ node->index = NULL;
+
+ goto function_exit;
+ }
+
+ /* Publish the root page number written by
+ dict_create_index_tree_step(). */
+ node->index->page = node->page_no;
+ /* These should have been set in
+ dict_build_index_def_step() and
+ dict_index_add_to_cache(). */
+ ut_ad(node->index->trx_id == trx->id);
+ ut_ad(node->index->table->def_trx_id == trx->id);
+ }
+
+function_exit:
+ trx->error_state = err;
+
+ /* Both error branches below return NULL; only success continues to
+ the parent node. */
+ if (err == DB_SUCCESS) {
+ /* Ok: do nothing */
+
+ } else if (err == DB_LOCK_WAIT) {
+
+ return(NULL);
+ } else {
+ /* SQL error detected */
+
+ return(NULL);
+ }
+
+ thr->run_node = que_node_get_parent(node);
+
+ return(thr);
+}
+
+/****************************************************************//**
+Check whether a system table exists. Additionally, if it exists,
+move it to the non-LRU end of the table LRU list. This is only used
+for system tables that can be upgraded or added to an older database,
+which include SYS_FOREIGN, SYS_FOREIGN_COLS, SYS_TABLESPACES and
+SYS_DATAFILES.
+@return DB_SUCCESS if the sys table exists, DB_CORRUPTION if it exists
+but is not current, DB_TABLE_NOT_FOUND if it does not exist*/
+static
+dberr_t
+dict_check_if_system_table_exists(
+/*==============================*/
+	const char*	tablename,	/*!< in: name of table */
+	ulint		num_fields,	/*!< in: number of fields */
+	ulint		num_indexes)	/*!< in: number of indexes */
+{
+	ut_ad(!srv_any_background_activity());
+
+	mutex_enter(&dict_sys.mutex);
+
+	dict_table_t* const	sys_table = dict_table_get_low(tablename);
+	dberr_t			status;
+
+	if (!sys_table) {
+		status = DB_TABLE_NOT_FOUND;
+	} else if (UT_LIST_GET_LEN(sys_table->indexes) == num_indexes
+		   && sys_table->n_cols == num_fields) {
+		/* The table exists in the expected form. Make sure
+		that it cannot be evicted from the table LRU cache. */
+		dict_table_prevent_eviction(sys_table);
+		status = DB_SUCCESS;
+	} else {
+		/* The number of indexes or columns is not the expected
+		one. */
+		status = DB_CORRUPTION;
+	}
+
+	mutex_exit(&dict_sys.mutex);
+
+	return(status);
+}
+
+/****************************************************************//**
+Creates the foreign key constraints system tables inside InnoDB
+at server bootstrap or server start if they are not found or are
+not of the right form.
+If the creation fails, the incompletely created tables are dropped and
+the error is returned to the caller.
+@return DB_SUCCESS if both tables exist or were created, DB_READ_ONLY
+if they would have to be created but the server is read-only or started
+with srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO, or the error code of the
+failed creation (DB_OUT_OF_FILE_SPACE is reported as
+DB_MUST_GET_MORE_FILE_SPACE) */
+dberr_t
+dict_create_or_check_foreign_constraint_tables(void)
+/*================================================*/
+{
+	trx_t*		trx;
+	my_bool		srv_file_per_table_backup;
+	dberr_t		err;
+	dberr_t		sys_foreign_err;
+	dberr_t		sys_foreign_cols_err;
+
+	ut_ad(!srv_any_background_activity());
+
+	/* Note: The master thread has not been started at this point. */
+
+
+	sys_foreign_err = dict_check_if_system_table_exists(
+		"SYS_FOREIGN", DICT_NUM_FIELDS__SYS_FOREIGN + 1, 3);
+	sys_foreign_cols_err = dict_check_if_system_table_exists(
+		"SYS_FOREIGN_COLS", DICT_NUM_FIELDS__SYS_FOREIGN_COLS + 1, 1);
+
+	if (sys_foreign_err == DB_SUCCESS
+	    && sys_foreign_cols_err == DB_SUCCESS) {
+		return(DB_SUCCESS);
+	}
+
+	if (srv_read_only_mode
+	    || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
+		return(DB_READ_ONLY);
+	}
+
+	trx = trx_create();
+
+	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
+	trx->op_info = "creating foreign key sys tables";
+
+	row_mysql_lock_data_dictionary(trx);
+
+	DBUG_EXECUTE_IF(
+		"create_and_drop_garbage",
+		err = que_eval_sql(
+			NULL,
+			"PROCEDURE CREATE_GARBAGE_TABLE_PROC () IS\n"
+			"BEGIN\n"
+			"CREATE TABLE\n"
+			"\"test/#sql-ib-garbage\"(ID CHAR);\n"
+			"CREATE UNIQUE CLUSTERED INDEX PRIMARY"
+			" ON \"test/#sql-ib-garbage\"(ID);\n"
+			"END;\n", FALSE, trx);
+		ut_ad(err == DB_SUCCESS);
+		row_drop_table_for_mysql("test/#sql-ib-garbage", trx,
+					 SQLCOM_DROP_DB, true););
+
+	/* Check which incomplete table definition to drop. */
+
+	if (sys_foreign_err == DB_CORRUPTION) {
+		row_drop_table_after_create_fail("SYS_FOREIGN", trx);
+	}
+
+	if (sys_foreign_cols_err == DB_CORRUPTION) {
+		row_drop_table_after_create_fail("SYS_FOREIGN_COLS", trx);
+	}
+
+	ib::info() << "Creating foreign key constraint system tables.";
+
+	/* NOTE: in dict_load_foreigns we use the fact that
+	there are 2 secondary indexes on SYS_FOREIGN, and they
+	are defined just like below */
+
+	/* NOTE: when designing InnoDB's foreign key support in 2001, we made
+	an error and made the table names and the foreign key id of type
+	'CHAR' (internally, really a VARCHAR). We should have made the type
+	VARBINARY, like in other InnoDB system tables, to get a clean
+	design. */
+
+	srv_file_per_table_backup = srv_file_per_table;
+
+	/* We always want SYSTEM tables to be created inside the system
+	tablespace. */
+
+	srv_file_per_table = 0;
+
+	err = que_eval_sql(
+		NULL,
+		"PROCEDURE CREATE_FOREIGN_SYS_TABLES_PROC () IS\n"
+		"BEGIN\n"
+		"CREATE TABLE\n"
+		"SYS_FOREIGN(ID CHAR, FOR_NAME CHAR,"
+		" REF_NAME CHAR, N_COLS INT);\n"
+		"CREATE UNIQUE CLUSTERED INDEX ID_IND"
+		" ON SYS_FOREIGN (ID);\n"
+		"CREATE INDEX FOR_IND"
+		" ON SYS_FOREIGN (FOR_NAME);\n"
+		"CREATE INDEX REF_IND"
+		" ON SYS_FOREIGN (REF_NAME);\n"
+		"CREATE TABLE\n"
+		"SYS_FOREIGN_COLS(ID CHAR, POS INT,"
+		" FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n"
+		"CREATE UNIQUE CLUSTERED INDEX ID_IND"
+		" ON SYS_FOREIGN_COLS (ID, POS);\n"
+		"END;\n",
+		FALSE, trx);
+
+	if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+		/* The wording covers both errors that are expected here
+		(see the assertion below), like the message for
+		SYS_VIRTUAL does. */
+		ib::error() << "Creation of SYS_FOREIGN and SYS_FOREIGN_COLS"
+			" failed: " << err << ". Tablespace is"
+			" full or too many transactions."
+			" Dropping incompletely created tables.";
+
+		ut_ad(err == DB_OUT_OF_FILE_SPACE
+		      || err == DB_TOO_MANY_CONCURRENT_TRXS);
+
+		row_drop_table_after_create_fail("SYS_FOREIGN", trx);
+		row_drop_table_after_create_fail("SYS_FOREIGN_COLS", trx);
+
+		if (err == DB_OUT_OF_FILE_SPACE) {
+			err = DB_MUST_GET_MORE_FILE_SPACE;
+		}
+	}
+
+	trx_commit_for_mysql(trx);
+
+	row_mysql_unlock_data_dictionary(trx);
+
+	trx->free();
+
+	srv_file_per_table = srv_file_per_table_backup;
+
+	if (err != DB_SUCCESS) {
+		/* The tables were dropped above. Checking for them again
+		would only make the assertions below abort the server
+		instead of letting the caller handle the error. */
+		return(err);
+	}
+
+	/* Note: The master thread has not been started at this point. */
+	/* Confirm and move to the non-LRU part of the table LRU list. */
+	sys_foreign_err = dict_check_if_system_table_exists(
+		"SYS_FOREIGN", DICT_NUM_FIELDS__SYS_FOREIGN + 1, 3);
+	ut_a(sys_foreign_err == DB_SUCCESS);
+
+	sys_foreign_cols_err = dict_check_if_system_table_exists(
+		"SYS_FOREIGN_COLS", DICT_NUM_FIELDS__SYS_FOREIGN_COLS + 1, 1);
+	ut_a(sys_foreign_cols_err == DB_SUCCESS);
+
+	return(DB_SUCCESS);
+}
+
+/** Creates the virtual column system table (SYS_VIRTUAL) inside InnoDB
+at server bootstrap or server start if the table is not found or is
+not of the right form. On success, dict_sys.sys_virtual is set to the
+table. If the creation fails, the incompletely created table is dropped
+and the error is returned to the caller.
+@return DB_SUCCESS if the table exists or was created, DB_READ_ONLY if
+it would have to be created but the server is read-only or started with
+srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO, or the error code of the
+failed creation (DB_OUT_OF_FILE_SPACE is reported as
+DB_MUST_GET_MORE_FILE_SPACE) */
+dberr_t
+dict_create_or_check_sys_virtual()
+{
+	trx_t*		trx;
+	my_bool		srv_file_per_table_backup;
+	dberr_t		err;
+
+	ut_ad(!srv_any_background_activity());
+
+	/* Note: The master thread has not been started at this point. */
+	err = dict_check_if_system_table_exists(
+		"SYS_VIRTUAL", DICT_NUM_FIELDS__SYS_VIRTUAL + 1, 1);
+
+	if (err == DB_SUCCESS) {
+		mutex_enter(&dict_sys.mutex);
+		dict_sys.sys_virtual = dict_table_get_low("SYS_VIRTUAL");
+		mutex_exit(&dict_sys.mutex);
+		return(DB_SUCCESS);
+	}
+
+	if (srv_read_only_mode
+	    || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
+		return(DB_READ_ONLY);
+	}
+
+	trx = trx_create();
+
+	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
+	trx->op_info = "creating sys_virtual tables";
+
+	row_mysql_lock_data_dictionary(trx);
+
+	/* Check which incomplete table definition to drop. */
+
+	if (err == DB_CORRUPTION) {
+		row_drop_table_after_create_fail("SYS_VIRTUAL", trx);
+	}
+
+	ib::info() << "Creating sys_virtual system tables.";
+
+	srv_file_per_table_backup = srv_file_per_table;
+
+	/* We always want SYSTEM tables to be created inside the system
+	tablespace. */
+
+	srv_file_per_table = 0;
+
+	err = que_eval_sql(
+		NULL,
+		"PROCEDURE CREATE_SYS_VIRTUAL_TABLES_PROC () IS\n"
+		"BEGIN\n"
+		"CREATE TABLE\n"
+		"SYS_VIRTUAL(TABLE_ID BIGINT, POS INT,"
+		" BASE_POS INT);\n"
+		"CREATE UNIQUE CLUSTERED INDEX BASE_IDX"
+		" ON SYS_VIRTUAL(TABLE_ID, POS, BASE_POS);\n"
+		"END;\n",
+		FALSE, trx);
+
+	if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+		ib::error() << "Creation of SYS_VIRTUAL"
+			" failed: " << err << ". Tablespace is"
+			" full or too many transactions."
+			" Dropping incompletely created tables.";
+
+		ut_ad(err == DB_OUT_OF_FILE_SPACE
+		      || err == DB_TOO_MANY_CONCURRENT_TRXS);
+
+		row_drop_table_after_create_fail("SYS_VIRTUAL", trx);
+
+		if (err == DB_OUT_OF_FILE_SPACE) {
+			err = DB_MUST_GET_MORE_FILE_SPACE;
+		}
+	}
+
+	trx_commit_for_mysql(trx);
+
+	row_mysql_unlock_data_dictionary(trx);
+
+	trx->free();
+
+	srv_file_per_table = srv_file_per_table_backup;
+
+	if (err != DB_SUCCESS) {
+		/* The table was dropped above. Checking for it again
+		would only make the assertion below abort the server
+		instead of letting the caller handle the error. */
+		return(err);
+	}
+
+	/* Note: The master thread has not been started at this point. */
+	/* Confirm and move to the non-LRU part of the table LRU list. */
+	dberr_t	sys_virtual_err = dict_check_if_system_table_exists(
+		"SYS_VIRTUAL", DICT_NUM_FIELDS__SYS_VIRTUAL + 1, 1);
+	ut_a(sys_virtual_err == DB_SUCCESS);
+	mutex_enter(&dict_sys.mutex);
+	dict_sys.sys_virtual = dict_table_get_low("SYS_VIRTUAL");
+	mutex_exit(&dict_sys.mutex);
+
+	return(DB_SUCCESS);
+}
+
+/****************************************************************//**
+Evaluate the given foreign key SQL statement. On failure, a diagnostic
+message is written to dict_foreign_err_file while holding
+dict_foreign_err_mutex; the file is rewound first only for
+DB_DUPLICATE_KEY. Errors other than DB_DUPLICATE_KEY are also reported
+with ib::error().
+@return error code or DB_SUCCESS */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+dict_foreign_eval_sql(
+/*==================*/
+	pars_info_t*	info,	/*!< in: info struct */
+	const char*	sql,	/*!< in: SQL string to evaluate */
+	const char*	name,	/*!< in: table name (for diagnostics) */
+	const char*	id,	/*!< in: foreign key id */
+	trx_t*		trx)	/*!< in/out: transaction */
+{
+	FILE* const	err_file = dict_foreign_err_file;
+	const dberr_t	status = que_eval_sql(info, sql, FALSE, trx);
+
+	if (status == DB_SUCCESS) {
+		return(DB_SUCCESS);
+	}
+
+	if (status == DB_DUPLICATE_KEY) {
+		/* A constraint with this id already exists. */
+		mutex_enter(&dict_foreign_err_mutex);
+		rewind(err_file);
+		ut_print_timestamp(err_file);
+		fputs(" Error in foreign key constraint creation for table ",
+		      err_file);
+		ut_print_name(err_file, trx, name);
+		fputs(".\nA foreign key constraint of name ", err_file);
+		ut_print_name(err_file, trx, id);
+		fputs("\nalready exists."
+		      " (Note that internally InnoDB adds 'databasename'\n"
+		      "in front of the user-defined constraint name.)\n"
+		      "Note that InnoDB's FOREIGN KEY system tables store\n"
+		      "constraint names as case-insensitive, with the\n"
+		      "MySQL standard latin1_swedish_ci collation. If you\n"
+		      "create tables or databases whose names differ only in\n"
+		      "the character case, then collisions in constraint\n"
+		      "names can occur. Workaround: name your constraints\n"
+		      "explicitly with unique names.\n",
+		      err_file);
+		mutex_exit(&dict_foreign_err_mutex);
+
+		return(status);
+	}
+
+	/* Any other failure */
+	ib::error() << "Foreign key constraint creation failed: "
+		<< status;
+
+	mutex_enter(&dict_foreign_err_mutex);
+	ut_print_timestamp(err_file);
+	fputs(" Internal error in foreign key constraint creation"
+	      " for table ", err_file);
+	ut_print_name(err_file, trx, name);
+	fputs(".\n"
+	      "See the MySQL .err log in the datadir"
+	      " for more information.\n", err_file);
+	mutex_exit(&dict_foreign_err_mutex);
+
+	return(status);
+}
+
+/********************************************************************//**
+Add a single foreign key field definition to the data dictionary tables in
+the database, by inserting one row (id, position, foreign column name,
+referenced column name) into SYS_FOREIGN_COLS.
+@return error code or DB_SUCCESS */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+dict_create_add_foreign_field_to_dictionary(
+/*========================================*/
+	ulint			field_nr,	/*!< in: field number */
+	const char*		table_name,	/*!< in: table name */
+	const dict_foreign_t*	foreign,	/*!< in: foreign */
+	trx_t*			trx)		/*!< in/out: transaction */
+{
+	DBUG_ENTER("dict_create_add_foreign_field_to_dictionary");
+
+	/* Bind the values of the four columns of the new row. */
+	pars_info_t* const	bound = pars_info_create();
+
+	pars_info_add_str_literal(bound, "id", foreign->id);
+	pars_info_add_int4_literal(bound, "pos", field_nr);
+	pars_info_add_str_literal(bound, "for_col_name",
+				  foreign->foreign_col_names[field_nr]);
+	pars_info_add_str_literal(bound, "ref_col_name",
+				  foreign->referenced_col_names[field_nr]);
+
+	const dberr_t	status = dict_foreign_eval_sql(
+		bound,
+		"PROCEDURE P () IS\n"
+		"BEGIN\n"
+		"INSERT INTO SYS_FOREIGN_COLS VALUES"
+		"(:id, :pos, :for_col_name, :ref_col_name);\n"
+		"END;\n",
+		table_name, foreign->id, trx);
+
+	DBUG_RETURN(status);
+}
+
+/********************************************************************//**
+Construct foreign key constraint definition from data dictionary
+information, in the form
+CONSTRAINT <id> FOREIGN KEY (<cols>) REFERENCES <table> (<cols>)
+The text is built in a 4 KiB buffer allocated from foreign->heap. Output
+that does not fit is truncated; the result is always NUL-terminated.
+@return the definition, allocated from foreign->heap */
+UNIV_INTERN
+char*
+dict_foreign_def_get(
+/*=================*/
+	dict_foreign_t*	foreign,/*!< in: foreign */
+	trx_t*		trx)	/*!< in: trx */
+{
+	const size_t	fk_def_size = 4 * 1024;
+	char* const	fk_def = static_cast<char*>(
+		mem_heap_alloc(foreign->heap, fk_def_size));
+	size_t		used = 0;
+	char		namebuf[MAX_TABLE_NAME_LEN + 1];
+
+	fk_def[0] = '\0';
+
+	/* Append len bytes of str at the current end of the output. The
+	write position is tracked in 'used', so nothing is rescanned, and
+	the copy is clipped so that it never runs past the buffer. */
+	const auto append = [&](const char* str, size_t len) {
+		const size_t	room = fk_def_size - 1 - used;
+
+		if (len > room) {
+			len = room;
+		}
+
+		memcpy(fk_def + used, str, len);
+		used += len;
+		fk_def[used] = '\0';
+	};
+
+	/* Append a NUL-terminated string. */
+	const auto append_str = [&](const char* str) {
+		append(str, strlen(str));
+	};
+
+	/* Append an identifier converted with innobase_convert_name().
+	The returned end pointer gives the converted length, so namebuf
+	needs no terminator. */
+	const auto append_name = [&](const char* name) {
+		const char*	end = innobase_convert_name(
+			namebuf, MAX_TABLE_NAME_LEN,
+			name, strlen(name), trx->mysql_thd);
+
+		append(namebuf, static_cast<size_t>(end - namebuf));
+	};
+
+	append_str("CONSTRAINT ");
+	append_name(dict_remove_db_name(foreign->id));
+	append_str(" FOREIGN KEY (");
+
+	for (unsigned i = 0; i < foreign->n_fields; i++) {
+		if (i > 0) {
+			append_str(",");
+		}
+
+		append_name(foreign->foreign_col_names[i]);
+	}
+
+	append_str(") REFERENCES ");
+	append_name(foreign->referenced_table_name);
+	append_str(" (");
+
+	for (unsigned i = 0; i < foreign->n_fields; i++) {
+		if (i > 0) {
+			append_str(",");
+		}
+
+		append_name(foreign->referenced_col_names[i]);
+	}
+
+	append_str(")");
+
+	return fk_def;
+}
+
+/********************************************************************//**
+Convert one foreign key column pair from its data dictionary form to the
+SQL-layer form using innobase_convert_name(). Both result strings are
+allocated from foreign->heap and are NUL-terminated.
+*/
+static
+void
+dict_foreign_def_get_fields(
+/*========================*/
+	dict_foreign_t*	foreign,/*!< in: foreign */
+	trx_t*		trx,	/*!< in: trx */
+	char**		field,	/*!< out: foreign column */
+	char**		field2,	/*!< out: referenced column */
+	ulint		col_no)	/*!< in: column number */
+{
+	char*	for_buf = static_cast<char*>(
+		mem_heap_alloc(foreign->heap, MAX_TABLE_NAME_LEN + 1));
+	char*	ref_buf = static_cast<char*>(
+		mem_heap_alloc(foreign->heap, MAX_TABLE_NAME_LEN + 1));
+
+	const char*	for_col = foreign->foreign_col_names[col_no];
+	const char*	ref_col = foreign->referenced_col_names[col_no];
+
+	/* The conversion returns the end of the written text; terminate
+	the string there. */
+	char*	for_end = innobase_convert_name(
+		for_buf, MAX_TABLE_NAME_LEN,
+		for_col, strlen(for_col), trx->mysql_thd);
+	*for_end = '\0';
+
+	char*	ref_end = innobase_convert_name(
+		ref_buf, MAX_TABLE_NAME_LEN,
+		ref_col, strlen(ref_col), trx->mysql_thd);
+	*ref_end = '\0';
+
+	*field = for_buf;
+	*field2 = ref_buf;
+}
+
+/********************************************************************//**
+Add a foreign key definition to the data dictionary tables.
+The constraint is first inserted into SYS_FOREIGN; then each column pair
+is added with dict_create_add_foreign_field_to_dictionary(). A warning
+describing the constraint is pushed with ib_push_warning() when the
+SYS_FOREIGN insert fails with DB_DUPLICATE_KEY, or when adding any
+column pair fails.
+@return error code or DB_SUCCESS */
+dberr_t
+dict_create_add_foreign_to_dictionary(
+/*==================================*/
+	const char*		name,	/*!< in: table name */
+	const dict_foreign_t*	foreign,/*!< in: foreign key */
+	trx_t*			trx)	/*!< in/out: dictionary transaction */
+{
+	dberr_t		error;
+
+	DBUG_ENTER("dict_create_add_foreign_to_dictionary");
+
+	pars_info_t*	info = pars_info_create();
+
+	pars_info_add_str_literal(info, "id", foreign->id);
+
+	pars_info_add_str_literal(info, "for_name", name);
+
+	pars_info_add_str_literal(info, "ref_name",
+				  foreign->referenced_table_name);
+
+	/* :n_cols packs the number of fields in the low bits and the
+	constraint type shifted left by 24 bits. */
+	pars_info_add_int4_literal(info, "n_cols",
+				   ulint(foreign->n_fields)
+				   | (ulint(foreign->type) << 24));
+
+	DBUG_PRINT("dict_create_add_foreign_to_dictionary",
+		   ("'%s', '%s', '%s', %d", foreign->id, name,
+		    foreign->referenced_table_name,
+		    foreign->n_fields + (foreign->type << 24)));
+
+	error = dict_foreign_eval_sql(info,
+				      "PROCEDURE P () IS\n"
+				      "BEGIN\n"
+				      "INSERT INTO SYS_FOREIGN VALUES"
+				      "(:id, :for_name, :ref_name, :n_cols);\n"
+				      "END;\n"
+				      , name, foreign->id, trx);
+
+	if (error != DB_SUCCESS) {
+
+		if (error == DB_DUPLICATE_KEY) {
+			/* Zero-initialized and one byte larger than the
+			length passed to innobase_convert_name(), so the
+			converted names stay NUL-terminated. */
+			char	buf[MAX_TABLE_NAME_LEN + 1] = "";
+			char	tablename[MAX_TABLE_NAME_LEN + 1] = "";
+			char*	fk_def;
+
+			innobase_convert_name(tablename, MAX_TABLE_NAME_LEN,
+				name, strlen(name), trx->mysql_thd);
+
+			innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
+				foreign->id, strlen(foreign->id), trx->mysql_thd);
+
+			/* dict_foreign_def_get() takes a non-const pointer
+			because it allocates from foreign->heap. */
+			fk_def = dict_foreign_def_get(
+				const_cast<dict_foreign_t*>(foreign), trx);
+
+			ib_push_warning(trx, error,
+				"Create or Alter table %s with foreign key constraint"
+				" failed. Foreign key constraint %s"
+				" already exists on data dictionary."
+				" Foreign key constraint names need to be unique in database."
+				" Error in foreign key definition: %s.",
+				tablename, buf, fk_def);
+		}
+
+		DBUG_RETURN(error);
+	}
+
+	for (ulint i = 0; i < foreign->n_fields; i++) {
+		error = dict_create_add_foreign_field_to_dictionary(
+			i, name, foreign, trx);
+
+		if (error != DB_SUCCESS) {
+			char	buf[MAX_TABLE_NAME_LEN + 1] = "";
+			char	tablename[MAX_TABLE_NAME_LEN + 1] = "";
+			char*	field = NULL;
+			char*	field2 = NULL;
+			char*	fk_def;
+
+			innobase_convert_name(tablename, MAX_TABLE_NAME_LEN,
+				name, strlen(name), trx->mysql_thd);
+			innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
+				foreign->id, strlen(foreign->id), trx->mysql_thd);
+			fk_def = dict_foreign_def_get(
+				const_cast<dict_foreign_t*>(foreign), trx);
+			dict_foreign_def_get_fields(
+				const_cast<dict_foreign_t*>(foreign), trx,
+				&field, &field2, i);
+
+			/* The format has five %s conversions: table,
+			constraint, foreign column, referenced column and
+			definition. Each one must receive a string. */
+			ib_push_warning(trx, error,
+				"Create or Alter table %s with foreign key constraint"
+				" failed. Error adding foreign key constraint name %s"
+				" fields %s or %s to the dictionary."
+				" Error in foreign key definition: %s.",
+				tablename, buf, field, field2, fk_def);
+
+			DBUG_RETURN(error);
+		}
+	}
+
+	DBUG_RETURN(error);
+}
+
+/** Check whether a column name matches a base column of any stored
+column of the table.
+@param[in]	col_name	column name to look for
+@param[in]	table		table whose stored columns (table->s_cols)
+				are scanned; s_cols must not be NULL
+@return true if col_name is the name of a base column of some stored
+column, false otherwise. */
+static
+bool
+dict_foreign_base_for_stored(
+	const char*		col_name,
+	const dict_table_t*	table)
+{
+	const dict_s_col_list::const_iterator	last = table->s_cols->end();
+
+	for (dict_s_col_list::const_iterator stored = table->s_cols->begin();
+	     stored != last; ++stored) {
+
+		/* Compare against the name of every base column of this
+		stored column. */
+		for (ulint b = 0; b < stored->num_base; b++) {
+			const char*	base_name = dict_table_get_col_name(
+				table, stored->base_col[b]->ind);
+
+			if (!strcmp(col_name, base_name)) {
+				return(true);
+			}
+		}
+	}
+
+	return(false);
+}
+
+/** Check if any foreign constraint in the set is defined on a column
+that serves as a base column of a stored column. Constraints whose type
+has no bits set other than the ON DELETE/ON UPDATE NO ACTION flags are
+ignored. This is to prevent creating SET NULL or CASCADE constraints on
+such columns.
+@param[in]	local_fk_set	set of foreign key objects, to be added to
+the dictionary tables
+@param[in]	table		table to which the foreign key objects in
+local_fk_set belong to
+@return true if yes, otherwise, false */
+bool
+dict_foreigns_has_s_base_col(
+	const dict_foreign_set&	local_fk_set,
+	const dict_table_t*	table)
+{
+	if (!table->s_cols) {
+		/* No stored columns, so nothing can be a base column. */
+		return (false);
+	}
+
+	const ulint	no_action_mask = DICT_FOREIGN_ON_DELETE_NO_ACTION
+		| DICT_FOREIGN_ON_UPDATE_NO_ACTION;
+
+	dict_foreign_set::const_iterator	fk = local_fk_set.begin();
+
+	for (; fk != local_fk_set.end(); ++fk) {
+		const dict_foreign_t*	constraint = *fk;
+
+		if ((ulint(constraint->type) & ~no_action_mask) == 0) {
+			continue;
+		}
+
+		for (ulint col = 0; col < constraint->n_fields; col++) {
+			/* Is this constrained column a base column of
+			any stored column? */
+			if (dict_foreign_base_for_stored(
+				    constraint->foreign_col_names[col],
+				    table)) {
+				return(true);
+			}
+		}
+	}
+
+	return(false);
+}
+
+/** Add every foreign key object of a set to the dictionary tables in the
+database, stopping at the first failure. The dictionary cache is not
+modified. The caller must hold dict_sys.mutex and must ensure that all
+foreign key objects contain a valid constraint name in foreign->id.
+@param[in]	local_fk_set	set of foreign key objects, to be added to
+the dictionary tables
+@param[in]	table		table to which the foreign key objects in
+local_fk_set belong to
+@param[in,out]	trx		transaction
+@return error code or DB_SUCCESS
+@retval DB_ERROR if the SYS_FOREIGN table is not found */
+dberr_t
+dict_create_add_foreigns_to_dictionary(
+/*===================================*/
+	const dict_foreign_set&	local_fk_set,
+	const dict_table_t*	table,
+	trx_t*			trx)
+{
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	if (dict_table_get_low("SYS_FOREIGN") == NULL) {
+
+		ib::error() << "Table SYS_FOREIGN not found"
+			" in internal data dictionary";
+
+		return(DB_ERROR);
+	}
+
+	for (dict_foreign_set::const_iterator fk = local_fk_set.begin();
+	     fk != local_fk_set.end();
+	     ++fk) {
+
+		const dict_foreign_t*	constraint = *fk;
+		ut_ad(constraint->id != NULL);
+
+		const dberr_t	err = dict_create_add_foreign_to_dictionary(
+			table->name.m_name, constraint, trx);
+
+		if (err != DB_SUCCESS) {
+			return err;
+		}
+	}
+
+	return DB_SUCCESS;
+}
+
+/****************************************************************//**
+Creates the tablespaces and datafiles system tables inside InnoDB
+at server bootstrap or server start if they are not found or are
+not of the right form.
+If both SYS_TABLESPACES and SYS_DATAFILES already pass
+dict_check_if_system_table_exists(), only srv_sys_tablespaces_open is set.
+Otherwise both tables are created in one dictionary transaction, with
+srv_file_per_table temporarily cleared, and the check is repeated.
+@return DB_SUCCESS or error code
+@retval DB_READ_ONLY if the tables did not both pass the check and
+srv_read_only_mode is set or srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
+@retval DB_MUST_GET_MORE_FILE_SPACE if creation failed with
+DB_OUT_OF_FILE_SPACE */
+dberr_t
+dict_create_or_check_sys_tablespace(void)
+/*=====================================*/
+{
+ trx_t* trx;
+ my_bool srv_file_per_table_backup;
+ dberr_t err;
+ dberr_t sys_tablespaces_err;
+ dberr_t sys_datafiles_err;
+
+ ut_ad(!srv_any_background_activity());
+
+ /* Note: The master thread has not been started at this point. */
+
+ sys_tablespaces_err = dict_check_if_system_table_exists(
+ "SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1);
+ sys_datafiles_err = dict_check_if_system_table_exists(
+ "SYS_DATAFILES", DICT_NUM_FIELDS__SYS_DATAFILES + 1, 1);
+
+ if (sys_tablespaces_err == DB_SUCCESS
+ && sys_datafiles_err == DB_SUCCESS) {
+ srv_sys_tablespaces_open = true; /* nothing to create */
+ return(DB_SUCCESS);
+ }
+
+ if (srv_read_only_mode
+ || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
+ return(DB_READ_ONLY); /* creating the tables is not attempted in these modes */
+ }
+
+ trx = trx_create();
+
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
+ trx->op_info = "creating tablepace and datafile sys tables";
+
+ row_mysql_lock_data_dictionary(trx);
+
+ /* Check which incomplete table definition to drop. */
+
+ if (sys_tablespaces_err == DB_CORRUPTION) {
+ row_drop_table_after_create_fail("SYS_TABLESPACES", trx);
+ }
+
+ if (sys_datafiles_err == DB_CORRUPTION) {
+ row_drop_table_after_create_fail("SYS_DATAFILES", trx);
+ }
+
+ ib::info() << "Creating tablespace and datafile system tables.";
+
+ /* We always want SYSTEM tables to be created inside the system
+ tablespace. */
+ srv_file_per_table_backup = srv_file_per_table; /* restored after the commit below */
+ srv_file_per_table = 0;
+
+ err = que_eval_sql(
+ NULL,
+ "PROCEDURE CREATE_SYS_TABLESPACE_PROC () IS\n"
+ "BEGIN\n"
+ "CREATE TABLE SYS_TABLESPACES(\n"
+ " SPACE INT, NAME CHAR, FLAGS INT);\n"
+ "CREATE UNIQUE CLUSTERED INDEX SYS_TABLESPACES_SPACE"
+ " ON SYS_TABLESPACES (SPACE);\n"
+ "CREATE TABLE SYS_DATAFILES(\n"
+ " SPACE INT, PATH CHAR);\n"
+ "CREATE UNIQUE CLUSTERED INDEX SYS_DATAFILES_SPACE"
+ " ON SYS_DATAFILES (SPACE);\n"
+ "END;\n",
+ FALSE, trx);
+
+ if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+ ib::error() << "Creation of SYS_TABLESPACES and SYS_DATAFILES"
+ " has failed with error " << err
+ << ". Dropping incompletely created tables.";
+
+ ut_a(err == DB_OUT_OF_FILE_SPACE
+ || err == DB_DUPLICATE_KEY
+ || err == DB_TOO_MANY_CONCURRENT_TRXS); /* any other error code aborts here */
+
+ row_drop_table_after_create_fail("SYS_TABLESPACES", trx);
+ row_drop_table_after_create_fail("SYS_DATAFILES", trx);
+
+ if (err == DB_OUT_OF_FILE_SPACE) {
+ err = DB_MUST_GET_MORE_FILE_SPACE;
+ }
+ }
+
+ trx_commit_for_mysql(trx); /* reached on both the success and the failure path */
+
+ row_mysql_unlock_data_dictionary(trx);
+
+ trx->free();
+
+ srv_file_per_table = srv_file_per_table_backup;
+
+ if (err == DB_SUCCESS) {
+ srv_sys_tablespaces_open = true;
+ }
+
+ /* Note: The master thread has not been started at this point. */
+ /* Confirm and move to the non-LRU part of the table LRU list. */
+
+ sys_tablespaces_err = dict_check_if_system_table_exists(
+ "SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1);
+ ut_a(sys_tablespaces_err == DB_SUCCESS || err != DB_SUCCESS); /* a successful creation must pass the check */
+
+ sys_datafiles_err = dict_check_if_system_table_exists(
+ "SYS_DATAFILES", DICT_NUM_FIELDS__SYS_DATAFILES + 1, 1);
+ ut_a(sys_datafiles_err == DB_SUCCESS || err != DB_SUCCESS); /* same invariant for SYS_DATAFILES */
+
+ return(err);
+}
+
+/** Put a tablespace definition into the data dictionary,
+replacing what was there previously.
+If SYS_DATAFILES has no row for the tablespace, any SYS_TABLESPACES row
+for it is deleted and fresh rows are inserted into both tables. If a
+SYS_DATAFILES row exists and its PATH differs, only PATH is updated.
+Nothing is done, and DB_SUCCESS is returned, while
+srv_sys_tablespaces_open is not set.
+@param[in]	space_id	Tablespace id
+@param[in]	name		Tablespace name
+@param[in]	flags		Tablespace flags
+@param[in]	path		Tablespace path
+@param[in]	trx		Transaction
+@return error code or DB_SUCCESS */
+dberr_t
+dict_replace_tablespace_in_dictionary(
+	ulint		space_id,
+	const char*	name,
+	ulint		flags,
+	const char*	path,
+	trx_t*		trx)
+{
+	if (!srv_sys_tablespaces_open) {
+		/* Startup procedure is not yet ready for updates. */
+		return(DB_SUCCESS);
+	}
+
+	static const char	replace_sql[] =
+		"PROCEDURE P () IS\n"
+		"p CHAR;\n"
+
+		"DECLARE CURSOR c IS\n"
+		" SELECT PATH FROM SYS_DATAFILES\n"
+		" WHERE SPACE=:space FOR UPDATE;\n"
+
+		"BEGIN\n"
+		"OPEN c;\n"
+		"FETCH c INTO p;\n"
+
+		"IF (SQL % NOTFOUND) THEN"
+		" DELETE FROM SYS_TABLESPACES "
+		"WHERE SPACE=:space;\n"
+		" INSERT INTO SYS_TABLESPACES VALUES"
+		"(:space, :name, :flags);\n"
+		" INSERT INTO SYS_DATAFILES VALUES"
+		"(:space, :path);\n"
+		"ELSIF p <> :path THEN\n"
+		" UPDATE SYS_DATAFILES SET PATH=:path"
+		" WHERE CURRENT OF c;\n"
+		"END IF;\n"
+		"END;\n";
+
+	pars_info_t*	bound = pars_info_create();
+
+	pars_info_add_int4_literal(bound, "space", space_id);
+	pars_info_add_str_literal(bound, "name", name);
+	pars_info_add_int4_literal(bound, "flags", flags);
+	pars_info_add_str_literal(bound, "path", path);
+
+	const dberr_t	err = que_eval_sql(bound, replace_sql, FALSE, trx);
+
+	if (err == DB_SUCCESS) {
+		/* op_info is cleared only when the statement succeeded. */
+		trx->op_info = "";
+	}
+
+	return(err);
+}
diff --git a/storage/innobase/dict/dict0defrag_bg.cc b/storage/innobase/dict/dict0defrag_bg.cc
new file mode 100644
index 00000000..0d9cb185
--- /dev/null
+++ b/storage/innobase/dict/dict0defrag_bg.cc
@@ -0,0 +1,327 @@
+/*****************************************************************************
+
+Copyright (c) 2016, 2019, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dict/dict0defrag_bg.cc
+Defragmentation routines.
+
+Created 25/08/2016 Jan Lindström
+*******************************************************/
+
+#include "dict0dict.h"
+#include "dict0stats.h"
+#include "dict0stats_bg.h"
+#include "dict0defrag_bg.h"
+#include "btr0btr.h"
+#include "srv0start.h"
+
+static ib_mutex_t defrag_pool_mutex; /*!< held by the pool get/add/del functions while they access defrag_pool */
+
+#ifdef MYSQL_PFS
+static mysql_pfs_key_t defrag_pool_mutex_key; /*!< NOTE(review): presumably the performance schema key for defrag_pool_mutex; not referenced in the visible code, confirm */
+#endif
+
+/** Iterator type for iterating over the elements of objects of type
+defrag_pool_t. */
+typedef defrag_pool_t::iterator defrag_pool_iterator_t;
+
+/** Pool where we store (table id, index id) pairs identifying the indexes
+that are to be processed by background defragmentation. */
+defrag_pool_t defrag_pool;
+
+
+/*****************************************************************//**
+Initialize the defrag pool, called once during thread initialization.
+Creates defrag_pool_mutex. Must not be called in read-only mode
+(checked by a debug assertion). */
+void
+dict_defrag_pool_init(void)
+/*=======================*/
+{
+ ut_ad(!srv_read_only_mode);
+
+ /* We choose SYNC_STATS_DEFRAG to be below SYNC_FSP_PAGE. */
+ mutex_create(LATCH_ID_DEFRAGMENT_MUTEX, &defrag_pool_mutex);
+}
+
+/*****************************************************************//**
+Free the resources occupied by the defrag pool, called once during
+thread de-initialization.
+Frees defrag_pool_mutex; the defrag_pool container itself is not touched
+here. Must not be called in read-only mode (checked by a debug
+assertion). */
+void
+dict_defrag_pool_deinit(void)
+/*=========================*/
+{
+ ut_ad(!srv_read_only_mode);
+
+ mutex_free(&defrag_pool_mutex);
+}
+
+/*****************************************************************//**
+Take one entry from the auto defrag pool. The entry at the back of the
+pool is copied to the output parameters and removed, all while holding
+defrag_pool_mutex.
+@return true if the pool was non-empty and the ids were set, false
+otherwise */
+static
+bool
+dict_stats_defrag_pool_get(
+/*=======================*/
+	table_id_t*	table_id,	/*!< out: table id, or unmodified if
+					list is empty */
+	index_id_t*	index_id)	/*!< out: index id, or unmodified if
+					list is empty */
+{
+	ut_ad(!srv_read_only_mode);
+
+	mutex_enter(&defrag_pool_mutex);
+
+	const bool	have_entry = !defrag_pool.empty();
+
+	if (have_entry) {
+		const defrag_pool_item_t&	last = defrag_pool.back();
+
+		*table_id = last.table_id;
+		*index_id = last.index_id;
+
+		defrag_pool.pop_back();
+	}
+
+	mutex_exit(&defrag_pool_mutex);
+
+	return(have_entry);
+}
+
+/*****************************************************************//**
+Add an index in a table to the defrag pool, which is processed by the
+background stats gathering thread. Only the table id and index id are
+added to the list, so the table can be closed after being enqueued and
+it will be opened when needed. If the table or index does not exist later
+(has been DROPped), then it will be removed from the pool and skipped.
+An index that is already in the pool is not added again. When the pool
+goes from empty to one entry, dict_stats_schedule_now() is called. */
+void
+dict_stats_defrag_pool_add(
+/*=======================*/
+	const dict_index_t*	index)	/*!< in: index to add */
+{
+	ut_ad(!srv_read_only_mode);
+
+	mutex_enter(&defrag_pool_mutex);
+
+	/* Look for an existing entry with the same ids. */
+	bool	already_queued = false;
+
+	for (defrag_pool_iterator_t pos = defrag_pool.begin();
+	     pos != defrag_pool.end();
+	     ++pos) {
+		if (pos->table_id == index->table->id
+		    && pos->index_id == index->id) {
+			already_queued = true;
+			break;
+		}
+	}
+
+	if (!already_queued) {
+		defrag_pool_item_t	entry;
+
+		entry.table_id = index->table->id;
+		entry.index_id = index->id;
+		defrag_pool.push_back(entry);
+
+		if (defrag_pool.size() == 1) {
+			/* Kick off dict stats optimizer work */
+			dict_stats_schedule_now();
+		}
+	}
+
+	mutex_exit(&defrag_pool_mutex);
+}
+
+/*****************************************************************//**
+Delete entries from the auto defrag pool. Exactly one of table and index
+must be given: with a table, every entry of that table is removed; with
+an index, the first matching entry is removed and the scan stops.
+The caller must hold dict_sys.mutex. */
+void
+dict_stats_defrag_pool_del(
+/*=======================*/
+	const dict_table_t*	table,	/*!<in: if given, remove
+					all entries for the table */
+	const dict_index_t*	index)	/*!< in: if given, remove this index */
+{
+	ut_a((table && !index) || (!table && index));
+	ut_ad(!srv_read_only_mode);
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	mutex_enter(&defrag_pool_mutex);
+
+	for (defrag_pool_iterator_t pos = defrag_pool.begin();
+	     pos != defrag_pool.end(); ) {
+
+		/* The assertion above guarantees that index is set
+		whenever table is not. */
+		const bool	matches = table
+			? pos->table_id == table->id
+			: (pos->table_id == index->table->id
+			   && pos->index_id == index->id);
+
+		if (!matches) {
+			++pos;
+			continue;
+		}
+
+		/* erase() invalidates the iterator; continue from the
+		one it returns. */
+		pos = defrag_pool.erase(pos);
+
+		if (index) {
+			break;
+		}
+	}
+
+	mutex_exit(&defrag_pool_mutex);
+}
+
+/*****************************************************************//**
+Take one entry from the defrag pool and save the defragmentation stats
+of the index it names. The entry is dropped without saving anything if
+the table is not cached or is corrupted, or if the index cannot be found
+or is corrupted. */
+static
+void
+dict_stats_process_entry_from_defrag_pool()
+{
+	table_id_t	table_id;
+	index_id_t	index_id;
+
+	ut_ad(!srv_read_only_mode);
+
+	if (!dict_stats_defrag_pool_get(&table_id, &index_id)) {
+		/* no index in defrag pool */
+		return;
+	}
+
+	mutex_enter(&dict_sys.mutex);
+
+	/* If the table is no longer cached, we've already lost the in
+	memory stats so there's nothing really to write to disk. */
+	dict_table_t*	table = dict_table_open_on_id(
+		table_id, TRUE, DICT_TABLE_OP_OPEN_ONLY_IF_CACHED);
+
+	dict_index_t*	index = NULL;
+
+	if (table && !table->corrupted) {
+		index = dict_table_find_index_on_id(table, index_id);
+	}
+
+	const bool	usable = index && !index->is_corrupted();
+
+	if (!usable && table) {
+		/* Release the handle while dict_sys.mutex is still held. */
+		dict_table_close(table, TRUE, FALSE);
+	}
+
+	mutex_exit(&dict_sys.mutex);
+
+	if (usable) {
+		dict_stats_save_defrag_stats(index);
+		dict_table_close(table, FALSE, FALSE);
+	}
+}
+
+/*****************************************************************//**
+Process entries of the defrag pool, one at a time, until the pool is
+empty. Each entry is handled by
+dict_stats_process_entry_from_defrag_pool().
+NOTE(review): the emptiness check reads defrag_pool without holding
+defrag_pool_mutex; confirm that this is acceptable for the callers. */
+void
+dict_defrag_process_entries_from_defrag_pool()
+/*==========================================*/
+{
+	while (!defrag_pool.empty()) {
+		dict_stats_process_entry_from_defrag_pool();
+	}
+}
+
+/*********************************************************************//**
+Save defragmentation result: stores index->stat_defrag_n_pages_freed as
+the "n_pages_freed" statistic of the index. Nothing is saved for the
+change buffer index.
+@return DB_SUCCESS or error code */
+dberr_t
+dict_stats_save_defrag_summary(
+/*============================*/
+	dict_index_t*	index)	/*!< in: index */
+{
+	if (dict_index_is_ibuf(index)) {
+		return DB_SUCCESS;
+	}
+
+	dict_sys_lock();
+
+	const dberr_t	err = dict_stats_save_index_stat(
+		index, time(NULL), "n_pages_freed",
+		index->stat_defrag_n_pages_freed,
+		NULL,
+		"Number of pages freed during"
+		" last defragmentation run.",
+		NULL);
+
+	dict_sys_unlock();
+
+	return (err);
+}
+
+/*********************************************************************//**
+Save defragmentation stats for a given index. Three statistics are
+written in order: "n_page_split", "n_leaf_pages_defrag" and
+"n_leaf_pages_reserved"; the first failure stops the sequence.
+Nothing is saved for the change buffer index, or when the leaf page
+counts cannot be determined.
+@return DB_SUCCESS or error code */
+dberr_t
+dict_stats_save_defrag_stats(
+/*============================*/
+	dict_index_t*	index)	/*!< in: index */
+{
+	if (dict_index_is_ibuf(index)) {
+		return DB_SUCCESS;
+	}
+
+	if (!index->is_readable()) {
+		return dict_stats_report_error(index->table, true);
+	}
+
+	const time_t	now = time(NULL);
+	mtr_t		mtr;
+	ulint		n_leaf_pages;
+
+	/* Read the leaf page counts under an S-latch on the index. */
+	mtr.start();
+	mtr_s_lock_index(index, &mtr);
+	const ulint	n_leaf_reserved = btr_get_size_and_reserved(
+		index, BTR_N_LEAF_PAGES, &n_leaf_pages, &mtr);
+	mtr.commit();
+
+	if (n_leaf_reserved == ULINT_UNDEFINED) {
+		// The index name is different during fast index creation,
+		// so the stats won't be associated with the right index
+		// for later use. We just return without saving.
+		return DB_SUCCESS;
+	}
+
+	dict_sys_lock();
+
+	dberr_t	err = dict_stats_save_index_stat(
+		index, now, "n_page_split",
+		index->stat_defrag_n_page_split,
+		NULL,
+		"Number of new page splits on leaves"
+		" since last defragmentation.",
+		NULL);
+
+	if (err == DB_SUCCESS) {
+		err = dict_stats_save_index_stat(
+			index, now, "n_leaf_pages_defrag",
+			n_leaf_pages,
+			NULL,
+			"Number of leaf pages when this stat is saved to disk",
+			NULL);
+	}
+
+	if (err == DB_SUCCESS) {
+		err = dict_stats_save_index_stat(
+			index, now, "n_leaf_pages_reserved",
+			n_leaf_reserved,
+			NULL,
+			"Number of pages reserved for this index leaves when this stat "
+			"is saved to disk",
+			NULL);
+	}
+
+	dict_sys_unlock();
+
+	return err;
+}
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
new file mode 100644
index 00000000..7d80fc7e
--- /dev/null
+++ b/storage/innobase/dict/dict0dict.cc
@@ -0,0 +1,5277 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2013, 2021, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file dict/dict0dict.cc
+Data dictionary system
+
+Created 1/8/1996 Heikki Tuuri
+***********************************************************************/
+
+#include <my_config.h>
+#include <string>
+
+#include "ha_prototypes.h"
+#include <mysqld.h>
+#include <strfunc.h>
+
+#include "dict0dict.h"
+#include "fts0fts.h"
+#include "fil0fil.h"
+#include <algorithm>
+#include "sql_class.h"
+#include "sql_table.h"
+#include <mysql/service_thd_mdl.h>
+
+#include "btr0btr.h"
+#include "btr0cur.h"
+#include "btr0sea.h"
+#include "buf0buf.h"
+#include "data0type.h"
+#include "dict0boot.h"
+#include "dict0crea.h"
+#include "dict0mem.h"
+#include "dict0priv.h"
+#include "dict0stats.h"
+#include "fts0fts.h"
+#include "fts0types.h"
+#include "lock0lock.h"
+#include "mach0data.h"
+#include "mem0mem.h"
+#include "page0page.h"
+#include "page0zip.h"
+#include "pars0pars.h"
+#include "pars0sym.h"
+#include "que0que.h"
+#include "rem0cmp.h"
+#include "row0log.h"
+#include "row0merge.h"
+#include "row0mysql.h"
+#include "row0upd.h"
+#include "srv0mon.h"
+#include "srv0start.h"
+#include "sync0sync.h"
+#include "trx0undo.h"
+
+#include <vector>
+#include <algorithm>
+
+/** The dictionary system (single namespace-scope instance) */
+dict_sys_t dict_sys;
+
+/** Percentage of compression failures that are allowed in a single
+round (initial value 5) */
+ulong zip_failure_threshold_pct = 5;
+
+/** Maximum percentage of a page that can be allowed as a pad to avoid
+compression failures (initial value 50) */
+ulong zip_pad_max = 50;
+
+#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when
+ creating a table or index object */
+#define DICT_POOL_PER_TABLE_HASH 512 /*!< buffer pool max size per table
+ hash table fixed size in bytes */
+#define DICT_POOL_PER_VARYING 4 /*!< buffer pool max size per data
+ dictionary varying size in bytes */
+
+/** Identifies generated InnoDB foreign key names */
+static char dict_ibfk[] = "_ibfk_";
+
+bool innodb_table_stats_not_found = false; /*!< NOTE(review): presumably set when the persistent table stats table is missing; not used in the visible code, confirm */
+bool innodb_index_stats_not_found = false; /*!< NOTE(review): presumably the same flag for the persistent index stats table; confirm */
+static bool innodb_table_stats_not_found_reported = false; /*!< NOTE(review): presumably limits the table stats message to one report; confirm */
+static bool innodb_index_stats_not_found_reported = false; /*!< NOTE(review): presumably limits the index stats message to one report; confirm */
+
+/*******************************************************************//**
+Tries to find column names for the index and sets the col field of the
+index.
+@param[in] index index
+@param[in] add_v new virtual columns added along with an add index call
+@return whether the column names were found */
+static
+bool
+dict_index_find_cols(
+ dict_index_t* index,
+ const dict_add_v_col_t* add_v);
+/*******************************************************************//**
+Builds the internal dictionary cache representation for a clustered
+index, containing also system fields not defined by the user.
+@return own: the internal representation of the clustered index */
+static
+dict_index_t*
+dict_index_build_internal_clust(
+/*============================*/
+ dict_index_t* index); /*!< in: user representation of
+ a clustered index */
+/*******************************************************************//**
+Builds the internal dictionary cache representation for a non-clustered
+index, containing also system fields not defined by the user.
+@return own: the internal representation of the non-clustered index */
+static
+dict_index_t*
+dict_index_build_internal_non_clust(
+/*================================*/
+ dict_index_t* index); /*!< in: user representation of
+ a non-clustered index */
+/**********************************************************************//**
+Builds the internal dictionary cache representation for an FTS index.
+@return own: the internal representation of the FTS index */
+static
+dict_index_t*
+dict_index_build_internal_fts(
+/*==========================*/
+ dict_index_t* index); /*!< in: user representation of an FTS index */
+
+/**********************************************************************//**
+Removes an index from the dictionary cache. */
+static
+void
+dict_index_remove_from_cache_low(
+/*=============================*/
+ dict_table_t* table, /*!< in/out: table */
+ dict_index_t* index, /*!< in, own: index */
+ ibool lru_evict); /*!< in: TRUE if page being evicted
+ to make room in the table LRU list */
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
+Validate the dictionary table LRU list.
+@return TRUE if validate OK */
+static
+ibool
+dict_lru_validate(void);
+/*===================*/
+#endif /* UNIV_DEBUG */
+
+/** Stream for storing detailed information about the latest foreign key
+and unique key errors. Only created if !srv_read_only_mode; NULL until
+then. */
+FILE* dict_foreign_err_file = NULL;
+/** Mutex protecting the foreign and unique error buffers */
+ib_mutex_t dict_foreign_err_mutex;
+
+/********************************************************************//**
+Checks if the database name in two table names is the same, by comparing
+the names byte by byte up to and including the first '/'.
+Both names must contain '/': reaching the end of a name while the two
+still match fails an assertion.
+@return TRUE if same db name */
+ibool
+dict_tables_have_same_db(
+/*=====================*/
+	const char*	name1,	/*!< in: table name in the form
+				dbname '/' tablename */
+	const char*	name2)	/*!< in: table name in the form
+				dbname '/' tablename */
+{
+	while (*name1 == *name2) {
+		if (*name1 == '/') {
+			/* Identical up to the separator. */
+			return(TRUE);
+		}
+
+		ut_a(*name1); /* the names must contain '/' */
+
+		++name1;
+		++name2;
+	}
+
+	return(FALSE);
+}
+
+/********************************************************************//**
+Skip the database name and the '/' separator of a table name.
+The name must contain '/'; this is asserted.
+@return pointer into name, just after the first '/' */
+const char*
+dict_remove_db_name(
+/*================*/
+	const char*	name)	/*!< in: table name in the form
+				dbname '/' tablename */
+{
+	const char*	separator = strchr(name, '/');
+
+	ut_a(separator);
+
+	return(separator + 1);
+}
+
+/** Look up a persistent table by id. dict_sys.get_table() is tried
+first; if it finds nothing, the table is loaded with
+dict_load_table_on_id() unless cached_only is set.
+@param[in]	table_id	persistent table identifier
+@param[in]	ignore_err	errors to ignore when loading
+@param[in]	cached_only	whether to skip loading
+@return persistent table
+@retval NULL if not found */
+static dict_table_t* dict_table_open_on_id_low(
+	table_id_t		table_id,
+	dict_err_ignore_t	ignore_err,
+	bool			cached_only)
+{
+	if (dict_table_t* cached = dict_sys.get_table(table_id)) {
+		return cached;
+	}
+
+	if (cached_only) {
+		return NULL;
+	}
+
+	return dict_load_table_on_id(table_id, ignore_err);
+}
+
+/**********************************************************************//**
+Try to drop any indexes after an aborted index creation.
+This can also be after a server kill during DROP INDEX.
+Runs in its own transaction with the data dictionary locked. The indexes
+are dropped only if the table is found, its reference count equals
+ref_count, table->drop_aborted is set and the table has no locks;
+otherwise nothing is changed. */
+static
+void
+dict_table_try_drop_aborted(
+/*========================*/
+ dict_table_t* table, /*!< in: table, or NULL if it
+ needs to be looked up again */
+ table_id_t table_id, /*!< in: table identifier */
+ uint32_t ref_count) /*!< in: expected table->n_ref_count */
+{
+ trx_t* trx;
+
+ trx = trx_create();
+ trx->op_info = "try to drop any indexes after an aborted index creation";
+ row_mysql_lock_data_dictionary(trx);
+ trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
+
+ if (table == NULL) {
+ table = dict_table_open_on_id_low(
+ table_id, DICT_ERR_IGNORE_FK_NOKEY, FALSE); /* FALSE: load the table if it is not cached */
+ } else {
+ ut_ad(table->id == table_id);
+ }
+
+ if (table && table->get_ref_count() == ref_count && table->drop_aborted
+ && !UT_LIST_GET_FIRST(table->locks)) {
+ /* Silence a debug assertion in row_merge_drop_indexes(). */
+ ut_d(table->acquire());
+ row_merge_drop_indexes(trx, table, true);
+ ut_d(table->release());
+ ut_ad(table->get_ref_count() == ref_count);
+ trx_commit_for_mysql(trx); /* committed only when the drop was attempted */
+ }
+
+ row_mysql_unlock_data_dictionary(trx);
+ trx->free();
+}
+
+/**********************************************************************//**
+When opening a table,
+try to drop any indexes after an aborted index creation.
+Release the dict_sys.mutex.
+The mutex is released on every path. The drop is attempted, after the
+release, only when try_drop is set, the table is given, has
+drop_aborted set, has a reference count of exactly 1 and has at least
+one index. */
+static
+void
+dict_table_try_drop_aborted_and_mutex_exit(
+/*=======================================*/
+	dict_table_t*	table,		/*!< in: table (may be NULL) */
+	ibool		try_drop)	/*!< in: TRUE if should try to
+					drop indexes whose online creation
+					was aborted */
+{
+	const bool	attempt_drop = try_drop
+		&& table != NULL
+		&& table->drop_aborted
+		&& table->get_ref_count() == 1
+		&& dict_table_get_first_index(table);
+
+	if (!attempt_drop) {
+		mutex_exit(&dict_sys.mutex);
+		return;
+	}
+
+	/* Attempt to drop the indexes whose online creation
+	was aborted. The id is read before the mutex is released. */
+	const table_id_t	table_id = table->id;
+
+	mutex_exit(&dict_sys.mutex);
+
+	dict_table_try_drop_aborted(table, table_id, 1);
+}
+
+/** Decrements the count of open handles of a table.
+Acquires dict_sys.mutex unless dict_locked is set. When the last handle
+is released on a table whose name contains '/' and for which
+dict_stats_is_persistent_enabled() holds, dict_stats_deinit() is called.
+If the mutex was acquired here it is released again, after which
+dict_table_try_drop_aborted() may be invoked (see the condition in the
+body). Finally the metadata lock is released when both thd and mdl are
+given and the thread has an MDL context.
+@param[in,out] table table
+@param[in] dict_locked data dictionary locked
+@param[in] try_drop try to drop any orphan indexes after
+ an aborted online index creation
+@param[in] thd thread to release MDL
+@param[in] mdl metadata lock or NULL if the thread
+ is a foreground one. */
+void
+dict_table_close(
+ dict_table_t* table,
+ bool dict_locked,
+ bool try_drop,
+ THD* thd,
+ MDL_ticket* mdl)
+{
+ if (!dict_locked) {
+ mutex_enter(&dict_sys.mutex);
+ }
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+ ut_a(table->get_ref_count() > 0);
+
+ const bool last_handle = table->release();
+
+ /* Force persistent stats re-read upon next open of the table
+ so that FLUSH TABLE can be used to forcibly fetch stats from disk
+ if they have been manually modified. We reset table->stat_initialized
+ only if table reference count is 0 because we do not want too frequent
+ stats re-reads (e.g. in other cases than FLUSH TABLE). */
+ if (last_handle && strchr(table->name.m_name, '/') != NULL
+ && dict_stats_is_persistent_enabled(table)) {
+
+ dict_stats_deinit(table);
+ }
+
+ MONITOR_DEC(MONITOR_TABLE_REFERENCE);
+
+ ut_ad(dict_lru_validate());
+ ut_ad(dict_sys.find(table));
+
+ if (!dict_locked) {
+ table_id_t table_id = table->id; /* read while the mutex is still held */
+ const bool drop_aborted = last_handle && try_drop
+ && table->drop_aborted
+ && dict_table_get_first_index(table);
+
+ mutex_exit(&dict_sys.mutex);
+
+ /* dict_table_try_drop_aborted() can generate undo logs.
+ So it should be avoided after shutdown of background
+ threads */
+ if (drop_aborted && !srv_undo_sources) { /* NOTE(review): the drop runs only while srv_undo_sources is false; confirm this polarity matches the comment above */
+ dict_table_try_drop_aborted(NULL, table_id, 0);
+ }
+ }
+
+ if (!thd || !mdl) { /* no MDL to release */
+ } else if (MDL_context *mdl_context= static_cast<MDL_context*>(
+ thd_mdl_context(thd))) {
+ mdl_context->release_lock(mdl);
+ }
+}
+
+/** Close the only open handle to a table and then drop the table, with
+dict_sys.mutex held the whole time so that the table cannot be evicted
+once its handle count reaches zero; dict_table_prevent_eviction() is
+therefore not needed. A failure of row_merge_drop_table() is only logged.
+@param[in]	trx	data dictionary transaction
+@param[in,out]	table	table */
+void
+dict_table_close_and_drop(
+	trx_t*		trx,
+	dict_table_t*	table)
+{
+	ut_d(dict_sys.assert_locked());
+	ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
+	ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
+
+	dict_table_close(table, true, false);
+
+#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG
+	/* The statistics of a newly created table must not have been
+	initialized by anybody at this point, so the table cannot have
+	been added for background stats gathering. */
+	ut_a(!table->stat_initialized);
+#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */
+
+	const dberr_t	drop_err = row_merge_drop_table(trx, table);
+
+	if (drop_err == DB_SUCCESS) {
+		return;
+	}
+
+	ib::error() << "At " << __FILE__ << ":" << __LINE__
+		<< " row_merge_drop_table returned error: " << drop_err
+		<< " table: " << table->name;
+}
+
+/** Check if the table has a given (non_virtual) column.
+The guessed position col_nr is tried first; if the name does not match
+there, all other defined columns are compared case-insensitively.
+@param[in]	table		table object
+@param[in]	col_name	column name
+@param[in]	col_nr		column number guessed, 0 as default
+@return column number if the table has the specified column,
+otherwise table->n_def */
+ulint
+dict_table_has_column(
+	const dict_table_t*	table,
+	const char*		col_name,
+	ulint			col_nr)
+{
+	/* Validate the arguments before table is dereferenced. */
+	ut_ad(table);
+	ut_ad(col_name);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	const ulint	col_max = table->n_def;
+
+	if (col_nr < col_max
+	    && innobase_strcasecmp(
+		    col_name, dict_table_get_col_name(table, col_nr)) == 0) {
+		return(col_nr);
+	}
+
+	/* The order of the columns may have changed; check the name
+	against all the other columns. */
+	for (ulint i = 0; i < col_max; i++) {
+		if (i != col_nr
+		    && innobase_strcasecmp(
+			    col_name, dict_table_get_col_name(table, i)) == 0) {
+
+			return(i);
+		}
+	}
+
+	return(col_max);
+}
+
+/** Retrieve the column name.
+The position of this column within table.cols (or table.v_cols for a
+virtual column) selects the entry in the matching NUL-separated name list.
+@param[in]	table	the table of this column
+@return the name, or NULL if the table has no name list of that kind */
+const char* dict_col_t::name(const dict_table_t& table) const
+{
+	ut_ad(table.magic_n == DICT_TABLE_MAGIC_N);
+
+	const char*	names;
+	size_t		to_skip;
+
+	if (!is_virtual()) {
+		to_skip = size_t(this - table.cols);
+		ut_ad(to_skip < table.n_def);
+		names = table.col_names;
+	} else {
+		to_skip = size_t(reinterpret_cast<const dict_v_col_t*>(this)
+				 - table.v_cols);
+		ut_ad(to_skip < table.n_v_def);
+		names = table.v_col_names;
+	}
+
+	if (!names) {
+		return(names);
+	}
+
+	/* Step over the names of the preceding columns. */
+	while (to_skip > 0) {
+		names += strlen(names) + 1;
+		to_skip--;
+	}
+
+	return(names);
+}
+
+/** Returns a virtual column's name.
+@param[in]	table	target table
+@param[in]	col_nr	virtual column number (nth virtual column)
+@return column name or NULL if column number out of range (or if the
+table has no virtual column name list). */
+const char*
+dict_table_get_v_col_name(
+	const dict_table_t*	table,
+	ulint			col_nr)
+{
+	ut_ad(table);
+	ut_ad(col_nr < table->n_v_def);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	if (col_nr >= table->n_v_def) {
+		return(NULL);
+	}
+
+	const char*	name = table->v_col_names;
+
+	if (name == NULL) {
+		return(NULL);
+	}
+
+	/* Walk past the col_nr preceding NUL-terminated names. */
+	for (ulint remaining = col_nr; remaining > 0; remaining--) {
+		name += strlen(name) + 1;
+	}
+
+	return(name);
+}
+
+/** Search for a virtual column's position in InnoDB according to its
+position in the original (MySQL) table definition.
+@param[in]	table	target table
+@param[in]	col_nr	column number (nth column in the MySQL table)
+@return virtual column's position in InnoDB, ULINT_UNDEFINED if not found */
+static
+ulint
+dict_table_get_v_col_pos_for_mysql(
+	const dict_table_t*	table,
+	ulint			col_nr)
+{
+	ut_ad(table);
+	ut_ad(col_nr < static_cast<ulint>(table->n_t_def));
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	for (ulint v_pos = 0; v_pos < table->n_v_def; v_pos++) {
+		const ulint	mysql_pos = dict_get_v_col_mysql_pos(
+			table->v_cols[v_pos].m_col.ind);
+
+		if (col_nr == mysql_pos) {
+			return(v_pos);
+		}
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/** Returns a virtual column's name according to its original
+MySQL table position.
+@param[in]	table	target table
+@param[in]	col_nr	column number (nth column in the table)
+@return column name, or NULL if no virtual column has that position */
+static
+const char*
+dict_table_get_v_col_name_mysql(
+	const dict_table_t*	table,
+	ulint			col_nr)
+{
+	const ulint	v_pos = dict_table_get_v_col_pos_for_mysql(
+		table, col_nr);
+
+	if (v_pos != ULINT_UNDEFINED) {
+		return(dict_table_get_v_col_name(table, v_pos));
+	}
+
+	return(NULL);
+}
+
+/** Get nth virtual column according to its original MySQL table position
+@param[in]	table	target table
+@param[in]	col_nr	column number in MySQL Table definition
+@return dict_v_col_t ptr, or NULL if no virtual column has that position */
+dict_v_col_t*
+dict_table_get_nth_v_col_mysql(
+	const dict_table_t*	table,
+	ulint			col_nr)
+{
+	const ulint	v_pos = dict_table_get_v_col_pos_for_mysql(
+		table, col_nr);
+
+	if (v_pos != ULINT_UNDEFINED) {
+		return(dict_table_get_nth_v_col(table, v_pos));
+	}
+
+	return(NULL);
+}
+
+
+/** Get all the FTS indexes on a table.
+The output vector must be empty on entry; a pointer to every index whose
+type is exactly DICT_FTS is pushed to it.
+@param[in]	table	table
+@param[out]	indexes	all FTS indexes on this table
+@return number of FTS indexes */
+ulint
+dict_table_get_all_fts_indexes(
+	const dict_table_t*	table,
+	ib_vector_t*		indexes)
+{
+	ut_a(ib_vector_size(indexes) == 0);
+
+	dict_index_t*	cur = dict_table_get_first_index(table);
+
+	while (cur != NULL) {
+		if (cur->type == DICT_FTS) {
+			ib_vector_push(indexes, &cur);
+		}
+
+		cur = dict_table_get_next_index(cur);
+	}
+
+	return(ib_vector_size(indexes));
+}
+
+/** Looks for column n in an index.
+For a clustered index the result of dict_col_get_clust_pos() is returned
+directly. Otherwise the index fields are scanned; *prefix_col_pos (if
+given) is set to the position of each field on the column as it is seen,
+and stays ULINT_UNDEFINED if no field is on the column.
+@param[in]	index		index
+@param[in]	n		column number
+@param[in]	inc_prefix	true=consider column prefixes too
+@param[in]	is_virtual	true==virtual column
+@param[out]	prefix_col_pos	col num if prefix
+@return position in internal representation of the index;
+ULINT_UNDEFINED if not contained */
+ulint
+dict_index_get_nth_col_or_prefix_pos(
+	const dict_index_t*	index,
+	ulint			n,
+	bool			inc_prefix,
+	bool			is_virtual,
+	ulint*			prefix_col_pos)
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	if (prefix_col_pos) {
+		*prefix_col_pos = ULINT_UNDEFINED;
+	}
+
+	const dict_col_t*	wanted;
+
+	if (!is_virtual) {
+		wanted = dict_table_get_nth_col(index->table, n);
+	} else {
+		wanted = &(dict_table_get_nth_v_col(index->table, n)->m_col);
+	}
+
+	if (dict_index_is_clust(index)) {
+		return(dict_col_get_clust_pos(wanted, index));
+	}
+
+	const ulint	field_count = dict_index_get_n_fields(index);
+
+	for (ulint i = 0; i < field_count; i++) {
+		const dict_field_t*	f = dict_index_get_nth_field(index, i);
+
+		if (f->col != wanted) {
+			continue;
+		}
+
+		if (prefix_col_pos) {
+			*prefix_col_pos = i;
+		}
+
+		/* A prefix field only counts when prefixes are accepted. */
+		if (inc_prefix || f->prefix_len == 0) {
+			return(i);
+		}
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/** Check if the index contains a column or a prefix of that column.
+For the primary index the answer is simply whether the column is
+non-virtual; otherwise the index fields are searched for the column.
+@param[in]	n		column number
+@param[in]	is_virtual	whether it is a virtual col
+@return whether the index contains the column or its prefix */
+bool dict_index_t::contains_col_or_prefix(ulint n, bool is_virtual) const
+{
+	ut_ad(magic_n == DICT_INDEX_MAGIC_N);
+
+	if (is_primary()) {
+		return(!is_virtual);
+	}
+
+	const dict_col_t*	wanted;
+
+	if (is_virtual) {
+		wanted = &dict_table_get_nth_v_col(table, n)->m_col;
+	} else {
+		wanted = dict_table_get_nth_col(table, n);
+	}
+
+	const dict_field_t* const	end = fields + n_fields;
+
+	for (const dict_field_t* f = fields; f != end; ++f) {
+		if (f->col == wanted) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/** Looks for a matching field in an index. The column has to be the same.
+The column in index must be complete, or must contain a prefix longer than
+the column in index2. That is, we must be able to construct the prefix in
+index2 from the prefix in index.
+@param[in]	index	index from which to search
+@param[in]	index2	index
+@param[in]	n	field number in index2
+@return position in internal representation of the index;
+ULINT_UNDEFINED if not contained */
+ulint
+dict_index_get_nth_field_pos(
+	const dict_index_t*	index,
+	const dict_index_t*	index2,
+	ulint			n)
+{
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	const dict_field_t* const	target
+		= dict_index_get_nth_field(index2, n);
+	const ulint			field_count
+		= dict_index_get_n_fields(index);
+
+	/* Are we looking for a MBR (Minimum Bound Box) field of
+	a spatial index */
+	const bool	want_mbr = (n == 0 && dict_index_is_spatial(index2));
+
+	for (ulint i = 0; i < field_count; i++) {
+		const dict_field_t*	cand
+			= dict_index_get_nth_field(index, i);
+
+		/* The first field of a spatial index is a transformed
+		MBR made out of the original column: its col pointer still
+		refers to the clustered index column but the content
+		differs, so it is skipped unless an MBR field is wanted. */
+		if (i == 0 && dict_index_is_spatial(index) && !want_mbr) {
+			continue;
+		}
+
+		if (cand->col != target->col) {
+			continue;
+		}
+
+		const bool	complete = cand->prefix_len == 0;
+		const bool	covers_prefix
+			= cand->prefix_len >= target->prefix_len
+			&& target->prefix_len != 0;
+
+		if (complete || covers_prefix) {
+			return(i);
+		}
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/** Parse the table file name into table name and database name.
+The database part and the table part of name.m_name are copied to local
+buffers (under dict_sys.mutex, to protect against renaming) and then
+converted with filename_to_tablename(). For a table whose table part
+starts with TEMP_FILE_PREFIX only the database name is produced and
+*tbl_name_len is left untouched. Otherwise the table part is cut at the
+first '#', if there is one.
+@tparam		dict_locked	whether dict_sys.mutex is being held
+@param[in,out]	db_name		database name buffer
+@param[in,out]	tbl_name	table name buffer
+@param[out]	db_name_len	database name length
+@param[out]	tbl_name_len	table name length
+@return whether the table name is visible to SQL */
+template<bool dict_locked>
+bool dict_table_t::parse_name(char (&db_name)[NAME_LEN + 1],
+                              char (&tbl_name)[NAME_LEN + 1],
+                              size_t *db_name_len, size_t *tbl_name_len) const
+{
+  char db_buf[MAX_DATABASE_NAME_LEN + 1];
+  char tbl_buf[MAX_TABLE_NAME_LEN + 1];
+
+  if (!dict_locked)
+    mutex_enter(&dict_sys.mutex); /* protect against renaming */
+  else
+    ut_ad(mutex_own(&dict_sys.mutex));
+  const size_t db_len= name.dblen();
+  ut_ad(db_len <= MAX_DATABASE_NAME_LEN);
+
+  memcpy(db_buf, name.m_name, db_len);
+  db_buf[db_len]= 0;
+
+  /* The table part starts one character after the database part. */
+  const char *const tbl_part= name.m_name + db_len + 1;
+  size_t tbl_len= strlen(tbl_part);
+
+  /* The prefix must be looked for in the table part, which is also what
+  tbl_len measures; comparing against the start of the whole name would
+  test the database name instead. */
+  const bool is_temp= tbl_len > TEMP_FILE_PREFIX_LENGTH &&
+    !strncmp(tbl_part, TEMP_FILE_PREFIX, TEMP_FILE_PREFIX_LENGTH);
+
+  if (is_temp);
+  else if (const char *is_part= static_cast<const char*>
+           (memchr(tbl_part, '#', tbl_len)))
+    tbl_len= static_cast<size_t>(is_part - tbl_part);
+
+  memcpy(tbl_buf, tbl_part, tbl_len);
+  tbl_buf[tbl_len]= 0;
+
+  if (!dict_locked)
+    mutex_exit(&dict_sys.mutex);
+
+  *db_name_len= filename_to_tablename(db_buf, db_name,
+                                      MAX_DATABASE_NAME_LEN + 1, true);
+
+  if (is_temp)
+    return false;
+
+  *tbl_name_len= filename_to_tablename(tbl_buf, tbl_name,
+                                       MAX_TABLE_NAME_LEN + 1, true);
+  return true;
+}
+
+template bool
+dict_table_t::parse_name<>(char(&)[NAME_LEN + 1], char(&)[NAME_LEN + 1],
+                           size_t*, size_t*) const;
+
+/** Acquire MDL shared for the table name.
+Outline of the steps below:
+- If table or mdl is NULL, the table is returned unchanged.
+- A table whose database name length is 0 is returned without MDL.
+- The name is parsed into database and table parts; if parse_name()
+  reports that the name is not visible to SQL, the table is returned
+  without MDL.
+- When trylock is false, the caller holds dict_sys.mutex; the table
+  reference is released and the mutex is dropped while the MDL request
+  is made, and the mutex is taken again afterwards.
+- The table is then opened again by its id. If it no longer exists, or is
+  not accessible, any MDL obtained is released and nullptr is returned.
+- If the name parsed after re-opening differs from the one the MDL was
+  requested for, that MDL is released and the procedure is retried with
+  the new name.
+@tparam trylock whether to use non-blocking operation
+@param[in,out] table table object
+@param[in,out] thd background thread
+@param[out] mdl mdl ticket
+@param[in] table_op operation to perform when opening
+@return table object after locking MDL shared
+@retval nullptr if the table is not readable, or if trylock && MDL blocked */
+template<bool trylock>
+dict_table_t*
+dict_acquire_mdl_shared(dict_table_t *table,
+ THD *thd,
+ MDL_ticket **mdl,
+ dict_table_op_t table_op)
+{
+ if (!table || !mdl)
+ return table;
+
+ MDL_context *mdl_context= static_cast<MDL_context*>(thd_mdl_context(thd));
+ size_t db_len;
+
+ if (trylock)
+ {
+ mutex_enter(&dict_sys.mutex);
+ db_len= dict_get_db_name_len(table->name.m_name);
+ mutex_exit(&dict_sys.mutex);
+ }
+ else
+ {
+ ut_ad(mutex_own(&dict_sys.mutex));
+ db_len= dict_get_db_name_len(table->name.m_name);
+ }
+
+ if (db_len == 0)
+ return table; /* InnoDB system tables are not covered by MDL */
+
+ if (!mdl_context)
+ return nullptr;
+
+ table_id_t table_id= table->id;
+ char db_buf[NAME_LEN + 1], db_buf1[NAME_LEN + 1];
+ char tbl_buf[NAME_LEN + 1], tbl_buf1[NAME_LEN + 1];
+ size_t tbl_len;
+ bool unaccessible= false;
+
+ if (!table->parse_name<!trylock>(db_buf, tbl_buf, &db_len, &tbl_len))
+ /* The name of an intermediate table starts with #sql */
+ return table;
+
+retry:
+ if (!unaccessible && (!table->is_readable() || table->corrupted))
+ {
+is_unaccessible: /* also entered by goto after the table has been re-opened */
+ if (*mdl)
+ {
+ mdl_context->release_lock(*mdl);
+ *mdl= nullptr;
+ }
+ unaccessible= true;
+ }
+
+ if (!trylock)
+ table->release(); /* the table is opened again by id further below */
+
+ if (unaccessible)
+ return nullptr;
+
+ if (!trylock)
+ mutex_exit(&dict_sys.mutex); /* dict_sys.mutex is not held while the MDL is requested */
+ {
+ MDL_request request;
+ MDL_REQUEST_INIT(&request,MDL_key::TABLE, db_buf, tbl_buf, MDL_SHARED, MDL_EXPLICIT);
+ if (trylock
+ ? mdl_context->try_acquire_lock(&request)
+ : mdl_context->acquire_lock(&request,
+ /* FIXME: use compatible type, and maybe
+ remove this parameter altogether! */
+ static_cast<double>(global_system_variables
+ .lock_wait_timeout)))
+ {
+ *mdl= nullptr;
+ if (trylock)
+ return nullptr;
+ }
+ else
+ *mdl= request.ticket;
+ }
+
+ if (!trylock)
+ mutex_enter(&dict_sys.mutex);
+ else if (!*mdl)
+ return nullptr;
+
+ table= dict_table_open_on_id(table_id, !trylock, table_op); /* the table may have been dropped or renamed meanwhile */
+
+ if (!table)
+ {
+ /* The table was dropped. */
+ if (*mdl)
+ {
+ mdl_context->release_lock(*mdl);
+ *mdl= nullptr;
+ }
+ return nullptr;
+ }
+
+ if (!table->is_accessible())
+ goto is_unaccessible;
+
+ size_t db1_len, tbl1_len;
+
+ if (!table->parse_name<!trylock>(db_buf1, tbl_buf1, &db1_len, &tbl1_len))
+ {
+ /* The table was renamed to #sql prefix.
+ Release MDL (if any) for the old name and return. */
+ if (*mdl)
+ {
+ mdl_context->release_lock(*mdl);
+ *mdl= nullptr;
+ }
+ return table;
+ }
+
+ if (*mdl)
+ {
+ if (db_len == db1_len && tbl_len == tbl1_len &&
+ !memcmp(db_buf, db_buf1, db_len) &&
+ !memcmp(tbl_buf, tbl_buf1, tbl_len))
+ return table;
+
+ /* The table was renamed. Release MDL for the old name and
+ try to acquire MDL for the new name. */
+ mdl_context->release_lock(*mdl);
+ *mdl= nullptr;
+ }
+
+ db_len= db1_len;
+ tbl_len= tbl1_len;
+
+ memcpy(tbl_buf, tbl_buf1, tbl_len + 1); /* + 1 also copies the byte after the name */
+ memcpy(db_buf, db_buf1, db_len + 1);
+ goto retry;
+}
+
+template dict_table_t*
+dict_acquire_mdl_shared<true>(dict_table_t*,THD*,MDL_ticket**,dict_table_op_t);
+
+/** Look up a table by numeric identifier.
+A table that is found gets its reference count incremented. If the
+dictionary was not locked by the caller, dict_sys.mutex is acquired here
+and released again via dict_table_try_drop_aborted_and_mutex_exit().
+@param[in]	table_id	table identifier
+@param[in]	dict_locked	data dictionary locked
+@param[in]	table_op	operation to perform when opening
+@param[in,out]	thd		background thread, or NULL to not acquire MDL
+@param[out]	mdl		mdl ticket, or NULL
+@return table, NULL if does not exist */
+dict_table_t*
+dict_table_open_on_id(table_id_t table_id, bool dict_locked,
+		      dict_table_op_t table_op, THD *thd,
+		      MDL_ticket **mdl)
+{
+	ut_ad(!dict_locked || !thd);
+
+	if (!dict_locked) {
+		mutex_enter(&dict_sys.mutex);
+	}
+
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	const dict_err_ignore_t	ignore_err
+		= table_op == DICT_TABLE_OP_LOAD_TABLESPACE
+		? DICT_ERR_IGNORE_RECOVER_LOCK
+		: DICT_ERR_IGNORE_FK_NOKEY;
+	const bool		cached_only
+		= table_op == DICT_TABLE_OP_OPEN_ONLY_IF_CACHED;
+
+	dict_table_t*	table = dict_table_open_on_id_low(
+		table_id, ignore_err, cached_only);
+
+	if (table != NULL) {
+		dict_sys.acquire(table);
+		MONITOR_INC(MONITOR_TABLE_REFERENCE);
+	}
+
+	if (dict_locked) {
+		/* The caller keeps holding the mutex. */
+		return table;
+	}
+
+	if (thd) {
+		table = dict_acquire_mdl_shared<false>(
+			table, thd, mdl, table_op);
+	}
+
+	dict_table_try_drop_aborted_and_mutex_exit(
+		table, table_op == DICT_TABLE_OP_DROP_ORPHAN);
+
+	return table;
+}
+
+/** Looks for column n position in the clustered index.
+@param[in]	table		table
+@param[in]	n		column number
+@param[out]	prefix_col_pos	passed on to dict_index_get_nth_col_pos()
+@return position in internal representation of the clustered index */
+unsigned
+dict_table_get_nth_col_pos(
+	const dict_table_t*	table,
+	ulint			n,
+	ulint*			prefix_col_pos)
+{
+	const ulint	clust_pos = dict_index_get_nth_col_pos(
+		dict_table_get_first_index(table), n, prefix_col_pos);
+
+	DBUG_ASSERT(clust_pos <= dict_index_t::MAX_N_FIELDS);
+
+	return static_cast<unsigned>(clust_pos);
+}
+
+/** Checks if a column is in the ordering columns of the clustered index
+of a table. Column prefixes are treated like whole columns.
+@param[in]	table	table
+@param[in]	n	column number
+@return TRUE if the column, or its prefix, is in the clustered key */
+ibool
+dict_table_col_in_clustered_key(
+	const dict_table_t*	table,
+	ulint			n)
+{
+	const dict_col_t*	wanted = dict_table_get_nth_col(table, n);
+	const dict_index_t*	clust = dict_table_get_first_index(table);
+
+	/* Only the first n_unique fields of the first index are checked. */
+	const ulint		key_fields = dict_index_get_n_unique(clust);
+
+	for (ulint i = 0; i < key_fields; i++) {
+		if (dict_index_get_nth_field(clust, i)->col == wanted) {
+			return(TRUE);
+		}
+	}
+
+	return(FALSE);
+}
+
+/** Initialise the data dictionary cache.
+Must be invoked on dict_sys itself, and only while it is not yet
+initialised. Sets up the table_LRU and table_non_LRU lists, the mutex,
+the three hash tables (all created with the same size, derived from the
+current buffer pool size), the latch and dict_foreign_err_mutex. Unless
+srv_read_only_mode is set, a temporary file is also created for
+dict_foreign_err_file. */
+void dict_sys_t::create()
+{
+ ut_ad(this == &dict_sys);
+ ut_ad(!is_initialised());
+ m_initialised= true;
+ UT_LIST_INIT(table_LRU, &dict_table_t::table_LRU);
+ UT_LIST_INIT(table_non_LRU, &dict_table_t::table_LRU); /* both lists link tables through the same table_LRU node */
+
+ mutex_create(LATCH_ID_DICT_SYS, &mutex);
+
+ const ulint hash_size = buf_pool_get_curr_size()
+ / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE);
+
+ table_hash.create(hash_size);
+ table_id_hash.create(hash_size);
+ temp_id_hash.create(hash_size);
+
+ rw_lock_create(dict_operation_lock_key, &latch, SYNC_DICT_OPERATION);
+
+ if (!srv_read_only_mode)
+ {
+ dict_foreign_err_file= os_file_create_tmpfile();
+ ut_a(dict_foreign_err_file); /* the temporary file must have been created */
+ }
+
+ mutex_create(LATCH_ID_DICT_FOREIGN_ERR, &dict_foreign_err_mutex);
+}
+
+/** Acquire a reference to a cached table.
+An evictable table is also moved to the front of dict_sys.table_LRU.
+@param[in,out]	table	table that is present in the cache */
+inline void dict_sys_t::acquire(dict_table_t* table)
+{
+	ut_ad(dict_sys.find(table));
+
+	if (!table->can_be_evicted)
+	{
+		/* Not on the LRU list: only take the reference. */
+		table->acquire();
+		return;
+	}
+
+	UT_LIST_REMOVE(dict_sys.table_LRU, table);
+	UT_LIST_ADD_FIRST(dict_sys.table_LRU, table);
+	table->acquire();
+}
+
+/**********************************************************************//**
+Returns a table object and increment its open handle count.
+NOTE! This is a high-level function to be used mainly from outside the
+'dict' module. Inside this directory dict_table_get_low
+is usually the appropriate function.
+The table is looked up in the cache first and loaded if it is not there.
+If the table is not readable and ignore_err has no bit set other than
+DICT_ERR_IGNORE_FK_NOKEY, eviction of the table is prevented; a corrupted
+table is then reported and NULL is returned without taking a reference,
+while any other unreadable table is returned with a reference taken.
+If the dictionary was not locked by the caller, dict_sys.mutex is
+acquired here and released on every return path.
+@return table, NULL if does not exist */
+dict_table_t*
+dict_table_open_on_name(
+/*====================*/
+ const char* table_name, /*!< in: table name */
+ ibool dict_locked, /*!< in: TRUE=data dictionary locked */
+ ibool try_drop, /*!< in: TRUE=try to drop any orphan
+ indexes after an aborted online
+ index creation */
+ dict_err_ignore_t
+ ignore_err) /*!< in: error to be ignored when
+ loading a table definition */
+{
+ dict_table_t* table;
+ DBUG_ENTER("dict_table_open_on_name");
+ DBUG_PRINT("dict_table_open_on_name", ("table: '%s'", table_name));
+
+ if (!dict_locked) {
+ mutex_enter(&dict_sys.mutex);
+ }
+
+ ut_ad(table_name);
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ table = dict_table_check_if_in_cache_low(table_name);
+
+ if (table == NULL) {
+ table = dict_load_table(table_name, ignore_err);
+ }
+
+ ut_ad(!table || table->cached);
+
+ if (table != NULL) {
+
+ /* If table is encrypted or corrupted */
+ if (!(ignore_err & ~DICT_ERR_IGNORE_FK_NOKEY)
+ && !table->is_readable()) {
+ /* Make life easy for drop table. */
+ dict_sys.prevent_eviction(table);
+
+ if (table->corrupted) {
+
+ ib::error() << "Table " << table->name
+ << " is corrupted. Please "
+ "drop the table and recreate.";
+ if (!dict_locked) {
+ mutex_exit(&dict_sys.mutex);
+ }
+
+ DBUG_RETURN(NULL);
+ }
+
+ dict_sys.acquire(table); /* NOTE(review): unlike the normal path below, MONITOR_TABLE_REFERENCE is not incremented here - confirm that this is intended */
+
+ if (!dict_locked) {
+ mutex_exit(&dict_sys.mutex);
+ }
+
+ DBUG_RETURN(table);
+ }
+
+ dict_sys.acquire(table);
+ MONITOR_INC(MONITOR_TABLE_REFERENCE);
+ }
+
+ ut_ad(dict_lru_validate());
+
+ if (!dict_locked) {
+ dict_table_try_drop_aborted_and_mutex_exit(table, try_drop); /* this call also releases dict_sys.mutex */
+ }
+
+ DBUG_RETURN(table);
+}
+
+/**********************************************************************//**
+Adds system columns to a table object.
+The table must not be in the cache yet and must have exactly
+n_cols - DATA_N_SYS_COLS columns defined. DB_ROW_ID, DB_TRX_ID and
+DB_ROLL_PTR are appended in that order, each of type DATA_SYS and
+flagged DATA_NOT_NULL. */
+void
+dict_table_add_system_columns(
+/*==========================*/
+ dict_table_t* table, /*!< in/out: table */
+ mem_heap_t* heap) /*!< in: temporary heap */
+{
+ ut_ad(table->n_def == table->n_cols - DATA_N_SYS_COLS);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+ ut_ad(!table->cached);
+
+ /* NOTE: the system columns MUST be added in the following order
+ (so that they can be indexed by the numerical value of DATA_ROW_ID,
+ etc.) and as the last columns of the table memory object.
+ The clustered index will not always physically contain all system
+ columns. */
+
+ dict_mem_table_add_col(table, heap, "DB_ROW_ID", DATA_SYS,
+ DATA_ROW_ID | DATA_NOT_NULL,
+ DATA_ROW_ID_LEN);
+
+ compile_time_assert(DATA_ROW_ID == 0); /* the constants double as the order in which the columns are added */
+ dict_mem_table_add_col(table, heap, "DB_TRX_ID", DATA_SYS,
+ DATA_TRX_ID | DATA_NOT_NULL,
+ DATA_TRX_ID_LEN);
+ compile_time_assert(DATA_TRX_ID == 1);
+ dict_mem_table_add_col(table, heap, "DB_ROLL_PTR", DATA_SYS,
+ DATA_ROLL_PTR | DATA_NOT_NULL,
+ DATA_ROLL_PTR_LEN);
+ compile_time_assert(DATA_ROLL_PTR == 2);
+
+ /* This check reminds that if a new system column is added to
+ the program, it should be dealt with here */
+ compile_time_assert(DATA_N_SYS_COLS == 3);
+}
+
+/** Add the table definition to the data dictionary cache.
+Marks this table as cached and then registers it with dict_sys.add(). */
+void dict_table_t::add_to_cache()
+{
+ cached = TRUE; /* set before the table is handed to dict_sys.add() */
+
+ dict_sys.add(this);
+}
+
+/** Add a table definition to the data dictionary cache.
+The table must not be in the cache yet. Its autoinc_mutex is constructed
+in place. The table is inserted into table_hash under the fold of its
+name, and into temp_id_hash (temporary tables) or table_id_hash (all
+other tables) under the fold of its id; it is asserted that no table with
+the same name or the same id is present. Finally the table is put at the
+head of table_LRU or table_non_LRU, depending on table->can_be_evicted.
+@param[in,out] table table to add */
+inline void dict_sys_t::add(dict_table_t* table)
+{
+ ut_ad(!find(table));
+
+ ulint fold = ut_fold_string(table->name.m_name);
+
+ new (&table->autoinc_mutex) std::mutex();
+
+ /* Look for a table with the same name: error if such exists */
+ {
+ dict_table_t* table2;
+ HASH_SEARCH(name_hash, &table_hash, fold,
+ dict_table_t*, table2, ut_ad(table2->cached),
+ !strcmp(table2->name.m_name, table->name.m_name));
+ ut_a(table2 == NULL);
+
+#ifdef UNIV_DEBUG
+ /* Look for the same table pointer with a different name */
+ HASH_SEARCH_ALL(name_hash, &table_hash,
+ dict_table_t*, table2, ut_ad(table2->cached),
+ table2 == table);
+ ut_ad(table2 == NULL);
+#endif /* UNIV_DEBUG */
+ }
+ HASH_INSERT(dict_table_t, name_hash, &table_hash, fold, table);
+
+ /* Look for a table with the same id: error if such exists */
+ hash_table_t* id_hash = table->is_temporary()
+ ? &temp_id_hash : &table_id_hash;
+ const ulint id_fold = ut_fold_ull(table->id);
+ {
+ dict_table_t* table2;
+ HASH_SEARCH(id_hash, id_hash, id_fold,
+ dict_table_t*, table2, ut_ad(table2->cached),
+ table2->id == table->id);
+ ut_a(table2 == NULL);
+
+#ifdef UNIV_DEBUG
+ /* Look for the same table pointer with a different id */
+ HASH_SEARCH_ALL(id_hash, id_hash,
+ dict_table_t*, table2, ut_ad(table2->cached),
+ table2 == table);
+ ut_ad(table2 == NULL);
+#endif /* UNIV_DEBUG */
+
+ HASH_INSERT(dict_table_t, id_hash, id_hash, id_fold, table);
+ }
+
+ UT_LIST_ADD_FIRST(table->can_be_evicted ? table_LRU : table_non_LRU,
+ table);
+ ut_ad(dict_lru_validate());
+}
+
+/** Test whether a table can be evicted from the LRU cache.
+The table must be marked evictable and must not take part in any foreign
+key relationship (both are asserted). It can be evicted when its
+reference count is zero, it has no locks and, if the adaptive hash index
+is compiled in, none of its indexes has adaptive hash index pages.
+@param[in]	table	table to test
+@return TRUE if table can be evicted. */
+static
+ibool
+dict_table_can_be_evicted(
+	dict_table_t*	table)
+{
+	ut_d(dict_sys.assert_locked());
+	ut_a(table->can_be_evicted);
+	ut_a(table->foreign_set.empty());
+	ut_a(table->referenced_set.empty());
+
+	if (table->get_ref_count() != 0) {
+		return(FALSE);
+	}
+
+	/* Transaction commit and rollback are called from outside the
+	handler interface, so there is a window where the reference
+	count can be zero while the table instance is still in "use". */
+	if (lock_table_has_locks(table)) {
+		return(FALSE);
+	}
+
+#ifdef BTR_CUR_HASH_ADAPT
+	/* The table cannot really be evicted while adaptive hash
+	index entries are pointing to any of its indexes. */
+	dict_index_t*	cur = dict_table_get_first_index(table);
+
+	while (cur != NULL) {
+		if (cur->n_ahi_pages()) {
+			return(FALSE);
+		}
+
+		cur = dict_table_get_next_index(cur);
+	}
+#endif /* BTR_CUR_HASH_ADAPT */
+
+	return(TRUE);
+}
+
+#ifdef BTR_CUR_HASH_ADAPT
+/** Create a copy of this index in a memory heap of its own.
+The index object, its name and its fields array are duplicated into the
+new heap; the three statistics arrays are allocated with
+mem_heap_zalloc() rather than copied. The copy gets its own rw-lock and,
+when BTR_CUR_ADAPT is defined, its own search_info; its zip_pad.mutex is
+constructed in place. The debug assertions list the states in which an
+index is expected to be cloned.
+@return a clone of this */
+dict_index_t *dict_index_t::clone() const
+{
+ ut_ad(n_fields);
+ ut_ad(!(type & (DICT_IBUF | DICT_SPATIAL | DICT_FTS)));
+ ut_ad(online_status == ONLINE_INDEX_COMPLETE);
+ ut_ad(is_committed());
+ ut_ad(!is_dummy);
+ ut_ad(!parser);
+ ut_ad(!online_log);
+ ut_ad(!rtr_track);
+
+ const size_t size= sizeof *this + n_fields * sizeof(*fields) +
+#ifdef BTR_CUR_ADAPT
+ sizeof *search_info +
+#endif
+ 1 + strlen(name) +
+ n_uniq * (sizeof *stat_n_diff_key_vals +
+ sizeof *stat_n_sample_sizes +
+ sizeof *stat_n_non_null_key_vals);
+
+ mem_heap_t* heap= mem_heap_create(size); /* size adds up everything allocated from the heap below */
+ dict_index_t *index= static_cast<dict_index_t*>(mem_heap_dup(heap, this,
+ sizeof *this));
+ *index= *this; /* NOTE(review): mem_heap_dup() above was already given sizeof *this bytes of *this; confirm whether this assignment is still needed */
+ rw_lock_create(index_tree_rw_lock_key, &index->lock, SYNC_INDEX_TREE);
+ index->heap= heap;
+ index->name= mem_heap_strdup(heap, name);
+ index->fields= static_cast<dict_field_t*>
+ (mem_heap_dup(heap, fields, n_fields * sizeof *fields));
+#ifdef BTR_CUR_ADAPT
+ index->search_info= btr_search_info_create(index->heap);
+#endif /* BTR_CUR_ADAPT */
+ index->stat_n_diff_key_vals= static_cast<ib_uint64_t*>
+ (mem_heap_zalloc(heap, n_uniq * sizeof *stat_n_diff_key_vals));
+ index->stat_n_sample_sizes= static_cast<ib_uint64_t*>
+ (mem_heap_zalloc(heap, n_uniq * sizeof *stat_n_sample_sizes));
+ index->stat_n_non_null_key_vals= static_cast<ib_uint64_t*>
+ (mem_heap_zalloc(heap, n_uniq * sizeof *stat_n_non_null_key_vals));
+ new (&index->zip_pad.mutex) std::mutex();
+ return index;
+}
+
+/** Clone this index for lazy dropping of the adaptive hash.
+If search_info->ref_count is zero, nothing is done. Otherwise this index
+is moved from table->indexes to the end of table->freed_indexes and
+marked with set_freed(), and a clone of it is inserted into
+table->indexes at the position this index used to have.
+@return this or a clone */
+dict_index_t *dict_index_t::clone_if_needed()
+{
+ if (!search_info->ref_count)
+ return this;
+ dict_index_t *prev= UT_LIST_GET_PREV(indexes, this); /* remember the position before unlinking */
+
+ UT_LIST_REMOVE(table->indexes, this);
+ UT_LIST_ADD_LAST(table->freed_indexes, this);
+ dict_index_t *index= clone();
+ set_freed();
+ if (prev)
+ UT_LIST_INSERT_AFTER(table->indexes, prev, index);
+ else
+ UT_LIST_ADD_FIRST(table->indexes, index);
+ return index;
+}
+#endif /* BTR_CUR_HASH_ADAPT */
+
+/**********************************************************************//**
+Make room in the table cache by evicting an unused table. The unused table
+should not be part of FK relationship and currently not used in any user
+transaction. There is no guarantee that it will remove a table.
+The scan starts at the last entry of dict_sys.table_LRU and moves towards
+the first one. It stops when the list is exhausted, when pct_check percent
+of the list have been looked at, or when the number of remaining tables
+is no longer above max_tables. pct_check must be in the range 1..100.
+@return number of tables evicted. If the number of tables in the dict_LRU
+is less than max_tables it will not do anything. */
+ulint
+dict_make_room_in_cache(
+/*====================*/
+ ulint max_tables, /*!< in: max tables allowed in cache */
+ ulint pct_check) /*!< in: max percent to check */
+{
+ ulint i;
+ ulint len;
+ dict_table_t* table;
+ ulint check_up_to;
+ ulint n_evicted = 0;
+
+ ut_a(pct_check > 0);
+ ut_a(pct_check <= 100);
+ ut_d(dict_sys.assert_locked());
+ ut_ad(dict_lru_validate());
+
+ i = len = UT_LIST_GET_LEN(dict_sys.table_LRU);
+
+ if (len < max_tables) {
+ return(0);
+ }
+
+ check_up_to = len - ((len * pct_check) / 100); /* i counts down from len and the scan stops once it reaches this value */
+
+ /* Check for overflow */
+ ut_a(i == 0 || check_up_to <= i);
+
+ /* Find a suitable candidate to evict from the cache. Don't scan the
+ entire LRU list. Only scan pct_check list entries. */
+
+ for (table = UT_LIST_GET_LAST(dict_sys.table_LRU);
+ table != NULL
+ && i > check_up_to
+ && (len - n_evicted) > max_tables;
+ --i) {
+
+ dict_table_t* prev_table;
+
+ prev_table = UT_LIST_GET_PREV(table_LRU, table); /* fetched first, because the table may be removed below */
+
+ if (dict_table_can_be_evicted(table)) {
+ ut_ad(!table->fts);
+ dict_sys.remove(table, true);
+
+ ++n_evicted;
+ }
+
+ table = prev_table;
+ }
+
+ return(n_evicted);
+}
+
+/** Looks for an index with the given id given a table instance.
+The indexes of the table are examined in list order and the first one
+whose id matches is returned.
+@param[in]	table	table instance
+@param[in]	id	index id
+@return index or NULL */
+dict_index_t*
+dict_table_find_index_on_id(
+	const dict_table_t*	table,
+	index_id_t		id)
+{
+	dict_index_t*	cur = dict_table_get_first_index(table);
+
+	/* Advance until a match is found or the list ends. */
+	while (cur != NULL && !(id == cur->id)) {
+		cur = dict_table_get_next_index(cur);
+	}
+
+	return(cur);
+}
+
+/** Looks for an index with the given id. NOTE that we do not reserve
+the dictionary mutex: this function is for emergency purposes like
+printing info of a corrupt database page!
+The tables on dict_sys.table_LRU are searched first, then those on
+dict_sys.table_non_LRU.
+@param[in]	id	index id
+@return index or NULL if not found in cache (or if dict_sys is not
+initialised) */
+dict_index_t*
+dict_index_find_on_id_low(
+	index_id_t	id)
+{
+	if (!dict_sys.is_initialised()) return NULL;
+
+	/* Scan one table list, starting from the given table. */
+	auto scan = [id](dict_table_t* t) -> dict_index_t* {
+		for (; t != NULL; t = UT_LIST_GET_NEXT(table_LRU, t)) {
+			dict_index_t*	index
+				= dict_table_find_index_on_id(t, id);
+
+			if (index != NULL) {
+				return(index);
+			}
+		}
+
+		return(NULL);
+	};
+
+	if (dict_index_t* found
+	    = scan(UT_LIST_GET_FIRST(dict_sys.table_LRU))) {
+		return(found);
+	}
+
+	return(scan(UT_LIST_GET_FIRST(dict_sys.table_non_LRU)));
+}
+
+/** Function object that detaches a foreign key constraint from the
+referenced_set of its referenced table (if it has one) and then frees the
+foreign key object with dict_foreign_free(). The constraint is not
+removed from the foreign_set of the table that contains it. */
+struct dict_foreign_remove_partial
+{
+	void operator()(dict_foreign_t* foreign) {
+		if (dict_table_t* referenced = foreign->referenced_table) {
+			referenced->referenced_set.erase(foreign);
+		}
+
+		dict_foreign_free(foreign);
+	}
+};
+
+/**********************************************************************//**
+Renames a table object in the dictionary cache. For a file-per-table
+tablespace the data file (and, with DATA DIRECTORY, the .isl link file)
+is renamed as well. The caller must hold dict_sys.mutex.
+@return DB_SUCCESS, or an error code: DB_ERROR if the cache already
+contains a table called new_name, DB_OUT_OF_MEMORY if a file path could
+not be built, DB_TABLESPACE_EXISTS if the new link file could not be
+created, or the error returned by the tablespace rename */
+dberr_t
+dict_table_rename_in_cache(
+/*=======================*/
+	dict_table_t*	table,		/*!< in/out: table */
+	const char*	new_name,	/*!< in: new name */
+	bool		rename_also_foreigns,
+					/*!< in: in ALTER TABLE we want
+					to preserve the original table name
+					in constraints which reference it */
+	bool		replace_new_file)
+					/*!< in: whether to replace the
+					file with the new name
+					(as part of rolling back TRUNCATE) */
+{
+	dberr_t		err;
+	dict_foreign_t*	foreign;
+	ulint		fold;
+	char		old_name[MAX_FULL_NAME_LEN + 1];
+	os_file_type_t	ftype;
+
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	/* store the old/current name to an automatic variable */
+	ut_a(strlen(table->name.m_name) < sizeof old_name);
+	strcpy(old_name, table->name.m_name);
+
+	fold = ut_fold_string(new_name);
+
+	/* Look for a table with the same name: error if such exists */
+	dict_table_t*	table2;
+	HASH_SEARCH(name_hash, &dict_sys.table_hash, fold,
+			dict_table_t*, table2, ut_ad(table2->cached),
+			(strcmp(table2->name.m_name, new_name) == 0));
+	DBUG_EXECUTE_IF("dict_table_rename_in_cache_failure",
+		if (table2 == NULL) {
+			table2 = (dict_table_t*) -1;
+		} );
+	if (table2) {
+		ib::error() << "Cannot rename table '" << old_name
+			<< "' to '" << new_name << "' since the"
+			" dictionary cache already contains '" << new_name << "'.";
+		return(DB_ERROR);
+	}
+
+	/* If the table is stored in a single-table tablespace, rename the
+	.ibd file and rebuild the .isl file if needed. */
+
+	if (!table->space) {
+		bool		exists;
+		char*		filepath;
+
+		ut_ad(dict_table_is_file_per_table(table));
+		ut_ad(!table->is_temporary());
+
+		/* Make sure the data_dir_path is set. */
+		dict_get_and_save_data_dir_path(table, true);
+
+		if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+			ut_a(table->data_dir_path);
+
+			filepath = fil_make_filepath(
+				table->data_dir_path, table->name.m_name,
+				IBD, true);
+		} else {
+			filepath = fil_make_filepath(
+				NULL, table->name.m_name, IBD, false);
+		}
+
+		if (filepath == NULL) {
+			return(DB_OUT_OF_MEMORY);
+		}
+
+		fil_delete_tablespace(table->space_id, !table->space);
+
+		/* Delete any temp file hanging around. */
+		if (os_file_status(filepath, &exists, &ftype)
+		    && exists
+		    && !os_file_delete_if_exists(innodb_temp_file_key,
+						 filepath, NULL)) {
+
+			ib::info() << "Delete of " << filepath << " failed.";
+		}
+		ut_free(filepath);
+
+	} else if (dict_table_is_file_per_table(table)) {
+		char*		new_path;
+		const char*	old_path = UT_LIST_GET_FIRST(table->space->chain)
+			->name;
+
+		ut_ad(!table->is_temporary());
+
+		if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+			new_path = os_file_make_new_pathname(
+				old_path, new_name);
+			err = RemoteDatafile::create_link_file(
+				new_name, new_path);
+
+			if (err != DB_SUCCESS) {
+				ut_free(new_path);
+				return(DB_TABLESPACE_EXISTS);
+			}
+		} else {
+			new_path = fil_make_filepath(
+				NULL, new_name, IBD, false);
+		}
+
+		/* New filepath must not exist. */
+		err = table->space->rename(new_name, new_path, true,
+					   replace_new_file);
+		ut_free(new_path);
+
+		/* If the tablespace is remote, a new .isl file was created
+		If success, delete the old one. If not, delete the new one. */
+		if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+			RemoteDatafile::delete_link_file(
+				err == DB_SUCCESS ? old_name : new_name);
+		}
+
+		if (err != DB_SUCCESS) {
+			return err;
+		}
+	}
+
+	/* Remove table from the hash tables of tables */
+	HASH_DELETE(dict_table_t, name_hash, &dict_sys.table_hash,
+		    ut_fold_string(old_name), table);
+
+	if (strlen(new_name) > strlen(table->name.m_name)) {
+		/* We allocate MAX_FULL_NAME_LEN + 1 bytes here to avoid
+		memory fragmentation, we assume a repeated calls of
+		ut_realloc() with the same size do not cause fragmentation */
+		ut_a(strlen(new_name) <= MAX_FULL_NAME_LEN);
+
+		table->name.m_name = static_cast<char*>(
+			ut_realloc(table->name.m_name, MAX_FULL_NAME_LEN + 1));
+	}
+	strcpy(table->name.m_name, new_name);
+
+	/* Add table to hash table of tables */
+	HASH_INSERT(dict_table_t, name_hash, &dict_sys.table_hash, fold,
+		    table);
+
+	if (!rename_also_foreigns) {
+		/* In ALTER TABLE we think of the rename table operation
+		in the direction table -> temporary table (#sql...)
+		as dropping the table with the old name and creating
+		a new with the new name. Thus we kind of drop the
+		constraints from the dictionary cache here. The foreign key
+		constraints will be inherited to the new table from the
+		system tables through a call of dict_load_foreigns. */
+
+		/* Remove the foreign constraints from the cache */
+		std::for_each(table->foreign_set.begin(),
+			      table->foreign_set.end(),
+			      dict_foreign_remove_partial());
+		table->foreign_set.clear();
+
+		/* Reset table field in referencing constraints */
+		for (dict_foreign_set::iterator it
+			= table->referenced_set.begin();
+		     it != table->referenced_set.end();
+		     ++it) {
+
+			foreign = *it;
+			foreign->referenced_table = NULL;
+			foreign->referenced_index = NULL;
+
+		}
+
+		/* Make the set of referencing constraints empty */
+		table->referenced_set.clear();
+
+		return(DB_SUCCESS);
+	}
+
+	/* Update the table name fields in foreign constraints, and update also
+	the constraint id of new format >= 4.0.18 constraints. Note that at
+	this point we have already changed table->name to the new name. */
+
+	/* Each constraint is taken out of table->foreign_set, updated and
+	collected in fk_set; the sets are swapped once the loop is done. */
+	dict_foreign_set	fk_set;
+
+	for (;;) {
+
+		dict_foreign_set::iterator	it
+			= table->foreign_set.begin();
+
+		if (it == table->foreign_set.end()) {
+			break;
+		}
+
+		foreign = *it;
+
+		/* The constraint is re-inserted into the referenced
+		table's set at the end of this iteration, after its
+		names have been updated. */
+		if (foreign->referenced_table) {
+			foreign->referenced_table->referenced_set.erase(foreign);
+		}
+
+		if (strlen(foreign->foreign_table_name)
+		    < strlen(table->name.m_name)) {
+			/* Allocate a longer name buffer;
+			TODO: store buf len to save memory */
+
+			foreign->foreign_table_name = mem_heap_strdup(
+				foreign->heap, table->name.m_name);
+			dict_mem_foreign_table_name_lookup_set(foreign, TRUE);
+		} else {
+			strcpy(foreign->foreign_table_name,
+			       table->name.m_name);
+			dict_mem_foreign_table_name_lookup_set(foreign, FALSE);
+		}
+		if (strchr(foreign->id, '/')) {
+			/* This is a >= 4.0.18 format id */
+
+			ulint	db_len;
+			char*	old_id;
+			char	old_name_cs_filename[MAX_FULL_NAME_LEN+1];
+			uint	errors = 0;
+
+			/* All table names are internally stored in charset
+			my_charset_filename (except the temp tables and the
+			partition identifier suffix in partition tables). The
+			foreign key constraint names are internally stored
+			in UTF-8 charset. The variable fkid here is used
+			to store foreign key constraint name in charset
+			my_charset_filename for comparison further below. */
+			char	fkid[MAX_TABLE_NAME_LEN+20];
+			ibool	on_tmp = FALSE;
+
+			/* The old table name in my_charset_filename is stored
+			in old_name_cs_filename */
+
+			strcpy(old_name_cs_filename, old_name);
+			old_name_cs_filename[MAX_FULL_NAME_LEN] = '\0';
+			if (strstr(old_name, TEMP_TABLE_PATH_PREFIX) == NULL) {
+
+				innobase_convert_to_system_charset(
+					strchr(old_name_cs_filename, '/') + 1,
+					strchr(old_name, '/') + 1,
+					MAX_TABLE_NAME_LEN, &errors);
+
+				if (errors) {
+					/* There has been an error to convert
+					old table into UTF-8. This probably
+					means that the old table name is
+					actually in UTF-8. */
+					innobase_convert_to_filename_charset(
+						strchr(old_name_cs_filename,
+						       '/') + 1,
+						strchr(old_name, '/') + 1,
+						MAX_TABLE_NAME_LEN);
+				} else {
+					/* Old name already in
+					my_charset_filename */
+					strcpy(old_name_cs_filename, old_name);
+					old_name_cs_filename[MAX_FULL_NAME_LEN]
+						= '\0';
+				}
+			}
+
+			/* strncpy() does not NUL-terminate the destination
+			when the source fills the whole count. fkid is read
+			by strstr(), strchr() and strlen() below, so bound
+			the copy by the buffer size and terminate it
+			explicitly. */
+			strncpy(fkid, foreign->id, (sizeof fkid) - 1);
+			fkid[(sizeof fkid) - 1] = '\0';
+
+			if (strstr(fkid, TEMP_TABLE_PATH_PREFIX) == NULL) {
+				innobase_convert_to_filename_charset(
+					strchr(fkid, '/') + 1,
+					strchr(foreign->id, '/') + 1,
+					MAX_TABLE_NAME_LEN+20);
+			} else {
+				on_tmp = TRUE;
+			}
+
+			old_id = mem_strdup(foreign->id);
+
+			if (strlen(fkid) > strlen(old_name_cs_filename)
+			    + ((sizeof dict_ibfk) - 1)
+			    && !memcmp(fkid, old_name_cs_filename,
+				       strlen(old_name_cs_filename))
+			    && !memcmp(fkid + strlen(old_name_cs_filename),
+				       dict_ibfk, (sizeof dict_ibfk) - 1)) {
+
+				/* This is a generated >= 4.0.18 format id */
+
+				char	table_name[MAX_TABLE_NAME_LEN + 1];
+				uint	errors = 0;
+
+				if (strlen(table->name.m_name)
+				    > strlen(old_name)) {
+					foreign->id = static_cast<char*>(
+						mem_heap_alloc(
+							foreign->heap,
+							strlen(table->name.m_name)
+							+ strlen(old_id) + 1));
+				}
+
+				/* Convert the table name to UTF-8 */
+				strncpy(table_name, table->name.m_name,
+					MAX_TABLE_NAME_LEN);
+				table_name[MAX_TABLE_NAME_LEN] = '\0';
+				innobase_convert_to_system_charset(
+					strchr(table_name, '/') + 1,
+					strchr(table->name.m_name, '/') + 1,
+					MAX_TABLE_NAME_LEN, &errors);
+
+				if (errors) {
+					/* Table name could not be converted
+					from charset my_charset_filename to
+					UTF-8. This means that the table name
+					is already in UTF-8 (#mysql50#). */
+					strncpy(table_name, table->name.m_name,
+						MAX_TABLE_NAME_LEN);
+					table_name[MAX_TABLE_NAME_LEN] = '\0';
+				}
+
+				/* Replace the prefix 'databasename/tablename'
+				with the new names */
+				strcpy(foreign->id, table_name);
+				if (on_tmp) {
+					strcat(foreign->id,
+					       old_id + strlen(old_name));
+				} else {
+					sprintf(strchr(foreign->id, '/') + 1,
+						"%s%s",
+						strchr(table_name, '/') +1,
+						strstr(old_id, "_ibfk_") );
+				}
+
+			} else {
+				/* This is a >= 4.0.18 format id where the user
+				gave the id name */
+				db_len = dict_get_db_name_len(
+					table->name.m_name) + 1;
+
+				if (db_len - 1
+				    > dict_get_db_name_len(foreign->id)) {
+
+					foreign->id = static_cast<char*>(
+						mem_heap_alloc(
+							foreign->heap,
+							db_len + strlen(old_id) + 1));
+				}
+
+				/* Replace the database prefix in id with the
+				one from table->name */
+
+				memcpy(foreign->id,
+				       table->name.m_name, db_len);
+
+				strcpy(foreign->id + db_len,
+				       dict_remove_db_name(old_id));
+			}
+
+			ut_free(old_id);
+		}
+
+		table->foreign_set.erase(it);
+		fk_set.insert(foreign);
+
+		if (foreign->referenced_table) {
+			foreign->referenced_table->referenced_set.insert(foreign);
+		}
+	}
+
+	ut_a(table->foreign_set.empty());
+	table->foreign_set.swap(fk_set);
+
+	/* Finally store the new table name in the constraints that
+	reference this table. */
+	for (dict_foreign_set::iterator it = table->referenced_set.begin();
+	     it != table->referenced_set.end();
+	     ++it) {
+
+		foreign = *it;
+
+		if (strlen(foreign->referenced_table_name)
+		    < strlen(table->name.m_name)) {
+			/* Allocate a longer name buffer;
+			TODO: store buf len to save memory */
+
+			foreign->referenced_table_name = mem_heap_strdup(
+				foreign->heap, table->name.m_name);
+
+			dict_mem_referenced_table_name_lookup_set(
+				foreign, TRUE);
+		} else {
+			/* Use the same buffer */
+			strcpy(foreign->referenced_table_name,
+			       table->name.m_name);
+
+			dict_mem_referenced_table_name_lookup_set(
+				foreign, FALSE);
+		}
+	}
+
+	return(DB_SUCCESS);
+}
+
+/**********************************************************************//**
+Gives a cached table object a new id and moves it to the matching cell of
+dict_sys.table_id_hash. This is used in DISCARD TABLESPACE. */
+void
+dict_table_change_id_in_cache(
+/*==========================*/
+	dict_table_t*	table,	/*!< in/out: table object already in cache */
+	table_id_t	new_id)	/*!< in: new id to set */
+{
+	ut_ad(mutex_own(&dict_sys.mutex));
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+	ut_ad(!table->is_temporary());
+
+	const ulint	old_fold = ut_fold_ull(table->id);
+	const ulint	new_fold = ut_fold_ull(new_id);
+
+	/* Unhash under the old id, switch the id, rehash under the new one */
+	HASH_DELETE(dict_table_t, id_hash, &dict_sys.table_id_hash,
+		    old_fold, table);
+
+	table->id = new_id;
+
+	HASH_INSERT(dict_table_t, id_hash, &dict_sys.table_id_hash,
+		    new_fold, table);
+}
+
+/** Evict a table definition from the InnoDB data dictionary cache.
+The table must have no references and no record locks.
+@param[in,out]	table	cached table definition to be evicted
+@param[in]	lru	whether this is part of least-recently-used eviction
+@param[in]	keep	whether to keep (not free) the object */
+void dict_sys_t::remove(dict_table_t* table, bool lru, bool keep)
+{
+	ut_ad(dict_lru_validate());
+	ut_a(table->get_ref_count() == 0);
+	ut_a(table->n_rec_locks == 0);
+	ut_ad(find(table));
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	/* Drop the constraints owned by this table from the cache */
+	std::for_each(table->foreign_set.begin(), table->foreign_set.end(),
+		      dict_foreign_remove_partial());
+	table->foreign_set.clear();
+
+	/* Constraints of other tables that point here lose their
+	referenced table and index */
+	dict_foreign_set::iterator	ref = table->referenced_set.begin();
+
+	while (ref != table->referenced_set.end()) {
+		dict_foreign_t*	referencing = *ref;
+
+		referencing->referenced_table = NULL;
+		referencing->referenced_index = NULL;
+		++ref;
+	}
+
+	/* Remove the indexes from the cache, always taking the last one */
+	while (dict_index_t* last = UT_LIST_GET_LAST(table->indexes)) {
+		dict_index_remove_from_cache_low(table, last, lru);
+	}
+
+	/* Unhash the table by name */
+	HASH_DELETE(dict_table_t, name_hash, &table_hash,
+		    ut_fold_string(table->name.m_name), table);
+
+	/* Unhash the table by id; temporary tables have their own hash */
+	hash_table_t*	ids = table->is_temporary()
+		? &temp_id_hash : &table_id_hash;
+	const ulint	id_fold = ut_fold_ull(table->id);
+	HASH_DELETE(dict_table_t, id_hash, ids, id_fold, table);
+
+	/* Remove table from LRU or non-LRU list. */
+	if (!table->can_be_evicted) {
+		UT_LIST_REMOVE(table_non_LRU, table);
+	} else {
+		UT_LIST_REMOVE(table_LRU, table);
+	}
+
+	if (lru && table->drop_aborted) {
+		/* When evicting the table definition,
+		drop the orphan indexes from the data dictionary
+		and free the index pages. */
+		trx_t*	trx = trx_create();
+
+		ut_d(dict_sys.assert_locked());
+		/* Mimic row_mysql_lock_data_dictionary(). */
+		trx->dict_operation_lock_mode = RW_X_LATCH;
+
+		trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
+		row_merge_drop_indexes_dict(trx, table->id);
+		trx_commit_for_mysql(trx);
+		trx->dict_operation_lock_mode = 0;
+		trx->free();
+	}
+
+	/* Free virtual column template if any */
+	if (table->vc_templ) {
+		dict_free_vc_templ(table->vc_templ);
+		UT_DELETE(table->vc_templ);
+	}
+
+	table->autoinc_mutex.~mutex();
+
+	if (keep) {
+		/* The caller retains the table object itself. */
+		return;
+	}
+
+#ifdef BTR_CUR_HASH_ADAPT
+	if (UNIV_UNLIKELY(UT_LIST_GET_LEN(table->freed_indexes) != 0)) {
+		/* Some indexes are still on table->freed_indexes, so
+		the table object is not freed here. */
+		if (table->fts) {
+			fts_optimize_remove_table(table);
+			fts_free(table);
+			table->fts = NULL;
+		}
+
+		table->vc_templ = NULL;
+		table->id = 0;
+		return;
+	}
+#endif /* BTR_CUR_HASH_ADAPT */
+
+	dict_mem_table_free(table);
+}
+
+/****************************************************************//**
+Checks whether a column name collides with one of the names reserved for
+InnoDB system columns. The comparison uses innobase_strcasecmp().
+@return TRUE if name is reserved */
+ibool
+dict_col_name_is_reserved(
+/*======================*/
+	const char*	name)	/*!< in: column name */
+{
+	static const char*	reserved_names[] = {
+		"DB_ROW_ID", "DB_TRX_ID", "DB_ROLL_PTR"
+	};
+
+	compile_time_assert(UT_ARR_SIZE(reserved_names) == DATA_N_SYS_COLS);
+
+	const char* const* const	end
+		= reserved_names + UT_ARR_SIZE(reserved_names);
+
+	for (const char* const* it = reserved_names; it != end; ++it) {
+		if (!innobase_strcasecmp(name, *it)) {
+			return(TRUE);
+		}
+	}
+
+	return(FALSE);
+}
+
+/** Adds an index to the dictionary cache, with possible indexing newly
+added column.
+The object passed in is only used as a template: a cache-internal
+representation is built from it, appended as the last index of its
+table, and handed back through the index reference.
+@param[in,out] index index; NOTE! The index memory
+ object is freed in this function! On success it is
+ replaced by the cached representation; if the index
+ columns cannot be resolved it is set to NULL
+@param[in] page_no root page number of the index
+@param[in] add_v virtual columns being added along with ADD INDEX
+@return DB_SUCCESS, or DB_CORRUPTION */
+dberr_t
+dict_index_add_to_cache(
+ dict_index_t*& index,
+ ulint page_no,
+ const dict_add_v_col_t* add_v)
+{
+ dict_index_t* new_index;
+ ulint n_ord;
+ ulint i;
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+ ut_ad(index->n_def == index->n_fields);
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+ ut_ad(!dict_index_is_online_ddl(index));
+ ut_ad(!dict_index_is_ibuf(index));
+
+ ut_d(mem_heap_validate(index->heap));
+ ut_a(!dict_index_is_clust(index)
+ || UT_LIST_GET_LEN(index->table->indexes) == 0); /* a clustered index must be the first index added */
+ ut_ad(dict_index_is_clust(index) || !index->table->no_rollback());
+
+ if (!dict_index_find_cols(index, add_v)) {
+
+ dict_mem_index_free(index);
+ index = NULL;
+ return DB_CORRUPTION;
+ }
+
+ /* Build the cache internal representation of the index,
+ containing also the added system fields */
+
+ if (dict_index_is_clust(index)) {
+ new_index = dict_index_build_internal_clust(index);
+ } else {
+ new_index = (index->type & DICT_FTS)
+ ? dict_index_build_internal_fts(index)
+ : dict_index_build_internal_non_clust(index);
+ new_index->n_core_null_bytes = static_cast<uint8_t>(
+ UT_BITS_IN_BYTES(unsigned(new_index->n_nullable)));
+ }
+
+ /* Set the n_fields value in new_index to the actual defined
+ number of fields in the cache internal representation */
+
+ new_index->n_fields = new_index->n_def;
+ new_index->trx_id = index->trx_id;
+ new_index->set_committed(index->is_committed());
+ new_index->nulls_equal = index->nulls_equal;
+#ifdef MYSQL_INDEX_DISABLE_AHI
+ new_index->disable_ahi = index->disable_ahi;
+#endif
+
+ n_ord = new_index->n_uniq;
+ /* Flag the ordering columns and also set column max_prefix.
+ Only the first n_uniq fields of new_index are visited. */
+
+ for (i = 0; i < n_ord; i++) {
+ const dict_field_t* field
+ = dict_index_get_nth_field(new_index, i);
+
+ /* Check the column being added in the index for
+ the first time and flag the ordering column. */
+ if (field->col->ord_part == 0 ) {
+ field->col->max_prefix = field->prefix_len;
+ field->col->ord_part = 1;
+ } else if (field->prefix_len == 0) {
+ /* Set the max_prefix for a column to 0 if
+ its prefix length is 0 (for this index)
+ even if it was a part of any other index
+ with some prefix length. */
+ field->col->max_prefix = 0;
+ } else if (field->col->max_prefix != 0
+ && field->prefix_len
+ > field->col->max_prefix) {
+ /* Set the max_prefix value based on the
+ prefix_len. */
+ ut_ad(field->col->is_binary()
+ || field->prefix_len % field->col->mbmaxlen == 0);
+ field->col->max_prefix = field->prefix_len;
+ }
+ ut_ad(field->col->ord_part == 1);
+ }
+
+ new_index->stat_n_diff_key_vals =
+ static_cast<ib_uint64_t*>(mem_heap_zalloc(
+ new_index->heap,
+ dict_index_get_n_unique(new_index)
+ * sizeof(*new_index->stat_n_diff_key_vals)));
+
+ new_index->stat_n_sample_sizes =
+ static_cast<ib_uint64_t*>(mem_heap_zalloc(
+ new_index->heap,
+ dict_index_get_n_unique(new_index)
+ * sizeof(*new_index->stat_n_sample_sizes)));
+
+ new_index->stat_n_non_null_key_vals =
+ static_cast<ib_uint64_t*>(mem_heap_zalloc(
+ new_index->heap,
+ dict_index_get_n_unique(new_index)
+ * sizeof(*new_index->stat_n_non_null_key_vals)));
+
+ new_index->stat_index_size = 1;
+ new_index->stat_n_leaf_pages = 1;
+
+ new_index->stat_defrag_n_pages_freed = 0;
+ new_index->stat_defrag_n_page_split = 0;
+
+ new_index->stat_defrag_sample_next_slot = 0;
+ memset(&new_index->stat_defrag_data_size_sample,
+ 0x0, sizeof(ulint) * STAT_DEFRAG_DATA_SIZE_N_SAMPLE);
+
+ /* Add the new index as the last index for the table */
+
+ UT_LIST_ADD_LAST(new_index->table->indexes, new_index);
+#ifdef BTR_CUR_ADAPT
+ new_index->search_info = btr_search_info_create(new_index->heap);
+#endif /* BTR_CUR_ADAPT */
+
+ new_index->page = unsigned(page_no);
+ rw_lock_create(index_tree_rw_lock_key, &new_index->lock,
+ SYNC_INDEX_TREE);
+
+ new_index->n_core_fields = new_index->n_fields;
+
+ dict_mem_index_free(index); /* the template object is no longer needed */
+ index = new_index; /* hand the cached representation back to the caller */
+ return DB_SUCCESS;
+}
+
+/**********************************************************************//**
+Removes an index from the dictionary cache: detaches it from its table
+and frees it, unless the adaptive hash index still refers to pages of
+the index, in which case the object is parked on table->freed_indexes. */
+static
+void
+dict_index_remove_from_cache_low(
+/*=============================*/
+	dict_table_t*	table,		/*!< in/out: table */
+	dict_index_t*	index,		/*!< in, own: index */
+	ibool		lru_evict)	/*!< in: TRUE if index being evicted
+					to make room in the table LRU list */
+{
+	ut_ad(table);
+	ut_ad(index);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+	ut_ad(mutex_own(&dict_sys.mutex));
+	ut_ad(table->id);
+#ifdef BTR_CUR_HASH_ADAPT
+	ut_ad(!index->freed());
+#endif /* BTR_CUR_HASH_ADAPT */
+
+	/* dict_index_t::lock is not acquired: there cannot be any
+	active operations on this index (or table). */
+
+	if (index->online_log) {
+		ut_ad(index->online_status == ONLINE_INDEX_CREATION);
+		row_log_free(index->online_log);
+		index->online_log = NULL;
+	}
+
+	/* Unlink the index from the table's list of indexes */
+	UT_LIST_REMOVE(table->indexes, index);
+
+	if (!lru_evict) {
+		/* The index is being dropped, remove any compression
+		stats for it. */
+		if (DICT_TF_GET_ZIP_SSIZE(index->table->flags)) {
+			mutex_enter(&page_zip_stat_per_index_mutex);
+			page_zip_stat_per_index.erase(index->id);
+			mutex_exit(&page_zip_stat_per_index_mutex);
+		}
+	}
+
+	/* Remove the index from affected virtual column index list */
+	index->detach_columns();
+
+#ifdef BTR_CUR_HASH_ADAPT
+	/* Search info is always created, whether or not the adaptive
+	hash index is enabled. The in-memory dict_index_t must not be
+	freed until all adaptive hash index entries that point to pages
+	of this b-tree have been dropped, because dropping those entries
+	requires access to the dict_index_t. A count of such pages is
+	kept in the search_info, and the struct is only freed when the
+	count drops to zero. See also: dict_table_can_be_evicted() */
+
+	if (index->n_ahi_pages()) {
+		index->set_freed();
+		UT_LIST_ADD_LAST(table->freed_indexes, index);
+	} else
+#endif /* BTR_CUR_HASH_ADAPT */
+	{
+		rw_lock_free(&index->lock);
+
+		dict_mem_index_free(index);
+	}
+}
+
+/**********************************************************************//**
+Removes an index from the dictionary cache. This forwards to
+dict_index_remove_from_cache_low() with lru_evict = FALSE. */
+void
+dict_index_remove_from_cache(
+/*=========================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	dict_index_t*	index)	/*!< in, own: index */
+{
+	const ibool	evicting_for_lru = FALSE;
+
+	dict_index_remove_from_cache_low(table, index, evicting_for_lru);
+}
+
+/** Resolves the column name of every index field to a column object and
+stores it in the col member of the field. Names are looked up first among
+the table columns (case-insensitively), then among the virtual columns of
+the table, and finally among the virtual columns in add_v.
+@param[in,out]	index	index; index->table supplies the columns
+@param[in]	add_v	new virtual columns added along with an add index call
+@return whether the column names were found */
+static
+bool
+dict_index_find_cols(
+	dict_index_t*		index,
+	const dict_add_v_col_t*	add_v)
+{
+	typedef std::vector<ulint, ut_allocator<ulint> >	col_no_vector;
+
+	/* Column numbers already assigned to an earlier field */
+	col_no_vector	col_added;
+	col_no_vector	v_col_added;
+
+	const dict_table_t*	table = index->table;
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	for (ulint i = 0; i < index->n_fields; i++) {
+		dict_field_t*	field = dict_index_get_nth_field(index, i);
+		bool		resolved = false;
+		bool		duplicate = false;
+
+		for (ulint j = 0; j < table->n_cols; j++) {
+			if (innobase_strcasecmp(
+				    dict_table_get_col_name(table, j),
+				    field->name)) {
+				continue;
+			}
+
+			/* The same column being assigned a second time
+			suggests that the column has a duplicate name;
+			this fails the lookup right away. */
+			if (std::find(col_added.begin(), col_added.end(), j)
+			    != col_added.end()) {
+				duplicate = true;
+			} else {
+				field->col = dict_table_get_nth_col(table, j);
+				col_added.push_back(j);
+				resolved = true;
+			}
+
+			break;
+		}
+
+		/* Let's check if it is a virtual column */
+		for (ulint j = 0;
+		     !resolved && !duplicate && j < table->n_v_cols;
+		     j++) {
+			if (strcmp(dict_table_get_v_col_name(table, j),
+				   field->name)) {
+				continue;
+			}
+
+			/* A virtual column that is already taken ends
+			this scan, but add_v is still consulted below. */
+			if (std::find(v_col_added.begin(),
+				      v_col_added.end(), j)
+			    != v_col_added.end()) {
+				break;
+			}
+
+			field->col = reinterpret_cast<dict_col_t*>(
+				dict_table_get_nth_v_col(table, j));
+			v_col_added.push_back(j);
+			resolved = true;
+		}
+
+		if (!resolved && !duplicate && add_v) {
+			for (ulint j = 0;
+			     !resolved && j < add_v->n_v_col;
+			     j++) {
+				if (!strcmp(add_v->v_col_name[j],
+					    field->name)) {
+					field->col = const_cast<dict_col_t*>(
+						&add_v->v_col[j].m_col);
+					resolved = true;
+				}
+			}
+		}
+
+		if (!resolved) {
+#ifdef UNIV_DEBUG
+			/* It is an error not to find a matching column. */
+			ib::error() << "No matching column for " << field->name
+				<< " in index " << index->name
+				<< " of table " << table->name;
+#endif /* UNIV_DEBUG */
+			return(false);
+		}
+	}
+
+	return(true);
+}
+
+/*******************************************************************//**
+Appends a column as a new field of an index and fills in the col and
+fixed_len members of that field. For a virtual column the index is also
+registered in the column's v_indexes list. */
+void
+dict_index_add_col(
+/*===============*/
+	dict_index_t*		index,		/*!< in/out: index */
+	const dict_table_t*	table,		/*!< in: table */
+	dict_col_t*		col,		/*!< in: column */
+	ulint			prefix_len)	/*!< in: column prefix length */
+{
+	/* The comparison limit used below must be constant. If it were
+	changed, the disk format of some fixed-length columns would
+	change, which would be a disaster. */
+	compile_time_assert(DICT_MAX_FIXED_COL_LEN == 768);
+
+	/* Values stored in fixed_len are masked to 10 bits */
+	const unsigned	fixed_len_mask = (1U << 10) - 1;
+	const char*	col_name;
+
+	if (!col->is_virtual()) {
+		col_name = dict_table_get_col_name(table, dict_col_get_no(col));
+	} else {
+		dict_v_col_t*	v_col = reinterpret_cast<dict_v_col_t*>(col);
+		/* Register the index with the virtual column index list */
+		v_col->v_indexes.push_front(dict_v_idx_t(index, index->n_def));
+		col_name = dict_table_get_v_col_name_mysql(
+			table, dict_col_get_no(col));
+	}
+
+	dict_mem_index_add_field(index, col_name, prefix_len);
+
+	/* The field that was just added is the last defined one */
+	dict_field_t*	field = dict_index_get_nth_field(
+		index, unsigned(index->n_def) - 1);
+
+	field->col = col;
+	field->fixed_len = static_cast<uint16_t>(
+		dict_col_get_fixed_size(
+			col, dict_table_is_comp(table)))
+		& fixed_len_mask;
+
+	if (prefix_len != 0 && prefix_len < field->fixed_len) {
+		field->fixed_len = static_cast<uint16_t>(prefix_len)
+			& fixed_len_mask;
+	}
+
+	/* Long fixed-length fields that need external storage are treated as
+	variable-length fields, so that the extern flag can be embedded in
+	the length word. */
+
+	if (field->fixed_len > DICT_MAX_FIXED_COL_LEN) {
+		field->fixed_len = 0;
+	}
+
+	if ((col->prtype & DATA_NOT_NULL) == 0) {
+		index->n_nullable++;
+	}
+}
+
+/*******************************************************************//**
+Appends the fields [start, end) of index2 to index1, preserving each
+field's column and prefix length. */
+static
+void
+dict_index_copy(
+/*============*/
+	dict_index_t*		index1,	/*!< in: index to copy to */
+	const dict_index_t*	index2,	/*!< in: index to copy from */
+	ulint			start,	/*!< in: first position to copy */
+	ulint			end)	/*!< in: position after the last
+					field to copy */
+{
+	for (ulint pos = start; pos < end; pos++) {
+		const dict_field_t*	src
+			= dict_index_get_nth_field(index2, pos);
+
+		dict_index_add_col(index1, index2->table, src->col,
+				   src->prefix_len);
+	}
+}
+
+/*******************************************************************//**
+Sets the types of the first n_fields fields of a tuple from the columns
+of the corresponding index fields. For the insert buffer index all types
+are set to binary instead. */
+void
+dict_index_copy_types(
+/*==================*/
+	dtuple_t*		tuple,		/*!< in/out: data tuple */
+	const dict_index_t*	index,		/*!< in: index */
+	ulint			n_fields)	/*!< in: number of
+						field types to copy */
+{
+	if (dict_index_is_ibuf(index)) {
+		dtuple_set_types_binary(tuple, n_fields);
+	} else {
+		for (ulint pos = 0; pos < n_fields; pos++) {
+			const dict_field_t*	ifield
+				= dict_index_get_nth_field(index, pos);
+			dtype_t*		type = dfield_get_type(
+				dtuple_get_nth_field(tuple, pos));
+
+			dict_col_copy_type(dict_field_get_col(ifield), type);
+
+			/* Geometry fields of a spatial index are
+			flagged with DATA_GIS_MBR */
+			if (dict_index_is_spatial(index)) {
+				if (DATA_GEOMETRY_MTYPE(type->mtype)) {
+					type->prtype |= DATA_GIS_MBR;
+				}
+			}
+		}
+	}
+}
+
+/** Sets every virtual field of a tuple to SQL NULL and gives it the type
+of the corresponding virtual column of the table. This function should
+be called right after dtuple_create().
+@param[in,out]	tuple	data tuple
+@param[in]	table	table
+*/
+void
+dict_table_copy_v_types(
+	dtuple_t*		tuple,
+	const dict_table_t*	table)
+{
+	/* The tuple could have more virtual columns than the existing
+	table when this is called for creating an index along with
+	adding virtual columns, so only the common part is processed. */
+	const ulint	n_in_tuple = dtuple_get_n_v_fields(tuple);
+	const ulint	n_in_table = static_cast<ulint>(table->n_v_def);
+	const ulint	n_fields = ut_min(n_in_tuple, n_in_table);
+
+	for (ulint pos = 0; pos != n_fields; ++pos) {
+		dfield_t*	v_field = dtuple_get_nth_v_field(tuple, pos);
+		dtype_t*	v_type = dfield_get_type(v_field);
+
+		dfield_set_null(v_field);
+		dict_col_copy_type(
+			&dict_table_get_nth_v_col(table, pos)->m_col,
+			v_type);
+	}
+}
+/*******************************************************************//**
+Sets every field of a tuple to SQL NULL and gives it the type of the
+table column at the same position; the virtual fields are then handled
+by dict_table_copy_v_types(). This function should be called right
+after dtuple_create(). */
+void
+dict_table_copy_types(
+/*==================*/
+	dtuple_t*		tuple,	/*!< in/out: data tuple */
+	const dict_table_t*	table)	/*!< in: table */
+{
+	const ulint	n_fields = dtuple_get_n_fields(tuple);
+
+	for (ulint pos = 0; pos != n_fields; ++pos) {
+		dfield_t*	field = dtuple_get_nth_field(tuple, pos);
+		dtype_t*	type = dfield_get_type(field);
+
+		dfield_set_null(field);
+		dict_col_copy_type(dict_table_get_nth_col(table, pos), type);
+	}
+
+	dict_table_copy_v_types(tuple, table);
+}
+
+/*******************************************************************//**
+Builds the internal dictionary cache representation for a clustered
+index, containing also system fields not defined by the user.
+The resulting field order is: the user-defined fields, DB_ROW_ID (only
+when the index is not unique), DB_TRX_ID, DB_ROLL_PTR, and then every
+remaining non-system table column that is not already fully contained
+in the index. The passed-in index is not freed here.
+@return own: the internal representation of the clustered index */
+static
+dict_index_t*
+dict_index_build_internal_clust(
+/*============================*/
+ dict_index_t* index) /*!< in: user representation of
+ a clustered index */
+{
+ dict_table_t* table = index->table;
+ dict_index_t* new_index;
+ dict_field_t* field;
+ ulint trx_id_pos;
+ ulint i;
+ ibool* indexed;
+
+ ut_ad(index->is_primary());
+ ut_ad(!index->has_virtual());
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ /* Create a new index object with certainly enough fields */
+ new_index = dict_mem_index_create(index->table, index->name,
+ index->type,
+ unsigned(index->n_fields
+ + table->n_cols));
+
+ /* Copy other relevant data from the old index struct to the new
+ struct: it inherits the values */
+
+ new_index->n_user_defined_cols = index->n_fields;
+
+ new_index->id = index->id;
+
+ /* Copy the fields of index */
+ dict_index_copy(new_index, index, 0, index->n_fields);
+
+ if (dict_index_is_unique(index)) {
+ /* Only the fields defined so far are needed to identify
+ the index entry uniquely */
+
+ new_index->n_uniq = new_index->n_def;
+ } else {
+ /* Also the row id is needed to identify the entry */
+ new_index->n_uniq = unsigned(new_index->n_def + 1)
+ & dict_index_t::MAX_N_FIELDS;
+ }
+
+ new_index->trx_id_offset = 0;
+
+ /* Add system columns: DB_ROW_ID (only when the index is not
+ unique), then DB_TRX_ID. trx_id_pos is the position of the
+ DB_TRX_ID field. */
+
+ trx_id_pos = new_index->n_def;
+
+ compile_time_assert(DATA_ROW_ID == 0);
+ compile_time_assert(DATA_TRX_ID == 1);
+ compile_time_assert(DATA_ROLL_PTR == 2);
+
+ if (!dict_index_is_unique(index)) {
+ dict_index_add_col(new_index, table,
+ dict_table_get_sys_col(
+ table, DATA_ROW_ID),
+ 0);
+ trx_id_pos++;
+ }
+
+ dict_index_add_col(
+ new_index, table,
+ dict_table_get_sys_col(table, DATA_TRX_ID), 0);
+
+ for (i = 0; i < trx_id_pos; i++) { /* sum the fixed sizes of the fields before DB_TRX_ID */
+
+ ulint fixed_size = dict_col_get_fixed_size(
+ dict_index_get_nth_col(new_index, i),
+ dict_table_is_comp(table));
+
+ if (fixed_size == 0) { /* field without a fixed size: give up */
+ new_index->trx_id_offset = 0;
+
+ break;
+ }
+
+ dict_field_t* field = dict_index_get_nth_field(
+ new_index, i); /* NOTE: shadows the function-level variable */
+ if (field->prefix_len > 0) { /* column prefix: give up as well */
+ new_index->trx_id_offset = 0;
+
+ break;
+ }
+
+ /* Add fixed_size to new_index->trx_id_offset.
+ Because the latter is a bit-field, an overflow
+ can theoretically occur. Check for it. */
+ fixed_size += new_index->trx_id_offset;
+
+ new_index->trx_id_offset = static_cast<unsigned>(fixed_size)
+ & ((1U << 12) - 1);
+
+ if (new_index->trx_id_offset != fixed_size) {
+ /* Overflow. Pretend that this is a
+ variable-length PRIMARY KEY. */
+ ut_ad(0);
+ new_index->trx_id_offset = 0;
+ break;
+ }
+ }
+
+ dict_index_add_col(
+ new_index, table,
+ dict_table_get_sys_col(table, DATA_ROLL_PTR), 0);
+
+ /* Remember the table columns already contained in new_index;
+ the array is zero-initialized and indexed by dict_col_t::ind */
+ indexed = static_cast<ibool*>(
+ ut_zalloc_nokey(table->n_cols * sizeof *indexed));
+
+ /* Mark the table columns already contained in new_index */
+ for (i = 0; i < new_index->n_def; i++) {
+
+ field = dict_index_get_nth_field(new_index, i);
+
+ /* If there is only a prefix of the column in the index
+ field, do not mark the column as contained in the index */
+
+ if (field->prefix_len == 0) {
+
+ indexed[field->col->ind] = TRUE;
+ }
+ }
+
+ /* Add to new_index non-system columns of table not yet included
+ there; the loop bound leaves out the last DATA_N_SYS_COLS
+ columns of the table */
+ for (i = 0; i + DATA_N_SYS_COLS < ulint(table->n_cols); i++) {
+ dict_col_t* col = dict_table_get_nth_col(table, i);
+ ut_ad(col->mtype != DATA_SYS);
+
+ if (!indexed[col->ind]) {
+ dict_index_add_col(new_index, table, col, 0);
+ }
+ }
+
+ ut_free(indexed);
+
+ ut_ad(UT_LIST_GET_LEN(table->indexes) == 0);
+
+ new_index->n_core_null_bytes = table->supports_instant()
+ ? dict_index_t::NO_CORE_NULL_BYTES
+ : static_cast<uint8_t>(
+ UT_BITS_IN_BYTES(unsigned(new_index->n_nullable)));
+ new_index->cached = TRUE;
+
+ return(new_index);
+}
+
+/*******************************************************************//**
+Creates the dictionary cache form of a secondary (non-clustered) index.
+The result starts with the user-defined fields and is then extended with
+the unique fields of the clustered index, so that an entry identifies its
+clustered index record.
+@return own: the internal representation of the non-clustered index */
+static
+dict_index_t*
+dict_index_build_internal_non_clust(
+/*================================*/
+	dict_index_t*	index)	/*!< in: user representation of
+				a non-clustered index */
+{
+	dict_table_t*	table = index->table;
+
+	ut_ad(table && index);
+	ut_ad(!dict_index_is_clust(index));
+	ut_ad(!dict_index_is_ibuf(index));
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	/* The clustered index is expected at the head of the index list */
+	dict_index_t*	clust_index = UT_LIST_GET_FIRST(table->indexes);
+
+	ut_ad(clust_index);
+	ut_ad(dict_index_is_clust(clust_index));
+	ut_ad(!dict_index_is_ibuf(clust_index));
+
+	/* Allocate the cached index object */
+	dict_index_t*	new_index = dict_mem_index_create(
+		index->table, index->name, index->type,
+		ulint(index->n_fields + 1 + clust_index->n_uniq));
+
+	/* The cached object inherits the id and the user-defined
+	fields of the definition it is built from */
+	new_index->id = index->id;
+	new_index->n_user_defined_cols = index->n_fields;
+
+	dict_index_copy(new_index, index, 0, index->n_fields);
+
+	/* seen[c] becomes TRUE once the table column c is contained
+	in new_index in full */
+	ibool*	seen = static_cast<ibool*>(
+		ut_zalloc_nokey(table->n_cols * sizeof *seen));
+
+	for (ulint pos = 0; pos < new_index->n_def; pos++) {
+		dict_field_t*	f = dict_index_get_nth_field(new_index, pos);
+
+		/* Virtual columns are never marked, and neither are
+		columns of which the index only holds a prefix */
+		if (!f->col->is_virtual() && f->prefix_len == 0) {
+			seen[f->col->ind] = TRUE;
+		}
+	}
+
+	/* Append the clustered index columns that are needed to
+	determine the clustered index entry uniquely. A spatial index
+	gets them even if the column is already present. */
+	for (ulint pos = 0; pos < clust_index->n_uniq; pos++) {
+		dict_field_t*	f = dict_index_get_nth_field(clust_index,
+							     pos);
+
+		if (!seen[f->col->ind] || dict_index_is_spatial(index)) {
+			dict_index_add_col(new_index, table, f->col,
+					   f->prefix_len);
+		}
+	}
+
+	ut_free(seen);
+
+	if (!dict_index_is_unique(index)) {
+		new_index->n_uniq = new_index->n_def;
+	} else {
+		new_index->n_uniq = index->n_fields;
+	}
+
+	/* n_fields reflects the number of fields actually defined */
+	new_index->n_fields = new_index->n_def;
+
+	new_index->cached = TRUE;
+
+	return(new_index);
+}
+
+/***********************************************************************
+Creates the dictionary cache form of an FTS index and registers the new
+index object with the FTS cache of its table (creating that cache first
+if the table does not have one yet).
+@return own: the internal representation of the FTS index */
+static
+dict_index_t*
+dict_index_build_internal_fts(
+/*==========================*/
+	dict_index_t*	index)	/*!< in: user representation of an FTS index */
+{
+	ut_ad(index->type == DICT_FTS);
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	/* Allocate the cached index object */
+	dict_index_t*	new_index = dict_mem_index_create(
+		index->table, index->name, index->type, index->n_fields);
+
+	/* The cached object inherits the id and the fields of the
+	definition it is built from */
+	new_index->id = index->id;
+	new_index->n_user_defined_cols = index->n_fields;
+
+	dict_index_copy(new_index, index, 0, index->n_fields);
+
+	new_index->n_uniq = 0;
+	new_index->cached = TRUE;
+
+	dict_table_t*	table = index->table;
+
+	if (!table->fts->cache) {
+		table->fts->cache = fts_cache_create(table);
+	}
+
+	/* Tell the FTS cache about this index while holding the
+	cache init_lock exclusively */
+	rw_lock_x_lock(&table->fts->cache->init_lock);
+	fts_cache_index_cache_create(table, new_index);
+	rw_lock_x_unlock(&table->fts->cache->init_lock);
+
+	return(new_index);
+}
+/*====================== FOREIGN KEY PROCESSING ========================*/
+
+/*********************************************************************//**
+Tells whether the referenced_set of a table contains any constraint,
+i.e. whether some foreign key refers to the table.
+@return TRUE if table is referenced by a foreign key */
+ibool
+dict_table_is_referenced_by_foreign_key(
+/*====================================*/
+	const dict_table_t*	table)	/*!< in: InnoDB table */
+{
+	if (table->referenced_set.empty()) {
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/**********************************************************************//**
+Detaches a foreign constraint struct from the tables it is linked to in
+the dictionary cache and then frees the struct. */
+void
+dict_foreign_remove_from_cache(
+/*===========================*/
+	dict_foreign_t*	foreign)	/*!< in, own: foreign constraint */
+{
+	ut_ad(mutex_own(&dict_sys.mutex));
+	ut_a(foreign);
+
+	/* Unlink from the referenced (parent) table, if attached */
+	if (dict_table_t* ref_table = foreign->referenced_table) {
+		ref_table->referenced_set.erase(foreign);
+	}
+
+	/* Unlink from the foreign (child) table, if attached */
+	if (dict_table_t* for_table = foreign->foreign_table) {
+		for_table->foreign_set.erase(foreign);
+	}
+
+	dict_foreign_free(foreign);
+}
+
+/**********************************************************************//**
+Searches the foreign_set and then the referenced_set of a table for an
+element that the set lookup considers equal to the given constraint.
+@return foreign constraint found in the table, or NULL if there is none */
+static
+dict_foreign_t*
+dict_foreign_find(
+/*==============*/
+	dict_table_t*	table,		/*!< in: table object */
+	dict_foreign_t*	foreign)	/*!< in: foreign constraint */
+{
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	ut_ad(dict_foreign_set_validate(table->foreign_set));
+	ut_ad(dict_foreign_set_validate(table->referenced_set));
+
+	dict_foreign_set::iterator	pos
+		= table->foreign_set.find(foreign);
+
+	if (pos == table->foreign_set.end()) {
+		/* Not a constraint of this table; maybe it is one
+		that refers to this table */
+		pos = table->referenced_set.find(foreign);
+
+		if (pos == table->referenced_set.end()) {
+			return(NULL);
+		}
+	}
+
+	return(*pos);
+}
+
+/*********************************************************************//**
+Walks the indexes of a table and returns the first one that
+dict_foreign_qualify_index() accepts for the given column list. Indexes
+that are marked to be dropped, that are being built by online DDL, or
+that are the same object as types_idx are never considered.
+If error is not NULL, it is set to FK_INDEX_NOT_FOUND on entry and to
+FK_SUCCESS when an index is returned; in between it may be updated by
+dict_foreign_qualify_index().
+@return matching index, NULL if not found */
+dict_index_t*
+dict_foreign_find_index(
+/*====================*/
+	const dict_table_t*	table,	/*!< in: table */
+	const char**		col_names,
+					/*!< in: column names, or NULL
+					to use table->col_names */
+	const char**		columns,/*!< in: array of column names */
+	ulint			n_cols,	/*!< in: number of columns */
+	const dict_index_t*	types_idx,
+					/*!< in: NULL or an index
+					whose types the column types
+					must match */
+	bool			check_charsets,
+					/*!< in: whether to check
+					charsets. only has an effect
+					if types_idx != NULL */
+	ulint			check_null,
+					/*!< in: nonzero if none of
+					the columns must be declared
+					NOT NULL */
+	fkerr_t*		error,	/*!< out: error code */
+	ulint*			err_col_no,
+					/*!< out: column number where
+					error happened */
+	dict_index_t**		err_index)
+					/*!< out: index where error
+					happened */
+{
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	if (error) {
+		*error = FK_INDEX_NOT_FOUND;
+	}
+
+	dict_index_t*	candidate = dict_table_get_first_index(table);
+
+	for (; candidate != NULL;
+	     candidate = dict_table_get_next_index(candidate)) {
+
+		/* Cheap exclusions first */
+		if (types_idx == candidate
+		    || candidate->to_be_dropped
+		    || dict_index_is_online_ddl(candidate)) {
+			continue;
+		}
+
+		if (!dict_foreign_qualify_index(
+			    table, col_names, columns, n_cols,
+			    candidate, types_idx,
+			    check_charsets, check_null,
+			    error, err_col_no, err_index)) {
+			continue;
+		}
+
+		if (error) {
+			*error = FK_SUCCESS;
+		}
+
+		return(candidate);
+	}
+
+	return(NULL);
+}
+/**********************************************************************//**
+Starts a foreign key error report: rewinds the output stream to its
+beginning and writes a timestamp followed by a heading that names the
+table. */
+static
+void
+dict_foreign_error_report_low(
+/*==========================*/
+	FILE*		file,	/*!< in: output stream */
+	const char*	name)	/*!< in: table name */
+{
+	/* Each report replaces the previous contents of the stream */
+	rewind(file);
+
+	ut_print_timestamp(file);
+
+	fprintf(file,
+		" Error in foreign key constraint of table %s:\n",
+		name);
+}
+
+/**********************************************************************//**
+Writes a complete foreign key error report to the stream: the heading,
+the message, the constraint in CREATE TABLE format and, if the constraint
+has a foreign index, the name of that index. The whole report is written
+while holding dict_foreign_err_mutex. */
+static
+void
+dict_foreign_error_report(
+/*======================*/
+	FILE*		file,	/*!< in: output stream */
+	dict_foreign_t*	fk,	/*!< in: foreign key constraint */
+	const char*	msg)	/*!< in: the error message */
+{
+	mutex_enter(&dict_foreign_err_mutex);
+
+	dict_foreign_error_report_low(file, fk->foreign_table_name);
+
+	fputs(msg, file);
+	fputs(" Constraint:\n", file);
+
+	const std::string	constraint_sql
+		= dict_print_info_on_foreign_key_in_create_format(
+			NULL, fk, TRUE);
+
+	fputs(constraint_sql.c_str(), file);
+	putc('\n', file);
+
+	if (fk->foreign_index) {
+		fprintf(file, "The index in the foreign key in table is"
+			" %s\n%s\n", fk->foreign_index->name(),
+			FOREIGN_KEY_CONSTRAINTS_MSG);
+	}
+
+	mutex_exit(&dict_foreign_err_mutex);
+}
+
+/**********************************************************************//**
+Adds a foreign key constraint object to the dictionary cache.
+
+Ownership: if dict_foreign_find() locates an equivalent constraint that is
+already attached to the foreign table or to the referenced table, the
+passed-in object is freed and the cached object is used from then on.
+The passed-in object is also freed on the error return, unless a cached
+object was used instead of it.
+
+For each of the two tables that is in the cache and not yet linked to the
+constraint, a suitable index is looked up with dict_foreign_find_index()
+and the constraint is inserted into the referenced_set or foreign_set of
+that table. If no index is found and ignore_err does not contain
+DICT_ERR_IGNORE_FK_NOKEY, an error report is written to
+dict_foreign_err_file and DB_CANNOT_ADD_CONSTRAINT is returned; with the
+flag set, the constraint is linked with a NULL index instead.
+
+At least one of the foreign table and the referenced table must already
+be in the dictionary cache!
+@return DB_SUCCESS, or DB_CANNOT_ADD_CONSTRAINT if a required index is
+missing */
+dberr_t
+dict_foreign_add_to_cache(
+/*======================*/
+ dict_foreign_t* foreign,
+ /*!< in, own: foreign key constraint */
+ const char** col_names,
+ /*!< in: column names, or NULL to use
+ foreign->foreign_table->col_names */
+ bool check_charsets,
+ /*!< in: whether to check charset
+ compatibility */
+ dict_err_ignore_t ignore_err)
+ /*!< in: error to be ignored */
+{
+ dict_table_t* for_table;
+ dict_table_t* ref_table;
+ dict_foreign_t* for_in_cache = NULL;
+ dict_index_t* index;
+ ibool added_to_referenced_list= FALSE;
+ FILE* ef = dict_foreign_err_file;
+
+ DBUG_ENTER("dict_foreign_add_to_cache");
+ DBUG_PRINT("dict_foreign_add_to_cache", ("id: %s", foreign->id));
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+ /* Look up both tables in the cache; either one may be absent */
+ for_table = dict_table_check_if_in_cache_low(
+ foreign->foreign_table_name_lookup);
+
+ ref_table = dict_table_check_if_in_cache_low(
+ foreign->referenced_table_name_lookup);
+ ut_a(for_table || ref_table);
+ /* Is an equivalent constraint already attached to either table? */
+ if (for_table) {
+ for_in_cache = dict_foreign_find(for_table, foreign);
+ }
+
+ if (!for_in_cache && ref_table) {
+ for_in_cache = dict_foreign_find(ref_table, foreign);
+ }
+ /* From here on for_in_cache is the object that gets linked. After
+ the free below, 'foreign' is only compared as a pointer value. */
+ if (for_in_cache) {
+ dict_foreign_free(foreign);
+ } else {
+ for_in_cache = foreign;
+
+ }
+ /* Link the constraint to the referenced table, if not done yet */
+ if (ref_table && !for_in_cache->referenced_table) {
+ index = dict_foreign_find_index(
+ ref_table, NULL,
+ for_in_cache->referenced_col_names,
+ for_in_cache->n_fields, for_in_cache->foreign_index,
+ check_charsets, false);
+
+ if (index == NULL
+ && !(ignore_err & DICT_ERR_IGNORE_FK_NOKEY)) {
+ dict_foreign_error_report(
+ ef, for_in_cache,
+ "there is no index in referenced table"
+ " which would contain\n"
+ "the columns as the first columns,"
+ " or the data types in the\n"
+ "referenced table do not match"
+ " the ones in table.");
+ /* Free the object only if it is the one we were given */
+ if (for_in_cache == foreign) {
+ dict_foreign_free(foreign);
+ }
+
+ DBUG_RETURN(DB_CANNOT_ADD_CONSTRAINT);
+ }
+
+ for_in_cache->referenced_table = ref_table;
+ for_in_cache->referenced_index = index;
+
+ std::pair<dict_foreign_set::iterator, bool> ret
+ = ref_table->referenced_set.insert(for_in_cache);
+
+ ut_a(ret.second); /* second is true if the insertion
+ took place */
+ added_to_referenced_list = TRUE;
+ }
+ /* Link the constraint to the foreign table, if not done yet. The
+ last argument is nonzero for ON DELETE/UPDATE SET NULL constraints. */
+ if (for_table && !for_in_cache->foreign_table) {
+ index = dict_foreign_find_index(
+ for_table, col_names,
+ for_in_cache->foreign_col_names,
+ for_in_cache->n_fields,
+ for_in_cache->referenced_index, check_charsets,
+ for_in_cache->type
+ & (DICT_FOREIGN_ON_DELETE_SET_NULL
+ | DICT_FOREIGN_ON_UPDATE_SET_NULL));
+
+ if (index == NULL
+ && !(ignore_err & DICT_ERR_IGNORE_FK_NOKEY)) {
+ dict_foreign_error_report(
+ ef, for_in_cache,
+ "there is no index in the table"
+ " which would contain\n"
+ "the columns as the first columns,"
+ " or the data types in the\n"
+ "table do not match"
+ " the ones in the referenced table\n"
+ "or one of the ON ... SET NULL columns"
+ " is declared NOT NULL.");
+ /* Undo the insertion made above before freeing the object */
+ if (for_in_cache == foreign) {
+ if (added_to_referenced_list) {
+ const dict_foreign_set::size_type
+ n = ref_table->referenced_set
+ .erase(for_in_cache);
+
+ ut_a(n == 1); /* the number of
+ elements removed must
+ be one */
+ }
+
+ dict_foreign_free(foreign);
+ }
+
+ DBUG_RETURN(DB_CANNOT_ADD_CONSTRAINT);
+ }
+
+ for_in_cache->foreign_table = for_table;
+ for_in_cache->foreign_index = index;
+
+ std::pair<dict_foreign_set::iterator, bool> ret
+ = for_table->foreign_set.insert(for_in_cache);
+
+ ut_a(ret.second); /* second is true if the insertion
+ took place */
+ }
+
+ /* We need to move the table to the non-LRU end of the table LRU
+ list. Otherwise it will be evicted from the cache. */
+
+ if (ref_table != NULL) {
+ dict_sys.prevent_eviction(ref_table);
+ }
+
+ if (for_table != NULL) {
+ dict_sys.prevent_eviction(for_table);
+ }
+
+ ut_ad(dict_lru_validate());
+ DBUG_RETURN(DB_SUCCESS);
+}
+
+/*********************************************************************//**
+Advances through a NUL-terminated text until it reaches a position where
+'string' occurs, compared case-insensitively. Text inside `...`, "..."
+or '...' quotes is skipped; within quotes, a backslash escapes the next
+character. The scan also ends at the terminating NUL.
+@return scanned up to this: the start of the match, or the NUL */
+static
+const char*
+dict_scan_to(
+/*=========*/
+	const char*	ptr,	/*!< in: scan from */
+	const char*	string)	/*!< in: look for this */
+{
+	char	open_quote = '\0';	/* quote we are inside of, or NUL */
+	bool	escaped = false;	/* previous char was a backslash */
+
+	while (*ptr != '\0') {
+		const char	c = *ptr;
+
+		if (c == open_quote) {
+			/* The quote character again: it closes the
+			quoted text unless a backslash precedes it */
+			if (escaped) {
+				escaped = false;
+			} else {
+				open_quote = '\0';
+			}
+		} else if (open_quote != '\0') {
+			/* Inside quotes only the escape state changes */
+			if (escaped) {
+				escaped = false;
+			} else if (c == '\\') {
+				escaped = true;
+			}
+		} else if (c == '`' || c == '"' || c == '\'') {
+			/* Opening quote */
+			open_quote = c;
+		} else {
+			/* Outside quotes: try to match the keyword
+			at this position */
+			ulint	n = 0;
+
+			while (string[n] != '\0'
+			       && toupper(static_cast<unsigned char>(ptr[n]))
+			       == toupper(static_cast<unsigned char>(
+						  string[n]))) {
+				n++;
+			}
+
+			if (string[n] == '\0') {
+				/* Every character of the keyword
+				matched */
+				return(ptr);
+			}
+		}
+
+		ptr++;
+	}
+
+	return(ptr);
+}
+
+/*********************************************************************//**
+Accepts 'string' if it is the next thing in the text after optional
+whitespace. Comparisons are case-insensitive.
+@return if string was accepted, the pointer is moved after that, else
+ptr is returned */
+static
+const char*
+dict_accept(
+/*========*/
+	CHARSET_INFO*	cs,	/*!< in: the character set of ptr */
+	const char*	ptr,	/*!< in: scan from this */
+	const char*	string,	/*!< in: accept only this string as the next
+				non-whitespace string */
+	ibool*		success)/*!< out: TRUE if accepted */
+{
+	*success = FALSE;
+
+	/* First non-whitespace position at or after ptr */
+	const char*	token = ptr;
+
+	while (my_isspace(cs, *token)) {
+		token++;
+	}
+
+	const char*	found = dict_scan_to(token, string);
+
+	/* Accept only a match that starts exactly at the token */
+	if (found != token || *found == '\0') {
+		return(ptr);
+	}
+
+	*success = TRUE;
+
+	return found + strlen(string);
+}
+
+/*********************************************************************//**
+Scans an id. For the lexical definition of an 'id', see the code below.
+Strips backquotes or double quotes from around the id.
+
+Leading whitespace is skipped. A quoted id extends to the closing quote;
+inside it, two consecutive quote characters stand for one quote character.
+An unquoted id extends to the next whitespace, '(', ')', ',' or NUL, and
+also to the next '.' unless accept_also_dot is set.
+
+*id is left NULL if the text ends before an id starts, or if a quoted id
+is not terminated. If heap is NULL, *id points into the scanned text: it
+is then neither NUL-terminated nor stripped of doubled quotes. Otherwise
+*id is a converted copy allocated from heap.
+@return scanned to */
+static
+const char*
+dict_scan_id(
+/*=========*/
+ CHARSET_INFO* cs, /*!< in: the character set of ptr */
+ const char* ptr, /*!< in: scanned to */
+ mem_heap_t* heap, /*!< in: heap where to allocate the id
+ (NULL=id will not be allocated, but it
+ will point to string near ptr) */
+ const char** id, /*!< out,own: the id; NULL if no id was
+ scannable */
+ ibool table_id,/*!< in: TRUE=convert the allocated id
+ as a table name; FALSE=convert to UTF-8 */
+ ibool accept_also_dot)
+ /*!< in: TRUE if also a dot can appear in a
+ non-quoted id; in a quoted id it can appear
+ always */
+{
+ char quote = '\0';
+ ulint len = 0;
+ const char* s;
+ char* str;
+ char* dst;
+
+ *id = NULL;
+ /* Skip leading whitespace */
+ while (my_isspace(cs, *ptr)) {
+ ptr++;
+ }
+
+ if (*ptr == '\0') {
+
+ return(ptr);
+ }
+ /* Remember and step over an opening quote, if there is one */
+ if (*ptr == '`' || *ptr == '"') {
+ quote = *ptr++;
+ }
+
+ s = ptr;
+ /* Find the end of the id. For a quoted id, len counts the characters
+ of the unquoted result: a doubled quote counts as one character. */
+ if (quote) {
+ for (;;) {
+ if (!*ptr) {
+ /* Syntax error */
+ return(ptr);
+ }
+ if (*ptr == quote) {
+ ptr++;
+ if (*ptr != quote) {
+ break;
+ }
+ }
+ ptr++;
+ len++;
+ }
+ } else {
+ while (!my_isspace(cs, *ptr) && *ptr != '(' && *ptr != ')'
+ && (accept_also_dot || *ptr != '.')
+ && *ptr != ',' && *ptr != '\0') {
+
+ ptr++;
+ }
+
+ len = ulint(ptr - s);
+ }
+
+ if (heap == NULL) {
+ /* no heap given: id will point to source string */
+ *id = s;
+ return(ptr);
+ }
+ /* Make a NUL-terminated copy of the id, collapsing doubled quotes */
+ if (quote) {
+ char* d;
+
+ str = d = static_cast<char*>(
+ mem_heap_alloc(heap, len + 1));
+
+ while (len--) {
+ if ((*d++ = *s++) == quote) {
+ s++;
+ }
+ }
+ *d++ = 0;
+ len = ulint(d - str); /* this count includes the terminating NUL */
+ ut_ad(*s == quote);
+ ut_ad(s + 1 == ptr);
+ } else {
+ str = mem_heap_strdupl(heap, s, len);
+ }
+ /* Convert the copy; the output buffer is sized from len */
+ if (!table_id) {
+convert_id:
+ /* Convert the identifier from connection character set
+ to UTF-8. */
+ len = 3 * len + 1;
+ *id = dst = static_cast<char*>(mem_heap_alloc(heap, len));
+
+ innobase_convert_from_id(cs, dst, str, len);
+ } else if (!strncmp(str, srv_mysql50_table_name_prefix,
+ sizeof(srv_mysql50_table_name_prefix) - 1)) {
+ /* This is a pre-5.1 table name
+ containing chars other than [A-Za-z0-9].
+ Discard the prefix and use raw UTF-8 encoding. */
+ str += sizeof(srv_mysql50_table_name_prefix) - 1;
+ len -= sizeof(srv_mysql50_table_name_prefix) - 1;
+ goto convert_id;
+ } else {
+ /* Encode using filename-safe characters. */
+ len = 5 * len + 1;
+ *id = dst = static_cast<char*>(mem_heap_alloc(heap, len));
+
+ innobase_convert_from_table_id(cs, dst, str, len);
+ }
+
+ return(ptr);
+}
+
+/*********************************************************************//**
+Open a table from its database and table name, this is currently used by
+foreign constraint parser to get the referenced table.
+
+The table name, and the database name if one is given, are converted
+from from_cs into stack buffers. A name that starts with
+srv_mysql50_table_name_prefix has the prefix removed and is converted to
+system_charset_info; any other name is converted to my_charset_filename.
+If database_name is NULL, the database part of 'name' is used unconverted.
+The error count reported by strconvert() is not examined.
+
+The result "database/table" is allocated from heap and looked up with
+dict_table_get_low(). Depending on lower_case_table_names (and always on
+Windows) the name is converted to lower case for the lookup; with the
+setting 2 the returned name is restored to the given case afterwards.
+@return complete table name with database and table name, allocated from
+heap memory passed in */
+char*
+dict_get_referenced_table(
+ const char* name, /*!< in: foreign key table name */
+ const char* database_name, /*!< in: table db name */
+ ulint database_name_len, /*!< in: db name length */
+ const char* table_name, /*!< in: table name */
+ ulint table_name_len, /*!< in: table name length */
+ dict_table_t** table, /*!< out: table object or NULL */
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ CHARSET_INFO* from_cs) /*!< in: table name charset */
+{
+ char* ref;
+ char db_name[MAX_DATABASE_NAME_LEN];
+ char tbl_name[MAX_TABLE_NAME_LEN];
+ CHARSET_INFO* to_cs = &my_charset_filename;
+ uint errors;
+ ut_ad(database_name || name);
+ ut_ad(table_name);
+
+ if (!strncmp(table_name, srv_mysql50_table_name_prefix,
+ sizeof(srv_mysql50_table_name_prefix) - 1)) {
+ /* This is a pre-5.1 table name
+ containing chars other than [A-Za-z0-9].
+ Discard the prefix and use raw UTF-8 encoding. */
+ table_name += sizeof(srv_mysql50_table_name_prefix) - 1;
+ table_name_len -= sizeof(srv_mysql50_table_name_prefix) - 1;
+
+ to_cs = system_charset_info;
+ }
+ /* From here on table_name refers to the converted copy */
+ table_name_len = strconvert(from_cs, table_name, table_name_len, to_cs,
+ tbl_name, MAX_TABLE_NAME_LEN, &errors);
+ table_name = tbl_name;
+
+ if (database_name) {
+ to_cs = &my_charset_filename;
+ if (!strncmp(database_name, srv_mysql50_table_name_prefix,
+ sizeof(srv_mysql50_table_name_prefix) - 1)) {
+ database_name
+ += sizeof(srv_mysql50_table_name_prefix) - 1;
+ database_name_len
+ -= sizeof(srv_mysql50_table_name_prefix) - 1;
+ to_cs = system_charset_info;
+ }
+
+ database_name_len = strconvert(
+ from_cs, database_name, database_name_len, to_cs,
+ db_name, MAX_DATABASE_NAME_LEN, &errors);
+ database_name = db_name;
+ } else {
+ /* Use the database name of the foreign key table */
+
+ database_name = name;
+ database_name_len = dict_get_db_name_len(name);
+ }
+
+ /* Copy database_name, '/', table_name, '\0' */
+ ref = static_cast<char*>(mem_heap_alloc(
+ heap, database_name_len + table_name_len + 2));
+ memcpy(ref, database_name, database_name_len);
+ ref[database_name_len] = '/';
+ memcpy(ref + database_name_len + 1, table_name, table_name_len + 1);
+
+ /* Values; 0 = Store and compare as given; case sensitive
+ 1 = Store and compare in lower; case insensitive
+ 2 = Store as given, compare in lower; case semi-sensitive */
+ if (innobase_get_lower_case_table_names() == 2) {
+ innobase_casedn_str(ref);
+ *table = dict_table_get_low(ref);
+ memcpy(ref, database_name, database_name_len);
+ ref[database_name_len] = '/';
+ memcpy(ref + database_name_len + 1, table_name, table_name_len + 1);
+ /* The three lines above restore the name as given after the
+ lower-case lookup */
+ } else {
+#ifndef _WIN32
+ if (innobase_get_lower_case_table_names() == 1) {
+ innobase_casedn_str(ref);
+ }
+#else
+ innobase_casedn_str(ref);
+#endif /* !_WIN32 */
+ *table = dict_table_get_low(ref);
+ }
+
+ return(ref);
+}
+
+/*********************************************************************//**
+Removes MySQL comments from an SQL string. A comment is either
+(a) '#' to the end of the line,
+(b) '--[space]' to the end of the line, or
+(c) '[slash][asterisk]' till the next '[asterisk][slash]' (like the familiar
+C comment syntax).
+
+Comment introducers inside `...`, "..." or '...' quotes are not treated
+as comments; within quotes, a backslash escapes the next character.
+The character that ends a line comment (0x0A, 0x0D) is kept in the output.
+Scanning stops at sql_length bytes or at the first NUL byte, whichever
+comes first; a comment that is not terminated before that ends the output.
+The one- and two-byte lookaheads (sptr[1], sptr[2]) are not checked
+against sql_length - NOTE(review): this presumably relies on sql_string
+being NUL-terminated at sql_length; confirm against the callers.
+@return own: SQL string stripped from comments; the caller must free
+this with ut_free()! */
+static
+char*
+dict_strip_comments(
+/*================*/
+ const char* sql_string, /*!< in: SQL string */
+ size_t sql_length) /*!< in: length of sql_string */
+{
+ char* str;
+ const char* sptr;
+ const char* eptr = sql_string + sql_length;
+ char* ptr;
+ /* unclosed quote character (0 if none) */
+ char quote = 0;
+ bool escape = false;
+
+ DBUG_ENTER("dict_strip_comments");
+
+ DBUG_PRINT("dict_strip_comments", ("%s", sql_string));
+ /* The output is never longer than the input, plus a NUL */
+ str = static_cast<char*>(ut_malloc_nokey(sql_length + 1));
+
+ sptr = sql_string;
+ ptr = str;
+
+ for (;;) {
+scan_more:
+ if (sptr >= eptr || *sptr == '\0') {
+end_of_string:
+ *ptr = '\0';
+
+ ut_a(ptr <= str + sql_length);
+
+ DBUG_PRINT("dict_strip_comments", ("%s", str));
+ DBUG_RETURN(str);
+ }
+
+ if (*sptr == quote) {
+ /* Closing quote character: do not look for
+ starting quote or comments. */
+
+ /* If the quote character is escaped by a
+ backslash, ignore it. */
+ if (escape) {
+ escape = false;
+ } else {
+ quote = 0;
+ }
+ } else if (quote) {
+ /* Within quotes: do not look for
+ starting quotes or comments. */
+ if (escape) {
+ escape = false;
+ } else if (*sptr == '\\') {
+ escape = true;
+ }
+ } else if (*sptr == '"' || *sptr == '`' || *sptr == '\'') {
+ /* Starting quote: remember the quote character. */
+ quote = *sptr;
+ } else if (*sptr == '#'
+ || (sptr[0] == '-' && sptr[1] == '-'
+ && sptr[2] == ' ')) {
+ for (;;) {
+ if (++sptr >= eptr) {
+ goto end_of_string;
+ }
+
+ /* In Unix a newline is 0x0A while in Windows
+ it is 0x0D followed by 0x0A */
+
+ switch (*sptr) {
+ case (char) 0X0A:
+ case (char) 0x0D:
+ case '\0':
+ goto scan_more;
+ }
+ }
+ } else if (!quote && *sptr == '/' && *(sptr + 1) == '*') {
+ sptr += 2;
+ for (;;) {
+ if (sptr >= eptr) {
+ goto end_of_string;
+ }
+
+ switch (*sptr) {
+ case '\0':
+ goto scan_more;
+ case '*':
+ if (sptr[1] == '/') {
+ sptr += 2;
+ goto scan_more;
+ }
+ }
+
+ sptr++;
+ }
+ }
+ /* Not part of a comment: copy the character to the output */
+ *ptr = *sptr;
+
+ ptr++;
+ sptr++;
+ }
+}
+
+/*********************************************************************//**
+Finds the highest [number] for foreign key constraints of the table. Looks
+only at the >= 4.0.18-format id's, which are of the form
+databasename/tablename_ibfk_[number].
+
+Each constraint id is copied into a local buffer and the part after the
+'/' is converted to the filename charset before it is compared with the
+table name. The copy is bounded by the buffer size, and an id without a
+'/' is skipped: such an id cannot be of the above form.
+@return highest number, 0 if table has no new format foreign key constraints */
+ulint
+dict_table_get_highest_foreign_id(
+/*==============================*/
+	dict_table_t*	table)	/*!< in: table in the dictionary memory cache */
+{
+	ulint		biggest_id = 0;
+	/* Length of the "_ibfk_" marker, without the terminating NUL */
+	const ulint	ibfk_len = (sizeof dict_ibfk) - 1;
+
+	DBUG_ENTER("dict_table_get_highest_foreign_id");
+
+	ut_a(table);
+
+	const ulint	len = strlen(table->name.m_name);
+
+	for (dict_foreign_set::iterator it = table->foreign_set.begin();
+	     it != table->foreign_set.end();
+	     ++it) {
+		/* Room for a database name, '/', a converted constraint
+		name of up to MAX_TABLE_NAME_LEN bytes, and some slack */
+		char			fkid[MAX_TABLE_NAME_LEN * 2 + 20];
+		const dict_foreign_t*	foreign = *it;
+		const char*		id_slash = strchr(foreign->id, '/');
+
+		if (id_slash == NULL) {
+			/* No database prefix: not the format we are
+			looking for */
+			continue;
+		}
+
+		/* Bounded copy; never write past the end of fkid */
+		strncpy(fkid, foreign->id, (sizeof fkid) - 1);
+		fkid[(sizeof fkid) - 1] = '\0';
+
+		char*	fkid_slash = strchr(fkid, '/');
+
+		if (fkid_slash == NULL) {
+			/* The '/' was lost to truncation: the id is
+			longer than any valid name */
+			continue;
+		}
+
+		/* Convert foreign key identifier on dictionary memory
+		cache to filename charset, limited to the space that is
+		left in the buffer after the '/'. */
+		const ulint	room = ulint(
+			fkid + (sizeof fkid) - (fkid_slash + 1));
+
+		innobase_convert_to_filename_charset(
+			fkid_slash + 1,
+			id_slash + 1,
+			room < ulint(MAX_TABLE_NAME_LEN)
+			? room : ulint(MAX_TABLE_NAME_LEN));
+
+		if (strlen(fkid) > ibfk_len + len
+		    && 0 == memcmp(fkid, table->name.m_name, len)
+		    && 0 == memcmp(fkid + len, dict_ibfk, ibfk_len)
+		    && fkid[len + ibfk_len] != '0') {
+			/* It is of the >= 4.0.18 format */
+			char*		endp;
+			const ulint	id = strtoul(fkid + len + ibfk_len,
+						     &endp, 10);
+
+			/* Accept only an id that is a number all the
+			way to its end */
+			if (*endp == '\0') {
+				ut_a(id != biggest_id);
+
+				if (id > biggest_id) {
+					biggest_id = id;
+				}
+			}
+		}
+	}
+
+	DBUG_PRINT("dict_table_get_highest_foreign_id",
+		   ("id: " ULINTPF, biggest_id));
+
+	DBUG_RETURN(biggest_id);
+}
+
+/**********************************************************************//**
+Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
+
+The statement text of the transaction is stripped of comments and scanned
+for clauses of the form DROP FOREIGN KEY [IF EXISTS] id. Every id found is
+stored in *constraints_to_drop, which is allocated from heap, and must
+match a constraint in table->foreign_set.
+
+On failure a report is written to dict_foreign_err_file (unless the server
+is in read-only mode). A statement that names more constraints than the
+output array can hold is rejected with a report as well.
+@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
+constraint id does not match */
+dberr_t
+dict_foreign_parse_drop_constraints(
+/*================================*/
+	mem_heap_t*	heap,			/*!< in: heap from which we can
+						allocate memory */
+	trx_t*		trx,			/*!< in: transaction */
+	dict_table_t*	table,			/*!< in: table */
+	ulint*		n,			/*!< out: number of constraints
+						to drop */
+	const char***	constraints_to_drop)	/*!< out: id's of the
+						constraints to drop */
+{
+	/* Number of elements allocated for *constraints_to_drop */
+	const ulint		max_constraints = 1000;
+	ibool			success;
+	char*			str;
+	size_t			len;
+	const char*		ptr;
+	const char*		ptr1;
+	const char*		id;
+	CHARSET_INFO*		cs;
+
+	ut_a(trx->mysql_thd);
+
+	cs = thd_charset(trx->mysql_thd);
+
+	*n = 0;
+
+	*constraints_to_drop = static_cast<const char**>(
+		mem_heap_alloc(heap, max_constraints * sizeof(char*)));
+
+	ptr = innobase_get_stmt_unsafe(trx->mysql_thd, &len);
+
+	/* str is our own copy; it is freed on every return path */
+	str = dict_strip_comments(ptr, len);
+
+	ptr = str;
+
+	ut_ad(mutex_own(&dict_sys.mutex));
+loop:
+	ptr = dict_scan_to(ptr, "DROP");
+
+	if (*ptr == '\0') {
+		ut_free(str);
+
+		return(DB_SUCCESS);
+	}
+
+	ptr = dict_accept(cs, ptr, "DROP", &success);
+
+	if (!my_isspace(cs, *ptr)) {
+		/* "DROP" was only the beginning of a longer word */
+		goto loop;
+	}
+
+	ptr = dict_accept(cs, ptr, "FOREIGN", &success);
+
+	if (!success || !my_isspace(cs, *ptr)) {
+		/* Some other kind of DROP clause */
+		goto loop;
+	}
+
+	ptr = dict_accept(cs, ptr, "KEY", &success);
+
+	if (!success) {
+
+		goto syntax_error;
+	}
+
+	/* Step over an optional IF EXISTS */
+	ptr1 = dict_accept(cs, ptr, "IF", &success);
+
+	if (success && my_isspace(cs, *ptr1)) {
+		ptr1 = dict_accept(cs, ptr1, "EXISTS", &success);
+		if (success) {
+			ptr = ptr1;
+		}
+	}
+
+	ptr = dict_scan_id(cs, ptr, heap, &id, FALSE, TRUE);
+
+	if (id == NULL) {
+
+		goto syntax_error;
+	}
+
+	if (*n >= max_constraints) {
+		/* The output array is full. Reject the statement
+		instead of aborting the server. */
+		if (!srv_read_only_mode) {
+			FILE*	ef = dict_foreign_err_file;
+
+			mutex_enter(&dict_foreign_err_mutex);
+			rewind(ef);
+			ut_print_timestamp(ef);
+			fputs(" Error in dropping of a foreign key"
+			      " constraint of table ", ef);
+			ut_print_name(ef, NULL, table->name.m_name);
+			fprintf(ef, ",\nin SQL command\n%s"
+				"\nToo many constraints to drop in one"
+				" statement; the limit is " ULINTPF ".\n",
+				str, max_constraints);
+			mutex_exit(&dict_foreign_err_mutex);
+		}
+
+		ut_free(str);
+
+		return(DB_CANNOT_DROP_CONSTRAINT);
+	}
+
+	(*constraints_to_drop)[*n] = id;
+	(*n)++;
+
+	if (std::find_if(table->foreign_set.begin(),
+			 table->foreign_set.end(),
+			 dict_foreign_matches_id(id))
+	    == table->foreign_set.end()) {
+
+		if (!srv_read_only_mode) {
+			FILE*	ef = dict_foreign_err_file;
+
+			mutex_enter(&dict_foreign_err_mutex);
+			rewind(ef);
+			ut_print_timestamp(ef);
+			fputs(" Error in dropping of a foreign key"
+			      " constraint of table ", ef);
+			ut_print_name(ef, NULL, table->name.m_name);
+			fprintf(ef, ",\nin SQL command\n%s"
+				"\nCannot find a constraint with the"
+				" given id %s.\n", str, id);
+			mutex_exit(&dict_foreign_err_mutex);
+		}
+
+		ut_free(str);
+
+		return(DB_CANNOT_DROP_CONSTRAINT);
+	}
+
+	goto loop;
+
+syntax_error:
+	if (!srv_read_only_mode) {
+		FILE*	ef = dict_foreign_err_file;
+
+		mutex_enter(&dict_foreign_err_mutex);
+		rewind(ef);
+		ut_print_timestamp(ef);
+		fputs(" Syntax error in dropping of a"
+		      " foreign key constraint of table ", ef);
+		ut_print_name(ef, NULL, table->name.m_name);
+		fprintf(ef, ",\n"
+			"close to:\n%s\n in SQL command\n%s\n", ptr, str);
+		mutex_exit(&dict_foreign_err_mutex);
+	}
+
+	ut_free(str);
+
+	return(DB_CANNOT_DROP_CONSTRAINT);
+}
+
+/*==================== END OF FOREIGN KEY PROCESSING ====================*/
+
+/**********************************************************************//**
+Looks up an index by its id with dict_index_find_on_id_low().
+The caller must hold dict_sys.mutex.
+@return index, NULL if not found */
+dict_index_t*
+dict_index_get_if_in_cache_low(
+/*===========================*/
+	index_id_t	index_id)	/*!< in: index id */
+{
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	dict_index_t*	found = dict_index_find_on_id_low(index_id);
+
+	return(found);
+}
+
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
+Looks up an index by its id, acquiring dict_sys.mutex for the duration
+of the lookup. Nothing is looked up if dict_sys is not initialised.
+@return index, NULL if not found */
+dict_index_t*
+dict_index_get_if_in_cache(
+/*=======================*/
+	index_id_t	index_id)	/*!< in: index id */
+{
+	dict_index_t*	index = NULL;
+
+	if (dict_sys.is_initialised()) {
+		mutex_enter(&dict_sys.mutex);
+
+		index = dict_index_get_if_in_cache_low(index_id);
+
+		mutex_exit(&dict_sys.mutex);
+	}
+
+	return(index);
+}
+
+/**********************************************************************//**
+Debug check for a search tuple: its n_fields_cmp must not exceed the
+number of fields that identify a record in the index tree, so that
+no comparison can occur with the page number field in a node pointer.
+@return TRUE if ok */
+ibool
+dict_index_check_search_tuple(
+/*==========================*/
+	const dict_index_t*	index,	/*!< in: index tree */
+	const dtuple_t*		tuple)	/*!< in: tuple used in a search */
+{
+	const ulint	n_cmp = dtuple_get_n_fields_cmp(tuple);
+	const ulint	n_uniq_in_tree
+		= dict_index_get_n_unique_in_tree(index);
+
+	ut_ad(n_cmp <= n_uniq_in_tree);
+
+	return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+
+/**********************************************************************//**
+Builds a node pointer out of a physical record and a page number. The
+tuple consists of a prefix of the record followed by one extra field
+that holds the 4-byte page number of the child page.
+@return own: node pointer */
+dtuple_t*
+dict_index_build_node_ptr(
+/*======================*/
+	const dict_index_t*	index,	/*!< in: index */
+	const rec_t*		rec,	/*!< in: record for which to build node
+					pointer */
+	ulint			page_no,/*!< in: page number to put in node
+					pointer */
+	mem_heap_t*		heap,	/*!< in: memory heap where pointer
+					created */
+	ulint			level)	/*!< in: level of rec in tree:
+					0 means leaf level */
+{
+	ulint	n_unique;
+
+	if (!dict_index_is_ibuf(index)) {
+		n_unique = dict_index_get_n_unique_in_tree_nonleaf(index);
+	} else {
+		/* In a universal index tree the whole record serves
+		as the node pointer on the leaf level; on the levels
+		above, the last field (the page number of the child
+		page) is left out */
+
+		ut_a(!dict_table_is_comp(index->table));
+		n_unique = rec_get_n_fields_old(rec);
+
+		if (level > 0) {
+			ut_a(n_unique > 1);
+			n_unique--;
+		}
+	}
+
+	dtuple_t*	tuple = dtuple_create(heap, n_unique + 1);
+
+	/* The trailing page number field must not take part in
+	comparisons when the node pointer is searched for in the tree:
+	upper levels may hold identical node pointers that differ in
+	the page number only. Hence n_fields_cmp is one less than the
+	number of fields. */
+
+	dtuple_set_n_fields_cmp(tuple, n_unique);
+
+	dict_index_copy_types(tuple, index, n_unique);
+
+	/* Fill in the child page number field */
+	byte*	page_no_buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+
+	mach_write_to_4(page_no_buf, page_no);
+
+	dfield_t*	child = dtuple_get_nth_field(tuple, n_unique);
+
+	dfield_set_data(child, page_no_buf, 4);
+
+	dtype_set(dfield_get_type(child), DATA_SYS_CHILD, DATA_NOT_NULL, 4);
+
+	/* Copy the record prefix into the remaining fields */
+	rec_copy_prefix_to_dtuple(tuple, rec, index,
+				  level ? 0 : index->n_core_fields,
+				  n_unique, heap);
+
+	dtuple_set_info_bits(tuple, dtuple_get_info_bits(tuple)
+			     | REC_STATUS_NODE_PTR);
+
+	ut_ad(dtuple_check_typed(tuple));
+
+	return(tuple);
+}
+
+/** Convert a physical record into a search tuple.
+@param[in]	rec		index record (not necessarily in an index page)
+@param[in]	index		index; a debug assertion requires that it is
+				not the clustered index
+@param[in]	leaf		whether rec is in a leaf page
+@param[in]	n_fields	number of data fields
+@param[in,out]	heap		memory heap for allocation
+@return own: data tuple */
+dtuple_t*
+dict_index_build_data_tuple(
+	const rec_t*		rec,
+	const dict_index_t*	index,
+	bool			leaf,
+	ulint			n_fields,
+	mem_heap_t*		heap)
+{
+	ut_ad(!index->is_clust());
+
+	dtuple_t* const	search_tuple = dtuple_create(heap, n_fields);
+
+	/* For a leaf page record all n_fields are passed as the core
+	field count, otherwise 0 is passed. */
+	const ulint	n_core = leaf ? n_fields : 0;
+
+	dict_index_copy_types(search_tuple, index, n_fields);
+	rec_copy_prefix_to_dtuple(search_tuple, rec, index,
+				  n_core, n_fields, heap);
+
+	ut_ad(dtuple_check_typed(search_tuple));
+
+	return(search_tuple);
+}
+
+/** Calculates the minimum record length in an index.
+@param[in]	index	index
+@return minimum record length computed from the fixed column sizes and
+the per-record and per-field overhead of the table's record format */
+ulint
+dict_index_calc_min_rec_len(
+	const dict_index_t*	index)
+{
+	const ulint	comp = dict_table_is_comp(index->table);
+	const ulint	n_fields = dict_index_get_n_fields(index);
+
+	if (!comp) {
+		ulint	len = 0;
+
+		for (ulint pos = 0; pos < n_fields; pos++) {
+			len += dict_col_get_fixed_size(
+				dict_index_get_nth_col(index, pos), comp);
+		}
+
+		/* Each field adds 2 bytes when the fixed sizes sum up
+		to more than 127 bytes, and 1 byte otherwise. */
+		len += (len > 127 ? 2 : 1) * n_fields;
+
+		return(len + REC_N_OLD_EXTRA_BYTES);
+	}
+
+	ulint	len = REC_N_NEW_EXTRA_BYTES;
+	ulint	n_nullable = 0;
+
+	for (ulint pos = 0; pos < n_fields; pos++) {
+		const dict_col_t*	col = dict_index_get_nth_col(index,
+								     pos);
+		const ulint		fixed_size
+			= dict_col_get_fixed_size(col, comp);
+
+		if (fixed_size != 0) {
+			len += fixed_size;
+		} else {
+			/* A column without a fixed size adds 1 byte if
+			col->len < 128 and 2 bytes otherwise (presumably
+			the bytes that store its length). */
+			len += col->len < 128 ? 1 : 2;
+		}
+
+		if (!(col->prtype & DATA_NOT_NULL)) {
+			n_nullable++;
+		}
+	}
+
+	/* round the NULL flags up to full bytes */
+	return(len + UT_BITS_IN_BYTES(n_nullable));
+}
+
+/** Outputs info on a foreign key of a table in a format suitable for
+CREATE TABLE.
+@param[in]	trx		transaction
+@param[in]	foreign		foreign key constraint
+@param[in]	add_newline	whether to add a newline
+@return the constraint definition, starting with a comma */
+std::string
+dict_print_info_on_foreign_key_in_create_format(
+	trx_t*		trx,
+	dict_foreign_t*	foreign,
+	ibool		add_newline)
+{
+	/* Strip the preceding database name from the constraint id,
+	if the id contains one. */
+	const char*	stripped_id = strchr(foreign->id, '/')
+		? foreign->id + 1 + dict_get_db_name_len(foreign->id)
+		: foreign->id;
+
+	std::string	str(",");
+
+	/* Append the quoted names of foreign->n_fields columns, separated
+	by ", ". The first name is always appended. */
+	const auto	append_columns = [&](const char* const* names) {
+		ulint	i = 0;
+
+		do {
+			if (i != 0) {
+				str.append(", ");
+			}
+
+			str.append(innobase_quote_identifier(trx, names[i]));
+		} while (++i < foreign->n_fields);
+	};
+
+	if (add_newline) {
+		/* SHOW CREATE TABLE wants constraints each printed nicely
+		on its own line, while error messages want no newlines
+		inserted. */
+		str.append("\n ");
+	}
+
+	str.append(" CONSTRAINT ");
+	str.append(innobase_quote_identifier(trx, stripped_id));
+
+	str.append(" FOREIGN KEY (");
+	append_columns(foreign->foreign_col_names);
+	str.append(") REFERENCES ");
+
+	/* Do not print the database name of the referenced table when
+	both tables are in the same database. */
+	const char*	referenced_name = dict_tables_have_same_db(
+		foreign->foreign_table_name_lookup,
+		foreign->referenced_table_name_lookup)
+		? dict_remove_db_name(foreign->referenced_table_name)
+		: foreign->referenced_table_name;
+
+	str.append(ut_get_name(trx, referenced_name));
+
+	str.append(" (");
+	append_columns(foreign->referenced_col_names);
+	str.append(")");
+
+	/* Referential actions, in the order in which they are printed. */
+	static const struct {
+		unsigned	flag;
+		const char*	clause;
+	} actions[] = {
+		{DICT_FOREIGN_ON_DELETE_CASCADE, " ON DELETE CASCADE"},
+		{DICT_FOREIGN_ON_DELETE_SET_NULL, " ON DELETE SET NULL"},
+		{DICT_FOREIGN_ON_DELETE_NO_ACTION, " ON DELETE NO ACTION"},
+		{DICT_FOREIGN_ON_UPDATE_CASCADE, " ON UPDATE CASCADE"},
+		{DICT_FOREIGN_ON_UPDATE_SET_NULL, " ON UPDATE SET NULL"},
+		{DICT_FOREIGN_ON_UPDATE_NO_ACTION, " ON UPDATE NO ACTION"},
+	};
+
+	for (const auto& action : actions) {
+		if (foreign->type & action.flag) {
+			str.append(action.clause);
+		}
+	}
+
+	return str;
+}
+
+/**********************************************************************//**
+Outputs info on foreign keys of a table.
+The constraints in table->foreign_set are formatted while holding
+dict_sys.mutex.
+@return the formatted foreign key info; empty if table->foreign_set is
+empty */
+std::string
+dict_print_info_on_foreign_keys(
+/*============================*/
+	ibool		create_table_format, /*!< in: if TRUE then print in
+				a format suitable to be inserted into
+				a CREATE TABLE, otherwise in the format
+				of SHOW TABLE STATUS */
+	trx_t*		trx,	/*!< in: transaction */
+	dict_table_t*	table)	/*!< in: table */
+{
+	std::string	str;
+
+	mutex_enter(&dict_sys.mutex);
+
+	for (dict_foreign_set::iterator it = table->foreign_set.begin();
+	     it != table->foreign_set.end();
+	     ++it) {
+
+		dict_foreign_t*	foreign = *it;
+
+		if (create_table_format) {
+			str.append(
+				dict_print_info_on_foreign_key_in_create_format(
+					trx, foreign, TRUE));
+			continue;
+		}
+
+		/* SHOW TABLE STATUS format:
+		; (col ...) REFER table(col ...) [actions] */
+		str.append("; (");
+
+		for (ulint i = 0; i < foreign->n_fields; i++) {
+			if (i) {
+				str.append(" ");
+			}
+
+			str.append(innobase_quote_identifier(
+					   trx,
+					   foreign->foreign_col_names[i]));
+		}
+
+		str.append(") REFER ");
+		str.append(ut_get_name(trx,
+				       foreign->referenced_table_name));
+		/* Open the list of referenced columns. This must be an
+		opening parenthesis so that it pairs with the closing one
+		appended after the column names. */
+		str.append("(");
+
+		for (ulint i = 0; i < foreign->n_fields; i++) {
+			if (i) {
+				str.append(" ");
+			}
+
+			str.append(innobase_quote_identifier(
+					   trx,
+					   foreign->referenced_col_names[i]));
+		}
+
+		str.append(")");
+
+		/* foreign->type is a bit mask that can hold an ON DELETE
+		and an ON UPDATE action at the same time, so every flag is
+		tested with a bitwise AND, as in
+		dict_print_info_on_foreign_key_in_create_format(). */
+		if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE) {
+			str.append(" ON DELETE CASCADE");
+		}
+
+		if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) {
+			str.append(" ON DELETE SET NULL");
+		}
+
+		if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) {
+			str.append(" ON DELETE NO ACTION");
+		}
+
+		if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) {
+			str.append(" ON UPDATE CASCADE");
+		}
+
+		if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) {
+			str.append(" ON UPDATE SET NULL");
+		}
+
+		if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) {
+			str.append(" ON UPDATE NO ACTION");
+		}
+	}
+
+	mutex_exit(&dict_sys.mutex);
+	return str;
+}
+
+/** Given a space_id of a file-per-table tablespace, search the
+dict_sys.table_LRU list and return the dict_table_t* pointer for it.
+@param space tablespace
+@return table if found, NULL if not */
+static
+dict_table_t*
+dict_find_single_table_by_space(const fil_space_t* space)
+{
+	ut_ad(space->id > 0);
+
+	if (!dict_sys.is_initialised()) {
+		/* This could happen when it's in redo processing. */
+		return(NULL);
+	}
+
+	dict_table_t*	table = UT_LIST_GET_FIRST(dict_sys.table_LRU);
+	const ulint	n_tables = UT_LIST_GET_LEN(dict_sys.table_LRU);
+
+	/* This function intentionally does not acquire the mutex: it is
+	used by error handling code deep in the call stack as a last means
+	to avoid killing the server, so it is worth risking some
+	consequences. The walk is bounded by the list length that was read
+	above. */
+	for (ulint n_seen = 0;
+	     table != NULL && n_seen < n_tables;
+	     n_seen++, table = UT_LIST_GET_NEXT(table_LRU, table)) {
+
+		if (table->space != space) {
+			continue;
+		}
+
+		/* The first table that uses the tablespace decides
+		the result. */
+		return(dict_table_is_file_per_table(table) ? table : NULL);
+	}
+
+	return(NULL);
+}
+
+/** Flags a table with specified space_id corrupted in the data dictionary
+cache.
+@param[in]	space	tablespace
+@return true if successful */
+bool dict_set_corrupted_by_space(const fil_space_t* space)
+{
+	if (dict_table_t* table = dict_find_single_table_by_space(space)) {
+		/* Only the in-memory flags are set, since the caller
+		could be too deep in the stack for a SYS_INDEXES update. */
+		table->corrupted = true;
+		table->file_unreadable = true;
+		return true;
+	}
+
+	return false;
+}
+
+/** Flag a table encrypted in the data dictionary cache.
+The table found for the tablespace, if any, gets file_unreadable set.
+@param[in]	space	tablespace */
+void dict_set_encrypted_by_space(const fil_space_t* space)
+{
+	dict_table_t* const	table = dict_find_single_table_by_space(space);
+
+	if (table == NULL) {
+		return;
+	}
+
+	table->file_unreadable = true;
+}
+
+/**********************************************************************//**
+Flags an index corrupted both in the data dictionary cache
+and in the SYS_INDEXES.
+If the index is the clustered index, the table is flagged corrupted in
+the cache as well. If the index is already flagged, nothing else is done.
+If high_level_read_only is set, only the in-memory flag is set and
+SYS_INDEXES is not touched. Otherwise the outcome of the SYS_INDEXES
+update is reported with ib::error(). */
+void
+dict_set_corrupted(
+/*===============*/
+ dict_index_t* index, /*!< in/out: index */
+ trx_t* trx, /*!< in/out: transaction */
+ const char* ctx) /*!< in: context; printed at the end of
+ the error message */
+{
+ mem_heap_t* heap;
+ mtr_t mtr;
+ dict_index_t* sys_index;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ byte* buf;
+ const char* status;
+ btr_cur_t cursor;
+ bool locked = RW_X_LATCH == trx->dict_operation_lock_mode; /* if false,
+ the data dictionary is locked below and unlocked at func_exit */
+
+ if (!locked) {
+ row_mysql_lock_data_dictionary(trx);
+ }
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+ ut_ad(!dict_table_is_comp(dict_sys.sys_tables));
+ ut_ad(!dict_table_is_comp(dict_sys.sys_indexes));
+ ut_ad(!sync_check_iterate(dict_sync_check()));
+
+ /* Mark the table as corrupted only if the clustered index
+ is corrupted */
+ if (dict_index_is_clust(index)) {
+ index->table->corrupted = TRUE;
+ }
+
+ if (index->type & DICT_CORRUPT) {
+ /* The index was already flagged corrupted. */
+ ut_ad(!dict_index_is_clust(index) || index->table->corrupted);
+ goto func_exit;
+ }
+
+ /* If this is read only mode, do not update SYS_INDEXES, just
+ mark it as corrupted in memory */
+ if (high_level_read_only) {
+ index->type |= DICT_CORRUPT;
+ goto func_exit;
+ }
+
+ heap = mem_heap_create(sizeof(dtuple_t) + 2 * (sizeof(dfield_t)
+ + sizeof(que_fork_t) + sizeof(upd_node_t)
+ + sizeof(upd_t) + 12));
+ mtr_start(&mtr);
+ index->type |= DICT_CORRUPT;
+
+ sys_index = UT_LIST_GET_FIRST(dict_sys.sys_indexes->indexes);
+
+ /* Find the index row in SYS_INDEXES. The search tuple consists of
+ two 8-byte fields: index->table->id followed by index->id. */
+ tuple = dtuple_create(heap, 2);
+
+ dfield = dtuple_get_nth_field(tuple, 0);
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
+ mach_write_to_8(buf, index->table->id);
+ dfield_set_data(dfield, buf, 8);
+
+ dfield = dtuple_get_nth_field(tuple, 1);
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
+ mach_write_to_8(buf, index->id);
+ dfield_set_data(dfield, buf, 8);
+
+ dict_index_copy_types(tuple, sys_index, 2);
+
+ btr_cur_search_to_nth_level(sys_index, 0, tuple, PAGE_CUR_LE,
+ BTR_MODIFY_LEAF,
+ &cursor, 0, __FILE__, __LINE__, &mtr);
+
+ if (cursor.low_match == dtuple_get_n_fields(tuple)) {
+ /* UPDATE SYS_INDEXES SET TYPE=index->type
+ WHERE TABLE_ID=index->table->id AND INDEX_ID=index->id */
+ ulint len;
+ byte* field = rec_get_nth_field_old(
+ btr_cur_get_rec(&cursor),
+ DICT_FLD__SYS_INDEXES__TYPE, &len);
+ if (len != 4) {
+ goto fail; /* unexpected length of the TYPE field: do not write */
+ }
+ mtr.write<4>(*btr_cur_get_block(&cursor), field, index->type);
+ status = "Flagged";
+ } else {
+fail: /* also reached by the goto above */
+ status = "Unable to flag";
+ }
+
+ mtr_commit(&mtr);
+ mem_heap_empty(heap);
+ ib::error() << status << " corruption of " << index->name
+ << " in table " << index->table->name << " in " << ctx;
+ mem_heap_free(heap);
+
+func_exit:
+ if (!locked) {
+ row_mysql_unlock_data_dictionary(trx);
+ }
+}
+
+/** Flag an index corrupted in the data dictionary cache only. This is
+used mostly to mark a corrupted index when the index's own dictionary
+is corrupted, and such an index is forcibly loaded for repair purposes.
+@param[in,out]	index	index which is corrupted */
+void
+dict_set_corrupted_index_cache_only(
+	dict_index_t*	index)
+{
+	ut_ad(index != NULL);
+	ut_ad(index->table != NULL);
+	ut_ad(mutex_own(&dict_sys.mutex));
+	ut_ad(!dict_table_is_comp(dict_sys.sys_tables));
+	ut_ad(!dict_table_is_comp(dict_sys.sys_indexes));
+
+	if (dict_index_is_clust(index)) {
+		/* The table itself is marked corrupted and unreadable
+		only when the clustered index is corrupted. */
+		dict_table_t* const	table = index->table;
+
+		table->corrupted = TRUE;
+		table->file_unreadable = true;
+	}
+
+	index->type |= DICT_CORRUPT;
+}
+
+/** Sets merge_threshold in the SYS_INDEXES
+The row is looked up by index->table->id and index->id, and is written
+only if it is found with the expected number of fields.
+@param[in,out]	index		index
+@param[in]	merge_threshold	value to set */
+void
+dict_index_set_merge_threshold(
+	dict_index_t*	index,
+	ulint		merge_threshold)
+{
+	mtr_t		mtr;
+	btr_cur_t	cursor;
+
+	ut_ad(index != NULL);
+	ut_ad(!dict_table_is_comp(dict_sys.sys_tables));
+	ut_ad(!dict_table_is_comp(dict_sys.sys_indexes));
+
+	dict_sys_lock();
+
+	mem_heap_t* const	heap = mem_heap_create(
+		sizeof(dtuple_t) + 2 * (sizeof(dfield_t)
+		+ sizeof(que_fork_t) + sizeof(upd_node_t)
+		+ sizeof(upd_t) + 12));
+
+	mtr_start(&mtr);
+
+	dict_index_t* const	sys_index = UT_LIST_GET_FIRST(
+		dict_sys.sys_indexes->indexes);
+
+	/* Find the index row in SYS_INDEXES: the search key consists of
+	two 8-byte fields, the table id and the index id. */
+	dtuple_t* const	key = dtuple_create(heap, 2);
+
+	byte* const	table_id_buf = static_cast<byte*>(
+		mem_heap_alloc(heap, 8));
+	mach_write_to_8(table_id_buf, index->table->id);
+	dfield_set_data(dtuple_get_nth_field(key, 0), table_id_buf, 8);
+
+	byte* const	index_id_buf = static_cast<byte*>(
+		mem_heap_alloc(heap, 8));
+	mach_write_to_8(index_id_buf, index->id);
+	dfield_set_data(dtuple_get_nth_field(key, 1), index_id_buf, 8);
+
+	dict_index_copy_types(key, sys_index, 2);
+
+	btr_cur_search_to_nth_level(sys_index, 0, key, PAGE_CUR_GE,
+				    BTR_MODIFY_LEAF,
+				    &cursor, 0, __FILE__, __LINE__, &mtr);
+
+	if (cursor.up_match == dtuple_get_n_fields(key)) {
+		rec_t* const	rec = btr_cur_get_rec(&cursor);
+
+		if (rec_get_n_fields_old(rec)
+		    == DICT_NUM_FIELDS__SYS_INDEXES) {
+			ulint		len;
+			byte* const	field = rec_get_nth_field_old(
+				rec,
+				DICT_FLD__SYS_INDEXES__MERGE_THRESHOLD, &len);
+
+			ut_ad(len == 4);
+			mtr.write<4,mtr_t::MAYBE_NOP>(
+				*btr_cur_get_block(&cursor),
+				field, merge_threshold);
+		}
+	}
+
+	mtr_commit(&mtr);
+	mem_heap_free(heap);
+
+	dict_sys_unlock();
+}
+
+#ifdef UNIV_DEBUG
+/** Sets merge_threshold for all indexes in the list of tables
+@param[in]	list			pointer to the list of tables
+@param[in]	merge_threshold_all	value to set for all indexes; only
+					its 6 lowest bits are stored */
+inline
+void
+dict_set_merge_threshold_list_debug(
+	UT_LIST_BASE_NODE_T(dict_table_t)*	list,
+	uint					merge_threshold_all)
+{
+	const uint	threshold = merge_threshold_all & ((1U << 6) - 1);
+
+	dict_table_t*	table = UT_LIST_GET_FIRST(*list);
+
+	while (table != NULL) {
+		dict_index_t*	index = UT_LIST_GET_FIRST(table->indexes);
+
+		while (index != NULL) {
+			/* Each index is updated under its own X-lock. */
+			rw_lock_x_lock(dict_index_get_lock(index));
+			index->merge_threshold = threshold;
+			rw_lock_x_unlock(dict_index_get_lock(index));
+
+			index = UT_LIST_GET_NEXT(indexes, index);
+		}
+
+		table = UT_LIST_GET_NEXT(table_LRU, table);
+	}
+}
+
+/** Sets merge_threshold for all indexes in dictionary cache for debug.
+Both dict_sys.table_LRU and dict_sys.table_non_LRU are processed while
+holding dict_sys.mutex.
+@param[in]	merge_threshold_all	value to set for all indexes */
+void
+dict_set_merge_threshold_all_debug(
+	uint	merge_threshold_all)
+{
+	UT_LIST_BASE_NODE_T(dict_table_t)* const	lists[] = {
+		&dict_sys.table_LRU,
+		&dict_sys.table_non_LRU
+	};
+
+	mutex_enter(&dict_sys.mutex);
+
+	for (auto* list : lists) {
+		dict_set_merge_threshold_list_debug(
+			list, merge_threshold_all);
+	}
+
+	mutex_exit(&dict_sys.mutex);
+}
+
+#endif /* UNIV_DEBUG */
+
+/** Get an index by name.
+Only committed indexes are considered.
+@param[in]	table	the table where to look for the index
+@param[in]	name	the index name to look for
+@return index, NULL if does not exist */
+dict_index_t*
+dict_table_get_index_on_name(dict_table_t* table, const char* name)
+{
+	for (dict_index_t* candidate = dict_table_get_first_index(table);
+	     candidate != NULL;
+	     candidate = dict_table_get_next_index(candidate)) {
+
+		if (!candidate->is_committed()) {
+			continue;
+		}
+
+		if (strcmp(candidate->name, name) == 0) {
+			return(candidate);
+		}
+	}
+
+	return(NULL);
+}
+
+/** Replace the index passed in with another equivalent index in the
+foreign key lists of the table.
+Constraints for which no replacement is found get a NULL index.
+@param[in,out]	table		table
+@param[in]	col_names	column names, or NULL to use
+				table->col_names
+@param[in]	index		index to be replaced
+@return whether all replacements were found */
+bool
+dict_foreign_replace_index(
+	dict_table_t*		table,
+	const char**		col_names,
+	const dict_index_t*	index)
+{
+	bool	all_replaced = true;
+
+	ut_ad(index->to_be_dropped);
+	ut_ad(index->table == table);
+
+	/* Constraints in which this table is the referencing table. */
+	for (dict_foreign_t* foreign : table->foreign_set) {
+		if (foreign->foreign_index != index) {
+			continue;
+		}
+
+		ut_ad(foreign->foreign_table == index->table);
+
+		dict_index_t*	replacement = dict_foreign_find_index(
+			foreign->foreign_table, col_names,
+			foreign->foreign_col_names,
+			foreign->n_fields, index,
+			/*check_charsets=*/TRUE, /*check_null=*/FALSE,
+			NULL, NULL, NULL);
+
+		if (replacement == NULL) {
+			all_replaced = false;
+		} else {
+			ut_ad(replacement->table == index->table);
+			ut_ad(!replacement->to_be_dropped);
+		}
+
+		foreign->foreign_index = replacement;
+	}
+
+	/* Constraints in which this table is the referenced table. */
+	for (dict_foreign_t* foreign : table->referenced_set) {
+		if (foreign->referenced_index != index) {
+			continue;
+		}
+
+		ut_ad(foreign->referenced_table == index->table);
+
+		dict_index_t*	replacement = dict_foreign_find_index(
+			foreign->referenced_table, NULL,
+			foreign->referenced_col_names,
+			foreign->n_fields, index,
+			/*check_charsets=*/TRUE, /*check_null=*/FALSE,
+			NULL, NULL, NULL);
+
+		/* There must exist an alternative index,
+		since this must have been checked earlier. */
+		if (replacement == NULL) {
+			all_replaced = false;
+		} else {
+			ut_ad(replacement->table == index->table);
+			ut_ad(!replacement->to_be_dropped);
+		}
+
+		foreign->referenced_index = replacement;
+	}
+
+	return(all_replaced);
+}
+
+#ifdef UNIV_DEBUG
+/**********************************************************************//**
+Check for duplicate index entries in a table [using the index name].
+This is compiled only with UNIV_DEBUG. Two indexes that have the same
+name and the same is_committed() state trigger a debug assertion. Every
+uncommitted index is additionally validated against the check mode. */
+void
+dict_table_check_for_dup_indexes(
+/*=============================*/
+ const dict_table_t* table, /*!< in: Check for dup indexes
+ in this table */
+ enum check_name check) /*!< in: whether and when to allow
+ temporary index names */
+{
+ /* Check for duplicates, ignoring indexes that are marked
+ as to be dropped */
+
+ const dict_index_t* index1;
+ const dict_index_t* index2;
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ /* The primary index _must_ exist */
+ ut_a(UT_LIST_GET_LEN(table->indexes) > 0);
+
+ index1 = UT_LIST_GET_FIRST(table->indexes);
+
+ do {
+ if (!index1->is_committed()) {
+ ut_a(!dict_index_is_clust(index1)); /* an uncommitted index
+ must not be the clustered index */
+
+ switch (check) {
+ case CHECK_ALL_COMPLETE:
+ ut_error; /* no uncommitted index is accepted in this mode */
+ case CHECK_ABORTED_OK:
+ switch (dict_index_get_online_status(index1)) {
+ case ONLINE_INDEX_COMPLETE:
+ case ONLINE_INDEX_CREATION:
+ ut_error;
+ break;
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ break; /* only aborted indexes are accepted */
+ }
+ /* fall through */
+ case CHECK_PARTIAL_OK:
+ break;
+ }
+ }
+
+ /* Compare index1 with every index that follows it in the list. */
+ for (index2 = UT_LIST_GET_NEXT(indexes, index1);
+ index2 != NULL;
+ index2 = UT_LIST_GET_NEXT(indexes, index2)) {
+ ut_ad(index1->is_committed()
+ != index2->is_committed()
+ || strcmp(index1->name, index2->name) != 0);
+ }
+
+ index1 = UT_LIST_GET_NEXT(indexes, index1);
+ } while (index1);
+}
+#endif /* UNIV_DEBUG */
+
+/** Auxiliary macro used inside dict_table_schema_check().
+It calls dtype_sql_name() twice: once to format the type of the required
+column req_schema->columns[i] into req_type, and once to format the type
+of the actual column table->cols[j] into actual_type.
+It relies on req_schema, table, i, j, req_type and actual_type being in
+scope at the point of use. */
+#define CREATE_TYPES_NAMES() \
+ dtype_sql_name((unsigned) req_schema->columns[i].mtype, \
+ (unsigned) req_schema->columns[i].prtype_mask, \
+ (unsigned) req_schema->columns[i].len, \
+ req_type, sizeof(req_type)); \
+ dtype_sql_name(table->cols[j].mtype, \
+ table->cols[j].prtype, \
+ table->cols[j].len, \
+ actual_type, sizeof(actual_type))
+
+/*********************************************************************//**
+Checks whether a table exists and whether it has the given structure.
+The table must have the same number of columns with the same names and
+types. The order of the columns does not matter.
+The sizes of table->foreign_set and table->referenced_set must also be
+equal to req_schema->n_foreign and req_schema->n_referenced.
+The caller must own the dictionary mutex.
+dict_table_schema_check() @{
+@return DB_SUCCESS if the table exists and contains the necessary columns,
+DB_TABLE_NOT_FOUND if the table or its tablespace is missing,
+DB_STATS_DO_NOT_EXIST if a missing statistics table was already reported
+earlier (errstr is not written in that case),
+DB_ERROR if the structure does not match */
+dberr_t
+dict_table_schema_check(
+/*====================*/
+ dict_table_schema_t* req_schema, /*!< in/out: required table
+ schema */
+ char* errstr, /*!< out: human readable error
+ message if != DB_SUCCESS is
+ returned */
+ size_t errstr_sz) /*!< in: errstr size */
+{
+ char buf[MAX_FULL_NAME_LEN];
+ char req_type[64];
+ char actual_type[64];
+ dict_table_t* table;
+ ulint i;
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ table = dict_table_get_low(req_schema->table_name);
+
+ if (table == NULL) {
+ bool should_print=true;
+ /* no such table; for the two statistics tables the message
+ is produced only the first time that the table is found
+ missing */
+
+ if (innobase_strcasecmp(req_schema->table_name, "mysql/innodb_table_stats") == 0) {
+ if (innodb_table_stats_not_found_reported == false) {
+ innodb_table_stats_not_found = true;
+ innodb_table_stats_not_found_reported = true;
+ } else {
+ should_print = false;
+ }
+ } else if (innobase_strcasecmp(req_schema->table_name, "mysql/innodb_index_stats") == 0 ) {
+ if (innodb_index_stats_not_found_reported == false) {
+ innodb_index_stats_not_found = true;
+ innodb_index_stats_not_found_reported = true;
+ } else {
+ should_print = false;
+ }
+ }
+
+ if (should_print) {
+ snprintf(errstr, errstr_sz,
+ "Table %s not found.",
+ ut_format_name(req_schema->table_name,
+ buf, sizeof(buf)));
+ return(DB_TABLE_NOT_FOUND);
+ } else {
+ return(DB_STATS_DO_NOT_EXIST);
+ }
+ }
+
+ if (!table->is_readable() && !table->space) {
+ /* missing tablespace */
+
+ snprintf(errstr, errstr_sz,
+ "Tablespace for table %s is missing.",
+ ut_format_name(req_schema->table_name,
+ buf, sizeof(buf)));
+
+ return(DB_TABLE_NOT_FOUND);
+ }
+
+ if (ulint(table->n_def - DATA_N_SYS_COLS) != req_schema->n_cols) {
+ /* the table has a different number of columns than required */
+ snprintf(errstr, errstr_sz,
+ "%s has %d columns but should have " ULINTPF ".",
+ ut_format_name(req_schema->table_name, buf,
+ sizeof buf),
+ table->n_def - DATA_N_SYS_COLS,
+ req_schema->n_cols);
+
+ return(DB_ERROR);
+ }
+
+ /* For each column from req_schema->columns[] search
+ whether it is present in table->cols[].
+ The following algorithm is O(n_cols^2), but is optimized to
+ be O(n_cols) if the columns are in the same order in both arrays. */
+
+ for (i = 0; i < req_schema->n_cols; i++) {
+ ulint j = dict_table_has_column(
+ table, req_schema->columns[i].name, i);
+
+ if (j == table->n_def) {
+ /* j == table->n_def is treated as "column not found" */
+
+ snprintf(errstr, errstr_sz,
+ "required column %s"
+ " not found in table %s.",
+ req_schema->columns[i].name,
+ ut_format_name(
+ req_schema->table_name,
+ buf, sizeof(buf)));
+
+ return(DB_ERROR);
+ }
+
+ /* we found a column with the same name on j'th position,
+ compare column types and flags */
+
+ /* check length for exact match; for the statistics tables
+ a mismatch only produces a warning */
+ if (req_schema->columns[i].len == table->cols[j].len) { /* lengths match: nothing to do */
+ } else if (!strcmp(req_schema->table_name, TABLE_STATS_NAME)
+ || !strcmp(req_schema->table_name,
+ INDEX_STATS_NAME)) {
+ ut_ad(table->cols[j].len < req_schema->columns[i].len);
+ ib::warn() << "Table " << req_schema->table_name
+ << " has length mismatch in the"
+ << " column name "
+ << req_schema->columns[i].name
+ << ". Please run mysql_upgrade";
+ } else {
+ CREATE_TYPES_NAMES();
+
+ snprintf(errstr, errstr_sz,
+ "Column %s in table %s is %s"
+ " but should be %s (length mismatch).",
+ req_schema->columns[i].name,
+ ut_format_name(req_schema->table_name,
+ buf, sizeof(buf)),
+ actual_type, req_type);
+
+ return(DB_ERROR);
+ }
+
+ /*
+ check mtype for exact match.
+ This check is relaxed to allow us to use TIMESTAMP
+ (ie INT) for last_update instead of DATA_FIXBINARY.
+ We have to test for both values as the innodb_table_stats
+ table may come from MySQL and have the old type.
+ */
+ if (req_schema->columns[i].mtype != table->cols[j].mtype &&
+ !(req_schema->columns[i].mtype == DATA_INT &&
+ table->cols[j].mtype == DATA_FIXBINARY))
+ {
+ CREATE_TYPES_NAMES();
+
+ snprintf(errstr, errstr_sz,
+ "Column %s in table %s is %s"
+ " but should be %s (type mismatch).",
+ req_schema->columns[i].name,
+ ut_format_name(req_schema->table_name,
+ buf, sizeof(buf)),
+ actual_type, req_type);
+
+ return(DB_ERROR);
+ }
+
+ /* check whether required prtype mask is set: every bit of
+ the mask must be present in the actual prtype */
+ if (req_schema->columns[i].prtype_mask != 0
+ && (table->cols[j].prtype
+ & req_schema->columns[i].prtype_mask)
+ != req_schema->columns[i].prtype_mask) {
+
+ CREATE_TYPES_NAMES();
+
+ snprintf(errstr, errstr_sz,
+ "Column %s in table %s is %s"
+ " but should be %s (flags mismatch).",
+ req_schema->columns[i].name,
+ ut_format_name(req_schema->table_name,
+ buf, sizeof(buf)),
+ actual_type, req_type);
+
+ return(DB_ERROR);
+ }
+ }
+
+ if (req_schema->n_foreign != table->foreign_set.size()) {
+ snprintf(
+ errstr, errstr_sz,
+ "Table %s has " ULINTPF " foreign key(s) pointing"
+ " to other tables, but it must have " ULINTPF ".",
+ ut_format_name(req_schema->table_name,
+ buf, sizeof(buf)),
+ static_cast<ulint>(table->foreign_set.size()),
+ req_schema->n_foreign);
+ return(DB_ERROR);
+ }
+
+ if (req_schema->n_referenced != table->referenced_set.size()) {
+ snprintf(
+ errstr, errstr_sz,
+ "There are " ULINTPF " foreign key(s) pointing to %s, "
+ "but there must be " ULINTPF ".",
+ static_cast<ulint>(table->referenced_set.size()),
+ ut_format_name(req_schema->table_name,
+ buf, sizeof(buf)),
+ req_schema->n_referenced);
+ return(DB_ERROR);
+ }
+
+ return(DB_SUCCESS);
+}
+/* @} */
+
+/*********************************************************************//**
+Converts a database and table name from filesystem encoding
+(e.g. d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) in two
+strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be
+at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes.
+If the conversion of the table name reports errors, table_utf8 is set to
+srv_mysql50_table_name_prefix followed by the unconverted table name. */
+void
+dict_fs2utf8(
+/*=========*/
+	const char*	db_and_table,	/*!< in: database and table names,
+					e.g. d@i1b/a@q1b@1Kc */
+	char*		db_utf8,	/*!< out: database name, e.g. dцb */
+	size_t		db_utf8_size,	/*!< in: dbname_utf8 size */
+	char*		table_utf8,	/*!< out: table name, e.g. aюbØc */
+	size_t		table_utf8_size)/*!< in: table_utf8 size */
+{
+	char	db[MAX_DATABASE_NAME_LEN + 1];
+	uint	errors;
+
+	const ulint	db_len = dict_get_db_name_len(db_and_table);
+
+	/* db[] has to hold db_len bytes plus the terminating NUL, so
+	db_len must be strictly smaller than the buffer size. Allowing
+	db_len == sizeof(db) would make the NUL store below write one
+	byte past the end of db[]. */
+	ut_a(db_len < sizeof(db));
+
+	memcpy(db, db_and_table, db_len);
+	db[db_len] = '\0';
+
+	strconvert(
+		&my_charset_filename, db, uint(db_len), system_charset_info,
+		db_utf8, uint(db_utf8_size), &errors);
+
+	/* convert each # to @0023 in table name and store the result in buf */
+	const char*	table = dict_remove_db_name(db_and_table);
+	char		buf[MAX_TABLE_NAME_LEN * 5 + 1];
+	char*		buf_p = buf;
+
+	for (const char* table_p = table; table_p[0] != '\0'; table_p++) {
+		const size_t	n_out = table_p[0] == '#' ? 5 : 1;
+
+		/* Make sure that the n_out bytes and the terminating NUL
+		fit before anything is written. This is the same condition
+		as checking the position after the write, but it fails
+		before the buffer could be overrun. */
+		ut_a(size_t(buf_p - buf) + n_out < sizeof(buf));
+
+		if (table_p[0] == '#') {
+			memcpy(buf_p, "@0023", 5);
+		} else {
+			buf_p[0] = table_p[0];
+		}
+
+		buf_p += n_out;
+	}
+	buf_p[0] = '\0';
+
+	errors = 0;
+	strconvert(
+		&my_charset_filename, buf, (uint) (buf_p - buf),
+		system_charset_info,
+		table_utf8, uint(table_utf8_size),
+		&errors);
+
+	if (errors != 0) {
+		/* The name could not be converted: fall back to the
+		prefix followed by the unconverted table name. */
+		snprintf(table_utf8, table_utf8_size, "%s%s",
+			 srv_mysql50_table_name_prefix, table);
+	}
+}
+
+/** Resize the hash tables based on the current buffer pool size.
+While holding the mutex, the three hash tables are freed and created
+again with the new size, and every table in the table_LRU and
+table_non_LRU lists is inserted again. */
+void dict_sys_t::resize()
+{
+ ut_ad(this == &dict_sys);
+ ut_ad(is_initialised());
+ mutex_enter(&mutex);
+
+ /* all table entries are in table_LRU and table_non_LRU lists,
+ so the hash tables can be freed here and rebuilt from the lists */
+ table_hash.free();
+ table_id_hash.free();
+ temp_id_hash.free();
+
+ const ulint hash_size = buf_pool_get_curr_size()
+ / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE);
+ table_hash.create(hash_size);
+ table_id_hash.create(hash_size);
+ temp_id_hash.create(hash_size);
+
+ for (dict_table_t *table= UT_LIST_GET_FIRST(table_LRU); table;
+ table= UT_LIST_GET_NEXT(table_LRU, table))
+ {
+ ut_ad(!table->is_temporary()); /* table_LRU is expected to hold no temporary tables */
+ ulint fold= ut_fold_string(table->name.m_name);
+ ulint id_fold= ut_fold_ull(table->id);
+
+ HASH_INSERT(dict_table_t, name_hash, &table_hash, fold, table);
+ HASH_INSERT(dict_table_t, id_hash, &table_id_hash, id_fold, table);
+ }
+
+ for (dict_table_t *table = UT_LIST_GET_FIRST(table_non_LRU); table;
+ table= UT_LIST_GET_NEXT(table_LRU, table))
+ {
+ ulint fold= ut_fold_string(table->name.m_name);
+ ulint id_fold= ut_fold_ull(table->id);
+
+ HASH_INSERT(dict_table_t, name_hash, &table_hash, fold, table);
+
+ hash_table_t *id_hash= table->is_temporary()
+ ? &temp_id_hash : &table_id_hash; /* temporary tables are hashed by id in temp_id_hash, all others in table_id_hash */
+
+ HASH_INSERT(dict_table_t, id_hash, id_hash, id_fold, table);
+ }
+
+ mutex_exit(&mutex);
+}
+
+/** Close the data dictionary cache on shutdown.
+Does nothing if the cache is not initialised. Otherwise every table
+found in table_hash is removed, the hash tables are freed, the mutexes
+and the latch are freed, dict_foreign_err_file is closed if it is open,
+and m_initialised is reset. */
+void dict_sys_t::close()
+{
+ ut_ad(this == &dict_sys);
+ if (!is_initialised()) return;
+
+ mutex_enter(&mutex);
+
+ /* Free the hash elements. We don't remove them from the table
+ because we are going to destroy the table anyway.
+ Each cell is drained by removing its first element until the cell
+ is empty. */
+ for (ulint i= table_hash.n_cells; i--; )
+ while (dict_table_t *table= static_cast<dict_table_t*>
+ (HASH_GET_FIRST(&table_hash, i)))
+ dict_sys.remove(table);
+
+ table_hash.free();
+
+ /* table_id_hash contains the same elements as in table_hash,
+ therefore we don't delete the individual elements. */
+ table_id_hash.free();
+
+ /* No temporary tables should exist at this point. */
+ temp_id_hash.free();
+
+ mutex_exit(&mutex);
+ mutex_free(&mutex);
+ rw_lock_free(&latch);
+
+ mutex_free(&dict_foreign_err_mutex);
+
+ if (dict_foreign_err_file)
+ {
+ my_fclose(dict_foreign_err_file, MYF(MY_WME));
+ dict_foreign_err_file = NULL; /* so that the file is not closed twice */
+ }
+
+ m_initialised= false;
+}
+
+#ifdef UNIV_DEBUG
+/** Validate the dictionary table LRU list.
+Asserts that every table in dict_sys.table_LRU can be evicted and that
+no table in dict_sys.table_non_LRU can be evicted.
+@return TRUE if valid */
+static
+ibool
+dict_lru_validate(void)
+{
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	dict_table_t*	table = UT_LIST_GET_FIRST(dict_sys.table_LRU);
+
+	while (table != NULL) {
+		ut_a(table->can_be_evicted);
+		table = UT_LIST_GET_NEXT(table_LRU, table);
+	}
+
+	table = UT_LIST_GET_FIRST(dict_sys.table_non_LRU);
+
+	while (table != NULL) {
+		ut_a(!table->can_be_evicted);
+		table = UT_LIST_GET_NEXT(table_LRU, table);
+	}
+
+	return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+/*********************************************************************//**
+Check an index to see whether its first fields are the columns in the array,
+in the same order and is not marked for deletion and is not the same
+as types_idx.
+An index is rejected without an error code if it has fewer than n_cols
+fields, if its type has DICT_SPATIAL, DICT_FTS or DICT_CORRUPT set, if its
+online_status is ONLINE_INDEX_ABORTED or greater, or if a column name does
+not match. The out parameters error, err_col_no and err_index are written
+only when all three of them are non-NULL.
+@return true if the index qualifies, otherwise false */
+bool
+dict_foreign_qualify_index(
+/*=======================*/
+ const dict_table_t* table, /*!< in: table */
+ const char** col_names,
+ /*!< in: column names, or NULL
+ to use table->col_names */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols, /*!< in: number of columns */
+ const dict_index_t* index, /*!< in: index to check */
+ const dict_index_t* types_idx,
+ /*!< in: NULL or an index
+ whose types the column types
+ must match */
+ bool check_charsets,
+ /*!< in: whether to check
+ charsets. only has an effect
+ if types_idx != NULL */
+ ulint check_null,
+ /*!< in: nonzero if none of
+ the columns must be declared
+ NOT NULL */
+ fkerr_t* error, /*!< out: error code */
+ ulint* err_col_no,
+ /*!< out: column number where
+ error happened */
+ dict_index_t** err_index)
+ /*!< out: index where error
+ happened */
+{
+ if (dict_index_get_n_fields(index) < n_cols) {
+ return(false);
+ }
+
+ if (index->type & (DICT_SPATIAL | DICT_FTS | DICT_CORRUPT)) {
+ return false;
+ }
+
+ if (index->online_status >= ONLINE_INDEX_ABORTED) {
+ return false;
+ }
+
+ for (ulint i = 0; i < n_cols; i++) {
+ dict_field_t* field;
+ const char* col_name;
+ ulint col_no;
+
+ field = dict_index_get_nth_field(index, i);
+ col_no = dict_col_get_no(field->col);
+
+ if (field->prefix_len != 0) {
+ /* We do not accept column prefix
+ indexes here */
+ if (error && err_col_no && err_index) {
+ *error = FK_IS_PREFIX_INDEX;
+ *err_col_no = i;
+ *err_index = (dict_index_t*)index;
+ }
+ return(false);
+ }
+
+ if (check_null
+ && (field->col->prtype & DATA_NOT_NULL)) {
+ if (error && err_col_no && err_index) {
+ *error = FK_COL_NOT_NULL;
+ *err_col_no = i;
+ *err_index = (dict_index_t*)index;
+ }
+ return(false);
+ }
+
+ if (field->col->is_virtual()) {
+ col_name = ""; /* stays empty if the table has no virtual columns */
+ for (ulint j = 0; j < table->n_v_def; j++) {
+ col_name = dict_table_get_v_col_name(table, j);
+ if (innobase_strcasecmp(field->name,col_name) == 0) {
+ break;
+ }
+ } /* if nothing matched, col_name is the last virtual column name */
+ } else {
+ col_name = col_names
+ ? col_names[col_no]
+ : dict_table_get_col_name(table, col_no);
+ }
+
+ if (0 != innobase_strcasecmp(columns[i], col_name)) {
+ return(false);
+ }
+
+ if (types_idx && !cmp_cols_are_equal(
+ dict_index_get_nth_col(index, i),
+ dict_index_get_nth_col(types_idx, i),
+ check_charsets)) {
+ if (error && err_col_no && err_index) {
+ *error = FK_COLS_NOT_EQUAL;
+ *err_col_no = i;
+ *err_index = (dict_index_t*)index;
+ }
+
+ return(false);
+ }
+ }
+
+ return(true);
+}
+
+/*********************************************************************//**
+Advance the compression-failure padding heuristic by one sample.
+This runs after every compression attempt, successful or not. Once
+ZIP_PAD_ROUND_LEN samples have been collected, the failure percentage of
+that round decides whether the padding grows, shrinks or stays as it is,
+and the success/failure counters are reset for the next round.
+The caller must be holding info->mutex */
+static
+void
+dict_index_zip_pad_update(
+/*======================*/
+	zip_pad_info_t*	info,		/*!< in/out: info to be updated */
+	ulint		zip_threshold)	/*!< in: zip threshold value */
+{
+	ut_ad(info);
+	ut_ad(info->pad % ZIP_PAD_INCR == 0);
+
+	const ulint	n_samples = info->success + info->failure;
+
+	ut_ad(n_samples > 0);
+
+	/* Nothing to do if the user has just disabled the padding, or
+	if the current round is still being collected. */
+	if (zip_threshold == 0 || n_samples < ZIP_PAD_ROUND_LEN) {
+		return;
+	}
+
+	/* Round boundary: compute the failure rate for the heuristic
+	first, then reset the counters. */
+	const ulint	failure_rate = (info->failure * 100) / n_samples;
+	info->failure = 0;
+	info->success = 0;
+
+	if (failure_rate > zip_threshold) {
+		/* Compression failed more often than the user-defined
+		threshold allows. Grow the padding to make failures less
+		likely, but only if that keeps it below the maximum pad
+		size. */
+		if (info->pad + ZIP_PAD_INCR
+		    < (srv_page_size * zip_pad_max) / 100) {
+			info->pad.fetch_add(ZIP_PAD_INCR);
+
+			MONITOR_INC(MONITOR_PAD_INCREMENTS);
+		}
+
+		info->n_rounds = 0;
+		return;
+	}
+
+	/* The failure rate was acceptable: one more good round. */
+	++info->n_rounds;
+
+	/* After enough consecutive good rounds, shrink the padding. */
+	if (info->n_rounds >= ZIP_PAD_SUCCESSFUL_ROUND_LIMIT
+	    && info->pad > 0) {
+		info->pad.fetch_sub(ZIP_PAD_INCR);
+
+		info->n_rounds = 0;
+
+		MONITOR_INC(MONITOR_PAD_DECREMENTS);
+	}
+}
+
+/*********************************************************************//**
+Record a successful page compression for an index and let the padding
+heuristic react to it. Does nothing when zip_failure_threshold_pct is 0. */
+void
+dict_index_zip_success(
+/*===================*/
+	dict_index_t*	index)	/*!< in/out: index to be updated. */
+{
+	/* Read the setting once so that the check and the update below
+	use the same value. */
+	const ulint	threshold = zip_failure_threshold_pct;
+
+	if (threshold != 0) {
+		index->zip_pad.mutex.lock();
+		++index->zip_pad.success;
+		dict_index_zip_pad_update(&index->zip_pad, threshold);
+		index->zip_pad.mutex.unlock();
+	}
+	/* else: disabled by user. */
+}
+
+/*********************************************************************//**
+Record a failed page compression attempt for an index and let the padding
+heuristic react to it. Does nothing when zip_failure_threshold_pct is 0. */
+void
+dict_index_zip_failure(
+/*===================*/
+	dict_index_t*	index)	/*!< in/out: index to be updated. */
+{
+	/* Read the setting once so that the check and the update below
+	use the same value. */
+	const ulint	threshold = zip_failure_threshold_pct;
+
+	if (threshold != 0) {
+		index->zip_pad.mutex.lock();
+		++index->zip_pad.failure;
+		dict_index_zip_pad_update(&index->zip_pad, threshold);
+		index->zip_pad.mutex.unlock();
+	}
+	/* else: disabled by user. */
+}
+
+/*********************************************************************//**
+Compute the page size up to which a page of this index is expected to
+compress: the full page size minus the current padding, but never less
+than the share of the page that zip_pad_max leaves available.
+@return page size beyond which page might not compress */
+ulint
+dict_index_zip_pad_optimal_page_size(
+/*=================================*/
+	dict_index_t*	index)	/*!< in: index for which page size
+				is requested */
+{
+	if (!zip_failure_threshold_pct) {
+		/* Padding is disabled by the user. */
+		return(srv_page_size);
+	}
+
+	const ulint	cur_pad = index->zip_pad.pad;
+
+	ut_ad(cur_pad < srv_page_size);
+	const ulint	padded_size = srv_page_size - cur_pad;
+
+	/* Smallest size the user-configured maximum padding allows. */
+	ut_ad(zip_pad_max < 100);
+	const ulint	floor_size
+		= (srv_page_size * (100 - zip_pad_max)) / 100;
+
+	return(padded_size < floor_size ? floor_size : padded_size);
+}
+
+/*************************************************************//**
+Map the record format encoded in a table flag to its ROW_TYPE_* name.
+Hits ut_error if the format is none of the four handled values.
+@return row format name. */
+const char*
+dict_tf_to_row_format_string(
+/*=========================*/
+	ulint	table_flag)	/*!< in: row format setting */
+{
+	const char*	format_name = 0;
+
+	switch (dict_tf_get_rec_format(table_flag)) {
+	case REC_FORMAT_REDUNDANT:
+		format_name = "ROW_TYPE_REDUNDANT";
+		break;
+	case REC_FORMAT_COMPACT:
+		format_name = "ROW_TYPE_COMPACT";
+		break;
+	case REC_FORMAT_COMPRESSED:
+		format_name = "ROW_TYPE_COMPRESSED";
+		break;
+	case REC_FORMAT_DYNAMIC:
+		format_name = "ROW_TYPE_DYNAMIC";
+		break;
+	}
+
+	if (format_name) {
+		return(format_name);
+	}
+
+	ut_error;
+	return(0);
+}
+
+/** @return whether the table name equals TABLE_STATS_NAME or
+INDEX_STATS_NAME */
+bool dict_table_t::is_stats_table() const
+{
+	if (strcmp(name.m_name, TABLE_STATS_NAME) == 0) {
+		return true;
+	}
+
+	return strcmp(name.m_name, INDEX_STATS_NAME) == 0;
+}
diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc
new file mode 100644
index 00000000..34b04eb3
--- /dev/null
+++ b/storage/innobase/dict/dict0load.cc
@@ -0,0 +1,3687 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2016, 2021, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dict/dict0load.cc
+Loads to the memory cache database object definitions
+from dictionary tables
+
+Created 4/24/1996 Heikki Tuuri
+*******************************************************/
+
+#include "dict0load.h"
+
+#include "mysql_version.h"
+#include "btr0pcur.h"
+#include "btr0btr.h"
+#include "dict0boot.h"
+#include "dict0crea.h"
+#include "dict0dict.h"
+#include "dict0mem.h"
+#include "dict0priv.h"
+#include "dict0stats.h"
+#include "fsp0file.h"
+#include "fts0priv.h"
+#include "mach0data.h"
+#include "page0page.h"
+#include "rem0cmp.h"
+#include "srv0start.h"
+#include "srv0srv.h"
+#include "fts0opt.h"
+
+/** Following are the InnoDB system tables. The positions in
+this array are referenced by enum dict_system_table_id.
+dict_startscan_system() indexes this array with its system_id argument
+after asserting system_id < SYS_NUM_SYSTEM_TABLES, so the order of the
+entries must stay in step with that enumeration.
+NOTE(review): the scan code declares system_id as dict_system_id_t; the
+enum name quoted above may be stale -- confirm against the header. */
+static const char* SYSTEM_TABLE_NAME[] = {
+ "SYS_TABLES",
+ "SYS_INDEXES",
+ "SYS_COLUMNS",
+ "SYS_FIELDS",
+ "SYS_FOREIGN",
+ "SYS_FOREIGN_COLS",
+ "SYS_TABLESPACES",
+ "SYS_DATAFILES",
+ "SYS_VIRTUAL"
+};
+
+/** Loads a table definition and also all its index definitions.
+
+Loads those foreign key constraints whose referenced table is already in
+dictionary cache. If a foreign key constraint is not loaded, then the
+referenced table is pushed into the output stack (fk_tables), if it is not
+NULL. These tables must be subsequently loaded so that all the foreign
+key constraints are loaded into memory.
+
+@param[in] name Table name in the db/tablename format
+@param[in] ignore_err Error to be ignored when loading table
+ and its index definition
+@param[out] fk_tables Related table names that must also be
+ loaded to ensure that all foreign key
+ constraints are loaded.
+@return table, NULL if does not exist; if the table is stored in an
+.ibd file, but the file does not exist, then we set the
+file_unreadable flag in the table object we return */
+static
+dict_table_t*
+dict_load_table_one(
+ const table_name_t& name,
+ dict_err_ignore_t ignore_err,
+ dict_names_t& fk_tables);
+
+/** Load a table definition from a SYS_TABLES record to dict_table_t.
+Do not load any columns or indexes.
+@param[in] name Table name
+@param[in] rec SYS_TABLES record
+@param[out,own] table table, or NULL
+@return error message
+@retval NULL on success */
+static const char* dict_load_table_low(const table_name_t& name,
+ const rec_t* rec, dict_table_t** table)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/** Load an index definition from a SYS_INDEXES record to dict_index_t.
+If allocate=TRUE, we will create a dict_index_t structure and fill it
+accordingly. If allocate=FALSE, the dict_index_t will be supplied by
+the caller and filled with information read from the record.
+@return error message
+@retval NULL on success */
+static
+const char*
+dict_load_index_low(
+ byte* table_id, /*!< in/out: table id (8 bytes),
+ an "in" value if allocate=TRUE
+ and "out" when allocate=FALSE */
+ mem_heap_t* heap, /*!< in/out: temporary memory heap */
+ const rec_t* rec, /*!< in: SYS_INDEXES record */
+ ibool allocate, /*!< in: TRUE=allocate *index,
+ FALSE=fill in a pre-allocated
+ *index */
+ dict_index_t** index); /*!< out,own: index, or NULL */
+
+/** Load a table column definition from a SYS_COLUMNS record to dict_table_t.
+@return error message
+@retval NULL on success */
+static
+const char*
+dict_load_column_low(
+ dict_table_t* table, /*!< in/out: table, could be NULL
+ if we just populate a dict_column_t
+ struct with information from
+ a SYS_COLUMNS record */
+ mem_heap_t* heap, /*!< in/out: memory heap
+ for temporary storage */
+ dict_col_t* column, /*!< out: dict_column_t to fill,
+ or NULL if table != NULL */
+ table_id_t* table_id, /*!< out: table id */
+ const char** col_name, /*!< out: column name */
+ const rec_t* rec, /*!< in: SYS_COLUMNS record */
+ ulint* nth_v_col); /*!< out: if not NULL, this
+ records the "n" of "nth" virtual
+ column */
+
+/** Load a virtual column "mapping" (to base columns) information
+from a SYS_VIRTUAL record
+@param[in,out] table table
+@param[in,out] column mapped base column's dict_column_t
+@param[in,out] table_id table id
+@param[in,out] pos virtual column position
+@param[in,out] base_pos base column position
+@param[in] rec SYS_VIRTUAL record
+@return error message
+@retval NULL on success */
+static
+const char*
+dict_load_virtual_low(
+ dict_table_t* table,
+ dict_col_t** column,
+ table_id_t* table_id,
+ ulint* pos,
+ ulint* base_pos,
+ const rec_t* rec);
+
+/** Load an index field definition from a SYS_FIELDS record to dict_index_t.
+@return error message
+@retval NULL on success */
+static
+const char*
+dict_load_field_low(
+ byte* index_id, /*!< in/out: index id (8 bytes)
+ an "in" value if index != NULL
+ and "out" if index == NULL */
+ dict_index_t* index, /*!< in/out: index, could be NULL
+ if we just populate a dict_field_t
+ struct with information from
+ a SYS_FIELDS record */
+ dict_field_t* sys_field, /*!< out: dict_field_t to be
+ filled */
+ ulint* pos, /*!< out: Field position */
+ byte* last_index_id, /*!< in: last index id */
+ mem_heap_t* heap, /*!< in/out: memory heap
+ for temporary storage */
+ const rec_t* rec); /*!< in: SYS_FIELDS record */
+
+/* If this flag is TRUE, then we will load the clustered index's (and the
+table's) metadata even if it is marked as "corrupted". */
+my_bool srv_load_corrupted;
+
+#ifdef UNIV_DEBUG
+/****************************************************************//**
+Debug helper: check that the column behind the i'th field of an index
+has the expected name in the table.
+@return TRUE if the i'th column of index is 'name'. */
+static
+ibool
+name_of_col_is(
+/*===========*/
+	const dict_table_t*	table,	/*!< in: table */
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			i,	/*!< in: index field offset */
+	const char*		name)	/*!< in: name to compare to */
+{
+	/* Resolve index field -> column -> column number in the table. */
+	const ulint	col_no = dict_col_get_no(
+		dict_field_get_col(dict_index_get_nth_field(index, i)));
+
+	if (strcmp(name, dict_table_get_col_name(table, col_no)) != 0) {
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+
+/********************************************************************//**
+Find the first SYS_TABLES record that is not delete-marked and whose name
+starts with the given database prefix, and return a copy of its name.
+The search starts at the first record >= the prefix and stops as soon as
+a record no longer carries the prefix.
+@return own: table name, NULL if does not exist; the caller must free
+the memory in the string! */
+char*
+dict_get_first_table_name_in_db(
+/*============================*/
+	const char*	name)	/*!< in: database name which ends in '/' */
+{
+	btr_pcur_t	pcur;
+	mtr_t		mtr;
+	ulint		len;
+	char*		found_name = NULL;
+	const ulint	prefix_len = strlen(name);
+
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	mem_heap_t*	heap = mem_heap_create(1000);
+
+	mtr_start(&mtr);
+
+	dict_table_t*	sys_tables = dict_table_get_low("SYS_TABLES");
+	dict_index_t*	sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
+	ut_ad(!dict_table_is_comp(sys_tables));
+
+	/* Build a one-field search tuple holding the database prefix. */
+	dtuple_t*	tuple = dtuple_create(heap, 1);
+	dfield_t*	dfield = dtuple_get_nth_field(tuple, 0);
+
+	dfield_set_data(dfield, name, prefix_len);
+	dict_index_copy_types(tuple, sys_index, 1);
+
+	btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+				  BTR_SEARCH_LEAF, &pcur, &mtr);
+
+	for (;;) {
+		const rec_t*	rec = btr_pcur_get_rec(&pcur);
+
+		if (!btr_pcur_is_on_user_rec(&pcur)) {
+			/* Ran off the end: not found */
+			break;
+		}
+
+		const byte*	field = rec_get_nth_field_old(
+			rec, DICT_FLD__SYS_TABLES__NAME, &len);
+
+		if (len < prefix_len || memcmp(name, field, prefix_len)) {
+			/* Left the range of names with this prefix:
+			not found */
+			break;
+		}
+
+		if (!rec_get_deleted_flag(rec, 0)) {
+			/* We found one; copy the name before the cursor
+			and the mini-transaction are released below. */
+			found_name = mem_strdupl((char*) field, len);
+			break;
+		}
+
+		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+	}
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+	mem_heap_free(heap);
+
+	return(found_name);
+}
+
+/********************************************************************//**
+Advance a system table scan to the next record that is not delete-marked.
+At the end of the index the cursor is closed; otherwise the position of
+the returned record is stored in the cursor.
+@return the next record if found, NULL if end of scan */
+static
+const rec_t*
+dict_getnext_system_low(
+/*====================*/
+	btr_pcur_t*	pcur,	/*!< in/out: persistent cursor to the
+				record*/
+	mtr_t*		mtr)	/*!< in: the mini-transaction */
+{
+	rec_t*	rec;
+
+	/* Always step at least once, then keep stepping over
+	delete-marked records. */
+	do {
+		btr_pcur_move_to_next_user_rec(pcur, mtr);
+
+		rec = btr_pcur_get_rec(pcur);
+
+		if (!btr_pcur_is_on_user_rec(pcur)) {
+			/* end of index */
+			btr_pcur_close(pcur);
+
+			return(NULL);
+		}
+	} while (!rec || rec_get_deleted_flag(rec, 0));
+
+	/* Got a record; remember where we are. */
+	btr_pcur_store_position(pcur, mtr);
+
+	return(rec);
+}
+
+/********************************************************************//**
+Open a scan over one of the InnoDB system tables, using the first index
+of that table, and fetch the first record that is not delete-marked.
+@return first record of the system table, NULL if there is none */
+const rec_t*
+dict_startscan_system(
+/*==================*/
+	btr_pcur_t*	pcur,		/*!< out: persistent cursor to
+					the record */
+	mtr_t*		mtr,		/*!< in: the mini-transaction */
+	dict_system_id_t system_id)	/*!< in: which system table to open */
+{
+	ut_a(system_id < SYS_NUM_SYSTEM_TABLES);
+
+	dict_table_t*	sys_table
+		= dict_table_get_low(SYSTEM_TABLE_NAME[system_id]);
+	dict_index_t*	first_index = UT_LIST_GET_FIRST(sys_table->indexes);
+
+	btr_pcur_open_at_index_side(true, first_index, BTR_SEARCH_LEAF, pcur,
+				    true, 0, mtr);
+
+	return(dict_getnext_system_low(pcur, mtr));
+}
+
+/********************************************************************//**
+Continue a system table scan: restore the stored cursor position and
+move on to the next record that is not delete-marked.
+@return the next record if found, NULL if end of scan */
+const rec_t*
+dict_getnext_system(
+/*================*/
+	btr_pcur_t*	pcur,	/*!< in/out: persistent cursor
+				to the record */
+	mtr_t*		mtr)	/*!< in: the mini-transaction */
+{
+	/* Go back to where the previous call left off... */
+	btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
+
+	/* ...and step to the next record. */
+	return(dict_getnext_system_low(pcur, mtr));
+}
+
+/********************************************************************//**
+Process one SYS_TABLES record and hand back the dict_table_t for it.
+With cached=true the table is looked up by name after the
+mini-transaction has been committed; otherwise the table definition is
+loaded from the record and the mini-transaction is committed afterwards.
+@return error message, or NULL on success */
+const char*
+dict_process_sys_tables_rec_and_mtr_commit(
+/*=======================================*/
+	mem_heap_t*	heap,		/*!< in/out: temporary memory heap */
+	const rec_t*	rec,		/*!< in: SYS_TABLES record */
+	dict_table_t**	table,		/*!< out: dict_table_t to fill */
+	bool		cached,		/*!< in: whether to load from cache */
+	mtr_t*		mtr)		/*!< in/out: mini-transaction,
+					will be committed */
+{
+	ulint		name_len;
+	const char*	name_field = reinterpret_cast<const char*>(
+		rec_get_nth_field_old(
+			rec, DICT_FLD__SYS_TABLES__NAME, &name_len));
+
+	ut_a(!rec_get_deleted_flag(rec, 0));
+
+	ut_ad(mtr->memo_contains_page_flagged(rec, MTR_MEMO_PAGE_S_FIX));
+
+	/* Copy the table name out of the record into the heap. */
+	table_name_t	table_name(
+		mem_heap_strdupl(heap, name_field, name_len));
+
+	if (!cached) {
+		/* The record is read here, so commit only afterwards. */
+		const char*	err = dict_load_table_low(
+			table_name, rec, table);
+		mtr_commit(mtr);
+		return err;
+	}
+
+	/* Commit before load the table again */
+	mtr_commit(mtr);
+
+	*table = dict_table_get_low(table_name.m_name);
+
+	if (*table) {
+		return NULL;
+	}
+
+	return "Table not found in cache";
+}
+
+/********************************************************************//**
+Parse a SYS_INDEXES record into a caller-supplied dict_index_t and report
+the id of the table the index belongs to. For detail information about
+SYS_INDEXES fields, please refer to dict_boot() function.
+@return error message, or NULL on success */
+const char*
+dict_process_sys_indexes_rec(
+/*=========================*/
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	const rec_t*	rec,		/*!< in: current SYS_INDEXES rec */
+	dict_index_t*	index,		/*!< out: index to be filled */
+	table_id_t*	table_id)	/*!< out: index table id */
+{
+	ut_d(index->is_dummy = true);
+	ut_d(index->in_instant_init = false);
+
+	/* 8-byte buffer that receives the table id. */
+	byte*	id_buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
+
+	/* allocate=FALSE: fill in the index passed by the caller. */
+	const char*	err_msg = dict_load_index_low(
+		id_buf, heap, rec, FALSE, &index);
+
+	*table_id = mach_read_from_8(id_buf);
+
+	return(err_msg);
+}
+
+/********************************************************************//**
+Parse a SYS_COLUMNS record into a caller-supplied dict_col_t. This is a
+wrapper around dict_load_column_low() that passes no table.
+@return error message, or NULL on success */
+const char*
+dict_process_sys_columns_rec(
+/*=========================*/
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	const rec_t*	rec,		/*!< in: current SYS_COLUMNS rec */
+	dict_col_t*	column,		/*!< out: dict_col_t to be filled */
+	table_id_t*	table_id,	/*!< out: table id */
+	const char**	col_name,	/*!< out: column name */
+	ulint*		nth_v_col)	/*!< out: if virtual col, this is
+					record's sequence number */
+{
+	/* table=NULL: only the dict_col_t is filled in. */
+	return(dict_load_column_low(NULL, heap, column,
+				    table_id, col_name, rec, nth_v_col));
+}
+
+/** Parse a SYS_VIRTUAL record and extract the virtual column mapping
+information from it. This is a wrapper around dict_load_virtual_low()
+that passes neither a table nor a column.
+@param[in]	rec		current SYS_VIRTUAL rec
+@param[in,out]	table_id	table id
+@param[in,out]	pos		virtual column position
+@param[in,out]	base_pos	base column position
+@return error message, or NULL on success */
+const char*
+dict_process_sys_virtual_rec(
+	const rec_t*	rec,
+	table_id_t*	table_id,
+	ulint*		pos,
+	ulint*		base_pos)
+{
+	/* Only the ids and positions are read from the record. */
+	return(dict_load_virtual_low(NULL, NULL, table_id,
+				     pos, base_pos, rec));
+}
+
+/********************************************************************//**
+Parse a SYS_FIELDS record into a caller-supplied dict_field_t and report
+the id of the index the field belongs to.
+@return error message, or NULL on success */
+const char*
+dict_process_sys_fields_rec(
+/*========================*/
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	const rec_t*	rec,		/*!< in: current SYS_FIELDS rec */
+	dict_field_t*	sys_field,	/*!< out: dict_field_t to be
+					filled */
+	ulint*		pos,		/*!< out: Field position */
+	index_id_t*	index_id,	/*!< out: current index id */
+	index_id_t	last_id)	/*!< in: previous index id */
+{
+	/* dict_load_field_low() works on 8-byte id buffers: one that
+	receives the current index id, one holding the previous id. */
+	byte*	cur_id_buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
+	byte*	prev_id_buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
+
+	mach_write_to_8(prev_id_buf, last_id);
+
+	/* index=NULL: only sys_field is filled in. */
+	const char*	err_msg = dict_load_field_low(
+		cur_id_buf, NULL, sys_field, pos, prev_id_buf, heap, rec);
+
+	*index_id = mach_read_from_8(cur_id_buf);
+
+	return(err_msg);
+}
+
+/********************************************************************//**
+Parse a SYS_FOREIGN record into a caller-supplied dict_foreign_t. The
+id and both table names are copied into heap; type and n_fields are
+decoded from the N_COLS column. Fields are validated in record order,
+so on a length error the members assigned before it stay set.
+For detail information about SYS_FOREIGN fields, please refer to
+dict_load_foreign() function.
+@return error message, or NULL on success */
+const char*
+dict_process_sys_foreign_rec(
+/*=========================*/
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	const rec_t*	rec,		/*!< in: current SYS_FOREIGN rec */
+	dict_foreign_t*	foreign)	/*!< out: dict_foreign_t struct
+					to be filled */
+{
+	const char* const	bad_len
+		= "incorrect column length in SYS_FOREIGN";
+	ulint		len;
+	const byte*	field;
+
+	if (rec_get_deleted_flag(rec, 0)) {
+		return("delete-marked record in SYS_FOREIGN");
+	}
+
+	if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_FOREIGN) {
+		return("wrong number of columns in SYS_FOREIGN record");
+	}
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_FOREIGN__ID, &len);
+	if (len == 0 || len == UNIV_SQL_NULL) {
+		return(bad_len);
+	}
+
+	/* This receives a dict_foreign_t* that points to a stack variable.
+	So dict_foreign_free(foreign) is not used as elsewhere.
+	Since the heap used here is freed elsewhere, foreign->heap
+	is not assigned. */
+	foreign->id = mem_heap_strdupl(
+		heap, reinterpret_cast<const char*>(field), len);
+
+	/* The system columns are only checked for their length. */
+	rec_get_nth_field_offs_old(
+		rec, DICT_FLD__SYS_FOREIGN__DB_TRX_ID, &len);
+	if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
+		return(bad_len);
+	}
+
+	rec_get_nth_field_offs_old(
+		rec, DICT_FLD__SYS_FOREIGN__DB_ROLL_PTR, &len);
+	if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
+		return(bad_len);
+	}
+
+	/* The _lookup versions of the referenced and foreign table names
+	are not assigned since they are not used in this dict_foreign_t */
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_FOREIGN__FOR_NAME, &len);
+	if (len == 0 || len == UNIV_SQL_NULL) {
+		return(bad_len);
+	}
+	foreign->foreign_table_name = mem_heap_strdupl(
+		heap, reinterpret_cast<const char*>(field), len);
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_FOREIGN__REF_NAME, &len);
+	if (len == 0 || len == UNIV_SQL_NULL) {
+		return(bad_len);
+	}
+	foreign->referenced_table_name = mem_heap_strdupl(
+		heap, reinterpret_cast<const char*>(field), len);
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_FOREIGN__N_COLS, &len);
+	if (len != 4) {
+		return(bad_len);
+	}
+
+	/* N_COLS packs the type bits above the field count. */
+	const uint32_t	packed = mach_read_from_4(field);
+
+	foreign->type = packed >> 24 & ((1U << 6) - 1);
+	foreign->n_fields = packed & dict_index_t::MAX_N_FIELDS;
+
+	return(NULL);
+}
+
+/********************************************************************//**
+Parse a SYS_FOREIGN_COLS record and return the constraint name, the
+column position and the two column names to the caller. The strings are
+copied into heap. Fields are validated in record order, so on a length
+error the outputs assigned before it stay set.
+@return error message, or NULL on success */
+const char*
+dict_process_sys_foreign_col_rec(
+/*=============================*/
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	const rec_t*	rec,		/*!< in: current SYS_FOREIGN_COLS rec */
+	const char**	name,		/*!< out: foreign key constraint name */
+	const char**	for_col_name,	/*!< out: referencing column name */
+	const char**	ref_col_name,	/*!< out: referenced column name
+					in referenced table */
+	ulint*		pos)		/*!< out: column position */
+{
+	const char* const	bad_len
+		= "incorrect column length in SYS_FOREIGN_COLS";
+	ulint		len;
+	const byte*	field;
+
+	if (rec_get_deleted_flag(rec, 0)) {
+		return("delete-marked record in SYS_FOREIGN_COLS");
+	}
+
+	if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_FOREIGN_COLS) {
+		return("wrong number of columns in SYS_FOREIGN_COLS record");
+	}
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_FOREIGN_COLS__ID, &len);
+	if (len == 0 || len == UNIV_SQL_NULL) {
+		return(bad_len);
+	}
+	*name = mem_heap_strdupl(
+		heap, reinterpret_cast<const char*>(field), len);
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_FOREIGN_COLS__POS, &len);
+	if (len != 4) {
+		return(bad_len);
+	}
+	*pos = mach_read_from_4(field);
+
+	/* The system columns are only checked for their length. */
+	rec_get_nth_field_offs_old(
+		rec, DICT_FLD__SYS_FOREIGN_COLS__DB_TRX_ID, &len);
+	if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
+		return(bad_len);
+	}
+
+	rec_get_nth_field_offs_old(
+		rec, DICT_FLD__SYS_FOREIGN_COLS__DB_ROLL_PTR, &len);
+	if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
+		return(bad_len);
+	}
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME, &len);
+	if (len == 0 || len == UNIV_SQL_NULL) {
+		return(bad_len);
+	}
+	*for_col_name = mem_heap_strdupl(
+		heap, reinterpret_cast<const char*>(field), len);
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME, &len);
+	if (len == 0 || len == UNIV_SQL_NULL) {
+		return(bad_len);
+	}
+	*ref_col_name = mem_heap_strdupl(
+		heap, reinterpret_cast<const char*>(field), len);
+
+	return(NULL);
+}
+
+/********************************************************************//**
+Parse a SYS_TABLESPACES record and return the tablespace id, name and
+flags to the caller. The name is copied into heap. Fields are validated
+in record order, so on a length error the outputs assigned before it
+stay set.
+@return error message, or NULL on success */
+const char*
+dict_process_sys_tablespaces(
+/*=========================*/
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	const rec_t*	rec,		/*!< in: current SYS_TABLESPACES rec */
+	uint32_t*	space,		/*!< out: tablespace identifier */
+	const char**	name,		/*!< out: tablespace name */
+	ulint*		flags)		/*!< out: tablespace flags */
+{
+	const char* const	bad_len
+		= "incorrect column length in SYS_TABLESPACES";
+	ulint		len;
+	const byte*	field;
+
+	if (rec_get_deleted_flag(rec, 0)) {
+		return("delete-marked record in SYS_TABLESPACES");
+	}
+
+	if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_TABLESPACES) {
+		return("wrong number of columns in SYS_TABLESPACES record");
+	}
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_TABLESPACES__SPACE, &len);
+	if (len != DICT_FLD_LEN_SPACE) {
+		return(bad_len);
+	}
+	*space = mach_read_from_4(field);
+
+	/* The system columns are only checked for their length. */
+	rec_get_nth_field_offs_old(
+		rec, DICT_FLD__SYS_TABLESPACES__DB_TRX_ID, &len);
+	if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
+		return(bad_len);
+	}
+
+	rec_get_nth_field_offs_old(
+		rec, DICT_FLD__SYS_TABLESPACES__DB_ROLL_PTR, &len);
+	if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
+		return(bad_len);
+	}
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_TABLESPACES__NAME, &len);
+	if (len == 0 || len == UNIV_SQL_NULL) {
+		return(bad_len);
+	}
+	*name = mem_heap_strdupl(
+		heap, reinterpret_cast<const char*>(field), len);
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_TABLESPACES__FLAGS, &len);
+	if (len != DICT_FLD_LEN_FLAGS) {
+		return(bad_len);
+	}
+	*flags = mach_read_from_4(field);
+
+	return(NULL);
+}
+
+/********************************************************************//**
+Parse a SYS_DATAFILES record and return the space id and the datafile
+path to the caller. The path is copied into heap. Fields are validated
+in record order, so on a length error the outputs assigned before it
+stay set.
+@return error message, or NULL on success */
+const char*
+dict_process_sys_datafiles(
+/*=======================*/
+	mem_heap_t*	heap,		/*!< in/out: heap memory */
+	const rec_t*	rec,		/*!< in: current SYS_DATAFILES rec */
+	uint32_t*	space,		/*!< out: space id */
+	const char**	path)		/*!< out: datafile paths */
+{
+	const char* const	bad_len
+		= "incorrect column length in SYS_DATAFILES";
+	ulint		len;
+	const byte*	field;
+
+	if (rec_get_deleted_flag(rec, 0)) {
+		return("delete-marked record in SYS_DATAFILES");
+	}
+
+	if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_DATAFILES) {
+		return("wrong number of columns in SYS_DATAFILES record");
+	}
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_DATAFILES__SPACE, &len);
+	if (len != DICT_FLD_LEN_SPACE) {
+		return(bad_len);
+	}
+	*space = mach_read_from_4(field);
+
+	/* The system columns are only checked for their length. */
+	rec_get_nth_field_offs_old(
+		rec, DICT_FLD__SYS_DATAFILES__DB_TRX_ID, &len);
+	if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
+		return(bad_len);
+	}
+
+	rec_get_nth_field_offs_old(
+		rec, DICT_FLD__SYS_DATAFILES__DB_ROLL_PTR, &len);
+	if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
+		return(bad_len);
+	}
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_DATAFILES__PATH, &len);
+	if (len == 0 || len == UNIV_SQL_NULL) {
+		return(bad_len);
+	}
+	*path = mem_heap_strdupl(
+		heap, reinterpret_cast<const char*>(field), len);
+
+	return(NULL);
+}
+
+/** Get the first filepath from SYS_DATAFILES for a given space_id.
+The first index of SYS_DATAFILES is searched for the first record whose
+SPACE is >= space_id; the PATH of that record is copied and normalized
+only if its SPACE is exactly space_id. The delete-mark flag of the
+record is not examined here.
+@param[in] space_id Tablespace ID
+@return First filepath (caller must invoke ut_free() on it)
+@retval NULL if no SYS_DATAFILES entry was found. */
+static char*
+dict_get_first_path(
+ ulint space_id)
+{
+ mtr_t mtr;
+ dict_table_t* sys_datafiles;
+ dict_index_t* sys_index;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ byte* buf;
+ btr_pcur_t pcur;
+ const rec_t* rec;
+ const byte* field;
+ ulint len;
+ char* filepath = NULL;
+ /* Temporary heap for the search tuple; freed before returning. */
+ mem_heap_t* heap = mem_heap_create(1024);
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ mtr_start(&mtr);
+
+ sys_datafiles = dict_table_get_low("SYS_DATAFILES");
+ sys_index = UT_LIST_GET_FIRST(sys_datafiles->indexes);
+
+ /* Debug-only sanity checks of the expected SYS_DATAFILES layout. */
+ ut_ad(!dict_table_is_comp(sys_datafiles));
+ ut_ad(name_of_col_is(sys_datafiles, sys_index,
+ DICT_FLD__SYS_DATAFILES__SPACE, "SPACE"));
+ ut_ad(name_of_col_is(sys_datafiles, sys_index,
+ DICT_FLD__SYS_DATAFILES__PATH, "PATH"));
+
+ /* Build a one-field search tuple holding the 4-byte space id. */
+ tuple = dtuple_create(heap, 1);
+ dfield = dtuple_get_nth_field(tuple, DICT_FLD__SYS_DATAFILES__SPACE);
+
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+ mach_write_to_4(buf, space_id);
+
+ dfield_set_data(dfield, buf, 4);
+ dict_index_copy_types(tuple, sys_index, 1);
+
+ btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ /* Get the filepath from this SYS_DATAFILES record. */
+ if (btr_pcur_is_on_user_rec(&pcur)) {
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_DATAFILES__SPACE, &len);
+ ut_a(len == 4);
+
+ /* PAGE_CUR_GE may have landed on a larger space id; only an
+ exact match counts. */
+ if (space_id == mach_read_from_4(field)) {
+ /* A record for this space ID was found. */
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_DATAFILES__PATH, &len);
+
+ ut_ad(len > 0);
+ ut_ad(len < OS_FILE_MAX_PATH);
+
+ /* Skip an empty path, and any length that is not below
+ UNIV_SQL_NULL. */
+ if (len > 0 && len < UNIV_SQL_NULL) {
+ /* Copy the path while the cursor is still open;
+ presumably field points into the record and is
+ not safe to use after mtr_commit() -- confirm. */
+ filepath = mem_strdupl(
+ reinterpret_cast<const char*>(field),
+ len);
+ ut_ad(filepath != NULL);
+
+ /* The dictionary may have been written on
+ another OS. */
+ os_normalize_path(filepath);
+ }
+ }
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ return(filepath);
+}
+
+/** Update the PATH of the SYS_DATAFILES record for space_id to this
+filepath. The update runs in its own transaction, which is committed
+and freed before returning, whether or not the update succeeded. While
+srv_sys_tablespaces_open is not set, nothing is done and DB_SUCCESS is
+returned.
+@param[in]	space_id	Tablespace ID
+@param[in]	filepath	Tablespace filepath
+@return DB_SUCCESS if OK, dberr_t if the update failed */
+dberr_t
+dict_update_filepath(
+	ulint		space_id,
+	const char*	filepath)
+{
+	if (!srv_sys_tablespaces_open) {
+		/* Startup procedure is not yet ready for updates. */
+		return(DB_SUCCESS);
+	}
+
+	ut_d(dict_sys.assert_locked());
+
+	trx_t*	trx = trx_create();
+	trx->op_info = "update filepath";
+	trx->dict_operation_lock_mode = RW_X_LATCH;
+	trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+
+	/* Bind the two literals used by the procedure below. */
+	pars_info_t*	info = pars_info_create();
+
+	pars_info_add_int4_literal(info, "space", space_id);
+	pars_info_add_str_literal(info, "path", filepath);
+
+	const dberr_t	err = que_eval_sql(
+		info,
+		"PROCEDURE UPDATE_FILEPATH () IS\n"
+		"BEGIN\n"
+		"UPDATE SYS_DATAFILES"
+		" SET PATH = :path\n"
+		" WHERE SPACE = :space;\n"
+		"END;\n", FALSE, trx);
+
+	trx_commit_for_mysql(trx);
+	trx->dict_operation_lock_mode = 0;
+	trx->free();
+
+	if (UNIV_LIKELY(err == DB_SUCCESS)) {
+		/* We just updated SYS_DATAFILES due to the contents in
+		a link file. Make a note that we did this. */
+		ib::info() << "The InnoDB data dictionary table SYS_DATAFILES"
+			" for tablespace ID " << space_id
+			<< " was updated to use file " << filepath << ".";
+		return(err);
+	}
+
+	ib::warn() << "Error occurred while updating InnoDB data"
+		" dictionary table SYS_DATAFILES for tablespace ID "
+		<< space_id << " to file " << filepath << ": "
+		<< err << ".";
+
+	return(err);
+}
+
+/** Write the SYS_TABLESPACES and SYS_DATAFILES records of a tablespace
+by calling dict_replace_tablespace_in_dictionary() inside a transaction
+that is created, committed and freed here.
+The caller must hold the dictionary lock (asserted in debug builds).
+@param[in]	space_id	Tablespace ID
+@param[in]	name		Tablespace name
+@param[in]	filepath	First filepath; must not be NULL
+@param[in]	fsp_flags	Tablespace flags
+@return DB_SUCCESS if the records were written, or if nothing was done
+because the startup procedure is not yet ready for updates; otherwise
+the error code of the replace operation */
+dberr_t
+dict_replace_tablespace_and_filepath(
+	ulint		space_id,
+	const char*	name,
+	const char*	filepath,
+	ulint		fsp_flags)
+{
+	if (!srv_sys_tablespaces_open) {
+		/* Startup procedure is not yet ready for updates.
+		Report success, because the dictionary will likely
+		get updated later. */
+		return DB_SUCCESS;
+	}
+
+	DBUG_EXECUTE_IF("innodb_fail_to_update_tablespace_dict",
+			return(DB_INTERRUPTED););
+
+	ut_d(dict_sys.assert_locked());
+	ut_ad(filepath);
+
+	trx_t* const	replace_trx = trx_create();
+	replace_trx->op_info = "insert tablespace and filepath";
+	replace_trx->dict_operation_lock_mode = RW_X_LATCH;
+	trx_start_for_ddl(replace_trx, TRX_DICT_OP_INDEX);
+
+	/* No record for this space ID was found in SYS_DATAFILES.
+	The record is assumed to be missing from SYS_TABLESPACES as
+	well, so both tables are written. */
+	const dberr_t	err = dict_replace_tablespace_in_dictionary(
+		space_id, name, fsp_flags, filepath, replace_trx);
+
+	trx_commit_for_mysql(replace_trx);
+	replace_trx->dict_operation_lock_mode = 0;
+	replace_trx->free();
+
+	return err;
+}
+
+/** Validate the shape of a SYS_TABLES record.
+The record must not be delete-marked, must have the expected number of
+fields, and every checked field must have the expected stored length.
+The caller must own dict_sys.mutex (asserted in debug builds).
+@param[in]	rec	SYS_TABLES record
+@return error message, or NULL on success */
+static
+const char*
+dict_sys_tables_rec_check(
+	const rec_t*	rec)
+{
+	/* Shared message for every field length violation. */
+	const char* const	bad_length
+		= "incorrect column length in SYS_TABLES";
+	ulint			len;
+
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	if (rec_get_deleted_flag(rec, 0)) {
+		return "delete-marked record in SYS_TABLES";
+	}
+
+	if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_TABLES) {
+		return "wrong number of columns in SYS_TABLES record";
+	}
+
+	/* NAME must be present and non-empty. */
+	rec_get_nth_field_offs_old(
+		rec, DICT_FLD__SYS_TABLES__NAME, &len);
+	if (len == 0 || len == UNIV_SQL_NULL) {
+		return bad_length;
+	}
+
+	/* The system columns are either NULL or of their fixed size. */
+	rec_get_nth_field_offs_old(
+		rec, DICT_FLD__SYS_TABLES__DB_TRX_ID, &len);
+	if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
+		return bad_length;
+	}
+
+	rec_get_nth_field_offs_old(
+		rec, DICT_FLD__SYS_TABLES__DB_ROLL_PTR, &len);
+	if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
+		return bad_length;
+	}
+
+	rec_get_nth_field_offs_old(rec, DICT_FLD__SYS_TABLES__ID, &len);
+	if (len != 8) {
+		return bad_length;
+	}
+
+	const byte*	data = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_TABLES__N_COLS, &len);
+	if (data == NULL || len != 4) {
+		return bad_length;
+	}
+
+	rec_get_nth_field_offs_old(rec, DICT_FLD__SYS_TABLES__TYPE, &len);
+	if (len != 4) {
+		return bad_length;
+	}
+
+	rec_get_nth_field_offs_old(
+		rec, DICT_FLD__SYS_TABLES__MIX_ID, &len);
+	if (len != 8) {
+		return bad_length;
+	}
+
+	data = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len);
+	if (data == NULL || len != 4) {
+		return bad_length;
+	}
+
+	/* CLUSTER_ID is required to be NULL. */
+	rec_get_nth_field_offs_old(
+		rec, DICT_FLD__SYS_TABLES__CLUSTER_ID, &len);
+	if (len != UNIV_SQL_NULL) {
+		return bad_length;
+	}
+
+	data = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_TABLES__SPACE, &len);
+	if (data == NULL || len != 4) {
+		return bad_length;
+	}
+
+	return NULL;
+}
+
+/** Read and return the contents of a SYS_TABLESPACES record.
+@param[in]	rec	A record of SYS_TABLESPACES
+@param[out]	id	Pointer to the space_id for this table
+@param[in,out]	name	Buffer for Tablespace Name of length NAME_LEN.
+			On success it holds the NUL-terminated name, cut
+			to at most NAME_LEN - 1 bytes, with the rest of the
+			buffer filled with zero bytes
+@param[out]	flags	Pointer to tablespace flags
+@return true if the record was read correctly, false if not.
+On failure an error is logged and the outputs that follow the
+offending field are left untouched. */
+bool
+dict_sys_tablespaces_rec_read(
+	const rec_t*	rec,
+	ulint*		id,
+	char*		name,
+	ulint*		flags)
+{
+	const byte*	field;
+	ulint		len;
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_TABLESPACES__SPACE, &len);
+	if (len != DICT_FLD_LEN_SPACE) {
+		ib::error() << "Wrong field length in SYS_TABLESPACES.SPACE: "
+			<< len;
+		return(false);
+	}
+	*id = mach_read_from_4(field);
+
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_TABLESPACES__NAME, &len);
+	if (len == 0 || len == UNIV_SQL_NULL) {
+		ib::error() << "Wrong field length in SYS_TABLESPACES.NAME: "
+			<< len;
+		return(false);
+	}
+
+	/* The NAME field is delimited by len, not by a NUL byte, so it
+	must not be handed to strncpy(): that would keep reading past
+	the end of the field until it meets a zero byte, and it would
+	leave the buffer unterminated for a name of NAME_LEN bytes or
+	more. Copy an explicitly bounded number of bytes instead and
+	zero-fill the remainder, so that every write stays inside the
+	NAME_LEN bytes of the buffer and the result is always
+	NUL-terminated. */
+	const ulint	max_name_len = NAME_LEN - 1;
+	const ulint	name_len = len < max_name_len ? len : max_name_len;
+	memcpy(name, field, name_len);
+	memset(name + name_len, 0, NAME_LEN - name_len);
+
+	/* read the 4 byte flags from the FLAGS field */
+	field = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_TABLESPACES__FLAGS, &len);
+	if (len != 4) {
+		ib::error() << "Wrong field length in SYS_TABLESPACES.FLAGS: "
+			<< len;
+		return(false);
+	}
+	*flags = mach_read_from_4(field);
+
+	return(true);
+}
+
+/** Decide whether a SYS_TABLES.TYPE value is acceptable.
+@param[in]	type		SYS_TABLES.TYPE
+@param[in]	not_redundant	whether ROW_FORMAT=REDUNDANT is not used
+@return whether the SYS_TABLES.TYPE value is valid */
+static
+bool
+dict_sys_tables_type_valid(ulint type, bool not_redundant)
+{
+	/* The DATA_DIRECTORY flag is assigned fully independently of
+	all other persistent table flags, so it takes no part in the
+	validation. */
+	const ulint	checked = type & ~DICT_TF_MASK_DATA_DIR;
+
+	if (!(checked & 1)) {
+		/* SYS_TABLES.TYPE is 1 for ROW_FORMAT=REDUNDANT and
+		ROW_FORMAT=COMPACT. Otherwise it equals
+		dict_table_t::flags, whose least significant bit would
+		be set. Hence the bit can never be 0. */
+		return false;
+	}
+
+	if (checked == 1) {
+		/* ROW_FORMAT=REDUNDANT or ROW_FORMAT=COMPACT */
+		return true;
+	}
+
+	if (!not_redundant) {
+		/* For ROW_FORMAT=REDUNDANT, SYS_TABLES.TYPE must be
+		1 or 1|DICT_TF_MASK_NO_ROLLBACK. */
+		return (checked & ~(1U | DICT_TF_MASK_NO_ROLLBACK)) == 0;
+	}
+
+	/* Any bit at or above DICT_TF_POS_UNUSED is unknown and makes
+	the value invalid; the remaining bits are checked by
+	dict_tf_is_valid_not_redundant(). */
+	return checked < (1U << DICT_TF_POS_UNUSED)
+		&& dict_tf_is_valid_not_redundant(checked);
+}
+
+/** Translate a valid SYS_TABLES.TYPE value into dict_table_t::flags.
+@param[in]	type		SYS_TABLES.TYPE; must satisfy
+				dict_sys_tables_type_valid() (debug assertion)
+@param[in]	not_redundant	whether ROW_FORMAT=REDUNDANT is not used
+@return table flags */
+static
+ulint
+dict_sys_tables_type_to_tf(ulint type, bool not_redundant)
+{
+	ut_ad(dict_sys_tables_type_valid(type, not_redundant));
+
+	/* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION,
+	PAGE_COMPRESSION_LEVEL and NO_ROLLBACK sit at the same bit
+	positions in both representations. */
+	const ulint	shared_bits = type & (DICT_TF_MASK_ZIP_SSIZE
+					      | DICT_TF_MASK_ATOMIC_BLOBS
+					      | DICT_TF_MASK_DATA_DIR
+					      | DICT_TF_MASK_PAGE_COMPRESSION
+					      | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL
+					      | DICT_TF_MASK_NO_ROLLBACK);
+
+	/* The lowest bit of the result is set exactly when
+	not_redundant holds. */
+	ulint	flags = shared_bits;
+	if (not_redundant) {
+		flags |= 1;
+	}
+
+	ut_ad(dict_tf_is_valid(flags));
+	return flags;
+}
+
+/** Read and return 5 integer fields from a SYS_TABLES record.
+The lengths of the ID and SPACE fields are only checked by debug
+assertions, and the length of MIX_LEN is not checked in this function.
+NOTE(review): this presumably relies on dict_sys_tables_rec_check() having
+accepted the record first, as dict_check_sys_tables() does - confirm for
+any other caller.
+@param[in] rec A record of SYS_TABLES
+@param[in] table_name Table Name, the same as SYS_TABLES.NAME; only used
+in the error messages
+@param[out] table_id Pointer to the table_id for this table
+@param[out] space_id Pointer to the space_id for this table
+@param[out] n_cols Pointer to number of columns for this table. On success
+the DICT_N_COLS_COMPACT bit is not set in it.
+@param[out] flags Pointer to table flags
+@param[out] flags2 Pointer to table flags2. On success the DICT_TF2_FTS bit
+is not set in it.
+@return true if the record was read correctly, false if not. */
+MY_ATTRIBUTE((warn_unused_result))
+static
+bool
+dict_sys_tables_rec_read(
+ const rec_t* rec,
+ const table_name_t& table_name,
+ table_id_t* table_id,
+ ulint* space_id,
+ ulint* n_cols,
+ ulint* flags,
+ ulint* flags2)
+{
+ const byte* field;
+ ulint len;
+ ulint type;
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__ID, &len);
+ ut_ad(len == 8);
+ *table_id = static_cast<table_id_t>(mach_read_from_8(field));
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__SPACE, &len);
+ ut_ad(len == 4);
+ *space_id = mach_read_from_4(field);
+
+ /* Read the 4 byte flags from the TYPE field */
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__TYPE, &len);
+ ut_a(len == 4);
+ type = mach_read_from_4(field);
+
+ /* Handle MDEV-12873 InnoDB SYS_TABLES.TYPE incompatibility
+ for PAGE_COMPRESSED=YES in MariaDB 10.2.2 to 10.2.6.
+
+ MariaDB 10.2.2 introduced the SHARED_SPACE flag from MySQL 5.7,
+ shifting the flags PAGE_COMPRESSION, PAGE_COMPRESSION_LEVEL,
+ ATOMIC_WRITES (repurposed to NO_ROLLBACK in 10.3.1) by one bit.
+ The SHARED_SPACE flag would always
+ be written as 0 by MariaDB, because MariaDB does not support
+ CREATE TABLESPACE or CREATE TABLE...TABLESPACE for InnoDB.
+
+ So, instead of the bits AALLLLCxxxxxxx we would have
+ AALLLLC0xxxxxxx if the table was created with MariaDB 10.2.2
+ to 10.2.6. (AA=ATOMIC_WRITES, LLLL=PAGE_COMPRESSION_LEVEL,
+ C=PAGE_COMPRESSED, xxxxxxx=7 bits that were not moved.)
+
+ The case LLLLC=00000 is not a problem. The problem is the case
+ AALLLL10DB00001 where D is the (mostly ignored) DATA_DIRECTORY
+ flag and B is the ATOMIC_BLOBS flag (1 for ROW_FORMAT=DYNAMIC
+ and 0 for ROW_FORMAT=COMPACT in this case). Other low-order
+ bits must be so, because PAGE_COMPRESSED=YES is only allowed
+ for ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPACT, not for
+ ROW_FORMAT=REDUNDANT or ROW_FORMAT=COMPRESSED.
+
+ Starting with MariaDB 10.2.4, the flags would be
+ 00LLLL10DB00001, because ATOMIC_WRITES is always written as 0.
+
+ We will concentrate on the PAGE_COMPRESSION_LEVEL and
+ PAGE_COMPRESSED=YES. PAGE_COMPRESSED=NO implies
+ PAGE_COMPRESSION_LEVEL=0, and in that case all the affected
+ bits will be 0. For PAGE_COMPRESSED=YES, the values 1..9 are
+ allowed for PAGE_COMPRESSION_LEVEL. That is, we must interpret
+ the bits AALLLL10DB00001 as AALLLL1DB00001.
+
+ If someone created a table in MariaDB 10.2.2 or 10.2.3 with
+ the attribute ATOMIC_WRITES=OFF (value 2) and without
+ PAGE_COMPRESSED=YES or PAGE_COMPRESSION_LEVEL, that should be
+ rejected. The value ATOMIC_WRITES=ON (1) would look like
+ ATOMIC_WRITES=OFF, but it would be ignored starting with
+ MariaDB 10.2.4.
+
+ The hard-coded masks and shifts below depend on the flag
+ positions that the two compile-time assertions pin down. */
+ compile_time_assert(DICT_TF_POS_PAGE_COMPRESSION == 7);
+ compile_time_assert(DICT_TF_POS_UNUSED == 14);
+
+ if ((type & 0x19f) != 0x101) {
+ /* The table cannot have been created with MariaDB
+ 10.2.2 to 10.2.6, because they would write the
+ low-order bits of SYS_TABLES.TYPE as 0b10xx00001 for
+ PAGE_COMPRESSED=YES. No adjustment is applicable.
+ (The mask 0x19f selects bits 0..4, 7 and 8; the value
+ 0x101 requires bit 8 to be set, bit 7 to be clear and
+ bits 0..4 to be 00001.) */
+ } else if (type >= 3 << 13) {
+ /* 10.2.2 and 10.2.3 write ATOMIC_WRITES less than 3,
+ and no other flags above that can be set for the
+ SYS_TABLES.TYPE to be in the 10.2.2..10.2.6 format.
+ This would in any case be invalid format for 10.2 and
+ earlier releases. */
+ ut_ad(!dict_sys_tables_type_valid(type, true));
+ } else {
+ /* SYS_TABLES.TYPE is of the form AALLLL10DB00001. We
+ must still validate that the LLLL bits are between 0
+ and 9 before we can discard the extraneous 0 bit. */
+ ut_ad(!DICT_TF_GET_PAGE_COMPRESSION(type));
+
+ if ((((type >> 9) & 0xf) - 1) < 9) { /* shifted LLLL is in 1..9; a value of 0 is expected to wrap around and fail (assumes ulint is unsigned) */
+ ut_ad(DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type) & 1);
+
+ type = (type & 0x7fU) | (type >> 1 & ~0x7fU); /* keep bits 0..6, move the higher bits down by one, dropping bit 7 */
+
+ ut_ad(DICT_TF_GET_PAGE_COMPRESSION(type));
+ ut_ad(DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type) >= 1);
+ ut_ad(DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type) <= 9);
+ } else {
+ ut_ad(!dict_sys_tables_type_valid(type, true));
+ }
+ }
+
+ /* The low order bit of SYS_TABLES.TYPE is always set to 1. But in
+ dict_table_t::flags the low order bit is used to determine if the
+ ROW_FORMAT=REDUNDANT (0) or anything else (1).
+ Read the 4 byte N_COLS field and look at the high order bit. It
+ should be set for COMPACT and later. It should not be set for
+ REDUNDANT. */
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__N_COLS, &len);
+ ut_a(len == 4);
+ *n_cols = mach_read_from_4(field);
+
+ const bool not_redundant = 0 != (*n_cols & DICT_N_COLS_COMPACT);
+
+ if (!dict_sys_tables_type_valid(type, not_redundant)) {
+ ib::error() << "Table " << table_name << " in InnoDB"
+ " data dictionary contains invalid flags."
+ " SYS_TABLES.TYPE=" << type <<
+ " SYS_TABLES.N_COLS=" << *n_cols;
+ return(false);
+ }
+
+ *flags = dict_sys_tables_type_to_tf(type, not_redundant);
+
+ /* For tables created before MySQL 4.1, there may be
+ garbage in SYS_TABLES.MIX_LEN where flags2 are found. Such tables
+ would always be in ROW_FORMAT=REDUNDANT which do not have the
+ high bit set in n_cols, and flags would be zero.
+ MySQL 4.1 was the first version to support innodb_file_per_table,
+ that is, *space_id != 0.
+ Note that the last operand of the condition below repeats the
+ test that already defined not_redundant, so it cannot change
+ the outcome. */
+ if (not_redundant || *space_id != 0 || *n_cols & DICT_N_COLS_COMPACT) {
+
+ /* Get flags2 from SYS_TABLES.MIX_LEN. The length returned
+ in len is not checked here. */
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len);
+ *flags2 = mach_read_from_4(field);
+
+ if (!dict_tf2_is_valid(*flags, *flags2)) {
+ ib::error() << "Table " << table_name << " in InnoDB"
+ " data dictionary contains invalid flags."
+ " SYS_TABLES.TYPE=" << type
+ << " SYS_TABLES.MIX_LEN=" << *flags2;
+ return(false);
+ }
+
+ /* DICT_TF2_FTS will be set when indexes are being loaded */
+ *flags2 &= ~DICT_TF2_FTS;
+
+ /* Now that we have used this bit, unset it. */
+ *n_cols &= ~DICT_N_COLS_COMPACT;
+ } else {
+ *flags2 = 0;
+ }
+
+ return(true);
+}
+
+/** Load and check each non-predefined tablespace mentioned in SYS_TABLES.
+Search SYS_TABLES and check each tablespace mentioned that has not
+already been added to the fil_system. If it is valid, add it to the
+fil_system list.
+Records that fail dict_sys_tables_rec_check() or
+dict_sys_tables_rec_read(), tables in the system tablespace, tables whose
+name contains "/" TEMP_FILE_PREFIX "-" and tables with DICT_TF2_DISCARDED
+set are skipped. For a tablespace that is already in memory, SYS_DATAFILES
+is updated if its path differs from the file name in memory.
+The caller must hold the dictionary lock (asserted in debug builds).
+@return the highest space ID among the tablespaces that this function
+tried to open with fil_ibd_open(), or 0 if there was none. The IDs of
+skipped records and of tablespaces that were already in memory are not
+included. */
+static ulint dict_check_sys_tables()
+{
+ ulint max_space_id = 0;
+ btr_pcur_t pcur;
+ const rec_t* rec;
+ mtr_t mtr;
+
+ DBUG_ENTER("dict_check_sys_tables");
+
+ ut_d(dict_sys.assert_locked());
+
+ mtr_start(&mtr);
+
+ /* Before traversing SYS_TABLES, let's make sure we have
+ SYS_TABLESPACES and SYS_DATAFILES loaded. */
+ dict_table_t* sys_tablespaces;
+ dict_table_t* sys_datafiles;
+ sys_tablespaces = dict_table_get_low("SYS_TABLESPACES");
+ ut_a(sys_tablespaces != NULL);
+ sys_datafiles = dict_table_get_low("SYS_DATAFILES");
+ ut_a(sys_datafiles != NULL);
+
+ for (rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES);
+ rec != NULL;
+ mtr.commit(), mtr.start(), /* the mini-transaction is restarted between records; "continue" also passes through here */
+ rec = dict_getnext_system(&pcur, &mtr)) {
+ const byte* field;
+ ulint len;
+ table_id_t table_id;
+ ulint space_id;
+ ulint n_cols;
+ ulint flags;
+ ulint flags2;
+
+ /* If a table record is not useable, ignore it and continue
+ on to the next record. The message returned by
+ dict_sys_tables_rec_check() is discarded here; nothing is
+ logged for such a record in this function. */
+ if (dict_sys_tables_rec_check(rec) != NULL) {
+ continue;
+ }
+
+ /* Copy the table name from rec. The copy is owned by this
+ loop iteration and is released with ut_free() on every path
+ that leaves the iteration. */
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__NAME, &len);
+
+ table_name_t table_name(mem_strdupl((char*) field, len));
+ DBUG_PRINT("dict_check_sys_tables",
+ ("name: %p, '%s'", table_name.m_name,
+ table_name.m_name));
+
+ if (!dict_sys_tables_rec_read(rec, table_name,
+ &table_id, &space_id,
+ &n_cols, &flags, &flags2)
+ || space_id == TRX_SYS_SPACE) {
+next: /* shared exit for skipped records: free the name and go on */
+ ut_free(table_name.m_name);
+ continue;
+ }
+
+ if (strstr(table_name.m_name, "/" TEMP_FILE_PREFIX "-")) {
+ /* This table will be dropped by
+ row_mysql_drop_garbage_tables().
+ We do not care if the file exists. */
+ goto next;
+ }
+
+ if (flags2 & DICT_TF2_DISCARDED) {
+ ib::info() << "Ignoring tablespace for " << table_name
+ << " because the DISCARD flag is set .";
+ goto next;
+ }
+
+ /* For tables or partitions using .ibd files, the flag
+ DICT_TF2_USE_FILE_PER_TABLE was not set in MIX_LEN
+ before MySQL 5.6.5. The flag should not have been
+ introduced in persistent storage. MariaDB will keep
+ setting the flag when writing SYS_TABLES entries for
+ newly created or rebuilt tables or partitions, but
+ will otherwise ignore the flag. */
+
+ /* Now that we have the proper name for this tablespace,
+ look to see if it is already in the tablespace cache. */
+ if (const fil_space_t* space
+ = fil_space_for_table_exists_in_mem(
+ space_id, table_name.m_name, flags)) {
+ /* Recovery can open a datafile that does not
+ match SYS_DATAFILES. If they don't match, update
+ SYS_DATAFILES. The result of the update is not
+ checked here. This path does not contribute to
+ max_space_id. */
+ char *dict_path = dict_get_first_path(space_id);
+ const char *fil_path = space->chain.start->name;
+ if (dict_path
+ && strcmp(dict_path, fil_path)) {
+ dict_update_filepath(space_id, fil_path);
+ }
+ ut_free(dict_path);
+ ut_free(table_name.m_name);
+ continue;
+ }
+
+ /* Set the expected filepath from the data dictionary.
+ If the file is found elsewhere (from an ISL or the default
+ location) or this path is the same file but looks different,
+ fil_ibd_open() will update the dictionary with what is
+ opened. */
+ char* filepath = dict_get_first_path(space_id);
+
+ /* Check that the .ibd file exists. A failure to open it is
+ only reported as a warning; the space ID still counts
+ towards max_space_id below. */
+ if (!fil_ibd_open(
+ false,
+ !srv_read_only_mode && srv_log_file_size != 0,
+ FIL_TYPE_TABLESPACE,
+ space_id, dict_tf_to_fsp_flags(flags),
+ table_name, filepath)) {
+ ib::warn() << "Ignoring tablespace for "
+ << table_name
+ << " because it could not be opened.";
+ }
+
+ max_space_id = ut_max(max_space_id, space_id);
+
+ ut_free(table_name.m_name);
+ ut_free(filepath);
+ }
+
+ mtr_commit(&mtr);
+
+ DBUG_RETURN(max_space_id);
+}
+
+/** Check the tablespaces known to the data dictionary and publish the
+largest tablespace ID to fil_system.
+
+Under the dictionary lock, the maximum space ID stored in the dictionary
+header is read and passed to fil_set_max_space_id_if_bigger(). After
+that, dict_check_sys_tables() walks SYS_TABLES to find the file-per-table
+tablespaces (space_id > 0) and opens those that are not in memory yet;
+the largest space ID it reports is passed on in the same way.
+
+In a crash recovery some tablespace objects already exist because the
+REDO log was processed. Any other tablespace is opened here, and the
+space_id information in the data dictionary is compared to what is
+found in the tablespace file. */
+void dict_check_tablespaces_and_store_max_id()
+{
+	DBUG_ENTER("dict_check_tablespaces_and_store_max_id");
+
+	dict_sys_lock();
+
+	{
+		/* Seed the maximum with the value kept in the
+		dictionary header page. */
+		mtr_t	mtr;
+
+		mtr.start();
+		const ulint	hdr_max_space_id = mach_read_from_4(
+			dict_hdr_get(&mtr)->frame
+			+ DICT_HDR + DICT_HDR_MAX_SPACE_ID);
+		mtr.commit();
+
+		fil_set_max_space_id_if_bigger(hdr_max_space_id);
+	}
+
+	/* Open all tablespaces referenced in SYS_TABLES.
+	This will update SYS_TABLESPACES and SYS_DATAFILES if it
+	finds any file-per-table tablespaces not already there. */
+	const ulint	scanned_max_space_id = dict_check_sys_tables();
+	fil_set_max_space_id_if_bigger(scanned_max_space_id);
+
+	dict_sys_unlock();
+
+	DBUG_VOID_RETURN;
+}
+
+/** Error message for a delete-marked record in dict_load_column_low().
+dict_load_columns() recognizes this case by comparing the returned pointer
+with this variable, so dict_load_column_low() must return this very
+pointer for delete-marked records. */
+static const char* dict_load_column_del = "delete-marked record in SYS_COLUMN";
+
+/** Load a table column definition from a SYS_COLUMNS record to dict_table_t.
+Exactly one of table and column is expected to be non-NULL (debug
+assertion). With a table, the column is appended to it as an ordinary or
+a virtual column, depending on the DATA_VIRTUAL bit of PRTYPE; without a
+table, *column is filled in instead.
+The record fields are validated in the order in which they are read, and
+the first failed check determines the returned message. Output parameters
+that were already assigned keep their values when a later check fails.
+@return error message
+@retval NULL on success
+@retval dict_load_column_del if the record is delete-marked */
+static
+const char*
+dict_load_column_low(
+ dict_table_t* table, /*!< in/out: table, could be NULL
+ if we just populate a dict_column_t
+ struct with information from
+ a SYS_COLUMNS record */
+ mem_heap_t* heap, /*!< in/out: memory heap
+ for temporary storage */
+ dict_col_t* column, /*!< out: dict_column_t to fill,
+ or NULL if table != NULL */
+ table_id_t* table_id, /*!< out: table id; if NULL, the
+ TABLE_ID of the record is compared
+ with table->id instead, so table
+ must not be NULL in that case */
+ const char** col_name, /*!< out: column name, allocated
+ from heap; may be NULL if the name
+ is not wanted */
+ const rec_t* rec, /*!< in: SYS_COLUMNS record */
+ ulint* nth_v_col) /*!< out: if not NULL, this
+ records the "n" of "nth" virtual
+ column; it is only assigned for
+ virtual columns */
+{
+ char* name;
+ const byte* field;
+ ulint len;
+ ulint mtype;
+ ulint prtype;
+ ulint col_len;
+ ulint pos;
+ ulint num_base;
+
+ ut_ad(!table == !!column);
+
+ if (rec_get_deleted_flag(rec, 0)) {
+ return(dict_load_column_del);
+ }
+
+ if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_COLUMNS) {
+ return("wrong number of columns in SYS_COLUMNS record");
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_COLUMNS__TABLE_ID, &len);
+ if (len != 8) {
+err_len: /* shared exit for every field length violation below */
+ return("incorrect column length in SYS_COLUMNS");
+ }
+
+ if (table_id) {
+ *table_id = mach_read_from_8(field);
+ } else if (table->id != mach_read_from_8(field)) {
+ return("SYS_COLUMNS.TABLE_ID mismatch");
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_COLUMNS__POS, &len);
+ if (len != 4) {
+ goto err_len;
+ }
+
+ pos = mach_read_from_4(field);
+
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_COLUMNS__DB_TRX_ID, &len);
+ if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_COLUMNS__DB_ROLL_PTR, &len);
+ if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_COLUMNS__NAME, &len);
+ if (len == 0 || len == UNIV_SQL_NULL) {
+ goto err_len;
+ }
+
+ name = mem_heap_strdupl(heap, (const char*) field, len);
+
+ if (col_name) {
+ *col_name = name;
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_COLUMNS__MTYPE, &len);
+ if (len != 4) {
+ goto err_len;
+ }
+
+ mtype = mach_read_from_4(field);
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_COLUMNS__PRTYPE, &len);
+ if (len != 4) {
+ goto err_len;
+ }
+ prtype = mach_read_from_4(field);
+
+ if (dtype_get_charset_coll(prtype) == 0
+ && dtype_is_string_type(mtype)) {
+ /* The table was created with < 4.1.2. The string column
+ carries no collation, so one is substituted below. */
+
+ if (dtype_is_binary_string_type(mtype, prtype)) {
+ /* Use the binary collation for
+ string columns of binary type. */
+
+ prtype = dtype_form_prtype(
+ prtype,
+ DATA_MYSQL_BINARY_CHARSET_COLL);
+ } else {
+ /* Use the default charset for
+ other than binary columns. */
+
+ prtype = dtype_form_prtype(
+ prtype,
+ data_mysql_default_charset_coll);
+ }
+ }
+
+ if (table && table->n_def != pos && !(prtype & DATA_VIRTUAL)) { /* a non-virtual column must arrive at the next position to be defined */
+ return("SYS_COLUMNS.POS mismatch");
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_COLUMNS__LEN, &len);
+ if (len != 4) {
+ goto err_len;
+ }
+ col_len = mach_read_from_4(field);
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_COLUMNS__PREC, &len);
+ if (len != 4) {
+ goto err_len;
+ }
+ num_base = mach_read_from_4(field); /* the PREC field supplies the number of base columns passed to dict_mem_table_add_v_col() */
+
+ if (table) {
+ if (prtype & DATA_VIRTUAL) {
+#ifdef UNIV_DEBUG
+ dict_v_col_t* vcol =
+#endif
+ dict_mem_table_add_v_col(
+ table, heap, name, mtype,
+ prtype, col_len,
+ dict_get_v_col_mysql_pos(pos), num_base);
+ ut_ad(vcol->v_pos == dict_get_v_col_pos(pos));
+ } else {
+ ut_ad(num_base == 0);
+ dict_mem_table_add_col(table, heap, name, mtype,
+ prtype, col_len);
+ }
+ } else {
+ dict_mem_fill_column_struct(column, pos, mtype,
+ prtype, col_len);
+ }
+
+ /* Report the virtual column number */
+ if ((prtype & DATA_VIRTUAL) && nth_v_col != NULL) {
+ *nth_v_col = dict_get_v_col_pos(pos);
+ }
+
+ return(NULL);
+}
+
+/** Error message for a delete-marked record in dict_load_virtual_low() */
+static const char* dict_load_virtual_del = "delete-marked record in SYS_VIRTUAL";
+
+/** Decode one SYS_VIRTUAL record, which maps a virtual column to one of
+its base columns.
+The fields are checked in the order TABLE_ID, POS, BASE_POS, DB_TRX_ID,
+DB_ROLL_PTR. Output parameters are assigned as soon as their field has
+been accepted, so they may already be set when a later check fails.
+@param[in]	table		table; needed when table_id is NULL or column
+				is not NULL
+@param[out]	column		if not NULL, receives the base column of
+				table at position BASE_POS
+@param[out]	table_id	if not NULL, receives SYS_VIRTUAL.TABLE_ID;
+				if NULL, the field is compared with table->id
+@param[out]	pos		if not NULL, receives SYS_VIRTUAL.POS
+@param[out]	base_pos	if not NULL, receives SYS_VIRTUAL.BASE_POS
+@param[in]	rec		SYS_VIRTUAL record
+@return error message
+@retval NULL on success
+@retval dict_load_virtual_del if the record is delete-marked */
+static
+const char*
+dict_load_virtual_low(
+	dict_table_t*	table,
+	dict_col_t**	column,
+	table_id_t*	table_id,
+	ulint*		pos,
+	ulint*		base_pos,
+	const rec_t*	rec)
+{
+	/* Shared message for every field length violation. */
+	const char* const	bad_length
+		= "incorrect column length in SYS_VIRTUAL";
+	const byte*		data;
+	ulint			len;
+
+	if (rec_get_deleted_flag(rec, 0)) {
+		return dict_load_virtual_del;
+	}
+
+	if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_VIRTUAL) {
+		return "wrong number of columns in SYS_VIRTUAL record";
+	}
+
+	data = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_VIRTUAL__TABLE_ID, &len);
+	if (len != 8) {
+		return bad_length;
+	}
+
+	/* Either hand the table id to the caller or verify it. */
+	if (table_id == NULL) {
+		if (table->id != mach_read_from_8(data)) {
+			return "SYS_VIRTUAL.TABLE_ID mismatch";
+		}
+	} else {
+		*table_id = mach_read_from_8(data);
+	}
+
+	data = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_VIRTUAL__POS, &len);
+	if (len != 4) {
+		return bad_length;
+	}
+
+	if (pos != NULL) {
+		*pos = mach_read_from_4(data);
+	}
+
+	data = rec_get_nth_field_old(
+		rec, DICT_FLD__SYS_VIRTUAL__BASE_POS, &len);
+	if (len != 4) {
+		return bad_length;
+	}
+
+	const ulint	base = mach_read_from_4(data);
+
+	if (base_pos != NULL) {
+		*base_pos = base;
+	}
+
+	/* The system columns are either NULL or of their fixed size. */
+	rec_get_nth_field_offs_old(
+		rec, DICT_FLD__SYS_VIRTUAL__DB_TRX_ID, &len);
+	if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
+		return bad_length;
+	}
+
+	rec_get_nth_field_offs_old(
+		rec, DICT_FLD__SYS_VIRTUAL__DB_ROLL_PTR, &len);
+	if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
+		return bad_length;
+	}
+
+	if (column != NULL) {
+		*column = dict_table_get_nth_col(table, base);
+	}
+
+	return NULL;
+}
+
+/********************************************************************//**
+Loads definitions for table columns.
+A cursor is opened on the first index of SYS_COLUMNS at the first record
+whose leading field is not less than table->id, and records are consumed
+until table->n_t_cols - DATA_N_SYS_COLS of them have been loaded through
+dict_load_column_low(). Delete-marked records are skipped without being
+counted. Any other error reported by dict_load_column_low() is fatal.
+When a non-virtual column named FTS_DOC_ID_COL_NAME is found, table->fts
+is created if necessary and its doc_col is set to the position of that
+column.
+The caller must own dict_sys.mutex (asserted in debug builds). */
+static
+void
+dict_load_columns(
+/*==============*/
+ dict_table_t* table, /*!< in/out: table */
+ mem_heap_t* heap) /*!< in/out: memory heap
+ for temporary storage */
+{
+ dict_table_t* sys_columns;
+ dict_index_t* sys_index;
+ btr_pcur_t pcur;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ const rec_t* rec;
+ byte* buf;
+ ulint i;
+ mtr_t mtr;
+ ulint n_skipped = 0;
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ mtr_start(&mtr);
+
+ sys_columns = dict_table_get_low("SYS_COLUMNS");
+ sys_index = UT_LIST_GET_FIRST(sys_columns->indexes);
+ ut_ad(!dict_table_is_comp(sys_columns));
+
+ ut_ad(name_of_col_is(sys_columns, sys_index,
+ DICT_FLD__SYS_COLUMNS__NAME, "NAME"));
+ ut_ad(name_of_col_is(sys_columns, sys_index,
+ DICT_FLD__SYS_COLUMNS__PREC, "PREC"));
+
+ tuple = dtuple_create(heap, 1); /* one-field search key holding the table id */
+ dfield = dtuple_get_nth_field(tuple, 0);
+
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
+ mach_write_to_8(buf, table->id);
+
+ dfield_set_data(dfield, buf, 8);
+ dict_index_copy_types(tuple, sys_index, 1);
+
+ btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ ut_ad(table->n_t_cols == static_cast<ulint>(
+ table->n_cols) + static_cast<ulint>(table->n_v_cols));
+
+ for (i = 0;
+ i + DATA_N_SYS_COLS < table->n_t_cols + n_skipped; /* i counts visited records; every skipped record extends the bound by one */
+ i++) {
+ const char* err_msg;
+ const char* name = NULL;
+ ulint nth_v_col = ULINT_UNDEFINED;
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ ut_a(btr_pcur_is_on_user_rec(&pcur));
+
+ err_msg = dict_load_column_low(table, heap, NULL, NULL,
+ &name, rec, &nth_v_col);
+
+ if (err_msg == dict_load_column_del) { /* pointer comparison with the dedicated message variable */
+ n_skipped++;
+ goto next_rec;
+ } else if (err_msg) {
+ ib::fatal() << err_msg;
+ }
+
+ /* Note: Currently we have one DOC_ID column that is
+ shared by all FTS indexes on a table. And only non-virtual
+ column can be used for FULLTEXT index.
+ nth_v_col is still ULINT_UNDEFINED here unless
+ dict_load_column_low() reported a virtual column. */
+ if (innobase_strcasecmp(name,
+ FTS_DOC_ID_COL_NAME) == 0
+ && nth_v_col == ULINT_UNDEFINED) {
+ dict_col_t* col;
+ /* As part of normal loading of tables the
+ table->flag is not set for tables with FTS
+ till after the FTS indexes are loaded. So we
+ create the fts_t instance here if there isn't
+ one already created.
+
+ This case does not arise for table create as
+ the flag is set before the table is created. */
+ if (table->fts == NULL) {
+ table->fts = fts_create(table);
+ }
+
+ ut_a(table->fts->doc_col == ULINT_UNDEFINED);
+
+ col = dict_table_get_nth_col(table, i - n_skipped); /* i - n_skipped is the number of records loaded before this one */
+
+ ut_ad(col->len == sizeof(doc_id_t));
+
+ if (col->prtype & DATA_FTS_DOC_ID) {
+ DICT_TF2_FLAG_SET(
+ table, DICT_TF2_FTS_HAS_DOC_ID);
+ DICT_TF2_FLAG_UNSET(
+ table, DICT_TF2_FTS_ADD_DOC_ID);
+ }
+
+ table->fts->doc_col = i - n_skipped;
+ }
+next_rec:
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+}
+
+/** Loads SYS_VIRTUAL info for one virtual column
+Nothing is done if v_col->num_base is 0. Otherwise a cursor is opened on
+the first index of SYS_VIRTUAL with the search key (table->id, encoded
+virtual column position), and v_col->num_base records are loaded through
+dict_load_virtual_low(), which stores the base columns in
+v_col->base_col[]. Delete-marked records are skipped without being
+counted; any other error is fatal.
+The caller must own dict_sys.mutex (asserted in debug builds).
+@param[in,out] table table
+@param[in] nth_v_col virtual column sequence num
+@param[in,out] v_col virtual column
+@param[in,out] heap memory heap
+*/
+static
+void
+dict_load_virtual_one_col(
+ dict_table_t* table,
+ ulint nth_v_col,
+ dict_v_col_t* v_col,
+ mem_heap_t* heap)
+{
+ dict_table_t* sys_virtual;
+ dict_index_t* sys_virtual_index;
+ btr_pcur_t pcur;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ const rec_t* rec;
+ byte* buf;
+ ulint i = 0;
+ mtr_t mtr;
+ ulint skipped = 0;
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ if (v_col->num_base == 0) {
+ return;
+ }
+
+ mtr_start(&mtr);
+
+ sys_virtual = dict_table_get_low("SYS_VIRTUAL");
+ sys_virtual_index = UT_LIST_GET_FIRST(sys_virtual->indexes);
+ ut_ad(!dict_table_is_comp(sys_virtual));
+
+ ut_ad(name_of_col_is(sys_virtual, sys_virtual_index,
+ DICT_FLD__SYS_VIRTUAL__POS, "POS"));
+
+ tuple = dtuple_create(heap, 2);
+
+ /* table ID field */
+ dfield = dtuple_get_nth_field(tuple, 0);
+
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
+ mach_write_to_8(buf, table->id);
+
+ dfield_set_data(dfield, buf, 8);
+
+ /* virtual column pos field: nth_v_col and v_col->m_col.ind are
+ combined into one 4-byte value by dict_create_v_col_pos() */
+ dfield = dtuple_get_nth_field(tuple, 1);
+
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+ ulint vcol_pos = dict_create_v_col_pos(nth_v_col, v_col->m_col.ind);
+ mach_write_to_4(buf, vcol_pos);
+
+ dfield_set_data(dfield, buf, 4);
+
+ dict_index_copy_types(tuple, sys_virtual_index, 2);
+
+ btr_pcur_open_on_user_rec(sys_virtual_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ for (i = 0; i < unsigned{v_col->num_base} + skipped; i++) { /* every skipped record extends the bound by one */
+ const char* err_msg;
+ ulint pos;
+
+ ut_ad(btr_pcur_is_on_user_rec(&pcur));
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ ut_a(btr_pcur_is_on_user_rec(&pcur)); /* repeats the debug assertion above as a check that is also made in non-debug builds (presumably - confirm the ut_a semantics) */
+
+ err_msg = dict_load_virtual_low(table,
+ &v_col->base_col[i - skipped],
+ NULL,
+ &pos, NULL, rec);
+
+ if (err_msg) {
+ if (err_msg != dict_load_virtual_del) { /* pointer comparison with the dedicated message variable */
+ ib::fatal() << err_msg;
+ } else {
+ skipped++;
+ }
+ } else {
+ ut_ad(pos == vcol_pos);
+ }
+
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+}
+
+/** Load the SYS_VIRTUAL information of every virtual column of a table
+by calling dict_load_virtual_one_col() for each of them in turn.
+@param[in,out]	table	table
+@param[in]	heap	memory heap
+*/
+static
+void
+dict_load_virtual(
+	dict_table_t*	table,
+	mem_heap_t*	heap)
+{
+	ulint	nth = 0;
+
+	while (nth < table->n_v_cols) {
+		dict_load_virtual_one_col(
+			table, nth, dict_table_get_nth_v_col(table, nth),
+			heap);
+		++nth;
+	}
+}
+
+/** Error message for a delete-marked record in dict_load_field_low() */
+static const char* dict_load_field_del = "delete-marked record in SYS_FIELDS";
+
+/** Load an index field definition from a SYS_FIELDS record to dict_index_t.
+With an index, the INDEX_ID and POS of the record are verified against
+index_id and index->n_def, and the field is appended to the index. Without
+an index, the INDEX_ID is copied to index_id, and the field name, prefix
+length and position are returned in sys_field and pos.
+The record fields are validated in the order in which they are read, and
+the first failed check determines the returned message.
+@return error message
+@retval NULL on success
+@retval dict_load_field_del if the record is delete-marked */
+static
+const char*
+dict_load_field_low(
+ byte* index_id, /*!< in/out: index id (8 bytes)
+ an "in" value if index != NULL
+ and "out" if index == NULL */
+ dict_index_t* index, /*!< in/out: index, could be NULL
+ if we just populate a dict_field_t
+ struct with information from
+ a SYS_FIELDS record */
+ dict_field_t* sys_field, /*!< out: dict_field_t to be
+ filled; must be NULL if index
+ is given and non-NULL otherwise */
+ ulint* pos, /*!< out: Field position; only
+ assigned, and then required to be
+ non-NULL, if index == NULL */
+ byte* last_index_id, /*!< in: last index id (8 bytes);
+ required if index == NULL */
+ mem_heap_t* heap, /*!< in/out: memory heap
+ for temporary storage */
+ const rec_t* rec) /*!< in: SYS_FIELDS record */
+{
+ const byte* field;
+ ulint len;
+ unsigned pos_and_prefix_len;
+ unsigned prefix_len;
+ bool first_field;
+ ulint position;
+
+ /* Either index or sys_field is supplied, not both */
+ ut_a((!index) || (!sys_field));
+
+ if (rec_get_deleted_flag(rec, 0)) {
+ return(dict_load_field_del);
+ }
+
+ if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_FIELDS) {
+ return("wrong number of columns in SYS_FIELDS record");
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FIELDS__INDEX_ID, &len);
+ if (len != 8) {
+err_len: /* shared exit for every field length violation below */
+ return("incorrect column length in SYS_FIELDS");
+ }
+
+ if (!index) {
+ ut_a(last_index_id);
+ memcpy(index_id, (const char*) field, 8);
+ first_field = memcmp(index_id, last_index_id, 8); /* a record whose index id differs from the last one is taken as the first field of its index */
+ } else {
+ first_field = (index->n_def == 0);
+ if (memcmp(field, index_id, 8)) {
+ return("SYS_FIELDS.INDEX_ID mismatch");
+ }
+ }
+
+ /* The next field stores the field position in the index and a
+ possible column prefix length if the index field does not
+ contain the whole column. The storage format is like this: if
+ there is at least one prefix field in the index, then the HIGH
+ 2 bytes contain the field number (index->n_def) and the low 2
+ bytes the prefix length for the field. Otherwise the field
+ number (index->n_def) is contained in the 2 LOW bytes. */
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FIELDS__POS, &len);
+ if (len != 4) {
+ goto err_len;
+ }
+
+ pos_and_prefix_len = mach_read_from_4(field);
+
+ if (index && UNIV_UNLIKELY
+ ((pos_and_prefix_len & 0xFFFFUL) != index->n_def
+ && (pos_and_prefix_len >> 16 & 0xFFFF) != index->n_def)) { /* the position must match index->n_def in at least one of the two encodings */
+ return("SYS_FIELDS.POS mismatch");
+ }
+
+ if (first_field || pos_and_prefix_len > 0xFFFFUL) { /* the high 2 bytes are in use, or this is the first field (whose value 0 is read the same way in both encodings): split into position and prefix length */
+ prefix_len = pos_and_prefix_len & 0xFFFFUL;
+ position = (pos_and_prefix_len & 0xFFFF0000UL) >> 16;
+ } else {
+ prefix_len = 0;
+ position = pos_and_prefix_len & 0xFFFFUL;
+ }
+
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_FIELDS__DB_TRX_ID, &len);
+ if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_FIELDS__DB_ROLL_PTR, &len);
+ if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FIELDS__COL_NAME, &len);
+ if (len == 0 || len == UNIV_SQL_NULL) {
+ goto err_len;
+ }
+
+ if (index) {
+ dict_mem_index_add_field(
+ index, mem_heap_strdupl(heap, (const char*) field, len),
+ prefix_len);
+ } else {
+ ut_a(sys_field);
+ ut_a(pos);
+
+ sys_field->name = mem_heap_strdupl(
+ heap, (const char*) field, len);
+ sys_field->prefix_len = prefix_len & ((1U << 12) - 1); /* only the low 12 bits of the prefix length are stored */
+ *pos = position;
+ }
+
+ return(NULL);
+}
+
+/********************************************************************//**
+Loads definitions for index fields.
+A cursor on the first index of SYS_FIELDS is opened with PAGE_CUR_GE on
+a search tuple that holds index->id, and index->n_fields consecutive
+records are passed to dict_load_field_low(). The caller must hold
+dict_sys.mutex.
+NOTE(review): a delete-marked record is skipped, but it still consumes
+one iteration of the loop; confirm that this is intended.
+NOTE(review): the declared return type is ulint while the returned
+variable is a dberr_t; confirm whether the declaration should be dberr_t.
+@return DB_SUCCESS if ok, DB_CORRUPTION if corruption */
+static
+ulint
+dict_load_fields(
+/*=============*/
+ dict_index_t* index, /*!< in/out: index whose fields to load */
+ mem_heap_t* heap) /*!< in: memory heap for temporary storage */
+{
+ dict_table_t* sys_fields;
+ dict_index_t* sys_index;
+ btr_pcur_t pcur;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ const rec_t* rec;
+ byte* buf;
+ ulint i;
+ mtr_t mtr;
+ dberr_t error;
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ mtr_start(&mtr);
+
+ sys_fields = dict_table_get_low("SYS_FIELDS");
+ sys_index = UT_LIST_GET_FIRST(sys_fields->indexes);
+ ut_ad(!dict_table_is_comp(sys_fields));
+ ut_ad(name_of_col_is(sys_fields, sys_index,
+ DICT_FLD__SYS_FIELDS__COL_NAME, "COL_NAME"));
+
+ tuple = dtuple_create(heap, 1);
+ dfield = dtuple_get_nth_field(tuple, 0);
+ /* The search key is the 8-byte index id; the same buffer is later
+ passed to dict_load_field_low() as the expected INDEX_ID. */
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
+ mach_write_to_8(buf, index->id);
+
+ dfield_set_data(dfield, buf, 8);
+ dict_index_copy_types(tuple, sys_index, 1);
+
+ btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+ for (i = 0; i < index->n_fields; i++) {
+ const char* err_msg;
+
+ rec = btr_pcur_get_rec(&pcur);
+ /* The cursor leaving the user records before n_fields
+ records have been visited is asserted with ut_a(). */
+ ut_a(btr_pcur_is_on_user_rec(&pcur));
+
+ err_msg = dict_load_field_low(buf, index, NULL, NULL, NULL,
+ heap, rec);
+
+ if (err_msg == dict_load_field_del) {
+ /* There could be delete marked records in
+ SYS_FIELDS because SYS_FIELDS.INDEX_ID can be
+ updated by ALTER TABLE ADD INDEX. */
+
+ goto next_rec;
+ } else if (err_msg) {
+ ib::error() << err_msg;
+ error = DB_CORRUPTION;
+ goto func_exit;
+ }
+next_rec:
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ }
+
+ error = DB_SUCCESS;
+func_exit:
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ return(error);
+}
+
+/** Error message for a delete-marked record in dict_load_index_low().
+dict_load_indexes() compares the returned pointer with this variable and
+skips such records. */
+static const char* dict_load_index_del = "delete-marked record in SYS_INDEXES";
+/** Error message for table->id mismatch in dict_load_index_low().
+dict_load_indexes() compares the returned pointer with this variable and
+treats a match as the end of the index records of the table. */
+static const char* dict_load_index_id_err = "SYS_INDEXES.TABLE_ID mismatch";
+/** Error message for SYS_TABLES flags mismatch in dict_load_table_low().
+dict_load_table_one() compares the returned pointer with this variable
+and does not log this particular message. */
+static const char* dict_load_table_flags = "incorrect flags in SYS_TABLES";
+
+/** Load an index definition from a SYS_INDEXES record to dict_index_t.
+If allocate=TRUE, we will create a dict_index_t structure and fill it
+accordingly. If allocate=FALSE, the dict_index_t will be supplied by
+the caller and filled with information read from the record.
+A record may have DICT_NUM_FIELDS__SYS_INDEXES columns, or one column
+fewer, in which case DICT_INDEX_MERGE_THRESHOLD_DEFAULT is used as the
+merge threshold.
+@return error message
+@retval NULL on success
+@retval dict_load_index_del if the record is delete-marked
+@retval dict_load_index_id_err if allocate=TRUE and the TABLE_ID of the
+record differs from table_id */
+static
+const char*
+dict_load_index_low(
+ byte* table_id, /*!< in/out: table id (8 bytes),
+ an "in" value if allocate=TRUE
+ and "out" when allocate=FALSE */
+ mem_heap_t* heap, /*!< in/out: temporary memory heap */
+ const rec_t* rec, /*!< in: SYS_INDEXES record */
+ ibool allocate, /*!< in: TRUE=allocate *index,
+ FALSE=fill in a pre-allocated
+ *index */
+ dict_index_t** index) /*!< out,own: index, or NULL */
+{
+ const byte* field;
+ ulint len;
+ ulint name_len;
+ char* name_buf;
+ index_id_t id;
+ ulint n_fields;
+ ulint type;
+ unsigned merge_threshold;
+
+ if (allocate) {
+ /* If allocate=TRUE, no dict_index_t will
+ be supplied. Initialize "*index" to NULL */
+ *index = NULL;
+ }
+
+ if (rec_get_deleted_flag(rec, 0)) {
+ return(dict_load_index_del);
+ }
+
+ if (rec_get_n_fields_old(rec) == DICT_NUM_FIELDS__SYS_INDEXES) {
+ /* MERGE_THRESHOLD exists */
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__MERGE_THRESHOLD, &len);
+ switch (len) {
+ case 4:
+ merge_threshold = mach_read_from_4(field);
+ break;
+ case UNIV_SQL_NULL:
+ merge_threshold = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
+ break;
+ default:
+ return("incorrect MERGE_THRESHOLD length"
+ " in SYS_INDEXES");
+ }
+ } else if (rec_get_n_fields_old(rec)
+ == DICT_NUM_FIELDS__SYS_INDEXES - 1) {
+ /* MERGE_THRESHOLD doesn't exist */
+
+ merge_threshold = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
+ } else {
+ return("wrong number of columns in SYS_INDEXES record");
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__TABLE_ID, &len);
+ if (len != 8) {
+err_len:
+ return("incorrect column length in SYS_INDEXES");
+ }
+
+ if (!allocate) {
+ /* We are reading a SYS_INDEXES record. Copy the table_id */
+ memcpy(table_id, (const char*) field, 8);
+ } else if (memcmp(field, table_id, 8)) {
+ /* Caller supplied table_id, verify it is the same
+ id as on the index record */
+ return(dict_load_index_id_err);
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__ID, &len);
+ if (len != 8) {
+ goto err_len;
+ }
+
+ id = mach_read_from_8(field);
+ /* Only the lengths of the DB_TRX_ID and DB_ROLL_PTR columns are
+ validated; their contents are not read here. */
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_INDEXES__DB_TRX_ID, &len);
+ if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_INDEXES__DB_ROLL_PTR, &len);
+ if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__NAME, &name_len);
+ if (name_len == UNIV_SQL_NULL) {
+ goto err_len;
+ }
+
+ name_buf = mem_heap_strdupl(heap, (const char*) field,
+ name_len);
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__N_FIELDS, &len);
+ if (len != 4) {
+ goto err_len;
+ }
+ n_fields = mach_read_from_4(field);
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__TYPE, &len);
+ if (len != 4) {
+ goto err_len;
+ }
+ type = mach_read_from_4(field);
+ if (type & (~0U << DICT_IT_BITS)) {
+ return("unknown SYS_INDEXES.TYPE bits");
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
+ if (len != 4) {
+ goto err_len;
+ }
+ /* "field" keeps pointing to PAGE_NO; it is read further below,
+ once the index object exists. */
+ if (allocate) {
+ *index = dict_mem_index_create(NULL, name_buf, type, n_fields);
+ } else {
+ ut_a(*index);
+
+ dict_mem_fill_index_struct(*index, NULL, name_buf,
+ type, n_fields);
+ }
+
+ (*index)->id = id;
+ (*index)->page = mach_read_from_4(field);
+ ut_ad((*index)->page);
+ (*index)->merge_threshold = merge_threshold & ((1U << 6) - 1); /* only the low 6 bits are kept */
+
+ return(NULL);
+}
+
+/********************************************************************//**
+Loads definitions for table indexes. Adds them to the data dictionary
+cache.
+A cursor on the first index of SYS_INDEXES is opened with PAGE_CUR_GE on
+a search tuple that holds table->id. The scan ends when the cursor is no
+longer on a user record or when dict_load_index_low() reports a TABLE_ID
+mismatch. The caller must hold dict_sys.mutex.
+NOTE(review): the value returned by dict_load_fields() is not checked
+before the index is added to the cache; confirm that this is intended.
+@return DB_SUCCESS if ok, DB_CORRUPTION if corruption of dictionary
+table or DB_UNSUPPORTED if table has unknown index type;
+DB_INDEX_CORRUPT if a corrupted clustered index is refused, or the
+error returned by dict_index_add_to_cache() */
+static MY_ATTRIBUTE((nonnull))
+dberr_t
+dict_load_indexes(
+/*==============*/
+ dict_table_t* table, /*!< in/out: table */
+ mem_heap_t* heap, /*!< in: memory heap for temporary storage */
+ dict_err_ignore_t ignore_err)
+ /*!< in: error to be ignored when
+ loading the index definition */
+{
+ dict_table_t* sys_indexes;
+ dict_index_t* sys_index;
+ btr_pcur_t pcur;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ const rec_t* rec;
+ byte* buf;
+ mtr_t mtr;
+ dberr_t error = DB_SUCCESS;
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ mtr_start(&mtr);
+
+ sys_indexes = dict_table_get_low("SYS_INDEXES");
+ sys_index = UT_LIST_GET_FIRST(sys_indexes->indexes);
+ ut_ad(!dict_table_is_comp(sys_indexes));
+ ut_ad(name_of_col_is(sys_indexes, sys_index,
+ DICT_FLD__SYS_INDEXES__NAME, "NAME"));
+ ut_ad(name_of_col_is(sys_indexes, sys_index,
+ DICT_FLD__SYS_INDEXES__PAGE_NO, "PAGE_NO"));
+
+ tuple = dtuple_create(heap, 1);
+ dfield = dtuple_get_nth_field(tuple, 0);
+ /* The search key is the 8-byte table id; the same buffer is later
+ passed to dict_load_index_low() as the expected TABLE_ID. */
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
+ mach_write_to_8(buf, table->id);
+
+ dfield_set_data(dfield, buf, 8);
+ dict_index_copy_types(tuple, sys_index, 1);
+
+ btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+ for (;;) {
+ dict_index_t* index = NULL;
+ const char* err_msg;
+
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
+
+ /* We should allow the table to open even
+ without index when DICT_ERR_IGNORE_CORRUPT is set.
+ DICT_ERR_IGNORE_CORRUPT is currently only set
+ for drop table */
+ if (dict_table_get_first_index(table) == NULL
+ && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)) {
+ ib::warn() << "Cannot load table "
+ << table->name
+ << " because it has no indexes in"
+ " InnoDB internal data dictionary.";
+ error = DB_CORRUPTION;
+ goto func_exit;
+ }
+
+ break;
+ }
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ if ((ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)
+ && (rec_get_n_fields_old(rec)
+ == DICT_NUM_FIELDS__SYS_INDEXES
+ /* a record for older SYS_INDEXES table
+ (missing merge_threshold column) is acceptable. */
+ || rec_get_n_fields_old(rec)
+ == DICT_NUM_FIELDS__SYS_INDEXES - 1)) {
+ const byte* field;
+ ulint len;
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__NAME, &len);
+
+ if (len != UNIV_SQL_NULL
+ && static_cast<char>(*field)
+ == static_cast<char>(*TEMP_INDEX_PREFIX_STR)) {
+ /* Skip indexes whose name starts with
+ TEMP_INDEX_PREFIX_STR, because they will
+ be dropped by row_merge_drop_temp_indexes()
+ during crash recovery. */
+ goto next_rec;
+ }
+ }
+ /* allocate=TRUE: a new dict_index_t is created for the
+ record, and buf holds the expected TABLE_ID. Exactly one of
+ index and err_msg is non-NULL afterwards. */
+ err_msg = dict_load_index_low(buf, heap, rec, TRUE, &index);
+ ut_ad((index == NULL && err_msg != NULL)
+ || (index != NULL && err_msg == NULL));
+
+ if (err_msg == dict_load_index_id_err) {
+ /* TABLE_ID mismatch means that we have
+ run out of index definitions for the table. */
+
+ if (dict_table_get_first_index(table) == NULL
+ && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)) {
+
+ ib::warn() << "Failed to load the"
+ " clustered index for table "
+ << table->name
+ << " because of the following error: "
+ << err_msg << "."
+ " Refusing to load the rest of the"
+ " indexes (if any) and the whole table"
+ " altogether.";
+ error = DB_CORRUPTION;
+ goto func_exit;
+ }
+
+ break;
+ } else if (err_msg == dict_load_index_del) {
+ /* Skip delete-marked records. */
+ goto next_rec;
+ } else if (err_msg) {
+ ib::error() << err_msg;
+ if (ignore_err & DICT_ERR_IGNORE_CORRUPT) {
+ goto next_rec;
+ }
+ error = DB_CORRUPTION;
+ goto func_exit;
+ }
+
+ ut_ad(index);
+ ut_ad(!dict_index_is_online_ddl(index));
+
+ /* Check whether the index is corrupted */
+ if (index->is_corrupted()) {
+ ib::error() << "Index " << index->name
+ << " of table " << table->name
+ << " is corrupted";
+
+ if (!srv_load_corrupted
+ && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)
+ && dict_index_is_clust(index)) {
+ dict_mem_index_free(index);
+
+ error = DB_INDEX_CORRUPT;
+ goto func_exit;
+ } else {
+ /* We will load the index if
+ 1) srv_load_corrupted is TRUE
+ 2) ignore_err is set with
+ DICT_ERR_IGNORE_CORRUPT
+ 3) if the index corrupted is a secondary
+ index */
+ ib::info() << "Load corrupted index "
+ << index->name
+ << " of table " << table->name;
+ }
+ }
+
+ if (index->type & DICT_FTS
+ && !dict_table_has_fts_index(table)) {
+ /* This should have been created by now. */
+ ut_a(table->fts != NULL);
+ DICT_TF2_FLAG_SET(table, DICT_TF2_FTS);
+ }
+
+ /* We check for unsupported types first, so that the
+ subsequent checks are relevant for the supported types. */
+ if (index->type & ~(DICT_CLUSTERED | DICT_UNIQUE
+ | DICT_CORRUPT | DICT_FTS
+ | DICT_SPATIAL | DICT_VIRTUAL)) {
+
+ ib::error() << "Unknown type " << index->type
+ << " of index " << index->name
+ << " of table " << table->name;
+
+ error = DB_UNSUPPORTED;
+ dict_mem_index_free(index);
+ goto func_exit;
+ } else if (index->page == FIL_NULL
+ && table->is_readable()
+ && (!(index->type & DICT_FTS))) {
+
+ ib::error() << "Trying to load index " << index->name
+ << " for table " << table->name
+ << ", but the index tree has been freed!";
+
+ if (ignore_err & DICT_ERR_IGNORE_INDEX_ROOT) {
+ /* If caller can tolerate this error,
+ we will continue to load the index and
+ let caller deal with this error. However
+ mark the index and table corrupted. We
+ only need to mark such in the index
+ dictionary cache for such metadata corruption,
+ since we would always be able to set it
+ when loading the dictionary cache */
+ index->table = table;
+ dict_set_corrupted_index_cache_only(index);
+
+ ib::info() << "Index is corrupt but forcing"
+ " load into data dictionary";
+ } else {
+corrupted:
+ dict_mem_index_free(index);
+ error = DB_CORRUPTION;
+ goto func_exit;
+ }
+ } else if (!dict_index_is_clust(index)
+ && NULL == dict_table_get_first_index(table)) {
+
+ ib::error() << "Trying to load index " << index->name
+ << " for table " << table->name
+ << ", but the first index is not clustered!";
+
+ goto corrupted;
+ } else if (dict_is_sys_table(table->id)
+ && (dict_index_is_clust(index)
+ || ((table == dict_sys.sys_tables)
+ && !strcmp("ID_IND", index->name)))) {
+
+ /* The index was created in memory already at booting
+ of the database server */
+ dict_mem_index_free(index);
+ } else {
+ dict_load_fields(index, heap); /* NOTE(review): the return value is not checked */
+ index->table = table;
+
+ /* The data dictionary tables should never contain
+ invalid index definitions. If we ignored this error
+ and simply did not load this index definition, the
+ .frm file would disagree with the index definitions
+ inside InnoDB. */
+ if ((error = dict_index_add_to_cache(index,
+ index->page))
+ != DB_SUCCESS) {
+ goto func_exit;
+ }
+ }
+next_rec:
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ }
+
+ ut_ad(table->fts_doc_id_index == NULL);
+
+ if (table->fts != NULL) {
+ table->fts_doc_id_index = dict_table_get_index_on_name(
+ table, FTS_DOC_ID_INDEX_NAME);
+ }
+
+ /* If the table contains FTS indexes, populate table->fts->indexes */
+ if (dict_table_has_fts_index(table)) {
+ ut_ad(table->fts_doc_id_index != NULL);
+ /* table->fts->indexes should have been created. */
+ ut_a(table->fts->indexes != NULL);
+ dict_table_get_all_fts_indexes(table, table->fts->indexes);
+ }
+
+func_exit:
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ return(error);
+}
+
+/** Build a dict_table_t from one SYS_TABLES record.
+Only the table object itself is created; no columns or indexes are loaded.
+@param[in] name Table name
+@param[in] rec SYS_TABLES record
+@param[out,own] table the new table object, or NULL on failure
+@return error message
+@retval NULL on success
+@retval dict_load_table_flags if dict_sys_tables_rec_read() rejects
+the record */
+static const char* dict_load_table_low(const table_name_t& name,
+    const rec_t* rec, dict_table_t** table)
+{
+    table_id_t id;
+    ulint space;
+    ulint packed_n_cols;
+    ulint tf;
+    ulint tf2;
+
+    /* First the check of the record, then the decoding; the second
+    step is attempted only if the first one passed. */
+    const char* failure = dict_sys_tables_rec_check(rec);
+
+    if (failure == NULL
+        && !dict_sys_tables_rec_read(rec, name, &id, &space,
+            &packed_n_cols, &tf, &tf2)) {
+        failure = dict_load_table_flags;
+    }
+
+    if (failure != NULL) {
+        *table = NULL;
+        return(failure);
+    }
+
+    ulint n_col;
+    ulint n_vcol;
+    dict_table_decode_n_col(packed_n_cols, &n_col, &n_vcol);
+
+    dict_table_t* created = dict_mem_table_create(
+        name.m_name, NULL, n_col + n_vcol, n_vcol, tf, tf2);
+    created->space_id = space;
+    created->id = id;
+    created->file_unreadable = (tf2 & DICT_TF2_DISCARDED) != 0;
+    *table = created;
+
+    return(NULL);
+}
+
+/********************************************************************//**
+Using the table->heap, copy the null-terminated filepath into
+table->data_dir_path and replace the 'databasename/tablename.ibd'
+portion with 'tablename'.
+This allows SHOW CREATE TABLE to return the correct DATA DIRECTORY path.
+Nothing is saved if filepath is equal to the default file path that
+fil_make_filepath() builds for the table, or if that default path
+cannot be built.
+The caller must hold dict_sys.mutex, the table must have the DATA
+DIRECTORY flag set, and table->data_dir_path must not have been set yet. */
+static
+void
+dict_save_data_dir_path(
+/*====================*/
+ dict_table_t* table, /*!< in/out: table */
+ const char* filepath) /*!< in: filepath of tablespace */
+{
+ ut_ad(mutex_own(&dict_sys.mutex));
+ ut_a(DICT_TF_HAS_DATA_DIR(table->flags));
+
+ ut_a(!table->data_dir_path);
+ ut_a(filepath);
+
+ /* Be sure this filepath is not the default filepath. */
+ char* default_filepath = fil_make_filepath(
+ NULL, table->name.m_name, IBD, false);
+ if (!default_filepath) {
+ return;
+ }
+
+ if (0 != strcmp(filepath, default_filepath)) {
+ ulint pathlen = strlen(filepath);
+ ut_a(pathlen < OS_FILE_MAX_PATH);
+ /* The suffix check below looks at the last 4 bytes of the
+ path. Make sure that they exist, so that no pointer before
+ the start of the string is formed for a shorter path. */
+ ut_a(pathlen >= 4);
+ ut_a(0 == strcmp(filepath + pathlen - 4, DOT_IBD));
+
+ table->data_dir_path = mem_heap_strdup(
+ table->heap, filepath);
+ os_file_make_data_dir_path(table->data_dir_path);
+ }
+
+ ut_free(default_filepath);
+}
+
+/** Make sure that table->data_dir_path is saved in dict_table_t.
+Nothing is done if the path is already set, if table->space_id is 0, or
+if table->space is NULL. Otherwise the DATA DIRECTORY flag is set in
+table->flags and table->space->chain.start->name is handed to
+dict_save_data_dir_path(); if that leaves data_dir_path unset, the flag
+is cleared again.
+@param[in,out] table Table object
+@param[in] dict_mutex_own true if dict_sys.mutex is owned already */
+void
+dict_get_and_save_data_dir_path(
+    dict_table_t* table,
+    bool dict_mutex_own)
+{
+    ut_ad(!table->is_temporary());
+    ut_ad(!table->space || table->space->id == table->space_id);
+
+    if (table->data_dir_path || !table->space_id || !table->space) {
+        return;
+    }
+
+    /* The dictionary mutex is acquired here only when the caller
+    does not hold it yet. */
+    const bool acquire_mutex = !dict_mutex_own;
+
+    if (acquire_mutex) {
+        dict_mutex_enter_for_mysql();
+    }
+
+    table->flags |= (1 << DICT_TF_POS_DATA_DIR)
+        & ((1U << DICT_TF_BITS) - 1);
+    dict_save_data_dir_path(table, table->space->chain.start->name);
+
+    if (!table->data_dir_path) {
+        /* No path was saved, so take the flag back. This does
+        not change SYS_DATAFILES or SYS_TABLES or FSP_SPACE_FLAGS
+        on the header page of the tablespace, but it makes
+        dict_table_t consistent. */
+        table->flags &= (~DICT_TF_MASK_DATA_DIR)
+            & ((1U << DICT_TF_BITS) - 1);
+    }
+
+    if (acquire_mutex) {
+        dict_mutex_exit_for_mysql();
+    }
+}
+
+/** Load a table into the dictionary cache unless it is already there.
+A table found by dict_table_check_if_in_cache_low() is returned as is.
+Otherwise the table is loaded with dict_load_table_one(), and every table
+name that the loading collects in its list of foreign-key related tables
+is loaded in turn, unless that table is already in the cache.
+The caller must hold dict_sys.mutex.
+@param[in] name Table name in the dbname/tablename format
+@param[in] ignore_err Error to be ignored when loading
+ table and its index definition
+@return table, NULL if does not exist; if the table is stored in an
+.ibd file, but the file does not exist, then we set the file_unreadable
+flag in the table object we return. */
+dict_table_t* dict_load_table(const char* name, dict_err_ignore_t ignore_err)
+{
+    DBUG_ENTER("dict_load_table");
+    DBUG_PRINT("dict_load_table", ("loading table: '%s'", name));
+
+    ut_ad(mutex_own(&dict_sys.mutex));
+
+    dict_table_t* table = dict_table_check_if_in_cache_low(name);
+
+    if (table) {
+        DBUG_RETURN(table);
+    }
+
+    dict_names_t pending;
+
+    table = dict_load_table_one(const_cast<char*>(name),
+        ignore_err, pending);
+
+    /* Loading a related table may append further names to the list,
+    so keep going until the list has been drained. */
+    for (; !pending.empty(); pending.pop_front()) {
+        const char* related = pending.front();
+
+        if (!dict_table_check_if_in_cache_low(related)) {
+            dict_load_table_one(const_cast<char*>(related),
+                ignore_err, pending);
+        }
+    }
+
+    DBUG_RETURN(table);
+}
+
+/** Opens a tablespace for dict_load_table_one()
+On return, either table->space has been set (to the system tablespace,
+to a tablespace found in memory, or to one opened by fil_ibd_open()),
+or table->file_unreadable has been set to true because the tablespace
+is flagged as discarded, was not found in memory while ignore_err is
+DICT_ERR_IGNORE_DROP, or could not be opened.
+@param[in,out] table A table that refers to the tablespace to open
+@param[in] ignore_err Whether to ignore an error. */
+UNIV_INLINE
+void
+dict_load_tablespace(
+ dict_table_t* table,
+ dict_err_ignore_t ignore_err)
+{
+ ut_ad(!table->is_temporary());
+ ut_ad(!table->space);
+ ut_ad(table->space_id < SRV_SPACE_ID_UPPER_BOUND);
+ ut_ad(fil_system.sys_space);
+
+ if (table->space_id == TRX_SYS_SPACE) {
+ table->space = fil_system.sys_space;
+ return;
+ }
+
+ if (table->flags2 & DICT_TF2_DISCARDED) {
+ ib::warn() << "Tablespace for table " << table->name
+ << " is set as discarded.";
+ table->file_unreadable = true;
+ return;
+ }
+
+ /* The tablespace may already be open. */
+ table->space = fil_space_for_table_exists_in_mem(
+ table->space_id, table->name.m_name, table->flags);
+ if (table->space) {
+ return;
+ }
+ /* With DICT_ERR_IGNORE_DROP no attempt is made to open the file. */
+ if (ignore_err == DICT_ERR_IGNORE_DROP) {
+ table->file_unreadable = true;
+ return;
+ }
+ /* The message is suppressed when DICT_ERR_IGNORE_RECOVER_LOCK
+ is set in ignore_err. */
+ if (!(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)) {
+ ib::error() << "Failed to find tablespace for table "
+ << table->name << " in the cache. Attempting"
+ " to load the tablespace with space id "
+ << table->space_id;
+ }
+
+ /* Use the remote filepath if needed. This parameter is optional
+ in the call to fil_ibd_open(). If not supplied, it will be built
+ from the table->name. */
+ char* filepath = NULL;
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ /* This will set table->data_dir_path from either
+ fil_system or SYS_DATAFILES */
+ dict_get_and_save_data_dir_path(table, true);
+
+ if (table->data_dir_path) {
+ filepath = fil_make_filepath(
+ table->data_dir_path,
+ table->name.m_name, IBD, true);
+ }
+ }
+
+ /* Try to open the tablespace. We set the 2nd param (fix_dict) to
+ false because we do not have an x-lock on dict_sys.latch */
+ table->space = fil_ibd_open(
+ true, false, FIL_TYPE_TABLESPACE, table->space_id,
+ dict_tf_to_fsp_flags(table->flags),
+ table->name, filepath);
+
+ if (!table->space) {
+ /* We failed to find a sensible tablespace file */
+ table->file_unreadable = true;
+ }
+
+ ut_free(filepath);
+}
+
+/** Loads a table definition and also all its index definitions.
+
+Loads those foreign key constraints whose referenced table is already in
+dictionary cache. If a foreign key constraint is not loaded, then the
+referenced table is pushed into the output stack (fk_tables), if it is not
+NULL. These tables must be subsequently loaded so that all the foreign
+key constraints are loaded into memory.
+
+The caller must hold dict_sys.mutex. The steps are: find the SYS_TABLES
+record by name, create the table object, open the tablespace, load the
+columns and virtual columns, add the table to the cache, load the
+indexes, check the root page of the first index, and finally load the
+foreign key constraints.
+
+@param[in] name Table name in the db/tablename format
+@param[in] ignore_err Error to be ignored when loading table
+ and its index definition
+@param[out] fk_tables Related table names that must also be
+ loaded to ensure that all foreign key
+ constraints are loaded.
+@return table, NULL if does not exist; if the table is stored in an
+.ibd file, but the file does not exist, then we set the
+file_unreadable flag in the table object we return */
+static
+dict_table_t*
+dict_load_table_one(
+ const table_name_t& name,
+ dict_err_ignore_t ignore_err,
+ dict_names_t& fk_tables)
+{
+ dberr_t err;
+ dict_table_t* sys_tables;
+ btr_pcur_t pcur;
+ dict_index_t* sys_index;
+ dtuple_t* tuple;
+ mem_heap_t* heap;
+ dfield_t* dfield;
+ const rec_t* rec;
+ const byte* field;
+ ulint len;
+ mtr_t mtr;
+
+ DBUG_ENTER("dict_load_table_one");
+ DBUG_PRINT("dict_load_table_one", ("table: %s", name.m_name));
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ heap = mem_heap_create(32000);
+
+ mtr_start(&mtr);
+
+ sys_tables = dict_table_get_low("SYS_TABLES");
+ sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
+ ut_ad(!dict_table_is_comp(sys_tables));
+ ut_ad(name_of_col_is(sys_tables, sys_index,
+ DICT_FLD__SYS_TABLES__ID, "ID"));
+ ut_ad(name_of_col_is(sys_tables, sys_index,
+ DICT_FLD__SYS_TABLES__N_COLS, "N_COLS"));
+ ut_ad(name_of_col_is(sys_tables, sys_index,
+ DICT_FLD__SYS_TABLES__TYPE, "TYPE"));
+ ut_ad(name_of_col_is(sys_tables, sys_index,
+ DICT_FLD__SYS_TABLES__MIX_LEN, "MIX_LEN"));
+ ut_ad(name_of_col_is(sys_tables, sys_index,
+ DICT_FLD__SYS_TABLES__SPACE, "SPACE"));
+
+ tuple = dtuple_create(heap, 1);
+ dfield = dtuple_get_nth_field(tuple, 0);
+ /* The search key is the table name, without a terminating NUL. */
+ dfield_set_data(dfield, name.m_name, strlen(name.m_name));
+ dict_index_copy_types(tuple, sys_index, 1);
+
+ btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+ rec = btr_pcur_get_rec(&pcur);
+
+ if (!btr_pcur_is_on_user_rec(&pcur)
+ || rec_get_deleted_flag(rec, 0)) {
+ /* Not found */
+err_exit:
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ DBUG_RETURN(NULL);
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__NAME, &len);
+
+ /* Check if the table name in record is the searched one */
+ if (len != strlen(name.m_name)
+ || memcmp(name.m_name, field, len)) {
+
+ goto err_exit;
+ }
+
+ dict_table_t* table;
+ if (const char* err_msg = dict_load_table_low(name, rec, &table)) {
+ if (err_msg != dict_load_table_flags) {
+ ib::error() << err_msg;
+ }
+ goto err_exit;
+ }
+ /* The SYS_TABLES record is not accessed after this point: close
+ the cursor and commit the mini-transaction before loading the
+ rest of the definition. */
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ dict_load_tablespace(table, ignore_err);
+
+ dict_load_columns(table, heap);
+
+ dict_load_virtual(table, heap);
+
+ dict_table_add_system_columns(table, heap);
+
+ table->can_be_evicted = true;
+ table->add_to_cache();
+
+ mem_heap_empty(heap);
+
+ ut_ad(dict_tf2_is_valid(table->flags, table->flags2));
+
+ /* If there is no tablespace for the table then we only need to
+ load the index definitions. So that we can IMPORT the tablespace
+ later. When recovering table locks for resurrected incomplete
+ transactions, the tablespace should exist, because DDL operations
+ were not allowed while the table is being locked by a transaction. */
+ dict_err_ignore_t index_load_err =
+ !(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)
+ && !table->is_readable()
+ ? DICT_ERR_IGNORE_ALL
+ : ignore_err;
+
+ err = dict_load_indexes(table, heap, index_load_err);
+
+ if (err == DB_INDEX_CORRUPT) {
+ /* Refuse to load the table if the table has a corrupted
+ cluster index */
+ if (!srv_load_corrupted) {
+
+ ib::error() << "Load table " << table->name
+ << " failed, the table has"
+ " corrupted clustered indexes. Turn on"
+ " 'innodb_force_load_corrupted' to drop it";
+ dict_sys.remove(table);
+ table = NULL;
+ goto func_exit;
+ } else {
+ if (table->indexes.start->is_corrupted()) {
+ table->corrupted = true;
+ }
+ }
+ }
+ /* For a readable table, check that the root page number of the
+ first index is within the tablespace and that the page carries the
+ expected space id, page number and page type. On a mismatch the
+ table is marked corrupted and unreadable. */
+ if (err == DB_SUCCESS && table->is_readable()) {
+ const auto root = dict_table_get_first_index(table)->page;
+
+ if (root >= table->space->get_size()) {
+corrupted:
+ table->corrupted = true;
+ table->file_unreadable = true;
+ err = DB_CORRUPTION;
+ } else {
+ const page_id_t page_id(table->space->id, root);
+ mtr.start();
+ buf_block_t* block = buf_page_get(
+ page_id, table->space->zip_size(),
+ RW_S_LATCH, &mtr);
+ const bool corrupted = !block
+ || page_get_space_id(block->frame)
+ != page_id.space()
+ || page_get_page_no(block->frame)
+ != page_id.page_no()
+ || (mach_read_from_2(FIL_PAGE_TYPE
+ + block->frame)
+ != FIL_PAGE_INDEX
+ && mach_read_from_2(FIL_PAGE_TYPE
+ + block->frame)
+ != FIL_PAGE_TYPE_INSTANT);
+ mtr.commit();
+ if (corrupted) {
+ goto corrupted;
+ }
+
+ if (table->supports_instant()) {
+ err = btr_cur_instant_init(table);
+ }
+ }
+ }
+
+ /* Initialize table foreign_child value. Its value could be
+ changed when dict_load_foreigns() is called below */
+ table->fk_max_recusive_level = 0;
+
+ /* If the force recovery flag is set, we open the table irrespective
+ of the error condition, since the user may want to dump data from the
+ clustered index. However we load the foreign key information only if
+ all indexes were loaded. */
+ if (!table->is_readable()) {
+ /* The table is not readable: foreign key constraints are
+ not loaded and the table object is kept as it is. */
+ } else if (err == DB_SUCCESS) {
+ err = dict_load_foreigns(table->name.m_name, NULL,
+ true, true,
+ ignore_err, fk_tables);
+
+ if (err != DB_SUCCESS) {
+ ib::warn() << "Load table " << table->name
+ << " failed, the table has missing"
+ " foreign key indexes. Turn off"
+ " 'foreign_key_checks' and try again.";
+
+ dict_sys.remove(table);
+ table = NULL;
+ } else {
+ dict_mem_table_fill_foreign_vcol_set(table);
+ table->fk_max_recusive_level = 0;
+ }
+ } else {
+ dict_index_t* index;
+
+ /* Make sure that at least the clustered index was loaded.
+ Otherwise refuse to load the table */
+ index = dict_table_get_first_index(table);
+
+ if (!srv_force_recovery
+ || !index
+ || !index->is_primary()) {
+ dict_sys.remove(table);
+ table = NULL;
+ } else if (index->is_corrupted()
+ && table->is_readable()) {
+ /* It is possible we force to load a corrupted
+ clustered index if srv_load_corrupted is set.
+ Mark the table as corrupted in this case */
+ table->corrupted = true;
+ }
+ }
+
+func_exit:
+ mem_heap_free(heap);
+
+ ut_ad(!table
+ || (ignore_err & ~DICT_ERR_IGNORE_FK_NOKEY)
+ || !table->is_readable()
+ || !table->corrupted);
+
+ if (table && table->fts) {
+ if (!(dict_table_has_fts_index(table)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID))) {
+ /* the table->fts could be created in dict_load_column
+ when a user defined FTS_DOC_ID is present, but no
+ FTS */
+ fts_free(table);
+ } else if (fts_optimize_wq) {
+ fts_optimize_add_table(table);
+ } else if (table->can_be_evicted) {
+ /* fts_optimize_thread is not started yet.
+ So make the table as non-evictable from cache. */
+ dict_sys.prevent_eviction(table);
+ }
+ }
+
+ ut_ad(err != DB_SUCCESS || dict_foreign_set_validate(*table));
+
+ DBUG_RETURN(table);
+}
+
+/***********************************************************************//**
+Loads a table object based on the table id.
+The id is looked up in the second index of SYS_TABLES; if a record that
+is not delete-marked carries the id, the table name stored in it is passed
+to dict_load_table(). The caller must hold dict_sys.mutex.
+@return table; NULL if table does not exist */
+dict_table_t*
+dict_load_table_on_id(
+/*==================*/
+ table_id_t table_id, /*!< in: table id */
+ dict_err_ignore_t ignore_err) /*!< in: errors to ignore
+ when loading the table */
+{
+ byte id_buf[8];
+ btr_pcur_t pcur;
+ mem_heap_t* heap;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ dict_index_t* sys_table_ids;
+ dict_table_t* sys_tables;
+ const rec_t* rec;
+ const byte* field;
+ ulint len;
+ dict_table_t* table;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ table = NULL;
+
+ /* NOTE that the operation of this function is protected by
+ the dictionary mutex, and therefore no deadlocks can occur
+ with other dictionary operations. */
+
+ mtr_start(&mtr);
+ /*---------------------------------------------------*/
+ /* Get the secondary index based on ID for table SYS_TABLES */
+ sys_tables = dict_sys.sys_tables;
+ sys_table_ids = dict_table_get_next_index(
+ dict_table_get_first_index(sys_tables));
+ ut_ad(!dict_table_is_comp(sys_tables));
+ ut_ad(!dict_index_is_clust(sys_table_ids));
+ heap = mem_heap_create(256);
+
+ tuple = dtuple_create(heap, 1);
+ dfield = dtuple_get_nth_field(tuple, 0);
+
+ /* Write the table id in byte format to id_buf */
+ mach_write_to_8(id_buf, table_id);
+
+ dfield_set_data(dfield, id_buf, 8);
+ dict_index_copy_types(tuple, sys_table_ids, 1);
+
+ btr_pcur_open_on_user_rec(sys_table_ids, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ if (page_rec_is_user_rec(rec)) {
+ /*---------------------------------------------------*/
+ /* Now we have the record in the secondary index
+ containing the table ID and NAME */
+check_rec:
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLE_IDS__ID, &len);
+ ut_ad(len == 8);
+
+ /* Check if the table id in record is the one searched for */
+ if (table_id == mach_read_from_8(field)) {
+ if (rec_get_deleted_flag(rec, 0)) {
+ /* Until purge has completed, there
+ may be delete-marked duplicate records
+ for the same SYS_TABLES.ID, but different
+ SYS_TABLES.NAME. */
+ while (btr_pcur_move_to_next(&pcur, &mtr)) {
+ rec = btr_pcur_get_rec(&pcur);
+
+ if (page_rec_is_user_rec(rec)) {
+ goto check_rec;
+ }
+ }
+ } else {
+ /* Now we get the table name from the record */
+ field = rec_get_nth_field_old(rec,
+ DICT_FLD__SYS_TABLE_IDS__NAME, &len);
+ /* Load the table definition to memory */
+ char* table_name = mem_heap_strdupl(
+ heap, (char*) field, len);
+ table = dict_load_table(table_name, ignore_err);
+ }
+ }
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ return(table);
+}
+
+/********************************************************************//**
+Called when the database is booted: loads the index definitions of a
+system table. The clustered index is not loaded here, because it is added
+to the dictionary cache at booting before this function is called.
+A temporary memory heap is created for dict_load_indexes() and freed
+again before returning. The caller must hold dict_sys.mutex. */
+void
+dict_load_sys_table(
+/*================*/
+	dict_table_t*	table)	/*!< in: system table */
+{
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	mem_heap_t* const	scratch_heap = mem_heap_create(1000);
+
+	dict_load_indexes(table, scratch_heap, DICT_ERR_IGNORE_NONE);
+
+	mem_heap_free(scratch_heap);
+}
+
+/********************************************************************//**
+Loads foreign key constraint col names (also for the referenced table).
+Members that must be set (and valid) in foreign:
+foreign->heap
+foreign->n_fields
+foreign->id ('\0'-terminated)
+Members that will be created and set by this function:
+foreign->foreign_col_names[i]
+foreign->referenced_col_names[i]
+(for i=0..foreign->n_fields-1)
+
+The names are read from consecutive records of the first index of
+SYS_FOREIGN_COLS, starting at the first user record whose ID is >=
+foreign->id (PAGE_CUR_GE). Each of the n_fields records must be a
+non-delete-marked user record whose ID equals foreign->id and whose POS
+equals its ordinal: an ID mismatch is reported through ib::fatal, the
+other conditions are enforced with ut_a(). Both pointer arrays and the
+copied name strings are allocated from foreign->heap.
+The caller must hold dict_sys.mutex. */
+static
+void
+dict_load_foreign_cols(
+/*===================*/
+ dict_foreign_t* foreign)/*!< in/out: foreign constraint object */
+{
+ dict_table_t* sys_foreign_cols;
+ dict_index_t* sys_index;
+ btr_pcur_t pcur;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ const rec_t* rec;
+ const byte* field;
+ ulint len;
+ ulint i;
+ mtr_t mtr;
+ size_t id_len;
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ id_len = strlen(foreign->id);
+
+ foreign->foreign_col_names = static_cast<const char**>(
+ mem_heap_alloc(foreign->heap,
+ foreign->n_fields * sizeof(void*))); /* one pointer per column */
+
+ foreign->referenced_col_names = static_cast<const char**>(
+ mem_heap_alloc(foreign->heap,
+ foreign->n_fields * sizeof(void*))); /* one pointer per column */
+
+ mtr_start(&mtr);
+
+ sys_foreign_cols = dict_table_get_low("SYS_FOREIGN_COLS");
+
+ sys_index = UT_LIST_GET_FIRST(sys_foreign_cols->indexes);
+ ut_ad(!dict_table_is_comp(sys_foreign_cols));
+
+ tuple = dtuple_create(foreign->heap, 1); /* 1-field search tuple: the constraint id */
+ dfield = dtuple_get_nth_field(tuple, 0);
+
+ dfield_set_data(dfield, foreign->id, id_len);
+ dict_index_copy_types(tuple, sys_index, 1);
+
+ btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+ for (i = 0; i < foreign->n_fields; i++) {
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ ut_a(btr_pcur_is_on_user_rec(&pcur)); /* running out of records is not tolerated */
+ ut_a(!rec_get_deleted_flag(rec, 0)); /* neither is a delete-marked record */
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN_COLS__ID, &len);
+
+ if (len != id_len || memcmp(foreign->id, field, len)) { /* record belongs to another id */
+ const rec_t* pos;
+ ulint pos_len;
+ const rec_t* for_col_name;
+ ulint for_col_name_len;
+ const rec_t* ref_col_name;
+ ulint ref_col_name_len;
+
+ pos = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN_COLS__POS,
+ &pos_len);
+
+ for_col_name = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME,
+ &for_col_name_len);
+
+ ref_col_name = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME,
+ &ref_col_name_len);
+
+ ib::fatal sout; /* NOTE(review): presumably terminates the server when sout goes out of scope - confirm */
+
+ sout << "Unable to load column names for foreign"
+ " key '" << foreign->id
+ << "' because it was not found in"
+ " InnoDB internal table SYS_FOREIGN_COLS. The"
+ " closest entry we found is:"
+ " (ID='";
+ sout.write(field, len);
+ sout << "', POS=" << mach_read_from_4(pos)
+ << ", FOR_COL_NAME='";
+ sout.write(for_col_name, for_col_name_len);
+ sout << "', REF_COL_NAME='";
+ sout.write(ref_col_name, ref_col_name_len);
+ sout << "')";
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN_COLS__POS, &len);
+ ut_a(len == 4);
+ ut_a(i == mach_read_from_4(field)); /* records must arrive in POS order 0..n_fields-1 */
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME, &len);
+ foreign->foreign_col_names[i] = mem_heap_strdupl(
+ foreign->heap, (char*) field, len);
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME, &len);
+ foreign->referenced_col_names[i] = mem_heap_strdupl(
+ foreign->heap, (char*) field, len);
+
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+}
+
+/***********************************************************************//**
+Loads a foreign key constraint to the dictionary cache. If the referenced
+table is not yet loaded, it is added in the output parameter (fk_tables).
+
+The constraint is searched by id in the first index of SYS_FOREIGN. If the
+cursor does not land on a non-delete-marked user record with exactly that
+id, an error is logged and DB_ERROR is returned. Otherwise a
+dict_foreign_t is created and filled from the record, and its column names
+are loaded with dict_load_foreign_cols(). If the foreign (child) table is
+not in the dictionary cache, its lookup name is pushed to fk_tables, the
+new object is removed again and DB_SUCCESS is returned; else the result of
+dict_foreign_add_to_cache() is returned.
+NOTE(review): the sentence above and the fk_tables description speak of
+the referenced table, but the code below queues the foreign (child) table
+name when that table is not cached - confirm which wording is intended.
+Within this function check_recursive is only used in the DBUG_PRINT trace.
+The caller must hold dict_sys.mutex.
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((nonnull(1), warn_unused_result))
+dberr_t
+dict_load_foreign(
+/*==============*/
+ const char* id,
+ /*!< in: foreign constraint id, must be
+ '\0'-terminated */
+ const char** col_names,
+ /*!< in: column names, or NULL
+ to use foreign->foreign_table->col_names */
+ bool check_recursive,
+ /*!< in: whether to record the foreign table
+ parent count to avoid unlimited recursive
+ load of chained foreign tables */
+ bool check_charsets,
+ /*!< in: whether to check charset
+ compatibility */
+ dict_err_ignore_t ignore_err,
+ /*!< in: error to be ignored */
+ dict_names_t& fk_tables)
+ /*!< out: the foreign key constraint is added
+ to the dictionary cache only if the referenced
+ table is already in cache. Otherwise, the
+ foreign key constraint is not added to cache,
+ and the referenced table is added to this
+ stack. */
+{
+ dict_foreign_t* foreign;
+ dict_table_t* sys_foreign;
+ btr_pcur_t pcur;
+ dict_index_t* sys_index;
+ dtuple_t* tuple;
+ mem_heap_t* heap2;
+ dfield_t* dfield;
+ const rec_t* rec;
+ const byte* field;
+ ulint len;
+ mtr_t mtr;
+ dict_table_t* for_table;
+ dict_table_t* ref_table;
+ size_t id_len;
+
+ DBUG_ENTER("dict_load_foreign");
+ DBUG_PRINT("dict_load_foreign",
+ ("id: '%s', check_recursive: %d", id, check_recursive));
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ id_len = strlen(id);
+
+ heap2 = mem_heap_create(1000); /* holds only the search tuple */
+
+ mtr_start(&mtr);
+
+ sys_foreign = dict_table_get_low("SYS_FOREIGN");
+
+ sys_index = UT_LIST_GET_FIRST(sys_foreign->indexes);
+ ut_ad(!dict_table_is_comp(sys_foreign));
+
+ tuple = dtuple_create(heap2, 1);
+ dfield = dtuple_get_nth_field(tuple, 0);
+
+ dfield_set_data(dfield, id, id_len);
+ dict_index_copy_types(tuple, sys_index, 1);
+
+ btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+ rec = btr_pcur_get_rec(&pcur);
+
+ if (!btr_pcur_is_on_user_rec(&pcur)
+ || rec_get_deleted_flag(rec, 0)) {
+ /* Not found */
+
+ ib::error() << "Cannot load foreign constraint " << id
+ << ": could not find the relevant record in "
+ << "SYS_FOREIGN";
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap2);
+
+ DBUG_RETURN(DB_ERROR);
+ }
+
+ field = rec_get_nth_field_old(rec, DICT_FLD__SYS_FOREIGN__ID, &len);
+
+ /* Check if the id in record is the searched one */
+ if (len != id_len || memcmp(id, field, len)) {
+ {
+ ib::error err;
+ err << "Cannot load foreign constraint " << id
+ << ": found ";
+ err.write(field, len);
+ err << " instead in SYS_FOREIGN";
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap2);
+
+ DBUG_RETURN(DB_ERROR);
+ }
+
+ /* Read the table names and the number of columns associated
+ with the constraint */
+
+ mem_heap_free(heap2); /* rec is still read below, until the cursor is closed and the mtr committed */
+
+ foreign = dict_mem_foreign_create();
+
+ uint32_t n_fields_and_type = mach_read_from_4(
+ rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN__N_COLS, &len));
+
+ ut_a(len == 4);
+
+ /* We store the type in the bits 24..29 of n_fields_and_type. */
+
+ foreign->type = (n_fields_and_type >> 24) & ((1U << 6) - 1);
+ foreign->n_fields = n_fields_and_type & dict_index_t::MAX_N_FIELDS;
+
+ foreign->id = mem_heap_strdupl(foreign->heap, id, id_len);
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN__FOR_NAME, &len);
+
+ foreign->foreign_table_name = mem_heap_strdupl(
+ foreign->heap, (char*) field, len);
+ dict_mem_foreign_table_name_lookup_set(foreign, TRUE);
+
+ const ulint foreign_table_name_len = len; /* saved: len is overwritten by the next field read */
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN__REF_NAME, &len);
+ foreign->referenced_table_name = mem_heap_strdupl(
+ foreign->heap, (char*) field, len);
+ dict_mem_referenced_table_name_lookup_set(foreign, TRUE);
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ dict_load_foreign_cols(foreign); /* runs its own mini-transaction */
+
+ ref_table = dict_table_check_if_in_cache_low(
+ foreign->referenced_table_name_lookup);
+ for_table = dict_table_check_if_in_cache_low(
+ foreign->foreign_table_name_lookup);
+
+ if (!for_table) {
+ /* To avoid recursively loading the tables related through
+ the foreign key constraints, the child table name is saved
+ here. The child table will be loaded later, along with its
+ foreign key constraint. */
+
+ ut_a(ref_table != NULL);
+ fk_tables.push_back(
+ mem_heap_strdupl(ref_table->heap,
+ foreign->foreign_table_name_lookup,
+ foreign_table_name_len)); /* the copy is owned by the referenced table's heap */
+
+ dict_foreign_remove_from_cache(foreign);
+ DBUG_RETURN(DB_SUCCESS);
+ }
+
+ ut_a(for_table || ref_table); /* for_table is non-NULL here, so this always holds */
+
+ /* Note that there may already be a foreign constraint object in
+ the dictionary cache for this constraint: then the following
+ call only sets the pointers in it to point to the appropriate table
+ and index objects and frees the newly created object foreign.
+ Adding to the cache should always succeed since we are not creating
+ a new foreign key constraint but loading one from the data
+ dictionary. */
+
+ DBUG_RETURN(dict_foreign_add_to_cache(foreign, col_names,
+ check_charsets,
+ ignore_err));
+}
+
+/***********************************************************************//**
+Loads foreign key constraints where the table is either the foreign key
+holder or where the table is referenced by a foreign key. Adds these
+constraints to the data dictionary.
+
+The foreign key constraint is loaded only if the referenced table is also
+in the dictionary cache. If the referenced table is not in dictionary
+cache, then it is added to the output parameter (fk_tables).
+
+The function scans every index of SYS_FOREIGN after the first one, in
+order. In each index it positions a cursor at the first record >=
+table_name and walks forward while the name field compares equal; for
+each matching live record it copies the constraint id, commits the
+mini-transaction, calls dict_load_foreign() and then restores the cursor
+in a new mini-transaction. The first error returned by dict_load_foreign()
+ends the scan and is returned. The caller must hold dict_sys.mutex.
+
+@return DB_SUCCESS or error code */
+dberr_t
+dict_load_foreigns(
+ const char* table_name, /*!< in: table name */
+ const char** col_names, /*!< in: column names, or NULL
+ to use table->col_names */
+ bool check_recursive,/*!< in: Whether to check
+ recursive load of tables
+ chained by FK */
+ bool check_charsets, /*!< in: whether to check
+ charset compatibility */
+ dict_err_ignore_t ignore_err, /*!< in: error to be ignored */
+ dict_names_t& fk_tables)
+ /*!< out: stack of table
+ names which must be loaded
+ subsequently to load all the
+ foreign key constraints. */
+{
+ ulint tuple_buf[(DTUPLE_EST_ALLOC(1) + sizeof(ulint) - 1)
+ / sizeof(ulint)]; /* stack storage for the 1-field search tuple, rounded up to whole ulints */
+ btr_pcur_t pcur;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ dict_index_t* sec_index;
+ dict_table_t* sys_foreign;
+ const rec_t* rec;
+ const byte* field;
+ ulint len;
+ dberr_t err;
+ mtr_t mtr;
+
+ DBUG_ENTER("dict_load_foreigns");
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ sys_foreign = dict_table_get_low("SYS_FOREIGN");
+
+ if (sys_foreign == NULL) {
+ /* No foreign keys defined yet in this database */
+
+ ib::info() << "No foreign key system tables in the database";
+ DBUG_RETURN(DB_ERROR); /* logged at info level, yet reported to the caller as an error */
+ }
+
+ ut_ad(!dict_table_is_comp(sys_foreign));
+ mtr_start(&mtr);
+
+ /* Get the secondary index based on FOR_NAME from table
+ SYS_FOREIGN */
+
+ sec_index = dict_table_get_next_index(
+ dict_table_get_first_index(sys_foreign));
+ ut_ad(!dict_index_is_clust(sec_index));
+start_load:
+
+ tuple = dtuple_create_from_mem(tuple_buf, sizeof(tuple_buf), 1, 0);
+ dfield = dtuple_get_nth_field(tuple, 0);
+
+ dfield_set_data(dfield, table_name, strlen(table_name));
+ dict_index_copy_types(tuple, sec_index, 1);
+
+ btr_pcur_open_on_user_rec(sec_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+loop:
+ rec = btr_pcur_get_rec(&pcur);
+
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
+ /* End of index */
+
+ goto load_next_index;
+ }
+
+ /* Now we have the record in the secondary index containing a table
+ name and a foreign constraint ID */
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN_FOR_NAME__NAME, &len);
+
+ /* Check if the table name in the record is the one searched for; the
+ following call does the comparison in the latin1_swedish_ci
+ charset-collation, in a case-insensitive way. */
+
+ if (0 != cmp_data_data(dfield_get_type(dfield)->mtype,
+ dfield_get_type(dfield)->prtype,
+ static_cast<const byte*>(
+ dfield_get_data(dfield)),
+ dfield_get_len(dfield),
+ field, len)) {
+
+ goto load_next_index; /* past the last record for this table name */
+ }
+
+ /* Since table names in SYS_FOREIGN are stored in a case-insensitive
+ order, we have to check that the table name matches also in a binary
+ string comparison. On Unix, MySQL allows table names that only differ
+ in character case. If lower_case_table_names=2 then what is stored
+ may not be the same case, but the previous comparison showed that they
+ match with no-case.
+ Delete-marked records are skipped before that binary comparison. */
+
+ if (rec_get_deleted_flag(rec, 0)) {
+ goto next_rec;
+ }
+
+ if (innobase_get_lower_case_table_names() != 2
+ && memcmp(field, table_name, len)) { /* NOTE(review): reads len bytes of table_name; assumes it is at least that long - confirm */
+ goto next_rec;
+ }
+
+ /* Now we get a foreign key constraint id */
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN_FOR_NAME__ID, &len);
+
+ /* Copy the string because the page may be modified or evicted
+ after mtr_commit() below. */
+ char fk_id[MAX_TABLE_NAME_LEN + 1];
+
+ ut_a(len <= MAX_TABLE_NAME_LEN);
+ memcpy(fk_id, field, len);
+ fk_id[len] = '\0';
+
+ btr_pcur_store_position(&pcur, &mtr); /* restored after dict_load_foreign() returns */
+
+ mtr_commit(&mtr);
+
+ /* Load the foreign constraint definition to the dictionary cache */
+
+ err = dict_load_foreign(fk_id, col_names,
+ check_recursive, check_charsets, ignore_err,
+ fk_tables);
+
+ if (err != DB_SUCCESS) {
+ btr_pcur_close(&pcur); /* the mtr was already committed above */
+
+ DBUG_RETURN(err);
+ }
+
+ mtr_start(&mtr);
+
+ btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
+next_rec:
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+
+ goto loop;
+
+load_next_index:
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ sec_index = dict_table_get_next_index(sec_index);
+
+ if (sec_index != NULL) {
+
+ mtr_start(&mtr);
+
+ /* Switch to scan index on REF_NAME, fk_max_recusive_level
+ already been updated when scanning FOR_NAME index, no need to
+ update again */
+ check_recursive = FALSE;
+
+ goto start_load;
+ }
+
+ DBUG_RETURN(DB_SUCCESS);
+}
diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc
new file mode 100644
index 00000000..97889e22
--- /dev/null
+++ b/storage/innobase/dict/dict0mem.cc
@@ -0,0 +1,1396 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
+Copyright (c) 2013, 2021, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file dict/dict0mem.cc
+Data dictionary memory object creation
+
+Created 1/8/1996 Heikki Tuuri
+***********************************************************************/
+
+#include "ha_prototypes.h"
+#include <mysql_com.h>
+
+#include "dict0mem.h"
+#include "rem0rec.h"
+#include "data0type.h"
+#include "mach0data.h"
+#include "dict0dict.h"
+#include "fts0priv.h"
+#include "lock0lock.h"
+#include "sync0sync.h"
+#include "row0row.h"
+#include "sql_string.h"
+#include <iostream>
+
+#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when
+ creating a table or index object */
+
+/** System databases: database name prefixes, each ending in '/', that
+dict_mem_table_is_system() compares against the start of a table name.
+The list is terminated by NullS. */
+static const char* innobase_system_databases[] = {
+ "mysql/",
+ "information_schema/",
+ "performance_schema/",
+ NullS
+};
+
+/** Determine if a table belongs to innobase_system_databases[]
+@param[in]	name	database_name/table_name
+@return whether the database_name is in innobase_system_databases[];
+a name that contains no '/' at all is also reported as a system table */
+static bool dict_mem_table_is_system(const char *name)
+{
+	/* Names have the form database/table; a name without the
+	separator is one of the SYS_* system tables. */
+	if (strchr(name, '/') == NULL) {
+		return true;
+	}
+
+	const size_t	name_len = strlen(name);
+
+	for (const char* const* db = innobase_system_databases;
+	     *db != NullS; db++) {
+		const size_t	prefix_len = strlen(*db);
+
+		/* The name has to be strictly longer than the "db/"
+		prefix, so that something follows the separator. */
+		if (name_len > prefix_len
+		    && strncmp(name, *db, prefix_len) == 0) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/** The start of the table basename suffix for partitioned tables:
+"#p#" when _WIN32 is defined, "#P#" otherwise (3 characters plus the
+terminating NUL fill the 4-byte array). */
+const char table_name_t::part_suffix[4]
+#ifdef _WIN32
+= "#p#";
+#else
+= "#P#";
+#endif
+
+/** Display an identifier.
+The name is written between backquotes, and every backquote that occurs
+inside the name is written twice.
+@param[in,out]	s	output stream
+@param[in]	id_name	SQL identifier (other than table name)
+@return the output stream */
+std::ostream&
+operator<<(
+	std::ostream&		s,
+	const id_name_t&	id_name)
+{
+	const char	quote = '`';
+
+	s << quote;
+
+	for (const char* p = id_name; *p != '\0'; ++p) {
+		if (*p == quote) {
+			/* escape an embedded quote by doubling it */
+			s << quote;
+		}
+
+		s << *p;
+	}
+
+	s << quote;
+
+	return s;
+}
+
+/** Display a table name.
+The text written is whatever ut_get_name() produces for
+table_name.m_name when called with a NULL first argument.
+@param[in,out]	s		output stream
+@param[in]	table_name	table name
+@return the output stream */
+std::ostream&
+operator<<(
+	std::ostream&		s,
+	const table_name_t&	table_name)
+{
+	s << ut_get_name(NULL, table_name.m_name);
+
+	return s;
+}
+
+/** Check two character set-collation codes for compatible encoding.
+Both codes are resolved with get_charset(); the second lookup is only
+made when the first one succeeds.
+@param a  first character set-collation code
+@param b  second character set-collation code
+@return the result of Charset(b).encoding_allows_reinterpret_as(a)
+@retval false if either code cannot be resolved */
+bool dict_col_t::same_encoding(uint16_t a, uint16_t b)
+{
+  const CHARSET_INFO *acs= get_charset(a, MYF(MY_WME));
+  if (!acs)
+    return false;
+
+  const CHARSET_INFO *bcs= get_charset(b, MYF(MY_WME));
+  if (!bcs)
+    return false;
+
+  return Charset(bcs).encoding_allows_reinterpret_as(acs);
+}
+
+/** Create a table memory object.
+The object and its column arrays are allocated from a new memory heap
+that is stored in table->heap; only the table name is a separate
+mem_strdup() copy. flags, flags2 and the column counts are masked to the
+width of the members that store them, and n_t_cols additionally counts
+DATA_N_SYS_COLS. The v_cols elements and the two foreign key sets are
+constructed in place with placement new. table->fts and its cache are
+created only if the flags indicate an FTS index or FTS doc id column.
+@param name table name
+@param space tablespace
+@param n_cols total number of columns (both virtual and non-virtual)
+@param n_v_cols number of virtual columns
+@param flags table flags
+@param flags2 table flags2
+@return own: table object */
+dict_table_t *dict_mem_table_create(const char *name, fil_space_t *space,
+ ulint n_cols, ulint n_v_cols, ulint flags,
+ ulint flags2)
+{
+ dict_table_t* table;
+ mem_heap_t* heap;
+
+ ut_ad(name);
+ ut_ad(!space
+ || space->purpose == FIL_TYPE_TABLESPACE
+ || space->purpose == FIL_TYPE_TEMPORARY
+ || space->purpose == FIL_TYPE_IMPORT);
+ ut_a(dict_tf2_is_valid(flags, flags2));
+ ut_a(!(flags2 & DICT_TF2_UNUSED_BIT_MASK));
+
+ heap = mem_heap_create(DICT_HEAP_SIZE);
+
+ table = static_cast<dict_table_t*>(
+ mem_heap_zalloc(heap, sizeof(*table))); /* zero-filled: members not set below start out as 0 */
+
+ lock_table_lock_list_init(&table->locks);
+
+ UT_LIST_INIT(table->indexes, &dict_index_t::indexes);
+#ifdef BTR_CUR_HASH_ADAPT
+ UT_LIST_INIT(table->freed_indexes, &dict_index_t::indexes);
+#endif /* BTR_CUR_HASH_ADAPT */
+
+ table->heap = heap;
+
+ ut_d(table->magic_n = DICT_TABLE_MAGIC_N);
+
+ table->flags = static_cast<unsigned>(flags)
+ & ((1U << DICT_TF_BITS) - 1);
+ table->flags2 = static_cast<unsigned>(flags2)
+ & ((1U << DICT_TF2_BITS) - 1);
+ table->name.m_name = mem_strdup(name); /* released with ut_free() in dict_mem_table_free() */
+ table->is_system_db = dict_mem_table_is_system(table->name.m_name);
+ table->space = space;
+ table->space_id = space ? space->id : ULINT_UNDEFINED;
+ table->n_t_cols = static_cast<unsigned>(n_cols + DATA_N_SYS_COLS)
+ & dict_index_t::MAX_N_FIELDS;
+ table->n_v_cols = static_cast<unsigned>(n_v_cols)
+ & dict_index_t::MAX_N_FIELDS;
+ table->n_cols = static_cast<unsigned>(
+ table->n_t_cols - table->n_v_cols)
+ & dict_index_t::MAX_N_FIELDS; /* non-virtual columns, system columns included */
+
+ table->cols = static_cast<dict_col_t*>(
+ mem_heap_alloc(heap, table->n_cols * sizeof(dict_col_t)));
+ table->v_cols = static_cast<dict_v_col_t*>(
+ mem_heap_alloc(heap, n_v_cols * sizeof(*table->v_cols)));
+ for (ulint i = n_v_cols; i--; ) { /* counts down from n_v_cols - 1 to 0 */
+ new (&table->v_cols[i]) dict_v_col_t();
+ }
+
+ table->autoinc_lock = static_cast<ib_lock_t*>(
+ mem_heap_alloc(heap, lock_get_size()));
+
+ /* If the table has an FTS index or we are in the process
+ of building one, create the table->fts */
+ if (dict_table_has_fts_index(table)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
+ table->fts = fts_create(table);
+ table->fts->cache = fts_cache_create(table);
+ }
+
+ new(&table->foreign_set) dict_foreign_set(); /* destroyed explicitly in dict_mem_table_free() */
+ new(&table->referenced_set) dict_foreign_set();
+
+ return(table);
+}
+
+/****************************************************************//**
+Free a table memory object.
+The table must have no indexes left in its index lists (debug
+assertion). In order, this releases the FTS data if present, the foreign
+key virtual column sets, the two foreign key sets, the table name, the
+virtual column objects and the stored column list, and finally the
+table's memory heap, after which the table pointer is no longer valid. */
+void
+dict_mem_table_free(
+/*================*/
+ dict_table_t* table) /*!< in: table */
+{
+ ut_ad(table);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+ ut_ad(UT_LIST_GET_LEN(table->indexes) == 0);
+#ifdef BTR_CUR_HASH_ADAPT
+ ut_ad(UT_LIST_GET_LEN(table->freed_indexes) == 0);
+#endif /* BTR_CUR_HASH_ADAPT */
+ ut_d(table->cached = FALSE);
+
+ if (dict_table_has_fts_index(table)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
+ if (table->fts) {
+ fts_free(table);
+ }
+ }
+
+ dict_mem_table_free_foreign_vcol_set(table);
+
+ table->foreign_set.~dict_foreign_set(); /* counterpart of the placement new in dict_mem_table_create() */
+ table->referenced_set.~dict_foreign_set();
+
+ ut_free(table->name.m_name); /* the name is not part of table->heap */
+ table->name.m_name = NULL;
+
+ /* Clean up virtual index info structures that are registered
+ with virtual columns.
+ NOTE(review): only the first n_v_def objects are destroyed, while
+ dict_mem_table_create() constructs n_v_cols of them - confirm that
+ the two counts agree whenever a table is freed. */
+ for (ulint i = 0; i < table->n_v_def; i++) {
+ dict_table_get_nth_v_col(table, i)->~dict_v_col_t();
+ }
+
+ UT_DELETE(table->s_cols);
+
+ mem_heap_free(table->heap); /* the table object itself lives in this heap */
+}
+
+/****************************************************************//**
+Append 'name' to 'col_names'. @see dict_table_t::col_names
+The existing array consists of 'cols' consecutive NUL-terminated names.
+A new array is allocated from 'heap', the existing names are copied into
+it and 'name' (with its terminating NUL) is appended; the input array is
+left untouched.
+@return new column names array */
+static
+const char*
+dict_add_col_name(
+/*==============*/
+	const char*	col_names,	/*!< in: existing column names, or
+					NULL */
+	ulint		cols,		/*!< in: number of existing columns */
+	const char*	name,		/*!< in: new column name */
+	mem_heap_t*	heap)		/*!< in: heap */
+{
+	ut_ad(!cols == !col_names);
+
+	/* Byte length of the existing names, terminators included. */
+	ulint	prefix_bytes = 0;
+
+	if (col_names) {
+		const char*	end = col_names;
+
+		for (ulint left = cols; left > 0; left--) {
+			end += strlen(end) + 1;
+		}
+
+		prefix_bytes = unsigned(end - col_names);
+	}
+
+	const ulint	name_bytes = strlen(name) + 1;
+
+	char*	merged = static_cast<char*>(
+		mem_heap_alloc(heap, prefix_bytes + name_bytes));
+
+	if (prefix_bytes > 0) {
+		memcpy(merged, col_names, prefix_bytes);
+	}
+
+	memcpy(merged + prefix_bytes, name, name_bytes);
+
+	return merged;
+}
+
+/**********************************************************************//**
+Adds a column definition to a table.
+The column takes the next free slot (the old value of table->n_def);
+n_def and n_t_def are both incremented. If a name is given it is
+appended to table->col_names: the rebuilt name array is allocated from
+'heap', except that table->heap is used once the last column
+(n_def == n_cols) has been added. If the column type carries
+DATA_VERS_START or DATA_VERS_END, its position is recorded in
+table->vers_start or table->vers_end. Virtual columns (DATA_VIRTUAL) must
+not be added through this function (debug assertion). */
+void
+dict_mem_table_add_col(
+/*===================*/
+ dict_table_t* table, /*!< in: table */
+ mem_heap_t* heap, /*!< in: temporary memory heap, or NULL */
+ const char* name, /*!< in: column name, or NULL */
+ ulint mtype, /*!< in: main datatype */
+ ulint prtype, /*!< in: precise type */
+ ulint len) /*!< in: precision */
+{
+ dict_col_t* col;
+ unsigned i;
+
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+ ut_ad(!heap == !name); /* heap and name are given together or not at all */
+
+ ut_ad(!(prtype & DATA_VIRTUAL));
+
+ i = table->n_def++; /* i = position of the new column */
+
+ table->n_t_def++;
+
+ if (name) {
+ if (table->n_def == table->n_cols) { /* last column: keep the final names in table->heap */
+ heap = table->heap;
+ }
+ if (i && !table->col_names) {
+ /* All preceding column names are empty. */
+ char* s = static_cast<char*>(
+ mem_heap_zalloc(heap, table->n_def)); /* zero bytes stand for the i empty names */
+
+ table->col_names = s;
+ }
+
+ table->col_names = dict_add_col_name(table->col_names,
+ i, name, heap);
+ }
+
+ col = dict_table_get_nth_col(table, i);
+
+ dict_mem_fill_column_struct(col, i, mtype, prtype, len);
+
+ switch (prtype & DATA_VERSIONED) {
+ case DATA_VERS_START:
+ ut_ad(!table->vers_start);
+ table->vers_start = i & dict_index_t::MAX_N_FIELDS;
+ break;
+ case DATA_VERS_END:
+ ut_ad(!table->vers_end);
+ table->vers_end = i & dict_index_t::MAX_N_FIELDS;
+ }
+}
+
+/** Adds a virtual column definition to a table.
+The column takes the next free slot in table->v_cols (the old value of
+table->n_v_def); n_v_def and n_t_def are both incremented. The base
+column pointer array is allocated zero-filled from table->heap and is
+left for the caller to fill in.
+@param[in,out] table table
+@param[in,out] heap temporary memory heap, or NULL. It is
+ used to store name when we have not finished
+ adding all columns. When all columns are
+ added, the whole name will copy to memory from
+ table->heap
+@param[in] name column name
+@param[in] mtype main datatype
+@param[in] prtype precise type
+@param[in] len length
+@param[in] pos position in a table
+@param[in] num_base number of base columns
+@return the virtual column definition */
+dict_v_col_t*
+dict_mem_table_add_v_col(
+ dict_table_t* table,
+ mem_heap_t* heap,
+ const char* name,
+ ulint mtype,
+ ulint prtype,
+ ulint len,
+ ulint pos,
+ ulint num_base)
+{
+ dict_v_col_t* v_col;
+
+ ut_ad(table);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+ ut_ad(!heap == !name); /* heap and name are given together or not at all */
+
+ ut_ad(prtype & DATA_VIRTUAL);
+
+ unsigned i = table->n_v_def++; /* i = slot of the new virtual column */
+
+ table->n_t_def++;
+
+ if (name != NULL) {
+ if (table->n_v_def == table->n_v_cols) { /* last virtual column: keep the final names in table->heap */
+ heap = table->heap;
+ }
+
+ if (i && !table->v_col_names) {
+ /* All preceding column names are empty. */
+ char* s = static_cast<char*>(
+ mem_heap_zalloc(heap, table->n_v_def)); /* zero bytes stand for the i empty names */
+
+ table->v_col_names = s;
+ }
+
+ table->v_col_names = dict_add_col_name(table->v_col_names,
+ i, name, heap);
+ }
+
+ v_col = &table->v_cols[i];
+
+ dict_mem_fill_column_struct(&v_col->m_col, pos, mtype, prtype, len); /* the column struct stores pos, the slot i goes to v_pos */
+ v_col->v_pos = i & dict_index_t::MAX_N_FIELDS;
+
+ if (num_base != 0) {
+ v_col->base_col = static_cast<dict_col_t**>(mem_heap_zalloc(
+ table->heap, num_base * sizeof(
+ *v_col->base_col)));
+ } else {
+ v_col->base_col = NULL;
+ }
+
+ v_col->num_base = static_cast<unsigned>(num_base)
+ & dict_index_t::MAX_N_FIELDS;
+
+ /* Initialize the index list for virtual columns */
+ ut_ad(v_col->v_indexes.empty());
+
+ return(v_col);
+}
+
+/** Adds a stored column definition to a table.
+The entry describes the most recently added column (position n_def - 1)
+and is pushed to the front of table->s_cols, which is created on first
+use. The base column pointer array is allocated zero-filled from
+table->heap.
+@param[in]	table		table
+@param[in]	num_base	number of base columns. */
+void
+dict_mem_table_add_s_col(
+	dict_table_t*	table,
+	ulint		num_base)
+{
+	const unsigned	last = unsigned(table->n_def) - 1;
+	dict_col_t*	stored = dict_table_get_nth_col(table, last);
+	dict_s_col_t	entry;
+
+	ut_ad(stored != NULL);
+
+	if (!table->s_cols) {
+		table->s_cols = UT_NEW_NOKEY(dict_s_col_list());
+	}
+
+	entry.m_col = stored;
+	/* position among all columns defined so far, virtual included */
+	entry.s_pos = last + table->n_v_def;
+
+	if (num_base == 0) {
+		entry.base_col = NULL;
+	} else {
+		entry.base_col = static_cast<dict_col_t**>(mem_heap_zalloc(
+			table->heap, num_base * sizeof(dict_col_t*)));
+	}
+
+	entry.num_base = num_base;
+
+	table->s_cols->push_front(entry);
+}
+
+/**********************************************************************//**
+Renames a column of a table in the data dictionary cache.
+
+The packed name array (table->col_names, or table->v_col_names for a
+virtual column) is updated first: in place when the new name is not
+longer than the old one, otherwise in a new array allocated from
+table->heap. When the length changes, the name pointers of all index
+fields of the same kind (virtual or not) are re-pointed into the updated
+array.
+
+For a non-virtual column the column names of the foreign key constraints
+in table->foreign_set and table->referenced_set are refreshed as well.
+A constraint whose index pointer is NULL gets the matching names replaced
+and its index looked up again with dict_foreign_find_index(). Every
+constraint in both sets is visited. */
+static MY_ATTRIBUTE((nonnull))
+void
+dict_mem_table_col_rename_low(
+/*==========================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	unsigned	i,	/*!< in: column offset corresponding to s */
+	const char*	to,	/*!< in: new column name */
+	const char*	s,	/*!< in: pointer to table->col_names */
+	bool		is_virtual)
+				/*!< in: if this is a virtual column */
+{
+	char*	t_col_names = const_cast<char*>(
+		is_virtual ? table->v_col_names : table->col_names);
+	ulint	n_col = is_virtual ? table->n_v_def : table->n_def;
+
+	size_t from_len = strlen(s), to_len = strlen(to);
+
+	ut_ad(i < table->n_def || is_virtual);
+	ut_ad(i < table->n_v_def || !is_virtual);
+
+	ut_ad(from_len <= NAME_LEN);
+	ut_ad(to_len <= NAME_LEN);
+
+	/* Keep a copy of the old name: the array that s points into is
+	modified below, and the old name is still needed for matching the
+	foreign key column names. */
+	char from[NAME_LEN + 1];
+	strncpy(from, s, sizeof from - 1);
+	from[sizeof from - 1] = '\0';
+
+	if (from_len == to_len) {
+		/* The easy case: simply replace the column name in
+		table->col_names. */
+		strcpy(const_cast<char*>(s), to);
+	} else {
+		/* We need to adjust all affected index->field
+		pointers, as in dict_index_add_col(). First, copy
+		table->col_names. */
+		ulint	prefix_len	= ulint(s - t_col_names);
+
+		/* Advance s past the renamed column and all columns
+		after it, to the end of the name array. */
+		for (; i < n_col; i++) {
+			s += strlen(s) + 1;
+		}
+
+		ulint	full_len	= ulint(s - t_col_names);
+		char*	col_names;
+
+		if (to_len > from_len) {
+			col_names = static_cast<char*>(
+				mem_heap_alloc(
+					table->heap,
+					full_len + to_len - from_len));
+
+			memcpy(col_names, t_col_names, prefix_len);
+		} else {
+			/* The shorter name fits: rewrite in place. */
+			col_names = const_cast<char*>(t_col_names);
+		}
+
+		memcpy(col_names + prefix_len, to, to_len);
+		/* memmove, because source and destination overlap in
+		the in-place case; this also moves the terminating NUL
+		of the renamed column. */
+		memmove(col_names + prefix_len + to_len,
+			t_col_names + (prefix_len + from_len),
+			full_len - (prefix_len + from_len));
+
+		/* Replace the field names in every index. */
+		for (dict_index_t* index = dict_table_get_first_index(table);
+		     index != NULL;
+		     index = dict_table_get_next_index(index)) {
+			ulint	n_fields = dict_index_get_n_fields(index);
+
+			for (ulint n = 0; n < n_fields; n++) {
+				dict_field_t*	field
+					= dict_index_get_nth_field(
+						index, n);
+
+				ut_ad(!field->name
+				      == field->col->is_dropped());
+				if (!field->name) {
+					/* dropped columns lack a name */
+					ut_ad(index->is_instant());
+					continue;
+				}
+
+				/* if is_virtual and that in field->col does
+				not match, continue */
+				if ((!is_virtual) !=
+				    (!field->col->is_virtual())) {
+					continue;
+				}
+
+				ulint		name_ofs
+					= ulint(field->name - t_col_names);
+				if (name_ofs <= prefix_len) {
+					/* at or before the renamed column:
+					same offset in the new array */
+					field->name = col_names + name_ofs;
+				} else {
+					/* after the renamed column: shifted
+					by the change in name length */
+					ut_a(name_ofs < full_len);
+					field->name = col_names
+						+ name_ofs + to_len - from_len;
+				}
+			}
+		}
+
+		if (is_virtual) {
+			table->v_col_names = col_names;
+		} else {
+			table->col_names = col_names;
+		}
+	}
+
+	/* Virtual columns are not allowed for foreign key */
+	if (is_virtual) {
+		return;
+	}
+
+	dict_foreign_t*	foreign;
+
+	/* Replace the field names in every foreign key constraint. */
+	for (dict_foreign_set::iterator it = table->foreign_set.begin();
+	     it != table->foreign_set.end();
+	     ++it) {
+
+		foreign = *it;
+
+		if (foreign->foreign_index == NULL) {
+			/* We may go here when we set foreign_key_checks to 0,
+			and then try to rename a column and modify the
+			corresponding foreign key constraint. The index
+			would have been dropped, we have to find an equivalent
+			one */
+			for (unsigned f = 0; f < foreign->n_fields; f++) {
+				if (strcmp(foreign->foreign_col_names[f], from)
+				    == 0) {
+
+					char** rc = const_cast<char**>(
+						foreign->foreign_col_names
+						+ f);
+
+					/* Reuse the existing buffer when the
+					new name fits into it. */
+					if (to_len <= strlen(*rc)) {
+						memcpy(*rc, to, to_len + 1);
+					} else {
+						*rc = static_cast<char*>(
+							mem_heap_dup(
+								foreign->heap,
+								to,
+								to_len + 1));
+					}
+				}
+			}
+
+			/* New index can be null if InnoDB already dropped
+			the foreign index when FOREIGN_KEY_CHECKS is
+			disabled */
+			foreign->foreign_index = dict_foreign_find_index(
+				foreign->foreign_table, NULL,
+				foreign->foreign_col_names,
+				foreign->n_fields, NULL, true, false,
+				NULL, NULL, NULL);
+
+		} else {
+
+			for (unsigned f = 0; f < foreign->n_fields; f++) {
+				/* These can point straight to
+				table->col_names, because the foreign key
+				constraints will be freed at the same time
+				when the table object is freed. */
+				foreign->foreign_col_names[f]
+					= dict_index_get_nth_field(
+						foreign->foreign_index,
+						f)->name;
+			}
+		}
+	}
+
+	for (dict_foreign_set::iterator it = table->referenced_set.begin();
+	     it != table->referenced_set.end();
+	     ++it) {
+
+		foreign = *it;
+
+		if (!foreign->referenced_index) {
+			/* Referenced index could have been dropped
+			when foreign_key_checks is disabled. In that case,
+			rename the corresponding referenced_col_names and
+			find the equivalent referenced index also */
+			for (unsigned f = 0; f < foreign->n_fields; f++) {
+
+				const char*& rc =
+					foreign->referenced_col_names[f];
+				if (strcmp(rc, from)) {
+					continue;
+				}
+
+				if (to_len <= strlen(rc)) {
+					memcpy(const_cast<char*>(rc), to,
+					       to_len + 1);
+				} else {
+					rc = static_cast<char*>(
+						mem_heap_dup(
+							foreign->heap,
+							to, to_len + 1));
+				}
+			}
+
+			/* New index can be null if InnoDB already dropped
+			the referenced index when FOREIGN_KEY_CHECKS is
+			disabled */
+			foreign->referenced_index = dict_foreign_find_index(
+				foreign->referenced_table, NULL,
+				foreign->referenced_col_names,
+				foreign->n_fields, NULL, true, false,
+				NULL, NULL, NULL);
+
+			/* Go on with the next constraint: the remaining
+			members of referenced_set must be updated too, just
+			as the foreign_set loop above visits every member. */
+			continue;
+		}
+
+
+		for (unsigned f = 0; f < foreign->n_fields; f++) {
+			/* foreign->referenced_col_names[] need to be
+			copies, because the constraint may become
+			orphan when foreign_key_checks=0 and the
+			parent table is dropped. */
+
+			const char* col_name = dict_index_get_nth_field(
+				foreign->referenced_index, f)->name;
+
+			if (strcmp(foreign->referenced_col_names[f],
+				   col_name)) {
+				char**	rc = const_cast<char**>(
+					foreign->referenced_col_names + f);
+				size_t	col_name_len_1 = strlen(col_name) + 1;
+
+				if (col_name_len_1 <= strlen(*rc) + 1) {
+					memcpy(*rc, col_name, col_name_len_1);
+				} else {
+					*rc = static_cast<char*>(
+						mem_heap_dup(
+							foreign->heap,
+							col_name,
+							col_name_len_1));
+				}
+			}
+		}
+	}
+}
+
+/**********************************************************************//**
+Renames a column of a table in the data dictionary cache. The current
+name is located in the table's packed column name list and handed to
+dict_mem_table_col_rename_low() together with the new name. */
+void
+dict_mem_table_col_rename(
+/*======================*/
+	dict_table_t*	table,	/*!< in/out: table */
+	ulint		nth_col,/*!< in: column index */
+	const char*	from,	/*!< in: old column name */
+	const char*	to,	/*!< in: new column name */
+	bool		is_virtual)
+				/*!< in: if this is a virtual column */
+{
+	ut_ad(is_virtual
+	      ? nth_col < table->n_v_def
+	      : nth_col < table->n_def);
+
+	/* The names are laid out back to back, each one terminated by
+	a NUL byte; step over the first nth_col of them. */
+	const char*	name = is_virtual
+		? table->v_col_names
+		: table->col_names;
+
+	for (ulint skipped = 0; skipped != nth_col; ++skipped) {
+		const size_t	name_len = strlen(name);
+		ut_ad(name_len > 0);
+		name += name_len + 1;
+	}
+
+	ut_ad(!my_strcasecmp(system_charset_info, from, name));
+
+	dict_mem_table_col_rename_low(table, static_cast<unsigned>(nth_col),
+				      to, name, is_virtual);
+}
+
+/**********************************************************************//**
+Initializes a dict_col_t memory structure from the supplied position,
+type and length information. */
+void
+dict_mem_fill_column_struct(
+/*========================*/
+	dict_col_t*	column,		/*!< out: column struct to be
+					filled */
+	ulint		col_pos,	/*!< in: column position */
+	ulint		mtype,		/*!< in: main data type */
+	ulint		prtype,		/*!< in: precise type */
+	ulint		col_len)	/*!< in: column length */
+{
+	/* Minimum and maximum lengths as reported by dtype_get_mblen()
+	for this main type and precise type */
+	unsigned	min_len;
+	unsigned	max_len;
+
+	dtype_get_mblen(mtype, prtype, &min_len, &max_len);
+
+	/* Every value is cast or masked explicitly before it is stored
+	in the corresponding member. */
+	column->ind = static_cast<unsigned>(col_pos)
+		& dict_index_t::MAX_N_FIELDS;
+	column->mtype = static_cast<uint8_t>(mtype);
+	column->prtype = static_cast<unsigned>(prtype);
+	column->len = static_cast<uint16_t>(col_len);
+	column->mbminlen = min_len & 7;
+	column->mbmaxlen = max_len & 7;
+
+	column->ord_part = 0;
+	column->max_prefix = 0;
+
+	/* def_val starts out as NULL data of length UNIV_SQL_DEFAULT. */
+	column->def_val.data = NULL;
+	column->def_val.len = UNIV_SQL_DEFAULT;
+
+	ut_ad(!column->is_dropped());
+}
+
+/**********************************************************************//**
+Creates an index memory object. A new memory heap is created for it and
+the zero-filled object is allocated from that heap.
+@return own: index object */
+dict_index_t*
+dict_mem_index_create(
+/*==================*/
+	dict_table_t*	table,		/*!< in: table */
+	const char*	index_name,	/*!< in: index name */
+	ulint		type,		/*!< in: DICT_UNIQUE,
+					DICT_CLUSTERED, ... ORed */
+	ulint		n_fields)	/*!< in: number of fields */
+{
+	ut_ad(!table || table->magic_n == DICT_TABLE_MAGIC_N);
+	ut_ad(index_name);
+
+	mem_heap_t*	index_heap = mem_heap_create(DICT_HEAP_SIZE);
+
+	dict_index_t*	new_index = static_cast<dict_index_t*>(
+		mem_heap_zalloc(index_heap, sizeof(dict_index_t)));
+
+	new_index->table = table;
+
+	dict_mem_fill_index_struct(new_index, index_heap, index_name,
+				   type, n_fields);
+
+	/* The object was zero-filled, not constructed, so the mutex
+	is constructed in place. */
+	new (&new_index->zip_pad.mutex) std::mutex();
+
+	if (type & DICT_SPATIAL) {
+		/* The tracking object of a spatial index is placed in
+		the same heap and constructed in place as well. */
+		void*	track_buf = mem_heap_alloc(
+			index_heap, sizeof *new_index->rtr_track);
+
+		new_index->rtr_track = new (track_buf) rtr_info_track_t();
+
+		mutex_create(LATCH_ID_RTR_ACTIVE_MUTEX,
+			     &new_index->rtr_track->rtr_active_mutex);
+	}
+
+	return(new_index);
+}
+
+/**********************************************************************//**
+Creates and initializes a foreign constraint memory object. The object
+is zero-filled, allocated from a newly created heap, and remembers that
+heap in its heap member.
+@return own: foreign constraint struct */
+dict_foreign_t*
+dict_mem_foreign_create(void)
+/*=========================*/
+{
+	DBUG_ENTER("dict_mem_foreign_create");
+
+	mem_heap_t*	fk_heap = mem_heap_create(100);
+
+	dict_foreign_t*	fk = static_cast<dict_foreign_t*>(
+		mem_heap_zalloc(fk_heap, sizeof(dict_foreign_t)));
+
+	fk->heap = fk_heap;
+	fk->v_cols = NULL;
+
+	DBUG_PRINT("dict_mem_foreign_create", ("heap: %p", fk_heap));
+
+	DBUG_RETURN(fk);
+}
+
+/**********************************************************************//**
+Sets the foreign_table_name_lookup pointer based on the value of
+lower_case_table_names. Unless that value is 2, the lookup pointer is
+made an alias of foreign_table_name. If it is 2, the lookup name is a
+separate copy that is converted with innobase_casedn_str(); its buffer
+is allocated from foreign->heap when do_alloc is set, and the buffer
+already pointed to is overwritten otherwise. */
+void
+dict_mem_foreign_table_name_lookup_set(
+/*===================================*/
+	dict_foreign_t*	foreign,	/*!< in/out: foreign struct */
+	ibool		do_alloc)	/*!< in: is an alloc needed */
+{
+	if (innobase_get_lower_case_table_names() != 2) {
+		foreign->foreign_table_name_lookup
+			= foreign->foreign_table_name;
+		return;
+	}
+
+	if (do_alloc) {
+		/* Room for the name and its terminating NUL byte */
+		const ulint	size
+			= strlen(foreign->foreign_table_name) + 1;
+
+		foreign->foreign_table_name_lookup = static_cast<char*>(
+			mem_heap_alloc(foreign->heap, size));
+	}
+
+	strcpy(foreign->foreign_table_name_lookup,
+	       foreign->foreign_table_name);
+	innobase_casedn_str(foreign->foreign_table_name_lookup);
+}
+
+/**********************************************************************//**
+Sets the referenced_table_name_lookup pointer based on the value of
+lower_case_table_names. Unless that value is 2, the lookup pointer is
+made an alias of referenced_table_name. If it is 2, the lookup name is
+a separate copy that is converted with innobase_casedn_str(); its
+buffer is allocated from foreign->heap when do_alloc is set, and the
+buffer already pointed to is overwritten otherwise. */
+void
+dict_mem_referenced_table_name_lookup_set(
+/*======================================*/
+	dict_foreign_t*	foreign,	/*!< in/out: foreign struct */
+	ibool		do_alloc)	/*!< in: is an alloc needed */
+{
+	if (innobase_get_lower_case_table_names() != 2) {
+		foreign->referenced_table_name_lookup
+			= foreign->referenced_table_name;
+		return;
+	}
+
+	if (do_alloc) {
+		/* Room for the name and its terminating NUL byte */
+		const ulint	size
+			= strlen(foreign->referenced_table_name) + 1;
+
+		foreign->referenced_table_name_lookup = static_cast<char*>(
+			mem_heap_alloc(foreign->heap, size));
+	}
+
+	strcpy(foreign->referenced_table_name_lookup,
+	       foreign->referenced_table_name);
+	innobase_casedn_str(foreign->referenced_table_name_lookup);
+}
+
+/** Add to the virtual column set every virtual column of the index's
+table that has ord_part set and lists the given index among its
+v_indexes. The set is allocated on the first insertion if *v_cols is
+still NULL.
+@param[in]	index	virtual index
+@param[out]	v_cols	virtual column set. */
+static
+void
+dict_mem_fill_vcol_has_index(
+	const dict_index_t*	index,
+	dict_vcol_set**		v_cols)
+{
+	for (ulint pos = 0; pos < index->table->n_v_cols; pos++) {
+		dict_v_col_t*	vcol = dict_table_get_nth_v_col(
+			index->table, pos);
+
+		if (vcol->m_col.ord_part) {
+			for (const auto& entry : vcol->v_indexes) {
+				if (entry.index == index) {
+					if (*v_cols == NULL) {
+						*v_cols = UT_NEW_NOKEY(
+							dict_vcol_set());
+					}
+
+					(*v_cols)->insert(vcol);
+				}
+			}
+		}
+	}
+}
+
+/** Fill the virtual column set with the virtual columns of each
+secondary index that contains a field with the given column name.
+@param[in]	col_name	column name
+@param[in]	table		innodb table object
+@param[out]	v_cols		set of virtual column information. */
+static
+void
+dict_mem_fill_vcol_from_v_indexes(
+	const char*		col_name,
+	const dict_table_t*	table,
+	dict_vcol_set**		v_cols)
+{
+	/* A virtual column cannot be part of the primary key, so the
+	scan starts from the index that follows the first one. */
+	dict_index_t*	index = dict_table_get_next_index(
+		dict_table_get_first_index(table));
+
+	for (; index != NULL; index = dict_table_get_next_index(index)) {
+
+		/* Skip an index that has a newly added virtual column,
+		because its field name is NULL. The virtual column set
+		will be refreshed later, when the table is loaded. */
+		const bool	usable = dict_index_has_virtual(index)
+			&& !index->has_new_v_col();
+
+		if (!usable) {
+			continue;
+		}
+
+		for (ulint pos = 0; pos < index->n_fields; pos++) {
+			dict_field_t*	fld
+				= dict_index_get_nth_field(index, pos);
+
+			if (!strcmp(fld->name, col_name)) {
+				dict_mem_fill_vcol_has_index(index, v_cols);
+			}
+		}
+	}
+}
+
+/** Fill the virtual column set with the virtual columns that have
+ord_part set and have the given col_name among their base columns.
+The set is allocated on the first insertion if *v_cols is still NULL.
+@param[in]	col_name	column name
+@param[in]	table		table object
+@param[out]	v_cols		set of virtual columns. */
+static
+void
+dict_mem_fill_vcol_set_for_base_col(
+	const char*		col_name,
+	const dict_table_t*	table,
+	dict_vcol_set**		v_cols)
+{
+	for (ulint pos = 0; pos < table->n_v_cols; pos++) {
+		dict_v_col_t*	vcol = dict_table_get_nth_v_col(table, pos);
+
+		if (vcol->m_col.ord_part) {
+			for (ulint b = 0; b < unsigned{vcol->num_base}; b++) {
+				const char*	base_name
+					= dict_table_get_col_name(
+						table,
+						vcol->base_col[b]->ind);
+
+				if (strcmp(col_name, base_name) != 0) {
+					continue;
+				}
+
+				if (*v_cols == NULL) {
+					*v_cols = UT_NEW_NOKEY(
+						dict_vcol_set());
+				}
+
+				(*v_cols)->insert(vcol);
+			}
+		}
+	}
+}
+
+/** Fills the dependent virtual columns in a set.
+A virtual column is dependent when
+1) the FK is on a base column of the virtual column, or
+2) the FK is on a column that is part of a virtual index.
+Nothing is done when foreign->type is 0.
+@param[in,out]	foreign	foreign key information. */
+void
+dict_mem_foreign_fill_vcol_set(
+	dict_foreign_t*	foreign)
+{
+	if (foreign->type == 0) {
+		return;
+	}
+
+	for (ulint pos = 0; pos < foreign->n_fields; pos++) {
+		const char*	col_name = foreign->foreign_col_names[pos];
+
+		/* Case 1: the column is a base column of virtual
+		columns. */
+		dict_mem_fill_vcol_set_for_base_col(
+			col_name, foreign->foreign_table, &foreign->v_cols);
+
+		/* Case 2: the column is part of a virtual index. */
+		dict_mem_fill_vcol_from_v_indexes(
+			col_name, foreign->foreign_table, &foreign->v_cols);
+	}
+}
+
+/** Fill virtual columns set in each fk constraint present in the table.
+@param[in,out]	table	innodb table object. */
+void
+dict_mem_table_fill_foreign_vcol_set(
+	dict_table_t*	table)
+{
+	/* Iterate over the table's own set instead of a private copy:
+	this loop only hands each constraint pointer to
+	dict_mem_foreign_fill_vcol_set() and never inserts into or
+	erases from the set, so duplicating every node of the set on
+	each call is unnecessary. */
+	for (dict_foreign_t* foreign : table->foreign_set) {
+		dict_mem_foreign_fill_vcol_set(foreign);
+	}
+}
+
+/** Free the vcol_set from all foreign key constraint on the table.
+@param[in,out]	table	innodb table object. */
+void
+dict_mem_table_free_foreign_vcol_set(
+	dict_table_t*	table)
+{
+	/* Iterate over the table's own set instead of a private copy:
+	only the v_cols member of each constraint is touched, never the
+	set itself, so duplicating every node of the set on each call
+	is unnecessary. */
+	for (dict_foreign_t* foreign : table->foreign_set) {
+		if (foreign->v_cols != NULL) {
+			UT_DELETE(foreign->v_cols);
+			/* Reset the pointer so that the freed set
+			cannot be reached through it again. */
+			foreign->v_cols = NULL;
+		}
+	}
+}
+
+/**********************************************************************//**
+Appends a field definition to an index by incrementing index->n_def and
+filling in the last defined field. NOTE: does not take a copy of the
+column name if the field is a column. The memory occupied by the column
+name may be released only after publishing the index. */
+void
+dict_mem_index_add_field(
+/*=====================*/
+	dict_index_t*	index,		/*!< in: index */
+	const char*	name,		/*!< in: column name */
+	ulint		prefix_len)	/*!< in: 0 or the column prefix length
+					in a MySQL index like
+					INDEX (textcol(25)) */
+{
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	index->n_def++;
+
+	/* The field being added is now the last defined one. */
+	dict_field_t*	new_field = dict_index_get_nth_field(
+		index, unsigned(index->n_def) - 1);
+
+	new_field->name = name;
+	/* Only the low 12 bits of the prefix length are kept. */
+	new_field->prefix_len = prefix_len & ((1U << 12) - 1);
+}
+
+/**********************************************************************//**
+Frees an index memory object: explicitly destroys zip_pad.mutex and,
+for a spatial index, the rtr_track object, detaches the columns and
+finally frees index->heap. */
+void
+dict_mem_index_free(
+/*================*/
+ dict_index_t* index) /*!< in: index */
+{
+ ut_ad(index);
+ ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+ /* Explicit destructor call; no delete expression is used on the
+ index object in this function. */
+ index->zip_pad.mutex.~mutex();
+
+ if (dict_index_is_spatial(index)) {
+ /* Clear the index pointer of every entry in rtr_active
+ before the tracking object is destroyed. */
+ for (auto& rtr_info : index->rtr_track->rtr_active) {
+ rtr_info->index = NULL;
+ }
+
+ mutex_destroy(&index->rtr_track->rtr_active_mutex);
+ index->rtr_track->~rtr_info_track_t();
+ }
+
+ index->detach_columns();
+ /* NOTE(review): presumably this also releases the index object
+ itself, which dict_mem_index_create() allocates from the heap it
+ creates - confirm that index->heap is that heap. */
+ mem_heap_free(index->heap);
+}
+
+/** Create a temporary tablename like "#sql-ibNNN".
+The database part of dbtab, up to and including the '/', is copied
+unchanged and the formatted table id is appended to it.
+@param[in]	heap	A memory heap
+@param[in]	dbtab	Table name in the form database/table name
+@param[in]	id	Table id
+@return A unique temporary tablename suitable for InnoDB use */
+char*
+dict_mem_create_temporary_tablename(
+	mem_heap_t*	heap,
+	const char*	dbtab,
+	table_id_t	id)
+{
+	const char*	slash = strchr(dbtab, '/');
+	ut_ad(slash);
+
+	/* Length of the "database/" part, including the slash */
+	const size_t	db_part_len = size_t(slash - dbtab) + 1;
+	const size_t	buf_size = db_part_len
+		+ (sizeof(TEMP_FILE_PREFIX) + 3 + 20);
+
+	char*	tmp_name = static_cast<char*>(
+		mem_heap_alloc(heap, buf_size));
+
+	memcpy(tmp_name, dbtab, db_part_len);
+	snprintf(tmp_name + db_part_len, buf_size - db_part_len,
+		 TEMP_FILE_PREFIX_INNODB UINT64PF, id);
+
+	return(tmp_name);
+}
+
+/** Validate the search order in the foreign key set.
+The first element for which dict_foreign_not_exists holds is printed to
+std::cerr, followed by the whole set.
+@param[in]	fk_set	the foreign key set to be validated
+@return true if search order is fine in the set, false otherwise. */
+bool
+dict_foreign_set_validate(
+	const dict_foreign_set&	fk_set)
+{
+	const dict_foreign_set::const_iterator	bad = std::find_if(
+		fk_set.begin(), fk_set.end(),
+		dict_foreign_not_exists(fk_set));
+
+	if (bad != fk_set.end()) {
+		const dict_foreign_t*	foreign = *bad;
+
+		std::cerr << "Foreign key lookup failed: " << *foreign;
+		std::cerr << fk_set;
+		ut_ad(0);
+		return(false);
+	}
+
+	return(true);
+}
+
+/** Validate the search order in the foreign key sets of the table
+(foreign_set and referenced_set). The referenced_set is only checked
+if the foreign_set passed.
+@param[in]	table	table whose foreign key sets are to be validated
+@return true if foreign key sets are fine, false otherwise. */
+bool
+dict_foreign_set_validate(
+	const dict_table_t&	table)
+{
+	if (!dict_foreign_set_validate(table.foreign_set)) {
+		return(false);
+	}
+
+	return(dict_foreign_set_validate(table.referenced_set));
+}
+
+/** Print the id of a foreign key constraint and, when it is set, the
+name of its foreign table.
+@param[in,out]	out	output stream
+@param[in]	foreign	foreign key constraint
+@return the output stream */
+std::ostream&
+operator<< (std::ostream& out, const dict_foreign_t& foreign)
+{
+	out << "[dict_foreign_t: id='" << foreign.id << "'";
+
+	if (const char* for_name = foreign.foreign_table_name) {
+		out << ",for: '" << for_name << "'";
+	}
+
+	return(out << "]");
+}
+
+/** Print every element of a foreign key set with dict_foreign_print,
+then end the line and flush the stream.
+@param[in,out]	out	output stream
+@param[in]	fk_set	foreign key set
+@return the output stream */
+std::ostream&
+operator<< (std::ostream& out, const dict_foreign_set& fk_set)
+{
+	out << "[dict_foreign_set:";
+
+	std::for_each(fk_set.begin(), fk_set.end(), dict_foreign_print(out));
+
+	return(out << "]" << std::endl);
+}
+
+/** Check whether fulltext index gets affected by foreign
+key constraint.
+@return true if some column of foreign_index is reported as a fulltext
+column by dict_table_is_fts_column(); always false for a constraint
+whose foreign and referenced table are the same, or whose foreign table
+has no fts object */
+bool dict_foreign_t::affects_fulltext() const
+{
+  const bool candidate= foreign_table != referenced_table &&
+                        foreign_table->fts;
+  if (!candidate)
+    return false;
+
+  ulint pos= 0;
+  while (pos < n_fields)
+  {
+    const dict_col_t *col= dict_index_get_nth_col(foreign_index, pos);
+    const ulint fts_pos= dict_table_is_fts_column(foreign_table->fts->indexes,
+                                                  col->ind, col->is_virtual());
+    if (fts_pos != ULINT_UNDEFINED)
+      return true;
+    pos++;
+  }
+
+  return false;
+}
+
+/** Reconstruct the clustered index fields.
+Builds a new fields array that also contains the dropped columns of
+table->instant, ordered according to table->instant->field_map, and
+recomputes n_nullable and n_core_null_bytes. */
+inline void dict_index_t::reconstruct_fields()
+{
+ DBUG_ASSERT(is_primary());
+
+ /* Account for the dropped columns in both field counts. */
+ n_fields = (n_fields + table->instant->n_dropped)
+ & dict_index_t::MAX_N_FIELDS;
+ n_def = (n_def + table->instant->n_dropped)
+ & dict_index_t::MAX_N_FIELDS;
+
+ const unsigned n_first = first_user_field();
+
+ dict_field_t* tfields = static_cast<dict_field_t*>(
+ mem_heap_zalloc(heap, n_fields * sizeof *fields));
+
+ /* The fields before the first user field are taken over
+ unchanged. */
+ memcpy(tfields, fields, n_first * sizeof *fields);
+
+ n_nullable = 0;
+ ulint n_core_null = 0;
+ const bool comp = dict_table_is_comp(table);
+ const auto* field_map_it = table->instant->field_map;
+ /* One field_map element is consumed per remaining field; j counts
+ the dropped columns used so far. */
+ for (unsigned i = n_first, j = 0; i < n_fields; ) {
+ dict_field_t& f = tfields[i++];
+ auto c = *field_map_it++;
+ if (c.is_dropped()) {
+ /* The field refers to the next dropped column. */
+ f.col = &table->instant->dropped[j++];
+ DBUG_ASSERT(f.col->is_dropped());
+ f.fixed_len = dict_col_get_fixed_size(f.col, comp)
+ & ((1U << 10) - 1);
+ } else {
+ DBUG_ASSERT(!c.is_not_null());
+ /* Look up the existing field of the column with
+ this index and copy it. */
+ const auto old = std::find_if(
+ fields + n_first, fields + n_fields,
+ [c](const dict_field_t& o)
+ { return o.col->ind == c.ind(); });
+ ut_ad(old >= &fields[n_first]);
+ ut_ad(old < &fields[n_fields]);
+ DBUG_ASSERT(!old->prefix_len);
+ DBUG_ASSERT(old->col == &table->cols[c.ind()]);
+ f = *old;
+ }
+
+ f.col->clear_instant();
+ if (f.col->is_nullable()) {
+ n_nullable++;
+ /* i has already been advanced past this field, so
+ i <= n_core_fields holds exactly when the field's
+ position is below n_core_fields. */
+ n_core_null += i <= n_core_fields;
+ }
+ }
+
+ fields = tfields;
+ n_core_null_bytes = static_cast<byte>(UT_BITS_IN_BYTES(n_core_null));
+}
+
+/** Reconstruct dropped or reordered columns.
+The metadata consists of a 4-byte field count followed by one 2-byte
+field map element per field. A buffer that is too short to hold what
+its own count announces is rejected instead of being read past its end.
+@param[in]	metadata	data from serialise_columns()
+@param[in]	len		length of the metadata, in bytes
+@return whether parsing the metadata failed */
+bool dict_table_t::deserialise_columns(const byte* metadata, ulint len)
+{
+	DBUG_ASSERT(!instant);
+
+	/* The 4-byte field count must be present. */
+	if (len < 4) {
+		return true;
+	}
+
+	unsigned num_non_pk_fields = mach_read_from_4(metadata);
+	metadata += 4;
+
+	if (num_non_pk_fields >= REC_MAX_N_FIELDS - 3) {
+		return true;
+	}
+
+	/* Every field map element that the loop below reads must lie
+	inside the buffer. The multiplication cannot overflow, because
+	num_non_pk_fields was bounded above. */
+	if (len - 4 < ulint(num_non_pk_fields) * 2) {
+		return true;
+	}
+
+	dict_index_t* index = UT_LIST_GET_FIRST(indexes);
+
+	if (num_non_pk_fields < unsigned(index->n_fields)
+	    - index->first_user_field()) {
+		return true;
+	}
+
+	field_map_element_t* field_map = static_cast<field_map_element_t*>(
+		mem_heap_alloc(heap,
+			       num_non_pk_fields * sizeof *field_map));
+
+	unsigned n_dropped_cols = 0;
+
+	/* First pass: copy and validate the field map, counting the
+	dropped columns. */
+	for (unsigned i = 0; i < num_non_pk_fields; i++) {
+		auto c = field_map[i] = mach_read_from_2(metadata);
+		metadata += 2;
+
+		if (field_map[i].is_dropped()) {
+			if (c.ind() > DICT_MAX_FIXED_COL_LEN + 1) {
+				return true;
+			}
+			n_dropped_cols++;
+		} else if (c >= n_cols) {
+			return true;
+		}
+	}
+
+	dict_col_t* dropped_cols = static_cast<dict_col_t*>(mem_heap_zalloc(
+		heap, n_dropped_cols * sizeof(dict_col_t)));
+	instant = new (mem_heap_alloc(heap, sizeof *instant)) dict_instant_t();
+	instant->n_dropped = n_dropped_cols;
+	instant->dropped = dropped_cols;
+	instant->field_map = field_map;
+
+	/* Second pass: initialize one dict_col_t for each dropped
+	column, in field map order. */
+	dict_col_t* col = dropped_cols;
+	for (unsigned i = 0; i < num_non_pk_fields; i++) {
+		if (field_map[i].is_dropped()) {
+			auto fixed_len = field_map[i].ind();
+			DBUG_ASSERT(fixed_len <= DICT_MAX_FIXED_COL_LEN + 1);
+			(col++)->set_dropped(field_map[i].is_not_null(),
+					     fixed_len == 1,
+					     fixed_len > 1 ? fixed_len - 1
+					     : 0);
+		}
+	}
+	DBUG_ASSERT(col == &dropped_cols[n_dropped_cols]);
+
+	UT_LIST_GET_FIRST(indexes)->reconstruct_fields();
+	return false;
+}
+
+/** Check if record in clustered index is historical row.
+The row counts as historical when the value of the table's vers_end
+column differs from the maximum value of its type (trx_id_max_bytes or
+timestamp_max_bytes).
+@param[in]	rec	clustered row
+@param[in]	offsets	offsets
+@return true if row is historical */
+bool
+dict_index_t::vers_history_row(
+	const rec_t*		rec,
+	const rec_offs*		offsets)
+{
+	ut_ad(is_primary());
+
+	dict_col_t&	end_col = table->cols[table->vers_end];
+	ut_ad(end_col.vers_sys_end());
+
+	ulint		field_len;
+	const ulint	clust_pos = dict_col_get_clust_pos(&end_col, this);
+	const byte*	field = rec_get_nth_field(rec, offsets, clust_pos,
+						  &field_len);
+
+	if (!end_col.vers_native()) {
+		ut_ad(field_len == sizeof timestamp_max_bytes);
+		return 0 != memcmp(field, timestamp_max_bytes, field_len);
+	}
+
+	ut_ad(field_len == sizeof trx_id_max_bytes);
+	return 0 != memcmp(field, trx_id_max_bytes, field_len);
+}
+
+/** Check if record in secondary index is historical row.
+The matching clustered index record is looked up inside a
+mini-transaction of its own and checked there.
+@param[in]	rec		record in a secondary index
+@param[out]	history_row	true if row is historical; left untouched
+				on error
+@return true on error */
+bool
+dict_index_t::vers_history_row(
+	const rec_t*	rec,
+	bool		&history_row)
+{
+	ut_ad(!is_primary());
+
+	mem_heap_t*	offs_heap = NULL;
+	dict_index_t*	clust_index = NULL;
+	rec_offs	offsets_[REC_OFFS_NORMAL_SIZE];
+	rec_offs*	offsets = offsets_;
+	rec_offs_init(offsets_);
+
+	mtr_t	mtr;
+	mtr.start();
+
+	rec_t*	clust_rec = row_get_clust_rec(
+		BTR_SEARCH_LEAF, rec, this, &clust_index, &mtr);
+	const bool	failed = !clust_rec;
+
+	if (failed) {
+		ib::error() << "foreign constraints: secondary index is out of "
+			       "sync";
+		ut_ad("secondary index is out of sync" == 0);
+	} else {
+		offsets = rec_get_offsets(clust_rec, clust_index, offsets,
+					  clust_index->n_core_fields,
+					  ULINT_UNDEFINED, &offs_heap);
+
+		history_row = clust_index->vers_history_row(clust_rec,
+							    offsets);
+	}
+
+	mtr.commit();
+
+	/* rec_get_offsets() is given &offs_heap; free the heap if it
+	set one. */
+	if (offs_heap) {
+		mem_heap_free(offs_heap);
+	}
+
+	return(failed);
+}
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
new file mode 100644
index 00000000..42f75252
--- /dev/null
+++ b/storage/innobase/dict/dict0stats.cc
@@ -0,0 +1,4306 @@
+/*****************************************************************************
+
+Copyright (c) 2009, 2019, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2015, 2021, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dict/dict0stats.cc
+Code used for calculating and manipulating table statistics.
+
+Created Jan 06, 2010 Vasil Dimov
+*******************************************************/
+
+#include "dict0stats.h"
+#include "ut0ut.h"
+#include "ut0rnd.h"
+#include "dyn0buf.h"
+#include "row0sel.h"
+#include "trx0trx.h"
+#include "pars0pars.h"
+#include <mysql_com.h>
+#include "btr0btr.h"
+#include "sync0sync.h"
+
+#include <algorithm>
+#include <map>
+#include <vector>
+
+/* Sampling algorithm description @{
+
+The algorithm is controlled by one number - N_SAMPLE_PAGES(index),
+let it be A, which is the number of leaf pages to analyze for a given index
+for each n-prefix (if the index is on 3 columns, then 3*A leaf pages will be
+analyzed).
+
+Let the total number of leaf pages in the table be T.
+Level 0 - leaf pages, level H - root.
+
+Definition: N-prefix-boring record is a record on a non-leaf page that equals
+the next (to the right, cross page boundaries, skipping the supremum and
+infimum) record on the same level when looking at the first n-prefix columns.
+The last (user) record on a level is not boring (it does not match the
+non-existent user record to the right). We call the records boring because all
+the records on the page below a boring record are equal to that boring record.
+
+We avoid diving below boring records when searching for a leaf page to
+estimate the number of distinct records because we know that such a leaf
+page will have number of distinct records == 1.
+
+For each n-prefix: start from the root level and full scan subsequent lower
+levels until a level that contains at least A*10 distinct records is found.
+Let's call this level LA.
+As an optimization the search is canceled if it has reached level 1 (never
+descend to the level 0 (leaf)) and also if the next level to be scanned
+would contain more than A pages. The latter is because the user has asked
+to analyze A leaf pages and it does not make sense to scan much more than
+A non-leaf pages with the sole purpose of finding a good sample of A leaf
+pages.
+
+After finding the appropriate level LA with >A*10 distinct records (or less in
+the exceptions described above), divide it into groups of equal records and
+pick A such groups. Then pick the last record from each group. For example,
+let the level be:
+
+index: 0,1,2,3,4,5,6,7,8,9,10
+record: 1,1,1,2,2,7,7,7,7,7,9
+
+There are 4 groups of distinct records and if A=2 random ones are selected,
+e.g. 1,1,1 and 7,7,7,7,7, then records with indexes 2 and 9 will be selected.
+
+After selecting A records as described above, dive below them to find A leaf
+pages and analyze them, finding the total number of distinct records. The
+dive to the leaf level is performed by selecting a non-boring record from
+each page and diving below it.
+
+This way, a total of A leaf pages are analyzed for the given n-prefix.
+
+Let the number of different key values found in each leaf page i be Pi (i=1..A).
+Let N_DIFF_AVG_LEAF be (P1 + P2 + ... + PA) / A.
+Let the number of different key values on level LA be N_DIFF_LA.
+Let the total number of records on level LA be TOTAL_LA.
+Let R be N_DIFF_LA / TOTAL_LA, we assume this ratio is the same on the
+leaf level.
+Let the number of leaf pages be N.
+Then the total number of different key values on the leaf level is:
+N * R * N_DIFF_AVG_LEAF.
+See REF01 for the implementation.
+
+The above describes how to calculate the cardinality of an index.
+This algorithm is executed for each n-prefix of a multi-column index
+where n=1..n_uniq.
+@} */
+
+/* names of the tables from the persistent statistics storage */
+#define TABLE_STATS_NAME_PRINT "mysql.innodb_table_stats"
+#define INDEX_STATS_NAME_PRINT "mysql.innodb_index_stats"
+
+/* Debug tracing: DEBUG_PRINTF() forwards to printf() when
+UNIV_STATS_DEBUG is defined and expands to nothing otherwise */
+#ifdef UNIV_STATS_DEBUG
+#define DEBUG_PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__)
+#else /* UNIV_STATS_DEBUG */
+#define DEBUG_PRINTF(fmt, ...) /* noop */
+#endif /* UNIV_STATS_DEBUG */
+
+/* Gets the number of leaf pages to sample in persistent stats estimation:
+the table's own stats_sample_pages if it is nonzero, otherwise
+srv_stats_persistent_sample_pages */
+#define N_SAMPLE_PAGES(index) \
+ static_cast<ib_uint64_t>( \
+ (index)->table->stats_sample_pages != 0 \
+ ? (index)->table->stats_sample_pages \
+ : srv_stats_persistent_sample_pages)
+
+/* number of distinct records on a given level that are required to stop
+descending to lower levels and fetch N_SAMPLE_PAGES(index) records
+from that level */
+#define N_DIFF_REQUIRED(index) (N_SAMPLE_PAGES(index) * 10)
+
+/* A dynamic array where we store the boundaries of each distinct group
+of keys. For example if a btree level is:
+index: 0,1,2,3,4,5,6,7,8,9,10,11,12
+data: b,b,b,b,b,b,g,g,j,j,j, x, y
+then we would store 5,7,10,11,12 in the array. */
+typedef std::vector<ib_uint64_t, ut_allocator<ib_uint64_t> > boundaries_t;
+
+/** Allocator type used for index_map_t. */
+typedef ut_allocator<std::pair<const char* const, dict_index_t*> >
+ index_map_t_allocator;
+
+/** Auxiliary map used for sorting indexes by name in dict_stats_save(). */
+typedef std::map<const char*, dict_index_t*, ut_strcmp_functor,
+ index_map_t_allocator> index_map_t;
+
+/*********************************************************************//**
+Checks whether an index should be ignored in stats manipulations:
+* stats fetch
+* stats recalc
+* stats save
+An index is ignored if it is a fulltext or spatial index, is corrupted,
+is marked to be dropped, or is not committed.
+@return true if the index should be ignored */
+UNIV_INLINE
+bool
+dict_stats_should_ignore_index(
+/*===========================*/
+ const dict_index_t* index) /*!< in: index */
+{
+ return((index->type & (DICT_FTS | DICT_SPATIAL))
+ || index->is_corrupted()
+ || index->to_be_dropped
+ || !index->is_committed());
+}
+
+/*********************************************************************//**
+Checks whether the persistent statistics storage exists and that all
+tables have the proper structure. Both statistics tables are compared
+against the column definitions listed below while dict_sys.mutex is
+held; an error message is logged for every failure other than
+DB_STATS_DO_NOT_EXIST.
+@return true if exists and all tables are ok */
+static
+bool
+dict_stats_persistent_storage_check(
+/*================================*/
+	bool	caller_has_dict_sys_mutex)	/*!< in: true if the caller
+						owns dict_sys.mutex */
+{
+	/* definition for the table TABLE_STATS_NAME */
+	dict_col_meta_t	table_stats_columns[] = {
+		{"database_name", DATA_VARMYSQL,
+			DATA_NOT_NULL, 192},
+
+		{"table_name", DATA_VARMYSQL,
+			DATA_NOT_NULL, 597},
+
+		{"last_update", DATA_INT,
+			DATA_NOT_NULL | DATA_UNSIGNED, 4},
+
+		{"n_rows", DATA_INT,
+			DATA_NOT_NULL | DATA_UNSIGNED, 8},
+
+		{"clustered_index_size", DATA_INT,
+			DATA_NOT_NULL | DATA_UNSIGNED, 8},
+
+		{"sum_of_other_index_sizes", DATA_INT,
+			DATA_NOT_NULL | DATA_UNSIGNED, 8}
+	};
+	dict_table_schema_t	table_stats_schema = {
+		TABLE_STATS_NAME,
+		UT_ARR_SIZE(table_stats_columns),
+		table_stats_columns,
+		0 /* n_foreign */,
+		0 /* n_referenced */
+	};
+
+	/* definition for the table INDEX_STATS_NAME */
+	dict_col_meta_t	index_stats_columns[] = {
+		{"database_name", DATA_VARMYSQL,
+			DATA_NOT_NULL, 192},
+
+		{"table_name", DATA_VARMYSQL,
+			DATA_NOT_NULL, 597},
+
+		{"index_name", DATA_VARMYSQL,
+			DATA_NOT_NULL, 192},
+
+		{"last_update", DATA_INT,
+			DATA_NOT_NULL | DATA_UNSIGNED, 4},
+
+		{"stat_name", DATA_VARMYSQL,
+			DATA_NOT_NULL, 64*3},
+
+		{"stat_value", DATA_INT,
+			DATA_NOT_NULL | DATA_UNSIGNED, 8},
+
+		{"sample_size", DATA_INT,
+			DATA_UNSIGNED, 8},
+
+		{"stat_description", DATA_VARMYSQL,
+			DATA_NOT_NULL, 1024*3}
+	};
+	dict_table_schema_t	index_stats_schema = {
+		INDEX_STATS_NAME,
+		UT_ARR_SIZE(index_stats_columns),
+		index_stats_columns,
+		0 /* n_foreign */,
+		0 /* n_referenced */
+	};
+
+	char		errstr[512];
+	const bool	lock_here = !caller_has_dict_sys_mutex;
+
+	if (lock_here) {
+		mutex_enter(&dict_sys.mutex);
+	}
+
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	/* table_stats is checked first; index_stats is only looked at
+	when table_stats is fine. */
+	dberr_t	ret = dict_table_schema_check(&table_stats_schema, errstr,
+					      sizeof(errstr));
+
+	if (ret == DB_SUCCESS) {
+		ret = dict_table_schema_check(&index_stats_schema, errstr,
+					      sizeof(errstr));
+	}
+
+	if (lock_here) {
+		mutex_exit(&dict_sys.mutex);
+	}
+
+	switch (ret) {
+	case DB_SUCCESS:
+		return(true);
+	case DB_STATS_DO_NOT_EXIST:
+		/* Missing storage is reported without logging. */
+		return(false);
+	default:
+		ib::error() << errstr;
+		return(false);
+	}
+}
+
+/** Executes a given SQL statement using the InnoDB internal SQL parser.
+This function will free the pinfo object.
+@param[in,out] pinfo pinfo to pass to que_eval_sql() must already
+have any literals bound to it
+@param[in] sql SQL string to execute
+@param[in,out] trx in case of NULL the function will allocate and
+free the trx object. If it is not NULL then it will be rolled back
+only in the case of error, but not freed.
+@return DB_SUCCESS or error code */
+static
+dberr_t
+dict_stats_exec_sql(
+ pars_info_t* pinfo,
+ const char* sql,
+ trx_t* trx)
+{
+ dberr_t err;
+ /* Set when the transaction is created here rather than supplied
+ by the caller */
+ bool trx_started = false;
+
+ ut_d(dict_sys.assert_locked());
+
+ if (!dict_stats_persistent_storage_check(true)) {
+ /* que_eval_sql() is not reached on this path, so pinfo is
+ freed here. */
+ pars_info_free(pinfo);
+ return(DB_STATS_DO_NOT_EXIST);
+ }
+
+ if (trx == NULL) {
+ trx = trx_create();
+ trx_started = true;
+
+ if (srv_read_only_mode) {
+ trx_start_internal_read_only(trx);
+ } else {
+ trx_start_internal(trx);
+ }
+ }
+
+ err = que_eval_sql(pinfo, sql, FALSE, trx); /* pinfo is freed here */
+
+ /* Debug injection point "stats_index_error": report a failure
+ when a caller-supplied transaction is in use. */
+ DBUG_EXECUTE_IF("stats_index_error",
+ if (!trx_started) {
+ err = DB_STATS_DO_NOT_EXIST;
+ trx->error_state = DB_STATS_DO_NOT_EXIST;
+ });
+
+ /* A caller-supplied transaction is neither committed nor freed
+ on success. */
+ if (!trx_started && err == DB_SUCCESS) {
+ return(DB_SUCCESS);
+ }
+
+ if (err == DB_SUCCESS) {
+ trx_commit_for_mysql(trx);
+ } else {
+ /* Roll back on error, whichever side created the
+ transaction. */
+ trx->op_info = "rollback of internal trx on stats tables";
+ trx->dict_operation_lock_mode = RW_X_LATCH;
+ trx->rollback();
+ trx->dict_operation_lock_mode = 0;
+ trx->op_info = "";
+ ut_a(trx->error_state == DB_SUCCESS);
+ }
+
+ /* Only a transaction created here is freed here. */
+ if (trx_started) {
+ trx->free();
+ }
+
+ return(err);
+}
+
+/*********************************************************************//**
+Duplicate a table object and its indexes.
+This function creates a dummy dict_table_t object and initializes the
+following table and index members:
+dict_table_t::id (copied)
+dict_table_t::heap (newly created)
+dict_table_t::name (copied)
+dict_table_t::corrupted (copied)
+dict_table_t::indexes<> (newly created; freed_indexes<> too if BTR_CUR_HASH_ADAPT)
+dict_table_t::magic_n (set inside ut_d())
+for each entry in dict_table_t::indexes, the following are initialized:
+(indexes for which dict_stats_should_ignore_index() holds are skipped)
+dict_index_t::id (copied)
+dict_index_t::name (copied)
+dict_index_t::stat_defrag_n_page_split, stat_defrag_n_pages_freed (set to 0)
+dict_index_t::table (points to the above semi-initialized object)
+dict_index_t::type (copied)
+dict_index_t::to_be_dropped (set to 0, not copied)
+dict_index_t::online_status (set to ONLINE_INDEX_COMPLETE, marked committed)
+dict_index_t::n_uniq (copied)
+dict_index_t::fields[] (newly created, only first n_uniq, only fields[i].name)
+dict_index_t::indexes<> (hooked into the new dict_table_t::indexes list)
+dict_index_t::stat_n_diff_key_vals[] (only allocated, left uninitialized)
+dict_index_t::stat_n_sample_sizes[] (only allocated, left uninitialized)
+dict_index_t::stat_n_non_null_key_vals[] (only allocated, left uninitialized)
+dict_index_t::magic_n (set inside ut_d())
+The returned object should be freed with dict_stats_table_clone_free()
+when no longer needed.
+@return incomplete table object */
+static
+dict_table_t*
+dict_stats_table_clone_create(
+/*==========================*/
+ const dict_table_t* table) /*!< in: table whose stats to copy */
+{
+ size_t heap_size;
+ dict_index_t* index;
+
+ /* Estimate the size needed for the table and all of its indexes;
+ this loop must account for exactly what the second loop allocates */
+ heap_size = 0;
+ heap_size += sizeof(dict_table_t);
+ heap_size += strlen(table->name.m_name) + 1;
+
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ if (dict_stats_should_ignore_index(index)) {
+ continue;
+ }
+
+ ut_ad(!dict_index_is_ibuf(index));
+
+ ulint n_uniq = dict_index_get_n_unique(index);
+
+ heap_size += sizeof(dict_index_t);
+ heap_size += strlen(index->name) + 1;
+ heap_size += n_uniq * sizeof(index->fields[0]);
+ for (ulint i = 0; i < n_uniq; i++) {
+ heap_size += strlen(index->fields[i].name) + 1;
+ }
+ heap_size += n_uniq * sizeof(index->stat_n_diff_key_vals[0]);
+ heap_size += n_uniq * sizeof(index->stat_n_sample_sizes[0]);
+ heap_size += n_uniq * sizeof(index->stat_n_non_null_key_vals[0]);
+ }
+
+ /* Allocate the memory and copy the members */
+
+ mem_heap_t* heap;
+
+ heap = mem_heap_create(heap_size);
+
+ dict_table_t* t;
+
+ t = (dict_table_t*) mem_heap_alloc(heap, sizeof(*t));
+
+ MEM_CHECK_DEFINED(&table->id, sizeof(table->id));
+ t->id = table->id;
+
+ t->heap = heap;
+
+ t->name.m_name = mem_heap_strdup(heap, table->name.m_name);
+
+ t->corrupted = table->corrupted;
+
+ UT_LIST_INIT(t->indexes, &dict_index_t::indexes);
+#ifdef BTR_CUR_HASH_ADAPT
+ UT_LIST_INIT(t->freed_indexes, &dict_index_t::indexes);
+#endif /* BTR_CUR_HASH_ADAPT */
+
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ if (dict_stats_should_ignore_index(index)) {
+ continue;
+ }
+
+ ut_ad(!dict_index_is_ibuf(index));
+
+ dict_index_t* idx;
+
+ idx = (dict_index_t*) mem_heap_alloc(heap, sizeof(*idx));
+
+ MEM_CHECK_DEFINED(&index->id, sizeof(index->id));
+ idx->id = index->id;
+
+ idx->name = mem_heap_strdup(heap, index->name);
+
+ idx->table = t;
+
+ idx->type = index->type;
+
+ idx->to_be_dropped = 0;
+
+ idx->online_status = ONLINE_INDEX_COMPLETE;
+ idx->set_committed(true);
+
+ idx->n_uniq = index->n_uniq;
+
+ idx->fields = (dict_field_t*) mem_heap_alloc(
+ heap, idx->n_uniq * sizeof(idx->fields[0]));
+
+ for (ulint i = 0; i < idx->n_uniq; i++) {
+ idx->fields[i].name = mem_heap_strdup(
+ heap, index->fields[i].name);
+ }
+
+ /* hook idx into t->indexes */
+ UT_LIST_ADD_LAST(t->indexes, idx);
+
+ idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_alloc(
+ heap,
+ idx->n_uniq * sizeof(idx->stat_n_diff_key_vals[0]));
+
+ idx->stat_n_sample_sizes = (ib_uint64_t*) mem_heap_alloc(
+ heap,
+ idx->n_uniq * sizeof(idx->stat_n_sample_sizes[0]));
+
+ idx->stat_n_non_null_key_vals = (ib_uint64_t*) mem_heap_alloc(
+ heap,
+ idx->n_uniq * sizeof(idx->stat_n_non_null_key_vals[0]));
+ ut_d(idx->magic_n = DICT_INDEX_MAGIC_N);
+
+ idx->stat_defrag_n_page_split = 0;
+ idx->stat_defrag_n_pages_freed = 0;
+ }
+
+ ut_d(t->magic_n = DICT_TABLE_MAGIC_N);
+
+ return(t);
+}
+
+/*********************************************************************//**
+Free a dummy table object returned by dict_stats_table_clone_create().
+The object and its indexes all live in t->heap, so freeing it frees them. */
+static
+void
+dict_stats_table_clone_free(
+/*========================*/
+ dict_table_t* t) /*!< in: dummy table object to free */
+{
+ mem_heap_free(t->heap);
+}
+
+/*********************************************************************//**
+Reset the statistics members of an index to the values that describe an
+empty index: the counters become 0, sizes and sample sizes become 1. */
+static
+void
+dict_stats_empty_index(
+/*===================*/
+ dict_index_t* index, /*!< in/out: index */
+ bool empty_defrag_stats)
+ /*!< in: whether to empty defrag stats */
+{
+ ut_ad(!(index->type & DICT_FTS));
+ ut_ad(!dict_index_is_ibuf(index));
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ /* page counts are set to 1, not 0 */
+ index->stat_index_size = 1;
+ index->stat_n_leaf_pages = 1;
+
+ for (ulint prefix = index->n_uniq; prefix-- > 0; ) {
+ index->stat_n_non_null_key_vals[prefix] = 0;
+ index->stat_n_sample_sizes[prefix] = 1;
+ index->stat_n_diff_key_vals[prefix] = 0;
+ }
+
+ if (!empty_defrag_stats) {
+ return;
+ }
+ dict_stats_empty_defrag_stats(index);
+ dict_stats_empty_defrag_summary(index);
+}
+
+/*********************************************************************//**
+Write all zeros (or 1 where it makes sense) into a table and its indexes'
+statistics members. The resulting stats correspond to an empty table.
+Acquires and releases dict_sys.mutex itself; FTS indexes are not touched. */
+static
+void
+dict_stats_empty_table(
+/*===================*/
+ dict_table_t* table, /*!< in/out: table */
+ bool empty_defrag_stats)
+ /*!< in: whether to empty defrag stats */
+{
+ mutex_enter(&dict_sys.mutex);
+
+ /* Zero the stats members */
+ table->stat_n_rows = 0;
+ table->stat_clustered_index_size = 1;
+ /* 1 page for each index, not counting the clustered; guarded because
+ a table with no indexes at all would otherwise wrap around to -1 */
+ table->stat_sum_of_other_index_sizes = UT_LIST_GET_LEN(table->indexes)
+ ? UT_LIST_GET_LEN(table->indexes) - 1 : 0;
+ table->stat_modified_counter = 0;
+
+ for (dict_index_t* index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ if (index->type & DICT_FTS) {
+ continue;
+ }
+
+ ut_ad(!dict_index_is_ibuf(index));
+
+ dict_stats_empty_index(index, empty_defrag_stats);
+ }
+
+ table->stat_initialized = TRUE;
+ mutex_exit(&dict_sys.mutex);
+}
+
+/*********************************************************************//**
+Pass each stats member of an index (arrays in full) to MEM_CHECK_DEFINED(). */
+static
+void
+dict_stats_assert_initialized_index(
+/*================================*/
+ const dict_index_t* index) /*!< in: index */
+{
+ /* the three per-prefix arrays hold index->n_uniq elements each */
+ MEM_CHECK_DEFINED(
+ index->stat_n_diff_key_vals,
+ index->n_uniq * sizeof(index->stat_n_diff_key_vals[0]));
+
+ MEM_CHECK_DEFINED(
+ index->stat_n_sample_sizes,
+ index->n_uniq * sizeof(index->stat_n_sample_sizes[0]));
+
+ MEM_CHECK_DEFINED(
+ index->stat_n_non_null_key_vals,
+ index->n_uniq * sizeof(index->stat_n_non_null_key_vals[0]));
+
+ /* the scalar members are checked through their addresses */
+ MEM_CHECK_DEFINED(
+ &index->stat_index_size,
+ sizeof(index->stat_index_size));
+
+ MEM_CHECK_DEFINED(
+ &index->stat_n_leaf_pages,
+ sizeof(index->stat_n_leaf_pages));
+}
+
+/*********************************************************************//**
+Verify that a table's stats, and those of its non-ignored indexes, are set. */
+static
+void
+dict_stats_assert_initialized(
+/*==========================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ ut_a(table->stat_initialized);
+
+ MEM_CHECK_DEFINED(&table->stats_last_recalc,
+ sizeof(table->stats_last_recalc));
+
+ MEM_CHECK_DEFINED(&table->stat_persistent,
+ sizeof(table->stat_persistent));
+
+ MEM_CHECK_DEFINED(&table->stats_auto_recalc,
+ sizeof(table->stats_auto_recalc));
+
+ MEM_CHECK_DEFINED(&table->stats_sample_pages,
+ sizeof(table->stats_sample_pages));
+
+ MEM_CHECK_DEFINED(&table->stat_n_rows,
+ sizeof(table->stat_n_rows));
+
+ MEM_CHECK_DEFINED(&table->stat_clustered_index_size,
+ sizeof(table->stat_clustered_index_size));
+
+ MEM_CHECK_DEFINED(&table->stat_sum_of_other_index_sizes,
+ sizeof(table->stat_sum_of_other_index_sizes));
+
+ MEM_CHECK_DEFINED(&table->stat_modified_counter,
+ sizeof(table->stat_modified_counter));
+
+ MEM_CHECK_DEFINED(&table->stats_bg_flag,
+ sizeof(table->stats_bg_flag));
+
+ dict_index_t* index = dict_table_get_first_index(table);
+
+ while (index != NULL) {
+ if (!dict_stats_should_ignore_index(index)) {
+ dict_stats_assert_initialized_index(index);
+ }
+ index = dict_table_get_next_index(index);
+ }
+}
+
+#define INDEX_EQ(i1, i2) \
+ ((i1) != NULL \
+ && (i2) != NULL \
+ && (i1)->id == (i2)->id \
+ && strcmp((i1)->name, (i2)->name) == 0)
+
+/*********************************************************************//**
+Copy table and index statistics from one table to another. Indexes are
+matched by id and name (INDEX_EQ); extra indexes in src are ignored and
+extra indexes in dst are initialized to correspond to an empty index. */
+static
+void
+dict_stats_copy(
+/*============*/
+ dict_table_t* dst, /*!< in/out: destination table */
+ const dict_table_t* src, /*!< in: source table */
+ bool reset_ignored_indexes) /*!< in: if true, set ignored indexes
+ to have the same statistics as if
+ the table was empty */
+{
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ dst->stats_last_recalc = src->stats_last_recalc;
+ dst->stat_n_rows = src->stat_n_rows;
+ dst->stat_clustered_index_size = src->stat_clustered_index_size;
+ dst->stat_sum_of_other_index_sizes = src->stat_sum_of_other_index_sizes;
+ dst->stat_modified_counter = src->stat_modified_counter;
+
+ dict_index_t* dst_idx;
+ dict_index_t* src_idx;
+
+ /* walk both index lists in step; src_idx stops advancing at NULL */
+ for (dst_idx = dict_table_get_first_index(dst),
+ src_idx = dict_table_get_first_index(src);
+ dst_idx != NULL;
+ dst_idx = dict_table_get_next_index(dst_idx),
+ (src_idx != NULL
+ && (src_idx = dict_table_get_next_index(src_idx)))) {
+
+ if (dict_stats_should_ignore_index(dst_idx)) {
+ if (reset_ignored_indexes) {
+ /* Reset index statistics for all ignored indexes,
+ unless they are FT indexes (these have no statistics)*/
+ if (dst_idx->type & DICT_FTS) {
+ continue;
+ }
+ dict_stats_empty_index(dst_idx, true); /* NOTE(review): no continue, falls through to the copy below - confirm intended */
+ } else {
+ continue;
+ }
+ }
+
+ ut_ad(!dict_index_is_ibuf(dst_idx));
+ /* the lists are not aligned here: search all of src for a match */
+ if (!INDEX_EQ(src_idx, dst_idx)) {
+ for (src_idx = dict_table_get_first_index(src);
+ src_idx != NULL;
+ src_idx = dict_table_get_next_index(src_idx)) {
+
+ if (INDEX_EQ(src_idx, dst_idx)) {
+ break;
+ }
+ }
+ }
+ /* still no match: src has no index with the same id and name */
+ if (!INDEX_EQ(src_idx, dst_idx)) {
+ dict_stats_empty_index(dst_idx, true);
+ continue;
+ }
+
+ ulint n_copy_el;
+
+ if (dst_idx->n_uniq > src_idx->n_uniq) {
+ n_copy_el = src_idx->n_uniq;
+ /* Since src is smaller some elements in dst
+ will remain untouched by the following memmove(),
+ thus we init all of them here. */
+ dict_stats_empty_index(dst_idx, true);
+ } else {
+ n_copy_el = dst_idx->n_uniq;
+ }
+
+ memmove(dst_idx->stat_n_diff_key_vals,
+ src_idx->stat_n_diff_key_vals,
+ n_copy_el * sizeof(dst_idx->stat_n_diff_key_vals[0]));
+
+ memmove(dst_idx->stat_n_sample_sizes,
+ src_idx->stat_n_sample_sizes,
+ n_copy_el * sizeof(dst_idx->stat_n_sample_sizes[0]));
+
+ memmove(dst_idx->stat_n_non_null_key_vals,
+ src_idx->stat_n_non_null_key_vals,
+ n_copy_el * sizeof(dst_idx->stat_n_non_null_key_vals[0]));
+
+ dst_idx->stat_index_size = src_idx->stat_index_size;
+
+ dst_idx->stat_n_leaf_pages = src_idx->stat_n_leaf_pages;
+
+ dst_idx->stat_defrag_modified_counter =
+ src_idx->stat_defrag_modified_counter;
+ dst_idx->stat_defrag_n_pages_freed =
+ src_idx->stat_defrag_n_pages_freed;
+ dst_idx->stat_defrag_n_page_split =
+ src_idx->stat_defrag_n_page_split;
+ }
+
+ dst->stat_initialized = TRUE;
+}
+
+/** Take a snapshot of the stats of a table and its indexes.
+A dummy dict_table_t object is created and the stats of the input table
+are copied into it while dict_sys.mutex is held. The returned object is
+not in the dictionary cache and cannot be accessed by any other threads.
+Besides the members set up by dict_stats_table_clone_create(), these are
+initialized:
+dict_table_t::stat_initialized
+dict_table_t::stat_persistent, dict_table_t::stats_auto_recalc
+dict_table_t::stats_sample_pages, dict_table_t::stats_bg_flag
+dict_table_t::stats_last_recalc, dict_table_t::stat_n_rows
+dict_table_t::stat_clustered_index_size
+dict_table_t::stat_sum_of_other_index_sizes
+dict_table_t::stat_modified_counter
+dict_index_t::stat_n_diff_key_vals[]
+dict_index_t::stat_n_sample_sizes[]
+dict_index_t::stat_n_non_null_key_vals[]
+dict_index_t::stat_index_size
+dict_index_t::stat_n_leaf_pages
+dict_index_t::stat_defrag_modified_counter
+dict_index_t::stat_defrag_n_pages_freed
+dict_index_t::stat_defrag_n_page_split
+Free the returned object with dict_stats_snapshot_free() when done.
+@param[in] table table whose stats to copy
+@return incomplete table object */
+static
+dict_table_t*
+dict_stats_snapshot_create(
+ dict_table_t* table)
+{
+ mutex_enter(&dict_sys.mutex);
+
+ dict_stats_assert_initialized(table);
+
+ /* build the skeleton first, then fill in the statistics */
+ dict_table_t* const snapshot = dict_stats_table_clone_create(table);
+
+ dict_stats_copy(snapshot, table, false);
+
+ /* members that dict_stats_copy() does not assign */
+ snapshot->stats_bg_flag = table->stats_bg_flag;
+ snapshot->stats_sample_pages = table->stats_sample_pages;
+ snapshot->stats_auto_recalc = table->stats_auto_recalc;
+ snapshot->stat_persistent = table->stat_persistent;
+
+ mutex_exit(&dict_sys.mutex);
+
+ return(snapshot);
+}
+
+/*********************************************************************//**
+Free the resources occupied by an object returned by
+dict_stats_snapshot_create(); delegates to dict_stats_table_clone_free(). */
+static
+void
+dict_stats_snapshot_free(
+/*=====================*/
+ dict_table_t* t) /*!< in: dummy table object to free */
+{
+ dict_stats_table_clone_free(t);
+}
+
+/*********************************************************************//**
+Calculates new estimates for index statistics. This function is
+relatively quick and is used to calculate transient statistics that
+are not saved on disk. This was the only way to calculate statistics
+before the Persistent Statistics feature was introduced.
+This function doesn't update the defragmentation related stats.
+Only persistent statistics supports defragmentation stats. */
+static
+void
+dict_stats_update_transient_for_index(
+/*==================================*/
+ dict_index_t* index) /*!< in/out: index */
+{
+ if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
+ && (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO
+ || !dict_index_is_clust(index))) {
+ /* If we have set a high innodb_force_recovery
+ level, do not calculate statistics, as a badly
+ corrupted index can cause a crash in it.
+ Initialize some bogus index cardinality
+ statistics, so that the data can be queried in
+ various means, also via secondary indexes. */
+ mutex_enter(&dict_sys.mutex);
+ dict_stats_empty_index(index, false);
+ mutex_exit(&dict_sys.mutex);
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+ } else if (ibuf_debug && !dict_index_is_clust(index)) {
+ mutex_enter(&dict_sys.mutex);
+ dict_stats_empty_index(index, false);
+ mutex_exit(&dict_sys.mutex);
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+ } else {
+ mtr_t mtr;
+ ulint size;
+
+ mtr.start();
+ mtr_s_lock_index(index, &mtr);
+ size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
+
+ if (size != ULINT_UNDEFINED) {
+ index->stat_index_size = size;
+
+ size = btr_get_size(
+ index, BTR_N_LEAF_PAGES, &mtr);
+ }
+
+ mtr.commit();
+ /* size: leaf page count, or ULINT_UNDEFINED from either call */
+ switch (size) {
+ case ULINT_UNDEFINED:
+ mutex_enter(&dict_sys.mutex);
+ dict_stats_empty_index(index, false);
+ mutex_exit(&dict_sys.mutex);
+ return;
+ case 0:
+ /* The root node of the tree is a leaf */
+ size = 1;
+ }
+
+ index->stat_n_leaf_pages = size;
+
+ /* Do not continue if table decryption has failed or
+ table is already marked as corrupted. */
+ if (index->is_readable()) {
+ std::vector<index_field_stats_t> stats
+ = btr_estimate_number_of_different_key_vals(
+ index);
+
+ if (!stats.empty()) {
+ ut_ad(!mutex_own(&dict_sys.mutex));
+ mutex_enter(&dict_sys.mutex);
+ for (size_t i = 0; i < stats.size(); ++i) {
+ index->stat_n_diff_key_vals[i]
+ = stats[i].n_diff_key_vals;
+ index->stat_n_sample_sizes[i]
+ = stats[i].n_sample_sizes;
+ index->stat_n_non_null_key_vals[i]
+ = stats[i].n_non_null_key_vals;
+ }
+ mutex_exit(&dict_sys.mutex);
+ }
+ }
+ }
+}
+
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. This function
+is relatively quick and is used to calculate transient statistics that
+are not saved on disk.
+This was the only way to calculate statistics before the
+Persistent Statistics feature was introduced. */
+static
+void
+dict_stats_update_transient(
+/*========================*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ ut_ad(!mutex_own(&dict_sys.mutex));
+
+ dict_index_t* index;
+ ulint sum_of_other_index_sizes = 0;
+
+ /* Find out the sizes of the indexes and how many different values
+ for the key they approximately have */
+
+ index = dict_table_get_first_index(table);
+
+ if (!table->space) {
+ /* Nothing to do. */
+ dict_stats_empty_table(table, true);
+ return;
+ } else if (index == NULL) {
+ /* Table definition is corrupt */
+
+ ib::warn() << "Table " << table->name
+ << " has no indexes. Cannot calculate statistics.";
+ dict_stats_empty_table(table, true);
+ return;
+ }
+
+ for (; index != NULL; index = dict_table_get_next_index(index)) {
+
+ ut_ad(!dict_index_is_ibuf(index));
+
+ if (index->type & (DICT_FTS | DICT_SPATIAL)) {
+ continue;
+ }
+
+ if (dict_stats_should_ignore_index(index)
+ || !index->is_readable()) {
+ mutex_enter(&dict_sys.mutex);
+ dict_stats_empty_index(index, false);
+ mutex_exit(&dict_sys.mutex);
+ continue;
+ }
+
+ dict_stats_update_transient_for_index(index);
+ sum_of_other_index_sizes += dict_index_is_clust(index)
+ ? 0 : index->stat_index_size;
+ }
+
+ mutex_enter(&dict_sys.mutex);
+
+ index = dict_table_get_first_index(table);
+
+ table->stat_n_rows = index->stat_n_diff_key_vals[
+ dict_index_get_n_unique(index) - 1];
+
+ table->stat_clustered_index_size = index->stat_index_size;
+ /* Only secondary index sizes were summed up, because the clustered
+ index may have been skipped above; subtracting its size could wrap. */
+ table->stat_sum_of_other_index_sizes = sum_of_other_index_sizes;
+
+ table->stats_last_recalc = time(NULL);
+
+ table->stat_modified_counter = 0;
+
+ table->stat_initialized = TRUE;
+
+ mutex_exit(&dict_sys.mutex);
+}
+
+/* @{ Pseudo code about the relation between the following functions
+
+let N = N_SAMPLE_PAGES(index)
+
+dict_stats_analyze_index()
+ for each n_prefix
+ search for good enough level:
+ dict_stats_analyze_index_level() // only called if level has <= N pages
+ // full scan of the level in one mtr
+ collect statistics about the given level
+ if we are not satisfied with the level, search next lower level
+ we have found a good enough level here
+ dict_stats_analyze_index_for_n_prefix(that level, stats collected above)
+ // full scan of the level in one mtr
+ dive below some records and analyze the leaf page there:
+ dict_stats_analyze_index_below_cur()
+@} */
+
+/*********************************************************************//**
+Find the total number and the number of distinct keys on a given level in
+an index. Each of the 1..n_uniq prefixes are looked up and the results are
+saved in the array n_diff[0] .. n_diff[n_uniq - 1]. The total number of
+records on the level is saved in total_recs (and of pages in total_pages).
+Also, the index of the last record in each group of equal records is saved
+in n_diff_boundaries[0..n_uniq - 1] (unless that is NULL); record indexing
+starts from the leftmost record, counts from 0 and crosses page boundaries. */
+static
+void
+dict_stats_analyze_index_level(
+/*===========================*/
+ dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: level */
+ ib_uint64_t* n_diff, /*!< out: array for number of
+ distinct keys for all prefixes */
+ ib_uint64_t* total_recs, /*!< out: total number of records */
+ ib_uint64_t* total_pages, /*!< out: total number of pages */
+ boundaries_t* n_diff_boundaries,/*!< out: boundaries of the groups
+ of distinct keys */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ ulint n_uniq;
+ mem_heap_t* heap;
+ btr_pcur_t pcur;
+ const page_t* page;
+ const rec_t* rec;
+ const rec_t* prev_rec;
+ bool prev_rec_is_copied;
+ byte* prev_rec_buf = NULL;
+ ulint prev_rec_buf_size = 0;
+ rec_offs* rec_offsets;
+ rec_offs* prev_rec_offsets;
+ ulint i;
+
+ DEBUG_PRINTF(" %s(table=%s, index=%s, level=" ULINTPF ")\n",
+ __func__, index->table->name, index->name, level);
+
+ ut_ad(mtr->memo_contains(index->lock, MTR_MEMO_SX_LOCK));
+
+ n_uniq = dict_index_get_n_unique(index);
+
+ /* elements in the n_diff array are 0..n_uniq-1 (inclusive) */
+ memset(n_diff, 0x0, n_uniq * sizeof(n_diff[0]));
+
+ /* Allocate space for the offsets header (the allocation size at
+ offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_uniq + 1,
+ so that this will never be less than the size calculated in
+ rec_get_offsets_func(). */
+ i = (REC_OFFS_HEADER_SIZE + 1 + 1) + n_uniq;
+
+ heap = mem_heap_create((2 * sizeof *rec_offsets) * i);
+ rec_offsets = static_cast<rec_offs*>(
+ mem_heap_alloc(heap, i * sizeof *rec_offsets));
+ prev_rec_offsets = static_cast<rec_offs*>(
+ mem_heap_alloc(heap, i * sizeof *prev_rec_offsets));
+ rec_offs_set_n_alloc(rec_offsets, i);
+ rec_offs_set_n_alloc(prev_rec_offsets, i);
+
+ /* reset the dynamic arrays n_diff_boundaries[0..n_uniq-1] */
+ if (n_diff_boundaries != NULL) {
+ for (i = 0; i < n_uniq; i++) {
+ n_diff_boundaries[i].erase(
+ n_diff_boundaries[i].begin(),
+ n_diff_boundaries[i].end());
+ }
+ }
+
+ /* Position pcur on the leftmost record on the leftmost page
+ on the desired level. */
+
+ btr_pcur_open_at_index_side(
+ true, index, BTR_SEARCH_TREE_ALREADY_S_LATCHED,
+ &pcur, true, level, mtr);
+ btr_pcur_move_to_next_on_page(&pcur);
+
+ page = btr_pcur_get_page(&pcur);
+
+ /* The page must not be empty, except when
+ it is the root page (and the whole index is empty). */
+ ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page));
+ ut_ad(btr_pcur_get_rec(&pcur)
+ == page_rec_get_next_const(page_get_infimum_rec(page)));
+
+ /* check that we are indeed on the desired level */
+ ut_a(btr_page_get_level(page) == level);
+
+ /* there should not be any pages on the left */
+ ut_a(!page_has_prev(page));
+
+ if (REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
+ btr_pcur_get_rec(&pcur), page_is_comp(page))) {
+ ut_ad(btr_pcur_is_on_user_rec(&pcur));
+ if (level == 0) {
+ /* Skip the metadata pseudo-record */
+ ut_ad(index->is_instant());
+ btr_pcur_move_to_next_user_rec(&pcur, mtr);
+ }
+ } else {
+ /* The first record on the leftmost page must be
+ marked as such on each level except the leaf level. */
+ ut_a(level == 0);
+ }
+
+ prev_rec = NULL;
+ prev_rec_is_copied = false;
+
+ /* no records by default */
+ *total_recs = 0;
+
+ *total_pages = 0;
+
+ /* iterate over all user records on this level
+ and compare each two adjacent ones, even the last on page
+ X and the first on page X+1 */
+ for (;
+ btr_pcur_is_on_user_rec(&pcur);
+ btr_pcur_move_to_next_user_rec(&pcur, mtr)) {
+
+ bool rec_is_last_on_page;
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ /* If rec and prev_rec are on different pages, then prev_rec
+ must have been copied, because we hold latch only on the page
+ where rec resides. */
+ if (prev_rec != NULL
+ && page_align(rec) != page_align(prev_rec)) {
+
+ ut_a(prev_rec_is_copied);
+ }
+
+ rec_is_last_on_page =
+ page_rec_is_supremum(page_rec_get_next_const(rec));
+
+ /* increment the pages counter at the end of each page */
+ if (rec_is_last_on_page) {
+
+ (*total_pages)++;
+ }
+
+ /* Skip delete-marked records on the leaf level. If we
+ do not skip them, then ANALYZE quickly after DELETE
+ could count them or not (purge may have already wiped
+ them away) which brings non-determinism. We skip only
+ leaf-level delete marks because delete marks on
+ non-leaf level do not make sense. */
+
+ if (level == 0
+ && !srv_stats_include_delete_marked
+ && rec_get_deleted_flag(
+ rec,
+ page_is_comp(btr_pcur_get_page(&pcur)))) {
+
+ if (rec_is_last_on_page
+ && !prev_rec_is_copied
+ && prev_rec != NULL) {
+ /* copy prev_rec */
+
+ prev_rec_offsets = rec_get_offsets(
+ prev_rec, index, prev_rec_offsets,
+ index->n_core_fields,
+ n_uniq, &heap);
+
+ prev_rec = rec_copy_prefix_to_buf(
+ prev_rec, index, n_uniq,
+ &prev_rec_buf, &prev_rec_buf_size);
+
+ prev_rec_is_copied = true;
+ }
+
+ continue;
+ }
+ rec_offsets = rec_get_offsets(rec, index, rec_offsets,
+ level ? 0 : index->n_core_fields,
+ n_uniq, &heap);
+
+ (*total_recs)++;
+
+ if (prev_rec != NULL) {
+ ulint matched_fields;
+
+ prev_rec_offsets = rec_get_offsets(
+ prev_rec, index, prev_rec_offsets,
+ level ? 0 : index->n_core_fields,
+ n_uniq, &heap);
+
+ cmp_rec_rec(prev_rec, rec,
+ prev_rec_offsets, rec_offsets, index,
+ false, &matched_fields);
+
+ for (i = matched_fields; i < n_uniq; i++) {
+
+ if (n_diff_boundaries != NULL) {
+ /* push the index of the previous
+ record, that is - the last one from
+ a group of equal keys */
+
+ ib_uint64_t idx;
+
+ /* the index of the current record
+ is total_recs - 1, the index of the
+ previous record is total_recs - 2;
+ we know that idx is not going to
+ become negative here because if we
+ are in this branch then there is a
+ previous record and thus
+ total_recs >= 2 */
+ idx = *total_recs - 2;
+
+ n_diff_boundaries[i].push_back(idx);
+ }
+
+ /* increment the number of different keys
+ for n_prefix=i+1 (e.g. if i=0 then we increment
+ for n_prefix=1 which is stored in n_diff[0]) */
+ n_diff[i]++;
+ }
+ } else {
+ /* this is the first non-delete marked record */
+ for (i = 0; i < n_uniq; i++) {
+ n_diff[i] = 1;
+ }
+ }
+
+ if (rec_is_last_on_page) {
+ /* end of a page has been reached */
+
+ /* we need to copy the record instead of assigning
+ like prev_rec = rec; because when we traverse the
+ records on this level at some point we will jump from
+ one page to the next and then rec and prev_rec will
+ be on different pages and
+ btr_pcur_move_to_next_user_rec() will release the
+ latch on the page that prev_rec is on */
+ prev_rec = rec_copy_prefix_to_buf(
+ rec, index, n_uniq,
+ &prev_rec_buf, &prev_rec_buf_size);
+ prev_rec_is_copied = true;
+
+ } else {
+ /* still on the same page, the next call to
+ btr_pcur_move_to_next_user_rec() will not jump
+ on the next page, we can simply assign pointers
+ instead of copying the records like above */
+
+ prev_rec = rec;
+ prev_rec_is_copied = false;
+ }
+ }
+
+ /* if *total_pages is left untouched then the above loop was not
+ entered at all and there is one page in the whole tree which is
+ empty or the loop was entered but this is level 0, contains one page
+ and all records are delete-marked */
+ if (*total_pages == 0) {
+
+ ut_ad(level == 0);
+ ut_ad(*total_recs == 0);
+
+ *total_pages = 1;
+ }
+
+ /* if there are records on this level and boundaries
+ should be saved */
+ if (*total_recs > 0 && n_diff_boundaries != NULL) {
+
+ /* remember the index of the last record on the level as the
+ last one from the last group of equal keys; this holds for
+ all possible prefixes */
+ for (i = 0; i < n_uniq; i++) {
+ ib_uint64_t idx;
+
+ idx = *total_recs - 1;
+
+ n_diff_boundaries[i].push_back(idx);
+ }
+ }
+
+ /* now in n_diff_boundaries[i] there are exactly n_diff[i] integers,
+ for i=0..n_uniq-1 */
+
+#ifdef UNIV_STATS_DEBUG
+ for (i = 0; i < n_uniq; i++) {
+
+ DEBUG_PRINTF(" %s(): total recs: " UINT64PF
+ ", total pages: " UINT64PF
+ ", n_diff[" ULINTPF "]: " UINT64PF "\n",
+ __func__, *total_recs,
+ *total_pages,
+ i, n_diff[i]);
+
+#if 0
+ if (n_diff_boundaries != NULL) {
+ ib_uint64_t j;
+
+ DEBUG_PRINTF(" %s(): boundaries[%lu]: ",
+ __func__, i);
+
+ for (j = 0; j < n_diff[i]; j++) {
+ ib_uint64_t idx;
+
+ idx = n_diff_boundaries[i][j];
+
+ DEBUG_PRINTF(UINT64PF "=" UINT64PF ", ",
+ j, idx);
+ }
+ DEBUG_PRINTF("\n");
+ }
+#endif
+ }
+#endif /* UNIV_STATS_DEBUG */
+
+ /* Release the latch on the last page, because that is not done by
+ btr_pcur_close(). This function works also for non-leaf pages. */
+ btr_leaf_page_release(btr_pcur_get_block(&pcur), BTR_SEARCH_LEAF, mtr);
+
+ btr_pcur_close(&pcur);
+ ut_free(prev_rec_buf);
+ mem_heap_free(heap);
+}
+
+/** Scan a page, reading records from left to right and counting the number
+of distinct records (looking only at the first n_prefix
+columns) and the number of external pages pointed by records from this page.
+If n_core is 0 (a non-leaf page is being scanned) then the function
+will return as soon as it finds a record that does not match its neighbor
+to the right, which means that in this case the
+returned n_diff can either be 0 (empty page), 1 (the whole page has all keys
+equal) or 2 (the function found a non-boring record and returned).
+@param[out] out_rec last record visited by the scan, or NULL
+@param[out] offsets1 rec_get_offsets() working space (must
+be big enough)
+@param[out] offsets2 rec_get_offsets() working space (must
+be big enough)
+@param[in] index index of the page
+@param[in] page the page to scan
+@param[in] n_prefix look at the first n_prefix columns
+@param[in] n_core 0, or index->n_core_fields for leaf
+@param[out] n_diff number of distinct records encountered
+@param[out] n_external_pages if this is non-NULL then it will be set
+to the number of externally stored pages which were encountered
+@return offsets1 or offsets2 (the offsets of *out_rec),
+or NULL if the page is empty and does not contain user records. */
+UNIV_INLINE
+rec_offs*
+dict_stats_scan_page(
+ const rec_t** out_rec,
+ rec_offs* offsets1,
+ rec_offs* offsets2,
+ const dict_index_t* index,
+ const page_t* page,
+ ulint n_prefix,
+ ulint n_core,
+ ib_uint64_t* n_diff,
+ ib_uint64_t* n_external_pages)
+{
+ rec_offs* offsets_rec = offsets1;
+ rec_offs* offsets_next_rec = offsets2;
+ const rec_t* rec;
+ const rec_t* next_rec;
+ /* A dummy heap, to be passed to rec_get_offsets().
+ Because offsets1,offsets2 should be big enough,
+ this memory heap should never be used. */
+ mem_heap_t* heap = NULL;
+ ut_ad(!!n_core == page_is_leaf(page));
+ const rec_t* (*get_next)(const rec_t*)
+ = !n_core || srv_stats_include_delete_marked
+ ? page_rec_get_next_const
+ : page_rec_get_next_non_del_marked;
+ /* i.e. delete-marked records are skipped only on leaf pages */
+ const bool should_count_external_pages = n_external_pages != NULL;
+
+ if (should_count_external_pages) {
+ *n_external_pages = 0;
+ }
+
+ rec = get_next(page_get_infimum_rec(page));
+
+ if (page_rec_is_supremum(rec)) {
+ /* the page is empty or contains only delete-marked records */
+ *n_diff = 0;
+ *out_rec = NULL;
+ return(NULL);
+ }
+
+ offsets_rec = rec_get_offsets(rec, index, offsets_rec, n_core,
+ ULINT_UNDEFINED, &heap);
+
+ if (should_count_external_pages) {
+ *n_external_pages += btr_rec_get_externally_stored_len(
+ rec, offsets_rec);
+ }
+
+ next_rec = get_next(rec);
+
+ *n_diff = 1;
+
+ while (!page_rec_is_supremum(next_rec)) {
+
+ ulint matched_fields;
+
+ offsets_next_rec = rec_get_offsets(next_rec, index,
+ offsets_next_rec, n_core,
+ ULINT_UNDEFINED,
+ &heap);
+
+ /* check whether rec != next_rec when looking at
+ the first n_prefix fields */
+ cmp_rec_rec(rec, next_rec, offsets_rec, offsets_next_rec,
+ index, false, &matched_fields);
+
+ if (matched_fields < n_prefix) {
+ /* rec != next_rec, => rec is non-boring */
+
+ (*n_diff)++;
+
+ if (!n_core) {
+ break;
+ }
+ }
+
+ rec = next_rec;
+ /* Assign offsets_rec = offsets_next_rec so that
+ offsets_rec matches with rec which was just assigned
+ rec = next_rec above. Also need to point
+ offsets_next_rec to the place where offsets_rec was
+ pointing before because we have just 2 placeholders
+ where data is actually stored: offsets1 and offsets2
+ and we are using them in circular fashion
+ (offsets[_next]_rec are just pointers to those
+ placeholders). */
+ std::swap(offsets_rec, offsets_next_rec);
+
+ if (should_count_external_pages) {
+ *n_external_pages += btr_rec_get_externally_stored_len(
+ rec, offsets_rec);
+ }
+
+ next_rec = get_next(next_rec);
+ }
+
+ /* offsets1,offsets2 should have been big enough */
+ ut_a(heap == NULL);
+ *out_rec = rec;
+ return(offsets_rec);
+}
+
+/** Dive below the current position of a cursor and calculate the number of
+distinct records on the leaf page, when looking at the first n_prefix
+columns. Also calculate the number of external pages pointed by records
+on the leaf page.
+The function itself returns nothing; both results are passed back through
+n_diff and n_external_pages. If a non-leaf page visited during the descent
+has all keys equal, the descent stops there: *n_diff is left at 1 and
+*n_external_pages at 0.
+@param[in] cur cursor; must be positioned on a node pointer
+(non-leaf) record, see the ut_ad() below
+@param[in] n_prefix look at the first n_prefix columns
+when comparing records
+@param[out] n_diff number of distinct records
+@param[out] n_external_pages number of external pages */
+static
+void
+dict_stats_analyze_index_below_cur(
+ const btr_cur_t* cur,
+ ulint n_prefix,
+ ib_uint64_t* n_diff,
+ ib_uint64_t* n_external_pages)
+{
+ dict_index_t* index;
+ buf_block_t* block;
+ const page_t* page;
+ mem_heap_t* heap;
+ const rec_t* rec;
+ rec_offs* offsets1;
+ rec_offs* offsets2;
+ rec_offs* offsets_rec;
+ ulint size;
+ mtr_t mtr;
+
+ index = btr_cur_get_index(cur);
+
+ /* Allocate offsets for the record and the node pointer, for
+ node pointer records. In a secondary index, the node pointer
+ record will consist of all index fields followed by a child
+ page number.
+ Allocate space for the offsets header (the allocation size at
+ offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
+ so that this will never be less than the size calculated in
+ rec_get_offsets_func(). */
+ size = (1 + REC_OFFS_HEADER_SIZE) + 1 + dict_index_get_n_fields(index);
+
+ heap = mem_heap_create(size * (sizeof *offsets1 + sizeof *offsets2));
+
+ offsets1 = static_cast<rec_offs*>(mem_heap_alloc(
+ heap, size * sizeof *offsets1));
+
+ offsets2 = static_cast<rec_offs*>(mem_heap_alloc(
+ heap, size * sizeof *offsets2));
+
+ rec_offs_set_n_alloc(offsets1, size);
+ rec_offs_set_n_alloc(offsets2, size);
+
+ rec = btr_cur_get_rec(cur);
+ page = page_align(rec);
+ ut_ad(!page_rec_is_leaf(rec));
+
+ offsets_rec = rec_get_offsets(rec, index, offsets1, 0,
+ ULINT_UNDEFINED, &heap);
+
+ page_id_t page_id(index->table->space_id,
+ btr_node_ptr_get_child_page_no(
+ rec, offsets_rec));
+ const ulint zip_size = index->table->space->zip_size();
+
+ /* assume no external pages by default - in case we quit from this
+ function without analyzing any leaf pages */
+ *n_external_pages = 0;
+
+ mtr_start(&mtr);
+
+ /* descend to the leaf level on the B-tree; each iteration fetches
+ the child page below the record chosen on the previous page.
+ NOTE(review): the block returned by buf_page_get_gen() is passed to
+ buf_block_get_frame() without a NULL check and err is never read
+ afterwards - confirm that the page fetch cannot fail here. */
+ for (;;) {
+
+ dberr_t err = DB_SUCCESS;
+
+ block = buf_page_get_gen(page_id, zip_size,
+ RW_S_LATCH, NULL, BUF_GET,
+ __FILE__, __LINE__, &mtr, &err,
+ !index->is_clust()
+ && 1 == btr_page_get_level(page));
+
+ page = buf_block_get_frame(block);
+
+ if (page_is_leaf(page)) {
+ /* leaf level */
+ break;
+ }
+ /* else */
+
+ /* search for the first non-boring record on the page */
+ offsets_rec = dict_stats_scan_page(
+ &rec, offsets1, offsets2, index, page, n_prefix,
+ 0, n_diff, NULL);
+
+ /* pages on level > 0 are not allowed to be empty */
+ ut_a(offsets_rec != NULL);
+ /* if page is not empty (offsets_rec != NULL) then n_diff must
+ be > 0, otherwise there is a bug in dict_stats_scan_page() */
+ ut_a(*n_diff > 0);
+
+ if (*n_diff == 1) {
+ mtr_commit(&mtr);
+
+ /* page has all keys equal and the end of the page
+ was reached by dict_stats_scan_page(), no need to
+ descend to the leaf level */
+ mem_heap_free(heap);
+ /* can't get an estimate for n_external_pages here
+ because we do not dive to the leaf level, assume no
+ external pages (*n_external_pages was assigned to 0
+ above). */
+ return;
+ }
+ /* else */
+
+ /* when we instruct dict_stats_scan_page() to quit on the
+ first non-boring record it finds, then the returned n_diff
+ can either be 0 (empty page), 1 (page has all keys equal) or
+ 2 (non-boring record was found) */
+ ut_a(*n_diff == 2);
+
+ /* we have a non-boring record in rec, descend below it */
+
+ page_id.set_page_no(
+ btr_node_ptr_get_child_page_no(rec, offsets_rec));
+ }
+
+ /* make sure we got a leaf page as a result from the above loop */
+ ut_ad(page_is_leaf(page));
+
+ /* scan the leaf page and find the number of distinct keys,
+ when looking only at the first n_prefix columns; also estimate
+ the number of externally stored pages pointed by records on this
+ page */
+
+ offsets_rec = dict_stats_scan_page(
+ &rec, offsets1, offsets2, index, page, n_prefix,
+ index->n_core_fields, n_diff,
+ n_external_pages);
+
+#if 0
+ DEBUG_PRINTF(" %s(): n_diff below page_no=%lu: " UINT64PF "\n",
+ __func__, page_no, n_diff);
+#endif
+
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+}
+
+/** Input data that is used to calculate dict_index_t::stat_n_diff_key_vals[]
+for each n-columns prefix (n from 1 to n_uniq).
+dict_stats_analyze_index() fills in level, n_recs_on_level, n_diff_on_level
+and n_leaf_pages_to_analyze; dict_stats_analyze_index_for_n_prefix() then
+fills in n_diff_all_analyzed_pages and n_external_pages_sum. */
+struct n_diff_data_t {
+ /** Index of the level on which the descent through the btree
+ stopped. level 0 is the leaf level. This is >= 1 because we
+ avoid scanning the leaf level because it may contain too many
+ pages and doing so is useless when combined with the random dives -
+ if we are to scan the leaf level, this means a full scan and we can
+ simply do that instead of fiddling with picking random records higher
+ in the tree and to dive below them. At the start of the analyzing
+ we may decide to do full scan of the leaf level, but then this
+ structure is not used in that code path. */
+ ulint level;
+
+ /** Number of records on the level where the descent through the btree
+ stopped. When we scan the btree from the root, we stop at some mid
+ level, choose some records from it and dive below them towards a leaf
+ page to analyze. */
+ ib_uint64_t n_recs_on_level;
+
+ /** Number of different key values that were found on the mid level. */
+ ib_uint64_t n_diff_on_level;
+
+ /** Number of leaf pages that are analyzed. This is also the same as
+ the number of records that we pick from the mid level and dive below
+ them. */
+ ib_uint64_t n_leaf_pages_to_analyze;
+
+ /** Cumulative sum of the number of different key values that were
+ found on all analyzed pages. */
+ ib_uint64_t n_diff_all_analyzed_pages;
+
+ /** Cumulative sum of the number of external pages (stored outside of
+ the btree but in the same file segment). */
+ ib_uint64_t n_external_pages_sum;
+};
+
+/** Estimate the number of different key values in an index when looking at
+the first n_prefix columns. For a given level in an index select
+n_diff_data->n_leaf_pages_to_analyze records from that level and dive below
+them to the corresponding leaf pages, then scan those leaf pages and save the
+sampling results in n_diff_data->n_diff_all_analyzed_pages.
+The sampling stops early (keeping whatever was accumulated so far) if the
+level runs out of user records before the chosen record is reached.
+@param[in] index index
+@param[in] n_prefix look at first 'n_prefix' columns when
+comparing records
+@param[in] boundaries a vector that contains
+n_diff_data->n_diff_on_level integers each of which represents the index (on
+level 'level', counting from left/smallest to right/biggest from 0) of the
+last record from each group of distinct keys
+@param[in,out] n_diff_data n_diff_all_analyzed_pages and
+n_external_pages_sum in this structure will be set by this function. The
+members level, n_diff_on_level and n_leaf_pages_to_analyze must be set by the
+caller in advance - they are used by some calculations inside this function
+@param[in,out] mtr mini-transaction; it must already hold
+index->lock in MTR_MEMO_SX_LOCK mode (asserted below) */
+static
+void
+dict_stats_analyze_index_for_n_prefix(
+ dict_index_t* index,
+ ulint n_prefix,
+ const boundaries_t* boundaries,
+ n_diff_data_t* n_diff_data,
+ mtr_t* mtr)
+{
+ btr_pcur_t pcur;
+ const page_t* page;
+ ib_uint64_t rec_idx;
+ ib_uint64_t i;
+
+#if 0
+ DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu, n_prefix=%lu,"
+ " n_diff_on_level=" UINT64PF ")\n",
+ __func__, index->table->name, index->name, level,
+ n_prefix, n_diff_data->n_diff_on_level);
+#endif
+
+ ut_ad(mtr->memo_contains(index->lock, MTR_MEMO_SX_LOCK));
+
+ /* Position pcur on the leftmost record on the leftmost page
+ on the desired level. */
+
+ btr_pcur_open_at_index_side(
+ true, index, BTR_SEARCH_TREE_ALREADY_S_LATCHED,
+ &pcur, true, n_diff_data->level, mtr);
+ btr_pcur_move_to_next_on_page(&pcur);
+
+ page = btr_pcur_get_page(&pcur);
+
+ const rec_t* first_rec = btr_pcur_get_rec(&pcur);
+
+ /* We shouldn't be scanning the leaf level. The caller of this function
+ should have stopped the descent on level 1 or higher. */
+ ut_ad(n_diff_data->level > 0);
+ ut_ad(!page_is_leaf(page));
+
+ /* The page must not be empty, except when
+ it is the root page (and the whole index is empty). */
+ ut_ad(btr_pcur_is_on_user_rec(&pcur));
+ ut_ad(first_rec == page_rec_get_next_const(page_get_infimum_rec(page)));
+
+ /* check that we are indeed on the desired level */
+ ut_a(btr_page_get_level(page) == n_diff_data->level);
+
+ /* there should not be any pages on the left */
+ ut_a(!page_has_prev(page));
+
+ /* check whether the first record on the leftmost page is marked
+ as such; we are on a non-leaf level */
+ ut_a(rec_get_info_bits(first_rec, page_is_comp(page))
+ & REC_INFO_MIN_REC_FLAG);
+
+ const ib_uint64_t last_idx_on_level = boundaries->at(
+ static_cast<unsigned>(n_diff_data->n_diff_on_level - 1));
+
+ rec_idx = 0;
+
+ n_diff_data->n_diff_all_analyzed_pages = 0;
+ n_diff_data->n_external_pages_sum = 0;
+
+ for (i = 0; i < n_diff_data->n_leaf_pages_to_analyze; i++) {
+ /* there are n_diff_on_level elements
+ in 'boundaries' and we divide those elements
+ into n_leaf_pages_to_analyze segments, for example:
+
+ let n_diff_on_level=100, n_leaf_pages_to_analyze=4, then:
+ segment i=0: [0, 24]
+ segment i=1: [25, 49]
+ segment i=2: [50, 74]
+ segment i=3: [75, 99] or
+
+ let n_diff_on_level=1, n_leaf_pages_to_analyze=1, then:
+ segment i=0: [0, 0] or
+
+ let n_diff_on_level=2, n_leaf_pages_to_analyze=2, then:
+ segment i=0: [0, 0]
+ segment i=1: [1, 1] or
+
+ let n_diff_on_level=13, n_leaf_pages_to_analyze=7, then:
+ segment i=0: [0, 0]
+ segment i=1: [1, 2]
+ segment i=2: [3, 4]
+ segment i=3: [5, 6]
+ segment i=4: [7, 8]
+ segment i=5: [9, 10]
+ segment i=6: [11, 12]
+
+ then we select a random record from each segment and dive
+ below it.
+ NOTE(review): the offset inside the segment comes from
+ ut_rnd_interval(right - left); whether 'right' itself can be
+ picked depends on whether that helper's bound is inclusive -
+ confirm against its definition. */
+ const ib_uint64_t n_diff = n_diff_data->n_diff_on_level;
+ const ib_uint64_t n_pick
+ = n_diff_data->n_leaf_pages_to_analyze;
+
+ const ib_uint64_t left = n_diff * i / n_pick;
+ const ib_uint64_t right = n_diff * (i + 1) / n_pick - 1;
+
+ ut_a(left <= right);
+ ut_a(right <= last_idx_on_level);
+
+ const ulint rnd = ut_rnd_interval(
+ static_cast<ulint>(right - left));
+
+ const ib_uint64_t dive_below_idx
+ = boundaries->at(static_cast<unsigned>(left + rnd));
+
+#if 0
+ DEBUG_PRINTF(" %s(): dive below record with index="
+ UINT64PF "\n", __func__, dive_below_idx);
+#endif
+
+ /* seek to the record with index dive_below_idx; rec_idx and
+ pcur are carried over between iterations, so the level is
+ walked only forwards */
+ while (rec_idx < dive_below_idx
+ && btr_pcur_is_on_user_rec(&pcur)) {
+
+ btr_pcur_move_to_next_user_rec(&pcur, mtr);
+ rec_idx++;
+ }
+
+ /* if the level has finished before the record we are
+ searching for, this means that the B-tree has changed in
+ the meantime, quit our sampling and use whatever stats
+ we have collected so far */
+ if (rec_idx < dive_below_idx) {
+
+ ut_ad(!btr_pcur_is_on_user_rec(&pcur));
+ break;
+ }
+
+ /* it could be that the tree has changed in such a way that
+ the record under dive_below_idx is the supremum record, in
+ this case rec_idx == dive_below_idx and pcur is positioned
+ on the supremum, we do not want to dive below it */
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
+ break;
+ }
+
+ ut_a(rec_idx == dive_below_idx);
+
+ ib_uint64_t n_diff_on_leaf_page;
+ ib_uint64_t n_external_pages;
+
+ dict_stats_analyze_index_below_cur(btr_pcur_get_btr_cur(&pcur),
+ n_prefix,
+ &n_diff_on_leaf_page,
+ &n_external_pages);
+
+ /* We adjust n_diff_on_leaf_page here to avoid counting
+ one value twice - once as the last on some page and once
+ as the first on another page. Consider the following example:
+ Leaf level:
+ page: (2,2,2,2,3,3)
+ ... many pages like (3,3,3,3,3,3) ...
+ page: (3,3,3,3,5,5)
+ ... many pages like (5,5,5,5,5,5) ...
+ page: (5,5,5,5,8,8)
+ page: (8,8,8,8,9,9)
+ our algo would (correctly) get an estimate that there are
+ 2 distinct records per page (average). Having 4 pages below
+ non-boring records, it would (wrongly) estimate the number
+ of distinct records to 8. */
+ if (n_diff_on_leaf_page > 0) {
+ n_diff_on_leaf_page--;
+ }
+
+ n_diff_data->n_diff_all_analyzed_pages += n_diff_on_leaf_page;
+
+ n_diff_data->n_external_pages_sum += n_external_pages;
+ }
+
+ btr_pcur_close(&pcur);
+}
+
+/** Statistics for an index, as returned by dict_stats_analyze_index().
+stats holds one index_field_stats_t per unique-key prefix (n_uniq elements);
+its members n_diff_key_vals, n_sample_sizes and n_non_null_key_vals are
+later copied into the dict_index_t by dict_stats_update_persistent().
+index_size and n_leaf_pages receive the btr_get_size() results for
+BTR_TOTAL_SIZE and BTR_N_LEAF_PAGES in dict_stats_analyze_index().
+The constructor sets index_size and n_leaf_pages to 1 and fills stats with
+n_uniq elements, each built as index_field_stats_t(0, 1, 0). */
+struct index_stats_t
+{
+ std::vector<index_field_stats_t> stats;
+ ulint index_size;
+ ulint n_leaf_pages;
+
+ index_stats_t(ulint n_uniq) : index_size(1), n_leaf_pages(1)
+ {
+ stats.reserve(n_uniq);
+ for (ulint i= 0; i < n_uniq; ++i)
+ stats.push_back(index_field_stats_t(0, 1, 0));
+ }
+};
+
+/** Set n_diff_key_vals and n_sample_sizes in index_stats.stats[] for every
+n-column prefix (these are the values that later end up in
+dict_index_t::stat_n_diff_key_vals[] and stat_n_sample_sizes[]).
+The estimate is computed in unsigned integer arithmetic, evaluated left to
+right, so every division truncates.
+Note that the DEBUG_PRINTF at the end prints index_stats.n_leaf_pages as the
+first factor, whereas the formula actually uses n_ordinary_leaf_pages.
+@param[in] n_diff_data input data to use to derive the results;
+an array with one element per prefix, indexed by n_prefix - 1
+@param[in,out] index_stats index stats to set; n_leaf_pages is read,
+stats[] is written */
+UNIV_INLINE
+void
+dict_stats_index_set_n_diff(
+ const n_diff_data_t* n_diff_data,
+ index_stats_t& index_stats)
+{
+ for (ulint n_prefix = index_stats.stats.size();
+ n_prefix >= 1;
+ n_prefix--) {
+ /* n_diff_all_analyzed_pages can be 0 here if
+ all the leaf pages sampled contained only
+ delete-marked records. In this case we should assign
+ 0 to index_stats.stats[n_prefix - 1].n_diff_key_vals, which
+ the formula below does. */
+
+ const n_diff_data_t* data = &n_diff_data[n_prefix - 1];
+
+ ut_ad(data->n_leaf_pages_to_analyze > 0);
+ ut_ad(data->n_recs_on_level > 0);
+
+ ib_uint64_t n_ordinary_leaf_pages;
+
+ if (data->level == 1) {
+ /* If we know the number of records on level 1, then
+ this number is the same as the number of pages on
+ level 0 (leaf). */
+ n_ordinary_leaf_pages = data->n_recs_on_level;
+ } else {
+ /* If we analyzed D ordinary leaf pages and found E
+ external pages in total linked from those D ordinary
+ leaf pages, then this means that the ratio
+ ordinary/external is D/E. Then the ratio ordinary/total
+ is D / (D + E). Knowing that the total number of pages
+ is T (including ordinary and external) then we estimate
+ that the total number of ordinary leaf pages is
+ T * D / (D + E). */
+ n_ordinary_leaf_pages
+ = index_stats.n_leaf_pages
+ * data->n_leaf_pages_to_analyze
+ / (data->n_leaf_pages_to_analyze
+ + data->n_external_pages_sum);
+ }
+
+ /* See REF01 for an explanation of the algorithm */
+ index_stats.stats[n_prefix - 1].n_diff_key_vals
+ = n_ordinary_leaf_pages
+
+ * data->n_diff_on_level
+ / data->n_recs_on_level
+
+ * data->n_diff_all_analyzed_pages
+ / data->n_leaf_pages_to_analyze;
+
+ index_stats.stats[n_prefix - 1].n_sample_sizes
+ = data->n_leaf_pages_to_analyze;
+
+ DEBUG_PRINTF(" %s(): n_diff=" UINT64PF
+ " for n_prefix=" ULINTPF
+ " (" ULINTPF
+ " * " UINT64PF " / " UINT64PF
+ " * " UINT64PF " / " UINT64PF ")\n",
+ __func__,
+ index_stats.stats[n_prefix - 1].n_diff_key_vals,
+ n_prefix,
+ index_stats.n_leaf_pages,
+ data->n_diff_on_level,
+ data->n_recs_on_level,
+ data->n_diff_all_analyzed_pages,
+ data->n_leaf_pages_to_analyze);
+ }
+}
+
+/** Calculates new statistics for a given index and returns them in an
+index_stats_t (per-prefix n_diff_key_vals, n_sample_sizes and
+n_non_null_key_vals, plus index_size and n_leaf_pages). The caller is
+responsible for copying the result into the index members
+stat_n_diff_key_vals[], stat_n_sample_sizes[], stat_index_size and
+stat_n_leaf_pages. The one exception is the full-scan path, where
+dict_stats_analyze_index_level() is handed index->stat_n_diff_key_vals
+directly and the result is then read back from the index.
+For spatial indexes, and when btr_get_size() returns ULINT_UNDEFINED, the
+default-constructed result is returned (with index_size set if the first
+btr_get_size() call succeeded).
+If the tree height changes while sampling, the per-prefix values in the
+result keep their constructor defaults.
+This function can be slow.
+@param[in] index index to analyze
+@return index stats */
+static index_stats_t dict_stats_analyze_index(dict_index_t* index)
+{
+ ulint root_level;
+ ulint level;
+ bool level_is_analyzed;
+ ulint n_uniq;
+ ulint n_prefix;
+ ib_uint64_t total_recs;
+ ib_uint64_t total_pages;
+ mtr_t mtr;
+ ulint size;
+ index_stats_t result(index->n_uniq);
+ DBUG_ENTER("dict_stats_analyze_index");
+
+ DBUG_PRINT("info", ("index: %s, online status: %d", index->name(),
+ dict_index_get_online_status(index)));
+
+ ut_ad(!mutex_own(&dict_sys.mutex)); // because this function is slow
+ ut_ad(index->table->get_ref_count());
+
+ /* Disable update statistic for Rtree */
+ if (dict_index_is_spatial(index)) {
+ DBUG_RETURN(result);
+ }
+
+ DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name());
+
+ mtr.start();
+ mtr_s_lock_index(index, &mtr);
+ size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
+
+ if (size != ULINT_UNDEFINED) {
+ result.index_size = size;
+ size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
+ }
+
+ /* Release the X locks on the root page taken by btr_get_size() */
+ mtr.commit();
+
+ switch (size) {
+ case ULINT_UNDEFINED:
+ dict_stats_assert_initialized_index(index);
+ DBUG_RETURN(result);
+ case 0:
+ /* The root node of the tree is a leaf */
+ size = 1;
+ }
+
+ result.n_leaf_pages = size;
+
+ mtr.start();
+ mtr_sx_lock_index(index, &mtr);
+ root_level = btr_height_get(index, &mtr);
+
+ n_uniq = dict_index_get_n_unique(index);
+
+ /* If the tree has just one level (and one page) or if the user
+ has requested to sample too many pages then do full scan.
+
+ For each n-column prefix (for n=1..n_uniq) N_SAMPLE_PAGES(index)
+ will be sampled, so in total N_SAMPLE_PAGES(index) * n_uniq leaf
+ pages will be sampled. If that number is bigger than the total
+ number of leaf pages then do full scan of the leaf level instead
+ since it will be faster and will give better results. */
+
+ if (root_level == 0
+ || N_SAMPLE_PAGES(index) * n_uniq > result.n_leaf_pages) {
+
+ if (root_level == 0) {
+ DEBUG_PRINTF(" %s(): just one page,"
+ " doing full scan\n", __func__);
+ } else {
+ DEBUG_PRINTF(" %s(): too many pages requested for"
+ " sampling, doing full scan\n", __func__);
+ }
+
+ /* do full scan of level 0; save results directly
+ into the index. The values are then copied from the index
+ into result under dict_sys.mutex below.
+ NOTE(review): result.n_leaf_pages is also overwritten there
+ with index->stat_n_leaf_pages, replacing the value obtained
+ from btr_get_size() above - confirm this is intended. */
+
+ dict_stats_analyze_index_level(index,
+ 0 /* leaf level */,
+ index->stat_n_diff_key_vals,
+ &total_recs,
+ &total_pages,
+ NULL /* boundaries not needed */,
+ &mtr);
+
+ mtr.commit();
+
+ mutex_enter(&dict_sys.mutex);
+ for (ulint i = 0; i < n_uniq; i++) {
+ result.stats[i].n_diff_key_vals = index->stat_n_diff_key_vals[i];
+ result.stats[i].n_sample_sizes = total_pages;
+ result.stats[i].n_non_null_key_vals = index->stat_n_non_null_key_vals[i];
+ }
+ result.n_leaf_pages = index->stat_n_leaf_pages;
+ mutex_exit(&dict_sys.mutex);
+
+ DBUG_RETURN(result);
+ }
+
+ /* For each level that is being scanned in the btree, this contains the
+ number of different key values for all possible n-column prefixes. */
+ ib_uint64_t* n_diff_on_level = UT_NEW_ARRAY(
+ ib_uint64_t, n_uniq, mem_key_dict_stats_n_diff_on_level);
+
+ /* For each level that is being scanned in the btree, this contains the
+ index of the last record from each group of equal records (when
+ comparing only the first n columns, n=1..n_uniq). */
+ boundaries_t* n_diff_boundaries = UT_NEW_ARRAY_NOKEY(boundaries_t,
+ n_uniq);
+
+ /* For each n-column prefix this array contains the input data that is
+ used to calculate dict_index_t::stat_n_diff_key_vals[]. */
+ n_diff_data_t* n_diff_data = UT_NEW_ARRAY_NOKEY(n_diff_data_t, n_uniq);
+
+ /* total_recs is also used to estimate the number of pages on one
+ level below, so at the start we have 1 page (the root) */
+ total_recs = 1;
+
+ /* Here we use the following optimization:
+ If we find that level L is the first one (searching from the
+ root) that contains at least D distinct keys when looking at
+ the first n_prefix columns, then:
+ if we look at the first n_prefix-1 columns then the first
+ level that contains D distinct keys will be either L or a
+ lower one.
+ So if we find that the first level containing D distinct
+ keys (on n_prefix columns) is L, we continue from L when
+ searching for D distinct keys on n_prefix-1 columns. */
+ level = root_level;
+ level_is_analyzed = false;
+
+ for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) {
+
+ DEBUG_PRINTF(" %s(): searching level with >=%llu "
+ "distinct records, n_prefix=" ULINTPF "\n",
+ __func__, N_DIFF_REQUIRED(index), n_prefix);
+
+ /* Commit the mtr to release the index tree lock (taken in SX
+ mode by mtr_sx_lock_index()) to allow other threads to do
+ some work too, then start a new mtr and take the lock again. */
+ mtr.commit();
+ mtr.start();
+ mtr_sx_lock_index(index, &mtr);
+ if (root_level != btr_height_get(index, &mtr)) {
+ /* Just quit if the tree has changed beyond
+ recognition here. The old stats from previous
+ runs will remain in the values that we have
+ not calculated yet. Initially when the index
+ object is created the stats members are given
+ some sensible values so leaving them untouched
+ here even the first time will not cause us to
+ read uninitialized memory later. */
+ break;
+ }
+
+ /* check whether we should pick the current level;
+ we pick level 1 even if it does not have enough
+ distinct records because we do not want to scan the
+ leaf level because it may contain too many records */
+ if (level_is_analyzed
+ && (n_diff_on_level[n_prefix - 1] >= N_DIFF_REQUIRED(index)
+ || level == 1)) {
+
+ goto found_level;
+ }
+
+ /* search for a level that contains enough distinct records */
+
+ if (level_is_analyzed && level > 1) {
+
+ /* if this does not hold we should be on
+ "found_level" instead of here */
+ ut_ad(n_diff_on_level[n_prefix - 1]
+ < N_DIFF_REQUIRED(index));
+
+ level--;
+ level_is_analyzed = false;
+ }
+
+ /* descend into the tree, searching for "good enough" level */
+ for (;;) {
+
+ /* make sure we do not scan the leaf level
+ accidentally, it may contain too many pages */
+ ut_ad(level > 0);
+
+ /* scanning the same level twice is an optimization
+ bug */
+ ut_ad(!level_is_analyzed);
+
+ /* Do not scan if this would read too many pages.
+ Here we use the following fact:
+ the number of pages on level L equals the number
+ of records on level L+1, thus we deduce that the
+ following call would scan total_recs pages, because
+ total_recs is left from the previous iteration when
+ we scanned one level upper or we have not scanned any
+ levels yet in which case total_recs is 1. */
+ if (total_recs > N_SAMPLE_PAGES(index)) {
+
+ /* if the above cond is true then we are
+ not at the root level since on the root
+ level total_recs == 1 (set before we
+ enter the n-prefix loop) and cannot
+ be > N_SAMPLE_PAGES(index) */
+ ut_a(level != root_level);
+
+ /* step one level back and be satisfied with
+ whatever it contains */
+ level++;
+ level_is_analyzed = true;
+
+ break;
+ }
+
+ dict_stats_analyze_index_level(index,
+ level,
+ n_diff_on_level,
+ &total_recs,
+ &total_pages,
+ n_diff_boundaries,
+ &mtr);
+
+ level_is_analyzed = true;
+
+ if (level == 1
+ || n_diff_on_level[n_prefix - 1]
+ >= N_DIFF_REQUIRED(index)) {
+ /* we have reached the last level we could scan
+ or we found a good level with many distinct
+ records */
+ break;
+ }
+
+ level--;
+ level_is_analyzed = false;
+ }
+found_level:
+
+ DEBUG_PRINTF(" %s(): found level " ULINTPF
+ " that has " UINT64PF
+ " distinct records for n_prefix=" ULINTPF "\n",
+ __func__, level, n_diff_on_level[n_prefix - 1],
+ n_prefix);
+ /* here we are either on level 1 or the level that we are on
+ contains >= N_DIFF_REQUIRED distinct keys or we did not scan
+ deeper levels because they would contain too many pages */
+
+ ut_ad(level > 0);
+
+ ut_ad(level_is_analyzed);
+
+ /* if any of these is 0 then there is exactly one page in the
+ B-tree and it is empty and we should have done full scan and
+ should not be here */
+ ut_ad(total_recs > 0);
+ ut_ad(n_diff_on_level[n_prefix - 1] > 0);
+
+ ut_ad(N_SAMPLE_PAGES(index) > 0);
+
+ n_diff_data_t* data = &n_diff_data[n_prefix - 1];
+
+ data->level = level;
+
+ data->n_recs_on_level = total_recs;
+
+ data->n_diff_on_level = n_diff_on_level[n_prefix - 1];
+
+ data->n_leaf_pages_to_analyze = std::min(
+ N_SAMPLE_PAGES(index),
+ n_diff_on_level[n_prefix - 1]);
+
+ /* pick some records from this level and dive below them for
+ the given n_prefix */
+
+ dict_stats_analyze_index_for_n_prefix(
+ index, n_prefix, &n_diff_boundaries[n_prefix - 1],
+ data, &mtr);
+ }
+
+ mtr.commit();
+
+ UT_DELETE_ARRAY(n_diff_boundaries);
+
+ UT_DELETE_ARRAY(n_diff_on_level);
+
+ /* n_prefix == 0 means that the above loop did not end up prematurely
+ due to tree being changed and so n_diff_data[] is set up. */
+ if (n_prefix == 0) {
+ dict_stats_index_set_n_diff(n_diff_data, result);
+ }
+
+ UT_DELETE_ARRAY(n_diff_data);
+
+ DBUG_RETURN(result);
+}
+
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. This function
+is relatively slow and is used to calculate persistent statistics that
+will be saved on disk.
+The clustered index is analyzed first and table->stat_n_rows is taken from
+its n_diff_key_vals for the full unique prefix; the remaining indexes
+(except fulltext and spatial ones) are analyzed afterwards.
+dict_sys.mutex is held while the index and table statistics members are
+written, and is released around every dict_stats_analyze_index() call.
+@return DB_SUCCESS, or DB_CORRUPTION if the first index is missing,
+corrupted or not a clustered index (the table statistics are emptied
+in that case) */
+static
+dberr_t
+dict_stats_update_persistent(
+/*=========================*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ dict_index_t* index;
+
+ DEBUG_PRINTF("%s(table=%s)\n", __func__, table->name);
+
+ DEBUG_SYNC_C("dict_stats_update_persistent");
+
+ /* analyze the clustered index first; the type check below accepts
+ the first index only if, ignoring the DICT_UNIQUE bit, its type is
+ exactly DICT_CLUSTERED */
+
+ index = dict_table_get_first_index(table);
+
+ if (index == NULL
+ || index->is_corrupted()
+ || (index->type | DICT_UNIQUE) != (DICT_CLUSTERED | DICT_UNIQUE)) {
+
+ /* Table definition is corrupt */
+ dict_stats_empty_table(table, true);
+
+ return(DB_CORRUPTION);
+ }
+
+ ut_ad(!dict_index_is_ibuf(index));
+ mutex_enter(&dict_sys.mutex);
+ dict_stats_empty_index(index, false);
+ mutex_exit(&dict_sys.mutex);
+
+ index_stats_t stats = dict_stats_analyze_index(index);
+
+ mutex_enter(&dict_sys.mutex);
+ index->stat_index_size = stats.index_size;
+ index->stat_n_leaf_pages = stats.n_leaf_pages;
+ for (size_t i = 0; i < stats.stats.size(); ++i) {
+ index->stat_n_diff_key_vals[i] = stats.stats[i].n_diff_key_vals;
+ index->stat_n_sample_sizes[i] = stats.stats[i].n_sample_sizes;
+ index->stat_n_non_null_key_vals[i] = stats.stats[i].n_non_null_key_vals;
+ }
+
+ ulint n_unique = dict_index_get_n_unique(index);
+
+ table->stat_n_rows = index->stat_n_diff_key_vals[n_unique - 1];
+
+ table->stat_clustered_index_size = index->stat_index_size;
+
+ /* analyze other indexes from the table, if any.
+ Every such index first has its statistics emptied by
+ dict_stats_empty_index(). If BG_STAT_SHOULD_QUIT is set in
+ table->stats_bg_flag, the index is not analyzed and keeps those
+ emptied values, but its stat_index_size is still added to
+ stat_sum_of_other_index_sizes. */
+
+ table->stat_sum_of_other_index_sizes = 0;
+
+ for (index = dict_table_get_next_index(index);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ ut_ad(!dict_index_is_ibuf(index));
+
+ if (index->type & (DICT_FTS | DICT_SPATIAL)) {
+ continue;
+ }
+
+ dict_stats_empty_index(index, false);
+
+ if (dict_stats_should_ignore_index(index)) {
+ continue;
+ }
+
+ if (!(table->stats_bg_flag & BG_STAT_SHOULD_QUIT)) {
+ mutex_exit(&dict_sys.mutex);
+ stats = dict_stats_analyze_index(index);
+ mutex_enter(&dict_sys.mutex);
+
+ index->stat_index_size = stats.index_size;
+ index->stat_n_leaf_pages = stats.n_leaf_pages;
+ for (size_t i = 0; i < stats.stats.size(); ++i) {
+ index->stat_n_diff_key_vals[i]
+ = stats.stats[i].n_diff_key_vals;
+ index->stat_n_sample_sizes[i]
+ = stats.stats[i].n_sample_sizes;
+ index->stat_n_non_null_key_vals[i]
+ = stats.stats[i].n_non_null_key_vals;
+ }
+ }
+
+ table->stat_sum_of_other_index_sizes
+ += index->stat_index_size;
+ }
+
+ table->stats_last_recalc = time(NULL);
+
+ table->stat_modified_counter = 0;
+
+ table->stat_initialized = TRUE;
+
+ dict_stats_assert_initialized(table);
+
+ mutex_exit(&dict_sys.mutex);
+
+ return(DB_SUCCESS);
+}
+
+#include "mysql_com.h"
+/** Save an individual index's statistic into the persistent statistics
+storage.
+The row is written by a DELETE of any existing row with the same
+(database_name, table_name, index_name, stat_name) followed by an INSERT,
+both executed through dict_stats_exec_sql().
+On failure an error is logged at most once per index (guarded by
+index->stats_error_printed) and only while innodb_index_stats_not_found
+is false; the error code is returned in either case.
+@param[in] index index to be updated
+@param[in] last_update timestamp of the stat; stored as a
+4-byte integer after conversion to uint32
+@param[in] stat_name name of the stat
+@param[in] stat_value value of the stat
+@param[in] sample_size n pages sampled or NULL; when NULL,
+an SQL NULL is stored in the sample_size column
+@param[in] stat_description description of the stat
+@param[in,out] trx in case of NULL the function will
+allocate and free the trx object. If it is not NULL then it will be
+rolled back only in the case of error, but not freed.
+@return DB_SUCCESS or error code */
+dberr_t
+dict_stats_save_index_stat(
+ dict_index_t* index,
+ time_t last_update,
+ const char* stat_name,
+ ib_uint64_t stat_value,
+ ib_uint64_t* sample_size,
+ const char* stat_description,
+ trx_t* trx)
+{
+ dberr_t ret;
+ pars_info_t* pinfo;
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
+
+ ut_ad(!trx || trx->internal || trx->mysql_thd);
+ ut_d(dict_sys.assert_locked());
+
+ dict_fs2utf8(index->table->name.m_name, db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
+
+ pinfo = pars_info_create();
+ pars_info_add_str_literal(pinfo, "database_name", db_utf8);
+ pars_info_add_str_literal(pinfo, "table_name", table_utf8);
+ pars_info_add_str_literal(pinfo, "index_name", index->name);
+ MEM_CHECK_DEFINED(&last_update, 4);
+ pars_info_add_int4_literal(pinfo, "last_update", uint32(last_update));
+ MEM_CHECK_DEFINED(stat_name, strlen(stat_name));
+ pars_info_add_str_literal(pinfo, "stat_name", stat_name);
+ MEM_CHECK_DEFINED(&stat_value, 8);
+ pars_info_add_ull_literal(pinfo, "stat_value", stat_value);
+ if (sample_size != NULL) {
+ MEM_CHECK_DEFINED(sample_size, 8);
+ pars_info_add_ull_literal(pinfo, "sample_size", *sample_size);
+ } else {
+ pars_info_add_literal(pinfo, "sample_size", NULL,
+ UNIV_SQL_NULL, DATA_FIXBINARY, 0);
+ }
+ pars_info_add_str_literal(pinfo, "stat_description",
+ stat_description);
+
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE INDEX_STATS_SAVE () IS\n"
+ "BEGIN\n"
+
+ "DELETE FROM \"" INDEX_STATS_NAME "\"\n"
+ "WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name AND\n"
+ "index_name = :index_name AND\n"
+ "stat_name = :stat_name;\n"
+
+ "INSERT INTO \"" INDEX_STATS_NAME "\"\n"
+ "VALUES\n"
+ "(\n"
+ ":database_name,\n"
+ ":table_name,\n"
+ ":index_name,\n"
+ ":last_update,\n"
+ ":stat_name,\n"
+ ":stat_value,\n"
+ ":sample_size,\n"
+ ":stat_description\n"
+ ");\n"
+ "END;", trx);
+
+ if (UNIV_UNLIKELY(ret != DB_SUCCESS)) {
+ if (innodb_index_stats_not_found == false &&
+ index->stats_error_printed == false) {
+ ib::error() << "Cannot save index statistics for table "
+ << index->table->name
+ << ", index " << index->name
+ << ", stat name \"" << stat_name << "\": "
+ << ret;
+ index->stats_error_printed = true;
+ }
+ }
+
+ return(ret);
+}
+
+/** Report an error if updating table statistics failed because
+.ibd file is missing, table decryption failed or table is corrupted.
+The message is logged through ib::warn(), and the table's statistics are
+then reset with dict_stats_empty_table(table, defragment).
+The cause is chosen as follows: a NULL table->space means the .ibd file is
+missing; otherwise table->corrupted selects between corruption and
+decryption failure.
+@param[in,out] table Table
+@param[in] defragment true if statistics is for defragment
+@retval DB_DECRYPTION_FAILED if decryption of the table failed
+@retval DB_TABLESPACE_DELETED if .ibd file is missing
+@retval DB_CORRUPTION if table is marked as corrupted */
+dberr_t
+dict_stats_report_error(dict_table_t* table, bool defragment)
+{
+ dberr_t err;
+
+ const char* df = defragment ? " defragment" : "";
+
+ if (!table->space) {
+ ib::warn() << "Cannot save" << df << " statistics for table "
+ << table->name
+ << " because the .ibd file is missing. "
+ << TROUBLESHOOTING_MSG;
+ err = DB_TABLESPACE_DELETED;
+ } else {
+ ib::warn() << "Cannot save" << df << " statistics for table "
+ << table->name
+ << " because file "
+ << table->space->chain.start->name
+ << (table->corrupted
+ ? " is corrupted."
+ : " cannot be decrypted.");
+ err = table->corrupted ? DB_CORRUPTION : DB_DECRYPTION_FAILED;
+ }
+
+ dict_stats_empty_table(table, defragment);
+ return err;
+}
+
+/** Save the table's statistics into the persistent statistics storage.
+@param[in] table_orig table whose stats to save
+@param[in] only_for_index if this is non-NULL, then stats for indexes
+that are not equal to it will not be saved, if NULL, then all indexes' stats
+are saved
+@return DB_SUCCESS or error code */
+static
+dberr_t
+dict_stats_save(
+ dict_table_t* table_orig,
+ const index_id_t* only_for_index)
+{ /* Writes the statistics of table_orig to the persistent storage:
+ first the table-level row is replaced (DELETE followed by INSERT into
+ TABLE_STATS_NAME), then the per-index rows are written through
+ dict_stats_save_index_stat(), optionally restricted to the index
+ whose id equals *only_for_index. */
+ pars_info_t* pinfo;
+ dberr_t ret;
+ dict_table_t* table;
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
+
+ if (high_level_read_only) {
+ return DB_READ_ONLY;
+ }
+
+ if (!table_orig->is_readable()) {
+ return (dict_stats_report_error(table_orig));
+ }
+
+ table = dict_stats_snapshot_create(table_orig); /* everything below
+ reads from this snapshot; it is released by
+ dict_stats_snapshot_free() at func_exit */
+
+ dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
+
+ const time_t now = time(NULL); /* timestamp passed along with the
+ saved rows; narrowed to uint32 for last_update below */
+ dict_sys_lock(); /* released by dict_sys_unlock() at func_exit */
+
+ pinfo = pars_info_create();
+
+ pars_info_add_str_literal(pinfo, "database_name", db_utf8);
+ pars_info_add_str_literal(pinfo, "table_name", table_utf8);
+ pars_info_add_int4_literal(pinfo, "last_update", uint32(now));
+ pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows);
+ pars_info_add_ull_literal(pinfo, "clustered_index_size",
+ table->stat_clustered_index_size);
+ pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes",
+ table->stat_sum_of_other_index_sizes);
+
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE TABLE_STATS_SAVE () IS\n"
+ "BEGIN\n"
+
+ "DELETE FROM \"" TABLE_STATS_NAME "\"\n"
+ "WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name;\n"
+
+ "INSERT INTO \"" TABLE_STATS_NAME "\"\n"
+ "VALUES\n"
+ "(\n"
+ ":database_name,\n"
+ ":table_name,\n"
+ ":last_update,\n"
+ ":n_rows,\n"
+ ":clustered_index_size,\n"
+ ":sum_of_other_index_sizes\n"
+ ");\n"
+ "END;", NULL);
+
+ if (UNIV_UNLIKELY(ret != DB_SUCCESS)) {
+ ib::error() << "Cannot save table statistics for table "
+ << table->name << ": " << ret;
+func_exit: /* shared cleanup; also entered by the "goto func_exit" at the
+ very end of the function, in which case ret holds the outcome of
+ saving the index rows */
+ dict_sys_unlock();
+ dict_stats_snapshot_free(table);
+ return ret;
+ }
+
+ trx_t* trx = trx_create();
+ trx_start_internal(trx);
+
+ dict_index_t* index;
+ index_map_t indexes(
+ (ut_strcmp_functor()),
+ index_map_t_allocator(mem_key_dict_stats_index_map_t));
+
+ /* Below we do all the modifications in innodb_index_stats in a single
+ transaction for performance reasons. Modifying more than one row in a
+ single transaction may deadlock with other transactions if they
+ lock the rows in different order. Other transaction could be for
+ example when we DROP a table and do
+ DELETE FROM innodb_index_stats WHERE database_name = '...'
+ AND table_name = '...'; which will affect more than one row. To
+ prevent deadlocks we always lock the rows in the same order - the
+ order of the PK, which is (database_name, table_name, index_name,
+ stat_name). This is why below we sort the indexes by name and then
+ for each index, do the mods ordered by stat_name. */
+
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ indexes[index->name] = index;
+ }
+
+ index_map_t::const_iterator it;
+
+ for (it = indexes.begin(); it != indexes.end(); ++it) {
+
+ index = it->second;
+
+ if (only_for_index != NULL && index->id != *only_for_index) {
+ continue;
+ }
+
+ if (dict_stats_should_ignore_index(index)) {
+ continue;
+ }
+
+ ut_ad(!dict_index_is_ibuf(index));
+
+ for (unsigned i = 0; i < index->n_uniq; i++) {
+
+ char stat_name[16];
+ char stat_description[1024];
+
+ snprintf(stat_name, sizeof(stat_name),
+ "n_diff_pfx%02u", i + 1);
+
+ /* craft a string that contains the names of the first
+ i + 1 columns of the index, separated by commas */
+ snprintf(stat_description, sizeof(stat_description),
+ "%s", index->fields[0].name());
+ for (unsigned j = 1; j <= i; j++) {
+ size_t len;
+
+ len = strlen(stat_description);
+
+ snprintf(stat_description + len,
+ sizeof(stat_description) - len,
+ ",%s", index->fields[j].name());
+ }
+
+ ret = dict_stats_save_index_stat(
+ index, now, stat_name,
+ index->stat_n_diff_key_vals[i],
+ &index->stat_n_sample_sizes[i],
+ stat_description, trx);
+
+ if (ret != DB_SUCCESS) {
+ goto end;
+ }
+ }
+
+ ret = dict_stats_save_index_stat(index, now, "n_leaf_pages",
+ index->stat_n_leaf_pages,
+ NULL,
+ "Number of leaf pages "
+ "in the index", trx);
+ if (ret != DB_SUCCESS) {
+ goto end;
+ }
+
+ ret = dict_stats_save_index_stat(index, now, "size",
+ index->stat_index_size,
+ NULL,
+ "Number of pages "
+ "in the index", trx);
+ if (ret != DB_SUCCESS) {
+ goto end;
+ }
+ }
+
+ trx_commit_for_mysql(trx);
+
+end: /* reached after the commit above, or directly from a failed
+ dict_stats_save_index_stat() call, in which case the commit is skipped */
+ trx->free();
+ goto func_exit; /* backward jump: unlock, free the snapshot, return ret */
+}
+
+/** Row callback for the cursor that runs
+SELECT n_rows, clustered_index_size, sum_of_other_index_sizes
+FROM mysql.innodb_table_stats WHERE table='...'
+Every selected column is decoded and stored into the table object that
+is passed as the second argument.
+@param[in]	node_void	the select node (a sel_node_t*)
+@param[out]	table_void	the dict_table_t* that receives the values
+@return non-NULL dummy (always TRUE) */
+static
+ibool
+dict_stats_fetch_table_stats_step(
+	void*	node_void,
+	void*	table_void)
+{
+	sel_node_t*	select = static_cast<sel_node_t*>(node_void);
+	dict_table_t*	dest = static_cast<dict_table_t*>(table_void);
+	int		col_no = 0;
+	que_common_t*	col = static_cast<que_common_t*>(select->select_list);
+
+	/* Exactly three columns are expected, in the order
+	n_rows, clustered_index_size, sum_of_other_index_sizes. */
+	while (col != NULL) {
+		dfield_t*	field = que_node_get_val(col);
+		dtype_t*	field_type = dfield_get_type(field);
+		const ulint	field_len = dfield_get_len(field);
+		const byte*	raw = static_cast<const byte*>(
+			dfield_get_data(field));
+
+		if (col_no > 2) {
+			/* someone extended the SELECT list of
+			innodb_table_stats without adjusting here */
+			ut_error;
+		}
+
+		/* each of the three columns is an 8-byte integer */
+		ut_a(dtype_get_mtype(field_type) == DATA_INT);
+		ut_a(field_len == 8);
+
+		const ib_uint64_t	value = mach_read_from_8(raw);
+
+		if (col_no == 0) {
+			/* mysql.innodb_table_stats.n_rows */
+			dest->stat_n_rows = value;
+		} else if (col_no == 1) {
+			/* mysql.innodb_table_stats.clustered_index_size */
+			dest->stat_clustered_index_size
+				= static_cast<ulint>(value);
+		} else {
+			/* mysql.innodb_table_stats.sum_of_other_index_sizes */
+			dest->stat_sum_of_other_index_sizes
+				= static_cast<ulint>(value);
+		}
+
+		col = static_cast<que_common_t*>(que_node_get_next(col));
+		col_no++;
+	}
+
+	/* fewer than 3 columns means the SELECT list was shortened without
+	adjusting here; more than 3 would have hit ut_error above */
+	ut_a(col_no == 3 /*n_rows,clustered_index_size,sum_of_other_index_sizes*/);
+
+	/* the value is not used, but returning non-NULL is necessary */
+	return(TRUE);
+}
+
+/** Argument bundle handed to dict_stats_fetch_index_stats_step():
+the table being filled in plus a flag that reports whether anything
+was stored. */
+struct index_fetch_t {
+	/** table whose indexes are to be modified */
+	dict_table_t*	table;
+	/** set to true once the stats of at least one index
+	were modified */
+	bool		stats_were_modified;
+};
+
+/*********************************************************************//**
+Called for the rows that are selected by
+SELECT ... FROM mysql.innodb_index_stats WHERE table='...'
+The second argument is a pointer to an index_fetch_t; the fetched stats
+are written to the indexes of its table and its stats_were_modified flag
+is set whenever a recognized statistic has been stored.
+Suppose a table has N indexes and index i has Ui unique columns, i=1..N;
+then mysql.innodb_index_stats will have SUM(Ui) i=1..N rows for that table.
+So this function will be called SUM(Ui) times where SUM(Ui) is of magnitude
+N*AVG(Ui). In each call it searches for the currently fetched index into
+table->indexes linearly, assuming this list is not sorted. Thus, overall,
+fetching all indexes' stats from mysql.innodb_index_stats is O(N^2) where N
+is the number of indexes.
+This can be improved if we sort table->indexes in a temporary area just once
+and then search in that sorted list. Then the complexity will be O(N*log(N)).
+We assume a table will not have more than 100 indexes, so we go with the
+simpler N^2 algorithm.
+@return non-NULL dummy */
+static
+ibool
+dict_stats_fetch_index_stats_step(
+/*==============================*/
+ void* node_void, /*!< in: select node */
+ void* arg_void) /*!< out: table + a flag that tells if we
+ modified anything */
+{
+ sel_node_t* node = (sel_node_t*) node_void;
+ index_fetch_t* arg = (index_fetch_t*) arg_void;
+ dict_table_t* table = arg->table;
+ dict_index_t* index = NULL;
+ que_common_t* cnode;
+ const char* stat_name = NULL;
+ ulint stat_name_len = ULINT_UNDEFINED;
+ ib_uint64_t stat_value = UINT64_UNDEFINED;
+ ib_uint64_t sample_size = UINT64_UNDEFINED;
+ int i;
+
+ /* this should loop exactly 4 times - for the columns that
+ were selected: index_name,stat_name,stat_value,sample_size;
+ the loop only collects the column values into the locals above,
+ they are applied to the index after the loop */
+ for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
+ cnode != NULL;
+ cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
+ i++) {
+
+ const byte* data;
+ dfield_t* dfield = que_node_get_val(cnode);
+ dtype_t* type = dfield_get_type(dfield);
+ ulint len = dfield_get_len(dfield);
+
+ data = static_cast<const byte*>(dfield_get_data(dfield));
+
+ switch (i) {
+ case 0: /* mysql.innodb_index_stats.index_name */
+
+ ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
+
+ /* search for index in table's indexes whose name
+ matches data; the fetched index name is in data,
+ has no terminating '\0' and has length len; only
+ indexes for which is_committed() holds are considered */
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ if (index->is_committed()
+ && strlen(index->name) == len
+ && memcmp(index->name, data, len) == 0) {
+ /* the corresponding index was found */
+ break;
+ }
+ }
+
+ /* if index is NULL here this means that
+ mysql.innodb_index_stats contains more rows than the
+ number of indexes in the table; this is ok, we just
+ return ignoring those extra rows; in other words
+ dict_stats_fetch_index_stats_step() has been called
+ for a row from index_stats with unknown index_name
+ column */
+ if (index == NULL) {
+
+ return(TRUE);
+ }
+
+ break;
+
+ case 1: /* mysql.innodb_index_stats.stat_name */
+
+ ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
+
+ ut_a(index != NULL);
+
+ stat_name = (const char*) data; /* not copied: points
+ into the fetched field and is used together with
+ stat_name_len */
+ stat_name_len = len;
+
+ break;
+
+ case 2: /* mysql.innodb_index_stats.stat_value */
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+ ut_a(len == 8);
+
+ ut_a(index != NULL);
+ ut_a(stat_name != NULL);
+ ut_a(stat_name_len != ULINT_UNDEFINED);
+
+ stat_value = mach_read_from_8(data);
+
+ break;
+
+ case 3: /* mysql.innodb_index_stats.sample_size */
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+ ut_a(len == 8 || len == UNIV_SQL_NULL);
+
+ ut_a(index != NULL);
+ ut_a(stat_name != NULL);
+ ut_a(stat_name_len != ULINT_UNDEFINED);
+ ut_a(stat_value != UINT64_UNDEFINED);
+
+ if (len == UNIV_SQL_NULL) {
+ break;
+ }
+ /* else */
+
+ sample_size = mach_read_from_8(data);
+
+ break;
+
+ default:
+
+ /* someone changed
+ SELECT index_name,stat_name,stat_value,sample_size
+ to select more columns from innodb_index_stats without
+ adjusting here */
+ ut_error;
+ }
+ }
+
+ /* if i < 4 this means someone changed the
+ SELECT index_name,stat_name,stat_value,sample_size
+ to select less columns from innodb_index_stats without adjusting here;
+ if i > 4 we would have ut_error'ed earlier */
+ ut_a(i == 4 /* index_name,stat_name,stat_value,sample_size */);
+
+ ut_a(index != NULL);
+ ut_a(stat_name != NULL);
+ ut_a(stat_name_len != ULINT_UNDEFINED);
+ ut_a(stat_value != UINT64_UNDEFINED);
+ /* sample_size could be UINT64_UNDEFINED here, if it is NULL */
+
+#define PFX "n_diff_pfx"
+#define PFX_LEN 10
+
+ /* Dispatch on stat_name; the comparisons are case-insensitive and
+ length-checked because stat_name has no terminating '\0'.
+ NOTE(review): PFX and PFX_LEN are not #undef'ed in this function. */
+ if (stat_name_len == 4 /* strlen("size") */
+ && strncasecmp("size", stat_name, stat_name_len) == 0) {
+ index->stat_index_size = (ulint) stat_value;
+ arg->stats_were_modified = true;
+ } else if (stat_name_len == 12 /* strlen("n_leaf_pages") */
+ && strncasecmp("n_leaf_pages", stat_name, stat_name_len)
+ == 0) {
+ index->stat_n_leaf_pages = (ulint) stat_value;
+ arg->stats_were_modified = true;
+ } else if (stat_name_len == 12 /* strlen("n_page_split") */
+ && strncasecmp("n_page_split", stat_name, stat_name_len)
+ == 0) {
+ index->stat_defrag_n_page_split = (ulint) stat_value;
+ arg->stats_were_modified = true;
+ } else if (stat_name_len == 13 /* strlen("n_pages_freed") */
+ && strncasecmp("n_pages_freed", stat_name, stat_name_len)
+ == 0) {
+ index->stat_defrag_n_pages_freed = (ulint) stat_value;
+ arg->stats_were_modified = true;
+ } else if (stat_name_len > PFX_LEN /* e.g. stat_name=="n_diff_pfx01" */
+ && strncasecmp(PFX, stat_name, PFX_LEN) == 0) {
+
+ const char* num_ptr;
+ unsigned long n_pfx;
+
+ /* point num_ptr at the "1" of "n_diff_pfx12..." */
+ num_ptr = stat_name + PFX_LEN;
+
+ /* stat_name should have exactly 2 chars appended to PFX
+ and they should be digits; the length is tested first, so
+ num_ptr[1] is only read when two chars follow the prefix */
+ if (stat_name_len != PFX_LEN + 2
+ || num_ptr[0] < '0' || num_ptr[0] > '9'
+ || num_ptr[1] < '0' || num_ptr[1] > '9') {
+
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
+
+ dict_fs2utf8(table->name.m_name,
+ db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
+
+ ib::info out;
+ out << "Ignoring strange row from "
+ << INDEX_STATS_NAME_PRINT << " WHERE"
+ " database_name = '" << db_utf8
+ << "' AND table_name = '" << table_utf8
+ << "' AND index_name = '" << index->name()
+ << "' AND stat_name = '";
+ out.write(stat_name, stat_name_len);
+ out << "'; because stat_name is malformed";
+ return(TRUE);
+ }
+ /* else */
+
+ /* extract 12 from "n_diff_pfx12..." into n_pfx
+ note that stat_name does not have a terminating '\0' */
+ n_pfx = ulong(num_ptr[0] - '0') * 10 + ulong(num_ptr[1] - '0');
+
+ ulint n_uniq = index->n_uniq;
+
+ if (n_pfx == 0 || n_pfx > n_uniq) { /* n_pfx is 1-based and
+ is used as array index n_pfx - 1 below */
+
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
+
+ dict_fs2utf8(table->name.m_name,
+ db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
+
+ ib::info out;
+ out << "Ignoring strange row from "
+ << INDEX_STATS_NAME_PRINT << " WHERE"
+ " database_name = '" << db_utf8
+ << "' AND table_name = '" << table_utf8
+ << "' AND index_name = '" << index->name()
+ << "' AND stat_name = '";
+ out.write(stat_name, stat_name_len);
+ out << "'; because stat_name is out of range, the index"
+ " has " << n_uniq << " unique columns";
+
+ return(TRUE);
+ }
+ /* else */
+
+ index->stat_n_diff_key_vals[n_pfx - 1] = stat_value;
+
+ if (sample_size != UINT64_UNDEFINED) {
+ index->stat_n_sample_sizes[n_pfx - 1] = sample_size;
+ } else {
+ /* hmm, strange... the user must have UPDATEd the
+ table manually and SET sample_size = NULL */
+ index->stat_n_sample_sizes[n_pfx - 1] = 0;
+ }
+
+ index->stat_n_non_null_key_vals[n_pfx - 1] = 0;
+
+ arg->stats_were_modified = true;
+ } else {
+ /* silently ignore rows with unknown stat_name, the
+ user may have developed her own stats */
+ }
+
+ /* XXX this is not used but returning non-NULL is necessary */
+ return(TRUE);
+}
+
+/** Read the statistics of a table and of its indexes from the persistent
+statistics storage into the given table object.
+@param[in,out]	table	table whose statistics are first emptied and then
+filled in from the stored rows
+@return DB_SUCCESS or error code; DB_STATS_DO_NOT_EXIST when no index
+statistic was stored into the table */
+static
+dberr_t
+dict_stats_fetch_from_ps(
+	dict_table_t*	table)
+{
+	ut_ad(!mutex_own(&dict_sys.mutex));
+
+	/* Start from dummy values: if the persistent storage holds
+	incomplete stats (e.g. some index is missing) we would otherwise
+	end up with (partially) uninitialized stats. */
+	dict_stats_empty_table(table, true);
+
+	trx_t*	fetch_trx = trx_create();
+
+	/* 'read-uncommitted' keeps the SELECTs below from getting blocked
+	in case some user has locked the rows that we are SELECTing. */
+	fetch_trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
+
+	if (!srv_read_only_mode) {
+		trx_start_internal(fetch_trx);
+	} else {
+		trx_start_internal_read_only(fetch_trx);
+	}
+
+	char	db_utf8[MAX_DB_UTF8_LEN];
+	char	table_utf8[MAX_TABLE_UTF8_LEN];
+
+	dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8),
+		     table_utf8, sizeof(table_utf8));
+
+	pars_info_t*	info = pars_info_create();
+
+	pars_info_add_str_literal(info, "database_name", db_utf8);
+	pars_info_add_str_literal(info, "table_name", table_utf8);
+
+	pars_info_bind_function(info, "fetch_table_stats_step",
+				dict_stats_fetch_table_stats_step, table);
+
+	/* the index callback gets the table plus a "modified" flag */
+	index_fetch_t	fetch_arg = {table, false};
+
+	pars_info_bind_function(info, "fetch_index_stats_step",
+				dict_stats_fetch_index_stats_step, &fetch_arg);
+
+	/* info is freed by que_eval_sql() */
+	const dberr_t	err = que_eval_sql(
+		info,
+		"PROCEDURE FETCH_STATS () IS\n"
+		"found INT;\n"
+		"DECLARE FUNCTION fetch_table_stats_step;\n"
+		"DECLARE FUNCTION fetch_index_stats_step;\n"
+		"DECLARE CURSOR table_stats_cur IS\n"
+		" SELECT\n"
+		/* keep this column list in sync with
+		dict_stats_fetch_table_stats_step() */
+		" n_rows,\n"
+		" clustered_index_size,\n"
+		" sum_of_other_index_sizes\n"
+		" FROM \"" TABLE_STATS_NAME "\"\n"
+		" WHERE\n"
+		" database_name = :database_name AND\n"
+		" table_name = :table_name;\n"
+		"DECLARE CURSOR index_stats_cur IS\n"
+		" SELECT\n"
+		/* keep this column list in sync with
+		dict_stats_fetch_index_stats_step() */
+		" index_name,\n"
+		" stat_name,\n"
+		" stat_value,\n"
+		" sample_size\n"
+		" FROM \"" INDEX_STATS_NAME "\"\n"
+		" WHERE\n"
+		" database_name = :database_name AND\n"
+		" table_name = :table_name;\n"
+
+		"BEGIN\n"
+
+		"OPEN table_stats_cur;\n"
+		"FETCH table_stats_cur INTO\n"
+		" fetch_table_stats_step();\n"
+		"IF (SQL % NOTFOUND) THEN\n"
+		" CLOSE table_stats_cur;\n"
+		" RETURN;\n"
+		"END IF;\n"
+		"CLOSE table_stats_cur;\n"
+
+		"OPEN index_stats_cur;\n"
+		"found := 1;\n"
+		"WHILE found = 1 LOOP\n"
+		" FETCH index_stats_cur INTO\n"
+		" fetch_index_stats_step();\n"
+		" IF (SQL % NOTFOUND) THEN\n"
+		" found := 0;\n"
+		" END IF;\n"
+		"END LOOP;\n"
+		"CLOSE index_stats_cur;\n"
+
+		"END;",
+		TRUE, fetch_trx);
+
+	trx_commit_for_mysql(fetch_trx);
+	fetch_trx->free();
+
+	/* no index statistic stored counts as "stats do not exist",
+	whatever que_eval_sql() returned */
+	return(fetch_arg.stats_were_modified ? err : DB_STATS_DO_NOT_EXIST);
+}
+
+/** Reset stat_defrag_modified_counter to 0 on every index of a table.
+@param[in,out]	table	table whose indexes are updated; must not be NULL */
+static
+void
+dict_stats_empty_defrag_modified_counter(
+	dict_table_t*	table)
+{
+	ut_a(table);
+
+	dict_index_t*	cur = dict_table_get_first_index(table);
+
+	while (cur != NULL) {
+		cur->stat_defrag_modified_counter = 0;
+		cur = dict_table_get_next_index(cur);
+	}
+}
+
+/** Fetches or calculates new estimates for the statistics of one index.
+If persistent statistics are enabled for the table of the index and
+dict_stats_persistent_storage_check() succeeds, the index is analyzed,
+the results are copied into the index object while holding
+dict_sys.mutex and are then saved with dict_stats_save(). Otherwise the
+transient statistics of the index are recalculated.
+@param[in,out]	index	index whose statistics are updated */
+void
+dict_stats_update_for_index(
+	dict_index_t*	index)
+{
+	DBUG_ENTER("dict_stats_update_for_index");
+
+	ut_ad(!mutex_own(&dict_sys.mutex));
+
+	if (dict_stats_is_persistent_enabled(index->table)) {
+
+		if (dict_stats_persistent_storage_check(false)) {
+			/* analyze without holding dict_sys.mutex and only
+			publish the results under the mutex */
+			index_stats_t stats = dict_stats_analyze_index(index);
+			mutex_enter(&dict_sys.mutex);
+			index->stat_index_size = stats.index_size;
+			index->stat_n_leaf_pages = stats.n_leaf_pages;
+			for (size_t i = 0; i < stats.stats.size(); ++i) {
+				index->stat_n_diff_key_vals[i]
+					= stats.stats[i].n_diff_key_vals;
+				index->stat_n_sample_sizes[i]
+					= stats.stats[i].n_sample_sizes;
+				index->stat_n_non_null_key_vals[i]
+					= stats.stats[i].n_non_null_key_vals;
+			}
+			index->table->stat_sum_of_other_index_sizes
+				+= index->stat_index_size;
+			mutex_exit(&dict_sys.mutex);
+
+			/* save only the rows that belong to this index */
+			dict_stats_save(index->table, &index->id);
+			DBUG_VOID_RETURN;
+		}
+		/* else */
+
+		if (innodb_index_stats_not_found == false &&
+		    index->stats_error_printed == false) {
+			/* Fall back to transient stats since the persistent
+			storage is not present or is corrupted */
+
+			ib::info() << "Recalculation of persistent statistics"
+				" requested for table " << index->table->name
+				<< " index " << index->name
+				<< " but the required"
+				" persistent statistics storage is not present or is"
+				" corrupted. Using transient stats instead.";
+
+			/* Remember that the message was issued, so that it
+			is not repeated on every call for this index; this
+			matches how dict_stats_update() treats
+			table->stats_error_printed. The flag used to be
+			assigned false here, which was a no-op inside this
+			branch. */
+			index->stats_error_printed = true;
+		}
+	}
+
+	dict_stats_update_transient_for_index(index);
+
+	DBUG_VOID_RETURN;
+}
+
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. The statistics
+are used in query optimization.
+Every path that cannot use the persistent storage ends at the
+"transient" label, which recalculates transient statistics and returns
+DB_SUCCESS.
+@return DB_SUCCESS or error code */
+dberr_t
+dict_stats_update(
+/*==============*/
+ dict_table_t* table, /*!< in/out: table */
+ dict_stats_upd_option_t stats_upd_option)
+ /*!< in: whether to (re) calc
+ the stats or to fetch them from
+ the persistent statistics
+ storage */
+{
+ ut_ad(!mutex_own(&dict_sys.mutex));
+
+ if (!table->is_readable()) {
+ return (dict_stats_report_error(table));
+ } else if (srv_force_recovery > SRV_FORCE_NO_IBUF_MERGE) {
+ /* If we have set a high innodb_force_recovery level, do
+ not calculate statistics, as a badly corrupted index can
+ cause a crash in it. */
+ dict_stats_empty_table(table, false);
+ return(DB_SUCCESS);
+ }
+
+ switch (stats_upd_option) {
+ case DICT_STATS_RECALC_PERSISTENT:
+
+ if (srv_read_only_mode) {
+ goto transient;
+ }
+
+ /* Persistent recalculation requested, called from
+ 1) ANALYZE TABLE, or
+ 2) the auto recalculation background thread, or
+ 3) open table if stats do not exist on disk and auto recalc
+ is enabled */
+
+ /* InnoDB internal tables (e.g. SYS_TABLES) cannot have
+ persistent stats enabled */
+ ut_a(strchr(table->name.m_name, '/') != NULL);
+
+ /* check if the persistent statistics storage exists
+ before calling the potentially slow function
+ dict_stats_update_persistent(); that is a
+ prerequisite for dict_stats_save() succeeding */
+ if (dict_stats_persistent_storage_check(false)) {
+
+ dberr_t err;
+
+ err = dict_stats_update_persistent(table);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ err = dict_stats_save(table, NULL);
+
+ return(err);
+ }
+
+ /* Fall back to transient stats since the persistent
+ storage is not present or is corrupted */
+
+ if (innodb_table_stats_not_found == false &&
+ table->stats_error_printed == false) {
+ ib::warn() << "Recalculation of persistent statistics"
+ " requested for table "
+ << table->name
+ << " but the required persistent"
+ " statistics storage is not present or is corrupted."
+ " Using transient stats instead.";
+ table->stats_error_printed = true;
+ }
+
+ goto transient;
+
+ case DICT_STATS_RECALC_TRANSIENT:
+
+ goto transient;
+
+ case DICT_STATS_EMPTY_TABLE:
+
+ dict_stats_empty_table(table, true);
+
+ /* If table is using persistent stats,
+ then save the stats on disk */
+
+ if (dict_stats_is_persistent_enabled(table)) {
+
+ if (dict_stats_persistent_storage_check(false)) {
+
+ return(dict_stats_save(table, NULL));
+ }
+
+ return(DB_STATS_DO_NOT_EXIST);
+ }
+
+ return(DB_SUCCESS);
+
+ case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY:
+
+ /* fetch requested, either fetch from persistent statistics
+ storage or use the old method */
+
+ if (table->stat_initialized) {
+ return(DB_SUCCESS);
+ }
+
+ /* InnoDB internal tables (e.g. SYS_TABLES) cannot have
+ persistent stats enabled */
+ ut_a(strchr(table->name.m_name, '/') != NULL);
+
+ if (!dict_stats_persistent_storage_check(false)) {
+ /* persistent statistics storage does not exist
+ or is corrupted, calculate the transient stats */
+
+ if (innodb_table_stats_not_found == false &&
+ table->stats_error_printed == false) {
+ ib::error() << "Fetch of persistent statistics"
+ " requested for table "
+ << table->name
+ << " but the required system tables "
+ << TABLE_STATS_NAME_PRINT
+ << " and " << INDEX_STATS_NAME_PRINT
+ << " are not present or have unexpected"
+ " structure. Using transient stats instead.";
+ table->stats_error_printed = true;
+ }
+
+ goto transient;
+ }
+
+ dict_table_t* t;
+
+ /* Create a dummy table object with the same name and
+ indexes, suitable for fetching the stats into it; the
+ fetched values are copied into the real table only on
+ success and the clone is freed on every path below. */
+ t = dict_stats_table_clone_create(table);
+
+ dberr_t err = dict_stats_fetch_from_ps(t);
+
+ t->stats_last_recalc = table->stats_last_recalc;
+ t->stat_modified_counter = 0;
+ dict_stats_empty_defrag_modified_counter(t);
+
+ switch (err) {
+ case DB_SUCCESS:
+
+ mutex_enter(&dict_sys.mutex);
+
+ /* Pass reset_ignored_indexes=true as parameter
+ to dict_stats_copy. This will cause statistics
+ for corrupted indexes to be set to empty values */
+ dict_stats_copy(table, t, true);
+
+ dict_stats_assert_initialized(table);
+
+ mutex_exit(&dict_sys.mutex);
+
+ dict_stats_table_clone_free(t);
+
+ return(DB_SUCCESS);
+ case DB_STATS_DO_NOT_EXIST:
+
+ dict_stats_table_clone_free(t);
+
+ if (srv_read_only_mode) {
+ goto transient;
+ }
+
+ if (dict_stats_auto_recalc_is_enabled(table)) {
+ return(dict_stats_update( /* recursive call
+ that takes the DICT_STATS_RECALC_PERSISTENT
+ branch above */
+ table,
+ DICT_STATS_RECALC_PERSISTENT));
+ }
+
+ ib::info() << "Trying to use table " << table->name
+ << " which has persistent statistics enabled,"
+ " but auto recalculation turned off and the"
+ " statistics do not exist in "
+ TABLE_STATS_NAME_PRINT
+ " and " INDEX_STATS_NAME_PRINT
+ ". Please either run \"ANALYZE TABLE "
+ << table->name << ";\" manually or enable the"
+ " auto recalculation with \"ALTER TABLE "
+ << table->name << " STATS_AUTO_RECALC=1;\"."
+ " InnoDB will now use transient statistics for "
+ << table->name << ".";
+
+ goto transient;
+ default: /* any other error returned by the fetch.
+ NOTE(review): unlike the two fallback branches above, this
+ one tests table->stats_error_printed but never sets it, so
+ the message is logged on every such failure - confirm
+ whether that is intended. */
+
+ dict_stats_table_clone_free(t);
+
+ if (innodb_table_stats_not_found == false &&
+ table->stats_error_printed == false) {
+ ib::error() << "Error fetching persistent statistics"
+ " for table "
+ << table->name
+ << " from " TABLE_STATS_NAME_PRINT " and "
+ INDEX_STATS_NAME_PRINT ": " << err
+ << ". Using transient stats method instead.";
+ }
+
+ goto transient;
+ }
+ /* no "default:" in order to produce a compilation warning
+ about unhandled enumeration value */
+ }
+
+transient:
+ dict_stats_update_transient(table);
+
+ return(DB_SUCCESS);
+}
+
+/** Delete the rows that describe one index from the persistent
+statistics storage, if the storage exists and holds data for the index.
+This function creates its own trx and commits it.
+
+System tables must be modified in a separate transaction in order to
+adhere to the InnoDB design constraint that dict_sys.latch prevents
+lock waits on system tables. If system and user tables were modified in
+the same transaction, dict_sys.latch would have to be held exclusively
+until that transaction is committed, effectively blocking other
+transactions that attempt to open any InnoDB table. Because there is no
+guarantee that user transactions will be committed fast, we cannot
+afford to keep the system tables locked in a user transaction.
+@param[in]	db_and_table	db and table, e.g. 'db/table'
+@param[in]	iname		index name
+@param[out]	errstr		error message if != DB_SUCCESS is returned
+@param[in]	errstr_sz	size of the errstr buffer
+@return DB_SUCCESS or error code */
+dberr_t
+dict_stats_drop_index(
+	const char*	db_and_table,
+	const char*	iname,
+	char*		errstr,
+	ulint		errstr_sz)
+{
+	ut_ad(!mutex_own(&dict_sys.mutex));
+
+	/* A name without a database part (e.g. an index of SYS_TABLES)
+	is skipped. */
+	if (strchr(db_and_table, '/') == NULL) {
+		return(DB_SUCCESS);
+	}
+
+	char	db_utf8[MAX_DB_UTF8_LEN];
+	char	table_utf8[MAX_TABLE_UTF8_LEN];
+
+	dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
+		     table_utf8, sizeof(table_utf8));
+
+	pars_info_t*	info = pars_info_create();
+
+	pars_info_add_str_literal(info, "database_name", db_utf8);
+	pars_info_add_str_literal(info, "table_name", table_utf8);
+	pars_info_add_str_literal(info, "index_name", iname);
+
+	dict_sys_lock();
+
+	const dberr_t	err = dict_stats_exec_sql(
+		info,
+		"PROCEDURE DROP_INDEX_STATS () IS\n"
+		"BEGIN\n"
+		"DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
+		"database_name = :database_name AND\n"
+		"table_name = :table_name AND\n"
+		"index_name = :index_name;\n"
+		"END;\n", NULL);
+
+	dict_sys_unlock();
+
+	/* DB_STATS_DO_NOT_EXIST is reported to the caller as success */
+	if (err == DB_SUCCESS || err == DB_STATS_DO_NOT_EXIST) {
+		return(DB_SUCCESS);
+	}
+
+	const char*	lock_hint = "";
+
+	if (err == DB_LOCK_WAIT_TIMEOUT) {
+		lock_hint = " because the rows are locked";
+	}
+
+	snprintf(errstr, errstr_sz,
+		 "Unable to delete statistics for index %s"
+		 " from %s%s: %s. They can be deleted later using"
+		 " DELETE FROM %s WHERE"
+		 " database_name = '%s' AND"
+		 " table_name = '%s' AND"
+		 " index_name = '%s';",
+		 iname,
+		 INDEX_STATS_NAME_PRINT,
+		 lock_hint,
+		 ut_strerr(err),
+		 INDEX_STATS_NAME_PRINT,
+		 db_utf8,
+		 table_utf8,
+		 iname);
+
+	ut_print_timestamp(stderr);
+	fprintf(stderr, " InnoDB: %s\n", errstr);
+
+	return(err);
+}
+
+/** Executes
+DELETE FROM mysql.innodb_table_stats
+WHERE database_name = '...' AND table_name = '...';
+Creates its own transaction and commits it.
+@param[in]	database_name	database name, e.g. 'db'
+@param[in]	table_name	table name, e.g. 'table'
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+dberr_t
+dict_stats_delete_from_table_stats(
+	const char*	database_name,
+	const char*	table_name)
+{
+	ut_d(dict_sys.assert_locked());
+
+	pars_info_t*	info = pars_info_create();
+
+	pars_info_add_str_literal(info, "database_name", database_name);
+	pars_info_add_str_literal(info, "table_name", table_name);
+
+	return(dict_stats_exec_sql(
+		       info,
+		       "PROCEDURE DELETE_FROM_TABLE_STATS () IS\n"
+		       "BEGIN\n"
+		       "DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n"
+		       "database_name = :database_name AND\n"
+		       "table_name = :table_name;\n"
+		       "END;\n", NULL));
+}
+
+/** Executes
+DELETE FROM mysql.innodb_index_stats
+WHERE database_name = '...' AND table_name = '...';
+Creates its own transaction and commits it.
+@param[in]	database_name	database name, e.g. 'db'
+@param[in]	table_name	table name, e.g. 'table'
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+dberr_t
+dict_stats_delete_from_index_stats(
+	const char*	database_name,
+	const char*	table_name)
+{
+	ut_d(dict_sys.assert_locked());
+
+	pars_info_t*	info = pars_info_create();
+
+	pars_info_add_str_literal(info, "database_name", database_name);
+	pars_info_add_str_literal(info, "table_name", table_name);
+
+	return(dict_stats_exec_sql(
+		       info,
+		       "PROCEDURE DELETE_FROM_INDEX_STATS () IS\n"
+		       "BEGIN\n"
+		       "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
+		       "database_name = :database_name AND\n"
+		       "table_name = :table_name;\n"
+		       "END;\n", NULL));
+}
+
+/** Removes the statistics for a table and all of its indexes from the
+persistent statistics storage if it exists and if there is data stored
+for the table. This function creates its own transaction and commits it.
+@param[in]	db_and_table	db and table, e.g. 'db/table'
+@param[out]	errstr		error message if != DB_SUCCESS is returned
+@param[in]	errstr_sz	size of the errstr buffer
+@return DB_SUCCESS or error code */
+dberr_t
+dict_stats_drop_table(
+	const char*	db_and_table,
+	char*		errstr,
+	ulint		errstr_sz)
+{
+	ut_d(dict_sys.assert_locked());
+
+	/* Nothing to do for names without a database part (e.g.
+	SYS_TABLES) nor for innodb_table_stats and innodb_index_stats
+	themselves. */
+	if (strchr(db_and_table, '/') == NULL
+	    || strcmp(db_and_table, TABLE_STATS_NAME) == 0
+	    || strcmp(db_and_table, INDEX_STATS_NAME) == 0) {
+		return(DB_SUCCESS);
+	}
+
+	char	db_utf8[MAX_DB_UTF8_LEN];
+	char	table_utf8[MAX_TABLE_UTF8_LEN];
+
+	dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
+		     table_utf8, sizeof(table_utf8));
+
+	/* the index rows are deleted only if deleting the table row
+	succeeded */
+	dberr_t	err = dict_stats_delete_from_table_stats(db_utf8, table_utf8);
+
+	if (err == DB_SUCCESS) {
+		err = dict_stats_delete_from_index_stats(db_utf8, table_utf8);
+	}
+
+	/* DB_STATS_DO_NOT_EXIST is reported to the caller as success */
+	if (err == DB_SUCCESS || err == DB_STATS_DO_NOT_EXIST) {
+		return(DB_SUCCESS);
+	}
+
+	snprintf(errstr, errstr_sz,
+		 "Unable to delete statistics for table %s.%s: %s."
+		 " They can be deleted later using"
+
+		 " DELETE FROM %s WHERE"
+		 " database_name = '%s' AND"
+		 " table_name = '%s';"
+
+		 " DELETE FROM %s WHERE"
+		 " database_name = '%s' AND"
+		 " table_name = '%s';",
+
+		 db_utf8, table_utf8,
+		 ut_strerr(err),
+
+		 INDEX_STATS_NAME_PRINT,
+		 db_utf8, table_utf8,
+
+		 TABLE_STATS_NAME_PRINT,
+		 db_utf8, table_utf8);
+
+	return(err);
+}
+
+/** Executes
+UPDATE mysql.innodb_table_stats SET
+database_name = '...', table_name = '...'
+WHERE database_name = '...' AND table_name = '...';
+Creates its own transaction and commits it.
+@param[in]	old_dbname_utf8		database name, e.g. 'olddb'
+@param[in]	old_tablename_utf8	table name, e.g. 'oldtable'
+@param[in]	new_dbname_utf8		database name, e.g. 'newdb'
+@param[in]	new_tablename_utf8	table name, e.g. 'newtable'
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+dberr_t
+dict_stats_rename_table_in_table_stats(
+	const char*	old_dbname_utf8,
+	const char*	old_tablename_utf8,
+	const char*	new_dbname_utf8,
+	const char*	new_tablename_utf8)
+{
+	ut_d(dict_sys.assert_locked());
+
+	pars_info_t*	info = pars_info_create();
+
+	pars_info_add_str_literal(info, "old_dbname_utf8", old_dbname_utf8);
+	pars_info_add_str_literal(info, "old_tablename_utf8",
+				  old_tablename_utf8);
+	pars_info_add_str_literal(info, "new_dbname_utf8", new_dbname_utf8);
+	pars_info_add_str_literal(info, "new_tablename_utf8",
+				  new_tablename_utf8);
+
+	return(dict_stats_exec_sql(
+		       info,
+		       "PROCEDURE RENAME_TABLE_IN_TABLE_STATS () IS\n"
+		       "BEGIN\n"
+		       "UPDATE \"" TABLE_STATS_NAME "\" SET\n"
+		       "database_name = :new_dbname_utf8,\n"
+		       "table_name = :new_tablename_utf8\n"
+		       "WHERE\n"
+		       "database_name = :old_dbname_utf8 AND\n"
+		       "table_name = :old_tablename_utf8;\n"
+		       "END;\n", NULL));
+}
+
+/** Executes
+UPDATE mysql.innodb_index_stats SET
+database_name = '...', table_name = '...'
+WHERE database_name = '...' AND table_name = '...';
+Creates its own transaction and commits it.
+@param[in]	old_dbname_utf8		database name, e.g. 'olddb'
+@param[in]	old_tablename_utf8	table name, e.g. 'oldtable'
+@param[in]	new_dbname_utf8		database name, e.g. 'newdb'
+@param[in]	new_tablename_utf8	table name, e.g. 'newtable'
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+dberr_t
+dict_stats_rename_table_in_index_stats(
+	const char*	old_dbname_utf8,
+	const char*	old_tablename_utf8,
+	const char*	new_dbname_utf8,
+	const char*	new_tablename_utf8)
+{
+	ut_d(dict_sys.assert_locked());
+
+	pars_info_t*	info = pars_info_create();
+
+	pars_info_add_str_literal(info, "old_dbname_utf8", old_dbname_utf8);
+	pars_info_add_str_literal(info, "old_tablename_utf8",
+				  old_tablename_utf8);
+	pars_info_add_str_literal(info, "new_dbname_utf8", new_dbname_utf8);
+	pars_info_add_str_literal(info, "new_tablename_utf8",
+				  new_tablename_utf8);
+
+	return(dict_stats_exec_sql(
+		       info,
+		       "PROCEDURE RENAME_TABLE_IN_INDEX_STATS () IS\n"
+		       "BEGIN\n"
+		       "UPDATE \"" INDEX_STATS_NAME "\" SET\n"
+		       "database_name = :new_dbname_utf8,\n"
+		       "table_name = :new_tablename_utf8\n"
+		       "WHERE\n"
+		       "database_name = :old_dbname_utf8 AND\n"
+		       "table_name = :old_tablename_utf8;\n"
+		       "END;\n", NULL));
+}
+
+/*********************************************************************//**
+Renames a table in InnoDB persistent stats storage.
+This function creates its own transaction and commits it.
+The table stats row is renamed first and, only if that succeeded, the
+index stats rows. Each of the two steps is attempted up to 5 times when
+it fails with DB_DEADLOCK, DB_DUPLICATE_KEY or DB_LOCK_WAIT_TIMEOUT.
+If old_name or new_name is one of the statistics tables themselves,
+nothing is done and DB_SUCCESS is returned.
+The data dictionary lock is acquired and released inside this function.
+@return DB_SUCCESS or error code */
+dberr_t
+dict_stats_rename_table(
+/*====================*/
+ const char* old_name, /*!< in: old name, e.g. 'db/table' */
+ const char* new_name, /*!< in: new name, e.g. 'db/table' */
+ char* errstr, /*!< out: error string if != DB_SUCCESS
+ is returned */
+ size_t errstr_sz) /*!< in: errstr size */
+{
+ char old_db_utf8[MAX_DB_UTF8_LEN];
+ char new_db_utf8[MAX_DB_UTF8_LEN];
+ char old_table_utf8[MAX_TABLE_UTF8_LEN];
+ char new_table_utf8[MAX_TABLE_UTF8_LEN];
+ dberr_t ret;
+
+ /* skip innodb_table_stats and innodb_index_stats themselves */
+ if (strcmp(old_name, TABLE_STATS_NAME) == 0
+ || strcmp(old_name, INDEX_STATS_NAME) == 0
+ || strcmp(new_name, TABLE_STATS_NAME) == 0
+ || strcmp(new_name, INDEX_STATS_NAME) == 0) {
+
+ return(DB_SUCCESS);
+ }
+
+ /* split both 'db/table' names into separate database and table
+ name buffers */
+ dict_fs2utf8(old_name, old_db_utf8, sizeof(old_db_utf8),
+ old_table_utf8, sizeof(old_table_utf8));
+
+ dict_fs2utf8(new_name, new_db_utf8, sizeof(new_db_utf8),
+ new_table_utf8, sizeof(new_table_utf8));
+
+ dict_sys_lock();
+
+ /* step 1: rename the row in the table stats table */
+ ulint n_attempts = 0;
+ do {
+ n_attempts++;
+
+ ret = dict_stats_rename_table_in_table_stats(
+ old_db_utf8, old_table_utf8,
+ new_db_utf8, new_table_utf8);
+
+ /* a row for the new name already exists: delete it so that
+ the retry triggered by the loop condition can succeed */
+ if (ret == DB_DUPLICATE_KEY) {
+ dict_stats_delete_from_table_stats(
+ new_db_utf8, new_table_utf8);
+ }
+
+ /* missing stats are not treated as an error here */
+ if (ret == DB_STATS_DO_NOT_EXIST) {
+ ret = DB_SUCCESS;
+ }
+
+ /* release the dictionary lock while sleeping.
+ NOTE(review): this pause runs for every failure, including
+ error codes that the loop condition below does not retry and
+ the final attempt. */
+ if (ret != DB_SUCCESS) {
+ dict_sys_unlock();
+ os_thread_sleep(200000 /* 0.2 sec */);
+ dict_sys_lock();
+ }
+ } while ((ret == DB_DEADLOCK
+ || ret == DB_DUPLICATE_KEY
+ || ret == DB_LOCK_WAIT_TIMEOUT)
+ && n_attempts < 5);
+
+ if (ret != DB_SUCCESS) {
+ /* report the failure together with an UPDATE statement
+ that performs the same rename manually */
+ snprintf(errstr, errstr_sz,
+ "Unable to rename statistics from"
+ " %s.%s to %s.%s in %s: %s."
+ " They can be renamed later using"
+
+ " UPDATE %s SET"
+ " database_name = '%s',"
+ " table_name = '%s'"
+ " WHERE"
+ " database_name = '%s' AND"
+ " table_name = '%s';",
+
+ old_db_utf8, old_table_utf8,
+ new_db_utf8, new_table_utf8,
+ TABLE_STATS_NAME_PRINT,
+ ut_strerr(ret),
+
+ TABLE_STATS_NAME_PRINT,
+ new_db_utf8, new_table_utf8,
+ old_db_utf8, old_table_utf8);
+ dict_sys_unlock();
+ return(ret);
+ }
+ /* else */
+
+ /* step 2: rename the rows in the index stats table, with the same
+ retry scheme as in step 1 */
+ n_attempts = 0;
+ do {
+ n_attempts++;
+
+ ret = dict_stats_rename_table_in_index_stats(
+ old_db_utf8, old_table_utf8,
+ new_db_utf8, new_table_utf8);
+
+ /* rows for the new name already exist: delete them so that
+ the retry can succeed */
+ if (ret == DB_DUPLICATE_KEY) {
+ dict_stats_delete_from_index_stats(
+ new_db_utf8, new_table_utf8);
+ }
+
+ if (ret == DB_STATS_DO_NOT_EXIST) {
+ ret = DB_SUCCESS;
+ }
+
+ if (ret != DB_SUCCESS) {
+ dict_sys_unlock();
+ os_thread_sleep(200000 /* 0.2 sec */);
+ dict_sys_lock();
+ }
+ } while ((ret == DB_DEADLOCK
+ || ret == DB_DUPLICATE_KEY
+ || ret == DB_LOCK_WAIT_TIMEOUT)
+ && n_attempts < 5);
+
+ dict_sys_unlock();
+
+ if (ret != DB_SUCCESS) {
+ /* same message as above, but naming the index stats table */
+ snprintf(errstr, errstr_sz,
+ "Unable to rename statistics from"
+ " %s.%s to %s.%s in %s: %s."
+ " They can be renamed later using"
+
+ " UPDATE %s SET"
+ " database_name = '%s',"
+ " table_name = '%s'"
+ " WHERE"
+ " database_name = '%s' AND"
+ " table_name = '%s';",
+
+ old_db_utf8, old_table_utf8,
+ new_db_utf8, new_table_utf8,
+ INDEX_STATS_NAME_PRINT,
+ ut_strerr(ret),
+
+ INDEX_STATS_NAME_PRINT,
+ new_db_utf8, new_table_utf8,
+ old_db_utf8, old_table_utf8);
+ }
+
+ return(ret);
+}
+
+/** Rename an index in the persistent index statistics.
+Takes the data dictionary lock, verifies with
+dict_stats_persistent_storage_check() that the statistics storage is
+usable, and then updates index_name in the rows that match the table's
+database name, table name and the old index name. The dictionary lock is
+released before returning.
+@param[in]	table		table whose index is renamed
+@param[in]	old_index_name	old index name
+@param[in]	new_index_name	new index name
+@return DB_SUCCESS or error code. DB_STATS_DO_NOT_EXIST will be returned
+if the persistent stats do not exist. */
+dberr_t
+dict_stats_rename_index(
+	const dict_table_t*	table,
+	const char*		old_index_name,
+	const char*		new_index_name)
+{
+	/* Result used when the storage check below fails. */
+	dberr_t	err = DB_STATS_DO_NOT_EXIST;
+
+	dict_sys_lock();
+
+	if (dict_stats_persistent_storage_check(true)) {
+		char	db_utf8[MAX_DB_UTF8_LEN];
+		char	tbl_utf8[MAX_TABLE_UTF8_LEN];
+
+		/* Split table->name.m_name into database and table
+		name buffers. */
+		dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8),
+			     tbl_utf8, sizeof(tbl_utf8));
+
+		pars_info_t* const	bound = pars_info_create();
+
+		pars_info_add_str_literal(bound, "dbname_utf8", db_utf8);
+		pars_info_add_str_literal(bound, "tablename_utf8", tbl_utf8);
+		pars_info_add_str_literal(bound, "new_index_name",
+					  new_index_name);
+		pars_info_add_str_literal(bound, "old_index_name",
+					  old_index_name);
+
+		err = dict_stats_exec_sql(
+			bound,
+			"PROCEDURE RENAME_INDEX_IN_INDEX_STATS () IS\n"
+			"BEGIN\n"
+			"UPDATE \"" INDEX_STATS_NAME "\" SET\n"
+			"index_name = :new_index_name\n"
+			"WHERE\n"
+			"database_name = :dbname_utf8 AND\n"
+			"table_name = :tablename_utf8 AND\n"
+			"index_name = :old_index_name;\n"
+			"END;\n", NULL);
+	}
+
+	dict_sys_unlock();
+
+	return err;
+}
+
+/* tests @{ */
+#ifdef UNIV_ENABLE_UNIT_TEST_DICT_STATS
+
+/* The following unit tests exercise some of the functions in this file
+individually; such testing cannot be performed by the mysql-test framework
+via SQL. */
+
+/* test_dict_table_schema_check() @{ */
+/** Unit test for dict_table_schema_check().
+Expects a table test/tcheck to exist with the definition shown in the
+comment at the top of the function body. Runs a series of checks against
+it while holding dict_sys.mutex and prints one "OK: ..." line per passed
+check. The first failed check prints "ERROR: ..." and ends the test. */
+void
+test_dict_table_schema_check()
+{
+	/*
+	CREATE TABLE tcheck (
+		c01 VARCHAR(123),
+		c02 INT,
+		c03 INT NOT NULL,
+		c04 INT UNSIGNED,
+		c05 BIGINT,
+		c06 BIGINT UNSIGNED NOT NULL,
+		c07 TIMESTAMP
+	) ENGINE=INNODB;
+	*/
+	/* definition for the table 'test/tcheck'; the last entry does not
+	exist in the table and is only used by the "missing columns" check */
+	dict_col_meta_t	columns[] = {
+		{"c01", DATA_VARCHAR, 0, 123},
+		{"c02", DATA_INT, 0, 4},
+		{"c03", DATA_INT, DATA_NOT_NULL, 4},
+		{"c04", DATA_INT, DATA_UNSIGNED, 4},
+		{"c05", DATA_INT, 0, 8},
+		{"c06", DATA_INT, DATA_NOT_NULL | DATA_UNSIGNED, 8},
+		{"c07", DATA_INT, 0, 4},
+		{"c_extra", DATA_INT, 0, 4}
+	};
+	dict_table_schema_t	schema = {
+		"test/tcheck",
+		0 /* will be set individually for each test below */,
+		columns
+	};
+	char	errstr[512];
+
+	snprintf(errstr, sizeof(errstr), "Table not found");
+
+	/* prevent any data dictionary modifications while we are checking
+	the tables' structure */
+
+	mutex_enter(&dict_sys.mutex);
+
+	/* check that a valid table is reported as valid */
+	schema.n_cols = 7;
+	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+	    == DB_SUCCESS) {
+		printf("OK: test.tcheck ok\n");
+	} else {
+		printf("ERROR: %s\n", errstr);
+		printf("ERROR: test.tcheck not present or corrupted\n");
+		goto test_dict_table_schema_check_end;
+	}
+
+	/* check columns with wrong length */
+	schema.columns[1].len = 8;
+	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+	    != DB_SUCCESS) {
+		printf("OK: test.tcheck.c02 has different length and is"
+		       " reported as corrupted\n");
+	} else {
+		/* this is the failure branch, so it must be reported as
+		an error like the failure branches of the other checks */
+		printf("ERROR: test.tcheck.c02 has different length but is"
+		       " reported as ok\n");
+		goto test_dict_table_schema_check_end;
+	}
+	schema.columns[1].len = 4;
+
+	/* request that c02 is NOT NULL while actually it does not have
+	this flag set */
+	schema.columns[1].prtype_mask |= DATA_NOT_NULL;
+	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+	    != DB_SUCCESS) {
+		printf("OK: test.tcheck.c02 does not have NOT NULL while"
+		       " it should and is reported as corrupted\n");
+	} else {
+		printf("ERROR: test.tcheck.c02 does not have NOT NULL while"
+		       " it should and is not reported as corrupted\n");
+		goto test_dict_table_schema_check_end;
+	}
+	schema.columns[1].prtype_mask &= ~DATA_NOT_NULL;
+
+	/* check a table that contains some extra columns */
+	schema.n_cols = 6;
+	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+	    == DB_SUCCESS) {
+		printf("ERROR: test.tcheck has more columns but is not"
+		       " reported as corrupted\n");
+		goto test_dict_table_schema_check_end;
+	} else {
+		printf("OK: test.tcheck has more columns and is"
+		       " reported as corrupted\n");
+	}
+
+	/* check a table that has some columns missing */
+	schema.n_cols = 8;
+	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+	    != DB_SUCCESS) {
+		printf("OK: test.tcheck has missing columns and is"
+		       " reported as corrupted\n");
+	} else {
+		printf("ERROR: test.tcheck has missing columns but is"
+		       " reported as ok\n");
+		goto test_dict_table_schema_check_end;
+	}
+
+	/* check non-existent table */
+	schema.table_name = "test/tcheck_nonexistent";
+	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
+	    != DB_SUCCESS) {
+		printf("OK: test.tcheck_nonexistent is not present\n");
+	} else {
+		printf("ERROR: test.tcheck_nonexistent is present!?\n");
+		goto test_dict_table_schema_check_end;
+	}
+
+test_dict_table_schema_check_end:
+
+	mutex_exit(&dict_sys.mutex);
+}
+/* @} */
+
+/* save/fetch aux macros @{ */
+/* Fixture values shared by test_dict_stats_save(), which stores them,
+and test_dict_stats_fetch_from_ps(), which asserts that the same values
+are read back. */
+
+/* name of the dummy table, combined as TEST_DATABASE_NAME "/"
+TEST_TABLE_NAME by the tests */
+#define TEST_DATABASE_NAME "foobardb"
+#define TEST_TABLE_NAME "test_dict_stats"
+
+/* table-level statistics */
+#define TEST_N_ROWS 111
+#define TEST_CLUSTERED_INDEX_SIZE 222
+#define TEST_SUM_OF_OTHER_INDEX_SIZES 333
+
+/* first dummy index: one column */
+#define TEST_IDX1_NAME "tidx1"
+#define TEST_IDX1_COL1_NAME "tidx1_col1"
+#define TEST_IDX1_INDEX_SIZE 123
+#define TEST_IDX1_N_LEAF_PAGES 234
+#define TEST_IDX1_N_DIFF1 50
+#define TEST_IDX1_N_DIFF1_SAMPLE_SIZE 500
+
+/* second dummy index: four columns, one n_diff value and sample size
+per column prefix */
+#define TEST_IDX2_NAME "tidx2"
+#define TEST_IDX2_COL1_NAME "tidx2_col1"
+#define TEST_IDX2_COL2_NAME "tidx2_col2"
+#define TEST_IDX2_COL3_NAME "tidx2_col3"
+#define TEST_IDX2_COL4_NAME "tidx2_col4"
+#define TEST_IDX2_INDEX_SIZE 321
+#define TEST_IDX2_N_LEAF_PAGES 432
+#define TEST_IDX2_N_DIFF1 60
+#define TEST_IDX2_N_DIFF1_SAMPLE_SIZE 600
+#define TEST_IDX2_N_DIFF2 61
+#define TEST_IDX2_N_DIFF2_SAMPLE_SIZE 610
+#define TEST_IDX2_N_DIFF3 62
+#define TEST_IDX2_N_DIFF3_SAMPLE_SIZE 620
+#define TEST_IDX2_N_DIFF4 63
+#define TEST_IDX2_N_DIFF4_SAMPLE_SIZE 630
+/* @} */
+
+/* test_dict_stats_save() @{ */
+/** Unit test for dict_stats_save().
+Crafts a dummy table with two indexes on the stack, fills in the TEST_*
+statistics values, saves them with dict_stats_save() and asserts that the
+call succeeded. It then prints three SELECT statements that can be run
+manually to verify what was written to the statistics tables. */
+void
+test_dict_stats_save()
+{
+ dict_table_t table;
+ dict_index_t index1;
+ dict_field_t index1_fields[1];
+ ib_uint64_t index1_stat_n_diff_key_vals[1];
+ ib_uint64_t index1_stat_n_sample_sizes[1];
+ dict_index_t index2;
+ dict_field_t index2_fields[4];
+ ib_uint64_t index2_stat_n_diff_key_vals[4];
+ ib_uint64_t index2_stat_n_sample_sizes[4];
+ dberr_t ret;
+
+ /* craft a dummy dict_table_t */
+ table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
+ table.stat_n_rows = TEST_N_ROWS;
+ table.stat_clustered_index_size = TEST_CLUSTERED_INDEX_SIZE;
+ table.stat_sum_of_other_index_sizes = TEST_SUM_OF_OTHER_INDEX_SIZES;
+ UT_LIST_INIT(table.indexes, &dict_index_t::indexes);
+#ifdef BTR_CUR_HASH_ADAPT
+ UT_LIST_INIT(table.freed_indexes, &dict_index_t::indexes);
+#endif /* BTR_CUR_HASH_ADAPT */
+ UT_LIST_ADD_LAST(table.indexes, &index1);
+ UT_LIST_ADD_LAST(table.indexes, &index2);
+ ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
+ ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
+
+ /* first index: a single column, backed by the local arrays above */
+ index1.name = TEST_IDX1_NAME;
+ index1.table = &table;
+ index1.cached = 1;
+ index1.n_uniq = 1;
+ index1.fields = index1_fields;
+ index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
+ index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
+ index1.stat_index_size = TEST_IDX1_INDEX_SIZE;
+ index1.stat_n_leaf_pages = TEST_IDX1_N_LEAF_PAGES;
+ index1_fields[0].name = TEST_IDX1_COL1_NAME;
+ index1_stat_n_diff_key_vals[0] = TEST_IDX1_N_DIFF1;
+ index1_stat_n_sample_sizes[0] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE;
+
+ /* second index: four columns */
+ ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
+ index2.name = TEST_IDX2_NAME;
+ index2.table = &table;
+ index2.cached = 1;
+ index2.n_uniq = 4;
+ index2.fields = index2_fields;
+ index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
+ index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
+ index2.stat_index_size = TEST_IDX2_INDEX_SIZE;
+ index2.stat_n_leaf_pages = TEST_IDX2_N_LEAF_PAGES;
+ index2_fields[0].name = TEST_IDX2_COL1_NAME;
+ index2_fields[1].name = TEST_IDX2_COL2_NAME;
+ index2_fields[2].name = TEST_IDX2_COL3_NAME;
+ index2_fields[3].name = TEST_IDX2_COL4_NAME;
+ index2_stat_n_diff_key_vals[0] = TEST_IDX2_N_DIFF1;
+ index2_stat_n_diff_key_vals[1] = TEST_IDX2_N_DIFF2;
+ index2_stat_n_diff_key_vals[2] = TEST_IDX2_N_DIFF3;
+ index2_stat_n_diff_key_vals[3] = TEST_IDX2_N_DIFF4;
+ index2_stat_n_sample_sizes[0] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE;
+ index2_stat_n_sample_sizes[1] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE;
+ index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE;
+ index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE;
+
+ ret = dict_stats_save(&table, NULL);
+
+ ut_a(ret == DB_SUCCESS);
+
+ printf("\nOK: stats saved successfully, now go ahead and read"
+ " what's inside %s and %s:\n\n",
+ TABLE_STATS_NAME_PRINT,
+ INDEX_STATS_NAME_PRINT);
+
+ /* query that checks the single table stats row */
+ printf("SELECT COUNT(*) = 1 AS table_stats_saved_successfully\n"
+ "FROM %s\n"
+ "WHERE\n"
+ "database_name = '%s' AND\n"
+ "table_name = '%s' AND\n"
+ "n_rows = %d AND\n"
+ "clustered_index_size = %d AND\n"
+ "sum_of_other_index_sizes = %d;\n"
+ "\n",
+ TABLE_STATS_NAME_PRINT,
+ TEST_DATABASE_NAME,
+ TEST_TABLE_NAME,
+ TEST_N_ROWS,
+ TEST_CLUSTERED_INDEX_SIZE,
+ TEST_SUM_OF_OTHER_INDEX_SIZES);
+
+ /* query that checks the 3 rows of the first index:
+ size, n_leaf_pages and one n_diff_pfx row */
+ printf("SELECT COUNT(*) = 3 AS tidx1_stats_saved_successfully\n"
+ "FROM %s\n"
+ "WHERE\n"
+ "database_name = '%s' AND\n"
+ "table_name = '%s' AND\n"
+ "index_name = '%s' AND\n"
+ "(\n"
+ " (stat_name = 'size' AND stat_value = %d AND"
+ " sample_size IS NULL) OR\n"
+ " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
+ " sample_size IS NULL) OR\n"
+ " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
+ " sample_size = '%d' AND stat_description = '%s')\n"
+ ");\n"
+ "\n",
+ INDEX_STATS_NAME_PRINT,
+ TEST_DATABASE_NAME,
+ TEST_TABLE_NAME,
+ TEST_IDX1_NAME,
+ TEST_IDX1_INDEX_SIZE,
+ TEST_IDX1_N_LEAF_PAGES,
+ TEST_IDX1_N_DIFF1,
+ TEST_IDX1_N_DIFF1_SAMPLE_SIZE,
+ TEST_IDX1_COL1_NAME);
+
+ /* query that checks the 6 rows of the second index:
+ size, n_leaf_pages and four n_diff_pfx rows */
+ printf("SELECT COUNT(*) = 6 AS tidx2_stats_saved_successfully\n"
+ "FROM %s\n"
+ "WHERE\n"
+ "database_name = '%s' AND\n"
+ "table_name = '%s' AND\n"
+ "index_name = '%s' AND\n"
+ "(\n"
+ " (stat_name = 'size' AND stat_value = %d AND"
+ " sample_size IS NULL) OR\n"
+ " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
+ " sample_size IS NULL) OR\n"
+ " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
+ " sample_size = '%d' AND stat_description = '%s') OR\n"
+ " (stat_name = 'n_diff_pfx02' AND stat_value = %d AND"
+ " sample_size = '%d' AND stat_description = '%s,%s') OR\n"
+ " (stat_name = 'n_diff_pfx03' AND stat_value = %d AND"
+ " sample_size = '%d' AND stat_description = '%s,%s,%s') OR\n"
+ " (stat_name = 'n_diff_pfx04' AND stat_value = %d AND"
+ " sample_size = '%d' AND stat_description = '%s,%s,%s,%s')\n"
+ ");\n"
+ "\n",
+ INDEX_STATS_NAME_PRINT,
+ TEST_DATABASE_NAME,
+ TEST_TABLE_NAME,
+ TEST_IDX2_NAME,
+ TEST_IDX2_INDEX_SIZE,
+ TEST_IDX2_N_LEAF_PAGES,
+ TEST_IDX2_N_DIFF1,
+ TEST_IDX2_N_DIFF1_SAMPLE_SIZE, TEST_IDX2_COL1_NAME,
+ TEST_IDX2_N_DIFF2,
+ TEST_IDX2_N_DIFF2_SAMPLE_SIZE,
+ TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME,
+ TEST_IDX2_N_DIFF3,
+ TEST_IDX2_N_DIFF3_SAMPLE_SIZE,
+ TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
+ TEST_IDX2_N_DIFF4,
+ TEST_IDX2_N_DIFF4_SAMPLE_SIZE,
+ TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
+ TEST_IDX2_COL4_NAME);
+}
+/* @} */
+
+/* test_dict_stats_fetch_from_ps() @{ */
+/** Unit test for dict_stats_fetch_from_ps().
+Crafts a dummy table with two indexes carrying the same names as in
+test_dict_stats_save(), fetches the statistics with
+dict_stats_fetch_from_ps() and asserts with ut_a() that every fetched
+value equals the corresponding TEST_* constant. */
+void
+test_dict_stats_fetch_from_ps()
+{
+ dict_table_t table;
+ dict_index_t index1;
+ ib_uint64_t index1_stat_n_diff_key_vals[1];
+ ib_uint64_t index1_stat_n_sample_sizes[1];
+ dict_index_t index2;
+ ib_uint64_t index2_stat_n_diff_key_vals[4];
+ ib_uint64_t index2_stat_n_sample_sizes[4];
+ dberr_t ret;
+
+ /* craft a dummy dict_table_t */
+ table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
+ UT_LIST_INIT(table.indexes, &dict_index_t::indexes);
+#ifdef BTR_CUR_HASH_ADAPT
+ UT_LIST_INIT(table.freed_indexes, &dict_index_t::indexes);
+#endif /* BTR_CUR_HASH_ADAPT */
+ UT_LIST_ADD_LAST(table.indexes, &index1);
+ UT_LIST_ADD_LAST(table.indexes, &index2);
+ ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
+
+ /* only the name, the flags and the output arrays are set up;
+ the statistics themselves are checked after the fetch below */
+ index1.name = TEST_IDX1_NAME;
+ ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
+ index1.cached = 1;
+ index1.n_uniq = 1;
+ index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
+ index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
+
+ index2.name = TEST_IDX2_NAME;
+ ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
+ index2.cached = 1;
+ index2.n_uniq = 4;
+ index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
+ index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
+
+ ret = dict_stats_fetch_from_ps(&table);
+
+ ut_a(ret == DB_SUCCESS);
+
+ /* table-level statistics */
+ ut_a(table.stat_n_rows == TEST_N_ROWS);
+ ut_a(table.stat_clustered_index_size == TEST_CLUSTERED_INDEX_SIZE);
+ ut_a(table.stat_sum_of_other_index_sizes
+ == TEST_SUM_OF_OTHER_INDEX_SIZES);
+
+ /* statistics of the first index */
+ ut_a(index1.stat_index_size == TEST_IDX1_INDEX_SIZE);
+ ut_a(index1.stat_n_leaf_pages == TEST_IDX1_N_LEAF_PAGES);
+ ut_a(index1_stat_n_diff_key_vals[0] == TEST_IDX1_N_DIFF1);
+ ut_a(index1_stat_n_sample_sizes[0] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE);
+
+ /* statistics of the second index */
+ ut_a(index2.stat_index_size == TEST_IDX2_INDEX_SIZE);
+ ut_a(index2.stat_n_leaf_pages == TEST_IDX2_N_LEAF_PAGES);
+ ut_a(index2_stat_n_diff_key_vals[0] == TEST_IDX2_N_DIFF1);
+ ut_a(index2_stat_n_sample_sizes[0] == TEST_IDX2_N_DIFF1_SAMPLE_SIZE);
+ ut_a(index2_stat_n_diff_key_vals[1] == TEST_IDX2_N_DIFF2);
+ ut_a(index2_stat_n_sample_sizes[1] == TEST_IDX2_N_DIFF2_SAMPLE_SIZE);
+ ut_a(index2_stat_n_diff_key_vals[2] == TEST_IDX2_N_DIFF3);
+ ut_a(index2_stat_n_sample_sizes[2] == TEST_IDX2_N_DIFF3_SAMPLE_SIZE);
+ ut_a(index2_stat_n_diff_key_vals[3] == TEST_IDX2_N_DIFF4);
+ ut_a(index2_stat_n_sample_sizes[3] == TEST_IDX2_N_DIFF4_SAMPLE_SIZE);
+
+ printf("OK: fetch successful\n");
+}
+/* @} */
+
+/* test_dict_stats_all() @{ */
+/** Runs all unit tests defined in this section, in order: the schema
+check test, then the save test, then the fetch test. */
+void
+test_dict_stats_all()
+{
+ test_dict_table_schema_check();
+
+ test_dict_stats_save();
+
+ /* runs after the save test; it asserts on the same TEST_* values
+ that the save test stores */
+ test_dict_stats_fetch_from_ps();
+}
+/* @} */
+
+#endif /* UNIV_ENABLE_UNIT_TEST_DICT_STATS */
+/* @} */
diff --git a/storage/innobase/dict/dict0stats_bg.cc b/storage/innobase/dict/dict0stats_bg.cc
new file mode 100644
index 00000000..afeb8ef6
--- /dev/null
+++ b/storage/innobase/dict/dict0stats_bg.cc
@@ -0,0 +1,479 @@
+/*****************************************************************************
+
+Copyright (c) 2012, 2017, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2021, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dict/dict0stats_bg.cc
+Code used for background table and index stats gathering.
+
+Created Apr 25, 2012 Vasil Dimov
+*******************************************************/
+
+#include "dict0dict.h"
+#include "dict0stats.h"
+#include "dict0stats_bg.h"
+#include "dict0defrag_bg.h"
+#include "row0mysql.h"
+#include "srv0start.h"
+#include "fil0fil.h"
+#ifdef WITH_WSREP
+# include "trx0trx.h"
+# include "mysql/service_wsrep.h"
+# include "wsrep.h"
+# include "log.h"
+# include "wsrep_mysqld.h"
+#endif
+
+#include <vector>
+
+/** Minimum time interval between stats recalc for a given table */
+#define MIN_RECALC_INTERVAL 10 /* seconds */
+/** Arm the statistics timer; forward declaration, the definition is
+further below in this file.
+@param[in] ms delay in milliseconds */
+static void dict_stats_schedule(int ms);
+
+#ifdef UNIV_DEBUG
+/** Used by SET GLOBAL innodb_dict_stats_disabled_debug = 1; */
+my_bool innodb_dict_stats_disabled_debug;
+#endif /* UNIV_DEBUG */
+
+/** This mutex protects the "recalc_pool" variable. */
+static ib_mutex_t recalc_pool_mutex;
+
+/** Allocator type, used by std::vector */
+typedef ut_allocator<table_id_t>
+ recalc_pool_allocator_t;
+
+/** The multitude of tables whose stats are to be automatically
+recalculated - an STL vector */
+typedef std::vector<table_id_t, recalc_pool_allocator_t>
+ recalc_pool_t;
+
+/** Iterator type for iterating over the elements of objects of type
+recalc_pool_t. */
+typedef recalc_pool_t::iterator
+ recalc_pool_iterator_t;
+
+/** Pool where we store information on which tables are to be processed
+by background statistics gathering. */
+static recalc_pool_t recalc_pool;
+/** Whether the global data structures have been initialized;
+set by dict_stats_init() and cleared by dict_stats_deinit() */
+static bool stats_initialised;
+
+/** Empty the recalc pool and the defrag pool and release the buffers
+they hold. Called from dict_stats_deinit(). */
+static void dict_stats_recalc_pool_deinit()
+{
+	ut_ad(!srv_read_only_mode);
+
+	recalc_pool.clear();
+	defrag_pool.clear();
+
+	/* clear() may leave the vectors' buffers allocated until the
+	global objects are destroyed. The memory leak detector runs before
+	those destructors and would report the buffers as leaked. Hand the
+	buffers over to local vectors instead; they are freed when the
+	locals go out of scope at the end of this function. */
+	recalc_pool_t	recalc_released;
+	defrag_pool_t	defrag_released;
+
+	recalc_released.swap(recalc_pool);
+	defrag_released.swap(defrag_pool);
+}
+
+/** Add a table to the recalc pool, which is processed by the background
+stats gathering task. Only the table id is stored, so the table can be
+closed after being enqueued; it is opened again when needed. If the table
+no longer exists by then (it has been DROPped), its id is skipped.
+A table id that is already in the pool is not added a second time.
+@param[in]	table				table to add
+@param[in]	schedule_dict_stats_task	whether to schedule the stats
+						task right away when the pool
+						was empty before this call */
+static
+void
+dict_stats_recalc_pool_add(
+	const dict_table_t*	table,
+	bool			schedule_dict_stats_task = true)
+{
+	ut_ad(!srv_read_only_mode);
+
+	mutex_enter(&recalc_pool_mutex);
+
+	bool	already_queued = false;
+
+	for (const table_id_t queued_id : recalc_pool) {
+		if (queued_id == table->id) {
+			already_queued = true;
+			break;
+		}
+	}
+
+	if (!already_queued) {
+		recalc_pool.push_back(table->id);
+
+		/* the pool just went from empty to one entry */
+		if (recalc_pool.size() == 1 && schedule_dict_stats_task) {
+			dict_stats_schedule_now();
+		}
+	}
+
+	mutex_exit(&recalc_pool_mutex);
+}
+
+#ifdef WITH_WSREP
+/** Update the table modification counter and if necessary,
+schedule new estimates for table and index statistics to be calculated.
+@param[in,out] table persistent or temporary table
+@param[in] trx transaction; its wsrep state and session are checked
+ before the table is queued for background recalculation */
+void dict_stats_update_if_needed(dict_table_t *table, const trx_t &trx)
+#else
+/** Update the table modification counter and if necessary,
+schedule new estimates for table and index statistics to be calculated.
+@param[in,out] table persistent or temporary table */
+void dict_stats_update_if_needed_func(dict_table_t *table)
+#endif
+{
+ ut_ad(!mutex_own(&dict_sys.mutex));
+
+ if (UNIV_UNLIKELY(!table->stat_initialized)) {
+ /* The table may have been evicted from dict_sys
+ and reloaded internally by InnoDB for FOREIGN KEY
+ processing, but not reloaded by the SQL layer.
+
+ We can (re)compute the transient statistics when the
+ table is actually loaded by the SQL layer.
+
+ Note: If InnoDB persistent statistics are enabled,
+ we will skip the updates. We must do this, because
+ dict_table_get_n_rows() below assumes that the
+ statistics have been initialized. The DBA may have
+ to execute ANALYZE TABLE. */
+ return;
+ }
+
+ /* "counter" is the value before this call's increment */
+ ulonglong counter = table->stat_modified_counter++;
+ ulonglong n_rows = dict_table_get_n_rows(table);
+
+ if (dict_stats_is_persistent_enabled(table)) {
+ /* persistent statistics: queue the table for background
+ recalculation instead of recalculating here */
+ if (counter > n_rows / 10 /* 10% */
+ && dict_stats_auto_recalc_is_enabled(table)) {
+
+#ifdef WITH_WSREP
+ /* Do not add table to background
+ statistic calculation if this thread is not a
+ applier (as all DDL, which is replicated (i.e
+ is binlogged in master node), will be executed
+ with high priority (a.k.a BF) in slave nodes)
+ and is BF. This could again lead BF lock
+ waits in applier node but it is better than
+ no persistent index/table statistics at
+ applier nodes. TODO: allow BF threads
+ wait for these InnoDB internal SQL-parser
+ generated row locks and allow BF thread
+ lock waits to be enqueued at head of waiting
+ queue. */
+ if (trx.is_wsrep()
+ && !wsrep_thd_is_applying(trx.mysql_thd)
+ && wsrep_thd_is_BF(trx.mysql_thd, 0)) {
+ WSREP_DEBUG("Avoiding background statistics"
+ " calculation for table %s.",
+ table->name.m_name);
+ return;
+ }
+#endif /* WITH_WSREP */
+
+ dict_stats_recalc_pool_add(table);
+ table->stat_modified_counter = 0;
+ }
+ return;
+ }
+
+ /* Calculate new statistics if 1 / 16 of table has been modified
+ since the last time a statistics batch was run.
+ We calculate statistics at most every 16th round, since we may have
+ a counter table which is very small and updated very often. */
+ ulonglong threshold = 16 + n_rows / 16; /* 6.25% */
+
+ /* a non-zero srv_stats_modified_counter caps the threshold */
+ if (srv_stats_modified_counter) {
+ threshold = std::min(srv_stats_modified_counter, threshold);
+ }
+
+ if (counter > threshold) {
+ /* this will reset table->stat_modified_counter to 0 */
+ dict_stats_update(table, DICT_STATS_RECALC_TRANSIENT);
+ }
+}
+
+/** Take the first table id out of the auto recalc pool.
+@param[out]	id	first table id in the pool; left unmodified when the
+			pool is empty
+@return true if the pool was non-empty and "id" was set, false otherwise */
+static
+bool
+dict_stats_recalc_pool_get(
+	table_id_t*	id)
+{
+	ut_ad(!srv_read_only_mode);
+
+	mutex_enter(&recalc_pool_mutex);
+
+	const bool	have_entry = !recalc_pool.empty();
+
+	if (have_entry) {
+		/* hand out the oldest entry and drop it from the pool */
+		*id = recalc_pool.front();
+		recalc_pool.erase(recalc_pool.begin());
+	}
+
+	mutex_exit(&recalc_pool_mutex);
+
+	return have_entry;
+}
+
+/** Remove a given table from the auto recalc pool, if it is in there.
+The caller must own dict_sys.mutex (asserted in debug builds).
+@param[in]	table	table to remove */
+void
+dict_stats_recalc_pool_del(
+	const dict_table_t*	table)
+{
+	ut_ad(!srv_read_only_mode);
+	ut_ad(mutex_own(&dict_sys.mutex));
+
+	mutex_enter(&recalc_pool_mutex);
+
+	ut_ad(table->id > 0);
+
+	/* advance to the first entry holding this table id, if any */
+	recalc_pool_iterator_t	pos = recalc_pool.begin();
+
+	while (pos != recalc_pool.end() && *pos != table->id) {
+		++pos;
+	}
+
+	if (pos != recalc_pool.end()) {
+		/* erase() invalidates the iterator, it is not used
+		afterwards */
+		recalc_pool.erase(pos);
+	}
+
+	mutex_exit(&recalc_pool_mutex);
+}
+
+/** Wait until the background stats task has stopped using the specified
+table. The caller must have locked the data dictionary using
+row_mysql_lock_data_dictionary(); this function may unlock it temporarily
+and restores the lock before it exits.
+The background stats task is guaranteed not to start using the specified
+table after this function returns and before the caller unlocks the data
+dictionary, because it sets the BG_STAT_IN_PROGRESS bit in
+table->stats_bg_flag under dict_sys.mutex.
+@param[in,out]	table	table
+@param[in,out]	trx	transaction to use for unlocking/locking the
+			data dictionary */
+void
+dict_stats_wait_bg_to_stop_using_table(
+	dict_table_t*	table,
+	trx_t*		trx)
+{
+	for (;;) {
+		if (dict_stats_stop_bg(table)) {
+			return;
+		}
+
+		/* not stopped yet: yield and ask again */
+		DICT_BG_YIELD(trx);
+	}
+}
+
+/*****************************************************************//**
+Initialize global variables needed for the operation of dict_stats_thread()
+Must be called before dict_stats_thread() is started.
+Creates recalc_pool_mutex, initializes the defrag pool and marks the
+module as initialised so that dict_stats_deinit() will release the
+resources again. */
+void dict_stats_init()
+{
+ ut_ad(!srv_read_only_mode);
+
+ /* The recalc_pool_mutex is acquired from:
+ 1) the background stats gathering thread before any other latch
+ and released without latching anything else in between (thus
+ any level would do here)
+ 2) from dict_stats_update_if_needed()
+ and released without latching anything else in between. We know
+ that dict_sys.mutex (SYNC_DICT) is not acquired when
+ dict_stats_update_if_needed() is called and it may be acquired
+ inside that function (thus a level <=SYNC_DICT would do).
+ 3) from row_drop_table_for_mysql() after dict_sys.mutex (SYNC_DICT)
+ and dict_sys.latch (SYNC_DICT_OPERATION) have been locked
+ (thus a level <SYNC_DICT && <SYNC_DICT_OPERATION would do)
+ So we choose SYNC_STATS_AUTO_RECALC to be about below SYNC_DICT. */
+
+ mutex_create(LATCH_ID_RECALC_POOL, &recalc_pool_mutex);
+
+ dict_defrag_pool_init();
+ /* checked by dict_stats_deinit() before it frees anything */
+ stats_initialised = true;
+}
+
+/** Free the resources allocated by dict_stats_init(); must be called
+after the dict_stats task has exited. Does nothing if dict_stats_init()
+has not been called, or if the resources were already freed. */
+void dict_stats_deinit()
+{
+	if (stats_initialised) {
+		ut_ad(!srv_read_only_mode);
+
+		/* clear the flag first so that a repeated call
+		is a no-op */
+		stats_initialised = false;
+
+		dict_stats_recalc_pool_deinit();
+		dict_defrag_pool_deinit();
+
+		mutex_free(&recalc_pool_mutex);
+	}
+}
+
+/** Take the first usable table out of the auto recalc pool and update
+its persistent statistics, unless they were recalculated less than
+MIN_RECALC_INTERVAL seconds ago. In that case the table is put back into
+the pool and the stats task is rescheduled to run after that interval.
+Ids of tables that no longer exist or are not accessible are skipped.
+@return whether the first entry can be processed immediately; false if
+the pool is empty or if the table was put back because it is too early
+to recalculate its statistics */
+static bool dict_stats_process_entry_from_recalc_pool()
+{
+	ut_ad(!srv_read_only_mode);
+
+	dict_table_t*	table;
+
+	/* Pop ids until one refers to a usable table. The loop is left
+	through "break" with dict_sys.mutex still held. */
+	for (;;) {
+		table_id_t	table_id;
+
+		if (!dict_stats_recalc_pool_get(&table_id)) {
+			/* no tables for auto recalc */
+			return false;
+		}
+
+		mutex_enter(&dict_sys.mutex);
+
+		table = dict_table_open_on_id(table_id, TRUE,
+					      DICT_TABLE_OP_NORMAL);
+
+		if (table != NULL) {
+			ut_ad(!table->is_temporary());
+
+			if (table->is_accessible()) {
+				break;
+			}
+
+			dict_table_close(table, TRUE, FALSE);
+		}
+		/* else: the table does not exist, it must have been
+		DROPped after its id was enqueued */
+
+		mutex_exit(&dict_sys.mutex);
+	}
+
+	table->stats_bg_flag |= BG_STAT_IN_PROGRESS;
+
+	mutex_exit(&dict_sys.mutex);
+
+	/* time() could be expensive. This function is called once every
+	time a table has been changed by more than 10%, and on a system
+	with lots of small tables this could become hot. If that turns out
+	to be a problem, the check below could be replaced with something
+	else, though a time interval is the natural approach. */
+	const bool	too_soon
+		= difftime(time(NULL), table->stats_last_recalc)
+		< MIN_RECALC_INTERVAL;
+
+	if (too_soon) {
+		/* Stats were (re)calculated not long ago. To avoid too
+		frequent stats updates, put the table back on the auto
+		recalc list and come back later. */
+		dict_stats_recalc_pool_add(table, false);
+		dict_stats_schedule(MIN_RECALC_INTERVAL*1000);
+	} else {
+		dict_stats_update(table, DICT_STATS_RECALC_PERSISTENT);
+	}
+
+	mutex_enter(&dict_sys.mutex);
+
+	table->stats_bg_flag = BG_STAT_NONE;
+
+	dict_table_close(table, TRUE, FALSE);
+
+	mutex_exit(&dict_sys.mutex);
+
+	return !too_soon;
+}
+
+#ifdef UNIV_DEBUG
+/** Update function for
+	SET GLOBAL innodb_dict_stats_disabled_debug = 1 (0).
+A true value shuts the statistics timer down, a false value starts it.
+@param[in]	save	immediate result from check function; read as a
+			my_bool */
+void dict_stats_disabled_debug_update(THD*, st_mysql_sys_var*, void*,
+				      const void* save)
+{
+	if (*static_cast<const my_bool*>(save)) {
+		dict_stats_shutdown();
+	} else {
+		dict_stats_start();
+	}
+}
+#endif /* UNIV_DEBUG */
+
+/** Timer that runs dict_stats_func(); created by dict_stats_start() and
+deleted by dict_stats_shutdown() */
+static tpool::timer* dict_stats_timer;
+/** Held by dict_stats_start(), dict_stats_schedule() and
+dict_stats_shutdown() while they access dict_stats_timer */
+static std::mutex dict_stats_mutex;
+
+/** Timer callback: process entries from the recalc pool for as long as
+dict_stats_process_entry_from_recalc_pool() reports that more can be
+processed immediately, then process the defrag pool. */
+static void dict_stats_func(void*)
+{
+	for (;;) {
+		if (!dict_stats_process_entry_from_recalc_pool()) {
+			break;
+		}
+	}
+
+	dict_defrag_process_entries_from_defrag_pool();
+}
+
+
+/** Create the statistics timer, unless it already exists. */
+void dict_stats_start()
+{
+	std::lock_guard<std::mutex> guard(dict_stats_mutex);
+
+	if (dict_stats_timer) {
+		return;
+	}
+
+	dict_stats_timer = srv_thread_pool->create_timer(dict_stats_func);
+}
+
+
+/** Arm the statistics timer, if it exists.
+The call is silently skipped when dict_stats_mutex cannot be acquired
+without waiting.
+@param[in]	ms	delay in milliseconds */
+static void dict_stats_schedule(int ms)
+{
+	/* Only try to lock, to avoid a deadlock with
+	dict_stats_shutdown(), which uses dict_stats_mutex too. If there
+	is a simultaneous timer reschedule, the first one wins, which is
+	fine. */
+	std::unique_lock<std::mutex> guard(dict_stats_mutex,
+					   std::try_to_lock);
+
+	if (!guard.owns_lock()) {
+		return;
+	}
+
+	if (dict_stats_timer) {
+		dict_stats_timer->set_time(ms, 0);
+	}
+}
+
+/** Schedule the statistics task with a delay of 0 milliseconds. */
+void dict_stats_schedule_now()
+{
+ dict_stats_schedule(0);
+}
+
+/** Shut down the statistics task: delete the timer, if any, and reset
+the pointer so that dict_stats_start() can create a new one. */
+void dict_stats_shutdown()
+{
+	std::lock_guard<std::mutex> guard(dict_stats_mutex);
+
+	delete dict_stats_timer;
+	dict_stats_timer = nullptr;
+}