summaryrefslogtreecommitdiffstats
path: root/storage/innobase/row/row0import.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/row/row0import.cc')
-rw-r--r--storage/innobase/row/row0import.cc4585
1 files changed, 4585 insertions, 0 deletions
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
new file mode 100644
index 00000000..d2609fdb
--- /dev/null
+++ b/storage/innobase/row/row0import.cc
@@ -0,0 +1,4585 @@
+/*****************************************************************************
+
+Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2015, 2023, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0import.cc
+Import a tablespace to a running instance.
+
+Created 2012-02-08 by Sunny Bains.
+*******************************************************/
+
+#include "row0import.h"
+#include "btr0pcur.h"
+#ifdef BTR_CUR_HASH_ADAPT
+# include "btr0sea.h"
+#endif
+#include "buf0flu.h"
+#include "que0que.h"
+#include "dict0boot.h"
+#include "dict0load.h"
+#include "pars0pars.h"
+#include "row0row.h"
+#include "row0sel.h"
+#include "row0mysql.h"
+#include "srv0start.h"
+#include "row0quiesce.h"
+#include "fil0pagecompress.h"
+#include "trx0undo.h"
+#include "lock0lock.h"
+#include "lzo/lzo1x.h"
+#include "snappy-c.h"
+#include "log.h"
+
+#include "scope.h"
+
+#include <vector>
+
+#ifdef HAVE_MY_AES_H
+#include <my_aes.h>
+#endif
+
+using st_::span;
+
+/** The size of the buffer to use for IO.
+@param n physical page size
+@return number of pages */
+#define IO_BUFFER_SIZE(n) ((1024 * 1024) / (n))
+
+/** For gathering stats on records during phase I */
+struct row_stats_t {
+ ulint m_n_deleted; /*!< Number of deleted records
+ found in the index */
+
+ ulint m_n_purged; /*!< Number of records purged
+ optimisatically */
+
+ ulint m_n_rows; /*!< Number of rows */
+
+ ulint m_n_purge_failed; /*!< Number of deleted rows
+ that could not be purged */
+};
+
+/** Index information required by IMPORT. */
+struct row_index_t {
+ index_id_t m_id; /*!< Index id of the table
+ in the exporting server */
+ byte* m_name; /*!< Index name */
+
+ uint32_t m_space; /*!< Space where it is placed */
+
+ uint32_t m_page_no; /*!< Root page number */
+
+ ulint m_type; /*!< Index type */
+
+ ulint m_trx_id_offset; /*!< Relevant only for clustered
+ indexes, offset of transaction
+ id system column */
+
+ ulint m_n_user_defined_cols; /*!< User defined columns */
+
+ ulint m_n_uniq; /*!< Number of columns that can
+ uniquely identify the row */
+
+ ulint m_n_nullable; /*!< Number of nullable
+ columns */
+
+ ulint m_n_fields; /*!< Total number of fields */
+
+ dict_field_t* m_fields; /*!< Index fields */
+
+ const dict_index_t*
+ m_srv_index; /*!< Index instance in the
+ importing server */
+
+ row_stats_t m_stats; /*!< Statistics gathered during
+ the import phase */
+
+};
+
+/** Meta data required by IMPORT. */
+struct row_import {
+ row_import() UNIV_NOTHROW
+ :
+ m_table(NULL),
+ m_version(0),
+ m_hostname(NULL),
+ m_table_name(NULL),
+ m_autoinc(0),
+ m_zip_size(0),
+ m_flags(0),
+ m_n_cols(0),
+ m_cols(NULL),
+ m_col_names(NULL),
+ m_n_indexes(0),
+ m_indexes(NULL),
+ m_missing(true) { }
+
+ ~row_import() UNIV_NOTHROW;
+
+ /** Find the index entry in in the indexes array.
+ @param name index name
+ @return instance if found else 0. */
+ row_index_t* get_index(const char* name) const UNIV_NOTHROW;
+
+ /** Get the number of rows in the index.
+ @param name index name
+ @return number of rows (doesn't include delete marked rows). */
+ ulint get_n_rows(const char* name) const UNIV_NOTHROW;
+
+ /** Find the ordinal value of the column name in the cfg table columns.
+ @param name of column to look for.
+ @return ULINT_UNDEFINED if not found. */
+ ulint find_col(const char* name) const UNIV_NOTHROW;
+
+ /** Get the number of rows for which purge failed during the
+ convert phase.
+ @param name index name
+ @return number of rows for which purge failed. */
+ ulint get_n_purge_failed(const char* name) const UNIV_NOTHROW;
+
+ /** Check if the index is clean. ie. no delete-marked records
+ @param name index name
+ @return true if index needs to be purged. */
+ bool requires_purge(const char* name) const UNIV_NOTHROW
+ {
+ return(get_n_purge_failed(name) > 0);
+ }
+
+ /** Set the index root <space, pageno> using the index name */
+ void set_root_by_name() UNIV_NOTHROW;
+
+ /** Set the index root <space, pageno> using a heuristic
+ @return DB_SUCCESS or error code */
+ dberr_t set_root_by_heuristic() UNIV_NOTHROW;
+
+ /** Check if the index schema that was read from the .cfg file
+ matches the in memory index definition.
+ Note: It will update row_import_t::m_srv_index to map the meta-data
+ read from the .cfg file to the server index instance.
+ @return DB_SUCCESS or error code. */
+ dberr_t match_index_columns(
+ THD* thd,
+ const dict_index_t* index) UNIV_NOTHROW;
+
+ /** Check if the table schema that was read from the .cfg file
+ matches the in memory table definition.
+ @param thd MySQL session variable
+ @return DB_SUCCESS or error code. */
+ dberr_t match_table_columns(
+ THD* thd) UNIV_NOTHROW;
+
+ /** Check if the table (and index) schema that was read from the
+ .cfg file matches the in memory table definition.
+ @param thd MySQL session variable
+ @return DB_SUCCESS or error code. */
+ dberr_t match_schema(
+ THD* thd) UNIV_NOTHROW;
+
+ dberr_t match_flags(THD *thd) const ;
+
+
+ dict_table_t* m_table; /*!< Table instance */
+
+ ulint m_version; /*!< Version of config file */
+
+ byte* m_hostname; /*!< Hostname where the
+ tablespace was exported */
+ byte* m_table_name; /*!< Exporting instance table
+ name */
+
+ ib_uint64_t m_autoinc; /*!< Next autoinc value */
+
+ ulint m_zip_size; /*!< ROW_FORMAT=COMPRESSED
+ page size, or 0 */
+
+ ulint m_flags; /*!< Table flags */
+
+ ulint m_n_cols; /*!< Number of columns in the
+ meta-data file */
+
+ dict_col_t* m_cols; /*!< Column data */
+
+ byte** m_col_names; /*!< Column names, we store the
+ column naems separately becuase
+ there is no field to store the
+ value in dict_col_t */
+
+ ulint m_n_indexes; /*!< Number of indexes,
+ including clustered index */
+
+ row_index_t* m_indexes; /*!< Index meta data */
+
+ bool m_missing; /*!< true if a .cfg file was
+ found and was readable */
+};
+
+struct fil_iterator_t {
+ pfs_os_file_t file; /*!< File handle */
+ const char* filepath; /*!< File path name */
+ os_offset_t start; /*!< From where to start */
+ os_offset_t end; /*!< Where to stop */
+ os_offset_t file_size; /*!< File size in bytes */
+ ulint n_io_buffers; /*!< Number of pages to use
+ for IO */
+ byte* io_buffer; /*!< Buffer to use for IO */
+ fil_space_crypt_t *crypt_data; /*!< Crypt data (if encrypted) */
+ byte* crypt_io_buffer; /*!< IO buffer when encrypted */
+};
+
+/** Use the page cursor to iterate over records in a block. */
+class RecIterator {
+public:
+ /** Default constructor */
+ RecIterator() UNIV_NOTHROW
+ {
+ memset(&m_cur, 0x0, sizeof(m_cur));
+ /* Make page_cur_delete_rec() happy. */
+ m_mtr.start();
+ m_mtr.set_log_mode(MTR_LOG_NO_REDO);
+ }
+
+ /** Position the cursor on the first user record. */
+ rec_t* open(buf_block_t* block, const dict_index_t* index) noexcept
+ MY_ATTRIBUTE((warn_unused_result))
+ {
+ m_cur.index = const_cast<dict_index_t*>(index);
+ page_cur_set_before_first(block, &m_cur);
+ return next();
+ }
+
+ /** Move to the next record. */
+ rec_t* next() noexcept MY_ATTRIBUTE((warn_unused_result))
+ {
+ return page_cur_move_to_next(&m_cur);
+ }
+
+ /**
+ @return the current record */
+ rec_t* current() UNIV_NOTHROW
+ {
+ ut_ad(!end());
+ return(page_cur_get_rec(&m_cur));
+ }
+
+ buf_block_t* current_block() const { return m_cur.block; }
+
+ /**
+ @return true if cursor is at the end */
+ bool end() UNIV_NOTHROW
+ {
+ return(page_cur_is_after_last(&m_cur) == TRUE);
+ }
+
+ /** Remove the current record
+ @return true on success */
+ bool remove(rec_offs* offsets) UNIV_NOTHROW
+ {
+ const dict_index_t* const index = m_cur.index;
+ ut_ad(page_is_leaf(m_cur.block->page.frame));
+ /* We can't end up with an empty page unless it is root. */
+ if (page_get_n_recs(m_cur.block->page.frame) <= 1) {
+ return(false);
+ }
+
+ if (!rec_offs_any_extern(offsets)
+ && m_cur.block->page.id().page_no() != index->page
+ && ((page_get_data_size(m_cur.block->page.frame)
+ - rec_offs_size(offsets)
+ < BTR_CUR_PAGE_COMPRESS_LIMIT(index))
+ || !page_has_siblings(m_cur.block->page.frame)
+ || (page_get_n_recs(m_cur.block->page.frame) < 2))) {
+ return false;
+ }
+
+#ifdef UNIV_ZIP_DEBUG
+ page_zip_des_t* page_zip = buf_block_get_page_zip(m_cur.block);
+ ut_a(!page_zip || page_zip_validate(
+ page_zip, m_cur.block->page.frame, index));
+#endif /* UNIV_ZIP_DEBUG */
+
+ page_cur_delete_rec(&m_cur, offsets, &m_mtr);
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(
+ page_zip, m_cur.block->page.frame, index));
+#endif /* UNIV_ZIP_DEBUG */
+
+ return true;
+ }
+
+private:
+ page_cur_t m_cur;
+public:
+ mtr_t m_mtr;
+};
+
+/** Class that purges delete marked records from indexes, both secondary
+and cluster. It does a pessimistic delete. This should only be done if we
+couldn't purge the delete marked reocrds during Phase I. */
+class IndexPurge {
+public:
+ /** Constructor
+ @param trx the user transaction covering the import tablespace
+ @param index to be imported
+ @param space_id space id of the tablespace */
+ IndexPurge(
+ trx_t* trx,
+ dict_index_t* index) UNIV_NOTHROW
+ :
+ m_trx(trx),
+ m_index(index),
+ m_n_rows(0)
+ {
+ ib::info() << "Phase II - Purge records from index "
+ << index->name;
+ }
+
+ /** Destructor */
+ ~IndexPurge() UNIV_NOTHROW = default;
+
+ /** Purge delete marked records.
+ @return DB_SUCCESS or error code. */
+ dberr_t garbage_collect() UNIV_NOTHROW;
+
+ /** The number of records that are not delete marked.
+ @return total records in the index after purge */
+ ulint get_n_rows() const UNIV_NOTHROW
+ {
+ return(m_n_rows);
+ }
+
+private:
+ /** Begin import, position the cursor on the first record. */
+ inline bool open() noexcept;
+
+ /** Close the persistent cursor and commit the mini-transaction. */
+ void close() noexcept { m_mtr.commit(); btr_pcur_close(&m_pcur); }
+
+ /** Position the cursor on the next record.
+ @return DB_SUCCESS or error code */
+ dberr_t next() noexcept;
+
+ /** Store the persistent cursor position and reopen the
+ B-tree cursor in BTR_MODIFY_TREE mode, because the
+ tree structure may be changed during a pessimistic delete. */
+ inline dberr_t purge_pessimistic_delete() noexcept;
+
+ /** Purge a delete-marked record. */
+ dberr_t purge() noexcept;
+
+protected:
+ // Disable copying
+ IndexPurge();
+ IndexPurge(const IndexPurge&);
+ IndexPurge &operator=(const IndexPurge&);
+
+private:
+ trx_t* m_trx; /*!< User transaction */
+ mtr_t m_mtr; /*!< Mini-transaction */
+ btr_pcur_t m_pcur; /*!< Persistent cursor */
+ dict_index_t* m_index; /*!< Index to be processed */
+ ulint m_n_rows; /*!< Records in index */
+};
+
+/** Functor that is called for each physical page that is read from the
+tablespace file. */
+class AbstractCallback
+{
+public:
+ /** Constructor
+ @param trx covering transaction */
+ AbstractCallback(trx_t* trx, uint32_t space_id)
+ :
+ m_zip_size(0),
+ m_trx(trx),
+ m_space(space_id),
+ m_xdes(),
+ m_xdes_page_no(UINT32_MAX),
+ m_space_flags(UINT32_MAX) UNIV_NOTHROW { }
+
+ /** Free any extent descriptor instance */
+ virtual ~AbstractCallback()
+ {
+ UT_DELETE_ARRAY(m_xdes);
+ }
+
+ /** Determine the page size to use for traversing the tablespace
+ @param file_size size of the tablespace file in bytes
+ @param block contents of the first page in the tablespace file.
+ @retval DB_SUCCESS or error code. */
+ virtual dberr_t init(
+ os_offset_t file_size,
+ const buf_block_t* block) UNIV_NOTHROW;
+
+ /** @return true if compressed table. */
+ bool is_compressed_table() const UNIV_NOTHROW
+ {
+ return get_zip_size();
+ }
+
+ /** @return the tablespace flags */
+ uint32_t get_space_flags() const { return m_space_flags; }
+
+ /**
+ Set the name of the physical file and the file handle that is used
+ to open it for the file that is being iterated over.
+ @param filename the physical name of the tablespace file
+ @param file OS file handle */
+ void set_file(const char* filename, pfs_os_file_t file) UNIV_NOTHROW
+ {
+ m_file = file;
+ m_filepath = filename;
+ }
+
+ ulint get_zip_size() const { return m_zip_size; }
+ ulint physical_size() const
+ {
+ return m_zip_size ? m_zip_size : srv_page_size;
+ }
+
+ const char* filename() const { return m_filepath; }
+
+ /**
+ Called for every page in the tablespace. If the page was not
+ updated then its state must be set to BUF_PAGE_NOT_USED. For
+ compressed tables the page descriptor memory will be at offset:
+ block->page.frame + srv_page_size;
+ @param block block read from file, note it is not from the buffer pool
+ @retval DB_SUCCESS or error code. */
+ virtual dberr_t operator()(buf_block_t* block) UNIV_NOTHROW = 0;
+
+ /** @return the tablespace identifier */
+ uint32_t get_space_id() const { return m_space; }
+
+ bool is_interrupted() const { return trx_is_interrupted(m_trx); }
+
+ /**
+ Get the data page depending on the table type, compressed or not.
+ @param block - block read from disk
+ @retval the buffer frame */
+ static byte* get_frame(const buf_block_t* block)
+ {
+ return block->page.zip.data
+ ? block->page.zip.data : block->page.frame;
+ }
+
+ /** Invoke the functionality for the callback */
+ virtual dberr_t run(const fil_iterator_t& iter,
+ buf_block_t* block) UNIV_NOTHROW = 0;
+
+protected:
+ /** Get the physical offset of the extent descriptor within the page.
+ @param page_no page number of the extent descriptor
+ @param page contents of the page containing the extent descriptor.
+ @return the start of the xdes array in a page */
+ const xdes_t* xdes(
+ ulint page_no,
+ const page_t* page) const UNIV_NOTHROW
+ {
+ ulint offset;
+
+ offset = xdes_calc_descriptor_index(get_zip_size(), page_no);
+
+ return(page + XDES_ARR_OFFSET + XDES_SIZE * offset);
+ }
+
+ /** Set the current page directory (xdes). If the extent descriptor is
+ marked as free then free the current extent descriptor and set it to
+ 0. This implies that all pages that are covered by this extent
+ descriptor are also freed.
+
+ @param page_no offset of page within the file
+ @param page page contents
+ @return DB_SUCCESS or error code. */
+ dberr_t set_current_xdes(
+ uint32_t page_no,
+ const page_t* page) UNIV_NOTHROW
+ {
+ m_xdes_page_no = page_no;
+
+ UT_DELETE_ARRAY(m_xdes);
+ m_xdes = NULL;
+
+ if (mach_read_from_4(XDES_ARR_OFFSET + XDES_STATE + page)
+ != XDES_FREE) {
+ const ulint physical_size = m_zip_size
+ ? m_zip_size : srv_page_size;
+
+ m_xdes = UT_NEW_ARRAY_NOKEY(xdes_t, physical_size);
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_13",
+ UT_DELETE_ARRAY(m_xdes);
+ m_xdes = NULL;
+ );
+
+ if (m_xdes == NULL) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ memcpy(m_xdes, page, physical_size);
+ }
+
+ return(DB_SUCCESS);
+ }
+
+ /** Check if the page is marked as free in the extent descriptor.
+ @param page_no page number to check in the extent descriptor.
+ @return true if the page is marked as free */
+ bool is_free(uint32_t page_no) const UNIV_NOTHROW
+ {
+ ut_a(xdes_calc_descriptor_page(get_zip_size(), page_no)
+ == m_xdes_page_no);
+
+ if (m_xdes != 0) {
+ const xdes_t* xdesc = xdes(page_no, m_xdes);
+ ulint pos = page_no % FSP_EXTENT_SIZE;
+
+ return xdes_is_free(xdesc, pos);
+ }
+
+ /* If the current xdes was free, the page must be free. */
+ return(true);
+ }
+
+protected:
+ /** The ROW_FORMAT=COMPRESSED page size, or 0. */
+ ulint m_zip_size;
+
+ /** File handle to the tablespace */
+ pfs_os_file_t m_file;
+
+ /** Physical file path. */
+ const char* m_filepath;
+
+ /** Covering transaction. */
+ trx_t* m_trx;
+
+ /** Space id of the file being iterated over. */
+ uint32_t m_space;
+
+ /** Current extent descriptor page */
+ xdes_t* m_xdes;
+
+ /** Physical page offset in the file of the extent descriptor */
+ uint32_t m_xdes_page_no;
+
+ /** Flags value read from the header page */
+ uint32_t m_space_flags;
+};
+
+ATTRIBUTE_COLD static dberr_t invalid_space_flags(uint32_t flags)
+{
+ if (fsp_flags_is_incompatible_mysql(flags))
+ {
+ sql_print_error("InnoDB: unsupported MySQL tablespace");
+ return DB_UNSUPPORTED;
+ }
+
+ sql_print_error("InnoDB: Invalid FSP_SPACE_FLAGS=0x%" PRIx32, flags);
+ return DB_CORRUPTION;
+}
+
+/** Determine the page size to use for traversing the tablespace
+@param file_size size of the tablespace file in bytes
+@param block contents of the first page in the tablespace file.
+@retval DB_SUCCESS or error code. */
+dberr_t
+AbstractCallback::init(
+ os_offset_t file_size,
+ const buf_block_t* block) UNIV_NOTHROW
+{
+ const page_t* page = block->page.frame;
+
+ m_space_flags = fsp_header_get_flags(page);
+ if (!fil_space_t::is_valid_flags(m_space_flags, true)) {
+ uint32_t cflags = fsp_flags_convert_from_101(m_space_flags);
+ if (cflags == UINT32_MAX) {
+ return DB_CORRUPTION;
+ }
+ m_space_flags = cflags;
+ }
+
+ /* Clear the DATA_DIR flag, which is basically garbage. */
+ m_space_flags &= ~(1U << FSP_FLAGS_POS_RESERVED);
+ m_zip_size = fil_space_t::zip_size(m_space_flags);
+ const ulint logical_size = fil_space_t::logical_size(m_space_flags);
+ const ulint physical_size = fil_space_t::physical_size(m_space_flags);
+
+ if (logical_size != srv_page_size) {
+
+ ib::error() << "Page size " << logical_size
+ << " of ibd file is not the same as the server page"
+ " size " << srv_page_size;
+
+ return(DB_CORRUPTION);
+
+ } else if (file_size & (physical_size - 1)) {
+
+ ib::error() << "File size " << file_size << " is not a"
+ " multiple of the page size "
+ << physical_size;
+
+ return(DB_CORRUPTION);
+ }
+
+ if (m_space == UINT32_MAX) {
+ m_space = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID
+ + page);
+ }
+
+ return set_current_xdes(0, page);
+}
+
+/**
+TODO: This can be made parallel trivially by chunking up the file
+and creating a callback per thread.. Main benefit will be to use
+multiple CPUs for checksums and compressed tables. We have to do
+compressed tables block by block right now. Secondly we need to
+decompress/compress and copy too much of data. These are
+CPU intensive.
+
+Iterate over all the pages in the tablespace.
+@param iter - Tablespace iterator
+@param block - block to use for IO
+@param callback - Callback to inspect and update page contents
+@retval DB_SUCCESS or error code */
+static dberr_t fil_iterate(
+ const fil_iterator_t& iter,
+ buf_block_t* block,
+ AbstractCallback& callback);
+
+/**
+Try and determine the index root pages by checking if the next/prev
+pointers are both FIL_NULL. We need to ensure that skip deleted pages. */
+struct FetchIndexRootPages : public AbstractCallback {
+
+ /** Index information gathered from the .ibd file. */
+ struct Index {
+
+ Index(index_id_t id, uint32_t page_no)
+ :
+ m_id(id),
+ m_page_no(page_no) { }
+
+ index_id_t m_id; /*!< Index id */
+ uint32_t m_page_no; /*!< Root page number */
+ };
+
+ /** Constructor
+ @param trx covering (user) transaction
+ @param table table definition in server .*/
+ FetchIndexRootPages(const dict_table_t* table, trx_t* trx)
+ :
+ AbstractCallback(trx, UINT32_MAX),
+ m_table(table), m_index(0, 0) UNIV_NOTHROW { }
+
+ /** Destructor */
+ ~FetchIndexRootPages() UNIV_NOTHROW override = default;
+
+ /** Fetch the clustered index root page in the tablespace
+ @param iter Tablespace iterator
+ @param block Block to use for IO
+ @retval DB_SUCCESS or error code */
+ dberr_t run(const fil_iterator_t& iter,
+ buf_block_t* block) UNIV_NOTHROW override;
+
+ /** Called for each block as it is read from the file.
+ @param block block to convert, it is not from the buffer pool.
+ @retval DB_SUCCESS or error code. */
+ dberr_t operator()(buf_block_t* block) UNIV_NOTHROW override;
+
+ /** Update the import configuration that will be used to import
+ the tablespace. */
+ dberr_t build_row_import(row_import* cfg) const UNIV_NOTHROW;
+
+ /** Table definition in server. */
+ const dict_table_t* m_table;
+
+ /** Index information */
+ Index m_index;
+};
+
+/** Called for each block as it is read from the file. Check index pages to
+determine the exact row format. We can't get that from the tablespace
+header flags alone.
+
+@param block block to convert, it is not from the buffer pool.
+@retval DB_SUCCESS or error code. */
+dberr_t FetchIndexRootPages::operator()(buf_block_t* block) UNIV_NOTHROW
+{
+ if (is_interrupted()) return DB_INTERRUPTED;
+
+ const page_t* page = get_frame(block);
+
+ m_index.m_id = btr_page_get_index_id(page);
+ m_index.m_page_no = block->page.id().page_no();
+
+ /* Check that the tablespace flags match the table flags. */
+ const uint32_t expected = dict_tf_to_fsp_flags(m_table->flags);
+ if (!fsp_flags_match(expected, m_space_flags)) {
+ ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Expected FSP_SPACE_FLAGS=0x%x, .ibd "
+ "file contains 0x%x.",
+ unsigned(expected),
+ unsigned(m_space_flags));
+ return(DB_CORRUPTION);
+ }
+
+ if (!page_is_comp(block->page.frame) !=
+ !dict_table_is_comp(m_table)) {
+ ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "ROW_FORMAT mismatch");
+ return DB_CORRUPTION;
+ }
+
+ return DB_SUCCESS;
+}
+
+/**
+Update the import configuration that will be used to import the tablespace.
+@return error code or DB_SUCCESS */
+dberr_t
+FetchIndexRootPages::build_row_import(row_import* cfg) const UNIV_NOTHROW
+{
+ ut_a(cfg->m_table == m_table);
+ cfg->m_zip_size = m_zip_size;
+ cfg->m_n_indexes = 1;
+
+ if (cfg->m_n_indexes == 0) {
+
+ ib::error() << "No B+Tree found in tablespace";
+
+ return(DB_CORRUPTION);
+ }
+
+ cfg->m_indexes = UT_NEW_ARRAY_NOKEY(row_index_t, cfg->m_n_indexes);
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_11",
+ UT_DELETE_ARRAY(cfg->m_indexes);
+ cfg->m_indexes = NULL;
+ );
+
+ if (cfg->m_indexes == NULL) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ memset(cfg->m_indexes, 0x0, sizeof(*cfg->m_indexes) * cfg->m_n_indexes);
+
+ row_index_t* cfg_index = cfg->m_indexes;
+
+ char name[BUFSIZ];
+
+ snprintf(name, sizeof(name), "index" IB_ID_FMT, m_index.m_id);
+
+ ulint len = strlen(name) + 1;
+
+ cfg_index->m_name = UT_NEW_ARRAY_NOKEY(byte, len);
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_12",
+ UT_DELETE_ARRAY(cfg_index->m_name);
+ cfg_index->m_name = NULL;
+ );
+
+ if (cfg_index->m_name == NULL) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ memcpy(cfg_index->m_name, name, len);
+
+ cfg_index->m_id = m_index.m_id;
+
+ cfg_index->m_space = m_space;
+
+ cfg_index->m_page_no = m_index.m_page_no;
+
+ return(DB_SUCCESS);
+}
+
+/* Functor that is called for each physical page that is read from the
+tablespace file.
+
+ 1. Check each page for corruption.
+
+ 2. Update the space id and LSN on every page
+ * For the header page
+ - Validate the flags
+ - Update the LSN
+
+ 3. On Btree pages
+ * Set the index id
+ * Update the max trx id
+ * In a cluster index, update the system columns
+ * In a cluster index, update the BLOB ptr, set the space id
+ * Purge delete marked records, but only if they can be easily
+ removed from the page
+ * Keep a counter of number of rows, ie. non-delete-marked rows
+ * Keep a counter of number of delete marked rows
+ * Keep a counter of number of purge failure
+ * If a page is stamped with an index id that isn't in the .cfg file
+ we assume it is deleted and the page can be ignored.
+
+ 4. Set the page state to dirty so that it will be written to disk.
+*/
+class PageConverter : public AbstractCallback {
+public:
+ /** Constructor
+ @param cfg config of table being imported.
+ @param space_id tablespace identifier
+ @param trx transaction covering the import */
+ PageConverter(row_import* cfg, uint32_t space_id, trx_t* trx)
+ :
+ AbstractCallback(trx, space_id),
+ m_cfg(cfg),
+ m_index(cfg->m_indexes),
+ m_rec_iter(),
+ m_offsets_(), m_offsets(m_offsets_),
+ m_heap(0),
+ m_cluster_index(dict_table_get_first_index(cfg->m_table))
+ {
+ rec_offs_init(m_offsets_);
+ }
+
+ ~PageConverter() UNIV_NOTHROW override
+ {
+ if (m_heap != 0) {
+ mem_heap_free(m_heap);
+ }
+ }
+
+ dberr_t run(const fil_iterator_t& iter,
+ buf_block_t* block) UNIV_NOTHROW override
+ {
+ return fil_iterate(iter, block, *this);
+ }
+
+ /** Called for each block as it is read from the file.
+ @param block block to convert, it is not from the buffer pool.
+ @retval DB_SUCCESS or error code. */
+ dberr_t operator()(buf_block_t* block) UNIV_NOTHROW override;
+
+private:
+ /** Update the page, set the space id, max trx id and index id.
+ @param block block read from file
+ @param page_type type of the page
+ @retval DB_SUCCESS or error code */
+ dberr_t update_page(buf_block_t* block, uint16_t& page_type)
+ UNIV_NOTHROW;
+
+ /** Update the space, index id, trx id.
+ @param block block to convert
+ @return DB_SUCCESS or error code */
+ dberr_t update_index_page(buf_block_t* block) UNIV_NOTHROW;
+
+ /** Update the BLOB refrences and write UNDO log entries for
+ rows that can't be purged optimistically.
+ @param block block to update
+ @retval DB_SUCCESS or error code */
+ dberr_t update_records(buf_block_t* block) UNIV_NOTHROW;
+
+ /** Validate the space flags and update tablespace header page.
+ @param block block read from file, not from the buffer pool.
+ @retval DB_SUCCESS or error code */
+ dberr_t update_header(buf_block_t* block) UNIV_NOTHROW;
+
+ /** Adjust the BLOB reference for a single column that is externally stored
+ @param rec record to update
+ @param offsets column offsets for the record
+ @param i column ordinal value
+ @return DB_SUCCESS or error code */
+ dberr_t adjust_cluster_index_blob_column(
+ rec_t* rec,
+ const rec_offs* offsets,
+ ulint i) UNIV_NOTHROW;
+
+ /** Adjusts the BLOB reference in the clustered index row for all
+ externally stored columns.
+ @param rec record to update
+ @param offsets column offsets for the record
+ @return DB_SUCCESS or error code */
+ dberr_t adjust_cluster_index_blob_columns(
+ rec_t* rec,
+ const rec_offs* offsets) UNIV_NOTHROW;
+
+ /** In the clustered index, adjist the BLOB pointers as needed.
+ Also update the BLOB reference, write the new space id.
+ @param rec record to update
+ @param offsets column offsets for the record
+ @return DB_SUCCESS or error code */
+ dberr_t adjust_cluster_index_blob_ref(
+ rec_t* rec,
+ const rec_offs* offsets) UNIV_NOTHROW;
+
+ /** Purge delete-marked records, only if it is possible to do
+ so without re-organising the B+tree.
+ @retval true if purged */
+ bool purge() UNIV_NOTHROW;
+
+ /** Adjust the BLOB references and sys fields for the current record.
+ @param rec record to update
+ @param offsets column offsets for the record
+ @return DB_SUCCESS or error code. */
+ dberr_t adjust_cluster_record(
+ rec_t* rec,
+ const rec_offs* offsets) UNIV_NOTHROW;
+
+ /** Find an index with the matching id.
+ @return row_index_t* instance or 0 */
+ row_index_t* find_index(index_id_t id) UNIV_NOTHROW
+ {
+ row_index_t* index = &m_cfg->m_indexes[0];
+
+ for (ulint i = 0; i < m_cfg->m_n_indexes; ++i, ++index) {
+ if (id == index->m_id) {
+ return(index);
+ }
+ }
+
+ return(0);
+
+ }
+private:
+ /** Config for table that is being imported. */
+ row_import* m_cfg;
+
+ /** Current index whose pages are being imported */
+ row_index_t* m_index;
+
+ /** Iterator over records in a block */
+ RecIterator m_rec_iter;
+
+ /** Record offset */
+ rec_offs m_offsets_[REC_OFFS_NORMAL_SIZE];
+
+ /** Pointer to m_offsets_ */
+ rec_offs* m_offsets;
+
+ /** Memory heap for the record offsets */
+ mem_heap_t* m_heap;
+
+ /** Cluster index instance */
+ dict_index_t* m_cluster_index;
+};
+
+/**
+row_import destructor. */
+row_import::~row_import() UNIV_NOTHROW
+{
+ for (ulint i = 0; m_indexes != 0 && i < m_n_indexes; ++i) {
+ UT_DELETE_ARRAY(m_indexes[i].m_name);
+
+ if (m_indexes[i].m_fields == NULL) {
+ continue;
+ }
+
+ dict_field_t* fields = m_indexes[i].m_fields;
+ ulint n_fields = m_indexes[i].m_n_fields;
+
+ for (ulint j = 0; j < n_fields; ++j) {
+ UT_DELETE_ARRAY(const_cast<char*>(fields[j].name()));
+ }
+
+ UT_DELETE_ARRAY(fields);
+ }
+
+ for (ulint i = 0; m_col_names != 0 && i < m_n_cols; ++i) {
+ UT_DELETE_ARRAY(m_col_names[i]);
+ }
+
+ UT_DELETE_ARRAY(m_cols);
+ UT_DELETE_ARRAY(m_indexes);
+ UT_DELETE_ARRAY(m_col_names);
+ UT_DELETE_ARRAY(m_table_name);
+ UT_DELETE_ARRAY(m_hostname);
+}
+
+/** Find the index entry in in the indexes array.
+@param name index name
+@return instance if found else 0. */
+row_index_t*
+row_import::get_index(
+ const char* name) const UNIV_NOTHROW
+{
+ for (ulint i = 0; i < m_n_indexes; ++i) {
+ const char* index_name;
+ row_index_t* index = &m_indexes[i];
+
+ index_name = reinterpret_cast<const char*>(index->m_name);
+
+ if (strcmp(index_name, name) == 0) {
+
+ return(index);
+ }
+ }
+
+ return(0);
+}
+
+/** Get the number of rows in the index.
+@param name index name
+@return number of rows (doesn't include delete marked rows). */
+ulint
+row_import::get_n_rows(
+ const char* name) const UNIV_NOTHROW
+{
+ const row_index_t* index = get_index(name);
+
+ ut_a(name != 0);
+
+ return(index->m_stats.m_n_rows);
+}
+
+/** Get the number of rows for which purge failed uding the convert phase.
+@param name index name
+@return number of rows for which purge failed. */
+ulint
+row_import::get_n_purge_failed(
+ const char* name) const UNIV_NOTHROW
+{
+ const row_index_t* index = get_index(name);
+
+ ut_a(name != 0);
+
+ return(index->m_stats.m_n_purge_failed);
+}
+
+/** Find the ordinal value of the column name in the cfg table columns.
+@param name of column to look for.
+@return ULINT_UNDEFINED if not found. */
+ulint
+row_import::find_col(
+ const char* name) const UNIV_NOTHROW
+{
+ for (ulint i = 0; i < m_n_cols; ++i) {
+ const char* col_name;
+
+ col_name = reinterpret_cast<const char*>(m_col_names[i]);
+
+ if (strcmp(col_name, name) == 0) {
+ return(i);
+ }
+ }
+
+ return(ULINT_UNDEFINED);
+}
+
+/**
+Check if the index schema that was read from the .cfg file matches the
+in memory index definition.
+@return DB_SUCCESS or error code. */
+dberr_t
+row_import::match_index_columns(
+ THD* thd,
+ const dict_index_t* index) UNIV_NOTHROW
+{
+ row_index_t* cfg_index;
+ dberr_t err = DB_SUCCESS;
+
+ cfg_index = get_index(index->name);
+
+ if (cfg_index == 0) {
+ ib_errf(thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Index %s not found in tablespace meta-data file.",
+ index->name());
+
+ return(DB_ERROR);
+ }
+
+ if (cfg_index->m_n_fields != index->n_fields) {
+
+ ib_errf(thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Index field count %u doesn't match"
+ " tablespace metadata file value " ULINTPF,
+ index->n_fields, cfg_index->m_n_fields);
+
+ return(DB_ERROR);
+ }
+
+ cfg_index->m_srv_index = index;
+
+ const dict_field_t* field = index->fields;
+ const dict_field_t* cfg_field = cfg_index->m_fields;
+
+ for (ulint i = 0; i < index->n_fields; ++i, ++field, ++cfg_field) {
+
+ if (field->name() && cfg_field->name()
+ && strcmp(field->name(), cfg_field->name()) != 0) {
+ ib_errf(thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Index field name %s doesn't match"
+ " tablespace metadata field name %s"
+ " for field position " ULINTPF,
+ field->name(), cfg_field->name(), i);
+
+ err = DB_ERROR;
+ }
+
+ if (cfg_field->prefix_len != field->prefix_len) {
+ ib_errf(thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Index %s field %s prefix len %u"
+ " doesn't match metadata file value %u",
+ index->name(), field->name(),
+ field->prefix_len, cfg_field->prefix_len);
+
+ err = DB_ERROR;
+ }
+
+ if (cfg_field->fixed_len != field->fixed_len) {
+ ib_errf(thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Index %s field %s fixed len %u"
+ " doesn't match metadata file value %u",
+ index->name(), field->name(),
+ field->fixed_len,
+ cfg_field->fixed_len);
+
+ err = DB_ERROR;
+ }
+ }
+
+ return(err);
+}
+
+/** Check if the table schema that was read from the .cfg file matches the
+in memory table definition.
+@param thd MySQL session variable
+@return DB_SUCCESS or error code. */
+dberr_t
+row_import::match_table_columns(
+ THD* thd) UNIV_NOTHROW
+{
+ dberr_t err = DB_SUCCESS;
+ const dict_col_t* col = m_table->cols;
+
+ for (ulint i = 0; i < m_table->n_cols; ++i, ++col) {
+
+ const char* col_name;
+ ulint cfg_col_index;
+
+ col_name = dict_table_get_col_name(
+ m_table, dict_col_get_no(col));
+
+ cfg_col_index = find_col(col_name);
+
+ if (cfg_col_index == ULINT_UNDEFINED) {
+
+ ib_errf(thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Column %s not found in tablespace.",
+ col_name);
+
+ err = DB_ERROR;
+ } else if (cfg_col_index != col->ind) {
+
+ ib_errf(thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Column %s ordinal value mismatch, it's at %u"
+ " in the table and " ULINTPF
+ " in the tablespace meta-data file",
+ col_name, col->ind, cfg_col_index);
+
+ err = DB_ERROR;
+ } else {
+ const dict_col_t* cfg_col;
+
+ cfg_col = &m_cols[cfg_col_index];
+ ut_a(cfg_col->ind == cfg_col_index);
+
+ if (cfg_col->prtype != col->prtype) {
+ ib_errf(thd,
+ IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Column %s precise type mismatch,"
+ " it's 0X%X in the table and 0X%X"
+ " in the tablespace meta file",
+ col_name, col->prtype, cfg_col->prtype);
+ err = DB_ERROR;
+ }
+
+ if (cfg_col->mtype != col->mtype) {
+ ib_errf(thd,
+ IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Column %s main type mismatch,"
+ " it's 0X%X in the table and 0X%X"
+ " in the tablespace meta file",
+ col_name, col->mtype, cfg_col->mtype);
+ err = DB_ERROR;
+ }
+
+ if (cfg_col->len != col->len) {
+ ib_errf(thd,
+ IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Column %s length mismatch,"
+ " it's %u in the table and %u"
+ " in the tablespace meta file",
+ col_name, col->len, cfg_col->len);
+ err = DB_ERROR;
+ }
+
+ if (cfg_col->mbminlen != col->mbminlen
+ || cfg_col->mbmaxlen != col->mbmaxlen) {
+ ib_errf(thd,
+ IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Column %s multi-byte len mismatch,"
+ " it's %u-%u in the table and %u-%u"
+ " in the tablespace meta file",
+ col_name, col->mbminlen, col->mbmaxlen,
+ cfg_col->mbminlen, cfg_col->mbmaxlen);
+ err = DB_ERROR;
+ }
+
+ if (cfg_col->ind != col->ind) {
+ ib_errf(thd,
+ IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Column %s position mismatch,"
+ " it's %u in the table and %u"
+ " in the tablespace meta file",
+ col_name, col->ind, cfg_col->ind);
+ err = DB_ERROR;
+ }
+
+ if (cfg_col->ord_part != col->ord_part) {
+ ib_errf(thd,
+ IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Column %s ordering mismatch,"
+ " it's %u in the table and %u"
+ " in the tablespace meta file",
+ col_name, col->ord_part,
+ cfg_col->ord_part);
+ err = DB_ERROR;
+ }
+
+ if (cfg_col->max_prefix != col->max_prefix) {
+ ib_errf(thd,
+ IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Column %s max prefix mismatch"
+ " it's %u in the table and %u"
+ " in the tablespace meta file",
+ col_name, col->max_prefix,
+ cfg_col->max_prefix);
+ err = DB_ERROR;
+ }
+ }
+ }
+
+ return(err);
+}
+
+dberr_t row_import::match_flags(THD *thd) const
+{
+ ulint mismatch= (m_table->flags ^ m_flags) & ~DICT_TF_MASK_DATA_DIR;
+ if (!mismatch)
+ return DB_SUCCESS;
+
+ const char *msg;
+ if (mismatch & DICT_TF_MASK_ZIP_SSIZE)
+ {
+ if ((m_table->flags & DICT_TF_MASK_ZIP_SSIZE) &&
+ (m_flags & DICT_TF_MASK_ZIP_SSIZE))
+ {
+ switch (m_flags & DICT_TF_MASK_ZIP_SSIZE) {
+ case 0U << DICT_TF_POS_ZIP_SSIZE:
+ goto uncompressed;
+ case 1U << DICT_TF_POS_ZIP_SSIZE:
+ msg= "ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=1";
+ break;
+ case 2U << DICT_TF_POS_ZIP_SSIZE:
+ msg= "ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2";
+ break;
+ case 3U << DICT_TF_POS_ZIP_SSIZE:
+ msg= "ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4";
+ break;
+ case 4U << DICT_TF_POS_ZIP_SSIZE:
+ msg= "ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8";
+ break;
+ case 5U << DICT_TF_POS_ZIP_SSIZE:
+ msg= "ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=16";
+ break;
+ default:
+ msg= "strange KEY_BLOCK_SIZE";
+ }
+ }
+ else if (m_flags & DICT_TF_MASK_ZIP_SSIZE)
+ msg= "ROW_FORMAT=COMPRESSED";
+ else
+ goto uncompressed;
+ }
+ else
+ {
+ uncompressed:
+ msg= (m_flags & DICT_TF_MASK_ATOMIC_BLOBS) ? "ROW_FORMAT=DYNAMIC"
+ : (m_flags & DICT_TF_MASK_COMPACT) ? "ROW_FORMAT=COMPACT"
+ : "ROW_FORMAT=REDUNDANT";
+ }
+
+ ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
+ "Table flags don't match, server table has 0x%x and the meta-data "
+ "file has 0x%zx; .cfg file uses %s",
+ m_table->flags, m_flags, msg);
+
+ return DB_ERROR;
+}
+
+/** Check if the table (and index) schema that was read from the .cfg file
+matches the in memory table definition.
+@param thd MySQL session variable
+@return DB_SUCCESS or error code. */
+dberr_t
+row_import::match_schema(
+ THD* thd) UNIV_NOTHROW
+{
+ /* Do some simple checks. */
+
+ if (UT_LIST_GET_LEN(m_table->indexes) != m_n_indexes) {
+
+ /* If the number of indexes don't match then it is better
+ to abort the IMPORT. It is easy for the user to create a
+ table matching the IMPORT definition. */
+
+ ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
+ "Number of indexes don't match, table has " ULINTPF
+ " indexes but the tablespace meta-data file has "
+ ULINTPF " indexes",
+ UT_LIST_GET_LEN(m_table->indexes), m_n_indexes);
+
+ return(DB_ERROR);
+ }
+
+ dberr_t err = match_table_columns(thd);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ /* Check if the index definitions match. */
+
+ const dict_index_t* index;
+
+ for (index = UT_LIST_GET_FIRST(m_table->indexes);
+ index != 0;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+
+ dberr_t index_err;
+
+ index_err = match_index_columns(thd, index);
+
+ if (index_err != DB_SUCCESS) {
+ err = index_err;
+ }
+ }
+
+ return(err);
+}
+
+/**
+Set the index root <space, pageno>, using index name. */
+void
+row_import::set_root_by_name() UNIV_NOTHROW
+{
+ row_index_t* cfg_index = m_indexes;
+
+ for (ulint i = 0; i < m_n_indexes; ++i, ++cfg_index) {
+ dict_index_t* index;
+
+ const char* index_name;
+
+ index_name = reinterpret_cast<const char*>(cfg_index->m_name);
+
+ index = dict_table_get_index_on_name(m_table, index_name);
+
+ /* We've already checked that it exists. */
+ ut_a(index != 0);
+
+ index->page = cfg_index->m_page_no;
+ }
+}
+
+/**
+Set the index root <space, pageno>, using a heuristic.
+@return DB_SUCCESS or error code */
+dberr_t
+row_import::set_root_by_heuristic() UNIV_NOTHROW
+{
+ row_index_t* cfg_index = m_indexes;
+
+ ut_a(m_n_indexes > 0);
+
+ // TODO: For now use brute force, based on ordinality
+
+ if (UT_LIST_GET_LEN(m_table->indexes) != m_n_indexes) {
+
+ ib::warn() << "Table " << m_table->name << " should have "
+ << UT_LIST_GET_LEN(m_table->indexes) << " indexes but"
+ " the tablespace has " << m_n_indexes << " indexes";
+ }
+
+ ulint i = 0;
+ dberr_t err = DB_SUCCESS;
+
+ for (dict_index_t* index = UT_LIST_GET_FIRST(m_table->indexes);
+ index != 0;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+
+ if (index->type & DICT_FTS) {
+ index->type |= DICT_CORRUPT;
+ ib::warn() << "Skipping FTS index: " << index->name;
+ } else if (i < m_n_indexes) {
+
+ UT_DELETE_ARRAY(cfg_index[i].m_name);
+
+ ulint len = strlen(index->name) + 1;
+
+ cfg_index[i].m_name = UT_NEW_ARRAY_NOKEY(byte, len);
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_14",
+ UT_DELETE_ARRAY(cfg_index[i].m_name);
+ cfg_index[i].m_name = NULL;
+ );
+
+ if (cfg_index[i].m_name == NULL) {
+ err = DB_OUT_OF_MEMORY;
+ break;
+ }
+
+ memcpy(cfg_index[i].m_name, index->name, len);
+
+ cfg_index[i].m_srv_index = index;
+
+ index->page = cfg_index[i++].m_page_no;
+ }
+ }
+
+ return(err);
+}
+
+/**
+Purge delete marked records.
+@return DB_SUCCESS or error code. */
+dberr_t
+IndexPurge::garbage_collect() UNIV_NOTHROW
+{
+ ibool comp = dict_table_is_comp(m_index->table);
+
+ /* Open the persistent cursor and start the mini-transaction. */
+
+ dberr_t err = open() ? next() : DB_CORRUPTION;
+
+ for (; err == DB_SUCCESS; err = next()) {
+
+ rec_t* rec = btr_pcur_get_rec(&m_pcur);
+ ibool deleted = rec_get_deleted_flag(rec, comp);
+
+ if (!deleted) {
+ ++m_n_rows;
+ } else {
+ err = purge();
+ if (err != DB_SUCCESS) {
+ break;
+ }
+ }
+ }
+
+ /* Close the persistent cursor and commit the mini-transaction. */
+
+ close();
+
+ return(err == DB_END_OF_INDEX ? DB_SUCCESS : err);
+}
+
+/**
+Begin import, position the cursor on the first record. */
+inline bool IndexPurge::open() noexcept
+{
+ m_mtr.start();
+ m_mtr.set_log_mode(MTR_LOG_NO_REDO);
+
+ btr_pcur_init(&m_pcur);
+
+ if (m_pcur.open_leaf(true, m_index, BTR_MODIFY_LEAF, &m_mtr) != DB_SUCCESS)
+ return false;
+
+ rec_t *rec= page_rec_get_next(btr_pcur_get_rec(&m_pcur));
+ if (!rec)
+ return false;
+ if (rec_is_metadata(rec, *m_index))
+ /* Skip the metadata pseudo-record. */
+ btr_pcur_get_page_cur(&m_pcur)->rec= rec;
+ return true;
+}
+
+/**
+Position the cursor on the next record.
+@return DB_SUCCESS or error code */
+dberr_t IndexPurge::next() noexcept
+{
+ if (UNIV_UNLIKELY(!btr_pcur_move_to_next_on_page(&m_pcur))) {
+ return DB_CORRUPTION;
+ }
+
+ /* When switching pages, commit the mini-transaction
+ in order to release the latch on the old page. */
+
+ if (!btr_pcur_is_after_last_on_page(&m_pcur)) {
+ return(DB_SUCCESS);
+ } else if (trx_is_interrupted(m_trx)) {
+ /* Check after every page because the check
+ is expensive. */
+ return(DB_INTERRUPTED);
+ }
+
+ btr_pcur_store_position(&m_pcur, &m_mtr);
+
+ mtr_commit(&m_mtr);
+
+ mtr_start(&m_mtr);
+
+ mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
+
+ if (m_pcur.restore_position(BTR_MODIFY_LEAF, &m_mtr)
+ == btr_pcur_t::CORRUPTED) {
+ return DB_CORRUPTION;
+ }
+ /* The following is based on btr_pcur_move_to_next_user_rec(). */
+ m_pcur.old_rec = nullptr;
+ ut_ad(m_pcur.latch_mode == BTR_MODIFY_LEAF);
+ do {
+ if (btr_pcur_is_after_last_on_page(&m_pcur)) {
+ if (btr_pcur_is_after_last_in_tree(&m_pcur)) {
+ return DB_END_OF_INDEX;
+ }
+
+ if (dberr_t err = btr_pcur_move_to_next_page(&m_pcur,
+ &m_mtr)) {
+ return err;
+ }
+ } else if (!btr_pcur_move_to_next_on_page(&m_pcur)) {
+ return DB_CORRUPTION;
+ }
+ } while (!btr_pcur_is_on_user_rec(&m_pcur));
+
+ return DB_SUCCESS;
+}
+
+/**
+Store the persistent cursor position and reopen the
+B-tree cursor in BTR_MODIFY_TREE mode, because the
+tree structure may be changed during a pessimistic delete. */
+inline dberr_t IndexPurge::purge_pessimistic_delete() noexcept
+{
+ dberr_t err;
+ if (m_pcur.restore_position(BTR_PURGE_TREE, &m_mtr) != btr_pcur_t::CORRUPTED)
+ {
+ ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(&m_pcur),
+ m_index->table->not_redundant()));
+ btr_cur_pessimistic_delete(&err, FALSE, btr_pcur_get_btr_cur(&m_pcur), 0,
+ false, &m_mtr);
+ }
+ else
+ err= DB_CORRUPTION;
+
+ m_mtr.commit();
+ return err;
+}
+
+dberr_t IndexPurge::purge() noexcept
+{
+ btr_pcur_store_position(&m_pcur, &m_mtr);
+ m_mtr.commit();
+ m_mtr.start();
+ m_mtr.set_log_mode(MTR_LOG_NO_REDO);
+ dberr_t err= purge_pessimistic_delete();
+
+ m_mtr.start();
+ m_mtr.set_log_mode(MTR_LOG_NO_REDO);
+ if (err == DB_SUCCESS)
+ err= (m_pcur.restore_position(BTR_MODIFY_LEAF, &m_mtr) ==
+ btr_pcur_t::CORRUPTED)
+ ? DB_CORRUPTION : DB_SUCCESS;
+ return err;
+}
+
+/** Adjust the BLOB reference for a single column that is externally stored
+@param rec record to update
+@param offsets column offsets for the record
+@param i column ordinal value
+@return DB_SUCCESS or error code */
+inline
+dberr_t
+PageConverter::adjust_cluster_index_blob_column(
+ rec_t* rec,
+ const rec_offs* offsets,
+ ulint i) UNIV_NOTHROW
+{
+ ulint len;
+ byte* field;
+
+ field = rec_get_nth_field(rec, offsets, i, &len);
+
+ DBUG_EXECUTE_IF("ib_import_trigger_corruption_2",
+ len = BTR_EXTERN_FIELD_REF_SIZE - 1;);
+
+ if (len < BTR_EXTERN_FIELD_REF_SIZE) {
+
+ ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_INNODB_INDEX_CORRUPT,
+ "Externally stored column(" ULINTPF
+ ") has a reference length of " ULINTPF
+ " in the cluster index %s",
+ i, len, m_cluster_index->name());
+
+ return(DB_CORRUPTION);
+ }
+
+ field += len - (BTR_EXTERN_FIELD_REF_SIZE - BTR_EXTERN_SPACE_ID);
+
+ mach_write_to_4(field, get_space_id());
+
+ if (UNIV_LIKELY_NULL(m_rec_iter.current_block()->page.zip.data)) {
+ page_zip_write_blob_ptr(
+ m_rec_iter.current_block(), rec, m_cluster_index,
+ offsets, i, &m_rec_iter.m_mtr);
+ }
+
+ return(DB_SUCCESS);
+}
+
+/** Adjusts the BLOB reference in the clustered index row for all externally
+stored columns.
+@param rec record to update
+@param offsets column offsets for the record
+@return DB_SUCCESS or error code */
+inline
+dberr_t
+PageConverter::adjust_cluster_index_blob_columns(
+ rec_t* rec,
+ const rec_offs* offsets) UNIV_NOTHROW
+{
+ ut_ad(rec_offs_any_extern(offsets));
+
+ /* Adjust the space_id in the BLOB pointers. */
+
+ for (ulint i = 0; i < rec_offs_n_fields(offsets); ++i) {
+
+ /* Only if the column is stored "externally". */
+
+ if (rec_offs_nth_extern(offsets, i)) {
+ dberr_t err;
+
+ err = adjust_cluster_index_blob_column(rec, offsets, i);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/** In the clustered index, adjust BLOB pointers as needed. Also update the
+BLOB reference, write the new space id.
+@param rec record to update
+@param offsets column offsets for the record
+@return DB_SUCCESS or error code */
+inline
+dberr_t
+PageConverter::adjust_cluster_index_blob_ref(
+ rec_t* rec,
+ const rec_offs* offsets) UNIV_NOTHROW
+{
+ if (rec_offs_any_extern(offsets)) {
+ dberr_t err;
+
+ err = adjust_cluster_index_blob_columns(rec, offsets);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/** Purge delete-marked records, only if it is possible to do so without
+re-organising the B+tree.
+@return true if purge succeeded */
+inline bool PageConverter::purge() UNIV_NOTHROW
+{
+ /* We can't have a page that is empty and not root. */
+ if (m_rec_iter.remove(m_offsets)) {
+
+ ++m_index->m_stats.m_n_purged;
+
+ return(true);
+ } else {
+ ++m_index->m_stats.m_n_purge_failed;
+ }
+
+ return(false);
+}
+
+/** Adjust the BLOB references and sys fields for the current record.
+@param rec record to update
+@param offsets column offsets for the record
+@return DB_SUCCESS or error code. */
+inline
+dberr_t
+PageConverter::adjust_cluster_record(
+ rec_t* rec,
+ const rec_offs* offsets) UNIV_NOTHROW
+{
+ dberr_t err;
+
+ if ((err = adjust_cluster_index_blob_ref(rec, offsets)) == DB_SUCCESS) {
+
+ /* Reset DB_TRX_ID and DB_ROLL_PTR. Normally, these fields
+ are only written in conjunction with other changes to the
+ record. */
+ ulint trx_id_pos = m_cluster_index->n_uniq
+ ? m_cluster_index->n_uniq : 1;
+ if (UNIV_LIKELY_NULL(m_rec_iter.current_block()
+ ->page.zip.data)) {
+ page_zip_write_trx_id_and_roll_ptr(
+ m_rec_iter.current_block(),
+ rec, m_offsets, trx_id_pos,
+ 0, roll_ptr_t(1) << ROLL_PTR_INSERT_FLAG_POS,
+ &m_rec_iter.m_mtr);
+ } else {
+ ulint len;
+ byte* ptr = rec_get_nth_field(
+ rec, m_offsets, trx_id_pos, &len);
+ ut_ad(len == DATA_TRX_ID_LEN);
+ memcpy(ptr, reset_trx_id, sizeof reset_trx_id);
+ }
+ }
+
+ return(err);
+}
+
+/** Update the BLOB refrences and write UNDO log entries for
+rows that can't be purged optimistically.
+@param block block to update
+@retval DB_SUCCESS or error code */
+inline
+dberr_t
+PageConverter::update_records(
+ buf_block_t* block) UNIV_NOTHROW
+{
+ ibool comp = dict_table_is_comp(m_cfg->m_table);
+ bool clust_index = m_index->m_srv_index == m_cluster_index;
+
+ /* This will also position the cursor on the first user record. */
+
+ if (!m_rec_iter.open(block, m_index->m_srv_index)) {
+ return DB_CORRUPTION;
+ }
+
+ while (!m_rec_iter.end()) {
+ rec_t* rec = m_rec_iter.current();
+ ibool deleted = rec_get_deleted_flag(rec, comp);
+
+ /* For the clustered index we have to adjust the BLOB
+ reference and the system fields irrespective of the
+ delete marked flag. The adjustment of delete marked
+ cluster records is required for purge to work later. */
+
+ if (deleted || clust_index) {
+ m_offsets = rec_get_offsets(
+ rec, m_index->m_srv_index, m_offsets,
+ m_index->m_srv_index->n_core_fields,
+ ULINT_UNDEFINED, &m_heap);
+ }
+
+ if (clust_index) {
+
+ dberr_t err = adjust_cluster_record(rec, m_offsets);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+
+ /* If it is a delete marked record then try an
+ optimistic delete. */
+
+ if (deleted) {
+ ++m_index->m_stats.m_n_deleted;
+ /* A successful purge will move the cursor to the
+ next record. */
+
+ if (purge()) {
+ continue;
+ }
+ } else {
+ ++m_index->m_stats.m_n_rows;
+ }
+
+ if (!m_rec_iter.next()) {
+ return DB_CORRUPTION;
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/** Update the space, index id, trx id.
+@return DB_SUCCESS or error code */
+inline
+dberr_t
+PageConverter::update_index_page(
+ buf_block_t* block) UNIV_NOTHROW
+{
+ const page_id_t page_id(block->page.id());
+
+ if (is_free(page_id.page_no())) {
+ return(DB_SUCCESS);
+ }
+
+ buf_frame_t* page = block->page.frame;
+ const index_id_t id = btr_page_get_index_id(page);
+
+ if (id != m_index->m_id) {
+ row_index_t* index = find_index(id);
+
+ if (UNIV_UNLIKELY(!index)) {
+ if (!m_cfg->m_missing) {
+ ib::warn() << "Unknown index id " << id
+ << " on page " << page_id.page_no();
+ }
+ return DB_SUCCESS;
+ }
+
+ m_index = index;
+ }
+
+ /* If the .cfg file is missing and there is an index mismatch
+ then ignore the error. */
+ if (m_cfg->m_missing && !m_index->m_srv_index) {
+ return(DB_SUCCESS);
+ }
+
+ if (m_index && page_id.page_no() == m_index->m_page_no) {
+ byte *b = FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF + FSEG_HDR_SPACE
+ + page;
+ mach_write_to_4(b, page_id.space());
+
+ memcpy(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP + FSEG_HDR_SPACE
+ + page, b, 4);
+ if (UNIV_LIKELY_NULL(block->page.zip.data)) {
+ memcpy(&block->page.zip.data[FIL_PAGE_DATA
+ + PAGE_BTR_SEG_TOP
+ + FSEG_HDR_SPACE], b, 4);
+ memcpy(&block->page.zip.data[FIL_PAGE_DATA
+ + PAGE_BTR_SEG_LEAF
+ + FSEG_HDR_SPACE], b, 4);
+ }
+ }
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!block->page.zip.data || page_zip_validate(&block->page.zip, page,
+ m_index->m_srv_index));
+#endif /* UNIV_ZIP_DEBUG */
+
+ /* This has to be written to uncompressed index header. Set it to
+ the current index id. */
+ mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID),
+ m_index->m_srv_index->id);
+ if (UNIV_LIKELY_NULL(block->page.zip.data)) {
+ memcpy(&block->page.zip.data[PAGE_HEADER + PAGE_INDEX_ID],
+ &block->page.frame[PAGE_HEADER + PAGE_INDEX_ID], 8);
+ }
+
+ if (m_index->m_srv_index->is_clust()) {
+ if (page_id.page_no() != m_index->m_srv_index->page) {
+ goto clear_page_max_trx_id;
+ }
+ } else if (page_is_leaf(page)) {
+ /* Set PAGE_MAX_TRX_ID on secondary index leaf pages. */
+ mach_write_to_8(&block->page.frame
+ [PAGE_HEADER + PAGE_MAX_TRX_ID], m_trx->id);
+ if (UNIV_LIKELY_NULL(block->page.zip.data)) {
+ memcpy_aligned<8>(&block->page.zip.data
+ [PAGE_HEADER + PAGE_MAX_TRX_ID],
+ &block->page.frame
+ [PAGE_HEADER + PAGE_MAX_TRX_ID], 8);
+ }
+ } else {
+clear_page_max_trx_id:
+ /* Clear PAGE_MAX_TRX_ID so that it can be
+ used for other purposes in the future. IMPORT
+ in MySQL 5.6, 5.7 and MariaDB 10.0 and 10.1
+ would set the field to the transaction ID even
+ on clustered index pages. */
+ memset_aligned<8>(&block->page.frame
+ [PAGE_HEADER + PAGE_MAX_TRX_ID],
+ 0, 8);
+ if (UNIV_LIKELY_NULL(block->page.zip.data)) {
+ memset_aligned<8>(&block->page.zip.data
+ [PAGE_HEADER + PAGE_MAX_TRX_ID],
+ 0, 8);
+ }
+ }
+
+ if (page_is_empty(page)) {
+
+ /* Only a root page can be empty. */
+ if (page_has_siblings(page)) {
+ // TODO: We should relax this and skip secondary
+ // indexes. Mark them as corrupt because they can
+ // always be rebuilt.
+ return(DB_CORRUPTION);
+ }
+
+ return(DB_SUCCESS);
+ }
+
+ return page_is_leaf(block->page.frame)
+ ? update_records(block)
+ : DB_SUCCESS;
+}
+
+/** Validate the space flags and update tablespace header page.
+@param block block read from file, not from the buffer pool.
+@retval DB_SUCCESS or error code */
+inline dberr_t PageConverter::update_header(buf_block_t* block) UNIV_NOTHROW
+{
+ byte *frame= get_frame(block);
+ if (memcmp_aligned<2>(FIL_PAGE_SPACE_ID + frame,
+ FSP_HEADER_OFFSET + FSP_SPACE_ID + frame, 4))
+ ib::warn() << "Space id check in the header failed: ignored";
+ else if (!mach_read_from_4(FIL_PAGE_SPACE_ID + frame))
+ return DB_CORRUPTION;
+
+ memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
+
+ /* Write space_id to the tablespace header, page 0. */
+ mach_write_to_4(FIL_PAGE_SPACE_ID + frame, get_space_id());
+ memcpy_aligned<2>(FSP_HEADER_OFFSET + FSP_SPACE_ID + frame,
+ FIL_PAGE_SPACE_ID + frame, 4);
+ /* Write back the adjusted flags. */
+ mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + frame, m_space_flags);
+
+ return DB_SUCCESS;
+}
+
+/** Update the page, set the space id, max trx id and index id.
+@param block block read from file
+@retval DB_SUCCESS or error code */
+inline
+dberr_t
+PageConverter::update_page(buf_block_t* block, uint16_t& page_type)
+ UNIV_NOTHROW
+{
+ dberr_t err = DB_SUCCESS;
+
+ ut_ad(!block->page.zip.data == !is_compressed_table());
+
+ switch (page_type = fil_page_get_type(get_frame(block))) {
+ case FIL_PAGE_TYPE_FSP_HDR:
+ ut_a(block->page.id().page_no() == 0);
+ /* Work directly on the uncompressed page headers. */
+ return(update_header(block));
+
+ case FIL_PAGE_INDEX:
+ case FIL_PAGE_RTREE:
+ /* We need to decompress the contents
+ before we can do anything. */
+
+ if (is_compressed_table() && !buf_zip_decompress(block, TRUE)) {
+ return(DB_CORRUPTION);
+ }
+
+ /* fall through */
+ case FIL_PAGE_TYPE_INSTANT:
+ /* This is on every page in the tablespace. */
+ mach_write_to_4(
+ get_frame(block)
+ + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, get_space_id());
+
+ /* Only update the Btree nodes. */
+ return(update_index_page(block));
+
+ case FIL_PAGE_TYPE_SYS:
+ /* This is page 0 in the system tablespace. */
+ return(DB_CORRUPTION);
+
+ case FIL_PAGE_TYPE_XDES:
+ err = set_current_xdes(
+ block->page.id().page_no(), get_frame(block));
+ /* fall through */
+ case FIL_PAGE_INODE:
+ case FIL_PAGE_TYPE_TRX_SYS:
+ case FIL_PAGE_IBUF_FREE_LIST:
+ case FIL_PAGE_TYPE_ALLOCATED:
+ case FIL_PAGE_IBUF_BITMAP:
+ case FIL_PAGE_TYPE_BLOB:
+ case FIL_PAGE_TYPE_ZBLOB:
+ case FIL_PAGE_TYPE_ZBLOB2:
+
+ /* Work directly on the uncompressed page headers. */
+ /* This is on every page in the tablespace. */
+ mach_write_to_4(
+ get_frame(block)
+ + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, get_space_id());
+
+ return(err);
+ }
+
+ ib::warn() << "Unknown page type (" << page_type << ")";
+
+ return(DB_CORRUPTION);
+}
+
+/** Called for every page in the tablespace. If the page was not
+updated then its state must be set to BUF_PAGE_NOT_USED.
+@param block block read from file, note it is not from the buffer pool
+@retval DB_SUCCESS or error code. */
+dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW
+{
+ /* If we already had an old page with matching number
+ in the buffer pool, evict it now, because
+ we no longer evict the pages on DISCARD TABLESPACE. */
+ buf_page_get_low(block->page.id(), get_zip_size(), RW_NO_LATCH,
+ nullptr, BUF_PEEK_IF_IN_POOL,
+ nullptr, nullptr, false);
+
+ uint16_t page_type;
+
+ if (dberr_t err = update_page(block, page_type)) {
+ return err;
+ }
+
+ const bool full_crc32 = fil_space_t::full_crc32(get_space_flags());
+ byte* frame = get_frame(block);
+ memset_aligned<8>(frame + FIL_PAGE_LSN, 0, 8);
+
+ if (!block->page.zip.data) {
+ buf_flush_init_for_writing(
+ NULL, block->page.frame, NULL, full_crc32);
+ } else if (fil_page_type_is_index(page_type)) {
+ buf_flush_init_for_writing(
+ NULL, block->page.zip.data, &block->page.zip,
+ full_crc32);
+ } else {
+ /* Calculate and update the checksum of non-index
+ pages for ROW_FORMAT=COMPRESSED tables. */
+ buf_flush_update_zip_checksum(
+ block->page.zip.data, block->zip_size());
+ }
+
+ return DB_SUCCESS;
+}
+
+/*****************************************************************//**
+Clean up after import tablespace. */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+row_import_cleanup(
+/*===============*/
+ row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */
+ dberr_t err) /*!< in: error code */
+{
+ if (err != DB_SUCCESS) {
+ dict_table_t* table = prebuilt->table;
+ table->file_unreadable = true;
+ if (table->space) {
+ fil_close_tablespace(table->space_id);
+ table->space = NULL;
+ }
+
+ prebuilt->trx->error_info = NULL;
+
+ ib::info() << "Discarding tablespace of table "
+ << table->name << ": " << err;
+
+ for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
+ index;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+ index->page = FIL_NULL;
+ }
+ }
+
+ DBUG_EXECUTE_IF("ib_import_before_commit_crash", DBUG_SUICIDE(););
+
+ prebuilt->trx->commit();
+
+ if (prebuilt->trx->dict_operation_lock_mode) {
+ row_mysql_unlock_data_dictionary(prebuilt->trx);
+ }
+
+ prebuilt->trx->op_info = "";
+
+ DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE(););
+
+ return(err);
+}
+
+/*****************************************************************//**
+Report error during tablespace import. */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+row_import_error(
+/*=============*/
+ row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */
+ dberr_t err) /*!< in: error code */
+{
+ if (!trx_is_interrupted(prebuilt->trx)) {
+ char table_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(
+ table_name, sizeof(table_name),
+ prebuilt->table->name.m_name);
+
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_WARN,
+ ER_INNODB_IMPORT_ERROR,
+ table_name, (ulong) err, ut_strerr(err));
+ }
+
+ return row_import_cleanup(prebuilt, err);
+}
+
+/*****************************************************************//**
+Adjust the root page index node and leaf node segment headers, update
+with the new space id. For all the table's secondary indexes.
+@return error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+row_import_adjust_root_pages_of_secondary_indexes(
+/*==============================================*/
+ trx_t* trx, /*!< in: transaction used for
+ the import */
+ dict_table_t* table, /*!< in: table the indexes
+ belong to */
+ const row_import& cfg) /*!< Import context */
+{
+ dict_index_t* index;
+ ulint n_rows_in_table;
+ dberr_t err = DB_SUCCESS;
+
+ /* Skip the clustered index. */
+ index = dict_table_get_first_index(table);
+
+ n_rows_in_table = cfg.get_n_rows(index->name);
+
+ DBUG_EXECUTE_IF("ib_import_sec_rec_count_mismatch_failure",
+ n_rows_in_table++;);
+
+ /* Adjust the root pages of the secondary indexes only. */
+ while ((index = dict_table_get_next_index(index)) != NULL) {
+ ut_a(!dict_index_is_clust(index));
+
+ if (!(index->type & DICT_CORRUPT)
+ && index->page != FIL_NULL) {
+
+ /* Update the Btree segment headers for index node and
+ leaf nodes in the root page. Set the new space id. */
+
+ err = btr_root_adjust_on_import(index);
+ } else {
+ ib::warn() << "Skip adjustment of root pages for"
+ " index " << index->name << ".";
+
+ err = DB_CORRUPTION;
+ }
+
+ if (err != DB_SUCCESS) {
+
+ if (index->type & DICT_CLUSTERED) {
+ break;
+ }
+
+ ib_errf(trx->mysql_thd,
+ IB_LOG_LEVEL_WARN,
+ ER_INNODB_INDEX_CORRUPT,
+ "Index %s not found or corrupt,"
+ " you should recreate this index.",
+ index->name());
+
+ /* Do not bail out, so that the data
+ can be recovered. */
+
+ err = DB_SUCCESS;
+ index->type |= DICT_CORRUPT;
+ continue;
+ }
+
+ /* If we failed to purge any records in the index then
+ do it the hard way.
+
+ TODO: We can do this in the first pass by generating UNDO log
+ records for the failed rows. */
+
+ if (!cfg.requires_purge(index->name)) {
+ continue;
+ }
+
+ IndexPurge purge(trx, index);
+
+ trx->op_info = "secondary: purge delete marked records";
+
+ err = purge.garbage_collect();
+
+ trx->op_info = "";
+
+ if (err != DB_SUCCESS) {
+ break;
+ } else if (purge.get_n_rows() != n_rows_in_table) {
+
+ ib_errf(trx->mysql_thd,
+ IB_LOG_LEVEL_WARN,
+ ER_INNODB_INDEX_CORRUPT,
+ "Index '%s' contains " ULINTPF " entries, "
+ "should be " ULINTPF ", you should recreate "
+ "this index.", index->name(),
+ purge.get_n_rows(), n_rows_in_table);
+
+ index->type |= DICT_CORRUPT;
+
+ /* Do not bail out, so that the data
+ can be recovered. */
+
+ err = DB_SUCCESS;
+ }
+ }
+
+ return(err);
+}
+
+/*****************************************************************//**
+Ensure that dict_sys.row_id exceeds SELECT MAX(DB_ROW_ID). */
+MY_ATTRIBUTE((nonnull)) static
+void
+row_import_set_sys_max_row_id(
+/*==========================*/
+ row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from
+ handler */
+ const dict_table_t* table) /*!< in: table to import */
+{
+ const rec_t* rec;
+ mtr_t mtr;
+ btr_pcur_t pcur;
+ row_id_t row_id = 0;
+ dict_index_t* index;
+
+ index = dict_table_get_first_index(table);
+ ut_ad(index->is_primary());
+ ut_ad(dict_index_is_auto_gen_clust(index));
+
+ mtr_start(&mtr);
+
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+
+ if (pcur.open_leaf(false, index, BTR_SEARCH_LEAF, &mtr)
+ == DB_SUCCESS) {
+ rec = btr_pcur_move_to_prev_on_page(&pcur);
+
+ if (!rec) {
+ /* The table is corrupted. */
+ } else if (page_rec_is_infimum(rec)) {
+ /* The table is empty. */
+ } else if (rec_is_metadata(rec, *index)) {
+ /* The clustered index contains the metadata
+ record only, that is, the table is empty. */
+ } else {
+ row_id = mach_read_from_6(rec);
+ }
+ }
+
+ mtr_commit(&mtr);
+
+ if (row_id) {
+ /* Update the system row id if the imported index row id is
+ greater than the max system row id. */
+ dict_sys.update_row_id(row_id);
+ }
+}
+
+/*****************************************************************//**
+Read the a string from the meta data file.
+@return DB_SUCCESS or error code. */
+static
+dberr_t
+row_import_cfg_read_string(
+/*=======================*/
+ FILE* file, /*!< in/out: File to read from */
+ byte* ptr, /*!< out: string to read */
+ ulint max_len) /*!< in: maximum length of the output
+ buffer in bytes */
+{
+ DBUG_EXECUTE_IF("ib_import_string_read_error",
+ errno = EINVAL; return(DB_IO_ERROR););
+
+ ulint len = 0;
+
+ while (!feof(file)) {
+ int ch = fgetc(file);
+
+ if (ch == EOF) {
+ break;
+ } else if (ch != 0) {
+ if (len < max_len) {
+ ptr[len++] = static_cast<byte>(ch);
+ } else {
+ break;
+ }
+ /* max_len includes the NUL byte */
+ } else if (len != max_len - 1) {
+ break;
+ } else {
+ ptr[len] = 0;
+ return(DB_SUCCESS);
+ }
+ }
+
+ errno = EINVAL;
+
+ return(DB_IO_ERROR);
+}
+
+/*********************************************************************//**
+Write the meta data (index user fields) config file.
+@return DB_SUCCESS or error code. */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+row_import_cfg_read_index_fields(
+/*=============================*/
+ FILE* file, /*!< in: file to write to */
+ THD* thd, /*!< in/out: session */
+ row_index_t* index) /*!< Index being read in */
+{
+ byte row[sizeof(ib_uint32_t) * 3];
+ ulint n_fields = index->m_n_fields;
+
+ index->m_fields = UT_NEW_ARRAY_NOKEY(dict_field_t, n_fields);
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_4",
+ UT_DELETE_ARRAY(index->m_fields);
+ index->m_fields = NULL;
+ );
+
+ if (index->m_fields == NULL) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ dict_field_t* field = index->m_fields;
+
+ for (ulint i = 0; i < n_fields; ++i, ++field) {
+ byte* ptr = row;
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_1",
+ (void) fseek(file, 0L, SEEK_END););
+
+ if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ (ulong) errno, strerror(errno),
+ "while reading index fields.");
+
+ return(DB_IO_ERROR);
+ }
+
+ new (field) dict_field_t();
+
+ field->prefix_len = mach_read_from_4(ptr) & ((1U << 12) - 1);
+ ptr += sizeof(ib_uint32_t);
+
+ field->fixed_len = mach_read_from_4(ptr) & ((1U << 10) - 1);
+ ptr += sizeof(ib_uint32_t);
+
+ /* Include the NUL byte in the length. */
+ ulint len = mach_read_from_4(ptr);
+
+ byte* name = UT_NEW_ARRAY_NOKEY(byte, len);
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_5",
+ UT_DELETE_ARRAY(name);
+ name = NULL;
+ );
+
+ if (name == NULL) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ field->name = reinterpret_cast<const char*>(name);
+
+ dberr_t err = row_import_cfg_read_string(file, name, len);
+
+ if (err != DB_SUCCESS) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ (ulong) errno, strerror(errno),
+ "while parsing table name.");
+
+ return(err);
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Read the index names and root page numbers of the indexes and set the values.
+Row format [root_page_no, len of str, str ... ]
+@return DB_SUCCESS or error code. */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+row_import_read_index_data(
+/*=======================*/
+ FILE* file, /*!< in: File to read from */
+ THD* thd, /*!< in: session */
+ row_import* cfg) /*!< in/out: meta-data read */
+{
+ byte* ptr;
+ row_index_t* cfg_index;
+ byte row[sizeof(index_id_t) + sizeof(ib_uint32_t) * 9];
+
+ /* FIXME: What is the max value? */
+ ut_a(cfg->m_n_indexes > 0);
+ ut_a(cfg->m_n_indexes < 1024);
+
+ cfg->m_indexes = UT_NEW_ARRAY_NOKEY(row_index_t, cfg->m_n_indexes);
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_6",
+ UT_DELETE_ARRAY(cfg->m_indexes);
+ cfg->m_indexes = NULL;
+ );
+
+ if (cfg->m_indexes == NULL) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ memset(cfg->m_indexes, 0x0, sizeof(*cfg->m_indexes) * cfg->m_n_indexes);
+
+ cfg_index = cfg->m_indexes;
+
+ for (ulint i = 0; i < cfg->m_n_indexes; ++i, ++cfg_index) {
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_2",
+ (void) fseek(file, 0L, SEEK_END););
+
+ /* Read the index data. */
+ size_t n_bytes = fread(row, 1, sizeof(row), file);
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error",
+ (void) fseek(file, 0L, SEEK_END););
+
+ if (n_bytes != sizeof(row)) {
+ char msg[BUFSIZ];
+
+ snprintf(msg, sizeof(msg),
+ "while reading index meta-data, expected "
+ "to read " ULINTPF
+ " bytes but read only " ULINTPF " bytes",
+ sizeof(row), n_bytes);
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ (ulong) errno, strerror(errno), msg);
+
+ ib::error() << "IO Error: " << msg;
+
+ return(DB_IO_ERROR);
+ }
+
+ ptr = row;
+
+ cfg_index->m_id = mach_read_from_8(ptr);
+ ptr += sizeof(index_id_t);
+
+ cfg_index->m_space = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ cfg_index->m_page_no = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ cfg_index->m_type = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ cfg_index->m_trx_id_offset = mach_read_from_4(ptr);
+ if (cfg_index->m_trx_id_offset != mach_read_from_4(ptr)) {
+ ut_ad(0);
+ /* Overflow. Pretend that the clustered index
+ has a variable-length PRIMARY KEY. */
+ cfg_index->m_trx_id_offset = 0;
+ }
+ ptr += sizeof(ib_uint32_t);
+
+ cfg_index->m_n_user_defined_cols = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ cfg_index->m_n_uniq = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ cfg_index->m_n_nullable = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ cfg_index->m_n_fields = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ /* The NUL byte is included in the name length. */
+ ulint len = mach_read_from_4(ptr);
+
+ if (len > OS_FILE_MAX_PATH) {
+ ib_errf(thd, IB_LOG_LEVEL_ERROR,
+ ER_INNODB_INDEX_CORRUPT,
+ "Index name length (" ULINTPF ") is too long, "
+ "the meta-data is corrupt", len);
+
+ return(DB_CORRUPTION);
+ }
+
+ cfg_index->m_name = UT_NEW_ARRAY_NOKEY(byte, len);
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_7",
+ UT_DELETE_ARRAY(cfg_index->m_name);
+ cfg_index->m_name = NULL;
+ );
+
+ if (cfg_index->m_name == NULL) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ dberr_t err;
+
+ err = row_import_cfg_read_string(file, cfg_index->m_name, len);
+
+ if (err != DB_SUCCESS) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ (ulong) errno, strerror(errno),
+ "while parsing index name.");
+
+ return(err);
+ }
+
+ err = row_import_cfg_read_index_fields(file, thd, cfg_index);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ }
+
+ return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Set the index root page number for v1 format.
+@return DB_SUCCESS or error code. */
+static
+dberr_t
+row_import_read_indexes(
+/*====================*/
+ FILE* file, /*!< in: File to read from */
+ THD* thd, /*!< in: session */
+ row_import* cfg) /*!< in/out: meta-data read */
+{
+ byte row[sizeof(ib_uint32_t)];
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_3",
+ (void) fseek(file, 0L, SEEK_END););
+
+ /* Read the number of indexes. */
+ if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ (ulong) errno, strerror(errno),
+ "while reading number of indexes.");
+
+ return(DB_IO_ERROR);
+ }
+
+ cfg->m_n_indexes = mach_read_from_4(row);
+
+ if (cfg->m_n_indexes == 0) {
+ ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ "Number of indexes in meta-data file is 0");
+
+ return(DB_CORRUPTION);
+
+ } else if (cfg->m_n_indexes > 1024) {
+ // FIXME: What is the upper limit? */
+ ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ "Number of indexes in meta-data file is too high: "
+ ULINTPF, cfg->m_n_indexes);
+ cfg->m_n_indexes = 0;
+
+ return(DB_CORRUPTION);
+ }
+
+ return(row_import_read_index_data(file, thd, cfg));
+}
+
+/*********************************************************************//**
+Read the meta data (table columns) config file. Deserialise the contents of
+dict_col_t structure, along with the column name. */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+row_import_read_columns(
+/*====================*/
+ FILE* file, /*!< in: file to write to */
+ THD* thd, /*!< in/out: session */
+ row_import* cfg) /*!< in/out: meta-data read */
+{
+ dict_col_t* col;
+ byte row[sizeof(ib_uint32_t) * 8];
+
+ /* FIXME: What should the upper limit be? */
+ ut_a(cfg->m_n_cols > 0);
+ ut_a(cfg->m_n_cols < 1024);
+
+ cfg->m_cols = UT_NEW_ARRAY_NOKEY(dict_col_t, cfg->m_n_cols);
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_8",
+ UT_DELETE_ARRAY(cfg->m_cols);
+ cfg->m_cols = NULL;
+ );
+
+ if (cfg->m_cols == NULL) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ cfg->m_col_names = UT_NEW_ARRAY_NOKEY(byte*, cfg->m_n_cols);
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_9",
+ UT_DELETE_ARRAY(cfg->m_col_names);
+ cfg->m_col_names = NULL;
+ );
+
+ if (cfg->m_col_names == NULL) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ memset(cfg->m_cols, 0x0, sizeof(cfg->m_cols) * cfg->m_n_cols);
+ memset(cfg->m_col_names, 0x0, sizeof(cfg->m_col_names) * cfg->m_n_cols);
+
+ col = cfg->m_cols;
+
+ for (ulint i = 0; i < cfg->m_n_cols; ++i, ++col) {
+ byte* ptr = row;
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_4",
+ (void) fseek(file, 0L, SEEK_END););
+
+ if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ (ulong) errno, strerror(errno),
+ "while reading table column meta-data.");
+
+ return(DB_IO_ERROR);
+ }
+
+ col->prtype = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ col->mtype = static_cast<byte>(mach_read_from_4(ptr));
+ ptr += sizeof(ib_uint32_t);
+
+ col->len = static_cast<uint16_t>(mach_read_from_4(ptr));
+ ptr += sizeof(ib_uint32_t);
+
+ uint32_t mbminmaxlen = mach_read_from_4(ptr);
+ col->mbmaxlen = (mbminmaxlen / 5) & 7;
+ col->mbminlen = (mbminmaxlen % 5) & 7;
+ ptr += sizeof(ib_uint32_t);
+
+ col->ind = mach_read_from_4(ptr) & dict_index_t::MAX_N_FIELDS;
+ ptr += sizeof(ib_uint32_t);
+
+ col->ord_part = mach_read_from_4(ptr) & 1;
+ ptr += sizeof(ib_uint32_t);
+
+ col->max_prefix = mach_read_from_4(ptr) & ((1U << 12) - 1);
+ ptr += sizeof(ib_uint32_t);
+
+ /* Read in the column name as [len, byte array]. The len
+ includes the NUL byte. */
+
+ ulint len = mach_read_from_4(ptr);
+
+ /* FIXME: What is the maximum column name length? */
+ if (len == 0 || len > 128) {
+ ib_errf(thd, IB_LOG_LEVEL_ERROR,
+ ER_IO_READ_ERROR,
+ "Column name length " ULINTPF ", is invalid",
+ len);
+
+ return(DB_CORRUPTION);
+ }
+
+ cfg->m_col_names[i] = UT_NEW_ARRAY_NOKEY(byte, len);
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_10",
+ UT_DELETE_ARRAY(cfg->m_col_names[i]);
+ cfg->m_col_names[i] = NULL;
+ );
+
+ if (cfg->m_col_names[i] == NULL) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ dberr_t err;
+
+ err = row_import_cfg_read_string(
+ file, cfg->m_col_names[i], len);
+
+ if (err != DB_SUCCESS) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ (ulong) errno, strerror(errno),
+ "while parsing table column name.");
+
+ return(err);
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Read the contents of the <tablespace>.cfg file.
+@return DB_SUCCESS or error code. */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+row_import_read_v1(
+/*===============*/
+ FILE* file, /*!< in: File to read from */
+ THD* thd, /*!< in: session */
+ row_import* cfg) /*!< out: meta data */
+{
+ byte value[sizeof(ib_uint32_t)];
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_5",
+ (void) fseek(file, 0L, SEEK_END););
+
+ /* Read the hostname where the tablespace was exported. */
+ if (fread(value, 1, sizeof(value), file) != sizeof(value)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ (ulong) errno, strerror(errno),
+ "while reading meta-data export hostname length.");
+
+ return(DB_IO_ERROR);
+ }
+
+ ulint len = mach_read_from_4(value);
+
+ /* NUL byte is part of name length. */
+ cfg->m_hostname = UT_NEW_ARRAY_NOKEY(byte, len);
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_1",
+ UT_DELETE_ARRAY(cfg->m_hostname);
+ cfg->m_hostname = NULL;
+ );
+
+ if (cfg->m_hostname == NULL) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ dberr_t err = row_import_cfg_read_string(file, cfg->m_hostname, len);
+
+ if (err != DB_SUCCESS) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ (ulong) errno, strerror(errno),
+ "while parsing export hostname.");
+
+ return(err);
+ }
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_6",
+ (void) fseek(file, 0L, SEEK_END););
+
+ /* Read the table name of tablespace that was exported. */
+ if (fread(value, 1, sizeof(value), file) != sizeof(value)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ (ulong) errno, strerror(errno),
+ "while reading meta-data table name length.");
+
+ return(DB_IO_ERROR);
+ }
+
+ len = mach_read_from_4(value);
+
+ /* NUL byte is part of name length. */
+ cfg->m_table_name = UT_NEW_ARRAY_NOKEY(byte, len);
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_2",
+ UT_DELETE_ARRAY(cfg->m_table_name);
+ cfg->m_table_name = NULL;
+ );
+
+ if (cfg->m_table_name == NULL) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ err = row_import_cfg_read_string(file, cfg->m_table_name, len);
+
+ if (err != DB_SUCCESS) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ (ulong) errno, strerror(errno),
+ "while parsing table name.");
+
+ return(err);
+ }
+
+ ib::info() << "Importing tablespace for table '" << cfg->m_table_name
+ << "' that was exported from host '" << cfg->m_hostname << "'";
+
+ byte row[sizeof(ib_uint32_t) * 3];
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_7",
+ (void) fseek(file, 0L, SEEK_END););
+
+ /* Read the autoinc value. */
+ if (fread(row, 1, sizeof(ib_uint64_t), file) != sizeof(ib_uint64_t)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ (ulong) errno, strerror(errno),
+ "while reading autoinc value.");
+
+ return(DB_IO_ERROR);
+ }
+
+ cfg->m_autoinc = mach_read_from_8(row);
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_8",
+ (void) fseek(file, 0L, SEEK_END););
+
+ /* Read the tablespace page size. */
+ if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ (ulong) errno, strerror(errno),
+ "while reading meta-data header.");
+
+ return(DB_IO_ERROR);
+ }
+
+ byte* ptr = row;
+
+ const ulint logical_page_size = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ if (logical_page_size != srv_page_size) {
+
+ ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
+ "Tablespace to be imported has a different"
+ " page size than this server. Server page size"
+ " is %lu, whereas tablespace page size"
+ " is " ULINTPF,
+ srv_page_size,
+ logical_page_size);
+
+ return(DB_ERROR);
+ }
+
+ cfg->m_flags = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ cfg->m_zip_size = dict_tf_get_zip_size(cfg->m_flags);
+ cfg->m_n_cols = mach_read_from_4(ptr);
+
+ if (!dict_tf_is_valid(cfg->m_flags)) {
+ ib_errf(thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Invalid table flags: " ULINTPF, cfg->m_flags);
+
+ return(DB_CORRUPTION);
+ }
+
+ err = row_import_read_columns(file, thd, cfg);
+
+ if (err == DB_SUCCESS) {
+ err = row_import_read_indexes(file, thd, cfg);
+ }
+
+ return(err);
+}
+
+/**
+Read the contents of the <tablespace>.cfg file.
+@return DB_SUCCESS or error code. */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+row_import_read_meta_data(
+/*======================*/
+ FILE* file, /*!< in: File to read from */
+ THD* thd, /*!< in: session */
+ row_import& cfg) /*!< out: contents of the .cfg file */
+{
+ byte row[sizeof(ib_uint32_t)];
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_9",
+ (void) fseek(file, 0L, SEEK_END););
+
+ if (fread(&row, 1, sizeof(row), file) != sizeof(row)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ (ulong) errno, strerror(errno),
+ "while reading meta-data version.");
+
+ return(DB_IO_ERROR);
+ }
+
+ cfg.m_version = mach_read_from_4(row);
+
+ /* Check the version number. */
+ switch (cfg.m_version) {
+ case IB_EXPORT_CFG_VERSION_V1:
+
+ return(row_import_read_v1(file, thd, &cfg));
+ default:
+ ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ "Unsupported meta-data version number (" ULINTPF "), "
+ "file ignored", cfg.m_version);
+ }
+
+ return(DB_ERROR);
+}
+
+#define BTR_BLOB_HDR_PART_LEN 0 /*!< BLOB part len on this page */
+#define BTR_BLOB_HDR_NEXT_PAGE_NO 4 /*!< next BLOB part page no,
+ FIL_NULL if none */
+#define BTR_BLOB_HDR_SIZE 8 /*!< Size of a BLOB part header, in bytes */
+
+/* decrypt and decompress page if needed */
+static dberr_t decrypt_decompress(fil_space_crypt_t *space_crypt,
+ uint32_t space_flags, span<byte> page,
+ uint32_t space_id, byte *page_compress_buf)
+{
+ auto *data= page.data();
+
+ if (space_crypt && space_crypt->should_encrypt())
+ {
+ if (!buf_page_verify_crypt_checksum(data, space_flags))
+ return DB_CORRUPTION;
+
+ if (dberr_t err= fil_space_decrypt(space_id, space_flags, space_crypt,
+ data, page.size(), data))
+ return err;
+ }
+
+ bool page_compressed= false;
+
+ if (fil_space_t::full_crc32(space_flags) &&
+ fil_space_t::is_compressed(space_flags))
+ page_compressed= buf_page_is_compressed(data, space_flags);
+ else
+ {
+ switch (fil_page_get_type(data)) {
+ case FIL_PAGE_PAGE_COMPRESSED:
+ case FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED:
+ page_compressed= true;
+ }
+ }
+
+ if (page_compressed)
+ {
+ auto compress_length=
+ fil_page_decompress(page_compress_buf, data, space_flags);
+ ut_ad(compress_length != srv_page_size);
+
+ if (compress_length == 0)
+ return DB_CORRUPTION;
+ }
+
+ return DB_SUCCESS;
+}
+
+static size_t get_buf_size()
+{
+ return srv_page_size + (
+ provider_service_lzo->is_loaded ? LZO1X_1_15_MEM_COMPRESS :
+ provider_service_snappy->is_loaded ? snappy_max_compressed_length(srv_page_size) :
+ 0
+ );
+}
+
+/* find, parse instant metadata, performing variaous checks,
+and apply it to dict_table_t
+@return DB_SUCCESS or some error */
+static dberr_t handle_instant_metadata(dict_table_t *table,
+ const row_import &cfg)
+{
+ dict_get_and_save_data_dir_path(table);
+
+ char *filepath;
+ if (DICT_TF_HAS_DATA_DIR(table->flags))
+ {
+ ut_a(table->data_dir_path);
+ filepath= fil_make_filepath(table->data_dir_path, table->name, IBD, true);
+ }
+ else
+ filepath= fil_make_filepath(nullptr, table->name, IBD, false);
+
+ if (!filepath)
+ return DB_OUT_OF_MEMORY;
+
+ SCOPE_EXIT([filepath]() { ut_free(filepath); });
+
+ bool success;
+ auto file= os_file_create_simple_no_error_handling(
+ innodb_data_file_key, filepath, OS_FILE_OPEN, OS_FILE_READ_WRITE, false,
+ &success);
+ if (!success)
+ return DB_IO_ERROR;
+
+ if (os_file_get_size(file) < srv_page_size)
+ return DB_CORRUPTION;
+
+ SCOPE_EXIT([&file]() { os_file_close(file); });
+
+ std::unique_ptr<byte[], decltype(&aligned_free)> first_page(
+ static_cast<byte *>(aligned_malloc(srv_page_size, srv_page_size)),
+ &aligned_free);
+
+ if (dberr_t err= os_file_read(IORequestReadPartial, file, first_page.get(),
+ 0, srv_page_size, nullptr))
+ return err;
+
+ auto space_flags= fsp_header_get_flags(first_page.get());
+
+ if (!fil_space_t::is_valid_flags(space_flags, true))
+ {
+ auto cflags= fsp_flags_convert_from_101(space_flags);
+ if (cflags == UINT32_MAX)
+ return invalid_space_flags(space_flags);
+ space_flags= static_cast<decltype(space_flags)>(cflags);
+ }
+
+ if (!cfg.m_missing)
+ {
+ if (dberr_t err= cfg.match_flags(current_thd))
+ return err;
+ }
+
+ const unsigned zip_size= fil_space_t::zip_size(space_flags);
+ const unsigned physical_size= zip_size ? zip_size : unsigned(srv_page_size);
+ ut_ad(physical_size <= UNIV_PAGE_SIZE_MAX);
+ const uint32_t space_id= page_get_space_id(first_page.get());
+
+ auto *space_crypt= fil_space_read_crypt_data(zip_size, first_page.get());
+ SCOPE_EXIT([&space_crypt]() {
+ if (space_crypt)
+ fil_space_destroy_crypt_data(&space_crypt);
+ });
+
+ std::unique_ptr<byte[], decltype(&aligned_free)> page(
+ static_cast<byte *>(
+ aligned_malloc(UNIV_PAGE_SIZE_MAX, UNIV_PAGE_SIZE_MAX)),
+ &aligned_free);
+
+ if (dberr_t err= os_file_read(
+ IORequestReadPartial, file, page.get(), 3 * physical_size,
+ physical_size, nullptr))
+ return err;
+
+ std::unique_ptr<byte[]> page_compress_buf(new byte[get_buf_size()]);
+
+ if (dberr_t err= decrypt_decompress(space_crypt, space_flags,
+ {page.get(), static_cast<size_t>
+ (physical_size)},
+ space_id, page_compress_buf.get()))
+ return err;
+
+ if (table->supports_instant())
+ {
+ dict_index_t *index= dict_table_get_first_index(table);
+
+ if (!page_is_comp(page.get()) != !dict_table_is_comp(table))
+ {
+ ib_errf(current_thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
+ "ROW_FORMAT mismatch");
+ return DB_CORRUPTION;
+ }
+
+ if (btr_cur_instant_root_init(index, page.get()))
+ return DB_CORRUPTION;
+
+ ut_ad(index->n_core_null_bytes != dict_index_t::NO_CORE_NULL_BYTES);
+
+ if (fil_page_get_type(page.get()) == FIL_PAGE_INDEX)
+ {
+ ut_ad(!index->is_instant());
+ return DB_SUCCESS;
+ }
+
+ mem_heap_t *heap= NULL;
+ SCOPE_EXIT([&heap]() {
+ if (heap)
+ mem_heap_free(heap);
+ });
+
+ while (btr_page_get_level(page.get()) != 0)
+ {
+ const rec_t *rec= page_rec_get_next(page_get_infimum_rec(page.get()));
+ if (!rec)
+ return DB_CORRUPTION;
+
+ /* Relax the assertion in rec_init_offsets(). */
+ ut_ad(!index->in_instant_init);
+ ut_d(index->in_instant_init= true);
+ rec_offs *offsets=
+ rec_get_offsets(rec, index, nullptr, 0, ULINT_UNDEFINED, &heap);
+ ut_d(index->in_instant_init= false);
+
+ uint64_t child_page_no= btr_node_ptr_get_child_page_no(rec, offsets);
+
+ if (dberr_t err=
+ os_file_read(IORequestReadPartial, file, page.get(),
+ child_page_no * physical_size, physical_size, nullptr))
+ return err;
+
+ if (dberr_t err= decrypt_decompress(space_crypt, space_flags,
+ {page.get(), static_cast<size_t>
+ (physical_size)}, space_id,
+ page_compress_buf.get()))
+ return err;
+ }
+
+ const auto *rec= page_rec_get_next_const(page_get_infimum_rec(page.get()));
+ const auto comp= dict_table_is_comp(index->table);
+
+ if (!rec || page_rec_is_supremum(rec))
+ {
+ corrupted_metadata:
+ ib::error() << "Table " << index->table->name
+ << " is missing instant ALTER metadata";
+ index->table->corrupted= true;
+ return DB_CORRUPTION;
+ }
+
+ const auto info_bits= rec_get_info_bits(rec, comp);
+ if (!(info_bits & REC_INFO_MIN_REC_FLAG))
+ goto corrupted_metadata;
+
+ if ((info_bits & ~REC_INFO_DELETED_FLAG) != REC_INFO_MIN_REC_FLAG ||
+ (comp && rec_get_status(rec) != REC_STATUS_INSTANT))
+ {
+ incompatible:
+ ib::error() << "Table " << index->table->name
+ << " contains unrecognizable instant ALTER metadata";
+ index->table->corrupted= true;
+ return DB_CORRUPTION;
+ }
+
+ if (info_bits & REC_INFO_DELETED_FLAG)
+ {
+ ulint trx_id_offset= index->trx_id_offset;
+ ut_ad(index->n_uniq);
+
+ if (trx_id_offset)
+ {
+ }
+ else if (index->table->not_redundant())
+ {
+
+ for (uint i= index->n_uniq; i--;)
+ trx_id_offset+= index->fields[i].fixed_len;
+ }
+ else if (rec_get_1byte_offs_flag(rec))
+ {
+ trx_id_offset= rec_1_get_field_end_info(rec, index->n_uniq - 1);
+ ut_ad(!(trx_id_offset & REC_1BYTE_SQL_NULL_MASK));
+ trx_id_offset&= ~REC_1BYTE_SQL_NULL_MASK;
+ }
+ else
+ {
+ trx_id_offset= rec_2_get_field_end_info(rec, index->n_uniq - 1);
+ ut_ad(!(trx_id_offset & REC_2BYTE_SQL_NULL_MASK));
+ trx_id_offset&= ~REC_2BYTE_SQL_NULL_MASK;
+ }
+
+ const byte *ptr=
+ rec + trx_id_offset + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
+
+ if (mach_read_from_4(ptr + BTR_EXTERN_LEN))
+ goto incompatible;
+
+ uint len= mach_read_from_4(ptr + BTR_EXTERN_LEN + 4);
+ if (!len || mach_read_from_4(ptr + BTR_EXTERN_OFFSET) != FIL_PAGE_DATA)
+ goto incompatible;
+
+ std::unique_ptr<byte[], decltype(&aligned_free)>
+ second_page(static_cast<byte*>(aligned_malloc(physical_size,
+ physical_size)),
+ &aligned_free);
+
+ if (dberr_t err=
+ os_file_read(IORequestReadPartial, file, second_page.get(),
+ physical_size *
+ mach_read_from_4(ptr + BTR_EXTERN_PAGE_NO),
+ physical_size, nullptr))
+ return err;
+
+ if (dberr_t err= decrypt_decompress(space_crypt, space_flags,
+ {second_page.get(),
+ static_cast<size_t>(physical_size)},
+ space_id, page_compress_buf.get()))
+ return err;
+
+ if (fil_page_get_type(second_page.get()) != FIL_PAGE_TYPE_BLOB ||
+ mach_read_from_4(
+ &second_page[FIL_PAGE_DATA + BTR_BLOB_HDR_NEXT_PAGE_NO]) !=
+ FIL_NULL ||
+ mach_read_from_4(
+ &second_page[FIL_PAGE_DATA + BTR_BLOB_HDR_PART_LEN]) != len)
+ goto incompatible;
+
+ /* The unused part of the BLOB page should be zero-filled. */
+ for (const byte *
+ b= second_page.get() + (FIL_PAGE_DATA + BTR_BLOB_HDR_SIZE) +
+ len,
+ *const end= second_page.get() + srv_page_size - BTR_EXTERN_LEN;
+ b < end;)
+ {
+ if (*b++)
+ goto incompatible;
+ }
+
+ if (index->table->deserialise_columns(
+ &second_page[FIL_PAGE_DATA + BTR_BLOB_HDR_SIZE], len))
+ goto incompatible;
+ }
+
+ rec_offs *offsets= rec_get_offsets(
+ rec, index, nullptr, index->n_core_fields, ULINT_UNDEFINED, &heap);
+ if (rec_offs_any_default(offsets))
+ {
+ inconsistent:
+ goto incompatible;
+ }
+
+ /* In fact, because we only ever append fields to the metadata
+ record, it is also OK to perform READ UNCOMMITTED and
+ then ignore any extra fields, provided that
+ trx_sys.is_registered(DB_TRX_ID). */
+ if (rec_offs_n_fields(offsets) >
+ ulint(index->n_fields) + !!index->table->instant &&
+ !trx_sys.is_registered(current_trx(),
+ row_get_rec_trx_id(rec, index, offsets)))
+ goto inconsistent;
+
+ for (unsigned i= index->n_core_fields; i < index->n_fields; i++)
+ {
+ dict_col_t *col= index->fields[i].col;
+ const unsigned o= i + !!index->table->instant;
+ ulint len;
+ const byte *data= rec_get_nth_field(rec, offsets, o, &len);
+ ut_ad(!col->is_added());
+ ut_ad(!col->def_val.data);
+ col->def_val.len= len;
+ switch (len) {
+ case UNIV_SQL_NULL:
+ continue;
+ case 0:
+ col->def_val.data= field_ref_zero;
+ continue;
+ }
+ ut_ad(len != UNIV_SQL_DEFAULT);
+ if (!rec_offs_nth_extern(offsets, o))
+ col->def_val.data= mem_heap_dup(index->table->heap, data, len);
+ else if (len < BTR_EXTERN_FIELD_REF_SIZE ||
+ !memcmp(data + len - BTR_EXTERN_FIELD_REF_SIZE, field_ref_zero,
+ BTR_EXTERN_FIELD_REF_SIZE))
+ {
+ col->def_val.len= UNIV_SQL_DEFAULT;
+ goto inconsistent;
+ }
+ else
+ {
+ col->def_val.data= btr_copy_externally_stored_field(
+ &col->def_val.len, data, srv_page_size, len, index->table->heap);
+ }
+ }
+ }
+
+ return DB_SUCCESS;
+}
+
+/**
+Read the contents of the <tablename>.cfg file.
+@return DB_SUCCESS or error code. */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+row_import_read_cfg(
+/*================*/
+ dict_table_t* table, /*!< in: table */
+ THD* thd, /*!< in: session */
+ row_import& cfg) /*!< out: contents of the .cfg file */
+{
+ dberr_t err;
+ char name[OS_FILE_MAX_PATH];
+
+ cfg.m_table = table;
+
+ srv_get_meta_data_filename(table, name, sizeof(name));
+
+ FILE* file = fopen(name, "rb");
+
+ if (file == NULL) {
+ char msg[BUFSIZ];
+
+ snprintf(msg, sizeof(msg),
+ "Error opening '%s', will attempt to import"
+ " without schema verification", name);
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN, ER_IO_READ_ERROR,
+ (ulong) errno, strerror(errno), msg);
+
+ cfg.m_missing = true;
+
+ err = DB_FAIL;
+ } else {
+
+ cfg.m_missing = false;
+
+ err = row_import_read_meta_data(file, thd, cfg);
+ fclose(file);
+ }
+
+ return(err);
+}
+
+/** Update the root page numbers and tablespace ID of a table.
+@param[in,out] trx dictionary transaction
+@param[in,out] table persistent table
+@param[in] reset whether to reset the fields to FIL_NULL
+@return DB_SUCCESS or error code */
+dberr_t
+row_import_update_index_root(trx_t* trx, dict_table_t* table, bool reset)
+{
+ const dict_index_t* index;
+ que_t* graph = 0;
+ dberr_t err = DB_SUCCESS;
+
+ ut_ad(reset || table->space->id == table->space_id);
+
+ static const char sql[] = {
+ "PROCEDURE UPDATE_INDEX_ROOT() IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_INDEXES\n"
+ "SET SPACE = :space,\n"
+ " PAGE_NO = :page,\n"
+ " TYPE = :type\n"
+ "WHERE TABLE_ID = :table_id AND ID = :index_id;\n"
+ "END;\n"};
+
+ table->def_trx_id = trx->id;
+
+ for (index = dict_table_get_first_index(table);
+ index != 0;
+ index = dict_table_get_next_index(index)) {
+
+ pars_info_t* info;
+ ib_uint32_t page;
+ ib_uint32_t space;
+ ib_uint32_t type;
+ index_id_t index_id;
+ table_id_t table_id;
+
+ info = (graph != 0) ? graph->info : pars_info_create();
+
+ mach_write_to_4(
+ reinterpret_cast<byte*>(&type),
+ index->type);
+
+ mach_write_to_4(
+ reinterpret_cast<byte*>(&page),
+ reset ? FIL_NULL : index->page);
+
+ mach_write_to_4(
+ reinterpret_cast<byte*>(&space),
+ reset ? FIL_NULL : index->table->space_id);
+
+ mach_write_to_8(
+ reinterpret_cast<byte*>(&index_id),
+ index->id);
+
+ mach_write_to_8(
+ reinterpret_cast<byte*>(&table_id),
+ table->id);
+
+ /* If we set the corrupt bit during the IMPORT phase then
+ we need to update the system tables. */
+ pars_info_bind_int4_literal(info, "type", &type);
+ pars_info_bind_int4_literal(info, "space", &space);
+ pars_info_bind_int4_literal(info, "page", &page);
+ pars_info_bind_ull_literal(info, "index_id", &index_id);
+ pars_info_bind_ull_literal(info, "table_id", &table_id);
+
+ if (graph == 0) {
+ graph = pars_sql(info, sql);
+ ut_a(graph);
+ graph->trx = trx;
+ }
+
+ que_thr_t* thr;
+
+ ut_a(thr = que_fork_start_command(graph));
+
+ que_run_threads(thr);
+
+ DBUG_EXECUTE_IF("ib_import_internal_error",
+ trx->error_state = DB_ERROR;);
+
+ err = trx->error_state;
+
+ if (err != DB_SUCCESS) {
+ ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_INTERNAL_ERROR,
+ "While updating the <space, root page"
+ " number> of index %s - %s",
+ index->name(), ut_strerr(err));
+
+ break;
+ }
+ }
+
+ que_graph_free(graph);
+
+ return(err);
+}
+
+/** Callback arg for row_import_set_discarded. */
+struct discard_t {
+ ib_uint32_t flags2; /*!< Value read from column */
+ bool state; /*!< New state of the flag */
+ ulint n_recs; /*!< Number of recs processed */
+};
+
+/******************************************************************//**
+Fetch callback that sets or unsets the DISCARDED tablespace flag in
+SYS_TABLES. The flags is stored in MIX_LEN column.
+@return FALSE if all OK */
+static
+ibool
+row_import_set_discarded(
+/*=====================*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: bool set/unset flag */
+{
+ sel_node_t* node = static_cast<sel_node_t*>(row);
+ discard_t* discard = static_cast<discard_t*>(user_arg);
+ dfield_t* dfield = que_node_get_val(node->select_list);
+ dtype_t* type = dfield_get_type(dfield);
+ ulint len = dfield_get_len(dfield);
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+ ut_a(len == sizeof(ib_uint32_t));
+
+ ulint flags2 = mach_read_from_4(
+ static_cast<byte*>(dfield_get_data(dfield)));
+
+ if (discard->state) {
+ flags2 |= DICT_TF2_DISCARDED;
+ } else {
+ flags2 &= ~DICT_TF2_DISCARDED;
+ }
+
+ mach_write_to_4(reinterpret_cast<byte*>(&discard->flags2), flags2);
+
+ ++discard->n_recs;
+
+ /* There should be at most one matching record. */
+ ut_a(discard->n_recs == 1);
+
+ return(FALSE);
+}
+
+/** Update the DICT_TF2_DISCARDED flag in SYS_TABLES.MIX_LEN.
+@param[in,out] trx dictionary transaction
+@param[in] table_id table identifier
+@param[in] discarded whether to set or clear the flag
+@return DB_SUCCESS or error code */
+dberr_t row_import_update_discarded_flag(trx_t* trx, table_id_t table_id,
+ bool discarded)
+{
+ pars_info_t* info;
+ discard_t discard;
+
+ static const char sql[] =
+ "PROCEDURE UPDATE_DISCARDED_FLAG() IS\n"
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR c IS\n"
+ " SELECT MIX_LEN"
+ " FROM SYS_TABLES"
+ " WHERE ID = :table_id FOR UPDATE;"
+ "\n"
+ "BEGIN\n"
+ "OPEN c;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH c INTO my_func();\n"
+ " IF c % NOTFOUND THEN\n"
+ " EXIT;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "UPDATE SYS_TABLES"
+ " SET MIX_LEN = :flags2"
+ " WHERE ID = :table_id;\n"
+ "CLOSE c;\n"
+ "END;\n";
+
+ discard.n_recs = 0;
+ discard.state = discarded;
+ discard.flags2 = ULINT32_UNDEFINED;
+
+ info = pars_info_create();
+
+ pars_info_add_ull_literal(info, "table_id", table_id);
+ pars_info_bind_int4_literal(info, "flags2", &discard.flags2);
+
+ pars_info_bind_function(
+ info, "my_func", row_import_set_discarded, &discard);
+
+ dberr_t err = que_eval_sql(info, sql, trx);
+
+ ut_a(discard.n_recs == 1);
+ ut_a(discard.flags2 != ULINT32_UNDEFINED);
+
+ return(err);
+}
+
+/** InnoDB writes page by page when there is page compressed
+tablespace involved. It does help to save the disk space when
+punch hole is enabled
+@param iter Tablespace iterator
+@param full_crc32 whether the file is in the full_crc32 format
+@param offset offset of the file to be written
+@param writeptr buffer to be written
+@param n_bytes number of bytes to be written
+@param try_punch_only Try the range punch only because the
+ current range is full of empty pages
+@return DB_SUCCESS */
+static
+dberr_t fil_import_compress_fwrite(const fil_iterator_t &iter,
+ bool full_crc32,
+ os_offset_t offset,
+ const byte *writeptr,
+ ulint n_bytes,
+ bool try_punch_only= false)
+{
+ if (dberr_t err= os_file_punch_hole(iter.file, offset, n_bytes))
+ return err;
+
+ if (try_punch_only)
+ return DB_SUCCESS;
+
+ for (ulint j= 0; j < n_bytes; j+= srv_page_size)
+ {
+ /* Read the original data length from block and
+ safer to read FIL_PAGE_COMPRESSED_SIZE because it
+ is not encrypted*/
+ ulint n_write_bytes= srv_page_size;
+ if (j || offset)
+ {
+ n_write_bytes= mach_read_from_2(writeptr + j + FIL_PAGE_DATA);
+ const unsigned ptype= mach_read_from_2(writeptr + j + FIL_PAGE_TYPE);
+ /* Ignore the empty page */
+ if (ptype == 0 && n_write_bytes == 0)
+ continue;
+ if (full_crc32)
+ n_write_bytes= buf_page_full_crc32_size(writeptr + j,
+ nullptr, nullptr);
+ else
+ {
+ n_write_bytes+= ptype == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
+ ? FIL_PAGE_DATA + FIL_PAGE_ENCRYPT_COMP_METADATA_LEN
+ : FIL_PAGE_DATA + FIL_PAGE_COMP_METADATA_LEN;
+ }
+ }
+
+ if (dberr_t err= os_file_write(IORequestWrite, iter.filepath, iter.file,
+ writeptr + j, offset + j, n_write_bytes))
+ return err;
+ }
+
+ return DB_SUCCESS;
+}
+
+dberr_t FetchIndexRootPages::run(const fil_iterator_t& iter,
+ buf_block_t* block) UNIV_NOTHROW
+{
+ const unsigned zip_size= fil_space_t::zip_size(m_space_flags);
+ const unsigned size= zip_size ? zip_size : unsigned(srv_page_size);
+ byte* page_compress_buf= static_cast<byte*>(malloc(get_buf_size()));
+ const bool full_crc32 = fil_space_t::full_crc32(m_space_flags);
+ bool skip_checksum_check = false;
+ ut_ad(!srv_read_only_mode);
+
+ if (!page_compress_buf)
+ return DB_OUT_OF_MEMORY;
+
+ const bool encrypted= iter.crypt_data != NULL &&
+ iter.crypt_data->should_encrypt();
+ byte* const readptr= iter.io_buffer;
+ block->page.frame= readptr;
+
+ if (block->page.zip.data)
+ block->page.zip.data= readptr;
+
+ bool page_compressed= false;
+
+ dberr_t err= os_file_read(IORequestReadPartial, iter.file, readptr,
+ 3 * size, size, nullptr);
+ if (err != DB_SUCCESS)
+ {
+ ib::error() << iter.filepath << ": os_file_read() failed";
+ goto func_exit;
+ }
+
+ if (page_get_page_no(readptr) != 3)
+ {
+page_corrupted:
+ ib::warn() << filename() << ": Page 3 at offset "
+ << 3 * size << " looks corrupted.";
+ err= DB_CORRUPTION;
+ goto func_exit;
+ }
+
+ block->page.id_.set_page_no(3);
+ if (full_crc32 && fil_space_t::is_compressed(m_space_flags))
+ page_compressed= buf_page_is_compressed(readptr, m_space_flags);
+ else
+ {
+ switch (fil_page_get_type(readptr)) {
+ case FIL_PAGE_PAGE_COMPRESSED:
+ case FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED:
+ if (block->page.zip.data)
+ goto page_corrupted;
+ page_compressed= true;
+ }
+ }
+
+ if (encrypted)
+ {
+ if (!buf_page_verify_crypt_checksum(readptr, m_space_flags))
+ goto page_corrupted;
+
+ if ((err= fil_space_decrypt(get_space_id(), m_space_flags, iter.crypt_data,
+ readptr, size, readptr)))
+ goto func_exit;
+ }
+
+ /* For full_crc32 format, skip checksum check
+ after decryption. */
+ skip_checksum_check= full_crc32 && encrypted;
+
+ if (page_compressed)
+ {
+ ulint compress_length= fil_page_decompress(page_compress_buf,
+ readptr,
+ m_space_flags);
+ ut_ad(compress_length != srv_page_size);
+ if (compress_length == 0)
+ goto page_corrupted;
+ }
+ else if (!skip_checksum_check
+ && buf_page_is_corrupted(false, readptr, m_space_flags))
+ goto page_corrupted;
+
+ err= this->operator()(block);
+func_exit:
+ free(page_compress_buf);
+ return err;
+}
+
+static dberr_t fil_iterate(
+ const fil_iterator_t& iter,
+ buf_block_t* block,
+ AbstractCallback& callback)
+{
+ os_offset_t offset;
+ const ulint size = callback.physical_size();
+ ulint n_bytes = iter.n_io_buffers * size;
+
+ byte* page_compress_buf= static_cast<byte*>(malloc(get_buf_size()));
+ ut_ad(!srv_read_only_mode);
+
+ if (!page_compress_buf) {
+ return DB_OUT_OF_MEMORY;
+ }
+
+ uint32_t actual_space_id = 0;
+ const bool full_crc32 = fil_space_t::full_crc32(
+ callback.get_space_flags());
+
+ /* TODO: For ROW_FORMAT=COMPRESSED tables we do a lot of useless
+ copying for non-index pages. Unfortunately, it is
+ required by buf_zip_decompress() */
+ dberr_t err = DB_SUCCESS;
+ bool page_compressed = false;
+ bool punch_hole = !my_test_if_thinly_provisioned(iter.file);
+
+ for (offset = iter.start; offset < iter.end; offset += n_bytes) {
+ if (callback.is_interrupted()) {
+ err = DB_INTERRUPTED;
+ goto func_exit;
+ }
+
+ byte* io_buffer = iter.io_buffer;
+ block->page.frame = io_buffer;
+
+ if (block->page.zip.data) {
+ /* Zip IO is done in the compressed page buffer. */
+ io_buffer = block->page.zip.data;
+ }
+
+ /* We have to read the exact number of bytes. Otherwise the
+ InnoDB IO functions croak on failed reads. */
+
+ n_bytes = ulint(ut_min(os_offset_t(n_bytes),
+ iter.end - offset));
+
+ ut_ad(n_bytes > 0);
+ ut_ad(!(n_bytes % size));
+
+ const bool encrypted = iter.crypt_data != NULL
+ && iter.crypt_data->should_encrypt();
+ /* Use additional crypt io buffer if tablespace is encrypted */
+ byte* const readptr = encrypted
+ ? iter.crypt_io_buffer : io_buffer;
+ byte* const writeptr = readptr;
+
+ err = os_file_read(IORequestReadPartial, iter.file, readptr,
+ offset, n_bytes, nullptr);
+ if (err != DB_SUCCESS) {
+ ib::error() << iter.filepath
+ << ": os_file_read() failed";
+ goto func_exit;
+ }
+
+ bool updated = false;
+ os_offset_t page_off = offset;
+ ulint n_pages_read = n_bytes / size;
+ /* This block is not attached to buf_pool */
+ block->page.id_.set_page_no(uint32_t(page_off / size));
+
+ for (ulint i = 0; i < n_pages_read;
+ ++block->page.id_,
+ ++i, page_off += size, block->page.frame += size) {
+ byte* src = readptr + i * size;
+ const ulint page_no = page_get_page_no(src);
+ if (!page_no && block->page.id().page_no()) {
+ if (!buf_is_zeroes(span<const byte>(src,
+ size))) {
+ goto page_corrupted;
+ }
+ /* Proceed to the next page,
+ because this one is all zero. */
+ continue;
+ }
+
+ if (page_no != block->page.id().page_no()) {
+page_corrupted:
+ ib::warn() << callback.filename()
+ << ": Page " << (offset / size)
+ << " at offset " << offset
+ << " looks corrupted.";
+ err = DB_CORRUPTION;
+ goto func_exit;
+ }
+
+ if (block->page.id().page_no() == 0) {
+ actual_space_id = mach_read_from_4(
+ src + FIL_PAGE_SPACE_ID);
+ }
+
+ const uint16_t type = fil_page_get_type(src);
+ page_compressed =
+ (full_crc32
+ && fil_space_t::is_compressed(
+ callback.get_space_flags())
+ && buf_page_is_compressed(
+ src, callback.get_space_flags()))
+ || type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
+ || type == FIL_PAGE_PAGE_COMPRESSED;
+
+ if (page_compressed && block->page.zip.data) {
+ goto page_corrupted;
+ }
+
+ bool decrypted = false;
+ byte* dst = io_buffer + i * size;
+ bool frame_changed = false;
+ uint key_version = buf_page_get_key_version(
+ src, callback.get_space_flags());
+
+ if (!encrypted) {
+ } else if (!key_version) {
+ if (block->page.id().page_no() == 0
+ && block->page.zip.data) {
+ block->page.zip.data = src;
+ frame_changed = true;
+ } else if (!page_compressed
+ && type != FIL_PAGE_TYPE_XDES
+ && !block->page.zip.data) {
+ block->page.frame = src;
+ frame_changed = true;
+ } else {
+ ut_ad(dst != src);
+ memcpy(dst, src, size);
+ }
+ } else {
+ if (!buf_page_verify_crypt_checksum(
+ src, callback.get_space_flags())) {
+ goto page_corrupted;
+ }
+
+ if ((err = fil_space_decrypt(
+ actual_space_id,
+ callback.get_space_flags(),
+ iter.crypt_data, dst,
+ callback.physical_size(),
+ src))) {
+ goto func_exit;
+ }
+
+ decrypted = true;
+ updated = true;
+ }
+
+ /* For full_crc32 format, skip checksum check
+ after decryption. */
+ bool skip_checksum_check = full_crc32 && encrypted;
+
+ /* If the original page is page_compressed, we need
+ to decompress it before adjusting further. */
+ if (page_compressed) {
+ ulint compress_length = fil_page_decompress(
+ page_compress_buf, dst,
+ callback.get_space_flags());
+ ut_ad(compress_length != srv_page_size);
+ if (compress_length == 0) {
+ goto page_corrupted;
+ }
+ updated = true;
+ } else if (!skip_checksum_check
+ && buf_page_is_corrupted(
+ false,
+ encrypted && !frame_changed
+ ? dst : src,
+ callback.get_space_flags())) {
+ goto page_corrupted;
+ }
+
+ if ((err = callback(block)) != DB_SUCCESS) {
+ goto func_exit;
+ } else if (!updated) {
+ updated = !!block->page.frame;
+ }
+
+ /* If tablespace is encrypted we use additional
+ temporary scratch area where pages are read
+ for decrypting readptr == crypt_io_buffer != io_buffer.
+
+ Destination for decryption is a buffer pool block
+ block->page.frame == dst == io_buffer that is updated.
+ Pages that did not require decryption even when
+ tablespace is marked as encrypted are not copied
+ instead block->page.frame is set to src == readptr.
+
+ For encryption we again use temporary scratch area
+ writeptr != io_buffer == dst
+ that is then written to the tablespace
+
+ (1) For normal tables io_buffer == dst == writeptr
+ (2) For only page compressed tables
+ io_buffer == dst == writeptr
+ (3) For encrypted (and page compressed)
+ readptr != io_buffer == dst != writeptr
+ */
+
+ ut_ad(!encrypted && !page_compressed ?
+ src == dst && dst == writeptr + (i * size):1);
+ ut_ad(page_compressed && !encrypted ?
+ src == dst && dst == writeptr + (i * size):1);
+ ut_ad(encrypted ?
+ src != dst && dst != writeptr + (i * size):1);
+
+ /* When tablespace is encrypted or compressed its
+ first page (i.e. page 0) is not encrypted or
+ compressed and there is no need to copy frame. */
+ if (encrypted && block->page.id().page_no() != 0) {
+ byte *local_frame = callback.get_frame(block);
+ ut_ad((writeptr + (i * size)) != local_frame);
+ memcpy((writeptr + (i * size)), local_frame, size);
+ }
+
+ if (frame_changed) {
+ if (block->page.zip.data) {
+ block->page.zip.data = dst;
+ } else {
+ block->page.frame = dst;
+ }
+ }
+
+ src = io_buffer + (i * size);
+
+ if (page_compressed) {
+ updated = true;
+ if (ulint len = fil_page_compress(
+ src,
+ page_compress_buf,
+ callback.get_space_flags(),
+ 512,/* FIXME: proper block size */
+ encrypted)) {
+ /* FIXME: remove memcpy() */
+ memcpy(src, page_compress_buf, len);
+ memset(src + len, 0,
+ srv_page_size - len);
+ }
+ }
+
+ /* Encrypt the page if encryption was used. */
+ if (encrypted && decrypted) {
+ byte *dest = writeptr + i * size;
+
+ byte* tmp = fil_encrypt_buf(
+ iter.crypt_data,
+ block->page.id().space(),
+ block->page.id().page_no(),
+ src, block->zip_size(), dest,
+ full_crc32);
+
+ if (tmp == src) {
+ /* TODO: remove unnecessary memcpy's */
+ ut_ad(dest != src);
+ memcpy(dest, src, size);
+ }
+
+ updated = true;
+ }
+
+ /* Write checksum for the compressed full crc32 page.*/
+ if (full_crc32 && page_compressed) {
+ ut_ad(updated);
+ byte* dest = writeptr + i * size;
+ ut_d(bool comp = false);
+ ut_d(bool corrupt = false);
+ ulint size = buf_page_full_crc32_size(
+ dest,
+#ifdef UNIV_DEBUG
+ &comp, &corrupt
+#else
+ NULL, NULL
+#endif
+ );
+ ut_ad(!comp == (size == srv_page_size));
+ ut_ad(!corrupt);
+ mach_write_to_4(dest + (size - 4),
+ my_crc32c(0, dest, size - 4));
+ }
+ }
+
+ if (page_compressed && punch_hole) {
+ err = fil_import_compress_fwrite(
+ iter, full_crc32, offset, writeptr, n_bytes,
+ !updated);
+
+ if (err != DB_SUCCESS) {
+ punch_hole = false;
+ if (updated) {
+ goto normal_write;
+ }
+ }
+ } else if (updated) {
+normal_write:
+ /* A page was updated in the set, write it back. */
+ err = os_file_write(IORequestWrite,
+ iter.filepath, iter.file,
+ writeptr, offset, n_bytes);
+
+ if (err != DB_SUCCESS) {
+ goto func_exit;
+ }
+ }
+ }
+
+func_exit:
+ free(page_compress_buf);
+ return err;
+}
+
+/********************************************************************//**
+Iterate over all the pages in the tablespace.
+@param table - the table definiton in the server
+@param n_io_buffers - number of blocks to read and write together
+@param callback - functor that will do the page updates
+@return DB_SUCCESS or error code */
+static
+dberr_t
+fil_tablespace_iterate(
+/*===================*/
+ dict_table_t* table,
+ ulint n_io_buffers,
+ AbstractCallback& callback)
+{
+ dberr_t err;
+ pfs_os_file_t file;
+ char* filepath;
+
+ ut_a(n_io_buffers > 0);
+ ut_ad(!srv_read_only_mode);
+
+ DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
+ return(DB_CORRUPTION););
+
+ /* Make sure the data_dir_path is set. */
+ dict_get_and_save_data_dir_path(table);
+
+ ut_ad(!DICT_TF_HAS_DATA_DIR(table->flags) || table->data_dir_path);
+
+ const char *data_dir_path = DICT_TF_HAS_DATA_DIR(table->flags)
+ ? table->data_dir_path : nullptr;
+
+ filepath = fil_make_filepath(data_dir_path,
+ {table->name.m_name,
+ strlen(table->name.m_name)},
+ IBD, data_dir_path != nullptr);
+ if (!filepath) {
+ return(DB_OUT_OF_MEMORY);
+ } else {
+ bool success;
+
+ file = os_file_create_simple_no_error_handling(
+ innodb_data_file_key, filepath,
+ OS_FILE_OPEN, OS_FILE_READ_WRITE, false, &success);
+
+ if (!success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
+ ib::error() << "Trying to import a tablespace,"
+ " but could not open the tablespace file "
+ << filepath;
+ ut_free(filepath);
+ return DB_TABLESPACE_NOT_FOUND;
+ } else {
+ err = DB_SUCCESS;
+ }
+ }
+
+ callback.set_file(filepath, file);
+
+ os_offset_t file_size = os_file_get_size(file);
+ ut_a(file_size != (os_offset_t) -1);
+
+ /* Allocate a page to read in the tablespace header, so that we
+ can determine the page size and zip_size (if it is compressed).
+ We allocate an extra page in case it is a compressed table. */
+
+ byte* page = static_cast<byte*>(aligned_malloc(2 * srv_page_size,
+ srv_page_size));
+
+ buf_block_t* block = reinterpret_cast<buf_block_t*>
+ (ut_zalloc_nokey(sizeof *block));
+ block->page.frame = page;
+ block->page.init(buf_page_t::UNFIXED + 1, page_id_t{~0ULL});
+
+ /* Read the first page and determine the page size. */
+
+ err = os_file_read(IORequestReadPartial, file, page, 0, srv_page_size,
+ nullptr);
+
+ if (err == DB_SUCCESS) {
+ err = callback.init(file_size, block);
+ }
+
+ if (err == DB_SUCCESS) {
+ block->page.id_ = page_id_t(callback.get_space_id(), 0);
+ if (ulint zip_size = callback.get_zip_size()) {
+ page_zip_set_size(&block->page.zip, zip_size);
+ /* ROW_FORMAT=COMPRESSED is not optimised for block IO
+ for now. We do the IMPORT page by page. */
+ n_io_buffers = 1;
+ }
+
+ fil_iterator_t iter;
+
+ /* read (optional) crypt data */
+ iter.crypt_data = fil_space_read_crypt_data(
+ callback.get_zip_size(), page);
+
+ /* If tablespace is encrypted, it needs extra buffers */
+ if (iter.crypt_data && n_io_buffers > 1) {
+ /* decrease io buffers so that memory
+ consumption will not double */
+ n_io_buffers /= 2;
+ }
+
+ iter.file = file;
+ iter.start = 0;
+ iter.end = file_size;
+ iter.filepath = filepath;
+ iter.file_size = file_size;
+ iter.n_io_buffers = n_io_buffers;
+
+ /* Add an extra page for compressed page scratch area. */
+ iter.io_buffer = static_cast<byte*>(
+ aligned_malloc((1 + iter.n_io_buffers)
+ << srv_page_size_shift, srv_page_size));
+
+ iter.crypt_io_buffer = iter.crypt_data
+ ? static_cast<byte*>(
+ aligned_malloc((1 + iter.n_io_buffers)
+ << srv_page_size_shift,
+ srv_page_size))
+ : NULL;
+
+ if (block->page.zip.ssize) {
+ ut_ad(iter.n_io_buffers == 1);
+ block->page.frame = iter.io_buffer;
+ block->page.zip.data = block->page.frame
+ + srv_page_size;
+ }
+
+ err = callback.run(iter, block);
+
+ if (iter.crypt_data) {
+ fil_space_destroy_crypt_data(&iter.crypt_data);
+ }
+
+ aligned_free(iter.crypt_io_buffer);
+ aligned_free(iter.io_buffer);
+ }
+
+ if (err == DB_SUCCESS) {
+ ib::info() << "Sync to disk";
+
+ if (!os_file_flush(file)) {
+ ib::info() << "os_file_flush() failed!";
+ err = DB_IO_ERROR;
+ } else {
+ ib::info() << "Sync to disk - done!";
+ }
+ }
+
+ os_file_close(file);
+
+ aligned_free(page);
+ ut_free(filepath);
+ ut_free(block);
+
+ return(err);
+}
+
+/*****************************************************************//**
+Imports a tablespace. The space id in the .ibd file must match the space id
+of the table in the data dictionary.
+@return error code or DB_SUCCESS */
+dberr_t
+row_import_for_mysql(
+/*=================*/
+ dict_table_t* table, /*!< in/out: table */
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL */
+{
+ dberr_t err;
+ ib_uint64_t autoinc = 0;
+ char* filepath = NULL;
+ trx_t* trx = prebuilt->trx;
+
+ /* The caller assured that this is not read_only_mode and that no
+ temorary tablespace is being imported. */
+ ut_ad(!srv_read_only_mode);
+ ut_ad(!table->is_temporary());
+
+ ut_ad(table->space_id);
+ ut_ad(table->space_id < SRV_SPACE_ID_UPPER_BOUND);
+ ut_ad(trx);
+ ut_ad(trx->state == TRX_STATE_ACTIVE);
+ ut_ad(!table->is_readable());
+
+ ibuf_delete_for_discarded_space(table->space_id);
+
+ /* Assign an undo segment for the transaction, so that the
+ transaction will be recovered after a crash. */
+
+ /* TODO: Do not write any undo log for the IMPORT cleanup. */
+ {
+ mtr_t mtr;
+ mtr.start();
+ trx_undo_assign(trx, &err, &mtr);
+ mtr.commit();
+ }
+
+ DBUG_EXECUTE_IF("ib_import_undo_assign_failure",
+ err = DB_TOO_MANY_CONCURRENT_TRXS;);
+
+ if (err == DB_SUCCESS && !trx->has_logged_persistent()) {
+ err = DB_TOO_MANY_CONCURRENT_TRXS;
+ }
+ if (err != DB_SUCCESS) {
+ return row_import_cleanup(prebuilt, err);
+ }
+
+ trx->op_info = "read meta-data file";
+
+ row_import cfg;
+ THD* thd = trx->mysql_thd;
+
+ err = row_import_read_cfg(table, thd, cfg);
+
+ /* Check if the table column definitions match the contents
+ of the config file. */
+
+ if (err == DB_SUCCESS) {
+
+ if (dberr_t err = handle_instant_metadata(table, cfg)) {
+ return row_import_error(prebuilt, err);
+ }
+
+ /* We have a schema file, try and match it with our
+ data dictionary. */
+
+ err = cfg.match_schema(thd);
+
+ /* Update index->page and SYS_INDEXES.PAGE_NO to match the
+ B-tree root page numbers in the tablespace. Use the index
+ name from the .cfg file to find match. */
+
+ if (err == DB_SUCCESS) {
+ cfg.set_root_by_name();
+ autoinc = cfg.m_autoinc;
+ }
+
+ DBUG_EXECUTE_IF("ib_import_set_index_root_failure",
+ err = DB_TOO_MANY_CONCURRENT_TRXS;);
+
+ } else if (cfg.m_missing) {
+ /* We don't have a schema file, we will have to discover
+ the index root pages from the .ibd file and skip the schema
+ matching step. */
+
+ ut_a(err == DB_FAIL);
+
+ cfg.m_zip_size = 0;
+
+ if (UT_LIST_GET_LEN(table->indexes) > 1) {
+ ib_errf(thd, IB_LOG_LEVEL_ERROR,
+ ER_INTERNAL_ERROR,
+ "Drop all secondary indexes before importing "
+ "table %s when .cfg file is missing.",
+ table->name.m_name);
+ err = DB_ERROR;
+ return row_import_error(prebuilt, err);
+ }
+
+ FetchIndexRootPages fetchIndexRootPages(table, trx);
+
+ err = fil_tablespace_iterate(
+ table, IO_BUFFER_SIZE(srv_page_size),
+ fetchIndexRootPages);
+
+ if (err == DB_SUCCESS) {
+
+ err = fetchIndexRootPages.build_row_import(&cfg);
+
+ /* Update index->page and SYS_INDEXES.PAGE_NO
+ to match the B-tree root page numbers in the
+ tablespace. */
+
+ if (err == DB_SUCCESS) {
+ err = cfg.set_root_by_heuristic();
+
+ if (err == DB_SUCCESS) {
+ err = handle_instant_metadata(table,
+ cfg);
+ }
+ }
+ }
+ }
+
+ if (err != DB_SUCCESS) {
+ return row_import_error(prebuilt, err);
+ }
+
+ trx->op_info = "importing tablespace";
+
+ ib::info() << "Phase I - Update all pages";
+
+ /* Iterate over all the pages and do the sanity checking and
+ the conversion required to import the tablespace. */
+
+ PageConverter converter(&cfg, table->space_id, trx);
+
+ /* Set the IO buffer size in pages. */
+
+ err = fil_tablespace_iterate(
+ table, IO_BUFFER_SIZE(cfg.m_zip_size ? cfg.m_zip_size
+ : srv_page_size), converter);
+
+ DBUG_EXECUTE_IF("ib_import_reset_space_and_lsn_failure",
+ err = DB_TOO_MANY_CONCURRENT_TRXS;);
+#ifdef BTR_CUR_HASH_ADAPT
+ /* On DISCARD TABLESPACE, we did not drop any adaptive hash
+ index entries. If we replaced the discarded tablespace with a
+ smaller one here, there could still be some adaptive hash
+ index entries that point to cached garbage pages in the buffer
+ pool, because PageConverter::operator() only evicted those
+ pages that were replaced by the imported pages. We must
+ detach any remaining adaptive hash index entries, because the
+ adaptive hash index must be a subset of the table contents;
+ false positives are not tolerated. */
+ for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes); index;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+ index = index->clone_if_needed();
+ }
+#endif /* BTR_CUR_HASH_ADAPT */
+
+ if (err != DB_SUCCESS) {
+ char table_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(
+ table_name, sizeof(table_name),
+ table->name.m_name);
+
+ if (err != DB_DECRYPTION_FAILED) {
+
+ ib_errf(thd, IB_LOG_LEVEL_ERROR,
+ ER_INTERNAL_ERROR,
+ "Error importing tablespace for table %s : %s",
+ table_name, ut_strerr(err));
+ }
+
+ return row_import_cleanup(prebuilt, err);
+ }
+
+ /* If the table is stored in a remote tablespace, we need to
+ determine that filepath from the link file and system tables.
+ Find the space ID in SYS_TABLES since this is an ALTER TABLE. */
+ dict_get_and_save_data_dir_path(table);
+
+ ut_ad(!DICT_TF_HAS_DATA_DIR(table->flags) || table->data_dir_path);
+ const char *data_dir_path = DICT_TF_HAS_DATA_DIR(table->flags)
+ ? table->data_dir_path : nullptr;
+ fil_space_t::name_type name{
+ table->name.m_name, strlen(table->name.m_name)};
+
+ filepath = fil_make_filepath(data_dir_path, name, IBD,
+ data_dir_path != nullptr);
+
+ DBUG_EXECUTE_IF(
+ "ib_import_OOM_15",
+ ut_free(filepath);
+ filepath = NULL;
+ );
+
+ if (filepath == NULL) {
+ return row_import_cleanup(prebuilt, DB_OUT_OF_MEMORY);
+ }
+
+ /* Open the tablespace so that we can access via the buffer pool.
+ The tablespace is initially opened as a temporary one, because
+ we will not be writing any redo log for it before we have invoked
+ fil_space_t::set_imported() to declare it a persistent tablespace. */
+
+ table->space = fil_ibd_open(
+ 2, FIL_TYPE_IMPORT, table->space_id,
+ dict_tf_to_fsp_flags(table->flags), name, filepath, &err);
+
+ ut_ad((table->space == NULL) == (err != DB_SUCCESS));
+ DBUG_EXECUTE_IF("ib_import_open_tablespace_failure",
+ err = DB_TABLESPACE_NOT_FOUND; table->space = NULL;);
+
+ if (!table->space) {
+ ib_senderrf(thd, IB_LOG_LEVEL_ERROR,
+ ER_GET_ERRMSG,
+ err, ut_strerr(err), filepath);
+ }
+
+ ut_free(filepath);
+
+ if (err == DB_SUCCESS) {
+ err = ibuf_check_bitmap_on_import(trx, table->space);
+ }
+
+ DBUG_EXECUTE_IF("ib_import_check_bitmap_failure", err = DB_CORRUPTION;);
+
+ if (err != DB_SUCCESS) {
+ return row_import_cleanup(prebuilt, err);
+ }
+
+ /* The first index must always be the clustered index. */
+
+ dict_index_t* index = dict_table_get_first_index(table);
+
+ if (!dict_index_is_clust(index)) {
+ return row_import_error(prebuilt, DB_CORRUPTION);
+ }
+
+ /* Update the Btree segment headers for index node and
+ leaf nodes in the root page. Set the new space id. */
+
+ err = btr_root_adjust_on_import(index);
+
+ DBUG_EXECUTE_IF("ib_import_cluster_root_adjust_failure",
+ err = DB_CORRUPTION;);
+
+ if (err != DB_SUCCESS) {
+ return row_import_error(prebuilt, err);
+ } else if (cfg.requires_purge(index->name)) {
+
+ /* Purge any delete-marked records that couldn't be
+ purged during the page conversion phase from the
+ cluster index. */
+
+ IndexPurge purge(trx, index);
+
+ trx->op_info = "cluster: purging delete marked records";
+
+ err = purge.garbage_collect();
+
+ trx->op_info = "";
+ }
+
+ DBUG_EXECUTE_IF("ib_import_cluster_failure", err = DB_CORRUPTION;);
+
+ if (err != DB_SUCCESS) {
+ return row_import_error(prebuilt, err);
+ }
+
+ /* For secondary indexes, purge any records that couldn't be purged
+ during the page conversion phase. */
+
+ err = row_import_adjust_root_pages_of_secondary_indexes(
+ trx, table, cfg);
+
+ DBUG_EXECUTE_IF("ib_import_sec_root_adjust_failure",
+ err = DB_CORRUPTION;);
+
+ if (err != DB_SUCCESS) {
+ return row_import_error(prebuilt, err);
+ }
+
+ /* Ensure that the next available DB_ROW_ID is not smaller than
+ any DB_ROW_ID stored in the table. */
+
+ if (prebuilt->clust_index_was_generated) {
+ row_import_set_sys_max_row_id(prebuilt, table);
+ }
+
+ ib::info() << "Phase III - Flush changes to disk";
+
+ /* Ensure that all pages dirtied during the IMPORT make it to disk.
+ The only dirty pages generated should be from the pessimistic purge
+ of delete marked records that couldn't be purged in Phase I. */
+ while (buf_flush_list_space(prebuilt->table->space));
+
+ for (ulint count = 0; prebuilt->table->space->referenced(); count++) {
+ /* Issue a warning every 10.24 seconds, starting after
+ 2.56 seconds */
+ if ((count & 511) == 128) {
+ ib::warn() << "Waiting for flush to complete on "
+ << prebuilt->table->name;
+ }
+ std::this_thread::sleep_for(std::chrono::milliseconds(20));
+ }
+
+ ib::info() << "Phase IV - Flush complete";
+ prebuilt->table->space->set_imported();
+
+ /* The dictionary latches will be released in in row_import_cleanup()
+ after the transaction commit, for both success and error. */
+
+ row_mysql_lock_data_dictionary(trx);
+
+ /* Update the root pages of the table's indexes. */
+ err = row_import_update_index_root(trx, table, false);
+
+ if (err != DB_SUCCESS) {
+ return row_import_error(prebuilt, err);
+ }
+
+ err = row_import_update_discarded_flag(trx, table->id, false);
+
+ if (err != DB_SUCCESS) {
+ return row_import_error(prebuilt, err);
+ }
+
+ table->file_unreadable = false;
+ table->flags2 &= ~DICT_TF2_DISCARDED & ((1U << DICT_TF2_BITS) - 1);
+
+ /* Set autoinc value read from .cfg file, if one was specified.
+ Otherwise, keep the PAGE_ROOT_AUTO_INC as is. */
+ if (autoinc) {
+ ib::info() << table->name << " autoinc value set to "
+ << autoinc;
+
+ table->autoinc = autoinc--;
+ btr_write_autoinc(dict_table_get_first_index(table), autoinc);
+ }
+
+ return row_import_cleanup(prebuilt, err);
+}