summaryrefslogtreecommitdiffstats
path: root/storage/innobase/fsp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 18:07:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 18:07:14 +0000
commita175314c3e5827eb193872241446f2f8f5c9d33c (patch)
treecd3d60ca99ae00829c52a6ca79150a5b6e62528b /storage/innobase/fsp
parentInitial commit. (diff)
downloadmariadb-10.5-upstream.tar.xz
mariadb-10.5-upstream.zip
Adding upstream version 1:10.5.12.upstream/1%10.5.12upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/innobase/fsp')
-rw-r--r--storage/innobase/fsp/fsp0file.cc1043
-rw-r--r--storage/innobase/fsp/fsp0fsp.cc2890
-rw-r--r--storage/innobase/fsp/fsp0space.cc230
-rw-r--r--storage/innobase/fsp/fsp0sysspace.cc994
4 files changed, 5157 insertions, 0 deletions
diff --git a/storage/innobase/fsp/fsp0file.cc b/storage/innobase/fsp/fsp0file.cc
new file mode 100644
index 00000000..57164113
--- /dev/null
+++ b/storage/innobase/fsp/fsp0file.cc
@@ -0,0 +1,1043 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2020, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file fsp/fsp0file.cc
+Tablespace data file implementation
+
+Created 2013-7-26 by Kevin Lewis
+*******************************************************/
+
+#include "fil0fil.h"
+#include "fsp0types.h"
+#include "os0file.h"
+#include "page0page.h"
+#include "srv0start.h"
+
+/** Give a not-yet-named datafile its tablespace name and flags.
+@param[in]	name	tablespace name; a copy is stored in m_name
+@param[in]	flags	tablespace flags, stored in m_flags */
+void Datafile::init(const char* name, ulint flags)
+{
+  /* Debug-only sanity checks: no name may have been assigned yet, and
+  the caller has to pass one. */
+  ut_ad(m_name == NULL);
+  ut_ad(name != NULL);
+
+  m_flags = flags;
+  m_name = mem_strdup(name);
+}
+
+/** Close the file handle if it is open and free the memory held by this
+object: the cached first page, the filepath and the name. */
+void Datafile::shutdown()
+{
+  close();
+
+  free_first_page();
+  free_filepath();
+
+  ut_free(m_name);
+  m_name = NULL;
+}
+
+/** Create or open the data file named by m_filepath, using m_open_flags,
+and keep the resulting handle in m_handle.
+@param[in]	read_only_mode	if true, then readonly mode checks are enforced
+@retval DB_SUCCESS		if the file was opened
+@retval DB_CANNOT_OPEN_FILE	if os_file_create() reported failure; the OS
+				error is saved in m_last_os_error */
+dberr_t Datafile::open_or_create(bool read_only_mode)
+{
+  ut_a(m_filepath != NULL);
+  ut_ad(m_handle == OS_FILE_CLOSED);
+
+  bool success;
+  m_handle = os_file_create(
+    innodb_data_file_key, m_filepath, m_open_flags,
+    OS_FILE_NORMAL, OS_DATA_FILE, read_only_mode, &success);
+
+  if (success) {
+    return DB_SUCCESS;
+  }
+
+  /* Remember the OS error before reporting the failure. */
+  m_last_os_error = os_file_get_last_error(true);
+  ib::error() << "Cannot open datafile '" << m_filepath << "'";
+  return DB_CANNOT_OPEN_FILE;
+}
+
+/** Open an existing data file read-only, so that its existence can be
+established and its contents validated.
+@param[in]	strict	whether to issue error messages on failure
+@retval DB_SUCCESS		if the file was opened; m_exists is set and
+				the OS file information is recorded
+@retval DB_ERROR		if this object has no filepath
+@retval DB_CANNOT_OPEN_FILE	if the file could not be opened */
+dberr_t Datafile::open_read_only(bool strict)
+{
+  ut_ad(m_handle == OS_FILE_CLOSED);
+
+  /* Some file objects never need to be opened; those have no
+  m_filepath. */
+  if (m_filepath == NULL) {
+    return DB_ERROR;
+  }
+
+  bool success = false;
+
+  set_open_flags(OS_FILE_OPEN);
+  m_handle = os_file_create_simple_no_error_handling(
+    innodb_data_file_key, m_filepath, m_open_flags,
+    OS_FILE_READ_ONLY, true, &success);
+
+  if (!success) {
+    /* The OS error is fetched and logged only in strict mode. */
+    if (strict) {
+      m_last_os_error = os_file_get_last_error(true);
+      ib::error() << "Cannot open datafile for read-only: '"
+                  << m_filepath << "' OS error: " << m_last_os_error;
+    }
+
+    return DB_CANNOT_OPEN_FILE;
+  }
+
+  m_exists = true;
+  init_file_info();
+
+  return DB_SUCCESS;
+}
+
+/** Open an existing data file read-write during start-up, so that
+doublewrite pages can be restored into it before it is validated.
+@param[in]	read_only_mode	if true, then readonly mode checks are enforced
+@retval DB_SUCCESS		if the file was opened; m_exists is set and
+				the OS file information is recorded
+@retval DB_ERROR		if this object has no filepath
+@retval DB_CANNOT_OPEN_FILE	if the file could not be opened; the OS error
+				is saved in m_last_os_error */
+dberr_t Datafile::open_read_write(bool read_only_mode)
+{
+  ut_ad(m_handle == OS_FILE_CLOSED);
+
+  /* Some file objects never need to be opened; those have no
+  m_filepath. */
+  if (m_filepath == NULL) {
+    return DB_ERROR;
+  }
+
+  bool success = false;
+
+  set_open_flags(OS_FILE_OPEN);
+  m_handle = os_file_create_simple_no_error_handling(
+    innodb_data_file_key, m_filepath, m_open_flags,
+    OS_FILE_READ_WRITE, read_only_mode, &success);
+
+  if (success) {
+    m_exists = true;
+    init_file_info();
+    return DB_SUCCESS;
+  }
+
+  m_last_os_error = os_file_get_last_error(true);
+  ib::error() << "Cannot open datafile for read-write: '"
+              << m_filepath << "'";
+  return DB_CANNOT_OPEN_FILE;
+}
+
+/** Record the OS-level information of the open handle in m_file_info.
+same_as() compares this information between two Datafile objects.
+NOTE(review): the result of fstat() / GetFileInformationByHandle() is
+not checked here. */
+void Datafile::init_file_info()
+{
+#ifndef _WIN32
+  fstat(m_handle, &m_file_info);
+#else
+  GetFileInformationByHandle((os_file_t)m_handle, &m_file_info);
+#endif /* !_WIN32 */
+}
+
+/** Close the data file if it is open. A failure of os_file_close() is
+treated as fatal via ut_a().
+@return DB_SUCCESS, whether or not a handle had to be closed */
+dberr_t Datafile::close()
+{
+  if (m_handle == OS_FILE_CLOSED) {
+    /* Nothing is open. */
+    return DB_SUCCESS;
+  }
+
+  ibool success = os_file_close(m_handle);
+  ut_a(success);
+
+  m_handle = OS_FILE_CLOSED;
+
+  return DB_SUCCESS;
+}
+
+/** Build m_filepath from a directory path, a file name and an extension
+by delegating to fil_make_filepath(). Any previously stored filepath is
+freed first. At least one of dirpath and filename must be given.
+@param[in]	dirpath		directory path, or NULL
+@param[in]	filename	filename or filepath, or NULL
+@param[in]	ext		filename extension */
+void Datafile::make_filepath(const char* dirpath, const char* filename,
+                             ib_extention ext)
+{
+  ut_ad(dirpath != NULL || filename != NULL);
+
+  /* Drop the old path before building the new one. */
+  free_filepath();
+
+  m_filepath = fil_make_filepath(dirpath, filename, ext, false);
+  ut_ad(m_filepath != NULL);
+
+  /* free_filepath() cleared m_filename; presumably set_filename()
+  re-derives it from the new m_filepath. */
+  set_filename();
+}
+
+/** Replace m_filepath with a newly allocated copy of the given path,
+which is the file name including its extension and its absolute or
+relative directory part.
+@param[in]	filepath	filepath to copy; must not be NULL */
+void Datafile::set_filepath(const char* filepath)
+{
+  free_filepath();
+
+  /* Copy the string including its terminating NUL byte. */
+  const size_t n_bytes = strlen(filepath) + 1;
+  m_filepath = static_cast<char*>(ut_malloc_nokey(n_bytes));
+  memcpy(m_filepath, filepath, n_bytes);
+
+  set_filename();
+}
+
+/** Release the filepath buffer, if there is one, and reset both
+m_filepath and m_filename to NULL. */
+void Datafile::free_filepath()
+{
+  if (m_filepath == NULL) {
+    return;
+  }
+
+  ut_free(m_filepath);
+  m_filename = NULL;
+  m_filepath = NULL;
+}
+
+/** Cheap textual check whether the given path is byte-for-byte equal to
+m_filepath. Two differently spelled paths to the same file are not
+detected here; same_as() covers that case once both files are open.
+@param[in]	other	filepath to compare with
+@retval true	if the two strings are identical
+@retval false	if they differ */
+bool Datafile::same_filepath_as(const char* other) const
+{
+  return strcmp(m_filepath, other) == 0;
+}
+
+/** Decide whether another opened datafile refers to the same file as
+this object, by comparing the OS file information stored in m_file_info
+(volume serial number and file index on Windows, inode and device
+elsewhere).
+@param[in]	other	Datafile to compare with
+@return true if it is the same file, else false */
+bool Datafile::same_as(const Datafile& other) const
+{
+#ifndef _WIN32
+  if (m_file_info.st_ino != other.m_file_info.st_ino) {
+    return false;
+  }
+
+  return m_file_info.st_dev == other.m_file_info.st_dev;
+#else
+  if (m_file_info.dwVolumeSerialNumber
+      != other.m_file_info.dwVolumeSerialNumber) {
+    return false;
+  }
+
+  if (m_file_info.nFileIndexHigh != other.m_file_info.nFileIndexHigh) {
+    return false;
+  }
+
+  return m_file_info.nFileIndexLow == other.m_file_info.nFileIndexLow;
+#endif /* !_WIN32 */
+}
+
+/** Replace m_name with a newly allocated datafile or tablespace name.
+The previous m_name is freed first. When no name is supplied, the name
+is derived from m_filepath by fil_path_to_space_name().
+@param[in]	name	tablespace name if known, NULL if not */
+void Datafile::set_name(const char* name)
+{
+  ut_free(m_name);
+
+  if (name == NULL) {
+    m_name = fil_path_to_space_name(m_filepath);
+  } else {
+    m_name = mem_strdup(name);
+  }
+}
+
+/** Read the first page of the datafile into m_first_page and, for the
+first file of a tablespace (m_order == 0), extract the space ID and the
+tablespace flags from it. The file is opened with open_or_create() if
+it is not open yet.
+The read starts with UNIV_PAGE_SIZE_MAX bytes and halves the request
+after a short read that still delivered at least UNIV_PAGE_SIZE_MIN
+bytes, so that small files do not cause partial-read complaints.
+@param[in]	read_only_mode	if true, then readonly mode checks are enforced
+@retval DB_SUCCESS	if the page was read and its header is consistent
+@retval DB_CORRUPTION	if the two copies of the space ID differ, the flags
+			are invalid, or the file is shorter than one page
+@return otherwise the error from open_or_create() or from the read */
+dberr_t Datafile::read_first_page(bool read_only_mode)
+{
+  if (m_handle == OS_FILE_CLOSED) {
+
+    dberr_t err = open_or_create(read_only_mode);
+
+    if (err != DB_SUCCESS) {
+      return(err);
+    }
+  }
+
+  /* Release a buffer left over from an earlier call; without this a
+  repeated call would overwrite the pointer and leak the old page. */
+  free_first_page();
+
+  /* Align the memory for a possible read from a raw device */
+
+  m_first_page = static_cast<byte*>(
+    aligned_malloc(UNIV_PAGE_SIZE_MAX, srv_page_size));
+
+  dberr_t err = DB_ERROR;
+  size_t page_size = UNIV_PAGE_SIZE_MAX;
+
+  /* Don't want unnecessary complaints about partial reads. */
+
+  while (page_size >= UNIV_PAGE_SIZE_MIN) {
+
+    ulint n_read = 0;
+
+    err = os_file_read_no_error_handling(
+      IORequestReadPartial, m_handle, m_first_page, 0,
+      page_size, &n_read);
+
+    if (err == DB_IO_ERROR && n_read >= UNIV_PAGE_SIZE_MIN) {
+
+      /* Short read: retry with half the size. */
+      page_size >>= 1;
+
+    } else if (err == DB_SUCCESS) {
+
+      ut_a(n_read == page_size);
+
+      break;
+
+    } else if (srv_operation == SRV_OPERATION_BACKUP) {
+      /* In backup mode the failure is not logged. */
+      break;
+    } else {
+
+      ib::error()
+        << "Cannot read first page of '"
+        << m_filepath << "' "
+        << err;
+      break;
+    }
+  }
+
+  if (err != DB_SUCCESS) {
+    return(err);
+  }
+
+  if (m_order == 0) {
+    /* The space ID is stored both in the page header and in the
+    tablespace header; the two copies have to agree. */
+    if (memcmp_aligned<2>(FIL_PAGE_SPACE_ID + m_first_page,
+                          FSP_HEADER_OFFSET + FSP_SPACE_ID
+                          + m_first_page, 4)) {
+      ib::error()
+        << "Inconsistent tablespace ID in "
+        << m_filepath;
+      return DB_CORRUPTION;
+    }
+
+    m_space_id = mach_read_from_4(FIL_PAGE_SPACE_ID
+                                  + m_first_page);
+    m_flags = fsp_header_get_flags(m_first_page);
+    if (!fil_space_t::is_valid_flags(m_flags, m_space_id)) {
+      /* Try to interpret the flags via
+      fsp_flags_convert_from_101() before giving up. */
+      ulint cflags = fsp_flags_convert_from_101(m_flags);
+      if (cflags == ULINT_UNDEFINED) {
+        ib::error()
+          << "Invalid flags " << ib::hex(m_flags)
+          << " in " << m_filepath;
+        return(DB_CORRUPTION);
+      } else {
+        m_flags = cflags;
+      }
+    }
+  }
+
+  const size_t physical_size = fil_space_t::physical_size(m_flags);
+
+  /* page_size is the number of bytes that were actually read. */
+  if (physical_size > page_size) {
+    ib::error() << "File " << m_filepath
+                << " should be longer than "
+                << page_size << " bytes";
+    return(DB_CORRUPTION);
+  }
+
+  return(err);
+}
+
+/** Release the cached copy of the first page once it is no longer
+needed, and reset m_first_page to null. */
+void Datafile::free_first_page()
+{
+  byte* const page = m_first_page;
+  m_first_page = nullptr;
+  aligned_free(page);
+}
+
+/** Validate the datafile against the space ID and flags expected by the
+data dictionary. The file must exist and be open. The first page is
+checked with validate_first_page(); then the space ID and the flags
+(ignoring the bits in FSP_FLAGS_MEM_MASK) are compared.
+@param[in]	space_id	the expected tablespace ID
+@param[in]	flags		the expected tablespace flags
+@retval DB_SUCCESS	if the tablespace is valid and matches
+@retval DB_ERROR	if the file is not open or does not match; in the
+			mismatch case m_is_valid is set to false
+@return otherwise the error reported by validate_first_page() */
+dberr_t Datafile::validate_to_dd(ulint space_id, ulint flags)
+{
+  if (!is_open()) {
+    return DB_ERROR;
+  }
+
+  /* Validate this single-table-tablespace with the data dictionary,
+  but do not compare the DATA_DIR flag, in case the tablespace was
+  remotely located. */
+  const dberr_t err = validate_first_page(NULL);
+
+  if (err != DB_SUCCESS) {
+    return err;
+  }
+
+  flags &= ~FSP_FLAGS_MEM_MASK;
+
+  /* The space ID has to match, and the flags have to be considered
+  equal in at least one direction of the comparison. */
+  if (m_space_id != space_id
+      || (!fil_space_t::is_flags_equal(flags, m_flags)
+          && !fil_space_t::is_flags_equal(m_flags, flags))) {
+
+    /* Do not use this tablespace. */
+    m_is_valid = false;
+
+    ib::error() << "Refusing to load '" << m_filepath << "' (id="
+                << m_space_id << ", flags=" << ib::hex(m_flags)
+                << "); dictionary contains id="
+                << space_id << ", flags=" << ib::hex(flags);
+
+    return DB_ERROR;
+  }
+
+  /* Datafile matches the tablespace expected. */
+  return DB_SUCCESS;
+}
+
+/** Validate this datafile for the purpose of recovery. The file should
+exist and be successfully opened. It is initially opened in read-only
+mode because only the space ID has to be read. However, if the first
+page is corrupt and needs to be restored from the doublewrite buffer,
+the file is reopened in read-write mode and we try to restore that page.
+@retval DB_SUCCESS		if the tablespace is valid; the name is then
+				derived from the filepath via set_name(NULL)
+@retval DB_TABLESPACE_EXISTS	if validate_first_page() reported a duplicate
+				space ID
+@retval DB_CORRUPTION		if the space ID cannot be determined or the
+				first page cannot be restored
+@return otherwise the error from reopening or re-validating the file */
+dberr_t Datafile::validate_for_recovery()
+{
+  dberr_t err;
+
+  ut_ad(is_open());
+  ut_ad(!srv_read_only_mode);
+
+  err = validate_first_page(0);
+
+  switch (err) {
+  case DB_SUCCESS:
+  case DB_TABLESPACE_EXISTS:
+    break;
+
+  default:
+    /* Re-open the file in read-write mode. Attempt to restore
+    page 0 from doublewrite and read the space ID from a survey
+    of the first few pages. */
+    close();
+    err = open_read_write(srv_read_only_mode);
+    if (err != DB_SUCCESS) {
+      return(err);
+    }
+
+    err = find_space_id();
+    if (err != DB_SUCCESS || m_space_id == 0) {
+      ib::error() << "Datafile '" << m_filepath << "' is"
+        " corrupted. Cannot determine the space ID from"
+        " the first 64 pages.";
+      /* Never report success from this branch: a zero space ID
+      is a failure even if find_space_id() returned DB_SUCCESS. */
+      return(err == DB_SUCCESS ? DB_CORRUPTION : err);
+    }
+
+    /* restore_from_doublewrite() returns true on failure. */
+    if (restore_from_doublewrite()) {
+      return(DB_CORRUPTION);
+    }
+
+    /* Free the previously read first page and then re-validate. */
+    free_first_page();
+    err = validate_first_page(0);
+  }
+
+  if (err == DB_SUCCESS) {
+    set_name(NULL);
+  }
+
+  return(err);
+}
+
+/** Check the consistency of the first page of a datafile when the
+tablespace is opened. This occurs before the fil_space_t is created
+so the Space ID found here must not already be open.
+The first page is read with read_first_page() unless it is already
+cached in m_first_page. m_is_valid is set to true on entry and reset to
+false on the corruption and duplicate-space-ID paths.
+@param[out] flush_lsn if not NULL, receives the 8 bytes stored at
+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION on the first page
+@retval DB_SUCCESS if the datafile is valid
+@retval DB_CORRUPTION if the first page cannot be read or fails a check,
+or if a predefined tablespace ID is already used by another file
+@retval DB_ERROR if the page size of the file differs from srv_page_size
+@retval DB_TABLESPACE_EXISTS if there is a duplicate space_id */
+dberr_t
+Datafile::validate_first_page(lsn_t* flush_lsn)
+{
+ char* prev_name;
+ char* prev_filepath;
+ const char* error_txt = NULL;
+
+ m_is_valid = true;
+
+ /* Read the page only if no cached copy exists. */
+ if (m_first_page == NULL
+ && read_first_page(srv_read_only_mode) != DB_SUCCESS) {
+
+ error_txt = "Cannot read first page";
+ } else {
+ ut_ad(m_first_page);
+
+ if (flush_lsn != NULL) {
+
+ *flush_lsn = mach_read_from_8(
+ m_first_page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+ }
+ }
+
+ if (error_txt != NULL) {
+ /* Common failure exit; the checks below jump here with error_txt
+ set. It logs the reason, marks the file invalid and drops the
+ cached page. */
+err_exit:
+ ib::info() << error_txt << " in datafile: " << m_filepath
+ << ", Space ID:" << m_space_id << ", Flags: "
+ << m_flags;
+ m_is_valid = false;
+ free_first_page();
+ return(DB_CORRUPTION);
+ }
+
+ /* Check if the whole page is blank. */
+ if (!m_space_id && !m_flags) {
+ const byte* b = m_first_page;
+ /* Counts down from srv_page_size; it reaches 0 only if every
+ byte scanned was zero. */
+ ulint nonzero_bytes = srv_page_size;
+
+ while (*b == '\0' && --nonzero_bytes != 0) {
+
+ b++;
+ }
+
+ if (nonzero_bytes == 0) {
+ error_txt = "Header page consists of zero bytes";
+ goto err_exit;
+ }
+ }
+
+ if (!fil_space_t::is_valid_flags(m_flags, m_space_id)) {
+ /* Tablespace flags must be valid. */
+ error_txt = "Tablespace flags are invalid";
+ goto err_exit;
+ }
+
+ ulint logical_size = fil_space_t::logical_size(m_flags);
+
+ if (srv_page_size != logical_size) {
+ /* Logical size must be innodb_page_size. */
+ /* Unlike the err_exit path, this one returns DB_ERROR and leaves
+ m_is_valid unchanged. */
+ ib::error()
+ << "Data file '" << m_filepath << "' uses page size "
+ << logical_size << ", but the innodb_page_size"
+ " start-up parameter is "
+ << srv_page_size;
+ free_first_page();
+ return(DB_ERROR);
+ }
+
+ if (page_get_page_no(m_first_page) != 0) {
+ /* First page must be number 0 */
+ error_txt = "Header page contains inconsistent data";
+ goto err_exit;
+ }
+
+ if (m_space_id >= SRV_SPACE_ID_UPPER_BOUND) {
+ error_txt = "A bad Space ID was found";
+ goto err_exit;
+ }
+
+ if (buf_page_is_corrupted(false, m_first_page, m_flags)) {
+ /* Look for checksum and other corruptions. */
+ error_txt = "Checksum mismatch";
+ goto err_exit;
+ }
+
+ /* A true result hands back two buffers that are freed with ut_free()
+ on both paths below. */
+ if (fil_space_read_name_and_filepath(
+ m_space_id, &prev_name, &prev_filepath)) {
+
+ /* The same file under the same space ID is not a conflict. */
+ if (0 == strcmp(m_filepath, prev_filepath)) {
+ ut_free(prev_name);
+ ut_free(prev_filepath);
+ return(DB_SUCCESS);
+ }
+
+ /* Make sure the space_id has not already been opened. */
+ ib::error() << "Attempted to open a previously opened"
+ " tablespace. Previous tablespace " << prev_name
+ << " at filepath: " << prev_filepath
+ << " uses space ID: " << m_space_id
+ << ". Cannot open filepath: " << m_filepath
+ << " which uses the same space ID.";
+
+ ut_free(prev_name);
+ ut_free(prev_filepath);
+
+ m_is_valid = false;
+
+ free_first_page();
+
+ /* A clash on a predefined tablespace ID is reported as
+ corruption rather than as a duplicate. */
+ return(is_predefined_tablespace(m_space_id)
+ ? DB_CORRUPTION
+ : DB_TABLESPACE_EXISTS);
+ }
+
+ return(DB_SUCCESS);
+}
+
+/** Determine the space id of the given file descriptor by reading a few
+pages from the beginning of the .ibd file.
+Each candidate page size from UNIV_ZIP_SIZE_MIN up to UNIV_PAGE_SIZE_MAX
+(doubling each time) is tried in turn. For each, up to 64 pages are read,
+the pages that pass buf_page_is_corrupted() are counted per nonzero space
+ID, and the first space ID whose count is within 3 of the number of valid
+pages is stored in m_space_id.
+@retval DB_SUCCESS if the space id was identified and stored in m_space_id
+@retval DB_CORRUPTION if the file size cannot be determined or no
+candidate page size yields an agreed space id */
+dberr_t
+Datafile::find_space_id()
+{
+ os_offset_t file_size;
+
+ ut_ad(m_handle != OS_FILE_CLOSED);
+
+ file_size = os_file_get_size(m_handle);
+
+ /* (os_offset_t) -1 is the failure value of os_file_get_size(). */
+ if (file_size == (os_offset_t) -1) {
+ ib::error() << "Could not get file size of datafile '"
+ << m_filepath << "'";
+ return(DB_CORRUPTION);
+ }
+
+ /* Assuming a page size, read the space_id from each page and store it
+ in a map. Find out which space_id is agreed on by majority of the
+ pages. Choose that space_id. */
+ for (ulint page_size = UNIV_ZIP_SIZE_MIN;
+ page_size <= UNIV_PAGE_SIZE_MAX;
+ page_size <<= 1) {
+ /* map[space_id] = count of pages */
+ typedef std::map<
+ ulint,
+ ulint,
+ std::less<ulint>,
+ ut_allocator<std::pair<const ulint, ulint> > >
+ Pages;
+
+ Pages verify;
+ ulint page_count = 64;
+ ulint valid_pages = 0;
+
+ /* Adjust the number of pages to analyze based on file size */
+ while ((page_count * page_size) > file_size) {
+ --page_count;
+ }
+
+ ib::info()
+ << "Page size:" << page_size
+ << ". Pages to analyze:" << page_count;
+
+ /* Scratch buffer for one page; freed after the read loop. */
+ byte* page = static_cast<byte*>(
+ aligned_malloc(page_size, page_size));
+
+ ulint fsp_flags;
+ /* provide dummy value if the first os_file_read() fails */
+ switch (srv_checksum_algorithm) {
+ case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32:
+ case SRV_CHECKSUM_ALGORITHM_FULL_CRC32:
+ fsp_flags = 1U << FSP_FLAGS_FCRC32_POS_MARKER
+ | FSP_FLAGS_FCRC32_PAGE_SSIZE()
+ | innodb_compression_algorithm
+ << FSP_FLAGS_FCRC32_POS_COMPRESSED_ALGO;
+ break;
+ default:
+ fsp_flags = 0;
+ }
+
+ for (ulint j = 0; j < page_count; ++j) {
+ /* A nonzero result from os_file_read() is a failed read; the
+ page is skipped. */
+ if (os_file_read(IORequestRead, m_handle, page,
+ j * page_size, page_size)) {
+ ib::info()
+ << "READ FAIL: page_no:" << j;
+ continue;
+ }
+
+ /* The flags read from page 0 replace the dummy value and are
+ used to check every page of this pass. */
+ if (j == 0) {
+ fsp_flags = mach_read_from_4(
+ page + FSP_HEADER_OFFSET + FSP_SPACE_FLAGS);
+ }
+
+ bool noncompressed_ok = false;
+
+ /* For noncompressed pages, the page size must be
+ equal to srv_page_size. */
+ if (page_size == srv_page_size
+ && !fil_space_t::zip_size(fsp_flags)) {
+ noncompressed_ok = !buf_page_is_corrupted(
+ false, page, fsp_flags);
+ }
+
+ bool compressed_ok = false;
+
+ /* The compressed check applies only when srv_page_size does not
+ exceed UNIV_PAGE_SIZE_DEF and the candidate size equals the zip
+ size from the flags. */
+ if (srv_page_size <= UNIV_PAGE_SIZE_DEF
+ && page_size == fil_space_t::zip_size(fsp_flags)) {
+ compressed_ok = !buf_page_is_corrupted(
+ false, page, fsp_flags);
+ }
+
+ if (noncompressed_ok || compressed_ok) {
+
+ ulint space_id = mach_read_from_4(page
+ + FIL_PAGE_SPACE_ID);
+
+ /* Pages with space ID 0 are counted neither as valid nor in
+ the map. */
+ if (space_id > 0) {
+
+ ib::info()
+ << "VALID: space:"
+ << space_id << " page_no:" << j
+ << " page_size:" << page_size;
+
+ ++valid_pages;
+
+ ++verify[space_id];
+ }
+ }
+ }
+
+ aligned_free(page);
+
+ ib::info()
+ << "Page size: " << page_size
+ << ". Possible space_id count:" << verify.size();
+
+ /* Largest number of valid pages that may disagree with the chosen
+ space ID. */
+ const ulint pages_corrupted = 3;
+
+ /* Try an exact match first (missed == 0), then allow more misses.
+ NOTE(review): valid_pages - missed presumably wraps when
+ valid_pages < missed (ulint looks unsigned), in which case no
+ count can match it. */
+ for (ulint missed = 0; missed <= pages_corrupted; ++missed) {
+
+ for (Pages::const_iterator it = verify.begin();
+ it != verify.end();
+ ++it) {
+
+ ib::info() << "space_id:" << it->first
+ << ", Number of pages matched: "
+ << it->second << "/" << valid_pages
+ << " (" << page_size << ")";
+
+ if (it->second == (valid_pages - missed)) {
+ ib::info() << "Chosen space:"
+ << it->first;
+
+ m_space_id = it->first;
+ return(DB_SUCCESS);
+ }
+ }
+
+ }
+ }
+
+ /* No candidate page size produced an agreed space ID. */
+ return(DB_CORRUPTION);
+}
+
+
+/** Overwrite page 0 of this datafile with the copy of that page held in
+the doublewrite buffer. Nothing is attempted unless srv_operation is
+SRV_OPERATION_NORMAL.
+@return whether the operation failed (true on failure, false when the
+page was written) */
+bool Datafile::restore_from_doublewrite()
+{
+  if (srv_operation != SRV_OPERATION_NORMAL) {
+    return true;
+  }
+
+  /* Look up page 0 of this space in the doublewrite buffer. */
+  const page_id_t page_id(m_space_id, 0);
+  const byte* page = recv_sys.dblwr.find_page(page_id);
+
+  if (page == NULL) {
+    /* Without a doublewrite copy of the first page of the user
+    tablespace, recovery is going to fail, so this is treated as an
+    error. */
+    ib::error()
+      << "Corrupted page " << page_id
+      << " of datafile '" << m_filepath
+      << "' could not be found in the doublewrite buffer.";
+
+    return true;
+  }
+
+  ulint flags = mach_read_from_4(
+    page + FSP_HEADER_OFFSET + FSP_SPACE_FLAGS);
+
+  if (!fil_space_t::is_valid_flags(flags, m_space_id)) {
+    flags = fsp_flags_convert_from_101(flags);
+    /* recv_dblwr_t::validate_page() inside find_page()
+    checked this already. */
+    ut_ad(flags != ULINT_UNDEFINED);
+    /* The flags on the page should be converted later. */
+  }
+
+  /* The number of bytes to write is the physical page size implied by
+  the flags. */
+  ulint n_bytes = fil_space_t::physical_size(flags);
+
+  ut_a(page_get_page_no(page) == page_id.page_no());
+
+  ib::info() << "Restoring page " << page_id
+             << " of datafile '" << m_filepath
+             << "' from the doublewrite buffer. Writing "
+             << n_bytes << " bytes into file '"
+             << m_filepath << "'";
+
+  if (os_file_write(
+        IORequestWrite,
+        m_filepath, m_handle, page, 0, n_bytes)
+      != DB_SUCCESS) {
+    return true;
+  }
+
+  return false;
+}
+
+/** Build the link file name from the tablespace name (unless
+m_link_filepath is already set), read that link file, and store the path
+found in it in m_filepath.
+NOTE(review): a previous m_filepath value would be overwritten without
+being freed; the callers in this file only call this when m_filepath is
+NULL.
+@retval DB_SUCCESS		if the link file was read
+@retval DB_CANNOT_OPEN_FILE	if read_link_file() returned NULL */
+dberr_t RemoteDatafile::open_link_file()
+{
+  if (m_link_filepath == NULL) {
+    m_link_filepath = fil_make_filepath(NULL, name(), ISL, false);
+  }
+
+  m_filepath = read_link_file(m_link_filepath);
+
+  if (m_filepath == NULL) {
+    return DB_CANNOT_OPEN_FILE;
+  }
+
+  return DB_SUCCESS;
+}
+
+/** Open, in read-only mode, the data file that an InnoDB Symbolic Link
+file points to, so that it can be validated. The link file is read first
+if m_filepath is not known yet.
+@param[in]	strict	whether to issue error messages
+@retval DB_SUCCESS	if the linked tablespace file was found and opened
+@retval DB_ERROR	if the link file could not be read
+@return otherwise the error from Datafile::open_read_only() */
+dberr_t RemoteDatafile::open_read_only(bool strict)
+{
+  if (m_filepath == NULL && open_link_file() == DB_CANNOT_OPEN_FILE) {
+    return DB_ERROR;
+  }
+
+  const dberr_t err = Datafile::open_read_only(strict);
+
+  if (strict && err != DB_SUCCESS) {
+    /* The following call prints an error message */
+    os_file_get_last_error(true);
+    ib::error() << "A link file was found named '"
+                << m_link_filepath << "' but the linked tablespace '"
+                << m_filepath << "' could not be opened read-only.";
+  }
+
+  return err;
+}
+
+/** Open, in read-write mode, the data file that an InnoDB Symbolic Link
+file points to, so that it can be restored from the doublewrite buffer
+and validated. The link file is read first if m_filepath is not known
+yet.
+@param[in]	read_only_mode	if true, then readonly mode checks are enforced
+@retval DB_SUCCESS	if the linked tablespace file was found and opened
+@retval DB_ERROR	if the link file could not be read
+@return otherwise the error from Datafile::open_read_write() */
+dberr_t RemoteDatafile::open_read_write(bool read_only_mode)
+{
+  if (m_filepath == NULL && open_link_file() == DB_CANNOT_OPEN_FILE) {
+    return DB_ERROR;
+  }
+
+  const dberr_t err = Datafile::open_read_write(read_only_mode);
+
+  if (err == DB_SUCCESS) {
+    return err;
+  }
+
+  /* The following call prints an error message */
+  m_last_os_error = os_file_get_last_error(true);
+  ib::error() << "A link file was found named '"
+              << m_link_filepath << "' but the linked data file '"
+              << m_filepath << "' could not be opened for writing.";
+
+  return err;
+}
+
+/** Release the resources of the base Datafile and then the link file
+path owned by this object. */
+void RemoteDatafile::shutdown()
+{
+  Datafile::shutdown();
+
+  if (m_link_filepath == NULL) {
+    return;
+  }
+
+  ut_free(m_link_filepath);
+  m_link_filepath = NULL;
+}
+
+/** Create a new InnoDB Symbolic Link (ISL) file. It is always created
+under the 'datadir' of MySQL. The datadir is the directory of a
+running mysqld program. We can refer to it by simply using the path ".".
+If a link file already exists and already contains the same filepath,
+nothing is written and DB_SUCCESS is returned.
+@param[in]	name		tablespace name
+@param[in]	filepath	remote filepath of tablespace datafile
+@retval DB_SUCCESS		if the link file exists with this filepath
+@retval DB_TABLESPACE_EXISTS	if a different link file is in the way
+@retval DB_OUT_OF_FILE_SPACE	if the disk is full when creating the file
+@retval DB_ERROR		if the name cannot be built, or the file cannot
+				be created, written or closed */
+dberr_t RemoteDatafile::create_link_file(const char* name,
+                                         const char* filepath)
+{
+  bool success;
+  dberr_t err = DB_SUCCESS;
+  char* link_filepath = NULL;
+  char* prev_filepath = NULL;
+
+  ut_ad(!srv_read_only_mode);
+  ut_ad(0 == strcmp(&filepath[strlen(filepath) - 4], DOT_IBD));
+
+  link_filepath = fil_make_filepath(NULL, name, ISL, false);
+
+  if (link_filepath == NULL) {
+    return(DB_ERROR);
+  }
+
+  prev_filepath = read_link_file(link_filepath);
+  if (prev_filepath) {
+    /* Truncate (starting with MySQL 5.6, probably no
+    longer since MariaDB Server 10.2.19) used to call this
+    with an existing link file which contains the same filepath. */
+    bool same = !strcmp(prev_filepath, filepath);
+    ut_free(prev_filepath);
+    if (same) {
+      ut_free(link_filepath);
+      return(DB_SUCCESS);
+    }
+  }
+
+  /** Check if the file already exists. */
+  FILE* file = NULL;
+  bool exists;
+  os_file_type_t ftype;
+
+  success = os_file_status(link_filepath, &exists, &ftype);
+  ulint error = 0;
+
+  if (success && !exists) {
+
+    file = fopen(link_filepath, "w");
+    if (file == NULL) {
+      /* This call will print its own error message */
+      error = os_file_get_last_error(true);
+    }
+  } else {
+    /* NOTE(review): a failed os_file_status() call is also
+    reported as "already exists" here. */
+    error = OS_FILE_ALREADY_EXISTS;
+  }
+
+  if (error != 0) {
+
+    ib::error() << "Cannot create file " << link_filepath << ".";
+
+    if (error == OS_FILE_ALREADY_EXISTS) {
+      ib::error() << "The link file: " << link_filepath
+                  << " already exists.";
+      err = DB_TABLESPACE_EXISTS;
+
+    } else if (error == OS_FILE_DISK_FULL) {
+      err = DB_OUT_OF_FILE_SPACE;
+
+    } else {
+      err = DB_ERROR;
+    }
+
+    /* file is not open, no need to close it. */
+    ut_free(link_filepath);
+    return(err);
+  }
+
+  const size_t len = strlen(filepath);
+
+  if (fwrite(filepath, 1, len, file) != len) {
+    /* This call prints the OS error. */
+    os_file_get_last_error(true);
+    ib::error() <<
+      "Cannot write link file: "
+      << link_filepath << " filepath: " << filepath;
+    err = DB_ERROR;
+  }
+
+  /* Close the file, we only need it at startup. The data may still
+  sit in the stdio buffer, so a write failure can surface only here;
+  fclose() is always called, and its failure is reported unless the
+  write already failed. */
+  if (fclose(file) != 0 && err == DB_SUCCESS) {
+    os_file_get_last_error(true);
+    ib::error() <<
+      "Cannot write link file: "
+      << link_filepath << " filepath: " << filepath;
+    err = DB_ERROR;
+  }
+
+  ut_free(link_filepath);
+
+  return(err);
+}
+
+/** Delete the InnoDB Symbolic Link (ISL) file named by m_link_filepath,
+if that file exists. Debug builds assert that the path is set; release
+builds silently do nothing when it is not. */
+void RemoteDatafile::delete_link_file(void)
+{
+  ut_ad(m_link_filepath != NULL);
+
+  if (m_link_filepath == NULL) {
+    return;
+  }
+
+  os_file_delete_if_exists(innodb_data_file_key,
+                           m_link_filepath, NULL);
+}
+
+/** Delete the InnoDB Symbolic Link (ISL) file that belongs to the given
+tablespace name, if that file exists. Nothing happens if the link file
+path cannot be built.
+@param[in]	name	tablespace name */
+void RemoteDatafile::delete_link_file(const char* name)
+{
+  char* link_filepath = fil_make_filepath(NULL, name, ISL, false);
+
+  if (link_filepath == NULL) {
+    return;
+  }
+
+  os_file_delete_if_exists(
+    innodb_data_file_key, link_filepath, NULL);
+
+  /* The path buffer is owned by this function. */
+  ut_free(link_filepath);
+}
+
+/** Read an InnoDB Symbolic Link (ISL) file by name.
+It is always created under the datadir of MySQL.
+For file-per-table tablespaces, the isl file is expected to be
+in a 'database' directory and called 'tablename.isl'.
+Trailing whitespace and control characters are trimmed from the path
+that was read, and the path is normalized with os_normalize_path().
+The caller must free the memory returned if it is not null.
+@param[in]	link_filepath	filepath of the ISL file
+@return filepath of the IBD file read from the ISL file (an empty string
+if the link file is empty), or NULL if the link file cannot be opened */
+char* RemoteDatafile::read_link_file(const char* link_filepath)
+{
+  FILE* file = fopen(link_filepath, "r+b" STR_O_CLOEXEC);
+  if (file == NULL) {
+    return(NULL);
+  }
+
+  char* filepath = static_cast<char*>(ut_malloc_nokey(OS_FILE_MAX_PATH));
+
+  os_file_read_string(file, filepath, OS_FILE_MAX_PATH);
+  fclose(file);
+
+  if (filepath[0] != '\0') {
+    /* Trim whitespace from end of filepath. The byte is compared as
+    unsigned char: where plain char is signed, bytes >= 0x80 (for
+    example UTF-8 path components) are negative and would otherwise
+    be stripped as if they were whitespace. */
+    ulint last_ch = strlen(filepath) - 1;
+    while (last_ch > 4
+           && static_cast<unsigned char>(filepath[last_ch]) <= 0x20) {
+      filepath[last_ch--] = 0x00;
+    }
+    os_normalize_path(filepath);
+  }
+
+  return(filepath);
+}
diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc
new file mode 100644
index 00000000..3d5a7edd
--- /dev/null
+++ b/storage/innobase/fsp/fsp0fsp.cc
@@ -0,0 +1,2890 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2021, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file fsp/fsp0fsp.cc
+File space management
+
+Created 11/29/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "fsp0fsp.h"
+#include "buf0buf.h"
+#include "fil0fil.h"
+#include "fil0crypt.h"
+#include "mtr0log.h"
+#include "ut0byte.h"
+#include "page0page.h"
+#include "fut0fut.h"
+#include "srv0srv.h"
+#include "srv0start.h"
+#include "ibuf0ibuf.h"
+#include "btr0btr.h"
+#include "btr0sea.h"
+#include "dict0boot.h"
+#include "log0log.h"
+#include "dict0mem.h"
+#include "fsp0types.h"
+
+// JAN: MySQL 5.7 Encryption
+// #include <my_aes.h>
+
+/** Page number type used by the declarations in this file */
+typedef uint32_t page_no_t;
+
+/** Return an extent to the free list of a space.
+Forward declaration of a file-local (static) function.
+@param[in,out] space tablespace
+@param[in] offset page number in the extent
+@param[in,out] mtr mini-transaction */
+MY_ATTRIBUTE((nonnull))
+static
+void
+fsp_free_extent(
+ fil_space_t* space,
+ page_no_t offset,
+ mtr_t* mtr);
+
+/** Returns the first extent descriptor for a segment.
+We think of the extent lists of the segment as concatenated in the order
+FSEG_FULL -> FSEG_NOT_FULL -> FSEG_FREE.
+Forward declaration of a file-local (static) function.
+@param[in] inode segment inode
+@param[in] space tablespace
+@param[in,out] mtr mini-transaction
+@return the first extent descriptor, or NULL if none */
+MY_ATTRIBUTE((nonnull, warn_unused_result))
+static
+xdes_t*
+fseg_get_first_extent(
+ fseg_inode_t* inode,
+ const fil_space_t* space,
+ mtr_t* mtr);
+
+/** Put new extents to the free list if there are free extents above the free
+limit. If an extent happens to contain an extent descriptor page, the extent
+is put to the FSP_FREE_FRAG list with the page marked as used.
+Forward declaration; the function is defined further down in this file.
+@param[in] init_space true if this is a single-table tablespace
+and we are only initializing the first extent and the first bitmap pages;
+then we will not allocate more extents
+@param[in,out] space tablespace
+@param[in,out] header tablespace header
+@param[in,out] mtr mini-transaction */
+static ATTRIBUTE_COLD
+void
+fsp_fill_free_list(
+ bool init_space,
+ fil_space_t* space,
+ buf_block_t* header,
+ mtr_t* mtr);
+
+/** Allocates a single free page from a segment.
+This function implements the intelligent allocation strategy which tries to
+minimize file space fragmentation.
+Forward declaration of a file-local (static) function.
+@param[in,out] space tablespace
+@param[in,out] seg_inode segment inode
+@param[in,out] iblock segment inode page
+@param[in] hint hint of which page would be desirable
+@param[in] direction if the new page is needed because of
+an index page split, and records are inserted there in order, into which
+direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR
+@param[in] has_done_reservation whether the space has already been
+reserved; this parameter exists only when UNIV_DEBUG is defined
+@param[in,out] mtr mini-transaction
+@param[in,out] init_mtr mtr or another mini-transaction in
+which the page should be initialized.
+@retval NULL if no page could be allocated */
+static
+buf_block_t*
+fseg_alloc_free_page_low(
+ fil_space_t* space,
+ fseg_inode_t* seg_inode,
+ buf_block_t* iblock,
+ uint32_t hint,
+ byte direction,
+#ifdef UNIV_DEBUG
+ bool has_done_reservation,
+ /*!< whether the space has already been reserved */
+#endif /* UNIV_DEBUG */
+ mtr_t* mtr,
+ mtr_t* init_mtr)
+ MY_ATTRIBUTE((warn_unused_result));
+
+/** Get the tablespace header block (page 0 of the tablespace).
+@param[in] space tablespace
+@param[in,out] mtr mini-transaction
+@return the tablespace header block, acquired with RW_SX_LATCH */
+inline buf_block_t *fsp_get_header(const fil_space_t *space, mtr_t *mtr)
+{
+  buf_block_t *header= buf_page_get(page_id_t(space->id, 0),
+                                    space->zip_size(), RW_SX_LATCH, mtr);
+  buf_block_dbg_add_level(header, SYNC_FSP_PAGE);
+  /* The tablespace ID stored in the page must match the object. */
+  ut_ad(mach_read_from_4(header->frame + FSP_HEADER_OFFSET + FSP_SPACE_ID) ==
+        space->id);
+  return header;
+}
+
+/** Set or clear the XDES_FREE_BIT of one page in an extent descriptor.
+Each page owns XDES_BITS_PER_PAGE (2) bits in the bitmap that starts at
+XDES_BITMAP; only the XDES_FREE_BIT of the page is changed.
+@tparam free desired value of XDES_FREE_BIT
+@param[in] block extent descriptor block
+@param[in,out] descr extent descriptor
+@param[in] offset page offset within the extent
+@param[in,out] mtr mini-transaction */
+template<bool free>
+inline void xdes_set_free(const buf_block_t &block, xdes_t *descr,
+                          ulint offset, mtr_t *mtr)
+{
+  ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_SX_FIX |
+                                   MTR_MEMO_PAGE_X_FIX));
+  ut_ad(offset < FSP_EXTENT_SIZE);
+  ut_ad(page_align(descr) == block.frame);
+  compile_time_assert(XDES_BITS_PER_PAGE == 2);
+  compile_time_assert(XDES_FREE_BIT == 0);
+  compile_time_assert(XDES_CLEAN_BIT == 1);
+
+  const ulint bit_no= XDES_BITS_PER_PAGE * offset;
+  /* Byte of the bitmap that holds the bits of this page */
+  byte *slot= descr + XDES_BITMAP + (bit_no >> 3);
+  /* xdes_init() should have set all XDES_CLEAN_BIT. */
+  ut_ad(!(~*slot & 0xaa));
+
+  byte val;
+  if (free)
+    val= static_cast<byte>(*slot | 1 << (bit_no & 7));
+  else
+    val= static_cast<byte>(*slot & ~(1 << (bit_no & 7)));
+
+  mtr->write<1>(block, slot, val);
+}
+
+/**
+Find a free page in an extent.
+The search starts at hint and runs to the end of the extent, then
+continues from the start of the extent up to (excluding) hint.
+@param descr extent descriptor
+@param hint  page offset to start searching from (towards larger pages)
+@return free page offset
+@retval FIL_NULL if no page is free */
+inline uint32_t xdes_find_free(const xdes_t *descr, uint32_t hint= 0)
+{
+  const uint32_t extent_size= FSP_EXTENT_SIZE;
+  ut_ad(hint < extent_size);
+
+  uint32_t pos= hint;
+  while (pos < extent_size)
+  {
+    if (xdes_is_free(descr, pos))
+      return pos;
+    pos++;
+  }
+
+  pos= 0;
+  while (pos < hint)
+  {
+    if (xdes_is_free(descr, pos))
+      return pos;
+    pos++;
+  }
+
+  return FIL_NULL;
+}
+
+/**
+Count the pages of an extent that are not marked free.
+@param descr extent descriptor
+@return number of pages used */
+inline uint32_t xdes_get_n_used(const xdes_t *descr)
+{
+  const uint32_t extent_size= FSP_EXTENT_SIZE;
+  uint32_t n_used= 0;
+
+  for (uint32_t page= 0; page < extent_size; page++)
+  {
+    if (xdes_is_free(descr, page))
+      continue;
+    n_used++;
+  }
+
+  return n_used;
+}
+
+/**
+Determine whether every page of an extent is in use.
+@param descr extent descriptor
+@return whether all pages have been allocated */
+inline bool xdes_is_full(const xdes_t *descr)
+{
+  const uint32_t n_used= xdes_get_n_used(descr);
+  return FSP_EXTENT_SIZE == n_used;
+}
+
+/** Set the state of an extent descriptor.
+Only the last byte of the 4-byte XDES_STATE field is written; the
+assertions require both the old and the new state to be <= XDES_FSEG.
+@param[in]     block  extent descriptor block
+@param[in,out] descr  extent descriptor
+@param[in]     state  the state (XDES_FREE ... XDES_FSEG)
+@param[in,out] mtr    mini-transaction */
+inline void xdes_set_state(const buf_block_t &block, xdes_t *descr,
+                           byte state, mtr_t *mtr)
+{
+  ut_ad(descr && mtr);
+  ut_ad(state >= XDES_FREE);
+  ut_ad(state <= XDES_FSEG);
+  ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_SX_FIX |
+                                   MTR_MEMO_PAGE_X_FIX));
+  ut_ad(page_align(descr) == block.frame);
+  ut_ad(mach_read_from_4(descr + XDES_STATE) <= XDES_FSEG);
+
+  byte *last_state_byte= descr + XDES_STATE + 3;
+  mtr->write<1>(block, last_state_byte, state);
+}
+
+/**********************************************************************//**
+Reads the 4-byte XDES_STATE field of an extent descriptor.
+@return state */
+UNIV_INLINE
+ulint
+xdes_get_state(
+/*===========*/
+  const xdes_t* descr) /*!< in: descriptor */
+{
+  ut_ad(descr);
+
+  const ulint state = mach_read_from_4(descr + XDES_STATE);
+
+  /* state is unsigned, so state - 1 wraps around for 0: the
+  assertion rejects 0 as well as anything above XDES_FSEG. */
+  ut_ad(state - 1 < XDES_FSEG);
+
+  return(state);
+}
+
+/**********************************************************************//**
+Inits an extent descriptor to the free and clean state: every byte of the
+page bitmap is set to 0xff and the state is set to XDES_FREE.
+@param[in]     block  extent descriptor block
+@param[in,out] descr  extent descriptor
+@param[in,out] mtr    mini-transaction */
+inline void xdes_init(const buf_block_t &block, xdes_t *descr, mtr_t *mtr)
+{
+  ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_SX_FIX |
+                                   MTR_MEMO_PAGE_X_FIX));
+
+  /* Offset of the bitmap within the page frame, and its length */
+  const auto bitmap_offset= uint16_t(descr - block.frame) + XDES_BITMAP;
+  const auto bitmap_len= XDES_SIZE - XDES_BITMAP;
+
+  mtr->memset(&block, bitmap_offset, bitmap_len, 0xff);
+  xdes_set_state(block, descr, XDES_FREE, mtr);
+}
+
+/** Mark a page used in an extent descriptor of a segment.
+The extent is moved between the FSEG_FREE, FSEG_NOT_FULL and FSEG_FULL
+lists of the segment as its fill level changes, and
+FSEG_NOT_FULL_N_USED is kept up to date.
+@param[in,out] seg_inode  segment inode
+@param[in,out] iblock     segment inode page
+@param[in]     page       page number
+@param[in,out] descr      extent descriptor
+@param[in,out] xdes       extent descriptor page
+@param[in,out] mtr        mini-transaction */
+static MY_ATTRIBUTE((nonnull))
+void
+fseg_mark_page_used(fseg_inode_t *seg_inode, buf_block_t *iblock,
+                    ulint page, xdes_t *descr, buf_block_t *xdes, mtr_t *mtr)
+{
+  ut_ad(fil_page_get_type(iblock->frame) == FIL_PAGE_INODE);
+  ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
+  ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
+  ut_ad(!memcmp(seg_inode + FSEG_ID, descr + XDES_ID, 4));
+
+  /* Page-relative offsets of the list node and of the inode */
+  const uint16_t node_ofs= uint16_t(descr - xdes->frame + XDES_FLST_NODE);
+  const uint16_t inode_ofs= uint16_t(seg_inode - iblock->frame);
+
+  if (xdes_get_n_used(descr) == 0)
+  {
+    /* First page taken from this extent: FSEG_FREE -> FSEG_NOT_FULL */
+    flst_remove(iblock, uint16_t(FSEG_FREE + inode_ofs), xdes, node_ofs, mtr);
+    flst_add_last(iblock, uint16_t(FSEG_NOT_FULL + inode_ofs),
+                  xdes, node_ofs, mtr);
+  }
+
+  ut_ad(xdes_is_free(descr, page % FSP_EXTENT_SIZE));
+
+  /* Clear the free bit of the page */
+  xdes_set_free<false>(*xdes, descr, page % FSP_EXTENT_SIZE, mtr);
+
+  byte *n_used_field= seg_inode + FSEG_NOT_FULL_N_USED;
+  const uint32_t n_used= mach_read_from_4(n_used_field) + 1;
+  mtr->write<4>(*iblock, n_used_field, n_used);
+
+  if (!xdes_is_full(descr))
+    return;
+
+  /* Last page taken from this extent: FSEG_NOT_FULL -> FSEG_FULL.
+  Its pages no longer count towards FSEG_NOT_FULL_N_USED. */
+  flst_remove(iblock, uint16_t(FSEG_NOT_FULL + inode_ofs), xdes, node_ofs,
+              mtr);
+  flst_add_last(iblock, uint16_t(FSEG_FULL + inode_ofs), xdes, node_ofs, mtr);
+  mtr->write<4>(*iblock, n_used_field, n_used - FSP_EXTENT_SIZE);
+}
+
+/** Get a pointer to the extent descriptor of a page.
+@param[in,out] header      tablespace header page, SX- or X-latched
+@param[in]     space       tablespace
+@param[in]     offset      page offset
+@param[out]    desc_block  descriptor block; may be NULL if the caller
+                           does not need it; not assigned when NULL is
+                           returned
+@param[in,out] mtr         mini-transaction
+@param[in]     init_space  whether the tablespace is being initialized
+                           (only consulted by a debug assertion)
+@return pointer to the extent descriptor, NULL if the page does not
+exist in the space or if the offset exceeds free limit */
+UNIV_INLINE MY_ATTRIBUTE((warn_unused_result))
+xdes_t*
+xdes_get_descriptor_with_space_hdr(
+  buf_block_t* header,
+  const fil_space_t* space,
+  page_no_t offset,
+  buf_block_t** desc_block,
+  mtr_t* mtr,
+  bool init_space = false)
+{
+  ut_ad(mtr->memo_contains(*space));
+  ut_ad(mtr->memo_contains_flagged(header, MTR_MEMO_PAGE_SX_FIX
+                                   | MTR_MEMO_PAGE_X_FIX));
+
+  /* Free limit and tablespace size, as stored in the header page */
+  const byte* fsp_header = header->frame + FSP_HEADER_OFFSET;
+  const uint32_t limit = mach_read_from_4(fsp_header + FSP_FREE_LIMIT);
+  const uint32_t size = mach_read_from_4(fsp_header + FSP_SIZE);
+
+  ut_ad(limit == space->free_limit
+        || (space->free_limit == 0
+            && (init_space
+                || space->purpose == FIL_TYPE_TEMPORARY
+                || (srv_startup_is_before_trx_rollback_phase
+                    && (space->id == TRX_SYS_SPACE
+                        || srv_is_undo_tablespace(space->id))))));
+  ut_ad(size == space->size_in_header);
+
+  if (offset >= size || offset >= limit) {
+    return(NULL);
+  }
+
+  const unsigned zip_size = space->zip_size();
+  const uint32_t descr_page_no = xdes_calc_descriptor_page(zip_size,
+                                                           offset);
+
+  /* Descriptor page 0 is the header page, which we already hold. */
+  buf_block_t* block = header;
+
+  if (descr_page_no != 0) {
+    block = buf_page_get(
+      page_id_t(space->id, descr_page_no), zip_size,
+      RW_SX_LATCH, mtr);
+
+    buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+  }
+
+  if (desc_block != NULL) {
+    *desc_block = block;
+  }
+
+  return block->frame + XDES_ARR_OFFSET
+    + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset);
+}
+
+/** Get the extent descriptor of a page.
+The tablespace header page is acquired with RW_SX_LATCH and the lookup is
+delegated to xdes_get_descriptor_with_space_hdr().
+@param[in]     space   tablespace
+@param[in]     offset  page offset
+@param[out]    xdes    extent descriptor page
+@param[in,out] mtr     mini-transaction
+@return the extent descriptor, as returned by
+xdes_get_descriptor_with_space_hdr() */
+static xdes_t* xdes_get_descriptor(const fil_space_t *space, page_no_t offset,
+                                   buf_block_t **xdes, mtr_t *mtr)
+{
+  buf_block_t *header= buf_page_get(page_id_t(space->id, 0),
+                                    space->zip_size(), RW_SX_LATCH, mtr);
+  buf_block_dbg_add_level(header, SYNC_FSP_PAGE);
+
+  xdes_t *descr= xdes_get_descriptor_with_space_hdr(header, space, offset,
+                                                    xdes, mtr);
+  return descr;
+}
+
+/** Get a read-only pointer to the extent descriptor of a page.
+The descriptor page is requested with RW_S_LATCH and
+BUF_GET_POSSIBLY_FREED.
+@param[in]     space   tablespace
+@param[in]     page    descriptor page offset
+@param[in]     offset  page offset
+@param[in,out] mtr     mini-transaction
+@return the extent descriptor
+@retval NULL if the descriptor is not available (the descriptor page
+could not be obtained or is marked as freed) */
+MY_ATTRIBUTE((warn_unused_result))
+static
+const xdes_t*
+xdes_get_descriptor_const(
+  const fil_space_t* space,
+  page_no_t page,
+  page_no_t offset,
+  mtr_t* mtr)
+{
+  ut_ad(mtr->memo_contains(space->latch, MTR_MEMO_SX_LOCK));
+  ut_ad(offset < space->free_limit);
+  ut_ad(offset < space->size_in_header);
+
+  const ulint zip_size = space->zip_size();
+
+  buf_block_t* block = buf_page_get_gen(page_id_t(space->id, page),
+                                        zip_size, RW_S_LATCH,
+                                        nullptr,
+                                        BUF_GET_POSSIBLY_FREED,
+                                        __FILE__, __LINE__, mtr);
+  if (!block) {
+    return(NULL);
+  }
+
+  buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+
+  if (block->page.status == buf_page_t::FREED) {
+    return nullptr;
+  }
+
+  /* On page 0, the header fields must agree with the tablespace
+  object. */
+  ut_ad(page != 0 || space->free_limit == mach_read_from_4(
+          FSP_FREE_LIMIT + FSP_HEADER_OFFSET
+          + block->frame));
+  ut_ad(page != 0 || space->size_in_header == mach_read_from_4(
+          FSP_SIZE + FSP_HEADER_OFFSET
+          + block->frame));
+
+  const xdes_t* descr = block->frame + XDES_ARR_OFFSET + XDES_SIZE
+    * xdes_calc_descriptor_index(zip_size, offset);
+  return(descr);
+}
+
+/** Get a pointer to the extent descriptor that contains a list node.
+The page where the extent descriptor resides is acquired with
+RW_SX_LATCH.
+@param[in]     space     tablespace
+@param[in]     lst_node  file address of the list node
+                         contained in the descriptor
+@param[out]    block     extent descriptor block
+@param[in,out] mtr       mini-transaction
+@return pointer to the extent descriptor */
+MY_ATTRIBUTE((nonnull, warn_unused_result))
+UNIV_INLINE
+xdes_t*
+xdes_lst_get_descriptor(
+  const fil_space_t* space,
+  fil_addr_t lst_node,
+  buf_block_t** block,
+  mtr_t* mtr)
+{
+  ut_ad(mtr->memo_contains(*space));
+
+  /* The list node lies XDES_FLST_NODE bytes into its descriptor. */
+  byte* node = fut_get_ptr(space->id, space->zip_size(),
+                           lst_node, RW_SX_LATCH, mtr, block);
+  return node - XDES_FLST_NODE;
+}
+
+/********************************************************************//**
+Returns page offset of the first page in extent described by a descriptor:
+the page number of the descriptor page plus FSP_EXTENT_SIZE times the index
+of the descriptor within the descriptor array of that page.
+@return offset of the first page in extent */
+static uint32_t xdes_get_offset(const xdes_t *descr)
+{
+  ut_ad(descr);
+
+  const auto descr_index= (page_offset(descr) - XDES_ARR_OFFSET) / XDES_SIZE;
+  const uint32_t descr_page_no= page_get_page_no(page_align(descr));
+
+  return descr_page_no + uint32_t(descr_index * FSP_EXTENT_SIZE);
+}
+
+/** Initialize a file page whose prior contents should be ignored.
+The frame is zero-filled and the page number and tablespace ID are
+written; if log_sys.is_physical() holds, the 8 bytes at FIL_PAGE_PREV
+are filled with 0xff. If the block has a compressed page, the same
+initialization is applied to it.
+@param[in,out] block  buffer pool block */
+void fsp_apply_init_file_page(buf_block_t *block)
+{
+  memset_aligned<UNIV_PAGE_SIZE_MIN>(block->frame, 0, srv_page_size);
+  const page_id_t id(block->page.id());
+
+  mach_write_to_4(block->frame + FIL_PAGE_OFFSET, id.page_no());
+  if (log_sys.is_physical())
+    memset_aligned<8>(block->frame + FIL_PAGE_PREV, 0xff, 8);
+  mach_write_to_4(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, id.space());
+
+  page_zip_des_t *page_zip= buf_block_get_page_zip(block);
+  if (!page_zip)
+    return;
+
+  /* Mirror the header fields into the compressed page. */
+  memset_aligned<UNIV_ZIP_SIZE_MIN>(page_zip->data, 0,
+                                    page_zip_get_size(page_zip));
+  static_assert(FIL_PAGE_OFFSET == 4, "compatibility");
+  memcpy_aligned<4>(page_zip->data + FIL_PAGE_OFFSET,
+                    block->frame + FIL_PAGE_OFFSET, 4);
+  if (log_sys.is_physical())
+    memset_aligned<8>(page_zip->data + FIL_PAGE_PREV, 0xff, 8);
+  static_assert(FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID % 4 == 2,
+                "not perfect alignment");
+  memcpy_aligned<2>(page_zip->data + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+                    block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 4);
+}
+
+#ifdef UNIV_DEBUG
+/** Assert that the mini-transaction is compatible with
+updating an allocation bitmap page.
+The function consists solely of ut_ad() assertions and is compiled only
+when UNIV_DEBUG is defined.
+@param[in] mtr mini-transaction */
+void fil_space_t::modify_check(const mtr_t& mtr) const
+{
+ switch (mtr.get_log_mode()) {
+ case MTR_LOG_NONE:
+ /* These modes are only allowed within a non-bitmap page
+ when there is a higher-level redo log record written. */
+ ut_ad(purpose == FIL_TYPE_TABLESPACE
+ || purpose == FIL_TYPE_TEMPORARY);
+ /* Unlike the other cases, this one leaves the switch with
+ break and therefore reaches the "invalid log mode"
+ assertion at the end of the function. */
+ break;
+ case MTR_LOG_NO_REDO:
+ ut_ad(purpose == FIL_TYPE_TEMPORARY
+ || purpose == FIL_TYPE_IMPORT);
+ return;
+ case MTR_LOG_ALL:
+ /* We may only write redo log for a persistent
+ tablespace. */
+ ut_ad(purpose == FIL_TYPE_TABLESPACE);
+ ut_ad(mtr.is_named_space(id));
+ return;
+ }
+
+ /* Reached for MTR_LOG_NONE and for any log mode that has no case
+ label above. */
+ ut_ad("invalid log mode" == 0);
+}
+#endif
+
+/**********************************************************************//**
+Writes the space id and flags to a tablespace header. The flags contain
+row type, physical/compressed page size, and logical/uncompressed page
+size of the tablespace. The bits in FSP_FLAGS_MEM_MASK are cleared before
+the flags are validated and written. */
+void
+fsp_header_init_fields(
+/*===================*/
+  page_t* page,     /*!< in/out: first page in the space */
+  ulint space_id,   /*!< in: space id */
+  ulint flags)      /*!< in: tablespace flags (FSP_SPACE_FLAGS) */
+{
+  flags &= ~FSP_FLAGS_MEM_MASK;
+  ut_a(fil_space_t::is_valid_flags(flags, space_id));
+
+  byte* fsp_header = page + FSP_HEADER_OFFSET;
+
+  mach_write_to_4(fsp_header + FSP_SPACE_ID, space_id);
+  mach_write_to_4(fsp_header + FSP_SPACE_FLAGS, flags);
+}
+
+/** Initialize a tablespace header.
+Creates page 0 of the tablespace in the buffer pool, resets the in-memory
+size_in_header, free_len and free_limit of the tablespace, writes the
+header fields and the empty extent and inode lists, fills the free list
+via fsp_fill_free_list(), and writes the encryption metadata if
+applicable.
+@param[in,out] space tablespace
+@param[in] size current size in blocks
+@param[in,out] mtr mini-transaction */
+void fsp_header_init(fil_space_t* space, uint32_t size, mtr_t* mtr)
+{
+ /* NOTE(review): page_id is not referenced again in this function. */
+ const page_id_t page_id(space->id, 0);
+ const ulint zip_size = space->zip_size();
+
+ buf_block_t *free_block = buf_LRU_get_free_block(false);
+
+ mtr_x_lock_space(space, mtr);
+
+ buf_block_t* block = buf_page_create(space, 0, zip_size, mtr,
+ free_block);
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+
+ /* buf_page_create() returned a block other than the one we
+ supplied: give the unused free block back to the buffer pool. */
+ if (UNIV_UNLIKELY(block != free_block)) {
+ buf_pool.free_block(free_block);
+ }
+
+ space->size_in_header = size;
+ space->free_len = 0;
+ space->free_limit = 0;
+
+ /* The prior contents of the file page should be ignored */
+
+ fsp_init_file_page(space, block, mtr);
+
+ mtr->write<2>(*block, block->frame + FIL_PAGE_TYPE,
+ FIL_PAGE_TYPE_FSP_HDR);
+
+ mtr->write<4,mtr_t::MAYBE_NOP>(*block, FSP_HEADER_OFFSET + FSP_SPACE_ID
+ + block->frame, space->id);
+ ut_ad(0 == mach_read_from_4(FSP_HEADER_OFFSET + FSP_NOT_USED
+ + block->frame));
+ /* recv_sys_t::parse() expects to find a WRITE record that
+ covers all 4 bytes. Therefore, we must specify mtr_t::FORCED
+ in order to avoid optimizing away any unchanged most
+ significant bytes of FSP_SIZE. */
+ mtr->write<4,mtr_t::FORCED>(*block, FSP_HEADER_OFFSET + FSP_SIZE
+ + block->frame, size);
+ ut_ad(0 == mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT
+ + block->frame));
+ /* The flags are written only if they are nonzero after removing
+ the FSP_FLAGS_MEM_MASK bits. */
+ if (auto f = space->flags & ~FSP_FLAGS_MEM_MASK) {
+ mtr->write<4,mtr_t::FORCED>(*block,
+ FSP_HEADER_OFFSET + FSP_SPACE_FLAGS
+ + block->frame, f);
+ }
+ ut_ad(0 == mach_read_from_4(FSP_HEADER_OFFSET + FSP_FRAG_N_USED
+ + block->frame));
+
+ /* Start with empty extent lists and empty inode page lists. */
+ flst_init(block, FSP_HEADER_OFFSET + FSP_FREE, mtr);
+ flst_init(block, FSP_HEADER_OFFSET + FSP_FREE_FRAG, mtr);
+ flst_init(block, FSP_HEADER_OFFSET + FSP_FULL_FRAG, mtr);
+ flst_init(block, FSP_HEADER_OFFSET + FSP_SEG_INODES_FULL, mtr);
+ flst_init(block, FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE, mtr);
+
+ /* The 8-byte FSP_SEG_ID field is initialized to 1. */
+ mtr->write<8>(*block, FSP_HEADER_OFFSET + FSP_SEG_ID + block->frame,
+ 1U);
+
+ /* init_space is true for anything but a system tablespace. */
+ fsp_fill_free_list(!is_system_tablespace(space->id),
+ space, block, mtr);
+
+ /* Write encryption metadata to page 0 if tablespace is
+ encrypted or encryption is disabled by table option. */
+ if (space->crypt_data &&
+ (space->crypt_data->should_encrypt() ||
+ space->crypt_data->not_encrypted())) {
+ space->crypt_data->write_page0(block, mtr);
+ }
+}
+
+/** Try to extend a single-table tablespace so that a page would fit in the
+data file. FSP_SIZE in the header and space->size_in_header are updated to
+space->size whether or not the extension succeeded.
+@param[in,out] space tablespace
+@param[in] page_no page number
+@param[in,out] header tablespace header
+@param[in,out] mtr mini-transaction
+@return true if success */
+static ATTRIBUTE_COLD __attribute__((warn_unused_result))
+bool
+fsp_try_extend_data_file_with_pages(
+  fil_space_t* space,
+  uint32_t page_no,
+  buf_block_t* header,
+  mtr_t* mtr)
+{
+  ut_a(!is_system_tablespace(space->id));
+  ut_d(space->modify_check(*mtr));
+
+  byte* size_field = FSP_HEADER_OFFSET + FSP_SIZE + header->frame;
+  const ulint old_size = mach_read_from_4(size_field);
+  ut_ad(old_size == space->size_in_header);
+
+  ut_a(page_no >= old_size);
+
+  const bool extended = fil_space_extend(space, page_no + 1);
+
+  /* The size may be less than we wanted if we ran out of disk space. */
+  /* recv_sys_t::parse() expects to find a WRITE record that
+  covers all 4 bytes. Therefore, we must specify mtr_t::FORCED
+  in order to avoid optimizing away any unchanged most
+  significant bytes of FSP_SIZE. */
+  mtr->write<4,mtr_t::FORCED>(*header, size_field, space->size);
+  space->size_in_header = space->size;
+
+  return(extended);
+}
+
+/** Calculate the number of physical pages in an extent for this file:
+the extent size in bytes (FSP_EXTENT_SIZE pages of 1 << srv_page_size_shift
+bytes) divided by the physical page size.
+@param[in] physical_size page_size of the datafile
+@return number of pages in an extent for this file */
+inline uint32_t fsp_get_extent_size_in_pages(ulint physical_size)
+{
+  const auto extent_bytes= FSP_EXTENT_SIZE << srv_page_size_shift;
+  return uint32_t(extent_bytes / physical_size);
+}
+
+
+/** Calculate the number of pages to extend a datafile.
+We extend single-table tablespaces first one extent at a time,
+but FSP_FREE_ADD extents at a time for bigger tablespaces. It is not enough
+to extend always by one extent, because we need to add at least one extent
+to FSP_FREE.
+A single extent descriptor page will track many extents. And the extent
+that uses its extent descriptor page is put onto the FSP_FREE_FRAG list.
+Extents that do not use their extent descriptor page are added to FSP_FREE.
+The physical page size is used to determine how many extents are tracked
+on one extent descriptor page. See xdes_calc_descriptor_page().
+@param[in] physical_size page size in data file
+@param[in] size current number of pages in the datafile
+@return number of pages to extend the file. */
+static uint32_t fsp_get_pages_to_extend_ibd(unsigned physical_size,
+                                            uint32_t size)
+{
+  const uint32_t one_extent = fsp_get_extent_size_in_pages(physical_size);
+
+  /* The threshold is 32 extents, except when the physical page
+  size is small enough that it must be done sooner. */
+  const uint32_t threshold = std::min(32 * one_extent, physical_size);
+
+  if (size < threshold) {
+    return one_extent;
+  }
+
+  /* fsp_fill_free_list() assumes that we add at most FSP_FREE_ADD
+  extents at a time */
+  uint32_t pages = one_extent;
+  pages *= FSP_FREE_ADD;
+  return pages;
+}
+
+/** Try to extend the last data file of a tablespace if it is auto-extending.
+For the system and temporary tablespaces the increment comes from
+srv_sys_space / srv_tmp_space; for any other tablespace it is computed by
+fsp_get_pages_to_extend_ibd(), after first growing the file to one full
+extent if it is smaller than that.
+@param[in,out] space tablespace
+@param[in,out] header tablespace header
+@param[in,out] mtr mini-transaction
+@return number of pages added
+@retval 0 if the tablespace was not extended */
+ATTRIBUTE_COLD __attribute__((nonnull))
+static
+ulint
+fsp_try_extend_data_file(fil_space_t *space, buf_block_t *header, mtr_t *mtr)
+{
+ const char* OUT_OF_SPACE_MSG =
+ "ran out of space. Please add another file or use"
+ " 'autoextend' for the last file in setting";
+
+ ut_d(space->modify_check(*mtr));
+
+ if (space->id == TRX_SYS_SPACE
+ && !srv_sys_space.can_auto_extend_last_file()) {
+
+ /* We print the error message only once to avoid
+ spamming the error log. Note that we don't need
+ to reset the flag to false as dealing with this
+ error requires server restart. */
+ if (!srv_sys_space.get_tablespace_full_status()) {
+ ib::error() << "The InnoDB system tablespace "
+ << OUT_OF_SPACE_MSG
+ << " innodb_data_file_path.";
+ srv_sys_space.set_tablespace_full_status(true);
+ }
+ return(0);
+ } else if (space->id == SRV_TMP_SPACE_ID
+ && !srv_tmp_space.can_auto_extend_last_file()) {
+
+ /* We print the error message only once to avoid
+ spamming the error log. Note that we don't need
+ to reset the flag to false as dealing with this
+ error requires server restart. */
+ if (!srv_tmp_space.get_tablespace_full_status()) {
+ ib::error() << "The InnoDB temporary tablespace "
+ << OUT_OF_SPACE_MSG
+ << " innodb_temp_data_file_path.";
+ srv_tmp_space.set_tablespace_full_status(true);
+ }
+ return(0);
+ }
+
+ /* Current size in pages, as recorded in the tablespace header */
+ uint32_t size = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE
+ + header->frame);
+ ut_ad(size == space->size_in_header);
+ uint32_t size_increase;
+
+ const unsigned ps = space->physical_size();
+
+ switch (space->id) {
+ case TRX_SYS_SPACE:
+ size_increase = srv_sys_space.get_increment();
+ break;
+ case SRV_TMP_SPACE_ID:
+ size_increase = srv_tmp_space.get_increment();
+ break;
+ default:
+ uint32_t extent_pages = fsp_get_extent_size_in_pages(ps);
+ if (size < extent_pages) {
+ /* Let us first extend the file to extent_size */
+ if (!fsp_try_extend_data_file_with_pages(
+ space, extent_pages - 1, header, mtr)) {
+ return(0);
+ }
+
+ size = extent_pages;
+ }
+
+ size_increase = fsp_get_pages_to_extend_ibd(ps, size);
+ }
+
+ if (size_increase == 0) {
+ return(0);
+ }
+
+ if (!fil_space_extend(space, size + size_increase)) {
+ return(0);
+ }
+
+ /* We ignore any fragments of a full megabyte when storing the size
+ to the space header */
+
+ space->size_in_header = ut_2pow_round(space->size, (1024 * 1024) / ps);
+
+ /* recv_sys_t::parse() expects to find a WRITE record that
+ covers all 4 bytes. Therefore, we must specify mtr_t::FORCED
+ in order to avoid optimizing away any unchanged most
+ significant bytes of FSP_SIZE. */
+ mtr->write<4,mtr_t::FORCED>(*header, FSP_HEADER_OFFSET + FSP_SIZE
+ + header->frame, space->size_in_header);
+
+ /* The value returned is the requested increment; it is not
+ recomputed from space->size or space->size_in_header. */
+ return(size_increase);
+}
+
+/** Reset the page type.
+Data files created before MySQL 5.1.48 may contain garbage in FIL_PAGE_TYPE.
+In MySQL 3.23.53, only undo log pages and index pages were tagged.
+Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE.
+An informational message with the old and the new type is logged before
+the 2-byte FIL_PAGE_TYPE field is overwritten.
+@param[in] block block with invalid FIL_PAGE_TYPE
+@param[in] type expected page type
+@param[in,out] mtr mini-transaction */
+ATTRIBUTE_COLD
+void fil_block_reset_type(const buf_block_t& block, ulint type, mtr_t* mtr)
+{
+  const auto old_type= fil_page_get_type(block.frame);
+
+  ib::info() << "Resetting invalid page " << block.page.id()
+             << " type " << old_type << " to " << type << ".";
+
+  mtr->write<2>(block, block.frame + FIL_PAGE_TYPE, type);
+}
+
+/** Put new extents to the free list if there are free extents above the free
+limit. If an extent happens to contain an extent descriptor page, the extent
+is put to the FSP_FREE_FRAG list with the page marked as used.
+At most FSP_FREE_ADD extents are added to FSP_FREE per call; extents that
+go to FSP_FREE_FRAG are not counted against that limit.
+@param[in] init_space true if this is a single-table tablespace
+and we are only initializing the first extent and the first bitmap pages;
+then we will not allocate more extents
+@param[in,out] space tablespace
+@param[in,out] header tablespace header
+@param[in,out] mtr mini-transaction */
+static
+void
+fsp_fill_free_list(
+ bool init_space,
+ fil_space_t* space,
+ buf_block_t* header,
+ mtr_t* mtr)
+{
+ ut_d(space->modify_check(*mtr));
+
+ /* Check if we can fill free list from above the free list limit */
+ uint32_t size = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE
+ + header->frame);
+ uint32_t limit = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FREE_LIMIT
+ + header->frame);
+
+ ut_ad(size == space->size_in_header);
+ ut_ad(limit == space->free_limit);
+
+ const ulint zip_size = space->zip_size();
+
+ /* Fewer than FSP_FREE_ADD whole extents lie between the free limit
+ and the end of the file: try to grow the file first, unless
+ resizing is to be skipped. */
+ if (size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
+ bool skip_resize = init_space;
+ switch (space->id) {
+ case TRX_SYS_SPACE:
+ skip_resize = !srv_sys_space.can_auto_extend_last_file();
+ break;
+ case SRV_TMP_SPACE_ID:
+ skip_resize = !srv_tmp_space.can_auto_extend_last_file();
+ break;
+ }
+
+ if (!skip_resize) {
+ fsp_try_extend_data_file(space, header, mtr);
+ size = space->size_in_header;
+ }
+ }
+
+ /* Number of extents added to FSP_FREE by this call */
+ uint32_t count = 0;
+
+ /* Walk extent by extent from the old free limit. When init_space
+ holds, the extent at page 0 is processed regardless of size. */
+ for (uint32_t i = limit, extent_size = FSP_EXTENT_SIZE,
+ physical_size = space->physical_size();
+ (init_space && i < 1)
+ || (i + extent_size <= size && count < FSP_FREE_ADD);
+ i += extent_size) {
+ /* True when page i starts a new group of extents that has its
+ own descriptor page, i.e. i is a multiple of physical_size. */
+ const bool init_xdes = !ut_2pow_remainder(i, physical_size);
+
+ space->free_limit = i + extent_size;
+ mtr->write<4>(*header, FSP_HEADER_OFFSET + FSP_FREE_LIMIT
+ + header->frame, i + extent_size);
+
+ if (init_xdes) {
+
+ buf_block_t* block;
+
+ /* We are going to initialize a new descriptor page
+ and a new ibuf bitmap page: the prior contents of the
+ pages should be ignored. */
+
+ /* Page 0 is the tablespace header, which is passed in
+ by the caller; only later descriptor pages are created
+ here. */
+ if (i > 0) {
+ buf_block_t *f= buf_LRU_get_free_block(false);
+ block= buf_page_create(
+ space, static_cast<uint32_t>(i),
+ zip_size, mtr, f);
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+ if (UNIV_UNLIKELY(block != f)) {
+ buf_pool.free_block(f);
+ }
+ fsp_init_file_page(space, block, mtr);
+ mtr->write<2>(*block,
+ FIL_PAGE_TYPE + block->frame,
+ FIL_PAGE_TYPE_XDES);
+ }
+
+ /* No ibuf bitmap page is created for a temporary
+ tablespace. */
+ if (space->purpose != FIL_TYPE_TEMPORARY) {
+ buf_block_t *f= buf_LRU_get_free_block(false);
+ block = buf_page_create(
+ space,
+ static_cast<uint32_t>(
+ i + FSP_IBUF_BITMAP_OFFSET),
+ zip_size, mtr, f);
+ buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+ if (UNIV_UNLIKELY(block != f)) {
+ buf_pool.free_block(f);
+ }
+ fsp_init_file_page(space, block, mtr);
+ mtr->write<2>(*block,
+ block->frame + FIL_PAGE_TYPE,
+ FIL_PAGE_IBUF_BITMAP);
+ }
+ }
+
+ buf_block_t* xdes;
+ xdes_t* descr = xdes_get_descriptor_with_space_hdr(
+ header, space, i, &xdes, mtr, init_space);
+ if (xdes != header && !space->full_crc32()) {
+ fil_block_check_type(*xdes, FIL_PAGE_TYPE_XDES, mtr);
+ }
+ xdes_init(*xdes, descr, mtr);
+ /* Offset of the descriptor's list node within its page */
+ const uint16_t xoffset= static_cast<uint16_t>(
+ descr - xdes->frame + XDES_FLST_NODE);
+
+ if (UNIV_UNLIKELY(init_xdes)) {
+
+ /* The first page in the extent is a descriptor page
+ and the second is an ibuf bitmap page: mark them
+ used */
+
+ xdes_set_free<false>(*xdes, descr, 0, mtr);
+ xdes_set_free<false>(*xdes, descr,
+ FSP_IBUF_BITMAP_OFFSET, mtr);
+ xdes_set_state(*xdes, descr, XDES_FREE_FRAG, mtr);
+
+ flst_add_last(header,
+ FSP_HEADER_OFFSET + FSP_FREE_FRAG,
+ xdes, xoffset, mtr);
+ /* Account for the two pages just marked used. */
+ byte* n_used = FSP_HEADER_OFFSET + FSP_FRAG_N_USED
+ + header->frame;
+ mtr->write<4>(*header, n_used,
+ 2U + mach_read_from_4(n_used));
+ } else {
+ flst_add_last(header, FSP_HEADER_OFFSET + FSP_FREE,
+ xdes, xoffset, mtr);
+ count++;
+ }
+ }
+
+ space->free_len += count;
+}
+
+/** Allocates a new free extent.
+The extent that contains the hint page is taken if its state is XDES_FREE.
+Otherwise the first extent of the FSP_FREE list is taken; if that list is
+empty, fsp_fill_free_list() is called once to try to refill it.
+@param[in,out] space tablespace
+@param[in] hint hint of which extent would be desirable: any
+page offset in the extent goes; the hint must not be > FSP_FREE_LIMIT
+@param[out] xdes extent descriptor page; assigned only when an
+extent is returned
+@param[in,out] mtr mini-transaction
+@return extent descriptor, NULL if cannot be allocated */
+static
+xdes_t*
+fsp_alloc_free_extent(
+  fil_space_t* space,
+  uint32_t hint,
+  buf_block_t** xdes,
+  mtr_t* mtr)
+{
+  fil_addr_t first;
+  xdes_t* descr;
+  buf_block_t* desc_block = NULL;
+
+  buf_block_t* header = fsp_get_header(space, mtr);
+
+  descr = xdes_get_descriptor_with_space_hdr(
+    header, space, hint, &desc_block, mtr);
+
+  /* xdes_get_descriptor_with_space_hdr() returns NULL without
+  assigning desc_block when the hint is at or beyond the tablespace
+  size or the free limit. In that case desc_block is still NULL
+  here and must not be dereferenced. */
+  if (desc_block != NULL && desc_block != header
+      && !space->full_crc32()) {
+    fil_block_check_type(*desc_block, FIL_PAGE_TYPE_XDES, mtr);
+  }
+
+  if (descr && (xdes_get_state(descr) == XDES_FREE)) {
+    /* Ok, we can take this extent */
+  } else {
+    /* Take the first extent in the free list */
+    first = flst_get_first(FSP_HEADER_OFFSET + FSP_FREE
+                           + header->frame);
+
+    if (first.page == FIL_NULL) {
+      fsp_fill_free_list(false, space, header, mtr);
+
+      first = flst_get_first(FSP_HEADER_OFFSET + FSP_FREE
+                             + header->frame);
+      if (first.page == FIL_NULL) {
+        return nullptr; /* No free extents left */
+      }
+    }
+
+    descr = xdes_lst_get_descriptor(space, first, &desc_block,
+                                    mtr);
+  }
+
+  /* Unlink the chosen extent from FSP_FREE and hand it out. */
+  flst_remove(header, FSP_HEADER_OFFSET + FSP_FREE, desc_block,
+              static_cast<uint16_t>(
+                descr - desc_block->frame + XDES_FLST_NODE), mtr);
+  space->free_len--;
+  *xdes = desc_block;
+
+  return(descr);
+}
+
+/** Allocate a single free page.
+Marks the given slot of a XDES_FREE_FRAG extent as used and increments the
+FSP_FRAG_N_USED counter in the tablespace header. If the extent thereby
+becomes full, it is moved from FSP_FREE_FRAG to FSP_FULL_FRAG and
+FSP_EXTENT_SIZE is subtracted from the counter.
+@param[in,out] header tablespace header
+@param[in,out] xdes extent descriptor page
+@param[in,out] descr extent descriptor
+@param[in] bit slot to allocate in the extent
+@param[in,out] mtr mini-transaction */
+static void
+fsp_alloc_from_free_frag(buf_block_t *header, buf_block_t *xdes, xdes_t *descr,
+                         ulint bit, mtr_t *mtr)
+{
+  ut_ad(xdes_get_state(descr) == XDES_FREE_FRAG);
+  ut_a(xdes_is_free(descr, bit));
+
+  /* Flag the page as used in the extent bitmap */
+  xdes_set_free<false>(*xdes, descr, bit, mtr);
+
+  byte *const frag_n_used= header->frame + FSP_HEADER_OFFSET
+    + FSP_FRAG_N_USED;
+  const uint32_t used_now= mach_read_from_4(frag_n_used) + 1;
+
+  if (!xdes_is_full(descr))
+  {
+    /* The extent stays on the FREE_FRAG list */
+    mtr->write<4>(*header, frag_n_used, used_now);
+    return;
+  }
+
+  /* The last free page of the extent was taken:
+  relink the extent to the FULL_FRAG list */
+  const uint16_t node= static_cast<uint16_t>(
+    descr - xdes->frame + XDES_FLST_NODE);
+
+  flst_remove(header, FSP_HEADER_OFFSET + FSP_FREE_FRAG, xdes, node, mtr);
+  xdes_set_state(*xdes, descr, XDES_FULL_FRAG, mtr);
+  flst_add_last(header, FSP_HEADER_OFFSET + FSP_FULL_FRAG, xdes, node, mtr);
+
+  mtr->write<4>(*header, frag_n_used,
+                static_cast<uint32_t>(used_now - FSP_EXTENT_SIZE));
+}
+
+/** Gets a buffer block for an allocated page.
+A free block is obtained up front and handed to buf_page_create(); if that
+call returns a different block, the unused one is given back to the pool.
+The resulting block is initialized with fsp_init_file_page().
+@param[in,out] space tablespace
+@param[in] offset page number of the allocated page
+@param[in,out] mtr mini-transaction
+@return block, initialized */
+static
+buf_block_t*
+fsp_page_create(fil_space_t *space, page_no_t offset, mtr_t *mtr)
+{
+  buf_block_t *const spare= buf_LRU_get_free_block(false);
+  buf_block_t *const created=
+    buf_page_create(space, static_cast<uint32_t>(offset),
+                    space->zip_size(), mtr, spare);
+
+  if (UNIV_UNLIKELY(created != spare))
+  {
+    /* The pre-allocated block was not consumed */
+    buf_pool.free_block(spare);
+  }
+
+  fsp_init_file_page(space, created, mtr);
+  return created;
+}
+
+/** Allocates a single free page from a space.
+The page is marked as used.
+The page is taken from the extent that contains the hint if that extent is
+in state XDES_FREE_FRAG; otherwise from the first extent of the
+FSP_FREE_FRAG list or, if that list is empty, from a newly allocated free
+extent, which is then added to FSP_FREE_FRAG.
+@param[in,out] space tablespace
+@param[in] hint hint of which page would be desirable
+@param[in,out] mtr mini-transaction
+@param[in,out] init_mtr mini-transaction in which the page should be
+initialized (may be the same as mtr)
+@return the allocated page, initialized by fsp_page_create()
+@retval NULL if no page could be allocated */
+static MY_ATTRIBUTE((warn_unused_result, nonnull))
+buf_block_t*
+fsp_alloc_free_page(
+ fil_space_t* space,
+ uint32_t hint,
+ mtr_t* mtr,
+ mtr_t* init_mtr)
+{
+ fil_addr_t first;
+ xdes_t* descr;
+ const ulint space_id = space->id;
+
+ ut_d(space->modify_check(*mtr));
+ buf_block_t* block = fsp_get_header(space, mtr);
+ buf_block_t *xdes;
+
+ /* Get the hinted descriptor */
+ descr = xdes_get_descriptor_with_space_hdr(block, space, hint, &xdes,
+ mtr);
+
+ if (descr && (xdes_get_state(descr) == XDES_FREE_FRAG)) {
+ /* Ok, we can take this extent */
+ } else {
+ /* Else take the first extent in free_frag list */
+ first = flst_get_first(FSP_HEADER_OFFSET + FSP_FREE_FRAG
+ + block->frame);
+
+ if (first.page == FIL_NULL) {
+ /* There are no partially full fragments: allocate
+ a free extent and add it to the FREE_FRAG list. NOTE
+ that the allocation may have as a side-effect that an
+ extent containing a descriptor page is added to the
+ FREE_FRAG list. But we will allocate our page from
+ the free extent anyway. */
+
+ descr = fsp_alloc_free_extent(space, hint, &xdes, mtr);
+
+ if (descr == NULL) {
+ /* No free space left */
+
+ return(NULL);
+ }
+
+ xdes_set_state(*xdes, descr, XDES_FREE_FRAG, mtr);
+ flst_add_last(block, FSP_HEADER_OFFSET + FSP_FREE_FRAG,
+ xdes, static_cast<uint16_t>(
+ descr - xdes->frame
+ + XDES_FLST_NODE), mtr);
+ } else {
+ descr = xdes_lst_get_descriptor(space, first, &xdes,
+ mtr);
+ }
+
+ /* Reset the hint: the extent was not chosen by the hint, so
+ the search for a free page starts from its first slot */
+ hint = 0;
+ }
+
+ /* Now we have in descr an extent with at least one free page. Look
+ for a free page in the extent. */
+
+ uint32_t free = xdes_find_free(descr, hint % FSP_EXTENT_SIZE);
+ if (free == FIL_NULL) {
+
+ ut_print_buf(stderr, ((byte*) descr) - 500, 1000); /* NOTE(review):
+ the dump starts 500 bytes before descr; presumably this can reach
+ below the start of the page frame for the first descriptors of a
+ page - confirm */
+ putc('\n', stderr);
+
+ ut_error;
+ }
+
+ uint32_t page_no = xdes_get_offset(descr) + free;
+
+ uint32_t space_size = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE
+ + block->frame);
+ ut_ad(space_size == space->size_in_header
+ || (space_id == TRX_SYS_SPACE
+ && srv_startup_is_before_trx_rollback_phase));
+
+ if (space_size <= page_no) {
+ /* It must be that we are extending a single-table tablespace
+ whose size is still < 64 pages */
+
+ ut_a(!is_system_tablespace(space_id));
+ if (page_no >= FSP_EXTENT_SIZE) {
+ ib::error() << "Trying to extend a single-table"
+ " tablespace " << space->name << " , by single"
+ " page(s) though the space size " << space_size
+ << ". Page no " << page_no << ".";
+ return(NULL);
+ }
+
+ if (!fsp_try_extend_data_file_with_pages(space, page_no,
+ block, mtr)) {
+ /* No disk space left */
+ return(NULL);
+ }
+ }
+
+ fsp_alloc_from_free_frag(block, xdes, descr, free, mtr);
+ return fsp_page_create(space, page_no, init_mtr);
+}
+
+/** Frees a single page of a space.
+The page is marked as free and clean.
+The extent of the page must be in state XDES_FREE_FRAG or XDES_FULL_FRAG.
+A full extent is moved back to the FSP_FREE_FRAG list, FSP_FRAG_N_USED in
+the tablespace header is adjusted, and an extent that no longer has any
+used page is handed to fsp_free_extent().
+@param[in,out] space tablespace
+@param[in] offset page number
+@param[in,out] mtr mini-transaction */
+static void fsp_free_page(fil_space_t* space, page_no_t offset, mtr_t* mtr)
+{
+ xdes_t* descr;
+ ulint state;
+ ulint frag_n_used;
+
+ ut_ad(mtr);
+ ut_d(space->modify_check(*mtr));
+
+ /* fprintf(stderr, "Freeing page %lu in space %lu\n", page, space); */
+
+ buf_block_t* header = fsp_get_header(space, mtr);
+ buf_block_t* xdes= 0;
+
+ descr = xdes_get_descriptor_with_space_hdr(header, space, offset,
+ &xdes, mtr);
+
+ state = xdes_get_state(descr); /* NOTE(review): descr is used without
+ a NULL check, although other callers of
+ xdes_get_descriptor_with_space_hdr() test the result - confirm that
+ offset is always within bounds here */
+
+ if (UNIV_UNLIKELY(state != XDES_FREE_FRAG
+ && state != XDES_FULL_FRAG)) {
+ ib::error() << "File space extent descriptor of page "
+ << page_id_t(space->id, offset)
+ << " has state " << state;
+ /* Crash in debug version, so that we get a core dump
+ of this corruption. */
+ ut_ad(0);
+
+ if (state == XDES_FREE) {
+ /* We put here some fault tolerance: if the page
+ is already free, return without doing anything! */
+
+ return;
+ }
+
+ ut_error;
+ }
+
+ if (xdes_is_free(descr, offset % FSP_EXTENT_SIZE)) {
+ ib::error() << "File space extent descriptor of page "
+ << page_id_t(space->id, offset)
+ << " says it is free.";
+ /* Crash in debug version, so that we get a core dump
+ of this corruption. */
+ ut_ad(0);
+
+ /* We put here some fault tolerance: if the page
+ is already free, return without doing anything! */
+
+ return;
+ }
+
+ mtr->free(*space, static_cast<uint32_t>(offset));
+
+ const ulint bit = offset % FSP_EXTENT_SIZE;
+
+ xdes_set_free<true>(*xdes, descr, bit, mtr);
+
+ frag_n_used = mach_read_from_4(FSP_HEADER_OFFSET + FSP_FRAG_N_USED
+ + header->frame);
+
+ const uint16_t xoffset= static_cast<uint16_t>(descr - xdes->frame
+ + XDES_FLST_NODE);
+
+ if (state == XDES_FULL_FRAG) {
+ /* The fragment was full: move it to another list. The
+ counter grows by FSP_EXTENT_SIZE - 1, mirroring the
+ subtraction of FSP_EXTENT_SIZE that
+ fsp_alloc_from_free_frag() performs when an extent
+ becomes full. */
+ flst_remove(header, FSP_HEADER_OFFSET + FSP_FULL_FRAG,
+ xdes, xoffset, mtr);
+ xdes_set_state(*xdes, descr, XDES_FREE_FRAG, mtr);
+ flst_add_last(header, FSP_HEADER_OFFSET + FSP_FREE_FRAG,
+ xdes, xoffset, mtr);
+ mtr->write<4>(*header, FSP_HEADER_OFFSET + FSP_FRAG_N_USED
+ + header->frame,
+ frag_n_used + FSP_EXTENT_SIZE - 1);
+ } else {
+ ut_a(frag_n_used > 0);
+ mtr->write<4>(*header, FSP_HEADER_OFFSET + FSP_FRAG_N_USED
+ + header->frame, frag_n_used - 1);
+ }
+
+ if (!xdes_get_n_used(descr)) {
+ /* The extent has become free: move it to another list */
+ flst_remove(header, FSP_HEADER_OFFSET + FSP_FREE_FRAG,
+ xdes, xoffset, mtr);
+ fsp_free_extent(space, offset, mtr);
+ }
+}
+
+/** Return an extent to the free list of a space.
+The extent descriptor is re-initialized with xdes_init(), appended to the
+FSP_FREE list of the tablespace header, and space->free_len is incremented.
+The extent must not already be in state XDES_FREE.
+@param[in,out] space tablespace
+@param[in] offset page number in the extent
+@param[in,out] mtr mini-transaction */
+static void fsp_free_extent(fil_space_t* space, page_no_t offset, mtr_t* mtr)
+{
+  ut_ad(mtr->memo_contains(*space));
+
+  buf_block_t *const header= fsp_get_header(space, mtr);
+  buf_block_t *desc_page= nullptr;
+  xdes_t *const descr=
+    xdes_get_descriptor_with_space_hdr(header, space, offset,
+                                       &desc_page, mtr);
+
+  ut_a(xdes_get_state(descr) != XDES_FREE);
+
+  xdes_init(*desc_page, descr, mtr);
+
+  /* Byte offset of the list node of this descriptor within its page */
+  const uint16_t node= static_cast<uint16_t>(descr - desc_page->frame +
+                                             XDES_FLST_NODE);
+
+  flst_add_last(header, FSP_HEADER_OFFSET + FSP_FREE, desc_page, node, mtr);
+  space->free_len++;
+}
+
+/** Compute how many segment inodes fit on a single inode page.
+@param[in] physical_size physical page size in bytes
+@return number of FSEG_INODE_SIZE slots that fit after FSEG_ARR_OFFSET
+when 10 further bytes of the page are left out of the array */
+inline ulint FSP_SEG_INODES_PER_PAGE(ulint physical_size)
+{
+  const ulint array_bytes= physical_size - FSEG_ARR_OFFSET - 10;
+  return array_bytes / FSEG_INODE_SIZE;
+}
+
+/** Returns the nth inode slot on an inode page.
+The index argument and the whole expansion are parenthesized, so that the
+macro also yields the right address when it is invoked with a compound
+expression such as (page, n + 1) or used next to a higher-precedence
+operator.
+@param[in] page segment inode page
+@param[in] i inode index on page
+@return segment inode */
+#define fsp_seg_inode_page_get_nth_inode(page, i) \
+  (FSEG_ARR_OFFSET + FSEG_INODE_SIZE * (i) + (page))
+
+/** Looks for a used segment inode on a segment inode page.
+An inode slot counts as used when its FSEG_ID field is nonzero.
+@param[in] page segment inode page
+@param[in] physical_size page size
+@return index of the first used segment inode,
+or ULINT_UNDEFINED if not found */
+static
+ulint
+fsp_seg_inode_page_find_used(const page_t* page, ulint physical_size)
+{
+  const ulint n_inodes = FSP_SEG_INODES_PER_PAGE(physical_size);
+  ulint i = 0;
+
+  while (i < n_inodes) {
+    if (mach_read_from_8(
+          FSEG_ID
+          + fsp_seg_inode_page_get_nth_inode(page, i))) {
+      /* This is used */
+      ut_ad(FSEG_MAGIC_N_VALUE == mach_read_from_4(
+              FSEG_MAGIC_N
+              + fsp_seg_inode_page_get_nth_inode(page, i)));
+      return i;
+    }
+
+    i++;
+  }
+
+  return ULINT_UNDEFINED;
+}
+
+/** Looks for an unused segment inode on a segment inode page.
+An inode slot counts as unused when its FSEG_ID field is zero.
+@param[in] page segment inode page
+@param[in] i search forward starting from this index
+@param[in] physical_size page size
+@return index of the first unused segment inode at or after i,
+or ULINT_UNDEFINED if not found */
+static
+ulint
+fsp_seg_inode_page_find_free(const page_t* page, ulint i, ulint physical_size)
+{
+  const ulint n_inodes = FSP_SEG_INODES_PER_PAGE(physical_size);
+
+  while (i < n_inodes) {
+    if (!mach_read_from_8(
+          FSEG_ID
+          + fsp_seg_inode_page_get_nth_inode(page, i))) {
+      /* This is unused */
+      return i;
+    }
+
+    /* A used slot must carry the magic number */
+    ut_ad(FSEG_MAGIC_N_VALUE == mach_read_from_4(
+            FSEG_MAGIC_N
+            + fsp_seg_inode_page_get_nth_inode(page, i)));
+    i++;
+  }
+
+  return ULINT_UNDEFINED;
+}
+
+/** Allocate a file segment inode page.
+A page is allocated with fsp_alloc_free_page(), its type is set to
+FIL_PAGE_INODE, and it is appended to the FSP_SEG_INODES_FREE list of the
+tablespace header.
+@param[in,out] space tablespace
+@param[in,out] header tablespace header
+@param[in,out] mtr mini-transaction
+@return whether the allocation succeeded */
+MY_ATTRIBUTE((nonnull, warn_unused_result))
+static
+bool
+fsp_alloc_seg_inode_page(fil_space_t *space, buf_block_t *header, mtr_t *mtr)
+{
+  ut_ad(header->page.id().space() == space->id);
+
+  buf_block_t *const block= fsp_alloc_free_page(space, 0, mtr, mtr);
+
+  if (block == nullptr)
+  {
+    return false;
+  }
+
+  buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+  ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1);
+
+  byte *const type_field= block->frame + FIL_PAGE_TYPE;
+  mtr->write<2>(*block, type_field, FIL_PAGE_INODE);
+
+#ifdef UNIV_DEBUG
+  {
+    /* Every inode slot of the new page must be unused (FSEG_ID == 0) */
+    const byte *inode= block->frame + FSEG_ARR_OFFSET + FSEG_ID;
+    ulint i= FSP_SEG_INODES_PER_PAGE(space->physical_size());
+
+    while (i--)
+    {
+      ut_ad(!mach_read_from_8(inode));
+      inode+= FSEG_INODE_SIZE;
+    }
+  }
+#endif
+
+  flst_add_last(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE,
+                block, FSEG_INODE_PAGE_NODE, mtr);
+  return true;
+}
+
+/** Allocate a file segment inode.
+The inode is taken from the first page of the FSP_SEG_INODES_FREE list; a
+new inode page is allocated first if that list is empty. If the chosen slot
+was the last unused one on its page, the page is moved to the
+FSP_SEG_INODES_FULL list.
+@param[in,out] space tablespace
+@param[in,out] header tablespace header
+@param[out] iblock segment inode page, SX-latched
+@param[in,out] mtr mini-transaction
+@return segment inode
+@retval NULL if not enough space */
+MY_ATTRIBUTE((nonnull, warn_unused_result))
+static fseg_inode_t*
+fsp_alloc_seg_inode(fil_space_t *space, buf_block_t *header,
+                    buf_block_t **iblock, mtr_t *mtr)
+{
+  /* Allocate a new segment inode page if needed. */
+  if (flst_get_len(FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE
+                   + header->frame) == 0)
+  {
+    if (!fsp_alloc_seg_inode_page(space, header, mtr))
+    {
+      return NULL;
+    }
+  }
+
+  const page_id_t page_id(
+    space->id,
+    flst_get_first(FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE
+                   + header->frame).page);
+
+  buf_block_t *const block= buf_page_get(page_id, space->zip_size(),
+                                         RW_SX_LATCH, mtr);
+  buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
+
+  if (!space->full_crc32())
+  {
+    fil_block_check_type(*block, FIL_PAGE_INODE, mtr);
+  }
+
+  const ulint physical_size= space->physical_size();
+
+  /* A page on the FREE list must have at least one unused slot */
+  const ulint n= fsp_seg_inode_page_find_free(block->frame, 0,
+                                              physical_size);
+  ut_a(n < FSP_SEG_INODES_PER_PAGE(physical_size));
+
+  fseg_inode_t *const inode=
+    fsp_seg_inode_page_get_nth_inode(block->frame, n);
+
+  const bool no_slot_left=
+    fsp_seg_inode_page_find_free(block->frame, n + 1, physical_size)
+    == ULINT_UNDEFINED;
+
+  if (no_slot_left)
+  {
+    /* There are no other unused headers left on the page: move it
+    to another list */
+    flst_remove(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE,
+                block, FSEG_INODE_PAGE_NODE, mtr);
+    flst_add_last(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FULL,
+                  block, FSEG_INODE_PAGE_NODE, mtr);
+  }
+
+  ut_ad(!mach_read_from_8(inode + FSEG_ID)
+        || mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
+  *iblock= block;
+  return inode;
+}
+
+/** Frees a file segment inode.
+If the inode page had no unused slot, it is first moved from the
+FSP_SEG_INODES_FULL list to FSP_SEG_INODES_FREE. The inode is then filled
+with zeros, and if no used inode remains on the page, the page is taken off
+FSP_SEG_INODES_FREE and freed.
+@param[in,out] space tablespace
+@param[in,out] inode segment inode
+@param[in,out] iblock segment inode page
+@param[in,out] mtr mini-transaction */
+static void fsp_free_seg_inode(
+  fil_space_t* space,
+  fseg_inode_t* inode,
+  buf_block_t* iblock,
+  mtr_t* mtr)
+{
+  ut_d(space->modify_check(*mtr));
+
+  buf_block_t* const header = fsp_get_header(space, mtr);
+
+  ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
+
+  const ulint physical_size = space->physical_size();
+
+  const bool page_was_full = fsp_seg_inode_page_find_free(
+    iblock->frame, 0, physical_size) == ULINT_UNDEFINED;
+
+  if (page_was_full) {
+    /* Move the page to another list */
+    flst_remove(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FULL,
+                iblock, FSEG_INODE_PAGE_NODE, mtr);
+    flst_add_last(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE,
+                  iblock, FSEG_INODE_PAGE_NODE, mtr);
+  }
+
+  /* Wipe the inode; a zero FSEG_ID marks the slot as unused */
+  mtr->memset(iblock, page_offset(inode) + FSEG_ID, FSEG_INODE_SIZE, 0);
+
+  if (fsp_seg_inode_page_find_used(iblock->frame, physical_size)
+      != ULINT_UNDEFINED) {
+    /* Some other inode on the page is still in use */
+    return;
+  }
+
+  /* There are no other used headers left on the page: free it */
+  flst_remove(header, FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE,
+              iblock, FSEG_INODE_PAGE_NODE, mtr);
+  fsp_free_page(space, iblock->page.id().page_no(), mtr);
+}
+
+/** Look up the file segment inode that a segment header points to.
+The inode page is fetched with RW_SX_LATCH.
+@param[in] header segment header
+@param[in] space space id
+@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
+@param[in,out] mtr mini-transaction
+@param[out] block inode block, or NULL to ignore
+@return segment inode, page SX-latched; NULL if the inode is free
+(its FSEG_ID is zero) */
+static
+fseg_inode_t*
+fseg_inode_try_get(
+  const fseg_header_t* header,
+  ulint space,
+  ulint zip_size,
+  mtr_t* mtr,
+  buf_block_t** block)
+{
+  /* Decode the inode address stored in the segment header */
+  fil_addr_t inode_addr;
+  inode_addr.page = mach_read_from_4(header + FSEG_HDR_PAGE_NO);
+  inode_addr.boffset = mach_read_from_2(header + FSEG_HDR_OFFSET);
+  ut_ad(space == mach_read_from_4(header + FSEG_HDR_SPACE));
+
+  fseg_inode_t* const inode = fut_get_ptr(space, zip_size, inode_addr,
+                                          RW_SX_LATCH, mtr, block);
+
+  if (UNIV_UNLIKELY(!mach_read_from_8(inode + FSEG_ID))) {
+    /* The inode slot is not in use */
+    return NULL;
+  }
+
+  ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N)
+        == FSEG_MAGIC_N_VALUE);
+  return inode;
+}
+
+/** Look up the file segment inode that a segment header points to,
+asserting that the inode is in use.
+@param[in] header segment header
+@param[in] space space id
+@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
+@param[in,out] mtr mini-transaction
+@param[out] block inode block, or NULL to ignore
+@return segment inode, page latched as by fseg_inode_try_get() */
+static
+fseg_inode_t*
+fseg_inode_get(
+  const fseg_header_t* header,
+  ulint space,
+  ulint zip_size,
+  mtr_t* mtr,
+  buf_block_t** block = NULL)
+{
+  fseg_inode_t* const inode = fseg_inode_try_get(header, space,
+                                                 zip_size, mtr, block);
+
+  /* A free inode is treated as a fatal error here */
+  ut_a(inode);
+
+  return inode;
+}
+
+/** Get the page number from the nth fragment page slot.
+@param inode file segment inode
+@param n slot index, less than FSEG_FRAG_ARR_N_SLOTS
+@return page number
+@retval FIL_NULL if not in use */
+static uint32_t fseg_get_nth_frag_page_no(const fseg_inode_t *inode, ulint n)
+{
+  ut_ad(inode);
+  ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
+  ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
+
+  const fseg_inode_t *const slot= inode + FSEG_FRAG_ARR
+    + n * FSEG_FRAG_SLOT_SIZE;
+
+  return mach_read_from_4(slot);
+}
+
+/** Set the page number in the nth fragment page slot.
+@param[in,out] inode segment inode
+@param[in,out] iblock segment inode page
+@param[in] n slot index, less than FSEG_FRAG_ARR_N_SLOTS
+@param[in] page_no page number to set
+@param[in,out] mtr mini-transaction */
+inline void fseg_set_nth_frag_page_no(fseg_inode_t *inode, buf_block_t *iblock,
+                                      ulint n, ulint page_no, mtr_t *mtr)
+{
+  ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
+  ut_ad(mtr->memo_contains_flagged(iblock, MTR_MEMO_PAGE_SX_FIX));
+  ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
+
+  fseg_inode_t *const slot= inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE;
+
+  mtr->write<4>(*iblock, slot, page_no);
+}
+
+/** Finds a fragment page slot which is free.
+A slot is free when it holds FIL_NULL.
+@param[in] inode segment inode
+@return index of the first free slot; ULINT_UNDEFINED if none found */
+static
+ulint
+fseg_find_free_frag_page_slot(
+  fseg_inode_t* inode)
+{
+  for (ulint slot = 0; slot < FSEG_FRAG_ARR_N_SLOTS; slot++) {
+    if (fseg_get_nth_frag_page_no(inode, slot) == FIL_NULL) {
+      return slot;
+    }
+  }
+
+  return ULINT_UNDEFINED;
+}
+
+/** Finds a fragment page slot which is used and last in the array.
+The array is scanned from its end towards its start.
+@param[in] inode segment inode
+@return index of the last used slot; ULINT_UNDEFINED if none found */
+static
+ulint
+fseg_find_last_used_frag_page_slot(
+  fseg_inode_t* inode)
+{
+  for (ulint slot = FSEG_FRAG_ARR_N_SLOTS; slot-- > 0; ) {
+    if (fseg_get_nth_frag_page_no(inode, slot) != FIL_NULL) {
+      return slot;
+    }
+  }
+
+  return ULINT_UNDEFINED;
+}
+
+/** Calculate reserved fragment page slots.
+@param inode file segment inode
+@return number of fragment page slots that do not hold FIL_NULL */
+static ulint fseg_get_n_frag_pages(const fseg_inode_t *inode)
+{
+  ulint n_used= 0;
+
+  for (ulint slot= 0; slot < FSEG_FRAG_ARR_N_SLOTS; slot++)
+  {
+    if (fseg_get_nth_frag_page_no(inode, slot) == FIL_NULL)
+    {
+      continue;
+    }
+    n_used++;
+  }
+
+  return n_used;
+}
+
+/** Create a new segment.
+A segment inode is allocated and given the next segment id from the
+tablespace header; its extent lists, magic number and fragment page array
+are initialized; and the segment header (inode location and space id) is
+written at byte_offset of the block that hosts the header.
+@param space tablespace
+@param byte_offset byte offset of the created segment header
+@param mtr mini-transaction
+@param has_done_reservation whether fsp_reserve_free_extents() was invoked
+@param block block where segment header is placed,
+ or NULL to allocate an additional page for that
+@return the block where the segment header is placed, x-latched
+@retval NULL if could not create segment because of lack of space;
+this also holds when the caller supplied a block */
+buf_block_t*
+fseg_create(fil_space_t *space, ulint byte_offset, mtr_t *mtr,
+            bool has_done_reservation, buf_block_t *block)
+{
+  fseg_inode_t* inode;
+  ib_id_t seg_id;
+  /* Only assigned by fsp_reserve_free_extents(); start from a
+  defined value */
+  uint32_t n_reserved = 0;
+
+  DBUG_ENTER("fseg_create");
+
+  ut_ad(mtr);
+  ut_ad(byte_offset >= FIL_PAGE_DATA);
+  ut_ad(byte_offset + FSEG_HEADER_SIZE
+        <= srv_page_size - FIL_PAGE_DATA_END);
+
+  mtr_x_lock_space(space, mtr);
+  ut_d(space->modify_check(*mtr));
+
+  if (block) {
+    ut_ad(block->page.id().space() == space->id);
+
+    if (!space->full_crc32()) {
+      fil_block_check_type(*block, block->page.id()
+                           == page_id_t(TRX_SYS_SPACE,
+                                        TRX_SYS_PAGE_NO)
+                           ? FIL_PAGE_TYPE_TRX_SYS
+                           : FIL_PAGE_TYPE_SYS,
+                           mtr);
+    }
+  }
+
+  if (!has_done_reservation
+      && !fsp_reserve_free_extents(&n_reserved, space, 2,
+                                   FSP_NORMAL, mtr)) {
+    DBUG_RETURN(NULL);
+  }
+
+  buf_block_t* header = fsp_get_header(space, mtr);
+  buf_block_t* iblock;
+
+  inode = fsp_alloc_seg_inode(space, header, &iblock, mtr);
+
+  if (inode == NULL) {
+    /* No segment was created. Report the failure also when
+    the caller passed in a block; returning that block would
+    look like success although no segment header was written
+    to it. */
+    block = NULL;
+    goto funct_exit;
+  }
+
+  /* Read the next segment id from space header and increment the
+  value in space header */
+
+  seg_id = mach_read_from_8(FSP_HEADER_OFFSET + FSP_SEG_ID
+                            + header->frame);
+
+  mtr->write<8>(*header, FSP_HEADER_OFFSET + FSP_SEG_ID + header->frame,
+                seg_id + 1);
+  mtr->write<8>(*iblock, inode + FSEG_ID, seg_id);
+  ut_ad(!mach_read_from_4(inode + FSEG_NOT_FULL_N_USED));
+
+  flst_init(*iblock, inode + FSEG_FREE, mtr);
+  flst_init(*iblock, inode + FSEG_NOT_FULL, mtr);
+  flst_init(*iblock, inode + FSEG_FULL, mtr);
+
+  mtr->write<4>(*iblock, inode + FSEG_MAGIC_N, FSEG_MAGIC_N_VALUE);
+  /* Filling the fragment array with 0xff bytes sets every 4-byte
+  slot to FIL_NULL, that is, unused */
+  compile_time_assert(FSEG_FRAG_SLOT_SIZE == 4);
+  compile_time_assert(FIL_NULL == 0xffffffff);
+  mtr->memset(iblock, uint16_t(inode - iblock->frame) + FSEG_FRAG_ARR,
+              FSEG_FRAG_SLOT_SIZE * FSEG_FRAG_ARR_N_SLOTS, 0xff);
+
+  if (!block) {
+    /* The caller did not provide a page for the segment
+    header: allocate one from the new segment */
+    block = fseg_alloc_free_page_low(space,
+                                     inode, iblock, 0, FSP_UP,
+#ifdef UNIV_DEBUG
+                                     has_done_reservation,
+#endif /* UNIV_DEBUG */
+                                     mtr, mtr);
+
+    /* The allocation cannot fail if we have already reserved a
+    space for the page. */
+    ut_ad(!has_done_reservation || block != NULL);
+
+    if (block == NULL) {
+      /* Give the unused inode back */
+      fsp_free_seg_inode(space, inode, iblock, mtr);
+      goto funct_exit;
+    }
+
+    ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1);
+    ut_ad(!fil_page_get_type(block->frame));
+    /* The page type was asserted to be 0 above, so writing the
+    second byte of the 2-byte field is sufficient */
+    mtr->write<1>(*block, FIL_PAGE_TYPE + 1 + block->frame,
+                  FIL_PAGE_TYPE_SYS);
+  }
+
+  /* Write the segment header: where the inode is, and in which space */
+  mtr->write<2>(*block, byte_offset + FSEG_HDR_OFFSET
+                + block->frame, page_offset(inode));
+
+  mtr->write<4>(*block, byte_offset + FSEG_HDR_PAGE_NO
+                + block->frame, iblock->page.id().page_no());
+
+  mtr->write<4,mtr_t::MAYBE_NOP>(*block, byte_offset + FSEG_HDR_SPACE
+                                 + block->frame, space->id);
+
+funct_exit:
+  if (!has_done_reservation) {
+    space->release_free_extents(n_reserved);
+  }
+
+  DBUG_RETURN(block);
+}
+
+/** Calculates the number of pages reserved by a segment, and how many
+pages are currently used.
+Used pages are FSEG_NOT_FULL_N_USED plus all pages of the extents on
+FSEG_FULL plus the fragment pages; reserved pages are the fragment pages
+plus all pages of the extents on FSEG_FREE, FSEG_NOT_FULL and FSEG_FULL.
+@param[in] inode segment inode
+@param[out] used number of pages used (not more than reserved)
+@return number of reserved pages */
+static
+ulint
+fseg_n_reserved_pages_low(
+  const fseg_inode_t* inode,
+  ulint* used)
+{
+  const ulint frag_pages = fseg_get_n_frag_pages(inode);
+  const ulint full_pages = FSP_EXTENT_SIZE
+    * flst_get_len(inode + FSEG_FULL);
+
+  *used = mach_read_from_4(inode + FSEG_NOT_FULL_N_USED)
+    + full_pages
+    + frag_pages;
+
+  const ulint free_pages = FSP_EXTENT_SIZE
+    * flst_get_len(inode + FSEG_FREE);
+  const ulint not_full_pages = FSP_EXTENT_SIZE
+    * flst_get_len(inode + FSEG_NOT_FULL);
+
+  return frag_pages + free_pages + not_full_pages + full_pages;
+}
+
+/** Calculate the number of pages reserved by a segment,
+and how many pages are currently used.
+The segment inode is looked up from the header with fseg_inode_get().
+@param[in] block buffer block containing the file segment header
+@param[in] header file segment header
+@param[out] used number of pages that are used (not more than reserved)
+@param[in,out] mtr mini-transaction
+@return number of reserved pages */
+ulint fseg_n_reserved_pages(const buf_block_t &block,
+                            const fseg_header_t *header, ulint *used,
+                            mtr_t *mtr)
+{
+  ut_ad(page_align(header) == block.frame);
+
+  const fseg_inode_t *const inode=
+    fseg_inode_get(header, block.page.id().space(), block.zip_size(), mtr);
+
+  return fseg_n_reserved_pages_low(inode, used);
+}
+
+/** Tries to fill the free list of a segment with consecutive free extents.
+This happens if the segment is big enough to allow extents in the free list,
+the free list is empty, and the extents can be allocated consecutively from
+the hint onward. At most FSEG_FREE_LIST_MAX_LEN extents are added; the loop
+stops at the first extent that does not exist or is not in state XDES_FREE.
+@param[in,out] inode segment inode
+@param[in,out] iblock segment inode page
+@param[in] space tablespace
+@param[in] hint hint which extent would be good as the first extent
+@param[in,out] mtr mini-transaction */
+static
+void
+fseg_fill_free_list(
+  fseg_inode_t* inode,
+  buf_block_t* iblock,
+  fil_space_t* space,
+  uint32_t hint,
+  mtr_t* mtr)
+{
+  xdes_t* descr;
+  ulint i;
+  ulint reserved;
+  ulint used;
+
+  ut_ad(inode && mtr);
+  ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
+  ut_d(space->modify_check(*mtr));
+
+  reserved = fseg_n_reserved_pages_low(inode, &used);
+
+  if (reserved < FSEG_FREE_LIST_LIMIT * FSP_EXTENT_SIZE) {
+
+    /* The segment is too small to allow extents in free list */
+
+    return;
+  }
+
+  if (flst_get_len(inode + FSEG_FREE) > 0) {
+    /* Free list is not empty */
+
+    return;
+  }
+
+  /* The segment id does not change while extents are added:
+  read it once, outside the loop */
+  const ib_id_t seg_id = mach_read_from_8(inode + FSEG_ID);
+  ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N)
+        == FSEG_MAGIC_N_VALUE);
+
+  for (i = 0; i < FSEG_FREE_LIST_MAX_LEN; i++) {
+    buf_block_t* xdes;
+    descr = xdes_get_descriptor(space, hint, &xdes, mtr);
+
+    if (!descr || (XDES_FREE != xdes_get_state(descr))) {
+      /* We cannot allocate the desired extent: stop */
+      return;
+    }
+
+    /* The hinted extent was just seen to be in state
+    XDES_FREE, so this takes exactly that extent */
+    descr = fsp_alloc_free_extent(space, hint, &xdes, mtr);
+
+    xdes_set_state(*xdes, descr, XDES_FSEG, mtr);
+
+    /* Use MAYBE_NOP like the other places in this file that
+    assign XDES_ID to an extent taken from the tablespace: the
+    descriptor may already contain this very segment id. */
+    mtr->write<8,mtr_t::MAYBE_NOP>(*xdes, descr + XDES_ID,
+                                   seg_id);
+
+    flst_add_last(iblock,
+                  static_cast<uint16_t>(inode - iblock->frame
+                                        + FSEG_FREE), xdes,
+                  static_cast<uint16_t>(descr - xdes->frame
+                                        + XDES_FLST_NODE), mtr);
+    hint += FSP_EXTENT_SIZE;
+  }
+}
+
+/** Allocates a free extent for the segment: looks first in the free list of
+the segment, then tries to allocate from the space free list.
+An extent obtained from the space is put in state XDES_FSEG, tagged with
+the segment id and appended to the segment free list, after which
+fseg_fill_free_list() is tried for the extents that follow it.
+NOTE that the extent returned still resides in the segment free list, it is
+not yet taken off it!
+@param[in,out] inode segment inode
+@param[in,out] iblock segment inode page
+@param[out] xdes extent descriptor page
+@param[in,out] space tablespace
+@param[in,out] mtr mini-transaction
+@retval NULL if no page could be allocated */
+static
+xdes_t*
+fseg_alloc_free_extent(
+  fseg_inode_t* inode,
+  buf_block_t* iblock,
+  buf_block_t** xdes,
+  fil_space_t* space,
+  mtr_t* mtr)
+{
+  ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
+  ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
+  ut_d(space->modify_check(*mtr));
+
+  if (flst_get_len(inode + FSEG_FREE) > 0) {
+    /* Segment free list is not empty, allocate from it */
+    fil_addr_t first = flst_get_first(inode + FSEG_FREE);
+
+    return xdes_lst_get_descriptor(space, first, xdes, mtr);
+  }
+
+  /* Segment free list was empty, allocate from space */
+  xdes_t* const descr = fsp_alloc_free_extent(space, 0, xdes, mtr);
+
+  if (descr == NULL) {
+    return NULL;
+  }
+
+  const ib_id_t seg_id = mach_read_from_8(inode + FSEG_ID);
+  buf_block_t* const desc_page = *xdes;
+
+  /* Hand the extent over to the segment */
+  xdes_set_state(*desc_page, descr, XDES_FSEG, mtr);
+  mtr->write<8,mtr_t::MAYBE_NOP>(*desc_page, descr + XDES_ID,
+                                 seg_id);
+
+  const uint16_t free_list = static_cast<uint16_t>(
+    inode - iblock->frame + FSEG_FREE);
+  const uint16_t node = static_cast<uint16_t>(
+    descr - desc_page->frame + XDES_FLST_NODE);
+
+  flst_add_last(iblock, free_list, desc_page, node, mtr);
+
+  /* Try to fill the segment free list */
+  fseg_fill_free_list(inode, iblock, space,
+                      xdes_get_offset(descr) + FSP_EXTENT_SIZE,
+                      mtr);
+
+  return descr;
+}
+
+/** Allocates a single free page from a segment.
+This function implements the intelligent allocation strategy which tries to
+minimize file space fragmentation.
+@param[in,out] space tablespace
+@param[in,out] seg_inode segment inode
+@param[in,out] iblock segment inode page
+@param[in] hint hint of which page would be desirable
+@param[in] direction if the new page is needed because of
+an index page split, and records are inserted there in order, into which
+direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR
+@param[in,out] mtr mini-transaction
+@param[in,out] init_mtr mtr or another mini-transaction in
+which the page should be initialized.
+@retval NULL if no page could be allocated */
+static
+buf_block_t*
+fseg_alloc_free_page_low(
+ fil_space_t* space,
+ fseg_inode_t* seg_inode,
+ buf_block_t* iblock,
+ uint32_t hint,
+ byte direction,
+#ifdef UNIV_DEBUG
+ bool has_done_reservation,
+ /*!< whether the space has already been reserved */
+#endif /* UNIV_DEBUG */
+ mtr_t* mtr,
+ mtr_t* init_mtr)
+{
+ ib_id_t seg_id;
+ ulint used;
+ ulint reserved;
+ xdes_t* descr; /*!< extent of the hinted page */
+ uint32_t ret_page; /*!< the allocated page offset, FIL_NULL
+ if could not be allocated */
+ xdes_t* ret_descr; /*!< the extent of the allocated page */
+ buf_block_t* xdes;
+ ulint n;
+ const ulint space_id = space->id;
+
+ ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR));
+ ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
+ == FSEG_MAGIC_N_VALUE);
+ ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
+ seg_id = mach_read_from_8(seg_inode + FSEG_ID);
+
+ ut_ad(seg_id);
+ ut_d(space->modify_check(*mtr));
+ ut_ad(fil_page_get_type(page_align(seg_inode)) == FIL_PAGE_INODE);
+
+ reserved = fseg_n_reserved_pages_low(seg_inode, &used);
+
+ buf_block_t* header = fsp_get_header(space, mtr);
+
+ descr = xdes_get_descriptor_with_space_hdr(header, space, hint,
+ &xdes, mtr);
+ if (descr == NULL) {
+ /* Hint outside space or too high above free limit: reset
+ hint */
+ /* The file space header page is always allocated. */
+ hint = 0;
+ descr = xdes_get_descriptor(space, hint, &xdes, mtr);
+ }
+
+ /* In the big if-else below we look for ret_page and ret_descr */
+ /*-------------------------------------------------------------*/
+ if ((xdes_get_state(descr) == XDES_FSEG)
+ && mach_read_from_8(descr + XDES_ID) == seg_id
+ && xdes_is_free(descr, hint % FSP_EXTENT_SIZE)) {
+take_hinted_page:
+ /* 1. We can take the hinted page
+ =================================*/
+ ret_descr = descr;
+ ret_page = hint;
+ /* Skip the check for extending the tablespace. If the
+ page hint were not within the size of the tablespace,
+ we would have got (descr == NULL) above and reset the hint. */
+ goto got_hinted_page;
+ /*-----------------------------------------------------------*/
+ } else if (xdes_get_state(descr) == XDES_FREE
+ && reserved - used < reserved / FSEG_FILLFACTOR
+ && used >= FSEG_FRAG_LIMIT) {
+
+ /* 2. We allocate the free extent from space and can take
+ =========================================================
+ the hinted page
+ ===============*/
+ ret_descr = fsp_alloc_free_extent(space, hint, &xdes, mtr);
+
+ ut_a(ret_descr == descr);
+
+ xdes_set_state(*xdes, ret_descr, XDES_FSEG, mtr);
+ mtr->write<8,mtr_t::MAYBE_NOP>(*xdes, ret_descr + XDES_ID,
+ seg_id);
+ flst_add_last(iblock,
+ static_cast<uint16_t>(seg_inode - iblock->frame
+ + FSEG_FREE), xdes,
+ static_cast<uint16_t>(ret_descr - xdes->frame
+ + XDES_FLST_NODE), mtr);
+
+ /* Try to fill the segment free list */
+ fseg_fill_free_list(seg_inode, iblock, space,
+ hint + FSP_EXTENT_SIZE, mtr);
+ goto take_hinted_page;
+ /*-----------------------------------------------------------*/
+ } else if ((direction != FSP_NO_DIR)
+ && ((reserved - used) < reserved / FSEG_FILLFACTOR)
+ && (used >= FSEG_FRAG_LIMIT)
+ && !!(ret_descr = fseg_alloc_free_extent(seg_inode, iblock,
+ &xdes, space,
+ mtr))) {
+ /* 3. We take any free extent (which was already assigned above
+ ===============================================================
+ in the if-condition to ret_descr) and take the lowest or
+ ========================================================
+ highest page in it, depending on the direction
+ ==============================================*/
+ ret_page = xdes_get_offset(ret_descr);
+
+ if (direction == FSP_DOWN) {
+ ret_page += FSP_EXTENT_SIZE - 1;
+ }
+ ut_ad(!has_done_reservation || ret_page != FIL_NULL);
+ /*-----------------------------------------------------------*/
+ } else if ((xdes_get_state(descr) == XDES_FSEG)
+ && mach_read_from_8(descr + XDES_ID) == seg_id
+ && (!xdes_is_full(descr))) {
+
+ /* 4. We can take the page from the same extent as the
+ ======================================================
+ hinted page (and the extent already belongs to the
+ ==================================================
+ segment)
+ ========*/
+ ret_descr = descr;
+ ret_page = xdes_find_free(ret_descr, hint % FSP_EXTENT_SIZE);
+ if (ret_page == FIL_NULL) {
+ ut_ad(!has_done_reservation);
+ } else {
+ ret_page += xdes_get_offset(ret_descr);
+ }
+ /*-----------------------------------------------------------*/
+ } else if (reserved - used > 0) {
+ /* 5. We take any unused page from the segment
+ ==============================================*/
+ fil_addr_t first;
+
+ if (flst_get_len(seg_inode + FSEG_NOT_FULL) > 0) {
+ first = flst_get_first(seg_inode + FSEG_NOT_FULL);
+ } else if (flst_get_len(seg_inode + FSEG_FREE) > 0) {
+ first = flst_get_first(seg_inode + FSEG_FREE);
+ } else {
+ ut_ad(!has_done_reservation);
+ return(NULL);
+ }
+
+ ret_descr = xdes_lst_get_descriptor(space, first, &xdes, mtr);
+ ret_page = xdes_find_free(ret_descr);
+ if (ret_page == FIL_NULL) {
+ ut_ad(!has_done_reservation);
+ } else {
+ ret_page += xdes_get_offset(ret_descr);
+ }
+ /*-----------------------------------------------------------*/
+ } else if (used < FSEG_FRAG_LIMIT) {
+ /* 6. We allocate an individual page from the space
+ ===================================================*/
+ buf_block_t* block = fsp_alloc_free_page(
+ space, hint, mtr, init_mtr);
+
+ ut_ad(!has_done_reservation || block);
+
+ if (block) {
+ /* Put the page in the fragment page array of the
+ segment */
+ n = fseg_find_free_frag_page_slot(seg_inode);
+ ut_a(n != ULINT_UNDEFINED);
+
+ fseg_set_nth_frag_page_no(
+ seg_inode, iblock, n,
+ block->page.id().page_no(), mtr);
+ }
+
+ /* fsp_alloc_free_page() invoked fsp_init_file_page()
+ already. */
+ return(block);
+ /*-----------------------------------------------------------*/
+ } else {
+ /* 7. We allocate a new extent and take its first page
+ ======================================================*/
+ ret_descr = fseg_alloc_free_extent(seg_inode, iblock, &xdes,
+ space, mtr);
+
+ if (ret_descr == NULL) {
+ ret_page = FIL_NULL;
+ ut_ad(!has_done_reservation);
+ } else {
+ ret_page = xdes_get_offset(ret_descr);
+ ut_ad(!has_done_reservation || ret_page != FIL_NULL);
+ }
+ }
+
+ if (ret_page == FIL_NULL) {
+ /* Page could not be allocated */
+
+ ut_ad(!has_done_reservation);
+ return(NULL);
+ }
+
+ if (space->size <= ret_page && !is_system_tablespace(space_id)) {
+ /* It must be that we are extending a single-table
+ tablespace whose size is still < 64 pages */
+
+ if (ret_page >= FSP_EXTENT_SIZE) {
+ ib::error() << "Error (2): trying to extend"
+ " a single-table tablespace " << space_id
+ << " by single page(s) though the"
+ << " space size " << space->size
+ << ". Page no " << ret_page << ".";
+ ut_ad(!has_done_reservation);
+ return(NULL);
+ }
+
+ if (!fsp_try_extend_data_file_with_pages(
+ space, ret_page, header, mtr)) {
+ /* No disk space left */
+ ut_ad(!has_done_reservation);
+ return(NULL);
+ }
+ }
+
+got_hinted_page:
+ /* ret_descr == NULL if the block was allocated from free_frag
+ (XDES_FREE_FRAG) */
+ if (ret_descr != NULL) {
+ /* At this point we know the extent and the page offset.
+ The extent is still in the appropriate list (FSEG_NOT_FULL
+ or FSEG_FREE), and the page is not yet marked as used. */
+
+ ut_d(buf_block_t* xxdes);
+ ut_ad(xdes_get_descriptor(space, ret_page, &xxdes, mtr)
+ == ret_descr);
+ ut_ad(xdes == xxdes);
+ ut_ad(xdes_is_free(ret_descr, ret_page % FSP_EXTENT_SIZE));
+
+ fseg_mark_page_used(seg_inode, iblock, ret_page, ret_descr,
+ xdes, mtr);
+ }
+
+ return fsp_page_create(space, ret_page, init_mtr);
+}
+
+/**********************************************************************//**
+Allocates a single free page from a segment. The actual choice of the
+page is delegated to fseg_alloc_free_page_low(), whose allocation strategy
+tries to minimize file space fragmentation. Unless the caller has already
+made a reservation, 2 extents are reserved with FSP_NORMAL rules for the
+duration of the allocation and released again before returning.
+@retval NULL if no page could be allocated */
+buf_block_t*
+fseg_alloc_free_page_general(
+/*=========================*/
+	fseg_header_t*	seg_header,/*!< in/out: segment header */
+	uint32_t	hint,	/*!< in: hint of which page would be
+				desirable */
+	byte		direction,/*!< in: if the new page is needed because
+				of an index page split, and records are
+				inserted there in order, into which
+				direction they go alphabetically: FSP_DOWN,
+				FSP_UP, FSP_NO_DIR */
+	bool		has_done_reservation, /*!< in: true if the caller has
+				already done the reservation for the page
+				with fsp_reserve_free_extents, then there
+				is no need to do the check for this individual
+				page */
+	mtr_t*		mtr,	/*!< in/out: mini-transaction */
+	mtr_t*		init_mtr)/*!< in/out: mtr or another mini-transaction
+				in which the page should be initialized. */
+{
+	const ulint	space_id = page_get_space_id(page_align(seg_header));
+	fil_space_t*	space = mtr_x_lock_space(space_id, mtr);
+
+	/* Look up the segment inode and the block it lives in. */
+	buf_block_t*	iblock;
+	fseg_inode_t*	inode = fseg_inode_get(seg_header, space_id,
+					       space->zip_size(),
+					       mtr, &iblock);
+
+	if (!space->full_crc32()) {
+		fil_block_check_type(*iblock, FIL_PAGE_INODE, mtr);
+	}
+
+	/* Only meaningful when this function makes the reservation. */
+	uint32_t	n_reserved;
+
+	if (!has_done_reservation) {
+		if (!fsp_reserve_free_extents(&n_reserved, space, 2,
+					      FSP_NORMAL, mtr)) {
+			return(NULL);
+		}
+	}
+
+	buf_block_t*	block = fseg_alloc_free_page_low(
+		space, inode, iblock, hint, direction,
+#ifdef UNIV_DEBUG
+		has_done_reservation,
+#endif /* UNIV_DEBUG */
+		mtr, init_mtr);
+
+	/* A caller-made reservation guarantees that the allocation
+	succeeds. */
+	ut_ad(!has_done_reservation || block != NULL);
+
+	if (!has_done_reservation) {
+		/* Give back the reservation made above. */
+		space->release_free_extents(n_reserved);
+	}
+
+	return(block);
+}
+
+/** Make sure that a small single-table tablespace has room for n_pages
+more pages in its first extent. The number of used pages is read from the
+extent descriptor of page 0; if the current size does not cover
+n_used + n_pages pages, the data file is extended page by page.
+@param[in,out]	space		tablespace
+@param[in,out]	header		tablespace header, x-latched
+@param[in]	size		tablespace size in pages, less than FSP_EXTENT_SIZE
+@param[in,out]	mtr		mini-transaction
+@param[in]	n_pages		number of pages to reserve
+@return true if there were at least n_pages free pages, or we were able
+to extend */
+static
+bool
+fsp_reserve_free_pages(
+	fil_space_t*	space,
+	buf_block_t*	header,
+	ulint		size,
+	mtr_t*		mtr,
+	uint32_t	n_pages)
+{
+	ut_a(!is_system_tablespace(space->id));
+	ut_a(size < FSP_EXTENT_SIZE);
+
+	buf_block_t*	xdes;
+	xdes_t*		first_descr = xdes_get_descriptor_with_space_hdr(
+		header, space, 0, &xdes, mtr);
+	const uint32_t	n_used = xdes_get_n_used(first_descr);
+
+	ut_a(n_used <= size);
+
+	if (size >= n_used + n_pages) {
+		/* Enough pages are already inside the data file. */
+		return(true);
+	}
+
+	return(fsp_try_extend_data_file_with_pages(
+		       space, n_used + n_pages - 1, header, mtr));
+}
+
+/** Reserves free pages from a tablespace. All mini-transactions which may
+use several pages from the tablespace should call this function beforehand
+and reserve enough free extents so that they certainly will be able
+to do their operation, like a B-tree page split, fully. Reservations
+must be released with function fil_space_t::release_free_extents()!
+
+The alloc_type below has the following meaning: FSP_NORMAL means an
+operation which will probably result in more space usage, like an
+insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
+deleting rows, then this allocation will in the long run result in
+less space usage (after a purge); FSP_CLEANING means allocation done
+in a physical record delete (like in a purge) or other cleaning operation
+which will result in less space usage in the long run. We prefer the latter
+two types of allocation: when space is scarce, FSP_NORMAL allocations
+will not succeed, but the latter two allocations will succeed, if possible.
+The purpose is to avoid dead end where the database is full but the
+user cannot free any space because these freeing operations temporarily
+reserve some space.
+
+FSP_BLOB is handled like FSP_CLEANING: no safety margin is kept back for
+either of them. Whenever the request cannot be satisfied from the extents
+that are currently counted as free, the function tries to extend the data
+file and then evaluates the request again; it gives up only when the file
+cannot be extended.
+
+Single-table tablespaces whose size is < FSP_EXTENT_SIZE pages are a special
+case. In this function we would liberally reserve several extents for
+every page split or merge in a B-tree. But we do not want to waste disk space
+if the table only occupies < FSP_EXTENT_SIZE pages. That is why we apply
+different rules in that special case, just ensuring that there are n_pages
+free pages available.
+
+@param[out] n_reserved number of extents actually reserved; if we
+ return true and the tablespace size is <
+ FSP_EXTENT_SIZE pages, then this can be 0,
+ otherwise it is n_ext
+@param[in,out] space tablespace
+@param[in] n_ext number of extents to reserve
+@param[in] alloc_type page reservation type (FSP_NORMAL, FSP_UNDO,
+ FSP_CLEANING or FSP_BLOB)
+@param[in,out] mtr the mini transaction
+@param[in] n_pages for small tablespaces (tablespace size is
+ less than FSP_EXTENT_SIZE), number of free
+ pages to reserve.
+@return true if we were able to make the reservation */
+bool
+fsp_reserve_free_extents(
+ uint32_t* n_reserved,
+ fil_space_t* space,
+ uint32_t n_ext,
+ fsp_reserve_t alloc_type,
+ mtr_t* mtr,
+ uint32_t n_pages)
+{
+ ulint reserve;
+
+ ut_ad(mtr);
+ *n_reserved = n_ext; /* reset to 0 below for small tablespaces */
+
+ const uint32_t extent_size = FSP_EXTENT_SIZE;
+
+ mtr_x_lock_space(space, mtr);
+ const unsigned physical_size = space->physical_size();
+
+ buf_block_t* header = fsp_get_header(space, mtr);
+try_again: /* re-entered after the data file has been extended */
+ uint32_t size = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SIZE
+ + header->frame);
+ ut_ad(size == space->size_in_header);
+
+ if (size < extent_size && n_pages < extent_size / 2) {
+ /* Use different rules for small single-table tablespaces */
+ *n_reserved = 0;
+ return(fsp_reserve_free_pages(space, header, size,
+ mtr, n_pages));
+ }
+
+ uint32_t n_free_list_ext = flst_get_len(FSP_HEADER_OFFSET + FSP_FREE
+ + header->frame);
+ ut_ad(space->free_len == n_free_list_ext);
+
+ uint32_t free_limit = mach_read_from_4(FSP_HEADER_OFFSET
+ + FSP_FREE_LIMIT
+ + header->frame);
+ ut_ad(space->free_limit == free_limit);
+
+ /* Below we play safe when counting free extents above the free limit:
+ some of them will contain extent descriptor pages, and therefore
+ will not be free extents */
+
+ uint32_t n_free_up;
+
+ if (size >= free_limit) {
+ n_free_up = (size - free_limit) / extent_size;
+ if (n_free_up) {
+ n_free_up--;
+ n_free_up -= n_free_up / (physical_size / extent_size);
+ }
+ } else {
+ ut_ad(alloc_type == FSP_BLOB);
+ n_free_up = 0;
+ }
+
+ uint32_t n_free = n_free_list_ext + n_free_up;
+
+ switch (alloc_type) {
+ case FSP_NORMAL:
+ /* We reserve 1 extent + 0.5 % of the space size to undo logs
+ and 1 extent + 0.5 % to cleaning operations; NOTE: this source
+ code is duplicated in the function below! */
+
+ reserve = 2 + ((size / extent_size) * 2) / 200;
+
+ if (n_free <= reserve + n_ext) {
+
+ goto try_to_extend;
+ }
+ break;
+ case FSP_UNDO:
+ /* We reserve 1 extent + 0.5 % of the space size to cleaning
+ operations */
+
+ reserve = 1 + ((size / extent_size) * 1) / 200;
+
+ if (n_free <= reserve + n_ext) {
+
+ goto try_to_extend;
+ }
+ break;
+ case FSP_CLEANING:
+ case FSP_BLOB:
+ reserve = 0; /* no safety margin for these types */
+ break;
+ default:
+ ut_error;
+ }
+
+ if (space->reserve_free_extents(n_free, n_ext)) {
+ return(true);
+ }
+try_to_extend:
+ if (fsp_try_extend_data_file(space, header, mtr)) {
+ goto try_again;
+ }
+
+ return(false);
+}
+
+/** Frees a single page of a segment.
+The page is either removed from the fragment page array of the segment
+inode, or it is marked free in its extent descriptor. In the latter case
+the extent is moved between the lists of the segment according to its new
+fill level, and it is returned to the tablespace when it becomes empty.
+@param[in,out]	seg_inode	segment inode
+@param[in,out]	iblock		block whose frame contains seg_inode
+@param[in,out]	space		tablespace
+@param[in]	offset		page number
+@param[in,out]	mtr		mini-transaction */
+static
+void
+fseg_free_page_low(
+	fseg_inode_t*	seg_inode,
+	buf_block_t*	iblock,
+	fil_space_t*	space,
+	page_no_t	offset,
+	mtr_t*		mtr)
+{
+	ib_id_t	descr_id;
+	ib_id_t	seg_id;
+
+	ut_ad(seg_inode != NULL);
+	ut_ad(mtr != NULL);
+	ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
+	      == FSEG_MAGIC_N_VALUE);
+	ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
+	ut_ad(iblock->frame == page_align(seg_inode));
+	ut_d(space->modify_check(*mtr));
+
+	const uint32_t	extent_size = FSP_EXTENT_SIZE;
+	ut_ad(ut_is_2pow(extent_size));
+	buf_block_t*	xdes;
+	xdes_t*		descr = xdes_get_descriptor(space, offset, &xdes, mtr);
+
+	if (xdes_is_free(descr, offset & (extent_size - 1))) {
+		ib::fatal() << "InnoDB is trying to free page "
+			<< page_id_t(space->id, offset)
+			<< " though it is already marked as free in the"
+			" tablespace! The tablespace free space info is"
+			" corrupt. You may need to dump your tables and"
+			" recreate the whole database!"
+			<< FORCE_RECOVERY_MSG;
+	}
+
+	if (xdes_get_state(descr) != XDES_FSEG) {
+		/* The page is in the fragment pages of the segment.
+		Note that the search has no upper bound: it relies on
+		the page number being present in the fragment array. */
+		for (ulint i = 0;; i++) {
+			if (fseg_get_nth_frag_page_no(seg_inode, i)
+			    != offset) {
+				continue;
+			}
+
+			/* Overwrite the slot with FIL_NULL. */
+			compile_time_assert(FIL_NULL == 0xffffffff);
+			mtr->memset(iblock, uint16_t(seg_inode - iblock->frame)
+				    + FSEG_FRAG_ARR
+				    + i * FSEG_FRAG_SLOT_SIZE, 4, 0xff);
+			break;
+		}
+
+		fsp_free_page(space, offset, mtr);
+		return;
+	}
+
+	/* If we get here, the page is in some extent of the segment */
+
+	descr_id = mach_read_from_8(descr + XDES_ID);
+	seg_id = mach_read_from_8(seg_inode + FSEG_ID);
+
+	if (UNIV_UNLIKELY(descr_id != seg_id)) {
+		fputs("InnoDB: Dump of the tablespace extent descriptor: ",
+		      stderr);
+		ut_print_buf(stderr, descr, 40);
+		fputs("\nInnoDB: Dump of the segment inode: ", stderr);
+		ut_print_buf(stderr, seg_inode, 40);
+		putc('\n', stderr);
+
+		/* seg_id identifies the segment that the caller is
+		freeing the page from; descr_id is the owner recorded
+		in the extent descriptor. */
+		ib::fatal() << "InnoDB is trying to free page "
+			<< page_id_t(space->id, offset)
+			<< ", which does not belong to segment " << seg_id
+			<< " but belongs to segment " << descr_id << "."
+			<< FORCE_RECOVERY_MSG;
+	}
+
+	byte*		p_not_full = seg_inode + FSEG_NOT_FULL_N_USED;
+	uint32_t	not_full_n_used = mach_read_from_4(p_not_full);
+	const uint16_t	xoffset= uint16_t(descr - xdes->frame + XDES_FLST_NODE);
+	const uint16_t	ioffset= uint16_t(seg_inode - iblock->frame);
+
+	if (xdes_is_full(descr)) {
+		/* The extent is full: move it to the FSEG_NOT_FULL list.
+		After this page has been freed, extent_size - 1 pages
+		of the extent remain in use. */
+		flst_remove(iblock, static_cast<uint16_t>(FSEG_FULL + ioffset),
+			    xdes, xoffset, mtr);
+		flst_add_last(iblock, static_cast<uint16_t>(FSEG_NOT_FULL
+							    + ioffset),
+			      xdes, xoffset, mtr);
+		not_full_n_used += extent_size - 1;
+	} else {
+		ut_a(not_full_n_used > 0);
+		not_full_n_used--;
+	}
+
+	mtr->write<4>(*iblock, p_not_full, not_full_n_used);
+
+	const ulint	bit = offset & (extent_size - 1);
+
+	xdes_set_free<true>(*xdes, descr, bit, mtr);
+
+	if (!xdes_get_n_used(descr)) {
+		/* The extent has become free: free it to space */
+		flst_remove(iblock, static_cast<uint16_t>(FSEG_NOT_FULL
+							  + ioffset),
+			    xdes, xoffset, mtr);
+		fsp_free_extent(space, offset, mtr);
+	}
+
+	mtr->free(*space, static_cast<uint32_t>(offset));
+}
+
+/** Free a page in a file segment.
+X-latches the tablespace, looks up the segment inode that the header
+points to and lets fseg_free_page_low() do the actual work.
+@param[in,out]	seg_header	file segment header
+@param[in,out]	space		tablespace
+@param[in]	offset		page number
+@param[in,out]	mtr		mini-transaction */
+void
+fseg_free_page(
+	fseg_header_t*	seg_header,
+	fil_space_t*	space,
+	uint32_t	offset,
+	mtr_t*		mtr)
+{
+	DBUG_ENTER("fseg_free_page");
+
+	mtr_x_lock_space(space, mtr);
+
+	DBUG_LOG("fseg_free_page", "space_id: " << space->id
+		 << ", page_no: " << offset);
+
+	buf_block_t*	iblock;
+	fseg_inode_t*	seg_inode = fseg_inode_get(
+		seg_header, space->id, space->zip_size(), mtr, &iblock);
+
+	/* The page type check is skipped for full_crc32 tablespaces. */
+	const bool	check_type = !space->full_crc32();
+
+	if (check_type) {
+		fil_block_check_type(*iblock, FIL_PAGE_INODE, mtr);
+	}
+
+	fseg_free_page_low(seg_inode, iblock, space, offset, mtr);
+
+	DBUG_VOID_RETURN;
+}
+
+/** Determine whether a page is free.
+A page at or beyond the free limit or the size stored in the tablespace
+header counts as free, and so does a page whose extent descriptor cannot
+be obtained. The check runs in its own mini-transaction with the
+tablespace SX-latched.
+@param[in,out]	space	tablespace
+@param[in]	page	page number
+@return whether the page is marked as free */
+bool
+fseg_page_is_free(fil_space_t* space, unsigned page)
+{
+	const page_no_t	dpage = xdes_calc_descriptor_page(space->zip_size(),
+							  page);
+	mtr_t		mtr;
+
+	mtr.start();
+	mtr_sx_lock_space(space, &mtr);
+
+	/* Free unless a descriptor inside the file says otherwise. */
+	bool		is_free = true;
+
+	if (page < space->free_limit && page < space->size_in_header) {
+		if (const xdes_t* descr = xdes_get_descriptor_const(
+			    space, dpage, page, &mtr)) {
+			is_free = xdes_is_free(descr,
+					       page % FSP_EXTENT_SIZE);
+		}
+	}
+
+	mtr.commit();
+
+	return(is_free);
+}
+
+/** Free an extent of a segment to the space free list.
+The extent is unlinked from whichever list of the segment it is on
+(FSEG_FULL, FSEG_FREE or FSEG_NOT_FULL), the pages that are still in use
+are passed to buf_page_free(), and the extent is handed back to the
+tablespace.
+@param[in,out]	seg_inode	segment inode
+@param[in,out]	iblock		block whose frame contains seg_inode
+@param[in,out]	space		tablespace
+@param[in]	page		page number in the extent
+@param[in,out]	mtr		mini-transaction */
+MY_ATTRIBUTE((nonnull))
+static
+void
+fseg_free_extent(
+	fseg_inode_t*	seg_inode,
+	buf_block_t*	iblock,
+	fil_space_t*	space,
+	uint32_t	page,
+	mtr_t*		mtr)
+{
+
+	ut_ad(mtr != NULL);
+
+	buf_block_t*	xdes;
+	xdes_t*		descr = xdes_get_descriptor(space, page, &xdes, mtr);
+
+	ut_a(xdes_get_state(descr) == XDES_FSEG);
+	ut_a(!memcmp(descr + XDES_ID, seg_inode + FSEG_ID, 8));
+	ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
+	      == FSEG_MAGIC_N_VALUE);
+	ut_d(space->modify_check(*mtr));
+	const uint32_t	first_page_in_extent = page - (page % FSP_EXTENT_SIZE);
+
+	const uint16_t	xoffset= uint16_t(descr - xdes->frame + XDES_FLST_NODE);
+	const uint16_t	ioffset= uint16_t(seg_inode - iblock->frame);
+
+	if (xdes_is_full(descr)) {
+		flst_remove(iblock, static_cast<uint16_t>(FSEG_FULL + ioffset),
+			    xdes, xoffset, mtr);
+	} else if (!xdes_get_n_used(descr)) {
+		flst_remove(iblock, static_cast<uint16_t>(FSEG_FREE + ioffset),
+			    xdes, xoffset, mtr);
+	} else {
+		flst_remove(iblock, static_cast<uint16_t>(FSEG_NOT_FULL
+							  + ioffset),
+			    xdes, xoffset, mtr);
+		/* The pages of this extent no longer count as used pages
+		of the FSEG_NOT_FULL list. */
+		uint32_t	not_full_n_used = mach_read_from_4(
+			FSEG_NOT_FULL_N_USED + seg_inode);
+		uint32_t	descr_n_used = xdes_get_n_used(descr);
+		ut_a(not_full_n_used >= descr_n_used);
+		mtr->write<4>(*iblock, seg_inode + FSEG_NOT_FULL_N_USED,
+			      not_full_n_used - descr_n_used);
+	}
+
+	/* Pass the used pages to buf_page_free() while the descriptor
+	bitmap still tells which pages are in use. fsp_free_extent()
+	returns the descriptor to the tablespace; if that resets the
+	bitmap, a scan made after it would find every page already
+	marked as free and skip them all. */
+	for (uint32_t i = 0; i < FSP_EXTENT_SIZE; i++) {
+		if (!xdes_is_free(descr, i)) {
+			buf_page_free(space, first_page_in_extent + i, mtr,
+				      __FILE__, __LINE__);
+		}
+	}
+
+	fsp_free_extent(space, page, mtr);
+}
+
+/**********************************************************************//**
+Frees part of a segment: either one whole extent or one fragment page per
+call. A segment is freed by calling this function repeatedly, each time in
+a different mini-transaction, because doing the freeing in a single
+mini-transaction might result in too big a mini-transaction. Once nothing
+is left, the segment inode itself is freed.
+@return whether the freeing was completed */
+bool
+fseg_free_step(
+	fseg_header_t*	header,	/*!< in, own: segment header; NOTE: if the header
+				resides on the first page of the frag list
+				of the segment, this pointer becomes obsolete
+				after the last freeing step */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+{
+	DBUG_ENTER("fseg_free_step");
+
+	const uint32_t	space_id = page_get_space_id(page_align(header));
+	const uint32_t	header_page = page_get_page_no(page_align(header));
+	fil_space_t*	space = mtr_x_lock_space(space_id, mtr);
+
+	{
+		/* The header must reside on a page which has not been
+		freed yet. */
+		buf_block_t*	xdes;
+		const xdes_t*	header_descr = xdes_get_descriptor(
+			space, header_page, &xdes, mtr);
+
+		ut_a(!xdes_is_free(header_descr,
+				   header_page % FSP_EXTENT_SIZE));
+	}
+
+	buf_block_t*	iblock;
+	const ulint	zip_size = space->zip_size();
+	fseg_inode_t*	inode = fseg_inode_try_get(header, space_id, zip_size,
+						   mtr, &iblock);
+
+	if (!inode) {
+		ib::info() << "Double free of inode from "
+			<< page_id_t(space_id, header_page);
+		DBUG_RETURN(true);
+	}
+
+	if (!space->full_crc32()) {
+		fil_block_check_type(*iblock, FIL_PAGE_INODE, mtr);
+	}
+
+	if (xdes_t* descr = fseg_get_first_extent(inode, space, mtr)) {
+		/* The segment still holds an extent: free that one. */
+		fseg_free_extent(inode, iblock, space, xdes_get_offset(descr),
+				 mtr);
+		DBUG_RETURN(false);
+	}
+
+	/* No extents are left; continue with the fragment pages. */
+	const ulint	n = fseg_find_last_used_frag_page_slot(inode);
+
+	if (n != ULINT_UNDEFINED) {
+		const page_no_t	page_no = fseg_get_nth_frag_page_no(inode, n);
+
+		fseg_free_page_low(inode, iblock, space, page_no, mtr);
+
+		buf_page_free(space, page_no, mtr, __FILE__, __LINE__);
+
+		if (fseg_find_last_used_frag_page_slot(inode)
+		    != ULINT_UNDEFINED) {
+			/* More fragment pages remain for later steps. */
+			DBUG_RETURN(false);
+		}
+	}
+
+	/* Freeing completed: free the segment inode */
+	fsp_free_seg_inode(space, inode, iblock, mtr);
+
+	DBUG_RETURN(true);
+}
+
+/**********************************************************************//**
+Frees part of a segment, one extent or one fragment page per call. Unlike
+fseg_free_step(), this function never frees the page on which the segment
+header resides.
+@return whether the freeing was completed, except for the header page */
+bool
+fseg_free_step_not_header(
+	fseg_header_t*	header,	/*!< in: segment header which must reside on
+				the first fragment page of the segment */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+{
+	const uint32_t	space_id = page_get_space_id(page_align(header));
+	ut_ad(mtr->is_named_space(space_id));
+
+	fil_space_t*	space = mtr_x_lock_space(space_id, mtr);
+	buf_block_t*	iblock;
+	fseg_inode_t*	inode = fseg_inode_get(header, space_id,
+					       space->zip_size(), mtr,
+					       &iblock);
+
+	if (!space->full_crc32()) {
+		fil_block_check_type(*iblock, FIL_PAGE_INODE, mtr);
+	}
+
+	if (xdes_t* descr = fseg_get_first_extent(inode, space, mtr)) {
+		/* The segment still holds an extent: free that one. */
+		fseg_free_extent(inode, iblock, space, xdes_get_offset(descr),
+				 mtr);
+		return false;
+	}
+
+	/* No extents are left; look at the last used fragment page. */
+	const ulint	n = fseg_find_last_used_frag_page_slot(inode);
+
+	ut_a(n != ULINT_UNDEFINED);
+
+	const uint32_t	page_no = fseg_get_nth_frag_page_no(inode, n);
+	const bool	only_header_left
+		= page_no == page_get_page_no(page_align(header));
+
+	if (!only_header_left) {
+		fseg_free_page_low(inode, iblock, space, page_no, mtr);
+		buf_page_free(space, page_no, mtr, __FILE__, __LINE__);
+	}
+
+	return only_header_left;
+}
+
+/** Returns the first extent descriptor for a segment.
+The extent lists of the segment are inspected in the order
+FSEG_FULL -> FSEG_NOT_FULL -> FSEG_FREE, and the first node of the first
+non-empty list is returned.
+@param[in]	inode		segment inode
+@param[in]	space		tablespace
+@param[in,out]	mtr		mini-transaction
+@return the first extent descriptor, or NULL if none */
+MY_ATTRIBUTE((nonnull, warn_unused_result))
+static
+xdes_t*
+fseg_get_first_extent(
+	fseg_inode_t*		inode,
+	const fil_space_t*	space,
+	mtr_t*			mtr)
+{
+	ut_ad(space->id == page_get_space_id(page_align(inode)));
+	ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
+
+	/* Offsets of the list base nodes, in lookup order. */
+	const ulint	lists[] = {FSEG_FULL, FSEG_NOT_FULL, FSEG_FREE};
+	const ulint	n_lists = sizeof lists / sizeof *lists;
+	ulint		i = 0;
+
+	/* Skip the lists that are empty. */
+	while (i < n_lists && flst_get_len(inode + lists[i]) == 0) {
+		i++;
+	}
+
+	if (i == n_lists) {
+		/* The segment owns no extents at all. */
+		return(NULL);
+	}
+
+	const fil_addr_t	first = flst_get_first(inode + lists[i]);
+
+	DBUG_ASSERT(first.page != FIL_NULL);
+
+	if (first.page == FIL_NULL) {
+		return(NULL);
+	}
+
+	buf_block_t*	xdes;
+
+	return(xdes_lst_get_descriptor(space, first, &xdes, mtr));
+}
+
+#ifdef UNIV_BTR_PRINT
+/*******************************************************************//**
+Writes info of a segment: one ib::info() line with the segment id, the
+location of the inode, the reserved and used page counts and the lengths
+of the extent lists of the segment. */
+static void fseg_print_low(const fseg_inode_t *inode)
+{
+	/* Location of the inode page. */
+	const ulint	space = page_get_space_id(page_align(inode));
+	const ulint	page_no = page_get_page_no(page_align(inode));
+
+	ulint		used;
+	const ulint	reserved = fseg_n_reserved_pages_low(inode, &used);
+
+	const ib_id_t	seg_id = mach_read_from_8(inode + FSEG_ID);
+	const ulint	n_used = mach_read_from_4(inode
+						  + FSEG_NOT_FULL_N_USED);
+	const ulint	n_frag = fseg_get_n_frag_pages(inode);
+
+	/* Lengths of the three extent lists. */
+	const ulint	n_free = flst_get_len(inode + FSEG_FREE);
+	const ulint	n_not_full = flst_get_len(inode + FSEG_NOT_FULL);
+	const ulint	n_full = flst_get_len(inode + FSEG_FULL);
+
+	ib::info() << "SEGMENT id " << seg_id
+		<< " space " << space << ";"
+		<< " page " << page_no << ";"
+		<< " res " << reserved << " used " << used << ";"
+		<< " full ext " << n_full << ";"
+		<< " fragm pages " << n_frag << ";"
+		<< " free extents " << n_free << ";"
+		<< " not full extents " << n_not_full << ": pages " << n_used;
+
+	ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
+}
+
+/*******************************************************************//**
+Writes info of a segment. The tablespace of the header page is X-latched
+and the inode that the header points to is printed by fseg_print_low(). */
+void
+fseg_print(
+/*=======*/
+	fseg_header_t*	header, /*!< in: segment header */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+{
+	const ulint		space_id = page_get_space_id(
+		page_align(header));
+	const fil_space_t*	space = mtr_x_lock_space(space_id, mtr);
+
+	fseg_print_low(fseg_inode_get(header, space_id, space->zip_size(),
+				      mtr));
+}
+#endif /* UNIV_BTR_PRINT */
+
+#ifdef UNIV_DEBUG
+/** Print the space id, page number and byte offset stored in the
+segment header.
+@param[in,out]	out	stream to write to
+@return the stream that was passed in */
+std::ostream &fseg_header::to_stream(std::ostream &out) const
+{
+	out << "[fseg_header_t: space=";
+	out << mach_read_from_4(m_header + FSEG_HDR_SPACE);
+	out << ", page=";
+	out << mach_read_from_4(m_header + FSEG_HDR_PAGE_NO);
+	out << ", offset=";
+	out << mach_read_from_2(m_header + FSEG_HDR_OFFSET);
+	out << "]";
+	return out;
+}
+#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/fsp/fsp0space.cc b/storage/innobase/fsp/fsp0space.cc
new file mode 100644
index 00000000..b0a80efe
--- /dev/null
+++ b/storage/innobase/fsp/fsp0space.cc
@@ -0,0 +1,230 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file fsp/fsp0space.cc
+Shared tablespace implementation.
+
+Created 2012-11-16 by Sunny Bains as srv/srv0space.cc
+*******************************************************/
+
+#include "fsp0sysspace.h"
+#include "fsp0fsp.h"
+#include "os0file.h"
+#include "my_sys.h"
+
+/** Check if two tablespaces have common data file names.
+Each file name of other_space is looked up with find() in this
+tablespace.
+@param	other_space	Tablespace to check against this.
+@return true if a data file name of other_space is also found in this
+tablespace */
+bool
+Tablespace::intersection(
+	const Tablespace*	other_space)
+{
+	files_t::const_iterator		it = other_space->begin();
+	const files_t::const_iterator	last = other_space->end();
+
+	/* Advance until the first file name that this tablespace knows. */
+	while (it != last && !find(it->m_filename)) {
+		++it;
+	}
+
+	return(it != last);
+}
+
+/** Shut down every data file of the tablespace, then release the file
+list and the path and reset the tablespace id. */
+void
+Tablespace::shutdown()
+{
+	iterator	file = begin();
+
+	while (file != end()) {
+		file->shutdown();
+		++file;
+	}
+
+	m_files.clear();
+
+	ut_free(m_path);
+	m_path = NULL;
+
+	m_space_id = ULINT_UNDEFINED;
+}
+
+/** Note that the data file was found: mark it as existing and choose
+the flags it will be opened with. The first file of the tablespace gets
+OS_FILE_OPEN_RETRY, every other file OS_FILE_OPEN.
+@param[in,out]	file	Data file object to set */
+void
+Tablespace::file_found(Datafile& file)
+{
+	file.m_exists = true;
+
+	const bool	is_first_file = (&m_files.front() == &file);
+
+	if (is_first_file) {
+		file.set_open_flags(OS_FILE_OPEN_RETRY);
+	} else {
+		file.set_open_flags(OS_FILE_OPEN);
+	}
+}
+
+/** Open or Create the data files if they do not exist.
+Every data file is opened (or created), closed again and then registered
+with the fil_space_t object, which is created while the first file is
+being processed.
+@param[in]	is_temp	whether this is a temporary tablespace
+@return DB_SUCCESS or error code */
+dberr_t
+Tablespace::open_or_create(bool is_temp)
+{
+	ut_ad(!m_files.empty());
+
+	fil_space_t*	space = NULL;
+
+	for (iterator it = begin(); it != end(); ++it) {
+
+		/* Remember whether the file was known to exist before
+		it is opened or created below. */
+		const bool	existed = it->m_exists;
+		const dberr_t	err = it->open_or_create(
+			!m_ignore_read_only && srv_read_only_mode);
+
+		if (err != DB_SUCCESS) {
+			return(err);
+		}
+
+		if (!existed) {
+			/* Set the correct open flags now that we have
+			successfully created the file. */
+			file_found(*it);
+		}
+
+		/* We can close the handle now and open the tablespace
+		the proper way. */
+		it->close();
+
+		if (it == begin()) {
+			/* First data file: create the tablespace entry
+			for the multi-file tablespace in the tablespace
+			manager. */
+			ulint	fsp_flags = 0;
+
+			switch (srv_checksum_algorithm) {
+			case SRV_CHECKSUM_ALGORITHM_FULL_CRC32:
+			case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32:
+				fsp_flags = (FSP_FLAGS_FCRC32_MASK_MARKER
+					     | FSP_FLAGS_FCRC32_PAGE_SSIZE());
+				break;
+			default:
+				fsp_flags = FSP_FLAGS_PAGE_SSIZE();
+			}
+
+			space = fil_space_t::create(
+				m_name, m_space_id, fsp_flags,
+				is_temp
+				? FIL_TYPE_TEMPORARY : FIL_TYPE_TABLESPACE,
+				NULL);
+
+			if (!space) {
+				return DB_ERROR;
+			}
+		}
+
+		ut_a(fil_validate());
+
+		space->add(it->m_filepath, OS_FILE_CLOSED, it->m_size,
+			   false, true);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/** Find a filename in the list of Datafiles for a tablespace.
+The names are compared with innobase_strcasecmp().
+@param[in]	filename	file name to look for
+@return true if the filename exists in the data files */
+bool
+Tablespace::find(const char* filename) const
+{
+	const_iterator	it = begin();
+
+	while (it != end()) {
+		if (!innobase_strcasecmp(filename, it->m_filename)) {
+			return(true);
+		}
+
+		++it;
+	}
+
+	return(false);
+}
+
+/** Delete all the data files. Each file is closed first; a message is
+logged for every file that existed and was removed successfully. */
+void
+Tablespace::delete_files()
+{
+	for (iterator it = begin(); it != end(); ++it) {
+
+		it->close();
+
+		bool	existed;
+
+		if (!os_file_delete_if_exists(innodb_data_file_key,
+					      it->m_filepath, &existed)) {
+			/* The deletion failed. */
+			continue;
+		}
+
+		if (!existed) {
+			/* There was no file to remove. */
+			continue;
+		}
+
+		ib::info() << "Removed temporary tablespace data"
+			" file: \"" << it->m_name << "\"";
+	}
+}
+
+/** Use the ADD DATAFILE path to create a Datafile object and append it
+to m_files.
+The path is split into a directory part and a file name with extension
+'ibd'. It may or may not be an absolute path, but it must end with the
+extension .ibd.
+
+For an absolute path, the directory part is stored in m_path. A relative
+path is used as the file name in its entirety.
+@param[in]	datafile_added	full path of the tablespace file.
+@return DB_SUCCESS */
+dberr_t
+Tablespace::add_datafile(
+	const char*	datafile_added)
+{
+	/* The path provided ends in ".ibd". This was assured by
+	validate_create_tablespace_info() */
+	ut_d(const char* dot = strrchr(datafile_added, '.'));
+	ut_ad(dot != NULL && 0 == strcmp(dot, DOT_IBD));
+
+	/* Work on a normalized private copy of the path. */
+	char*	filepath = mem_strdup(datafile_added);
+	os_normalize_path(filepath);
+
+	/* Only an absolute path is split into a directory and a file
+	name; the directory length of a relative path is taken as 0. */
+	size_t	dirlen = 0;
+
+	if (is_absolute_path(filepath)) {
+		dirlen = dirname_length(filepath);
+	}
+
+	if (dirlen > 0) {
+		/* Store the directory part in m_path. */
+		set_path(filepath, dirlen);
+	}
+
+	/* Append the new Datafile and build its file path from m_path
+	and the file name that follows the directory part. */
+	m_files.push_back(Datafile(m_name, m_flags,
+				   FIL_IBD_FILE_INITIAL_SIZE, 0));
+	m_files.back().make_filepath(m_path, filepath + dirlen, IBD);
+
+	ut_free(filepath);
+
+	return(DB_SUCCESS);
+}
diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc
new file mode 100644
index 00000000..a2c9e1bc
--- /dev/null
+++ b/storage/innobase/fsp/fsp0sysspace.cc
@@ -0,0 +1,994 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2016, 2020, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file fsp/fsp0sysspace.cc
+Multi file, shared, system tablespace implementation.
+
+Created 2012-11-16 by Sunny Bains as srv/srv0space.cc
+Refactored 2013-7-26 by Kevin Lewis
+*******************************************************/
+
+#include "fsp0sysspace.h"
+#include "srv0start.h"
+#include "trx0sys.h"
+#include "dict0load.h"
+#include "mem0mem.h"
+#include "os0file.h"
+#include "row0mysql.h"
+#include "buf0dblwr.h"
+
+/** The server header file is included to access opt_initialize global variable.
+If server passes the option for create/open DB to SE, we should remove such
+direct reference to server header and global variable */
+#include "mysqld.h"
+
+/** The control info (data file list and auto-extend settings) of the
+system tablespace. */
+SysTablespace srv_sys_space;
+
+/** The control info (data file list and auto-extend settings) of the
+shared tablespace for temporary tables. */
+SysTablespace srv_tmp_space;
+
+/** If the last data file is auto-extended, we add this many pages to it
+at a time. We have to make this public because it is a config variable.
+NOTE(review): the unit is not established by the code in this file;
+confirm it against get_autoextend_increment(). */
+uint sys_tablespace_auto_extend_increment;
+
+/** Parse a decimal quantity with an optional unit suffix and convert it
+to megabytes.
+A trailing G/g means gigabytes, M/m megabytes and K/k kilobytes; without
+a recognized suffix the number is taken as bytes. The conversion uses
+integer arithmetic, so anything below one megabyte yields 0.
+@param[in]	ptr	start of the numeric string
+@param[out]	megs	the quantity in megabytes
+@return pointer to the first character after the number and its
+unit suffix (if any) */
+char*
+SysTablespace::parse_units(
+	char*	ptr,
+	ulint*	megs)
+{
+	char*	cursor;
+	ulint	value = strtoul(ptr, &cursor, 10);
+
+	const char	unit = *cursor;
+
+	if (unit == 'G' || unit == 'g') {
+		value *= 1024;
+		++cursor;
+	} else if (unit == 'M' || unit == 'm') {
+		/* Already in megabytes; just consume the suffix. */
+		++cursor;
+	} else if (unit == 'K' || unit == 'k') {
+		value /= 1024;
+		++cursor;
+	} else {
+		/* No unit suffix: the number is in bytes and the
+		character at the cursor is left for the caller. */
+		value /= 1024 * 1024;
+	}
+
+	*megs = value;
+
+	return(cursor);
+}
+
+/** Parse the input params and populate member variables.
+The specification is scanned twice: the first pass only checks the syntax
+and counts the data files, the second pass appends one Datafile per entry
+to m_files and records the auto-extend settings of the last file.
+Entries are separated by ';' and have the form
+filepath:size[K|M|G], optionally followed either by
+:autoextend[:max:size[K|M|G]] (which must end the specification)
+or by newraw or raw.
+@param[in] filepath_spec data file specification string
+@param[in] supports_raw true if the tablespace supports raw devices
+@return true if the specification was parsed successfully */
+bool
+SysTablespace::parse_params(
+ const char* filepath_spec,
+ bool supports_raw)
+{
+ char* filepath;
+ ulint size;
+ char* input_str;
+ ulint n_files = 0;
+
+ ut_ad(m_last_file_size_max == 0);
+ ut_ad(!m_auto_extend_last_file);
+
+ char* new_str = mem_strdup(filepath_spec);
+ char* str = new_str;
+
+ input_str = str;
+
+ /*---------------------- PASS 1 ---------------------------*/
+ /* First calculate the number of data files and check syntax:
+ filepath:size[K |M | G];filepath:size[K |M | G]... .
+ Note that a Windows path may contain a drive name and a ':'.
+ Nothing is stored in this pass; the private copy new_str is freed
+ on every error return. */
+ while (*str != '\0') {
+ filepath = str;
+
+ while ((*str != ':' && *str != '\0')
+ || (*str == ':'
+ && (*(str + 1) == '\\' || *(str + 1) == '/'
+ || *(str + 1) == ':'))) {
+ str++;
+ }
+
+ if (*str == '\0') {
+ ut_free(new_str);
+
+ ib::error()
+ << "syntax error in file path or size"
+ " specified is less than 1 megabyte";
+ return(false);
+ }
+
+ str++;
+
+ str = parse_units(str, &size);
+
+ if (0 == strncmp(str, ":autoextend",
+ (sizeof ":autoextend") - 1)) {
+
+ str += (sizeof ":autoextend") - 1;
+
+ if (0 == strncmp(str, ":max:",
+ (sizeof ":max:") - 1)) {
+
+ str += (sizeof ":max:") - 1;
+
+ str = parse_units(str, &size); /* NOTE(review): this
+ overwrites the initial size parsed above, so
+ the size == 0 check below then tests the max
+ value instead of the initial size */
+ }
+
+ if (*str != '\0') {
+ ut_free(new_str);
+ ib::error()
+ << "syntax error in file path or"
+ << " size specified is less than"
+ << " 1 megabyte";
+ return(false);
+ }
+ }
+
+ if (::strlen(str) >= 6 /* at least the length of "newraw" */
+ && *str == 'n'
+ && *(str + 1) == 'e'
+ && *(str + 2) == 'w') {
+
+ if (!supports_raw) {
+ ib::error()
+ << "Tablespace doesn't support raw"
+ " devices";
+ ut_free(new_str);
+ return(false);
+ }
+
+ str += 3;
+ }
+
+ if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
+ str += 3;
+
+ if (!supports_raw) {
+ ib::error()
+ << "Tablespace doesn't support raw"
+ " devices";
+ ut_free(new_str);
+ return(false);
+ }
+ }
+
+ if (size == 0) {
+
+ ut_free(new_str);
+
+ ib::error()
+ << "syntax error in file path or size"
+ " specified is less than 1 megabyte";
+
+ return(false);
+ }
+
+ ++n_files;
+
+ if (*str == ';') {
+ str++;
+ } else if (*str != '\0') {
+ ut_free(new_str);
+
+ ib::error()
+ << "syntax error in file path or size"
+ " specified is less than 1 megabyte";
+ return(false);
+ }
+ }
+
+ if (n_files == 0) {
+
+ /* filepath_spec must contain at least one data file
+ definition */
+
+ ut_free(new_str);
+
+ ib::error()
+ << "syntax error in file path or size specified"
+ " is less than 1 megabyte";
+
+ return(false);
+ }
+
+ /*---------------------- PASS 2 ---------------------------*/
+ /* Then store the actual values to our arrays. This pass rescans
+ the same buffer from the start and modifies it in place. */
+ str = input_str;
+ ulint order = 0;
+
+ while (*str != '\0') {
+ filepath = str;
+
+ /* Note that we must step over the ':' in a Windows filepath;
+ a Windows path normally looks like C:\ibdata\ibdata1:1G, but
+ a Windows raw partition may have a specification like
+ \\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */
+
+ while ((*str != ':' && *str != '\0')
+ || (*str == ':'
+ && (*(str + 1) == '\\' || *(str + 1) == '/'
+ || *(str + 1) == ':'))) {
+ str++;
+ }
+
+ if (*str == ':') {
+ /* Make filepath a null-terminated string */
+ *str = '\0';
+ str++;
+ }
+
+ str = parse_units(str, &size);
+
+ if (0 == strncmp(str, ":autoextend",
+ (sizeof ":autoextend") - 1)) {
+
+ m_auto_extend_last_file = true;
+
+ str += (sizeof ":autoextend") - 1;
+
+ if (0 == strncmp(str, ":max:",
+ (sizeof ":max:") - 1)) {
+
+ str += (sizeof ":max:") - 1;
+
+ str = parse_units(str, &m_last_file_size_max);
+ }
+
+ if (*str != '\0') {
+ ut_free(new_str);
+ ib::error() << "syntax error in file path or"
+ " size specified is less than 1"
+ " megabyte";
+ return(false);
+ }
+ }
+
+ m_files.push_back(Datafile(filepath, flags(), uint32_t(size),
+ order));
+ Datafile* datafile = &m_files.back();
+ datafile->make_filepath(path(), filepath, NO_EXT);
+
+ if (::strlen(str) >= 6 /* at least the length of "newraw" */
+ && *str == 'n'
+ && *(str + 1) == 'e'
+ && *(str + 2) == 'w') {
+
+ ut_a(supports_raw);
+
+ str += 3;
+
+ /* Initialize new raw device only during initialize */
+ /* JAN: TODO: MySQL 5.7 used opt_initialize */
+ m_files.back().m_type =
+ opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW;
+ }
+
+ if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
+
+ ut_a(supports_raw);
+
+ str += 3;
+
+ /* Initialize new raw device only during initialize.
+ The type is left alone if the "new" prefix above has
+ already set it. */
+ if (m_files.back().m_type == SRV_NOT_RAW) {
+ /* JAN: TODO: MySQL 5.7 used opt_initialize */
+ m_files.back().m_type =
+ opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW;
+ }
+ }
+
+ if (*str == ';') {
+ ++str;
+ }
+ order++;
+ }
+
+ ut_ad(n_files == ulint(m_files.size()));
+
+ ut_free(new_str);
+
+ return(true);
+}
+
+/** Shut down the base Tablespace and reset the SysTablespace-specific
+state (auto-extend settings and status flags) to its initial values. */
+void
+SysTablespace::shutdown()
+{
+	Tablespace::shutdown();
+
+	/* Auto-extend configuration filled in by the parse method. */
+	m_last_file_size_max = 0;
+	m_auto_extend_last_file = false;
+
+	/* Status flags. */
+	m_sanity_checks_done = false;
+	m_is_tablespace_full = false;
+	m_created_new_raw = false;
+}
+
+/** Compare the physical size of a data file with its configured size.
+For the auto-extending last file the physical size may exceed the
+configured initial size (up to the configured maximum, if any), and the
+size of the file object is updated to the physical size. Any other file
+must match its configured size exactly.
+@param[in,out]	file	data file object
+@return DB_SUCCESS if OK else error code. */
+dberr_t
+SysTablespace::check_size(
+	Datafile&	file)
+{
+	const os_offset_t	n_bytes = os_file_get_size(file.m_handle);
+	ut_a(n_bytes != (os_offset_t) -1);
+
+	/* A failed extension (disk full, file system limit, ...) can
+	leave incomplete data at the end of the file, so only whole
+	pages are counted: the byte size is rounded down to pages. */
+	const uint32_t	n_pages = static_cast<uint32_t>(
+		n_bytes >> srv_page_size_shift);
+
+	const bool	is_autoextending = m_auto_extend_last_file
+		&& &file == &m_files.back();
+
+	if (is_autoextending) {
+
+		const bool	below_initial = n_pages < file.m_size;
+		const bool	above_max = m_last_file_size_max > 0
+			&& m_last_file_size_max < n_pages;
+
+		if (below_initial || above_max) {
+			ib::error() << "The Auto-extending " << name()
+				<< " data file '" << file.filepath() << "' is"
+				" of a different size " << n_pages
+				<< " pages than specified"
+				" in the .cnf file: initial " << file.m_size
+				<< " pages, max " << m_last_file_size_max
+				<< " (relevant if non-zero) pages!";
+			return(DB_ERROR);
+		}
+
+		/* Adopt the physical size; the comparison below then
+		trivially succeeds. */
+		file.m_size = n_pages;
+	}
+
+	if (file.m_size == n_pages) {
+		return(DB_SUCCESS);
+	}
+
+	ib::error() << "The " << name() << " data file '"
+		<< file.filepath() << "' is of a different size "
+		<< n_pages << " pages"
+		" than the " << file.m_size << " pages specified in"
+		" the .cnf file!";
+
+	return(DB_ERROR);
+}
+
+/** Extend a data file to its configured size (file.m_size pages) and
+log the progress.
+@param[in]	file	data file object
+@return DB_SUCCESS, or DB_ERROR if the size could not be set */
+dberr_t
+SysTablespace::set_size(
+	Datafile&	file)
+{
+	ut_ad(!srv_read_only_mode || m_ignore_read_only);
+
+	/* Announce the operation first; the size is reported in MB. */
+	ib::info() << "Setting file '" << file.filepath() << "' size to "
+		<< (file.m_size >> (20U - srv_page_size_shift)) << " MB."
+		" Physically writing the file full; Please wait ...";
+
+	const os_offset_t	n_bytes
+		= static_cast<os_offset_t>(file.m_size) << srv_page_size_shift;
+
+	if (!os_file_set_size(file.m_filepath, file.m_handle, n_bytes)) {
+		ib::error() << "Could not set the file size of '"
+			<< file.filepath() << "'. Probably out of disk space";
+
+		return(DB_ERROR);
+	}
+
+	ib::info() << "File '" << file.filepath() << "' size is now "
+		<< (file.m_size >> (20U - srv_page_size_shift))
+		<< " MB.";
+
+	return(DB_SUCCESS);
+}
+
+/** Create a data file that does not exist yet (or open the raw device)
+and, unless it is an old raw device, set it to its configured size.
+@param[in]	file	data file object
+@return DB_SUCCESS or error code */
+dberr_t
+SysTablespace::create_file(
+	Datafile&	file)
+{
+	ut_a(!file.m_exists);
+	ut_ad(!srv_read_only_mode || m_ignore_read_only);
+
+	const bool	is_new_raw = file.m_type == SRV_NEW_RAW;
+	const bool	is_raw = is_new_raw || file.m_type == SRV_OLD_RAW;
+
+	if (is_new_raw) {
+		/* The partition is opened, not created; then it is
+		written over */
+		m_created_new_raw = true;
+	}
+
+	if (is_raw) {
+		srv_start_raw_disk_in_use = TRUE;
+	}
+
+	dberr_t	err = DB_SUCCESS;
+
+	if (is_raw || file.m_type == SRV_NOT_RAW) {
+		err = file.open_or_create(
+			!m_ignore_read_only && srv_read_only_mode);
+	}
+
+	if (err != DB_SUCCESS) {
+		return(err);
+	}
+
+	/* An old raw device keeps whatever size it has. */
+	if (file.m_type == SRV_OLD_RAW) {
+		return(DB_SUCCESS);
+	}
+
+	return(set_size(file));
+}
+
+/** Open an existing data file and then, depending on its type, set its
+size (new raw device) or verify its size (regular file). The file is
+closed again if the size step fails.
+@param[in]	file	data file object
+@return DB_SUCCESS or error code */
+dberr_t
+SysTablespace::open_file(
+	Datafile&	file)
+{
+	ut_a(file.m_exists);
+
+	const bool	is_raw = file.m_type == SRV_NEW_RAW
+		|| file.m_type == SRV_OLD_RAW;
+
+	if (file.m_type == SRV_NEW_RAW) {
+		/* The partition is opened, not created; then it is
+		written over */
+		m_created_new_raw = true;
+	}
+
+	if (is_raw) {
+		srv_start_raw_disk_in_use = TRUE;
+
+		if (srv_read_only_mode && !m_ignore_read_only) {
+			ib::error() << "Can't open a raw device '"
+				<< file.m_filepath << "' when"
+				" --innodb-read-only is set";
+
+			return(DB_ERROR);
+		}
+	}
+
+	dberr_t	err = DB_SUCCESS;
+
+	if (is_raw || file.m_type == SRV_NOT_RAW) {
+		err = file.open_or_create(
+			!m_ignore_read_only && srv_read_only_mode);
+
+		if (err != DB_SUCCESS) {
+			/* Returned without calling file.close(). */
+			return(err);
+		}
+	}
+
+	if (file.m_type == SRV_NEW_RAW) {
+		/* Set file size for new raw device. */
+		err = set_size(file);
+	} else if (file.m_type == SRV_NOT_RAW) {
+		/* Check file size for existing file. */
+		err = check_size(file);
+	}
+	/* For an old raw device there is no size step and err is
+	still DB_SUCCESS here. */
+
+	if (err != DB_SUCCESS) {
+		file.close();
+	}
+
+	return(err);
+}
+
+/** Check the tablespace header for this tablespace.
+Opens the first data file if necessary, reads and validates its first
+page (with one retry after an attempt to restore the page from the
+doublewrite buffer) and verifies that the space ID stored in the page
+matches the space ID of this tablespace. The file is closed before
+returning, except when opening or reading the first page fails.
+@param[out]	flushed_lsn	the value of FIL_PAGE_FILE_FLUSH_LSN
+@return DB_SUCCESS or error code; DB_ERROR if the space ID does not
+match */
+dberr_t
+SysTablespace::read_lsn_and_check_flags(lsn_t* flushed_lsn)
+{
+	dberr_t	err;
+
+	/* Only relevant for the system tablespace. */
+	ut_ad(space_id() == TRX_SYS_SPACE);
+
+	files_t::iterator it = m_files.begin();
+
+	ut_a(it->m_exists);
+
+	if (it->m_handle == OS_FILE_CLOSED) {
+
+		err = it->open_or_create(
+			m_ignore_read_only ? false : srv_read_only_mode);
+
+		if (err != DB_SUCCESS) {
+			return(err);
+		}
+	}
+
+	err = it->read_first_page(
+		m_ignore_read_only ? false : srv_read_only_mode);
+
+	if (err != DB_SUCCESS) {
+		return(err);
+	}
+
+	ut_a(it->order() == 0);
+
+	if (srv_operation == SRV_OPERATION_NORMAL) {
+		buf_dblwr.init_or_load_pages(it->handle(), it->filepath());
+	}
+
+	/* Check the contents of the first page of the
+	first datafile. If the first validation fails, try to restore
+	the page from the doublewrite buffer and validate once more;
+	give up if the restore reports failure (nonzero) or if the
+	second validation fails as well. */
+	for (int retry = 0; retry < 2; ++retry) {
+
+		err = it->validate_first_page(flushed_lsn);
+
+		if (err != DB_SUCCESS
+		    && (retry == 1
+			|| it->restore_from_doublewrite())) {
+
+			it->close();
+
+			return(err);
+		}
+	}
+
+	/* Make sure the tablespace space ID matches the
+	space ID on the first page of the first datafile. */
+	if (space_id() != it->m_space_id) {
+
+		ib::error()
+			<< "The " << name() << " data file '" << it->name()
+			<< "' has the wrong space ID. It should be "
+			<< space_id() << ", but " << it->m_space_id
+			<< " was found";
+
+		it->close();
+
+		/* err is DB_SUCCESS here because the validation
+		above passed, so returning it would report this
+		mismatch as success. */
+		return(DB_ERROR);
+	}
+
+	it->close();
+
+	return(DB_SUCCESS);
+}
+
+/** Check whether a data file exists and can be opened in the required
+mode.
+@param[in]	file	data file object
+@param[out]	reason	FILE_STATUS_VOID, or the exact reason if the
+			check failed
+@return DB_SUCCESS if the file is a regular file with the required
+permissions, DB_NOT_FOUND if os_file_get_status() reported that,
+DB_ERROR otherwise */
+dberr_t
+SysTablespace::check_file_status(
+	const Datafile&	file,
+	file_status_t&	reason)
+{
+	os_file_stat_t	stat;
+
+	memset(&stat, 0x0, sizeof(stat));
+
+	const dberr_t	status = os_file_get_status(
+		file.m_filepath, &stat, true,
+		!m_ignore_read_only && srv_read_only_mode);
+
+	reason = FILE_STATUS_VOID;
+
+	if (status == DB_NOT_FOUND) {
+		/* Not an error at this level; the caller decides. */
+		return(status);
+	}
+
+	if (status == DB_FAIL) {
+		/* File exists but we can't read the rw-permission
+		settings. */
+		ib::error() << "os_file_get_status() failed on '"
+			<< file.filepath()
+			<< "'. Can't determine file permissions";
+
+		reason = FILE_STATUS_RW_PERMISSION_ERROR;
+		return(DB_ERROR);
+	}
+
+	if (status != DB_SUCCESS) {
+		/* No other status is expected. */
+		ut_ad(0);
+		return(status);
+	}
+
+	if (stat.type != OS_FILE_TYPE_FILE) {
+		/* Not a regular file, bail out. */
+		ib::error() << "The " << name() << " data file '"
+			<< file.name() << "' is not a regular"
+			" InnoDB data file.";
+
+		reason = FILE_STATUS_NOT_REGULAR_FILE_ERROR;
+		return(DB_ERROR);
+	}
+
+	/* Note: stat.rw_perm is only valid for "regular" files */
+	if (stat.rw_perm) {
+		return(DB_SUCCESS);
+	}
+
+	const char*	required = "readable";
+
+	if (!srv_read_only_mode || m_ignore_read_only) {
+		required = "writable";
+	}
+
+	ib::error() << "The " << name() << " data file"
+		<< " '" << file.name() << "' must be "
+		<< required;
+
+	reason = FILE_STATUS_READ_WRITE_ERROR;
+
+	return(DB_ERROR);
+}
+
+/** Note that the data file was not found and choose the flags with
+which it will be created (or, for a raw device, opened).
+@param[in,out]	file		data file object
+@param[in,out]	create_new_db	must be false on entry; set to true if
+				the missing file is the first data file
+@return DB_SUCCESS, or DB_ERROR if the file cannot be created because
+of read-only mode */
+dberr_t
+SysTablespace::file_not_found(
+	Datafile&	file,
+	bool*		create_new_db)
+{
+	file.m_exists = false;
+
+	if (srv_read_only_mode && !m_ignore_read_only) {
+		ib::error() << "Can't create file '" << file.filepath()
+			<< "' when --innodb-read-only is set";
+
+		return(DB_ERROR);
+	}
+
+	if (&file != &m_files.front()) {
+		ib::info() << "Need to create a new " << name()
+			<< " data file '" << file.name() << "'.";
+	} else {
+		/* First data file. */
+		ut_a(!*create_new_db);
+		*create_new_db = true;
+
+		if (space_id() == TRX_SYS_SPACE) {
+			ib::info() << "The first " << name() << " data file '"
+				<< file.name() << "' did not exist."
+				" A new tablespace will be created!";
+		}
+	}
+
+	/* Set the file create mode. */
+	if (file.m_type == SRV_NOT_RAW) {
+		file.set_open_flags(OS_FILE_CREATE);
+	} else if (file.m_type == SRV_NEW_RAW
+		   || file.m_type == SRV_OLD_RAW) {
+		file.set_open_flags(OS_FILE_OPEN_RAW);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/** Note that the data file was found and choose the flags with which it
+will be opened.
+@param[in,out]	file	data file object
+@return true if the file is a new raw device, which means that a new
+instance is to be created */
+bool
+SysTablespace::file_found(
+	Datafile&	file)
+{
+	/* Note that the file exists and can be opened
+	in the appropriate mode. */
+	file.m_exists = true;
+
+	/* Set the file open mode */
+	if (file.m_type == SRV_NOT_RAW) {
+		/* Only the first data file gets the "retry" variant. */
+		if (&file == &m_files.front()) {
+			file.set_open_flags(OS_FILE_OPEN_RETRY);
+		} else {
+			file.set_open_flags(OS_FILE_OPEN);
+		}
+	} else if (file.m_type == SRV_NEW_RAW
+		   || file.m_type == SRV_OLD_RAW) {
+		file.set_open_flags(OS_FILE_OPEN_RAW);
+	}
+
+	/* Need to create the system tablespace for new raw device. */
+	const bool	is_new_raw = (file.m_type == SRV_NEW_RAW);
+
+	return(is_new_raw);
+}
+
+/** Check the data file specification: the number of files, the minimum
+total size, and for each data file whether it exists and can be opened.
+Missing files are marked for creation; a missing first file together
+with an existing later file is an error.
+@param[out]	create_new_db		true if a new database is to be
+					created
+@param[in]	min_expected_size	Minimum expected tablespace size
+					in bytes
+@return DB_SUCCESS if all OK else error code */
+dberr_t
+SysTablespace::check_file_spec(
+	bool*	create_new_db,
+	ulint	min_expected_size)
+{
+	*create_new_db = false;
+
+	if (m_files.size() >= 1000) {
+		ib::error() << "There must be < 1000 data files in "
+			<< name() << " but " << m_files.size() << " have been"
+			" defined.";
+
+		return(DB_ERROR);
+	}
+
+	/* The minimum size is only enforced when the last file cannot
+	grow on its own. */
+	if (!m_auto_extend_last_file
+	    && get_sum_of_sizes()
+	    < (min_expected_size >> srv_page_size_shift)) {
+		ib::error() << "Tablespace size must be at least "
+			<< (min_expected_size >> 20) << " MB";
+		return(DB_ERROR);
+	}
+
+	ut_a(!m_files.empty());
+
+	/* If there is more than one data file and the last data file
+	doesn't exist, that is OK. We allow adding of new data files. */
+
+	for (files_t::iterator cur = m_files.begin();
+	     cur != m_files.end();
+	     ++cur) {
+
+		file_status_t	why;
+		dberr_t		status = check_file_status(*cur, why);
+
+		if (status == DB_NOT_FOUND) {
+
+			status = file_not_found(*cur, create_new_db);
+
+			if (status != DB_SUCCESS) {
+				return(status);
+			}
+
+			continue;
+		}
+
+		if (status != DB_SUCCESS) {
+			if (why == FILE_STATUS_READ_WRITE_ERROR) {
+				const char*	required = "readable";
+
+				if (!srv_read_only_mode
+				    || m_ignore_read_only) {
+					required = "writable";
+				}
+
+				ib::error() << "The " << name() << " data file"
+					<< " '" << cur->name() << "' must be "
+					<< required;
+			}
+
+			ut_a(status != DB_FAIL);
+			return(status);
+		}
+
+		if (*create_new_db) {
+			/* An earlier file was missing, yet this one
+			exists. */
+			ib::error() << "The " << name() << " data file '"
+				<< m_files.front().m_name << "' was not found but"
+				" one of the other data files '" << cur->m_name
+				<< "' exists.";
+
+			return(DB_ERROR);
+		}
+
+		*create_new_db = file_found(*cur);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/** Open or create the data files, optionally validate the header page
+of the first file, and then register the tablespace and all of its
+files in fil_system.
+@param[in] is_temp whether this is a temporary tablespace
+@param[in] create_new_db whether we are creating a new database
+@param[out] sum_new_sizes sum of sizes of the new files added;
+ may be NULL if the caller is not interested
+@param[out] flush_lsn FIL_PAGE_FILE_FLUSH_LSN of first file;
+ may be NULL, in which case the header page is not checked
+@return DB_SUCCESS or error code */
+dberr_t
+SysTablespace::open_or_create(
+ bool is_temp,
+ bool create_new_db,
+ ulint* sum_new_sizes,
+ lsn_t* flush_lsn)
+{
+ dberr_t err = DB_SUCCESS;
+ fil_space_t* space = NULL;
+
+ ut_ad(!m_files.empty());
+
+ if (sum_new_sizes) {
+ *sum_new_sizes = 0;
+ }
+
+ files_t::iterator begin = m_files.begin();
+ files_t::iterator end = m_files.end();
+
+ ut_ad(begin->order() == 0);
+
+ for (files_t::iterator it = begin; it != end; ++it) {
+
+ if (it->m_exists) {
+ err = open_file(*it);
+
+ /* For new raw device increment new size. */
+ if (sum_new_sizes && it->m_type == SRV_NEW_RAW) {
+
+ *sum_new_sizes += it->m_size;
+ }
+
+ } else {
+ err = create_file(*it);
+
+ if (sum_new_sizes) {
+ *sum_new_sizes += it->m_size;
+ }
+
+ /* Set the correct open flags now that we have
+ successfully created the file. */
+ if (err == DB_SUCCESS) {
+ /* We ignore new_db OUT parameter here
+ as the information is known at this stage */
+ file_found(*it);
+ }
+ }
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ }
+
+ if (!create_new_db && flush_lsn) {
+ /* Validate the header page in the first datafile
+ and read the flushed LSN from it. */
+ err = read_lsn_and_check_flags(flush_lsn);
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+
+ /* Close the current handles, add space and file info to the
+ fil_system cache and the Data Dictionary, and re-open them
+ in file_system cache so that they stay open until shutdown. */
+ ulint node_counter = 0;
+ for (files_t::iterator it = begin; it != end; ++it) {
+ it->close();
+ it->m_exists = true;
+
+ if (it != begin) { /* the space is created for the first file only */
+ } else if (is_temp) {
+ ut_ad(space_id() == SRV_TMP_SPACE_ID);
+ space = fil_space_t::create(
+ name(), SRV_TMP_SPACE_ID, flags(),
+ FIL_TYPE_TEMPORARY, NULL);
+ ut_ad(space == fil_system.temp_space);
+ if (!space) {
+ return DB_ERROR;
+ }
+ ut_ad(!space->is_compressed());
+ ut_ad(space->full_crc32());
+ } else {
+ ut_ad(space_id() == TRX_SYS_SPACE);
+ space = fil_space_t::create(
+ name(), TRX_SYS_SPACE, it->flags(),
+ FIL_TYPE_TABLESPACE, NULL);
+ ut_ad(space == fil_system.sys_space);
+ if (!space) {
+ return DB_ERROR;
+ }
+ }
+
+ ut_a(fil_validate());
+
+ /* Only the last file gets a maximum size other than its own
+ size: m_last_file_size_max if that is nonzero, else
+ UINT32_MAX. */
+ uint32_t max_size = (++node_counter == m_files.size()
+ ? (m_last_file_size_max == 0
+ ? UINT32_MAX
+ : uint32_t(m_last_file_size_max))
+ : it->m_size);
+
+ space->add(it->m_filepath, OS_FILE_CLOSED, it->m_size,
+ it->m_type != SRV_NOT_RAW, true, max_size);
+ }
+
+ return(err);
+}
+
+/** Normalize the file sizes: convert the size of every data file and
+the maximum size of the last file from megabytes to number of pages. */
+void
+SysTablespace::normalize_size()
+{
+	/* One megabyte is 2^20 bytes and one page is
+	2^srv_page_size_shift bytes. */
+	const auto	mb_to_pages_shift = 20U - srv_page_size_shift;
+
+	m_last_file_size_max <<= mb_to_pages_shift;
+
+	for (auto& data_file : m_files) {
+		data_file.m_size <<= mb_to_pages_shift;
+	}
+}
+
+
+/** Determine by how much the last data file may be extended next.
+Without a configured maximum (m_last_file_size_max == 0) this is the
+auto-extend increment. With a maximum it is the auto-extend increment
+capped by the room that is left below the maximum. If the file already
+exceeds the maximum, an error is logged and 0 is returned.
+@return next increment size, or 0 if the last file must not grow */
+uint32_t SysTablespace::get_increment() const
+{
+	if (m_last_file_size_max == 0)
+		return get_autoextend_increment();
+
+	if (!is_valid_size())
+	{
+		ib::error() << "The last data file in " << name()
+			<< " has a size of " << last_file_size()
+			<< " but the max size allowed is "
+			<< m_last_file_size_max;
+
+		/* The file is already larger than the maximum (this is
+		what is_valid_size() is assumed to report, as the
+		message above implies). The unsigned subtraction below
+		would wrap around and yield the full auto-extend
+		increment, letting the file grow further past its
+		limit. */
+		return 0;
+	}
+
+	return std::min(uint32_t(m_last_file_size_max) - last_file_size(),
+			get_autoextend_increment());
+}
+
+
+/** Check whether any data file of this tablespace is a raw device.
+@return true if configured to use raw devices */
+bool
+SysTablespace::has_raw_device()
+{
+	bool	found = false;
+
+	for (auto& data_file : m_files) {
+		if (data_file.is_raw_device()) {
+			/* One raw device is enough. */
+			found = true;
+			break;
+		}
+	}
+
+	return(found);
+}