summaryrefslogtreecommitdiffstats
path: root/extra/mariabackup/fil_cur.cc
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-13 12:24:36 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-13 12:24:36 +0000
commit06eaf7232e9a920468c0f8d74dcf2fe8b555501c (patch)
treee2c7b5777f728320e5b5542b6213fd3591ba51e2 /extra/mariabackup/fil_cur.cc
parentInitial commit. (diff)
downloadmariadb-06eaf7232e9a920468c0f8d74dcf2fe8b555501c.tar.xz
mariadb-06eaf7232e9a920468c0f8d74dcf2fe8b555501c.zip
Adding upstream version 1:10.11.6.upstream/1%10.11.6
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'extra/mariabackup/fil_cur.cc')
-rw-r--r--extra/mariabackup/fil_cur.cc522
1 files changed, 522 insertions, 0 deletions
diff --git a/extra/mariabackup/fil_cur.cc b/extra/mariabackup/fil_cur.cc
new file mode 100644
index 00000000..e0a4711a
--- /dev/null
+++ b/extra/mariabackup/fil_cur.cc
@@ -0,0 +1,522 @@
+/******************************************************
+MariaBackup: hot backup tool for InnoDB
+(c) 2009-2013 Percona LLC and/or its affiliates.
+Originally Created 3/3/2009 Yasufumi Kinoshita
+Written by Alexey Kopytov, Aleksandr Kuzminsky, Stewart Smith, Vadim Tkachenko,
+Yasufumi Kinoshita, Ignacio Nin and Baron Schwartz.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*******************************************************/
+
+/* Source file cursor implementation */
+
+#include <my_global.h>
+#include <my_base.h>
+#include <fil0fil.h>
+#include <fsp0fsp.h>
+#include <srv0start.h>
+#include <trx0sys.h>
+
+#include "fil_cur.h"
+#include "fil0crypt.h"
+#include "fil0pagecompress.h"
+#include "common.h"
+#include "read_filt.h"
+#include "xtrabackup.h"
+#include "backup_debug.h"
+
+/* Size of read buffer in pages (640 pages = 10M for 16K sized pages) */
+#define XB_FIL_CUR_PAGES 640
+
+/***********************************************************************
+Extracts the relative path ("database/table.ibd") of a tablespace from a
+specified possibly absolute path.
+
+For user tablespaces both "./database/table.ibd" and
+"/remote/dir/database/table.ibd" result in "database/table.ibd".
+
+For system tablepsaces (i.e. When is_system is TRUE) both "/remote/dir/ibdata1"
+and "./ibdata1" yield "ibdata1" in the output. */
+const char *
+xb_get_relative_path(
+/*=================*/
+ const char* path, /*!< in: tablespace path (either
+ relative or absolute) */
+ ibool is_system) /*!< in: TRUE for system tablespaces,
+ i.e. when only the filename must be
+ returned. */
+{
+ const char *next;
+ const char *cur;
+ const char *prev;
+
+ prev = NULL;
+ cur = path;
+
+#ifdef _WIN32
+ while ((next = strchr(cur, '\\')) != NULL) {
+ prev = cur;
+ cur = next + 1;
+ }
+#endif
+
+ while ((next = strchr(cur, '/')) != NULL) {
+ prev = cur;
+ cur = next + 1;
+ }
+
+ if (is_system) {
+ return(cur);
+ } else {
+ return((prev == NULL) ? cur : prev);
+ }
+
+}
+
+/**********************************************************************//**
+Closes a file. */
+static
+void
+xb_fil_node_close_file(
+/*===================*/
+ fil_node_t* node) /*!< in: file node */
+{
+ ibool ret;
+
+ mysql_mutex_lock(&fil_system.mutex);
+
+ ut_ad(node);
+ ut_a(!node->being_extended);
+
+ if (node->is_open()) {
+ ret = os_file_close(node->handle);
+ ut_a(ret);
+ node->handle = OS_FILE_CLOSED;
+ }
+
+ mysql_mutex_unlock(&fil_system.mutex);
+}
+
+/************************************************************************
+Open a source file cursor and initialize the associated read filter.
+
+@return XB_FIL_CUR_SUCCESS on success, XB_FIL_CUR_SKIP if the source file must
+be skipped and XB_FIL_CUR_ERROR on error. */
+xb_fil_cur_result_t
+xb_fil_cur_open(
+ /*============*/
+ xb_fil_cur_t* cursor, /*!< out: source file cursor */
+ xb_read_filt_t* read_filter, /*!< in/out: the read filter */
+ fil_node_t* node, /*!< in: source tablespace node */
+ uint thread_n, /*!< thread number for diagnostics */
+ ulonglong max_file_size)
+{
+ bool success;
+ int err;
+ /* Initialize these first so xb_fil_cur_close() handles them correctly
+ in case of error */
+ cursor->buf = NULL;
+ cursor->node = NULL;
+ cursor->n_process_batch = 0;
+
+ cursor->space_id = node->space->id;
+
+ strncpy(cursor->abs_path, node->name, (sizeof cursor->abs_path) - 1);
+ cursor->abs_path[(sizeof cursor->abs_path) - 1] = '\0';
+
+ /* Get the relative path for the destination tablespace name, i.e. the
+ one that can be appended to the backup root directory. Non-system
+ tablespaces may have absolute paths for DATA DIRECTORY.
+ We want to make "local" copies for the backup. */
+ strncpy(cursor->rel_path,
+ xb_get_relative_path(cursor->abs_path, cursor->is_system()),
+ (sizeof cursor->rel_path) - 1);
+ cursor->rel_path[(sizeof cursor->rel_path) - 1] = '\0';
+
+ /* In the backup mode we should already have a tablespace handle created
+ by fil_ibd_load() unless it is a system
+ tablespace. Otherwise we open the file here. */
+ if (!node->is_open()) {
+ ut_ad(cursor->is_system()
+ || srv_operation == SRV_OPERATION_RESTORE_DELTA
+ || xb_close_files);
+
+ node->handle = os_file_create_simple_no_error_handling(
+ 0, node->name,
+ OS_FILE_OPEN,
+ OS_FILE_READ_ALLOW_DELETE, true, &success);
+ if (!success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(TRUE);
+
+ msg(thread_n, "mariabackup: error: cannot open "
+ "tablespace %s", cursor->abs_path);
+
+ return(XB_FIL_CUR_SKIP);
+ }
+ }
+
+ ut_ad(node->is_open());
+
+ cursor->node = node;
+ cursor->file = node->handle;
+#ifdef _WIN32
+ HANDLE hDup;
+ DuplicateHandle(GetCurrentProcess(),cursor->file.m_file,
+ GetCurrentProcess(), &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS);
+ int filenr = _open_osfhandle((intptr_t)hDup, 0);
+ if (filenr < 0) {
+ err = EINVAL;
+ }
+ else {
+ err = _fstat64(filenr, &cursor->statinfo);
+ close(filenr);
+ }
+#else
+ err = fstat(cursor->file.m_file, &cursor->statinfo);
+#endif
+ if (max_file_size < (ulonglong)cursor->statinfo.st_size) {
+ cursor->statinfo.st_size = (ulonglong)max_file_size;
+ }
+ if (err) {
+ msg(thread_n, "mariabackup: error: cannot fstat %s",
+ cursor->abs_path);
+
+ xb_fil_cur_close(cursor);
+
+ return(XB_FIL_CUR_SKIP);
+ }
+
+ if (srv_file_flush_method == SRV_O_DIRECT
+ || srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC) {
+
+ os_file_set_nocache(cursor->file, node->name, "OPEN");
+ }
+
+ posix_fadvise(cursor->file, 0, 0, POSIX_FADV_SEQUENTIAL);
+
+ cursor->page_size = node->space->physical_size();
+ cursor->zip_size = node->space->zip_size();
+
+ /* Allocate read buffer */
+ cursor->buf_size = XB_FIL_CUR_PAGES * cursor->page_size;
+ cursor->buf = static_cast<byte*>(aligned_malloc(cursor->buf_size,
+ srv_page_size));
+
+ cursor->buf_read = 0;
+ cursor->buf_npages = 0;
+ cursor->buf_offset = 0;
+ cursor->buf_page_no = 0;
+ cursor->thread_n = thread_n;
+
+ if (!node->space->crypt_data
+ && os_file_read(IORequestRead,
+ node->handle, cursor->buf, 0,
+ cursor->page_size, nullptr) == DB_SUCCESS) {
+ mysql_mutex_lock(&fil_system.mutex);
+ if (!node->space->crypt_data) {
+ node->space->crypt_data = fil_space_read_crypt_data(
+ node->space->zip_size(), cursor->buf);
+ }
+ mysql_mutex_unlock(&fil_system.mutex);
+ }
+
+ cursor->space_size = uint32_t(cursor->statinfo.st_size
+ / cursor->page_size);
+
+ cursor->read_filter = read_filter;
+ cursor->read_filter->init(&cursor->read_filter_ctxt, cursor,
+ node->space->id);
+
+ return(XB_FIL_CUR_SUCCESS);
+}
+
+static bool page_is_corrupted(const byte *page, ulint page_no,
+ const xb_fil_cur_t *cursor,
+ const fil_space_t *space)
+{
+ byte tmp_frame[UNIV_PAGE_SIZE_MAX];
+ byte tmp_page[UNIV_PAGE_SIZE_MAX];
+ const ulint page_size = cursor->page_size;
+ uint16_t page_type = fil_page_get_type(page);
+
+ /* We ignore the doublewrite buffer pages.*/
+ if (cursor->space_id == TRX_SYS_SPACE
+ && page_no >= FSP_EXTENT_SIZE
+ && page_no < FSP_EXTENT_SIZE * 3) {
+ return false;
+ }
+
+ /* Validate page number. */
+ if (mach_read_from_4(page + FIL_PAGE_OFFSET) != page_no
+ && cursor->space_id != TRX_SYS_SPACE) {
+ /* On pages that are not all zero, the
+ page number must match.
+
+ There may be a mismatch on tablespace ID,
+ because files may be renamed during backup.
+ We disable the page number check
+ on the system tablespace, because it may consist
+ of multiple files, and here we count the pages
+ from the start of each file.)
+
+ The first 38 and last 8 bytes are never encrypted. */
+ const ulint* p = reinterpret_cast<const ulint*>(page);
+ const ulint* const end = reinterpret_cast<const ulint*>(
+ page + page_size);
+ do {
+ if (*p++) {
+ return true;
+ }
+ } while (p != end);
+
+ /* Whole zero page is valid. */
+ return false;
+ }
+
+ if (space->full_crc32()) {
+ return buf_page_is_corrupted(true, page, space->flags);
+ }
+
+ /* Validate encrypted pages. The first page is never encrypted.
+ In the system tablespace, the first page would be written with
+ FIL_PAGE_FILE_FLUSH_LSN at shutdown, and if the LSN exceeds
+ 4,294,967,295, the mach_read_from_4() below would wrongly
+ interpret the page as encrypted. We prevent that by checking
+ page_no first. */
+ if (page_no
+ && mach_read_from_4(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION)
+ && (opt_encrypted_backup
+ || (space->crypt_data
+ && space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED))) {
+
+ if (!fil_space_verify_crypt_checksum(page, space->zip_size()))
+ return true;
+
+ /* Compressed encrypted need to be decrypted
+ and decompressed for verification. */
+ if (page_type != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
+ && !opt_extended_validation)
+ return false;
+
+ memcpy(tmp_page, page, page_size);
+
+ if (!space->crypt_data
+ || space->crypt_data->type == CRYPT_SCHEME_UNENCRYPTED
+ || !fil_space_decrypt(space, tmp_frame, tmp_page)) {
+ return true;
+ }
+
+ if (page_type != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
+ return buf_page_is_corrupted(true, tmp_page,
+ space->flags);
+ }
+ }
+
+ if (page_type == FIL_PAGE_PAGE_COMPRESSED) {
+ memcpy(tmp_page, page, page_size);
+ }
+
+ if (page_type == FIL_PAGE_PAGE_COMPRESSED
+ || page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
+ ulint decomp = fil_page_decompress(tmp_frame, tmp_page,
+ space->flags);
+ page_type = fil_page_get_type(tmp_page);
+
+ return (!decomp
+ || (decomp != srv_page_size
+ && cursor->zip_size)
+ || page_type == FIL_PAGE_PAGE_COMPRESSED
+ || page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
+ || buf_page_is_corrupted(true, tmp_page,
+ space->flags));
+ }
+
+ return buf_page_is_corrupted(true, page, space->flags);
+}
+
+/** Reads and verifies the next block of pages from the source
+file. Positions the cursor after the last read non-corrupted page.
+@param[in,out] cursor source file cursor
+@param[out] corrupted_pages adds corrupted pages if
+opt_log_innodb_page_corruption is set
+@return XB_FIL_CUR_SUCCESS if some have been read successfully, XB_FIL_CUR_EOF
+if there are no more pages to read and XB_FIL_CUR_ERROR on error. */
+xb_fil_cur_result_t xb_fil_cur_read(xb_fil_cur_t* cursor,
+ CorruptedPages &corrupted_pages)
+{
+ byte* page;
+ unsigned i;
+ ulint npages;
+ ulint retry_count;
+ xb_fil_cur_result_t ret;
+ ib_int64_t offset;
+ ib_int64_t to_read;
+ const ulint page_size = cursor->page_size;
+ bool defer = false;
+ xb_ad(!cursor->is_system() || page_size == srv_page_size);
+
+ cursor->read_filter->get_next_batch(&cursor->read_filter_ctxt,
+ &offset, &to_read);
+
+ if (to_read == 0LL) {
+ return(XB_FIL_CUR_EOF);
+ }
+
+reinit_buf:
+ cursor->n_process_batch++;
+ if (to_read > (ib_int64_t) cursor->buf_size) {
+ to_read = (ib_int64_t) cursor->buf_size;
+ }
+
+ xb_a(to_read > 0 && to_read <= 0xFFFFFFFFLL);
+
+ if ((to_read & ~(page_size - 1))
+ && offset + to_read == cursor->statinfo.st_size) {
+
+ if (to_read < (ib_int64_t) page_size) {
+ msg(cursor->thread_n, "Warning: junk at the end of "
+ "%s, offset = %llu, to_read = %llu",cursor->abs_path, (ulonglong) offset, (ulonglong) to_read);
+ return(XB_FIL_CUR_EOF);
+ }
+
+ to_read = (ib_int64_t) (((ulint) to_read) &
+ ~(page_size - 1));
+ }
+
+ xb_a((to_read & (page_size - 1)) == 0);
+
+ npages = (ulint) (to_read / page_size);
+
+ retry_count = 10;
+ ret = XB_FIL_CUR_SUCCESS;
+
+ fil_space_t *space = fil_space_t::get(cursor->space_id);
+
+ if (!space) {
+ return XB_FIL_CUR_ERROR;
+ }
+
+read_retry:
+ xtrabackup_io_throttling();
+
+ cursor->buf_read = 0;
+ cursor->buf_npages = 0;
+ cursor->buf_offset = offset;
+ cursor->buf_page_no = static_cast<unsigned>(offset / page_size);
+
+ if (os_file_read(IORequestRead, cursor->file, cursor->buf, offset,
+ (ulint) to_read, nullptr) != DB_SUCCESS) {
+ if (!srv_is_undo_tablespace(cursor->space_id)) {
+ ret = XB_FIL_CUR_ERROR;
+ goto func_exit;
+ }
+
+ if (cursor->buf_page_no
+ >= SRV_UNDO_TABLESPACE_SIZE_IN_PAGES) {
+ ret = XB_FIL_CUR_SKIP;
+ goto func_exit;
+ }
+
+ to_read = SRV_UNDO_TABLESPACE_SIZE_IN_PAGES * page_size;
+
+ if (cursor->n_process_batch > 1) {
+ ret = XB_FIL_CUR_ERROR;
+ goto func_exit;
+ }
+
+ space->release();
+ goto reinit_buf;
+ }
+
+ defer = UT_LIST_GET_FIRST(space->chain)->deferred;
+ /* check pages for corruption and re-read if necessary. i.e. in case of
+ partially written pages */
+ for (page = cursor->buf, i = 0; i < npages;
+ page += page_size, i++) {
+ unsigned page_no = cursor->buf_page_no + i;
+
+ if (!defer && page_is_corrupted(page, page_no, cursor, space)) {
+ retry_count--;
+
+ if (retry_count == 0) {
+ const char *ignore_corruption_warn = opt_log_innodb_page_corruption ?
+ " WARNING!!! The corruption is ignored due to"
+ " log-innodb-page-corruption option, the backup can contain"
+ " corrupted data." : "";
+ msg(cursor->thread_n,
+ "Error: failed to read page after "
+ "10 retries. File %s seems to be "
+ "corrupted.%s", cursor->abs_path, ignore_corruption_warn);
+ ut_print_buf(stderr, page, page_size);
+ if (opt_log_innodb_page_corruption) {
+ corrupted_pages.add_page(cursor->node->name,
+ {cursor->node->space->id, page_no});
+ retry_count = 1;
+ }
+ else {
+ ret = XB_FIL_CUR_ERROR;
+ break;
+ }
+ }
+ else {
+ msg(cursor->thread_n, "Database page corruption detected at page "
+ UINT32PF ", retrying...",
+ page_no);
+ std::this_thread::sleep_for(
+ std::chrono::milliseconds(100));
+ goto read_retry;
+ }
+ }
+ DBUG_EXECUTE_FOR_KEY("add_corrupted_page_for",
+ cursor->node->space->name(),
+ {
+ unsigned corrupted_page_no =
+ static_cast<unsigned>(strtoul(dbug_val, NULL, 10));
+ if (page_no == corrupted_page_no)
+ corrupted_pages.add_page(cursor->node->name,
+ {cursor->node->space->id,
+ corrupted_page_no});
+ });
+ cursor->buf_read += page_size;
+ cursor->buf_npages++;
+ }
+
+ posix_fadvise(cursor->file, offset, to_read, POSIX_FADV_DONTNEED);
+func_exit:
+ space->release();
+ return(ret);
+}
+
+/************************************************************************
+Close the source file cursor opened with xb_fil_cur_open() and its
+associated read filter. */
+void
+xb_fil_cur_close(
+/*=============*/
+ xb_fil_cur_t *cursor) /*!< in/out: source file cursor */
+{
+ if (cursor->read_filter) {
+ cursor->read_filter->deinit(&cursor->read_filter_ctxt);
+ }
+
+ aligned_free(cursor->buf);
+ cursor->buf = NULL;
+
+ if (cursor->node != NULL) {
+ xb_fil_node_close_file(cursor->node);
+ cursor->file = OS_FILE_CLOSED;
+ }
+}