diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:00:34 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:00:34 +0000 |
commit | 3f619478f796eddbba6e39502fe941b285dd97b1 (patch) | |
tree | e2c7b5777f728320e5b5542b6213fd3591ba51e2 /extra/mariabackup/fil_cur.cc | |
parent | Initial commit. (diff) | |
download | mariadb-3f619478f796eddbba6e39502fe941b285dd97b1.tar.xz mariadb-3f619478f796eddbba6e39502fe941b285dd97b1.zip |
Adding upstream version 1:10.11.6.upstream/1%10.11.6upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'extra/mariabackup/fil_cur.cc')
-rw-r--r-- | extra/mariabackup/fil_cur.cc | 522 |
1 files changed, 522 insertions, 0 deletions
diff --git a/extra/mariabackup/fil_cur.cc b/extra/mariabackup/fil_cur.cc new file mode 100644 index 00000000..e0a4711a --- /dev/null +++ b/extra/mariabackup/fil_cur.cc @@ -0,0 +1,522 @@ +/****************************************************** +MariaBackup: hot backup tool for InnoDB +(c) 2009-2013 Percona LLC and/or its affiliates. +Originally Created 3/3/2009 Yasufumi Kinoshita +Written by Alexey Kopytov, Aleksandr Kuzminsky, Stewart Smith, Vadim Tkachenko, +Yasufumi Kinoshita, Ignacio Nin and Baron Schwartz. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*******************************************************/ + +/* Source file cursor implementation */ + +#include <my_global.h> +#include <my_base.h> +#include <fil0fil.h> +#include <fsp0fsp.h> +#include <srv0start.h> +#include <trx0sys.h> + +#include "fil_cur.h" +#include "fil0crypt.h" +#include "fil0pagecompress.h" +#include "common.h" +#include "read_filt.h" +#include "xtrabackup.h" +#include "backup_debug.h" + +/* Size of read buffer in pages (640 pages = 10M for 16K sized pages) */ +#define XB_FIL_CUR_PAGES 640 + +/*********************************************************************** +Extracts the relative path ("database/table.ibd") of a tablespace from a +specified possibly absolute path. + +For user tablespaces both "./database/table.ibd" and +"/remote/dir/database/table.ibd" result in "database/table.ibd". + +For system tablepsaces (i.e. When is_system is TRUE) both "/remote/dir/ibdata1" +and "./ibdata1" yield "ibdata1" in the output. */ +const char * +xb_get_relative_path( +/*=================*/ + const char* path, /*!< in: tablespace path (either + relative or absolute) */ + ibool is_system) /*!< in: TRUE for system tablespaces, + i.e. when only the filename must be + returned. */ +{ + const char *next; + const char *cur; + const char *prev; + + prev = NULL; + cur = path; + +#ifdef _WIN32 + while ((next = strchr(cur, '\\')) != NULL) { + prev = cur; + cur = next + 1; + } +#endif + + while ((next = strchr(cur, '/')) != NULL) { + prev = cur; + cur = next + 1; + } + + if (is_system) { + return(cur); + } else { + return((prev == NULL) ? cur : prev); + } + +} + +/**********************************************************************//** +Closes a file. */ +static +void +xb_fil_node_close_file( +/*===================*/ + fil_node_t* node) /*!< in: file node */ +{ + ibool ret; + + mysql_mutex_lock(&fil_system.mutex); + + ut_ad(node); + ut_a(!node->being_extended); + + if (node->is_open()) { + ret = os_file_close(node->handle); + ut_a(ret); + node->handle = OS_FILE_CLOSED; + } + + mysql_mutex_unlock(&fil_system.mutex); +} + +/************************************************************************ +Open a source file cursor and initialize the associated read filter. + +@return XB_FIL_CUR_SUCCESS on success, XB_FIL_CUR_SKIP if the source file must +be skipped and XB_FIL_CUR_ERROR on error. */ +xb_fil_cur_result_t +xb_fil_cur_open( + /*============*/ + xb_fil_cur_t* cursor, /*!< out: source file cursor */ + xb_read_filt_t* read_filter, /*!< in/out: the read filter */ + fil_node_t* node, /*!< in: source tablespace node */ + uint thread_n, /*!< thread number for diagnostics */ + ulonglong max_file_size) +{ + bool success; + int err; + /* Initialize these first so xb_fil_cur_close() handles them correctly + in case of error */ + cursor->buf = NULL; + cursor->node = NULL; + cursor->n_process_batch = 0; + + cursor->space_id = node->space->id; + + strncpy(cursor->abs_path, node->name, (sizeof cursor->abs_path) - 1); + cursor->abs_path[(sizeof cursor->abs_path) - 1] = '\0'; + + /* Get the relative path for the destination tablespace name, i.e. the + one that can be appended to the backup root directory. Non-system + tablespaces may have absolute paths for DATA DIRECTORY. + We want to make "local" copies for the backup. */ + strncpy(cursor->rel_path, + xb_get_relative_path(cursor->abs_path, cursor->is_system()), + (sizeof cursor->rel_path) - 1); + cursor->rel_path[(sizeof cursor->rel_path) - 1] = '\0'; + + /* In the backup mode we should already have a tablespace handle created + by fil_ibd_load() unless it is a system + tablespace. Otherwise we open the file here. */ + if (!node->is_open()) { + ut_ad(cursor->is_system() + || srv_operation == SRV_OPERATION_RESTORE_DELTA + || xb_close_files); + + node->handle = os_file_create_simple_no_error_handling( + 0, node->name, + OS_FILE_OPEN, + OS_FILE_READ_ALLOW_DELETE, true, &success); + if (!success) { + /* The following call prints an error message */ + os_file_get_last_error(TRUE); + + msg(thread_n, "mariabackup: error: cannot open " + "tablespace %s", cursor->abs_path); + + return(XB_FIL_CUR_SKIP); + } + } + + ut_ad(node->is_open()); + + cursor->node = node; + cursor->file = node->handle; +#ifdef _WIN32 + HANDLE hDup; + DuplicateHandle(GetCurrentProcess(),cursor->file.m_file, + GetCurrentProcess(), &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS); + int filenr = _open_osfhandle((intptr_t)hDup, 0); + if (filenr < 0) { + err = EINVAL; + } + else { + err = _fstat64(filenr, &cursor->statinfo); + close(filenr); + } +#else + err = fstat(cursor->file.m_file, &cursor->statinfo); +#endif + if (max_file_size < (ulonglong)cursor->statinfo.st_size) { + cursor->statinfo.st_size = (ulonglong)max_file_size; + } + if (err) { + msg(thread_n, "mariabackup: error: cannot fstat %s", + cursor->abs_path); + + xb_fil_cur_close(cursor); + + return(XB_FIL_CUR_SKIP); + } + + if (srv_file_flush_method == SRV_O_DIRECT + || srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC) { + + os_file_set_nocache(cursor->file, node->name, "OPEN"); + } + + posix_fadvise(cursor->file, 0, 0, POSIX_FADV_SEQUENTIAL); + + cursor->page_size = node->space->physical_size(); + cursor->zip_size = node->space->zip_size(); + + /* Allocate read buffer */ + cursor->buf_size = XB_FIL_CUR_PAGES * cursor->page_size; + cursor->buf = static_cast<byte*>(aligned_malloc(cursor->buf_size, + srv_page_size)); + + cursor->buf_read = 0; + cursor->buf_npages = 0; + cursor->buf_offset = 0; + cursor->buf_page_no = 0; + cursor->thread_n = thread_n; + + if (!node->space->crypt_data + && os_file_read(IORequestRead, + node->handle, cursor->buf, 0, + cursor->page_size, nullptr) == DB_SUCCESS) { + mysql_mutex_lock(&fil_system.mutex); + if (!node->space->crypt_data) { + node->space->crypt_data = fil_space_read_crypt_data( + node->space->zip_size(), cursor->buf); + } + mysql_mutex_unlock(&fil_system.mutex); + } + + cursor->space_size = uint32_t(cursor->statinfo.st_size + / cursor->page_size); + + cursor->read_filter = read_filter; + cursor->read_filter->init(&cursor->read_filter_ctxt, cursor, + node->space->id); + + return(XB_FIL_CUR_SUCCESS); +} + +static bool page_is_corrupted(const byte *page, ulint page_no, + const xb_fil_cur_t *cursor, + const fil_space_t *space) +{ + byte tmp_frame[UNIV_PAGE_SIZE_MAX]; + byte tmp_page[UNIV_PAGE_SIZE_MAX]; + const ulint page_size = cursor->page_size; + uint16_t page_type = fil_page_get_type(page); + + /* We ignore the doublewrite buffer pages.*/ + if (cursor->space_id == TRX_SYS_SPACE + && page_no >= FSP_EXTENT_SIZE + && page_no < FSP_EXTENT_SIZE * 3) { + return false; + } + + /* Validate page number. */ + if (mach_read_from_4(page + FIL_PAGE_OFFSET) != page_no + && cursor->space_id != TRX_SYS_SPACE) { + /* On pages that are not all zero, the + page number must match. + + There may be a mismatch on tablespace ID, + because files may be renamed during backup. + We disable the page number check + on the system tablespace, because it may consist + of multiple files, and here we count the pages + from the start of each file.) + + The first 38 and last 8 bytes are never encrypted. */ + const ulint* p = reinterpret_cast<const ulint*>(page); + const ulint* const end = reinterpret_cast<const ulint*>( + page + page_size); + do { + if (*p++) { + return true; + } + } while (p != end); + + /* Whole zero page is valid. */ + return false; + } + + if (space->full_crc32()) { + return buf_page_is_corrupted(true, page, space->flags); + } + + /* Validate encrypted pages. The first page is never encrypted. + In the system tablespace, the first page would be written with + FIL_PAGE_FILE_FLUSH_LSN at shutdown, and if the LSN exceeds + 4,294,967,295, the mach_read_from_4() below would wrongly + interpret the page as encrypted. We prevent that by checking + page_no first. */ + if (page_no + && mach_read_from_4(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) + && (opt_encrypted_backup + || (space->crypt_data + && space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED))) { + + if (!fil_space_verify_crypt_checksum(page, space->zip_size())) + return true; + + /* Compressed encrypted need to be decrypted + and decompressed for verification. */ + if (page_type != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED + && !opt_extended_validation) + return false; + + memcpy(tmp_page, page, page_size); + + if (!space->crypt_data + || space->crypt_data->type == CRYPT_SCHEME_UNENCRYPTED + || !fil_space_decrypt(space, tmp_frame, tmp_page)) { + return true; + } + + if (page_type != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { + return buf_page_is_corrupted(true, tmp_page, + space->flags); + } + } + + if (page_type == FIL_PAGE_PAGE_COMPRESSED) { + memcpy(tmp_page, page, page_size); + } + + if (page_type == FIL_PAGE_PAGE_COMPRESSED + || page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { + ulint decomp = fil_page_decompress(tmp_frame, tmp_page, + space->flags); + page_type = fil_page_get_type(tmp_page); + + return (!decomp + || (decomp != srv_page_size + && cursor->zip_size) + || page_type == FIL_PAGE_PAGE_COMPRESSED + || page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED + || buf_page_is_corrupted(true, tmp_page, + space->flags)); + } + + return buf_page_is_corrupted(true, page, space->flags); +} + +/** Reads and verifies the next block of pages from the source +file. Positions the cursor after the last read non-corrupted page. +@param[in,out] cursor source file cursor +@param[out] corrupted_pages adds corrupted pages if +opt_log_innodb_page_corruption is set +@return XB_FIL_CUR_SUCCESS if some have been read successfully, XB_FIL_CUR_EOF +if there are no more pages to read and XB_FIL_CUR_ERROR on error. */ +xb_fil_cur_result_t xb_fil_cur_read(xb_fil_cur_t* cursor, + CorruptedPages &corrupted_pages) +{ + byte* page; + unsigned i; + ulint npages; + ulint retry_count; + xb_fil_cur_result_t ret; + ib_int64_t offset; + ib_int64_t to_read; + const ulint page_size = cursor->page_size; + bool defer = false; + xb_ad(!cursor->is_system() || page_size == srv_page_size); + + cursor->read_filter->get_next_batch(&cursor->read_filter_ctxt, + &offset, &to_read); + + if (to_read == 0LL) { + return(XB_FIL_CUR_EOF); + } + +reinit_buf: + cursor->n_process_batch++; + if (to_read > (ib_int64_t) cursor->buf_size) { + to_read = (ib_int64_t) cursor->buf_size; + } + + xb_a(to_read > 0 && to_read <= 0xFFFFFFFFLL); + + if ((to_read & ~(page_size - 1)) + && offset + to_read == cursor->statinfo.st_size) { + + if (to_read < (ib_int64_t) page_size) { + msg(cursor->thread_n, "Warning: junk at the end of " + "%s, offset = %llu, to_read = %llu",cursor->abs_path, (ulonglong) offset, (ulonglong) to_read); + return(XB_FIL_CUR_EOF); + } + + to_read = (ib_int64_t) (((ulint) to_read) & + ~(page_size - 1)); + } + + xb_a((to_read & (page_size - 1)) == 0); + + npages = (ulint) (to_read / page_size); + + retry_count = 10; + ret = XB_FIL_CUR_SUCCESS; + + fil_space_t *space = fil_space_t::get(cursor->space_id); + + if (!space) { + return XB_FIL_CUR_ERROR; + } + +read_retry: + xtrabackup_io_throttling(); + + cursor->buf_read = 0; + cursor->buf_npages = 0; + cursor->buf_offset = offset; + cursor->buf_page_no = static_cast<unsigned>(offset / page_size); + + if (os_file_read(IORequestRead, cursor->file, cursor->buf, offset, + (ulint) to_read, nullptr) != DB_SUCCESS) { + if (!srv_is_undo_tablespace(cursor->space_id)) { + ret = XB_FIL_CUR_ERROR; + goto func_exit; + } + + if (cursor->buf_page_no + >= SRV_UNDO_TABLESPACE_SIZE_IN_PAGES) { + ret = XB_FIL_CUR_SKIP; + goto func_exit; + } + + to_read = SRV_UNDO_TABLESPACE_SIZE_IN_PAGES * page_size; + + if (cursor->n_process_batch > 1) { + ret = XB_FIL_CUR_ERROR; + goto func_exit; + } + + space->release(); + goto reinit_buf; + } + + defer = UT_LIST_GET_FIRST(space->chain)->deferred; + /* check pages for corruption and re-read if necessary. i.e. in case of + partially written pages */ + for (page = cursor->buf, i = 0; i < npages; + page += page_size, i++) { + unsigned page_no = cursor->buf_page_no + i; + + if (!defer && page_is_corrupted(page, page_no, cursor, space)) { + retry_count--; + + if (retry_count == 0) { + const char *ignore_corruption_warn = opt_log_innodb_page_corruption ? + " WARNING!!! The corruption is ignored due to" + " log-innodb-page-corruption option, the backup can contain" + " corrupted data." : ""; + msg(cursor->thread_n, + "Error: failed to read page after " + "10 retries. File %s seems to be " + "corrupted.%s", cursor->abs_path, ignore_corruption_warn); + ut_print_buf(stderr, page, page_size); + if (opt_log_innodb_page_corruption) { + corrupted_pages.add_page(cursor->node->name, + {cursor->node->space->id, page_no}); + retry_count = 1; + } + else { + ret = XB_FIL_CUR_ERROR; + break; + } + } + else { + msg(cursor->thread_n, "Database page corruption detected at page " + UINT32PF ", retrying...", + page_no); + std::this_thread::sleep_for( + std::chrono::milliseconds(100)); + goto read_retry; + } + } + DBUG_EXECUTE_FOR_KEY("add_corrupted_page_for", + cursor->node->space->name(), + { + unsigned corrupted_page_no = + static_cast<unsigned>(strtoul(dbug_val, NULL, 10)); + if (page_no == corrupted_page_no) + corrupted_pages.add_page(cursor->node->name, + {cursor->node->space->id, + corrupted_page_no}); + }); + cursor->buf_read += page_size; + cursor->buf_npages++; + } + + posix_fadvise(cursor->file, offset, to_read, POSIX_FADV_DONTNEED); +func_exit: + space->release(); + return(ret); +} + +/************************************************************************ +Close the source file cursor opened with xb_fil_cur_open() and its +associated read filter. */ +void +xb_fil_cur_close( +/*=============*/ + xb_fil_cur_t *cursor) /*!< in/out: source file cursor */ +{ + if (cursor->read_filter) { + cursor->read_filter->deinit(&cursor->read_filter_ctxt); + } + + aligned_free(cursor->buf); + cursor->buf = NULL; + + if (cursor->node != NULL) { + xb_fil_node_close_file(cursor->node); + cursor->file = OS_FILE_CLOSED; + } +} |