diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 12:33:02 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 12:33:02 +0000 |
commit | 4fa488fb0159c629483b7994aa84e73926b132b9 (patch) | |
tree | 182a19db69cdcb92be54cc6a5b0b9bfab28f80fd /storage/innobase/log | |
parent | Adding debian version 1:10.11.6-2. (diff) | |
download | mariadb-4fa488fb0159c629483b7994aa84e73926b132b9.tar.xz mariadb-4fa488fb0159c629483b7994aa84e73926b132b9.zip |
Merging upstream version 1:10.11.7.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/innobase/log')
-rw-r--r-- | storage/innobase/log/log0log.cc | 56 | ||||
-rw-r--r-- | storage/innobase/log/log0recv.cc | 162 |
2 files changed, 143 insertions, 75 deletions
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 91999c81..9f39b303 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -100,6 +100,7 @@ bool log_t::create() /* LSN 0 and 1 are reserved; @see buf_page_t::oldest_modification_ */ lsn.store(FIRST_LSN, std::memory_order_relaxed); flushed_to_disk_lsn.store(FIRST_LSN, std::memory_order_relaxed); + need_checkpoint.store(true, std::memory_order_relaxed); write_lsn= FIRST_LSN; #ifndef HAVE_PMEM @@ -124,18 +125,17 @@ bool log_t::create() TRASH_ALLOC(flush_buf, buf_size); checkpoint_buf= static_cast<byte*>(aligned_malloc(4096, 4096)); memset_aligned<4096>(checkpoint_buf, 0, 4096); + max_buf_free= buf_size / LOG_BUF_FLUSH_RATIO - LOG_BUF_FLUSH_MARGIN; #else ut_ad(!checkpoint_buf); ut_ad(!buf); ut_ad(!flush_buf); + max_buf_free= 1; #endif latch.SRW_LOCK_INIT(log_latch_key); init_lsn_lock(); - max_buf_free= buf_size / LOG_BUF_FLUSH_RATIO - LOG_BUF_FLUSH_MARGIN; - set_check_flush_or_checkpoint(); - last_checkpoint_lsn= FIRST_LSN; log_capacity= 0; max_modified_age_async= 0; @@ -236,6 +236,7 @@ void log_t::attach_low(log_file_t file, os_offset_t size) log.close(); mprotect(ptr, size_t(size), PROT_READ); buf= static_cast<byte*>(ptr); + max_buf_free= size; # if defined __linux__ || defined _WIN32 set_block_size(CPU_LEVEL1_DCACHE_LINESIZE); # endif @@ -264,6 +265,7 @@ void log_t::attach_low(log_file_t file, os_offset_t size) TRASH_ALLOC(buf, buf_size); TRASH_ALLOC(flush_buf, buf_size); + max_buf_free= buf_size / LOG_BUF_FLUSH_RATIO - LOG_BUF_FLUSH_MARGIN; #endif #if defined __linux__ || defined _WIN32 @@ -813,8 +815,8 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept #ifndef SUX_LOCK_GENERIC ut_ad(latch.is_write_locked()); #endif - ut_ad(!srv_read_only_mode); ut_ad(!is_pmem()); + ut_ad(!srv_read_only_mode); const lsn_t lsn{get_lsn(std::memory_order_relaxed)}; @@ -849,7 +851,7 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept ... /* TODO: Update the LSN and adjust other code. */ #else /* The rest of the block will be written as garbage. - (We want to avoid memset() while holding mutex.) + (We want to avoid memset() while holding exclusive log_sys.latch) This block will be overwritten later, once records beyond the current LSN are generated. */ # ifdef HAVE_valgrind @@ -886,6 +888,7 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept write_lsn= lsn; } + set_check_for_checkpoint(false); return lsn; } @@ -927,8 +930,9 @@ wait and check if an already running write is covering the request. void log_write_up_to(lsn_t lsn, bool durable, const completion_callback *callback) { - ut_ad(!srv_read_only_mode); + ut_ad(!srv_read_only_mode || (log_sys.buf_free < log_sys.max_buf_free)); ut_ad(lsn != LSN_MAX); + ut_ad(lsn != 0); if (UNIV_UNLIKELY(recv_no_ibuf_operations)) { @@ -985,7 +989,6 @@ repeat: @param durable whether to wait for a durable write to complete */ void log_buffer_flush_to_disk(bool durable) { - ut_ad(!srv_read_only_mode); log_write_up_to(log_sys.get_lsn(std::memory_order_acquire), durable); } @@ -1017,16 +1020,6 @@ ATTRIBUTE_COLD void log_write_and_flush() #endif } -/******************************************************************** - -Tries to establish a big enough margin of free space in the log buffer, such -that a new log entry can be catenated without an immediate need for a flush. */ -ATTRIBUTE_COLD static void log_flush_margin() -{ - if (log_sys.buf_free > log_sys.max_buf_free) - log_buffer_flush_to_disk(false); -} - /****************************************************************//** Tries to establish a big enough margin of free space in the log, such that a new log entry can be catenated without an immediate need for a @@ -1034,12 +1027,12 @@ checkpoint. NOTE: this function may only be called if the calling thread owns no synchronization objects! */ ATTRIBUTE_COLD static void log_checkpoint_margin() { - while (log_sys.check_flush_or_checkpoint()) + while (log_sys.check_for_checkpoint()) { log_sys.latch.rd_lock(SRW_LOCK_CALL); ut_ad(!recv_no_log_write); - if (!log_sys.check_flush_or_checkpoint()) + if (!log_sys.check_for_checkpoint()) { func_exit: log_sys.latch.rd_unlock(); @@ -1055,7 +1048,7 @@ func_exit: #ifndef DBUG_OFF skip_checkpoint: #endif - log_sys.set_check_flush_or_checkpoint(false); + log_sys.set_check_for_checkpoint(false); goto func_exit; } @@ -1069,30 +1062,17 @@ func_exit: } } -/** -Checks that there is enough free space in the log to start a new query step. -Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this -function may only be called if the calling thread owns no synchronization -objects! */ -ATTRIBUTE_COLD void log_check_margins() -{ - do - { - log_flush_margin(); - log_checkpoint_margin(); - ut_ad(!recv_no_log_write); - } - while (log_sys.check_flush_or_checkpoint()); -} - /** Wait for a log checkpoint if needed. NOTE that this function may only be called while not holding any synchronization objects except dict_sys.latch. */ void log_free_check() { ut_ad(!lock_sys.is_writer()); - if (log_sys.check_flush_or_checkpoint()) - log_check_margins(); + if (log_sys.check_for_checkpoint()) + { + ut_ad(!recv_no_log_write); + log_checkpoint_margin(); + } } extern void buf_resize_shutdown(); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 3c3fe41e..e72f842f 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -833,7 +833,22 @@ processed: filename= tbl_name + 1; } } - space->add(filename, OS_FILE_CLOSED, size, false, false); + pfs_os_file_t handle= OS_FILE_CLOSED; + if (srv_operation == SRV_OPERATION_RESTORE) + { + /* During mariadb-backup --backup, a table could be renamed, + created and dropped, and we may be missing the file at this + point of --prepare. Try to create the file if it does not exist + already. If the file exists, we'll pass handle=OS_FILE_CLOSED + and the file will be opened normally in fil_space_t::acquire() + inside recv_sys_t::recover_deferred(). */ + bool success; + handle= os_file_create(innodb_data_file_key, filename, + OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT | + OS_FILE_ON_ERROR_SILENT, + OS_FILE_AIO, OS_DATA_FILE, false, &success); + } + space->add(filename, handle, size, false, false); space->recv_size= it->second.size; space->size_in_header= size; return space; @@ -1238,7 +1253,8 @@ static void fil_name_process(const char *name, ulint len, uint32_t space_id, file_name_t& f = p.first->second; - if (auto d = deferred_spaces.find(space_id)) { + auto d = deferred_spaces.find(space_id); + if (d) { if (deleted) { d->deleted = true; goto got_deleted; @@ -1311,7 +1327,16 @@ same_space: FILE_* record. */ ut_ad(space == NULL); - if (srv_force_recovery) { + if (srv_operation == SRV_OPERATION_RESTORE && d + && ftype == FILE_RENAME) { +rename: + d->file_name = fname.name; + f.name = fname.name; + break; + } + + if (srv_force_recovery + || srv_operation == SRV_OPERATION_RESTORE) { /* Without innodb_force_recovery, missing tablespaces will only be reported in @@ -1330,7 +1355,11 @@ same_space: break; case FIL_LOAD_DEFER: - /** Skip the deferred spaces + if (d && ftype == FILE_RENAME + && srv_operation == SRV_OPERATION_RESTORE) { + goto rename; + } + /* Skip the deferred spaces when lsn is already processed */ if (!if_exists) { deferred_spaces.add( @@ -1735,20 +1764,6 @@ dberr_t recv_sys_t::find_checkpoint() { if (wrong_size) return DB_CORRUPTION; - if (log_sys.next_checkpoint_lsn < 8204) - { - /* Before MDEV-14425, InnoDB had a minimum LSN of 8192+12=8204. - Likewise, mariadb-backup --prepare would create an empty - ib_logfile0 after applying the log. We will allow an upgrade - from such an empty log. - - If a user replaces the redo log with an empty file and the - FIL_PAGE_FILE_FLUSH_LSN field was zero in the system - tablespace (see SysTablespace::read_lsn_and_check_flags()) we - must refuse to start up. */ - sql_print_error("InnoDB: ib_logfile0 is empty, and LSN is unknown."); - return DB_CORRUPTION; - } lsn= log_sys.next_checkpoint_lsn; log_sys.format= log_t::FORMAT_3_23; goto upgrade; @@ -2409,7 +2424,7 @@ struct recv_ring : public recv_buf { const size_t s(*this - start); ut_ad(s + len <= srv_page_size); - if (!log_sys.is_encrypted()) + if (!len || !log_sys.is_encrypted()) { if (start.ptr + s == ptr && ptr + len <= end()) return ptr; @@ -3205,7 +3220,7 @@ static buf_block_t *recv_recover_page(buf_block_t *block, mtr_t &mtr, skipped_after_init = false; ut_ad(end_lsn == page_lsn); if (end_lsn != page_lsn) { - sql_print_warning( + sql_print_information( "InnoDB: The last skipped log record" " LSN " LSN_PF " is not equal to page LSN " LSN_PF, @@ -4012,7 +4027,6 @@ static bool recv_scan_log(bool last_phase) const size_t block_size_1{log_sys.get_block_size() - 1}; mysql_mutex_lock(&recv_sys.mutex); - ut_d(recv_sys.after_apply= last_phase); if (!last_phase) recv_sys.clear(); else @@ -4221,6 +4235,7 @@ static bool recv_scan_log(bool last_phase) recv_sys.lsn= rewound_lsn; } func_exit: + ut_d(recv_sys.after_apply= last_phase); mysql_mutex_unlock(&recv_sys.mutex); DBUG_RETURN(!store); } @@ -4507,12 +4522,36 @@ done: return err; } +dberr_t recv_recovery_read_checkpoint() +{ + ut_ad(srv_operation <= SRV_OPERATION_EXPORT_RESTORED || + srv_operation == SRV_OPERATION_RESTORE || + srv_operation == SRV_OPERATION_RESTORE_EXPORT); + ut_d(mysql_mutex_lock(&buf_pool.mutex)); + ut_ad(UT_LIST_GET_LEN(buf_pool.LRU) == 0); + ut_ad(UT_LIST_GET_LEN(buf_pool.unzip_LRU) == 0); + ut_d(mysql_mutex_unlock(&buf_pool.mutex)); + + if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) + { + sql_print_information("InnoDB: innodb_force_recovery=6" + " skips redo log apply"); + return DB_SUCCESS; + } + + log_sys.latch.wr_lock(SRW_LOCK_CALL); + dberr_t err= recv_sys.find_checkpoint(); + log_sys.latch.wr_unlock(); + return err; +} + /** Start recovering from a redo log checkpoint. of first system tablespace page @return error code or DB_SUCCESS */ dberr_t recv_recovery_from_checkpoint_start() { - bool rescan = false; + bool rescan = false; + dberr_t err = DB_SUCCESS; ut_ad(srv_operation <= SRV_OPERATION_EXPORT_RESTORED || srv_operation == SRV_OPERATION_RESTORE @@ -4525,20 +4564,12 @@ dberr_t recv_recovery_from_checkpoint_start() if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) { sql_print_information("InnoDB: innodb_force_recovery=6" " skips redo log apply"); - return(DB_SUCCESS); + return err; } recv_sys.recovery_on = true; log_sys.latch.wr_lock(SRW_LOCK_CALL); - - dberr_t err = recv_sys.find_checkpoint(); - if (err != DB_SUCCESS) { -early_exit: - log_sys.latch.wr_unlock(); - return err; - } - log_sys.set_capacity(); /* Start reading the log from the checkpoint lsn. The variable @@ -4548,7 +4579,9 @@ early_exit: ut_ad(recv_sys.pages.empty()); if (log_sys.format == log_t::FORMAT_3_23) { - goto early_exit; +early_exit: + log_sys.latch.wr_unlock(); + return err; } if (log_sys.is_latest()) { @@ -4843,7 +4876,7 @@ byte *recv_dblwr_t::find_page(const page_id_t page_id, } bool recv_dblwr_t::restore_first_page(uint32_t space_id, const char *name, - os_file_t file) + pfs_os_file_t file) { const page_id_t page_id(space_id, 0); const byte* page= find_page(page_id); @@ -4851,10 +4884,10 @@ bool recv_dblwr_t::restore_first_page(uint32_t space_id, const char *name, { /* If the first page of the given user tablespace is not there in the doublewrite buffer, then the recovery is going to fail - now. Hence this is treated as error. */ - ib::error() - << "Corrupted page " << page_id << " of datafile '" - << name <<"' could not be found in the doublewrite buffer."; + now. Report error only when doublewrite buffer is not empty */ + if (pages.size()) + ib::error() << "Corrupted page " << page_id << " of datafile '" + << name << "' could not be found in the doublewrite buffer."; return true; } @@ -4868,3 +4901,58 @@ bool recv_dblwr_t::restore_first_page(uint32_t space_id, const char *name, IORequestWrite, name, file, page, 0, physical_size) != DB_SUCCESS; } + +uint32_t recv_dblwr_t::find_first_page(const char *name, pfs_os_file_t file) +{ + os_offset_t file_size= os_file_get_size(file); + if (file_size != (os_offset_t) -1) + { + for (const page_t *page : pages) + { + uint32_t space_id= page_get_space_id(page); + byte *read_page= nullptr; + if (page_get_page_no(page) > 0 || space_id == 0) + { +next_page: + aligned_free(read_page); + continue; + } + uint32_t flags= mach_read_from_4( + FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page); + page_id_t page_id(space_id, 0); + size_t page_size= fil_space_t::physical_size(flags); + if (file_size < 4 * page_size) + goto next_page; + read_page= + static_cast<byte*>(aligned_malloc(3 * page_size, page_size)); + /* Read 3 pages from the file and match the space id + with the space id which is stored in + doublewrite buffer page. */ + if (os_file_read(IORequestRead, file, read_page, page_size, + 3 * page_size, nullptr) != DB_SUCCESS) + goto next_page; + for (ulint j= 0; j <= 2; j++) + { + byte *cur_page= read_page + j * page_size; + if (buf_is_zeroes(span<const byte>(cur_page, page_size))) + { + space_id= 0; + goto early_exit; + } + if (mach_read_from_4(cur_page + FIL_PAGE_OFFSET) != j + 1 || + memcmp(cur_page + FIL_PAGE_SPACE_ID, + page + FIL_PAGE_SPACE_ID, 4) || + buf_page_is_corrupted(false, cur_page, flags)) + goto next_page; + } + if (!restore_first_page(space_id, name, file)) + { +early_exit: + aligned_free(read_page); + return space_id; + } + break; + } + } + return 0; +} |