summaryrefslogtreecommitdiffstats
path: root/storage/innobase/include/log0recv.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 18:00:34 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 18:00:34 +0000
commit3f619478f796eddbba6e39502fe941b285dd97b1 (patch)
treee2c7b5777f728320e5b5542b6213fd3591ba51e2 /storage/innobase/include/log0recv.h
parentInitial commit. (diff)
downloadmariadb-3f619478f796eddbba6e39502fe941b285dd97b1.tar.xz
mariadb-3f619478f796eddbba6e39502fe941b285dd97b1.zip
Adding upstream version 1:10.11.6.upstream/1%10.11.6upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/innobase/include/log0recv.h')
-rw-r--r--storage/innobase/include/log0recv.h491
1 files changed, 491 insertions, 0 deletions
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
new file mode 100644
index 00000000..6d75e15a
--- /dev/null
+++ b/storage/innobase/include/log0recv.h
@@ -0,0 +1,491 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2022, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/log0recv.h
+Recovery
+
+Created 9/20/1997 Heikki Tuuri
+*******************************************************/
+
+#pragma once
+
+#include "ut0new.h"
+#include "buf0types.h"
+#include "log0log.h"
+#include "mtr0types.h"
+
+#include <deque>
+#include <map>
+
+/** @return whether recovery is currently running. */
+#define recv_recovery_is_on() UNIV_UNLIKELY(recv_sys.recovery_on)
+
+ATTRIBUTE_COLD MY_ATTRIBUTE((nonnull, warn_unused_result))
+/** Apply any buffered redo log to a page.
+@param space tablespace
+@param bpage buffer pool page
+@return whether the page was recovered correctly */
+bool recv_recover_page(fil_space_t* space, buf_page_t* bpage);
+
+/** Start recovering from a redo log checkpoint.
+of first system tablespace page
+@return error code or DB_SUCCESS */
+dberr_t recv_recovery_from_checkpoint_start();
+
+/** Report an operation to create, delete, or rename a file during backup.
+@param[in] space_id tablespace identifier
+@param[in] type file operation redo log type
+@param[in] name file name (not NUL-terminated)
+@param[in] len length of name, in bytes
+@param[in] new_name new file name (NULL if not rename)
+@param[in] new_len length of new_name, in bytes (0 if NULL) */
+extern void (*log_file_op)(uint32_t space_id, int type,
+ const byte* name, ulint len,
+ const byte* new_name, ulint new_len);
+
+/** Report an operation which does undo log tablespace truncation
+during backup
+@param space_id undo tablespace identifier */
+extern void (*undo_space_trunc)(uint32_t space_id);
+
+/** Report an operation which does INIT_PAGE for page0 during backup.
+@param space_id tablespace identifier */
+extern void (*first_page_init)(uint32_t space_id);
+
+/** Stored redo log record */
+struct log_rec_t
+{
+ log_rec_t(lsn_t lsn) : next(nullptr), lsn(lsn) { ut_ad(lsn); }
+ log_rec_t()= delete;
+ log_rec_t(const log_rec_t&)= delete;
+ log_rec_t &operator=(const log_rec_t&)= delete;
+
+ /** next record */
+ log_rec_t *next;
+ /** mtr_t::commit_lsn() of the mini-transaction */
+ const lsn_t lsn;
+};
+
+struct recv_dblwr_t
+{
+ /** Add a page frame to the doublewrite recovery buffer. */
+ void add(byte *page) { pages.push_front(page); }
+
+ /** Validate the page.
+ @param page_id page identifier
+ @param page page contents
+ @param space the tablespace of the page (not available for page 0)
+ @param tmp_buf 2*srv_page_size for decrypting and decompressing any
+ page_compressed or encrypted pages
+ @return whether the page is valid */
+ bool validate_page(const page_id_t page_id, const byte *page,
+ const fil_space_t *space, byte *tmp_buf);
+
+ /** Find a doublewrite copy of a page.
+ @param page_id page identifier
+ @param space tablespace (not available for page_id.page_no()==0)
+ @param tmp_buf 2*srv_page_size for decrypting and decompressing any
+ page_compressed or encrypted pages
+ @return page frame
+ @retval NULL if no valid page for page_id was found */
+ byte* find_page(const page_id_t page_id, const fil_space_t *space= NULL,
+ byte *tmp_buf= NULL);
+
+ /** Restore the first page of the given tablespace from
+ doublewrite buffer.
+ @param space_id tablespace identifier
+ @param name tablespace filepath
+ @param file tablespace file handle
+ @return whether the operation failed */
+ bool restore_first_page(uint32_t space_id, const char *name, os_file_t file);
+
+ typedef std::deque<byte*, ut_allocator<byte*> > list;
+
+ /** Recovered doublewrite buffer page frames */
+ list pages;
+};
+
+/** recv_sys.pages entry; protected by recv_sys.mutex */
+struct page_recv_t
+{
+ /** Recovery status: 0=not in progress, 1=log is being applied,
+ -1=log has been applied and the entry may be erased.
+ Transitions from 1 to -1 are NOT protected by recv_sys.mutex. */
+ Atomic_relaxed<int8_t> being_processed{0};
+ /** Whether reading the page will be skipped */
+ bool skip_read= false;
+ /** Latest written byte offset when applying the log records.
+ @see mtr_t::m_last_offset */
+ uint16_t last_offset= 1;
+ /** log records for a page */
+ class recs_t
+ {
+ /** The first log record */
+ log_rec_t *head= nullptr;
+ /** The last log record */
+ log_rec_t *tail= nullptr;
+ friend struct page_recv_t;
+ public:
+ /** Append a redo log snippet for the page
+ @param recs log snippet */
+ void append(log_rec_t* recs)
+ {
+ if (tail)
+ tail->next= recs;
+ else
+ head= recs;
+ tail= recs;
+ }
+ /** Remove the last records for the page
+ @param start_lsn start of the removed log */
+ ATTRIBUTE_COLD void rewind(lsn_t start_lsn);
+
+ /** @return the last log snippet */
+ const log_rec_t* last() const { return tail; }
+ /** @return the last log snippet */
+ log_rec_t* last() { return tail; }
+
+ class iterator
+ {
+ log_rec_t *cur;
+ public:
+ iterator(log_rec_t* rec) : cur(rec) {}
+ log_rec_t* operator*() const { return cur; }
+ iterator &operator++() { cur= cur->next; return *this; }
+ bool operator!=(const iterator& i) const { return cur != i.cur; }
+ };
+ iterator begin() { return head; }
+ iterator end() { return NULL; }
+ bool empty() const { ut_ad(!head == !tail); return !head; }
+ /** Clear and free the records; @see recv_sys_t::add() */
+ void clear();
+ } log;
+
+ /** Trim old log records for a page.
+ @param start_lsn oldest log sequence number to preserve
+ @return whether all the log for the page was trimmed */
+ inline bool trim(lsn_t start_lsn);
+ /** Ignore any earlier redo log records for this page. */
+ inline void will_not_read();
+};
+
+/** A page initialization operation that was parsed from the redo log */
+struct recv_init
+{
+ /** log sequence number of the page initialization */
+ lsn_t lsn;
+ /** Whether btr_page_create() avoided a read of the page.
+ At the end of the last recovery batch, mark_ibuf_exist()
+ will mark pages for which this flag is set. */
+ bool created;
+};
+
+/** Recovery system data structure */
+struct recv_sys_t
+{
+ using init= recv_init;
+
+ /** mutex protecting this as well as some of page_recv_t */
+ alignas(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t mutex;
+private:
+ /** set when finding a corrupt log block or record, or there is a
+ log parsing buffer overflow */
+ bool found_corrupt_log;
+ /** set when an inconsistency with the file system contents is detected
+ during log scan or apply */
+ bool found_corrupt_fs;
+public:
+ /** @return maximum guaranteed size of a mini-transaction on recovery */
+ static constexpr size_t MTR_SIZE_MAX{1U << 20};
+
+ /** whether we are applying redo log records during crash recovery */
+ bool recovery_on;
+ /** whether recv_recover_page(), invoked from buf_page_t::read_complete(),
+ should apply log records*/
+ bool apply_log_recs;
+ /** number of bytes in log_sys.buf */
+ size_t len;
+ /** start offset of non-parsed log records in log_sys.buf */
+ size_t offset;
+ /** log sequence number of the first non-parsed record */
+ lsn_t lsn;
+ /** log sequence number of the last parsed mini-transaction */
+ lsn_t scanned_lsn;
+ /** log sequence number at the end of the FILE_CHECKPOINT record, or 0 */
+ lsn_t file_checkpoint;
+ /** the time when progress was last reported */
+ time_t progress_time;
+
+ using map = std::map<const page_id_t, page_recv_t,
+ std::less<const page_id_t>,
+ ut_allocator<std::pair<const page_id_t, page_recv_t>>>;
+ /** buffered records waiting to be applied to pages */
+ map pages;
+
+private:
+ /** iterator to pages, used by parse() */
+ map::iterator pages_it;
+
+ /** Process a record that indicates that a tablespace size is being shrunk.
+ @param page_id first page that is not in the file
+ @param lsn log sequence number of the shrink operation */
+ inline void trim(const page_id_t page_id, lsn_t lsn);
+
+ /** Undo tablespaces for which truncate has been logged
+ (indexed by page_id_t::space() - srv_undo_space_id_start) */
+ struct trunc
+ {
+ /** log sequence number of FILE_CREATE, or 0 if none */
+ lsn_t lsn;
+ /** truncated size of the tablespace, or 0 if not truncated */
+ unsigned pages;
+ } truncated_undo_spaces[127];
+
+public:
+ /** The contents of the doublewrite buffer */
+ recv_dblwr_t dblwr;
+
+ __attribute__((warn_unused_result))
+ inline dberr_t read(os_offset_t offset, span<byte> buf);
+ inline size_t files_size();
+ void close_files();
+
+ /** Advance pages_it if it matches the iterator */
+ void pages_it_invalidate(const map::iterator &p)
+ {
+ mysql_mutex_assert_owner(&mutex);
+ if (pages_it == p)
+ pages_it++;
+ }
+ /** Invalidate pages_it if it points to the given tablespace */
+ void pages_it_invalidate(uint32_t space_id)
+ {
+ mysql_mutex_assert_owner(&mutex);
+ if (pages_it != pages.end() && pages_it->first.space() == space_id)
+ pages_it= pages.end();
+ }
+
+private:
+ /** Attempt to initialize a page based on redo log records.
+ @param p iterator
+ @param mtr mini-transaction
+ @param b pre-allocated buffer pool block
+ @param init page initialization
+ @return the recovered block
+ @retval nullptr if the page cannot be initialized based on log records
+ @retval -1 if the page cannot be recovered due to corruption */
+ inline buf_block_t *recover_low(const map::iterator &p, mtr_t &mtr,
+ buf_block_t *b, init &init);
+ /** Attempt to initialize a page based on redo log records.
+ @param page_id page identifier
+ @return the recovered block
+ @retval nullptr if the page cannot be initialized based on log records
+ @retval -1 if the page cannot be recovered due to corruption */
+ ATTRIBUTE_COLD buf_block_t *recover_low(const page_id_t page_id);
+
+ /** All found log files (multiple ones are possible if we are upgrading
+ from before MariaDB Server 10.5.1) */
+ std::vector<log_file_t> files;
+
+ /** Base node of the redo block list.
+ List elements are linked via buf_block_t::unzip_LRU. */
+ UT_LIST_BASE_NODE_T(buf_block_t) blocks;
+
+ /** Allocate a block from the buffer pool for recv_sys.pages */
+ ATTRIBUTE_COLD buf_block_t *add_block();
+
+ /** Wait for buffer pool to become available.
+ @param pages number of buffer pool pages needed */
+ ATTRIBUTE_COLD void wait_for_pool(size_t pages);
+
+ /** Free log for processed pages. */
+ void garbage_collect();
+
+ /** Apply a recovery batch.
+ @param space_id current tablespace identifier
+ @param space current tablespace
+ @param free_block spare buffer block
+ @param last_batch whether it is possible to write more redo log
+ @return whether the caller must provide a new free_block */
+ bool apply_batch(uint32_t space_id, fil_space_t *&space,
+ buf_block_t *&free_block, bool last_batch);
+
+public:
+ /** Apply buffered log to persistent data pages.
+ @param last_batch whether it is possible to write more redo log */
+ void apply(bool last_batch);
+
+#ifdef UNIV_DEBUG
+ /** whether all redo log in the current batch has been applied */
+ bool after_apply= false;
+#endif
+ /** Initialize the redo log recovery subsystem. */
+ void create();
+
+ /** Free most recovery data structures. */
+ void debug_free();
+
+ /** Clean up after create() */
+ void close();
+
+ bool is_initialised() const { return scanned_lsn != 0; }
+
+ /** Find the latest checkpoint.
+ @return error code or DB_SUCCESS */
+ dberr_t find_checkpoint();
+
+ /** Register a redo log snippet for a page.
+ @param it page iterator
+ @param start_lsn start LSN of the mini-transaction
+ @param lsn @see mtr_t::commit_lsn()
+ @param l redo log snippet
+ @param len length of l, in bytes
+ @return whether we ran out of memory */
+ bool add(map::iterator it, lsn_t start_lsn, lsn_t lsn,
+ const byte *l, size_t len);
+
+ /** Parsing result */
+ enum parse_mtr_result {
+ /** a record was successfully parsed */
+ OK,
+ /** the log ended prematurely (need to read more) */
+ PREMATURE_EOF,
+ /** the end of the log was reached */
+ GOT_EOF,
+ /** parse<true>(l, false) ran out of memory */
+ GOT_OOM
+ };
+
+private:
+ /** Parse and register one log_t::FORMAT_10_8 mini-transaction.
+ @tparam store whether to store the records
+ @param l log data source
+ @param if_exists if store: whether to check if the tablespace exists */
+ template<typename source,bool store>
+ inline parse_mtr_result parse(source &l, bool if_exists) noexcept;
+
+ /** Rewind a mini-transaction when parse() runs out of memory.
+ @param l log data source
+ @param begin start of the mini-transaction */
+ template<typename source>
+ ATTRIBUTE_COLD void rewind(source &l, source &begin) noexcept;
+
+ /** Report progress in terms of LSN or pages remaining */
+ ATTRIBUTE_COLD void report_progress() const;
+public:
+ /** Parse and register one log_t::FORMAT_10_8 mini-transaction,
+ handling log_sys.is_pmem() buffer wrap-around.
+ @tparam store whether to store the records
+ @param if_exists if store: whether to check if the tablespace exists */
+ template<bool store>
+ static parse_mtr_result parse_mtr(bool if_exists) noexcept;
+
+ /** Parse and register one log_t::FORMAT_10_8 mini-transaction,
+ handling log_sys.is_pmem() buffer wrap-around.
+ @tparam store whether to store the records
+ @param if_exists if store: whether to check if the tablespace exists */
+ template<bool store>
+ static parse_mtr_result parse_pmem(bool if_exists) noexcept
+#ifdef HAVE_PMEM
+ ;
+#else
+ { return parse_mtr<store>(if_exists); }
+#endif
+
+ /** Erase log records for a page. */
+ void erase(map::iterator p);
+
+ /** Clear a fully processed set of stored redo log records. */
+ void clear();
+
+ /** Determine whether redo log recovery progress should be reported.
+ @param time the current time
+ @return whether progress should be reported
+ (the last report was at least 15 seconds ago) */
+ bool report(time_t time);
+
+ /** The alloc() memory alignment, in bytes */
+ static constexpr size_t ALIGNMENT= sizeof(size_t);
+
+ /** Free a redo log snippet.
+ @param data buffer allocated in add() */
+ inline void free(const void *data);
+
+ /** Remove records for a corrupted page.
+ This function should only be called when innodb_force_recovery is set.
+ @param page_id corrupted page identifier */
+ ATTRIBUTE_COLD void free_corrupted_page(page_id_t page_id);
+
+ /** Flag data file corruption during recovery. */
+ ATTRIBUTE_COLD void set_corrupt_fs();
+ /** Flag log file corruption during recovery. */
+ ATTRIBUTE_COLD void set_corrupt_log();
+
+ /** @return whether data file corruption was found */
+ bool is_corrupt_fs() const { return UNIV_UNLIKELY(found_corrupt_fs); }
+ /** @return whether log file corruption was found */
+ bool is_corrupt_log() const { return UNIV_UNLIKELY(found_corrupt_log); }
+
+ /** Attempt to initialize a page based on redo log records.
+ @param page_id page identifier
+ @return the recovered block
+ @retval nullptr if the page cannot be initialized based on log records
+ @retval -1 if the page cannot be recovered due to corruption */
+ buf_block_t *recover(const page_id_t page_id)
+ {
+ return UNIV_UNLIKELY(recovery_on) ? recover_low(page_id) : nullptr;
+ }
+
+ /** Try to recover a tablespace that was not readable earlier
+ @param p iterator
+ @param name tablespace file name
+ @param free_block spare buffer block
+ @return recovered tablespace
+ @retval nullptr if recovery failed */
+ fil_space_t *recover_deferred(const map::iterator &p,
+ const std::string &name,
+ buf_block_t *&free_block);
+};
+
+/** The recovery system */
+extern recv_sys_t recv_sys;
+
+/** If the following is TRUE, the buffer pool file pages must be invalidated
+after recovery and no ibuf operations are allowed; this will be set if
+recv_sys.pages becomes too full, and log records must be merged
+to file pages already before the recovery is finished: in this case no
+ibuf operations are allowed, as they could modify the pages read in the
+buffer pool before the pages have been recovered to the up-to-date state.
+
+TRUE means that recovery is running and no operations on the log files
+are allowed yet: the variable name is misleading. */
+extern bool recv_no_ibuf_operations;
+/** TRUE when recv_init_crash_recovery() has been called. */
+extern bool recv_needed_recovery;
+#ifdef UNIV_DEBUG
+/** whether writing to the redo log is forbidden;
+protected by exclusive log_sys.latch. */
+extern bool recv_no_log_write;
+#endif /* UNIV_DEBUG */
+
+/** TRUE if buf_page_is_corrupted() should check if the log sequence
+number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
+recv_recovery_from_checkpoint_start(). */
+extern bool recv_lsn_checks_on;