diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:00:34 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:00:34 +0000 |
commit | 3f619478f796eddbba6e39502fe941b285dd97b1 (patch) | |
tree | e2c7b5777f728320e5b5542b6213fd3591ba51e2 /storage/innobase/row/row0uins.cc | |
parent | Initial commit. (diff) | |
download | mariadb-3f619478f796eddbba6e39502fe941b285dd97b1.tar.xz mariadb-3f619478f796eddbba6e39502fe941b285dd97b1.zip |
Adding upstream version 1:10.11.6. upstream/1%10.11.6 upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/innobase/row/row0uins.cc')
-rw-r--r-- | storage/innobase/row/row0uins.cc | 652 |
1 files changed, 652 insertions, 0 deletions
diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc new file mode 100644 index 00000000..23255cc9 --- /dev/null +++ b/storage/innobase/row/row0uins.cc @@ -0,0 +1,652 @@ +/***************************************************************************** + +Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, 2023, MariaDB Corporation. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file row/row0uins.cc +Fresh insert undo + +Created 2/25/1997 Heikki Tuuri +*******************************************************/ + +#include "row0uins.h" +#include "dict0dict.h" +#include "dict0stats.h" +#include "dict0boot.h" +#include "dict0crea.h" +#include "trx0undo.h" +#include "trx0roll.h" +#include "btr0btr.h" +#include "mach0data.h" +#include "row0undo.h" +#include "row0vers.h" +#include "trx0trx.h" +#include "trx0rec.h" +#include "row0row.h" +#include "row0upd.h" +#include "que0que.h" +#include "ibuf0ibuf.h" +#include "log0log.h" +#include "fil0fil.h" +#include <mysql/service_thd_mdl.h> + +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before starting that 
operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST not hold any synchronization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + +/***************************************************************//** +Removes a clustered index record. The pcur in node was positioned on the +record, now it is detached. +@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +row_undo_ins_remove_clust_rec( +/*==========================*/ + undo_node_t* node) /*!< in: undo node */ +{ + dberr_t err; + ulint n_tries = 0; + mtr_t mtr; + dict_index_t* index = node->pcur.index(); + table_id_t table_id = 0; + const bool dict_locked = node->trx->dict_operation_lock_mode; +restart: /* jumped back to once, with table_id set, in the DICT_INDEXES_ID case below */ + MDL_ticket* mdl_ticket = nullptr; + ut_ad(!table_id || dict_locked + || !node->trx->dict_operation_lock_mode); + dict_table_t *table = table_id + ? dict_table_open_on_id(table_id, dict_locked, + DICT_TABLE_OP_OPEN_ONLY_IF_CACHED, + node->trx->mysql_thd, &mdl_ticket) + : nullptr; + + ut_ad(index->is_primary()); + ut_ad(node->trx->in_rollback); + + mtr.start(); + if (index->table->is_temporary()) { + ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); + mtr.set_log_mode(MTR_LOG_NO_REDO); + ut_ad(index->table->id >= DICT_HDR_FIRST_ID); + } else { + index->set_modified(mtr); + ut_ad(lock_table_has_locks(index->table)); + } + + /* This is similar to row_undo_mod_clust(). The DDL thread may + already have copied this row from the log to the new table. + We must log the removal, so that the row will be correctly + purged. However, we can log the removal out of sync with the + B-tree modification. */ + ut_a(node->pcur.restore_position( + (node->rec_type == TRX_UNDO_INSERT_METADATA) + ? 
BTR_MODIFY_TREE + : BTR_MODIFY_LEAF, + &mtr) == btr_pcur_t::SAME_ALL); + rec_t* rec = btr_pcur_get_rec(&node->pcur); + + ut_ad(rec_get_trx_id(rec, index) == node->trx->id + || node->table->is_temporary()); + ut_ad(!rec_get_deleted_flag(rec, index->table->not_redundant()) + || rec_is_alter_metadata(rec, index->table->not_redundant())); + ut_ad(rec_is_metadata(rec, index->table->not_redundant()) + == (node->rec_type == TRX_UNDO_INSERT_METADATA)); + + switch (node->table->id) { + case DICT_COLUMNS_ID: + /* This is rolling back an INSERT into SYS_COLUMNS. + If it was part of an instant ALTER TABLE operation, we + must evict the table definition, so that it can be + reloaded after the dictionary operation has been + completed. At this point, any corresponding operation + to the metadata record will have been rolled back. */ + ut_ad(node->trx->dict_operation_lock_mode); + ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); + if (rec_get_n_fields_old(rec) + != DICT_NUM_FIELDS__SYS_COLUMNS + || (rec_get_1byte_offs_flag(rec) + ? 
rec_1_get_field_end_info(rec, 0) != 8 + : rec_2_get_field_end_info(rec, 0) != 8)) { + break; + } + static_assert(!DICT_FLD__SYS_COLUMNS__TABLE_ID, ""); + node->trx->evict_table(mach_read_from_8(rec)); + break; + case DICT_INDEXES_ID: /* This is rolling back an INSERT into SYS_INDEXES. */ + ut_ad(node->trx->dict_operation_lock_mode); + ut_ad(node->rec_type == TRX_UNDO_INSERT_REC); + if (!table_id) { /* first pass: read the table id from the record, then restart so that the table is opened */ + table_id = mach_read_from_8(rec); + if (table_id) { + mtr.commit(); + goto restart; + } + ut_ad("corrupted SYS_INDEXES record" == 0); + } + + pfs_os_file_t d = OS_FILE_CLOSED; + + const uint32_t space_id = dict_drop_index_tree( + &node->pcur, node->trx, &mtr); + if (space_id) { + if (table) { + lock_release_on_rollback(node->trx, + table); + if (!dict_locked) { + dict_sys.lock(SRW_LOCK_CALL); + } + if (table->release()) { + dict_sys.remove(table); + } else if (table->space_id + == space_id) { + table->space = nullptr; + table->file_unreadable = true; + } + if (!dict_locked) { + dict_sys.unlock(); + } + table = nullptr; + if (!mdl_ticket); /* empty statement: no MDL ticket was acquired, nothing to release */ + else if (MDL_context* mdl_context = + static_cast<MDL_context*>( + thd_mdl_context( + node->trx-> + mysql_thd))) { + mdl_context->release_lock( + mdl_ticket); + mdl_ticket = nullptr; + } + } + + d = fil_delete_tablespace(space_id); + } + + mtr.commit(); + + if (d != OS_FILE_CLOSED) { + os_file_close(d); + } + + if (space_id) { + ibuf_delete_for_discarded_space(space_id); + } + + mtr.start(); + ut_a(node->pcur.restore_position( + BTR_MODIFY_LEAF, &mtr) == btr_pcur_t::SAME_ALL); + } + + err = btr_cur_optimistic_delete(&node->pcur.btr_cur, 0, &mtr); + + if (err != DB_FAIL) { + goto func_exit; + } + + btr_pcur_commit_specify_mtr(&node->pcur, &mtr); +retry: + /* If the optimistic delete did not succeed, try pessimistic descent to tree */ + mtr.start(); + if (index->table->is_temporary()) { + mtr.set_log_mode(MTR_LOG_NO_REDO); + } else { + index->set_modified(mtr); + } + ut_a(node->pcur.restore_position(BTR_PURGE_TREE, &mtr) + == btr_pcur_t::SAME_ALL); + + btr_cur_pessimistic_delete(&err, FALSE, 
&node->pcur.btr_cur, 0, true, + &mtr); + + /* The delete operation may fail if we have little + file space left: TODO: easiest to crash the database + and restart with more file space */ + + if (err == DB_OUT_OF_FILE_SPACE + && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) { + + btr_pcur_commit_specify_mtr(&(node->pcur), &mtr); + + n_tries++; + + std::this_thread::sleep_for(BTR_CUR_RETRY_SLEEP_TIME); + + goto retry; + } + +func_exit: + if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_INSERT_METADATA) { + /* When rolling back the very first instant ADD COLUMN + operation, reset the root page to the basic state. */ + btr_reset_instant(*index, true, &mtr); + } + + btr_pcur_commit_specify_mtr(&node->pcur, &mtr); + + if (UNIV_LIKELY_NULL(table)) { + dict_table_close(table, dict_locked, + node->trx->mysql_thd, mdl_ticket); + } + + return(err); +} + +/***************************************************************//** +Removes a secondary index entry if found. +@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +row_undo_ins_remove_sec_low( +/*========================*/ + btr_latch_mode mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, + depending on whether we wish optimistic or + pessimistic descent down the index tree */ + dict_index_t* index, /*!< in: index */ + dtuple_t* entry, /*!< in: index entry to remove */ + que_thr_t* thr) /*!< in: query thread */ +{ + btr_pcur_t pcur; + dberr_t err = DB_SUCCESS; + mtr_t mtr; + const bool modify_leaf = mode == BTR_MODIFY_LEAF; + + pcur.btr_cur.page_cur.index = index; + row_mtr_start(&mtr, index, !modify_leaf); + + if (index->is_spatial()) { + mode = modify_leaf + ? 
btr_latch_mode(BTR_MODIFY_LEAF + | BTR_RTREE_DELETE_MARK + | BTR_RTREE_UNDO_INS) + : btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS); + btr_pcur_get_btr_cur(&pcur)->thr = thr; + if (rtr_search(entry, mode, &pcur, &mtr)) { + goto func_exit; + } + + if (rec_get_deleted_flag( + btr_pcur_get_rec(&pcur), + dict_table_is_comp(index->table))) { + ib::error() << "Record found in index " << index->name + << " is deleted marked on insert rollback."; + ut_ad(0); + } + goto found; + } else if (modify_leaf) { + mode = BTR_MODIFY_LEAF_ALREADY_LATCHED; + mtr_s_lock_index(index, &mtr); + } else { + ut_ad(mode == BTR_PURGE_TREE); + mode = BTR_PURGE_TREE_ALREADY_LATCHED; + mtr_x_lock_index(index, &mtr); + } + + switch (row_search_index_entry(entry, mode, &pcur, &mtr)) { + case ROW_BUFFERED: + case ROW_NOT_DELETED_REF: + /* These are invalid outcomes, because the mode passed + to row_search_index_entry() did not include any of the + flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */ + ut_error; + case ROW_NOT_FOUND: + break; + case ROW_FOUND: + found: + btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur); + + if (modify_leaf) { + err = btr_cur_optimistic_delete(btr_cur, 0, &mtr); + } else { + /* Passing rollback=false here, because we are + deleting a secondary index record: the distinction + only matters when deleting a record that contains + externally stored columns. */ + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0, + false, &mtr); + } + } + +func_exit: + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + return(err); +} + +/***************************************************************//** +Removes a secondary index entry from the index if found. Tries first +optimistic, then pessimistic descent down the tree. 
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +row_undo_ins_remove_sec( +/*====================*/ + dict_index_t* index, /*!< in: index */ + dtuple_t* entry, /*!< in: index entry to remove */ + que_thr_t* thr) /*!< in: query thread */ +{ + dberr_t err; + ulint n_tries = 0; + + /* Try first optimistic descent to the B-tree */ + + err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry, thr); + + if (err == DB_SUCCESS) { + + return(err); + } + + /* Try then pessimistic descent to the B-tree */ +retry: + err = row_undo_ins_remove_sec_low(BTR_PURGE_TREE, index, entry, thr); + + /* The delete operation may fail if we have little + file space left: TODO: easiest to crash the database + and restart with more file space */ + + if (err != DB_SUCCESS && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) { /* any failure is retried here, not only DB_OUT_OF_FILE_SPACE */ + + n_tries++; + + std::this_thread::sleep_for(BTR_CUR_RETRY_SLEEP_TIME); + + goto retry; + } + + return(err); +} + +/** Parse an insert undo record. 
+@param[in,out] node row rollback state +@param[in] dict_locked whether the data dictionary cache is locked +@return whether there is something to undo; if true, node->table is open, +and if false, node->table is NULL */ +static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked) +{ + dict_index_t* clust_index; + const byte* ptr; + undo_no_t undo_no; + table_id_t table_id; + byte dummy; + bool dummy_extern; + + ut_ad(node->trx->in_rollback); + ut_ad(trx_undo_roll_ptr_is_insert(node->roll_ptr)); + + ptr = trx_undo_rec_get_pars(node->undo_rec, &node->rec_type, &dummy, + &dummy_extern, &undo_no, &table_id); + + node->update = NULL; + if (!node->is_temp) { + node->table = dict_table_open_on_id(table_id, dict_locked, + DICT_TABLE_OP_NORMAL); + } else if (!dict_locked) { + dict_sys.freeze(SRW_LOCK_CALL); + node->table = dict_sys.acquire_temporary_table(table_id); + dict_sys.unfreeze(); + } else { + node->table = dict_sys.acquire_temporary_table(table_id); + } + + if (!node->table) { + return false; + } + + switch (node->rec_type) { + default: + ut_ad("wrong undo record type" == 0); + goto close_table; + case TRX_UNDO_INSERT_METADATA: + case TRX_UNDO_INSERT_REC: + case TRX_UNDO_EMPTY: + break; + case TRX_UNDO_RENAME_TABLE: + dict_table_t* table = node->table; + ut_ad(!table->is_temporary()); + ut_ad(table->file_unreadable + || dict_table_is_file_per_table(table) + == !is_system_tablespace(table->space_id)); + size_t len = mach_read_from_2(node->undo_rec) + - page_offset(ptr) - 2; + const span<const char> name(reinterpret_cast<const char*>(ptr), + len); + if (strlen(table->name.m_name) != len + || memcmp(table->name.m_name, ptr, len)) { + dict_table_rename_in_cache(table, name, true); + } else if (table->space && table->space->id) { + const auto s = table->space->name(); + if (len != s.size() || memcmp(ptr, s.data(), len)) { + table->rename_tablespace(name, true); + } + } + goto close_table; + } + + if (UNIV_UNLIKELY(!node->table->is_accessible())) { +close_table: + /* Normally, tables should not disappear or become + inaccessible during ROLLBACK, because they 
should be + protected by InnoDB table locks. Corruption could be + a valid exception. + + FIXME: When running out of temporary tablespace, it + would probably be better to just drop all temporary + tables (and temporary undo log records) of the current + connection, instead of doing this rollback. */ + dict_table_close(node->table, dict_locked); + node->table = NULL; + return false; + } else { + ut_ad(!node->table->skip_alter_undo); + clust_index = dict_table_get_first_index(node->table); + + if (clust_index != NULL) { + switch (node->rec_type) { + case TRX_UNDO_INSERT_REC: + ptr = trx_undo_rec_get_row_ref( + ptr, clust_index, &node->ref, + node->heap); + break; + case TRX_UNDO_EMPTY: + node->ref = nullptr; + return true; + default: + node->ref = &trx_undo_metadata; + if (!row_undo_search_clust_to_pcur(node)) { + /* An error probably occurred during + an insert into the clustered index, + after we wrote the undo log record. */ + goto close_table; + } + return true; + } + + if (!row_undo_search_clust_to_pcur(node)) { + /* An error probably occurred during + an insert into the clustered index, + after we wrote the undo log record. */ + goto close_table; + } + if (node->table->n_v_cols) { + trx_undo_read_v_cols(node->table, ptr, + node->row, false); + } + + } else { + ib::warn() << "Table " << node->table->name + << " has no indexes," + " ignoring the table"; + goto close_table; + } + } + + return true; +} + +/***************************************************************//** +Removes secondary index records. 
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +row_undo_ins_remove_sec_rec( +/*========================*/ + undo_node_t* node, /*!< in/out: row undo node */ + que_thr_t* thr) /*!< in: query thread */ +{ + dberr_t err = DB_SUCCESS; + dict_index_t* index; + mem_heap_t* heap; + + heap = mem_heap_create(1024); + + for (index = node->index; index; + index = dict_table_get_next_index(index)) { + if (index->type & (DICT_FTS | DICT_CORRUPT) + || !index->is_committed()) { + continue; + } + + /* An insert undo record TRX_UNDO_INSERT_REC will + always contain all fields of the index. It does not + matter if any indexes were created afterwards; all + index entries can be reconstructed from the row. */ + dtuple_t* entry = row_build_index_entry( + node->row, node->ext, index, heap); + if (UNIV_UNLIKELY(!entry)) { + /* The database must have crashed after + inserting a clustered index record but before + writing all the externally stored columns of + that record, or a statement is being rolled + back because an error occurred while storing + off-page columns. + + Because secondary index entries are inserted + after the clustered index record, we may + assume that the secondary index record does + not exist. */ + } else { + err = row_undo_ins_remove_sec(index, entry, thr); + + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + goto func_exit; + } + } + + mem_heap_empty(heap); + } + +func_exit: + node->index = index; /* the index on which removal failed, or NULL if the loop ran to completion */ + mem_heap_free(heap); + return(err); +} + +/***********************************************************//** +Undoes a fresh insert of a row to a table. A fresh insert means that +the same clustered index unique key did not have any record, even delete +marked, at the time of the insert. InnoDB is eager in a rollback: +if it figures out that an index record will be removed in the purge +anyway, it will remove it in the rollback. 
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +dberr_t +row_undo_ins( +/*=========*/ + undo_node_t* node, /*!< in: row undo node */ + que_thr_t* thr) /*!< in: query thread */ +{ + dberr_t err; + const bool dict_locked = node->trx->dict_operation_lock_mode; + + if (!row_undo_ins_parse_undo_rec(node, dict_locked)) { + return DB_SUCCESS; /* no row to remove; the parser has already closed the table if it was opened */ + } + + ut_ad(node->table->is_temporary() + || lock_table_has_locks(node->table)); + + /* Iterate over all the indexes and undo the insert.*/ + + node->index = dict_table_get_first_index(node->table); + ut_ad(dict_index_is_clust(node->index)); + + switch (node->rec_type) { + default: + ut_ad("wrong undo record type" == 0); + /* fall through */ + case TRX_UNDO_INSERT_REC: + /* Skip the clustered index (the first index) */ + node->index = dict_table_get_next_index(node->index); + + err = row_undo_ins_remove_sec_rec(node, thr); + + if (err != DB_SUCCESS) { + break; + } + + log_free_check(); + + if (!dict_locked && node->table->id == DICT_INDEXES_ID) { + dict_sys.lock(SRW_LOCK_CALL); + err = row_undo_ins_remove_clust_rec(node); + dict_sys.unlock(); + } else { + ut_ad(node->table->id != DICT_INDEXES_ID + || !node->table->is_temporary()); + err = row_undo_ins_remove_clust_rec(node); + } + + if (err == DB_SUCCESS && node->table->stat_initialized) { + /* Not protected by dict_sys.latch + or table->stats_mutex_lock() for + performance reasons; we would rather get garbage + in stat_n_rows (which is just an estimate anyway) + than protecting the following code with a latch. */ + dict_table_n_rows_dec(node->table); + + /* Do not attempt to update statistics when + executing ROLLBACK in the InnoDB SQL + interpreter, because in that case we would + already be holding dict_sys.latch, which + would be acquired when updating statistics. 
*/ + if (!dict_locked) { + dict_stats_update_if_needed(node->table, + *node->trx); + } + } + break; + + case TRX_UNDO_INSERT_METADATA: + log_free_check(); + ut_ad(!node->table->is_temporary()); + err = row_undo_ins_remove_clust_rec(node); + break; + case TRX_UNDO_EMPTY: + err = node->table->clear(thr); + break; + } + + dict_table_close(node->table, dict_locked); + + node->table = NULL; + + return(err); +} |