diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:00:34 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:00:34 +0000 |
commit | 3f619478f796eddbba6e39502fe941b285dd97b1 (patch) | |
tree | e2c7b5777f728320e5b5542b6213fd3591ba51e2 /storage/innobase/row/row0umod.cc | |
parent | Initial commit. (diff) | |
download | mariadb-3f619478f796eddbba6e39502fe941b285dd97b1.tar.xz mariadb-3f619478f796eddbba6e39502fe941b285dd97b1.zip |
Adding upstream version 1:10.11.6.upstream/1%10.11.6upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/innobase/row/row0umod.cc')
-rw-r--r-- | storage/innobase/row/row0umod.cc | 1288 |
1 files changed, 1288 insertions, 0 deletions
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc new file mode 100644 index 00000000..a01eaea5 --- /dev/null +++ b/storage/innobase/row/row0umod.cc @@ -0,0 +1,1288 @@ +/***************************************************************************** + +Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, 2023, MariaDB Corporation. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file row/row0umod.cc +Undo modify of a row + +Created 2/27/1997 Heikki Tuuri +*******************************************************/ + +#include "row0umod.h" +#include "dict0dict.h" +#include "dict0stats.h" +#include "dict0boot.h" +#include "trx0undo.h" +#include "trx0roll.h" +#include "trx0purge.h" +#include "btr0btr.h" +#include "mach0data.h" +#include "ibuf0ibuf.h" +#include "row0undo.h" +#include "row0vers.h" +#include "trx0trx.h" +#include "trx0rec.h" +#include "row0row.h" +#include "row0upd.h" +#include "que0que.h" +#include "log0log.h" + +/* Considerations on undoing a modify operation. +(1) Undoing a delete marking: all index records should be found. 
Some of +them may have delete mark already FALSE, if the delete mark operation was +stopped underway, or if the undo operation ended prematurely because of a +system crash. +(2) Undoing an update of a delete unmarked record: the newer version of +an updated secondary index entry should be removed if no prior version +of the clustered index record requires its existence. Otherwise, it should +be delete marked. +(3) Undoing an update of a delete marked record. In this kind of update a +delete marked clustered index record was delete unmarked and possibly also +some of its fields were changed. Now, it is possible that the delete marked +version has become obsolete at the time the undo is started. */ + +/************************************************************************* +IMPORTANT NOTE: Any operation that generates redo MUST check that there +is enough space in the redo log before starting that operation. This is +done by calling log_free_check(). The reason for checking the +availability of the redo log space before the start of the operation is +that we MUST NOT hold any synchronization objects when performing the +check. +If you make a change in this module make sure that no codepath is +introduced where a call to log_free_check() is bypassed. */ + +/***********************************************************//** +Undoes a modify in a clustered index record. 
+@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +row_undo_mod_clust_low( +/*===================*/ + undo_node_t* node, /*!< in: row undo node; node->pcur is restored + and node->update is applied to the record */ + rec_offs** offsets,/*!< out: rec_get_offsets() on the record */ + mem_heap_t** offsets_heap, + /*!< in/out: memory heap that can be emptied */ + mem_heap_t* heap, /*!< in/out: memory heap; only passed on to + btr_cur_pessimistic_update() */ + byte* sys, /*!< out: DB_TRX_ID, DB_ROLL_PTR + for row_log_table_delete(); + NOTE(review): not referenced in the body + of this function */ + que_thr_t* thr, /*!< in: query thread */ + mtr_t* mtr, /*!< in: mtr; must be committed before + latching any further pages */ + btr_latch_mode mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE + (the debug assertion also accepts + BTR_MODIFY_LEAF_ALREADY_LATCHED) */ +{ + btr_pcur_t* pcur; + btr_cur_t* btr_cur; + dberr_t err; + + pcur = &node->pcur; + btr_cur = btr_pcur_get_btr_cur(pcur); + /* The stored cursor position must be restored exactly; + any other outcome is reported as corruption. */ + if (pcur->restore_position(mode, mtr) != btr_pcur_t::SAME_ALL) { + return DB_CORRUPTION; + } + + ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), + btr_cur_get_index(btr_cur)) + == thr_get_trx(thr)->id + || btr_cur_get_index(btr_cur)->table->is_temporary()); + ut_ad(node->ref != &trx_undo_metadata + || node->update->info_bits == REC_INFO_METADATA_ADD + || node->update->info_bits == REC_INFO_METADATA_ALTER); + + if (mode != BTR_MODIFY_TREE) { + ut_ad(mode == BTR_MODIFY_LEAF + || mode == BTR_MODIFY_LEAF_ALREADY_LATCHED); + + err = btr_cur_optimistic_update( + BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG + | BTR_KEEP_SYS_FLAG, + btr_cur, offsets, offsets_heap, + node->update, node->cmpl_info, + thr, thr_get_trx(thr)->id, mtr); + ut_ad(err != DB_SUCCESS || node->ref != &trx_undo_metadata); + } else { + big_rec_t* dummy_big_rec; + + err = btr_cur_pessimistic_update( + BTR_NO_LOCKING_FLAG + | BTR_NO_UNDO_LOG_FLAG + | BTR_KEEP_SYS_FLAG, + btr_cur, offsets, offsets_heap, heap, + &dummy_big_rec, node->update, + node->cmpl_info, thr, thr_get_trx(thr)->id, mtr); + + ut_a(!dummy_big_rec); + + if (err == DB_SUCCESS + && node->ref == &trx_undo_metadata + && 
+ btr_cur_get_index(btr_cur)->table->instant + && node->update->info_bits == REC_INFO_METADATA_ADD) { + btr_reset_instant(*btr_cur->index(), false, mtr); + } + } + + if (err != DB_SUCCESS) { + return err; + } + /* Records of the dictionary tables identified by DICT_TABLES_ID, + DICT_INDEXES_ID and DICT_COLUMNS_ID need extra handling: + node->trx->evict_table() is invoked on the table ID read + from node->row. For the first two, this is only done when + node->trx == trx_roll_crash_recv_trx. */ + switch (const auto id = btr_cur_get_index(btr_cur)->table->id) { + unsigned c; + case DICT_TABLES_ID: + if (node->trx != trx_roll_crash_recv_trx) { + break; + } + c = DICT_COL__SYS_TABLES__ID; + goto evict; + case DICT_INDEXES_ID: + if (node->trx != trx_roll_crash_recv_trx) { + break; + } else if (node->rec_type == TRX_UNDO_DEL_MARK_REC + && btr_cur_get_rec(btr_cur) + [8 + 8 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN] + == static_cast<byte>(*TEMP_INDEX_PREFIX_STR)) { + /* We are rolling back the DELETE of metadata + for a failed ADD INDEX operation. This does + not affect any cached table definition, + because we are filtering out such indexes in + dict_load_indexes(). */ + break; + } + /* fall through */ + case DICT_COLUMNS_ID: + static_assert(!DICT_COL__SYS_INDEXES__TABLE_ID, ""); + static_assert(!DICT_COL__SYS_COLUMNS__TABLE_ID, ""); + c = DICT_COL__SYS_COLUMNS__TABLE_ID; + /* This is rolling back an UPDATE or DELETE on SYS_COLUMNS. + If it was part of an instant ALTER TABLE operation, we + must evict the table definition, so that it can be + reloaded after the dictionary operation has been + completed. At this point, any corresponding operation + to the metadata record will have been rolled back. 
+ */ + evict: + const dfield_t& table_id = *dtuple_get_nth_field(node->row, c); + ut_ad(dfield_get_len(&table_id) == 8); + node->trx->evict_table(mach_read_from_8( + static_cast<byte*>( + table_id.data)), + id == DICT_COLUMNS_ID); + } + + return DB_SUCCESS; +} + +/** Get the byte offset of the DB_TRX_ID column. +The cached index->trx_id_offset is returned when it is nonzero; +otherwise the offset is computed for this record with rec_get_offsets(), +using a stack buffer sized for the PRIMARY KEY plus 2 columns. +@param[in] rec clustered index record +@param[in] index clustered index (n_uniq must not exceed MAX_REF_PARTS) +@return the byte offset of DB_TRX_ID, from the start of rec */ +static ulint row_trx_id_offset(const rec_t* rec, const dict_index_t* index) +{ + ut_ad(index->n_uniq <= MAX_REF_PARTS); + ulint trx_id_offset = index->trx_id_offset; + if (!trx_id_offset) { + /* Reserve enough offsets for the PRIMARY KEY and 2 columns + so that we can access DB_TRX_ID, DB_ROLL_PTR. */ + rec_offs offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2]; + rec_offs_init(offsets_); + mem_heap_t* heap = NULL; + const ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1; + rec_offs* offsets = rec_get_offsets(rec, index, offsets_, + index->n_core_fields, + trx_id_pos + 1, &heap); + ut_ad(!heap); + ulint len; + trx_id_offset = rec_get_nth_field_offs( + offsets, trx_id_pos, &len); + ut_ad(len == DATA_TRX_ID_LEN); + } + + return trx_id_offset; +} + +/** Determine if rollback must execute a purge-like operation. +The result is true only when purge_sys.is_purgeable(node.new_trx_id) +holds and the DB_TRX_ID stored in the record at node.pcur is still +equal to node.new_trx_id. +@param node row undo; debug assertions require rec_type +TRX_UNDO_UPD_DEL_REC, a non-temporary table, and a cursor on the +primary index +@return whether the record should be purged */ +static bool row_undo_mod_must_purge(const undo_node_t &node) +{ + ut_ad(node.rec_type == TRX_UNDO_UPD_DEL_REC); + ut_ad(!node.table->is_temporary()); + + const btr_cur_t &btr_cur= node.pcur.btr_cur; + ut_ad(btr_cur.index()->is_primary()); + DEBUG_SYNC_C("rollback_purge_clust"); + + if (!purge_sys.is_purgeable(node.new_trx_id)) + return false; + + const rec_t *rec= btr_cur_get_rec(&btr_cur); + return trx_read_trx_id(rec + row_trx_id_offset(rec, btr_cur.index())) == + node.new_trx_id; +} + +/***********************************************************//** +Undoes a modify in a clustered index record. 
+Sets also the node state for the +next round of undo. +@return DB_SUCCESS or error code: we may run out of file space */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +row_undo_mod_clust( +/*===============*/ + undo_node_t* node, /*!< in: row undo node; node->pcur must be + positioned on the primary index */ + que_thr_t* thr) /*!< in: query thread; its transaction must + be node->trx */ +{ + btr_pcur_t* pcur; + mtr_t mtr; + dberr_t err; + dict_index_t* index; + + ut_ad(thr_get_trx(thr) == node->trx); + ut_ad(node->trx->in_rollback); + + log_free_check(); + pcur = &node->pcur; + index = btr_cur_get_index(btr_pcur_get_btr_cur(pcur)); + ut_ad(index->is_primary()); + + mtr.start(); + if (index->table->is_temporary()) { + mtr.set_log_mode(MTR_LOG_NO_REDO); + } else { + index->set_modified(mtr); + ut_ad(lock_table_has_locks(index->table)); + } + + mem_heap_t* heap = mem_heap_create(1024); + mem_heap_t* offsets_heap = NULL; + rec_offs* offsets = NULL; + byte sys[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN]; + + /* Try optimistic processing of the record, keeping changes within + the index page */ + + err = row_undo_mod_clust_low(node, &offsets, &offsets_heap, + heap, sys, thr, &mtr, BTR_MODIFY_LEAF); + + if (err != DB_SUCCESS) { + btr_pcur_commit_specify_mtr(pcur, &mtr); + + /* We may have to modify tree structure: do a pessimistic + descent down the index tree */ + + mtr.start(); + if (index->table->is_temporary()) { + mtr.set_log_mode(MTR_LOG_NO_REDO); + } else { + index->set_modified(mtr); + } + + err = row_undo_mod_clust_low(node, &offsets, &offsets_heap, + heap, sys, thr, &mtr, + BTR_MODIFY_TREE); + ut_ad(err == DB_SUCCESS || err == DB_OUT_OF_FILE_SPACE); + } + + /** + * When scrubbing is enabled and the record gets cleared, + * the transaction id is not present afterwards. 
+ * This is safe: since the record is on the free list, + * it can be reallocated at any time after this mtr commits, + * which happens just below. + */ + ut_ad(srv_immediate_scrub_data_uncompressed + || row_get_rec_trx_id(btr_pcur_get_rec(pcur), index, offsets) + == node->new_trx_id); + + btr_pcur_commit_specify_mtr(pcur, &mtr); + DEBUG_SYNC_C("rollback_undo_pk"); + + if (err != DB_SUCCESS) { + goto func_exit; + } + + /* FIXME: Perform the below operations in the above + mini-transaction when possible. */ + + if (node->rec_type == TRX_UNDO_UPD_DEL_REC) { + /* In delete-marked records, DB_TRX_ID must + always refer to an existing update_undo log record. */ + ut_ad(node->new_trx_id); + + mtr.start(); + if (pcur->restore_position(BTR_MODIFY_LEAF, &mtr) != + btr_pcur_t::SAME_ALL) { + goto mtr_commit_exit; + } + + ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur), + dict_table_is_comp(node->table))); + + if (index->table->is_temporary()) { + mtr.set_log_mode(MTR_LOG_NO_REDO); + err = btr_cur_optimistic_delete(&pcur->btr_cur, 0, + &mtr); + if (err != DB_FAIL) { + goto mtr_commit_exit; + } + err = DB_SUCCESS; + btr_pcur_commit_specify_mtr(pcur, &mtr); + } else { + index->set_modified(mtr); + if (!row_undo_mod_must_purge(*node)) { + goto mtr_commit_exit; + } + err = btr_cur_optimistic_delete(&pcur->btr_cur, 0, + &mtr); + if (err != DB_FAIL) { + goto mtr_commit_exit; + } + err = DB_SUCCESS; + btr_pcur_commit_specify_mtr(pcur, &mtr); + } + /* We only get here when the optimistic delete returned + DB_FAIL: retry with a pessimistic delete in a new + mini-transaction. */ + mtr.start(); + if (pcur->restore_position(BTR_PURGE_TREE, &mtr) != + btr_pcur_t::SAME_ALL) { + goto mtr_commit_exit; + } + + ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur), + dict_table_is_comp(node->table))); + + if (index->table->is_temporary()) { + mtr.set_log_mode(MTR_LOG_NO_REDO); + } else { + if (!row_undo_mod_must_purge(*node)) { + goto mtr_commit_exit; + } + index->set_modified(mtr); + } + + /* This operation is analogous to purge, we can free + also inherited externally stored fields. 
We can also + assume that the record was complete (including BLOBs), + because it had been delete-marked after it had been + completely inserted. Therefore, we are passing + rollback=false, just like purge does. */ + btr_cur_pessimistic_delete(&err, FALSE, &pcur->btr_cur, 0, + false, &mtr); + ut_ad(err == DB_SUCCESS || err == DB_OUT_OF_FILE_SPACE); + } else if (!index->table->is_temporary() && node->new_trx_id) { + /* We rolled back a record so that it still exists. + We must reset the DB_TRX_ID if the history is no + longer accessible by any active read view. */ + + mtr.start(); + if (pcur->restore_position(BTR_MODIFY_LEAF, &mtr) + != btr_pcur_t::SAME_ALL + || !purge_sys.is_purgeable(node->new_trx_id)) { + goto mtr_commit_exit; + } + + rec_t* rec = btr_pcur_get_rec(pcur); + ulint trx_id_offset = index->trx_id_offset; + ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1; + /* Reserve enough offsets for the PRIMARY KEY and + 2 columns so that we can access DB_TRX_ID, DB_ROLL_PTR. */ + rec_offs offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2]; + if (trx_id_offset) { +#ifdef UNIV_DEBUG + ut_ad(rec_offs_validate(NULL, index, offsets)); + if (buf_block_get_page_zip( + btr_pcur_get_block(&node->pcur))) { + /* Below, page_zip_write_trx_id_and_roll_ptr() + needs offsets to access DB_TRX_ID,DB_ROLL_PTR. + We already computed offsets for possibly + another record in the clustered index. + Because the PRIMARY KEY is fixed-length, + the offsets for the PRIMARY KEY and + DB_TRX_ID,DB_ROLL_PTR are still valid. + Silence the rec_offs_validate() assertion. 
*/ + rec_offs_make_valid(rec, index, true, offsets); + } +#endif + } else if (rec_is_metadata(rec, *index)) { + ut_ad(!buf_block_get_page_zip(btr_pcur_get_block( + pcur))); + for (unsigned i = index->first_user_field(); i--; ) { + trx_id_offset += index->fields[i].fixed_len; + } + } else { + ut_ad(index->n_uniq <= MAX_REF_PARTS); + rec_offs_init(offsets_); + offsets = rec_get_offsets(rec, index, offsets_, + index->n_core_fields, + trx_id_pos + 2, &heap); + ulint len; + trx_id_offset = rec_get_nth_field_offs( + offsets, trx_id_pos, &len); + ut_ad(len == DATA_TRX_ID_LEN); + } + + if (trx_read_trx_id(rec + trx_id_offset) == node->new_trx_id) { + ut_ad(!rec_get_deleted_flag( + rec, dict_table_is_comp(node->table)) + || rec_is_alter_metadata(rec, *index)); + index->set_modified(mtr); + buf_block_t* block = btr_pcur_get_block(pcur); + if (UNIV_LIKELY_NULL(block->page.zip.data)) { + page_zip_write_trx_id_and_roll_ptr( + block, rec, offsets, trx_id_pos, + 0, 1ULL << ROLL_PTR_INSERT_FLAG_POS, + &mtr); + } else { /* No compressed page image: zero out DB_TRX_ID + and write DB_ROLL_PTR as the byte 0x80 + followed by zero bytes. */ + size_t offs = page_offset(rec + trx_id_offset); + mtr.memset(block, offs, DATA_TRX_ID_LEN, 0); + offs += DATA_TRX_ID_LEN; + mtr.write<1,mtr_t::MAYBE_NOP>(*block, + block->page.frame + + offs, 0x80U); + mtr.memset(block, offs + 1, + DATA_ROLL_PTR_LEN - 1, 0); + } + } + } else { + goto func_exit; + } + +mtr_commit_exit: + btr_pcur_commit_specify_mtr(pcur, &mtr); + +func_exit: + if (offsets_heap) { + mem_heap_free(offsets_heap); + } + mem_heap_free(heap); + return(err); +} + +/***********************************************************//** +Delete marks or removes a secondary index entry if found. 
+@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +row_undo_mod_del_mark_or_remove_sec_low( +/*====================================*/ + undo_node_t* node, /*!< in: row undo node */ + que_thr_t* thr, /*!< in: query thread */ + dict_index_t* index, /*!< in: index */ + dtuple_t* entry, /*!< in: index entry */ + btr_latch_mode mode) /*!< in: latch mode BTR_MODIFY_LEAF or + BTR_PURGE_TREE (the value that the + caller in this file passes for the + pessimistic retry) */ +{ + btr_pcur_t pcur; + btr_cur_t* btr_cur; + dberr_t err = DB_SUCCESS; + mtr_t mtr; + mtr_t mtr_vers; + const bool modify_leaf = mode == BTR_MODIFY_LEAF; + + row_mtr_start(&mtr, index, !modify_leaf); + + pcur.btr_cur.page_cur.index = index; + btr_cur = btr_pcur_get_btr_cur(&pcur); + + if (index->is_spatial()) { + mode = modify_leaf + ? btr_latch_mode(BTR_MODIFY_LEAF + | BTR_RTREE_DELETE_MARK + | BTR_RTREE_UNDO_INS) + : btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS); + btr_cur->thr = thr; + if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, &mtr))) { + goto found; + } else { + goto func_exit; + } + } else if (!index->is_committed()) { + /* The index->online_status may change if the index is + or was being created online, but not committed yet. It + is protected by index->lock. */ + if (modify_leaf) { + mode = BTR_MODIFY_LEAF_ALREADY_LATCHED; + mtr_s_lock_index(index, &mtr); + } else { + ut_ad(mode == BTR_PURGE_TREE); + mode = BTR_PURGE_TREE_ALREADY_LATCHED; + mtr_x_lock_index(index, &mtr); + } + } else { + /* For secondary indexes, + index->online_status==ONLINE_INDEX_COMPLETE if + index->is_committed(). */ + ut_ad(!dict_index_is_online_ddl(index)); + } + + switch (UNIV_EXPECT(row_search_index_entry(entry, mode, &pcur, &mtr), + ROW_FOUND)) { + case ROW_NOT_FOUND: + /* In crash recovery, the secondary index record may + be missing if the UPDATE did not have time to insert + the secondary index records before the crash. When we + are undoing that UPDATE in crash recovery, the record + may be missing. 
+ + In normal processing, if an update ends in a deadlock + before it has inserted all updated secondary index + records, then the undo will not find those records. */ + goto func_exit; + case ROW_FOUND: + break; + case ROW_BUFFERED: + case ROW_NOT_DELETED_REF: + /* These are invalid outcomes, because the mode passed + to row_search_index_entry() did not include any of the + flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */ + ut_error; + } + +found: + /* We should remove the index record if no prior version of the row, + which cannot be purged yet, requires its existence. If some prior + version requires it, we should delete-mark the record instead. */ + + mtr_vers.start(); + + ut_a(node->pcur.restore_position(BTR_SEARCH_LEAF, &mtr_vers) == + btr_pcur_t::SAME_ALL); + + /* For a temporary table, we can skip checking older versions of + the clustered index entry, because there is no MVCC or purge. */ + if (node->table->is_temporary() + || row_vers_old_has_index_entry( + false, btr_pcur_get_rec(&node->pcur), + &mtr_vers, index, entry, 0, 0)) { + btr_rec_set_deleted<true>(btr_cur_get_block(btr_cur), + btr_cur_get_rec(btr_cur), &mtr); + } else { + /* Remove the index record */ + + if (dict_index_is_spatial(index)) { + rec_t* rec = btr_pcur_get_rec(&pcur); + if (rec_get_deleted_flag(rec, + dict_table_is_comp(index->table))) { + ib::error() << "Record found in index " + << index->name << " is deleted marked" + " on rollback update."; + ut_ad(0); + } + } + + if (modify_leaf) { + err = btr_cur_optimistic_delete(btr_cur, 0, &mtr); + } else { + /* Passing rollback=false, + because we are deleting a secondary index record: + the distinction only matters when deleting a + record that contains externally stored columns. 
*/ + ut_ad(!index->is_primary()); + btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0, + false, &mtr); + + /* The delete operation may fail if we have little + file space left: TODO: easiest to crash the database + and restart with more file space */ + } + } + + btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers); + +func_exit: + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + return(err); +} + +/***********************************************************//** +Delete marks or removes a secondary index entry if found. +The operation is first attempted with BTR_MODIFY_LEAF; if that returns +anything other than DB_SUCCESS, it is retried with BTR_PURGE_TREE and +the result of the retry is returned. +NOTE that if we updated the fields of a delete-marked secondary index record +so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot +return to the original values because we do not know them. But this should +not cause problems because in row0sel.cc, in queries we always retrieve the +clustered index record or an earlier version of it, if the secondary index +record through which we do the search is delete-marked. +@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +row_undo_mod_del_mark_or_remove_sec( +/*================================*/ + undo_node_t* node, /*!< in: row undo node */ + que_thr_t* thr, /*!< in: query thread */ + dict_index_t* index, /*!< in: index */ + dtuple_t* entry) /*!< in: index entry */ +{ + dberr_t err; + + err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index, + entry, BTR_MODIFY_LEAF); + if (err == DB_SUCCESS) { + + return(err); + } + + err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index, + entry, BTR_PURGE_TREE); + return(err); +} + +/***********************************************************//** +Delete unmarks a secondary index entry which must be found. It might not be +delete-marked at the moment, but it does not harm to unmark it anyway. We also +need to update the fields of the secondary index record if we updated its +fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'. 
+@retval DB_SUCCESS on success +@retval DB_FAIL if BTR_MODIFY_TREE should be tried +@retval DB_OUT_OF_FILE_SPACE when running out of tablespace +@retval DB_DUPLICATE_KEY if the value was missing + and an insert would lead to a duplicate exists */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +row_undo_mod_del_unmark_sec_and_undo_update( +/*========================================*/ + btr_latch_mode mode, /*!< in: search mode: BTR_MODIFY_LEAF or + BTR_MODIFY_TREE */ + que_thr_t* thr, /*!< in: query thread; its transaction + must have a nonzero id */ + dict_index_t* index, /*!< in: index */ + dtuple_t* entry) /*!< in: index entry */ +{ + btr_pcur_t pcur; + btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur); + upd_t* update; + dberr_t err = DB_SUCCESS; + big_rec_t* dummy_big_rec; + mtr_t mtr; + trx_t* trx = thr_get_trx(thr); + const ulint flags + = BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG; + const auto orig_mode = mode; + + pcur.btr_cur.page_cur.index = index; + ut_ad(trx->id != 0); + + if (index->is_spatial()) { + /* FIXME: Currently we do a 2-pass search for the undo + in order to avoid delete-unmarking a wrong record when + rolling back a partial update. Later, we could log some + info in secondary index updates to avoid this. 
*/ + static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), ""); + ut_ad(!(mode & 8)); + mode = btr_latch_mode(mode | BTR_RTREE_DELETE_MARK); + } + +try_again: + row_mtr_start(&mtr, index, mode & 8); + + btr_cur->thr = thr; + + if (index->is_spatial()) { + if (!rtr_search(entry, mode, &pcur, &mtr)) { + goto found; + } + + if (mode != orig_mode && btr_cur->rtr_info->fd_del) { /* Second + pass: search again with the original mode, + that is, without BTR_RTREE_DELETE_MARK. */ + mode = orig_mode; + btr_pcur_close(&pcur); + mtr.commit(); + goto try_again; + } + + goto not_found; + } + + switch (row_search_index_entry(entry, mode, &pcur, &mtr)) { + mem_heap_t* heap; + mem_heap_t* offsets_heap; + rec_offs* offsets; + case ROW_BUFFERED: + case ROW_NOT_DELETED_REF: + /* These are invalid outcomes, because the mode passed + to row_search_index_entry() did not include any of the + flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */ + ut_error; + case ROW_NOT_FOUND: +not_found: + if (btr_cur->up_match >= dict_index_get_n_unique(index) + || btr_cur->low_match >= dict_index_get_n_unique(index)) { + ib::warn() << "Record in index " << index->name + << " of table " << index->table->name + << " was not found on rollback, and" + " a duplicate exists: " + << *entry + << " at: " << rec_index_print( + btr_cur_get_rec(btr_cur), index); + err = DB_DUPLICATE_KEY; + break; + } + + ib::warn() << "Record in index " << index->name + << " of table " << index->table->name + << " was not found on rollback, trying to insert: " + << *entry + << " at: " << rec_index_print( + btr_cur_get_rec(btr_cur), index); + + /* Insert the missing record that we were trying to + delete-unmark. 
*/ + big_rec_t* big_rec; + rec_t* insert_rec; + offsets = NULL; + offsets_heap = NULL; + + err = btr_cur_optimistic_insert( + flags, btr_cur, &offsets, &offsets_heap, + entry, &insert_rec, &big_rec, + 0, thr, &mtr); + ut_ad(!big_rec); + + if (err == DB_FAIL && mode == BTR_MODIFY_TREE) { + err = btr_cur_pessimistic_insert( + flags, btr_cur, + &offsets, &offsets_heap, + entry, &insert_rec, &big_rec, + 0, thr, &mtr); + /* There are no off-page columns in + secondary indexes. */ + ut_ad(!big_rec); + } + + if (err == DB_SUCCESS) { + page_update_max_trx_id( + btr_cur_get_block(btr_cur), + btr_cur_get_page_zip(btr_cur), + trx->id, &mtr); + } + + if (offsets_heap) { + mem_heap_free(offsets_heap); + } + + break; + case ROW_FOUND: +found: + btr_rec_set_deleted<false>(btr_cur_get_block(btr_cur), + btr_cur_get_rec(btr_cur), &mtr); + heap = mem_heap_create( + sizeof(upd_t) + + dtuple_get_n_fields(entry) * sizeof(upd_field_t)); + offsets_heap = NULL; + offsets = rec_get_offsets( + btr_cur_get_rec(btr_cur), + index, nullptr, index->n_core_fields, ULINT_UNDEFINED, + &offsets_heap); + update = row_upd_build_sec_rec_difference_binary( + btr_cur_get_rec(btr_cur), index, offsets, entry, heap); + if (upd_get_n_fields(update) == 0) { + + /* Do nothing */ + + } else if (mode != BTR_MODIFY_TREE) { + /* Try an optimistic updating of the record, keeping + changes within the page */ + + /* TODO: pass offsets, not &offsets */ + err = btr_cur_optimistic_update( + flags, btr_cur, &offsets, &offsets_heap, + update, 0, thr, thr_get_trx(thr)->id, &mtr); + switch (err) { + case DB_OVERFLOW: + case DB_UNDERFLOW: + case DB_ZIP_OVERFLOW: + err = DB_FAIL; /* Reported as DB_FAIL so that the + caller can retry with BTR_MODIFY_TREE; + falls through to the break below. */ + default: + break; + } + } else { + err = btr_cur_pessimistic_update( + flags, btr_cur, &offsets, &offsets_heap, + heap, &dummy_big_rec, + update, 0, thr, thr_get_trx(thr)->id, &mtr); + ut_a(!dummy_big_rec); + } + + mem_heap_free(heap); + mem_heap_free(offsets_heap); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + return(err); +} + 
+/***********************************************************//** +Undoes a modify in secondary indexes when undo record type is UPD_DEL. +Starting from node->index, visits the remaining indexes of the table, +skipping FTS, corrupted and uncommitted ones, and calls +row_undo_mod_del_mark_or_remove_sec() on the entry built from node->row. +@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +row_undo_mod_upd_del_sec( +/*=====================*/ + undo_node_t* node, /*!< in: row undo node */ + que_thr_t* thr) /*!< in: query thread */ +{ + mem_heap_t* heap; + dberr_t err = DB_SUCCESS; + + ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC); + ut_ad(!node->undo_row); + + heap = mem_heap_create(1024); + + do { + dict_index_t* index = node->index; + + if (index->type & (DICT_FTS | DICT_CORRUPT) + || !index->is_committed()) { /* In a do-while loop, + continue jumps to the loop condition, + which advances node->index. */ + continue; + } + + /* During online index creation, + HA_ALTER_INPLACE_COPY_NO_LOCK or HA_ALTER_INPLACE_NOCOPY_NO_LOCK + should guarantee that any active transaction has not modified + indexed columns such that col->ord_part was 0 at the + time when the undo log record was written. When we get + to roll back an undo log entry TRX_UNDO_DEL_MARK_REC, + it should always cover all affected indexes. */ + dtuple_t* entry = row_build_index_entry( + node->row, node->ext, index, heap); + + if (UNIV_UNLIKELY(!entry)) { + /* The database must have crashed after + inserting a clustered index record but before + writing all the externally stored columns of + that record. Because secondary index entries + are inserted after the clustered index record, + we may assume that the secondary index record + does not exist. However, this situation may + only occur during the rollback of incomplete + transactions. 
*/ + ut_a(thr_get_trx(thr) == trx_roll_crash_recv_trx); + } else { + err = row_undo_mod_del_mark_or_remove_sec( + node, thr, index, entry); + + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + + break; + } + } + + mem_heap_empty(heap); + } while ((node->index = dict_table_get_next_index(node->index))); + + mem_heap_free(heap); + + return(err); +} + +/***********************************************************//** +Undoes a modify in secondary indexes when undo record type is DEL_MARK. +Starting from node->index, visits the remaining indexes of the table, +skipping FTS, corrupted and uncommitted ones, and delete-unmarks the +entry built from node->row (leaf mode first, then tree mode on DB_FAIL). +DB_DUPLICATE_KEY is not propagated: the index is flagged DICT_CORRUPT. +@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +row_undo_mod_del_mark_sec( +/*======================*/ + undo_node_t* node, /*!< in: row undo node */ + que_thr_t* thr) /*!< in: query thread */ +{ + mem_heap_t* heap; + dberr_t err = DB_SUCCESS; + + ut_ad(!node->undo_row); + + heap = mem_heap_create(1024); + + do { + dict_index_t* index = node->index; + + if (index->type & (DICT_FTS | DICT_CORRUPT) + || !index->is_committed()) { + continue; + } + + /* During online index creation, + HA_ALTER_INPLACE_COPY_NO_LOCK or HA_ALTER_INPLACE_NOCOPY_NO_LOCK + should guarantee that any active transaction has not modified + indexed columns such that col->ord_part was 0 at the + time when the undo log record was written. When we get + to roll back an undo log entry TRX_UNDO_DEL_MARK_REC, + it should always cover all affected indexes. */ + dtuple_t* entry = row_build_index_entry( + node->row, node->ext, index, heap); + + ut_a(entry); + + err = row_undo_mod_del_unmark_sec_and_undo_update( + BTR_MODIFY_LEAF, thr, index, entry); + if (err == DB_FAIL) { + err = row_undo_mod_del_unmark_sec_and_undo_update( + BTR_MODIFY_TREE, thr, index, entry); + } + + if (err == DB_DUPLICATE_KEY) { + index->type |= DICT_CORRUPT; + err = DB_SUCCESS; + /* Do not return any error to the caller. The + duplicate will be reported by ALTER TABLE or + CREATE UNIQUE INDEX. 
Unfortunately we cannot + report the duplicate key value to the DDL + thread, because the altered_table object is + private to its call stack. */ + } else if (err != DB_SUCCESS) { + break; + } + + mem_heap_empty(heap); + } while ((node->index = dict_table_get_next_index(node->index))); + + mem_heap_free(heap); + + return(err); +} + +/***********************************************************//** +Undoes a modify in secondary indexes when undo record type is UPD_EXIST. +Returns at once when node->cmpl_info has UPD_NODE_NO_ORD_CHANGE set. +Otherwise, for each remaining usable index whose ordering fields are +changed by node->update, the entry built from node->row is passed to +row_undo_mod_del_mark_or_remove_sec(), and the entry built from +node->undo_row to row_undo_mod_del_unmark_sec_and_undo_update(). +@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +row_undo_mod_upd_exist_sec( +/*=======================*/ + undo_node_t* node, /*!< in: row undo node */ + que_thr_t* thr) /*!< in: query thread */ +{ + if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) { + return DB_SUCCESS; + } + + mem_heap_t* heap = mem_heap_create(1024); + dberr_t err = DB_SUCCESS; + + do { + dict_index_t* index = node->index; + + if (index->type & (DICT_FTS | DICT_CORRUPT) + || !index->is_committed()) { + continue; + } + + if (!row_upd_changes_ord_field_binary_func( + index, node->update, +#ifdef UNIV_DEBUG + thr, +#endif /* UNIV_DEBUG */ + node->row, node->ext, ROW_BUILD_FOR_UNDO)) { + continue; + } + + /* Build the newest version of the index entry */ + dtuple_t* entry = row_build_index_entry( + node->row, node->ext, index, heap); + if (UNIV_UNLIKELY(!entry)) { + /* The server must have crashed in + row_upd_clust_rec_by_insert() before + the updated externally stored columns (BLOBs) + of the new clustered index entry were written. */ + + /* The table must be in DYNAMIC or COMPRESSED + format. REDUNDANT and COMPACT formats + store a local 768-byte prefix of each + externally stored column. */ + ut_a(dict_table_has_atomic_blobs(index->table)); + + /* This is only legitimate when + rolling back an incomplete transaction + after crash recovery. 
*/ + ut_a(thr_get_trx(thr)->is_recovered); + + /* The server must have crashed before + completing the insert of the new + clustered index entry and before + inserting to the secondary indexes. + Because node->row was not yet written + to this index, we can ignore it. But + we must restore node->undo_row. */ + } else { + /* NOTE that if we updated the fields of a + delete-marked secondary index record so that + alphabetically they stayed the same, e.g., + 'abc' -> 'aBc', we cannot return to the + original values because we do not know them. + But this should not cause problems because + in row0sel.cc, in queries we always retrieve + the clustered index record or an earlier + version of it, if the secondary index record + through which we do the search is + delete-marked. */ + + err = row_undo_mod_del_mark_or_remove_sec( + node, thr, index, entry); + if (err != DB_SUCCESS) { + break; + } + } + + mem_heap_empty(heap); + /* We may have to update the delete mark in the + secondary index record of the previous version of + the row. We also need to update the fields of + the secondary index record if we updated its fields + but alphabetically they stayed the same, e.g., + 'abc' -> 'aBc'. */ + entry = row_build_index_entry_low(node->undo_row, + node->undo_ext, + index, heap, + ROW_BUILD_FOR_UNDO); + ut_a(entry); + + err = row_undo_mod_del_unmark_sec_and_undo_update( + BTR_MODIFY_LEAF, thr, index, entry); + if (err == DB_FAIL) { + err = row_undo_mod_del_unmark_sec_and_undo_update( + BTR_MODIFY_TREE, thr, index, entry); + } + + if (err == DB_DUPLICATE_KEY) { + index->type |= DICT_CORRUPT; + err = DB_SUCCESS; + } else if (err != DB_SUCCESS) { + break; + } + + mem_heap_empty(heap); + } while ((node->index = dict_table_get_next_index(node->index))); + + mem_heap_free(heap); + + return(err); +} + +/** Parse an update undo record. 
+@param[in,out] node row rollback state +@param[in] dict_locked whether the data dictionary cache is locked */ +static bool row_undo_mod_parse_undo_rec(undo_node_t* node, bool dict_locked) +{ + dict_index_t* clust_index; + undo_no_t undo_no; + table_id_t table_id; + trx_id_t trx_id; + roll_ptr_t roll_ptr; + byte info_bits; + byte type; + byte cmpl_info; + bool dummy_extern; + + ut_ad(node->trx->in_rollback); + ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr)); + + const byte *ptr = trx_undo_rec_get_pars( + node->undo_rec, &type, &cmpl_info, + &dummy_extern, &undo_no, &table_id); + node->rec_type = type; + + if (!node->is_temp) { + node->table = dict_table_open_on_id(table_id, dict_locked, + DICT_TABLE_OP_NORMAL); + } else if (!dict_locked) { + dict_sys.freeze(SRW_LOCK_CALL); + node->table = dict_sys.acquire_temporary_table(table_id); + dict_sys.unfreeze(); + } else { + node->table = dict_sys.acquire_temporary_table(table_id); + } + + if (!node->table) { + return false; + } + + ut_ad(!node->table->skip_alter_undo); + + if (UNIV_UNLIKELY(!node->table->is_accessible())) { +close_table: + /* Normally, tables should not disappear or become + unaccessible during ROLLBACK, because they should be + protected by InnoDB table locks. Corruption could be + a valid exception. + + FIXME: When running out of temporary tablespace, it + would probably be better to just drop all temporary + tables (and temporary undo log records) of the current + connection, instead of doing this rollback. 
*/ + dict_table_close(node->table, dict_locked); + node->table = NULL; + return false; + } + + clust_index = dict_table_get_first_index(node->table); + + ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr, + &info_bits); + + ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref), + node->heap); + + ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id, + roll_ptr, info_bits, + node->heap, &(node->update)); + node->new_trx_id = trx_id; + node->cmpl_info = cmpl_info; + ut_ad(!node->ref->info_bits); + + if (node->update->info_bits & REC_INFO_MIN_REC_FLAG) { + if ((node->update->info_bits & ~REC_INFO_DELETED_FLAG) + != REC_INFO_MIN_REC_FLAG) { + ut_ad("wrong info_bits in undo log record" == 0); + goto close_table; + } + /* This must be an undo log record for a subsequent + instant ALTER TABLE, extending the metadata record. */ + ut_ad(clust_index->is_instant()); + ut_ad(clust_index->table->instant + || !(node->update->info_bits & REC_INFO_DELETED_FLAG)); + node->ref = &trx_undo_metadata; + node->update->info_bits = (node->update->info_bits + & REC_INFO_DELETED_FLAG) + ? REC_INFO_METADATA_ALTER + : REC_INFO_METADATA_ADD; + } + + if (!row_undo_search_clust_to_pcur(node)) { + /* As long as this rolling-back transaction exists, + the PRIMARY KEY value pointed to by the undo log + record should exist. + + However, if InnoDB is killed during a rollback, or + shut down during the rollback of recovered + transactions, then after restart we may try to roll + back some of the same undo log records again, because + trx_roll_try_truncate() is not being invoked after + every undo log record. + + It is also possible that the record + was not modified yet (the DB_ROLL_PTR does not match + node->roll_ptr) and thus there is nothing to roll back. + + btr_cur_upd_lock_and_undo() only writes the undo log + record after successfully acquiring an exclusive lock + on the the clustered index record. 
That lock will not + be released before the transaction is committed or + fully rolled back. (Exception: if the server was + killed, restarted, and shut down again before the + rollback of the recovered transaction was completed, + it is possible that the transaction was partially + rolled back and locks released.) */ + goto close_table; + } + + /* Extract indexed virtual columns from undo log */ + if (node->ref != &trx_undo_metadata && node->table->n_v_cols) { + row_upd_replace_vcol(node->row, node->table, + node->update, false, node->undo_row, + (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) + ? nullptr : ptr); + } + + return true; +} + +/***********************************************************//** +Undoes a modify operation on a row of a table. +@return DB_SUCCESS or error code */ +dberr_t +row_undo_mod( +/*=========*/ + undo_node_t* node, /*!< in: row undo node */ + que_thr_t* thr) /*!< in: query thread */ +{ + dberr_t err = DB_SUCCESS; + ut_ad(thr_get_trx(thr) == node->trx); + const bool dict_locked = node->trx->dict_operation_lock_mode; + + if (!row_undo_mod_parse_undo_rec(node, dict_locked)) { + return DB_SUCCESS; + } + + ut_ad(node->table->is_temporary() + || lock_table_has_locks(node->table)); + node->index = dict_table_get_first_index(node->table); + ut_ad(dict_index_is_clust(node->index)); + + if (node->ref->info_bits) { + ut_ad(node->ref->is_metadata()); + goto rollback_clust; + } + + /* Skip the clustered index (the first index) */ + node->index = dict_table_get_next_index(node->index); + if (node->index) { + switch (node->rec_type) { + case TRX_UNDO_UPD_EXIST_REC: + err = row_undo_mod_upd_exist_sec(node, thr); + break; + case TRX_UNDO_DEL_MARK_REC: + err = row_undo_mod_del_mark_sec(node, thr); + break; + case TRX_UNDO_UPD_DEL_REC: + err = row_undo_mod_upd_del_sec(node, thr); + break; + default: + MY_ASSERT_UNREACHABLE(); + } + } + + if (err == DB_SUCCESS) { +rollback_clust: + err = row_undo_mod_clust(node, thr); + + bool update_statistics + = 
!(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE); + + if (err == DB_SUCCESS && node->table->stat_initialized) { + switch (node->rec_type) { + case TRX_UNDO_UPD_EXIST_REC: + break; + case TRX_UNDO_DEL_MARK_REC: + dict_table_n_rows_inc(node->table); + update_statistics = update_statistics + || !srv_stats_include_delete_marked; + break; + case TRX_UNDO_UPD_DEL_REC: + dict_table_n_rows_dec(node->table); + update_statistics = update_statistics + || !srv_stats_include_delete_marked; + break; + } + + /* Do not attempt to update statistics when + executing ROLLBACK in the InnoDB SQL + interpreter, because in that case we would + already be holding dict_sys.latch, which + would be acquired when updating statistics. */ + if (update_statistics && !dict_locked) { + dict_stats_update_if_needed(node->table, + *node->trx); + } else { + node->table->stat_modified_counter++; + } + } + } + + dict_table_close(node->table, dict_locked); + + node->table = NULL; + + return(err); +} |