summaryrefslogtreecommitdiffstats
path: root/storage/innobase/include/trx0trx.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 18:00:34 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 18:00:34 +0000
commit3f619478f796eddbba6e39502fe941b285dd97b1 (patch)
treee2c7b5777f728320e5b5542b6213fd3591ba51e2 /storage/innobase/include/trx0trx.h
parentInitial commit. (diff)
downloadmariadb-3f619478f796eddbba6e39502fe941b285dd97b1.tar.xz
mariadb-3f619478f796eddbba6e39502fe941b285dd97b1.zip
Adding upstream version 1:10.11.6.upstream/1%10.11.6upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/innobase/include/trx0trx.h')
-rw-r--r--storage/innobase/include/trx0trx.h1268
1 files changed, 1268 insertions, 0 deletions
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
new file mode 100644
index 00000000..3cfbe331
--- /dev/null
+++ b/storage/innobase/include/trx0trx.h
@@ -0,0 +1,1268 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2015, 2022, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/trx0trx.h
+The transaction
+
+Created 3/26/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef trx0trx_h
+#define trx0trx_h
+
+#include "trx0types.h"
+#include "lock0types.h"
+#include "que0types.h"
+#include "mem0mem.h"
+#include "trx0xa.h"
+#include "ut0vec.h"
+#include "fts0fts.h"
+#include "read0types.h"
+#include "ilist.h"
+#include "row0merge.h"
+
+#include <vector>
+
+// Forward declaration
+struct mtr_t;
+struct rw_trx_hash_element_t;
+
+/******************************************************************//**
+Set detailed error message for the transaction. */
+void
+trx_set_detailed_error(
+/*===================*/
+ trx_t* trx, /*!< in: transaction struct */
+ const char* msg); /*!< in: detailed error message */
+/*************************************************************//**
+Set the detailed error message for the transaction from a file. Note that
+the file is rewound before reading from it. */
+void
+trx_set_detailed_error_from_file(
+/*=============================*/
+ trx_t* trx, /*!< in: transaction struct */
+ FILE* file); /*!< in: file to read message from */
+/****************************************************************//**
+Retrieves the error_info field from a trx.
+@return the error info */
+UNIV_INLINE
+const dict_index_t*
+trx_get_error_info(
+/*===============*/
+ const trx_t* trx); /*!< in: trx object */
+
+/** @return an allocated transaction */
+trx_t *trx_create();
+
+/** At shutdown, frees a transaction object. */
+void trx_free_at_shutdown(trx_t *trx);
+
+/** Disconnect a prepared transaction from MySQL.
+@param[in,out] trx transaction */
+void trx_disconnect_prepared(trx_t *trx);
+
+/** Initialize (resurrect) transactions at startup. */
+dberr_t trx_lists_init_at_db_start();
+
+/*************************************************************//**
+Starts the transaction if it is not yet started. */
+void
+trx_start_if_not_started_xa_low(
+/*============================*/
+ trx_t* trx, /*!< in/out: transaction */
+ bool read_write); /*!< in: true if read write transaction */
+/*************************************************************//**
+Starts the transaction if it is not yet started. */
+void
+trx_start_if_not_started_low(
+/*=========================*/
+ trx_t* trx, /*!< in/out: transaction */
+ bool read_write); /*!< in: true if read write transaction */
+
+/**
+Start a transaction for internal processing.
+@param trx transaction
+@param read_write whether writes may be performed */
+void trx_start_internal_low(trx_t *trx, bool read_write);
+
+#ifdef UNIV_DEBUG /* debug variants store the call site in the trx first */
+#define trx_start_if_not_started_xa(t, rw) \
+ do { \
+ (t)->start_line = __LINE__; \
+ (t)->start_file = __FILE__; \
+ trx_start_if_not_started_xa_low((t), (rw)); \
+ } while (0)
+
+#define trx_start_if_not_started(t, rw) \
+ do { \
+ (t)->start_line = __LINE__; \
+ (t)->start_file = __FILE__; \
+ trx_start_if_not_started_low((t), (rw)); \
+ } while (0)
+
+#define trx_start_internal(t) \
+ do { \
+ (t)->start_line = __LINE__; \
+ (t)->start_file = __FILE__; \
+ trx_start_internal_low((t), true); \
+ } while (0)
+#define trx_start_internal_read_only(t) \
+ do { \
+ (t)->start_line = __LINE__; \
+ (t)->start_file = __FILE__; \
+ trx_start_internal_low((t), false); \
+ } while (0)
+#else /* without UNIV_DEBUG, forward straight to the _low() functions */
+#define trx_start_if_not_started_xa(t, rw) \
+ trx_start_if_not_started_xa_low((t), (rw))
+
+#define trx_start_if_not_started(t, rw) \
+ trx_start_if_not_started_low((t), (rw))
+
+#define trx_start_internal(t) trx_start_internal_low((t), true)
+#define trx_start_internal_read_only(t) trx_start_internal_low((t), false)
+#endif /* UNIV_DEBUG */
+
+/** Start a transaction for a DDL operation.
+@param trx transaction */
+void trx_start_for_ddl_low(trx_t *trx);
+
+#ifdef UNIV_DEBUG /* debug variant: start_file must be unset; store call site */
+# define trx_start_for_ddl(t) \
+ do { \
+ ut_ad(!(t)->start_file); \
+ (t)->start_line = __LINE__; \
+ (t)->start_file = __FILE__; \
+ trx_start_for_ddl_low((t)); \
+ } while (0)
+#else /* without UNIV_DEBUG, forward straight to trx_start_for_ddl_low() */
+# define trx_start_for_ddl(t) trx_start_for_ddl_low((t))
+#endif /* UNIV_DEBUG */
+
+/**********************************************************************//**
+Does the transaction commit for MySQL.
+@return DB_SUCCESS or error number */
+dberr_t
+trx_commit_for_mysql(
+/*=================*/
+ trx_t* trx); /*!< in/out: transaction */
+/** XA PREPARE a transaction.
+@param[in,out] trx transaction to prepare */
+void trx_prepare_for_mysql(trx_t* trx);
+/**********************************************************************//**
+This function is used to find number of prepared transactions and
+their transaction objects for a recovery.
+@return number of prepared transactions */
+int
+trx_recover_for_mysql(
+/*==================*/
+ XID* xid_list, /*!< in/out: prepared transactions */
+ uint len); /*!< in: number of slots in xid_list */
+/** Look up an X/Open distributed transaction in XA PREPARE state.
+@param[in] xid X/Open XA transaction identifier
+@return transaction on match (the trx_t::xid will be invalidated);
+note that the trx may have been committed before the caller acquires
+trx_t::mutex
+@retval NULL if no match */
+trx_t* trx_get_trx_by_xid(const XID* xid);
+/** Durably write log until trx->commit_lsn
+(if trx_t::commit_in_memory() was invoked with flush_log_later=true). */
+void trx_commit_complete_for_mysql(trx_t *trx);
+/**********************************************************************//**
+Marks the latest SQL statement ended. */
+void
+trx_mark_sql_stat_end(
+/*==================*/
+ trx_t* trx); /*!< in: trx handle */
+/****************************************************************//**
+Prepares a transaction for commit/rollback. */
+void
+trx_commit_or_rollback_prepare(
+/*===========================*/
+ trx_t* trx); /*!< in/out: transaction */
+/*********************************************************************//**
+Creates a commit command node struct.
+@return own: commit node struct */
+commit_node_t*
+trx_commit_node_create(
+/*===================*/
+ mem_heap_t* heap); /*!< in: mem heap where created */
+/***********************************************************//**
+Performs an execution step for a commit type node in a query graph.
+@return query thread to run next, or NULL */
+que_thr_t*
+trx_commit_step(
+/*============*/
+ que_thr_t* thr); /*!< in: query thread */
+
+/**********************************************************************//**
+Prints info about a transaction. */
+void
+trx_print_low(
+/*==========*/
+ FILE* f,
+ /*!< in: output stream */
+ const trx_t* trx,
+ /*!< in: transaction */
+ ulint max_query_len,
+ /*!< in: max query length to print,
+ or 0 to use the default max length */
+ ulint n_rec_locks,
+ /*!< in: trx->lock.n_rec_locks */
+ ulint n_trx_locks,
+ /*!< in: length of trx->lock.trx_locks */
+ ulint heap_size);
+ /*!< in: mem_heap_get_size(trx->lock.lock_heap) */
+
+/**********************************************************************//**
+Prints info about a transaction.
+When possible, use trx_print() instead. */
+void
+trx_print_latched(
+/*==============*/
+ FILE* f, /*!< in: output stream */
+ const trx_t* trx, /*!< in: transaction */
+ ulint max_query_len); /*!< in: max query length to print,
+ or 0 to use the default max length */
+
+/**********************************************************************//**
+Prints info about a transaction.
+Acquires and releases lock_sys.latch. */
+void
+trx_print(
+/*======*/
+ FILE* f, /*!< in: output stream */
+ const trx_t* trx, /*!< in: transaction */
+ ulint max_query_len); /*!< in: max query length to print,
+ or 0 to use the default max length */
+
+/**********************************************************************//**
+Determines if a transaction is in the given state.
+The caller must hold trx->mutex, or it must be the thread
+that is serving a running transaction.
+A running RW transaction must be in trx_sys.rw_trx_hash.
+@return true if trx->state == state */
+UNIV_INLINE
+bool
+trx_state_eq(
+/*=========*/
+ const trx_t* trx, /*!< in: transaction */
+ trx_state_t state, /*!< in: state;
+ if state != TRX_STATE_NOT_STARTED
+ asserts that
+ trx->state != TRX_STATE_NOT_STARTED */
+ bool relaxed = false)
+ /*!< in: whether to allow
+ trx->state == TRX_STATE_NOT_STARTED
+ after an error has been reported */
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+
+/**********************************************************************//**
+Determines if the currently running transaction has been interrupted.
+@return true if interrupted */
+bool
+trx_is_interrupted(
+/*===============*/
+ const trx_t* trx); /*!< in: transaction */
+
+/*******************************************************************//**
+Calculates the "weight" of a transaction. The weight is estimated as the
+number of altered rows (t->undo_no) plus the length of t->lock.trx_locks.
+@param t transaction; the argument is expanded twice, so avoid side effects
+@return transaction weight */
+#define TRX_WEIGHT(t) ((t)->undo_no + UT_LIST_GET_LEN((t)->lock.trx_locks))
+
+/** Create the trx_t pool */
+void
+trx_pool_init();
+
+/** Destroy the trx_t pool */
+void
+trx_pool_close();
+
+/**
+Set the transaction as a read-write transaction if it is not already
+tagged as such.
+@param[in,out] trx Transaction that needs to be "upgraded" to RW from RO */
+void
+trx_set_rw_mode(
+ trx_t* trx);
+
+/**
+Transactions that aren't started by the MySQL server don't set
+the trx_t::mysql_thd field. For such transactions we set the lock
+wait timeout to 0 instead of the user configured value that comes
+from innodb_lock_wait_timeout via trx_t::mysql_thd.
+@param t transaction; the argument is expanded twice, so avoid side effects
+@return lock wait timeout in seconds, or 0 if t->mysql_thd is NULL */
+#define trx_lock_wait_timeout_get(t) \
+ ((t)->mysql_thd != NULL \
+ ? thd_lock_wait_timeout((t)->mysql_thd) \
+ : 0)
+
+typedef std::vector<ib_lock_t*, ut_allocator<ib_lock_t*> > lock_list;
+
+/** The locks and lock-wait state of an active transaction. Protected by
+lock_sys.latch, trx->mutex or both; see the individual members. */
+struct trx_lock_t
+{
+ /** Lock request being waited for.
+ Set to nonnull when holding lock_sys.latch, lock_sys.wait_mutex and
+ trx->mutex, by the thread that is executing the transaction.
+ Set to nullptr when holding lock_sys.wait_mutex. */
+ Atomic_relaxed<lock_t*> wait_lock;
+ /** Transaction being waited for; protected by lock_sys.wait_mutex */
+ trx_t *wait_trx;
+ /** condition variable for !wait_lock; used with lock_sys.wait_mutex */
+ pthread_cond_t cond;
+ /** lock wait start time */
+ Atomic_relaxed<my_hrtime_t> suspend_time;
+
+#if defined(UNIV_DEBUG) || !defined(DBUG_OFF)
+ /** Bit flags:
+ 2=high priority WSREP thread has marked this trx to abort;
+ 1=another transaction chose this as a victim in deadlock resolution.
+
+ Threads other than the one that is executing the transaction may set
+ flags in this while holding lock_sys.wait_mutex. */
+ Atomic_relaxed<byte> was_chosen_as_deadlock_victim;
+
+ /** Flag the lock owner as a victim in Galera conflict resolution,
+ by atomically setting the flag 2 in was_chosen_as_deadlock_victim. */
+ void set_wsrep_victim()
+ {
+# if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+ /* Atomically set bit 1 (the value 2), like the fetch_or(2) below.
+ There is no 8-bit version of the 80386 BTS instruction.
+ Technically, this is the wrong addressing mode (16-bit), but
+ there are other data members stored after the byte. */
+ __asm__ __volatile__("lock btsw $1, %0"
+ : "+m" (was_chosen_as_deadlock_victim));
+# else
+ was_chosen_as_deadlock_victim.fetch_or(2);
+# endif
+ }
+#else /* defined(UNIV_DEBUG) || !defined(DBUG_OFF) */
+
+ /** High priority WSREP thread has marked this trx to abort or
+ another transaction chose this as a victim in deadlock resolution.
+ (In this build variant the two reasons are not distinguished.)
+
+ Threads other than the one that is executing the transaction may set
+ this while holding lock_sys.wait_mutex. */
+ Atomic_relaxed<bool> was_chosen_as_deadlock_victim;
+
+ /** Flag the lock owner as a victim in Galera conflict resolution. */
+ void set_wsrep_victim() { was_chosen_as_deadlock_victim= true; }
+#endif /* defined(UNIV_DEBUG) || !defined(DBUG_OFF) */
+
+ /** Next available rec_pool[] entry */
+ byte rec_cached;
+ /** Next available table_pool[] entry */
+ byte table_cached;
+
+ que_thr_t* wait_thr; /*!< query thread belonging to this
+ trx that is in waiting
+ state. For threads suspended in a
+ lock wait, this is protected by
+ lock_sys.latch. Otherwise, this may
+ only be modified by the thread that is
+ serving the running transaction. */
+
+ /** Pre-allocated record locks; each entry is aligned to
+ CPU_LEVEL1_DCACHE_LINESIZE */
+ struct {
+ alignas(CPU_LEVEL1_DCACHE_LINESIZE) ib_lock_t lock;
+ } rec_pool[8];
+
+ /** Pre-allocated table locks */
+ ib_lock_t table_pool[8];
+
+ /** Memory heap for trx_locks. Protected by lock_sys.assert_locked()
+ and lock_sys.is_writer() || trx->mutex_is_owner(). */
+ mem_heap_t *lock_heap;
+
+ /** Locks held by the transaction. Protected by lock_sys.assert_locked()
+ and lock_sys.is_writer() || trx->mutex_is_owner().
+ (If lock_sys.latch is only held in shared mode, then the modification
+ must be protected by trx->mutex.) */
+ trx_lock_list_t trx_locks;
+
+ lock_list table_locks; /*!< All table locks requested by this
+ transaction, including AUTOINC locks */
+
+ /** List of pending trx_t::evict_table() */
+ UT_LIST_BASE_NODE_T(dict_table_t) evicted_tables;
+
+ /** number of record locks; protected by lock_sys.assert_locked(page_id) */
+ ulint n_rec_locks;
+};
+
+/** Logical first modification time of a table in a transaction,
+expressed as undo log record numbers (undo_no_t). Also owns the optional
+buffer (bulk_store) that is used for bulk insert. */
+class trx_mod_table_time_t
+{
+ /** Impossible value for trx_t::undo_no (all bits set) */
+ static constexpr undo_no_t NONE= ~undo_no_t{0};
+ /** Theoretical maximum value for trx_t::undo_no.
+ DB_ROLL_PTR is only 7 bytes, so it cannot point to more than
+ this many undo log records. Also used as a mask for extracting
+ the undo number from 'first'. */
+ static constexpr undo_no_t LIMIT= (undo_no_t{1} << (7 * 8)) - 1;
+
+ /** Flag in 'first' to indicate that subsequent operations are
+ covered by a TRX_UNDO_EMPTY record (for the first statement to
+ insert into an empty table) */
+ static constexpr undo_no_t BULK= 1ULL << 63;
+
+ /** First modification of the table, possibly ORed with BULK */
+ undo_no_t first;
+ /** First modification of a system versioned column
+ (NONE= no versioning, BULK= the table was dropped) */
+ undo_no_t first_versioned= NONE;
+#ifdef UNIV_DEBUG
+ /** Whether the modified table is a FTS auxiliary table */
+ bool fts_aux_table= false;
+#endif /* UNIV_DEBUG */
+
+ /** Buffer to store insert operations; allocated by start_bulk_insert()
+ for non-temporary tables and freed by clear_bulk_buffer() */
+ row_merge_bulk_t *bulk_store= nullptr;
+
+ friend struct trx_t;
+public:
+ /** Constructor
+ @param rows number of modified rows so far (must be less than LIMIT) */
+ trx_mod_table_time_t(undo_no_t rows) : first(rows) { ut_ad(rows < LIMIT); }
+
+#ifdef UNIV_DEBUG
+ /** Validation: the first modification (without the BULK flag) must not
+ exceed first_versioned nor rows.
+ @param rows number of modified rows so far
+ @return whether the object is valid */
+ bool valid(undo_no_t rows= NONE) const
+ { auto f= first & LIMIT; return f <= first_versioned && f <= rows; }
+#endif /* UNIV_DEBUG */
+ /** @return if versioned columns were modified; this is false only when
+ the low 56 bits of first_versioned are all set (as in NONE), so it also
+ holds after set_dropped() */
+ bool is_versioned() const { return (~first_versioned & LIMIT) != 0; }
+ /** @return if the table was dropped (first_versioned == BULK) */
+ bool is_dropped() const { return first_versioned == BULK; }
+
+ /** After writing an undo log record, set is_versioned() if needed
+ @param rows number of modified rows so far */
+ void set_versioned(undo_no_t rows)
+ {
+ ut_ad(first_versioned == NONE);
+ first_versioned= rows;
+ ut_ad(valid(rows));
+ }
+
+ /** After writing an undo log record, note that the table will be dropped */
+ void set_dropped()
+ {
+ ut_ad(first_versioned == NONE);
+ first_versioned= BULK;
+ }
+
+ /** Notify the start of a bulk insert operation: set the BULK flag and,
+ unless the table is temporary, allocate the bulk insert buffer.
+ @param table table to do bulk operation */
+ void start_bulk_insert(dict_table_t *table)
+ {
+ first|= BULK;
+ if (!table->is_temporary())
+ bulk_store= new row_merge_bulk_t(table);
+ }
+
+ /** Notify the end of a bulk insert operation (clears the BULK flag;
+ the buffer itself is released by clear_bulk_buffer()) */
+ void end_bulk_insert() { first&= ~BULK; }
+
+ /** @return whether an insert is covered by TRX_UNDO_EMPTY record */
+ bool is_bulk_insert() const { return first & BULK; }
+
+ /** Invoked after partial rollback
+ @param limit number of surviving modified rows (trx_t::undo_no)
+ @return whether this should be erased from trx_t::mod_tables */
+ bool rollback(undo_no_t limit)
+ {
+ ut_ad(valid());
+ if ((LIMIT & first) >= limit)
+ return true;
+ if (first_versioned < limit) /* NOTE(review): this resets the mark when */
+ first_versioned= NONE; /* it is below limit; confirm '<' is intended */
+ return false;
+ }
+
+#ifdef UNIV_DEBUG
+ void set_aux_table() { fts_aux_table= true; } /* mark as FTS auxiliary */
+
+ bool is_aux_table() const { return fts_aux_table; } /* see fts_aux_table */
+#endif /* UNIV_DEBUG */
+
+ /** @return the first undo record that modified the table
+ (with the BULK flag masked off) */
+ undo_no_t get_first() const
+ {
+ ut_ad(valid());
+ return LIMIT & first;
+ }
+
+ /** Add the tuple to the transaction bulk buffer for the given index.
+ bulk_store is dereferenced unconditionally; presumably callers check
+ bulk_buffer_exist() first (TODO confirm).
+ @param entry tuple to be inserted
+ @param index bulk insert for the index
+ @param trx transaction
+ @return the result of row_merge_bulk_t::bulk_insert_buffered() */
+ dberr_t bulk_insert_buffered(const dtuple_t &entry,
+ const dict_index_t &index, trx_t *trx)
+ {
+ return bulk_store->bulk_insert_buffered(entry, index, trx);
+ }
+
+ /** Do bulk insert operation present in the buffered operation
+ @return DB_SUCCESS or error code */
+ dberr_t write_bulk(dict_table_t *table, trx_t *trx);
+
+ /** @return whether the buffer storage exists and a bulk insert
+ is in progress */
+ bool bulk_buffer_exist() const
+ {
+ return bulk_store && is_bulk_insert();
+ }
+
+ /** Free the bulk insert buffer, if any, and reset the pointer */
+ void clear_bulk_buffer()
+ {
+ delete bulk_store;
+ bulk_store= nullptr;
+ }
+};
+
+/** Collection of persistent tables and their first modification
+in a transaction.
+We store pointers to the table objects in memory because
+we know that a table object will not be destroyed while a transaction
+that modified it is running. */
+typedef std::map<
+ dict_table_t*, trx_mod_table_time_t,
+ std::less<dict_table_t*>,
+ ut_allocator<std::pair<dict_table_t* const, trx_mod_table_time_t> > >
+ trx_mod_tables_t;
+
+/** The transaction handle
+
+Normally, there is a 1:1 relationship between a transaction handle
+(trx) and a session (client connection). One session is associated
+with exactly one user transaction. There are some exceptions to this:
+
+* For DDL operations, a subtransaction is allocated that modifies the
+data dictionary tables. Lock waits and deadlocks are prevented by
+acquiring the dict_sys.latch before starting the subtransaction
+and releasing it after committing the subtransaction.
+
+* The purge system uses a special transaction that is not associated
+with any session.
+
+* If the system crashed or it was quickly shut down while there were
+transactions in the ACTIVE or PREPARED state, these transactions would
+no longer be associated with a session when the server is restarted.
+
+A session may be served by at most one thread at a time. The serving
+thread of a session might change in some MySQL implementations.
+Therefore we do not have pthread_self() assertions in the code.
+
+Normally, only the thread that is currently associated with a running
+transaction may access (read and modify) the trx object, and it may do
+so without holding any mutex. The following are exceptions to this:
+
+* trx_rollback_recovered() may access resurrected (connectionless)
+transactions (state == TRX_STATE_ACTIVE && is_recovered)
+while the system is already processing new user transactions (!is_recovered).
+
+* trx_print_low() may access transactions not associated with the current
+thread. The caller must be holding lock_sys.latch.
+
+* When a transaction handle is in the trx_sys.trx_list, some of its fields
+must not be modified without holding trx->mutex.
+
+* The locking code (in particular, lock_deadlock_recursive() and
+lock_rec_convert_impl_to_expl()) will access transactions associated
+with other connections. The locks of transactions are protected by
+lock_sys.latch (insertions also by trx->mutex). */
+
+/** A rollback segment assigned to a transaction, together with the
+undo log of the transaction. Both pointers may be NULL. */
+struct trx_undo_ptr_t {
+ trx_rseg_t* rseg; /*!< rollback segment assigned to the
+ transaction, or NULL if not assigned
+ yet */
+ trx_undo_t* undo; /*!< pointer to the undo log, or
+ NULL if nothing logged yet */
+};
+
+/** A temporary rollback segment assigned to a transaction, together with
+the undo log of the transaction. Both pointers may be NULL. */
+struct trx_temp_undo_t {
+ /** temporary rollback segment, or NULL if not assigned yet */
+ trx_rseg_t* rseg;
+ /** pointer to the undo log, or NULL if nothing logged yet */
+ trx_undo_t* undo;
+};
+
+/** Rollback segments assigned to a transaction for undo logging. */
+struct trx_rsegs_t {
+ /** undo log pointer holding a reference to a rollback segment that
+ resides in the system/undo tablespace; used for undo logging of tables
+ that need to be recovered on crash. */
+ trx_undo_ptr_t m_redo;
+
+ /** undo log for temporary tables; discarded immediately after
+ transaction commit/rollback */
+ trx_temp_undo_t m_noredo;
+};
+
+struct trx_t : ilist_node<>
+{
+private:
+ /**
+ Least significant 31 bits is count of references.
+
+ We can't release the locks nor commit the transaction until this reference
+ is 0. We can change the state to TRX_STATE_COMMITTED_IN_MEMORY to signify
+ that it is no longer "active".
+
+ If the most significant bit is set this transaction should stop inheriting
+ (GAP)locks. Generally set to true during transaction prepare for RC or lower
+ isolation, if requested. Needed for replication replay where
+ we don't want to get blocked on GAP locks taken for protecting
+ concurrent unique insert or replace operation.
+ */
+ alignas(CPU_LEVEL1_DCACHE_LINESIZE)
+ Atomic_relaxed<uint32_t> skip_lock_inheritance_and_n_ref;
+
+
+public:
+ /** Transaction identifier (0 if no locks were acquired).
+ Set by trx_sys_t::register_rw() or trx_resurrect() before
+ the transaction is added to trx_sys.rw_trx_hash.
+ Cleared in commit_in_memory() after commit_state(),
+ trx_sys_t::deregister_rw(), release_locks(). */
+ trx_id_t id;
+ /** The largest encountered transaction identifier for which no
+ transaction was observed to be active. This is a cache to speed up
+ trx_sys_t::find_same_or_older(). */
+ trx_id_t max_inactive_id;
+
+private:
+ /** mutex protecting state and some of lock
+ (some are protected by lock_sys.latch) */
+ srw_spin_mutex mutex;
+#ifdef UNIV_DEBUG
+ /** The owner of mutex (0 if none); protected by mutex */
+ std::atomic<pthread_t> mutex_owner{0};
+#endif /* UNIV_DEBUG */
+public:
+ void mutex_init() { mutex.init(); }
+ void mutex_destroy() { mutex.destroy(); }
+
+ /** Acquire the mutex */
+ void mutex_lock()
+ {
+ ut_ad(!mutex_is_owner());
+ mutex.wr_lock();
+ ut_ad(!mutex_owner.exchange(pthread_self(),
+ std::memory_order_relaxed));
+ }
+ /** Release the mutex */
+ void mutex_unlock()
+ {
+ ut_ad(mutex_owner.exchange(0, std::memory_order_relaxed)
+ == pthread_self());
+ mutex.wr_unlock();
+ }
+#ifndef SUX_LOCK_GENERIC
+ bool mutex_is_locked() const noexcept { return mutex.is_locked(); }
+#endif
+#ifdef UNIV_DEBUG
+ /** @return whether the current thread holds the mutex */
+ bool mutex_is_owner() const
+ {
+ return mutex_owner.load(std::memory_order_relaxed) ==
+ pthread_self();
+ }
+#endif /* UNIV_DEBUG */
+
+ /** State of the trx from the point of view of concurrency control
+ and the valid state transitions.
+
+ Possible states:
+
+ TRX_STATE_NOT_STARTED
+ TRX_STATE_ACTIVE
+ TRX_STATE_PREPARED
+ TRX_STATE_PREPARED_RECOVERED (special case of TRX_STATE_PREPARED)
+ TRX_STATE_COMMITTED_IN_MEMORY (alias below COMMITTED)
+
+ Valid state transitions are:
+
+ Regular transactions:
+ * NOT_STARTED -> ACTIVE -> COMMITTED -> NOT_STARTED
+
+ Auto-commit non-locking read-only:
+ * NOT_STARTED -> ACTIVE -> NOT_STARTED
+
+ XA (2PC):
+ * NOT_STARTED -> ACTIVE -> PREPARED -> COMMITTED -> NOT_STARTED
+
+ Recovered XA:
+ * NOT_STARTED -> PREPARED -> COMMITTED -> (freed)
+
+ Recovered XA followed by XA ROLLBACK:
+ * NOT_STARTED -> PREPARED -> ACTIVE -> COMMITTED -> (freed)
+
+ XA (2PC) (shutdown or disconnect before ROLLBACK or COMMIT):
+ * NOT_STARTED -> PREPARED -> (freed)
+
+ Disconnected XA PREPARE transaction can become recovered:
+ * ... -> ACTIVE -> PREPARED (connected) -> PREPARED (disconnected)
+
+ Latching and various transaction lists membership rules:
+
+ XA (2PC) transactions are always treated as non-autocommit.
+
+ Transitions to ACTIVE or NOT_STARTED occur when transaction
+ is not in rw_trx_hash.
+
+ Autocommit non-locking read-only transactions move between states
+ without holding any mutex. They are not in rw_trx_hash.
+
+ All transactions, unless they are determined to be ac-nl-ro,
+ explicitly tagged as read-only or read-write, will first be put
+ on the read-only transaction list. Only when a !read-only transaction
+ in the read-only list tries to acquire an X or IX lock on a table
+ do we remove it from the read-only list and put it on the read-write
+ list. During this switch we assign it a rollback segment.
+
+ When a transaction is NOT_STARTED, it can be in trx_list. It cannot be
+ in rw_trx_hash.
+
+ ACTIVE->PREPARED->COMMITTED is only possible when trx is in rw_trx_hash.
+ The transition ACTIVE->PREPARED is protected by trx->mutex.
+
+ ACTIVE->COMMITTED is possible when the transaction is in
+ rw_trx_hash.
+
+ Transitions to COMMITTED are protected by trx_t::mutex. */
+ Atomic_relaxed<trx_state_t> state;
+
+ /** The locks of the transaction. Protected by lock_sys.latch
+ (insertions also by trx_t::mutex). */
+ alignas(CPU_LEVEL1_DCACHE_LINESIZE) trx_lock_t lock;
+
+#ifdef WITH_WSREP
+ /** whether wsrep_on(mysql_thd) held at the start of transaction */
+ byte wsrep;
+ bool is_wsrep() const { return UNIV_UNLIKELY(wsrep); }
+ bool is_wsrep_UK_scan() const { return UNIV_UNLIKELY(wsrep & 2); }
+#else /* WITH_WSREP */
+ bool is_wsrep() const { return false; }
+#endif /* WITH_WSREP */
+
+ /** Consistent read view of the transaction */
+ ReadView read_view;
+
+ /* These fields are not protected by any mutex. */
+
+ /** false=normal transaction, true=recovered (must be rolled back)
+ or disconnected transaction in XA PREPARE STATE.
+
+ This field is accessed by the thread that owns the transaction,
+ without holding any mutex.
+ There is only one foreign-thread access in trx_print_low()
+ and a possible race condition with trx_disconnect_prepared(). */
+ bool is_recovered;
+ const char* op_info; /*!< English text describing the
+ current operation, or an empty
+ string */
+ uint isolation_level;/*!< TRX_ISO_REPEATABLE_READ, ... */
+ bool check_foreigns; /*!< normally TRUE, but if the user
+ wants to suppress foreign key checks,
+ (in table imports, for example) we
+ set this FALSE */
+ /** whether an insert into an empty table is active */
+ bool bulk_insert;
+ /*------------------------------*/
+ /* MySQL has a transaction coordinator to coordinate two phase
+ commit between multiple storage engines and the binary log. When
+ an engine participates in a transaction, it's responsible for
+ registering itself using the trans_register_ha() API. */
+ bool is_registered; /* This flag is set to true after the
+ transaction has been registered with
+ the coordinator using the XA API, and
+ is set to false after commit or
+ rollback. */
+ /** whether this is holding the prepare mutex */
+ bool active_commit_ordered;
+ /*------------------------------*/
+ bool check_unique_secondary;
+ /*!< normally TRUE, but if the user
+ wants to speed up inserts by
+ suppressing unique key checks
+ for secondary indexes when we decide
+ if we can use the insert buffer for
+ them, we set this FALSE */
+ bool flush_log_later;/* In 2PC, we hold the
+ prepare_commit mutex across
+ both phases. In that case, we
+ defer flush of the logs to disk
+ until after we release the
+ mutex. */
+ ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
+ /** whether this modifies InnoDB dictionary tables */
+ bool dict_operation;
+#ifdef UNIV_DEBUG
+ /** copy of dict_operation during commit() */
+ bool was_dict_operation;
+#endif
+ /** whether dict_sys.latch is held exclusively; protected by
+ dict_sys.latch */
+ bool dict_operation_lock_mode;
+
+ /** wall-clock time of the latest transition to TRX_STATE_ACTIVE;
+ used for diagnostic purposes only */
+ time_t start_time;
+ /** microsecond_interval_timer() of transaction start */
+ ulonglong start_time_micro;
+ lsn_t commit_lsn; /*!< lsn at the time of the commit */
+ /*------------------------------*/
+ THD* mysql_thd; /*!< MySQL thread handle corresponding
+ to this trx, or NULL */
+
+ const char* mysql_log_file_name;
+ /*!< if MySQL binlog is used, this field
+ contains a pointer to the latest file
+ name; this is NULL if binlog is not
+ used */
+ ulonglong mysql_log_offset;
+ /*!< if MySQL binlog is used, this
+ field contains the end offset of the
+ binlog entry */
+ /*------------------------------*/
+ ib_uint32_t n_mysql_tables_in_use; /*!< number of Innobase tables
+ used in the processing of the current
+ SQL statement in MySQL */
+ ib_uint32_t mysql_n_tables_locked;
+ /*!< how many tables the current SQL
+ statement uses, except those
+ in consistent read */
+
+ /** DB_SUCCESS or error code; usually only the thread that is running
+ the transaction is allowed to modify this field. The only exception is
+ when a thread invokes lock_sys_t::cancel() in order to abort a
+ lock_wait(). That is protected by lock_sys.wait_mutex and lock.wait_lock. */
+ dberr_t error_state;
+
+ const dict_index_t*error_info; /*!< if the error number indicates a
+ duplicate key error, a pointer to
+ the problematic index is stored here */
+ ulint error_key_num; /*!< if the index creation fails to a
+ duplicate key error, a mysql key
+ number of that index is stored here */
+ que_t* graph; /*!< query currently run in the session,
+ or NULL if none; NOTE that the query
+ belongs to the session, and it can
+ survive over a transaction commit, if
+ it is a stored procedure with a COMMIT
+ WORK statement, for instance */
+ /*------------------------------*/
+ UT_LIST_BASE_NODE_T(trx_named_savept_t)
+ trx_savepoints; /*!< savepoints set with SAVEPOINT ...,
+ oldest first */
+ /*------------------------------*/
+ undo_no_t undo_no; /*!< next undo log record number to
+ assign; since the undo log is
+ private for a transaction, this
+ is a simple ascending sequence
+ with no gaps; thus it represents
+ the number of modified/inserted
+ rows in a transaction */
+ trx_savept_t last_sql_stat_start;
+ /*!< undo_no when the last sql statement
+ was started: in case of an error, trx
+ is rolled back down to this number */
+ trx_rsegs_t rsegs; /*!< rollback segments for undo logging */
+ undo_no_t roll_limit; /*!< least undo number to undo during
+ a partial rollback; 0 otherwise */
+ bool in_rollback; /*!< true when the transaction is
+ executing a partial or full rollback */
+ ulint pages_undone; /*!< number of undo log pages undone
+ since the last undo log truncation */
+ /*------------------------------*/
+ ulint n_autoinc_rows; /*!< no. of AUTO-INC rows required for
+ an SQL statement. This is useful for
+ multi-row INSERTs */
+ ib_vector_t* autoinc_locks; /*!< AUTOINC locks held by this
+ transaction. Note that these are
+ also in the lock list trx_locks. This
+ vector needs to be freed explicitly
+ when the trx instance is destroyed.
+ Protected by lock_sys.latch. */
+ /*------------------------------*/
+ bool read_only; /*!< true if transaction is flagged
+ as a READ-ONLY transaction.
+ if auto_commit && !will_lock
+ then it will be handled as a
+ AC-NL-RO-SELECT (Auto Commit Non-Locking
+ Read Only Select). A read only
+ transaction will not be assigned an
+ UNDO log. */
+ bool auto_commit; /*!< true if it is an autocommit */
+ bool will_lock; /*!< set to inform trx_start_low() that
+ the transaction may acquire locks */
+ /** True if transaction has to read the undo log and
+ log the DML changes for online DDL table */
+ bool apply_online_log = false;
+
+ /*------------------------------*/
+ fts_trx_t* fts_trx; /*!< FTS information, or NULL if
+ transaction hasn't modified tables
+ with FTS indexes (yet). */
+ doc_id_t fts_next_doc_id;/*!< The document id used for updates */
+ /*------------------------------*/
+ ib_uint32_t flush_tables; /*!< if "covering" the "FLUSH TABLES",
+ count of tables being flushed. */
+
+ /*------------------------------*/
+#ifdef UNIV_DEBUG
+ unsigned start_line; /*!< Track where it was started from */
+ const char* start_file; /*!< Filename where it was started */
+#endif /* UNIV_DEBUG */
+
+ XID xid; /*!< X/Open XA transaction
+ identification to identify a
+ transaction branch */
+ trx_mod_tables_t mod_tables; /*!< List of tables that were modified
+ by this transaction */
+ /*------------------------------*/
+ char* detailed_error; /*!< detailed error message for last
+ error, or empty. */
+ rw_trx_hash_element_t *rw_trx_hash_element;
+ LF_PINS *rw_trx_hash_pins;
+ ulint magic_n;
+
+ /** Check for persistent undo.
+ @return whether rsegs.m_redo.undo is set, that is, whether any
+ persistent undo log has been generated */
+ bool has_logged_persistent() const
+ {
+   const bool persistent_undo= rsegs.m_redo.undo;
+   return persistent_undo;
+ }
+
+ /** Check for any undo, persistent (rsegs.m_redo) or not (rsegs.m_noredo).
+ @return whether any undo log has been generated */
+ bool has_logged() const
+ {
+   if (has_logged_persistent())
+     return true;
+   return bool(rsegs.m_noredo.undo);
+ }
+
+ /** Get the rollback segment for modifying temporary tables,
+ invoking assign_temp_rseg() if rsegs.m_noredo.rseg is not set yet.
+ @return rollback segment for modifying temporary tables */
+ trx_rseg_t* get_temp_rseg()
+ {
+   trx_rseg_t* const assigned= rsegs.m_noredo.rseg;
+   if (!assigned)
+     return assign_temp_rseg();
+
+   /* an already assigned segment implies a nonzero transaction id */
+   ut_ad(id != 0);
+   return assigned;
+ }
+
+ /** Transition to committed state, to release implicit locks. */
+ inline void commit_state();
+
+ /** Release any explicit locks of a committing transaction. */
+ inline void release_locks();
+
+ /** Evict a table definition due to the rollback of ALTER TABLE.
+ @param table_id table identifier
+ @param reset_only whether to only reset dict_table_t::def_trx_id
+ (defaults to false) */
+ void evict_table(table_id_t table_id, bool reset_only= false);
+
+ /** Initiate rollback.
+ @param savept savepoint to which to roll back (defaults to nullptr;
+ NOTE(review): presumably nullptr requests a full rollback - confirm)
+ @return error code or DB_SUCCESS */
+ dberr_t rollback(trx_savept_t *savept= nullptr);
+ /** Roll back an active transaction.
+ @param savept savepoint to which to roll back (defaults to nullptr) */
+ inline void rollback_low(trx_savept_t *savept= nullptr);
+ /** Finish rollback.
+ @return whether the rollback was completed normally
+ @retval false if the rollback was aborted by shutdown */
+ inline bool rollback_finish();
+private:
+ /** Apply any changes to tables for which online DDL is in progress. */
+ ATTRIBUTE_COLD void apply_log();
+ /** Process tables that were modified by the committing transaction. */
+ inline void commit_tables();
+ /** Mark a transaction committed in the main memory data structures.
+ @param mtr mini-transaction (if there are any persistent modifications) */
+ inline void commit_in_memory(const mtr_t *mtr);
+ /** Write log for committing the transaction. */
+ void commit_persist();
+ /** Clean up the transaction after commit_in_memory() */
+ void commit_cleanup();
+ /** Commit the transaction in a mini-transaction.
+ @param mtr mini-transaction (if there are any persistent modifications;
+ defaults to nullptr) */
+ void commit_low(mtr_t *mtr= nullptr);
+ /** Commit an empty transaction.
+ @param mtr mini-transaction */
+ void commit_empty(mtr_t *mtr);
+ /** Assign the transaction its history serialisation number and write the
+ UNDO log to the assigned rollback segment.
+ @param mtr mini-transaction */
+ inline void write_serialisation_history(mtr_t *mtr);
+public:
+ /** Commit the transaction. */
+ void commit();
+
+ /** Try to drop a persistent table.
+ @param table persistent table
+ @return error code */
+ dberr_t drop_table(const dict_table_t &table);
+ /** Try to drop the foreign key constraints for a persistent table.
+ @param name name of persistent table
+ @return error code */
+ dberr_t drop_table_foreign(const table_name_t &name);
+ /** Try to drop the statistics for a persistent table.
+ @param name name of persistent table
+ @return error code */
+ dberr_t drop_table_statistics(const table_name_t &name);
+ /** Commit the transaction, possibly after drop_table().
+ @param deleted handles of data files that were deleted */
+ void commit(std::vector<pfs_os_file_t> &deleted);
+
+
+ /** Discard all savepoints, starting from the first element of
+ trx_savepoints */
+ void savepoints_discard()
+ {
+   auto first_savept= UT_LIST_GET_FIRST(trx_savepoints);
+   savepoints_discard(first_savept);
+ }
+
+
+ /** Discard all savepoints starting from a particular savepoint.
+ @param savept first savepoint to discard */
+ void savepoints_discard(trx_named_savept_t *savept);
+
+
+ /** @return whether skip_lock_inheritance_and_n_ref, with bit 31
+ masked off, is nonzero (the reference count is not zero) */
+ bool is_referenced() const
+ {
+   const auto n_ref_mask= ~(1U << 31);
+   return (skip_lock_inheritance_and_n_ref & n_ref_mask) != 0;
+ }
+
+
+ /** Increment the reference count that is kept in
+ skip_lock_inheritance_and_n_ref. */
+ void reference()
+ {
+ /* old_n_ref is only declared if ut_d() keeps its argument
+ (NOTE(review): presumably in debug builds only - confirm) */
+ ut_d(auto old_n_ref =)
+ skip_lock_inheritance_and_n_ref.fetch_add(1);
+ /* With bit 31 shifted out, the previous value must not have had
+ bit 30 set (assuming a 32-bit counter - confirm) */
+ ut_ad(int32_t(old_n_ref << 1) >= 0);
+ }
+
+ /** Decrement the reference count that is kept in
+ skip_lock_inheritance_and_n_ref. */
+ void release_reference()
+ {
+ /* old_n_ref is only declared if ut_d() keeps its argument
+ (NOTE(review): presumably in debug builds only - confirm) */
+ ut_d(auto old_n_ref =)
+ skip_lock_inheritance_and_n_ref.fetch_sub(1);
+ /* With bit 31 shifted out, the previous value must have been
+ positive: there must have been a reference to release */
+ ut_ad(int32_t(old_n_ref << 1) > 0);
+ }
+
+ /** @return whether skip_lock_inheritance_and_n_ref >> 31 is nonzero,
+ that is, whether the flag in bit 31 is set */
+ bool is_not_inheriting_locks() const
+ {
+   const auto flag= skip_lock_inheritance_and_n_ref >> 31;
+   return flag != 0;
+ }
+
+ /** Set the flag in bit 31 of skip_lock_inheritance_and_n_ref. */
+ void set_skip_lock_inheritance()
+ {
+ /* Adding 1U << 31 sets the bit, given that it was clear before;
+ that precondition is what the assertion below checks */
+ ut_d(auto old_n_ref=) skip_lock_inheritance_and_n_ref.fetch_add(1U << 31);
+ ut_ad(!(old_n_ref >> 31));
+ }
+
+ /** Atomically clear the flag in bit 31 of
+ skip_lock_inheritance_and_n_ref, leaving the remaining bits (the
+ reference count) unchanged. */
+ void reset_skip_lock_inheritance()
+ {
+#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
+ __asm__("lock btrl $31, %0" : : "m"(skip_lock_inheritance_and_n_ref));
+#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
+ _interlockedbittestandreset(
+ reinterpret_cast<volatile long *>(&skip_lock_inheritance_and_n_ref),
+ 31);
+#else
+ /* The mask has to be ~(1U << 31). Unary ~ binds tighter than <<,
+ so the unparenthesized ~1U << 31 would evaluate to 0, and
+ fetch_and(0) would wipe out the reference count along with the flag. */
+ skip_lock_inheritance_and_n_ref.fetch_and(~(1U << 31));
+#endif
+ }
+
+ /** @return whether the transaction has lock on
+ mysql.innodb_table_stats or mysql.innodb_index_stats */
+ bool has_stats_table_lock() const;
+
+ /** Free the memory to trx_pools */
+ void free();
+
+
+ /** Assert, using ut_ad(), that this object is in the state expected
+ of a freed transaction: not started, no id, mutex not owned, no undo
+ logged, not referenced, no lock or lock wait, no modified tables, no
+ open read view, and the flags checked below at their expected values. */
+ void assert_freed() const
+ {
+ ut_ad(state == TRX_STATE_NOT_STARTED);
+ ut_ad(!id);
+ ut_ad(!mutex_is_owner());
+ ut_ad(!has_logged());
+ ut_ad(!is_referenced());
+ ut_ad(!is_wsrep());
+ ut_ad(!lock.was_chosen_as_deadlock_victim);
+ ut_ad(mod_tables.empty());
+ ut_ad(!read_view.is_open());
+ ut_ad(!lock.wait_thr);
+ ut_ad(!lock.wait_lock);
+ ut_ad(UT_LIST_GET_LEN(lock.trx_locks) == 0);
+ ut_ad(lock.table_locks.empty());
+ /* autoinc_locks may be unallocated; if allocated, it must be empty */
+ ut_ad(!autoinc_locks || ib_vector_is_empty(autoinc_locks));
+ ut_ad(UT_LIST_GET_LEN(lock.evicted_tables) == 0);
+ ut_ad(!dict_operation);
+ ut_ad(!apply_online_log);
+ ut_ad(!is_not_inheriting_locks());
+ /* both checks are expected to be enabled on a freed object */
+ ut_ad(check_foreigns);
+ ut_ad(check_unique_secondary);
+ }
+
+ /** End any bulk insert into one table, if the table was modified
+ by this transaction.
+ This has to be invoked on SAVEPOINT or at the end of a statement.
+ Even if a TRX_UNDO_EMPTY record was written for this table to cover an
+ insert into an empty table, subsequent operations will have to be covered
+ by row-level undo log records, so that ROLLBACK TO SAVEPOINT or a
+ rollback to the start of a statement will work.
+ @param table table on which any preceding bulk insert ended */
+ void end_bulk_insert(const dict_table_t &table)
+ {
+   const auto entry= mod_tables.find(const_cast<dict_table_t*>(&table));
+   if (entry == mod_tables.end())
+     return; /* the table is not in mod_tables: nothing to end */
+   entry->second.end_bulk_insert();
+ }
+
+ /** @return whether this is a non-locking autocommit transaction,
+ that is, auto_commit is set and will_lock is not */
+ bool is_autocommit_non_locking() const
+ {
+   if (!auto_commit)
+     return false;
+   return !will_lock;
+ }
+
+ /** End any bulk insert on every table in mod_tables.
+ This has to be invoked on SAVEPOINT or at the start of a statement.
+ Even if TRX_UNDO_EMPTY records were written for any table to cover an
+ insert into an empty table, subsequent operations will have to be covered
+ by row-level undo log records, so that ROLLBACK TO SAVEPOINT or a
+ rollback to the start of a statement will work. */
+ void end_bulk_insert()
+ {
+   for (auto it= mod_tables.begin(), last= mod_tables.end(); it != last; ++it)
+     it->second.end_bulk_insert();
+ }
+
+ /** Check for an ongoing bulk insert. This can only hold when bulk_insert
+ is set and both check_unique_secondary and check_foreigns are clear.
+ @return whether a bulk insert into empty table is in progress */
+ bool is_bulk_insert() const
+ {
+   const bool may_be_bulk=
+     bulk_insert && !check_unique_secondary && !check_foreigns;
+   if (!may_be_bulk)
+     return false;
+
+   /* one modified table in bulk insert mode is enough */
+   for (auto it= mod_tables.begin(), last= mod_tables.end(); it != last; ++it)
+     if (it->second.is_bulk_insert())
+       return true;
+
+   return false;
+ }
+
+ /** Look up a table whose bulk buffer exists in this transaction.
+ @param table table to look up in mod_tables
+ @return logical modification time of the table
+ @retval nullptr if bulk_insert is not set, or the table has no entry
+ in mod_tables with an existing bulk buffer */
+ trx_mod_table_time_t *check_bulk_buffer(dict_table_t *table)
+ {
+   if (UNIV_LIKELY(!bulk_insert))
+     return nullptr;
+
+   ut_ad(!check_unique_secondary);
+   ut_ad(!check_foreigns);
+
+   const auto entry= mod_tables.find(table);
+   const bool buffered=
+     entry != mod_tables.end() && entry->second.bulk_buffer_exist();
+   return buffered ? &entry->second : nullptr;
+ }
+
+ /** Apply the buffered insert operations of the transaction, if
+ bulk_insert is set; otherwise, do nothing.
+ @return DB_SUCCESS or error code */
+ dberr_t bulk_insert_apply()
+ {
+   if (UNIV_UNLIKELY(bulk_insert))
+     return bulk_insert_apply_low();
+   return DB_SUCCESS;
+ }
+
+private:
+ /** Apply the buffered bulk inserts.
+ @return DB_SUCCESS or error code */
+ dberr_t bulk_insert_apply_low();
+
+ /** Assign a rollback segment for modifying temporary tables.
+ @return the assigned rollback segment */
+ trx_rseg_t *assign_temp_rseg();
+};
+
+/**
+Check if transaction is started.
+@param[in] trx Transaction whose state we need to check
+@return true if the transaction state is anything other than
+TRX_STATE_NOT_STARTED */
+inline bool trx_is_started(const trx_t* trx)
+{
+  const bool not_started= trx->state == TRX_STATE_NOT_STARTED;
+  return !not_started;
+}
+
+/* Transaction isolation levels (trx->isolation_level) */
+#define TRX_ISO_READ_UNCOMMITTED 0 /* dirty read: non-locking
+ SELECTs are performed so that
+ we do not look at a possible
+ earlier version of a record;
+ thus they are not 'consistent'
+ reads under this isolation
+ level; otherwise like level
+ 2 (TRX_ISO_REPEATABLE_READ) */
+
+#define TRX_ISO_READ_COMMITTED 1 /* somewhat Oracle-like
+ isolation, except that in
+ range UPDATE and DELETE we
+ must block phantom rows
+ with next-key locks;
+ SELECT ... FOR UPDATE and ...
+ LOCK IN SHARE MODE only lock
+ the index records, NOT the
+ gaps before them, and thus
+ allow free inserting;
+ each consistent read reads its
+ own snapshot */
+
+#define TRX_ISO_REPEATABLE_READ 2 /* this is the default;
+ all consistent reads in the
+ same trx read the same
+ snapshot;
+ full next-key locking used
+ in locking reads to block
+ insertions into gaps */
+
+#define TRX_ISO_SERIALIZABLE 3 /* all plain SELECTs are
+ converted to LOCK IN SHARE
+ MODE reads */
+
+/* Treatment of duplicate values (trx->duplicates; for example, in inserts).
+Multiple flags can be combined with bitwise OR. */
+#define TRX_DUP_IGNORE 1U /* duplicate rows are to be updated */
+#define TRX_DUP_REPLACE 2U /* duplicate rows are to be replaced */
+
+
+/** Execution states of a commit node */
+enum commit_node_state {
+ /** about to send a commit signal to the transaction */
+ COMMIT_NODE_SEND = 1,
+ /** commit signal sent to the transaction, waiting for completion */
+ COMMIT_NODE_WAIT = 2
+};
+
+/** Commit command node in a query graph */
+struct commit_node_t{
+ /** node type: QUE_NODE_COMMIT */
+ que_common_t common;
+ /** node execution state */
+ enum commit_node_state state;
+};
+
+
+#include "trx0trx.inl"
+
+#endif