diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:00:34 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:00:34 +0000 |
commit | 3f619478f796eddbba6e39502fe941b285dd97b1 (patch) | |
tree | e2c7b5777f728320e5b5542b6213fd3591ba51e2 /storage/maria/trnman.c | |
parent | Initial commit. (diff) | |
download | mariadb-upstream.tar.xz mariadb-upstream.zip |
Adding upstream version 1:10.11.6.upstream/1%10.11.6upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/maria/trnman.c')
-rw-r--r-- | storage/maria/trnman.c | 995 |
1 files changed, 995 insertions, 0 deletions
diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c new file mode 100644 index 00000000..7cac6a2d --- /dev/null +++ b/storage/maria/trnman.c @@ -0,0 +1,995 @@ +/* Copyright (C) 2006-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#include <my_global.h> +#include <my_sys.h> +#include <m_string.h> +#include "trnman.h" +#include "ma_checkpoint.h" +#include "ma_control_file.h" + +/* + status variables: + how many trns in the active list currently, + in the committed list currently, allocated since startup. 
+*/ +uint trnman_active_transactions, trnman_committed_transactions, + trnman_allocated_transactions; + +#ifdef WORKAROUND_GCC_4_3_2_BUG +volatile +#endif +/* list of active transactions in the trid order */ +static TRN active_list_min, active_list_max; +/* list of committed transactions in the trid order */ +static TRN committed_list_min, committed_list_max; + +/* a counter, used to generate transaction ids */ +static TrID global_trid_generator; + +/* + The minimum existing transaction id for trnman_get_min_trid() + The default value is used when the transaction manager is not initialized; + Probably called from maria_chk +*/ +static TrID trid_min_read_from= MAX_TRID; + +/* the mutex for everything above */ +static mysql_mutex_t LOCK_trn_list; + +/* LIFO pool of unused TRN structures for reuse */ +static TRN *pool; + +/* a hash for committed transactions that maps trid to a TRN structure */ +static LF_HASH trid_to_trn; + +/* an array that maps short_id of an active transaction to a TRN structure */ +static TRN **short_trid_to_active_trn; + +/* forward declarations of helpers defined later in this file */ +static my_bool default_trnman_end_trans_hook(TRN *, my_bool, my_bool); +static void trnman_free_trn(TRN *); + +my_bool (*trnman_end_trans_hook)(TRN *, my_bool, my_bool)= + default_trnman_end_trans_hook; + +/* + Simple interface functions + QQ: if they stay so simple, should we make them inline? 
+*/ + +uint trnman_increment_locked_tables(TRN *trn) +{ + return trn->locked_tables++; +} + +uint trnman_has_locked_tables(TRN *trn) +{ + return trn->locked_tables; +} + +uint trnman_decrement_locked_tables(TRN *trn) +{ + return --trn->locked_tables; +} + +void trnman_reset_locked_tables(TRN *trn, uint locked_tables) +{ + trn->locked_tables= locked_tables; +} + +#ifdef EXTRA_DEBUG +uint16 trnman_get_flags(TRN *trn) +{ + return trn->flags; +} + +void trnman_set_flags(TRN *trn, uint16 flags) +{ + trn->flags= flags; +} +#endif + +/** Wake up threads waiting for this transaction */ +static void wt_thd_release_self(TRN *trn) +{ + if (trn->wt) + { + WT_RESOURCE_ID rc; + rc.type= &ma_rc_dup_unique; + rc.value= (intptr)trn; + wt_thd_release(trn->wt, & rc); + trn->wt= 0; + } +} + +static my_bool +default_trnman_end_trans_hook(TRN *trn __attribute__ ((unused)), + my_bool commit __attribute__ ((unused)), + my_bool active_transactions + __attribute__ ((unused))) +{ + return 0; +} + + +static uchar *trn_get_hash_key(const uchar *trn, size_t *len, + my_bool unused __attribute__ ((unused))) +{ + *len= sizeof(TrID); + return (uchar *) & ((*((TRN **)trn))->trid); +} + + +/** + @brief Initializes transaction manager. + + @param initial_trid Generated TrIDs will start from initial_trid+1. + + @return Operation status + @retval 0 OK + @retval !=0 Error +*/ + +int trnman_init(TrID initial_trid) +{ + DBUG_ENTER("trnman_init"); + DBUG_PRINT("enter", ("initial_trid: %lu", (ulong) initial_trid)); + + short_trid_to_active_trn= (TRN **)my_malloc(PSI_INSTRUMENT_ME, SHORT_TRID_MAX*sizeof(TRN*), + MYF(MY_WME|MY_ZEROFILL)); + if (unlikely(!short_trid_to_active_trn)) + DBUG_RETURN(1); + short_trid_to_active_trn--; /* min short_id is 1 */ + + /* + Initialize lists. + active_list_max.min_read_from must be larger than any trid, + so that when an active list is empty we would could free + all committed list. 
+ And committed_list_max itself can not be freed so + committed_list_max.commit_trid must not be smaller that + active_list_max.min_read_from + */ + + active_list_max.trid= active_list_min.trid= 0; + active_list_max.min_read_from= MAX_TRID; + active_list_max.next= active_list_min.prev= 0; + active_list_max.prev= &active_list_min; + active_list_min.next= &active_list_max; + + committed_list_max.commit_trid= MAX_TRID; + committed_list_max.next= committed_list_min.prev= 0; + committed_list_max.prev= &committed_list_min; + committed_list_min.next= &committed_list_max; + + trnman_active_transactions= 0; + trnman_committed_transactions= 0; + trnman_allocated_transactions= 0; + /* This is needed for recovery and repair */ + dummy_transaction_object.min_read_from= ~(TrID) 0; + dummy_transaction_object.first_undo_lsn= TRANSACTION_LOGGED_LONG_ID; + + pool= 0; + global_trid_generator= initial_trid; + trid_min_read_from= initial_trid; + lf_hash_init(&trid_to_trn, sizeof(TRN*), LF_HASH_UNIQUE, + 0, 0, trn_get_hash_key, 0); + DBUG_PRINT("info", ("mysql_mutex_init LOCK_trn_list")); + mysql_mutex_init(key_LOCK_trn_list, &LOCK_trn_list, MY_MUTEX_INIT_FAST); + + DBUG_RETURN(0); +} + + +/* + NOTE + this could only be called in the "idle" state - no transaction can be + running. See asserts below. 
+*/ +void trnman_destroy() +{ + DBUG_ENTER("trnman_destroy"); + + if (short_trid_to_active_trn == NULL) /* trnman already destroyed */ + DBUG_VOID_RETURN; + DBUG_ASSERT(trid_to_trn.count == 0); + DBUG_ASSERT(trnman_active_transactions == 0); + DBUG_ASSERT(trnman_committed_transactions == 0); + DBUG_ASSERT(active_list_max.prev == &active_list_min); + DBUG_ASSERT(active_list_min.next == &active_list_max); + DBUG_ASSERT(committed_list_max.prev == &committed_list_min); + DBUG_ASSERT(committed_list_min.next == &committed_list_max); + while (pool) + { + TRN *trn= pool; + pool= pool->next; + DBUG_ASSERT(trn->wt == NULL); + mysql_mutex_destroy(&trn->state_lock); + my_free(trn); + } + lf_hash_destroy(&trid_to_trn); + DBUG_PRINT("info", ("mysql_mutex_destroy LOCK_trn_list")); + mysql_mutex_destroy(&LOCK_trn_list); + my_free(short_trid_to_active_trn+1); + short_trid_to_active_trn= NULL; + + DBUG_VOID_RETURN; +} + + +/* + NOTE + TrID is limited to 6 bytes. Initial value of the generator + is set by the recovery code - being read from the last checkpoint + (or 1 on a first run). 
+*/ +static TrID new_trid() +{ + DBUG_ENTER("new_trid"); + DBUG_ASSERT(global_trid_generator < MAX_INTERNAL_TRID); + DBUG_PRINT("info", ("mysql_mutex_assert_owner LOCK_trn_list")); + mysql_mutex_assert_owner(&LOCK_trn_list); + DBUG_RETURN(++global_trid_generator); +} + +static uint get_short_trid(TRN *trn) +{ + int i= (int) ((global_trid_generator + (intptr)trn) * 312089 % + SHORT_TRID_MAX) + 1; + uint res=0; + + for ( ; !res ; i= 1) + { + for ( ; i <= SHORT_TRID_MAX; i++) /* the range is [1..SHORT_TRID_MAX] */ + { + void *tmp= NULL; + if (short_trid_to_active_trn[i] == NULL && + my_atomic_casptr((void **)&short_trid_to_active_trn[i], &tmp, trn)) + { + res= i; + break; + } + } + } + return res; +} + +/** + Allocates and initializes a new TRN object + + @note the 'wt' parameter can only be 0 in a single-threaded code (or, + generally, where threads cannot block each other), otherwise the + first call to the deadlock detector will sigsegv. +*/ + +TRN *trnman_new_trn(WT_THD *wt) +{ + int res; + TRN *trn; + union { TRN *trn; void *v; } tmp; + DBUG_ENTER("trnman_new_trn"); + + /* + we have a mutex, to do simple things under it - allocate a TRN, + increment trnman_active_transactions, set trn->min_read_from. + + Note that all the above is fast. generating short_id may be slow, + as it involves scanning a large array - so it's done outside of the + mutex. + */ + + DBUG_PRINT("info", ("mysql_mutex_lock LOCK_trn_list")); + mysql_mutex_lock(&LOCK_trn_list); + + /* Allocating a new TRN structure */ + tmp.trn= pool; + /* + Popping an unused TRN from the pool + (ABA isn't possible, we're behind a mutex + */ + while (tmp.trn && !my_atomic_casptr((void **)(char*) &pool, &tmp.v, + (void *)tmp.trn->next)) + /* no-op */; + + /* Nothing in the pool ? Allocate a new one */ + if (!(trn= tmp.trn)) + { + /* + trn should be completely initialized at create time to allow + one to keep a known state on it. 
+ (Like redo_lns, which is assumed to be 0 at start of row handling + and reset to zero before end of row handling) + */ + trn= (TRN *)my_malloc(PSI_INSTRUMENT_ME, sizeof(TRN), MYF(MY_WME | MY_ZEROFILL)); + if (unlikely(!trn)) + { + DBUG_PRINT("info", ("mysql_mutex_unlock LOCK_trn_list")); + mysql_mutex_unlock(&LOCK_trn_list); + return 0; + } + trnman_allocated_transactions++; + mysql_mutex_init(key_TRN_state_lock, &trn->state_lock, MY_MUTEX_INIT_FAST); + } + trn->wt= wt; + trn->pins= lf_hash_get_pins(&trid_to_trn); + if (!trn->pins) + { + trnman_free_trn(trn); + mysql_mutex_unlock(&LOCK_trn_list); + return 0; + } + + trnman_active_transactions++; + + trn->min_read_from= active_list_min.next->trid; + + trn->trid= new_trid(); + + trn->next= &active_list_max; + trn->prev= active_list_max.prev; + active_list_max.prev= trn->prev->next= trn; + trid_min_read_from= active_list_min.next->min_read_from; + DBUG_PRINT("info", ("mysql_mutex_unlock LOCK_trn_list")); + mysql_mutex_unlock(&LOCK_trn_list); + + if (unlikely(!trn->min_read_from)) + { + /* + We are the only transaction. 
Set min_read_from so that we can read + our own rows + */ + trn->min_read_from= trn->trid + 1; + } + + /* no other transaction can read changes done by this one */ + trn->commit_trid= MAX_TRID; + trn->rec_lsn= trn->undo_lsn= trn->first_undo_lsn= 0; + trn->used_tables= 0; + trn->used_instances= 0; + + trn->locked_tables= 0; + trn->flags= 0; + + /* + only after the following function TRN is considered initialized, + so it must be done the last + */ + mysql_mutex_lock(&trn->state_lock); + trn->short_id= get_short_trid(trn); + mysql_mutex_unlock(&trn->state_lock); + + res= lf_hash_insert(&trid_to_trn, trn->pins, &trn); + DBUG_ASSERT(res <= 0); + if (res) + { + trnman_end_trn(trn, 0); + return 0; + } + + DBUG_PRINT("exit", ("trn: %p trid: 0x%lu min_read_from: 0x%lu", + trn, (ulong) trn->trid, (ulong) trn->min_read_from)); + + DBUG_RETURN(trn); +} + + +/* + Initialize a temporary TRN object for logging a new transaction id (trid) + to it. Used by create table to associate a create trid to the table. + + Out: trn->trid is updated with next available trid +*/ + +void trnman_init_tmp_trn_for_logging_trid(TRN *trn) +{ + *trn= dummy_transaction_object; + /* Avoid logging short_id */ + trn->short_id= 1; + /* Trid gets logged in translog_write_record */ + trn->first_undo_lsn= 0; + /* Get next free trid */ + trn->trid= trnman_get_min_safe_trid(); +} + + +/* + remove a trn from the active list. + if necessary - move to committed list and set commit_trid + + NOTE + Locks are released at the end. In particular, after placing the + transaction in commit list, and after setting commit_trid. It's + important, as commit_trid affects visibility. Locks don't affect + anything they simply delay execution of other threads - they could be + released arbitrarily late. In other words, when locks are released it + serves as a start banner for other threads, they start to run. So + everything they may need must be ready at that point. 
+ + RETURN + 0 ok + 1 error +*/ +my_bool trnman_end_trn(TRN *trn, my_bool commit) +{ + int res= 1; + uint16 cached_short_id= trn->short_id; /* we have to cache it, see below */ + TRN *free_me= 0; + LF_PINS *pins= trn->pins; + DBUG_ENTER("trnman_end_trn"); + DBUG_PRINT("enter", ("trn: %p commit: %d", trn, commit)); + + /* if a rollback, all UNDO records should have been executed */ + DBUG_ASSERT(commit || trn->undo_lsn == 0); + DBUG_ASSERT(trn != &dummy_transaction_object); + DBUG_ASSERT(trn->locked_tables == 0 && trn->used_instances == 0); + DBUG_PRINT("info", ("mysql_mutex_lock LOCK_trn_list")); + + mysql_mutex_lock(&LOCK_trn_list); + + /* remove from active list */ + trn->next->prev= trn->prev; + trn->prev->next= trn->next; + + /* + if trn was the oldest active transaction, now that it goes away there + may be committed transactions in the list which no active transaction + needs to bother about - clean up the committed list + */ + if (trn->prev == &active_list_min) + { + uint free_me_count; + TRN *t; + for (t= committed_list_min.next, free_me_count= 0; + t->commit_trid < active_list_min.next->min_read_from; + t= t->next, free_me_count++) /* no-op */; + + DBUG_ASSERT((t != committed_list_min.next && free_me_count > 0) || + (t == committed_list_min.next && free_me_count == 0)); + /* found transactions committed before the oldest active one */ + if (t != committed_list_min.next) + { + free_me= committed_list_min.next; + committed_list_min.next= t; + t->prev->next= 0; + t->prev= &committed_list_min; + trnman_committed_transactions-= free_me_count; + } + } + + mysql_mutex_lock(&trn->state_lock); + if (commit) + trn->commit_trid= global_trid_generator; + wt_thd_release_self(trn); + mysql_mutex_unlock(&trn->state_lock); + + /* + if transaction is committed and it was not the only active transaction - + add it to the committed list + */ + if (commit && active_list_min.next != &active_list_max) + { + trn->next= &committed_list_max; + trn->prev= committed_list_max.prev; + 
trnman_committed_transactions++; + committed_list_max.prev= trn->prev->next= trn; + } + else + { + trn->next= free_me; + free_me= trn; + } + trid_min_read_from= active_list_min.next->min_read_from; + + if ((*trnman_end_trans_hook)(trn, commit, + active_list_min.next != &active_list_max)) + res= -1; + trnman_active_transactions--; + + DBUG_PRINT("info", ("mysql_mutex_unlock LOCK_trn_list")); + mysql_mutex_unlock(&LOCK_trn_list); + + /* + the rest is done outside of a critical section + + note that we don't own trn anymore, it may be in a shared list now. + Thus, we cannot dereference it, and must use cached_short_id below. + */ + my_atomic_storeptr((void **)&short_trid_to_active_trn[cached_short_id], 0); + + /* + we, under the mutex, removed going-in-free_me transactions from the + active and committed lists, thus nobody else may see them when it scans + those lists, and thus nobody may want to free them. Now we don't + need a mutex to access free_me list + */ + /* QQ: send them to the purge thread */ + while (free_me) + { + TRN *t= free_me; + free_me= free_me->next; + + /* ignore OOM. it's harmless, and we can do nothing here anyway */ + (void)lf_hash_delete(&trid_to_trn, pins, &t->trid, sizeof(TrID)); + + trnman_free_trn(t); + } + + lf_hash_put_pins(pins); + + DBUG_RETURN(res < 0); +} + +/* + free a trn (add to the pool, that is) + note - we can never really free() a TRN if there's at least one other + running transaction - see, e.g., how lock waits are implemented in + lockman.c + The same is true for other lock-free data structures too. We may need some + kind of FLUSH command to reset them all - ensuring that no transactions are + running. It may even be called automatically on checkpoints if no + transactions are running. +*/ +static void trnman_free_trn(TRN *trn) +{ + /* + union is to solve strict aliasing issue. + without it gcc 3.4.3 doesn't notice that updating *(void **)&tmp + modifies the value of tmp. 
+ */ + union { TRN *trn; void *v; } tmp; + + DBUG_ASSERT(trn != &dummy_transaction_object); + + mysql_mutex_lock(&trn->state_lock); + trn->short_id= 0; + mysql_mutex_unlock(&trn->state_lock); + + tmp.trn= pool; + + do + { + /* + without this volatile cast gcc-3.4.4 moves the assignment + down after the loop at -O2 + */ + *(TRN * volatile *)&(trn->next)= tmp.trn; + } while (!my_atomic_casptr((void **)(char*)&pool, &tmp.v, trn)); +} + +/* + NOTE + here we access the hash in a lock-free manner. + It's safe, a 'found' TRN can never be freed/reused before we access it. + In fact, it cannot be freed before 'trn' ends, because a 'found' TRN + can only be removed from the hash when: + found->commit_trid < ALL (trn->min_read_from) + that is, at least + found->commit_trid < trn->min_read_from + but + found->trid >= trn->min_read_from + and + found->commit_trid > found->trid + + RETURN + 1 can + 0 cannot + -1 error (OOM) +*/ +int trnman_can_read_from(TRN *trn, TrID trid) +{ + TRN **found; + my_bool can; + + if (trid < trn->min_read_from) + return 1; /* Row is visible by all transactions in the system */ + + if (trid >= trn->trid) + { + /* + We have now two cases + trid > trn->trid, in which case the row is from a new transaction + and not visible, in which case we should return 0. + trid == trn->trid in which case the row is from the current transaction + and we should return 1 + */ + return trid == trn->trid; + } + + found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid)); + if (found == NULL) + return 0; /* not in the hash of transactions = cannot read */ + if (found == MY_ERRPTR) + return -1; + + can= (*found)->commit_trid < trn->trid; + lf_hash_search_unpin(trn->pins); + return can; +} + +/** + Finds a TRN by its TrID + + @param trn current trn. Needed for pinning pointers (see lf_pin) + @param trid trid to search for + + @return found trn or 0 + + @note that trn is returned with its state locked! 
+*/ +TRN *trnman_trid_to_trn(TRN *trn, TrID trid) +{ + TRN **found; + + if (trid < trn->min_read_from) + return 0; /* it's committed eons ago */ + + found= lf_hash_search(&trid_to_trn, trn->pins, &trid, sizeof(trid)); + if (found == NULL || found == MY_ERRPTR) + return 0; /* no luck */ + + /* we've found something */ + mysql_mutex_lock(&(*found)->state_lock); + + if ((*found)->short_id == 0) + { + mysql_mutex_unlock(&(*found)->state_lock); + lf_hash_search_unpin(trn->pins); + return 0; /* but it was a ghost */ + } + lf_hash_search_unpin(trn->pins); + + /* Gotcha! */ + return *found; +} + +/* TODO: the stubs below are waiting for savepoints to be implemented */ + +void trnman_new_statement(TRN *trn __attribute__ ((unused))) +{ +} + +void trnman_rollback_statement(TRN *trn __attribute__ ((unused))) +{ +} + + +/** + @brief Allocates buffers and stores in them some info about transactions + + Does the allocation because the caller cannot know the size itself. + Memory freeing is to be done by the caller (if the "str" member of the + LEX_STRING is not NULL). + The caller has the intention of doing checkpoints. 
+ + @param[out] str_act pointer to where the allocated buffer, + and its size, will be put; buffer will be filled + with info about active transactions + @param[out] str_com pointer to where the allocated buffer, + and its size, will be put; buffer will be filled + with info about committed transactions + @param[out] min_first_undo_lsn pointer to where the minimum + first_undo_lsn of all transactions will be put + + @return Operation status + @retval 0 OK + @retval 1 Error +*/ + +my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com, + LSN *min_rec_lsn, LSN *min_first_undo_lsn) +{ + my_bool error; + TRN *trn; + char *ptr; + uint stored_transactions= 0; + LSN minimum_rec_lsn= LSN_MAX, minimum_first_undo_lsn= LSN_MAX; + DBUG_ENTER("trnman_collect_transactions"); + + DBUG_ASSERT((NULL == str_act->str) && (NULL == str_com->str)); + + /* validate the use of read_non_atomic() in general: */ + compile_time_assert((sizeof(LSN) == 8) && (sizeof(LSN_WITH_FLAGS) == 8)); + mysql_mutex_lock(&LOCK_trn_list); + str_act->length= 2 + /* number of active transactions */ + LSN_STORE_SIZE + /* minimum of their rec_lsn */ + TRANSID_SIZE + /* current TrID generator value */ + (2 + /* short id */ + 6 + /* long id */ + LSN_STORE_SIZE + /* undo_lsn */ +#ifdef MARIA_VERSIONING /* not enabled yet */ + LSN_STORE_SIZE + /* undo_purge_lsn */ +#endif + LSN_STORE_SIZE /* first_undo_lsn */ + ) * trnman_active_transactions; + str_com->length= 4 + /* number of committed transactions */ + (6 + /* long id */ +#ifdef MARIA_VERSIONING /* not enabled yet */ + LSN_STORE_SIZE + /* undo_purge_lsn */ +#endif + LSN_STORE_SIZE /* first_undo_lsn */ + ) * trnman_committed_transactions; + if ((NULL == (str_act->str= my_malloc(PSI_INSTRUMENT_ME, str_act->length, MYF(MY_WME)))) || + (NULL == (str_com->str= my_malloc(PSI_INSTRUMENT_ME, str_com->length, MYF(MY_WME))))) + goto err; + /* First, the active transactions */ + ptr= str_act->str + 2 + LSN_STORE_SIZE; + transid_store(ptr, 
global_trid_generator); + ptr+= TRANSID_SIZE; + for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next) + { + uint sid; + LSN rec_lsn, undo_lsn, first_undo_lsn; + mysql_mutex_lock(&trn->state_lock); + sid= trn->short_id; + mysql_mutex_unlock(&trn->state_lock); + if (sid == 0) + { + /* + Not even inited, has done nothing. Or it is the + dummy_transaction_object, which does only non-transactional + immediate-sync operations (CREATE/DROP/RENAME/REPAIR TABLE), and so + can be forgotten for Checkpoint. + */ + continue; + } + /* needed for low-water mark calculation */ + if (((rec_lsn= lsn_read_non_atomic(trn->rec_lsn)) > 0) && + (cmp_translog_addr(rec_lsn, minimum_rec_lsn) < 0)) + minimum_rec_lsn= rec_lsn; + /* + If trn has not logged LOGREC_LONG_TRANSACTION_ID, this trn will be + discovered when seeing that log record which is for sure located after + checkpoint_start_log_horizon. + */ + if ((LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn) & + TRANSACTION_LOGGED_LONG_ID) == 0) + continue; + /* + On the other hand, if undo_lsn is LSN_IMPOSSIBLE, trn may later log + records; so we must include trn in the checkpoint now, because we cannot + count on LOGREC_LONG_TRANSACTION_ID (as we are already past it). 
+ */ + undo_lsn= trn->undo_lsn; + stored_transactions++; + int2store(ptr, sid); + ptr+= 2; + int6store(ptr, trn->trid); + ptr+= 6; + lsn_store(ptr, undo_lsn); /* needed for rollback */ + ptr+= LSN_STORE_SIZE; + /* needed for low-water mark calculation */ + if (((first_undo_lsn= lsn_read_non_atomic(trn->first_undo_lsn)) > 0) && + (cmp_translog_addr(first_undo_lsn, minimum_first_undo_lsn) < 0)) + minimum_first_undo_lsn= first_undo_lsn; + lsn_store(ptr, first_undo_lsn); + ptr+= LSN_STORE_SIZE; +#ifdef MARIA_VERSIONING /* not enabled yet */ + /* to know where purging should start (last delete of this trn) */ + lsn_store(ptr, trn->undo_purge_lsn); + ptr+= LSN_STORE_SIZE; +#endif + /** + @todo RECOVERY: add a comment explaining why we can dirtily read some + vars, inspired by the text of "assumption 8" in WL#3072 + */ + } + str_act->length= ptr - str_act->str; /* as we maybe over-estimated */ + ptr= str_act->str; + DBUG_PRINT("info",("collected %u active transactions", + (uint)stored_transactions)); + int2store(ptr, stored_transactions); + ptr+= 2; + /* this LSN influences how REDOs for any page can be ignored by Recovery */ + lsn_store(ptr, minimum_rec_lsn); + /* one day there will also be a list of prepared transactions */ + /* do the same for committed ones */ + ptr= str_com->str; + int4store(ptr, trnman_committed_transactions); + ptr+= 4; + DBUG_PRINT("info",("collected %u committed transactions", + (uint)trnman_committed_transactions)); + for (trn= committed_list_min.next; trn != &committed_list_max; + trn= trn->next) + { + LSN first_undo_lsn; + int6store(ptr, trn->trid); + ptr+= 6; +#ifdef MARIA_VERSIONING /* not enabled yet */ + lsn_store(ptr, trn->undo_purge_lsn); + ptr+= LSN_STORE_SIZE; +#endif + first_undo_lsn= LSN_WITH_FLAGS_TO_LSN(trn->first_undo_lsn); + if (cmp_translog_addr(first_undo_lsn, minimum_first_undo_lsn) < 0) + minimum_first_undo_lsn= first_undo_lsn; + lsn_store(ptr, first_undo_lsn); + ptr+= LSN_STORE_SIZE; + } + /* + TODO: if we see there exists 
no transaction (active and committed) we can + tell the lock-free structures to do some freeing (my_free()). + */ + error= 0; + *min_rec_lsn= minimum_rec_lsn; + *min_first_undo_lsn= minimum_first_undo_lsn; + goto end; +err: + error= 1; +end: + mysql_mutex_unlock(&LOCK_trn_list); + DBUG_RETURN(error); +} + + +TRN *trnman_recreate_trn_from_recovery(uint16 shortid, TrID longid) +{ + TrID old_trid_generator= global_trid_generator; + TRN *trn; + DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded); + global_trid_generator= longid-1; /* force a correct trid in the new trn */ + if (unlikely((trn= trnman_new_trn(NULL)) == NULL)) + return NULL; + /* deallocate excessive allocations of trnman_new_trn() */ + global_trid_generator= old_trid_generator; + set_if_bigger(global_trid_generator, longid); + short_trid_to_active_trn[trn->short_id]= 0; + DBUG_ASSERT(short_trid_to_active_trn[shortid] == NULL); + short_trid_to_active_trn[shortid]= trn; + trn->short_id= shortid; + return trn; +} + + +TRN *trnman_get_any_trn() +{ + TRN *trn= active_list_min.next; + return (trn != &active_list_max) ? trn : NULL; +} + + +/** + Returns the minimum existing transaction id. May return a too small + number in race conditions, but this is ok as the value is used to + remove not visible transid from index/rows. +*/ + +TrID trnman_get_min_trid() +{ + return trid_min_read_from; +} + + +/** + Returns the minimum possible transaction id + + @notes + If there is no transactions running, returns number for next running + transaction. + If one has an active transaction, the returned number will be less or + equal to this. If one is not running in a transaction one will ge the + number for the next started transaction. This is used in create table + to get a safe minimum trid to use. 
+*/ + +TrID trnman_get_min_safe_trid() +{ + TrID trid; + mysql_mutex_lock(&LOCK_trn_list); + trid= MY_MIN(active_list_min.next->min_read_from, + global_trid_generator); + mysql_mutex_unlock(&LOCK_trn_list); + return trid; +} + + +/** + Returns maximum transaction id given to a transaction so far. +*/ + +TrID trnman_get_max_trid() +{ + TrID id; + /* Check if trnman has been initalized */ + if (short_trid_to_active_trn == NULL) + return 0; + mysql_mutex_lock(&LOCK_trn_list); + id= global_trid_generator; + mysql_mutex_unlock(&LOCK_trn_list); + return id; +} + +/** + @brief Check if there exist an active transaction between two commit_id's + + @todo + Improve speed of this. + - Store transactions in tree or skip list + - Have function to copying all active transaction id's to b-tree + and use b-tree for checking states. This could be a big win + for checkpoint that will call this function for a lot of objects. + + @return + 0 No transaction exists + 1 There is at least on active transaction in the given range +*/ + +my_bool trnman_exists_active_transactions(TrID min_id, TrID max_id, + my_bool trnman_is_locked) +{ + TRN *trn; + my_bool ret= 0; + + if (!trnman_is_locked) + mysql_mutex_lock(&LOCK_trn_list); + mysql_mutex_assert_owner(&LOCK_trn_list); + for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next) + { + /* + We use <= for max_id as max_id is a commit_trid and trn->trid + is transaction id. When calculating commit_trid we use the + current value of global_trid_generator. global_trid_generator is + incremented for each new transaction. + + For example, assuming we have + min_id = 5 + max_id = 10 + + A trid of value 5 can't see the history event between 5 & 10 + at it vas started before min_id 5 was committed. + A trid of value 10 can't see the next history event (max_id = 10) + as it started before this was committed. In this case it must use + the this event. 
+ */ + if (trn->trid > min_id && trn->trid <= max_id) + { + ret= 1; + break; + } + } + if (!trnman_is_locked) + mysql_mutex_unlock(&LOCK_trn_list); + return ret; +} + + +/** + lock transaction list +*/ + +void trnman_lock() +{ + mysql_mutex_lock(&LOCK_trn_list); +} + + +/** + unlock transaction list +*/ + +void trnman_unlock() +{ + mysql_mutex_unlock(&LOCK_trn_list); +} + + +/** + Is trman initialized +*/ + +my_bool trman_is_inited() +{ + return (short_trid_to_active_trn != NULL); +} |