path: root/sql/table_cache.cc
author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-13 12:24:36 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-13 12:24:36 +0000
commit    06eaf7232e9a920468c0f8d74dcf2fe8b555501c (patch)
tree      e2c7b5777f728320e5b5542b6213fd3591ba51e2 /sql/table_cache.cc
parent    Initial commit. (diff)
Adding upstream version 1:10.11.6. (tag: upstream/1%10.11.6)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sql/table_cache.cc')
-rw-r--r--  sql/table_cache.cc  1315
1 file changed, 1315 insertions(+), 0 deletions(-)
diff --git a/sql/table_cache.cc b/sql/table_cache.cc
new file mode 100644
index 00000000..0039c96a
--- /dev/null
+++ b/sql/table_cache.cc
@@ -0,0 +1,1315 @@
+/* Copyright (c) 2000, 2012, Oracle and/or its affiliates.
+ Copyright (c) 2010, 2022, MariaDB Corporation.
+ Copyright (C) 2013 Sergey Vojtovich and MariaDB Foundation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/**
+ @file
+ Table definition cache and table cache implementation.
+
+ Table definition cache actions:
+ - add new TABLE_SHARE object to cache (tdc_acquire_share())
+ - acquire TABLE_SHARE object from cache (tdc_acquire_share())
+ - release TABLE_SHARE object to cache (tdc_release_share())
+ - purge unused TABLE_SHARE objects from cache (tdc_purge())
+ - remove TABLE_SHARE object from cache (tdc_remove_table())
+ - get number of TABLE_SHARE objects in cache (tdc_records())
+
+ Table cache actions:
+ - add new TABLE object to cache (tc_add_table())
+ - acquire TABLE object from cache (tc_acquire_table())
+ - release TABLE object to cache (tc_release_table())
+ - purge unused TABLE objects from cache (tc_purge())
+ - purge unused TABLE objects of a table from cache (tdc_remove_table())
+ - get number of TABLE objects in cache (tc_records())
+
+ Dependencies:
+ - close_cached_tables(): flush tables on shutdown
+ - alloc_table_share()
+ - free_table_share()
+
+ Table cache invariants:
+ - TABLE_SHARE::free_tables shall not contain objects with TABLE::in_use != 0
+ - TABLE_SHARE::free_tables shall not receive new objects if
+ TABLE_SHARE::tdc.flushed is true
+*/
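+
+/*
+ Minimal usage sketch (illustrative only; the real callers live elsewhere,
+ e.g. in sql_base.cc, and the open helper below is hypothetical):
+
+   TABLE *table;
+   TABLE_SHARE *share= tdc_acquire_share(thd, table_list, GTS_TABLE, &table);
+   if (!share)
+     return true;                                    // error
+   if (!table)
+   {
+     // Cache miss: open a new TABLE from the share and register it.
+     table= open_table_from_share_somehow(thd, share); // hypothetical helper
+     tc_add_table(thd, table);
+   }
+   ... use the table ...
+   tc_release_table(table);       // returned to the cache, or destroyed
+*/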
+
+#include "mariadb.h"
+#include "lf.h"
+#include "table.h"
+#include "sql_base.h"
+#include "aligned.h"
+
+
+/** Configuration. */
+ulong tdc_size; /**< Table definition cache threshold for LRU eviction. */
+ulong tc_size; /**< Table cache threshold for LRU eviction. */
+uint32 tc_instances;
+static size_t tc_allocated_size;
+static std::atomic<uint32_t> tc_active_instances(1);
+static std::atomic<bool> tc_contention_warning_reported;
+
+/** Data collections. */
+static LF_HASH tdc_hash; /**< Collection of TABLE_SHARE objects. */
+/** Collection of unused TABLE_SHARE objects. */
+static
+I_P_List <TDC_element,
+ I_P_List_adapter<TDC_element, &TDC_element::next, &TDC_element::prev>,
+ I_P_List_null_counter,
+ I_P_List_fast_push_back<TDC_element> > unused_shares;
+
+static bool tdc_inited;
+
+
+/**
+ Protects unused shares list.
+
+ TDC_element::prev
+ TDC_element::next
+ unused_shares
+*/
+
+static mysql_mutex_t LOCK_unused_shares;
+
+#ifdef HAVE_PSI_INTERFACE
+static PSI_mutex_key key_LOCK_unused_shares, key_TABLE_SHARE_LOCK_table_share,
+ key_LOCK_table_cache;
+static PSI_mutex_info all_tc_mutexes[]=
+{
+ { &key_LOCK_unused_shares, "LOCK_unused_shares", PSI_FLAG_GLOBAL },
+ { &key_TABLE_SHARE_LOCK_table_share, "TABLE_SHARE::tdc.LOCK_table_share", 0 },
+ { &key_LOCK_table_cache, "LOCK_table_cache", 0 }
+};
+
+static PSI_cond_key key_TABLE_SHARE_COND_release;
+static PSI_cond_info all_tc_conds[]=
+{
+ { &key_TABLE_SHARE_COND_release, "TABLE_SHARE::tdc.COND_release", 0 }
+};
+#endif
+
+
+static int fix_thd_pins(THD *thd)
+{
+ return thd->tdc_hash_pins ? 0 :
+ (thd->tdc_hash_pins= lf_hash_get_pins(&tdc_hash)) == 0;
+}
+
+
+/*
+ Auxiliary routines for manipulating per-share all/unused lists and the
+ tc_count counter.
+ They are responsible for preserving the invariants between those lists, the
+ counter and the TABLE::in_use member.
+ In fact these routines implement a sort of implicit table cache as part of
+ the table definition cache.
+*/
+
+struct Table_cache_instance
+{
+ /**
+ Protects free_tables (TABLE::global_free_next and TABLE::global_free_prev),
+ records, Share_free_tables::List (TABLE::prev and TABLE::next),
+ TABLE::in_use.
+ */
+ alignas(CPU_LEVEL1_DCACHE_LINESIZE)
+ mysql_mutex_t LOCK_table_cache;
+ I_P_List <TABLE, I_P_List_adapter<TABLE, &TABLE::global_free_next,
+ &TABLE::global_free_prev>,
+ I_P_List_null_counter, I_P_List_fast_push_back<TABLE> >
+ free_tables;
+ ulong records;
+ uint mutex_waits;
+ uint mutex_nowaits;
+
+ Table_cache_instance(): records(0), mutex_waits(0), mutex_nowaits(0)
+ {
+ static_assert(!(sizeof(*this) % CPU_LEVEL1_DCACHE_LINESIZE), "alignment");
+ mysql_mutex_init(key_LOCK_table_cache, &LOCK_table_cache,
+ MY_MUTEX_INIT_FAST);
+ }
+
+ ~Table_cache_instance()
+ {
+ mysql_mutex_destroy(&LOCK_table_cache);
+ DBUG_ASSERT(free_tables.is_empty());
+ DBUG_ASSERT(records == 0);
+ }
+
+ static void *operator new[](size_t size)
+ { return aligned_malloc(size, CPU_LEVEL1_DCACHE_LINESIZE); }
+ static void operator delete[](void *ptr) { aligned_free(ptr); }
+ static void mark_memory_freed()
+ {
+ update_malloc_size(-(longlong) tc_allocated_size, 0);
+ }
+
+ /**
+ Lock table cache mutex and check contention.
+
+ An instance is considered contested if more than 20% of mutex acquisitions
+ cannot be served immediately. Up to 100 000 probes may be performed to avoid
+ instance activation on short sporadic peaks. 100 000 is the estimated maximum
+ number of queries one instance can serve in one second.
+
+ These numbers work well on a 2-socket / 20-core / 40-thread Intel Broadwell
+ system, that is, the expected number of instances is activated within a
+ reasonable warmup time. They may have to be adjusted for other systems.
+
+ Only TABLE object acquisition is instrumented. We intentionally avoid this
+ overhead on TABLE object release. All other table cache mutex acquisitions
+ are considered outside the hot path and are not instrumented either.
+ */
+ void lock_and_check_contention(uint32_t n_instances, uint32_t instance)
+ {
+ if (mysql_mutex_trylock(&LOCK_table_cache))
+ {
+ mysql_mutex_lock(&LOCK_table_cache);
+ if (++mutex_waits == 20000)
+ {
+ if (n_instances < tc_instances)
+ {
+ if (tc_active_instances.
+ compare_exchange_weak(n_instances, n_instances + 1,
+ std::memory_order_relaxed,
+ std::memory_order_relaxed))
+ {
+ sql_print_information("Detected table cache mutex contention at instance %d: "
+ "%d%% waits. Additional table cache instance "
+ "activated. Number of instances after "
+ "activation: %d.",
+ instance + 1,
+ mutex_waits * 100 / (mutex_nowaits + mutex_waits),
+ n_instances + 1);
+ }
+ }
+ else if (!tc_contention_warning_reported.exchange(true,
+ std::memory_order_relaxed))
+ {
+ sql_print_warning("Detected table cache mutex contention at instance %d: "
+ "%d%% waits. Additional table cache instance "
+ "cannot be activated: consider raising "
+ "table_open_cache_instances. Number of active "
+ "instances: %d.",
+ instance + 1,
+ mutex_waits * 100 / (mutex_nowaits + mutex_waits),
+ n_instances);
+ }
+ mutex_waits= 0;
+ mutex_nowaits= 0;
+ }
+ }
+ else if (++mutex_nowaits == 80000)
+ {
+ mutex_waits= 0;
+ mutex_nowaits= 0;
+ }
+ }
+};
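+
+/*
+ Worked example of the thresholds used above: an instance is sampled over a
+ window of roughly 100 000 lock attempts. If 20 000 attempts had to wait
+ before 80 000 uncontended attempts were seen, the wait ratio is at least
+ 20 000 / (20 000 + 80 000) = 20% and an additional instance is activated
+ (or a warning is printed once all tc_instances are already active). If
+ 80 000 uncontended attempts come first, both counters are reset and
+ sampling starts over.
+*/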
+
+
+static Table_cache_instance *tc;
+
+
+static void intern_close_table(TABLE *table)
+{
+ delete table->triggers;
+ DBUG_ASSERT(table->file);
+ closefrm(table);
+ tdc_release_share(table->s);
+ my_free(table);
+}
+
+
+/**
+ Get number of TABLE objects (used and unused) in table cache.
+*/
+
+uint tc_records(void)
+{
+ ulong total= 0;
+ for (uint32 i= 0; i < tc_instances; i++)
+ {
+ mysql_mutex_lock(&tc[i].LOCK_table_cache);
+ total+= tc[i].records;
+ mysql_mutex_unlock(&tc[i].LOCK_table_cache);
+ }
+ return total;
+}
+
+
+/**
+ Remove TABLE object from table cache.
+*/
+
+static void tc_remove_table(TABLE *table)
+{
+ TDC_element *element= table->s->tdc;
+
+ mysql_mutex_lock(&element->LOCK_table_share);
+ /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */
+ while (element->all_tables_refs)
+ mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
+ element->all_tables.remove(table);
+ mysql_mutex_unlock(&element->LOCK_table_share);
+
+ intern_close_table(table);
+}
+
+
+static void tc_remove_all_unused_tables(TDC_element *element,
+ Share_free_tables::List *purge_tables)
+{
+ for (uint32 i= 0; i < tc_instances; i++)
+ {
+ mysql_mutex_lock(&tc[i].LOCK_table_cache);
+ while (auto table= element->free_tables[i].list.pop_front())
+ {
+ tc[i].records--;
+ tc[i].free_tables.remove(table);
+ DBUG_ASSERT(element->all_tables_refs == 0);
+ element->all_tables.remove(table);
+ purge_tables->push_front(table);
+ }
+ mysql_mutex_unlock(&tc[i].LOCK_table_cache);
+ }
+}
+
+
+/**
+ Free all unused TABLE objects.
+
+ While locked:
+ - remove unused objects from TABLE_SHARE::tdc.free_tables and
+ TABLE_SHARE::tdc.all_tables
+ - decrement tc_count
+
+ While unlocked:
+ - free resources related to unused objects
+
+ @note This is called by 'handle_manager' when one wants to
+ periodically flush all unused tables.
+*/
+
+static my_bool tc_purge_callback(TDC_element *element,
+ Share_free_tables::List *purge_tables)
+{
+ mysql_mutex_lock(&element->LOCK_table_share);
+ tc_remove_all_unused_tables(element, purge_tables);
+ mysql_mutex_unlock(&element->LOCK_table_share);
+ return FALSE;
+}
+
+
+void tc_purge()
+{
+ Share_free_tables::List purge_tables;
+
+ tdc_iterate(0, (my_hash_walk_action) tc_purge_callback, &purge_tables);
+ while (auto table= purge_tables.pop_front())
+ intern_close_table(table);
+}
+
+
+/**
+ Add new TABLE object to table cache.
+
+ @pre TABLE object is used by caller.
+
+ Added object cannot be evicted or acquired.
+
+ While locked:
+ - add object to TABLE_SHARE::tdc.all_tables
+ - increment tc_count
+ - evict LRU object from table cache if we reached threshold
+
+ While unlocked:
+ - free evicted object
+*/
+
+void tc_add_table(THD *thd, TABLE *table)
+{
+ uint32_t i=
+ thd->thread_id % tc_active_instances.load(std::memory_order_relaxed);
+ TABLE *LRU_table= 0;
+ TDC_element *element= table->s->tdc;
+
+ DBUG_ASSERT(table->in_use == thd);
+ table->instance= i;
+ mysql_mutex_lock(&element->LOCK_table_share);
+ /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */
+ while (element->all_tables_refs)
+ mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
+ element->all_tables.push_front(table);
+ mysql_mutex_unlock(&element->LOCK_table_share);
+
+ mysql_mutex_lock(&tc[i].LOCK_table_cache);
+ if (tc[i].records == tc_size)
+ {
+ if ((LRU_table= tc[i].free_tables.pop_front()))
+ {
+ LRU_table->s->tdc->free_tables[i].list.remove(LRU_table);
+ /* Needed if MDL deadlock detector chimes in before tc_remove_table() */
+ LRU_table->in_use= thd;
+ mysql_mutex_unlock(&tc[i].LOCK_table_cache);
+ /* Keep out of locked LOCK_table_cache */
+ tc_remove_table(LRU_table);
+ }
+ else
+ {
+ tc[i].records++;
+ mysql_mutex_unlock(&tc[i].LOCK_table_cache);
+ }
+ /* Keep out of locked LOCK_table_cache */
+ status_var_increment(thd->status_var.table_open_cache_overflows);
+ }
+ else
+ {
+ tc[i].records++;
+ mysql_mutex_unlock(&tc[i].LOCK_table_cache);
+ }
+}
+
+
+/**
+ Acquire TABLE object from table cache.
+
+ @pre share must be protected against removal.
+
+ Acquired object cannot be evicted or acquired again.
+
+ @return TABLE object, or NULL if no unused objects.
+*/
+
+TABLE *tc_acquire_table(THD *thd, TDC_element *element)
+{
+ uint32_t n_instances= tc_active_instances.load(std::memory_order_relaxed);
+ uint32_t i= thd->thread_id % n_instances;
+ TABLE *table;
+
+ tc[i].lock_and_check_contention(n_instances, i);
+ table= element->free_tables[i].list.pop_front();
+ if (table)
+ {
+ DBUG_ASSERT(!table->in_use);
+ table->in_use= thd;
+ /* The ex-unused table must be fully functional. */
+ DBUG_ASSERT(table->db_stat && table->file);
+ /* The children must be detached from the table. */
+ DBUG_ASSERT(!table->file->extra(HA_EXTRA_IS_ATTACHED_CHILDREN));
+ tc[i].free_tables.remove(table);
+ }
+ mysql_mutex_unlock(&tc[i].LOCK_table_cache);
+ return table;
+}
+
+
+/**
+ Release TABLE object to table cache.
+
+ @pre object is used by caller.
+
+ Released object may be evicted or acquired again.
+
+ While locked:
+ - if object is marked for purge, decrement tc_count
+ - add object to TABLE_SHARE::tdc.free_tables
+ - evict LRU object from table cache if we reached threshold
+
+ While unlocked:
+ - mark object not in use by any thread
+ - free evicted/purged object
+
+ @note Another thread may mark the share for purge at any moment (even
+ after the version check). It means a to-be-purged object may end up on
+ the unused lists. That other thread is expected to call tc_purge(),
+ which is synchronized with us on TABLE_SHARE::tdc.LOCK_table_share.
+*/
+
+void tc_release_table(TABLE *table)
+{
+ uint32 i= table->instance;
+ DBUG_ENTER("tc_release_table");
+ DBUG_ASSERT(table->in_use);
+ DBUG_ASSERT(table->file);
+ DBUG_ASSERT(!table->pos_in_locked_tables);
+
+ mysql_mutex_lock(&tc[i].LOCK_table_cache);
+ if (table->needs_reopen() || table->s->tdc->flushed ||
+ tc[i].records > tc_size)
+ {
+ tc[i].records--;
+ mysql_mutex_unlock(&tc[i].LOCK_table_cache);
+ tc_remove_table(table);
+ }
+ else
+ {
+ table->in_use= 0;
+ table->s->tdc->free_tables[i].list.push_front(table);
+ tc[i].free_tables.push_back(table);
+ mysql_mutex_unlock(&tc[i].LOCK_table_cache);
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+static void tdc_assert_clean_share(TDC_element *element)
+{
+ DBUG_ASSERT(element->share == 0);
+ DBUG_ASSERT(element->ref_count == 0);
+ DBUG_ASSERT(element->m_flush_tickets.is_empty());
+ DBUG_ASSERT(element->all_tables.is_empty());
+#ifndef DBUG_OFF
+ for (uint32 i= 0; i < tc_instances; i++)
+ DBUG_ASSERT(element->free_tables[i].list.is_empty());
+#endif
+ DBUG_ASSERT(element->all_tables_refs == 0);
+ DBUG_ASSERT(element->next == 0);
+ DBUG_ASSERT(element->prev == 0);
+}
+
+
+/**
+ Delete share from hash and free share object.
+*/
+
+static void tdc_delete_share_from_hash(TDC_element *element)
+{
+ THD *thd= current_thd;
+ LF_PINS *pins;
+ TABLE_SHARE *share;
+ DBUG_ENTER("tdc_delete_share_from_hash");
+
+ mysql_mutex_assert_owner(&element->LOCK_table_share);
+ share= element->share;
+ DBUG_ASSERT(share);
+ element->share= 0;
+ PSI_CALL_release_table_share(share->m_psi);
+ share->m_psi= 0;
+
+ if (!element->m_flush_tickets.is_empty())
+ {
+ Wait_for_flush_list::Iterator it(element->m_flush_tickets);
+ Wait_for_flush *ticket;
+ while ((ticket= it++))
+ (void) ticket->get_ctx()->m_wait.set_status(MDL_wait::GRANTED);
+
+ do
+ {
+ mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
+ } while (!element->m_flush_tickets.is_empty());
+ }
+
+ mysql_mutex_unlock(&element->LOCK_table_share);
+
+ if (thd)
+ {
+ fix_thd_pins(thd);
+ pins= thd->tdc_hash_pins;
+ }
+ else
+ pins= lf_hash_get_pins(&tdc_hash);
+
+ DBUG_ASSERT(pins); // What can we do about it?
+ tdc_assert_clean_share(element);
+ lf_hash_delete(&tdc_hash, pins, element->m_key, element->m_key_length);
+ if (!thd)
+ lf_hash_put_pins(pins);
+ free_table_share(share);
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ Prepare a table share for use with the table definition cache.
+*/
+
+static void lf_alloc_constructor(uchar *arg)
+{
+ TDC_element *element= (TDC_element*) (arg + LF_HASH_OVERHEAD);
+ DBUG_ENTER("lf_alloc_constructor");
+ mysql_mutex_init(key_TABLE_SHARE_LOCK_table_share,
+ &element->LOCK_table_share, MY_MUTEX_INIT_FAST);
+ mysql_cond_init(key_TABLE_SHARE_COND_release, &element->COND_release, 0);
+ element->m_flush_tickets.empty();
+ element->all_tables.empty();
+ for (uint32 i= 0; i < tc_instances; i++)
+ element->free_tables[i].list.empty();
+ element->all_tables_refs= 0;
+ element->share= 0;
+ element->ref_count= 0;
+ element->next= 0;
+ element->prev= 0;
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ Release table definition cache specific resources of table share.
+*/
+
+static void lf_alloc_destructor(uchar *arg)
+{
+ TDC_element *element= (TDC_element*) (arg + LF_HASH_OVERHEAD);
+ DBUG_ENTER("lf_alloc_destructor");
+ tdc_assert_clean_share(element);
+ mysql_cond_destroy(&element->COND_release);
+ mysql_mutex_destroy(&element->LOCK_table_share);
+ DBUG_VOID_RETURN;
+}
+
+
+static void tdc_hash_initializer(LF_HASH *,
+ TDC_element *element, LEX_STRING *key)
+{
+ memcpy(element->m_key, key->str, key->length);
+ element->m_key_length= (uint)key->length;
+ tdc_assert_clean_share(element);
+}
+
+
+static uchar *tdc_hash_key(const TDC_element *element, size_t *length,
+ my_bool)
+{
+ *length= element->m_key_length;
+ return (uchar*) element->m_key;
+}
+
+
+/**
+ Initialize table definition cache.
+*/
+
+bool tdc_init(void)
+{
+ DBUG_ENTER("tdc_init");
+#ifdef HAVE_PSI_INTERFACE
+ mysql_mutex_register("sql", all_tc_mutexes, array_elements(all_tc_mutexes));
+ mysql_cond_register("sql", all_tc_conds, array_elements(all_tc_conds));
+#endif
+ /* Extra instance is allocated to avoid false sharing */
+ if (!(tc= new Table_cache_instance[tc_instances + 1]))
+ DBUG_RETURN(true);
+ tc_allocated_size= (tc_instances + 1) * sizeof *tc;
+ update_malloc_size(tc_allocated_size, 0);
+ tdc_inited= true;
+ mysql_mutex_init(key_LOCK_unused_shares, &LOCK_unused_shares,
+ MY_MUTEX_INIT_FAST);
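+ /*
+ Sizing note: the TDC_element declaration (in table_cache.h) provides the
+ first Share_free_tables slot; the extra (tc_instances - 1) slots allocated
+ here extend that array so each table cache instance gets its own per-share
+ free list (see the free_tables[i] loops elsewhere in this file).
+ */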
+ lf_hash_init(&tdc_hash, sizeof(TDC_element) +
+ sizeof(Share_free_tables) * (tc_instances - 1),
+ LF_HASH_UNIQUE, 0, 0,
+ (my_hash_get_key) tdc_hash_key,
+ &my_charset_bin);
+ tdc_hash.alloc.constructor= lf_alloc_constructor;
+ tdc_hash.alloc.destructor= lf_alloc_destructor;
+ tdc_hash.initializer= (lf_hash_initializer) tdc_hash_initializer;
+ DBUG_RETURN(false);
+}
+
+
+/**
+ Notify the table definition cache that the server shutdown process has
+ started, so it has to keep the number of TABLE and TABLE_SHARE objects
+ minimal in order to reduce the number of references to pluggable engines.
+*/
+
+void tdc_start_shutdown(void)
+{
+ DBUG_ENTER("tdc_start_shutdown");
+ if (tdc_inited)
+ {
+ /*
+ Ensure that TABLE and TABLE_SHARE objects which are created for
+ tables that are open during the plugin shutdown process are
+ immediately released. This keeps the number of references to engine
+ plugins minimal and allows shutdown to proceed smoothly.
+ */
+ tdc_size= 0;
+ tc_size= 0;
+ /* Free all cached but unused TABLEs and TABLE_SHAREs. */
+ purge_tables();
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ Deinitialize table definition cache.
+*/
+
+void tdc_deinit(void)
+{
+ DBUG_ENTER("tdc_deinit");
+ if (tdc_inited)
+ {
+ tdc_inited= false;
+ lf_hash_destroy(&tdc_hash);
+ mysql_mutex_destroy(&LOCK_unused_shares);
+ if (tc)
+ {
+ tc->mark_memory_freed();
+ delete [] tc;
+ tc= 0;
+ }
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ Get number of cached table definitions.
+
+ @return Number of cached table definitions
+*/
+
+ulong tdc_records(void)
+{
+ return lf_hash_size(&tdc_hash);
+}
+
+
+void tdc_purge(bool all)
+{
+ DBUG_ENTER("tdc_purge");
+ while (all || tdc_records() > tdc_size)
+ {
+ TDC_element *element;
+
+ mysql_mutex_lock(&LOCK_unused_shares);
+ if (!(element= unused_shares.pop_front()))
+ {
+ mysql_mutex_unlock(&LOCK_unused_shares);
+ break;
+ }
+
+ /* Concurrent thread may start using share again, reset prev and next. */
+ element->prev= 0;
+ element->next= 0;
+ mysql_mutex_lock(&element->LOCK_table_share);
+ if (element->ref_count)
+ {
+ mysql_mutex_unlock(&element->LOCK_table_share);
+ mysql_mutex_unlock(&LOCK_unused_shares);
+ continue;
+ }
+ mysql_mutex_unlock(&LOCK_unused_shares);
+
+ tdc_delete_share_from_hash(element);
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ Lock table share.
+
+ Find table share with given db.table_name in table definition cache. Return
+ locked table share if found.
+
+ Locked table share means:
+ - table share is protected against removal from table definition cache
+ - no other thread can acquire/release table share
+
+ Caller is expected to unlock table share with tdc_unlock_share().
+
+ @retval 0 Share not found
+ @retval MY_ERRPTR OOM
+ @retval ptr Pointer to locked table share
+*/
+
+TDC_element *tdc_lock_share(THD *thd, const char *db, const char *table_name)
+{
+ TDC_element *element;
+ char key[MAX_DBKEY_LENGTH];
+
+ DBUG_ENTER("tdc_lock_share");
+ if (unlikely(fix_thd_pins(thd)))
+ DBUG_RETURN((TDC_element*) MY_ERRPTR);
+
+ element= (TDC_element *) lf_hash_search(&tdc_hash, thd->tdc_hash_pins,
+ (uchar*) key,
+ tdc_create_key(key, db, table_name));
+ if (element)
+ {
+ mysql_mutex_lock(&element->LOCK_table_share);
+ if (unlikely(!element->share || element->share->error))
+ {
+ mysql_mutex_unlock(&element->LOCK_table_share);
+ element= 0;
+ }
+ lf_hash_search_unpin(thd->tdc_hash_pins);
+ }
+
+ DBUG_RETURN(element);
+}
+
+
+/**
+ Unlock share locked by tdc_lock_share().
+*/
+
+void tdc_unlock_share(TDC_element *element)
+{
+ DBUG_ENTER("tdc_unlock_share");
+ mysql_mutex_unlock(&element->LOCK_table_share);
+ DBUG_VOID_RETURN;
+}
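+
+/*
+ Hypothetical usage sketch for tdc_lock_share()/tdc_unlock_share()
+ (variable names are made up):
+
+   TDC_element *element= tdc_lock_share(thd, db, table_name);
+   if (element == MY_ERRPTR)
+     return true;                          // out of memory
+   if (element)
+   {
+     // Share is locked: it cannot be removed, acquired or released by
+     // other threads until tdc_unlock_share() is called.
+     ... inspect element->share ...
+     tdc_unlock_share(element);
+   }
+*/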
+
+
+int tdc_share_is_cached(THD *thd, const char *db, const char *table_name)
+{
+ char key[MAX_DBKEY_LENGTH];
+
+ if (unlikely(fix_thd_pins(thd)))
+ return -1;
+
+ if (lf_hash_search(&tdc_hash, thd->tdc_hash_pins, (uchar*) key,
+ tdc_create_key(key, db, table_name)))
+ {
+ lf_hash_search_unpin(thd->tdc_hash_pins);
+ return 1;
+ }
+ return 0;
+}
+
+
+/*
+ Get TABLE_SHARE for a table.
+
+ tdc_acquire_share()
+ thd Thread handle
+ tl Table that should be opened
+ flags operation flags: whether to open a table or a view
+ out_table TABLE for the requested table
+
+ IMPLEMENTATION
+ Get a table definition from the table definition cache.
+ If it doesn't exist, create a new one from the table definition file.
+
+ RETURN
+ 0 Error
+ # Share for table
+*/
+
+TABLE_SHARE *tdc_acquire_share(THD *thd, TABLE_LIST *tl, uint flags,
+ TABLE **out_table)
+{
+ TABLE_SHARE *share;
+ TDC_element *element;
+ const char *key;
+ uint key_length= get_table_def_key(tl, &key);
+ my_hash_value_type hash_value= tl->mdl_request.key.tc_hash_value();
+ bool was_unused;
+ DBUG_ENTER("tdc_acquire_share");
+
+ if (fix_thd_pins(thd))
+ DBUG_RETURN(0);
+
+retry:
+ while (!(element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash,
+ thd->tdc_hash_pins, hash_value, (uchar*) key, key_length)))
+ {
+ LEX_STRING tmp= { const_cast<char*>(key), key_length };
+ int res= lf_hash_insert(&tdc_hash, thd->tdc_hash_pins, (uchar*) &tmp);
+
+ if (res == -1)
+ DBUG_RETURN(0);
+ else if (res == 1)
+ continue;
+
+ element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash,
+ thd->tdc_hash_pins, hash_value, (uchar*) key, key_length);
+ /* It's safe to unpin the pins here, because an empty element was inserted
+ above; "empty" means at least element->share == 0. No other thread can
+ delete it while element->share == 0, and element->share is also protected
+ by the element->LOCK_table_share mutex. */
+ lf_hash_search_unpin(thd->tdc_hash_pins);
+ DBUG_ASSERT(element);
+
+ if (!(share= alloc_table_share(tl->db.str, tl->table_name.str, key, key_length)))
+ {
+ lf_hash_delete(&tdc_hash, thd->tdc_hash_pins, key, key_length);
+ DBUG_RETURN(0);
+ }
+
+ /* note that tdc_acquire_share() *always* uses discovery */
+ open_table_def(thd, share, flags | GTS_USE_DISCOVERY);
+
+ if (checked_unlikely(share->error))
+ {
+ free_table_share(share);
+ lf_hash_delete(&tdc_hash, thd->tdc_hash_pins, key, key_length);
+ DBUG_RETURN(0);
+ }
+
+ mysql_mutex_lock(&element->LOCK_table_share);
+ element->share= share;
+ share->tdc= element;
+ element->ref_count++;
+ element->flushed= false;
+ mysql_mutex_unlock(&element->LOCK_table_share);
+
+ tdc_purge(false);
+ if (out_table)
+ {
+ status_var_increment(thd->status_var.table_open_cache_misses);
+ *out_table= 0;
+ }
+ share->m_psi= PSI_CALL_get_table_share(false, share);
+ goto end;
+ }
+
+ /* cannot force discovery of a cached share */
+ DBUG_ASSERT(!(flags & GTS_FORCE_DISCOVERY));
+
+ if (out_table && (flags & GTS_TABLE))
+ {
+ if ((*out_table= tc_acquire_table(thd, element)))
+ {
+ lf_hash_search_unpin(thd->tdc_hash_pins);
+ DBUG_ASSERT(!(flags & GTS_NOLOCK));
+ DBUG_ASSERT(element->share);
+ DBUG_ASSERT(!element->share->error);
+ DBUG_ASSERT(!element->share->is_view);
+ status_var_increment(thd->status_var.table_open_cache_hits);
+ DBUG_RETURN(element->share);
+ }
+ status_var_increment(thd->status_var.table_open_cache_misses);
+ }
+
+ mysql_mutex_lock(&element->LOCK_table_share);
+ if (!(share= element->share))
+ {
+ mysql_mutex_unlock(&element->LOCK_table_share);
+ lf_hash_search_unpin(thd->tdc_hash_pins);
+ goto retry;
+ }
+ lf_hash_search_unpin(thd->tdc_hash_pins);
+
+ /*
+ We found an existing table definition. Return it if we didn't get
+ an error when reading the table definition from file.
+ */
+ if (unlikely(share->error))
+ {
+ open_table_error(share, share->error, share->open_errno);
+ goto err;
+ }
+
+ if (share->is_view && !(flags & GTS_VIEW))
+ {
+ open_table_error(share, OPEN_FRM_NOT_A_TABLE, ENOENT);
+ goto err;
+ }
+ if (!share->is_view && !(flags & GTS_TABLE))
+ {
+ open_table_error(share, OPEN_FRM_NOT_A_VIEW, ENOENT);
+ goto err;
+ }
+
+ was_unused= !element->ref_count;
+ element->ref_count++;
+ mysql_mutex_unlock(&element->LOCK_table_share);
+ if (was_unused)
+ {
+ mysql_mutex_lock(&LOCK_unused_shares);
+ if (element->prev)
+ {
+ /*
+ The share was not used before and it was in the unused_shares list.
+ Unlink the share from this list.
+ */
+ DBUG_PRINT("info", ("Unlinking from not used list"));
+ unused_shares.remove(element);
+ element->next= 0;
+ element->prev= 0;
+ }
+ mysql_mutex_unlock(&LOCK_unused_shares);
+ }
+
+end:
+ DBUG_PRINT("exit", ("share: %p ref_count: %u",
+ share, share->tdc->ref_count));
+ if (flags & GTS_NOLOCK)
+ {
+ tdc_release_share(share);
+ /*
+ If GTS_NOLOCK is requested, the returned share pointer cannot be used:
+ the share it points to may go away at any moment.
+ But perhaps the caller is only interested in knowing whether a share or
+ table existed?
+ Let's return an invalid pointer here to catch dereferencing attempts.
+ */
+ share= UNUSABLE_TABLE_SHARE;
+ }
+ DBUG_RETURN(share);
+
+err:
+ mysql_mutex_unlock(&element->LOCK_table_share);
+ DBUG_RETURN(0);
+}
+
+
+/**
+ Release table share acquired by tdc_acquire_share().
+*/
+
+void tdc_release_share(TABLE_SHARE *share)
+{
+ DBUG_ENTER("tdc_release_share");
+
+ mysql_mutex_lock(&share->tdc->LOCK_table_share);
+ DBUG_PRINT("enter",
+ ("share: %p table: %s.%s ref_count: %u",
+ share, share->db.str, share->table_name.str,
+ share->tdc->ref_count));
+ DBUG_ASSERT(share->tdc->ref_count);
+
+ if (share->tdc->ref_count > 1)
+ {
+ share->tdc->ref_count--;
+ if (!share->is_view)
+ mysql_cond_broadcast(&share->tdc->COND_release);
+ mysql_mutex_unlock(&share->tdc->LOCK_table_share);
+ DBUG_VOID_RETURN;
+ }
+ mysql_mutex_unlock(&share->tdc->LOCK_table_share);
+
+ mysql_mutex_lock(&LOCK_unused_shares);
+ mysql_mutex_lock(&share->tdc->LOCK_table_share);
+ if (--share->tdc->ref_count)
+ {
+ if (!share->is_view)
+ mysql_cond_broadcast(&share->tdc->COND_release);
+ mysql_mutex_unlock(&share->tdc->LOCK_table_share);
+ mysql_mutex_unlock(&LOCK_unused_shares);
+ DBUG_VOID_RETURN;
+ }
+ if (share->tdc->flushed || tdc_records() > tdc_size)
+ {
+ mysql_mutex_unlock(&LOCK_unused_shares);
+ tdc_delete_share_from_hash(share->tdc);
+ DBUG_VOID_RETURN;
+ }
+ /* Link the share last in the unused_shares list */
+ DBUG_PRINT("info", ("moving share to unused list"));
+ DBUG_ASSERT(share->tdc->next == 0);
+ unused_shares.push_back(share->tdc);
+ mysql_mutex_unlock(&share->tdc->LOCK_table_share);
+ mysql_mutex_unlock(&LOCK_unused_shares);
+ DBUG_VOID_RETURN;
+}
+
+
+void tdc_remove_referenced_share(THD *thd, TABLE_SHARE *share)
+{
+ DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, share->db.str,
+ share->table_name.str,
+ MDL_EXCLUSIVE));
+ share->tdc->flush_unused(true);
+ mysql_mutex_lock(&share->tdc->LOCK_table_share);
+ DEBUG_SYNC(thd, "before_wait_for_refs");
+ share->tdc->wait_for_refs(1);
+ DBUG_ASSERT(share->tdc->all_tables.is_empty());
+ share->tdc->ref_count--;
+ tdc_delete_share_from_hash(share->tdc);
+}
+
+
+/**
+ Removes all TABLE instances and corresponding TABLE_SHARE
+
+ @param thd Thread context
+ @param db Name of database
+ @param table_name Name of table
+
+ @note It assumes that table instances are already not used by any
+ (other) thread (this should be achieved by using meta-data locks).
+*/
+
+void tdc_remove_table(THD *thd, const char *db, const char *table_name)
+{
+ TDC_element *element;
+ DBUG_ENTER("tdc_remove_table");
+ DBUG_PRINT("enter", ("name: %s", table_name));
+
+ DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, db, table_name,
+ MDL_EXCLUSIVE));
+
+ mysql_mutex_lock(&LOCK_unused_shares);
+ if (!(element= tdc_lock_share(thd, db, table_name)))
+ {
+ mysql_mutex_unlock(&LOCK_unused_shares);
+ DBUG_VOID_RETURN;
+ }
+
+ DBUG_ASSERT(element != MY_ERRPTR); // What can we do about it?
+
+ if (!element->ref_count)
+ {
+ if (element->prev)
+ {
+ unused_shares.remove(element);
+ element->prev= 0;
+ element->next= 0;
+ }
+ mysql_mutex_unlock(&LOCK_unused_shares);
+
+ tdc_delete_share_from_hash(element);
+ DBUG_VOID_RETURN;
+ }
+ mysql_mutex_unlock(&LOCK_unused_shares);
+
+ element->ref_count++;
+ mysql_mutex_unlock(&element->LOCK_table_share);
+
+ /* We have to relock the mutex to avoid code duplication. Sigh. */
+ tdc_remove_referenced_share(thd, element->share);
+ DBUG_VOID_RETURN;
+}
+
+
+/**
+ Check if table's share is being removed from the table definition
+ cache and, if yes, wait until the flush is complete.
+
+ @param thd Thread context.
+ @param db Database name of the table whose share should be checked.
+ @param table_name Name of the table whose share should be checked.
+ @param wait_timeout Timeout for waiting.
+ @param deadlock_weight Weight of this wait for the deadlock detector.
+
+ @retval 0 Success. Share is up to date or has been flushed.
+ @retval 1 Error (OOM, was killed, the wait resulted
+ in a deadlock or timeout). Reported.
+*/
+
+int tdc_wait_for_old_version(THD *thd, const char *db, const char *table_name,
+ ulong wait_timeout, uint deadlock_weight)
+{
+ TDC_element *element;
+
+ if (!(element= tdc_lock_share(thd, db, table_name)))
+ return FALSE;
+ else if (element == MY_ERRPTR)
+ return TRUE;
+ else if (element->flushed)
+ {
+ struct timespec abstime;
+ set_timespec(abstime, wait_timeout);
+ return element->share->wait_for_old_version(thd, &abstime, deadlock_weight);
+ }
+ tdc_unlock_share(element);
+ return FALSE;
+}
+
+
+/**
+ Iterate table definition cache.
+
+ Object is protected against removal from table definition cache.
+
+ @note The returned TABLE_SHARE is not guaranteed to be fully initialized:
+ tdc_acquire_share() may have added a new share without opening it yet. If the
+ caller needs a fully initialized share, it must lock the table share mutex.
+*/
+
+struct eliminate_duplicates_arg
+{
+ HASH hash;
+ MEM_ROOT root;
+ my_hash_walk_action action;
+ void *argument;
+};
+
+
+static uchar *eliminate_duplicates_get_key(const uchar *element, size_t *length,
+ my_bool not_used __attribute__((unused)))
+{
+ LEX_STRING *key= (LEX_STRING *) element;
+ *length= key->length;
+ return (uchar *) key->str;
+}
+
+
+static my_bool eliminate_duplicates(TDC_element *element,
+ eliminate_duplicates_arg *arg)
+{
+ LEX_STRING *key= (LEX_STRING *) alloc_root(&arg->root, sizeof(LEX_STRING));
+
+ if (!key || !(key->str= (char*) memdup_root(&arg->root, element->m_key,
+ element->m_key_length)))
+ return TRUE;
+
+ key->length= element->m_key_length;
+
+ if (my_hash_insert(&arg->hash, (uchar *) key))
+ return FALSE;
+
+ return arg->action(element, arg->argument);
+}
+
+
+int tdc_iterate(THD *thd, my_hash_walk_action action, void *argument,
+ bool no_dups)
+{
+ eliminate_duplicates_arg no_dups_argument;
+ LF_PINS *pins;
+ myf alloc_flags= 0;
+ uint hash_flags= HASH_UNIQUE;
+ int res;
+
+ if (thd)
+ {
+ fix_thd_pins(thd);
+ pins= thd->tdc_hash_pins;
+ alloc_flags= MY_THREAD_SPECIFIC;
+ hash_flags|= HASH_THREAD_SPECIFIC;
+ }
+ else
+ pins= lf_hash_get_pins(&tdc_hash);
+
+ if (!pins)
+ return ER_OUTOFMEMORY;
+
+ if (no_dups)
+ {
+ init_alloc_root(PSI_INSTRUMENT_ME, &no_dups_argument.root, 4096, 4096, MYF(alloc_flags));
+ my_hash_init(PSI_INSTRUMENT_ME, &no_dups_argument.hash, &my_charset_bin,
+ tdc_records(), 0, 0, eliminate_duplicates_get_key, 0,
+ hash_flags);
+ no_dups_argument.action= action;
+ no_dups_argument.argument= argument;
+ action= (my_hash_walk_action) eliminate_duplicates;
+ argument= &no_dups_argument;
+ }
+
+ res= lf_hash_iterate(&tdc_hash, pins, action, argument);
+
+ if (!thd)
+ lf_hash_put_pins(pins);
+
+ if (no_dups)
+ {
+ my_hash_free(&no_dups_argument.hash);
+ free_root(&no_dups_argument.root, MYF(0));
+ }
+ return res;
+}
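+
+/*
+ Hypothetical callback sketch for tdc_iterate() (illustrative only; the
+ callback name is made up). A walk action receives each TDC_element plus a
+ caller-supplied argument and returns TRUE to abort the iteration, as
+ tc_purge_callback() above does:
+
+   static my_bool count_shares(TDC_element *, ulong *count)
+   {
+     (*count)++;
+     return FALSE;                          // keep iterating
+   }
+   ...
+   ulong n= 0;
+   tdc_iterate(thd, (my_hash_walk_action) count_shares, &n);
+*/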
+
+
+int show_tc_active_instances(THD *thd, SHOW_VAR *var, char *buff,
+ enum enum_var_type scope)
+{
+ var->type= SHOW_UINT;
+ var->value= buff;
+ *(reinterpret_cast<uint32_t*>(buff))=
+ tc_active_instances.load(std::memory_order_relaxed);
+ return 0;
+}
+
+
+/**
+ Waits until ref_count goes down to the given number
+
+ @param my_refs Number of references owned by the caller
+
+ Caller must own at least one TABLE_SHARE reference.
+
+ Even though current thread holds exclusive metadata lock on this share,
+ concurrent FLUSH TABLES threads may be in process of closing unused table
+ instances belonging to this share. E.g.:
+ thr1 (FLUSH TABLES): table= share->tdc.free_tables.pop_front();
+ thr1 (FLUSH TABLES): share->tdc.all_tables.remove(table);
+ thr2 (ALTER TABLE): tdc_remove_table();
+ thr1 (FLUSH TABLES): intern_close_table(table);
+
+ The current removal code assumes that all table instances (except for those
+ that are owned by the current thread) must be closed before
+ tdc_remove_table() returns. Wait for such tables now.
+
+ intern_close_table() decrements ref_count and signals COND_release. When
+ ref_count drops down to the number of references owned by the current
+ thread, waiting is completed.
+
+ Unfortunately TABLE_SHARE::wait_for_old_version() cannot be used here
+ because it waits for all table instances, whereas we have to wait only
+ for those that are not owned by current thread.
+*/
+
+void TDC_element::wait_for_refs(uint my_refs)
+{
+ while (ref_count > my_refs)
+ mysql_cond_wait(&COND_release, &LOCK_table_share);
+}
+
+
+/**
+ Flushes unused TABLE instances
+
+ @param thd Thread context
+ @param mark_flushed Whether to destroy TABLE_SHARE when released
+
+ Caller is allowed to own used TABLE instances.
+ There must be no TABLE objects used by other threads and caller must own
+ exclusive metadata lock on the table.
+*/
+
+void TDC_element::flush(THD *thd, bool mark_flushed)
+{
+ DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, share->db.str,
+ share->table_name.str,
+ MDL_EXCLUSIVE));
+
+ flush_unused(mark_flushed);
+
+ mysql_mutex_lock(&LOCK_table_share);
+ All_share_tables_list::Iterator it(all_tables);
+ uint my_refs= 0;
+ while (auto table= it++)
+ {
+ if (table->in_use == thd)
+ my_refs++;
+ }
+ wait_for_refs(my_refs);
+#ifndef DBUG_OFF
+ it.rewind();
+ while (auto table= it++)
+ DBUG_ASSERT(table->in_use == thd);
+#endif
+ mysql_mutex_unlock(&LOCK_table_share);
+}
+
+
+/**
+ Flushes unused TABLE instances
+*/
+
+void TDC_element::flush_unused(bool mark_flushed)
+{
+ Share_free_tables::List purge_tables;
+
+ mysql_mutex_lock(&LOCK_table_share);
+ if (mark_flushed)
+ flushed= true;
+ tc_remove_all_unused_tables(this, &purge_tables);
+ mysql_mutex_unlock(&LOCK_table_share);
+
+ while (auto table= purge_tables.pop_front())
+ intern_close_table(table);
+}