Diffstat (limited to 'sql/rpl_gtid.h')
-rw-r--r--  sql/rpl_gtid.h | 384
1 file changed, 384 insertions(+), 0 deletions(-)
diff --git a/sql/rpl_gtid.h b/sql/rpl_gtid.h
new file mode 100644
index 00000000..11541c80
--- /dev/null
+++ b/sql/rpl_gtid.h
@@ -0,0 +1,384 @@
+/* Copyright (c) 2013, Kristian Nielsen and MariaDB Services Ab.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
+
+#ifndef RPL_GTID_H
+#define RPL_GTID_H
+
+#include "hash.h"
+#include "queues.h"
+#include <atomic>
+
+/* Definitions for MariaDB global transaction ID (GTID). */
+
+
+extern const LEX_CSTRING rpl_gtid_slave_state_table_name;
+
+class String;
+
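+/*
+  Maximum length of a GTID in textual form "domain_id-server_id-seq_no":
+  up to 10 digits for each of the two uint32 components, up to 20 digits
+  for the uint64 seq_no, and two '-' separators.
+*/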
+#define GTID_MAX_STR_LENGTH (10+1+10+1+20)
+
+struct rpl_gtid
+{
+ uint32 domain_id;
+ uint32 server_id;
+ uint64 seq_no;
+};
+
+inline bool operator==(const rpl_gtid& lhs, const rpl_gtid& rhs)
+{
+ return
+ lhs.domain_id == rhs.domain_id &&
+ lhs.server_id == rhs.server_id &&
+ lhs.seq_no == rhs.seq_no;
+}
+
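+/*
+  How the slave should skip over the current event group when it has
+  already been applied: no skipping, skip a single standalone event, or
+  skip a whole transaction.
+*/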
+enum enum_gtid_skip_type {
+ GTID_SKIP_NOT, GTID_SKIP_STANDALONE, GTID_SKIP_TRANSACTION
+};
+
+
+/*
+ Structure to keep track of threads waiting in MASTER_GTID_WAIT().
+
+  Since replication is (mostly) single-threaded, we want to minimise the
+  performance impact of MASTER_GTID_WAIT() on it. To achieve this, we are
+  careful to keep the common lock between replication threads and
+  MASTER_GTID_WAIT threads held for as short a time as possible. We keep only
+  a single thread waiting to be notified by the replication threads; this
+ thread then handles all the (potentially heavy) lifting of dealing with
+ all current waiting threads.
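+
+  Example: a client session blocks until the slave has applied GTID 0-1-100
+  (or a later seq_no in that domain), or until 10 seconds have passed:
+
+    SELECT MASTER_GTID_WAIT('0-1-100', 10);
+
+  returning 0 once the position is reached, or -1 on timeout.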
+*/
+struct gtid_waiting {
+ /* Elements in the hash, basically a priority queue for each domain. */
+ struct hash_element {
+ QUEUE queue;
+ uint32 domain_id;
+ };
+ /* A priority queue to handle waiters in one domain in seq_no order. */
+ struct queue_element {
+ uint64 wait_seq_no;
+ THD *thd;
+ int queue_idx;
+ /*
+ do_small_wait is true if we have responsibility for ensuring that there
+ is a small waiter.
+ */
+ bool do_small_wait;
+ /*
+ The flag `done' is set when the wait is completed (either due to reaching
+ the position waited for, or due to timeout or kill). The queue_element
+      is in the queue if and only if `done' is false.
+ */
+ bool done;
+ };
+
+ mysql_mutex_t LOCK_gtid_waiting;
+ HASH hash;
+
+ void init();
+ void destroy();
+ hash_element *get_entry(uint32 domain_id);
+ int wait_for_pos(THD *thd, String *gtid_str, longlong timeout_us);
+ void promote_new_waiter(gtid_waiting::hash_element *he);
+ int wait_for_gtid(THD *thd, rpl_gtid *wait_gtid, struct timespec *wait_until);
+ void process_wait_hash(uint64 wakeup_seq_no, gtid_waiting::hash_element *he);
+ int register_in_wait_queue(THD *thd, rpl_gtid *wait_gtid, hash_element *he,
+ queue_element *elem);
+ void remove_from_wait_queue(hash_element *he, queue_element *elem);
+};
+
+
+class Relay_log_info;
+struct rpl_group_info;
+class Gtid_list_log_event;
+
+/*
+ Replication slave state.
+
+ For every independent replication stream (identified by domain_id), this
+ remembers the last gtid applied on the slave within this domain.
+
+ Since events are always committed in-order within a single domain, this is
+ sufficient to maintain the state of the replication slave.
+*/
+struct rpl_slave_state
+{
+ /* Elements in the list of GTIDs kept for each domain_id. */
+ struct list_element
+ {
+ struct list_element *next;
+ uint64 sub_id;
+ uint32 domain_id;
+ uint32 server_id;
+ uint64 seq_no;
+ /*
+      The hton of the mysql.gtid_slave_pos* table used to record this GTID.
+      Can be NULL if the gtid table failed to load (e.g. a missing
+ mysql.gtid_slave_pos table following an upgrade).
+ */
+ void *hton;
+ };
+
+ /* Elements in the HASH that hold the state for one domain_id. */
+ struct element
+ {
+ struct list_element *list;
+ uint32 domain_id;
+ /* Highest seq_no seen so far in this domain. */
+ uint64 highest_seq_no;
+ /*
+ If this is non-NULL, then it is the waiter responsible for the small
+ wait in MASTER_GTID_WAIT().
+ */
+ gtid_waiting::queue_element *gtid_waiter;
+ /*
+ If gtid_waiter is non-NULL, then this is the seq_no that its
+ MASTER_GTID_WAIT() is waiting on. When we reach this seq_no, we need to
+ signal the waiter on COND_wait_gtid.
+ */
+ uint64 min_wait_seq_no;
+ mysql_cond_t COND_wait_gtid;
+
+ /*
+ For --gtid-ignore-duplicates. The Relay_log_info that currently owns
+ this domain, and the number of worker threads that are active in it.
+
+ The idea is that only one of multiple master connections is allowed to
+ actively apply events for a given domain. Other connections must either
+ discard the events (if the seq_no in GTID shows they have already been
+ applied), or wait to see if the current owner will apply it.
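+
+    A sketch of the resulting decision for an incoming event group
+    (illustrative pseudocode only):
+
+      if seq_no already applied in this domain -> discard the event
+      else if owner_rli is NULL or our own rli -> take ownership and apply
+      else -> wait on COND_gtid_ignore_duplicates, then re-check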
+ */
+ const Relay_log_info *owner_rli;
+ uint32 owner_count;
+ mysql_cond_t COND_gtid_ignore_duplicates;
+
+ list_element *grab_list() { list_element *l= list; list= NULL; return l; }
+ void add(list_element *l)
+ {
+ l->next= list;
+ list= l;
+ }
+ };
+
+  /* Descriptor for a mysql.gtid_slave_posXXX table in a specific engine. */
+ enum gtid_pos_table_state {
+ GTID_POS_AUTO_CREATE,
+ GTID_POS_CREATE_REQUESTED,
+ GTID_POS_CREATE_IN_PROGRESS,
+ GTID_POS_AVAILABLE
+ };
+ struct gtid_pos_table {
+ struct gtid_pos_table *next;
+ /*
+ Use a void * here, rather than handlerton *, to make explicit that we
+ are not using the value to access any functionality in the engine. It
+ is just used as an opaque value to identify which engine we are using
+ for each GTID row.
+ */
+ void *table_hton;
+ LEX_CSTRING table_name;
+ uint8 state;
+ };
+
+ /* Mapping from domain_id to its element. */
+ HASH hash;
+ /* GTIDs added since last purge of old mysql.gtid_slave_pos rows. */
+ uint32 pending_gtid_count;
+ /* Mutex protecting access to the state. */
+ mysql_mutex_t LOCK_slave_state;
+ /* Auxiliary buffer to sort gtid list. */
+ DYNAMIC_ARRAY gtid_sort_array;
+
+ uint64 last_sub_id;
+ /*
+ List of tables available for durably storing the slave GTID position.
+
+    Accesses to this list are protected by LOCK_slave_state. However, for
+    efficiency, there is also a provision for read access to it from a
+    running slave without taking the lock.
+
+ An element can be added at the head of a list by storing the new
+ gtid_pos_tables pointer atomically with release semantics, to ensure that
+ the next pointer of the new element is visible to readers of the new list.
+ Other changes (like deleting or replacing elements) must happen only while
+ all SQL driver threads are stopped. LOCK_slave_state must be held in any
+ case.
+
+    The list can be read without holding the lock by an SQL driver thread or
+    worker thread, by reading the gtid_pos_tables pointer atomically with
+    acquire semantics, to ensure that it will see the correct next pointer of
+    a new head element.
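+
+    A minimal sketch of the intended access pattern (illustrative only, not
+    code from this file):
+
+      // Writer, holding LOCK_slave_state: publish a new head element.
+      entry->next= gtid_pos_tables.load(std::memory_order_relaxed);
+      gtid_pos_tables.store(entry, std::memory_order_release);
+
+      // Reader, no lock: traverse a consistent snapshot of the list.
+      for (gtid_pos_table *p= gtid_pos_tables.load(std::memory_order_acquire);
+           p != NULL; p= p->next)
+        { ... }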
+ */
+ std::atomic<gtid_pos_table*> gtid_pos_tables;
+ /* The default entry in gtid_pos_tables, mysql.gtid_slave_pos. */
+ std::atomic<gtid_pos_table*> default_gtid_pos_table;
+ bool loaded;
+
+ rpl_slave_state();
+ ~rpl_slave_state();
+
+ void truncate_hash();
+ ulong count() const { return hash.records; }
+ int update(uint32 domain_id, uint32 server_id, uint64 sub_id,
+ uint64 seq_no, void *hton, rpl_group_info *rgi);
+ int truncate_state_table(THD *thd);
+ void select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename);
+ int record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
+ bool in_transaction, bool in_statement, void **out_hton);
+ list_element *gtid_grab_pending_delete_list();
+ LEX_CSTRING *select_gtid_pos_table(void *hton);
+ void gtid_delete_pending(THD *thd, rpl_slave_state::list_element **list_ptr);
+ uint64 next_sub_id(uint32 domain_id);
+ int iterate(int (*cb)(rpl_gtid *, void *), void *data,
+ rpl_gtid *extra_gtids, uint32 num_extra,
+ bool sort);
+ int tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra);
+ bool domain_to_gtid(uint32 domain_id, rpl_gtid *out_gtid);
+ int load(THD *thd, const char *state_from_master, size_t len, bool reset,
+ bool in_statement);
+ bool is_empty();
+
+ element *get_element(uint32 domain_id);
+ int put_back_list(list_element *list);
+
+ void update_state_hash(uint64 sub_id, rpl_gtid *gtid, void *hton,
+ rpl_group_info *rgi);
+ int record_and_update_gtid(THD *thd, struct rpl_group_info *rgi);
+ int check_duplicate_gtid(rpl_gtid *gtid, rpl_group_info *rgi);
+ void release_domain_owner(rpl_group_info *rgi);
+ void set_gtid_pos_tables_list(gtid_pos_table *new_list,
+ gtid_pos_table *default_entry);
+ void add_gtid_pos_table(gtid_pos_table *entry);
+ struct gtid_pos_table *alloc_gtid_pos_table(LEX_CSTRING *table_name,
+ void *hton, rpl_slave_state::gtid_pos_table_state state);
+ void free_gtid_pos_tables(struct gtid_pos_table *list);
+};
+
+
+/*
+ Binlog state.
+ This keeps the last GTID written to the binlog for every distinct
+ (domain_id, server_id) pair.
+ This will be logged at the start of the next binlog file as a
+ Gtid_list_log_event; this way, it is easy to find the binlog file
+ containing a given GTID, by simply scanning backwards from the newest
+ one until a lower seq_no is found in the Gtid_list_log_event at the
+ start of a binlog for the given domain_id and server_id.
+
+  We also remember the last logged GTID for every domain_id. This is used to
+  know where to start when a master is changed to a slave. As a side effect,
+  it also allows us to skip a hash lookup in the very common case of logging
+  a new GTID with the same server_id as the last GTID.
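+
+  For example: to locate GTID 0-1-100, scan binlog files from the newest
+  backwards. Every file written after that GTID starts with a
+  Gtid_list_log_event recording seq_no >= 100 for (domain 0, server 1); the
+  first file found whose list records a lower seq_no is the file containing
+  the target event.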
+*/
+struct rpl_binlog_state
+{
+ struct element {
+ uint32 domain_id;
+    HASH hash; /* Contains an entry for each server_id in one domain_id */
+ /* The most recent entry in the hash. */
+ rpl_gtid *last_gtid;
+ /* Counter to allocate next seq_no for this domain. */
+ uint64 seq_no_counter;
+
+ int update_element(const rpl_gtid *gtid);
+ };
+ /* Mapping from domain_id to collection of elements. */
+ HASH hash;
+ /* Mutex protecting access to the state. */
+ mysql_mutex_t LOCK_binlog_state;
+ my_bool initialized;
+
+ /* Auxiliary buffer to sort gtid list. */
+ DYNAMIC_ARRAY gtid_sort_array;
+
+ rpl_binlog_state() :initialized(0) {}
+ ~rpl_binlog_state();
+
+ void init();
+ void reset_nolock();
+ void reset();
+ void free();
+ bool load(struct rpl_gtid *list, uint32 count);
+ bool load(rpl_slave_state *slave_pos);
+ int update_nolock(const struct rpl_gtid *gtid, bool strict);
+ int update(const struct rpl_gtid *gtid, bool strict);
+ int update_with_next_gtid(uint32 domain_id, uint32 server_id,
+ rpl_gtid *gtid);
+ int alloc_element_nolock(const rpl_gtid *gtid);
+ bool check_strict_sequence(uint32 domain_id, uint32 server_id, uint64 seq_no);
+ int bump_seq_no_if_needed(uint32 domain_id, uint64 seq_no);
+ int write_to_iocache(IO_CACHE *dest);
+ int read_from_iocache(IO_CACHE *src);
+ uint32 count();
+ int get_gtid_list(rpl_gtid *gtid_list, uint32 list_size);
+ int get_most_recent_gtid_list(rpl_gtid **list, uint32 *size);
+ bool append_pos(String *str);
+ bool append_state(String *str);
+ rpl_gtid *find_nolock(uint32 domain_id, uint32 server_id);
+ rpl_gtid *find(uint32 domain_id, uint32 server_id);
+ rpl_gtid *find_most_recent(uint32 domain_id);
+ const char* drop_domain(DYNAMIC_ARRAY *ids, Gtid_list_log_event *glev, char*);
+};
+
+
+/*
+  Represents the GTID state from which a slave connection requests the
+  master to start sending binlog events.
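+
+  Example (illustrative): the start position string "0-1-100,1-2-5" requests
+  one starting GTID per replication domain; load() parses such a string into
+  one entry per domain_id.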
+*/
+struct slave_connection_state
+{
+ struct entry {
+ rpl_gtid gtid;
+ uint32 flags;
+ };
+ /* Bits for 'flags' */
+ enum start_flags
+ {
+ START_OWN_SLAVE_POS= 0x1,
+ START_ON_EMPTY_DOMAIN= 0x2
+ };
+
+ /* Mapping from domain_id to the entry with GTID requested for that domain. */
+ HASH hash;
+
+ /* Auxiliary buffer to sort gtid list. */
+ DYNAMIC_ARRAY gtid_sort_array;
+
+ slave_connection_state();
+ ~slave_connection_state();
+
+ void reset() { my_hash_reset(&hash); }
+ int load(const char *slave_request, size_t len);
+ int load(const rpl_gtid *gtid_list, uint32 count);
+ int load(rpl_slave_state *state, rpl_gtid *extra_gtids, uint32 num_extra);
+ rpl_gtid *find(uint32 domain_id);
+ entry *find_entry(uint32 domain_id);
+ int update(const rpl_gtid *in_gtid);
+ void remove(const rpl_gtid *gtid);
+ void remove_if_present(const rpl_gtid *in_gtid);
+ ulong count() const { return hash.records; }
+ int to_string(String *out_str);
+ int append_to_string(String *out_str);
+ int get_gtid_list(rpl_gtid *gtid_list, uint32 list_size);
+ bool is_pos_reached();
+};
+
+
+extern bool rpl_slave_state_tostring_helper(String *dest, const rpl_gtid *gtid,
+ bool *first);
+extern int gtid_check_rpl_slave_state_table(TABLE *table);
+extern rpl_gtid *gtid_parse_string_to_list(const char *p, size_t len,
+ uint32 *out_len);
+
+#endif /* RPL_GTID_H */