diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:07:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:07:14 +0000 |
commit | a175314c3e5827eb193872241446f2f8f5c9d33c (patch) | |
tree | cd3d60ca99ae00829c52a6ca79150a5b6e62528b /storage/tokudb/ha_tokudb.h | |
parent | Initial commit. (diff) | |
download | mariadb-10.5-a175314c3e5827eb193872241446f2f8f5c9d33c.tar.xz mariadb-10.5-a175314c3e5827eb193872241446f2f8f5c9d33c.zip |
Adding upstream version 1:10.5.12. (tags: upstream/1%10.5.12, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/tokudb/ha_tokudb.h')
-rw-r--r-- | storage/tokudb/ha_tokudb.h | 1095 |
1 file changed, 1095 insertions, 0 deletions
diff --git a/storage/tokudb/ha_tokudb.h b/storage/tokudb/ha_tokudb.h new file mode 100644 index 00000000..5a7027a6 --- /dev/null +++ b/storage/tokudb/ha_tokudb.h @@ -0,0 +1,1095 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of TokuDB + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + TokuDBis is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + TokuDB is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TokuDB. If not, see <http://www.gnu.org/licenses/>. + +======= */ + +#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#ifndef _HA_TOKUDB_H +#define _HA_TOKUDB_H + +#include "hatoku_hton.h" +#include "hatoku_cmp.h" +#include "tokudb_background.h" + +#define HA_TOKU_ORIG_VERSION 4 +#define HA_TOKU_VERSION 4 +// +// no capabilities yet +// +#define HA_TOKU_CAP 0 + +class ha_tokudb; + +typedef struct loader_context { + THD* thd; + char write_status_msg[1024]; + ha_tokudb* ha; +} *LOADER_CONTEXT; + +// +// This class stores table information that is to be shared +// among all ha_tokudb objects. +// There is one instance per table, shared among handlers. +// Some of the variables here are the DB* pointers to indexes, +// and auto increment information. +// +// When the last user releases it's reference on the share, +// it closes all of its database handles and releases all info +// The share instance stays around though so some data can be transiently +// kept across open-close-open-close cycles. 
These data will be explicitly +// noted below. +// +class TOKUDB_SHARE { +public: + enum share_state_t { + CLOSED = 0, + OPENED = 1, + ERROR = 2 + }; + + // one time, start up init + static void static_init(); + + // one time, shutdown destroy + static void static_destroy(); + + // retuns a locked, properly reference counted share + // callers must check to ensure share is in correct state for callers use + // and unlock the share. + // if create_new is set, a new "CLOSED" share will be created if one + // doesn't exist, otherwise will return NULL if an existing is not found. + static TOKUDB_SHARE* get_share( + const char* table_name, + THR_LOCK_DATA* data, + bool create_new); + + // removes a share entirely from the pool, call to rename/deleta a table + // caller must hold ddl_mutex on this share and the share MUST have + // exactly 0 _use_count + static void drop_share(TOKUDB_SHARE* share); + + // returns state string for logging/reporting + static const char* get_state_string(share_state_t state); + + void* operator new(size_t sz); + void operator delete(void* p); + + TOKUDB_SHARE(); + + // increases the ref count and waits for any currently executing state + // transition to complete + // returns current state and leaves share locked + // callers must check to ensure share is in correct state for callers use + // and unlock the share. 
+ share_state_t addref(); + + // decreases the ref count and potentially closes the share + // caller must not have ownership of mutex, will lock and release + int release(); + + // returns the current use count + // no locking requirements + inline int use_count() const; + + // locks the share + inline void lock() const; + + // unlocks the share + inline void unlock() const; + + // returns the current state of the share + // no locking requirements + inline share_state_t state() const; + + // sets the state of the share + // caller must hold mutex on this share + inline void set_state(share_state_t state); + + // returns the full MySQL table name of the table ex: + // ./database/table + // no locking requirements + inline const char* full_table_name() const; + + // returns the strlen of the full table name + // no locking requirements + inline uint full_table_name_length() const; + + // returns the parsed database name this table resides in + // no locking requirements + inline const char* database_name() const; + + // returns the strlen of the database name + // no locking requirements + inline uint database_name_length() const; + + // returns the parsed table name of this table + // no locking requirements + inline const char* table_name() const; + + // returns the strlen of the the table name + // no locking requirements + inline uint table_name_length() const; + + // sets the estimated number of rows in the table + // should be called only during share initialization and info call + // caller must hold mutex on this share unless specified by 'locked' + inline void set_row_count(uint64_t rows, bool locked); + + // updates tracked row count and ongoing table change delta tracking + // called from any ha_tokudb operation that inserts/modifies/deletes rows + // may spawn background analysis if enabled, allowed and threshold hit + // caller must not have ownership of mutex, will lock and release + void update_row_count( + THD* thd, + uint64_t added, + uint64_t 
deleted, + uint64_t updated); + + // returns the current row count estimate + // no locking requirements + inline ha_rows row_count() const; + + // initializes cardinality statistics, takes ownership of incoming buffer + // caller must hold mutex on this share + inline void init_cardinality_counts( + uint32_t rec_per_keys, + uint64_t* rec_per_key); + + // update the cardinality statistics. number of records must match + // caller must hold mutex on this share + inline void update_cardinality_counts( + uint32_t rec_per_keys, + const uint64_t* rec_per_key); + + // disallow any auto analysis from taking place + // caller must hold mutex on this share + inline void disallow_auto_analysis(); + + // allow any auto analysis to take place + // pass in true for 'reset_deltas' to reset delta counting to 0 + // caller must hold mutex on this share + inline void allow_auto_analysis(bool reset_deltas); + + // cancels all background jobs for this share + // no locking requirements + inline void cancel_background_jobs() const; + + // copies cardinality statistics into TABLE counter set + // caller must not have ownership of mutex, will lock and release + void set_cardinality_counts_in_table(TABLE* table); + + // performs table analysis on underlying indices and produces estimated + // cardinality statistics. + // on success updates cardinality counts in status database and this share + // MUST pass a valid THD to access session variables. + // MAY pass txn. If txn is passed, assumes an explicit user scheduled + // ANALYZE and not an auto ANALYZE resulting from delta threshold + // uses session variables: + // tokudb_analyze_in_background, tokudb_analyze_throttle, + // tokudb_analyze_time, and tokudb_analyze_delete_fraction + // caller must hold mutex on this share + int analyze_standard(THD* thd, DB_TXN* txn); + + // performs table scan and updates the internal FT logical row count value + // on success also updates share row count estimate. 
+ // MUST pass a valid THD to access session variables. + // MAY pass txn. If txn is passed, assumes an explicit user scheduled + // uses session variables: + // tokudb_analyze_in_background, and tokudb_analyze_throttle + // caller must not have ownership of mutex, will lock and release + int analyze_recount_rows(THD* thd, DB_TXN* txn); + +public: + //********************************* + // Destroyed and recreated on open-close-open + ulonglong auto_ident; + ulonglong last_auto_increment, auto_inc_create_value; + + // estimate on number of rows added in the process of a locked tables + // this is so we can better estimate row count during a lock table + ha_rows rows_from_locked_table; + DB* status_block; + + // DB that is indexed on the primary key + DB* file; + + // array of all DB's that make up table, includes DB that + // is indexed on the primary key, add 1 in case primary + // key is hidden + DB* key_file[MAX_KEY + 1]; + uint status, version, capabilities; + uint ref_length; + + // whether table has an auto increment column + bool has_auto_inc; + + // index of auto increment column in table->field, if auto_inc exists + uint ai_field_index; + + // whether the primary key has a string + bool pk_has_string; + + KEY_AND_COL_INFO kc_info; + + // key info copied from TABLE_SHARE, used by background jobs that have no + // access to a handler instance + uint _keys; + uint _max_key_parts; + struct key_descriptor_t { + uint _parts; + bool _is_unique; + char* _name; + }; + key_descriptor_t* _key_descriptors; + + // we want the following optimization for bulk loads, if the table is empty, + // attempt to grab a table lock. emptiness check can be expensive, + // so we try it once for a table. After that, we keep this variable around + // to tell us to not try it again. 
+ bool try_table_lock; + + bool has_unique_keys; + bool replace_into_fast; + tokudb::thread::rwlock_t _num_DBs_lock; + uint32_t num_DBs; + +private: + static std::unordered_map<std::string, TOKUDB_SHARE*> _open_tables; + static tokudb::thread::mutex_t* _open_tables_mutex; + + //********************************* + // Spans open-close-open + mutable tokudb::thread::mutex_t _mutex; + mutable tokudb::thread::mutex_t _ddl_mutex; + uint _use_count; + + share_state_t _state; + + ulonglong _row_delta_activity; + bool _allow_auto_analysis; + + String _full_table_name; + String _database_name; + String _table_name; + + //********************************* + // Destroyed and recreated on open-close-open + THR_LOCK _thr_lock; + + // estimate on number of rows in table + ha_rows _rows; + + // cardinality counts + uint32_t _rec_per_keys; + uint64_t* _rec_per_key; + + void init(const char* table_name); + void destroy(); +}; +inline int TOKUDB_SHARE::use_count() const { + return _use_count; +} +inline void TOKUDB_SHARE::lock() const { + TOKUDB_SHARE_DBUG_ENTER("file[%s]:state[%s]:use_count[%d]", + _full_table_name.ptr(), + get_state_string(_state), + _use_count); + mutex_t_lock(_mutex); + TOKUDB_SHARE_DBUG_VOID_RETURN(); +} +inline void TOKUDB_SHARE::unlock() const { + TOKUDB_SHARE_DBUG_ENTER("file[%s]:state[%s]:use_count[%d]", + _full_table_name.ptr(), + get_state_string(_state), + _use_count); + mutex_t_unlock(_mutex); + TOKUDB_SHARE_DBUG_VOID_RETURN(); +} +inline TOKUDB_SHARE::share_state_t TOKUDB_SHARE::state() const { + return _state; +} +inline void TOKUDB_SHARE::set_state(TOKUDB_SHARE::share_state_t state) { + TOKUDB_SHARE_DBUG_ENTER("file[%s]:state[%s]:use_count[%d]:new_state[%s]", + _full_table_name.ptr(), + get_state_string(_state), + _use_count, + get_state_string(state)); + + assert_debug(_mutex.is_owned_by_me()); + _state = state; + TOKUDB_SHARE_DBUG_VOID_RETURN(); +} +inline const char* TOKUDB_SHARE::full_table_name() const { + return _full_table_name.ptr(); +} 
+inline uint TOKUDB_SHARE::full_table_name_length() const { + return _full_table_name.length(); +} +inline const char* TOKUDB_SHARE::database_name() const { + return _database_name.ptr(); +} +inline uint TOKUDB_SHARE::database_name_length() const { + return _database_name.length(); +} +inline const char* TOKUDB_SHARE::table_name() const { + return _table_name.ptr(); +} +inline uint TOKUDB_SHARE::table_name_length() const { + return _table_name.length(); +} +inline void TOKUDB_SHARE::set_row_count(uint64_t rows, bool locked) { + TOKUDB_SHARE_DBUG_ENTER("file[%s]:state[%s]:use_count[%d]:rows[%" PRIu64 "]:locked[%d]", + _full_table_name.ptr(), + get_state_string(_state), + _use_count, + rows, + locked); + + if (!locked) { + lock(); + } else { + assert_debug(_mutex.is_owned_by_me()); + } + if (_rows && rows == 0) + _row_delta_activity = 0; + + _rows = rows; + if (!locked) { + unlock(); + } + TOKUDB_SHARE_DBUG_VOID_RETURN(); +} +inline ha_rows TOKUDB_SHARE::row_count() const { + return _rows; +} +inline void TOKUDB_SHARE::init_cardinality_counts( + uint32_t rec_per_keys, + uint64_t* rec_per_key) { + + assert_debug(_mutex.is_owned_by_me()); + // can not change number of keys live + assert_always(_rec_per_key == nullptr); + assert_always(_rec_per_keys == 0); + _rec_per_keys = rec_per_keys; + _rec_per_key = rec_per_key; +} +inline void TOKUDB_SHARE::update_cardinality_counts( + uint32_t rec_per_keys, + const uint64_t* rec_per_key) { + + assert_debug(_mutex.is_owned_by_me()); + // can not change number of keys live + assert_always(rec_per_keys == _rec_per_keys); + assert_always(rec_per_key != NULL); + memcpy(_rec_per_key, rec_per_key, _rec_per_keys * sizeof(uint64_t)); +} +inline void TOKUDB_SHARE::disallow_auto_analysis() { + assert_debug(_mutex.is_owned_by_me()); + _allow_auto_analysis = false; +} +inline void TOKUDB_SHARE::allow_auto_analysis(bool reset_deltas) { + assert_debug(_mutex.is_owned_by_me()); + _allow_auto_analysis = true; + if (reset_deltas) + 
_row_delta_activity = 0; +} +inline void TOKUDB_SHARE::cancel_background_jobs() const { + tokudb::background::_job_manager->cancel_job(full_table_name()); +} + + + +typedef struct st_filter_key_part_info { + uint offset; + uint part_index; +} FILTER_KEY_PART_INFO; + +typedef enum { + lock_read = 0, + lock_write +} TABLE_LOCK_TYPE; + +// the number of rows bulk fetched in one callback grows exponentially +// with the bulk fetch iteration, so the max iteration is the max number +// of shifts we can perform on a 64 bit integer. +#define HA_TOKU_BULK_FETCH_ITERATION_MAX 63 + +class ha_tokudb : public handler { +private: + THR_LOCK_DATA lock; ///< MySQL lock + TOKUDB_SHARE *share; ///< Shared lock info + +#ifdef MARIADB_BASE_VERSION + // MariaDB version of MRR + DsMrr_impl ds_mrr; +#elif 50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699 + // MySQL version of MRR + DsMrr_impl ds_mrr; +#endif + + // For ICP. Cache our own copies + Item* toku_pushed_idx_cond; + uint toku_pushed_idx_cond_keyno; /* The index which the above condition is for */ + bool icp_went_out_of_range; + + // + // last key returned by ha_tokudb's cursor + // + DBT last_key; + // + // pointer used for multi_alloc of key_buff, key_buff2, primary_key_buff + // + void *alloc_ptr; + // + // buffer used to temporarily store a "packed row" + // data pointer of a DBT will end up pointing to this + // see pack_row for usage + // + uchar *rec_buff; + // + // number of bytes allocated in rec_buff + // + ulong alloced_rec_buff_length; + // + // same as above two, but for updates + // + uchar *rec_update_buff; + ulong alloced_update_rec_buff_length; + uint32_t max_key_length; + + uchar* range_query_buff; // range query buffer + uint32_t size_range_query_buff; // size of the allocated range query buffer + uint32_t bytes_used_in_range_query_buff; // number of bytes used in the range query buffer + uint32_t curr_range_query_buff_offset; // current offset into the range query buffer for queries to read + uint64_t 
bulk_fetch_iteration; + uint64_t rows_fetched_using_bulk_fetch; + bool doing_bulk_fetch; + bool maybe_index_scan; + + // + // buffer used to temporarily store a "packed key" + // data pointer of a DBT will end up pointing to this + // + uchar *key_buff; + // + // buffer used to temporarily store a "packed key" + // data pointer of a DBT will end up pointing to this + // This is used in functions that require the packing + // of more than one key + // + uchar *key_buff2; + uchar *key_buff3; + uchar *key_buff4; + // + // buffer used to temporarily store a "packed key" + // data pointer of a DBT will end up pointing to this + // currently this is only used for a primary key in + // the function update_row, hence the name. It + // does not carry any state throughout the class. + // + uchar *primary_key_buff; + + // + // ranges of prelocked area, used to know how much to bulk fetch + // + uchar *prelocked_left_range; + uint32_t prelocked_left_range_size; + uchar *prelocked_right_range; + uint32_t prelocked_right_range_size; + + + // + // individual DBTs for each index + // + DBT_ARRAY mult_key_dbt_array[2*(MAX_KEY + 1)]; + DBT_ARRAY mult_rec_dbt_array[MAX_KEY + 1]; + uint32_t mult_put_flags[MAX_KEY + 1]; + uint32_t mult_del_flags[MAX_KEY + 1]; + uint32_t mult_dbt_flags[MAX_KEY + 1]; + + + // + // when unpacking blobs, we need to store it in a temporary + // buffer that will persist because MySQL just gets a pointer to the + // blob data, a pointer we need to ensure is valid until the next + // query + // + uchar* blob_buff; + uint32_t num_blob_bytes; + + bool unpack_entire_row; + + // + // buffers (and their sizes) that will hold the indexes + // of fields that need to be read for a query + // + uint32_t* fixed_cols_for_query; + uint32_t num_fixed_cols_for_query; + uint32_t* var_cols_for_query; + uint32_t num_var_cols_for_query; + bool read_blobs; + bool read_key; + + // + // transaction used by ha_tokudb's cursor + // + DB_TXN *transaction; + + // external_lock will 
set this true for read operations that will be closely followed by write operations. + bool use_write_locks; // use write locks for reads + + // + // instance of cursor being used for init_xxx and rnd_xxx functions + // + DBC *cursor; + uint32_t cursor_flags; // flags for cursor + // + // flags that are returned in table_flags() + // + ulonglong int_table_flags; + // + // count on the number of rows that gets changed, such as when write_row occurs + // this is meant to help keep estimate on number of elements in DB + // + ulonglong added_rows; + ulonglong deleted_rows; + ulonglong updated_rows; + + + uint last_dup_key; + // + // if set to 0, then the primary key is not hidden + // if non-zero (not necessarily 1), primary key is hidden + // + uint hidden_primary_key; + bool key_read, using_ignore; + bool using_ignore_no_key; + + // + // After a cursor encounters an error, the cursor will be unusable + // In case MySQL attempts to do a cursor operation (such as rnd_next + // or index_prev), we will gracefully return this error instead of crashing + // + int last_cursor_error; + + // + // For instances where we successfully prelock a range or a table, + // we set this to true so that successive cursor calls can know + // know to limit the locking overhead in a call to the fractal tree + // + bool range_lock_grabbed; + bool range_lock_grabbed_null; + + // + // For bulk inserts, we want option of not updating auto inc + // until all inserts are done. By default, is false + // + bool delay_updating_ai_metadata; // if true, don't update auto-increment metadata until bulk load completes + bool ai_metadata_update_required; // if true, autoincrement metadata must be updated + + // + // buffer for updating the status of long insert, delete, and update + // statements. Right now, the the messages are + // "[inserted|updated|deleted] about %llu rows", + // so a buffer of 200 is good enough. + // + char write_status_msg[200]; //buffer of 200 should be a good upper bound. 
+ struct loader_context lc; + + DB_LOADER* loader; + bool abort_loader; + int loader_error; + + bool num_DBs_locked_in_bulk; + uint32_t lock_count; + + bool fix_rec_buff_for_blob(ulong length); + bool fix_rec_update_buff_for_blob(ulong length); + uchar current_ident[TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH]; + + ulong max_row_length(const uchar * buf); + int pack_row_in_buff( + DBT * row, + const uchar* record, + uint index, + uchar* row_buff + ); + int pack_row( + DBT * row, + const uchar* record, + uint index + ); + int pack_old_row_for_update( + DBT * row, + const uchar* record, + uint index + ); + uint32_t place_key_into_mysql_buff(KEY* key_info, uchar * record, uchar* data); + void unpack_key(uchar * record, DBT const *key, uint index); + uint32_t place_key_into_dbt_buff(KEY* key_info, uchar * buff, const uchar * record, bool* has_null, int key_length); + DBT* create_dbt_key_from_key(DBT * key, KEY* key_info, uchar * buff, const uchar * record, bool* has_null, bool dont_pack_pk, int key_length, uint8_t inf_byte); + DBT *create_dbt_key_from_table(DBT * key, uint keynr, uchar * buff, const uchar * record, bool* has_null, int key_length = MAX_KEY_LENGTH); + DBT* create_dbt_key_for_lookup(DBT * key, KEY* key_info, uchar * buff, const uchar * record, bool* has_null, int key_length = MAX_KEY_LENGTH); + DBT *pack_key(DBT * key, uint keynr, uchar * buff, const uchar * key_ptr, uint key_length, int8_t inf_byte); +#if defined(TOKU_INCLUDE_EXTENDED_KEYS) && TOKU_INCLUDE_EXTENDED_KEYS + DBT *pack_ext_key(DBT * key, uint keynr, uchar * buff, const uchar * key_ptr, uint key_length, int8_t inf_byte); +#endif // defined(TOKU_INCLUDE_EXTENDED_KEYS) && TOKU_INCLUDE_EXTENDED_KEYS + bool key_changed(uint keynr, const uchar * old_row, const uchar * new_row); + int handle_cursor_error(int error, int err_to_return); + DBT *get_pos(DBT * to, uchar * pos); + + int open_main_dictionary(const char* name, bool is_read_only, DB_TXN* txn); + int open_secondary_dictionary(DB** ptr, KEY* key_info, 
const char* name, bool is_read_only, DB_TXN* txn); + int acquire_table_lock (DB_TXN* trans, TABLE_LOCK_TYPE lt); + int estimate_num_rows(DB* db, uint64_t* num_rows, DB_TXN* txn); + bool has_auto_increment_flag(uint* index); + +#if defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA + int write_frm_data(DB* db, DB_TXN* txn, const char* frm_name); + int verify_frm_data(const char* frm_name, DB_TXN* trans); + int remove_frm_data(DB *db, DB_TXN *txn); +#endif // defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA + + int write_to_status(DB* db, HA_METADATA_KEY curr_key_data, void* data, uint size, DB_TXN* txn); + int remove_from_status(DB* db, HA_METADATA_KEY curr_key_data, DB_TXN* txn); + + int write_metadata(DB* db, void* key, uint key_size, void* data, uint data_size, DB_TXN* txn); + int remove_metadata(DB* db, void* key_data, uint key_size, DB_TXN* transaction); + + int update_max_auto_inc(DB* db, ulonglong val); + int remove_key_name_from_status(DB* status_block, const char* key_name, DB_TXN* txn); + int write_key_name_to_status(DB* status_block, const char* key_name, DB_TXN* txn); + int write_auto_inc_create(DB* db, ulonglong val, DB_TXN* txn); + void init_auto_increment(); + bool can_replace_into_be_fast(TABLE_SHARE* table_share, KEY_AND_COL_INFO* kc_info, uint pk); + int initialize_share(const char* name, int mode); + + void set_query_columns(uint keynr); + int prelock_range (const key_range *start_key, const key_range *end_key); + int create_txn(THD* thd, tokudb_trx_data* trx); + bool may_table_be_empty(DB_TXN *txn); + int delete_or_rename_table (const char* from_name, const char* to_name, bool is_delete); + int delete_or_rename_dictionary( const char* from_name, const char* to_name, const char* index_name, bool is_key, DB_TXN* txn, bool is_delete); + int truncate_dictionary( uint keynr, DB_TXN* txn ); + int create_secondary_dictionary( + const char* name, + TABLE* form, + KEY* key_info, + DB_TXN* txn, + KEY_AND_COL_INFO* 
kc_info, + uint32_t keynr, + bool is_hot_index, + toku_compression_method compression_method + ); + int create_main_dictionary(const char* name, TABLE* form, DB_TXN* txn, KEY_AND_COL_INFO* kc_info, toku_compression_method compression_method); + void trace_create_table_info(TABLE* form); + int is_index_unique(bool* is_unique, DB_TXN* txn, DB* db, KEY* key_info, int lock_flags); + int is_val_unique(bool* is_unique, const uchar* record, KEY* key_info, uint dict_index, DB_TXN* txn); + int do_uniqueness_checks(const uchar* record, DB_TXN* txn, THD* thd); + void set_main_dict_put_flags(THD* thd, bool opt_eligible, uint32_t* put_flags); + int insert_row_to_main_dictionary(DBT* pk_key, DBT* pk_val, DB_TXN* txn); + int insert_rows_to_dictionaries_mult(DBT* pk_key, DBT* pk_val, DB_TXN* txn, THD* thd); + void test_row_packing(const uchar* record, DBT* pk_key, DBT* pk_val); + uint32_t fill_row_mutator( + uchar* buf, + uint32_t* dropped_columns, + uint32_t num_dropped_columns, + TABLE* altered_table, + KEY_AND_COL_INFO* altered_kc_info, + uint32_t keynr, + bool is_add + ); + + // 0 <= active_index < table_share->keys || active_index == MAX_KEY + // tokudb_active_index = active_index if active_index < table_share->keys, else tokudb_active_index = primary_key = table_share->keys + uint tokudb_active_index; + +public: + ha_tokudb(handlerton * hton, TABLE_SHARE * table_arg); + ~ha_tokudb(); + + const char *table_type() const; + const char *index_type(uint inx); + const char **bas_ext() const; + + // + // Returns a bit mask of capabilities of storage engine. Capabilities + // defined in sql/handler.h + // + ulonglong table_flags() const; + + ulong index_flags(uint inx, uint part, bool all_parts) const; + + // + // Returns limit on the number of keys imposed by tokudb. 
+ // + uint max_supported_keys() const { + return MAX_KEY; + } + + uint extra_rec_buf_length() const { + return TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH; + } + ha_rows estimate_rows_upper_bound(); + + // + // Returns the limit on the key length imposed by tokudb. + // + uint max_supported_key_length() const { + return UINT_MAX32; + } + + // + // Returns limit on key part length imposed by tokudb. + // + uint max_supported_key_part_length() const { + return UINT_MAX32; + } + const key_map *keys_to_use_for_scanning() { + return &key_map_full; + } + + double scan_time(); + + double read_time(uint index, uint ranges, ha_rows rows); + + // Defined in mariadb + double keyread_time(uint index, uint ranges, ha_rows rows); + + // Defined in mysql 5.6 + double index_only_read_time(uint keynr, double records); + + int open(const char *name, int mode, uint test_if_locked); + int close(); + void update_create_info(HA_CREATE_INFO* create_info); + int create(const char *name, TABLE * form, HA_CREATE_INFO * create_info); + int delete_table(const char *name); + int rename_table(const char *from, const char *to); + int optimize(THD * thd, HA_CHECK_OPT * check_opt); + int analyze(THD * thd, HA_CHECK_OPT * check_opt); + int write_row(const uchar * buf); + int update_row(const uchar * old_data, const uchar * new_data); + int delete_row(const uchar * buf); +#if MYSQL_VERSION_ID >= 100000 + void start_bulk_insert(ha_rows rows, uint flags); +#else + void start_bulk_insert(ha_rows rows); +#endif + static int bulk_insert_poll(void* extra, float progress); + static void loader_add_index_err(DB* db, + int i, + int err, + DBT* key, + DBT* val, + void* error_extra); + static void loader_dup(DB* db, + int i, + int err, + DBT* key, + DBT* val, + void* error_extra); + int end_bulk_insert(); + int end_bulk_insert(bool abort); + + int prepare_index_scan(); + int prepare_index_key_scan( const uchar * key, uint key_len ); + int prepare_range_scan( const key_range *start_key, const key_range *end_key); + void 
column_bitmaps_signal(); + int index_init(uint index, bool sorted); + int index_end(); + int index_next_same(uchar * buf, const uchar * key, uint keylen); + int index_read(uchar * buf, const uchar * key, uint key_len, enum ha_rkey_function find_flag); + int index_read_last(uchar * buf, const uchar * key, uint key_len); + int index_next(uchar * buf); + int index_prev(uchar * buf); + int index_first(uchar * buf); + int index_last(uchar * buf); + + bool has_gap_locks() const { return true; } + + int rnd_init(bool scan); + int rnd_end(); + int rnd_next(uchar * buf); + int rnd_pos(uchar * buf, uchar * pos); + + int read_range_first(const key_range *start_key, + const key_range *end_key, + bool eq_range, bool sorted); + int read_range_next(); + + + void position(const uchar * record); + int info(uint); + int extra(enum ha_extra_function operation); + int reset(); + int external_lock(THD * thd, int lock_type); + int start_stmt(THD * thd, thr_lock_type lock_type); + + ha_rows records_in_range(uint inx, const key_range * min_key, + const key_range * max_key, + page_range *pages); + + uint32_t get_cursor_isolation_flags(enum thr_lock_type lock_type, THD* thd); + THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to, enum thr_lock_type lock_type); + + int get_status(DB_TXN* trans); + void init_hidden_prim_key_info(DB_TXN *txn); + inline void get_auto_primary_key(uchar * to) { + share->lock(); + share->auto_ident++; + hpk_num_to_char(to, share->auto_ident); + share->unlock(); + } + virtual void get_auto_increment( + ulonglong offset, + ulonglong increment, + ulonglong nb_desired_values, + ulonglong* first_value, + ulonglong* nb_reserved_values); + bool is_optimize_blocking(); + bool is_auto_inc_singleton(); + void print_error(int error, myf errflag); + uint8 table_cache_type() { + return HA_CACHE_TBL_TRANSACT; + } + int cmp_ref(const uchar * ref1, const uchar * ref2); + bool check_if_incompatible_data(HA_CREATE_INFO * info, uint table_changes); + +#ifdef 
MARIADB_BASE_VERSION

// MariaDB MRR introduced in 5.5, API changed in MariaDB 10.0
#if MYSQL_VERSION_ID >= 100000
#define COST_VECT Cost_estimate
#endif

    // Multi Range Read (MRR), MariaDB flavor of the handler interface:
    // ranges are identified by range_id_t and an explain-info hook exists.
    int multi_range_read_init(RANGE_SEQ_IF* seq,
                              void* seq_init_param,
                              uint n_ranges, uint mode,
                              HANDLER_BUFFER *buf);
    int multi_range_read_next(range_id_t *range_info);
    ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
                                        void *seq_init_param,
                                        uint n_ranges, uint *bufsz,
                                        uint *flags, COST_VECT *cost);
    ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
                                  uint key_parts, uint *bufsz,
                                  uint *flags, COST_VECT *cost);
    int multi_range_read_explain_info(uint mrr_mode, char *str, size_t size);

#else

// MySQL MRR introduced in 5.6
#if 50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699
    // MySQL 5.6 flavor of the MRR interface: range ids are plain char*
    // and there is no explain-info hook.
    int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
                              uint n_ranges, uint mode, HANDLER_BUFFER *buf);
    int multi_range_read_next(char **range_info);
    ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
                                        void *seq_init_param,
                                        uint n_ranges, uint *bufsz,
                                        uint *flags, Cost_estimate *cost);
    ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
                                  uint *bufsz, uint *flags, Cost_estimate *cost);
#endif

#endif

    // Index condition pushdown (ICP) entry points (see
    // toku_handler_index_cond_check() below for the evaluation side).
    Item* idx_cond_push(uint keyno, class Item* idx_cond);
    void cancel_pushed_idx_cond();

    // Whether a VARCHAR/VARBINARY field can be converted to new_type.
    // NOTE(review): semantics inferred from the name; confirm against
    // the inplace-alter implementation.
    bool can_convert_varstring(const Field_varstring* field,
                               const Column_definition& new_type) const;

#if defined(TOKU_INCLUDE_ALTER_56) && TOKU_INCLUDE_ALTER_56
 public:
    // MySQL 5.6 in-place ALTER TABLE handler interface.
    enum_alter_inplace_result check_if_supported_inplace_alter(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
    bool prepare_inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
    bool inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
    bool commit_inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info, bool commit);
 private:
    // Workers for the individual in-place alter operations.
    int alter_table_add_index(Alter_inplace_info* ha_alter_info);
    int alter_table_drop_index(Alter_inplace_info* ha_alter_info);
    int alter_table_add_or_drop_column(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
    int alter_table_expand_varchar_offsets(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
    int alter_table_expand_columns(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
    int alter_table_expand_one_column(TABLE *altered_table, Alter_inplace_info *ha_alter_info, int expand_field_num);
    int alter_table_expand_blobs(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
    // Debug aid: dumps the contents of ha_alter_info.
    void print_alter_info(TABLE *altered_table, Alter_inplace_info *ha_alter_info);
    int setup_kc_info(TABLE *altered_table, KEY_AND_COL_INFO *kc_info);
    // Builds the row descriptor (DBT) for index idx of the altered table.
    int new_row_descriptor(TABLE* altered_table,
                           Alter_inplace_info* ha_alter_info,
                           uint32_t idx,
                           DBT* row_descriptor);

 public:
#endif // defined(TOKU_INCLUDE_ALTER_56) && TOKU_INCLUDE_ALTER_56
#if defined(TOKU_INCLUDE_ALTER_55) && TOKU_INCLUDE_ALTER_55
public:
    // Returns true if the 5.6 inplace alter table interface is used.
    bool try_hot_alter_table();

    // Used by the partition storage engine to provide new frm data for the table.
    int new_alter_table_frm_data(const uchar *frm_data, size_t frm_len);
#endif // defined(TOKU_INCLUDE_ALTER_55) && TOKU_INCLUDE_ALTER_55

 private:
    // Creates num_of_keys new indexes (described by key_info) inside txn.
    // The two out-params record how far the operation got — presumably so
    // restore_add_index() can undo a partial add on error (its parameters
    // mirror them).
    int tokudb_add_index(TABLE* table_arg,
                         KEY* key_info,
                         uint num_of_keys,
                         DB_TXN* txn,
                         bool* inc_num_DBs,
                         bool* modified_DB);
    // Progress-poll callback used while the indexes are being built.
    static int tokudb_add_index_poll(void *extra, float progress);
    // Undoes the visible effects of a failed tokudb_add_index().
    void restore_add_index(TABLE* table_arg,
                           uint num_of_keys,
                           bool incremented_numDBs,
                           bool modified_DBs);
    int drop_indexes(uint* key_num, uint num_of_keys, KEY* key_info, DB_TXN* txn);
    void restore_drop_indexes(uint* key_num, uint num_of_keys);

 public:
    // delete all rows from the table
    // effect: all dictionaries, including the main and indexes, should be empty
    int discard_or_import_tablespace(my_bool discard);
    int truncate();
    int delete_all_rows();
    void extract_hidden_primary_key(uint keynr, DBT const *found_key);
    void read_key_only(uchar * buf, uint keynr, DBT const *found_key);
    int read_row_callback (uchar * buf, uint keynr, DBT const *row, DBT const *found_key);
    int read_primary_key(uchar * buf, uint keynr, DBT const *row, DBT const *found_key);
    // Unpacks blob columns from TokuDB's stored row format into record.
    int unpack_blobs(
        uchar* record,
        const uchar* from_tokudb_blob,
        uint32_t num_blob_bytes,
        bool check_bitmap
        );
    // Unpacks a fetched key/row pair into MySQL's record buffer.
    int unpack_row(
        uchar* record,
        DBT const *row,
        DBT const *key,
        uint index
        );

    // Prefix-compares two keys using the comparison function of the
    // dictionary backing index keynr (share->key_file[keynr]).
    int prefix_cmp_dbts( uint keynr, const DBT* first_key, const DBT* second_key) {
        return tokudb_prefix_cmp_dbt_key(share->key_file[keynr], first_key, second_key);
    }

    void track_progress(THD* thd);
    void set_loader_error(int err);
    // Records the key value that caused a duplicate-key error on the PK.
    void set_dup_value_for_pk(DBT* key);


    //
    // index into key_file that holds DB* that is indexed on
    // the primary_key. this->key_file[primary_index] == this->file
    //
    uint primary_key;

    int check(THD *thd, HA_CHECK_OPT *check_opt);

    // Fills the bulk-fetch ("range query") buffer buf while scanning in
    // the given direction; need_val selects whether row values are
    // stored alongside the keys.
    int fill_range_query_buf(
        bool need_val,
        DBT const* key,
        DBT const* row,
        int direction,
        THD* thd,
        uchar* buf,
        DBT* key_to_compare);

#if defined(TOKU_INCLUDE_ROW_TYPE_COMPRESSION) && \
    TOKU_INCLUDE_ROW_TYPE_COMPRESSION
    enum row_type get_row_type() const;
#endif // defined(TOKU_INCLUDE_ROW_TYPE_COMPRESSION) &&
       // TOKU_INCLUDE_ROW_TYPE_COMPRESSION
private:
    int read_full_row(uchar * buf);
    int __close();
    int get_next(uchar* buf, int direction, DBT* key_to_compare, bool do_key_read);
    // Consumes rows previously staged by fill_range_query_buf().
    int read_data_from_range_query_buff(uchar* buf, bool need_val, bool do_key_read);
    // for ICP, only in MariaDB and MySQL 5.6
    check_result_t toku_handler_index_cond_check(Item* pushed_idx_cond);
    void invalidate_bulk_fetch();
    void invalidate_icp();
    int delete_all_rows_internal();
    void close_dsmrr();
    void reset_dsmrr();

#if defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
    int write_frm_data(const uchar *frm_data, size_t frm_len);
#endif // defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA

private:
#if defined(TOKU_INCLUDE_UPSERT) && TOKU_INCLUDE_UPSERT
    // "Fast update"/upsert path.  NOTE(review): judging by the names,
    // check_* decide whether a statement qualifies, and send_* build and
    // send an engine message inside txn instead of doing
    // read-modify-write — confirm in the update implementation file.
    MY_NODISCARD int fast_update(THD *thd,
                                 List<Item> &update_fields,
                                 List<Item> &update_values,
                                 Item *conds);
    MY_NODISCARD bool check_fast_update(THD *thd,
                                        List<Item> &update_fields,
                                        List<Item> &update_values,
                                        Item *conds);
    MY_NODISCARD int send_update_message(List<Item> &update_fields,
                                         List<Item> &update_values,
                                         Item *conds,
                                         DB_TXN *txn);
    MY_NODISCARD int upsert(THD *thd,
                            List<Item> &update_fields,
                            List<Item> &update_values);
    MY_NODISCARD bool check_upsert(THD *thd,
                                   List<Item> &update_fields,
                                   List<Item> &update_values);
    MY_NODISCARD int send_upsert_message(List<Item> &update_fields,
                                         List<Item> &update_values,
                                         DB_TXN *txn);
#endif // defined(TOKU_INCLUDE_UPSERT) && TOKU_INCLUDE_UPSERT

public:
    // mysql sometimes retires a txn before a cursor that references the txn is closed.
    // for example, commit is sometimes called before index_end. the following methods
    // put the handler on a list of handlers that get cleaned up when the txn is retired.
    void cleanup_txn(DB_TXN *txn);
private:
    // Intrusive membership in the per-transaction handler list described above.
    LIST trx_handler_list;
    void add_to_trx_handler_list();
    void remove_from_trx_handler_list();

private:
    int do_optimize(THD *thd);
    // Maps an engine-level error code to a handler-level error code.
    int map_to_handler_error(int error);

#if defined(TOKU_INCLUDE_RFR) && TOKU_INCLUDE_RFR
public:
    // Read-free replication hooks, called around replication row-event
    // application (write/delete/update).  NOTE(review): rpl_lookup_rows()
    // presumably reports whether the applier must read the old row —
    // confirm at the call sites.
    void rpl_before_write_rows();
    void rpl_after_write_rows();
    void rpl_before_delete_rows();
    void rpl_after_delete_rows();
    void rpl_before_update_rows();
    void rpl_after_update_rows();
    bool rpl_lookup_rows();
private:
    // Flags set/cleared by the before/after hooks above to track which
    // kind of replication row event is currently being applied.
    bool in_rpl_write_rows;
    bool in_rpl_delete_rows;
    bool in_rpl_update_rows;
#endif // defined(TOKU_INCLUDE_RFR) && TOKU_INCLUDE_RFR
};

#endif // _HA_TOKUDB_H