summaryrefslogtreecommitdiffstats
path: root/storage/maria/ha_maria.cc
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 18:07:14 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 18:07:14 +0000
commita175314c3e5827eb193872241446f2f8f5c9d33c (patch)
treecd3d60ca99ae00829c52a6ca79150a5b6e62528b /storage/maria/ha_maria.cc
parentInitial commit. (diff)
downloadmariadb-10.5-upstream.tar.xz
mariadb-10.5-upstream.zip
Adding upstream version 1:10.5.12. (refs: upstream/1%10.5.12, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/maria/ha_maria.cc')
-rw-r--r-- storage/maria/ha_maria.cc 4240
1 files changed, 4240 insertions, 0 deletions
diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc
new file mode 100644
index 00000000..5bf6754f
--- /dev/null
+++ b/storage/maria/ha_maria.cc
@@ -0,0 +1,4240 @@
+/* Copyright (C) 2004-2008 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+ Copyright (C) 2008-2009 Sun Microsystems, Inc.
+ Copyright (c) 2009, 2021, MariaDB Corporation Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+
+#ifdef USE_PRAGMA_IMPLEMENTATION
+#pragma implementation // gcc: Class implementation
+#endif
+
+#define MYSQL_SERVER 1
+#include <my_global.h>
+#include <m_ctype.h>
+#include <my_dir.h>
+#include <myisampack.h>
+#include <my_bit.h>
+#include "ha_maria.h"
+#include "trnman_public.h"
+#include "trnman.h"
+
+C_MODE_START
+#include "maria_def.h"
+#include "ma_rt_index.h"
+#include "ma_blockrec.h"
+#include "ma_checkpoint.h"
+#include "ma_recovery.h"
+C_MODE_END
+#include "ma_trnman.h"
+
+//#include "sql_priv.h"
+#include "protocol.h"
+#include "sql_class.h"
+#include "key.h"
+#include "log.h"
+#include "sql_parse.h"
+
+/*
+ TRANSACTION_STATE expands to nothing when ARIA_HAS_TRANSACTIONS is defined
+ and to HA_NO_TRANSACTIONS otherwise.
+ NOTE(review): presumably used where the handlerton flags are set up; that
+ code is outside this section - confirm.
+
+ Note that in future versions, only *transactional* Maria tables can
+ roll back, so this flag should be up or down conditionally.
+*/
+#ifdef ARIA_HAS_TRANSACTIONS
+#define TRANSACTION_STATE
+#else
+#define TRANSACTION_STATE HA_NO_TRANSACTIONS
+#endif
+
+/*
+  Aria transaction object stored as this engine's ha_data in the THD.
+  Expects a variable named `thd` to be in scope at the point of use.
+  The expansion is fully parenthesized so that it stays a single operand
+  when used inside a larger expression (e.g. THD_TRN->member).
+*/
+#define THD_TRN ((TRN*) thd_get_ha_data(thd, maria_hton))
+
+ulong pagecache_division_limit, pagecache_age_threshold, pagecache_file_hash_size; /* storage for the pagecache_* system variables declared below */
+ulonglong pagecache_buffer_size; /* storage for the pagecache_buffer_size system variable */
+const char *zerofill_error_msg= /* message text; not referenced within this section */
+ "Table is probably from another system and must be zerofilled or repaired ('REPAIR TABLE table_name') to be usable on this system";
+
+/**
+ Bitmask behind the recover_options system variable (bit names are in
+ maria_recover_names); it selects how corrupted tables are automatically
+ repaired.
+
+ As the auto-repair is initiated when opened from the SQL layer
+ (open_unireg_entry(), check_and_repair()), it does not happen when Maria's
+ Recovery internally opens the table to apply log records to it, which is
+ good. It would happen only after Recovery, if the table is still
+ corrupted.
+*/
+ulonglong maria_recover_options= HA_RECOVER_NONE;
+handlerton *maria_hton; /* this engine's handlerton; used as the key for thd_get_ha_data()/thd_set_ha_data() */
+
+/* Names of the bits in maria_recover_options; the list is NullS-terminated */
+const char *maria_recover_names[]=
+{
+ /*
+ Compared to MyISAM, "default" was renamed to "normal" as it collided with
+ SET var=default which sets to the var's default i.e. what happens when the
+ var is not set i.e. HA_RECOVER_NONE.
+ OFF flag is ignored.
+ */
+ "NORMAL", "BACKUP", "FORCE", "QUICK", "OFF", NullS
+};
+TYPELIB maria_recover_typelib= /* used by the recover_options SET variable */
+{
+ array_elements(maria_recover_names) - 1, "", /* count excludes the NullS terminator */
+ maria_recover_names, NULL
+};
+
+const char *maria_stats_method_names[]= /* accepted values of the stats_method variable; NullS-terminated */
+{
+ "nulls_unequal", "nulls_equal",
+ "nulls_ignored", NullS
+};
+TYPELIB maria_stats_method_typelib= /* used by the stats_method ENUM variable */
+{
+ array_elements(maria_stats_method_names) - 1, "", /* count excludes the NullS terminator */
+ maria_stats_method_names, NULL
+};
+
+/* Transaction log purge mode: accepted values of the log_purge_type variable */
+const char *maria_translog_purge_type_names[]=
+{
+ "immediate", "external", "at_flush", NullS
+};
+TYPELIB maria_translog_purge_type_typelib=
+{
+ array_elements(maria_translog_purge_type_names) - 1, "", /* count excludes the NullS terminator */
+ maria_translog_purge_type_names, NULL
+};
+
+/* Transactional log directory sync: accepted values of the sync_log_dir variable */
+const char *maria_sync_log_dir_names[]=
+{
+ "NEVER", "NEWFILE", "ALWAYS", NullS
+};
+TYPELIB maria_sync_log_dir_typelib=
+{
+ array_elements(maria_sync_log_dir_names) - 1, "", /* count excludes the NullS terminator */
+ maria_sync_log_dir_names, NULL
+};
+
+/* Transactional log group commit: accepted values of the group_commit variable */
+const char *maria_group_commit_names[]=
+{
+ "none", "hard", "soft", NullS
+};
+TYPELIB maria_group_commit_typelib=
+{
+ array_elements(maria_group_commit_names) - 1, "", /* count excludes the NullS terminator */
+ maria_group_commit_names, NULL
+};
+
+/** Interval between background checkpoints in seconds */
+static ulong checkpoint_interval;
+static void update_checkpoint_interval(MYSQL_THD thd, /* update callback of the checkpoint_interval variable; defined later in the file */
+ struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save);
+static void update_maria_group_commit(MYSQL_THD thd, /* update callback of the group_commit variable */
+ struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save);
+static void update_maria_group_commit_interval(MYSQL_THD thd, /* update callback of the group_commit_interval variable */
+ struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save);
+/** After that many consecutive recovery failures, remove logs */
+static ulong force_start_after_recovery_failures;
+static void update_log_file_size(MYSQL_THD thd, /* update callback of the log_file_size variable */
+ struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save);
+
+/*
+ System variable declarations of the Aria plugin start here. For the
+ numeric MYSQL_SYSVAR_* macros the arguments that follow the help text are:
+ check callback, update callback, default, minimum, maximum, block size.
+
+ block_size: the 4096 (the minimum) is there because of MariaDB privilege
+ tables
+*/
+static MYSQL_SYSVAR_ULONG(block_size, maria_block_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Block size to be used for Aria index pages.", 0, 0,
+ MARIA_KEY_BLOCK_LENGTH, 4096,
+ MARIA_MAX_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH);
+
+static MYSQL_SYSVAR_ULONG(checkpoint_interval, checkpoint_interval,
+ PLUGIN_VAR_RQCMDARG,
+ "Interval between tries to do an automatic checkpoints. In seconds; 0 means"
+ " 'no automatic checkpoints' which makes sense only for testing.",
+ NULL, update_checkpoint_interval, 30, 0, UINT_MAX, 1); /* default 30 seconds */
+
+static MYSQL_SYSVAR_ULONG(checkpoint_log_activity, maria_checkpoint_min_log_activity,
+ PLUGIN_VAR_RQCMDARG,
+ "Number of bytes that the transaction log has to grow between checkpoints before a new "
+ "checkpoint is written to the log.",
+ NULL, NULL, 1024*1024, 0, UINT_MAX, 1); /* default 1 MiB */
+
+static MYSQL_SYSVAR_ULONG(force_start_after_recovery_failures,
+ force_start_after_recovery_failures,
+ /*
+ Read-only because setting it on the fly has no useful effect,
+ should be set on command-line.
+ */
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Number of consecutive log recovery failures after which logs will be"
+ " automatically deleted to cure the problem; 0 (the default) disables"
+ " the feature.", NULL, NULL, 0, 0, UINT_MAX8, 1);
+
+static MYSQL_SYSVAR_BOOL(page_checksum, maria_page_checksums, 0,
+ "Maintain page checksums (can be overridden per table "
+ "with PAGE_CHECKSUM clause in CREATE TABLE)", 0, 0, 1); /* check, update, default (on) */
+
+/* log_dir_path can only be given on the command line (PLUGIN_VAR_NOSYSVAR) */
+static MYSQL_SYSVAR_CONST_STR(log_dir_path, maria_data_root,
+ PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Path to the directory where to store transactional log",
+ NULL, NULL, mysql_real_data_home); /* defaults to the server data directory variable */
+
+static MYSQL_SYSVAR_ULONG(log_file_size, log_file_size,
+ PLUGIN_VAR_RQCMDARG,
+ "Limit for transaction log size",
+ NULL, update_log_file_size, TRANSLOG_FILE_SIZE,
+ TRANSLOG_MIN_FILE_SIZE, 0xffffffffL, TRANSLOG_PAGE_SIZE); /* min, max, block size of one log page */
+
+static MYSQL_SYSVAR_ENUM(group_commit, maria_group_commit,
+ PLUGIN_VAR_RQCMDARG,
+ "Specifies Aria group commit mode. "
+ "Possible values are \"none\" (no group commit), "
+ "\"hard\" (with waiting to actual commit), "
+ "\"soft\" (no wait for commit (DANGEROUS!!!))",
+ NULL, update_maria_group_commit,
+ TRANSLOG_GCOMMIT_NONE, &maria_group_commit_typelib); /* default, list of allowed names */
+
+static MYSQL_SYSVAR_ULONG(group_commit_interval, maria_group_commit_interval,
+ PLUGIN_VAR_RQCMDARG,
+ "Interval between commits in microseconds (1/1000000 sec)."
+ " 0 stands for no waiting"
+ " for other threads to come and do a commit in \"hard\" mode and no"
+ " sync()/commit at all in \"soft\" mode. Option has only an effect"
+ " if aria_group_commit is used",
+ NULL, update_maria_group_commit_interval, 0, 0, UINT_MAX, 1);
+
+static MYSQL_SYSVAR_ENUM(log_purge_type, log_purge_type,
+ PLUGIN_VAR_RQCMDARG,
+ "Specifies how Aria transactional log will be purged",
+ NULL, NULL, TRANSLOG_PURGE_IMMIDIATE, /* sic: identifier spelling as declared elsewhere */
+ &maria_translog_purge_type_typelib);
+
+static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size,
+ maria_max_temp_length, PLUGIN_VAR_RQCMDARG,
+ "Don't use the fast sort index method to created index if the "
+ "temporary file would get bigger than this.",
+ 0, 0, MAX_FILE_SIZE & ~((ulonglong) (1*MB-1)), /* default: MAX_FILE_SIZE with the bits below 1 MB cleared */
+ 0, MAX_FILE_SIZE, 1*MB);
+
+static MYSQL_SYSVAR_ULONG(pagecache_age_threshold,
+ pagecache_age_threshold, PLUGIN_VAR_RQCMDARG,
+ "This characterizes the number of hits a hot block has to be untouched "
+ "until it is considered aged enough to be downgraded to a warm block. "
+ "This specifies the percentage ratio of that number of hits to the "
+ "total number of blocks in the page cache.", 0, 0,
+ 300, 100, ~ (ulong) 0L, 100); /* default 300, minimum 100, in steps of 100 */
+
+static MYSQL_SYSVAR_ULONGLONG(pagecache_buffer_size, pagecache_buffer_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "The size of the buffer used for index blocks for Aria tables. "
+ "Increase this to get better index handling (for all reads and "
+ "multiple writes) to as much as you can afford.", 0, 0,
+ KEY_CACHE_SIZE, 8192*16L, ~(ulonglong) 0, 1);
+
+static MYSQL_SYSVAR_ULONG(pagecache_division_limit, pagecache_division_limit,
+ PLUGIN_VAR_RQCMDARG,
+ "The minimum percentage of warm blocks in key cache", 0, 0,
+ 100, 1, 100, 1); /* a percentage: default 100, range 1..100 */
+
+static MYSQL_SYSVAR_ULONG(pagecache_file_hash_size, pagecache_file_hash_size,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Number of hash buckets for open and changed files. If you have a lot of Aria "
+ "files open you should increase this for faster flush of changes. A good "
+ "value is probably 1/10 of number of possible open Aria files.", 0,0,
+ 512, 128, 16384, 1);
+
+static MYSQL_SYSVAR_SET(recover_options, maria_recover_options, PLUGIN_VAR_OPCMDARG,
+ "Specifies how corrupted tables should be automatically repaired",
+ NULL, NULL, HA_RECOVER_BACKUP|HA_RECOVER_QUICK, &maria_recover_typelib); /* default differs from the HA_RECOVER_NONE static initializer of maria_recover_options */
+
+static MYSQL_THDVAR_ULONG(repair_threads, PLUGIN_VAR_RQCMDARG, /* THDVAR: per-connection value, no backing global declared here */
+ "Number of threads to use when repairing Aria tables. The value of 1 "
+ "disables parallel repair.",
+ 0, 0, 1, 1, 128, 1); /* check, update, default 1, range 1..128, block size 1 */
+
+static MYSQL_THDVAR_ULONGLONG(sort_buffer_size, PLUGIN_VAR_RQCMDARG,
+ "The buffer that is allocated when sorting the index when doing a "
+ "REPAIR or when creating indexes with CREATE INDEX or ALTER TABLE.",
+ NULL, NULL,
+ SORT_BUFFER_INIT, MIN_SORT_BUFFER, SIZE_T_MAX/2, 1);
+
+static MYSQL_THDVAR_ENUM(stats_method, PLUGIN_VAR_RQCMDARG,
+ "Specifies how Aria index statistics collection code should treat "
+ "NULLs", 0, 0, 0, &maria_stats_method_typelib); /* default 0 is the first name, "nulls_unequal" */
+
+static MYSQL_SYSVAR_ENUM(sync_log_dir, sync_log_dir, PLUGIN_VAR_RQCMDARG,
+ "Controls syncing directory after log file growth and new file "
+ "creation", NULL, NULL, TRANSLOG_SYNC_DIR_NEWFILE,
+ &maria_sync_log_dir_typelib);
+
+#ifdef USE_ARIA_FOR_TMP_TABLES /* build-time choice, turned into a 0/1 value */
+#define USE_ARIA_FOR_TMP_TABLES_VAL 1
+#else
+#define USE_ARIA_FOR_TMP_TABLES_VAL 0
+#endif
+my_bool use_maria_for_temp_tables= USE_ARIA_FOR_TMP_TABLES_VAL;
+
+static MYSQL_SYSVAR_BOOL(used_for_temp_tables, /* read-only, no command-line option: only reports the build-time choice */
+ use_maria_for_temp_tables, PLUGIN_VAR_READONLY | PLUGIN_VAR_NOCMDOPT,
+ "Whether temporary tables should be MyISAM or Aria", 0, 0,
+ 1);
+
+static MYSQL_SYSVAR_BOOL(encrypt_tables, maria_encrypt_tables, PLUGIN_VAR_OPCMDARG,
+ "Encrypt tables (only for tables with ROW_FORMAT=PAGE (default) "
+ "and not FIXED/DYNAMIC)",
+ 0, 0, 0); /* check, update, default (off) */
+
+#if defined HAVE_PSI_INTERFACE && !defined EMBEDDED_LIBRARY
+
+static PSI_mutex_info all_aria_mutexes[]= /* mutex instruments registered by init_aria_psi_keys(): { key, name, flags } */
+{
+ { &key_THR_LOCK_maria, "THR_LOCK_maria", PSI_FLAG_GLOBAL},
+ { &key_LOCK_soft_sync, "LOCK_soft_sync", PSI_FLAG_GLOBAL},
+ { &key_LOCK_trn_list, "LOCK_trn_list", PSI_FLAG_GLOBAL},
+ { &key_SHARE_BITMAP_lock, "SHARE::bitmap::bitmap_lock", 0},
+ { &key_SORT_INFO_mutex, "SORT_INFO::mutex", 0},
+ { &key_TRANSLOG_BUFFER_mutex, "TRANSLOG_BUFFER::mutex", 0},
+ { &key_TRANSLOG_DESCRIPTOR_dirty_buffer_mask_lock, "TRANSLOG_DESCRIPTOR::dirty_buffer_mask_lock", 0},
+ { &key_TRANSLOG_DESCRIPTOR_sent_to_disk_lock, "TRANSLOG_DESCRIPTOR::sent_to_disk_lock", 0},
+ { &key_TRANSLOG_DESCRIPTOR_log_flush_lock, "TRANSLOG_DESCRIPTOR::log_flush_lock", 0},
+ { &key_TRANSLOG_DESCRIPTOR_file_header_lock, "TRANSLOG_DESCRIPTOR::file_header_lock", 0},
+ { &key_TRANSLOG_DESCRIPTOR_unfinished_files_lock, "TRANSLOG_DESCRIPTOR::unfinished_files_lock", 0},
+ { &key_TRANSLOG_DESCRIPTOR_purger_lock, "TRANSLOG_DESCRIPTOR::purger_lock", 0},
+ { &key_SHARE_intern_lock, "SHARE::intern_lock", 0},
+ { &key_SHARE_key_del_lock, "SHARE::key_del_lock", 0},
+ { &key_SHARE_close_lock, "SHARE::close_lock", 0},
+ { &key_SERVICE_THREAD_CONTROL_lock, "SERVICE_THREAD_CONTROL::LOCK_control", 0},
+ { &key_TRN_state_lock, "TRN::state_lock", 0},
+ { &key_PAGECACHE_cache_lock, "PAGECACHE::cache_lock", 0}
+};
+
+static PSI_cond_info all_aria_conds[]= /* condition-variable instruments registered by init_aria_psi_keys() */
+{
+ { &key_COND_soft_sync, "COND_soft_sync", PSI_FLAG_GLOBAL},
+ { &key_SHARE_key_del_cond, "SHARE::key_del_cond", 0},
+ { &key_SERVICE_THREAD_CONTROL_cond, "SERVICE_THREAD_CONTROL::COND_control", 0},
+ { &key_SORT_INFO_cond, "SORT_INFO::cond", 0},
+ { &key_SHARE_BITMAP_cond, "BITMAP::bitmap_cond", 0},
+ { &key_TRANSLOG_BUFFER_waiting_filling_buffer, "TRANSLOG_BUFFER::waiting_filling_buffer", 0},
+ { &key_TRANSLOG_BUFFER_prev_sent_to_disk_cond, "TRANSLOG_BUFFER::prev_sent_to_disk_cond", 0},
+ { &key_TRANSLOG_DESCRIPTOR_log_flush_cond, "TRANSLOG_DESCRIPTOR::log_flush_cond", 0},
+ { &key_TRANSLOG_DESCRIPTOR_new_goal_cond, "TRANSLOG_DESCRIPTOR::new_goal_cond", 0}
+};
+
+static PSI_rwlock_info all_aria_rwlocks[]= /* read-write lock instruments registered by init_aria_psi_keys() */
+{
+ { &key_KEYINFO_root_lock, "KEYINFO::root_lock", 0},
+ { &key_SHARE_mmap_lock, "SHARE::mmap_lock", 0},
+ { &key_TRANSLOG_DESCRIPTOR_open_files_lock, "TRANSLOG_DESCRIPTOR::open_files_lock", 0}
+};
+
+static PSI_thread_info all_aria_threads[]= /* thread instruments registered by init_aria_psi_keys() */
+{
+ { &key_thread_checkpoint, "checkpoint_background", PSI_FLAG_GLOBAL},
+ { &key_thread_soft_sync, "soft_sync_background", PSI_FLAG_GLOBAL},
+ { &key_thread_find_all_keys, "thr_find_all_keys", 0}
+};
+
+static PSI_file_info all_aria_files[]= /* file instruments registered by init_aria_psi_keys() */
+{
+ { &key_file_translog, "translog", 0},
+ { &key_file_kfile, "MAI", 0},
+ { &key_file_dfile, "MAD", 0},
+ { &key_file_control, "control", PSI_FLAG_GLOBAL}
+};
+
+# ifdef HAVE_PSI_STAGE_INTERFACE /* stage instruments exist only when the stage interface is compiled in */
+static PSI_stage_info *all_aria_stages[]=
+{
+ & stage_waiting_for_a_resource
+};
+# endif /* HAVE_PSI_STAGE_INTERFACE */
+
+/*
+  Register all Aria performance-schema instruments under the "aria"
+  category: mutexes, rwlocks, condition variables, threads, files and,
+  when HAVE_PSI_STAGE_INTERFACE is defined, stages.
+
+  Each table is registered with its full element count.
+*/
+static void init_aria_psi_keys(void)
+{
+  const char *psi_category= "aria";
+
+  mysql_mutex_register(psi_category, all_aria_mutexes,
+                       (int) array_elements(all_aria_mutexes));
+  mysql_rwlock_register(psi_category, all_aria_rwlocks,
+                        (int) array_elements(all_aria_rwlocks));
+  mysql_cond_register(psi_category, all_aria_conds,
+                      (int) array_elements(all_aria_conds));
+  mysql_thread_register(psi_category, all_aria_threads,
+                        (int) array_elements(all_aria_threads));
+  mysql_file_register(psi_category, all_aria_files,
+                      (int) array_elements(all_aria_files));
+# ifdef HAVE_PSI_STAGE_INTERFACE
+  mysql_stage_register(psi_category, all_aria_stages,
+                       (int) array_elements(all_aria_stages));
+# endif /* HAVE_PSI_STAGE_INTERFACE */
+}
+#else
+#define init_aria_psi_keys() /* no-op */
+#endif /* HAVE_PSI_INTERFACE */
+
+const char *MA_CHECK_INFO= "info"; /* message-type tags; the printing helpers below compare them by pointer, not by content */
+const char *MA_CHECK_WARNING= "warning";
+const char *MA_CHECK_ERROR= "error";
+
+/*****************************************************************************
+** MARIA tables
+*****************************************************************************/
+
+/*
+  Factory for Aria handler objects.
+
+  Allocates a ha_maria on `mem_root` (placement new) for the given
+  handlerton and table share and returns it as a generic handler pointer.
+*/
+static handler *maria_create_handler(handlerton *hton,
+                                     TABLE_SHARE * table,
+                                     MEM_ROOT *mem_root)
+{
+  handler *aria_handler= new (mem_root) ha_maria(hton, table);
+  return aria_handler;
+}
+
+
+/*
+  Write a check/repair message to the server error log as
+  "<db>.<table>: <message>".
+
+  The log level is chosen from msg_type, which is compared by pointer
+  against MA_CHECK_INFO and MA_CHECK_WARNING; anything else is logged as
+  an error.
+*/
+static void _ma_check_print(HA_CHECK *param, const char* msg_type,
+                            const char *msgbuf)
+{
+  if (msg_type == MA_CHECK_INFO)
+  {
+    sql_print_information("%s.%s: %s", param->db_name, param->table_name,
+                          msgbuf);
+    return;
+  }
+  if (msg_type == MA_CHECK_WARNING)
+  {
+    sql_print_warning("%s.%s: %s", param->db_name, param->table_name,
+                      msgbuf);
+    return;
+  }
+  sql_print_error("%s.%s: %s", param->db_name, param->table_name, msgbuf);
+}
+
+
+/*
+ Collect messages printed by maria_check routines and deliver them.
+
+ param     check descriptor; supplies thd, testflag, db/table/op names
+ msg_type  one of MA_CHECK_INFO / MA_CHECK_WARNING / MA_CHECK_ERROR
+ fmt,args  printf-style message, formatted with my_vsnprintf()
+
+ Delivery, in order of precedence:
+ - nothing at all when T_SUPPRESS_ERR_HANDLING is set;
+ - the error log only, when thd->vio_ok() is false;
+ - my_message(ER_NOT_KEYFILE, ...) when one of T_CREATE_MISSING_KEYS,
+ T_SAFE_REPAIR or T_AUTO_REPAIR is set;
+ - otherwise a result row (table, op, msg_type, message) sent through
+ thd->protocol.
+ In the last two cases the message is also copied to the error log when
+ log_warnings > 2.
+*/
+
+static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type,
+ const char *fmt, va_list args)
+{
+ THD *thd= (THD *) param->thd;
+ Protocol *protocol= thd->protocol;
+ size_t length, msg_length;
+ char msgbuf[MYSQL_ERRMSG_SIZE];
+ char name[NAME_LEN * 2 + 2]; /* room for "db" + "." + "table" + terminator */
+
+ if (param->testflag & T_SUPPRESS_ERR_HANDLING)
+ return;
+
+ msg_length= my_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args);
+ msgbuf[sizeof(msgbuf) - 1]= 0; // healthy paranoia
+
+ DBUG_PRINT(msg_type, ("message: %s", msgbuf));
+
+ if (!thd->vio_ok()) /* cannot send to the client: log only */
+ {
+ _ma_check_print(param, msg_type, msgbuf);
+ return;
+ }
+
+ if (param->testflag &
+ (T_CREATE_MISSING_KEYS | T_SAFE_REPAIR | T_AUTO_REPAIR))
+ {
+ myf flag= 0; /* stays 0 for MA_CHECK_ERROR */
+ if (msg_type == MA_CHECK_INFO)
+ flag= ME_NOTE;
+ else if (msg_type == MA_CHECK_WARNING)
+ flag= ME_WARNING;
+ my_message(ER_NOT_KEYFILE, msgbuf, MYF(flag));
+ if (thd->variables.log_warnings > 2)
+ _ma_check_print(param, msg_type, msgbuf);
+ return;
+ }
+ length= (uint) (strxmov(name, param->db_name, ".", param->table_name, /* builds "db.table"; strxmov returns the end of the result */
+ NullS) - name);
+ /*
+ TODO: switch from protocol to push_warning here. The main reason we didn't
+ it yet is parallel repair, which threads have no THD object accessible via
+ current_thd.
+
+ Also we likely need to lock mutex here (in both cases with protocol and
+ push_warning).
+ */
+ protocol->prepare_for_resend();
+ protocol->store(name, (uint)length, system_charset_info);
+ protocol->store(param->op_name, system_charset_info);
+ protocol->store(msg_type, system_charset_info);
+ protocol->store(msgbuf, (uint)msg_length, system_charset_info);
+ if (protocol->write()) /* non-zero means the row could not be sent */
+ sql_print_error("Failed on my_net_write, writing to stderr instead: %s.%s: %s\n",
+ param->db_name, param->table_name, msgbuf);
+ else if (thd->variables.log_warnings > 2)
+ _ma_check_print(param, msg_type, msgbuf);
+
+ return;
+}
+
+
+/*
+ Convert TABLE object to Maria key and column definition
+
+ SYNOPSIS
+ table2maria()
+ table_arg in TABLE object.
+ row_type in Row format; BLOCK_RECORD adds HA_OPTION_PACK_RECORD
+ to the options used for the column types.
+ keydef_out out Maria key definition.
+ recinfo_out out Maria column definition.
+ records_out out Number of column definitions written to
+ *recinfo_out.
+ create_info out Only null_bytes is set here (from the table
+ share).
+
+ DESCRIPTION
+ This function will allocate and initialize Maria key and column
+ definition for further use in ma_create or for a check for underlying
+ table conformance in merge engine.
+
+ The caller needs to free *recinfo_out after use. Since *recinfo_out
+ and *keydef_out are allocated with a my_multi_malloc, *keydef_out
+ is freed automatically when *recinfo_out is freed.
+
+ NOTE
+ When a key part refers to a column that is not stored in the database,
+ the allocation is released inside this function before the error is
+ returned, but *recinfo_out and *keydef_out keep pointing at the freed
+ memory.
+ NOTE(review): a caller that frees *recinfo_out after a failure would
+ free it twice - confirm against the callers.
+
+ RETURN VALUE
+ 0 OK
+ # error code (HA_ERR_OUT_OF_MEM, HA_ERR_INDEX_COL_TOO_LONG or
+ HA_ERR_UNSUPPORTED)
+*/
+
+static int table2maria(TABLE *table_arg, data_file_type row_type,
+ MARIA_KEYDEF **keydef_out,
+ MARIA_COLUMNDEF **recinfo_out, uint *records_out,
+ MARIA_CREATE_INFO *create_info)
+{
+ uint i, j, recpos, minpos, fieldpos, temp_length, length;
+ enum ha_base_keytype type= HA_KEYTYPE_BINARY;
+ uchar *record;
+ KEY *pos;
+ MARIA_KEYDEF *keydef;
+ MARIA_COLUMNDEF *recinfo, *recinfo_pos;
+ HA_KEYSEG *keyseg;
+ TABLE_SHARE *share= table_arg->s;
+ uint options= share->db_options_in_use;
+ DBUG_ENTER("table2maria");
+
+ if (row_type == BLOCK_RECORD)
+ options|= HA_OPTION_PACK_RECORD;
+
+ if (!(my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME), /* one allocation holding the three arrays */
+ recinfo_out, (share->fields * 2 + 2) * sizeof(MARIA_COLUMNDEF),
+ keydef_out, share->keys * sizeof(MARIA_KEYDEF),
+ &keyseg,
+ (share->key_parts + share->keys) * sizeof(HA_KEYSEG),
+ NullS)))
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM); /* purecov: inspected */
+ keydef= *keydef_out;
+ recinfo= *recinfo_out;
+ pos= table_arg->key_info;
+ for (i= 0; i < share->keys; i++, pos++) /* part 1: one MARIA_KEYDEF per key of the table */
+ {
+ keydef[i].flag= (uint16) (pos->flags & (HA_NOSAME | HA_FULLTEXT |
+ HA_SPATIAL));
+ keydef[i].key_alg= pos->algorithm == HA_KEY_ALG_UNDEF ? /* undefined algorithm: RTREE for spatial keys, BTREE otherwise */
+ (pos->flags & HA_SPATIAL ? HA_KEY_ALG_RTREE : HA_KEY_ALG_BTREE) :
+ pos->algorithm;
+ keydef[i].block_length= pos->block_size;
+ keydef[i].seg= keyseg;
+ keydef[i].keysegs= pos->user_defined_key_parts;
+ for (j= 0; j < pos->user_defined_key_parts; j++)
+ {
+ Field *field= pos->key_part[j].field;
+
+ if (!table_arg->field[field->field_index]->stored_in_db()) /* key part on a column that is not stored: reject */
+ {
+ my_free(*recinfo_out); /* releases all three arrays; the out pointers are not reset */
+ if (table_arg->s->long_unique_table)
+ {
+ my_error(ER_TOO_LONG_KEY, MYF(0), table_arg->file->max_key_length());
+ DBUG_RETURN(HA_ERR_INDEX_COL_TOO_LONG);
+ }
+ my_error(ER_KEY_BASED_ON_GENERATED_VIRTUAL_COLUMN, MYF(0));
+ DBUG_RETURN(HA_ERR_UNSUPPORTED);
+ }
+
+ type= field->key_type();
+ keydef[i].seg[j].flag= pos->key_part[j].key_part_flag;
+
+ if (options & HA_OPTION_PACK_KEYS || /* key packing requested for the table or for this key */
+ (pos->flags & (HA_PACK_KEY | HA_BINARY_PACK_KEY |
+ HA_SPACE_PACK_USED)))
+ {
+ if (pos->key_part[j].length > 8 &&
+ (type == HA_KEYTYPE_TEXT ||
+ type == HA_KEYTYPE_NUM ||
+ (type == HA_KEYTYPE_BINARY && !field->zero_pack())))
+ {
+ /* No blobs here */
+ if (j == 0)
+ keydef[i].flag|= HA_PACK_KEY;
+ if (!(field->flags & ZEROFILL_FLAG) &&
+ (field->type() == MYSQL_TYPE_STRING ||
+ field->type() == MYSQL_TYPE_VAR_STRING ||
+ ((int) (pos->key_part[j].length - field->decimals())) >= 4))
+ keydef[i].seg[j].flag|= HA_SPACE_PACK;
+ }
+ else if (j == 0 && (!(pos->flags & HA_NOSAME) || pos->key_length > 16))
+ keydef[i].flag|= HA_BINARY_PACK_KEY;
+ }
+ keydef[i].seg[j].type= (int) type;
+ keydef[i].seg[j].start= pos->key_part[j].offset;
+ keydef[i].seg[j].length= pos->key_part[j].length;
+ keydef[i].seg[j].bit_start= keydef[i].seg[j].bit_length= 0;
+ keydef[i].seg[j].bit_pos= 0;
+ keydef[i].seg[j].language= field->charset()->number;
+
+ if (field->null_ptr)
+ {
+ keydef[i].seg[j].null_bit= field->null_bit;
+ keydef[i].seg[j].null_pos= (uint) (field->null_ptr- /* offset of the null byte from the start of record[0] */
+ (uchar*) table_arg->record[0]);
+ }
+ else
+ {
+ keydef[i].seg[j].null_bit= 0;
+ keydef[i].seg[j].null_pos= 0;
+ }
+ if (field->type() == MYSQL_TYPE_BLOB ||
+ field->type() == MYSQL_TYPE_GEOMETRY)
+ {
+ keydef[i].seg[j].flag|= HA_BLOB_PART;
+ /* save number of bytes used to pack length */
+ keydef[i].seg[j].bit_start= (uint) (field->pack_length() -
+ portable_sizeof_char_ptr);
+ }
+ else if (field->type() == MYSQL_TYPE_BIT)
+ {
+ keydef[i].seg[j].bit_length= ((Field_bit *) field)->bit_len;
+ keydef[i].seg[j].bit_start= ((Field_bit *) field)->bit_ofs;
+ keydef[i].seg[j].bit_pos= (uint) (((Field_bit *) field)->bit_ptr -
+ (uchar*) table_arg->record[0]);
+ }
+ }
+ keyseg+= pos->user_defined_key_parts; /* the next key's segments follow in the same array */
+ }
+ if (table_arg->found_next_number_field)
+ keydef[share->next_number_index].flag|= HA_AUTO_KEY;
+ record= table_arg->record[0];
+ recpos= 0;
+ recinfo_pos= recinfo;
+ create_info->null_bytes= table_arg->s->null_bytes;
+
+ while (recpos < (uint) share->stored_rec_length) /* part 2: walk the record by offset, one column definition per pass */
+ {
+ Field **field, *found= 0;
+ minpos= share->reclength;
+ length= 0;
+
+ for (field= table_arg->field; *field; field++) /* find the field with the lowest offset at or after recpos */
+ {
+ if ((fieldpos= (*field)->offset(record)) >= recpos &&
+ fieldpos <= minpos)
+ {
+ /* skip null fields */
+ if (!(temp_length= (*field)->pack_length_in_rec()))
+ continue; /* Skip null-fields */
+ if (! found || fieldpos < minpos || /* on equal offsets the shorter field wins */
+ (fieldpos == minpos && temp_length < length))
+ {
+ minpos= fieldpos;
+ found= *field;
+ length= temp_length;
+ }
+ }
+ }
+ DBUG_PRINT("loop", ("found: %p recpos: %d minpos: %d length: %d",
+ found, recpos, minpos, length));
+ if (!found)
+ break;
+
+ if (found->flags & BLOB_FLAG) /* choose the Maria column type; the first matching rule wins */
+ recinfo_pos->type= FIELD_BLOB;
+ else if (found->type() == MYSQL_TYPE_TIMESTAMP)
+ recinfo_pos->type= FIELD_NORMAL;
+ else if (found->type() == MYSQL_TYPE_VARCHAR)
+ recinfo_pos->type= FIELD_VARCHAR;
+ else if (!(options & HA_OPTION_PACK_RECORD) ||
+ (found->zero_pack() && (found->flags & PRI_KEY_FLAG)))
+ recinfo_pos->type= FIELD_NORMAL;
+ else if (found->zero_pack())
+ recinfo_pos->type= FIELD_SKIP_ZERO;
+ else
+ recinfo_pos->type= ((length <= 3 ||
+ (found->flags & ZEROFILL_FLAG)) ?
+ FIELD_NORMAL :
+ found->type() == MYSQL_TYPE_STRING ||
+ found->type() == MYSQL_TYPE_VAR_STRING ?
+ FIELD_SKIP_ENDSPACE :
+ FIELD_SKIP_PRESPACE);
+ if (found->null_ptr)
+ {
+ recinfo_pos->null_bit= found->null_bit;
+ recinfo_pos->null_pos= (uint) (found->null_ptr -
+ (uchar*) table_arg->record[0]);
+ }
+ else
+ {
+ recinfo_pos->null_bit= 0;
+ recinfo_pos->null_pos= 0;
+ }
+ (recinfo_pos++)->length= (uint16) length; /* store the length, then advance to the next output slot */
+ recpos= minpos + length; /* continue right after the field just emitted */
+ DBUG_PRINT("loop", ("length: %d type: %d",
+ recinfo_pos[-1].length,recinfo_pos[-1].type));
+ }
+ *records_out= (uint) (recinfo_pos - recinfo);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Check for underlying table conformance
+
+ SYNOPSIS
+ maria_check_definition()
+ t1_keyinfo in First table key definition
+ t1_recinfo in First table column (record) definition
+ t1_keys in Number of keys in first table
+ t1_recs in Number of column definitions in first table
+ t2_keyinfo in Second table key definition
+ t2_recinfo in Second table column (record) definition
+ t2_keys in Number of keys in second table
+ t2_recs in Number of column definitions in second table
+ strict in Strict check switch
+
+ DESCRIPTION
+ This function compares two Maria definitions. By intention it was done
+ to compare merge table definition against underlying table definition.
+ It may also be used to compare dot-frm and MAI definitions of Maria
+ table as well as to compare different Maria table definitions.
+
+ For merge table it is not required that number of keys in merge table
+ must exactly match number of keys in underlying table. When calling this
+ function for underlying table conformance check, 'strict' flag must be
+ set to false, and converted merge definition must be passed as t1_*.
+
+ Otherwise 'strict' flag must be set to 1 and it is not required to pass
+ converted dot-frm definition as t1_*.
+
+ Only the first t1_keys keys are compared. Per key segment the type,
+ language, null_bit and length are compared; per column the type, length
+ and null_bit.
+
+ RETURN VALUE
+ 0 - Equal definitions.
+ 1 - Different definitions.
+
+ TODO
+ - compare FULLTEXT keys;
+ - compare SPATIAL keys;
+ - compare FIELD_SKIP_ZERO which is converted to FIELD_NORMAL correctly
+ (should be correctly detected in table2maria).
+*/
+
+int maria_check_definition(MARIA_KEYDEF *t1_keyinfo,
+ MARIA_COLUMNDEF *t1_recinfo,
+ uint t1_keys, uint t1_recs,
+ MARIA_KEYDEF *t2_keyinfo,
+ MARIA_COLUMNDEF *t2_recinfo,
+ uint t2_keys, uint t2_recs, bool strict)
+{
+ uint i, j;
+ DBUG_ENTER("maria_check_definition");
+ if ((strict ? t1_keys != t2_keys : t1_keys > t2_keys)) /* non-strict: t1 may have fewer keys than t2 */
+ {
+ DBUG_PRINT("error", ("Number of keys differs: t1_keys=%u, t2_keys=%u",
+ t1_keys, t2_keys));
+ DBUG_RETURN(1);
+ }
+ if (t1_recs != t2_recs)
+ {
+ DBUG_PRINT("error", ("Number of recs differs: t1_recs=%u, t2_recs=%u",
+ t1_recs, t2_recs));
+ DBUG_RETURN(1);
+ }
+ for (i= 0; i < t1_keys; i++)
+ {
+ HA_KEYSEG *t1_keysegs= t1_keyinfo[i].seg;
+ HA_KEYSEG *t2_keysegs= t2_keyinfo[i].seg;
+ if (t1_keyinfo[i].flag & HA_FULLTEXT && t2_keyinfo[i].flag & HA_FULLTEXT)
+ continue; /* both FULLTEXT: segments are not compared (see TODO) */
+ else if (t1_keyinfo[i].flag & HA_FULLTEXT ||
+ t2_keyinfo[i].flag & HA_FULLTEXT)
+ {
+ DBUG_PRINT("error", ("Key %d has different definition", i));
+ DBUG_PRINT("error", ("t1_fulltext= %d, t2_fulltext=%d",
+ MY_TEST(t1_keyinfo[i].flag & HA_FULLTEXT),
+ MY_TEST(t2_keyinfo[i].flag & HA_FULLTEXT)));
+ DBUG_RETURN(1);
+ }
+ if (t1_keyinfo[i].flag & HA_SPATIAL && t2_keyinfo[i].flag & HA_SPATIAL)
+ continue; /* both SPATIAL: segments are not compared (see TODO) */
+ else if (t1_keyinfo[i].flag & HA_SPATIAL ||
+ t2_keyinfo[i].flag & HA_SPATIAL)
+ {
+ DBUG_PRINT("error", ("Key %d has different definition", i));
+ DBUG_PRINT("error", ("t1_spatial= %d, t2_spatial=%d",
+ MY_TEST(t1_keyinfo[i].flag & HA_SPATIAL),
+ MY_TEST(t2_keyinfo[i].flag & HA_SPATIAL)));
+ DBUG_RETURN(1);
+ }
+ if (t1_keyinfo[i].keysegs != t2_keyinfo[i].keysegs ||
+ t1_keyinfo[i].key_alg != t2_keyinfo[i].key_alg)
+ {
+ DBUG_PRINT("error", ("Key %d has different definition", i));
+ DBUG_PRINT("error", ("t1_keysegs=%d, t1_key_alg=%d",
+ t1_keyinfo[i].keysegs, t1_keyinfo[i].key_alg));
+ DBUG_PRINT("error", ("t2_keysegs=%d, t2_key_alg=%d",
+ t2_keyinfo[i].keysegs, t2_keyinfo[i].key_alg));
+ DBUG_RETURN(1);
+ }
+ for (j= t1_keyinfo[i].keysegs; j--;) /* compare the segments from last to first */
+ {
+ uint8 t1_keysegs_j__type= t1_keysegs[j].type; /* local copy so the legacy type mapping below leaves t1 untouched */
+ /*
+ Table migration from 4.1 to 5.1. In 5.1 a *TEXT key part is
+ always HA_KEYTYPE_VARTEXT2. In 4.1 we had only the equivalent of
+ HA_KEYTYPE_VARTEXT1. Since we treat both the same on the storage
+ engine level, we can ignore a mismatch between these types.
+ */
+ if ((t1_keysegs[j].flag & HA_BLOB_PART) &&
+ (t2_keysegs[j].flag & HA_BLOB_PART))
+ {
+ if ((t1_keysegs_j__type == HA_KEYTYPE_VARTEXT2) &&
+ (t2_keysegs[j].type == HA_KEYTYPE_VARTEXT1))
+ t1_keysegs_j__type= HA_KEYTYPE_VARTEXT1; /* purecov: tested */
+ else if ((t1_keysegs_j__type == HA_KEYTYPE_VARBINARY2) &&
+ (t2_keysegs[j].type == HA_KEYTYPE_VARBINARY1))
+ t1_keysegs_j__type= HA_KEYTYPE_VARBINARY1; /* purecov: inspected */
+ }
+
+ if (t1_keysegs_j__type != t2_keysegs[j].type ||
+ t1_keysegs[j].language != t2_keysegs[j].language ||
+ t1_keysegs[j].null_bit != t2_keysegs[j].null_bit ||
+ t1_keysegs[j].length != t2_keysegs[j].length)
+ {
+ DBUG_PRINT("error", ("Key segment %d (key %d) has different "
+ "definition", j, i));
+ DBUG_PRINT("error", ("t1_type=%d, t1_language=%d, t1_null_bit=%d, "
+ "t1_length=%d",
+ t1_keysegs[j].type, t1_keysegs[j].language,
+ t1_keysegs[j].null_bit, t1_keysegs[j].length));
+ DBUG_PRINT("error", ("t2_type=%d, t2_language=%d, t2_null_bit=%d, "
+ "t2_length=%d",
+ t2_keysegs[j].type, t2_keysegs[j].language,
+ t2_keysegs[j].null_bit, t2_keysegs[j].length));
+
+ DBUG_RETURN(1);
+ }
+ }
+ }
+
+ for (i= 0; i < t1_recs; i++) /* column definitions are compared position by position */
+ {
+ MARIA_COLUMNDEF *t1_rec= &t1_recinfo[i];
+ MARIA_COLUMNDEF *t2_rec= &t2_recinfo[i];
+ /*
+ FIELD_SKIP_ZERO can be changed to FIELD_NORMAL in maria_create,
+ see NOTE1 in ma_create.c
+ */
+ if ((t1_rec->type != t2_rec->type &&
+ !(t1_rec->type == (int) FIELD_SKIP_ZERO && /* the one tolerated type mismatch: 1-byte SKIP_ZERO vs NORMAL */
+ t1_rec->length == 1 &&
+ t2_rec->type == (int) FIELD_NORMAL)) ||
+ t1_rec->length != t2_rec->length ||
+ t1_rec->null_bit != t2_rec->null_bit)
+ {
+ DBUG_PRINT("error", ("Field %d has different definition", i));
+ DBUG_PRINT("error", ("t1_type=%d, t1_length=%d, t1_null_bit=%d",
+ t1_rec->type, t1_rec->length, t1_rec->null_bit));
+ DBUG_PRINT("error", ("t2_type=%d, t2_length=%d, t2_null_bit=%d",
+ t2_rec->type, t2_rec->length, t2_rec->null_bit));
+ DBUG_RETURN(1);
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+extern "C" {
+
+/*
+  Tell the check/repair code whether the connection that owns `param`
+  has been killed.
+
+  RETURN
+    0  not killed
+    1  killed; my_errno is set to HA_ERR_ABORTED_BY_USER
+*/
+int _ma_killed_ptr(HA_CHECK *param)
+{
+  /*
+    Not being killed is the common case, so the branch hint has to cover
+    the whole comparison. Wrapping only thd_killed() in likely() would
+    tell the compiler that a killed connection is the expected outcome.
+  */
+  if (likely(thd_killed((THD*) param->thd) == 0))
+    return 0;
+  my_errno= HA_ERR_ABORTED_BY_USER;
+  return 1;
+}
+
+
+/*
+  Report progress to mysqld
+
+  This is more involved than a typical progress callback because it is
+  called by enable_index/repair, which is only one stage of ALTER TABLE,
+  so the external stage/max_stage cannot be used for it.
+
+  thd_progress_init/thd_progress_next_stage are meant to be called by
+  high level commands like CHECK TABLE or REPAIR TABLE, not by sub
+  commands like enable_index().
+
+  In ma_check.c it is easier to work with stages than with a total
+  progress, so the internal param->stage / param->max_stage are folded
+  into a single overall value here to keep that code simple.
+*/
+
+void _ma_report_progress(HA_CHECK *param, ulonglong progress,
+                         ulonglong max_progress)
+{
+  THD *thd= (THD*) param->thd;
+  /* Completed stages each count as max_progress units. */
+  ulonglong overall_done= progress + max_progress * param->stage;
+  ulonglong overall_total= max_progress * param->max_stage;
+
+  thd_progress_report(thd, overall_done, overall_total);
+}
+
+
+/*
+  Record and report an error found by the check/repair code.
+
+  Always bumps param->error_printed and sets O_DATA_LOST in
+  param->out_flag; the printf-style message itself is only forwarded to
+  _ma_check_print_msg() when T_SUPPRESS_ERR_HANDLING is not set.
+*/
+void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...)
+{
+  DBUG_ENTER("_ma_check_print_error");
+  param->error_printed++;
+  param->out_flag|= O_DATA_LOST;
+  if (!(param->testflag & T_SUPPRESS_ERR_HANDLING))
+  {
+    va_list ap;
+    va_start(ap, fmt);
+    _ma_check_print_msg(param, MA_CHECK_ERROR, fmt, ap);
+    va_end(ap);
+  }
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Format and emit an informational message through _ma_check_print_msg()
+  with level MA_CHECK_INFO. No counters or flags in param are touched here.
+*/
+void _ma_check_print_info(HA_CHECK *param, const char *fmt, ...)
+{
+  va_list ap;
+  DBUG_ENTER("_ma_check_print_info");
+
+  va_start(ap, fmt);
+  _ma_check_print_msg(param, MA_CHECK_INFO, fmt, ap);
+  va_end(ap);
+
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Count a warning, mark the output as "data lost" and emit the formatted
+  message through _ma_check_print_msg() with level MA_CHECK_WARNING.
+*/
+void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...)
+{
+  va_list ap;
+  DBUG_ENTER("_ma_check_print_warning");
+
+  param->out_flag|= O_DATA_LOST;
+  param->warning_printed++;
+
+  va_start(ap, fmt);
+  _ma_check_print_msg(param, MA_CHECK_WARNING, fmt, ap);
+  va_end(ap);
+
+  DBUG_VOID_RETURN;
+}
+
+/*
+ Attach a transaction object to a table instance, creating the
+ connection's transaction first if it does not have one yet
+
+ SYNOPSIS
+ maria_create_trn_for_mysql()
+ info Maria handler; info->external_ref must point to the TABLE whose
+ in_use member is the connection (THD) using the table
+
+ RETURN
+ 0 ok
+ # Error number (HA_ERR_OUT_OF_MEM when trnman_new_trn() fails)
+*/
+
+static int maria_create_trn_for_mysql(MARIA_HA *info)
+{
+ THD *thd= ((TABLE*) info->external_ref)->in_use;
+ /* THD_TRN is a macro defined elsewhere; presumably it reads thd's trn */
+ TRN *trn= THD_TRN;
+ DBUG_ENTER("maria_create_trn_for_mysql");
+
+ if (!trn) /* no transaction yet - open it now */
+ {
+ trn= trnman_new_trn(& thd->transaction->wt);
+ if (unlikely(!trn))
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+ /* Store the new transaction as this engine's data for the connection */
+ thd_set_ha_data(thd, maria_hton, trn);
+ /* Done only when autocommit is off or an explicit BEGIN is active */
+ if (thd->variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
+ trans_register_ha(thd, TRUE, maria_hton, trn->trid);
+ }
+ _ma_set_trn_for_table(info, trn);
+ /*
+ NOTE(review): a zero return presumably means this is the first table
+ locked for the statement - confirm against trnman_increment_locked_tables()
+ */
+ if (!trnman_increment_locked_tables(trn))
+ {
+ trans_register_ha(thd, FALSE, maria_hton, trn->trid);
+ trnman_new_statement(trn);
+ }
+#ifdef EXTRA_DEBUG
+ /* Log the query text once per transaction, on the first write-locked table */
+ if (info->lock_type == F_WRLCK &&
+ ! (trnman_get_flags(trn) & TRN_STATE_INFO_LOGGED))
+ {
+ trnman_set_flags(trn, trnman_get_flags(trn) | TRN_STATE_INFO_LOGGED |
+ TRN_STATE_TABLES_CAN_CHANGE);
+ (void) translog_log_debug_info(trn, LOGREC_DEBUG_INFO_QUERY,
+ (uchar*) thd->query(),
+ thd->query_length());
+ }
+ else
+ {
+ DBUG_PRINT("info", ("lock_type: %d trnman_flags: %u",
+ info->lock_type, trnman_get_flags(trn)));
+ }
+
+#endif
+ DBUG_RETURN(0);
+}
+
+/*
+  Return non-zero when the killed member of the connection using this table
+  (reached through info->external_ref, a TABLE pointer) is non-zero.
+*/
+my_bool ma_killed_in_mariadb(MARIA_HA *info)
+{
+  TABLE *owner= (TABLE*) (info->external_ref);
+  return (owner->in_use->killed != 0);
+}
+
+} /* extern "C" */
+
+/**
+ Transactional table doing bulk insert with one single UNDO
+ (UNDO_BULK_INSERT) and with repair.
+*/
+#define BULK_INSERT_SINGLE_UNDO_AND_REPAIR 1
+/**
+ Transactional table doing bulk insert with one single UNDO
+ (UNDO_BULK_INSERT) and without repair.
+*/
+#define BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR 2
+/**
+ Neither BULK_INSERT_SINGLE_UNDO_AND_REPAIR nor
+ BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR.
+*/
+#define BULK_INSERT_NONE 0
+
+/*
+  Construct a handler with no table open yet (file == 0), index enabling
+  switched off and no bulk insert in progress. int_table_flags starts with
+  the capabilities listed here; ha_maria::open() ORs in further flags.
+*/
+ha_maria::ha_maria(handlerton *hton, TABLE_SHARE *table_arg):
+  handler(hton, table_arg),
+  file(0),
+  int_table_flags(/* key and index capabilities */
+                  HA_NULL_IN_KEY | HA_AUTO_PART_KEY | HA_CAN_INDEX_BLOBS |
+                  HA_CAN_FULLTEXT | HA_CAN_RTREEKEYS | HA_CAN_GEOMETRY |
+                  HA_DUPLICATE_POS |
+                  /* column and row capabilities */
+                  HA_CAN_BIT_FIELD | HA_CAN_VIRTUAL_COLUMNS |
+                  HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT |
+                  /* binary logging */
+                  HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE |
+                  /* storage, maintenance and transactions */
+                  HA_FILE_BASED | HA_CAN_SQL_HANDLER | HA_CAN_REPAIR |
+                  HA_CAN_EXPORT | HA_CAN_TABLES_WITHOUT_ROLLBACK |
+                  TRANSACTION_STATE),
+  can_enable_indexes(0),
+  bulk_insert_single_undo(BULK_INSERT_NONE)
+{}
+
+
+/*
+  Create a second handler for the same table. The name argument is ignored;
+  the clone is opened with the file name stored in the share. On success the
+  clone shares this handler's state pointer and transaction.
+  Returns the new handler, or 0 if handler::clone() failed.
+*/
+handler *ha_maria::clone(const char *name __attribute__((unused)),
+                         MEM_ROOT *mem_root)
+{
+  handler *generic_copy= handler::clone(file->s->open_file_name.str,
+                                        mem_root);
+  ha_maria *copy= static_cast <ha_maria *>(generic_copy);
+
+  if (!copy)
+    return copy;
+
+  copy->file->state= file->state;
+  /* maria_create_trn_for_mysql() is never called for clone() tables */
+  copy->file->trn= file->trn;
+  DBUG_ASSERT(copy->file->trn_prev == 0 &&
+              copy->file->trn_next == 0);
+  return copy;
+}
+
+
+/*
+ NullS-terminated list of the file name extensions used by this engine
+ (MARIA_NAME_IEXT and MARIA_NAME_DEXT; presumably the index file and the
+ data file extension - confirm against their definitions).
+*/
+static const char *ha_maria_exts[]=
+{
+ MARIA_NAME_IEXT,
+ MARIA_NAME_DEXT,
+ NullS
+};
+
+
+/*
+  Return the printable type name of index number key_number.
+  The key flags are tested first (HA_FULLTEXT, then HA_SPATIAL), then the
+  RTREE algorithm; every other index is reported as "BTREE".
+*/
+const char *ha_maria::index_type(uint key_number)
+{
+  if (table->key_info[key_number].flags & HA_FULLTEXT)
+    return "FULLTEXT";
+  if (table->key_info[key_number].flags & HA_SPATIAL)
+    return "SPATIAL";
+  if (table->key_info[key_number].algorithm == HA_KEY_ALG_RTREE)
+    return "RTREE";
+  return "BTREE";
+}
+
+
+/*
+  Return the HA_READ_... capability flags of index number inx.
+  FULLTEXT indexes get no flags at all. All other indexes support ordered
+  and range reads plus key-only reads; spatial/RTREE indexes are in addition
+  marked as non-ROR scans, ordinary indexes get index condition pushdown.
+  The part and all_parts arguments are not used.
+*/
+ulong ha_maria::index_flags(uint inx, uint part, bool all_parts) const
+{
+  if (table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT)
+    return 0;
+
+  ulong flags= HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE |
+               HA_READ_ORDER | HA_KEYREAD_ONLY;
+
+  if ((table_share->key_info[inx].flags & HA_SPATIAL ||
+       table_share->key_info[inx].algorithm == HA_KEY_ALG_RTREE))
+  {
+    /* All GIS scans are non-ROR scans. We also disable IndexConditionPushdown */
+    flags|= HA_KEY_SCAN_NOT_ROR;
+  }
+  else
+    flags|= HA_DO_INDEX_COND_PUSHDOWN;
+
+  return flags;
+}
+
+
+/*
+  Estimate the cost of a full table scan.
+  For BLOCK_RECORD tables the estimate is computed from the data file length
+  in units of the share's block size; all other row formats use the generic
+  handler::scan_time().
+*/
+double ha_maria::scan_time()
+{
+  if (file->s->data_file_type != BLOCK_RECORD)
+    return handler::scan_time();
+
+  return (ulonglong2double(stats.data_file_length - file->s->block_size) /
+          file->s->block_size) + 2;
+}
+
+/*
+  We need to be able to store at least 2 keys on an index page, as the
+  splitting algorithms depend on this. (With only one key on a page
+  we also can't use any compression, which may make the index file much
+  larger.)
+  We use MARIA_MAX_KEY_LENGTH to limit the key size, as we don't want to use
+  too much stack when searching in the b_tree.
+
+  We also need to reserve space for a record pointer (8) and 3 bytes
+  per key segment to store the length of the segment + possible null bytes.
+  These extra bytes are required here so that maria_create() will surely
+  accept any key created with the returned key data storage length.
+*/
+
+uint ha_maria::max_supported_key_length() const
+{
+  /* The limit itself is computed by maria_max_key_length() */
+  const uint key_length_limit= maria_max_key_length();
+  return key_length_limit;
+}
+
+/*
+ Open the table and set up the per-table handler flags.
+
+ name Table name; name is here without an extension
+ mode Open mode, passed unchanged to maria_open()
+ test_if_locked HA_OPEN_... flags; some more are ORed in below before the
+ value is passed to maria_open()
+
+ Returns 0 on success. If maria_open() fails, returns my_errno, or -1 when
+ my_errno is 0.
+*/
+
+int ha_maria::open(const char *name, int mode, uint test_if_locked)
+{
+ uint i;
+
+#ifdef NOT_USED
+ /*
+ If the user wants to have memory mapped data files, add an
+ open_flag. Do not memory map temporary tables because they are
+ expected to be inserted and thus extended a lot. Memory mapping is
+ efficient for files that keep their size, but very inefficient for
+ growing files. Using an open_flag instead of calling ma_extra(...
+ HA_EXTRA_MMAP ...) after maria_open() has the advantage that the
+ mapping is not repeated for every open, but just done on the initial
+ open, when the Maria share is created. Every time the server
+ requires to open a new instance of a table it calls this method. We
+ will always supply HA_OPEN_MMAP for a permanent table. However, the
+ Maria storage engine will ignore this flag if this is a secondary
+ open of a table that is in use by other threads already (if the
+ Maria share exists already).
+ */
+ if (!(test_if_locked & HA_OPEN_TMP_TABLE) && opt_maria_use_mmap)
+ test_if_locked|= HA_OPEN_MMAP;
+#endif
+
+ if (maria_recover_options & HA_RECOVER_ANY)
+ {
+ /* user asked to trigger a repair if table was not properly closed */
+ test_if_locked|= HA_OPEN_ABORT_IF_CRASHED;
+ }
+
+ if (aria_readonly)
+ test_if_locked|= HA_OPEN_IGNORE_MOVED_STATE;
+
+ if (!(file= maria_open(name, mode, test_if_locked | HA_OPEN_FROM_SQL_LAYER,
+ s3_open_args())))
+ {
+ /* HA_ERR_OLD_FILE additionally gets a note with the zerofill message */
+ if (my_errno == HA_ERR_OLD_FILE)
+ {
+ push_warning(current_thd, Sql_condition::WARN_LEVEL_NOTE,
+ ER_CRASHED_ON_USAGE,
+ zerofill_error_msg);
+ }
+ return (my_errno ? my_errno : -1);
+ }
+ if (aria_readonly)
+ file->s->options|= HA_OPTION_READ_ONLY_DATA;
+
+ file->s->chst_invalidator= query_cache_invalidate_by_MyISAM_filename_ref;
+ /* Set external_ref, mainly for temporary tables */
+ file->external_ref= (void*) table; // For ma_killed()
+
+ if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE))
+ maria_extra(file, HA_EXTRA_NO_WAIT_LOCK, 0);
+
+ info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
+ if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED))
+ maria_extra(file, HA_EXTRA_WAIT_LOCK, 0);
+ /* Also caches the row format in the data_file_type member */
+ if ((data_file_type= file->s->data_file_type) != STATIC_RECORD)
+ int_table_flags |= HA_REC_NOT_IN_SEQ;
+ if (!file->s->base.born_transactional)
+ {
+ /*
+ INSERT DELAYED cannot work with transactional tables (because it cannot
+ stand up to "when client gets ok the data is safe on disk": the record
+ may not even be inserted). In the future, we could enable it back (as a
+ client doing INSERT DELAYED knows the specificities; but we then should
+ make sure to regularly commit in the delayed_insert thread).
+ */
+ int_table_flags|= HA_CAN_INSERT_DELAYED | HA_NO_TRANSACTIONS;
+ }
+ else
+ int_table_flags|= HA_CRASH_SAFE;
+
+ if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
+ int_table_flags |= HA_HAS_NEW_CHECKSUM;
+
+ /*
+ We can only do online backup on transactional tables with checksum.
+ Checksums are needed to avoid half writes.
+ */
+ if (file->s->options & HA_OPTION_PAGE_CHECKSUM &&
+ file->s->base.born_transactional)
+ int_table_flags |= HA_CAN_ONLINE_BACKUPS;
+
+ /*
+ For static size rows, tell MariaDB that we will access all bytes
+ in the record when writing it. This signals MariaDB to initialize
+ the full row to ensure we don't get any errors from valgrind and
+ that all bytes in the row are properly reset.
+ */
+ if (file->s->data_file_type == STATIC_RECORD &&
+ (file->s->has_varchar_fields || file->s->has_null_fields))
+ int_table_flags|= HA_RECORD_MUST_BE_CLEAN_ON_WRITE;
+
+ /*
+ Per key: hand the fulltext parser of keys flagged HA_USES_PARSER to the
+ engine, and copy the engine's key block length back to the SQL layer.
+ */
+ for (i= 0; i < table->s->keys; i++)
+ {
+ plugin_ref parser= table->key_info[i].parser;
+ if (table->key_info[i].flags & HA_USES_PARSER)
+ file->s->keyinfo[i].parser=
+ (struct st_mysql_ftparser *)plugin_decl(parser)->info;
+ table->key_info[i].block_size= file->s->keyinfo[i].block_length;
+ }
+ my_errno= 0;
+
+ /* Count statistics of usage for newly open normal files */
+ if (file->s->reopen == 1 && ! (test_if_locked & HA_OPEN_TMP_TABLE))
+ {
+ if (file->s->delay_key_write)
+ feature_files_opened_with_delayed_keys++;
+ }
+
+ /* my_errno was reset to 0 above and nothing after that sets it here */
+ return my_errno;
+}
+
+
+/*
+  Close the table if it is open.
+  Returns 0 when no table is open, otherwise the result of maria_close().
+  The file member is cleared before maria_close() is called.
+*/
+int ha_maria::close(void)
+{
+  if (!file)
+    return 0;
+
+  MARIA_HA *handle= file;
+  DBUG_ASSERT(handle->trn == 0 || handle->trn == &dummy_transaction_object);
+  DBUG_ASSERT(handle->trn_next == 0 && handle->trn_prev == 0);
+  file= 0;
+  return maria_close(handle);
+}
+
+
+/*
+  Insert one row.
+  Returns the error from update_auto_increment() if that fails, otherwise
+  the result of maria_write().
+*/
+int ha_maria::write_row(const uchar * buf)
+{
+  /*
+    The auto_increment value is only generated when the table has such a
+    column and the row being written is table->record[0].
+  */
+  const bool set_auto_increment= (table->next_number_field &&
+                                  buf == table->record[0]);
+  if (set_auto_increment)
+  {
+    const int auto_inc_error= update_auto_increment();
+    if (auto_inc_error)
+      return auto_inc_error;
+  }
+  return maria_write(file, buf);
+}
+
+
+/*
+  Check the table for errors.
+
+  SYNOPSIS
+    check()
+    thd          Connection running the check; also used to allocate the
+                 HA_CHECK descriptor
+    check_opt    Options; check_opt->flags is merged into param->testflag
+
+  RETURN
+    HA_ADMIN_OK               No error was found
+    HA_ADMIN_ALREADY_DONE     Table is not crashed and the
+                              T_CHECK_ONLY_CHANGED / T_FAST shortcut applies
+    HA_ADMIN_CORRUPT          An error was found, or the table is marked
+                              STATE_MOVED without any crashed flag
+    HA_ADMIN_INTERNAL_ERROR   No table is open or allocation failed
+*/
+int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt)
+{
+  int error, fatal_error;
+  HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
+  MARIA_SHARE *share;
+  const char *old_proc_info;
+  TRN *old_trn;
+
+  /* Must be tested before file->s and file->trn are read */
+  if (!file || !param)
+    return HA_ADMIN_INTERNAL_ERROR;
+
+  share= file->s;
+  old_trn= file->trn;
+
+  unmap_file(file);
+  maria_chk_init(param);
+  param->thd= thd;
+  param->op_name= "check";
+  param->db_name= table->s->db.str;
+  param->table_name= table->alias.c_ptr();
+  param->testflag= check_opt->flags | T_CHECK | T_SILENT;
+  param->stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);
+
+  if (!(table->db_stat & HA_READ_ONLY))
+    param->testflag |= T_STATISTICS;
+  param->using_global_keycache= 1;
+
+  if (!maria_is_crashed(file) &&
+      (((param->testflag & T_CHECK_ONLY_CHANGED) &&
+        !(share->state.changed & (STATE_CHANGED | STATE_CRASHED_FLAGS |
+                                  STATE_IN_REPAIR)) &&
+        share->state.open_count == 0) ||
+       ((param->testflag & T_FAST) && (share->state.open_count ==
+                                      (uint) (share->global_changed ? 1 :
+                                              0)))))
+    return HA_ADMIN_ALREADY_DONE;
+
+  maria_chk_init_for_check(param, file);
+  param->max_allowed_lsn= translog_get_horizon();
+
+  /* STATE_MOVED without any crashed flag: report the zerofill message */
+  if ((file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED)) ==
+      STATE_MOVED)
+  {
+    _ma_check_print_error(param, "%s", zerofill_error_msg);
+    return HA_ADMIN_CORRUPT;
+  }
+
+  old_proc_info= thd_proc_info(thd, "Checking status");
+  thd_progress_init(thd, 3);
+  error= maria_chk_status(param, file);                // Not fatal
+  /* maria_chk_size() will flush the page cache for this file */
+  if (maria_chk_size(param, file))
+    error= 1;
+  if (!error)
+    error|= maria_chk_del(param, file, param->testflag);
+  thd_proc_info(thd, "Checking keys");
+  thd_progress_next_stage(thd);
+  if (!error)
+    error= maria_chk_key(param, file);
+  thd_proc_info(thd, "Checking data");
+  thd_progress_next_stage(thd);
+  if (!error)
+  {
+    if ((!(param->testflag & T_QUICK) &&
+         ((share->options &
+           (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ||
+          (param->testflag & (T_EXTEND | T_MEDIUM)))) || maria_is_crashed(file))
+    {
+      /* T_MEDIUM is forced only while the data links are checked */
+      ulonglong old_testflag= param->testflag;
+      param->testflag |= T_MEDIUM;
+      if (!(error= init_io_cache(&param->read_cache, file->dfile.file,
+                                 my_default_record_cache_size, READ_CACHE,
+                                 share->pack.header_length, 1, MYF(MY_WME))))
+      {
+        error= maria_chk_data_link(param, file,
+                                   MY_TEST(param->testflag & T_EXTEND));
+        end_io_cache(&param->read_cache);
+      }
+      param->testflag= old_testflag;
+    }
+  }
+  fatal_error= error;
+  /*
+    If every printed error is accounted for by skipped-LSN errors and
+    not-visible rows, the problem is not treated as fatal.
+  */
+  if (param->error_printed &&
+      param->error_printed == (param->skip_lsn_error_count +
+                               param->not_visible_rows_found) &&
+      !(share->state.changed & (STATE_CRASHED_FLAGS | STATE_IN_REPAIR)))
+  {
+    _ma_check_print_error(param, "%s", zerofill_error_msg);
+    /* This ensures that a future REPAIR TABLE will only do a zerofill */
+    file->update|= STATE_MOVED;
+    share->state.changed|= STATE_MOVED;
+    fatal_error= 0;
+  }
+  if (!fatal_error)
+  {
+    if ((share->state.changed & (STATE_CHANGED | STATE_MOVED |
+                                 STATE_CRASHED_FLAGS |
+                                 STATE_IN_REPAIR | STATE_NOT_ANALYZED)) ||
+        (param->testflag & T_STATISTICS) || maria_is_crashed(file))
+    {
+      file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+      mysql_mutex_lock(&share->intern_lock);
+      DBUG_PRINT("info", ("Resetting crashed state"));
+      share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS |
+                               STATE_IN_REPAIR);
+      if (!(table->db_stat & HA_READ_ONLY))
+      {
+        int tmp;
+        if ((tmp= maria_update_state_info(param, file,
+                                          UPDATE_TIME | UPDATE_OPEN_COUNT |
+                                          UPDATE_STAT)))
+          error= tmp;
+      }
+      mysql_mutex_unlock(&share->intern_lock);
+      info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
+           HA_STATUS_CONST);
+
+      /*
+        Write a 'table is ok' message to error log if table is ok and
+        we have written to error log that table was getting checked
+      */
+      if (!error && !(table->db_stat & HA_READ_ONLY) &&
+          !maria_is_crashed(file) && thd->error_printed_to_log &&
+          (param->warning_printed || param->error_printed ||
+           param->note_printed))
+        _ma_check_print_info(param, "Table is fixed");
+    }
+  }
+  else if (!maria_is_crashed(file) && !thd->killed)
+  {
+    maria_mark_crashed(file);
+    file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+  }
+
+  /* Reset trn, that may have been set by repair */
+  if (old_trn && old_trn != file->trn)
+  {
+    DBUG_ASSERT(old_trn->used_instances == 0);
+    _ma_set_trn_for_table(file, old_trn);
+  }
+  thd_proc_info(thd, old_proc_info);
+  thd_progress_end(thd);
+  return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
+}
+
+
+/*
+  Analyze the key distribution in the table
+  As the table may be only locked for read, we have to take into account that
+  two threads may do an analyze at the same time!
+
+  RETURN
+    HA_ADMIN_OK               Keys checked and statistics saved
+    HA_ADMIN_ALREADY_DONE     STATE_NOT_ANALYZED is not set for the table
+    HA_ADMIN_CORRUPT          maria_chk_key() or the state update failed
+    HA_ADMIN_INTERNAL_ERROR   No table is open or allocation failed
+*/
+
+int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt)
+{
+  int error= 0;
+  HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
+  MARIA_SHARE *share;
+  const char *old_proc_info;
+
+  /*
+    Same guard as in check(), repair(), zerofill() and optimize(); it must
+    come before file->s is read.
+  */
+  if (!file || !param)
+    return HA_ADMIN_INTERNAL_ERROR;
+
+  share= file->s;
+
+  maria_chk_init(param);
+  param->thd= thd;
+  param->op_name= "analyze";
+  param->db_name= table->s->db.str;
+  param->table_name= table->alias.c_ptr();
+  param->testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS |
+                    T_DONT_CHECK_CHECKSUM);
+  param->using_global_keycache= 1;
+  param->stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);
+
+  if (!(share->state.changed & STATE_NOT_ANALYZED))
+    return HA_ADMIN_ALREADY_DONE;
+
+  old_proc_info= thd_proc_info(thd, "Scanning");
+  thd_progress_init(thd, 1);
+  error= maria_chk_key(param, file);
+  if (!error)
+  {
+    mysql_mutex_lock(&share->intern_lock);
+    error= maria_update_state_info(param, file, UPDATE_STAT);
+    mysql_mutex_unlock(&share->intern_lock);
+  }
+  else if (!maria_is_crashed(file) && !thd->killed)
+    maria_mark_crashed(file);           /* not when the error is due to a kill */
+  thd_proc_info(thd, old_proc_info);
+  thd_progress_end(thd);
+  return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK;
+}
+
+/*
+ Repair the table, retrying with less aggressive options when the repair
+ code asks for it through param->retry_repair.
+
+ A table marked STATE_MOVED without any crashed flag is only zerofilled.
+ Otherwise repair(thd, param, 0) is run: with T_REP when T_EXTEND was
+ requested, else with T_REP_BY_SORT. On failure with retry_repair set it is
+ retried first without T_QUICK (if T_RETRY_WITHOUT_QUICK and T_QUICK are
+ both set), then with T_REP instead of T_REP_BY_SORT.
+
+ Returns the result of the last repair attempt (or of zerofill()),
+ HA_ADMIN_COMMIT_ERROR if the implicit commit fails, or
+ HA_ADMIN_INTERNAL_ERROR if no table is open or allocation failed.
+*/
+int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt)
+{
+ int error;
+ HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
+ ha_rows start_records;
+ const char *old_proc_info;
+
+ if (!file || !param)
+ return HA_ADMIN_INTERNAL_ERROR;
+
+ maria_chk_init(param);
+ param->thd= thd;
+ param->op_name= "repair";
+
+ /*
+ The following can only be true if the table was marked as STATE_MOVED
+ during a CHECK TABLE and the table has not been used since then
+ */
+ if ((file->s->state.changed & STATE_MOVED) &&
+ !(file->s->state.changed & STATE_CRASHED_FLAGS))
+ {
+ param->db_name= table->s->db.str;
+ param->table_name= table->alias.c_ptr();
+ _ma_check_print_info(param, "Running zerofill on moved table");
+ return zerofill(thd, check_opt);
+ }
+
+ param->testflag= ((check_opt->flags & ~(T_EXTEND)) |
+ T_SILENT | T_FORCE_CREATE | T_CALC_CHECKSUM |
+ (check_opt->flags & T_EXTEND ? T_REP : T_REP_BY_SORT));
+ param->sort_buffer_length= THDVAR(thd, sort_buffer_size);
+ param->backup_time= check_opt->start_time;
+ /* Remembered so the row count can be restored before each retry */
+ start_records= file->state->records;
+ old_proc_info= thd_proc_info(thd, "Checking table");
+ thd_progress_init(thd, 1);
+ while ((error= repair(thd, param, 0)) && param->retry_repair)
+ {
+ param->retry_repair= 0;
+ file->state->records= start_records;
+ if (test_all_bits(param->testflag,
+ (uint) (T_RETRY_WITHOUT_QUICK | T_QUICK)))
+ {
+ param->testflag&= ~(T_RETRY_WITHOUT_QUICK | T_QUICK);
+ /* Ensure we don't lose any rows when retrying without quick */
+ param->testflag|= T_SAFE_REPAIR;
+ if (thd->vio_ok())
+ _ma_check_print_info(param, "Retrying repair without quick");
+ else
+ sql_print_information("Retrying repair of: '%s' without quick",
+ table->s->path.str);
+ continue;
+ }
+ param->testflag &= ~T_QUICK;
+ if (param->testflag & T_REP_BY_SORT)
+ {
+ param->testflag= (param->testflag & ~T_REP_BY_SORT) | T_REP;
+ /*
+ NOTE(review): unlike the branch above there is no 'else' here, so the
+ message below is always written to the log - confirm this is intended
+ */
+ if (thd->vio_ok())
+ _ma_check_print_info(param, "Retrying repair with keycache");
+ sql_print_information("Retrying repair of: '%s' with keycache",
+ table->s->path.str);
+ continue;
+ }
+ /* No further fallback is available */
+ break;
+ }
+ /*
+ Commit is needed in the case of tables are locked to ensure that repair
+ is registered in the recovery log
+ */
+ if (implicit_commit(thd, TRUE))
+ error= HA_ADMIN_COMMIT_ERROR;
+
+ if (!error && start_records != file->state->records &&
+ !(check_opt->flags & T_VERY_SILENT))
+ {
+ char llbuff[22], llbuff2[22];
+ sql_print_information("Found %s of %s rows when repairing '%s'",
+ llstr(file->state->records, llbuff),
+ llstr(start_records, llbuff2),
+ table->s->path.str);
+ }
+ thd_proc_info(thd, old_proc_info);
+ thd_progress_end(thd);
+ return error;
+}
+
+/*
+  Run maria_zerofill() on the table and, on success, mark the table as
+  STATE_NOT_MOVABLE and save the updated state.
+
+  RETURN
+    0                         ok
+    HA_ADMIN_INTERNAL_ERROR   No table is open or allocation failed
+    #                         Error returned by maria_zerofill()
+*/
+int ha_maria::zerofill(THD * thd, HA_CHECK_OPT *check_opt)
+{
+  int error;
+  HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
+  TRN *old_trn;
+  MARIA_SHARE *share;
+
+  /* Must be tested before file->s and file->trn are read */
+  if (!file || !param)
+    return HA_ADMIN_INTERNAL_ERROR;
+
+  share= file->s;
+
+  unmap_file(file);
+  old_trn= file->trn;
+  maria_chk_init(param);
+  param->thd= thd;
+  param->op_name= "zerofill";
+  param->testflag= check_opt->flags | T_SILENT | T_ZEROFILL;
+  param->sort_buffer_length= THDVAR(thd, sort_buffer_size);
+  param->db_name= table->s->db.str;
+  param->table_name= table->alias.c_ptr();
+
+  error=maria_zerofill(param, file, share->open_file_name.str);
+
+  /* Reset trn, that may have been set by repair */
+  if (old_trn && old_trn != file->trn)
+    _ma_set_trn_for_table(file, old_trn);
+
+  if (!error)
+  {
+    TrID create_trid= trnman_get_min_safe_trid();
+    mysql_mutex_lock(&share->intern_lock);
+    share->state.changed|= STATE_NOT_MOVABLE;
+    maria_update_state_info(param, file, UPDATE_TIME | UPDATE_OPEN_COUNT);
+    _ma_update_state_lsns_sub(share, LSN_IMPOSSIBLE, create_trid,
+                              TRUE, TRUE);
+    mysql_mutex_unlock(&share->intern_lock);
+  }
+  return error;
+}
+
+/*
+  Optimize the table: run repair(thd, param, 1) with sort-repair, statistics
+  and index sorting requested. If that fails and the repair code set
+  param->retry_repair, log a warning and retry once through
+  repair(thd, param, 0) without T_REP_BY_SORT.
+
+  Returns the result of the last repair() call, or HA_ADMIN_INTERNAL_ERROR
+  if no table is open or allocation failed.
+*/
+int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt)
+{
+  HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
+
+  if (!file || !param)
+    return HA_ADMIN_INTERNAL_ERROR;
+
+  maria_chk_init(param);
+  param->op_name= "optimize";
+  param->thd= thd;
+  param->sort_buffer_length= THDVAR(thd, sort_buffer_size);
+  param->testflag= (check_opt->flags | T_SILENT | T_FORCE_CREATE |
+                    T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX);
+
+  thd_progress_init(thd, 1);
+  int result= repair(thd, param, 1);
+  if (result && param->retry_repair)
+  {
+    sql_print_warning("Warning: Optimize table got errno %d on %s.%s, retrying",
+                      my_errno, param->db_name, param->table_name);
+    param->testflag&= ~T_REP_BY_SORT;
+    result= repair(thd, param, 0);
+  }
+  thd_progress_end(thd);
+  return result;
+}
+
+
+/*
+ Worker behind the repair/optimize entry points: run the repair variant
+ selected by param->testflag, then optional index sorting and key analysis,
+ and save the resulting table state.
+
+ thd Connection running the operation
+ param Check descriptor prepared by the caller; testflag selects what
+ to do. Several members (db_name, table_name, tmpdir, out_flag,
+ ...) are (re)initialized here.
+ do_optimize If set, the repair step is skipped unless the table looks
+ unoptimized (see the condition before the repair step below)
+
+ Returns HA_ADMIN_OK, HA_ADMIN_FAILED, or HA_ADMIN_ALREADY_DONE when nothing
+ had to be done.
+*/
+int ha_maria::repair(THD *thd, HA_CHECK *param, bool do_optimize)
+{
+ int error= 0;
+ ulonglong local_testflag= param->testflag;
+ bool optimize_done= !do_optimize, statistics_done= 0, full_repair_done= 0;
+ const char *old_proc_info= thd->proc_info;
+ char fixed_name[FN_REFLEN];
+ MARIA_SHARE *share= file->s;
+ ha_rows rows= file->state->records;
+ TRN *old_trn= file->trn;
+ my_bool locking= 0;
+ DBUG_ENTER("ha_maria::repair");
+
+ /*
+ Normally this method is entered with a properly opened table. If the
+ repair fails, it can be repeated with more elaborate options. Under
+ special circumstances it can happen that a repair fails so that it
+ closed the data file and cannot re-open it. In this case file->dfile
+ is set to -1. We must not try another repair without an open data
+ file. (Bug #25289)
+ */
+ if (file->dfile.file == -1)
+ {
+ sql_print_information("Retrying repair of: '%s' failed. "
+ "Please try REPAIR EXTENDED or aria_chk",
+ table->s->path.str);
+ DBUG_RETURN(HA_ADMIN_FAILED);
+ }
+
+ /*
+ If transactions was not enabled for a transactional table then
+ file->s->status is not up to date. This is needed for repair_by_sort
+ to work
+ */
+ if (share->base.born_transactional && !share->now_transactional)
+ _ma_copy_nontrans_state_information(file);
+
+ param->db_name= table->s->db.str;
+ param->table_name= table->alias.c_ptr();
+ param->tmpfile_createflag= O_RDWR | O_TRUNC;
+ param->using_global_keycache= 1;
+ param->thd= thd;
+ param->tmpdir= &mysql_tmpdir_list;
+ param->out_flag= 0;
+ share->state.dupp_key= MI_MAX_KEY;
+ strmov(fixed_name, share->open_file_name.str);
+ unmap_file(file);
+
+ /*
+ Don't lock tables if we have used LOCK TABLE or if we come from
+ enable_index()
+ */
+ if (!thd->locked_tables_mode && ! (param->testflag & T_NO_LOCKS))
+ {
+ locking= 1;
+ if (maria_lock_database(file, table->s->tmp_table ? F_EXTRA_LCK : F_WRLCK))
+ {
+ _ma_check_print_error(param, ER_THD(thd, ER_CANT_LOCK), my_errno);
+ DBUG_RETURN(HA_ADMIN_FAILED);
+ }
+ }
+
+ /*
+ Always repair when not optimizing. When optimizing, repair only if the
+ rows look unoptimized (flag for BLOCK_RECORD, deleted or split rows for
+ the other formats) and either T_QUICK is not set or the state says that
+ keys or rows are not optimized.
+ */
+ if (!do_optimize ||
+ (((share->data_file_type == BLOCK_RECORD) ?
+ (share->state.changed & STATE_NOT_OPTIMIZED_ROWS) :
+ (file->state->del ||
+ share->state.split != file->state->records)) &&
+ (!(param->testflag & T_QUICK) ||
+ (share->state.changed & (STATE_NOT_OPTIMIZED_KEYS |
+ STATE_NOT_OPTIMIZED_ROWS)))))
+ {
+ ulonglong key_map= ((local_testflag & T_CREATE_MISSING_KEYS) ?
+ maria_get_mask_all_keys_active(share->base.keys) :
+ share->state.key_map);
+ ulonglong save_testflag= param->testflag;
+ if (maria_test_if_sort_rep(file, file->state->records, key_map, 0) &&
+ (local_testflag & T_REP_BY_SORT))
+ {
+ local_testflag |= T_STATISTICS;
+ param->testflag |= T_STATISTICS; // We get this for free
+ statistics_done= 1;
+ /* TODO: Remove BLOCK_RECORD test when parallel works with blocks */
+ if (THDVAR(thd,repair_threads) > 1 &&
+ share->data_file_type != BLOCK_RECORD)
+ {
+ char buf[40];
+ /* TODO: respect maria_repair_threads variable */
+ my_snprintf(buf, 40, "Repair with %d threads", my_count_bits(key_map));
+ thd_proc_info(thd, buf);
+ param->testflag|= T_REP_PARALLEL;
+ error= maria_repair_parallel(param, file, fixed_name,
+ MY_TEST(param->testflag & T_QUICK));
+ /* to reset proc_info, as it was pointing to local buffer */
+ thd_proc_info(thd, "Repair done");
+ }
+ else
+ {
+ thd_proc_info(thd, "Repair by sorting");
+ param->testflag|= T_REP_BY_SORT;
+ error= maria_repair_by_sort(param, file, fixed_name,
+ MY_TEST(param->testflag & T_QUICK));
+ }
+ /* Report a duplicate key found while building a unique index by sort */
+ if (error && file->create_unique_index_by_sort &&
+ share->state.dupp_key != MAX_KEY)
+ {
+ my_errno= HA_ERR_FOUND_DUPP_KEY;
+ print_keydup_error(table, &table->key_info[share->state.dupp_key],
+ MYF(0));
+ }
+ }
+ else
+ {
+ thd_proc_info(thd, "Repair with keycache");
+ param->testflag &= ~(T_REP_BY_SORT | T_REP_PARALLEL);
+ error= maria_repair(param, file, fixed_name,
+ MY_TEST(param->testflag & T_QUICK));
+ }
+ /* Restore the caller's flags, keeping only a new T_RETRY_WITHOUT_QUICK */
+ param->testflag= save_testflag | (param->testflag & T_RETRY_WITHOUT_QUICK);
+ optimize_done= 1;
+ /*
+ set full_repair_done if we re-wrote all rows and all keys
+ (and thus removed all transid's from the table)
+ */
+ full_repair_done= !MY_TEST(param->testflag & T_QUICK);
+ }
+ if (!error)
+ {
+ if ((local_testflag & T_SORT_INDEX) &&
+ (share->state.changed & STATE_NOT_SORTED_PAGES))
+ {
+ optimize_done= 1;
+ thd_proc_info(thd, "Sorting index");
+ error= maria_sort_index(param, file, fixed_name);
+ }
+ if (!error && !statistics_done && (local_testflag & T_STATISTICS))
+ {
+ if (share->state.changed & STATE_NOT_ANALYZED)
+ {
+ optimize_done= 1;
+ thd_proc_info(thd, "Analyzing");
+ error= maria_chk_key(param, file);
+ }
+ else
+ local_testflag &= ~T_STATISTICS; // Don't update statistics
+ }
+ }
+ thd_proc_info(thd, "Saving state");
+ if (full_repair_done && !error &&
+ !(param->testflag & T_NO_CREATE_RENAME_LSN))
+ {
+ /* Set trid (needed if the table was moved from another system) */
+ share->state.create_trid= trnman_get_min_safe_trid();
+ }
+ mysql_mutex_lock(&share->intern_lock);
+ if (!error)
+ {
+ if ((share->state.changed & STATE_CHANGED) || maria_is_crashed(file))
+ {
+ DBUG_PRINT("info", ("Resetting crashed state"));
+ share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED_FLAGS |
+ STATE_IN_REPAIR | STATE_MOVED);
+ file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+ }
+ /*
+ repair updates share->state.state. Ensure that file->state is up to date
+ */
+ if (file->state != &share->state.state)
+ *file->state= share->state.state;
+
+ if (share->base.auto_key)
+ _ma_update_auto_increment_key(param, file, 1);
+ if (optimize_done)
+ error= maria_update_state_info(param, file,
+ UPDATE_TIME | UPDATE_OPEN_COUNT |
+ (local_testflag &
+ T_STATISTICS ? UPDATE_STAT : 0));
+ /* File is repaired; Mark the file as moved to this system */
+ (void) _ma_set_uuid(share, 0);
+
+ info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE |
+ HA_STATUS_CONST);
+ if (rows != file->state->records && !(param->testflag & T_VERY_SILENT))
+ {
+ char llbuff[22], llbuff2[22];
+ _ma_check_print_warning(param, "Number of rows changed from %s to %s",
+ llstr(rows, llbuff),
+ llstr(file->state->records, llbuff2));
+ }
+ }
+ else
+ {
+ maria_mark_crashed_on_repair(file);
+ file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED;
+ maria_update_state_info(param, file, 0);
+ }
+ mysql_mutex_unlock(&share->intern_lock);
+ thd_proc_info(thd, old_proc_info);
+ thd_progress_end(thd); // Mark done
+ if (locking)
+ maria_lock_database(file, F_UNLCK);
+
+ /* Reset trn, that may have been set by repair */
+ if (old_trn && old_trn != file->trn)
+ _ma_set_trn_for_table(file, old_trn);
+ /*
+ Map the internal result to a HA_ADMIN_ code; a repair log record is
+ written only when something was actually done without error.
+ */
+ error= error ? HA_ADMIN_FAILED :
+ (optimize_done ?
+ (write_log_record_for_repair(param, file) ? HA_ADMIN_FAILED :
+ HA_ADMIN_OK) : HA_ADMIN_ALREADY_DONE);
+ DBUG_RETURN(error);
+}
+
+
+/*
+  Assign table indexes to a specific key cache.
+
+  The implementation is compiled out ("#if 0"); the method currently always
+  returns HA_ADMIN_NOT_IMPLEMENTED. The disabled code is kept in a state
+  that is safe to re-enable.
+*/
+
+int ha_maria::assign_to_keycache(THD * thd, HA_CHECK_OPT *check_opt)
+{
+#if 0 && NOT_IMPLEMENTED
+  PAGECACHE *new_pagecache= check_opt->pagecache;
+  const char *errmsg= 0;
+  /* Function scope: errmsg points into this buffer until it is printed */
+  char buf[STRING_BUFFER_USUAL_SIZE];
+  int error= HA_ADMIN_OK;
+  ulonglong map;
+  TABLE_LIST *table_list= table->pos_in_table_list;
+  DBUG_ENTER("ha_maria::assign_to_keycache");
+
+  table->keys_in_use_for_query.clear_all();
+
+  if (table_list->process_index_hints(table))
+    DBUG_RETURN(HA_ADMIN_FAILED);
+  /* use all keys if there's no list specified by the user through hints */
+  map= ~(ulonglong) 0;
+  if (!table->keys_in_use_for_query.is_clear_all())
+    map= table->keys_in_use_for_query.to_ulonglong();
+
+  if ((error= maria_assign_to_pagecache(file, map, new_pagecache)))
+  {
+    my_snprintf(buf, sizeof(buf),
+                "Failed to flush to index file (errno: %d)", error);
+    errmsg= buf;
+    error= HA_ADMIN_CORRUPT;
+  }
+
+  if (error != HA_ADMIN_OK)
+  {
+    /* Send error to user */
+    HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
+    if (!param)
+      DBUG_RETURN(HA_ADMIN_INTERNAL_ERROR);
+
+    maria_chk_init(param);
+    param->thd= thd;
+    param->op_name= "assign_to_keycache";
+    param->db_name= table->s->db.str;
+    param->table_name= table->s->table_name.str;
+    param->testflag= 0;
+    /* Never pass a built message as the format string */
+    _ma_check_print_error(param, "%s", errmsg);
+  }
+  DBUG_RETURN(error);
+#else
+  return HA_ADMIN_NOT_IMPLEMENTED;
+#endif
+}
+
+
+/*
+  Preload pages of the index file for a table into the key cache.
+
+  SYNOPSIS
+    preload_keys()
+    thd          Connection; supplies preload_buff_size and memory for the
+                 error descriptor
+    check_opt    Not used
+
+  RETURN
+    HA_ADMIN_OK               Index pages loaded
+    HA_ADMIN_FAILED           Index hints could not be processed, or
+                              maria_preload() failed (error sent to the user)
+    HA_ADMIN_INTERNAL_ERROR   Allocation of the error descriptor failed
+*/
+
+int ha_maria::preload_keys(THD * thd, HA_CHECK_OPT *check_opt)
+{
+  ulonglong map;
+  TABLE_LIST *table_list= table->pos_in_table_list;
+
+  DBUG_ENTER("ha_maria::preload_keys");
+
+  table->keys_in_use_for_query.clear_all();
+
+  /* Check validity of the index references */
+  if (table_list->process_index_hints(table))
+    DBUG_RETURN(HA_ADMIN_FAILED);
+
+  /* use all keys if there's no list specified by the user through hints */
+  map= ~(ulonglong) 0;
+  if (!table->keys_in_use_for_query.is_clear_all())
+    map= table->keys_in_use_for_query.to_ulonglong();
+
+  maria_extra(file, HA_EXTRA_PRELOAD_BUFFER_SIZE,
+              (void*) &thd->variables.preload_buff_size);
+
+  int error;
+
+  if ((error= maria_preload(file, map, table_list->ignore_leaves)))
+  {
+    char buf[MYSQL_ERRMSG_SIZE+20];
+    const char *errmsg;
+
+    switch (error) {
+    case HA_ERR_NON_UNIQUE_BLOCK_SIZE:
+      errmsg= "Indexes use different block sizes";
+      break;
+    case HA_ERR_OUT_OF_MEM:
+      errmsg= "Failed to allocate buffer";
+      break;
+    default:
+      my_snprintf(buf, sizeof(buf),
+                  "Failed to read from index file (errno: %d)", my_errno);
+      errmsg= buf;
+    }
+
+    HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
+    /* DBUG_RETURN, not return: DBUG_ENTER was executed above */
+    if (!param)
+      DBUG_RETURN(HA_ADMIN_INTERNAL_ERROR);
+
+    maria_chk_init(param);
+    param->thd= thd;
+    param->op_name= "preload_keys";
+    param->db_name= table->s->db.str;
+    param->table_name= table->s->table_name.str;
+    param->testflag= 0;
+    _ma_check_print_error(param, "%s", errmsg);
+    DBUG_RETURN(HA_ADMIN_FAILED);
+  }
+  DBUG_RETURN(HA_ADMIN_OK);
+}
+
+
+/*
+  Disable indexes, making it persistent if requested.
+
+  SYNOPSIS
+    disable_indexes()
+    mode        mode of operation:
+                HA_KEY_SWITCH_NONUNIQ      disable all non-unique keys
+                HA_KEY_SWITCH_ALL          disable all keys
+                HA_KEY_SWITCH_NONUNIQ_SAVE dis. non-uni. and make persistent
+                HA_KEY_SWITCH_ALL_SAVE     dis. all keys and make persistent
+
+  IMPLEMENTATION
+    HA_KEY_SWITCH_NONUNIQ       is not implemented.
+    HA_KEY_SWITCH_ALL_SAVE      is not implemented.
+
+  RETURN
+    0  ok
+    HA_ERR_WRONG_COMMAND  mode not implemented.
+    #  Result of maria_disable_indexes() for HA_KEY_SWITCH_ALL
+*/
+
+int ha_maria::disable_indexes(uint mode)
+{
+  if (mode == HA_KEY_SWITCH_ALL)
+  {
+    /* call a storage engine function to switch the key map */
+    return maria_disable_indexes(file);
+  }
+
+  if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
+  {
+    maria_extra(file, HA_EXTRA_NO_KEYS, 0);
+    info(HA_STATUS_CONST);                      // Read new key info
+    return 0;
+  }
+
+  /* mode not implemented */
+  return HA_ERR_WRONG_COMMAND;
+}
+
+
+/*
+  Enable indexes, making it persistent if requested.
+
+  SYNOPSIS
+    enable_indexes()
+    mode        mode of operation:
+                HA_KEY_SWITCH_NONUNIQ      enable all non-unique keys
+                HA_KEY_SWITCH_ALL          enable all keys
+                HA_KEY_SWITCH_NONUNIQ_SAVE en. non-uni. and make persistent
+                HA_KEY_SWITCH_ALL_SAVE     en. all keys and make persistent
+
+  DESCRIPTION
+    Enable indexes, which might have been disabled by disable_indexes() before.
+    The modes without _SAVE work only if both data and indexes are empty,
+    since the MARIA repair would enable them persistently.
+    To be sure in these cases, call handler::delete_all_rows() before.
+
+  IMPLEMENTATION
+    HA_KEY_SWITCH_NONUNIQ     is not implemented.
+    HA_KEY_SWITCH_ALL_SAVE    is not implemented.
+    HA_KEY_SWITCH_NONUNIQ_SAVE recreates the missing keys through repair():
+    first by sort and, if that fails with param->retry_repair set, once more
+    with the standard repair method.
+
+  RETURN
+    0  ok
+    !=0  Error, among others:
+    HA_ERR_CRASHED        data or index is non-empty. Delete all rows and retry.
+    HA_ERR_WRONG_COMMAND  mode not implemented.
+    HA_ERR_OUT_OF_MEM     the repair parameter block could not be allocated.
+*/
+
+int ha_maria::enable_indexes(uint mode)
+{
+  int error;
+  /* Remembered so the record count can be restored before a second repair */
+  ha_rows start_rows= file->state->records;
+  DBUG_PRINT("info", ("ha_maria::enable_indexes mode: %d", mode));
+  if (maria_is_all_keys_active(file->s->state.key_map, file->s->base.keys))
+  {
+    /* All indexes are enabled already. */
+    return 0;
+  }
+
+  if (mode == HA_KEY_SWITCH_ALL)
+  {
+    error= maria_enable_indexes(file);
+    /*
+      Do not try to repair on error,
+      as this could make the enabled state persistent,
+      but mode==HA_KEY_SWITCH_ALL forbids it.
+    */
+  }
+  else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE)
+  {
+    THD *thd= table->in_use;
+    HA_CHECK *param= (HA_CHECK*) thd->alloc(sizeof *param);
+    if (!param)
+    {
+      /*
+        This function returns handler error codes (see RETURN above), which
+        callers such as end_bulk_insert() pass on unchanged; report the
+        failed allocation as such instead of using a HA_ADMIN_* code.
+      */
+      return HA_ERR_OUT_OF_MEM;
+    }
+
+    const char *save_proc_info= thd_proc_info(thd, "Creating index");
+
+    maria_chk_init(param);
+    param->op_name= "recreating_index";
+    param->testflag= (T_SILENT | T_REP_BY_SORT | T_QUICK |
+                      T_CREATE_MISSING_KEYS | T_SAFE_REPAIR);
+    /*
+      Don't lock and unlock table if it's locked.
+      Normally table should be locked.  This test is mostly for safety.
+    */
+    if (likely(file->lock_type != F_UNLCK))
+      param->testflag|= T_NO_LOCKS;
+
+    if (file->create_unique_index_by_sort)
+      param->testflag|= T_CREATE_UNIQUE_BY_SORT;
+
+    if (bulk_insert_single_undo == BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR)
+    {
+      bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_REPAIR;
+      /*
+        Don't bump create_rename_lsn, because UNDO_BULK_INSERT
+        should not be skipped in case of crash during repair.
+      */
+      param->testflag|= T_NO_CREATE_RENAME_LSN;
+    }
+
+    param->myf_rw &= ~MY_WAIT_IF_FULL;
+    param->sort_buffer_length= THDVAR(thd,sort_buffer_size);
+    param->stats_method= (enum_handler_stats_method)THDVAR(thd,stats_method);
+    param->tmpdir= &mysql_tmpdir_list;
+    if ((error= (repair(thd, param, 0) != HA_ADMIN_OK)) && param->retry_repair)
+    {
+      sql_print_warning("Warning: Enabling keys got errno %d on %s.%s, "
+                        "retrying",
+                        my_errno, param->db_name, param->table_name);
+      /* This should never fail normally */
+      DBUG_ASSERT(thd->killed != 0);
+      /* Repairing by sort failed. Now try standard repair method. */
+      param->testflag &= ~T_REP_BY_SORT;
+      file->state->records= start_rows;
+      error= (repair(thd, param, 0) != HA_ADMIN_OK);
+      /*
+        If the standard repair succeeded, clear all error messages which
+        might have been set by the first repair. They can still be seen
+        with SHOW WARNINGS then.
+      */
+      if (!error)
+        thd->clear_error();
+    }
+    info(HA_STATUS_CONST);
+    thd_proc_info(thd, save_proc_info);
+  }
+  else
+  {
+    /* mode not implemented */
+    error= HA_ERR_WRONG_COMMAND;
+  }
+  DBUG_EXECUTE_IF("maria_flush_whole_log",
+                  {
+                    DBUG_PRINT("maria_flush_whole_log", ("now"));
+                    translog_flush(translog_get_horizon());
+                  });
+  DBUG_EXECUTE_IF("maria_crash_enable_index",
+                  {
+                    DBUG_PRINT("maria_crash_enable_index", ("now"));
+                    DBUG_SUICIDE();
+                  });
+  return error;
+}
+
+
+/*
+  Report whether the indexes of this table are disabled.
+
+  SYNOPSIS
+    indexes_are_disabled()
+      no parameters
+
+  RETURN
+    0  indexes are not disabled
+    1  all indexes are disabled
+   [2  non-unique indexes are disabled - NOT YET IMPLEMENTED]
+*/
+
+int ha_maria::indexes_are_disabled(void)
+{
+  /* The answer comes straight from the engine */
+  const int disabled_state= maria_indexes_are_disabled(file);
+  return disabled_state;
+}
+
+
+/*
+  Prepare for a many-rows insert operation,
+  e.g. disable indexes (if they can be recreated fast) or
+  activate special bulk-insert optimizations.
+
+  SYNOPSIS
+    start_bulk_insert(rows, flags)
+    rows        Rows to be inserted
+                0 if we don't know
+    flags       Flags to control index creation
+
+  DESCRIPTION
+    On return, can_enable_indexes tells end_bulk_insert() whether at least
+    one index was disabled here and therefore has to be re-enabled.
+
+  NOTICE
+    Do not forget to call end_bulk_insert() later!
+*/
+
+void ha_maria::start_bulk_insert(ha_rows rows, uint flags)
+{
+  DBUG_ENTER("ha_maria::start_bulk_insert");
+  THD *thd= table->in_use;
+  MARIA_SHARE *share= file->s;
+  bool index_disabled= 0;
+  DBUG_PRINT("info", ("start_bulk_insert: rows %lu", (ulong) rows));
+
+  /* don't enable row cache if too few rows */
+  if ((!rows || rows > MARIA_MIN_ROWS_TO_USE_WRITE_CACHE) && !has_long_unique())
+  {
+    ulonglong size= thd->variables.read_buff_size;
+    ulong cache_size;
+    if (rows)
+    {
+      if (file->state->records)
+      {
+        MARIA_INFO maria_info;
+        maria_status(file, &maria_info, HA_STATUS_NO_LOCK |HA_STATUS_VARIABLE);
+        set_if_smaller(size, maria_info.mean_reclength * rows);
+      }
+      else if (table->s->avg_row_length)
+        set_if_smaller(size, (size_t) (table->s->avg_row_length * rows));
+    }
+    /*
+      The cache size is handed to maria_extra() through a pointer to an
+      ulong (the same way extra_opt() passes it), so the variable whose
+      address we pass must really be an ulong and not an ulonglong.
+    */
+    cache_size= (ulong) size;                   // Safe because of limits
+    maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &cache_size);
+  }
+
+  can_enable_indexes= (maria_is_all_keys_active(share->state.key_map,
+                                                share->base.keys));
+  bulk_insert_single_undo= BULK_INSERT_NONE;
+
+  if (!(specialflag & SPECIAL_SAFE_MODE))
+  {
+    /*
+      Only disable old index if the table was empty and we are inserting
+      a lot of rows.
+      We should not do this for only a few rows as this is slower and
+      we don't want to update the key statistics based of only a few rows.
+      Index file rebuild requires an exclusive lock, so if versioning is on
+      don't do it (see how ha_maria::store_lock() tries to predict repair).
+      We can repair index only if we have an exclusive (TL_WRITE) lock or
+      if this is inside an ALTER TABLE, in which case lock_type == TL_UNLOCK.
+
+      To see if table is empty, we shouldn't rely on the old record
+      count from our transaction's start (if that old count is 0 but
+      now there are records in the table, we would wrongly destroy
+      them).  So we need to look at share->state.state.records.  As a
+      safety net for now, we don't remove the test of
+      file->state->records, because there is uncertainty on what will
+      happen during repair if the two states disagree.
+
+      We also have to check in case of transactional tables that the
+      user has not used LOCK TABLE on the table twice.
+    */
+    if ((file->state->records == 0) &&
+        (share->state.state.records == 0) && can_enable_indexes &&
+        (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES) &&
+        (file->lock.type == TL_WRITE || file->lock.type == TL_UNLOCK) &&
+        (!share->have_versioning || !share->now_transactional ||
+         file->used_tables->use_count == 1))
+    {
+      /**
+         @todo for a single-row INSERT SELECT, we will go into repair, which
+         is more costly (flushes, syncs) than a row write.
+      */
+      if (file->open_flags & HA_OPEN_INTERNAL_TABLE)
+      {
+        /* Internal table; If we get a duplicate something is very wrong */
+        file->update|= HA_STATE_CHANGED;
+        index_disabled= share->base.keys > 0;
+        maria_clear_all_keys_active(file->s->state.key_map);
+      }
+      else
+      {
+        my_bool all_keys= MY_TEST(flags & HA_CREATE_UNIQUE_INDEX_BY_SORT);
+        /*
+          Deactivate all indexes that can be recreated fast.
+          These include packed keys on which sorting will use more temporary
+          space than the max allowed file length or for which the unpacked keys
+          will take much more space than packed keys.
+          Note that 'rows' may be zero for the case when we don't know how many
+          rows we will put into the file.
+        */
+        MARIA_KEYDEF *key= share->keyinfo;
+        uint i;
+
+        DBUG_ASSERT(share->state.state.records == 0 &&
+                    (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES));
+        for (i=0 ; i < share->base.keys ; i++,key++)
+        {
+          if (!(key->flag & (HA_SPATIAL | HA_AUTO_KEY | HA_RTREE_INDEX)) &&
+              ! maria_too_big_key_for_sort(key,rows) && share->base.auto_key != i+1 &&
+              (all_keys || !(key->flag & HA_NOSAME)) &&
+              table->key_info[i].algorithm != HA_KEY_ALG_LONG_HASH)
+          {
+            maria_clear_key_active(share->state.key_map, i);
+            index_disabled= 1;
+            file->update|= HA_STATE_CHANGED;
+            file->create_unique_index_by_sort= all_keys;
+          }
+        }
+      }
+      if (share->now_transactional)
+      {
+        bulk_insert_single_undo= BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR;
+        write_log_record_for_bulk_insert(file);
+        _ma_tmp_disable_logging_for_table(file, TRUE);
+        /*
+          Pages currently in the page cache have type PAGECACHE_LSN_PAGE, we
+          are not allowed to overwrite them with PAGECACHE_PLAIN_PAGE, so
+          throw them away. It is not losing data, because we just wrote and
+          forced an UNDO which will for sure empty the table if we crash. The
+          upcoming unique-key insertions however need a proper index, so we
+          cannot leave the corrupted on-disk index file, thus we truncate it.
+        */
+        maria_delete_all_rows(file);
+      }
+    }
+    else if (!file->bulk_insert &&
+             (!rows || rows >= MARIA_MIN_ROWS_TO_USE_BULK_INSERT))
+    {
+      maria_init_bulk_insert(file,
+                             (size_t) thd->variables.bulk_insert_buff_size,
+                             rows);
+    }
+  }
+  /* From here on the flag means "end_bulk_insert() must re-enable indexes" */
+  can_enable_indexes= index_disabled;
+  DBUG_VOID_RETURN;
+}
+
+
+/*
+  Finish the special bulk-insert optimizations
+  which have been activated by start_bulk_insert().
+
+  SYNOPSIS
+    end_bulk_insert()
+    no arguments
+
+  DESCRIPTION
+    All clean-up steps are attempted; the first error met is the one
+    that is reported.
+
+  RETURN
+    0     OK
+    != 0  Error
+*/
+
+int ha_maria::end_bulk_insert()
+{
+  int result, rc;
+  my_bool skip_enable= file->s->deleting;
+  DBUG_ENTER("ha_maria::end_bulk_insert");
+
+  result= maria_end_bulk_insert(file, skip_enable);
+  if (result)
+    skip_enable= 1;
+
+  rc= maria_extra(file, HA_EXTRA_NO_CACHE, 0);
+  if (rc)
+  {
+    if (!result)
+      result= rc;
+    skip_enable= 1;
+  }
+
+  /* Recreate the disabled indexes only if nothing went wrong so far */
+  if (!skip_enable && can_enable_indexes)
+  {
+    rc= enable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE);
+    if (rc && !result)
+      result= rc;
+  }
+
+  if (bulk_insert_single_undo != BULK_INSERT_NONE)
+  {
+    /*
+      Table was transactional just before start_bulk_insert().
+      No need to flush pages if we did a repair (which already flushed).
+    */
+    rc= _ma_reenable_logging_for_table(file,
+                                       bulk_insert_single_undo ==
+                                       BULK_INSERT_SINGLE_UNDO_AND_NO_REPAIR);
+    if (rc && !result)
+      result= rc;
+    bulk_insert_single_undo= BULK_INSERT_NONE;  // Safety
+  }
+  can_enable_indexes= 0;
+  DBUG_RETURN(result);
+}
+
+
+/*
+  Zerofill a table that is only marked as moved and, if
+  maria_recover_options is set, check and repair a crashed one.
+
+  While the check/repair runs, the query of the THD is replaced by the
+  table name; the original query is put back before returning.
+
+  RETURN
+    0  table was zerofilled, found to be ok, or repaired
+    1  zerofill or repair failed, or auto-repair is not enabled
+*/
+
+bool ha_maria::check_and_repair(THD *thd)
+{
+  int result= 1, table_crashed;
+  HA_CHECK_OPT opt;
+  const CSET_STRING saved_query= thd->query_string;
+  DBUG_ENTER("ha_maria::check_and_repair");
+
+  opt.init();
+  opt.flags= T_MEDIUM | T_AUTO_REPAIR;
+
+  const bool only_moved=
+    ((file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED)) ==
+     STATE_MOVED);
+  if (!aria_readonly && only_moved)
+  {
+    /* Remove error about crashed table */
+    thd->get_stmt_da()->clear_warning_info(thd->query_id);
+    push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
+                        ER_CRASHED_ON_USAGE,
+                        "Zerofilling moved table %s", table->s->path.str);
+    sql_print_information("Zerofilling moved table: '%s'",
+                          table->s->path.str);
+    result= zerofill(thd, &opt);
+    if (!result)
+      DBUG_RETURN(0);
+  }
+
+  /*
+    if we got this far - the table is crashed.
+    but don't auto-repair if maria_recover_options is not set
+  */
+  if (!maria_recover_options)
+    DBUG_RETURN(result);
+
+  result= 0;
+  // Don't use quick if deleted rows
+  if ((maria_recover_options & HA_RECOVER_QUICK) && !file->state->del)
+    opt.flags |= T_QUICK;
+
+  thd->set_query((char*) table->s->table_name.str,
+                 (uint) table->s->table_name.length, system_charset_info);
+
+  table_crashed= maria_is_crashed(file);
+  if (!table_crashed)
+  {
+    sql_print_warning("Checking table: '%s'", table->s->path.str);
+    table_crashed= check(thd, &opt);
+  }
+
+  if (table_crashed)
+  {
+    const bool saved_log_all_errors= thd->log_all_errors;
+    sql_print_warning("Recovering table: '%s'", table->s->path.str);
+    thd->log_all_errors|= (thd->variables.log_warnings > 2);
+
+    opt.flags= T_AUTO_REPAIR;
+    if (maria_recover_options & HA_RECOVER_BACKUP)
+      opt.flags|= T_BACKUP_DATA;
+    if (!(maria_recover_options & HA_RECOVER_FORCE))
+      opt.flags|= T_SAFE_REPAIR;
+
+    if (repair(thd, &opt))
+      result= 1;
+    thd->log_all_errors= saved_log_all_errors;
+  }
+  thd->set_query(saved_query);
+  DBUG_RETURN(result);
+}
+
+
+/*
+  Tell if the table has to be treated as crashed: either one of the
+  crashed/moved state flags is set, or locking is disabled while the
+  open count in the state is non-zero.
+*/
+
+bool ha_maria::is_crashed() const
+{
+  if (file->s->state.changed & (STATE_CRASHED_FLAGS | STATE_MOVED))
+    return true;
+  return (my_disable_locking && file->s->state.open_count);
+}
+
+/*
+ Refuse an operation when the table is locked with
+ TL_WRITE_CONCURRENT_INSERT and table->s->sequence is not set.
+
+ In that case ER_CHECK_NOT_IMPLEMENTED is raised with 'msg' as argument and
+ the macro does 'return 1' out of the CALLING function, so it can only be
+ used in functions whose return type accepts the value 1.
+ Relies on 'file' and 'table' being visible at the place of use.
+*/
+#define CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING(msg) \
+ do { \
+ if (file->lock.type == TL_WRITE_CONCURRENT_INSERT && !table->s->sequence) \
+ { \
+ my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), msg); \
+ return 1; \
+ } \
+ } while(0)
+
+/*
+  Replace the row old_data with new_data.
+  Returns 1 (after raising an error) under a concurrent-insert write lock,
+  otherwise the result of maria_update().
+*/
+
+int ha_maria::update_row(const uchar * old_data, const uchar * new_data)
+{
+  CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("UPDATE in WRITE CONCURRENT");
+  const int rc= maria_update(file, old_data, new_data);
+  return rc;
+}
+
+
+/*
+  Delete the row given in buf.
+  Returns 1 (after raising an error) under a concurrent-insert write lock,
+  otherwise the result of maria_delete().
+*/
+
+int ha_maria::delete_row(const uchar * buf)
+{
+  CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("DELETE in WRITE CONCURRENT");
+  const int rc= maria_delete(file, buf);
+  return rc;
+}
+
+/* Read a row through the active index; returns the result of maria_rkey() */
+
+int ha_maria::index_read_map(uchar * buf, const uchar * key,
+                             key_part_map keypart_map,
+                             enum ha_rkey_function find_flag)
+{
+  DBUG_ASSERT(inited == INDEX);
+  register_handler(file);
+  return maria_rkey(file, buf, active_index, key, keypart_map, find_flag);
+}
+
+
+/*
+  Read a row through the given index, without a preceding index_init().
+  The pushed index condition is active only for the duration of this call.
+  Returns the result of maria_rkey().
+*/
+
+int ha_maria::index_read_idx_map(uchar * buf, uint index, const uchar * key,
+                                 key_part_map keypart_map,
+                                 enum ha_rkey_function find_flag)
+{
+  register_handler(file);
+
+  end_range= NULL;
+  /* Use the pushed index condition if it matches the index we're scanning */
+  const bool use_pushed_cond= (index == pushed_idx_cond_keyno);
+  if (use_pushed_cond)
+    ma_set_index_cond_func(file, handler_index_cond_check, this);
+
+  const int rc= maria_rkey(file, buf, index, key, keypart_map, find_flag);
+
+  /* Always remove the callback again, whether it was installed or not */
+  ma_set_index_cond_func(file, NULL, 0);
+  return rc;
+}
+
+
+/*
+  Read through the active index with HA_READ_PREFIX_LAST as search mode;
+  returns the result of maria_rkey().
+*/
+
+int ha_maria::index_read_last_map(uchar * buf, const uchar * key,
+                                  key_part_map keypart_map)
+{
+  int rc;
+  DBUG_ENTER("ha_maria::index_read_last_map");
+  DBUG_ASSERT(inited == INDEX);
+  register_handler(file);
+  rc= maria_rkey(file, buf, active_index, key, keypart_map,
+                 HA_READ_PREFIX_LAST);
+  DBUG_RETURN(rc);
+}
+
+
+/* Step forward in the active index; returns the result of maria_rnext() */
+
+int ha_maria::index_next(uchar * buf)
+{
+  DBUG_ASSERT(inited == INDEX);
+  register_handler(file);
+  return maria_rnext(file, buf, active_index);
+}
+
+
+/* Step backward in the active index; returns the result of maria_rprev() */
+
+int ha_maria::index_prev(uchar * buf)
+{
+  DBUG_ASSERT(inited == INDEX);
+  register_handler(file);
+  return maria_rprev(file, buf, active_index);
+}
+
+
+/* Position on the first entry of the active index (maria_rfirst()) */
+
+int ha_maria::index_first(uchar * buf)
+{
+  DBUG_ASSERT(inited == INDEX);
+  register_handler(file);
+  return maria_rfirst(file, buf, active_index);
+}
+
+
+/* Position on the last entry of the active index (maria_rlast()) */
+
+int ha_maria::index_last(uchar * buf)
+{
+  DBUG_ASSERT(inited == INDEX);
+  register_handler(file);
+  return maria_rlast(file, buf, active_index);
+}
+
+
+/*
+  Fetch the next row from maria_rnext_same(), retrying for as long as the
+  engine answers HA_ERR_RECORD_DELETED.
+  The key and length arguments are not used.
+*/
+
+int ha_maria::index_next_same(uchar * buf,
+                              const uchar *key __attribute__ ((unused)),
+                              uint length __attribute__ ((unused)))
+{
+  int rc;
+  DBUG_ASSERT(inited == INDEX);
+  register_handler(file);
+  /*
+    TODO: Delete this loop in Maria 1.5 as versioning will ensure this never
+    happens
+  */
+  while ((rc= maria_rnext_same(file,buf)) == HA_ERR_RECORD_DELETED)
+  {
+    /* Row was reported as deleted; ask for the next one */
+  }
+  return rc;
+}
+
+
+/*
+  Make idx the active index and, if the pushed index condition was pushed
+  for that very index, install the condition check callback.
+  Always returns 0.
+*/
+
+int ha_maria::index_init(uint idx, bool sorted)
+{
+  active_index= idx;
+  if (idx != pushed_idx_cond_keyno)
+    return 0;
+  ma_set_index_cond_func(file, handler_index_cond_check, this);
+  return 0;
+}
+
+
+/*
+  End an index scan: no index is active any more (MAX_KEY), the index
+  condition callback is removed and the MRR implementation is closed.
+  Always returns 0.
+*/
+
+int ha_maria::index_end()
+{
+  active_index= MAX_KEY;
+  ma_set_index_cond_func(file, NULL, 0);
+  in_range_check_pushed_down= FALSE;
+  ds_mrr.dsmrr_close();
+  return 0;
+}
+
+
+/*
+  Prepare for reading rows by position (scan == false) or for a full
+  scan (scan == true).
+*/
+
+int ha_maria::rnd_init(bool scan)
+{
+  if (!scan)
+    return maria_reset(file);                   // Free buffers
+  return maria_scan_init(file);
+}
+
+
+/* Close the MRR implementation and end the scan. Always returns 0. */
+
+int ha_maria::rnd_end()
+{
+  ds_mrr.dsmrr_close();
+  /* Safe to call even if we don't have started a scan */
+  maria_scan_end(file);
+  return 0;
+}
+
+
+/* Read the next row of the scan into buf; returns maria_scan()'s result */
+
+int ha_maria::rnd_next(uchar *buf)
+{
+  register_handler(file);
+  const int rc= maria_scan(file, buf);
+  return rc;
+}
+
+
+/*
+  Store the current scan position in remember_pos, using the
+  scan_remember_pos function of the share.
+*/
+
+int ha_maria::remember_rnd_pos()
+{
+  register_handler(file);
+  return file->s->scan_remember_pos(file, &remember_pos);
+}
+
+
+/*
+  Go back to the position stored by remember_rnd_pos() and read the next
+  row from there. Returns the error of the restore, if any, otherwise the
+  result of rnd_next().
+*/
+
+int ha_maria::restart_rnd_next(uchar *buf)
+{
+  register_handler(file);
+  const int rc= file->s->scan_restore_pos(file, remember_pos);
+  if (rc)
+    return rc;
+  return rnd_next(buf);
+}
+
+
+/*
+  Read the row whose position is stored in pos (ref_length bytes);
+  returns the result of maria_rrnd().
+*/
+
+int ha_maria::rnd_pos(uchar *buf, uchar *pos)
+{
+  register_handler(file);
+  return maria_rrnd(file, buf, my_get_ptr(pos, ref_length));
+}
+
+
+/*
+  Save the engine's current row position (maria_position()) into ref,
+  using ref_length bytes. The record argument is not used.
+*/
+
+void ha_maria::position(const uchar *record)
+{
+  const my_off_t current_pos= maria_position(file);
+  my_store_ptr(ref, ref_length, current_pos);
+}
+
+
+/*
+ Refresh handler statistics and table metadata from the engine.
+
+ maria_status() is called once with 'flag'; after that each HA_STATUS_*
+ bit set in 'flag' selects one group of values which is copied from the
+ MARIA_INFO result into 'stats', the TABLE_SHARE or handler members.
+ The result of maria_status() is ignored and the function always returns 0.
+*/
+int ha_maria::info(uint flag)
+{
+ MARIA_INFO maria_info;
+ char name_buff[FN_REFLEN];
+
+ (void) maria_status(file, &maria_info, flag);
+ if (flag & HA_STATUS_VARIABLE)
+ {
+ stats.records= maria_info.records;
+ stats.deleted= maria_info.deleted;
+ stats.data_file_length= maria_info.data_file_length;
+ stats.index_file_length= maria_info.index_file_length;
+ stats.delete_length= maria_info.delete_length;
+ stats.check_time= maria_info.check_time;
+ stats.mean_rec_length= maria_info.mean_reclength;
+ stats.checksum= file->state->checksum;
+ }
+ if (flag & HA_STATUS_CONST)
+ {
+ TABLE_SHARE *share= table->s;
+ stats.max_data_file_length= maria_info.max_data_file_length;
+ stats.max_index_file_length= maria_info.max_index_file_length;
+ stats.create_time= maria_info.create_time;
+ ref_length= maria_info.reflength;
+ share->db_options_in_use= maria_info.options;
+ stats.block_size= maria_block_size;
+ stats.mrr_length_per_rec= maria_info.reflength + 8; // 8 = MY_MAX(sizeof(void *))
+
+ /* Update share */
+ /* keys_in_use: all keys of the share, restricted to the engine's key map */
+ share->keys_in_use.set_prefix(share->keys);
+ share->keys_in_use.intersect_extended(maria_info.key_map);
+ share->keys_for_keyread.intersect(share->keys_in_use);
+ share->db_record_offset= maria_info.record_offset;
+ if (share->key_parts)
+ {
+ /*
+ Copy one rec_per_key value per key part, starting at the array of the
+ first key; the engine's double values are rounded to the nearest ulong.
+ */
+ ulong *to= table->key_info[0].rec_per_key, *end;
+ double *from= maria_info.rec_per_key;
+ for (end= to+ share->key_parts ; to < end ; to++, from++)
+ *to= (ulong) (*from + 0.5);
+ }
+
+ /*
+ Set data_file_name and index_file_name to point at the symlink value
+ if table is symlinked (Ie; Real name is not same as generated name)
+ */
+ data_file_name= index_file_name= 0;
+ fn_format(name_buff, file->s->open_file_name.str, "", MARIA_NAME_DEXT,
+ MY_APPEND_EXT | MY_UNPACK_FILENAME);
+ if (strcmp(name_buff, maria_info.data_file_name) &&
+ maria_info.data_file_name[0])
+ data_file_name= maria_info.data_file_name;
+ /* name_buff is reused for the generated index file name */
+ fn_format(name_buff, file->s->open_file_name.str, "", MARIA_NAME_IEXT,
+ MY_APPEND_EXT | MY_UNPACK_FILENAME);
+ if (strcmp(name_buff, maria_info.index_file_name) &&
+ maria_info.index_file_name[0])
+ index_file_name=maria_info.index_file_name;
+ }
+ if (flag & HA_STATUS_ERRKEY)
+ {
+ errkey= maria_info.errkey;
+ /* Position of the row that caused the duplicate, stored like 'ref' */
+ my_store_ptr(dup_ref, ref_length, maria_info.dup_key_pos);
+ }
+ if (flag & HA_STATUS_TIME)
+ stats.update_time= maria_info.update_time;
+ if (flag & HA_STATUS_AUTO)
+ stats.auto_increment_value= maria_info.auto_increment;
+
+ return 0;
+}
+
+
+/*
+ Pass an extra() hint on to maria_extra().
+
+ Returns 0 without calling the engine for HA_EXTRA_KEYREAD when
+ SPECIAL_SAFE_MODE is set in specialflag, and for HA_EXTRA_WRITE_CACHE when
+ has_long_unique() is true. Otherwise returns the result of maria_extra().
+
+ For the HA_EXTRA_PREPARE_FOR_DROP / _RENAME / _FORCED_CLOSE operations on
+ a table that is currently transactional, file->trn is pointed at THD_TRN
+ for the duration of the call; the previous value of file->trn is always
+ restored before returning.
+*/
+int ha_maria::extra(enum ha_extra_function operation)
+{
+ int tmp;
+ TRN *old_trn= file->trn;
+ if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_KEYREAD)
+ return 0;
+#ifdef NOT_USED
+ if (operation == HA_EXTRA_MMAP && !opt_maria_use_mmap)
+ return 0;
+#endif
+ if (operation == HA_EXTRA_WRITE_CACHE && has_long_unique())
+ return 0;
+
+ /*
+ We have to set file->trn here because in some cases we call
+ extern_lock(F_UNLOCK) (which resets file->trn) followed by maria_close()
+ without calling commit/rollback in between. If file->trn is not set
+ we can't remove file->share from the transaction list in the extra() call.
+
+ In current code we don't have to do this for HA_EXTRA_PREPARE_FOR_RENAME
+ as this is only used the intermediate table used by ALTER TABLE which
+ is not part of the transaction (it's not in the TRN list). Better to
+ keep this for now, to not break anything in a stable release.
+ When HA_EXTRA_PREPARE_FOR_RENAME is not handled below, we can change
+ the warnings in _ma_remove_table_from_trnman() to asserts.
+
+ table->in_use is not set in the case this is a done as part of closefrm()
+ as part of drop table.
+ */
+
+ if (file->s->now_transactional && table->in_use &&
+ (operation == HA_EXTRA_PREPARE_FOR_DROP ||
+ operation == HA_EXTRA_PREPARE_FOR_RENAME ||
+ operation == HA_EXTRA_PREPARE_FOR_FORCED_CLOSE))
+ {
+ /* The local 'thd' is needed because the THD_TRN macro refers to it */
+ THD *thd= table->in_use;
+ file->trn= THD_TRN;
+ }
+ DBUG_ASSERT(file->s->base.born_transactional || file->trn == 0 ||
+ file->trn == &dummy_transaction_object);
+
+ tmp= maria_extra(file, operation, 0);
+ /*
+ Restore trn if it was changed above.
+ Note that table could be removed from trn->used_tables and
+ trn->used_instances if trn was set and some of the above operations
+ was used. This is ok as the table should not be part of any transaction
+ after this and thus doesn't need to be part of any of the above lists.
+ */
+ file->trn= old_trn;
+ return tmp;
+}
+
+/*
+  Reset the handler between statements: remove the index condition
+  callback, close the MRR implementation and clear TRN_STATE_INFO_LOGGED
+  on the attached transaction, if there is one.
+  Returns the result of maria_reset().
+*/
+
+int ha_maria::reset(void)
+{
+  ma_set_index_cond_func(file, NULL, 0);
+  ds_mrr.dsmrr_close();
+
+  TRN *trn= file->trn;
+  if (trn)
+  {
+    /* Next statement is a new statement. Ensure it's logged */
+    trnman_set_flags(trn, trnman_get_flags(trn) & ~TRN_STATE_INFO_LOGGED);
+  }
+  return maria_reset(file);
+}
+
+/*
+  To be used with WRITE_CACHE and EXTRA_CACHE.
+  cache_size is handed to maria_extra() through a pointer.
+  HA_EXTRA_WRITE_CACHE is ignored (0 is returned) in safe mode.
+*/
+
+int ha_maria::extra_opt(enum ha_extra_function operation, ulong cache_size)
+{
+  const bool safe_mode= (specialflag & SPECIAL_SAFE_MODE) != 0;
+  if (safe_mode && operation == HA_EXTRA_WRITE_CACHE)
+    return 0;
+  return maria_extra(file, operation, (void*) &cache_size);
+}
+
+
+/*
+  Decide whether the given handler error should trigger an automatic
+  repair of the table.
+*/
+
+bool ha_maria::auto_repair(int error) const
+{
+  /* Always auto-repair moved tables (error == HA_ERR_OLD_FILE) */
+  if (error == HA_ERR_OLD_FILE)
+    return true;
+  /* A crashed table is repaired only if some recover option is set */
+  return (error == HA_ERR_CRASHED_ON_USAGE &&
+          MY_TEST(maria_recover_options & HA_RECOVER_ANY));
+}
+
+
+/*
+  Remove all rows from the table.
+
+  Returns 1 (after raising an error) under a concurrent-insert write lock,
+  the error of the implicit commit if that fails, and otherwise the result
+  of maria_delete_all_rows().
+*/
+
+int ha_maria::delete_all_rows()
+{
+  THD *thd= table->in_use;
+  TRN *trn= file->trn;
+  CHECK_UNTIL_WE_FULLY_IMPLEMENTED_VERSIONING("TRUNCATE in WRITE CONCURRENT");
+#ifdef EXTRA_DEBUG
+  if (trn && ! (trnman_get_flags(trn) & TRN_STATE_INFO_LOGGED))
+  {
+    trnman_set_flags(trn, trnman_get_flags(trn) | TRN_STATE_INFO_LOGGED |
+                     TRN_STATE_TABLES_CAN_CHANGE);
+    (void) translog_log_debug_info(trn, LOGREC_DEBUG_INFO_QUERY,
+                                   (uchar*) thd->query(), thd->query_length());
+  }
+#endif
+  /*
+    If we are under LOCK TABLES, we have to do a commit as
+    delete_all_rows() can't be rolled back
+  */
+  if (thd->locked_tables_mode && trn && trnman_has_locked_tables(trn))
+  {
+    const int commit_error= implicit_commit(thd, 1);
+    if (commit_error)
+      return commit_error;
+  }
+
+  /* Note that this can't be rolled back */
+  return maria_delete_all_rows(file);
+}
+
+
+/*
+  Drop the table called name. The query text of the current THD is written
+  as a debug-info record first; the result of that logging is ignored.
+*/
+
+int ha_maria::delete_table(const char *name)
+{
+  THD *cur_thd= current_thd;
+  (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
+                                 (uchar*) cur_thd->query(),
+                                 cur_thd->query_length());
+  return maria_delete_table(name);
+}
+
+
+/*
+  This is mainly for temporary tables, so no logging necessary.
+  The handler is closed and the table files are removed; the results of
+  both calls are ignored.
+*/
+
+void ha_maria::drop_table(const char *name)
+{
+  DBUG_ASSERT(file->s->temporary);
+  (void) ha_close();
+  (void) maria_delete_table_files(name, 1, MY_WME);
+}
+
+
+/*
+  Attach the handler to another TABLE / TABLE_SHARE and, if the engine
+  file is open, let its external_ref follow the new TABLE.
+*/
+
+void ha_maria::change_table_ptr(TABLE *table_arg, TABLE_SHARE *share)
+{
+  handler::change_table_ptr(table_arg, share);
+  if (!file)
+    return;
+  file->external_ref= table_arg;
+}
+
+
+/*
+ Lock (lock_type != F_UNLCK) or unlock (lock_type == F_UNLCK) the table.
+
+ For tables born transactional this also maintains the transaction's
+ locked-tables counter, switches logging off/on and decides about
+ autocommit; on the last unlock with autocommit the transaction is
+ committed here. In all cases maria_lock_database() is called at the end.
+
+ RETURN
+ 0 ok
+ 1 re-enabling of logging failed during unlock
+ (returned at once, maria_lock_database() is not called)
+ HA_ERR_INTERNAL_ERROR ma_commit() failed, unless maria_lock_database()
+ failed too, in which case its error is returned
+ other the non-zero result of maria_lock_database()
+*/
+int ha_maria::external_lock(THD *thd, int lock_type)
+{
+ int result= 0, result2;
+ DBUG_ENTER("ha_maria::external_lock");
+ file->external_ref= (void*) table; // For ma_killed()
+ /*
+ We don't test now_transactional because it may vary between lock/unlock
+ and thus confuse our reference counting.
+ It is critical to skip non-transactional tables: user-visible temporary
+ tables get an external_lock() when read/written for the first time, but no
+ corresponding unlock (they just stay locked and are later dropped while
+ locked); if a tmp table was transactional, "SELECT FROM non_tmp, tmp"
+ would never commit as its "locked_tables" count would stay 1.
+ When Maria has has_transactions()==TRUE, open_temporary_table()
+ (sql_base.cc) will use TRANSACTIONAL_TMP_TABLE and thus the
+ external_lock(F_UNLCK) will happen and we can then allow the user to
+ create transactional temporary tables.
+ */
+ if (file->s->base.born_transactional)
+ {
+ /* Transactional table */
+ if (lock_type != F_UNLCK)
+ {
+ if (file->trn)
+ {
+ /* This can only happen with tables created with clone() */
+ DBUG_PRINT("info",("file->trn: %p", file->trn));
+ trnman_increment_locked_tables(file->trn);
+ }
+
+ if (!thd->transaction->on)
+ {
+ /*
+ No need to log REDOs/UNDOs. If this is an internal temporary table
+ which will be renamed to a permanent table (like in ALTER TABLE),
+ the rename happens after unlocking so will be durable (and the table
+ will get its create_rename_lsn).
+ Note: if we wanted to enable users to have an old backup and apply
+ tons of archived logs to roll-forward, we could then not disable
+ REDOs/UNDOs in this case.
+ */
+ DBUG_PRINT("info", ("Disabling logging for table"));
+ _ma_tmp_disable_logging_for_table(file, TRUE);
+ file->autocommit= 0;
+ }
+ else
+ file->autocommit= !(thd->variables.option_bits &
+ (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN));
+#ifndef ARIA_HAS_TRANSACTIONS
+ /*
+ Until Aria has full transactions support, including MVCC support for
+ delete and update and purging of old states, we have to commit for
+ every statement.
+ */
+ /* This overrides whatever value was assigned to autocommit just above */
+ file->autocommit=1;
+#endif
+ }
+ else
+ {
+ /* We have to test for THD_TRN to protect against implicit commits */
+ TRN *trn= (file->trn != &dummy_transaction_object && THD_TRN ? file->trn : 0);
+ /* End of transaction */
+
+ /*
+ We always re-enable, don't rely on thd->transaction.on as it is
+ sometimes reset to true after unlocking (see mysql_truncate() for a
+ partitioned table based on Maria).
+ Note that we can come here without having an exclusive lock on the
+ table, for example in this case:
+ external_lock(F_(WR|RD)LCK); thr_lock() which fails due to lock
+ abortion; external_lock(F_UNLCK). Fortunately, the re-enabling happens
+ only if we were the thread which disabled logging.
+ */
+ if (_ma_reenable_logging_for_table(file, TRUE))
+ DBUG_RETURN(1);
+ /* 'trn' was picked up above, before file->trn is reset here */
+ _ma_reset_trn_for_table(file);
+ /*
+ Ensure that file->state points to the current number of rows. This
+ is needed if someone calls maria_info() without first doing an
+ external lock of the table
+ */
+ file->state= &file->s->state.state;
+ if (trn)
+ {
+ DBUG_PRINT("info",
+ ("locked_tables: %u", trnman_has_locked_tables(trn)));
+ DBUG_ASSERT(trnman_has_locked_tables(trn) > 0);
+ /* True only when this unlock brings the locked-tables count to 0 */
+ if (trnman_has_locked_tables(trn) &&
+ !trnman_decrement_locked_tables(trn))
+ {
+ /*
+ OK should not have been sent to client yet (ACID).
+ This is a bit excessive, ACID requires this only if there are some
+ changes to commit (rollback shouldn't be tested).
+ */
+ DBUG_ASSERT(!thd->get_stmt_da()->is_sent() ||
+ thd->killed);
+ /*
+ If autocommit, commit transaction. This can happen when open and
+ lock tables as part of creating triggers, in which case commit
+ is not called.
+ Until ARIA_HAS_TRANSACTIONS is not defined, always commit.
+ */
+ if (file->autocommit)
+ {
+ if (ma_commit(trn))
+ result= HA_ERR_INTERNAL_ERROR;
+ /* The THD no longer has a Maria transaction attached */
+ thd_set_ha_data(thd, maria_hton, 0);
+ }
+ }
+ trnman_set_flags(trn, trnman_get_flags(trn) & ~ TRN_STATE_INFO_LOGGED);
+ }
+ }
+ } /* if transactional table */
+ /*
+ For tables with table->s->tmp_table set, any lock request is passed on as
+ F_EXTRA_LCK; an unlock stays F_UNLCK. Other tables use lock_type as is.
+ */
+ if ((result2= maria_lock_database(file, !table->s->tmp_table ?
+ lock_type : ((lock_type == F_UNLCK) ?
+ F_UNLCK : F_EXTRA_LCK))))
+ result= result2;
+ if (!file->s->base.born_transactional)
+ file->state= &file->s->state.state; // Restore state if clone
+
+ /* Remember stack end for this thread */
+ file->stack_end_ptr= &ha_thd()->mysys_var->stack_ends_here;
+ DBUG_RETURN(result);
+}
+
+/*
+  Called at the start of a statement for a table that is already locked.
+  For tables born transactional, tells the transaction manager that a new
+  statement begins; nothing is done for other tables.
+  Always returns 0.
+*/
+
+int ha_maria::start_stmt(THD *thd, thr_lock_type lock_type)
+{
+  if (!file->s->base.born_transactional)
+    return 0;
+
+  TRN *trn= THD_TRN;
+  DBUG_ASSERT(trn); // this may be called only after external_lock()
+  DBUG_ASSERT(trnman_has_locked_tables(trn));
+  DBUG_ASSERT(lock_type != TL_UNLOCK);
+  DBUG_ASSERT(file->trn == trn);
+
+  /*
+    As external_lock() was already called, don't increment locked_tables.
+    Note that we call the function below possibly several times when
+    statement starts (once per table). This is ok as long as that function
+    does cheap operations. Otherwise, we will need to do it only on first
+    call to start_stmt().
+  */
+  trnman_new_statement(trn);
+
+#ifdef EXTRA_DEBUG
+  if (!(trnman_get_flags(trn) & TRN_STATE_INFO_LOGGED) &&
+      trnman_get_flags(trn) & TRN_STATE_TABLES_CAN_CHANGE)
+  {
+    trnman_set_flags(trn, trnman_get_flags(trn) | TRN_STATE_INFO_LOGGED);
+    (void) translog_log_debug_info(trn, LOGREC_DEBUG_INFO_QUERY,
+                                   (uchar*) thd->query(),
+                                   thd->query_length());
+  }
+#endif
+  return 0;
+}
+
+
+/*
+  Reset THD_TRN and all file->trn related to the transaction.
+  This is needed as some calls, like extra() or external_lock() may access
+  it before next transaction is started.
+
+  first_table is the head of a list of handlers chained through trn_next.
+*/
+
+static void reset_thd_trn(THD *thd, MARIA_HA *first_table)
+{
+  DBUG_ENTER("reset_thd_trn");
+  thd_set_ha_data(thd, maria_hton, 0);
+
+  MARIA_HA *cur= first_table;
+  while (cur)
+  {
+    /* Fetch the successor before the handler is reset */
+    MARIA_HA *following= cur->trn_next;
+    _ma_reset_trn_for_table(cur);
+
+    /*
+      If table has changed by this statement, invalidate it from the query
+      cache
+    */
+    if (cur->row_changes != cur->start_row_changes)
+    {
+      cur->start_row_changes= cur->row_changes;
+      DBUG_ASSERT(cur->s->chst_invalidator != NULL);
+      (*cur->s->chst_invalidator)(cur->s->data_file_name.str);
+    }
+    cur= following;
+  }
+  DBUG_VOID_RETURN;
+}
+
+/*
+  True if the Maria handlerton exists and the THD has a Maria transaction
+  attached.
+*/
+
+bool ha_maria::has_active_transaction(THD *thd)
+{
+  if (!maria_hton)
+    return false;
+  TRN *trn= THD_TRN;
+  return trn != NULL;
+}
+
+/**
+ Performs an implicit commit of the Maria transaction and creates a new
+ one.
+
+ This can be considered a hack. When Maria loses HA_NO_TRANSACTIONS it will
+ be participant in the connection's transaction and so the implicit commits
+ (ha_commit()) (like in end_active_trans()) will do the implicit commit
+ without need to call this function which can then be removed.
+
+ @param thd THD object
+ @param new_trn if a new transaction should be created; a new
+ transaction is not needed when we know that the
+ tables will be unlocked very soon.
+
+ @return 0 on success, and also when there is nothing to do (no Maria
+ handlerton, plugins not initialized, no transaction in the
+ THD, or new_trn is false under LOCK TABLES)
+ @return 1 if ma_commit() failed
+ @return HA_ERR_OUT_OF_MEM if the new transaction could not be created or
+ _ma_setup_live_state() failed for a table; this
+ value replaces an earlier commit error
+*/
+
+int ha_maria::implicit_commit(THD *thd, bool new_trn)
+{
+#ifndef MARIA_CANNOT_ROLLBACK
+#error this method should be removed
+#endif
+ TRN *trn;
+ int error;
+ uint locked_tables;
+ extern my_bool plugins_are_initialized;
+ MARIA_HA *used_tables, *trn_next;
+ DBUG_ENTER("ha_maria::implicit_commit");
+
+ if (!maria_hton || !plugins_are_initialized || !(trn= THD_TRN))
+ DBUG_RETURN(0);
+ if (!new_trn && (thd->locked_tables_mode == LTM_LOCK_TABLES ||
+ thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES))
+ {
+ /*
+ No commit inside LOCK TABLES.
+
+ Note that we come here only at the end of the top statement
+ (dispatch_command()), we are never committing inside a sub-statement.
+ */
+ DBUG_PRINT("info", ("locked_tables, skipping"));
+ DBUG_RETURN(0);
+ }
+
+ /* Prepare to move used_instances and locked tables to new TRN object */
+ /* The counter is saved here and put onto the new trn at the very end */
+ locked_tables= trnman_has_locked_tables(trn);
+ trnman_reset_locked_tables(trn, 0);
+ relink_trn_used_instances(&used_tables, trn);
+
+ error= 0;
+ if (unlikely(ma_commit(trn)))
+ error= 1;
+ if (!new_trn)
+ {
+ reset_thd_trn(thd, used_tables);
+ goto end;
+ }
+
+ /*
+ We need to create a new transaction and put it in THD_TRN. Indeed,
+ tables may be under LOCK TABLES, and so they will start the next
+ statement assuming they have a trn (see ha_maria::start_stmt()).
+ */
+ trn= trnman_new_trn(& thd->transaction->wt);
+ thd_set_ha_data(thd, maria_hton, trn);
+ if (unlikely(trn == NULL))
+ {
+ reset_thd_trn(thd, used_tables);
+ error= HA_ERR_OUT_OF_MEM;
+ goto end;
+ }
+ /*
+ Move all locked tables to the new transaction
+ We must do it here as otherwise file->thd and file->state may be
+ stale pointers. We can't do this in start_stmt() as we don't know
+ when we should call _ma_setup_live_state() and in some cases, like
+ in check table, we use the table without calling start_stmt().
+ */
+
+ /* trn_next is read before the handler is touched by the calls below */
+ for (MARIA_HA *handler= used_tables; handler ;
+ handler= trn_next)
+ {
+ trn_next= handler->trn_next;
+ DBUG_ASSERT(handler->s->base.born_transactional);
+
+ /* If handler uses versioning */
+ if (handler->s->lock_key_trees)
+ {
+ /* _ma_set_trn_for_table() will be called indirectly */
+ if (_ma_setup_live_state(handler))
+ error= HA_ERR_OUT_OF_MEM;
+ }
+ else
+ _ma_set_trn_for_table(handler, trn);
+ }
+ /* This is just a commit, tables stay locked if they were: */
+ trnman_reset_locked_tables(trn, locked_tables);
+
+end:
+ DBUG_RETURN(error);
+}
+
+
+/**
+ Store the lock request for this table in the caller's lock array.
+
+ If a real lock is requested (not TL_IGNORE) and this handler currently
+ holds no lock (file->lock.type == TL_UNLOCK), the requested type may be
+ changed before it is recorded in file->lock.type:
+ - read locks up to TL_READ_HIGH_PRIORITY become TL_READ_NO_INSERT for
+ statements other than SELECT and LOCK TABLES when the statement is not
+ logged in row format and the binary log is enabled and open;
+ - TL_WRITE_CONCURRENT_INSERT becomes TL_WRITE when the table is empty
+ or the statement is INSERT ... SELECT ... ON DUPLICATE KEY UPDATE or
+ LOAD DATA ... REPLACE.
+
+ @param thd Connection whose statement requests the lock
+ @param to Next free slot in the array of lock pointers
+ @param lock_type Requested lock type
+
+ @return Pointer to the slot following the one that was filled in
+*/
+THR_LOCK_DATA **ha_maria::store_lock(THD *thd,
+ THR_LOCK_DATA **to,
+ enum thr_lock_type lock_type)
+{
+ /* Test if we can fix test below */
+ DBUG_ASSERT(lock_type != TL_UNLOCK &&
+ (lock_type == TL_IGNORE || file->lock.type == TL_UNLOCK));
+ if (lock_type != TL_IGNORE && file->lock.type == TL_UNLOCK)
+ {
+ const enum enum_sql_command sql_command= thd->lex->sql_command;
+ /*
+ We have to disable concurrent inserts for INSERT ... SELECT or
+ INSERT/UPDATE/DELETE with sub queries if we are using statement based
+ logging. We take the safe route here and disable this for all commands
+ that only does reading that are not SELECT.
+ */
+ if (lock_type <= TL_READ_HIGH_PRIORITY &&
+ !thd->is_current_stmt_binlog_format_row() &&
+ (sql_command != SQLCOM_SELECT &&
+ sql_command != SQLCOM_LOCK_TABLES) &&
+ (thd->variables.option_bits & OPTION_BIN_LOG) &&
+ mysql_bin_log.is_open())
+ lock_type= TL_READ_NO_INSERT;
+ else if (lock_type == TL_WRITE_CONCURRENT_INSERT)
+ {
+ const enum enum_duplicates duplicates= thd->lex->duplicates;
+ /*
+ Explanation for the 3 conditions below, in order:
+
+ - Bulk insert may use repair, which will cause problems if other
+ threads try to read/insert to the table: disable versioning.
+ Note that our read of file->state->records is incorrect, as such
+ variable may have changed when we come to start_bulk_insert() (worse
+ case: we see != 0 so allow versioning, start_bulk_insert() sees 0 and
+ uses repair). This is prevented because start_bulk_insert() will not
+ try repair if we enabled versioning.
+ - INSERT SELECT ON DUPLICATE KEY UPDATE comes here with
+ TL_WRITE_CONCURRENT_INSERT but shouldn't because it can do
+ update/delete of a row and versioning doesn't support that
+ - same for LOAD DATA CONCURRENT REPLACE.
+ */
+ if ((file->state->records == 0) ||
+ (sql_command == SQLCOM_INSERT_SELECT && duplicates == DUP_UPDATE) ||
+ (sql_command == SQLCOM_LOAD && duplicates == DUP_REPLACE))
+ lock_type= TL_WRITE;
+ }
+ file->lock.type= lock_type;
+ }
+ /* The lock is always handed back, even when its type was left untouched */
+ *to++= &file->lock;
+ return to;
+}
+
+
+/**
+ Fill in the parts of a HA_CREATE_INFO that the engine knows about.
+
+ @param create_info Structure to update; the auto-increment value is only
+ overwritten when the caller did not set it explicitly
+*/
+void ha_maria::update_create_info(HA_CREATE_INFO *create_info)
+{
+  /* Call info() before the statistics are read below */
+  ha_maria::info(HA_STATUS_AUTO | HA_STATUS_CONST);
+
+  const bool auto_inc_given=
+    (create_info->used_fields & HA_CREATE_USED_AUTO) != 0;
+  if (!auto_inc_given)
+    create_info->auto_increment_value= stats.auto_increment_value;
+
+  create_info->data_file_name= data_file_name;
+  create_info->index_file_name= index_file_name;
+
+  /*
+    Keep user-specified row_type for ALTER,
+    but show the actually used one in SHOW
+  */
+  if (create_info->row_type != ROW_TYPE_DEFAULT)
+  {
+    if (thd_sql_command(ha_thd()) != SQLCOM_ALTER_TABLE)
+      create_info->row_type= get_row_type();
+  }
+
+  /*
+    Show always page checksums, as this can be forced with
+    maria_page_checksums variable
+  */
+  if (create_info->page_checksum == HA_CHOICE_UNDEF)
+  {
+    if (file->s->options & HA_OPTION_PAGE_CHECKSUM)
+      create_info->page_checksum= HA_CHOICE_YES;
+    else
+      create_info->page_checksum= HA_CHOICE_NO;
+  }
+}
+
+
+/**
+ Map the data file type of the open table to the SQL-layer row type.
+
+ @return The matching ROW_TYPE_* value, or ROW_TYPE_NOT_USED for a data
+ file type that has no mapping here
+*/
+enum row_type ha_maria::get_row_type() const
+{
+  enum row_type result;
+
+  switch (file->s->data_file_type) {
+  case STATIC_RECORD:
+    result= ROW_TYPE_FIXED;
+    break;
+  case DYNAMIC_RECORD:
+    result= ROW_TYPE_DYNAMIC;
+    break;
+  case BLOCK_RECORD:
+    result= ROW_TYPE_PAGE;
+    break;
+  case COMPRESSED_RECORD:
+    result= ROW_TYPE_COMPRESSED;
+    break;
+  default:
+    result= ROW_TYPE_NOT_USED;
+    break;
+  }
+  return result;
+}
+
+
+/**
+ Choose the data file type for a table from its CREATE options.
+
+ @param info CREATE options; only 'transactional' and 'row_type' are read
+
+ @return BLOCK_RECORD when TRANSACTIONAL=1 was requested or when the row
+ type is neither FIXED nor DYNAMIC, otherwise STATIC_RECORD or
+ DYNAMIC_RECORD
+*/
+static enum data_file_type maria_row_type(HA_CREATE_INFO *info)
+{
+  if (info->transactional != HA_CHOICE_YES)
+  {
+    if (info->row_type == ROW_TYPE_FIXED)
+      return STATIC_RECORD;
+    if (info->row_type == ROW_TYPE_DYNAMIC)
+      return DYNAMIC_RECORD;
+  }
+  return BLOCK_RECORD;
+}
+
+
+/**
+ Create the files for a new table.
+
+ Translates the SQL-layer table definition into key and column definitions
+ with table2maria(), fills a MARIA_CREATE_INFO from the table share and the
+ CREATE options, derives the creation flags and calls maria_create().
+
+ @param name Table name, passed to maria_create() through
+ fn_format()
+ @param table_arg Table definition to create files for
+ @param ha_create_info Options given by the CREATE statement; its
+ 'transactional' member is overwritten for sequences
+
+ @return The value returned by table2maria() if that is non-zero, otherwise
+ the value returned by maria_create()
+*/
+int ha_maria::create(const char *name, TABLE *table_arg,
+ HA_CREATE_INFO *ha_create_info)
+{
+ int error;
+ uint create_flags= 0, record_count= 0, i;
+ char buff[FN_REFLEN];
+ MARIA_KEYDEF *keydef;
+ MARIA_COLUMNDEF *recinfo;
+ MARIA_CREATE_INFO create_info;
+ TABLE_SHARE *share= table_arg->s;
+ uint options= share->db_options_in_use;
+ ha_table_option_struct *table_options= table_arg->s->option_struct;
+ enum data_file_type row_type;
+ THD *thd= current_thd;
+ DBUG_ENTER("ha_maria::create");
+
+ /* One key that uses a parser is enough to set the flag, so stop there */
+ for (i= 0; i < share->keys; i++)
+ {
+ if (table_arg->key_info[i].flags & HA_USES_PARSER)
+ {
+ create_flags|= HA_CREATE_RELIES_ON_SQL_LAYER;
+ break;
+ }
+ }
+ /* Note: BLOCK_RECORD is used if table is transactional */
+ row_type= maria_row_type(ha_create_info);
+ /* Tell the user when an explicit row format is overridden by TRANSACTIONAL=1 */
+ if (ha_create_info->transactional == HA_CHOICE_YES &&
+ ha_create_info->row_type != ROW_TYPE_PAGE &&
+ ha_create_info->row_type != ROW_TYPE_NOT_USED &&
+ ha_create_info->row_type != ROW_TYPE_DEFAULT)
+ push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "Row format set to PAGE because of TRANSACTIONAL=1 option");
+
+ if (share->table_type == TABLE_TYPE_SEQUENCE)
+ {
+ /* For sequences, the simplest record type is appropriate */
+ row_type= STATIC_RECORD;
+ ha_create_info->transactional= HA_CHOICE_NO;
+ }
+
+ bzero((char*) &create_info, sizeof(create_info));
+ if ((error= table2maria(table_arg, row_type, &keydef, &recinfo,
+ &record_count, &create_info)))
+ DBUG_RETURN(error); /* purecov: inspected */
+ create_info.max_rows= share->max_rows;
+ create_info.reloc_rows= share->min_rows;
+ create_info.with_auto_increment= share->next_number_key_offset == 0;
+ /* The engine is given the requested start value minus one (0 if none) */
+ create_info.auto_increment= (ha_create_info->auto_increment_value ?
+ ha_create_info->auto_increment_value -1 :
+ (ulonglong) 0);
+ create_info.data_file_length= ((ulonglong) share->max_rows *
+ share->avg_row_length);
+ create_info.data_file_name= ha_create_info->data_file_name;
+ create_info.index_file_name= ha_create_info->index_file_name;
+ create_info.language= share->table_charset->number;
+ if (ht != maria_hton)
+ {
+ /* S3 engine */
+ create_info.s3_block_size= (ulong) table_options->s3_block_size;
+ create_info.compression_algorithm= table_options->compression_algorithm;
+ }
+
+ /*
+ Table is transactional:
+ - If the user specify that table is transactional (in this case
+ row type is forced to BLOCK_RECORD)
+ - If they specify BLOCK_RECORD without specifying transactional behaviour
+
+ Shouldn't this test be pushed down to maria_create()? Because currently,
+ ma_test1 -T crashes: it creates a table with DYNAMIC_RECORD but has
+ born_transactional==1, which confuses some recovery-related code.
+ */
+ create_info.transactional= (row_type == BLOCK_RECORD &&
+ ha_create_info->transactional != HA_CHOICE_NO);
+
+ /* Temporary tables are never transactional and always delay key writes */
+ if (ha_create_info->tmp_table())
+ {
+ create_flags|= HA_CREATE_TMP_TABLE | HA_CREATE_DELAY_KEY_WRITE;
+ create_info.transactional= 0;
+ }
+ if (ha_create_info->options & HA_CREATE_KEEP_FILES)
+ create_flags|= HA_CREATE_KEEP_FILES;
+ if (options & HA_OPTION_PACK_RECORD)
+ create_flags|= HA_PACK_RECORD;
+ if (options & HA_OPTION_CHECKSUM)
+ create_flags|= HA_CREATE_CHECKSUM;
+ if (options & HA_OPTION_DELAY_KEY_WRITE)
+ create_flags|= HA_CREATE_DELAY_KEY_WRITE;
+ /* An unspecified PAGE_CHECKSUM falls back to the maria_page_checksums setting */
+ if ((ha_create_info->page_checksum == HA_CHOICE_UNDEF &&
+ maria_page_checksums) ||
+ ha_create_info->page_checksum == HA_CHOICE_YES)
+ create_flags|= HA_CREATE_PAGE_CHECKSUM;
+
+ (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
+ (uchar*) thd->query(), thd->query_length());
+
+ /* Encryption is only requested for the Aria handlerton itself */
+ create_info.encrypted= maria_encrypt_tables && ht == maria_hton;
+ /* TODO: Check that the following fn_format is really needed */
+ error=
+ maria_create(fn_format(buff, name, "", "",
+ MY_UNPACK_FILENAME | MY_APPEND_EXT),
+ row_type, share->keys, keydef,
+ record_count, recinfo,
+ 0, (MARIA_UNIQUEDEF *) 0,
+ &create_info, create_flags);
+
+ /*
+ NOTE(review): only recinfo is freed; presumably keydef lives in the same
+ allocation made by table2maria() - confirm.
+ */
+ my_free(recinfo);
+ DBUG_RETURN(error);
+}
+
+
+/**
+ Rename a table.
+
+ The text of the current query is first written to the transaction log as
+ debug information; the result of that write is deliberately ignored.
+
+ @param from Current table name
+ @param to New table name
+
+ @return Result of maria_rename()
+*/
+int ha_maria::rename_table(const char *from, const char *to)
+{
+  THD *const session= current_thd;
+
+  (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
+                                 (uchar*) session->query(),
+                                 session->query_length());
+
+  const int result= maria_rename(from, to);
+  return result;
+}
+
+
+/**
+ Return the next auto-increment value to use.
+
+ When the auto-increment column starts its key, the value comes from the
+ table statistics and the whole remaining range is reserved. Otherwise the
+ last existing row with the same key prefix is looked up and exactly one
+ value (that row's value + 1, or 1 if no row is found) is reserved.
+
+ @param offset Not used by this implementation
+ @param increment Not used by this implementation
+ @param nb_desired_values Not used by this implementation
+ @param[out] first_value First value the caller may use
+ @param[out] nb_reserved_values Number of values reserved for the caller
+*/
+void ha_maria::get_auto_increment(ulonglong offset, ulonglong increment,
+                                  ulonglong nb_desired_values,
+                                  ulonglong *first_value,
+                                  ulonglong *nb_reserved_values)
+{
+  uchar search_key[MARIA_MAX_KEY_BUFF];
+  ulonglong next_value;
+
+  if (table->s->next_number_key_offset == 0)
+  {
+    /* Autoincrement at key-start */
+    ha_maria::info(HA_STATUS_AUTO);
+    *first_value= stats.auto_increment_value;
+    /* Maria has only table-level lock for now, so reserves to +inf */
+    *nb_reserved_values= ULONGLONG_MAX;
+    return;
+  }
+
+  /* it's safe to call the following if bulk_insert isn't on */
+  maria_flush_bulk_insert(file, table->s->next_number_index);
+
+  (void) extra(HA_EXTRA_KEYREAD);
+  key_copy(search_key, table->record[0],
+           table->key_info + table->s->next_number_index,
+           table->s->next_number_key_offset);
+
+  const int lookup_failed=
+    maria_rkey(file, table->record[1], (int) table->s->next_number_index,
+               search_key,
+               make_prev_keypart_map(table->s->next_number_keypart),
+               HA_READ_PREFIX_LAST);
+  if (!lookup_failed)
+  {
+    /* Get data from record[1] */
+    next_value= ((ulonglong) table->next_number_field->
+                 val_int_offset(table->s->rec_buff_length) + 1);
+  }
+  else
+    next_value= 1;
+
+  extra(HA_EXTRA_NO_KEYREAD);
+  *first_value= next_value;
+  /*
+    MySQL needs to call us for next row: assume we are inserting ("a",null)
+    here, we return 3, and next this statement will want to insert ("b",null):
+    there is no reason why ("b",3+1) would be the good row to insert: maybe it
+    already exists, maybe 3+1 is too large...
+  */
+  *nb_reserved_values= 1;
+}
+
+
+/**
+ Estimate how many rows there are in the given key range.
+
+ @param inx Index to use
+ @param min_key Start of range. Null pointer if from first key
+ @param max_key End of range. Null pointer if to last key
+ @param pages Store first and last page for the range in case of
+ b-trees. In other cases it's not touched.
+
+ @note min_key.flag can have one of the following values:
+ HA_READ_KEY_EXACT Include the key in the range
+ HA_READ_AFTER_KEY Don't include key in range
+
+ @note max_key.flag can have one of the following values:
+ HA_READ_BEFORE_KEY Don't include key in range
+ HA_READ_AFTER_KEY Include all 'end_key' values in the range
+
+ @retval HA_POS_ERROR Something is wrong with the index tree.
+ @retval 0 There are no matching keys in the given range
+ @retval number > 0 There are approximately 'number' matching rows in
+ the range.
+*/
+
+ha_rows ha_maria::records_in_range(uint inx, const key_range *min_key,
+                                   const key_range *max_key, page_range *pages)
+{
+  register_handler(file);
+
+  const ha_rows estimate=
+    (ha_rows) maria_records_in_range(file, (int) inx, min_key, max_key,
+                                     pages);
+  return estimate;
+}
+
+
+/**
+ Start a full-text search on the given index.
+
+ @param flags Search flags, passed through to maria_ft_init_search()
+ @param inx Index to search
+ @param key Search string; its bytes, length and character set are used
+
+ @return Result of maria_ft_init_search()
+*/
+FT_INFO *ha_maria::ft_init_ext(uint flags, uint inx, String * key)
+{
+  uchar *query_text= (uchar *) key->ptr();
+
+  return maria_ft_init_search(flags, file, inx,
+                              query_text, key->length(),
+                              key->charset(), table->record[0]);
+}
+
+
+/**
+ Read the next row of the current full-text search.
+
+ @param buf Buffer handed to the full-text handler's read_next()
+
+ @return -1 when there is no full-text handler, otherwise the result of
+ the handler's read_next()
+*/
+int ha_maria::ft_read(uchar * buf)
+{
+  /* Without a full-text handler there is nothing to read from */
+  if (ft_handler == NULL)
+    return -1;
+
+  register_handler(file);
+
+  thread_safe_increment(table->in_use->status_var.ha_read_next_count,
+                        &LOCK_status); // why ?
+
+  return ft_handler->please->read_next(ft_handler, (char*) buf);
+}
+
+
+/**
+ Decide whether the existing table data can be kept for the new definition.
+
+ @param create_info Options of the new table definition
+ @param table_changes IS_EQUAL_* summary of the column changes
+
+ @retval COMPATIBLE_DATA_NO An option or column change was detected
+ @retval COMPATIBLE_DATA_YES None of the checked properties differ
+*/
+bool ha_maria::check_if_incompatible_data(HA_CREATE_INFO *create_info,
+                                          uint table_changes)
+{
+  DBUG_ENTER("check_if_incompatible_data");
+  const uint current_options= table->s->db_options_in_use;
+  enum ha_choice current_checksum= table->s->page_checksum;
+
+  /* An unspecified page checksum is resolved from the open table */
+  if (current_checksum == HA_CHOICE_UNDEF)
+  {
+    if (file->s->options & HA_OPTION_PAGE_CHECKSUM)
+      current_checksum= HA_CHOICE_YES;
+    else
+      current_checksum= HA_CHOICE_NO;
+  }
+
+  if (create_info->auto_increment_value != stats.auto_increment_value)
+    DBUG_RETURN(COMPATIBLE_DATA_NO);
+  if (create_info->data_file_name != data_file_name)
+    DBUG_RETURN(COMPATIBLE_DATA_NO);
+  if (create_info->index_file_name != index_file_name)
+    DBUG_RETURN(COMPATIBLE_DATA_NO);
+  if (create_info->page_checksum != current_checksum)
+    DBUG_RETURN(COMPATIBLE_DATA_NO);
+  if (create_info->transactional != table->s->transactional)
+    DBUG_RETURN(COMPATIBLE_DATA_NO);
+  if (maria_row_type(create_info) != data_file_type &&
+      create_info->row_type != ROW_TYPE_DEFAULT)
+    DBUG_RETURN(COMPATIBLE_DATA_NO);
+  if (table_changes == IS_EQUAL_NO)
+    DBUG_RETURN(COMPATIBLE_DATA_NO);
+  if (table_changes & IS_EQUAL_PACK_LENGTH)      // Not implemented yet
+    DBUG_RETURN(COMPATIBLE_DATA_NO);
+
+  if ((current_options & (HA_OPTION_CHECKSUM |
+                          HA_OPTION_DELAY_KEY_WRITE)) !=
+      (create_info->table_options & (HA_OPTION_CHECKSUM |
+                                     HA_OPTION_DELAY_KEY_WRITE)))
+    DBUG_RETURN(COMPATIBLE_DATA_NO);
+
+  DBUG_RETURN(COMPATIBLE_DATA_YES);
+}
+
+
+/**
+ Handlerton panic callback.
+
+ @param hton Handlerton (not used here)
+ @param flag Panic action passed to maria_panic()
+
+ @return Bitwise OR of the checkpoint result (if one was run) and the
+ result of maria_panic()
+*/
+static int maria_hton_panic(handlerton *hton, ha_panic_function flag)
+{
+  int status= 0;
+
+  /* If no background checkpoints, we need to do one now */
+  const bool need_checkpoint= !checkpoint_interval && !aria_readonly;
+  if (need_checkpoint)
+    status= ma_checkpoint_execute(CHECKPOINT_FULL, FALSE);
+
+  status|= maria_panic(flag);
+
+  /* The global handlerton pointer is cleared whatever the outcome */
+  maria_hton= 0;
+  return status;
+}
+
+
+/**
+ Handlerton commit callback.
+
+ Does nothing when the connection has no transaction object, when it runs
+ under LOCK TABLES, or when this is only the end of a statement (autocommit
+ is off or BEGIN is active, and 'all' is false). Otherwise the transaction
+ is committed with ma_commit() and detached from the connection.
+
+ @param hton Handlerton (unused)
+ @param thd Connection to commit for
+ @param all TRUE for a transaction commit, FALSE for end of statement
+
+ @return 0 when nothing was done, otherwise the result of ma_commit()
+*/
+static int maria_commit(handlerton *hton __attribute__ ((unused)),
+ THD *thd, bool all)
+{
+ TRN *trn= THD_TRN;
+ int res;
+ MARIA_HA *used_instances;
+ DBUG_ENTER("maria_commit");
+
+ /* No commit inside lock_tables() */
+ if ((!trn ||
+ thd->locked_tables_mode == LTM_LOCK_TABLES ||
+ thd->locked_tables_mode == LTM_PRELOCKED_UNDER_LOCK_TABLES))
+ DBUG_RETURN(0);
+
+ /* statement or transaction ? */
+ if ((thd->variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) &&
+ !all)
+ DBUG_RETURN(0); // end of statement
+
+ /* Remember the handler list and unhook it from trn before committing */
+ used_instances= (MARIA_HA*) trn->used_instances;
+ trnman_reset_locked_tables(trn, 0);
+ trnman_set_flags(trn, trnman_get_flags(trn) & ~TRN_STATE_INFO_LOGGED);
+ trn->used_instances= 0;
+ res= ma_commit(trn);
+ /* The saved handler list is handed to reset_thd_trn() after the commit */
+ reset_thd_trn(thd, used_instances);
+ thd_set_ha_data(thd, maria_hton, 0);
+ DBUG_RETURN(res);
+}
+
+#ifdef MARIA_CANNOT_ROLLBACK
+/**
+ Handlerton rollback callback for builds that cannot roll back.
+
+ Emits a note when the transaction has an undo LSN, then commits instead
+ when a full transaction rollback is requested.
+
+ @param hton Handlerton, forwarded to maria_commit()
+ @param thd Connection to act on
+ @param all TRUE for a transaction rollback, FALSE for end of statement
+
+ @return 0, or the result of maria_commit() when 'all' is true
+*/
+static int maria_rollback(handlerton *hton, THD *thd, bool all)
+{
+  TRN *active_trn= THD_TRN;
+  DBUG_ENTER("maria_rollback");
+
+  if (active_trn == NULL)
+    DBUG_RETURN(0);
+
+  if (active_trn->undo_lsn)
+  {
+    push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
+                        ER_DATA_WAS_COMMITED_UNDER_ROLLBACK,
+                        ER_THD(thd, ER_DATA_WAS_COMMITED_UNDER_ROLLBACK),
+                        "Aria");
+  }
+
+  if (!all)
+  {
+    /* Statement rollbacks are ignored. Commit will happen in external_lock */
+    DBUG_RETURN(0);
+  }
+  DBUG_RETURN(maria_commit(hton, thd, all));
+}
+
+#else
+
+/**
+ Handlerton rollback callback.
+
+ Rolls back either the current statement (autocommit off or BEGIN active,
+ and 'all' is false) or the whole transaction.
+
+ @param hton Handlerton (unused)
+ @param thd Connection to roll back for
+ @param all TRUE for a transaction rollback, FALSE for end of statement
+
+ @retval 0 Nothing to do, statement rolled back, or
+ transaction rolled back successfully
+ @retval HA_ERR_OUT_OF_MEM trnman_rollback_trn() reported a failure
+*/
+static int maria_rollback(handlerton *hton __attribute__ ((unused)),
+                          THD *thd, bool all)
+{
+  TRN *trn= THD_TRN;
+  DBUG_ENTER("maria_rollback");
+
+  /*
+    Same guard as in maria_commit(): without a transaction object there is
+    nothing to roll back, and every call below would dereference NULL.
+  */
+  if (!trn)
+    DBUG_RETURN(0);
+
+  DBUG_ASSERT(trnman_has_locked_tables(trn) == 0);
+  trnman_reset_locked_tables(trn, 0);
+  /* statement or transaction ? */
+  if ((thd->variables.option_bits & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) &&
+      !all)
+  {
+    trnman_rollback_statement(trn);
+    DBUG_RETURN(0);                             // end of statement
+  }
+  reset_thd_trn(thd, (MARIA_HA*) trn->used_instances);
+  DBUG_RETURN(trnman_rollback_trn(trn) ?
+              HA_ERR_OUT_OF_MEM : 0);           // end of transaction
+}
+#endif /* MARIA_CANNOT_ROLLBACK */
+
+
+/**
+ @brief Handlerton callback for flushing logs
+
+ Calls translog_purge_at_flush() and reduces its result to a boolean.
+
+ @param hton maria handlerton (unused)
+
+ @retval FALSE OK
+ @retval TRUE Error
+*/
+
+bool maria_flush_logs(handlerton *hton)
+{
+  if (translog_purge_at_flush())
+    return 1;
+  return 0;
+}
+
+
+/**
+ Handlerton callback that records whether checkpoints are disabled.
+
+ @param hton Handlerton (not used here)
+ @param disabled New value for the global maria_checkpoint_disabled flag
+
+ @return Always 0
+*/
+int maria_checkpoint_state(handlerton *hton, bool disabled)
+{
+ maria_checkpoint_disabled= (my_bool) disabled;
+ return 0;
+}
+
+
+/*
+ Handle backup calls
+*/
+
+/* Backup start: disable purging of transaction logs (see maria_end_backup()) */
+void maria_prepare_for_backup()
+{
+ translog_disable_purge();
+}
+
+/* Backup end: re-enable purging of transaction logs */
+void maria_end_backup()
+{
+ translog_enable_purge();
+}
+
+
+
+#define SHOW_MSG_LEN (FN_REFLEN + 20)
+/**
+ @brief Handlerton callback that prints engine status
+
+ Only HA_ENGINE_LOGS produces output: one line per transaction log file
+ between the first existing file and the file of the current horizon,
+ giving its size, its name and whether it is still needed. Any other
+ status type prints nothing.
+
+ @param hton maria handlerton
+ @param thd thread handler
+ @param print print function
+ @param stat type of status
+
+ @return Always 0
+*/
+
+bool maria_show_status(handlerton *hton,
+                       THD *thd,
+                       stat_print_fn *print,
+                       enum ha_stat_type stat)
+{
+  const LEX_CSTRING *engine_name= hton_name(hton);
+
+  if (stat != HA_ENGINE_LOGS)
+    return 0;
+
+  const char state_unknown[]= "unknown";
+  const char state_needed[]= "in use";
+  const char state_unneeded[]= "free";
+  char path[FN_REFLEN];
+
+  TRANSLOG_ADDRESS horizon= translog_get_horizon();
+  const uint32 newest_file= LSN_FILE_NO(horizon);
+  const uint32 oldest_needed= translog_get_first_needed_file();
+  const uint32 oldest_file= translog_get_first_file(horizon);
+
+  if (oldest_file == 0)
+  {
+    /* No first log file could be determined: report one error line */
+    const char listing_error[]= "error";
+    print(thd, engine_name->str, engine_name->length,
+          STRING_WITH_LEN(""), listing_error, sizeof(listing_error) - 1);
+    return 0;
+  }
+
+  for (uint32 fileno= oldest_file; fileno <= newest_file; fileno++)
+  {
+    const char stat_failed[]= "can't stat";
+    char object[SHOW_MSG_LEN];
+    MY_STAT stat_buff;
+    const char *status;
+    size_t object_len, status_len;
+
+    char *log_name= translog_filename_by_fileno(fileno, path);
+    MY_STAT *file_stat= mysql_file_stat(key_file_translog, log_name,
+                                        &stat_buff, MYF(0));
+    if (file_stat == NULL)
+    {
+      status= stat_failed;
+      status_len= sizeof(stat_failed) - 1;
+      object_len= my_snprintf(object, SHOW_MSG_LEN, "Size unknown ; %s",
+                              log_name);
+    }
+    else
+    {
+      if (oldest_needed == 0)
+      {
+        status= state_unknown;
+        status_len= sizeof(state_unknown) - 1;
+      }
+      else if (fileno < oldest_needed)
+      {
+        status= state_unneeded;
+        status_len= sizeof(state_unneeded) - 1;
+      }
+      else
+      {
+        status= state_needed;
+        status_len= sizeof(state_needed) - 1;
+      }
+      object_len= my_snprintf(object, SHOW_MSG_LEN, "Size %12llu ; %s",
+                              (ulonglong) file_stat->st_size, log_name);
+    }
+
+    print(thd, engine_name->str, engine_name->length,
+          object, object_len, status, status_len);
+  }
+  return 0;
+}
+
+
+/**
+ Callback that deletes one log file. It is used to delete all logs in a
+ directory and is lower-level than the other log-deleting functions in
+ ma_loghandler.c, as it does not rely on translog_init() having been
+ called first.
+
+ @param directory directory where file is
+ @param filename base name of the file to delete
+
+ @return Result of mysql_file_delete(), converted to my_bool
+*/
+
+static my_bool translog_callback_delete_all(const char *directory,
+                                            const char *filename)
+{
+  char full_path[FN_REFLEN];
+
+  (void) fn_format(full_path, filename, directory, "",
+                   MYF(MY_UNPACK_FILENAME));
+  return mysql_file_delete(key_file_translog, full_path, MYF(MY_WME));
+}
+
+
+/**
+ Helper function for option aria-force-start-after-recovery-failures.
+ While the number of recorded failures is below the configured limit, the
+ counter of failures in the control file is incremented. Once the limit is
+ reached, the logs are deleted instead.
+ Notice how this has to be called _before_ translog_init() (if log is
+ corrupted, translog_init() might crash the server, so we need to remove logs
+ before).
+
+ @param log_dir directory where logs to be deleted are
+
+ @return Result of the control file write, or of walking/deleting the logs
+*/
+
+static int mark_recovery_start(const char* log_dir)
+{
+  int res;
+  DBUG_ENTER("mark_recovery_start");
+
+  if ((maria_recover_options & HA_RECOVER_ANY) == 0)
+    ma_message_no_user(ME_WARNING, "Please consider using option"
+                       " --aria-recover-options[=...] to automatically check and"
+                       " repair tables when logs are removed by option"
+                       " --aria-force-start-after-recovery-failures=#");
+
+  if (recovery_failures < force_start_after_recovery_failures)
+  {
+    /* Limit not reached yet: only count this attempt */
+    res= ma_control_file_write_and_force(last_checkpoint_lsn, last_logno,
+                                         max_trid_in_control_file,
+                                         recovery_failures + 1);
+    DBUG_RETURN(res);
+  }
+
+  /*
+    Remove logs which cause the problem; keep control file which has
+    critical info like uuid, max_trid (removing control file may make
+    correct tables look corrupted!).
+  */
+  char msg[100];
+  res= translog_walk_filenames(log_dir, &translog_callback_delete_all);
+  const char *outcome= res ? "failed to remove some" : "removed all";
+  my_snprintf(msg, sizeof(msg),
+              "%s logs after %u consecutive failures of"
+              " recovery from logs",
+              outcome,
+              recovery_failures);
+  ma_message_no_user((res ? 0 : ME_WARNING), msg);
+  DBUG_RETURN(res);
+}
+
+
+/**
+ Helper function for option aria-force-start-after-recovery-failures.
+ Writes a failure count of zero to the control file, so that a successful
+ recovery is not counted for aria-force-start-after-recovery-failures.
+
+ @return Result of ma_control_file_write_and_force()
+*/
+
+static int mark_recovery_success(void)
+{
+  DBUG_ENTER("mark_recovery_success");
+
+  /* success of recovery, reset recovery_failures: */
+  const int status=
+    ma_control_file_write_and_force(last_checkpoint_lsn, last_logno,
+                                    max_trid_in_control_file, 0);
+  DBUG_RETURN(status);
+}
+
+
+/*
+ Return 1 if table has changed during the current transaction
+
+ The value is read directly from the 'changed' member of the handler's
+ current state (file->state).
+*/
+
+bool ha_maria::is_changed() const
+{
+ return file->state->changed;
+}
+
+
+/**
+ Plugin initialization function.
+
+ Fills in the handlerton callbacks and flags, then brings the engine up:
+ upgrade check, maria_init(), control file, page caches, transaction log,
+ recovery and checkpoints. Every step is skipped once an earlier one has
+ failed. In read-only mode (server started for --help) the upgrade,
+ transaction log and recovery steps are not run.
+
+ @param p The handlerton to initialize
+
+ @retval 0 Success
+ @retval HA_ERR_INITIALIZATION A step failed; maria_hton is reset to 0 and
+ maria_panic(HA_PANIC_CLOSE) has been called
+*/
+static int ha_maria_init(void *p)
+{
+  int res= 0, tmp;
+  const char *log_dir= maria_data_root;
+
+  /*
+    If aria_readonly is set, then we don't run recovery and we don't allow
+    opening of tables that are crashed. Used by mysqld --help
+  */
+  if ((aria_readonly= opt_help != 0))
+  {
+    maria_recover_options= 0;
+    checkpoint_interval= 0;
+  }
+
+#ifdef HAVE_PSI_INTERFACE
+  init_aria_psi_keys();
+#endif
+
+  maria_hton= (handlerton *)p;
+  maria_hton->db_type= DB_TYPE_ARIA;
+  maria_hton->create= maria_create_handler;
+  maria_hton->panic= maria_hton_panic;
+  maria_hton->tablefile_extensions= ha_maria_exts;
+  maria_hton->commit= maria_commit;
+  maria_hton->rollback= maria_rollback;
+  maria_hton->checkpoint_state= maria_checkpoint_state;
+  maria_hton->flush_logs= maria_flush_logs;
+  maria_hton->show_status= maria_show_status;
+  maria_hton->prepare_for_backup= maria_prepare_for_backup;
+  maria_hton->end_backup= maria_end_backup;
+
+  /* TODO: decide if we support Maria being used for log tables */
+  maria_hton->flags= (HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES |
+                      HTON_NO_ROLLBACK |
+                      HTON_TRANSACTIONAL_AND_NON_TRANSACTIONAL);
+  bzero(maria_log_pagecache, sizeof(*maria_log_pagecache));
+  maria_tmpdir= &mysql_tmpdir_list;             /* For REDO */
+
+  if (!aria_readonly)
+    res= maria_upgrade();
+  res= res || maria_init();
+  tmp= ma_control_file_open(!aria_readonly, !aria_readonly, !aria_readonly);
+  /*
+    The parentheses are required: '||' binds tighter than '?:', so without
+    them a failure already stored in 'res' would be replaced by the control
+    file result. In read-only mode only a locked control file is an error.
+  */
+  res= res || (aria_readonly ? tmp == CONTROL_FILE_LOCKED : tmp != 0);
+  res= res ||
+    ((force_start_after_recovery_failures != 0 && !aria_readonly) &&
+     mark_recovery_start(log_dir)) ||
+    !init_pagecache(maria_pagecache,
+                    (size_t) pagecache_buffer_size, pagecache_division_limit,
+                    pagecache_age_threshold, maria_block_size, pagecache_file_hash_size,
+                    0) ||
+    !init_pagecache(maria_log_pagecache,
+                    TRANSLOG_PAGECACHE_SIZE, 0, 0,
+                    TRANSLOG_PAGE_SIZE, 0, 0) ||
+    (!aria_readonly &&
+     translog_init(maria_data_root, log_file_size,
+                   MYSQL_VERSION_ID, server_id, maria_log_pagecache,
+                   TRANSLOG_DEFAULT_FLAGS, 0)) ||
+    (!aria_readonly &&
+     (maria_recovery_from_log() ||
+      ((force_start_after_recovery_failures != 0 ||
+        maria_recovery_changed_data || recovery_failures) &&
+       mark_recovery_success()))) ||
+    (aria_readonly && trnman_init(MAX_INTERNAL_TRID-16)) ||
+    ma_checkpoint_init(checkpoint_interval);
+  maria_multi_threaded= maria_in_ha_maria= TRUE;
+  maria_create_trn_hook= maria_create_trn_for_mysql;
+  maria_pagecache->extra_debug= 1;
+  maria_assert_if_crashed_table= debug_assert_if_crashed_table;
+  ma_killed= ma_killed_in_mariadb;
+
+  if (res)
+  {
+    /* Clean up once; a second maria_panic() call here would be redundant */
+    maria_hton= 0;
+    maria_panic(HA_PANIC_CLOSE);
+  }
+
+  return res ? HA_ERR_INITIALIZATION : 0;
+}
+
+
+#ifdef HAVE_QUERY_CACHE
+/**
+ @brief Tell the query cache whether the current statement may be cached.
+
+ @param thd The thread handle
+ @param table_name A pointer to the table name in the table cache
+ @param table_name_len The length of the table name
+ @param[out] engine_callback The pointer to the storage engine call back
+ function, always set to 0
+ @param[out] engine_data Engine data, always set to 0.
+
+ @note Despite the name of this function, it is used to check each statement
+ before it is cached and not to register a table or callback function.
+
+ @see handler::register_query_cache_table
+
+ @return Whether it is ok to try to cache the current statement.
+ @retval TRUE The statement may be cached
+ @retval FALSE The statement must not be cached
+*/
+
+my_bool ha_maria::register_query_cache_table(THD *thd, const char *table_name,
+ uint table_name_len,
+ qc_engine_callback
+ *engine_callback,
+ ulonglong *engine_data)
+{
+ ulonglong actual_data_file_length;
+ ulonglong current_data_file_length;
+ DBUG_ENTER("ha_maria::register_query_cache_table");
+
+ /*
+ No call back function is needed to determine if a cached statement
+ is valid or not.
+ */
+ *engine_callback= 0;
+
+ /*
+ No engine data is needed.
+ */
+ *engine_data= 0;
+
+ /*
+ Transactional tables with versioning: cacheable only if this handler's
+ transaction id is not older than the table's last changing transaction.
+ */
+ if (file->s->now_transactional && file->s->have_versioning)
+ DBUG_RETURN(file->trn->trid >= file->s->state.last_change_trn);
+
+ /*
+ If a concurrent INSERT has happened just before the currently processed
+ SELECT statement, the total size of the table is unknown.
+
+ To determine if the table size is known, the current thread's snap shot of
+ the table size with the actual table size are compared.
+
+ If the table size is unknown the SELECT statement can't be cached.
+ */
+
+ /*
+ POSIX visibility rules specify that "2. Whatever memory values a
+ thread can see when it unlocks a mutex <...> can also be seen by any
+ thread that later locks the same mutex". In this particular case,
+ concurrent insert thread had modified the data_file_length in
+ MYISAM_SHARE before it has unlocked (or even locked)
+ structure_guard_mutex. So, here we're guaranteed to see at least that
+ value after we've locked the same mutex. We can see a later value
+ (modified by some other thread) though, but it's ok, as we only want
+ to know if the variable was changed, the actual new value doesn't matter
+
+ NOTE(review): this text names MYISAM_SHARE and structure_guard_mutex and
+ appears to be carried over from MyISAM; no mutex is taken in this
+ function - confirm that the stated guarantee still holds here.
+ */
+ actual_data_file_length= file->s->state.state.data_file_length;
+ current_data_file_length= file->state->data_file_length;
+
+ /* Return whether is ok to try to cache current statement. */
+ DBUG_RETURN(!(file->s->non_transactional_concurrent_insert &&
+ current_data_file_length != actual_data_file_length));
+}
+#endif
+
+/*
+ NULL-terminated list of the system variables of this engine.
+ NOTE(review): presumably referenced from the plugin declaration - confirm.
+*/
+static struct st_mysql_sys_var *system_variables[]= {
+ MYSQL_SYSVAR(block_size),
+ MYSQL_SYSVAR(checkpoint_interval),
+ MYSQL_SYSVAR(checkpoint_log_activity),
+ MYSQL_SYSVAR(force_start_after_recovery_failures),
+ MYSQL_SYSVAR(group_commit),
+ MYSQL_SYSVAR(group_commit_interval),
+ MYSQL_SYSVAR(log_dir_path),
+ MYSQL_SYSVAR(log_file_size),
+ MYSQL_SYSVAR(log_purge_type),
+ MYSQL_SYSVAR(max_sort_file_size),
+ MYSQL_SYSVAR(page_checksum),
+ MYSQL_SYSVAR(pagecache_age_threshold),
+ MYSQL_SYSVAR(pagecache_buffer_size),
+ MYSQL_SYSVAR(pagecache_division_limit),
+ MYSQL_SYSVAR(pagecache_file_hash_size),
+ MYSQL_SYSVAR(recover_options),
+ MYSQL_SYSVAR(repair_threads),
+ MYSQL_SYSVAR(sort_buffer_size),
+ MYSQL_SYSVAR(stats_method),
+ MYSQL_SYSVAR(sync_log_dir),
+ MYSQL_SYSVAR(used_for_temp_tables),
+ MYSQL_SYSVAR(encrypt_tables),
+ NULL
+};
+
+
+/**
+ @brief Updates the checkpoint interval and restarts the background thread.
+
+ @param thd Connection changing the variable (not used)
+ @param var System variable descriptor (not used)
+ @param var_ptr Storage of the variable, receives the new value
+ @param save The new value
+*/
+
+static void update_checkpoint_interval(MYSQL_THD thd,
+                                       struct st_mysql_sys_var *var,
+                                       void *var_ptr, const void *save)
+{
+  ulong *interval_var= (ulong *)var_ptr;
+
+  ma_checkpoint_end();
+  *interval_var= (ulong)(*(long *)save);
+  ma_checkpoint_init(*interval_var);
+}
+
+
+/**
+ @brief Updates group commit mode
+
+ Switches off whatever the old mode had enabled, stores the new mode,
+ syncs the transaction log and then enables what the new mode needs.
+
+ @param thd Connection changing the variable (not used)
+ @param var System variable descriptor (not used)
+ @param var_ptr Storage of the variable: holds the old mode on entry and
+ receives the new one
+ @param save The new mode
+*/
+
+static void update_maria_group_commit(MYSQL_THD thd,
+ struct st_mysql_sys_var *var,
+ void *var_ptr, const void *save)
+{
+ ulong value= (ulong)*((long *)var_ptr);
+ DBUG_ENTER("update_maria_group_commit");
+ DBUG_PRINT("enter", ("old value: %lu new value %lu rate %lu",
+ value, (ulong)(*(long *)save),
+ maria_group_commit_interval));
+ /* old value */
+ switch (value) {
+ case TRANSLOG_GCOMMIT_NONE:
+ break;
+ case TRANSLOG_GCOMMIT_HARD:
+ translog_hard_group_commit(FALSE);
+ break;
+ case TRANSLOG_GCOMMIT_SOFT:
+ translog_soft_sync(FALSE);
+ /* The soft sync is only ended when a non-zero interval is configured */
+ if (maria_group_commit_interval)
+ translog_soft_sync_end();
+ break;
+ default:
+ DBUG_ASSERT(0); /* impossible */
+ }
+ value= *(ulong *)var_ptr= (ulong)(*(long *)save);
+ /* Sync the log between switching the old mode off and the new one on */
+ translog_sync();
+ /* new value */
+ switch (value) {
+ case TRANSLOG_GCOMMIT_NONE:
+ break;
+ case TRANSLOG_GCOMMIT_HARD:
+ translog_hard_group_commit(TRUE);
+ break;
+ case TRANSLOG_GCOMMIT_SOFT:
+ translog_soft_sync(TRUE);
+ /* variable change made under global lock so we can just read it */
+ if (maria_group_commit_interval)
+ translog_soft_sync_start();
+ break;
+ default:
+ DBUG_ASSERT(0); /* impossible */
+ }
+ DBUG_VOID_RETURN;
+}
+
+/**
+ @brief Updates group commit interval
+
+ In soft group commit mode the soft sync is ended first if the old interval
+ was non-zero and started again if the new one is non-zero. In the other
+ modes the value is only stored and passed to the transaction log.
+
+ @param thd Connection changing the variable (not used)
+ @param var System variable descriptor (not used)
+ @param var_ptr Storage of the variable: holds the old interval on entry
+ and receives the new one
+ @param save The new interval
+*/
+
+static void update_maria_group_commit_interval(MYSQL_THD thd,
+                                               struct st_mysql_sys_var *var,
+                                               void *var_ptr, const void *save)
+{
+  ulong requested= (ulong)*((long *)save);
+  ulong *current= (ulong*) var_ptr;
+  DBUG_ENTER("update_maria_group_commit_interval");
+  DBUG_PRINT("enter", ("old value: %lu new value %lu group commit %lu",
+                       *current, requested, maria_group_commit));
+
+  /* variable change made under global lock so we can just read it */
+  switch (maria_group_commit) {
+  case TRANSLOG_GCOMMIT_NONE:
+  case TRANSLOG_GCOMMIT_HARD:
+    /* No soft sync in these modes: just record the interval */
+    *current= requested;
+    translog_set_group_commit_interval(requested);
+    break;
+  case TRANSLOG_GCOMMIT_SOFT:
+  {
+    const bool was_running= (*current != 0);
+    if (was_running)
+      translog_soft_sync_end();
+    translog_set_group_commit_interval(requested);
+    *current= requested;
+    if (requested)
+      translog_soft_sync_start();
+    break;
+  }
+  default:
+    DBUG_ASSERT(0);                             /* impossible */
+  }
+  DBUG_VOID_RETURN;
+}
+
+/**
+ @brief Updates the transaction log file limit.
+
+ The new value is narrowed to 32 bits before it is passed to the
+ transaction log and stored back in the variable.
+
+ @param thd Connection changing the variable (not used)
+ @param var System variable descriptor (not used)
+ @param var_ptr Storage of the variable, receives the new value
+ @param save The new value
+*/
+
+static void update_log_file_size(MYSQL_THD thd,
+                                 struct st_mysql_sys_var *var,
+                                 void *var_ptr, const void *save)
+{
+  const ulong requested= (ulong)(*(long *)save);
+  const uint32 new_size= (uint32) requested;
+
+  translog_set_file_size(new_size);
+  *(ulong *)var_ptr= new_size;
+}
+
+
+/*
+ Status variables of the engine: page cache counters taken from
+ maria_pagecache_var plus the number of transaction log syncs.
+ The list is terminated by an entry with a NullS name.
+*/
+static SHOW_VAR status_variables[]= {
+ {"pagecache_blocks_not_flushed", (char*) &maria_pagecache_var.global_blocks_changed, SHOW_LONG},
+ {"pagecache_blocks_unused", (char*) &maria_pagecache_var.blocks_unused, SHOW_LONG},
+ {"pagecache_blocks_used", (char*) &maria_pagecache_var.blocks_used, SHOW_LONG},
+ {"pagecache_read_requests", (char*) &maria_pagecache_var.global_cache_r_requests, SHOW_LONGLONG},
+ {"pagecache_reads", (char*) &maria_pagecache_var.global_cache_read, SHOW_LONGLONG},
+ {"pagecache_write_requests", (char*) &maria_pagecache_var.global_cache_w_requests, SHOW_LONGLONG},
+ {"pagecache_writes", (char*) &maria_pagecache_var.global_cache_write, SHOW_LONGLONG},
+ {"transaction_log_syncs", (char*) &translog_syncs, SHOW_LONGLONG},
+ {NullS, NullS, SHOW_LONG}
+};
+
+/****************************************************************************
+ * Maria MRR implementation: use DS-MRR
+ ***************************************************************************/
+
+/* Start a multi-range read: all arguments are forwarded to ds_mrr.dsmrr_init() */
+int ha_maria::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+ uint n_ranges, uint mode,
+ HANDLER_BUFFER *buf)
+{
+ return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
+}
+
+/* Fetch the next row of a multi-range read: forwarded to ds_mrr.dsmrr_next() */
+int ha_maria::multi_range_read_next(range_id_t *range_info)
+{
+ return ds_mrr.dsmrr_next(range_info);
+}
+
+/*
+ Multi-range read information for a known sequence of ranges: initializes
+ ds_mrr with this handler and its table, then forwards to
+ ds_mrr.dsmrr_info_const().
+*/
+ha_rows ha_maria::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param,
+ uint n_ranges, uint *bufsz,
+ uint *flags, Cost_estimate *cost)
+{
+ /*
+ This call is here because there is no location where this->table would
+ already be known.
+ TODO: consider moving it into some per-query initialization call.
+ */
+ ds_mrr.init(this, table);
+ return ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges, bufsz,
+ flags, cost);
+}
+
+/*
+ Multi-range read information when only range and key counts are known:
+ initializes ds_mrr with this handler and its table, then forwards to
+ ds_mrr.dsmrr_info().
+*/
+ha_rows ha_maria::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint key_parts, uint *bufsz,
+ uint *flags, Cost_estimate *cost)
+{
+ ds_mrr.init(this, table);
+ return ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, flags, cost);
+}
+
+/* Describe the MRR mode in 'str': forwarded to ds_mrr.dsmrr_explain_info() */
+int ha_maria::multi_range_read_explain_info(uint mrr_mode, char *str,
+ size_t size)
+{
+ return ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
+}
+/* Maria MRR implementation ends */
+
+
+/* Index condition pushdown implementation*/
+
+
+/**
+ Offer an index condition to the engine for evaluation during index reads.
+
+ @param keyno_arg Index the condition applies to
+ @param idx_cond_arg The condition
+
+ @return idx_cond_arg when the condition is rejected (the key has a BLOB
+ part), so that the server evaluates it; NULL when the condition
+ was accepted
+*/
+Item *ha_maria::idx_cond_push(uint keyno_arg, Item* idx_cond_arg)
+{
+  /*
+    Check if the key contains a blob field. If it does, the pushed index
+    condition is not accepted, since the blob field is not read from the
+    index entry during evaluation of the pushed condition and the BLOB
+    field might be part of the range evaluation done by the ICP code.
+  */
+  const KEY *index= &table_share->key_info[keyno_arg];
+  const KEY_PART_INFO *part= index->key_part;
+  const KEY_PART_INFO *const parts_end=
+    part + index->user_defined_key_parts;
+
+  for (; part != parts_end; part++)
+  {
+    /* Let the server handle the index condition */
+    if (part->key_part_flag & HA_BLOB_PART)
+      return idx_cond_arg;
+  }
+
+  pushed_idx_cond_keyno= keyno_arg;
+  pushed_idx_cond= idx_cond_arg;
+  in_range_check_pushed_down= TRUE;
+
+  /* Install the check right away when the index is already the active one */
+  if (active_index == pushed_idx_cond_keyno)
+    ma_set_index_cond_func(file, handler_index_cond_check, this);
+  return NULL;
+}
+
+/**
+ Find record by unique constraint (used in temporary tables)
+
+ @param record (IN|OUT) the record to find
+ @param constrain_no (IN) number of the constraint (for this engine)
+
+ @note It is like hp_search but uses function for raw where hp_search
+ uses functions for index.
+
+ @retval 0 OK
+ @retval 1 Not found
+ @retval -1 Error
+*/
+
+int ha_maria::find_unique_row(uchar *record, uint constrain_no)
+{
+ int rc;
+ register_handler(file);
+ if (file->s->state.header.uniques)
+ {
+ DBUG_ASSERT(file->s->state.header.uniques > constrain_no);
+ MARIA_UNIQUEDEF *def= file->s->uniqueinfo + constrain_no;
+ ha_checksum unique_hash= _ma_unique_hash(def, record);
+ rc= _ma_check_unique(file, def, record, unique_hash, HA_OFFSET_ERROR);
+ /* Non-zero means a matching row exists: read it from dup_key_pos */
+ if (rc)
+ {
+ file->cur_row.lastpos= file->dup_key_pos;
+ if ((*file->read_record)(file, record, file->cur_row.lastpos))
+ return -1;
+ file->update|= HA_STATE_AKTIV; /* Record is read */
+ }
+ // invert logic
+ rc= !MY_TEST(rc);
+ }
+ else
+ {
+ /*
+ This is the case when just a unique index is used instead of a unique
+ constraint (conversion from heap table).
+ */
+ DBUG_ASSERT(file->s->state.header.keys > constrain_no);
+ MARIA_KEY key;
+ file->once_flags|= USE_PACKED_KEYS;
+ (*file->s->keyinfo[constrain_no].make_key)
+ (file, &key, constrain_no, file->lastkey_buff2, record, 0, 0);
+ rc= maria_rkey(file, record, constrain_no, key.data, key.data_length,
+ HA_READ_KEY_EXACT);
+ /* Any non-zero result of maria_rkey() is reported as 1 */
+ rc= MY_TEST(rc);
+ }
+ return rc;
+}
+
+
+/**
+  Check if a table needs to be repaired
+
+  @retval HA_ADMIN_NEEDS_ALTER  encrypted table whose recorded server
+                                version is non-zero and not above 100509
+  @retval HA_ADMIN_OK           otherwise
+*/
+
+int ha_maria::check_for_upgrade(HA_CHECK_OPT *check)
+{
+  /* Unknown (zero) version, or a version above 100509: nothing to do. */
+  if (!table->s->mysql_version || table->s->mysql_version > 100509)
+    return HA_ADMIN_OK;
+
+  /* Only encrypted tables need the repair. */
+  if (!(file->s->base.extra_options & MA_EXTRA_OPTIONS_ENCRYPTED))
+    return HA_ADMIN_OK;
+
+  /*
+    Old encrypted tables had a bug where LSN was not stored on the pages.
+    These must be repaired! Note that the version test above is inclusive:
+    tables recorded as version 100509 are flagged as well.
+  */
+  return HA_ADMIN_NEEDS_ALTER;
+}
+
+
+/*
+  Storage engine descriptor for this plugin. It is initialized with only the
+  handlerton interface version and is referenced by address from the plugin
+  declaration.
+*/
+struct st_mysql_storage_engine maria_storage_engine=
+{ MYSQL_HANDLERTON_INTERFACE_VERSION };
+
+/*
+  Plugin declaration for the "Aria" storage engine. The entries below give,
+  in order: plugin type, the storage engine descriptor, plugin name, author,
+  description, license, init function, deinit function (none), numeric
+  version, status variables, system variables, version string and maturity.
+*/
+maria_declare_plugin(aria)
+{
+ MYSQL_STORAGE_ENGINE_PLUGIN,
+ &maria_storage_engine,
+ "Aria",
+ "MariaDB Corporation Ab",
+ "Crash-safe tables with MyISAM heritage. Used for internal temporary tables and privilege tables",
+ PLUGIN_LICENSE_GPL,
+ ha_maria_init, /* Plugin Init */
+ NULL, /* Plugin Deinit */
+ 0x0105, /* 1.5 */
+ status_variables, /* status variables */
+ system_variables, /* system variables */
+ "1.5", /* string version */
+ MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
+}
+maria_declare_plugin_end;