summaryrefslogtreecommitdiffstats
path: root/storage/maria/ma_open.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--storage/maria/ma_open.c2200
1 files changed, 2200 insertions, 0 deletions
diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c
new file mode 100644
index 00000000..ad98a534
--- /dev/null
+++ b/storage/maria/ma_open.c
@@ -0,0 +1,2200 @@
+/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+ Copyright (c) 2009, 2022, MariaDB Corporation Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/* open an Aria table */
+
+#include "ma_fulltext.h"
+#include "ma_sp_defs.h"
+#include "ma_rt_index.h"
+#include "ma_blockrec.h"
+#include "trnman.h"
+#include "ma_trnman.h"
+#include <m_ctype.h>
+#include "ma_crypt.h"
+#include "s3_func.h"
+
+#ifdef _WIN32
+#include <fcntl.h>
+#endif
+
+static void setup_key_functions(MARIA_KEYDEF *keyinfo);
+static my_bool maria_scan_init_dummy(MARIA_HA *info);
+static void maria_scan_end_dummy(MARIA_HA *info);
+static my_bool maria_once_init_dummy(MARIA_SHARE *, File);
+static my_bool maria_once_end_dummy(MARIA_SHARE *);
+static uchar *_ma_state_info_read(uchar *, MARIA_STATE_INFO *, myf);
+
+#define get_next_element(to,pos,size) { memcpy((char*) to,pos,(size_t) size); \
+ pos+=size;}
+
+
+#define disk_pos_assert(share, pos, end_pos) \
+if (pos > end_pos) \
+{ \
+ _ma_set_fatal_error_with_share(share, HA_ERR_CRASHED); \
+ goto err; \
+}
+
+
+/******************************************************************************
+** Return the shared struct if the table is already open.
+** In MySQL the server will handle version issues.
+******************************************************************************/
+
+MARIA_HA *_ma_test_if_reopen(const char *filename)
+{
+ LIST *pos;
+
+ for (pos=maria_open_list ; pos ; pos=pos->next)
+ {
+ MARIA_HA *info=(MARIA_HA*) pos->data;
+ MARIA_SHARE *share= info->s;
+ if (!strcmp(share->unique_file_name.str,filename) && share->last_version)
+ return info;
+ }
+ return 0;
+}
+
+
+/*
+ Open a new instance of an already opened Maria table
+
+ SYNOPSIS
+ maria_clone_internal()
+ share Share of already open table
+ mode Mode of table (O_RDONLY | O_RDWR)
+ data_file Filedescriptor of data file to use < 0 if one should open
+ open it.
+ internal_table <> 0 if this is an internal temporary table
+
+ RETURN
+ # Maria handler
+ 0 Error
+*/
+
+static MARIA_HA *maria_clone_internal(MARIA_SHARE *share,
+ int mode, File data_file,
+ uint internal_table,
+ struct ms3_st *s3)
+{
+ int save_errno;
+ uint errpos;
+ MARIA_HA info,*m_info;
+ my_bitmap_map *changed_fields_bitmap;
+ myf flag= MY_WME | (share->temporary ? MY_THREAD_SPECIFIC : 0);
+ DBUG_ENTER("maria_clone_internal");
+
+ errpos= 0;
+ bzero((uchar*) &info,sizeof(info));
+
+ if (mode == O_RDWR && share->mode == O_RDONLY)
+ {
+ my_errno=EACCES; /* Can't open in write mode */
+ goto err;
+ }
+ if (data_file >= 0)
+ info.dfile.file= data_file;
+ else if (_ma_open_datafile(&info, share))
+ goto err;
+ errpos= 5;
+
+ /* alloc and set up private structure parts */
+ if (!my_multi_malloc(PSI_INSTRUMENT_ME, flag,
+ &m_info,sizeof(MARIA_HA),
+ &info.blobs,sizeof(MARIA_BLOB)*share->base.blobs,
+ &info.buff,(share->base.max_key_block_length*2+
+ share->base.max_key_length),
+ &info.lastkey_buff,share->base.max_key_length*3,
+ &info.first_mbr_key, share->base.max_key_length,
+ &info.maria_rtree_recursion_state,
+ share->have_rtree ? 1024 : 0,
+ &changed_fields_bitmap,
+ bitmap_buffer_size(share->base.fields),
+ NullS))
+ goto err;
+ errpos= 6;
+
+ info.s3= s3;
+ memcpy(info.blobs,share->blobs,sizeof(MARIA_BLOB)*share->base.blobs);
+ info.lastkey_buff2= info.lastkey_buff + share->base.max_key_length;
+ info.last_key.data= info.lastkey_buff;
+
+ info.s=share;
+ info.cur_row.lastpos= HA_OFFSET_ERROR;
+ /* Impossible first index to force initialization in _ma_check_index() */
+ info.lastinx= ~0;
+ info.update= (short) (HA_STATE_NEXT_FOUND+HA_STATE_PREV_FOUND);
+ info.opt_flag=READ_CHECK_USED;
+ info.this_unique= (ulong) info.dfile.file; /* Uniq number in process */
+#ifdef MARIA_EXTERNAL_LOCKING
+ if (share->data_file_type == COMPRESSED_RECORD)
+ info.this_unique= share->state.unique;
+ info.this_loop=0; /* Update counter */
+ info.last_unique= share->state.unique;
+ info.last_loop= share->state.update_count;
+#endif
+ info.errkey= -1;
+ info.page_changed= 1;
+ info.autocommit= 1;
+ info.keyread_buff= info.buff + share->base.max_key_block_length;
+
+ info.lock_type= F_UNLCK;
+ if (share->options & HA_OPTION_TMP_TABLE)
+ info.lock_type= F_WRLCK;
+
+ _ma_set_data_pagecache_callbacks(&info.dfile, share);
+ my_bitmap_init(&info.changed_fields, changed_fields_bitmap, share->base.fields);
+ if ((*share->init)(&info))
+ goto err;
+
+ /* The following should be big enough for all pinning purposes */
+ if (my_init_dynamic_array(PSI_INSTRUMENT_ME, &info.pinned_pages,
+ sizeof(MARIA_PINNED_PAGE),
+ MY_MAX(share->base.blobs*2 + 4,
+ MARIA_MAX_TREE_LEVELS*3), 16, flag))
+ goto err;
+
+
+ mysql_mutex_lock(&share->intern_lock);
+ info.read_record= share->read_record;
+ share->reopen++;
+ share->write_flag=MYF(MY_NABP | MY_WAIT_IF_FULL);
+ if (share->options & HA_OPTION_READ_ONLY_DATA)
+ {
+ info.lock_type=F_RDLCK;
+ share->r_locks++;
+ share->tot_locks++;
+ }
+ if ((share->options & HA_OPTION_DELAY_KEY_WRITE) &&
+ maria_delay_key_write)
+ share->delay_key_write=1;
+
+ if (!share->now_transactional) /* If not transctional table */
+ {
+ /* Pagecache requires access to info->trn->rec_lsn */
+ _ma_set_tmp_trn_for_table(&info, &dummy_transaction_object);
+ info.state= &share->state.state; /* Change global values by default */
+ }
+ else
+ {
+ info.state= &share->state.common;
+ *info.state= share->state.state; /* Initial values */
+ }
+ info.state_start= info.state; /* Initial values */
+
+ mysql_mutex_unlock(&share->intern_lock);
+
+ /* Allocate buffer for one record */
+ /* prerequisites: info->rec_buffer == 0 && info->rec_buff_size == 0 */
+ if (_ma_alloc_buffer(&info.rec_buff, &info.rec_buff_size,
+ share->base.default_rec_buff_size, flag))
+ goto err;
+
+ bzero(info.rec_buff, share->base.default_rec_buff_size);
+
+ *m_info=info;
+ thr_lock_data_init(&share->lock,&m_info->lock,(void*) m_info);
+
+ if (share->options & HA_OPTION_TMP_TABLE)
+ m_info->lock.type= TL_WRITE;
+
+ if (!internal_table)
+ {
+ m_info->open_list.data= m_info->share_list.data= (void*) m_info;
+ maria_open_list= list_add(maria_open_list, &m_info->open_list);
+ share->open_list= list_add(share->open_list, &m_info->share_list);
+ }
+ else
+ {
+ /* We don't need to mark internal temporary tables as changed on disk */
+ share->internal_table= 1;
+ share->global_changed= 1;
+ }
+ DBUG_RETURN(m_info);
+
+err:
+ DBUG_PRINT("error", ("error: %d", my_errno));
+ save_errno=my_errno ? my_errno : HA_ERR_END_OF_FILE;
+ if ((save_errno == HA_ERR_CRASHED) ||
+ (save_errno == HA_ERR_CRASHED_ON_USAGE) ||
+ (save_errno == HA_ERR_CRASHED_ON_REPAIR))
+ _ma_report_error(save_errno, &share->open_file_name,
+ MYF(ME_ERROR_LOG));
+ switch (errpos) {
+ case 6:
+ (*share->end)(&info);
+ delete_dynamic(&info.pinned_pages);
+ my_free(m_info->s3);
+ my_free(m_info);
+ /* fall through */
+ case 5:
+ if (data_file < 0)
+ mysql_file_close(info.dfile.file, MYF(0));
+ break;
+ }
+ my_errno=save_errno;
+ DBUG_RETURN (NULL);
+} /* maria_clone_internal */
+
+
+/******************************************************************************
+ open a MARIA table
+
+ See my_base.h for the handle_locking argument
+ if handle_locking and HA_OPEN_ABORT_IF_CRASHED then abort if the table
+ is marked crashed or if we are not using locking and the table doesn't
+ have an open count of 0.
+******************************************************************************/
+
+MARIA_HA *maria_open(const char *name, int mode, uint open_flags,
+ S3_INFO *s3)
+{
+ int open_mode= 0,save_errno;
+ uint i,j,len,errpos,head_length,base_pos,keys, realpath_err,
+ key_parts,base_key_parts,unique_key_parts,fulltext_keys,uniques;
+ uint internal_table= MY_TEST(open_flags & HA_OPEN_INTERNAL_TABLE);
+ myf common_flag= open_flags & HA_OPEN_TMP_TABLE ? MY_THREAD_SPECIFIC : 0;
+ uint file_version;
+ size_t info_length;
+ char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN],
+ data_name[FN_REFLEN];
+ uchar *UNINIT_VAR(disk_cache), *disk_pos, *end_pos;
+ MARIA_HA info, *UNINIT_VAR(m_info), *old_info= NULL;
+ MARIA_SHARE share_buff,*share;
+ double *rec_per_key_part;
+ ulong *nulls_per_key_part;
+ my_off_t key_root[HA_MAX_POSSIBLE_KEY];
+ ulonglong max_key_file_length, max_data_file_length;
+ my_bool versioning= 1, born_transactional;
+ File data_file= -1, kfile= -1;
+ struct ms3_st *s3_client= 0;
+ S3_INFO *share_s3= 0;
+ S3_BLOCK index_header;
+ DBUG_ENTER("maria_open");
+
+ errpos= 0;
+ head_length=sizeof(share_buff.state.header);
+ bzero((uchar*) &info,sizeof(info));
+ bzero((uchar*) &index_header, sizeof(index_header));
+
+#ifndef WITH_S3_STORAGE_ENGINE
+ DBUG_ASSERT(!s3);
+#else
+ if (!s3)
+#endif /* WITH_S3_STORAGE_ENGINE */
+ {
+ realpath_err= my_realpath(name_buff, fn_format(org_name, name, "",
+ MARIA_NAME_IEXT,
+ MY_UNPACK_FILENAME),MYF(0));
+ if (realpath_err > 0) /* File not found, no point in looking further. */
+ {
+ DBUG_RETURN(NULL);
+ }
+
+ if (my_is_symlink(org_name) &&
+ (realpath_err || mysys_test_invalid_symlink(name_buff)))
+ {
+ my_errno= HA_WRONG_CREATE_OPTION;
+ DBUG_RETURN(0);
+ }
+ }
+#ifdef WITH_S3_STORAGE_ENGINE
+ else
+ {
+ strmake(name_buff, name, sizeof(name_buff)-1); /* test_if_reopen() */
+ if (!(s3_client= s3f.open_connection(s3)))
+ {
+ internal_table= 1; /* Avoid unlock on error */
+ goto err;
+ }
+ }
+#endif /* WITH_S3_STORAGE_ENGINE */
+
+ if (!internal_table)
+ mysql_mutex_lock(&THR_LOCK_maria);
+ if ((open_flags & HA_OPEN_COPY) ||
+ (internal_table || !(old_info=_ma_test_if_reopen(name_buff))))
+ {
+ share= &share_buff;
+ bzero((uchar*) &share_buff,sizeof(share_buff));
+ share_buff.state.key_root=key_root;
+ share_buff.pagecache= multi_pagecache_search((uchar*) name_buff,
+ (uint) strlen(name_buff),
+ maria_pagecache);
+
+ if (!s3)
+ {
+ DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_open",
+ if (strstr(name, "/t1"))
+ {
+ my_errno= HA_ERR_CRASHED;
+ goto err;
+ });
+ DEBUG_SYNC_C("mi_open_kfile");
+ if ((kfile=mysql_file_open(key_file_kfile, name_buff,
+ (open_mode=O_RDWR) | O_SHARE | O_NOFOLLOW | O_CLOEXEC,
+ MYF(common_flag | MY_NOSYMLINKS))) < 0)
+ {
+ if ((errno != EROFS && errno != EACCES) ||
+ mode != O_RDONLY ||
+ (kfile=mysql_file_open(key_file_kfile, name_buff,
+ (open_mode=O_RDONLY) | O_SHARE | O_NOFOLLOW | O_CLOEXEC,
+ MYF(common_flag | MY_NOSYMLINKS))) < 0)
+ goto err;
+ }
+ errpos= 1;
+ if (mysql_file_pread(kfile,share->state.header.file_version, head_length,
+ 0, MYF(MY_NABP)))
+ {
+ my_errno= HA_ERR_NOT_A_TABLE;
+ goto err;
+ }
+ }
+#ifdef WITH_S3_STORAGE_ENGINE
+ else
+ {
+ open_mode= mode;
+ errpos= 1;
+ if (s3f.set_database_and_table_from_path(s3, name_buff))
+ {
+ my_printf_error(HA_ERR_NO_SUCH_TABLE,
+ "Can't find database and path from %s", MYF(0),
+ name_buff);
+ my_errno= HA_ERR_NO_SUCH_TABLE;
+ goto err;
+ }
+ if (!(share_s3= share->s3_path= s3f.info_copy(s3)))
+ goto err; /* EiOM */
+
+ /* Check if table has changed in S3 */
+ if (s3f.check_frm_version(s3_client, share_s3) == 1)
+ {
+ my_errno= HA_ERR_TABLE_DEF_CHANGED;
+ goto err;
+ }
+
+ if (s3f.read_index_header(s3_client, share_s3, &index_header))
+ goto err;
+ if (index_header.length < head_length)
+ {
+ my_errno=HA_ERR_NOT_A_TABLE;
+ goto err;
+ }
+ memcpy(share->state.header.file_version, index_header.str,
+ head_length);
+ kfile= s3f.unique_file_number();
+ }
+#endif /* WITH_S3_STORAGE_ENGINE */
+
+ share->mode=open_mode;
+ if (memcmp(share->state.header.file_version, maria_file_magic, 4))
+ {
+ DBUG_PRINT("error",("Wrong header in %s",name_buff));
+ DBUG_DUMP("error_dump", share->state.header.file_version,
+ head_length);
+ my_errno=HA_ERR_NOT_A_TABLE;
+ goto err;
+ }
+ share->options= mi_uint2korr(share->state.header.options);
+ if (share->options &
+ ~(HA_OPTION_PACK_RECORD | HA_OPTION_PACK_KEYS |
+ HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA |
+ HA_OPTION_TEMP_COMPRESS_RECORD | HA_OPTION_CHECKSUM |
+ HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE |
+ HA_OPTION_RELIES_ON_SQL_LAYER | HA_OPTION_NULL_FIELDS |
+ HA_OPTION_PAGE_CHECKSUM))
+ {
+ DBUG_PRINT("error",("wrong options: 0x%lx", share->options));
+ my_errno=HA_ERR_NEW_FILE;
+ goto err;
+ }
+ if ((share->options & HA_OPTION_RELIES_ON_SQL_LAYER) &&
+ ! (open_flags & HA_OPEN_FROM_SQL_LAYER))
+ {
+ DBUG_PRINT("error", ("table cannot be opened from non-sql layer"));
+ my_errno= HA_ERR_UNSUPPORTED;
+ goto err;
+ }
+ if (!s3)
+ {
+ /* Don't call realpath() if the name can't be a link */
+ if (!strcmp(name_buff, org_name) ||
+ my_readlink(index_name, org_name, MYF(0)) == -1)
+ (void) strmov(index_name, org_name);
+ *strrchr(org_name, FN_EXTCHAR)= '\0';
+ (void) fn_format(data_name,org_name,"",MARIA_NAME_DEXT,
+ MY_APPEND_EXT|MY_UNPACK_FILENAME);
+ if (my_is_symlink(data_name))
+ {
+ if (my_realpath(data_name, data_name, MYF(0)))
+ goto err;
+ if (mysys_test_invalid_symlink(data_name))
+ {
+ my_errno= HA_WRONG_CREATE_OPTION;
+ goto err;
+ }
+ share->mode|= O_NOFOLLOW; /* all symlinks are resolved by realpath() */
+ }
+ }
+ else
+ {
+ /* Don't show DIRECTORY in show create table */
+ index_name[0]= data_name[0]= 0;
+ }
+
+ info_length=mi_uint2korr(share->state.header.header_length);
+ base_pos= mi_uint2korr(share->state.header.base_pos);
+
+ /*
+ Allocate space for header information and for data that is too
+ big to keep on stack
+ */
+ if (!(disk_cache= my_malloc(PSI_INSTRUMENT_ME, info_length+128,
+ MYF(MY_WME | common_flag))))
+ {
+ my_errno=ENOMEM;
+ goto err;
+ }
+
+ end_pos=disk_cache+info_length;
+ errpos= 3;
+ if (!s3)
+ {
+ if (mysql_file_pread(kfile, disk_cache, info_length, 0L, MYF(MY_NABP)))
+ {
+ _ma_set_fatal_error_with_share(share, HA_ERR_CRASHED);
+ goto err;
+ }
+ }
+#ifdef WITH_S3_STORAGE_ENGINE
+ else
+ {
+ if (index_header.length < info_length)
+ {
+ my_errno=HA_ERR_NOT_A_TABLE;
+ goto err;
+ }
+ memcpy(disk_cache, index_header.str, info_length);
+ }
+#endif /* WITH_S3_STORAGE_ENGINE */
+
+ len=mi_uint2korr(share->state.header.state_info_length);
+ keys= (uint) share->state.header.keys;
+ uniques= (uint) share->state.header.uniques;
+ fulltext_keys= (uint) share->state.header.fulltext_keys;
+ base_key_parts= key_parts= mi_uint2korr(share->state.header.key_parts);
+ unique_key_parts= mi_uint2korr(share->state.header.unique_key_parts);
+ if (len != MARIA_STATE_INFO_SIZE)
+ {
+ DBUG_PRINT("warning",
+ ("saved_state_info_length: %d state_info_length: %d",
+ len,MARIA_STATE_INFO_SIZE));
+ }
+ share->state_diff_length=len-MARIA_STATE_INFO_SIZE;
+
+ if (!_ma_state_info_read(disk_cache, &share->state, common_flag))
+ goto err;
+ len= mi_uint2korr(share->state.header.base_info_length);
+ if (len != MARIA_BASE_INFO_SIZE)
+ {
+ DBUG_PRINT("warning",("saved_base_info_length: %d base_info_length: %d",
+ len,MARIA_BASE_INFO_SIZE));
+ }
+ disk_pos= _ma_base_info_read(disk_cache + base_pos, &share->base);
+ /*
+ Check if old version of Aria file. Version 0 has language
+ stored in header.not_used
+ */
+ file_version= (share->state.header.not_used == 0);
+ if (file_version == 0)
+ share->base.language= share->state.header.not_used;
+ born_transactional= share->base.born_transactional;
+
+ share->state.state_length=base_pos;
+ /* For newly opened tables we reset the error-has-been-printed flag */
+ share->state.changed&= ~STATE_CRASHED_PRINTED;
+ share->state.org_changed= share->state.changed;
+
+ if (!(open_flags & HA_OPEN_FOR_REPAIR) &&
+ ((share->state.changed & STATE_CRASHED_FLAGS) ||
+ ((open_flags & HA_OPEN_ABORT_IF_CRASHED) &&
+ (my_disable_locking && share->state.open_count))))
+ {
+ DBUG_PRINT("error",("Table is marked as crashed. open_flags: %u "
+ "changed: %u open_count: %u !locking: %d",
+ open_flags, share->state.changed,
+ share->state.open_count, my_disable_locking));
+ my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ?
+ HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE);
+ goto err;
+ }
+ if (share->state.open_count)
+ share->open_count_not_zero_on_open= 1;
+
+ /*
+ A transactional table is not usable on this system if:
+ - share->state.create_trid > trnman_get_max_trid()
+ - Critical as trid as stored releative to create_trid.
+ - uuid is different
+
+ STATE_NOT_MOVABLE is reset when a table is zerofilled
+ (has no LSN's and no trids)
+
+ We can ignore testing uuid if STATE_NOT_MOVABLE is not set, as in this
+ case the uuid will be set in _ma_mark_file_changed().
+ */
+ if (born_transactional &&
+ ((share->state.create_trid > max_trid_in_system() &&
+ !maria_in_recovery) ||
+ ((share->state.changed & STATE_NOT_MOVABLE) &&
+ ((!(open_flags & HA_OPEN_IGNORE_MOVED_STATE) &&
+ memcmp(share->base.uuid, maria_uuid, MY_UUID_SIZE)))) ||
+ ((share->state.changed & (STATE_MOVED | STATE_NOT_ZEROFILLED)) ==
+ (STATE_MOVED | STATE_NOT_ZEROFILLED))))
+ {
+ DBUG_PRINT("warning", ("table is moved from another system. uuid_diff: %d create_trid: %lu max_trid: %lu moved: %d",
+ memcmp(share->base.uuid, maria_uuid,
+ MY_UUID_SIZE) != 0,
+ (ulong) share->state.create_trid,
+ (ulong) trnman_get_max_trid(),
+ MY_TEST((share->state.changed & STATE_MOVED))));
+ if (open_flags & HA_OPEN_FOR_REPAIR)
+ share->state.changed|= STATE_MOVED;
+ else
+ {
+ my_errno= HA_ERR_OLD_FILE;
+ goto err;
+ }
+ }
+
+ /* sanity check */
+ if (share->base.keystart > 65535 || share->base.rec_reflength > 8)
+ {
+ _ma_set_fatal_error_with_share(share, HA_ERR_CRASHED);
+ goto err;
+ }
+
+ key_parts+=fulltext_keys*FT_SEGS;
+ if (share->base.max_key_length > _ma_max_key_length() ||
+ keys > MARIA_MAX_KEY || key_parts > MARIA_MAX_KEY * HA_MAX_KEY_SEG)
+ {
+ DBUG_PRINT("error",("Wrong key info: Max_key_length: %d keys: %d key_parts: %d", share->base.max_key_length, keys, key_parts));
+ my_errno=HA_ERR_UNSUPPORTED;
+ goto err;
+ }
+
+ /* Ensure we have space in the key buffer for transaction id's */
+ if (born_transactional)
+ share->base.max_key_length= ALIGN_SIZE(share->base.max_key_length +
+ MARIA_MAX_PACK_TRANSID_SIZE);
+
+ /*
+ If page cache is not initialized, then assume we will create the
+ page_cache after the table is opened!
+ This is only used by maria_check to allow it to check/repair tables
+ with different block sizes.
+ */
+ if (share->base.block_size != maria_block_size &&
+ share_buff.pagecache->inited != 0)
+ {
+ DBUG_PRINT("error", ("Wrong block size %u; Expected %u",
+ (uint) share->base.block_size,
+ (uint) maria_block_size));
+ my_errno=HA_ERR_UNSUPPORTED;
+ my_printf_error(my_errno, "Wrong block size %u; Expected %u",
+ MYF(0),
+ (uint) share->base.block_size,
+ (uint) maria_block_size);
+ goto err;
+ }
+
+ /* Correct max_file_length based on length of sizeof(off_t) */
+ max_data_file_length=
+ (share->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ?
+ (((ulonglong) 1 << (share->base.rec_reflength*8))-1) :
+ (_ma_safe_mul(share->base.pack_reclength,
+ (ulonglong) 1 << (share->base.rec_reflength*8))-1);
+
+ max_key_file_length=
+ _ma_safe_mul(share->base.block_size,
+ ((ulonglong) 1 << (share->base.key_reflength*8))-1);
+#if SIZEOF_OFF_T == 4
+ set_if_smaller(max_data_file_length, INT_MAX32);
+ set_if_smaller(max_key_file_length, INT_MAX32);
+#endif
+ /* For internal temporary tables, max_data_file_length is already set */
+ if (!internal_table || !share->base.max_data_file_length)
+ share->base.max_data_file_length=(my_off_t) max_data_file_length;
+ DBUG_ASSERT(share->base.max_data_file_length);
+ share->base.max_key_file_length=(my_off_t) max_key_file_length;
+
+ if (share->options & HA_OPTION_COMPRESS_RECORD)
+ share->base.max_key_length+=2; /* For safety */
+ /* Add space for node pointer */
+ share->base.max_key_length+= share->base.key_reflength;
+
+ share->unique_file_name.length= strlen(name_buff);
+ share->index_file_name.length= strlen(index_name);
+ share->data_file_name.length= strlen(data_name);
+ share->open_file_name.length= strlen(name);
+ if (!my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME | common_flag),
+ &share,sizeof(*share),
+ &rec_per_key_part, sizeof(double) * key_parts,
+ &nulls_per_key_part, sizeof(long)* key_parts,
+ &share->keyinfo,keys*sizeof(MARIA_KEYDEF),
+ &share->uniqueinfo,uniques*sizeof(MARIA_UNIQUEDEF),
+ &share->keyparts,
+ (key_parts+unique_key_parts+keys+uniques) *
+ sizeof(HA_KEYSEG),
+ &share->columndef,
+ (share->base.fields+1)*sizeof(MARIA_COLUMNDEF),
+ &share->column_nr, share->base.fields*sizeof(uint16),
+ &share->blobs,sizeof(MARIA_BLOB)*share->base.blobs,
+ &share->unique_file_name.str,
+ share->unique_file_name.length+1,
+ &share->index_file_name.str,
+ share->index_file_name.length+1,
+ &share->data_file_name.str,
+ share->data_file_name.length+1,
+ &share->open_file_name.str,
+ share->open_file_name.length+1,
+ &share->state.key_root,keys*sizeof(my_off_t),
+ &share->mmap_lock,sizeof(mysql_rwlock_t),
+ NullS))
+ goto err;
+ errpos= 4;
+
+ *share= share_buff;
+ share->state.rec_per_key_part= rec_per_key_part;
+ share->state.nulls_per_key_part= nulls_per_key_part;
+
+ memcpy((char*) rec_per_key_part,
+ (char*) share_buff.state.rec_per_key_part,
+ sizeof(double)*base_key_parts);
+ memcpy((char*) nulls_per_key_part,
+ (char*) share_buff.state.nulls_per_key_part,
+ sizeof(long)*base_key_parts);
+ memcpy((char*) share->state.key_root,
+ (char*) key_root, sizeof(my_off_t)*keys);
+ strmov(share->unique_file_name.str, name_buff);
+ strmov(share->index_file_name.str, index_name);
+ strmov(share->data_file_name.str, data_name);
+ strmov(share->open_file_name.str, name);
+
+ share->block_size= share->base.block_size; /* Convenience */
+ share->max_index_block_size= share->block_size - KEYPAGE_CHECKSUM_SIZE;
+ share->keypage_header= ((born_transactional ?
+ LSN_STORE_SIZE + TRANSID_SIZE :
+ 0) + KEYPAGE_KEYID_SIZE + KEYPAGE_FLAG_SIZE +
+ KEYPAGE_USED_SIZE);
+
+ if (MY_TEST(share->base.extra_options & MA_EXTRA_OPTIONS_ENCRYPTED))
+ {
+ share->keypage_header+= ma_crypt_get_index_page_header_space(share);
+ }
+
+ {
+ HA_KEYSEG *pos=share->keyparts;
+ uint32 ftkey_nr= 1;
+ for (i=0 ; i < keys ; i++)
+ {
+ MARIA_KEYDEF *keyinfo= &share->keyinfo[i];
+ keyinfo->share= share;
+ disk_pos=_ma_keydef_read(disk_pos, keyinfo);
+ keyinfo->key_nr= i;
+
+ /* Calculate length to store a key + nod flag and transaction info */
+ keyinfo->max_store_length= (keyinfo->maxlength +
+ share->base.key_reflength);
+ if (born_transactional)
+ keyinfo->max_store_length+= MARIA_INDEX_OVERHEAD_SIZE;
+
+ /* See ma_delete.cc::underflow() */
+ if (!(keyinfo->flag & (HA_BINARY_PACK_KEY | HA_PACK_KEY)))
+ keyinfo->underflow_block_length= keyinfo->block_length/3;
+ else
+ {
+ /* Packed key, ensure we don't get overflow in underflow() */
+ keyinfo->underflow_block_length=
+ MY_MAX((int) (share->max_index_block_size - keyinfo->maxlength * 3),
+ (int) (share->keypage_header + share->base.key_reflength));
+ set_if_smaller(keyinfo->underflow_block_length,
+ keyinfo->block_length/3);
+ }
+
+ disk_pos_assert(share,
+ disk_pos + keyinfo->keysegs * HA_KEYSEG_SIZE,
+ end_pos);
+ if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
+ share->have_rtree= 1;
+ keyinfo->seg=pos;
+ for (j=0 ; j < keyinfo->keysegs; j++,pos++)
+ {
+ disk_pos=_ma_keyseg_read(disk_pos, pos);
+ if (pos->type == HA_KEYTYPE_TEXT ||
+ pos->type == HA_KEYTYPE_VARTEXT1 ||
+ pos->type == HA_KEYTYPE_VARTEXT2)
+ {
+ if (!pos->language)
+ pos->charset=default_charset_info;
+ else if (!(pos->charset= get_charset(pos->language, MYF(MY_WME))))
+ {
+ my_errno=HA_ERR_UNKNOWN_CHARSET;
+ goto err;
+ }
+ }
+ else if (pos->type == HA_KEYTYPE_BINARY)
+ pos->charset= &my_charset_bin;
+ }
+ if (keyinfo->flag & HA_SPATIAL)
+ {
+#ifdef HAVE_SPATIAL
+ uint sp_segs=SPDIMS*2;
+ keyinfo->seg=pos-sp_segs;
+ keyinfo->keysegs--;
+ versioning= 0;
+#else
+ my_errno=HA_ERR_UNSUPPORTED;
+ goto err;
+#endif
+ }
+ else if (keyinfo->flag & HA_FULLTEXT)
+ {
+ versioning= 0;
+ DBUG_ASSERT(fulltext_keys);
+ {
+ uint k;
+ keyinfo->seg=pos;
+ for (k=0; k < FT_SEGS; k++)
+ {
+ *pos= ft_keysegs[k];
+ pos[0].language= pos[-1].language;
+ if (!(pos[0].charset= pos[-1].charset))
+ {
+ _ma_set_fatal_error_with_share(share, HA_ERR_CRASHED);
+ goto err;
+ }
+ pos++;
+ }
+ }
+ if (!share->ft2_keyinfo.seg)
+ {
+ memcpy(&share->ft2_keyinfo, keyinfo, sizeof(MARIA_KEYDEF));
+ share->ft2_keyinfo.keysegs=1;
+ share->ft2_keyinfo.flag=0;
+ share->ft2_keyinfo.keylength=
+ share->ft2_keyinfo.minlength=
+ share->ft2_keyinfo.maxlength=HA_FT_WLEN+share->base.rec_reflength;
+ share->ft2_keyinfo.seg=pos-1;
+ share->ft2_keyinfo.end=pos;
+ setup_key_functions(& share->ft2_keyinfo);
+ }
+ keyinfo->ftkey_nr= ftkey_nr++;
+ }
+ setup_key_functions(keyinfo);
+ keyinfo->end=pos;
+ pos->type=HA_KEYTYPE_END; /* End */
+ pos->length=share->base.rec_reflength;
+ pos->null_bit=0;
+ pos->flag=0; /* For purify */
+ pos++;
+ }
+ for (i=0 ; i < uniques ; i++)
+ {
+ disk_pos=_ma_uniquedef_read(disk_pos, &share->uniqueinfo[i]);
+ disk_pos_assert(share,
+ disk_pos + share->uniqueinfo[i].keysegs *
+ HA_KEYSEG_SIZE, end_pos);
+ share->uniqueinfo[i].seg=pos;
+ for (j=0 ; j < share->uniqueinfo[i].keysegs; j++,pos++)
+ {
+ disk_pos=_ma_keyseg_read(disk_pos, pos);
+ if (pos->type == HA_KEYTYPE_TEXT ||
+ pos->type == HA_KEYTYPE_VARTEXT1 ||
+ pos->type == HA_KEYTYPE_VARTEXT2)
+ {
+ if (!pos->language)
+ pos->charset=default_charset_info;
+ else if (!(pos->charset= get_charset(pos->language, MYF(MY_WME))))
+ {
+ my_errno=HA_ERR_UNKNOWN_CHARSET;
+ goto err;
+ }
+ }
+ }
+ share->uniqueinfo[i].end=pos;
+ pos->type=HA_KEYTYPE_END; /* End */
+ pos->null_bit=0;
+ pos->flag=0;
+ pos++;
+ }
+ share->ftkeys= ftkey_nr;
+ }
+ share->data_file_type= share->state.header.data_file_type;
+ share->base_length= (BASE_ROW_HEADER_SIZE +
+ share->base.is_nulls_extended +
+ share->base.null_bytes +
+ share->base.pack_bytes +
+ MY_TEST(share->options & HA_OPTION_CHECKSUM));
+ share->kfile.file= kfile;
+
+ if (open_flags & HA_OPEN_COPY)
+ {
+ /*
+ this instance will be a temporary one used just to create a data
+ file for REPAIR. Don't do logging. This base information will not go
+ to disk.
+ */
+ born_transactional= FALSE;
+ }
+ if (born_transactional)
+ {
+ share->page_type= PAGECACHE_LSN_PAGE;
+ if (share->state.create_rename_lsn == LSN_NEEDS_NEW_STATE_LSNS)
+ {
+ /*
+ Was repaired with maria_chk, maybe later maria_pack-ed. Some sort of
+ import into the server. It starts its existence (from the point of
+ view of the server, including server's recovery) now.
+ */
+ if (((open_flags & HA_OPEN_FROM_SQL_LAYER) &&
+ (share->state.changed & STATE_NOT_MOVABLE)) || maria_in_recovery)
+ _ma_update_state_lsns_sub(share, LSN_IMPOSSIBLE,
+ trnman_get_min_safe_trid(), TRUE, TRUE);
+ }
+ else if ((!LSN_VALID(share->state.create_rename_lsn) ||
+ !LSN_VALID(share->state.is_of_horizon) ||
+ (cmp_translog_addr(share->state.create_rename_lsn,
+ share->state.is_of_horizon) > 0) ||
+ !LSN_VALID(share->state.skip_redo_lsn) ||
+ (cmp_translog_addr(share->state.create_rename_lsn,
+ share->state.skip_redo_lsn) > 0)))
+ {
+ if (!(open_flags & HA_OPEN_FOR_REPAIR))
+ {
+ /*
+ If in Recovery, it will not work. If LSN is invalid and not
+ LSN_NEEDS_NEW_STATE_LSNS, header must be corrupted.
+ In both cases, must repair.
+ */
+ my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ?
+ HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE);
+ goto err;
+ }
+ else
+ {
+ /*
+ Open in repair mode. Ensure that we mark the table crashed, so
+ that we run auto_repair on it
+ */
+ maria_mark_crashed_share(share);
+ }
+ }
+ else if (!(open_flags & HA_OPEN_FOR_REPAIR))
+ {
+ /* create_rename_lsn != LSN_NEEDS_NEW_STATE_LSNS */
+ share->state.changed|= STATE_NOT_MOVABLE;
+ }
+ }
+ else
+ share->page_type= PAGECACHE_PLAIN_PAGE;
+ share->now_transactional= born_transactional;
+
+ /* Use pack_reclength as we don't want to modify base.pack_recklength */
+ if (share->state.header.org_data_file_type == DYNAMIC_RECORD)
+ {
+ /* add bits used to pack data to pack_reclength for faster allocation */
+ share->base.pack_reclength+= share->base.pack_bytes;
+ share->base.extra_rec_buff_size=
+ (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER) + MARIA_SPLIT_LENGTH +
+ MARIA_REC_BUFF_OFFSET);
+ }
+ if (share->data_file_type == COMPRESSED_RECORD)
+ {
+ /* Need some extra bytes for decode_bytes */
+ share->base.extra_rec_buff_size+= 7;
+ }
+ share->base.default_rec_buff_size= MY_MAX(share->base.pack_reclength +
+ share->base.extra_rec_buff_size,
+ share->base.max_key_length);
+
+ disk_pos_assert(share,
+ disk_pos + share->base.fields *MARIA_COLUMNDEF_SIZE,
+ end_pos);
+ for (i= j= 0 ; i < share->base.fields ; i++)
+ {
+ disk_pos=_ma_columndef_read(disk_pos,&share->columndef[i]);
+ share->columndef[i].pack_type=0;
+ share->columndef[i].huff_tree=0;
+ if (share->columndef[i].type == FIELD_BLOB)
+ {
+ share->blobs[j].pack_length=
+ share->columndef[i].length-portable_sizeof_char_ptr;
+ share->blobs[j].offset= share->columndef[i].offset;
+ j++;
+ }
+ if (share->columndef[i].type == FIELD_VARCHAR)
+ share->has_varchar_fields= 1;
+ if (share->columndef[i].null_bit)
+ share->has_null_fields= 1;
+ }
+ share->columndef[i].type= FIELD_LAST; /* End marker */
+ disk_pos= _ma_column_nr_read(disk_pos, share->column_nr,
+ share->base.fields);
+
+ if (MY_TEST(share->base.extra_options & MA_EXTRA_OPTIONS_ENCRYPTED))
+ {
+ if (!(disk_pos= ma_crypt_read(share, disk_pos,
+ MY_TEST(open_flags & HA_OPEN_FOR_DROP))))
+ goto err;
+ }
+
+ if ((share->data_file_type == BLOCK_RECORD ||
+ share->data_file_type == COMPRESSED_RECORD))
+ {
+ if (!s3)
+ {
+ if (_ma_open_datafile(&info, share))
+ goto err;
+ data_file= info.dfile.file;
+ }
+#ifdef WITH_S3_STORAGE_ENGINE
+ else
+ data_file= info.dfile.file= s3f.unique_file_number();
+#endif /* WITH_S3_STORAGE_ENGINE */
+ }
+ errpos= 5;
+
+ if (open_flags & HA_OPEN_DELAY_KEY_WRITE)
+ share->options|= HA_OPTION_DELAY_KEY_WRITE;
+ if (mode == O_RDONLY)
+ share->options|= HA_OPTION_READ_ONLY_DATA;
+ share->is_log_table= FALSE;
+
+ if (open_flags & HA_OPEN_TMP_TABLE || share->options & HA_OPTION_TMP_TABLE)
+ {
+ common_flag|= MY_THREAD_SPECIFIC;
+ share->options|= HA_OPTION_TMP_TABLE;
+ share->temporary= share->delay_key_write= 1;
+ share->write_flag=MYF(MY_NABP);
+ share->w_locks++; /* We don't have to update status */
+ share->tot_locks++;
+ }
+
+ _ma_set_index_pagecache_callbacks(&share->kfile, share);
+ share->this_process=(ulong) getpid();
+#ifdef MARIA_EXTERNAL_LOCKING
+ share->last_process= share->state.process;
+#endif
+ share->base.key_parts=key_parts;
+ share->base.all_key_parts=key_parts+unique_key_parts;
+ if (!(share->last_version=share->state.version))
+ share->last_version=1; /* Safety */
+ share->rec_reflength=share->base.rec_reflength; /* May be changed */
+ share->base.margin_key_file_length=(share->base.max_key_file_length -
+ (keys ? MARIA_INDEX_BLOCK_MARGIN *
+ share->block_size * keys : 0));
+ my_free(disk_cache);
+ my_free(share_buff.state.rec_per_key_part);
+ disk_cache= 0;
+ share_buff.state.rec_per_key_part= 0;
+
+ _ma_setup_functions(share);
+ max_data_file_length= share->base.max_data_file_length;
+ if ((*share->once_init)(share, info.dfile.file))
+ goto err;
+ errpos= 6;
+ if (internal_table)
+ set_if_smaller(share->base.max_data_file_length,
+ max_data_file_length);
+ if (share->now_transactional)
+ {
+ /* Setup initial state that is visible for all */
+ MARIA_STATE_HISTORY_CLOSED *history;
+ if ((history= (MARIA_STATE_HISTORY_CLOSED *)
+ my_hash_search(&maria_stored_state,
+ (uchar*) &share->state.create_rename_lsn,
+ sizeof(share->state.create_rename_lsn))))
+ {
+ /*
+ Move history from hash to share. This is safe to do as we
+ know we are the only one that is using the share.
+ */
+ share->state_history=
+ _ma_remove_not_visible_states(history->state_history, 0, 0);
+ history->state_history= 0;
+ (void) my_hash_delete(&maria_stored_state, (uchar*) history);
+ DBUG_PRINT("info", ("Reading state history. trid: %lu records: %lld",
+ (ulong) share->state_history->trid,
+ share->state_history->state.records));
+ }
+ else
+ {
+ /* Table is not part of any active transaction; Create new history */
+ if (!(share->state_history= (MARIA_STATE_HISTORY *)
+ my_malloc(PSI_INSTRUMENT_ME, sizeof(*share->state_history),
+ MYF(MY_WME))))
+ goto err;
+ share->state_history->trid= 0; /* Visible by all */
+ share->state_history->state= share->state.state;
+ share->state_history->next= 0;
+ }
+ }
+ errpos= 7;
+ thr_lock_init(&share->lock);
+ mysql_mutex_init(key_SHARE_intern_lock,
+ &share->intern_lock, MY_MUTEX_INIT_FAST);
+ mysql_mutex_init(key_SHARE_key_del_lock,
+ &share->key_del_lock, MY_MUTEX_INIT_FAST);
+ mysql_cond_init(key_SHARE_key_del_cond, &share->key_del_cond, 0);
+ mysql_mutex_init(key_SHARE_close_lock,
+ &share->close_lock, MY_MUTEX_INIT_FAST);
+ for (i=0; i<keys; i++)
+ mysql_rwlock_init(key_KEYINFO_root_lock,
+ &share->keyinfo[i].root_lock);
+ mysql_rwlock_init(key_SHARE_mmap_lock, &share->mmap_lock);
+
+ share->row_is_visible= _ma_row_visible_always;
+ share->lock.get_status= _ma_reset_update_flag;
+ share->lock.start_trans= _ma_start_trans;
+
+ if (!thr_lock_inited)
+ {
+ /* Probably a single threaded program; Don't use concurrent inserts */
+ maria_concurrent_insert=0;
+ }
+ else if (maria_concurrent_insert)
+ {
+ share->non_transactional_concurrent_insert=
+ ((share->options & (HA_OPTION_READ_ONLY_DATA | HA_OPTION_TMP_TABLE |
+ HA_OPTION_COMPRESS_RECORD |
+ HA_OPTION_TEMP_COMPRESS_RECORD)) ||
+ (open_flags & HA_OPEN_TMP_TABLE) ||
+ share->data_file_type == BLOCK_RECORD ||
+ share->have_rtree) ? 0 : 1;
+ if (share->non_transactional_concurrent_insert ||
+ (!share->temporary && share->now_transactional && versioning))
+ {
+ share->lock_key_trees= 1;
+ if (share->data_file_type == BLOCK_RECORD)
+ {
+ DBUG_ASSERT(share->now_transactional);
+ share->have_versioning= 1;
+ share->row_is_visible= _ma_row_visible_transactional_table;
+ share->lock.get_status= _ma_block_get_status;
+ share->lock.check_status= _ma_block_check_status;
+ share->lock.start_trans= _ma_block_start_trans;
+ /*
+ We can for the moment only allow multiple concurrent inserts
+ only if there is no auto-increment key. To lift this restriction
+ we have to:
+ - Extend statement base replication to support auto-increment
+ intervalls.
+ - Fix that we allocate auto-increment in intervals and that
+ it's properly reset if the interval was not used
+ */
+ share->lock.allow_multiple_concurrent_insert=
+ share->base.auto_key == 0;
+ share->lock_restore_status= 0;
+ }
+ else
+ {
+ share->row_is_visible= _ma_row_visible_non_transactional_table;
+ share->lock.get_status= _ma_get_status;
+ share->lock.copy_status= _ma_copy_status;
+ share->lock.update_status= _ma_update_status;
+ share->lock.restore_status= _ma_restore_status;
+ share->lock.check_status= _ma_check_status;
+ share->lock_restore_status= _ma_restore_status;
+ }
+ }
+ else if (share->now_transactional)
+ {
+ DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
+ share->lock.start_trans= _ma_block_start_trans_no_versioning;
+ }
+ }
+#ifdef SAFE_MUTEX
+ if (share->data_file_type == BLOCK_RECORD)
+ {
+ /*
+ We must have internal_lock before bitmap_lock because we call
+ _ma_flush_table_files() with internal_lock locked.
+ */
+ mysql_mutex_lock(&share->intern_lock);
+ mysql_mutex_lock(&share->bitmap.bitmap_lock);
+ mysql_mutex_unlock(&share->bitmap.bitmap_lock);
+ mysql_mutex_unlock(&share->intern_lock);
+ }
+#endif
+ /*
+ Memory mapping can only be requested after initializing intern_lock.
+ */
+ if (open_flags & HA_OPEN_MMAP)
+ {
+ info.s= share;
+ maria_extra(&info, HA_EXTRA_MMAP, 0);
+ }
+#ifdef WITH_S3_STORAGE_ENGINE
+ if (s3_client)
+ {
+ size_t block_size= share->base.s3_block_size;
+ s3f.set_option(s3_client, MS3_OPT_BUFFER_CHUNK_SIZE, &block_size);
+ }
+#endif /* WITH_S3_STORAGE_ENGINE */
+ }
+ else
+ {
+ share= old_info->s;
+ if (share->data_file_type == BLOCK_RECORD)
+ data_file= share->bitmap.file.file; /* Only opened once */
+ }
+
+#ifdef WITH_S3_STORAGE_ENGINE
+ if (index_header.alloc_ptr)
+ s3f.free(&index_header);
+#endif /* WITH_S3_STORAGE_ENGINE */
+
+ if (!(m_info= maria_clone_internal(share, mode, data_file,
+ internal_table, s3_client)))
+ goto err;
+
+ if (maria_is_crashed(m_info))
+ DBUG_PRINT("warning", ("table is crashed: changed: %u",
+ share->state.changed));
+
+ if (!internal_table)
+ mysql_mutex_unlock(&THR_LOCK_maria);
+
+ m_info->open_flags= open_flags;
+ m_info->stack_end_ptr= &my_thread_var->stack_ends_here;
+ DBUG_PRINT("exit", ("table: %p name: %s",m_info, name));
+ DBUG_RETURN(m_info);
+
+err:
+ DBUG_PRINT("error", ("error: %d errpos: %d", my_errno, errpos));
+ save_errno=my_errno ? my_errno : HA_ERR_END_OF_FILE;
+ if ((save_errno == HA_ERR_CRASHED) ||
+ (save_errno == HA_ERR_CRASHED_ON_USAGE) ||
+ (save_errno == HA_ERR_CRASHED_ON_REPAIR))
+ {
+ LEX_STRING tmp_name;
+ tmp_name.str= (char*) name;
+ tmp_name.length= strlen(name);
+ _ma_report_error(save_errno, &tmp_name, MYF(ME_ERROR_LOG));
+ }
+ switch (errpos) {
+ case 7:
+ thr_lock_delete(&share->lock);
+ /* fall through */
+ case 6:
+ /* Avoid mutex test in _ma_bitmap_end() */
+ share->internal_table= 1;
+ (*share->once_end)(share);
+ /* fall through */
+ case 5:
+ if (data_file >= 0 && !s3_client)
+ mysql_file_close(data_file, MYF(0));
+ if (old_info)
+ break; /* Don't remove open table */
+ /* fall through */
+ case 4:
+ ma_crypt_free(share);
+ my_free(share);
+ /* fall through */
+ case 3:
+ my_free(disk_cache);
+ my_free(share_buff.state.rec_per_key_part);
+ /* fall through */
+ case 1:
+ if (!s3)
+ mysql_file_close(kfile,MYF(0));
+ my_free(share_s3);
+ /* fall through */
+ case 0:
+ default:
+ break;
+ }
+#ifdef WITH_S3_STORAGE_ENGINE
+ if (s3_client)
+ s3f.deinit(s3_client);
+ if (index_header.alloc_ptr)
+ s3f.free(&index_header);
+#endif /* WITH_S3_STORAGE_ENGINE */
+ if (!internal_table)
+ mysql_mutex_unlock(&THR_LOCK_maria);
+ my_errno= save_errno;
+ DBUG_RETURN (NULL);
+} /* maria_open */
+
+
+/*
+ Reallocate a buffer, if the current buffer is not large enough
+*/
+
+my_bool _ma_alloc_buffer(uchar **old_addr, size_t *old_size,
+ size_t new_size, myf flag)
+{
+ if (*old_size < new_size)
+ {
+ uchar *addr;
+ if (!(addr= (uchar*) my_realloc(PSI_INSTRUMENT_ME, *old_addr, new_size,
+ MYF(MY_ALLOW_ZERO_PTR | flag))))
+ return 1;
+ *old_addr= addr;
+ *old_size= new_size;
+ }
+ return 0;
+}
+
+
+ulonglong _ma_safe_mul(ulonglong a, ulonglong b)
+{
+ ulonglong max_val= ~ (ulonglong) 0; /* my_off_t is unsigned */
+
+ if (!a || max_val / a < b)
+ return max_val;
+ return a*b;
+}
+
+ /* Set up functions in structs */
+
+void _ma_setup_functions(register MARIA_SHARE *share)
+{
+ share->once_init= maria_once_init_dummy;
+ share->once_end= maria_once_end_dummy;
+ share->init= maria_scan_init_dummy;
+ share->end= maria_scan_end_dummy;
+ share->scan_init= maria_scan_init_dummy;/* Compat. dummy function */
+ share->scan_end= maria_scan_end_dummy;/* Compat. dummy function */
+ share->scan_remember_pos= _ma_def_scan_remember_pos;
+ share->scan_restore_pos= _ma_def_scan_restore_pos;
+
+ share->write_record_init= _ma_write_init_default;
+ share->write_record_abort= _ma_write_abort_default;
+ share->keypos_to_recpos= _ma_transparent_recpos;
+ share->recpos_to_keypos= _ma_transparent_recpos;
+
+ switch (share->data_file_type) {
+ case COMPRESSED_RECORD:
+ share->read_record= _ma_read_pack_record;
+ share->scan= _ma_read_rnd_pack_record;
+ share->once_init= _ma_once_init_pack_row;
+ share->once_end= _ma_once_end_pack_row;
+ /*
+ Calculate checksum according to data in the original, not compressed,
+ row.
+ */
+ if (share->state.header.org_data_file_type == STATIC_RECORD &&
+ ! (share->options & HA_OPTION_NULL_FIELDS))
+ share->calc_checksum= _ma_static_checksum;
+ else
+ share->calc_checksum= _ma_checksum;
+ share->calc_write_checksum= share->calc_checksum;
+ break;
+ case DYNAMIC_RECORD:
+ share->read_record= _ma_read_dynamic_record;
+ share->scan= _ma_read_rnd_dynamic_record;
+ share->delete_record= _ma_delete_dynamic_record;
+ share->compare_record= _ma_cmp_dynamic_record;
+ share->compare_unique= _ma_cmp_dynamic_unique;
+ share->calc_checksum= share->calc_write_checksum= _ma_checksum;
+ if (share->base.blobs)
+ {
+ share->update_record= _ma_update_blob_record;
+ share->write_record= _ma_write_blob_record;
+ }
+ else
+ {
+ share->write_record= _ma_write_dynamic_record;
+ share->update_record= _ma_update_dynamic_record;
+ }
+ break;
+ case STATIC_RECORD:
+ share->read_record= _ma_read_static_record;
+ share->scan= _ma_read_rnd_static_record;
+ share->delete_record= _ma_delete_static_record;
+ share->compare_record= _ma_cmp_static_record;
+ share->update_record= _ma_update_static_record;
+ share->write_record= _ma_write_static_record;
+ share->compare_unique= _ma_cmp_static_unique;
+ share->keypos_to_recpos= _ma_static_keypos_to_recpos;
+ share->recpos_to_keypos= _ma_static_recpos_to_keypos;
+ if (share->state.header.org_data_file_type == STATIC_RECORD &&
+ ! (share->options & HA_OPTION_NULL_FIELDS))
+ share->calc_checksum= _ma_static_checksum;
+ else
+ share->calc_checksum= _ma_checksum;
+ break;
+ case NO_RECORD:
+ share->read_record= _ma_read_no_record;
+ share->scan= _ma_read_rnd_no_record;
+ share->delete_record= _ma_delete_no_record;
+ share->update_record= _ma_update_no_record;
+ share->write_record= _ma_write_no_record;
+ share->recpos_to_keypos= _ma_no_keypos_to_recpos;
+ share->keypos_to_recpos= _ma_no_keypos_to_recpos;
+
+ /* Abort if following functions are called */
+ share->compare_record= 0;
+ share->compare_unique= 0;
+ share->calc_checksum= 0;
+ break;
+ case BLOCK_RECORD:
+ share->once_init= _ma_once_init_block_record;
+ share->once_end= _ma_once_end_block_record;
+ share->init= _ma_init_block_record;
+ share->end= _ma_end_block_record;
+ share->write_record_init= _ma_write_init_block_record;
+ share->write_record_abort= _ma_write_abort_block_record;
+ share->scan_init= _ma_scan_init_block_record;
+ share->scan_end= _ma_scan_end_block_record;
+ share->scan= _ma_scan_block_record;
+ share->scan_remember_pos= _ma_scan_remember_block_record;
+ share->scan_restore_pos= _ma_scan_restore_block_record;
+ share->read_record= _ma_read_block_record;
+ share->delete_record= _ma_delete_block_record;
+ share->compare_record= _ma_compare_block_record;
+ share->update_record= _ma_update_block_record;
+ share->write_record= _ma_write_block_record;
+ share->compare_unique= _ma_cmp_block_unique;
+ share->calc_checksum= _ma_checksum;
+ share->keypos_to_recpos= _ma_transaction_keypos_to_recpos;
+ share->recpos_to_keypos= _ma_transaction_recpos_to_keypos;
+
+ /*
+ write_block_record() will calculate the checksum; Tell maria_write()
+ that it doesn't have to do this.
+ */
+ share->calc_write_checksum= 0;
+ break;
+ }
+ share->file_read= _ma_nommap_pread;
+ share->file_write= _ma_nommap_pwrite;
+ share->calc_check_checksum= share->calc_checksum;
+
+ if (!(share->options & HA_OPTION_CHECKSUM) &&
+ share->data_file_type != COMPRESSED_RECORD)
+ share->calc_checksum= share->calc_write_checksum= 0;
+ return;
+}
+
+
+static void setup_key_functions(register MARIA_KEYDEF *keyinfo)
+{
+ if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
+ {
+#ifdef HAVE_RTREE_KEYS
+ keyinfo->ck_insert = maria_rtree_insert;
+ keyinfo->ck_delete = maria_rtree_delete;
+#else
+ DBUG_ASSERT(0); /* maria_open should check it never happens */
+#endif
+ }
+ else
+ {
+ keyinfo->ck_insert = _ma_ck_write;
+ keyinfo->ck_delete = _ma_ck_delete;
+ }
+ if (keyinfo->flag & HA_SPATIAL)
+ keyinfo->make_key= _ma_sp_make_key;
+ else
+ keyinfo->make_key= _ma_make_key;
+
+ if (keyinfo->flag & HA_BINARY_PACK_KEY)
+ { /* Simple prefix compression */
+ keyinfo->bin_search= _ma_seq_search;
+ keyinfo->get_key= _ma_get_binary_pack_key;
+ keyinfo->skip_key= _ma_skip_binary_pack_key;
+ keyinfo->pack_key= _ma_calc_bin_pack_key_length;
+ keyinfo->store_key= _ma_store_bin_pack_key;
+ }
+ else if (keyinfo->flag & HA_VAR_LENGTH_KEY)
+ {
+ keyinfo->get_key= _ma_get_pack_key;
+ keyinfo->skip_key= _ma_skip_pack_key;
+ if (keyinfo->seg[0].flag & HA_PACK_KEY)
+ { /* Prefix compression */
+ /*
+ _ma_prefix_search() compares end-space against ASCII blank (' ').
+ It cannot be used for character sets, that do not encode the
+ blank character like ASCII does. UCS2 is an example. All
+ character sets with a fixed width > 1 or a mimimum width > 1
+ cannot represent blank like ASCII does. In these cases we have
+ to use _ma_seq_search() for the search.
+ */
+ if (!keyinfo->seg->charset || use_strnxfrm(keyinfo->seg->charset) ||
+ (keyinfo->seg->flag & HA_NULL_PART) ||
+ keyinfo->seg->charset->mbminlen > 1)
+ keyinfo->bin_search= _ma_seq_search;
+ else
+ keyinfo->bin_search= _ma_prefix_search;
+ keyinfo->pack_key= _ma_calc_var_pack_key_length;
+ keyinfo->store_key= _ma_store_var_pack_key;
+ }
+ else
+ {
+ keyinfo->bin_search= _ma_seq_search;
+ keyinfo->pack_key= _ma_calc_var_key_length; /* Variable length key */
+ keyinfo->store_key= _ma_store_static_key;
+ }
+ }
+ else
+ {
+ keyinfo->bin_search= _ma_bin_search;
+ keyinfo->get_key= _ma_get_static_key;
+ keyinfo->skip_key= _ma_skip_static_key;
+ keyinfo->pack_key= _ma_calc_static_key_length;
+ keyinfo->store_key= _ma_store_static_key;
+ }
+
+ /* set keyinfo->write_comp_flag */
+ if (keyinfo->flag & HA_SORT_ALLOWS_SAME)
+ keyinfo->write_comp_flag=SEARCH_BIGGER; /* Put after same key */
+ else if (keyinfo->flag & ( HA_NOSAME | HA_FULLTEXT))
+ {
+ keyinfo->write_comp_flag= SEARCH_FIND | SEARCH_UPDATE; /* No duplicates */
+ if (keyinfo->flag & HA_NULL_ARE_EQUAL)
+ keyinfo->write_comp_flag|= SEARCH_NULL_ARE_EQUAL;
+ }
+ else
+ keyinfo->write_comp_flag= SEARCH_SAME; /* Keys in rec-pos order */
+ keyinfo->write_comp_flag|= SEARCH_INSERT;
+ return;
+}
+
+
+/**
+ @brief Function to save and store the header in the index file (.MAI)
+
+ Operates under MARIA_SHARE::intern_lock if requested.
+ Sets MARIA_SHARE::MARIA_STATE_INFO::is_of_horizon if transactional table.
+ Then calls _ma_state_info_write_sub().
+
+ @param share table
+ @param pWrite bitmap: if 1 (MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET)
+ is set my_pwrite() is used otherwise my_write();
+ if 2 (MA_STATE_INFO_WRITE_FULL_INFO) is set, info
+ about keys is written (should only be needed
+ after ALTER TABLE ENABLE/DISABLE KEYS, and
+ REPAIR/OPTIMIZE); if 4 (MA_STATE_INFO_WRITE_LOCK)
+ is set, MARIA_SHARE::intern_lock is taken.
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+uint _ma_state_info_write(MARIA_SHARE *share, uint pWrite)
+{
+ uint res;
+ if (share->options & HA_OPTION_READ_ONLY_DATA)
+ return 0;
+
+ if (pWrite & MA_STATE_INFO_WRITE_LOCK)
+ mysql_mutex_lock(&share->intern_lock);
+ else if (maria_multi_threaded && !share->temporary)
+ mysql_mutex_assert_owner(&share->intern_lock);
+ if (share->base.born_transactional && translog_status == TRANSLOG_OK &&
+ !maria_in_recovery)
+ {
+ /*
+ In a recovery, we want to set is_of_horizon to the LSN of the last
+ record executed by Recovery, not the current EOF of the log (which
+ is too new). Recovery does it by itself.
+ */
+ share->state.is_of_horizon= translog_get_horizon();
+ DBUG_PRINT("info", ("is_of_horizon set to LSN " LSN_FMT "",
+ LSN_IN_PARTS(share->state.is_of_horizon)));
+ }
+ res= _ma_state_info_write_sub(share->kfile.file, &share->state, pWrite);
+ if (pWrite & MA_STATE_INFO_WRITE_LOCK)
+ mysql_mutex_unlock(&share->intern_lock);
+ /* If open_count != 0 we have to write the state again at close */
+ share->changed= share->state.open_count != 0;
+ return res;
+}
+
+
+/**
+ @brief Function to save and store the header in the index file (.MYI).
+
+ Shortcut to use instead of _ma_state_info_write() when appropriate.
+
+ @param file descriptor of the index file to write
+ @param state state information to write to the file
+ @param pWrite bitmap: if 1 (MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET)
+ is set my_pwrite() is used otherwise my_write();
+ if 2 (MA_STATE_INFO_WRITE_FULL_INFO) is set, info
+ about keys is written (should only be needed
+ after ALTER TABLE ENABLE/DISABLE KEYS, and
+ REPAIR/OPTIMIZE).
+
+ @notes
+ For transactional multiuser tables, this function is called
+ with intern_lock & translog_lock or when the last thread who
+ is using the table is closing it.
+ Because of the translog_lock we don't need to have a lock on
+ key_del_lock.
+
+ @return Operation status
+ @retval 0 OK
+ @retval 1 Error
+*/
+
+uint _ma_state_info_write_sub(File file, MARIA_STATE_INFO *state, uint pWrite)
+{
+ uchar buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE];
+ uchar *ptr=buff;
+ uint i, keys= (uint) state->header.keys;
+ size_t res;
+ DBUG_ENTER("_ma_state_info_write_sub");
+ DBUG_PRINT("info", ("Records: %lld", state->state.records));
+
+ memcpy(ptr,&state->header,sizeof(state->header));
+ ptr+=sizeof(state->header);
+
+ /* open_count must be first because of _ma_mark_file_changed ! */
+ mi_int2store(ptr,state->open_count); ptr+= 2;
+ /* changed must be second, because of _ma_mark_file_crashed */
+ mi_int2store(ptr,state->changed); ptr+= 2;
+
+ /*
+ If you change the offset of these LSNs, note that some functions do a
+ direct write of them without going through this function.
+ */
+ lsn_store(ptr, state->create_rename_lsn); ptr+= LSN_STORE_SIZE;
+ lsn_store(ptr, state->is_of_horizon); ptr+= LSN_STORE_SIZE;
+ lsn_store(ptr, state->skip_redo_lsn); ptr+= LSN_STORE_SIZE;
+ mi_rowstore(ptr,state->state.records); ptr+= 8;
+ mi_rowstore(ptr,state->state.del); ptr+= 8;
+ mi_rowstore(ptr,state->split); ptr+= 8;
+ mi_sizestore(ptr,state->dellink); ptr+= 8;
+ mi_sizestore(ptr,state->first_bitmap_with_space); ptr+= 8;
+ mi_sizestore(ptr,state->state.key_file_length); ptr+= 8;
+ mi_sizestore(ptr,state->state.data_file_length); ptr+= 8;
+ mi_sizestore(ptr,state->state.empty); ptr+= 8;
+ mi_sizestore(ptr,state->state.key_empty); ptr+= 8;
+ mi_int8store(ptr,state->auto_increment); ptr+= 8;
+ mi_int8store(ptr,(ulonglong) state->state.checksum); ptr+= 8;
+ mi_int8store(ptr,state->create_trid); ptr+= 8;
+ mi_int4store(ptr,state->status); ptr+= 4;
+ mi_int4store(ptr,state->update_count); ptr+= 4;
+ *ptr++= state->sortkey;
+ *ptr++= 0; /* Reserved */
+ ptr+= state->state_diff_length;
+
+ for (i=0; i < keys; i++)
+ {
+ mi_sizestore(ptr,state->key_root[i]); ptr+= 8;
+ }
+ mi_sizestore(ptr,state->key_del); ptr+= 8;
+ if (pWrite & MA_STATE_INFO_WRITE_FULL_INFO) /* From maria_chk */
+ {
+ uint key_parts= mi_uint2korr(state->header.key_parts);
+ mi_int4store(ptr,state->sec_index_changed); ptr+= 4;
+ mi_int4store(ptr,state->sec_index_used); ptr+= 4;
+ mi_int4store(ptr,state->version); ptr+= 4;
+ mi_int8store(ptr,state->key_map); ptr+= 8;
+ mi_int8store(ptr,(ulonglong) state->create_time); ptr+= 8;
+ mi_int8store(ptr,(ulonglong) state->recover_time); ptr+= 8;
+ mi_int8store(ptr,(ulonglong) state->check_time); ptr+= 8;
+ mi_sizestore(ptr, state->records_at_analyze); ptr+= 8;
+ /* reserve place for some information per key */
+ bzero(ptr, keys*4); ptr+= keys*4;
+ for (i=0 ; i < key_parts ; i++)
+ {
+ float8store(ptr, state->rec_per_key_part[i]); ptr+= 8;
+ mi_int4store(ptr, state->nulls_per_key_part[i]); ptr+= 4;
+ }
+ }
+
+ res= (pWrite & MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET) ?
+ mysql_file_pwrite(file, buff, (size_t) (ptr-buff), 0L,
+ MYF(MY_NABP | MY_THREADSAFE)) :
+ mysql_file_write(file, buff, (size_t) (ptr-buff),
+ MYF(MY_NABP));
+ DBUG_RETURN(res != 0);
+}
+
+
+static uchar *_ma_state_info_read(uchar *ptr, MARIA_STATE_INFO *state, myf flag)
+{
+ uint i,keys,key_parts;
+ DBUG_ENTER("_ma_state_info_read");
+
+ memcpy(&state->header,ptr, sizeof(state->header));
+ ptr+= sizeof(state->header);
+ keys= (uint) state->header.keys;
+ key_parts= mi_uint2korr(state->header.key_parts);
+
+ /* Allocate memory for key parts if not already done */
+ if (!state->rec_per_key_part &&
+ !my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME | flag),
+ &state->rec_per_key_part,
+ sizeof(*state->rec_per_key_part) * key_parts,
+ &state->nulls_per_key_part,
+ sizeof(*state->nulls_per_key_part) * key_parts,
+ NullS))
+ DBUG_RETURN(0);
+
+ state->open_count = mi_uint2korr(ptr); ptr+= 2;
+ state->changed= mi_uint2korr(ptr); ptr+= 2;
+ state->create_rename_lsn= lsn_korr(ptr); ptr+= LSN_STORE_SIZE;
+ state->is_of_horizon= lsn_korr(ptr); ptr+= LSN_STORE_SIZE;
+ state->skip_redo_lsn= lsn_korr(ptr); ptr+= LSN_STORE_SIZE;
+ state->state.records= mi_rowkorr(ptr); ptr+= 8;
+ state->state.del = mi_rowkorr(ptr); ptr+= 8;
+ state->split = mi_rowkorr(ptr); ptr+= 8;
+ state->dellink= mi_sizekorr(ptr); ptr+= 8;
+ state->first_bitmap_with_space= mi_sizekorr(ptr); ptr+= 8;
+ state->state.key_file_length = mi_sizekorr(ptr); ptr+= 8;
+ state->state.data_file_length= mi_sizekorr(ptr); ptr+= 8;
+ state->state.empty = mi_sizekorr(ptr); ptr+= 8;
+ state->state.key_empty= mi_sizekorr(ptr); ptr+= 8;
+ state->auto_increment=mi_uint8korr(ptr); ptr+= 8;
+ state->state.checksum=(ha_checksum) mi_uint8korr(ptr);ptr+= 8;
+ state->create_trid= mi_uint8korr(ptr); ptr+= 8;
+ state->status = mi_uint4korr(ptr); ptr+= 4;
+ state->update_count=mi_uint4korr(ptr); ptr+= 4;
+ state->sortkey= (uint) *ptr++;
+ ptr++; /* reserved */
+
+ ptr+= state->state_diff_length;
+
+ for (i=0; i < keys; i++)
+ {
+ state->key_root[i]= mi_sizekorr(ptr); ptr+= 8;
+ }
+ state->key_del= mi_sizekorr(ptr); ptr+= 8;
+ state->sec_index_changed = mi_uint4korr(ptr); ptr+= 4;
+ state->sec_index_used = mi_uint4korr(ptr); ptr+= 4;
+ state->version = mi_uint4korr(ptr); ptr+= 4;
+ state->key_map = mi_uint8korr(ptr); ptr+= 8;
+ state->create_time = (time_t) mi_sizekorr(ptr); ptr+= 8;
+ state->recover_time =(time_t) mi_sizekorr(ptr); ptr+= 8;
+ state->check_time = (time_t) mi_sizekorr(ptr); ptr+= 8;
+ state->records_at_analyze= mi_sizekorr(ptr); ptr+= 8;
+ ptr+= keys * 4; /* Skip reserved bytes */
+ for (i=0 ; i < key_parts ; i++)
+ {
+ float8get(state->rec_per_key_part[i], ptr); ptr+= 8;
+ state->nulls_per_key_part[i]= mi_uint4korr(ptr); ptr+= 4;
+ }
+
+ DBUG_PRINT("info", ("Records: %lld", state->state.records));
+ DBUG_RETURN(ptr);
+}
+
+
+/**
+ @brief Fills the state by reading its copy on disk.
+
+ Should not be called for transactional tables, as their state on disk is
+ rarely current and so is often misleading for a reader.
+ Does nothing in single user mode.
+
+ @param file file to read from
+ @param state state which will be filled
+*/
+
+uint _ma_state_info_read_dsk(File file __attribute__((unused)),
+ MARIA_STATE_INFO *state __attribute__((unused)))
+{
+#ifdef MARIA_EXTERNAL_LOCKING
+ uchar buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE];
+
+ /* trick to detect transactional tables */
+ DBUG_ASSERT(state->create_rename_lsn == LSN_IMPOSSIBLE);
+ if (!maria_single_user)
+ {
+ if (mysql_file_pread(file, buff, state->state_length, 0L, MYF(MY_NABP)))
+ return 1;
+ _ma_state_info_read(buff, state);
+ }
+#endif
+ return 0;
+}
+
+
+/****************************************************************************
+** store MARIA_BASE_INFO
+****************************************************************************/
+
+uint _ma_base_info_write(File file, MARIA_BASE_INFO *base)
+{
+ uchar buff[MARIA_BASE_INFO_SIZE], *ptr=buff;
+
+ bmove(ptr, maria_uuid, MY_UUID_SIZE);
+ ptr+= MY_UUID_SIZE;
+ mi_sizestore(ptr,base->keystart); ptr+= 8;
+ mi_sizestore(ptr,base->max_data_file_length); ptr+= 8;
+ mi_sizestore(ptr,base->max_key_file_length); ptr+= 8;
+ mi_rowstore(ptr,base->records); ptr+= 8;
+ mi_rowstore(ptr,base->reloc); ptr+= 8;
+ mi_int4store(ptr,base->mean_row_length); ptr+= 4;
+ mi_int4store(ptr,base->reclength); ptr+= 4;
+ mi_int4store(ptr,base->pack_reclength); ptr+= 4;
+ mi_int4store(ptr,base->min_pack_length); ptr+= 4;
+ mi_int4store(ptr,base->max_pack_length); ptr+= 4;
+ mi_int4store(ptr,base->min_block_length); ptr+= 4;
+ mi_int2store(ptr,base->fields); ptr+= 2;
+ mi_int2store(ptr,base->fixed_not_null_fields); ptr+= 2;
+ mi_int2store(ptr,base->fixed_not_null_fields_length); ptr+= 2;
+ mi_int2store(ptr,base->max_field_lengths); ptr+= 2;
+ mi_int2store(ptr,base->pack_fields); ptr+= 2;
+ mi_int2store(ptr,base->extra_options) ptr+= 2;
+ mi_int2store(ptr,base->null_bytes); ptr+= 2;
+ mi_int2store(ptr,base->original_null_bytes); ptr+= 2;
+ mi_int2store(ptr,base->field_offsets); ptr+= 2;
+ mi_int2store(ptr,base->language); ptr+= 2;
+ mi_int2store(ptr,base->block_size); ptr+= 2;
+ *ptr++= base->rec_reflength;
+ *ptr++= base->key_reflength;
+ *ptr++= base->keys;
+ *ptr++= base->auto_key;
+ *ptr++= base->born_transactional;
+ *ptr++= base->compression_algorithm;
+ mi_int2store(ptr,base->pack_bytes); ptr+= 2;
+ mi_int2store(ptr,base->blobs); ptr+= 2;
+ mi_int2store(ptr,base->max_key_block_length); ptr+= 2;
+ mi_int2store(ptr,base->max_key_length); ptr+= 2;
+ mi_int2store(ptr,base->extra_alloc_bytes); ptr+= 2;
+ *ptr++= base->extra_alloc_procent;
+ mi_int3store(ptr, base->s3_block_size); ptr+= 3;
+ bzero(ptr,13); ptr+= 13; /* extra */
+ DBUG_ASSERT((ptr - buff) == MARIA_BASE_INFO_SIZE);
+ return mysql_file_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
+}
+
+
+/*--------------------------------------------------------------------------
+ maria_keydef
+---------------------------------------------------------------------------*/
+
+my_bool _ma_keydef_write(File file, MARIA_KEYDEF *keydef)
+{
+ uchar buff[MARIA_KEYDEF_SIZE];
+ uchar *ptr=buff;
+
+ *ptr++= (uchar) keydef->keysegs;
+ *ptr++= keydef->key_alg; /* Rtree or Btree */
+ mi_int2store(ptr,keydef->flag); ptr+= 2;
+ mi_int2store(ptr,keydef->block_length); ptr+= 2;
+ mi_int2store(ptr,keydef->keylength); ptr+= 2;
+ mi_int2store(ptr,keydef->minlength); ptr+= 2;
+ mi_int2store(ptr,keydef->maxlength); ptr+= 2;
+ return mysql_file_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
+}
+
+uchar *_ma_keydef_read(uchar *ptr, MARIA_KEYDEF *keydef)
+{
+ keydef->keysegs = (uint) *ptr++;
+ keydef->key_alg = *ptr++; /* Rtree or Btree */
+
+ keydef->flag = mi_uint2korr(ptr); ptr+= 2;
+ keydef->block_length = mi_uint2korr(ptr); ptr+= 2;
+ keydef->keylength = mi_uint2korr(ptr); ptr+= 2;
+ keydef->minlength = mi_uint2korr(ptr); ptr+= 2;
+ keydef->maxlength = mi_uint2korr(ptr); ptr+= 2;
+ keydef->version = 0; /* Not saved */
+ keydef->parser = &ft_default_parser;
+ keydef->ftkey_nr = 0;
+ return ptr;
+}
+
+/***************************************************************************
+** maria_keyseg
+***************************************************************************/
+
+my_bool _ma_keyseg_write(File file, const HA_KEYSEG *keyseg)
+{
+ uchar buff[HA_KEYSEG_SIZE];
+ uchar *ptr=buff;
+ ulong pos;
+
+ *ptr++= keyseg->type;
+ *ptr++= keyseg->language & 0xFF; /* Collation ID, low byte */
+ *ptr++= keyseg->null_bit;
+ *ptr++= keyseg->bit_start;
+ *ptr++= keyseg->language >> 8; /* Collation ID, high byte */
+ *ptr++= keyseg->bit_length;
+ mi_int2store(ptr,keyseg->flag); ptr+= 2;
+ mi_int2store(ptr,keyseg->length); ptr+= 2;
+ mi_int4store(ptr,keyseg->start); ptr+= 4;
+ pos= keyseg->null_bit ? keyseg->null_pos : keyseg->bit_pos;
+ mi_int4store(ptr, pos);
+ ptr+=4;
+
+ return mysql_file_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
+}
+
+
+uchar *_ma_keyseg_read(uchar *ptr, HA_KEYSEG *keyseg)
+{
+ keyseg->type = *ptr++;
+ keyseg->language = *ptr++;
+ keyseg->null_bit = *ptr++;
+ keyseg->bit_start = *ptr++;
+ keyseg->language += ((uint16) (*ptr++)) << 8;
+ keyseg->bit_length = *ptr++;
+ keyseg->flag = mi_uint2korr(ptr); ptr+= 2;
+ keyseg->length = mi_uint2korr(ptr); ptr+= 2;
+ keyseg->start = mi_uint4korr(ptr); ptr+= 4;
+ keyseg->null_pos = mi_uint4korr(ptr); ptr+= 4;
+ keyseg->charset=0; /* Will be filled in later */
+ if (keyseg->null_bit)
+ keyseg->bit_pos= (uint16)(keyseg->null_pos + (keyseg->null_bit == 7));
+ else
+ {
+ keyseg->bit_pos= (uint16)keyseg->null_pos;
+ keyseg->null_pos= 0;
+ }
+ return ptr;
+}
+
+/*--------------------------------------------------------------------------
+ maria_uniquedef
+---------------------------------------------------------------------------*/
+
+my_bool _ma_uniquedef_write(File file, MARIA_UNIQUEDEF *def)
+{
+ uchar buff[MARIA_UNIQUEDEF_SIZE];
+ uchar *ptr=buff;
+
+ mi_int2store(ptr,def->keysegs); ptr+=2;
+ *ptr++= (uchar) def->key;
+ *ptr++ = (uchar) def->null_are_equal;
+
+ return mysql_file_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
+}
+
+uchar *_ma_uniquedef_read(uchar *ptr, MARIA_UNIQUEDEF *def)
+{
+ def->keysegs = mi_uint2korr(ptr);
+ def->key = ptr[2];
+ def->null_are_equal=ptr[3];
+ return ptr+4; /* 1 extra uchar */
+}
+
+/***************************************************************************
+** MARIA_COLUMNDEF
+***************************************************************************/
+
+my_bool _ma_columndef_write(File file, MARIA_COLUMNDEF *columndef)
+{
+ uchar buff[MARIA_COLUMNDEF_SIZE];
+ uchar *ptr=buff;
+ uint low_offset= (uint) (columndef->offset & 0xffff);
+ uint high_offset= (uint) (columndef->offset >> 16);
+
+ mi_int2store(ptr,(ulong) columndef->column_nr); ptr+= 2;
+ mi_int2store(ptr, low_offset); ptr+= 2;
+ mi_int2store(ptr,columndef->type); ptr+= 2;
+ mi_int2store(ptr,columndef->length); ptr+= 2;
+ mi_int2store(ptr,columndef->fill_length); ptr+= 2;
+ mi_int2store(ptr,columndef->null_pos); ptr+= 2;
+ mi_int2store(ptr,columndef->empty_pos); ptr+= 2;
+
+ (*ptr++)= columndef->null_bit;
+ (*ptr++)= columndef->empty_bit;
+ mi_int2store(ptr, high_offset); ptr+= 2;
+ ptr[0]= ptr[1]= 0; ptr+= 2; /* For future */
+ return mysql_file_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
+}
+
+uchar *_ma_columndef_read(uchar *ptr, MARIA_COLUMNDEF *columndef)
+{
+ uint high_offset;
+ columndef->column_nr= mi_uint2korr(ptr); ptr+= 2;
+ columndef->offset= mi_uint2korr(ptr); ptr+= 2;
+ columndef->type= mi_sint2korr(ptr); ptr+= 2;
+ columndef->length= mi_uint2korr(ptr); ptr+= 2;
+ columndef->fill_length= mi_uint2korr(ptr); ptr+= 2;
+ columndef->null_pos= mi_uint2korr(ptr); ptr+= 2;
+ columndef->empty_pos= mi_uint2korr(ptr); ptr+= 2;
+ columndef->null_bit= (uint8) *ptr++;
+ columndef->empty_bit= (uint8) *ptr++;
+ high_offset= mi_uint2korr(ptr); ptr+= 2;
+ columndef->offset|= ((ulong) high_offset << 16);
+ ptr+= 2;
+ return ptr;
+}
+
+my_bool _ma_column_nr_write(File file, uint16 *offsets, uint columns)
+{
+ uchar *buff, *ptr, *end;
+ size_t size= columns*2;
+ my_bool res;
+
+ if (!(buff= (uchar*) my_alloca(size)))
+ return 1;
+ for (ptr= buff, end= ptr + size; ptr < end ; ptr+= 2, offsets++)
+ int2store(ptr, *offsets);
+ res= mysql_file_write(file, buff, size, MYF(MY_NABP)) != 0;
+ my_afree(buff);
+ return res;
+}
+
+
+uchar *_ma_column_nr_read(uchar *ptr, uint16 *offsets, uint columns)
+{
+ uchar *end;
+ size_t size= columns*2;
+ for (end= ptr + size; ptr < end ; ptr+=2, offsets++)
+ *offsets= uint2korr(ptr);
+ return ptr;
+}
+
+/**
+ @brief Set callbacks for data pages
+
+ @note
+ We don't use pagecache_file_init here, as we want to keep the
+ code readable
+*/
+
+void _ma_set_data_pagecache_callbacks(PAGECACHE_FILE *file,
+ MARIA_SHARE *share)
+{
+ pagecache_file_set_null_hooks(file);
+ file->callback_data= (uchar*) share;
+ file->flush_log_callback= &maria_flush_log_for_page_none; /* Do nothing */
+ file->post_write_hook= maria_page_write_failure;
+
+ if (share->temporary)
+ {
+ file->post_read_hook= &maria_page_crc_check_none;
+ file->pre_write_hook= &maria_page_filler_set_none;
+ }
+ else
+ {
+ file->post_read_hook= &maria_page_crc_check_data;
+ if (share->options & HA_OPTION_PAGE_CHECKSUM)
+ file->pre_write_hook= &maria_page_crc_set_normal;
+ else
+ file->pre_write_hook= &maria_page_filler_set_normal;
+ if (share->now_transactional)
+ file->flush_log_callback= maria_flush_log_for_page;
+ }
+
+ if (MY_TEST(share->base.extra_options & MA_EXTRA_OPTIONS_ENCRYPTED))
+ {
+ ma_crypt_set_data_pagecache_callbacks(file, share);
+ }
+}
+
+
+/**
+ @brief Set callbacks for index pages
+
+ @note
+ We don't use pagecache_file_init here, as we want to keep the
+ code readable
+*/
+
+void _ma_set_index_pagecache_callbacks(PAGECACHE_FILE *file,
+ MARIA_SHARE *share)
+{
+ pagecache_file_set_null_hooks(file);
+ file->callback_data= (uchar*) share;
+ file->flush_log_callback= &maria_flush_log_for_page_none; /* Do nothing */
+ file->post_write_hook= maria_page_write_failure;
+
+ if (share->temporary)
+ {
+ file->post_read_hook= &maria_page_crc_check_none;
+ file->pre_write_hook= &maria_page_filler_set_none;
+ }
+ else
+ {
+ file->post_read_hook= &maria_page_crc_check_index;
+ if (share->options & HA_OPTION_PAGE_CHECKSUM)
+ file->pre_write_hook= &maria_page_crc_set_index;
+ else
+ file->pre_write_hook= &maria_page_filler_set_normal;
+
+ if (share->now_transactional)
+ file->flush_log_callback= maria_flush_log_for_page;
+ }
+
+ if (MY_TEST(share->base.extra_options & MA_EXTRA_OPTIONS_ENCRYPTED))
+ {
+ ma_crypt_set_index_pagecache_callbacks(file, share);
+ }
+}
+
+
+/**************************************************************************
+ Open data file
+ We can't use dup() here as the data file descriptors need to have different
+ active seek-positions.
+*************************************************************************/
+
+int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share)
+{
+ myf flags= (share->mode & O_NOFOLLOW) ? MY_NOSYMLINKS | MY_WME : MY_WME;
+ if (share->temporary)
+ flags|= MY_THREAD_SPECIFIC;
+ DEBUG_SYNC_C("mi_open_datafile");
+ info->dfile.file= share->bitmap.file.file=
+ mysql_file_open(key_file_dfile, share->data_file_name.str,
+ share->mode | O_SHARE | O_CLOEXEC, flags);
+ return info->dfile.file >= 0 ? 0 : 1;
+}
+
+
+int _ma_open_keyfile(MARIA_SHARE *share)
+{
+ /*
+ Modifications to share->kfile should be under intern_lock to protect
+ against a concurrent checkpoint.
+ */
+ mysql_mutex_lock(&share->intern_lock);
+ share->kfile.file= mysql_file_open(key_file_kfile,
+ share->unique_file_name.str,
+ share->mode | O_SHARE | O_NOFOLLOW | O_CLOEXEC,
+ MYF(MY_WME | MY_NOSYMLINKS));
+ mysql_mutex_unlock(&share->intern_lock);
+ return (share->kfile.file < 0);
+}
+
+
+/*
+ Disable all indexes.
+
+ SYNOPSIS
+ maria_disable_indexes()
+ info A pointer to the MARIA storage engine MARIA_HA struct.
+
+ DESCRIPTION
+ Disable all indexes.
+
+ RETURN
+ 0 ok
+*/
+
+int maria_disable_indexes(MARIA_HA *info)
+{
+ MARIA_SHARE *share= info->s;
+
+ maria_clear_all_keys_active(share->state.key_map);
+ return 0;
+}
+
+
+/*
+ Enable all indexes
+
+ SYNOPSIS
+ maria_enable_indexes()
+ info A pointer to the MARIA storage engine MARIA_HA struct.
+
+ DESCRIPTION
+ Enable all indexes. The indexes might have been disabled
+ by maria_disable_index() before.
+ The function works only if both data and indexes are empty,
+ otherwise a repair is required.
+ To be sure, call handler::delete_all_rows() before.
+
+ RETURN
+ 0 ok
+ HA_ERR_CRASHED data or index is non-empty.
+*/
+
+int maria_enable_indexes(MARIA_HA *info)
+{
+ int error= 0;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("maria_enable_indexes");
+
+ if ((share->state.state.data_file_length !=
+ (share->data_file_type == BLOCK_RECORD ? share->block_size : 0)) ||
+ (share->state.state.key_file_length != share->base.keystart))
+ {
+ DBUG_PRINT("error", ("data_file_length: %lu key_file_length: %lu",
+ (ulong) share->state.state.data_file_length,
+ (ulong) share->state.state.key_file_length));
+ _ma_set_fatal_error(info, HA_ERR_CRASHED);
+ error= HA_ERR_CRASHED;
+ }
+ else
+ maria_set_all_keys_active(share->state.key_map, share->base.keys);
+ DBUG_RETURN(error);
+}
+
+
+/*
+ Test if indexes are disabled.
+
+ SYNOPSIS
+ maria_indexes_are_disabled()
+ info A pointer to the MARIA storage engine MARIA_HA struct.
+
+ DESCRIPTION
+ Test if indexes are disabled.
+
+ RETURN
+ 0 indexes are not disabled
+ 1 all indexes are disabled
+ 2 non-unique indexes are disabled
+*/
+
+int maria_indexes_are_disabled(MARIA_HA *info)
+{
+ MARIA_SHARE *share= info->s;
+
+ /*
+ No keys or all are enabled. keys is the number of keys. Left shifted
+ gives us only one bit set. When decreased by one, gives us all all bits
+ up to this one set and it gets unset.
+ */
+ if (!share->base.keys ||
+ (maria_is_all_keys_active(share->state.key_map, share->base.keys)))
+ return 0;
+
+ /* All are disabled */
+ if (maria_is_any_key_active(share->state.key_map))
+ return 1;
+
+ /*
+ We have keys. Some enabled, some disabled.
+ Don't check for any non-unique disabled but return directly 2
+ */
+ return 2;
+}
+
+
+static my_bool maria_scan_init_dummy(MARIA_HA *info __attribute__((unused)))
+{
+ return 0;
+}
+
+static void maria_scan_end_dummy(MARIA_HA *info __attribute__((unused)))
+{
+}
+
+static my_bool maria_once_init_dummy(MARIA_SHARE *share
+ __attribute__((unused)),
+ File dfile __attribute__((unused)))
+{
+ return 0;
+}
+
+static my_bool maria_once_end_dummy(MARIA_SHARE *share __attribute__((unused)))
+{
+ return 0;
+}