summary | refs | log | tree | commit | diff | stats
path: root/storage/tokudb/ha_tokudb_alter_56.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/tokudb/ha_tokudb_alter_56.cc')
-rw-r--r-- storage/tokudb/ha_tokudb_alter_56.cc 1646
1 files changed, 1646 insertions, 0 deletions
diff --git a/storage/tokudb/ha_tokudb_alter_56.cc b/storage/tokudb/ha_tokudb_alter_56.cc
new file mode 100644
index 00000000..233d929a
--- /dev/null
+++ b/storage/tokudb/ha_tokudb_alter_56.cc
@@ -0,0 +1,1646 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
+#ident "$Id$"
+/*======
+This file is part of TokuDB
+
+
+Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
+
+    TokuDB is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License, version 2,
+ as published by the Free Software Foundation.
+
+ TokuDB is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with TokuDB. If not, see <http://www.gnu.org/licenses/>.
+
+======= */
+
+#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
+
+#if TOKU_INCLUDE_ALTER_56
+
+#if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 101099
+#define TOKU_ALTER_RENAME ALTER_RENAME
+#define DYNAMIC_ARRAY_ELEMENTS_TYPE size_t
+#elif (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
+ (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
+#define TOKU_ALTER_RENAME ALTER_RENAME
+#define DYNAMIC_ARRAY_ELEMENTS_TYPE int
+#elif 50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599
+#define TOKU_ALTER_RENAME ALTER_RENAME_56
+#define DYNAMIC_ARRAY_ELEMENTS_TYPE int
+#else
+#error
+#endif
+
+#include "ha_tokudb_alter_common.cc"
+#include <sql_array.h>
+#include <sql_base.h>
+
+// The tokudb alter context contains the alter state that is set in the check if supported method and used
+// later when the alter operation is executed.
+class tokudb_alter_ctx : public inplace_alter_handler_ctx {
+public:
+ tokudb_alter_ctx() :
+ handler_flags(0),
+ alter_txn(NULL),
+ add_index_changed(false),
+ drop_index_changed(false),
+ reset_card(false),
+ compression_changed(false),
+ expand_varchar_update_needed(false),
+ expand_fixed_update_needed(false),
+ expand_blob_update_needed(false),
+ optimize_needed(false), changed_fields(PSI_INSTRUMENT_MEM),
+ table_kc_info(NULL),
+ altered_table_kc_info(NULL) {
+ }
+ ~tokudb_alter_ctx() {
+ if (altered_table_kc_info)
+ free_key_and_col_info(altered_table_kc_info);
+ }
+public:
+ ulong handler_flags;
+ DB_TXN* alter_txn;
+ bool add_index_changed;
+ bool incremented_num_DBs, modified_DBs;
+ bool drop_index_changed;
+ bool reset_card;
+ bool compression_changed;
+ enum toku_compression_method orig_compression_method;
+ bool expand_varchar_update_needed;
+ bool expand_fixed_update_needed;
+ bool expand_blob_update_needed;
+ bool optimize_needed;
+ Dynamic_array<uint> changed_fields;
+ KEY_AND_COL_INFO* table_kc_info;
+ KEY_AND_COL_INFO* altered_table_kc_info;
+ KEY_AND_COL_INFO altered_table_kc_info_base;
+};
+
+// Debug function to print out an alter table operation
+void ha_tokudb::print_alter_info(
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info) {
+
+ TOKUDB_TRACE(
+ "***are keys of two tables same? %d",
+ tables_have_same_keys(table, altered_table, false, false));
+ if (ha_alter_info->handler_flags) {
+ TOKUDB_TRACE("***alter flags set ***");
+ for (int i = 0; i < 32; i++) {
+ if (ha_alter_info->handler_flags & (1 << i))
+ TOKUDB_TRACE("%d", i);
+ }
+ }
+
+ // everyone calculates data by doing some default_values - record[0], but
+ // I do not see why that is necessary
+ TOKUDB_TRACE("******");
+ TOKUDB_TRACE("***orig table***");
+ for (uint i = 0; i < table->s->fields; i++) {
+ //
+ // make sure to use table->field, and NOT table->s->field
+ //
+ Field* curr_field = table->field[i];
+ uint null_offset = get_null_offset(table, curr_field);
+ TOKUDB_TRACE(
+ "name: %s, types: %u %u, nullable: %d, null_offset: %d, is_null_field: "
+ "%d, is_null %d, pack_length %u",
+ curr_field->field_name.str,
+ curr_field->real_type(),
+ mysql_to_toku_type(curr_field),
+ curr_field->null_bit,
+ null_offset,
+ curr_field->real_maybe_null(),
+ curr_field->real_maybe_null() ?
+ table->s->default_values[null_offset] & curr_field->null_bit :
+ 0xffffffff,
+ curr_field->pack_length());
+ }
+ TOKUDB_TRACE("******");
+ TOKUDB_TRACE("***altered table***");
+ for (uint i = 0; i < altered_table->s->fields; i++) {
+ Field* curr_field = altered_table->field[i];
+ uint null_offset = get_null_offset(altered_table, curr_field);
+ TOKUDB_TRACE(
+ "name: %s, types: %u %u, nullable: %d, null_offset: %d, "
+ "is_null_field: %d, is_null %d, pack_length %u",
+ curr_field->field_name.str,
+ curr_field->real_type(),
+ mysql_to_toku_type(curr_field),
+ curr_field->null_bit,
+ null_offset,
+ curr_field->real_maybe_null(),
+ curr_field->real_maybe_null() ?
+ altered_table->s->default_values[null_offset] &
+ curr_field->null_bit : 0xffffffff,
+ curr_field->pack_length());
+ }
+ TOKUDB_TRACE("******");
+}
+
+// Given two tables with equal number of fields, find all of the fields with
+// different types and return the indexes of the different fields in the
+// changed_fields array. This function ignores field name differences.
+static int find_changed_fields(
+ TABLE* table_a,
+ TABLE* table_b,
+ Dynamic_array<uint>& changed_fields) {
+
+ for (uint i = 0; i < table_a->s->fields; i++) {
+ Field* field_a = table_a->field[i];
+ Field* field_b = table_b->field[i];
+ if (!fields_are_same_type(field_a, field_b))
+ changed_fields.append(i);
+ }
+ return changed_fields.elements();
+}
+
+static bool change_length_is_supported(TABLE* table,
+ TABLE* altered_table,
+ tokudb_alter_ctx* ctx);
+
+static bool change_type_is_supported(TABLE* table,
+ TABLE* altered_table,
+ tokudb_alter_ctx* ctx);
+
+// The ha_alter_info->handler_flags can not be trusted.
+// This function maps the bogus handler flags to something we like.
+static ulong fix_handler_flags(
+ THD* thd,
+ TABLE* table,
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info) {
+
+ ulong handler_flags = ha_alter_info->handler_flags;
+
+#if 100000 <= MYSQL_VERSION_ID
+ // This is automatically supported, hide the flag from later checks
+ handler_flags &= ~ALTER_PARTITIONED;
+#endif
+
+ // workaround for fill_alter_inplace_info bug (#5193)
+ // the function erroneously sets the ADD_INDEX and DROP_INDEX flags for a
+ // column addition that does not change the keys.
+ // the following code turns the ADD_INDEX and DROP_INDEX flags so that
+ // we can do hot column addition later.
+ if (handler_flags &
+ (ALTER_ADD_COLUMN + ALTER_DROP_COLUMN)) {
+ if (handler_flags &
+ (ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX + ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX)) {
+ if (tables_have_same_keys(
+ table,
+ altered_table,
+ tokudb::sysvars::alter_print_error(thd) != 0, false)) {
+ handler_flags &=
+ ~(ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX +
+ ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX);
+ }
+ }
+ }
+
+ // always allow rename table + any other operation, so turn off the
+ // rename flag
+ handler_flags &= ~ALTER_RENAME;
+
+ // ALTER_STORED_COLUMN_TYPE may be set when no columns have been changed,
+ // so turn off the flag
+ if (handler_flags & ALTER_STORED_COLUMN_TYPE) {
+ if (all_fields_are_same_type(table, altered_table)) {
+ handler_flags &= ~ALTER_STORED_COLUMN_TYPE;
+ }
+ }
+
+ return handler_flags;
+}
+
+// Require that there is no intersection of add and drop names.
+static bool is_disjoint_add_drop(Alter_inplace_info *ha_alter_info) {
+ for (uint d = 0; d < ha_alter_info->index_drop_count; d++) {
+ KEY* drop_key = ha_alter_info->index_drop_buffer[d];
+ for (uint a = 0; a < ha_alter_info->index_add_count; a++) {
+ KEY* add_key =
+ &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[a]];
+ if (strcmp(drop_key->name.str, add_key->name.str) == 0) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+// Return true if some bit in mask is set and no bit in ~mask is set,
+// otherwise return false.
+static bool only_flags(ulong bits, ulong mask) {
+ return (bits & mask) != 0 && (bits & ~mask) == 0;
+}
+
// Table create options that should be ignored by TokuDB
// There are 25 total create options defined by mysql server (see handler.h),
// and only 4 options will touch engine data, either rebuild engine data or
// just update meta info:
//     1. HA_CREATE_USED_AUTO        update auto_inc info
//     2. HA_CREATE_USED_CHARSET     rebuild table if contains character columns
//     3. HA_CREATE_USED_ENGINE      rebuild table
//     4. HA_CREATE_USED_ROW_FORMAT  update compression method info
//
// All the others are either not supported by TokuDB or no need to
// touch engine data. An ALTER whose used_fields are all within this mask can
// therefore be accepted inplace without touching the fractal trees.
static constexpr uint32_t TOKUDB_IGNORED_ALTER_CREATE_OPTION_FIELDS =
    HA_CREATE_USED_RAID |               // deprecated field
    HA_CREATE_USED_UNION |              // for MERGE table
    HA_CREATE_USED_INSERT_METHOD |      // for MERGE table
    HA_CREATE_USED_MIN_ROWS |           // for MEMORY table
    HA_CREATE_USED_MAX_ROWS |           // for NDB table
    HA_CREATE_USED_AVG_ROW_LENGTH |     // for MyISAM table
    HA_CREATE_USED_PACK_KEYS |          // for MyISAM table
    HA_CREATE_USED_DEFAULT_CHARSET |    // no need to rebuild
    HA_CREATE_USED_DATADIR |            // ignored by alter
    HA_CREATE_USED_INDEXDIR |           // ignored by alter
    HA_CREATE_USED_CHECKSUM |           // for MyISAM table
    HA_CREATE_USED_DELAY_KEY_WRITE |    // for MyISAM table
    HA_CREATE_USED_COMMENT |            // no need to rebuild
    HA_CREATE_USED_PASSWORD |           // not supported by community version
    HA_CREATE_USED_CONNECTION |         // for FEDERATED table
    HA_CREATE_USED_KEY_BLOCK_SIZE |     // not supported by TokuDB
    HA_CREATE_USED_TRANSACTIONAL |      // unused
    HA_CREATE_USED_PAGE_CHECKSUM |      // unused
    HA_CREATE_USED_STATS_PERSISTENT |   // not supported by TokuDB
    HA_CREATE_USED_STATS_AUTO_RECALC |  // not supported by TokuDB
    HA_CREATE_USED_STATS_SAMPLE_PAGES;  // not supported by TokuDB
+
// Check if an alter table operation on this table and described by the alter
// table parameters is supported inplace and if so, what type of locking is
// needed to execute it. return values:

// HA_ALTER_INPLACE_NOT_SUPPORTED: alter operation is not supported as an
// inplace operation, a table copy is required

// HA_ALTER_ERROR: the alter table operation should fail

// HA_ALTER_INPLACE_EXCLUSIVE_LOCK: prepare and alter runs with MDL X

// HA_ALTER_INPLACE_COPY_LOCK: prepare runs with MDL X,
// alter runs with MDL SNW

// HA_ALTER_INPLACE_SHARED_LOCK: prepare and alter methods called with MDL SNW,
// concurrent reads, no writes

// HA_ALTER_INPLACE_COPY_NO_LOCK: prepare runs with MDL X,
// alter runs with MDL SW

// HA_ALTER_INPLACE_NO_LOCK: prepare and alter methods called with MDL SW,
// concurrent reads, writes.
// must set WRITE_ALLOW_WRITE lock type in the external lock method to avoid
// deadlocks with the MDL lock and the table lock
//
// The classification is a chain of mutually exclusive only_flags() cases on
// the fixed-up handler flags; the order of the cases matters. The context
// allocated here is consumed by prepare/inplace/commit_inplace_alter_table.
enum_alter_inplace_result ha_tokudb::check_if_supported_inplace_alter(
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info) {

    TOKUDB_HANDLER_DBUG_ENTER("");

    if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
        print_alter_info(altered_table, ha_alter_info);
    }

    // default is NOT inplace
    enum_alter_inplace_result result = HA_ALTER_INPLACE_NOT_SUPPORTED;
    THD* thd = ha_thd();

    // setup context
    tokudb_alter_ctx* ctx = new tokudb_alter_ctx;
    ha_alter_info->handler_ctx = ctx;
    ctx->handler_flags =
        fix_handler_flags(thd, table, altered_table, ha_alter_info);
    ctx->table_kc_info = &share->kc_info;
    ctx->altered_table_kc_info = &ctx->altered_table_kc_info_base;
    memset(ctx->altered_table_kc_info, 0, sizeof (KEY_AND_COL_INFO));

    if (tokudb::sysvars::disable_hot_alter(thd)) {
        ; // do nothing
    } else if (only_flags(
                    ctx->handler_flags,
                    ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX +
                    ALTER_DROP_UNIQUE_INDEX +
                    ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX +
                    ALTER_ADD_UNIQUE_INDEX)) {
        // add or drop index
        // require an unchanged null byte layout and genuinely different keys
        if (table->s->null_bytes == altered_table->s->null_bytes &&
            (ha_alter_info->index_add_count > 0 ||
             ha_alter_info->index_drop_count > 0) &&
            !tables_have_same_keys(
                table,
                altered_table,
                tokudb::sysvars::alter_print_error(thd) != 0, false) &&
            is_disjoint_add_drop(ha_alter_info)) {

            if (ctx->handler_flags &
                (ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX +
                 ALTER_DROP_UNIQUE_INDEX)) {
                // the fractal tree can not handle dropping an index concurrent
                // with querying with the index.
                // we grab an exclusive MDL for the drop index.
                result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
            } else {
                /* FIXME: MDEV-16099 Use alter algorithm=nocopy
                or algorithm=instant for non-InnoDB engine */
                result = HA_ALTER_INPLACE_COPY_LOCK;

                // someday, allow multiple hot indexes via alter table add key.
                // don't forget to change the store_lock function.
                // for now, hot indexing is only supported via session variable
                // with the create index sql command
                if (ha_alter_info->index_add_count == 1 &&
                    // only one add or drop
                    ha_alter_info->index_drop_count == 0 &&
                    // must be add index not add unique index
                    ctx->handler_flags == ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX &&
                    // must be a create index command
                    thd_sql_command(thd) == SQLCOM_CREATE_INDEX &&
                    // must be enabled
                    tokudb::sysvars::create_index_online(thd)) {
                    // external_lock set WRITE_ALLOW_WRITE which allows writes
                    // concurrent with the index creation
                    /* FIXME: MDEV-16099 Use alter algorithm=nocopy
                    or algorithm=instant for non-InnoDB engine */
                    result = HA_ALTER_INPLACE_COPY_NO_LOCK;
                }
            }
        }
    } else if (only_flags(
                    ctx->handler_flags,
                    ALTER_COLUMN_DEFAULT)) {
        // column default
        if (table->s->null_bytes == altered_table->s->null_bytes)
            result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
    } else if (ctx->handler_flags & ALTER_COLUMN_NAME &&
               only_flags(
                    ctx->handler_flags,
                    ALTER_COLUMN_NAME |
                    ALTER_COLUMN_DEFAULT)) {
        // column rename
        // we have identified a possible column rename,
        // but let's do some more checks

        // we will only allow an hcr if there are no changes
        // in column positions (ALTER_STORED_COLUMN_ORDER is not set)

        // now need to verify that one and only one column
        // has changed only its name. If we find anything to
        // the contrary, we don't allow it, also check indexes
        if (table->s->null_bytes == altered_table->s->null_bytes) {
            bool cr_supported =
                column_rename_supported(
                    table,
                    altered_table,
                    (ctx->handler_flags &
                        ALTER_STORED_COLUMN_ORDER) != 0);
            if (cr_supported)
                result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
        }
    } else if (ctx->handler_flags & ALTER_ADD_COLUMN &&
               only_flags(
                    ctx->handler_flags,
                    ALTER_ADD_COLUMN |
                    ALTER_STORED_COLUMN_ORDER) &&
               setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {

        // add column
        uint32_t added_columns[altered_table->s->fields];
        uint32_t num_added_columns = 0;
        int r =
            find_changed_columns(
                added_columns,
                &num_added_columns,
                table,
                altered_table);
        if (r == 0) {
            if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
                for (uint32_t i = 0; i < num_added_columns; i++) {
                    uint32_t curr_added_index = added_columns[i];
                    Field* curr_added_field =
                        altered_table->field[curr_added_index];
                    TOKUDB_TRACE(
                        "Added column: index %d, name %s",
                        curr_added_index,
                        curr_added_field->field_name.str);
                }
            }
            result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
        }
    } else if (ctx->handler_flags & ALTER_DROP_COLUMN &&
               only_flags(
                    ctx->handler_flags,
                    ALTER_DROP_COLUMN |
                    ALTER_STORED_COLUMN_ORDER) &&
               setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {

        // drop column
        uint32_t dropped_columns[table->s->fields];
        uint32_t num_dropped_columns = 0;
        int r =
            find_changed_columns(
                dropped_columns,
                &num_dropped_columns,
                altered_table,
                table);
        if (r == 0) {
            if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
                for (uint32_t i = 0; i < num_dropped_columns; i++) {
                    uint32_t curr_dropped_index = dropped_columns[i];
                    Field* curr_dropped_field = table->field[curr_dropped_index];
                    TOKUDB_TRACE(
                        "Dropped column: index %d, name %s",
                        curr_dropped_index,
                        curr_dropped_field->field_name.str);
                }
            }
            result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
        }
    } else if ((ctx->handler_flags &
                ALTER_COLUMN_TYPE_CHANGE_BY_ENGINE) &&
               only_flags(
                    ctx->handler_flags,
                    ALTER_COLUMN_TYPE_CHANGE_BY_ENGINE |
                    ALTER_COLUMN_DEFAULT) &&
               table->s->fields == altered_table->s->fields &&
               find_changed_fields(
                    table,
                    altered_table,
                    ctx->changed_fields) > 0 &&
               setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {

        // change column length
        if (change_length_is_supported(table, altered_table, ctx)) {
            result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
        }
    } else if ((ctx->handler_flags & ALTER_STORED_COLUMN_TYPE) &&
               only_flags(
                    ctx->handler_flags,
                    ALTER_STORED_COLUMN_TYPE |
                    ALTER_COLUMN_DEFAULT) &&
               table->s->fields == altered_table->s->fields &&
               find_changed_fields(
                    table,
                    altered_table,
                    ctx->changed_fields) > 0 &&
               setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {

        // change column type
        if (change_type_is_supported(table, altered_table, ctx)) {
            result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
        }
    } else if (only_flags(
                    ctx->handler_flags,
                    ALTER_CHANGE_CREATE_OPTION)) {

        // table create options changed (auto_increment / row_format / ...)
        HA_CREATE_INFO* create_info = ha_alter_info->create_info;
#if TOKU_INCLUDE_OPTION_STRUCTS
        // set the USED_ROW_FORMAT flag for use later in this file for changes in the table's
        // compression
        if (create_info->option_struct->row_format !=
            table_share->option_struct->row_format)
            create_info->used_fields |= HA_CREATE_USED_ROW_FORMAT;
#endif
        // alter auto_increment
        if (only_flags(create_info->used_fields, HA_CREATE_USED_AUTO)) {
            // do a sanity check that the table is what we think it is
            if (tables_have_same_keys_and_columns(
                    table,
                    altered_table,
                    tokudb::sysvars::alter_print_error(thd) != 0)) {
                result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
            }
        } else if (only_flags(
                        create_info->used_fields,
                        HA_CREATE_USED_ROW_FORMAT)) {
            // alter row_format
            // do a sanity check that the table is what we think it is
            if (tables_have_same_keys_and_columns(
                    table,
                    altered_table,
                    tokudb::sysvars::alter_print_error(thd) != 0)) {
                result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
            }
        } else if (only_flags(
                        create_info->used_fields,
                        TOKUDB_IGNORED_ALTER_CREATE_OPTION_FIELDS)) {
            // none of the changed options touch engine data
            result = HA_ALTER_INPLACE_COPY_NO_LOCK;
        }
    }
#if TOKU_OPTIMIZE_WITH_RECREATE
    else if (only_flags(
                ctx->handler_flags,
                ALTER_RECREATE_TABLE |
                ALTER_COLUMN_DEFAULT)) {
        // ALTER TABLE ... FORCE / OPTIMIZE is mapped onto a hot optimize
        ctx->optimize_needed = true;
        /* FIXME: MDEV-16099 Use alter algorithm=nocopy
        or algorithm=instant for non-InnoDB engine */
        result = HA_ALTER_INPLACE_COPY_NO_LOCK;
    }
#endif

    if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE)) &&
        result != HA_ALTER_INPLACE_NOT_SUPPORTED &&
        table->s->null_bytes != altered_table->s->null_bytes) {

        TOKUDB_HANDLER_TRACE("q %s", thd->query());
        TOKUDB_HANDLER_TRACE(
            "null bytes %u -> %u",
            table->s->null_bytes,
            altered_table->s->null_bytes);
    }

    // turn a not supported result into an error if the slow alter table
    // (copy) is disabled
    if (result == HA_ALTER_INPLACE_NOT_SUPPORTED &&
        tokudb::sysvars::disable_slow_alter(thd)) {
        print_error(HA_ERR_UNSUPPORTED, MYF(0));
        result = HA_ALTER_ERROR;
    }

    DBUG_RETURN(result);
}
+
+// Prepare for the alter operations
+bool ha_tokudb::prepare_inplace_alter_table(TOKUDB_UNUSED(TABLE* altered_table),
+ Alter_inplace_info* ha_alter_info) {
+ TOKUDB_HANDLER_DBUG_ENTER("");
+ tokudb_alter_ctx* ctx =
+ static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
+ assert_always(transaction); // transaction must exist after table is locked
+ ctx->alter_txn = transaction;
+ bool result = false; // success
+ DBUG_RETURN(result);
+}
+
+// Execute the alter operations.
+bool ha_tokudb::inplace_alter_table(
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info) {
+
+ TOKUDB_HANDLER_DBUG_ENTER("");
+
+ int error = 0;
+ tokudb_alter_ctx* ctx =
+ static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
+ HA_CREATE_INFO* create_info = ha_alter_info->create_info;
+
+ // this should be enough to handle locking as the higher level MDL
+ // on this table should prevent any new analyze tasks.
+ share->cancel_background_jobs();
+
+ if (error == 0 &&
+ (ctx->handler_flags &
+ (ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX |
+ ALTER_DROP_UNIQUE_INDEX))) {
+ error = alter_table_drop_index(ha_alter_info);
+ }
+ if (error == 0 &&
+ (ctx->handler_flags &
+ (ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX +
+ ALTER_ADD_UNIQUE_INDEX))) {
+ error = alter_table_add_index(ha_alter_info);
+ }
+ if (error == 0 &&
+ (ctx->handler_flags &
+ (ALTER_ADD_COLUMN |
+ ALTER_DROP_COLUMN))) {
+ error = alter_table_add_or_drop_column(altered_table, ha_alter_info);
+ }
+ if (error == 0 &&
+ (ctx->handler_flags & ALTER_CHANGE_CREATE_OPTION) &&
+ (create_info->used_fields & HA_CREATE_USED_AUTO)) {
+ error = write_auto_inc_create(
+ share->status_block,
+ create_info->auto_increment_value,
+ ctx->alter_txn);
+ }
+ if (error == 0 &&
+ (ctx->handler_flags & ALTER_CHANGE_CREATE_OPTION) &&
+ (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) {
+ // Get the current compression
+ DB *db = share->key_file[0];
+ error = db->get_compression_method(db, &ctx->orig_compression_method);
+ assert_always(error == 0);
+
+ // Set the new compression
+#if TOKU_INCLUDE_OPTION_STRUCTS
+ toku_compression_method method =
+ row_format_to_toku_compression_method(
+ (tokudb::sysvars::row_format_t)create_info->option_struct->row_format);
+#else
+ toku_compression_method method =
+ row_type_to_toku_compression_method(create_info->row_type);
+#endif
+ uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
+ for (uint32_t i = 0; i < curr_num_DBs; i++) {
+ db = share->key_file[i];
+ error = db->change_compression_method(db, method);
+ if (error)
+ break;
+ ctx->compression_changed = true;
+ }
+ }
+
+ // note: only one column expansion is allowed
+
+ if (error == 0 && ctx->expand_fixed_update_needed)
+ error = alter_table_expand_columns(altered_table, ha_alter_info);
+
+ if (error == 0 && ctx->expand_varchar_update_needed)
+ error = alter_table_expand_varchar_offsets(
+ altered_table,
+ ha_alter_info);
+
+ if (error == 0 && ctx->expand_blob_update_needed)
+ error = alter_table_expand_blobs(altered_table, ha_alter_info);
+
+ if (error == 0 && ctx->reset_card) {
+ error = tokudb::alter_card(
+ share->status_block,
+ ctx->alter_txn,
+ table->s,
+ altered_table->s);
+ }
+ if (error == 0 && ctx->optimize_needed) {
+ error = do_optimize(ha_thd());
+ }
+
+
+#if defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
+#if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
+ (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
+#if defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
+ if (error == 0 &&
+ (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL)) {
+#else
+ if (error == 0) {
+#endif // defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
+ error = write_frm_data(
+ share->status_block,
+ ctx->alter_txn,
+ altered_table->s->path.str);
+ }
+#endif // (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) ||
+ // (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
+#endif // defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
+
+ bool result = false; // success
+ if (error) {
+ print_error(error, MYF(0));
+ result = true; // failure
+ }
+
+ DBUG_RETURN(result);
+}
+
+int ha_tokudb::alter_table_add_index(Alter_inplace_info* ha_alter_info) {
+
+ // sort keys in add index order
+ KEY* key_info = (KEY*)tokudb::memory::malloc(
+ sizeof(KEY) * ha_alter_info->index_add_count,
+ MYF(MY_WME));
+ for (uint i = 0; i < ha_alter_info->index_add_count; i++) {
+ KEY *key = &key_info[i];
+ *key = ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]];
+ for (KEY_PART_INFO* key_part = key->key_part;
+ key_part < key->key_part + key->user_defined_key_parts;
+ key_part++) {
+ key_part->field = table->field[key_part->fieldnr];
+ }
+ }
+
+ tokudb_alter_ctx* ctx =
+ static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
+ ctx->add_index_changed = true;
+ int error = tokudb_add_index(
+ table,
+ key_info,
+ ha_alter_info->index_add_count,
+ ctx->alter_txn,
+ &ctx->incremented_num_DBs,
+ &ctx->modified_DBs);
+ if (error == HA_ERR_FOUND_DUPP_KEY) {
+ // hack for now, in case of duplicate key error,
+ // because at the moment we cannot display the right key
+ // information to the user, so that he knows potentially what went
+ // wrong.
+ last_dup_key = MAX_KEY;
+ }
+
+ tokudb::memory::free(key_info);
+
+ if (error == 0)
+ ctx->reset_card = true;
+
+ return error;
+}
+
+static bool find_index_of_key(
+ const char* key_name,
+ TABLE* table,
+ uint* index_offset_ptr) {
+
+ for (uint i = 0; i < table->s->keys; i++) {
+ if (strcmp(key_name, table->key_info[i].name.str) == 0) {
+ *index_offset_ptr = i;
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool find_index_of_key(
+ const char* key_name,
+ KEY* key_info,
+ uint key_count,
+ uint* index_offset_ptr) {
+
+ for (uint i = 0; i < key_count; i++) {
+ if (strcmp(key_name, key_info[i].name.str) == 0) {
+ *index_offset_ptr = i;
+ return true;
+ }
+ }
+ return false;
+}
+
// Drop the indexes listed in ha_alter_info->index_drop_buffer.
// Returns 0 on success, otherwise an error number.
int ha_tokudb::alter_table_drop_index(Alter_inplace_info* ha_alter_info) {

    KEY *key_info = table->key_info;
    // translate key names to indexes into the key_info array
    uint index_drop_offsets[ha_alter_info->index_drop_count];
    for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
        bool found;
        found = find_index_of_key(
            ha_alter_info->index_drop_buffer[i]->name.str,
            table,
            &index_drop_offsets[i]);
        if (!found) {
            // undo of add key in partition engine
            // the key is not in the table's own key_info, so resolve it
            // against the alter info's key buffer instead and drop from there
            found = find_index_of_key(
                ha_alter_info->index_drop_buffer[i]->name.str,
                ha_alter_info->key_info_buffer,
                ha_alter_info->key_count,
                &index_drop_offsets[i]);
            assert_always(found);
            key_info = ha_alter_info->key_info_buffer;
        }
    }

    // drop indexes
    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
    ctx->drop_index_changed = true;

    int error = drop_indexes(index_drop_offsets,
                             ha_alter_info->index_drop_count,
                             key_info,
                             ctx->alter_txn);

    // dropping indexes invalidates the stored cardinality statistics
    if (error == 0)
        ctx->reset_card = true;

    return error;
}
+
// Perform a hot add-column or drop-column: install a new row descriptor on
// every dictionary and broadcast a row-mutator message that rewrites the
// stored rows. Returns 0 on success, otherwise an error number.
int ha_tokudb::alter_table_add_or_drop_column(
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info) {

    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
    int error;
    uchar *column_extra = NULL;
    uint32_t max_column_extra_size;
    uint32_t num_column_extra;
    uint32_t num_columns = 0;
    uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
    // set size such that we know it is big enough for both cases
    uint32_t columns[table->s->fields + altered_table->s->fields];
    memset(columns, 0, sizeof(columns));

    // generate the array of columns
    // note: argument order to find_changed_columns determines whether the
    // difference is interpreted as a drop (altered -> orig) or add (orig ->
    // altered)
    if (ha_alter_info->handler_flags & ALTER_DROP_COLUMN) {
        find_changed_columns(
            columns,
            &num_columns,
            altered_table,
            table);
    } else if (ha_alter_info->handler_flags & ALTER_ADD_COLUMN) {
        find_changed_columns(
            columns,
            &num_columns,
            table,
            altered_table);
    } else {
        assert_unreachable();
    }
    // upper bound on the serialized mutator message size
    max_column_extra_size =
        // max static row_mutator
        STATIC_ROW_MUTATOR_SIZE +
        // max dynamic row_mutator
        4 + num_columns*(1+1+4+1+1+4) + altered_table->s->reclength +
        // max static blob size
        (4 + share->kc_info.num_blobs) +
        // max dynamic blob size
        (num_columns*(1+4+1+4));
    column_extra = (uchar*)tokudb::memory::malloc(
        max_column_extra_size,
        MYF(MY_WME));
    if (column_extra == NULL) {
        error = ENOMEM;
        goto cleanup;
    }

    for (uint32_t i = 0; i < curr_num_DBs; i++) {
        // change to a new descriptor
        DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
        error = new_row_descriptor(
            altered_table, ha_alter_info, i, &row_descriptor);
        if (error)
            goto cleanup;
        error = share->key_file[i]->change_descriptor(
            share->key_file[i],
            ctx->alter_txn,
            &row_descriptor,
            0);
        tokudb::memory::free(row_descriptor.data);
        if (error)
            goto cleanup;

        // only the primary key and clustering keys store full rows, so only
        // they need the broadcast row mutation
        if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
            num_column_extra = fill_row_mutator(
                column_extra,
                columns,
                num_columns,
                altered_table,
                ctx->altered_table_kc_info,
                i,
                // true if adding columns, otherwise is a drop
                (ha_alter_info->handler_flags &
                    ALTER_ADD_COLUMN) != 0);

            DBT column_dbt; memset(&column_dbt, 0, sizeof column_dbt);
            column_dbt.data = column_extra;
            column_dbt.size = num_column_extra;
            DBUG_ASSERT(num_column_extra <= max_column_extra_size);
            error = share->key_file[i]->update_broadcast(
                share->key_file[i],
                ctx->alter_txn,
                &column_dbt,
                DB_IS_RESETTING_OP);
            if (error) {
                goto cleanup;
            }
        }
    }

    error = 0;
cleanup:
    tokudb::memory::free(column_extra);
    return error;
}
+
// Commit or abort the alter operations.
// If commit then write the new frm data to the status using the alter
// transaction.
// If abort then abort the alter transaction and try to rollback the
// non-transactional changes.
// Returns false on success, true on failure.
bool ha_tokudb::commit_inplace_alter_table(TOKUDB_UNUSED(TABLE* altered_table),
                                           Alter_inplace_info* ha_alter_info,
                                           bool commit) {
    TOKUDB_HANDLER_DBUG_ENTER("");

    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
    bool result = false; // success
    THD *thd = ha_thd();

    if (commit) {
#if (50613 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
    (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
    (100000 <= MYSQL_VERSION_ID)
        // opt out of group commit of the alter across handlers
        if (ha_alter_info->group_commit_ctx) {
            ha_alter_info->group_commit_ctx = NULL;
        }
#endif
#if defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
#if (50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599) || \
    (100000 <= MYSQL_VERSION_ID)
#if defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
        if (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL) {
#else
        if (true) {
#endif  // defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
            int error = write_frm_data(
                share->status_block,
                ctx->alter_txn,
                altered_table->s->path.str);
            if (error) {
                // fall through to the abort path below
                commit = false;
                result = true;
                print_error(error, MYF(0));
            }
        }
#endif  // (50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599) ||
        // (100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100099)
#endif  // defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
    }

    if (!commit) {
        // rollback requires an exclusive MDL when engine state was mutated
        if (table->mdl_ticket->get_type() != MDL_EXCLUSIVE &&
            (ctx->add_index_changed || ctx->drop_index_changed ||
             ctx->compression_changed)) {

            // get exclusive lock no matter what
            // the thread's kill flag is temporarily cleared so the MDL
            // upgrade cannot be interrupted; it is restored afterwards
#if defined(MARIADB_BASE_VERSION)
            killed_state saved_killed_state = thd->killed;
            thd->killed = NOT_KILLED;
            for (volatile uint i = 0;
                 wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED);
                 i++) {
                if (thd->killed != NOT_KILLED)
                    thd->killed = NOT_KILLED;
                sleep(1);
            }
            assert_always(table->mdl_ticket->get_type() == MDL_EXCLUSIVE);
            if (thd->killed == NOT_KILLED)
                thd->killed = saved_killed_state;
#else
            THD::killed_state saved_killed_state = thd->killed;
            thd->killed = THD::NOT_KILLED;
            // MySQL does not handle HA_EXTRA_NOT_USED so we use
            // HA_EXTRA_PREPARE_FOR_RENAME since it is passed through
            // the partition storage engine and is treated as a NOP by tokudb
            for (volatile uint i = 0;
                 wait_while_table_is_used(
                    thd,
                    table,
                    HA_EXTRA_PREPARE_FOR_RENAME);
                 i++) {
                if (thd->killed != THD::NOT_KILLED)
                    thd->killed = THD::NOT_KILLED;
                sleep(1);
            }
            assert_always(table->mdl_ticket->get_type() == MDL_EXCLUSIVE);
            if (thd->killed == THD::NOT_KILLED)
                thd->killed = saved_killed_state;
#endif
        }

        // abort the alter transaction NOW so that any alters are rolled back.
        // this allows the following restores to work.
        tokudb_trx_data* trx =
            (tokudb_trx_data*)thd_get_ha_data(thd, tokudb_hton);
        assert_always(ctx->alter_txn == trx->stmt);
        assert_always(trx->tokudb_lock_count > 0);
        // for partitioned tables, we use a single transaction to do all of the
        // partition changes. the tokudb_lock_count is a reference count for
        // each of the handlers to the same transaction. obviously, we want
        // to only abort once.
        if (trx->tokudb_lock_count > 0) {
            if (--trx->tokudb_lock_count <= trx->create_lock_count) {
                trx->create_lock_count = 0;
                abort_txn(ctx->alter_txn);
                ctx->alter_txn = NULL;
                trx->stmt = NULL;
                trx->sub_sp_level = NULL;
            }
            transaction = NULL;
        }

        // undo the non-transactional engine changes recorded in the context
        if (ctx->add_index_changed) {
            restore_add_index(
                table,
                ha_alter_info->index_add_count,
                ctx->incremented_num_DBs,
                ctx->modified_DBs);
        }
        if (ctx->drop_index_changed) {
            // translate key names to indexes into the key_info array
            uint index_drop_offsets[ha_alter_info->index_drop_count];
            for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
                bool found = find_index_of_key(
                    ha_alter_info->index_drop_buffer[i]->name.str,
                    table,
                    &index_drop_offsets[i]);
                assert_always(found);
            }
            restore_drop_indexes(index_drop_offsets,
                                 ha_alter_info->index_drop_count);
        }
        if (ctx->compression_changed) {
            // restore the compression method saved in inplace_alter_table
            uint32_t curr_num_DBs =
                table->s->keys + tokudb_test(hidden_primary_key);
            for (uint32_t i = 0; i < curr_num_DBs; i++) {
                DB *db = share->key_file[i];
                int error = db->change_compression_method(
                    db,
                    ctx->orig_compression_method);
                assert_always(error == 0);
            }
        }
    }
    DBUG_RETURN(result);
}
+
+// Setup the altered table's key and col info.
+// Allocates the KEY_AND_COL_INFO storage first, then fills it in from the
+// altered table's definition.  Returns 0 on success, nonzero on failure.
+int ha_tokudb::setup_kc_info(
+    TABLE* altered_table,
+    KEY_AND_COL_INFO* altered_kc_info) {
+
+    int error = allocate_key_and_col_info(altered_table->s, altered_kc_info);
+    if (error)
+        return error;
+    return initialize_key_and_col_info(
+        altered_table->s,
+        altered_table,
+        altered_kc_info,
+        hidden_primary_key,
+        primary_key);
+}
+
+// Expand the variable length fields offsets from 1 to 2 bytes.
+// For every dictionary of the table, install a row descriptor matching the
+// altered table, then broadcast an UPDATE_OP_EXPAND_VARIABLE_OFFSETS message
+// into each dictionary that stores row values (the primary key and any
+// clustering keys) so existing rows are converted by the message application.
+// Returns 0 on success, or the first nonzero error encountered.
+int ha_tokudb::alter_table_expand_varchar_offsets(
+    TABLE* altered_table,
+    Alter_inplace_info* ha_alter_info) {
+
+    int error = 0;
+    tokudb_alter_ctx* ctx =
+        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
+
+    // one DB per declared key, plus one for the hidden primary key if present
+    uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
+    for (uint32_t i = 0; i < curr_num_DBs; i++) {
+        // change to a new descriptor
+        DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
+        error = new_row_descriptor(
+            altered_table, ha_alter_info, i, &row_descriptor);
+        if (error)
+            break;
+        error = share->key_file[i]->change_descriptor(
+            share->key_file[i],
+            ctx->alter_txn,
+            &row_descriptor,
+            0);
+        tokudb::memory::free(row_descriptor.data);
+        if (error)
+            break;
+
+        // for all trees that have values, make an update variable offsets
+        // message and broadcast it into the tree
+        if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
+            // the variable-length offsets live right after the null bytes and
+            // the fixed-size fields in the row value
+            uint32_t offset_start =
+                table_share->null_bytes +
+                share->kc_info.mcp_info[i].fixed_field_size;
+            uint32_t offset_end =
+                offset_start +
+                share->kc_info.mcp_info[i].len_of_offsets;
+            uint32_t number_of_offsets = offset_end - offset_start;
+
+            // make the expand variable offsets message.
+            // layout: [1-byte opcode][uint32 number_of_offsets]
+            //         [uint32 offset_start]; sizeof(offset_end) below equals
+            // sizeof(number_of_offsets) (both uint32_t), so the size is right
+            DBT expand; memset(&expand, 0, sizeof expand);
+            expand.size =
+                sizeof(uchar) + sizeof(offset_start) + sizeof(offset_end);
+            expand.data = tokudb::memory::malloc(expand.size, MYF(MY_WME));
+            if (!expand.data) {
+                error = ENOMEM;
+                break;
+            }
+            uchar* expand_ptr = (uchar*)expand.data;
+            expand_ptr[0] = UPDATE_OP_EXPAND_VARIABLE_OFFSETS;
+            expand_ptr += sizeof(uchar);
+
+            memcpy(expand_ptr, &number_of_offsets, sizeof(number_of_offsets));
+            expand_ptr += sizeof(number_of_offsets);
+
+            memcpy(expand_ptr, &offset_start, sizeof(offset_start));
+            expand_ptr += sizeof(offset_start);
+
+            // and broadcast it into the tree
+            error = share->key_file[i]->update_broadcast(
+                share->key_file[i],
+                ctx->alter_txn,
+                &expand,
+                DB_IS_RESETTING_OP);
+            tokudb::memory::free(expand.data);
+            if (error)
+                break;
+        }
+    }
+
+    return error;
+}
+
+// Return true if a field is part of a key (matched by field name)
+static bool field_in_key(KEY *key, Field *field) {
+    uint num_parts = key->user_defined_key_parts;
+    for (uint part = 0; part < num_parts; part++) {
+        Field *key_field = key->key_part[part].field;
+        if (strcmp(key_field->field_name.str, field->field_name.str) == 0)
+            return true;
+    }
+    return false;
+}
+
+// Return true if a field is part of any key of the table
+static bool field_in_key_of_table(TABLE *table, Field *field) {
+    bool found = false;
+    for (uint i = 0; !found && i < table->s->keys; i++)
+        found = field_in_key(&table->key_info[i], field);
+    return found;
+}
+
+// Return true if a varchar/varbinary length change can be done inplace:
+// varchar -> varchar of equal or greater length with the same binary-ness
+// and charset.  May flag that the row format's variable offsets need to be
+// expanded from 1 to 2 bytes as a side effect.
+static bool change_varchar_length_is_supported(Field* old_field,
+                                               Field* new_field,
+                                               tokudb_alter_ctx* ctx) {
+    // both must be varchar, identical binary attribute and charset, and the
+    // declared length may only grow
+    bool compatible =
+        old_field->real_type() == MYSQL_TYPE_VARCHAR &&
+        new_field->real_type() == MYSQL_TYPE_VARCHAR &&
+        old_field->binary() == new_field->binary() &&
+        old_field->charset()->number == new_field->charset()->number &&
+        old_field->field_length <= new_field->field_length;
+    if (!compatible)
+        return false;
+    if (ctx->table_kc_info->num_offset_bytes >
+        ctx->altered_table_kc_info->num_offset_bytes)
+        return false; // shrinking the offset size is not supported
+    if (ctx->table_kc_info->num_offset_bytes <
+        ctx->altered_table_kc_info->num_offset_bytes)
+        // sum of varchar lengths crossed the 1 byte -> 2 byte boundary
+        ctx->expand_varchar_update_needed = true;
+    return true;
+}
+
+// Return true if the varstring field can be converted inplace to the new
+// column definition: no shrink in byte or character length, same compression
+// state, and the same type handler.
+bool ha_tokudb::can_convert_varstring(const Field_varstring* field,
+                                      const Column_definition& new_type) const {
+    if (new_type.length < field->field_length)
+        return false;
+    if (new_type.char_length < field->char_length())
+        return false;
+    // '!' normalizes both compression methods to booleans so that any two
+    // non-null methods compare equal here
+    if (!new_type.compression_method() != !field->compression_method())
+        return false;
+    return new_type.type_handler() == field->type_handler();
+}
+
+// Return true if all changed field lengths can be changed inplace, otherwise
+// return false
+static bool change_length_is_supported(TABLE* table,
+                                       TABLE* altered_table,
+                                       tokudb_alter_ctx* ctx) {
+    // the overall row shape must be unchanged
+    if (table->s->fields != altered_table->s->fields)
+        return false;
+    if (table->s->null_bytes != altered_table->s->null_bytes)
+        return false;
+    // only a single changed field is handled
+    if (ctx->changed_fields.elements() > 1)
+        return false;
+    for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
+         ai < ctx->changed_fields.elements();
+         ai++) {
+        uint fnum = ctx->changed_fields.at(ai);
+        Field *old_field = table->field[fnum];
+        Field *new_field = altered_table->field[fnum];
+        // no type conversions, varchar only
+        if (old_field->real_type() != new_field->real_type() ||
+            old_field->real_type() != MYSQL_TYPE_VARCHAR)
+            return false;
+        // the changed field must not participate in any key
+        if (field_in_key_of_table(table, old_field) ||
+            field_in_key_of_table(altered_table, new_field))
+            return false;
+        if (!change_varchar_length_is_supported(old_field, new_field, ctx))
+            return false;
+    }
+
+    return true;
+}
+
+// Debug function that ensures that the array is sorted in non-decreasing
+// order.  Returns true for empty and single-element arrays.
+static bool is_sorted(Dynamic_array<uint> &a) {
+    for (DYNAMIC_ARRAY_ELEMENTS_TYPE i = 1; i < a.elements(); i++) {
+        // compare each element with its immediate predecessor.  The previous
+        // implementation never advanced its "lastelement" cursor, so it only
+        // compared element 0 against the rest and missed inversions such as
+        // {1, 3, 2}.
+        if (a.at(i - 1) > a.at(i))
+            return false;
+    }
+    return true;
+}
+
+// Expand each changed column of the table, one column at a time, stopping
+// at the first error.  Returns 0 on success.
+int ha_tokudb::alter_table_expand_columns(
+    TABLE* altered_table,
+    Alter_inplace_info* ha_alter_info) {
+
+    tokudb_alter_ctx* ctx =
+        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
+    // since we build the changed_fields array in field order, it must be sorted
+    assert_always(is_sorted(ctx->changed_fields));
+
+    int error = 0;
+    for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
+         error == 0 && ai < ctx->changed_fields.elements();
+         ai++) {
+        error = alter_table_expand_one_column(
+            altered_table,
+            ha_alter_info,
+            ctx->changed_fields.at(ai));
+    }
+
+    return error;
+}
+
+// Return true if the field is an unsigned int
+static bool is_unsigned(Field *f) {
+    return (f->flags & UNSIGNED_FLAG) ? true : false;
+}
+
+// Return the starting offset in the value for a particular index (selected by
+// idx) of a particular field (selected by expand_field_num).  Fields that are
+// filtered into the key contribute no bytes to the value.
+// TODO: replace this?
+static uint32_t alter_table_field_offset(
+    uint32_t null_bytes,
+    KEY_AND_COL_INFO* kc_info,
+    int idx,
+    int expand_field_num) {
+
+    uint32_t offset = null_bytes;
+    for (int field = 0; field < expand_field_num; field++) {
+        // only fields stored in the value add to the offset
+        if (!bitmap_is_set(&kc_info->key_filters[idx], field))
+            offset += kc_info->field_lengths[field];
+    }
+    return offset;
+}
+
+// Send an expand message into all clustered indexes including the primary.
+// For each dictionary: install a row descriptor matching the altered table,
+// then, for dictionaries that store row values (primary key and clustering
+// keys), broadcast an expand message that converts the stored field from its
+// old fixed length to the new, larger fixed length.
+// Returns 0 on success, or the first nonzero error encountered.
+int ha_tokudb::alter_table_expand_one_column(
+    TABLE* altered_table,
+    Alter_inplace_info* ha_alter_info,
+    int expand_field_num) {
+
+    int error = 0;
+    tokudb_alter_ctx* ctx =
+        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
+
+    // the old and new fields must map to the same toku type; only the
+    // length may differ
+    Field *old_field = table->field[expand_field_num];
+    TOKU_TYPE old_field_type = mysql_to_toku_type(old_field);
+    Field *new_field = altered_table->field[expand_field_num];
+    TOKU_TYPE new_field_type = mysql_to_toku_type(new_field);
+    assert_always(old_field_type == new_field_type);
+
+    // pick the expand opcode and the byte used to pad the grown field
+    uchar operation;
+    uchar pad_char;
+    switch (old_field_type) {
+    case toku_type_int:
+        // signed and unsigned ints expand differently (sign extension)
+        assert_always(is_unsigned(old_field) == is_unsigned(new_field));
+        if (is_unsigned(old_field))
+            operation = UPDATE_OP_EXPAND_UINT;
+        else
+            operation = UPDATE_OP_EXPAND_INT;
+        pad_char = 0;
+        break;
+    case toku_type_fixstring:
+        // char(X) grows by appending the charset's pad character
+        operation = UPDATE_OP_EXPAND_CHAR;
+        pad_char = old_field->charset()->pad_char;
+        break;
+    case toku_type_fixbinary:
+        // binary(X) grows by appending zero bytes
+        operation = UPDATE_OP_EXPAND_BINARY;
+        pad_char = 0;
+        break;
+    default:
+        assert_unreachable();
+    }
+
+    // one DB per declared key, plus one for the hidden primary key if present
+    uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
+    for (uint32_t i = 0; i < curr_num_DBs; i++) {
+        // change to a new descriptor
+        DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
+        error = new_row_descriptor(
+            altered_table, ha_alter_info, i, &row_descriptor);
+        if (error)
+            break;
+        error = share->key_file[i]->change_descriptor(
+            share->key_file[i],
+            ctx->alter_txn,
+            &row_descriptor,
+            0);
+        tokudb::memory::free(row_descriptor.data);
+        if (error)
+            break;
+
+        // for all trees that have values, make an expand update message and
+        // broadcast it into the tree
+        if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
+            // locate the field in both the old and new row formats
+            uint32_t old_offset = alter_table_field_offset(
+                table_share->null_bytes,
+                ctx->table_kc_info,
+                i,
+                expand_field_num);
+            uint32_t new_offset = alter_table_field_offset(
+                table_share->null_bytes,
+                ctx->altered_table_kc_info,
+                i,
+                expand_field_num);
+            assert_always(old_offset <= new_offset);
+
+            // sanity check the cached field lengths against the Field objects
+            uint32_t old_length =
+                ctx->table_kc_info->field_lengths[expand_field_num];
+            assert_always(old_length == old_field->pack_length());
+
+            uint32_t new_length =
+                ctx->altered_table_kc_info->field_lengths[expand_field_num];
+            assert_always(new_length == new_field->pack_length());
+
+            // message layout: [1-byte opcode][uint32 offset][uint32 old_length]
+            //                 [uint32 new_length][1-byte pad_char, char/binary
+            //                 expansion only]
+            DBT expand; memset(&expand, 0, sizeof(expand));
+            expand.size =
+                sizeof(operation) + sizeof(new_offset) +
+                sizeof(old_length) + sizeof(new_length);
+            if (operation == UPDATE_OP_EXPAND_CHAR ||
+                operation == UPDATE_OP_EXPAND_BINARY)
+                expand.size += sizeof(pad_char);
+            expand.data = tokudb::memory::malloc(expand.size, MYF(MY_WME));
+            if (!expand.data) {
+                error = ENOMEM;
+                break;
+            }
+            uchar *expand_ptr = (uchar *)expand.data;
+            expand_ptr[0] = operation;
+            expand_ptr += sizeof operation;
+
+            // for the first altered field, old_offset == new_offset.
+            // for the subsequent altered fields, the new_offset
+            // should be used as it includes the length changes from the
+            // previous altered fields.
+            memcpy(expand_ptr, &new_offset, sizeof(new_offset));
+            expand_ptr += sizeof(new_offset);
+
+            memcpy(expand_ptr, &old_length, sizeof(old_length));
+            expand_ptr += sizeof(old_length);
+
+            memcpy(expand_ptr, &new_length, sizeof(new_length));
+            expand_ptr += sizeof(new_length);
+
+            if (operation == UPDATE_OP_EXPAND_CHAR ||
+                operation == UPDATE_OP_EXPAND_BINARY) {
+                memcpy(expand_ptr, &pad_char, sizeof(pad_char));
+                expand_ptr += sizeof(pad_char);
+            }
+
+            // the writes above must fill the message exactly
+            assert_always(expand_ptr == (uchar*)expand.data + expand.size);
+
+            // and broadcast it into the tree
+            error = share->key_file[i]->update_broadcast(
+                share->key_file[i],
+                ctx->alter_txn,
+                &expand,
+                DB_IS_RESETTING_OP);
+            tokudb::memory::free(expand.data);
+            if (error)
+                break;
+        }
+    }
+
+    return error;
+}
+
+// Append the pack lengths (one byte each) of the table's n blob fields,
+// in blob_fields order, to the buffer b.
+static void marshall_blob_lengths(
+    tokudb::buffer& b,
+    uint32_t n,
+    TABLE* table,
+    KEY_AND_COL_INFO* kc_info) {
+
+    for (uint i = 0; i < n; i++) {
+        uint field_num = kc_info->blob_fields[i];
+        assert_always(field_num < table->s->fields);
+        uint8_t length = table->s->field[field_num]->row_pack_length();
+        b.append(&length, sizeof length);
+    }
+}
+
+// Expand blob fields in all dictionaries that store row values.  For each
+// dictionary: install a row descriptor matching the altered table, then
+// broadcast an UPDATE_OP_EXPAND_BLOB message describing the old and new blob
+// length encodings into the primary key and any clustering keys.
+// Returns 0 on success, or the first nonzero error encountered.
+int ha_tokudb::alter_table_expand_blobs(
+    TABLE* altered_table,
+    Alter_inplace_info* ha_alter_info) {
+
+    int error = 0;
+    tokudb_alter_ctx* ctx =
+        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
+
+    // one DB per declared key, plus one for the hidden primary key if present
+    uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
+    for (uint32_t i = 0; i < curr_num_DBs; i++) {
+        // change to a new descriptor
+        DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
+        error = new_row_descriptor(
+            altered_table, ha_alter_info, i, &row_descriptor);
+        if (error)
+            break;
+        error = share->key_file[i]->change_descriptor(
+            share->key_file[i],
+            ctx->alter_txn,
+            &row_descriptor,
+            0);
+        tokudb::memory::free(row_descriptor.data);
+        if (error)
+            break;
+
+        // for all trees that have values, make an update blobs message and
+        // broadcast it into the tree
+        if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
+            // message layout: [1-byte opcode][uint32 start of variable part]
+            //                 [uint32 variable offset bytes total]
+            //                 [uint32 bytes per offset (0 if none)]
+            //                 [uint32 blob count][old blob lengths]
+            //                 [new blob lengths]
+            tokudb::buffer b;
+            uint8_t op = UPDATE_OP_EXPAND_BLOB;
+            b.append(&op, sizeof op);
+            b.append_ui<uint32_t>(
+                table->s->null_bytes +
+                ctx->table_kc_info->mcp_info[i].fixed_field_size);
+            uint32_t var_offset_bytes =
+                ctx->table_kc_info->mcp_info[i].len_of_offsets;
+            b.append_ui<uint32_t>(var_offset_bytes);
+            b.append_ui<uint32_t>(
+                var_offset_bytes == 0 ? 0 :
+                ctx->table_kc_info->num_offset_bytes);
+
+            // add blobs info: old lengths from the current table, new
+            // lengths from the altered table
+            uint32_t num_blobs = ctx->table_kc_info->num_blobs;
+            b.append_ui<uint32_t>(num_blobs);
+            marshall_blob_lengths(b, num_blobs, table, ctx->table_kc_info);
+            marshall_blob_lengths(
+                b,
+                num_blobs,
+                altered_table,
+                ctx->altered_table_kc_info);
+
+            // and broadcast it into the tree; the DBT borrows the buffer's
+            // storage, which is released when b goes out of scope
+            DBT expand; memset(&expand, 0, sizeof expand);
+            expand.data = b.data();
+            expand.size = b.size();
+            error = share->key_file[i]->update_broadcast(
+                share->key_file[i],
+                ctx->alter_txn,
+                &expand,
+                DB_IS_RESETTING_OP);
+            if (error)
+                break;
+        }
+    }
+
+    return error;
+}
+
+// Return true if two fixed length fields can be changed inplace: equal size
+// is trivially supported, growth is supported via an expand message, and
+// shrinking is not supported.
+static bool change_fixed_length_is_supported(Field* old_field,
+                                             Field* new_field,
+                                             tokudb_alter_ctx* ctx) {
+    uint32_t old_len = old_field->pack_length();
+    uint32_t new_len = new_field->pack_length();
+    if (old_len > new_len)
+        return false; // shrink is not supported
+    if (old_len < new_len)
+        // growth requires broadcasting an expand message
+        ctx->expand_fixed_update_needed = true;
+    return true;
+}
+
+// Return true if a blob/text field can grow (or keep its size) inplace.
+// blob <-> text conversions and charset changes are not supported.
+static bool change_blob_length_is_supported(Field* old_field,
+                                            Field* new_field,
+                                            tokudb_alter_ctx* ctx) {
+    // the field must not shrink
+    if (old_field->pack_length() > new_field->pack_length())
+        return false;
+    bool supported;
+    if (old_field->binary() && new_field->binary()) {
+        // blob -> longer or equal length blob
+        supported = true;
+    } else if (!old_field->binary() && !new_field->binary()) {
+        // text -> longer or equal length text with the same charset
+        supported =
+            old_field->charset()->number == new_field->charset()->number;
+    } else {
+        // mixed blob/text change
+        supported = false;
+    }
+    if (supported)
+        ctx->expand_blob_update_needed = true;
+    return supported;
+}
+
+// Return true if the MySQL type is an int or unsigned int type
+static bool is_int_type(enum_field_types t) {
+    return t == MYSQL_TYPE_TINY || t == MYSQL_TYPE_SHORT ||
+           t == MYSQL_TYPE_INT24 || t == MYSQL_TYPE_LONG ||
+           t == MYSQL_TYPE_LONGLONG;
+}
+
+// Return true if two field types can be changed inplace
+static bool change_field_type_is_supported(Field* old_field,
+                                           Field* new_field,
+                                           tokudb_alter_ctx* ctx) {
+    enum_field_types old_type = old_field->real_type();
+    enum_field_types new_type = new_field->real_type();
+
+    // int and unsigned int expansion
+    if (is_int_type(old_type)) {
+        if (!is_int_type(new_type) ||
+            is_unsigned(old_field) != is_unsigned(new_field))
+            return false;
+        return change_fixed_length_is_supported(old_field, new_field, ctx);
+    }
+
+    // char(X) -> char(Y) and binary(X) -> binary(Y) expansion
+    if (old_type == MYSQL_TYPE_STRING) {
+        if (new_type != MYSQL_TYPE_STRING ||
+            old_field->binary() != new_field->binary() ||
+            old_field->charset()->number != new_field->charset()->number)
+            return false;
+        return change_fixed_length_is_supported(old_field, new_field, ctx);
+    }
+
+    // varchar(X) -> varchar(Y) and varbinary(X) -> varbinary(Y) expansion
+    // where X < 256 <= Y; the ALTER_STORED_COLUMN_TYPE handler flag is set
+    // for these cases
+    if (old_type == MYSQL_TYPE_VARCHAR)
+        return change_varchar_length_is_supported(old_field, new_field, ctx);
+
+    // blob -> longer or equal length blob
+    if (old_type == MYSQL_TYPE_BLOB && new_type == MYSQL_TYPE_BLOB)
+        return change_blob_length_is_supported(old_field, new_field, ctx);
+
+    // everything else is unsupported
+    return false;
+}
+
+// Return true if all changed field types can be changed inplace
+static bool change_type_is_supported(TABLE* table,
+                                     TABLE* altered_table,
+                                     tokudb_alter_ctx* ctx) {
+    // row shape must be unchanged and only one field may change
+    if (table->s->null_bytes != altered_table->s->null_bytes ||
+        table->s->fields != altered_table->s->fields ||
+        ctx->changed_fields.elements() > 1)
+        return false;
+    for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
+         ai < ctx->changed_fields.elements();
+         ai++) {
+        uint fnum = ctx->changed_fields.at(ai);
+        Field *old_field = table->field[fnum];
+        Field *new_field = altered_table->field[fnum];
+        // changed fields must not participate in any key
+        if (field_in_key_of_table(table, old_field) ||
+            field_in_key_of_table(altered_table, new_field))
+            return false;
+        if (!change_field_type_is_supported(old_field, new_field, ctx))
+            return false;
+    }
+    return true;
+}
+
+// Allocate and initialize a new descriptor for a dictionary in the altered
+// table identified with idx.
+// Return the new descriptor in the row_descriptor DBT.
+// Return non-zero on error.
+int ha_tokudb::new_row_descriptor(TABLE* altered_table,
+                                  Alter_inplace_info* ha_alter_info,
+                                  uint32_t idx,
+                                  DBT* row_descriptor) {
+    tokudb_alter_ctx* ctx =
+        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
+
+    // allocate the worst-case descriptor size up front; the create_*
+    // functions below return the actual size used
+    row_descriptor->size =
+        get_max_desc_size(ctx->altered_table_kc_info, altered_table);
+    row_descriptor->data =
+        (uchar*)tokudb::memory::malloc(row_descriptor->size, MYF(MY_WME));
+    if (row_descriptor->data == NULL)
+        return ENOMEM;
+
+    // a hidden primary key has no KEY describing it
+    KEY* prim_key =
+        hidden_primary_key ? NULL : &altered_table->key_info[primary_key];
+    if (idx == primary_key) {
+        row_descriptor->size = create_main_key_descriptor(
+            (uchar*)row_descriptor->data,
+            prim_key,
+            hidden_primary_key,
+            primary_key,
+            altered_table,
+            ctx->altered_table_kc_info);
+    } else {
+        row_descriptor->size = create_secondary_key_descriptor(
+            (uchar*)row_descriptor->data,
+            &altered_table->key_info[idx],
+            prim_key,
+            hidden_primary_key,
+            altered_table,
+            primary_key,
+            idx,
+            ctx->altered_table_kc_info);
+    }
+    return 0;
+}
+
+#endif