diff options
Diffstat (limited to 'storage/tokudb/ha_tokudb_alter_56.cc')
-rw-r--r-- | storage/tokudb/ha_tokudb_alter_56.cc | 1646 |
1 files changed, 1646 insertions, 0 deletions
diff --git a/storage/tokudb/ha_tokudb_alter_56.cc b/storage/tokudb/ha_tokudb_alter_56.cc new file mode 100644 index 00000000..233d929a --- /dev/null +++ b/storage/tokudb/ha_tokudb_alter_56.cc @@ -0,0 +1,1646 @@ +/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: +#ident "$Id$" +/*====== +This file is part of TokuDB + + +Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. + + TokuDBis is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2, + as published by the Free Software Foundation. + + TokuDB is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TokuDB. If not, see <http://www.gnu.org/licenses/>. + +======= */ + +#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." + +#if TOKU_INCLUDE_ALTER_56 + +#if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 101099 +#define TOKU_ALTER_RENAME ALTER_RENAME +#define DYNAMIC_ARRAY_ELEMENTS_TYPE size_t +#elif (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \ + (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) +#define TOKU_ALTER_RENAME ALTER_RENAME +#define DYNAMIC_ARRAY_ELEMENTS_TYPE int +#elif 50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599 +#define TOKU_ALTER_RENAME ALTER_RENAME_56 +#define DYNAMIC_ARRAY_ELEMENTS_TYPE int +#else +#error +#endif + +#include "ha_tokudb_alter_common.cc" +#include <sql_array.h> +#include <sql_base.h> + +// The tokudb alter context contains the alter state that is set in the check if supported method and used +// later when the alter operation is executed. 
+class tokudb_alter_ctx : public inplace_alter_handler_ctx { +public: + tokudb_alter_ctx() : + handler_flags(0), + alter_txn(NULL), + add_index_changed(false), + drop_index_changed(false), + reset_card(false), + compression_changed(false), + expand_varchar_update_needed(false), + expand_fixed_update_needed(false), + expand_blob_update_needed(false), + optimize_needed(false), changed_fields(PSI_INSTRUMENT_MEM), + table_kc_info(NULL), + altered_table_kc_info(NULL) { + } + ~tokudb_alter_ctx() { + if (altered_table_kc_info) + free_key_and_col_info(altered_table_kc_info); + } +public: + ulong handler_flags; + DB_TXN* alter_txn; + bool add_index_changed; + bool incremented_num_DBs, modified_DBs; + bool drop_index_changed; + bool reset_card; + bool compression_changed; + enum toku_compression_method orig_compression_method; + bool expand_varchar_update_needed; + bool expand_fixed_update_needed; + bool expand_blob_update_needed; + bool optimize_needed; + Dynamic_array<uint> changed_fields; + KEY_AND_COL_INFO* table_kc_info; + KEY_AND_COL_INFO* altered_table_kc_info; + KEY_AND_COL_INFO altered_table_kc_info_base; +}; + +// Debug function to print out an alter table operation +void ha_tokudb::print_alter_info( + TABLE* altered_table, + Alter_inplace_info* ha_alter_info) { + + TOKUDB_TRACE( + "***are keys of two tables same? 
%d", + tables_have_same_keys(table, altered_table, false, false)); + if (ha_alter_info->handler_flags) { + TOKUDB_TRACE("***alter flags set ***"); + for (int i = 0; i < 32; i++) { + if (ha_alter_info->handler_flags & (1 << i)) + TOKUDB_TRACE("%d", i); + } + } + + // everyone calculates data by doing some default_values - record[0], but + // I do not see why that is necessary + TOKUDB_TRACE("******"); + TOKUDB_TRACE("***orig table***"); + for (uint i = 0; i < table->s->fields; i++) { + // + // make sure to use table->field, and NOT table->s->field + // + Field* curr_field = table->field[i]; + uint null_offset = get_null_offset(table, curr_field); + TOKUDB_TRACE( + "name: %s, types: %u %u, nullable: %d, null_offset: %d, is_null_field: " + "%d, is_null %d, pack_length %u", + curr_field->field_name.str, + curr_field->real_type(), + mysql_to_toku_type(curr_field), + curr_field->null_bit, + null_offset, + curr_field->real_maybe_null(), + curr_field->real_maybe_null() ? + table->s->default_values[null_offset] & curr_field->null_bit : + 0xffffffff, + curr_field->pack_length()); + } + TOKUDB_TRACE("******"); + TOKUDB_TRACE("***altered table***"); + for (uint i = 0; i < altered_table->s->fields; i++) { + Field* curr_field = altered_table->field[i]; + uint null_offset = get_null_offset(altered_table, curr_field); + TOKUDB_TRACE( + "name: %s, types: %u %u, nullable: %d, null_offset: %d, " + "is_null_field: %d, is_null %d, pack_length %u", + curr_field->field_name.str, + curr_field->real_type(), + mysql_to_toku_type(curr_field), + curr_field->null_bit, + null_offset, + curr_field->real_maybe_null(), + curr_field->real_maybe_null() ? + altered_table->s->default_values[null_offset] & + curr_field->null_bit : 0xffffffff, + curr_field->pack_length()); + } + TOKUDB_TRACE("******"); +} + +// Given two tables with equal number of fields, find all of the fields with +// different types and return the indexes of the different fields in the +// changed_fields array. 
This function ignores field name differences. +static int find_changed_fields( + TABLE* table_a, + TABLE* table_b, + Dynamic_array<uint>& changed_fields) { + + for (uint i = 0; i < table_a->s->fields; i++) { + Field* field_a = table_a->field[i]; + Field* field_b = table_b->field[i]; + if (!fields_are_same_type(field_a, field_b)) + changed_fields.append(i); + } + return changed_fields.elements(); +} + +static bool change_length_is_supported(TABLE* table, + TABLE* altered_table, + tokudb_alter_ctx* ctx); + +static bool change_type_is_supported(TABLE* table, + TABLE* altered_table, + tokudb_alter_ctx* ctx); + +// The ha_alter_info->handler_flags can not be trusted. +// This function maps the bogus handler flags to something we like. +static ulong fix_handler_flags( + THD* thd, + TABLE* table, + TABLE* altered_table, + Alter_inplace_info* ha_alter_info) { + + ulong handler_flags = ha_alter_info->handler_flags; + +#if 100000 <= MYSQL_VERSION_ID + // This is automatically supported, hide the flag from later checks + handler_flags &= ~ALTER_PARTITIONED; +#endif + + // workaround for fill_alter_inplace_info bug (#5193) + // the function erroneously sets the ADD_INDEX and DROP_INDEX flags for a + // column addition that does not change the keys. + // the following code turns the ADD_INDEX and DROP_INDEX flags so that + // we can do hot column addition later. 
+ if (handler_flags & + (ALTER_ADD_COLUMN + ALTER_DROP_COLUMN)) { + if (handler_flags & + (ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX + ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX)) { + if (tables_have_same_keys( + table, + altered_table, + tokudb::sysvars::alter_print_error(thd) != 0, false)) { + handler_flags &= + ~(ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX + + ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX); + } + } + } + + // always allow rename table + any other operation, so turn off the + // rename flag + handler_flags &= ~ALTER_RENAME; + + // ALTER_STORED_COLUMN_TYPE may be set when no columns have been changed, + // so turn off the flag + if (handler_flags & ALTER_STORED_COLUMN_TYPE) { + if (all_fields_are_same_type(table, altered_table)) { + handler_flags &= ~ALTER_STORED_COLUMN_TYPE; + } + } + + return handler_flags; +} + +// Require that there is no intersection of add and drop names. +static bool is_disjoint_add_drop(Alter_inplace_info *ha_alter_info) { + for (uint d = 0; d < ha_alter_info->index_drop_count; d++) { + KEY* drop_key = ha_alter_info->index_drop_buffer[d]; + for (uint a = 0; a < ha_alter_info->index_add_count; a++) { + KEY* add_key = + &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[a]]; + if (strcmp(drop_key->name.str, add_key->name.str) == 0) { + return false; + } + } + } + return true; +} + +// Return true if some bit in mask is set and no bit in ~mask is set, +// otherwise return false. +static bool only_flags(ulong bits, ulong mask) { + return (bits & mask) != 0 && (bits & ~mask) == 0; +} + +// Table create options that should be ignored by TokuDB +// There are 25 total create options defined by mysql server (see handler.h), +// and only 4 options will touch engine data, either rebuild engine data or +// just update meta info: +// 1. HA_CREATE_USED_AUTO update auto_inc info +// 2. HA_CREATE_USED_CHARSET rebuild table if contains character columns +// 3. HA_CREATE_USED_ENGINE rebuild table +// 4. 
HA_CREATE_USED_ROW_FORMAT update compression method info +// +// All the others are either not supported by TokuDB or no need to +// touch engine data. +static constexpr uint32_t TOKUDB_IGNORED_ALTER_CREATE_OPTION_FIELDS = + HA_CREATE_USED_RAID | // deprecated field + HA_CREATE_USED_UNION | // for MERGE table + HA_CREATE_USED_INSERT_METHOD | // for MERGE table + HA_CREATE_USED_MIN_ROWS | // for MEMORY table + HA_CREATE_USED_MAX_ROWS | // for NDB table + HA_CREATE_USED_AVG_ROW_LENGTH | // for MyISAM table + HA_CREATE_USED_PACK_KEYS | // for MyISAM table + HA_CREATE_USED_DEFAULT_CHARSET | // no need to rebuild + HA_CREATE_USED_DATADIR | // ignored by alter + HA_CREATE_USED_INDEXDIR | // ignored by alter + HA_CREATE_USED_CHECKSUM | // for MyISAM table + HA_CREATE_USED_DELAY_KEY_WRITE | // for MyISAM table + HA_CREATE_USED_COMMENT | // no need to rebuild + HA_CREATE_USED_PASSWORD | // not supported by community version + HA_CREATE_USED_CONNECTION | // for FEDERATED table + HA_CREATE_USED_KEY_BLOCK_SIZE | // not supported by TokuDB + HA_CREATE_USED_TRANSACTIONAL | // unused + HA_CREATE_USED_PAGE_CHECKSUM | // unsued + HA_CREATE_USED_STATS_PERSISTENT | // not supported by TokuDB + HA_CREATE_USED_STATS_AUTO_RECALC | // not supported by TokuDB + HA_CREATE_USED_STATS_SAMPLE_PAGES; // not supported by TokuDB + +// Check if an alter table operation on this table and described by the alter +// table parameters is supported inplace and if so, what type of locking is +// needed to execute it. 
return values: + +// HA_ALTER_INPLACE_NOT_SUPPORTED: alter operation is not supported as an +// inplace operation, a table copy is required + +// HA_ALTER_ERROR: the alter table operation should fail + +// HA_ALTER_INPLACE_EXCLUSIVE_LOCK: prepare and alter runs with MDL X + +// HA_ALTER_INPLACE_COPY_LOCK: prepare runs with MDL X, +// alter runs with MDL SNW + +// HA_ALTER_INPLACE_SHARED_LOCK: prepare and alter methods called with MDL SNW, +// concurrent reads, no writes + +// HA_ALTER_INPLACE_COPY_NO_LOCK: prepare runs with MDL X, +// alter runs with MDL SW + +// HA_ALTER_INPLACE_NO_LOCK: prepare and alter methods called with MDL SW, +// concurrent reads, writes. +// must set WRITE_ALLOW_WRITE lock type in the external lock method to avoid +// deadlocks with the MDL lock and the table lock +enum_alter_inplace_result ha_tokudb::check_if_supported_inplace_alter( + TABLE* altered_table, + Alter_inplace_info* ha_alter_info) { + + TOKUDB_HANDLER_DBUG_ENTER(""); + + if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) { + print_alter_info(altered_table, ha_alter_info); + } + + // default is NOT inplace + enum_alter_inplace_result result = HA_ALTER_INPLACE_NOT_SUPPORTED; + THD* thd = ha_thd(); + + // setup context + tokudb_alter_ctx* ctx = new tokudb_alter_ctx; + ha_alter_info->handler_ctx = ctx; + ctx->handler_flags = + fix_handler_flags(thd, table, altered_table, ha_alter_info); + ctx->table_kc_info = &share->kc_info; + ctx->altered_table_kc_info = &ctx->altered_table_kc_info_base; + memset(ctx->altered_table_kc_info, 0, sizeof (KEY_AND_COL_INFO)); + + if (tokudb::sysvars::disable_hot_alter(thd)) { + ; // do nothing + } else if (only_flags( + ctx->handler_flags, + ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX + + ALTER_DROP_UNIQUE_INDEX + + ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX + + ALTER_ADD_UNIQUE_INDEX)) { + // add or drop index + if (table->s->null_bytes == altered_table->s->null_bytes && + (ha_alter_info->index_add_count > 0 || + ha_alter_info->index_drop_count > 
0) && + !tables_have_same_keys( + table, + altered_table, + tokudb::sysvars::alter_print_error(thd) != 0, false) && + is_disjoint_add_drop(ha_alter_info)) { + + if (ctx->handler_flags & + (ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX + + ALTER_DROP_UNIQUE_INDEX)) { + // the fractal tree can not handle dropping an index concurrent + // with querying with the index. + // we grab an exclusive MDL for the drop index. + result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; + } else { + /* FIXME: MDEV-16099 Use alter algorithm=nocopy + or algorithm=instant for non-InnoDB engine */ + result = HA_ALTER_INPLACE_COPY_LOCK; + + // someday, allow multiple hot indexes via alter table add key. + // don't forget to change the store_lock function. + // for now, hot indexing is only supported via session variable + // with the create index sql command + if (ha_alter_info->index_add_count == 1 && + // only one add or drop + ha_alter_info->index_drop_count == 0 && + // must be add index not add unique index + ctx->handler_flags == ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX && + // must be a create index command + thd_sql_command(thd) == SQLCOM_CREATE_INDEX && + // must be enabled + tokudb::sysvars::create_index_online(thd)) { + // external_lock set WRITE_ALLOW_WRITE which allows writes + // concurrent with the index creation + /* FIXME: MDEV-16099 Use alter algorithm=nocopy + or algorithm=instant for non-InnoDB engine */ + result = HA_ALTER_INPLACE_COPY_NO_LOCK; + } + } + } + } else if (only_flags( + ctx->handler_flags, + ALTER_COLUMN_DEFAULT)) { + // column default + if (table->s->null_bytes == altered_table->s->null_bytes) + result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; + } else if (ctx->handler_flags & ALTER_COLUMN_NAME && + only_flags( + ctx->handler_flags, + ALTER_COLUMN_NAME | + ALTER_COLUMN_DEFAULT)) { + // column rename + // we have identified a possible column rename, + // but let's do some more checks + + // we will only allow an hcr if there are no changes + // in column positions 
(ALTER_STORED_COLUMN_ORDER is not set) + + // now need to verify that one and only one column + // has changed only its name. If we find anything to + // the contrary, we don't allow it, also check indexes + if (table->s->null_bytes == altered_table->s->null_bytes) { + bool cr_supported = + column_rename_supported( + table, + altered_table, + (ctx->handler_flags & + ALTER_STORED_COLUMN_ORDER) != 0); + if (cr_supported) + result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; + } + } else if (ctx->handler_flags & ALTER_ADD_COLUMN && + only_flags( + ctx->handler_flags, + ALTER_ADD_COLUMN | + ALTER_STORED_COLUMN_ORDER) && + setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) { + + // add column + uint32_t added_columns[altered_table->s->fields]; + uint32_t num_added_columns = 0; + int r = + find_changed_columns( + added_columns, + &num_added_columns, + table, + altered_table); + if (r == 0) { + if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) { + for (uint32_t i = 0; i < num_added_columns; i++) { + uint32_t curr_added_index = added_columns[i]; + Field* curr_added_field = + altered_table->field[curr_added_index]; + TOKUDB_TRACE( + "Added column: index %d, name %s", + curr_added_index, + curr_added_field->field_name.str); + } + } + result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; + } + } else if (ctx->handler_flags & ALTER_DROP_COLUMN && + only_flags( + ctx->handler_flags, + ALTER_DROP_COLUMN | + ALTER_STORED_COLUMN_ORDER) && + setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) { + + // drop column + uint32_t dropped_columns[table->s->fields]; + uint32_t num_dropped_columns = 0; + int r = + find_changed_columns( + dropped_columns, + &num_dropped_columns, + altered_table, + table); + if (r == 0) { + if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) { + for (uint32_t i = 0; i < num_dropped_columns; i++) { + uint32_t curr_dropped_index = dropped_columns[i]; + Field* curr_dropped_field = table->field[curr_dropped_index]; + TOKUDB_TRACE( 
+ "Dropped column: index %d, name %s", + curr_dropped_index, + curr_dropped_field->field_name.str); + } + } + result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; + } + } else if ((ctx->handler_flags & + ALTER_COLUMN_TYPE_CHANGE_BY_ENGINE) && + only_flags( + ctx->handler_flags, + ALTER_COLUMN_TYPE_CHANGE_BY_ENGINE | + ALTER_COLUMN_DEFAULT) && + table->s->fields == altered_table->s->fields && + find_changed_fields( + table, + altered_table, + ctx->changed_fields) > 0 && + setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) { + + // change column length + if (change_length_is_supported(table, altered_table, ctx)) { + result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; + } + } else if ((ctx->handler_flags & ALTER_STORED_COLUMN_TYPE) && + only_flags( + ctx->handler_flags, + ALTER_STORED_COLUMN_TYPE | + ALTER_COLUMN_DEFAULT) && + table->s->fields == altered_table->s->fields && + find_changed_fields( + table, + altered_table, + ctx->changed_fields) > 0 && + setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) { + + // change column type + if (change_type_is_supported(table, altered_table, ctx)) { + result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; + } + } else if (only_flags( + ctx->handler_flags, + ALTER_CHANGE_CREATE_OPTION)) { + + HA_CREATE_INFO* create_info = ha_alter_info->create_info; +#if TOKU_INCLUDE_OPTION_STRUCTS + // set the USED_ROW_FORMAT flag for use later in this file for changes in the table's + // compression + if (create_info->option_struct->row_format != + table_share->option_struct->row_format) + create_info->used_fields |= HA_CREATE_USED_ROW_FORMAT; +#endif + // alter auto_increment + if (only_flags(create_info->used_fields, HA_CREATE_USED_AUTO)) { + // do a sanity check that the table is what we think it is + if (tables_have_same_keys_and_columns( + table, + altered_table, + tokudb::sysvars::alter_print_error(thd) != 0)) { + result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; + } + } else if (only_flags( + create_info->used_fields, + HA_CREATE_USED_ROW_FORMAT)) { + 
// alter row_format + // do a sanity check that the table is what we think it is + if (tables_have_same_keys_and_columns( + table, + altered_table, + tokudb::sysvars::alter_print_error(thd) != 0)) { + result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK; + } + } else if (only_flags( + create_info->used_fields, + TOKUDB_IGNORED_ALTER_CREATE_OPTION_FIELDS)) { + result = HA_ALTER_INPLACE_COPY_NO_LOCK; + } + } +#if TOKU_OPTIMIZE_WITH_RECREATE + else if (only_flags( + ctx->handler_flags, + ALTER_RECREATE_TABLE | + ALTER_COLUMN_DEFAULT)) { + ctx->optimize_needed = true; + /* FIXME: MDEV-16099 Use alter algorithm=nocopy + or algorithm=instant for non-InnoDB engine */ + result = HA_ALTER_INPLACE_COPY_NO_LOCK; + } +#endif + + if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE)) && + result != HA_ALTER_INPLACE_NOT_SUPPORTED && + table->s->null_bytes != altered_table->s->null_bytes) { + + TOKUDB_HANDLER_TRACE("q %s", thd->query()); + TOKUDB_HANDLER_TRACE( + "null bytes %u -> %u", + table->s->null_bytes, + altered_table->s->null_bytes); + } + + // turn a not supported result into an error if the slow alter table + // (copy) is disabled + if (result == HA_ALTER_INPLACE_NOT_SUPPORTED && + tokudb::sysvars::disable_slow_alter(thd)) { + print_error(HA_ERR_UNSUPPORTED, MYF(0)); + result = HA_ALTER_ERROR; + } + + DBUG_RETURN(result); +} + +// Prepare for the alter operations +bool ha_tokudb::prepare_inplace_alter_table(TOKUDB_UNUSED(TABLE* altered_table), + Alter_inplace_info* ha_alter_info) { + TOKUDB_HANDLER_DBUG_ENTER(""); + tokudb_alter_ctx* ctx = + static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx); + assert_always(transaction); // transaction must exist after table is locked + ctx->alter_txn = transaction; + bool result = false; // success + DBUG_RETURN(result); +} + +// Execute the alter operations. 
+bool ha_tokudb::inplace_alter_table( + TABLE* altered_table, + Alter_inplace_info* ha_alter_info) { + + TOKUDB_HANDLER_DBUG_ENTER(""); + + int error = 0; + tokudb_alter_ctx* ctx = + static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx); + HA_CREATE_INFO* create_info = ha_alter_info->create_info; + + // this should be enough to handle locking as the higher level MDL + // on this table should prevent any new analyze tasks. + share->cancel_background_jobs(); + + if (error == 0 && + (ctx->handler_flags & + (ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX | + ALTER_DROP_UNIQUE_INDEX))) { + error = alter_table_drop_index(ha_alter_info); + } + if (error == 0 && + (ctx->handler_flags & + (ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX + + ALTER_ADD_UNIQUE_INDEX))) { + error = alter_table_add_index(ha_alter_info); + } + if (error == 0 && + (ctx->handler_flags & + (ALTER_ADD_COLUMN | + ALTER_DROP_COLUMN))) { + error = alter_table_add_or_drop_column(altered_table, ha_alter_info); + } + if (error == 0 && + (ctx->handler_flags & ALTER_CHANGE_CREATE_OPTION) && + (create_info->used_fields & HA_CREATE_USED_AUTO)) { + error = write_auto_inc_create( + share->status_block, + create_info->auto_increment_value, + ctx->alter_txn); + } + if (error == 0 && + (ctx->handler_flags & ALTER_CHANGE_CREATE_OPTION) && + (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) { + // Get the current compression + DB *db = share->key_file[0]; + error = db->get_compression_method(db, &ctx->orig_compression_method); + assert_always(error == 0); + + // Set the new compression +#if TOKU_INCLUDE_OPTION_STRUCTS + toku_compression_method method = + row_format_to_toku_compression_method( + (tokudb::sysvars::row_format_t)create_info->option_struct->row_format); +#else + toku_compression_method method = + row_type_to_toku_compression_method(create_info->row_type); +#endif + uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key); + for (uint32_t i = 0; i < curr_num_DBs; i++) { + db = share->key_file[i]; + 
error = db->change_compression_method(db, method); + if (error) + break; + ctx->compression_changed = true; + } + } + + // note: only one column expansion is allowed + + if (error == 0 && ctx->expand_fixed_update_needed) + error = alter_table_expand_columns(altered_table, ha_alter_info); + + if (error == 0 && ctx->expand_varchar_update_needed) + error = alter_table_expand_varchar_offsets( + altered_table, + ha_alter_info); + + if (error == 0 && ctx->expand_blob_update_needed) + error = alter_table_expand_blobs(altered_table, ha_alter_info); + + if (error == 0 && ctx->reset_card) { + error = tokudb::alter_card( + share->status_block, + ctx->alter_txn, + table->s, + altered_table->s); + } + if (error == 0 && ctx->optimize_needed) { + error = do_optimize(ha_thd()); + } + + +#if defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA +#if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \ + (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) +#if defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE + if (error == 0 && + (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL)) { +#else + if (error == 0) { +#endif // defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE + error = write_frm_data( + share->status_block, + ctx->alter_txn, + altered_table->s->path.str); + } +#endif // (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || + // (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) +#endif // defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA + + bool result = false; // success + if (error) { + print_error(error, MYF(0)); + result = true; // failure + } + + DBUG_RETURN(result); +} + +int ha_tokudb::alter_table_add_index(Alter_inplace_info* ha_alter_info) { + + // sort keys in add index order + KEY* key_info = (KEY*)tokudb::memory::malloc( + sizeof(KEY) * ha_alter_info->index_add_count, + MYF(MY_WME)); + for (uint i = 0; i < ha_alter_info->index_add_count; i++) 
{ + KEY *key = &key_info[i]; + *key = ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]]; + for (KEY_PART_INFO* key_part = key->key_part; + key_part < key->key_part + key->user_defined_key_parts; + key_part++) { + key_part->field = table->field[key_part->fieldnr]; + } + } + + tokudb_alter_ctx* ctx = + static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx); + ctx->add_index_changed = true; + int error = tokudb_add_index( + table, + key_info, + ha_alter_info->index_add_count, + ctx->alter_txn, + &ctx->incremented_num_DBs, + &ctx->modified_DBs); + if (error == HA_ERR_FOUND_DUPP_KEY) { + // hack for now, in case of duplicate key error, + // because at the moment we cannot display the right key + // information to the user, so that he knows potentially what went + // wrong. + last_dup_key = MAX_KEY; + } + + tokudb::memory::free(key_info); + + if (error == 0) + ctx->reset_card = true; + + return error; +} + +static bool find_index_of_key( + const char* key_name, + TABLE* table, + uint* index_offset_ptr) { + + for (uint i = 0; i < table->s->keys; i++) { + if (strcmp(key_name, table->key_info[i].name.str) == 0) { + *index_offset_ptr = i; + return true; + } + } + return false; +} + +static bool find_index_of_key( + const char* key_name, + KEY* key_info, + uint key_count, + uint* index_offset_ptr) { + + for (uint i = 0; i < key_count; i++) { + if (strcmp(key_name, key_info[i].name.str) == 0) { + *index_offset_ptr = i; + return true; + } + } + return false; +} + +int ha_tokudb::alter_table_drop_index(Alter_inplace_info* ha_alter_info) { + + KEY *key_info = table->key_info; + // translate key names to indexes into the key_info array + uint index_drop_offsets[ha_alter_info->index_drop_count]; + for (uint i = 0; i < ha_alter_info->index_drop_count; i++) { + bool found; + found = find_index_of_key( + ha_alter_info->index_drop_buffer[i]->name.str, + table, + &index_drop_offsets[i]); + if (!found) { + // undo of add key in partition engine + found = 
find_index_of_key( + ha_alter_info->index_drop_buffer[i]->name.str, + ha_alter_info->key_info_buffer, + ha_alter_info->key_count, + &index_drop_offsets[i]); + assert_always(found); + key_info = ha_alter_info->key_info_buffer; + } + } + + // drop indexes + tokudb_alter_ctx* ctx = + static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx); + ctx->drop_index_changed = true; + + int error = drop_indexes(index_drop_offsets, + ha_alter_info->index_drop_count, + key_info, + ctx->alter_txn); + + if (error == 0) + ctx->reset_card = true; + + return error; +} + +int ha_tokudb::alter_table_add_or_drop_column( + TABLE* altered_table, + Alter_inplace_info* ha_alter_info) { + + tokudb_alter_ctx* ctx = + static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx); + int error; + uchar *column_extra = NULL; + uint32_t max_column_extra_size; + uint32_t num_column_extra; + uint32_t num_columns = 0; + uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key); + // set size such that we know it is big enough for both cases + uint32_t columns[table->s->fields + altered_table->s->fields]; + memset(columns, 0, sizeof(columns)); + + // generate the array of columns + if (ha_alter_info->handler_flags & ALTER_DROP_COLUMN) { + find_changed_columns( + columns, + &num_columns, + altered_table, + table); + } else if (ha_alter_info->handler_flags & ALTER_ADD_COLUMN) { + find_changed_columns( + columns, + &num_columns, + table, + altered_table); + } else { + assert_unreachable(); + } + max_column_extra_size = + // max static row_mutator + STATIC_ROW_MUTATOR_SIZE + + // max dynamic row_mutator + 4 + num_columns*(1+1+4+1+1+4) + altered_table->s->reclength + + // max static blob size + (4 + share->kc_info.num_blobs) + + // max dynamic blob size + (num_columns*(1+4+1+4)); + column_extra = (uchar*)tokudb::memory::malloc( + max_column_extra_size, + MYF(MY_WME)); + if (column_extra == NULL) { + error = ENOMEM; + goto cleanup; + } + + for (uint32_t i = 0; i < curr_num_DBs; i++) { + // 
change to a new descriptor + DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor); + error = new_row_descriptor( + altered_table, ha_alter_info, i, &row_descriptor); + if (error) + goto cleanup; + error = share->key_file[i]->change_descriptor( + share->key_file[i], + ctx->alter_txn, + &row_descriptor, + 0); + tokudb::memory::free(row_descriptor.data); + if (error) + goto cleanup; + + if (i == primary_key || key_is_clustering(&table_share->key_info[i])) { + num_column_extra = fill_row_mutator( + column_extra, + columns, + num_columns, + altered_table, + ctx->altered_table_kc_info, + i, + // true if adding columns, otherwise is a drop + (ha_alter_info->handler_flags & + ALTER_ADD_COLUMN) != 0); + + DBT column_dbt; memset(&column_dbt, 0, sizeof column_dbt); + column_dbt.data = column_extra; + column_dbt.size = num_column_extra; + DBUG_ASSERT(num_column_extra <= max_column_extra_size); + error = share->key_file[i]->update_broadcast( + share->key_file[i], + ctx->alter_txn, + &column_dbt, + DB_IS_RESETTING_OP); + if (error) { + goto cleanup; + } + } + } + + error = 0; + cleanup: + tokudb::memory::free(column_extra); + return error; +} + +// Commit or abort the alter operations. +// If commit then write the new frm data to the status using the alter +// transaction. +// If abort then abort the alter transaction and try to rollback the +// non-transactional changes. 
// Commit or roll back an inplace ALTER TABLE.
// commit == true: optionally persist the altered table's frm data into the
//     status dictionary under the alter transaction.
// commit == false (or the frm write fails): force an exclusive MDL lock,
//     abort the alter transaction, and restore any add-index / drop-index /
//     compression changes that were already applied.
// Returns false on success, true on error.
bool ha_tokudb::commit_inplace_alter_table(TOKUDB_UNUSED(TABLE* altered_table),
                                           Alter_inplace_info* ha_alter_info,
                                           bool commit) {
    TOKUDB_HANDLER_DBUG_ENTER("");

    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
    bool result = false; // success
    THD *thd = ha_thd();

    if (commit) {
#if (50613 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
    (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
    (100000 <= MYSQL_VERSION_ID)
        // NOTE(review): clearing group_commit_ctx appears to opt this handler
        // out of the server's grouped commit of inplace alters — confirm
        // against the handler API docs for the targeted server versions.
        if (ha_alter_info->group_commit_ctx) {
            ha_alter_info->group_commit_ctx = NULL;
        }
#endif
#if defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
#if (50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599) || \
    (100000 <= MYSQL_VERSION_ID)
#if defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
        // for partitioned tables the frm write may be handled at the
        // partition-engine level; only write here when configured to, or when
        // this table is not partitioned
        if (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL) {
#else
        if (true) {
#endif // defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
            int error = write_frm_data(
                share->status_block,
                ctx->alter_txn,
                altered_table->s->path.str);
            if (error) {
                // fall through to the rollback path below so that the
                // partially-applied alter is undone
                commit = false;
                result = true;
                print_error(error, MYF(0));
            }
        }
#endif // (50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599) ||
       // (100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100099)
#endif // defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
    }

    if (!commit) {
        // rollback only needs the exclusive lock if something was actually
        // changed by the earlier alter phases
        if (table->mdl_ticket->get_type() != MDL_EXCLUSIVE &&
            (ctx->add_index_changed || ctx->drop_index_changed ||
             ctx->compression_changed)) {

            // get exclusive lock no matter what
#if defined(MARIADB_BASE_VERSION)
            // temporarily clear the kill flag so that the MDL upgrade cannot
            // be interrupted; the saved state is restored once the exclusive
            // lock has been acquired
            killed_state saved_killed_state = thd->killed;
            thd->killed = NOT_KILLED;
            for (volatile uint i = 0;
                 wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED);
                 i++) {
                if (thd->killed != NOT_KILLED)
                    thd->killed = NOT_KILLED;
                sleep(1);
            }
            assert_always(table->mdl_ticket->get_type() == MDL_EXCLUSIVE);
            if (thd->killed == NOT_KILLED)
                thd->killed = saved_killed_state;
#else
            THD::killed_state saved_killed_state = thd->killed;
            thd->killed = THD::NOT_KILLED;
            // MySQL does not handle HA_EXTRA_NOT_USED so we use
            // HA_EXTRA_PREPARE_FOR_RENAME since it is passed through
            // the partition storage engine and is treated as a NOP by tokudb
            for (volatile uint i = 0;
                 wait_while_table_is_used(
                     thd,
                     table,
                     HA_EXTRA_PREPARE_FOR_RENAME);
                 i++) {
                if (thd->killed != THD::NOT_KILLED)
                    thd->killed = THD::NOT_KILLED;
                sleep(1);
            }
            assert_always(table->mdl_ticket->get_type() == MDL_EXCLUSIVE);
            if (thd->killed == THD::NOT_KILLED)
                thd->killed = saved_killed_state;
#endif
        }

        // abort the alter transaction NOW so that any alters are rolled back.
        // this allows the following restores to work.
        tokudb_trx_data* trx =
            (tokudb_trx_data*)thd_get_ha_data(thd, tokudb_hton);
        assert_always(ctx->alter_txn == trx->stmt);
        assert_always(trx->tokudb_lock_count > 0);
        // for partitioned tables, we use a single transaction to do all of the
        // partition changes. the tokudb_lock_count is a reference count for
        // each of the handlers to the same transaction. obviously, we want
        // to only abort once.
        if (trx->tokudb_lock_count > 0) {
            if (--trx->tokudb_lock_count <= trx->create_lock_count) {
                trx->create_lock_count = 0;
                abort_txn(ctx->alter_txn);
                ctx->alter_txn = NULL;
                trx->stmt = NULL;
                trx->sub_sp_level = NULL;
            }
            transaction = NULL;
        }

        if (ctx->add_index_changed) {
            // undo the index additions performed earlier in the alter
            restore_add_index(
                table,
                ha_alter_info->index_add_count,
                ctx->incremented_num_DBs,
                ctx->modified_DBs);
        }
        if (ctx->drop_index_changed) {
            // translate key names to indexes into the key_info array
            uint index_drop_offsets[ha_alter_info->index_drop_count];
            for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
                bool found = find_index_of_key(
                    ha_alter_info->index_drop_buffer[i]->name.str,
                    table,
                    &index_drop_offsets[i]);
                assert_always(found);
            }
            restore_drop_indexes(index_drop_offsets,
                                 ha_alter_info->index_drop_count);
        }
        if (ctx->compression_changed) {
            // put the original compression method back on every dictionary
            uint32_t curr_num_DBs =
                table->s->keys + tokudb_test(hidden_primary_key);
            for (uint32_t i = 0; i < curr_num_DBs; i++) {
                DB *db = share->key_file[i];
                int error = db->change_compression_method(
                    db,
                    ctx->orig_compression_method);
                assert_always(error == 0);
            }
        }
    }
    DBUG_RETURN(result);
}

// Setup the altered table's key and col info.
// Allocates and then initializes altered_kc_info from the altered table's
// share. Returns 0 on success, otherwise an error code from the allocate or
// initialize step.
int ha_tokudb::setup_kc_info(
    TABLE* altered_table,
    KEY_AND_COL_INFO* altered_kc_info) {

    int error = allocate_key_and_col_info(altered_table->s, altered_kc_info);
    if (error == 0)
        error = initialize_key_and_col_info(
            altered_table->s,
            altered_table,
            altered_kc_info,
            hidden_primary_key,
            primary_key);
    return error;
}

// Expand the variable length fields offsets from 1 to 2 bytes.
// Install a new row descriptor on every dictionary and broadcast an
// UPDATE_OP_EXPAND_VARIABLE_OFFSETS message into each tree that stores row
// values (the primary and all clustering indexes).
// Returns 0 on success, otherwise an error code.
int ha_tokudb::alter_table_expand_varchar_offsets(
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info) {

    int error = 0;
    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);

    uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
    for (uint32_t i = 0; i < curr_num_DBs; i++) {
        // change to a new descriptor
        DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
        error = new_row_descriptor(
            altered_table, ha_alter_info, i, &row_descriptor);
        if (error)
            break;
        error = share->key_file[i]->change_descriptor(
            share->key_file[i],
            ctx->alter_txn,
            &row_descriptor,
            0);
        tokudb::memory::free(row_descriptor.data);
        if (error)
            break;

        // for all trees that have values, make an update variable offsets
        // message and broadcast it into the tree
        if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
            uint32_t offset_start =
                table_share->null_bytes +
                share->kc_info.mcp_info[i].fixed_field_size;
            uint32_t offset_end =
                offset_start +
                share->kc_info.mcp_info[i].len_of_offsets;
            uint32_t number_of_offsets = offset_end - offset_start;

            // make the expand variable offsets message.
            // layout: 1 op byte, then number_of_offsets, then offset_start
            // (sizeof(offset_end) stands in for sizeof(number_of_offsets) in
            // the size computation — all three are uint32_t, so the total is
            // the same)
            DBT expand; memset(&expand, 0, sizeof expand);
            expand.size =
                sizeof(uchar) + sizeof(offset_start) + sizeof(offset_end);
            expand.data = tokudb::memory::malloc(expand.size, MYF(MY_WME));
            if (!expand.data) {
                error = ENOMEM;
                break;
            }
            uchar* expand_ptr = (uchar*)expand.data;
            expand_ptr[0] = UPDATE_OP_EXPAND_VARIABLE_OFFSETS;
            expand_ptr += sizeof(uchar);

            memcpy(expand_ptr, &number_of_offsets, sizeof(number_of_offsets));
            expand_ptr += sizeof(number_of_offsets);

            memcpy(expand_ptr, &offset_start, sizeof(offset_start));
            expand_ptr += sizeof(offset_start);

            // and broadcast it into the tree
            error = share->key_file[i]->update_broadcast(
                share->key_file[i],
                ctx->alter_txn,
                &expand,
                DB_IS_RESETTING_OP);
            tokudb::memory::free(expand.data);
            if (error)
                break;
        }
    }

    return error;
}

// Return true if a field is part of a key
static bool field_in_key(KEY *key, Field *field) {
    for (uint i = 0; i < key->user_defined_key_parts; i++) {
        KEY_PART_INFO *key_part = &key->key_part[i];
        // fields are matched by name
        if (strcmp(key_part->field->field_name.str, field->field_name.str) == 0)
            return true;
    }
    return false;
}

// Return true if a field is part of any key
static bool field_in_key_of_table(TABLE *table, Field *field) {
    for (uint i = 0; i < table->s->keys; i++) {
        if (field_in_key(&table->key_info[i], field))
            return true;
    }
    return false;
}

// Return true if all changed varchar/varbinary field lengths can be changed
// inplace, otherwise return false.  As a side effect, sets
// ctx->expand_varchar_update_needed when the offset encoding grows from 1 to
// 2 bytes.
static bool change_varchar_length_is_supported(Field* old_field,
                                               Field* new_field,
                                               tokudb_alter_ctx* ctx) {
    // only varchar -> varchar with same binary-ness and charset, and the
    // length may only grow
    if (old_field->real_type() != MYSQL_TYPE_VARCHAR ||
        new_field->real_type() != MYSQL_TYPE_VARCHAR ||
        old_field->binary() != new_field->binary() ||
        old_field->charset()->number != new_field->charset()->number ||
        old_field->field_length > new_field->field_length)
        return false;
    if (ctx->table_kc_info->num_offset_bytes >
        ctx->altered_table_kc_info->num_offset_bytes)
        return false; // shrink is not supported
    if (ctx->table_kc_info->num_offset_bytes <
        ctx->altered_table_kc_info->num_offset_bytes)
        // sum of varchar lengths changed from 1 to 2
        ctx->expand_varchar_update_needed = true;
    return true;
}

// Return true if a varstring column can be converted inplace to the given new
// column definition: the lengths may not shrink and the compression method
// and type handler must match.
bool ha_tokudb::can_convert_varstring(const Field_varstring* field,
                                      const Column_definition& new_type) const {
    if (new_type.length < field->field_length ||
        new_type.char_length < field->char_length() ||
        !new_type.compression_method() != !field->compression_method() ||
        new_type.type_handler() != field->type_handler()) {
        return false;
    }

    return true;
}

// Return true if all changed field lengths can be changed inplace, otherwise
// return false
static bool change_length_is_supported(TABLE* table,
                                       TABLE* altered_table,
                                       tokudb_alter_ctx* ctx) {
    if (table->s->fields != altered_table->s->fields)
        return false;
    if (table->s->null_bytes != altered_table->s->null_bytes)
        return false;
    if (ctx->changed_fields.elements() > 1)
        return false; // only support one field change
    for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
         ai < ctx->changed_fields.elements();
         ai++) {
        uint i = ctx->changed_fields.at(ai);
        Field *old_field = table->field[i];
        Field *new_field = altered_table->field[i];
        if (old_field->real_type() != new_field->real_type())
            return false; // no type conversions
        if (old_field->real_type() != MYSQL_TYPE_VARCHAR)
            return false; // only varchar
        if (field_in_key_of_table(table, old_field) ||
            field_in_key_of_table(altered_table, new_field))
            return false; // not in any key
        if (!change_varchar_length_is_supported(old_field, new_field, ctx))
            return false;
    }

    return true;
}

// Debug function that ensures that the array is sorted
static bool is_sorted(Dynamic_array<uint> &a) {
    bool r = true;
    if (a.elements() > 0) {
        uint lastelement = a.at(0);
        // NOTE(review): lastelement is never advanced inside the loop, so this
        // effectively checks that a.at(0) <= a.at(i) for all i, not full
        // sortedness — harmless for a debug assertion, but confirm intent.
        for (DYNAMIC_ARRAY_ELEMENTS_TYPE i = 1; i < a.elements(); i++)
            if (lastelement > a.at(i))
                r = false;
    }
    return r;
}

// Expand every changed field, one at a time, stopping at the first error.
// Returns 0 on success, otherwise an error code.
int ha_tokudb::alter_table_expand_columns(
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info) {

    int error = 0;
    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
    // since we build the changed_fields array in field order, it must be sorted
    assert_always(is_sorted(ctx->changed_fields));
    for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
         error == 0 && ai < ctx->changed_fields.elements();
         ai++) {
        uint expand_field_num = ctx->changed_fields.at(ai);
        error = alter_table_expand_one_column(
            altered_table,
            ha_alter_info,
            expand_field_num);
    }

    return error;
}

// Return true if the field is an unsigned int
static bool is_unsigned(Field *f) {
    return (f->flags & UNSIGNED_FLAG) != 0;
}

// Return the starting offset in the value for a particular index (selected by
// idx) of a particular field (selected by expand_field_num)
// TODO: replace this?
static uint32_t alter_table_field_offset(
    uint32_t null_bytes,
    KEY_AND_COL_INFO* kc_info,
    int idx,
    int expand_field_num) {

    // the value starts with the null bytes, followed by the fixed fields that
    // are not part of the key for this index
    uint32_t offset = null_bytes;
    for (int i = 0; i < expand_field_num; i++) {
        if (bitmap_is_set(&kc_info->key_filters[idx], i)) // skip key fields
            continue;
        offset += kc_info->field_lengths[i];
    }
    return offset;
}

// Send an expand message into all clustered indexes including the primary
int ha_tokudb::alter_table_expand_one_column(
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info,
    int expand_field_num) {

    int error = 0;
    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);

    Field *old_field = table->field[expand_field_num];
    TOKU_TYPE old_field_type = mysql_to_toku_type(old_field);
    Field *new_field = altered_table->field[expand_field_num];
    TOKU_TYPE new_field_type = mysql_to_toku_type(new_field);
    // the toku type must not change; only the length may
    assert_always(old_field_type == new_field_type);

    // select the update operation and pad character from the field type
    uchar operation;
    uchar pad_char;
    switch (old_field_type) {
    case toku_type_int:
        assert_always(is_unsigned(old_field) == is_unsigned(new_field));
        if (is_unsigned(old_field))
            operation = UPDATE_OP_EXPAND_UINT;
        else
            operation = UPDATE_OP_EXPAND_INT;
        pad_char = 0;
        break;
    case toku_type_fixstring:
        operation = UPDATE_OP_EXPAND_CHAR;
        pad_char = old_field->charset()->pad_char;
        break;
    case toku_type_fixbinary:
        operation = UPDATE_OP_EXPAND_BINARY;
        pad_char = 0;
        break;
    default:
        assert_unreachable();
    }

    uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
    for (uint32_t i = 0; i < curr_num_DBs; i++) {
        // change to a new descriptor
        DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
        error = new_row_descriptor(
            altered_table, ha_alter_info, i, &row_descriptor);
        if (error)
            break;
        error = share->key_file[i]->change_descriptor(
            share->key_file[i],
            ctx->alter_txn,
            &row_descriptor,
            0);
        tokudb::memory::free(row_descriptor.data);
        if (error)
            break;

        // for all trees that have values, make an expand update message and
        // broadcast it into the tree
        if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
            uint32_t old_offset = alter_table_field_offset(
                table_share->null_bytes,
                ctx->table_kc_info,
                i,
                expand_field_num);
            uint32_t new_offset = alter_table_field_offset(
                table_share->null_bytes,
                ctx->altered_table_kc_info,
                i,
                expand_field_num);
            assert_always(old_offset <= new_offset);

            uint32_t old_length =
                ctx->table_kc_info->field_lengths[expand_field_num];
            assert_always(old_length == old_field->pack_length());

            uint32_t new_length =
                ctx->altered_table_kc_info->field_lengths[expand_field_num];
            assert_always(new_length == new_field->pack_length());

            // message layout: op byte, new_offset, old_length, new_length,
            // and for char/binary expansion also the pad character
            DBT expand; memset(&expand, 0, sizeof(expand));
            expand.size =
                sizeof(operation) + sizeof(new_offset) +
                sizeof(old_length) + sizeof(new_length);
            if (operation == UPDATE_OP_EXPAND_CHAR ||
                operation == UPDATE_OP_EXPAND_BINARY)
                expand.size += sizeof(pad_char);
            expand.data = tokudb::memory::malloc(expand.size, MYF(MY_WME));
            if (!expand.data) {
                error = ENOMEM;
                break;
            }
            uchar *expand_ptr = (uchar *)expand.data;
            expand_ptr[0] = operation;
            expand_ptr += sizeof operation;

            // for the first altered field, old_offset == new_offset.
            // for the subsequent altered fields, the new_offset
            // should be used as it includes the length changes from the
            // previous altered fields.
            memcpy(expand_ptr, &new_offset, sizeof(new_offset));
            expand_ptr += sizeof(new_offset);

            memcpy(expand_ptr, &old_length, sizeof(old_length));
            expand_ptr += sizeof(old_length);

            memcpy(expand_ptr, &new_length, sizeof(new_length));
            expand_ptr += sizeof(new_length);

            if (operation == UPDATE_OP_EXPAND_CHAR ||
                operation == UPDATE_OP_EXPAND_BINARY) {
                memcpy(expand_ptr, &pad_char, sizeof(pad_char));
                expand_ptr += sizeof(pad_char);
            }

            // sanity check that the message was filled in completely
            assert_always(expand_ptr == (uchar*)expand.data + expand.size);

            // and broadcast it into the tree
            error = share->key_file[i]->update_broadcast(
                share->key_file[i],
                ctx->alter_txn,
                &expand,
                DB_IS_RESETTING_OP);
            tokudb::memory::free(expand.data);
            if (error)
                break;
        }
    }

    return error;
}

// Append the pack lengths of the first n blob fields of a table to the
// message buffer b, one byte per blob field.
static void marshall_blob_lengths(
    tokudb::buffer& b,
    uint32_t n,
    TABLE* table,
    KEY_AND_COL_INFO* kc_info) {

    for (uint i = 0; i < n; i++) {
        uint blob_field_index = kc_info->blob_fields[i];
        assert_always(blob_field_index < table->s->fields);
        uint8_t blob_field_length =
            table->s->field[blob_field_index]->row_pack_length();
        b.append(&blob_field_length, sizeof blob_field_length);
    }
}

// Install a new row descriptor on every dictionary and broadcast an
// UPDATE_OP_EXPAND_BLOB message into each tree that stores row values.
// Returns 0 on success, otherwise an error code.
int ha_tokudb::alter_table_expand_blobs(
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info) {

    int error = 0;
    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);

    uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
    for (uint32_t i = 0; i < curr_num_DBs; i++) {
        // change to a new descriptor
        DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
        error = new_row_descriptor(
            altered_table, ha_alter_info, i, &row_descriptor);
        if (error)
            break;
        error = share->key_file[i]->change_descriptor(
            share->key_file[i],
            ctx->alter_txn,
            &row_descriptor,
            0);
        tokudb::memory::free(row_descriptor.data);
        if (error)
            break;

        // for all trees that have values, make an update blobs message and
        // broadcast it into the tree
        if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
            // message layout: op byte, start of the variable part of the row,
            // variable offset bytes, offset encoding width, blob count, then
            // the old and new blob pack lengths
            tokudb::buffer b;
            uint8_t op = UPDATE_OP_EXPAND_BLOB;
            b.append(&op, sizeof op);
            b.append_ui<uint32_t>(
                table->s->null_bytes +
                ctx->table_kc_info->mcp_info[i].fixed_field_size);
            uint32_t var_offset_bytes =
                ctx->table_kc_info->mcp_info[i].len_of_offsets;
            b.append_ui<uint32_t>(var_offset_bytes);
            b.append_ui<uint32_t>(
                var_offset_bytes == 0 ? 0 :
                ctx->table_kc_info->num_offset_bytes);

            // add blobs info
            uint32_t num_blobs = ctx->table_kc_info->num_blobs;
            b.append_ui<uint32_t>(num_blobs);
            marshall_blob_lengths(b, num_blobs, table, ctx->table_kc_info);
            marshall_blob_lengths(
                b,
                num_blobs,
                altered_table,
                ctx->altered_table_kc_info);

            // and broadcast it into the tree
            DBT expand; memset(&expand, 0, sizeof expand);
            expand.data = b.data();
            expand.size = b.size();
            error = share->key_file[i]->update_broadcast(
                share->key_file[i],
                ctx->alter_txn,
                &expand,
                DB_IS_RESETTING_OP);
            if (error)
                break;
        }
    }

    return error;
}

// Return true if two fixed length fields can be changed inplace.  As a side
// effect, sets ctx->expand_fixed_update_needed when the field grows.
static bool change_fixed_length_is_supported(Field* old_field,
                                             Field* new_field,
                                             tokudb_alter_ctx* ctx) {
    // no change in size is supported
    if (old_field->pack_length() == new_field->pack_length())
        return true;
    // shrink is not supported
    if (old_field->pack_length() > new_field->pack_length())
        return false;
    ctx->expand_fixed_update_needed = true;
    return true;
}

// Return true if a blob/text field can be changed inplace: the binary-ness
// must match, the pack length may only grow, and for text the charset must
// match.  As a side effect, sets ctx->expand_blob_update_needed.
static bool change_blob_length_is_supported(Field* old_field,
                                            Field* new_field,
                                            tokudb_alter_ctx* ctx) {
    // blob -> longer or equal length blob
    if (old_field->binary() && new_field->binary() &&
        old_field->pack_length() <= new_field->pack_length()) {
        ctx->expand_blob_update_needed = true;
        return true;
    }
    // text -> longer or equal length text
    if (!old_field->binary() && !new_field->binary() &&
        old_field->pack_length() <= new_field->pack_length() &&
        old_field->charset()->number == new_field->charset()->number) {
        ctx->expand_blob_update_needed = true;
        return true;
    }
    return false;
}

// Return true if the MySQL type is an int or unsigned int type
static bool is_int_type(enum_field_types t) {
    switch (t) {
    case MYSQL_TYPE_TINY:
    case MYSQL_TYPE_SHORT:
    case MYSQL_TYPE_INT24:
    case MYSQL_TYPE_LONG:
    case MYSQL_TYPE_LONGLONG:
        return true;
    default:
        return false;
    }
}

// Return true if two field types can be changed inplace
static bool change_field_type_is_supported(Field* old_field,
                                           Field* new_field,
                                           tokudb_alter_ctx* ctx) {
    enum_field_types old_type = old_field->real_type();
    enum_field_types new_type = new_field->real_type();
    if (is_int_type(old_type)) {
        // int and unsigned int expansion
        if (is_int_type(new_type) &&
            is_unsigned(old_field) == is_unsigned(new_field))
            return change_fixed_length_is_supported(old_field, new_field, ctx);
        else
            return false;
    } else if (old_type == MYSQL_TYPE_STRING) {
        // char(X) -> char(Y) and binary(X) -> binary(Y) expansion
        if (new_type == MYSQL_TYPE_STRING &&
            old_field->binary() == new_field->binary() &&
            old_field->charset()->number == new_field->charset()->number)
            return change_fixed_length_is_supported(old_field, new_field, ctx);
        else
            return false;
    } else if (old_type == MYSQL_TYPE_VARCHAR) {
        // varchar(X) -> varchar(Y) and varbinary(X) -> varbinary(Y) expansion
        // where X < 256 <= Y; the ALTER_STORED_COLUMN_TYPE handler flag is
        // set for these cases
        return change_varchar_length_is_supported(old_field, new_field, ctx);
    } else if (old_type == MYSQL_TYPE_BLOB && new_type == MYSQL_TYPE_BLOB) {
        return change_blob_length_is_supported(old_field, new_field, ctx);
    } else
        return false;
}

// Return true if all changed field types can be changed inplace
static bool change_type_is_supported(TABLE* table,
                                     TABLE* altered_table,
                                     tokudb_alter_ctx* ctx) {
    if (table->s->null_bytes != altered_table->s->null_bytes)
        return false;
    if (table->s->fields != altered_table->s->fields)
        return false;
    if (ctx->changed_fields.elements() > 1)
        return false; // only support one field change
    for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
         ai < ctx->changed_fields.elements();
         ai++) {
        uint i = ctx->changed_fields.at(ai);
        Field *old_field = table->field[i];
        Field *new_field = altered_table->field[i];
        // changed fields may not participate in any key
        if (field_in_key_of_table(table, old_field) ||
            field_in_key_of_table(altered_table, new_field))
            return false;
        if (!change_field_type_is_supported(old_field, new_field, ctx))
            return false;
    }
    return true;
}

// Allocate and initialize a new descriptor for a dictionary in the altered
// table identified with idx.
// Return the new descriptor in the row_descriptor DBT.
// Return non-zero on error.  On success the caller owns row_descriptor->data
// and must free it with tokudb::memory::free.
int ha_tokudb::new_row_descriptor(TABLE* altered_table,
                                  Alter_inplace_info* ha_alter_info,
                                  uint32_t idx,
                                  DBT* row_descriptor) {
    int error = 0;
    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
    // allocate the maximum possible descriptor size; the create calls below
    // return the actual size used
    row_descriptor->size =
        get_max_desc_size(ctx->altered_table_kc_info, altered_table);
    row_descriptor->data =
        (uchar*)tokudb::memory::malloc(row_descriptor->size, MYF(MY_WME));
    if (row_descriptor->data == NULL) {
        error = ENOMEM;
    } else {
        KEY* prim_key =
            hidden_primary_key ? NULL :
            &altered_table->key_info[primary_key];
        if (idx == primary_key) {
            row_descriptor->size = create_main_key_descriptor(
                (uchar*)row_descriptor->data,
                prim_key,
                hidden_primary_key,
                primary_key,
                altered_table,
                ctx->altered_table_kc_info);
        } else {
            row_descriptor->size = create_secondary_key_descriptor(
                (uchar*)row_descriptor->data,
                &altered_table->key_info[idx],
                prim_key,
                hidden_primary_key,
                altered_table,
                primary_key,
                idx,
                ctx->altered_table_kc_info);
        }
        error = 0;
    }
    return error;
}

#endif