summaryrefslogtreecommitdiffstats
path: root/storage/innobase/row/row0mysql.cc
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 18:07:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 18:07:14 +0000
commita175314c3e5827eb193872241446f2f8f5c9d33c (patch)
treecd3d60ca99ae00829c52a6ca79150a5b6e62528b /storage/innobase/row/row0mysql.cc
parentInitial commit. (diff)
downloadmariadb-10.5-9e4947182e0b875da38088fdd168e775f473b8ad.tar.xz
mariadb-10.5-9e4947182e0b875da38088fdd168e775f473b8ad.zip
Adding upstream version 1:10.5.12.upstream/1%10.5.12upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/innobase/row/row0mysql.cc')
-rw-r--r--storage/innobase/row/row0mysql.cc4902
1 files changed, 4902 insertions, 0 deletions
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
new file mode 100644
index 00000000..6998a573
--- /dev/null
+++ b/storage/innobase/row/row0mysql.cc
@@ -0,0 +1,4902 @@
+/*****************************************************************************
+
+Copyright (c) 2000, 2018, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2015, 2021, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0mysql.cc
+Interface between Innobase row operations and MySQL.
+Contains also create table and other data dictionary operations.
+
+Created 9/17/2000 Heikki Tuuri
+*******************************************************/
+
+#include "univ.i"
+#include <debug_sync.h>
+#include <gstream.h>
+#include <spatial.h>
+
+#include "row0mysql.h"
+#include "btr0sea.h"
+#include "dict0boot.h"
+#include "dict0crea.h"
+#include "dict0dict.h"
+#include "dict0load.h"
+#include "dict0priv.h"
+#include "dict0stats.h"
+#include "dict0stats_bg.h"
+#include "dict0defrag_bg.h"
+#include "btr0defragment.h"
+#include "fil0fil.h"
+#include "fil0crypt.h"
+#include "fsp0file.h"
+#include "fts0fts.h"
+#include "fts0types.h"
+#include "ibuf0ibuf.h"
+#include "lock0lock.h"
+#include "log0log.h"
+#include "pars0pars.h"
+#include "que0que.h"
+#include "rem0cmp.h"
+#include "row0import.h"
+#include "row0ins.h"
+#include "row0row.h"
+#include "row0sel.h"
+#include "row0upd.h"
+#include "trx0purge.h"
+#include "trx0rec.h"
+#include "trx0roll.h"
+#include "trx0undo.h"
+#include "srv0start.h"
+#include "row0ext.h"
+#include "srv0start.h"
+
+#include <algorithm>
+#include <deque>
+#include <vector>
+
+#ifdef WITH_WSREP
+#include "mysql/service_wsrep.h"
+#include "wsrep.h"
+#include "log.h"
+#include "wsrep_mysqld.h"
+#endif
+
+/** Provide optional 4.x backwards compatibility for 5.0 and above */
+ibool row_rollback_on_timeout = FALSE;
+
+/** Chain node of the list of tables to drop in the background. */
+struct row_mysql_drop_t{
+ table_id_t table_id; /*!< table id */
+ UT_LIST_NODE_T(row_mysql_drop_t)row_mysql_drop_list;
+ /*!< list chain node */
+};
+
+/** @brief List of tables we should drop in background.
+
+ALTER TABLE in MySQL requires that the table handler can drop the
+table in background when there are no queries to it any
+more. Protected by row_drop_list_mutex. */
+static UT_LIST_BASE_NODE_T(row_mysql_drop_t) row_mysql_drop_list;
+
+/** Mutex protecting the background table drop list. */
+static ib_mutex_t row_drop_list_mutex;
+
+/** Flag: has row_mysql_drop_list been initialized? */
+static bool row_mysql_drop_list_inited;
+
+/*******************************************************************//**
+Determine if the given name is a name reserved for MySQL system tables.
+@return TRUE if name is a MySQL system table name */
+static
+ibool
+row_mysql_is_system_table(
+/*======================*/
+ const char* name)
+{
+ if (strncmp(name, "mysql/", 6) != 0) {
+
+ return(FALSE);
+ }
+
+ return(0 == strcmp(name + 6, "host")
+ || 0 == strcmp(name + 6, "user")
+ || 0 == strcmp(name + 6, "db"));
+}
+
+#ifdef UNIV_DEBUG
+/** Wait for the background drop list to become empty. */
+void
+row_wait_for_background_drop_list_empty()
+{
+ bool empty = false;
+ while (!empty) {
+ mutex_enter(&row_drop_list_mutex);
+ empty = (UT_LIST_GET_LEN(row_mysql_drop_list) == 0);
+ mutex_exit(&row_drop_list_mutex);
+ os_thread_sleep(100000);
+ }
+}
+#endif /* UNIV_DEBUG */
+
+/*******************************************************************//**
+Delays an INSERT, DELETE or UPDATE operation if the purge is lagging. */
+static
+void
+row_mysql_delay_if_needed(void)
+/*===========================*/
+{
+ if (srv_dml_needed_delay) {
+ os_thread_sleep(srv_dml_needed_delay);
+ }
+}
+
+/*******************************************************************//**
+Frees the blob heap in prebuilt when no longer needed. */
+void
+row_mysql_prebuilt_free_blob_heap(
+/*==============================*/
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct of a
+ ha_innobase:: table handle */
+{
+ DBUG_ENTER("row_mysql_prebuilt_free_blob_heap");
+
+ DBUG_PRINT("row_mysql_prebuilt_free_blob_heap",
+ ("blob_heap freeing: %p", prebuilt->blob_heap));
+
+ mem_heap_free(prebuilt->blob_heap);
+ prebuilt->blob_heap = NULL;
+ DBUG_VOID_RETURN;
+}
+
+/*******************************************************************//**
+Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
+format.
+@return pointer to the data, we skip the 1 or 2 bytes at the start
+that are used to store the len */
+byte*
+row_mysql_store_true_var_len(
+/*=========================*/
+ byte* dest, /*!< in: where to store */
+ ulint len, /*!< in: length, must fit in two bytes */
+ ulint lenlen) /*!< in: storage length of len: either 1 or 2 bytes */
+{
+ if (lenlen == 2) {
+ ut_a(len < 256 * 256);
+
+ mach_write_to_2_little_endian(dest, len);
+
+ return(dest + 2);
+ }
+
+ ut_a(lenlen == 1);
+ ut_a(len < 256);
+
+ mach_write_to_1(dest, len);
+
+ return(dest + 1);
+}
+
+/*******************************************************************//**
+Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and
+returns a pointer to the data.
+@return pointer to the data, we skip the 1 or 2 bytes at the start
+that are used to store the len */
+const byte*
+row_mysql_read_true_varchar(
+/*========================*/
+ ulint* len, /*!< out: variable-length field length */
+ const byte* field, /*!< in: field in the MySQL format */
+ ulint lenlen) /*!< in: storage length of len: either 1
+ or 2 bytes */
+{
+ if (lenlen == 2) {
+ *len = mach_read_from_2_little_endian(field);
+
+ return(field + 2);
+ }
+
+ ut_a(lenlen == 1);
+
+ *len = mach_read_from_1(field);
+
+ return(field + 1);
+}
+
+/*******************************************************************//**
+Stores a reference to a BLOB in the MySQL format. */
+void
+row_mysql_store_blob_ref(
+/*=====================*/
+ byte* dest, /*!< in: where to store */
+ ulint col_len,/*!< in: dest buffer size: determines into
+ how many bytes the BLOB length is stored,
+ the space for the length may vary from 1
+ to 4 bytes */
+ const void* data, /*!< in: BLOB data; if the value to store
+ is SQL NULL this should be NULL pointer */
+ ulint len) /*!< in: BLOB length; if the value to store
+ is SQL NULL this should be 0; remember
+ also to set the NULL bit in the MySQL record
+ header! */
+{
+ /* MySQL might assume the field is set to zero except the length and
+ the pointer fields */
+
+ memset(dest, '\0', col_len);
+
+ /* In dest there are 1 - 4 bytes reserved for the BLOB length,
+ and after that 8 bytes reserved for the pointer to the data.
+ In 32-bit architectures we only use the first 4 bytes of the pointer
+ slot. */
+
+ ut_a(col_len - 8 > 1 || len < 256);
+ ut_a(col_len - 8 > 2 || len < 256 * 256);
+ ut_a(col_len - 8 > 3 || len < 256 * 256 * 256);
+
+ mach_write_to_n_little_endian(dest, col_len - 8, len);
+
+ memcpy(dest + col_len - 8, &data, sizeof data);
+}
+
+/*******************************************************************//**
+Reads a reference to a BLOB in the MySQL format.
+@return pointer to BLOB data */
+const byte*
+row_mysql_read_blob_ref(
+/*====================*/
+ ulint* len, /*!< out: BLOB length */
+ const byte* ref, /*!< in: BLOB reference in the
+ MySQL format */
+ ulint col_len) /*!< in: BLOB reference length
+ (not BLOB length) */
+{
+ byte* data;
+
+ *len = mach_read_from_n_little_endian(ref, col_len - 8);
+
+ memcpy(&data, ref + col_len - 8, sizeof data);
+
+ return(data);
+}
+
+/*******************************************************************//**
+Converting InnoDB geometry data format to MySQL data format. */
+void
+row_mysql_store_geometry(
+/*=====================*/
+ byte* dest, /*!< in/out: where to store */
+ ulint dest_len, /*!< in: dest buffer size: determines
+ into how many bytes the GEOMETRY length
+ is stored, the space for the length
+ may vary from 1 to 4 bytes */
+ const byte* src, /*!< in: GEOMETRY data; if the value to
+ store is SQL NULL this should be NULL
+ pointer */
+ ulint src_len) /*!< in: GEOMETRY length; if the value
+ to store is SQL NULL this should be 0;
+ remember also to set the NULL bit in
+ the MySQL record header! */
+{
+ /* MySQL might assume the field is set to zero except the length and
+ the pointer fields */
+ MEM_CHECK_DEFINED(src, src_len);
+
+ memset(dest, '\0', dest_len);
+
+ /* In dest there are 1 - 4 bytes reserved for the BLOB length,
+ and after that 8 bytes reserved for the pointer to the data.
+ In 32-bit architectures we only use the first 4 bytes of the pointer
+ slot. */
+
+ ut_ad(dest_len - 8 > 1 || src_len < 1<<8);
+ ut_ad(dest_len - 8 > 2 || src_len < 1<<16);
+ ut_ad(dest_len - 8 > 3 || src_len < 1<<24);
+
+ mach_write_to_n_little_endian(dest, dest_len - 8, src_len);
+
+ memcpy(dest + dest_len - 8, &src, sizeof src);
+}
+
+/*******************************************************************//**
+Read geometry data in the MySQL format.
+@return pointer to geometry data */
+static
+const byte*
+row_mysql_read_geometry(
+/*====================*/
+ ulint* len, /*!< out: data length */
+ const byte* ref, /*!< in: geometry data in the
+ MySQL format */
+ ulint col_len) /*!< in: MySQL format length */
+{
+ byte* data;
+ ut_ad(col_len > 8);
+
+ *len = mach_read_from_n_little_endian(ref, col_len - 8);
+
+ memcpy(&data, ref + col_len - 8, sizeof data);
+
+ return(data);
+}
+
+/**************************************************************//**
+Pad a column with spaces. */
+void
+row_mysql_pad_col(
+/*==============*/
+ ulint mbminlen, /*!< in: minimum size of a character,
+ in bytes */
+ byte* pad, /*!< out: padded buffer */
+ ulint len) /*!< in: number of bytes to pad */
+{
+ const byte* pad_end;
+
+ switch (UNIV_EXPECT(mbminlen, 1)) {
+ default:
+ ut_error;
+ case 1:
+ /* space=0x20 */
+ memset(pad, 0x20, len);
+ break;
+ case 2:
+ /* space=0x0020 */
+ pad_end = pad + len;
+ ut_a(!(len % 2));
+ while (pad < pad_end) {
+ *pad++ = 0x00;
+ *pad++ = 0x20;
+ };
+ break;
+ case 4:
+ /* space=0x00000020 */
+ pad_end = pad + len;
+ ut_a(!(len % 4));
+ while (pad < pad_end) {
+ *pad++ = 0x00;
+ *pad++ = 0x00;
+ *pad++ = 0x00;
+ *pad++ = 0x20;
+ }
+ break;
+ }
+}
+
+/**************************************************************//**
+Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
+The counterpart of this function is row_sel_field_store_in_mysql_format() in
+row0sel.cc.
+@return up to which byte we used buf in the conversion */
+byte*
+row_mysql_store_col_in_innobase_format(
+/*===================================*/
+ dfield_t* dfield, /*!< in/out: dfield where dtype
+ information must be already set when
+ this function is called! */
+ byte* buf, /*!< in/out: buffer for a converted
+ integer value; this must be at least
+ col_len long then! NOTE that dfield
+ may also get a pointer to 'buf',
+ therefore do not discard this as long
+ as dfield is used! */
+ ibool row_format_col, /*!< TRUE if the mysql_data is from
+ a MySQL row, FALSE if from a MySQL
+ key value;
+ in MySQL, a true VARCHAR storage
+ format differs in a row and in a
+ key value: in a key value the length
+ is always stored in 2 bytes! */
+ const byte* mysql_data, /*!< in: MySQL column value, not
+ SQL NULL; NOTE that dfield may also
+ get a pointer to mysql_data,
+ therefore do not discard this as long
+ as dfield is used! */
+ ulint col_len, /*!< in: MySQL column length; NOTE that
+ this is the storage length of the
+ column in the MySQL format row, not
+ necessarily the length of the actual
+ payload data; if the column is a true
+ VARCHAR then this is irrelevant */
+ ulint comp) /*!< in: nonzero=compact format */
+{
+ const byte* ptr = mysql_data;
+ const dtype_t* dtype;
+ ulint type;
+ ulint lenlen;
+
+ dtype = dfield_get_type(dfield);
+
+ type = dtype->mtype;
+
+ if (type == DATA_INT) {
+ /* Store integer data in Innobase in a big-endian format,
+ sign bit negated if the data is a signed integer. In MySQL,
+ integers are stored in a little-endian format. */
+
+ byte* p = buf + col_len;
+
+ for (;;) {
+ p--;
+ *p = *mysql_data;
+ if (p == buf) {
+ break;
+ }
+ mysql_data++;
+ }
+
+ if (!(dtype->prtype & DATA_UNSIGNED)) {
+
+ *buf ^= 128;
+ }
+
+ ptr = buf;
+ buf += col_len;
+ } else if ((type == DATA_VARCHAR
+ || type == DATA_VARMYSQL
+ || type == DATA_BINARY)) {
+
+ if (dtype_get_mysql_type(dtype) == DATA_MYSQL_TRUE_VARCHAR) {
+ /* The length of the actual data is stored to 1 or 2
+ bytes at the start of the field */
+
+ if (row_format_col) {
+ if (dtype->prtype & DATA_LONG_TRUE_VARCHAR) {
+ lenlen = 2;
+ } else {
+ lenlen = 1;
+ }
+ } else {
+ /* In a MySQL key value, lenlen is always 2 */
+ lenlen = 2;
+ }
+
+ ptr = row_mysql_read_true_varchar(&col_len, mysql_data,
+ lenlen);
+ } else {
+ /* Remove trailing spaces from old style VARCHAR
+ columns. */
+
+ /* Handle Unicode strings differently. */
+ ulint mbminlen = dtype_get_mbminlen(dtype);
+
+ ptr = mysql_data;
+
+ switch (mbminlen) {
+ default:
+ ut_error;
+ case 4:
+ /* space=0x00000020 */
+ /* Trim "half-chars", just in case. */
+ col_len &= ~3U;
+
+ while (col_len >= 4
+ && ptr[col_len - 4] == 0x00
+ && ptr[col_len - 3] == 0x00
+ && ptr[col_len - 2] == 0x00
+ && ptr[col_len - 1] == 0x20) {
+ col_len -= 4;
+ }
+ break;
+ case 2:
+ /* space=0x0020 */
+ /* Trim "half-chars", just in case. */
+ col_len &= ~1U;
+
+ while (col_len >= 2 && ptr[col_len - 2] == 0x00
+ && ptr[col_len - 1] == 0x20) {
+ col_len -= 2;
+ }
+ break;
+ case 1:
+ /* space=0x20 */
+ while (col_len > 0
+ && ptr[col_len - 1] == 0x20) {
+ col_len--;
+ }
+ }
+ }
+ } else if (comp && type == DATA_MYSQL
+ && dtype_get_mbminlen(dtype) == 1
+ && dtype_get_mbmaxlen(dtype) > 1) {
+ /* In some cases we strip trailing spaces from UTF-8 and other
+ multibyte charsets, from FIXED-length CHAR columns, to save
+ space. UTF-8 would otherwise normally use 3 * the string length
+ bytes to store an ASCII string! */
+
+ /* We assume that this CHAR field is encoded in a
+ variable-length character set where spaces have
+ 1:1 correspondence to 0x20 bytes, such as UTF-8.
+
+ Consider a CHAR(n) field, a field of n characters.
+ It will contain between n * mbminlen and n * mbmaxlen bytes.
+ We will try to truncate it to n bytes by stripping
+ space padding. If the field contains single-byte
+ characters only, it will be truncated to n characters.
+ Consider a CHAR(5) field containing the string
+ ".a " where "." denotes a 3-byte character represented
+ by the bytes "$%&". After our stripping, the string will
+ be stored as "$%&a " (5 bytes). The string
+ ".abc " will be stored as "$%&abc" (6 bytes).
+
+ The space padding will be restored in row0sel.cc, function
+ row_sel_field_store_in_mysql_format(). */
+
+ ulint n_chars;
+
+ ut_a(!(dtype_get_len(dtype) % dtype_get_mbmaxlen(dtype)));
+
+ n_chars = dtype_get_len(dtype) / dtype_get_mbmaxlen(dtype);
+
+ /* Strip space padding. */
+ while (col_len > n_chars && ptr[col_len - 1] == 0x20) {
+ col_len--;
+ }
+ } else if (!row_format_col) {
+ /* if mysql data is from a MySQL key value
+ since the length is always stored in 2 bytes,
+ we need do nothing here. */
+ } else if (type == DATA_BLOB) {
+
+ ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
+ } else if (DATA_GEOMETRY_MTYPE(type)) {
+ ptr = row_mysql_read_geometry(&col_len, mysql_data, col_len);
+ }
+
+ dfield_set_data(dfield, ptr, col_len);
+
+ return(buf);
+}
+
+/**************************************************************//**
+Convert a row in the MySQL format to a row in the Innobase format. Note that
+the function to convert a MySQL format key value to an InnoDB dtuple is
+row_sel_convert_mysql_key_to_innobase() in row0sel.cc. */
+static
+void
+row_mysql_convert_row_to_innobase(
+/*==============================*/
+ dtuple_t* row, /*!< in/out: Innobase row where the
+ field type information is already
+ copied there! */
+ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct where template
+ must be of type ROW_MYSQL_WHOLE_ROW */
+ const byte* mysql_rec, /*!< in: row in the MySQL format;
+ NOTE: do not discard as long as
+ row is used, as row may contain
+ pointers to this record! */
+ mem_heap_t** blob_heap) /*!< in: FIX_ME, remove this after
+ server fixes its issue */
+{
+ const mysql_row_templ_t*templ;
+ dfield_t* dfield;
+ ulint i;
+ ulint n_col = 0;
+ ulint n_v_col = 0;
+
+ ut_ad(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
+ ut_ad(prebuilt->mysql_template);
+
+ for (i = 0; i < prebuilt->n_template; i++) {
+
+ templ = prebuilt->mysql_template + i;
+
+ if (templ->is_virtual) {
+ ut_ad(n_v_col < dtuple_get_n_v_fields(row));
+ dfield = dtuple_get_nth_v_field(row, n_v_col);
+ n_v_col++;
+ } else {
+ dfield = dtuple_get_nth_field(row, n_col);
+ n_col++;
+ }
+
+ if (templ->mysql_null_bit_mask != 0) {
+ /* Column may be SQL NULL */
+
+ if (mysql_rec[templ->mysql_null_byte_offset]
+ & (byte) (templ->mysql_null_bit_mask)) {
+
+ /* It is SQL NULL */
+
+ dfield_set_null(dfield);
+
+ goto next_column;
+ }
+ }
+
+ row_mysql_store_col_in_innobase_format(
+ dfield,
+ prebuilt->ins_upd_rec_buff + templ->mysql_col_offset,
+ TRUE, /* MySQL row format data */
+ mysql_rec + templ->mysql_col_offset,
+ templ->mysql_col_len,
+ dict_table_is_comp(prebuilt->table));
+
+ /* server has issue regarding handling BLOB virtual fields,
+ and we need to duplicate it with our own memory here */
+ if (templ->is_virtual
+ && DATA_LARGE_MTYPE(dfield_get_type(dfield)->mtype)) {
+ if (*blob_heap == NULL) {
+ *blob_heap = mem_heap_create(dfield->len);
+ }
+ dfield_dup(dfield, *blob_heap);
+ }
+next_column:
+ ;
+ }
+
+ /* If there is a FTS doc id column and it is not user supplied (
+ generated by server) then assign it a new doc id. */
+ if (!prebuilt->table->fts) {
+ return;
+ }
+
+ ut_a(prebuilt->table->fts->doc_col != ULINT_UNDEFINED);
+
+ doc_id_t doc_id;
+
+ if (!DICT_TF2_FLAG_IS_SET(prebuilt->table, DICT_TF2_FTS_HAS_DOC_ID)) {
+ if (prebuilt->table->fts->cache->first_doc_id
+ == FTS_NULL_DOC_ID) {
+ fts_get_next_doc_id(prebuilt->table, &doc_id);
+ }
+ return;
+ }
+
+ dfield_t* fts_doc_id = dtuple_get_nth_field(
+ row, prebuilt->table->fts->doc_col);
+
+ if (fts_get_next_doc_id(prebuilt->table, &doc_id) == DB_SUCCESS) {
+ ut_a(doc_id != FTS_NULL_DOC_ID);
+ ut_ad(sizeof(doc_id) == fts_doc_id->type.len);
+ dfield_set_data(fts_doc_id, prebuilt->ins_upd_rec_buff
+ + prebuilt->mysql_row_len, 8);
+ fts_write_doc_id(fts_doc_id->data, doc_id);
+ } else {
+ dfield_set_null(fts_doc_id);
+ }
+}
+
+/****************************************************************//**
+Handles user errors and lock waits detected by the database engine.
+@return true if it was a lock wait and we should continue running the
+query thread and in that case the thr is ALREADY in the running state. */
+bool
+row_mysql_handle_errors(
+/*====================*/
+ dberr_t* new_err,/*!< out: possible new error encountered in
+ lock wait, or if no new error, the value
+ of trx->error_state at the entry of this
+ function */
+ trx_t* trx, /*!< in: transaction */
+ que_thr_t* thr, /*!< in: query thread, or NULL */
+ trx_savept_t* savept) /*!< in: savepoint, or NULL */
+{
+ dberr_t err;
+
+ DBUG_ENTER("row_mysql_handle_errors");
+ DEBUG_SYNC_C("row_mysql_handle_errors");
+
+handle_new_error:
+ err = trx->error_state;
+
+ ut_a(err != DB_SUCCESS);
+
+ trx->error_state = DB_SUCCESS;
+
+ DBUG_LOG("trx", "handle error: " << ut_strerr(err)
+ << ";id=" << ib::hex(trx->id) << ", " << trx);
+
+ switch (err) {
+ case DB_LOCK_WAIT_TIMEOUT:
+ if (row_rollback_on_timeout) {
+ goto rollback;
+ }
+ /* fall through */
+ case DB_DUPLICATE_KEY:
+ case DB_FOREIGN_DUPLICATE_KEY:
+ case DB_TOO_BIG_RECORD:
+ case DB_UNDO_RECORD_TOO_BIG:
+ case DB_ROW_IS_REFERENCED:
+ case DB_NO_REFERENCED_ROW:
+ case DB_CANNOT_ADD_CONSTRAINT:
+ case DB_TOO_MANY_CONCURRENT_TRXS:
+ case DB_OUT_OF_FILE_SPACE:
+ case DB_READ_ONLY:
+ case DB_FTS_INVALID_DOCID:
+ case DB_INTERRUPTED:
+ case DB_CANT_CREATE_GEOMETRY_OBJECT:
+ case DB_TABLE_NOT_FOUND:
+ case DB_DECRYPTION_FAILED:
+ case DB_COMPUTE_VALUE_FAILED:
+ rollback_to_savept:
+ DBUG_EXECUTE_IF("row_mysql_crash_if_error", {
+ log_buffer_flush_to_disk();
+ DBUG_SUICIDE(); });
+ if (savept) {
+ /* Roll back the latest, possibly incomplete insertion
+ or update */
+
+ trx->rollback(savept);
+ }
+ /* MySQL will roll back the latest SQL statement */
+ break;
+ case DB_LOCK_WAIT:
+ lock_wait_suspend_thread(thr);
+
+ if (trx->error_state != DB_SUCCESS) {
+ que_thr_stop_for_mysql(thr);
+
+ goto handle_new_error;
+ }
+
+ *new_err = err;
+
+ DBUG_RETURN(true);
+
+ case DB_DEADLOCK:
+ case DB_LOCK_TABLE_FULL:
+ rollback:
+ /* Roll back the whole transaction; this resolution was added
+ to version 3.23.43 */
+
+ trx->rollback();
+ break;
+
+ case DB_MUST_GET_MORE_FILE_SPACE:
+ ib::fatal() << "The database cannot continue operation because"
+ " of lack of space. You must add a new data file"
+ " to my.cnf and restart the database.";
+ break;
+
+ case DB_CORRUPTION:
+ case DB_PAGE_CORRUPTED:
+ ib::error() << "We detected index corruption in an InnoDB type"
+ " table. You have to dump + drop + reimport the"
+ " table or, in a case of widespread corruption,"
+ " dump all InnoDB tables and recreate the whole"
+ " tablespace. If the mysqld server crashes after"
+ " the startup or when you dump the tables. "
+ << FORCE_RECOVERY_MSG;
+ goto rollback_to_savept;
+ case DB_FOREIGN_EXCEED_MAX_CASCADE:
+ ib::error() << "Cannot delete/update rows with cascading"
+ " foreign key constraints that exceed max depth of "
+ << FK_MAX_CASCADE_DEL << ". Please drop excessive"
+ " foreign constraints and try again";
+ goto rollback_to_savept;
+ case DB_UNSUPPORTED:
+ ib::error() << "Cannot delete/update rows with cascading"
+ " foreign key constraints in timestamp-based temporal"
+ " table. Please drop excessive"
+ " foreign constraints and try again";
+ goto rollback_to_savept;
+ default:
+ ib::fatal() << "Unknown error " << err;
+ }
+
+ if (trx->error_state != DB_SUCCESS) {
+ *new_err = trx->error_state;
+ } else {
+ *new_err = err;
+ }
+
+ trx->error_state = DB_SUCCESS;
+
+ DBUG_RETURN(false);
+}
+
+/********************************************************************//**
+Create a prebuilt struct for a MySQL table handle.
+@return own: a prebuilt struct */
+row_prebuilt_t*
+row_create_prebuilt(
+/*================*/
+ dict_table_t* table, /*!< in: Innobase table handle */
+ ulint mysql_row_len) /*!< in: length in bytes of a row in
+ the MySQL format */
+{
+ DBUG_ENTER("row_create_prebuilt");
+
+ row_prebuilt_t* prebuilt;
+ mem_heap_t* heap;
+ dict_index_t* clust_index;
+ dict_index_t* temp_index;
+ dtuple_t* ref;
+ ulint ref_len;
+ uint srch_key_len = 0;
+ ulint search_tuple_n_fields;
+
+ search_tuple_n_fields = 2 * (dict_table_get_n_cols(table)
+ + dict_table_get_n_v_cols(table));
+
+ clust_index = dict_table_get_first_index(table);
+
+ /* Make sure that search_tuple is long enough for clustered index */
+ ut_a(2 * unsigned(table->n_cols) >= unsigned(clust_index->n_fields)
+ - clust_index->table->n_dropped());
+
+ ref_len = dict_index_get_n_unique(clust_index);
+
+
+ /* Maximum size of the buffer needed for conversion of INTs from
+ little endian format to big endian format in an index. An index
+ can have maximum 16 columns (MAX_REF_PARTS) in it. Therfore
+ Max size for PK: 16 * 8 bytes (BIGINT's size) = 128 bytes
+ Max size Secondary index: 16 * 8 bytes + PK = 256 bytes. */
+#define MAX_SRCH_KEY_VAL_BUFFER 2* (8 * MAX_REF_PARTS)
+
+#define PREBUILT_HEAP_INITIAL_SIZE \
+ ( \
+ sizeof(*prebuilt) \
+ /* allocd in this function */ \
+ + DTUPLE_EST_ALLOC(search_tuple_n_fields) \
+ + DTUPLE_EST_ALLOC(ref_len) \
+ /* allocd in row_prebuild_sel_graph() */ \
+ + sizeof(sel_node_t) \
+ + sizeof(que_fork_t) \
+ + sizeof(que_thr_t) \
+ /* allocd in row_get_prebuilt_update_vector() */ \
+ + sizeof(upd_node_t) \
+ + sizeof(upd_t) \
+ + sizeof(upd_field_t) \
+ * dict_table_get_n_cols(table) \
+ + sizeof(que_fork_t) \
+ + sizeof(que_thr_t) \
+ /* allocd in row_get_prebuilt_insert_row() */ \
+ + sizeof(ins_node_t) \
+ /* mysql_row_len could be huge and we are not \
+ sure if this prebuilt instance is going to be \
+ used in inserts */ \
+ + (mysql_row_len < 256 ? mysql_row_len : 0) \
+ + DTUPLE_EST_ALLOC(dict_table_get_n_cols(table) \
+ + dict_table_get_n_v_cols(table)) \
+ + sizeof(que_fork_t) \
+ + sizeof(que_thr_t) \
+ + sizeof(*prebuilt->pcur) \
+ + sizeof(*prebuilt->clust_pcur) \
+ )
+
+ /* Calculate size of key buffer used to store search key in
+ InnoDB format. MySQL stores INTs in little endian format and
+ InnoDB stores INTs in big endian format with the sign bit
+ flipped. All other field types are stored/compared the same
+ in MySQL and InnoDB, so we must create a buffer containing
+ the INT key parts in InnoDB format.We need two such buffers
+ since both start and end keys are used in records_in_range(). */
+
+ for (temp_index = dict_table_get_first_index(table); temp_index;
+ temp_index = dict_table_get_next_index(temp_index)) {
+ DBUG_EXECUTE_IF("innodb_srch_key_buffer_max_value",
+ ut_a(temp_index->n_user_defined_cols
+ == MAX_REF_PARTS););
+ uint temp_len = 0;
+ for (uint i = 0; i < temp_index->n_uniq; i++) {
+ ulint type = temp_index->fields[i].col->mtype;
+ if (type == DATA_INT) {
+ temp_len +=
+ temp_index->fields[i].fixed_len;
+ }
+ }
+ srch_key_len = std::max(srch_key_len,temp_len);
+ }
+
+ ut_a(srch_key_len <= MAX_SRCH_KEY_VAL_BUFFER);
+
+ DBUG_EXECUTE_IF("innodb_srch_key_buffer_max_value",
+ ut_a(srch_key_len == MAX_SRCH_KEY_VAL_BUFFER););
+
+ /* We allocate enough space for the objects that are likely to
+ be created later in order to minimize the number of malloc()
+ calls */
+ heap = mem_heap_create(PREBUILT_HEAP_INITIAL_SIZE + 2 * srch_key_len);
+
+ prebuilt = static_cast<row_prebuilt_t*>(
+ mem_heap_zalloc(heap, sizeof(*prebuilt)));
+
+ prebuilt->magic_n = ROW_PREBUILT_ALLOCATED;
+ prebuilt->magic_n2 = ROW_PREBUILT_ALLOCATED;
+
+ prebuilt->table = table;
+
+ prebuilt->sql_stat_start = TRUE;
+ prebuilt->heap = heap;
+
+ prebuilt->srch_key_val_len = srch_key_len;
+ if (prebuilt->srch_key_val_len) {
+ prebuilt->srch_key_val1 = static_cast<byte*>(
+ mem_heap_alloc(prebuilt->heap,
+ 2 * prebuilt->srch_key_val_len));
+ prebuilt->srch_key_val2 = prebuilt->srch_key_val1 +
+ prebuilt->srch_key_val_len;
+ } else {
+ prebuilt->srch_key_val1 = NULL;
+ prebuilt->srch_key_val2 = NULL;
+ }
+
+ prebuilt->pcur = static_cast<btr_pcur_t*>(
+ mem_heap_zalloc(prebuilt->heap,
+ sizeof(btr_pcur_t)));
+ prebuilt->clust_pcur = static_cast<btr_pcur_t*>(
+ mem_heap_zalloc(prebuilt->heap,
+ sizeof(btr_pcur_t)));
+ btr_pcur_reset(prebuilt->pcur);
+ btr_pcur_reset(prebuilt->clust_pcur);
+
+ prebuilt->select_lock_type = LOCK_NONE;
+ prebuilt->stored_select_lock_type = LOCK_NONE_UNSET;
+
+ prebuilt->search_tuple = dtuple_create(heap, search_tuple_n_fields);
+
+ ref = dtuple_create(heap, ref_len);
+
+ dict_index_copy_types(ref, clust_index, ref_len);
+
+ prebuilt->clust_ref = ref;
+
+ prebuilt->autoinc_error = DB_SUCCESS;
+ prebuilt->autoinc_offset = 0;
+
+ /* Default to 1, we will set the actual value later in
+ ha_innobase::get_auto_increment(). */
+ prebuilt->autoinc_increment = 1;
+
+ prebuilt->autoinc_last_value = 0;
+
+ /* During UPDATE and DELETE we need the doc id. */
+ prebuilt->fts_doc_id = 0;
+
+ prebuilt->mysql_row_len = mysql_row_len;
+
+ prebuilt->fts_doc_id_in_read_set = 0;
+ prebuilt->blob_heap = NULL;
+
+ DBUG_RETURN(prebuilt);
+}
+
+/********************************************************************//**
+Free a prebuilt struct for a MySQL table handle. */
+void
+row_prebuilt_free(
+/*==============*/
+ row_prebuilt_t* prebuilt, /*!< in, own: prebuilt struct */
+ ibool dict_locked) /*!< in: TRUE=data dictionary locked */
+{
+ DBUG_ENTER("row_prebuilt_free");
+
+ ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
+ ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED);
+
+ prebuilt->magic_n = ROW_PREBUILT_FREED;
+ prebuilt->magic_n2 = ROW_PREBUILT_FREED;
+
+ btr_pcur_reset(prebuilt->pcur);
+ btr_pcur_reset(prebuilt->clust_pcur);
+
+ ut_free(prebuilt->mysql_template);
+
+ if (prebuilt->ins_graph) {
+ que_graph_free_recursive(prebuilt->ins_graph);
+ }
+
+ if (prebuilt->sel_graph) {
+ que_graph_free_recursive(prebuilt->sel_graph);
+ }
+
+ if (prebuilt->upd_graph) {
+ que_graph_free_recursive(prebuilt->upd_graph);
+ }
+
+ if (prebuilt->blob_heap) {
+ row_mysql_prebuilt_free_blob_heap(prebuilt);
+ }
+
+ if (prebuilt->old_vers_heap) {
+ mem_heap_free(prebuilt->old_vers_heap);
+ }
+
+ if (prebuilt->fetch_cache[0] != NULL) {
+ byte* base = prebuilt->fetch_cache[0] - 4;
+ byte* ptr = base;
+
+ for (ulint i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) {
+ ulint magic1 = mach_read_from_4(ptr);
+ ut_a(magic1 == ROW_PREBUILT_FETCH_MAGIC_N);
+ ptr += 4;
+
+ byte* row = ptr;
+ ut_a(row == prebuilt->fetch_cache[i]);
+ ptr += prebuilt->mysql_row_len;
+
+ ulint magic2 = mach_read_from_4(ptr);
+ ut_a(magic2 == ROW_PREBUILT_FETCH_MAGIC_N);
+ ptr += 4;
+ }
+
+ ut_free(base);
+ }
+
+ if (prebuilt->rtr_info) {
+ rtr_clean_rtr_info(prebuilt->rtr_info, true);
+ }
+ if (prebuilt->table) {
+ dict_table_close(prebuilt->table, dict_locked, FALSE);
+ }
+
+ mem_heap_free(prebuilt->heap);
+
+ DBUG_VOID_RETURN;
+}
+
+/*********************************************************************//**
+Updates the transaction pointers in query graphs stored in the prebuilt
+struct. */
+void
+row_update_prebuilt_trx(
+/*====================*/
+ row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct
+ in MySQL handle */
+ trx_t* trx) /*!< in: transaction handle */
+{
+ ut_a(trx->magic_n == TRX_MAGIC_N);
+ ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
+ ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED);
+
+ prebuilt->trx = trx;
+
+ if (prebuilt->ins_graph) {
+ prebuilt->ins_graph->trx = trx;
+ }
+
+ if (prebuilt->upd_graph) {
+ prebuilt->upd_graph->trx = trx;
+ }
+
+ if (prebuilt->sel_graph) {
+ prebuilt->sel_graph->trx = trx;
+ }
+}
+
+/*********************************************************************//**
+Gets pointer to a prebuilt dtuple used in insertions. If the insert graph
+has not yet been built in the prebuilt struct, then this function first
+builds it.
+@return prebuilt dtuple; the column type information is also set in it */
+static
+dtuple_t*
+row_get_prebuilt_insert_row(
+/*========================*/
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
+ handle */
+{
+ dict_table_t* table = prebuilt->table;
+
+ ut_ad(prebuilt && table && prebuilt->trx);
+
+ if (prebuilt->ins_node != 0) {
+
+ /* Check if indexes have been dropped or added and we
+ may need to rebuild the row insert template. */
+
+ if (prebuilt->trx_id == table->def_trx_id
+ && prebuilt->ins_node->entry_list.size()
+ == UT_LIST_GET_LEN(table->indexes)) {
+
+ return(prebuilt->ins_node->row);
+ }
+
+ ut_ad(prebuilt->trx_id < table->def_trx_id);
+
+ que_graph_free_recursive(prebuilt->ins_graph);
+
+ prebuilt->ins_graph = 0;
+ }
+
+ /* Create an insert node and query graph to the prebuilt struct */
+
+ ins_node_t* node;
+
+ node = ins_node_create(INS_DIRECT, table, prebuilt->heap);
+
+ prebuilt->ins_node = node;
+
+ if (prebuilt->ins_upd_rec_buff == 0) {
+ prebuilt->ins_upd_rec_buff = static_cast<byte*>(
+ mem_heap_alloc(
+ prebuilt->heap,
+ DICT_TF2_FLAG_IS_SET(prebuilt->table,
+ DICT_TF2_FTS_HAS_DOC_ID)
+ ? prebuilt->mysql_row_len + 8/* FTS_DOC_ID */
+ : prebuilt->mysql_row_len));
+ }
+
+ dtuple_t* row;
+
+ row = dtuple_create_with_vcol(
+ prebuilt->heap, dict_table_get_n_cols(table),
+ dict_table_get_n_v_cols(table));
+
+ dict_table_copy_types(row, table);
+
+ ins_node_set_new_row(node, row);
+
+ prebuilt->ins_graph = static_cast<que_fork_t*>(
+ que_node_get_parent(
+ pars_complete_graph_for_exec(
+ node,
+ prebuilt->trx, prebuilt->heap, prebuilt)));
+
+ prebuilt->ins_graph->state = QUE_FORK_ACTIVE;
+
+ prebuilt->trx_id = table->def_trx_id;
+
+ return(prebuilt->ins_node->row);
+}
+
+/*********************************************************************//**
+Sets an AUTO_INC type lock on the table mentioned in prebuilt. The
+AUTO_INC lock gives exclusive access to the auto-inc counter of the
+table. The lock is reserved only for the duration of an SQL statement.
+It is not compatible with another AUTO_INC or exclusive lock on the
+table.
+@return error code or DB_SUCCESS */
+dberr_t
+row_lock_table_autoinc_for_mysql(
+/*=============================*/
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in the MySQL
+ table handle */
+{
+ trx_t* trx = prebuilt->trx;
+ ins_node_t* node = prebuilt->ins_node;
+ const dict_table_t* table = prebuilt->table;
+ que_thr_t* thr;
+ dberr_t err;
+ ibool was_lock_wait;
+
+ /* If we already hold an AUTOINC lock on the table then do nothing.
+ Note: We peek at the value of the current owner without acquiring
+ the lock mutex. */
+ if (trx == table->autoinc_trx) {
+
+ return(DB_SUCCESS);
+ }
+
+ trx->op_info = "setting auto-inc lock";
+
+ row_get_prebuilt_insert_row(prebuilt);
+ node = prebuilt->ins_node;
+
+ /* We use the insert query graph as the dummy graph needed
+ in the lock module call */
+
+ thr = que_fork_get_first_thr(prebuilt->ins_graph);
+
+ thr->start_running();
+
+run_again:
+ thr->run_node = node;
+ thr->prev_node = node;
+
+ /* It may be that the current session has not yet started
+ its transaction, or it has been committed: */
+
+ trx_start_if_not_started_xa(trx, true);
+
+ err = lock_table(0, prebuilt->table, LOCK_AUTO_INC, thr);
+
+ trx->error_state = err;
+
+ if (err != DB_SUCCESS) {
+ que_thr_stop_for_mysql(thr);
+
+ was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL);
+
+ if (was_lock_wait) {
+ goto run_again;
+ }
+
+ trx->op_info = "";
+
+ return(err);
+ }
+
+ thr->stop_no_error();
+
+ trx->op_info = "";
+
+ return(err);
+}
+
+/** Lock a table.
+@param[in,out] prebuilt table handle
+@return error code or DB_SUCCESS */
+dberr_t
+row_lock_table(row_prebuilt_t* prebuilt)
+{
+ trx_t* trx = prebuilt->trx;
+ que_thr_t* thr;
+ dberr_t err;
+ ibool was_lock_wait;
+
+ trx->op_info = "setting table lock";
+
+ if (prebuilt->sel_graph == NULL) {
+ /* Build a dummy select query graph */
+ row_prebuild_sel_graph(prebuilt);
+ }
+
+ /* We use the select query graph as the dummy graph needed
+ in the lock module call */
+
+ thr = que_fork_get_first_thr(prebuilt->sel_graph);
+
+ thr->start_running();
+
+run_again:
+ thr->run_node = thr;
+ thr->prev_node = thr->common.parent;
+
+ /* It may be that the current session has not yet started
+ its transaction, or it has been committed: */
+
+ trx_start_if_not_started_xa(trx, false);
+
+ err = lock_table(0, prebuilt->table,
+ static_cast<enum lock_mode>(
+ prebuilt->select_lock_type),
+ thr);
+
+ trx->error_state = err;
+
+ if (err != DB_SUCCESS) {
+ que_thr_stop_for_mysql(thr);
+
+ was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL);
+
+ if (was_lock_wait) {
+ goto run_again;
+ }
+
+ trx->op_info = "";
+
+ return(err);
+ }
+
+ thr->stop_no_error();
+
+ trx->op_info = "";
+
+ return(err);
+}
+
+/** Determine is tablespace encrypted but decryption failed, is table corrupted
+or is tablespace .ibd file missing.
+@param[in] table Table
+@param[in] trx Transaction
+@param[in] push_warning true if we should push warning to user
+@retval DB_DECRYPTION_FAILED table is encrypted but decryption failed
+@retval DB_CORRUPTION table is corrupted
+@retval DB_TABLESPACE_NOT_FOUND tablespace .ibd file not found */
+static
+dberr_t
+row_mysql_get_table_status(
+ const dict_table_t* table,
+ trx_t* trx,
+ bool push_warning = true)
+{
+ dberr_t err;
+ if (const fil_space_t* space = table->space) {
+ if (space->crypt_data && space->crypt_data->is_encrypted()) {
+ // maybe we cannot access the table due to failing
+ // to decrypt
+ if (push_warning) {
+ ib_push_warning(trx, DB_DECRYPTION_FAILED,
+ "Table %s in tablespace %lu encrypted."
+ "However key management plugin or used key_id is not found or"
+ " used encryption algorithm or method does not match.",
+ table->name.m_name, table->space);
+ }
+
+ err = DB_DECRYPTION_FAILED;
+ } else {
+ if (push_warning) {
+ ib_push_warning(trx, DB_CORRUPTION,
+ "Table %s in tablespace %lu corrupted.",
+ table->name.m_name, table->space);
+ }
+
+ err = DB_CORRUPTION;
+ }
+ } else {
+ ib::error() << ".ibd file is missing for table "
+ << table->name;
+ err = DB_TABLESPACE_NOT_FOUND;
+ }
+
+ return(err);
+}
+
+/** Does an insert for MySQL.
+@param[in] mysql_rec row in the MySQL format
+@param[in,out] prebuilt prebuilt struct in MySQL handle
+@return error code or DB_SUCCESS */
+dberr_t
+row_insert_for_mysql(
+ const byte* mysql_rec,
+ row_prebuilt_t* prebuilt,
+ ins_mode_t ins_mode)
+{
+ trx_savept_t savept;
+ que_thr_t* thr;
+ dberr_t err;
+ ibool was_lock_wait;
+ trx_t* trx = prebuilt->trx;
+ ins_node_t* node = prebuilt->ins_node;
+ dict_table_t* table = prebuilt->table;
+
+ /* FIX_ME: This blob heap is used to compensate an issue in server
+ for virtual column blob handling */
+ mem_heap_t* blob_heap = NULL;
+
+ ut_ad(trx);
+ ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
+ ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED);
+
+ if (!prebuilt->table->space) {
+
+ ib::error() << "The table " << prebuilt->table->name
+ << " doesn't have a corresponding tablespace, it was"
+ " discarded.";
+
+ return(DB_TABLESPACE_DELETED);
+
+ } else if (!prebuilt->table->is_readable()) {
+ return(row_mysql_get_table_status(prebuilt->table, trx, true));
+ } else if (high_level_read_only) {
+ return(DB_READ_ONLY);
+ }
+
+ DBUG_EXECUTE_IF("mark_table_corrupted", {
+ /* Mark the table corrupted for the clustered index */
+ dict_index_t* index = dict_table_get_first_index(table);
+ ut_ad(dict_index_is_clust(index));
+ dict_set_corrupted(index, trx, "INSERT TABLE"); });
+
+ if (dict_table_is_corrupted(table)) {
+
+ ib::error() << "Table " << table->name << " is corrupt.";
+ return(DB_TABLE_CORRUPT);
+ }
+
+ trx->op_info = "inserting";
+
+ row_mysql_delay_if_needed();
+
+ if (!table->no_rollback()) {
+ trx_start_if_not_started_xa(trx, true);
+ }
+
+ row_get_prebuilt_insert_row(prebuilt);
+ node = prebuilt->ins_node;
+
+ row_mysql_convert_row_to_innobase(node->row, prebuilt, mysql_rec,
+ &blob_heap);
+
+ if (ins_mode != ROW_INS_NORMAL) {
+ node->vers_update_end(prebuilt, ins_mode == ROW_INS_HISTORICAL);
+ }
+
+ savept = trx_savept_take(trx);
+
+ thr = que_fork_get_first_thr(prebuilt->ins_graph);
+
+ if (prebuilt->sql_stat_start) {
+ node->state = INS_NODE_SET_IX_LOCK;
+ prebuilt->sql_stat_start = FALSE;
+ } else {
+ node->state = INS_NODE_ALLOC_ROW_ID;
+ }
+
+ thr->start_running();
+
+run_again:
+ thr->run_node = node;
+ thr->prev_node = node;
+
+ row_ins_step(thr);
+
+ DEBUG_SYNC_C("ib_after_row_insert_step");
+
+ err = trx->error_state;
+
+ if (err != DB_SUCCESS) {
+error_exit:
+ que_thr_stop_for_mysql(thr);
+
+ /* FIXME: What's this ? */
+ thr->lock_state = QUE_THR_LOCK_ROW;
+
+ was_lock_wait = row_mysql_handle_errors(
+ &err, trx, thr, &savept);
+
+ thr->lock_state = QUE_THR_LOCK_NOLOCK;
+
+ if (was_lock_wait) {
+ ut_ad(node->state == INS_NODE_INSERT_ENTRIES
+ || node->state == INS_NODE_ALLOC_ROW_ID);
+ goto run_again;
+ }
+
+ trx->op_info = "";
+
+ if (blob_heap != NULL) {
+ mem_heap_free(blob_heap);
+ }
+
+ return(err);
+ }
+
+ if (dict_table_has_fts_index(table)) {
+ doc_id_t doc_id;
+
+ /* Extract the doc id from the hidden FTS column */
+ doc_id = fts_get_doc_id_from_row(table, node->row);
+
+ if (doc_id <= 0) {
+ ib::error() << "FTS_DOC_ID must be larger than 0 for table "
+ << table->name;
+ err = DB_FTS_INVALID_DOCID;
+ trx->error_state = DB_FTS_INVALID_DOCID;
+ goto error_exit;
+ }
+
+ if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
+ doc_id_t next_doc_id
+ = table->fts->cache->next_doc_id;
+
+ if (doc_id < next_doc_id) {
+ ib::error() << "FTS_DOC_ID must be larger than "
+ << next_doc_id - 1 << " for table "
+ << table->name;
+
+ err = DB_FTS_INVALID_DOCID;
+ trx->error_state = DB_FTS_INVALID_DOCID;
+ goto error_exit;
+ }
+
+ /* Difference between Doc IDs are restricted within
+ 4 bytes integer. See fts_get_encoded_len(). Consecutive
+ doc_ids difference should not exceed
+ FTS_DOC_ID_MAX_STEP value. */
+
+ if (doc_id - next_doc_id >= FTS_DOC_ID_MAX_STEP) {
+ ib::error() << "Doc ID " << doc_id
+ << " is too big. Its difference with"
+ " largest used Doc ID "
+ << next_doc_id - 1 << " cannot"
+ " exceed or equal to "
+ << FTS_DOC_ID_MAX_STEP;
+ err = DB_FTS_INVALID_DOCID;
+ trx->error_state = DB_FTS_INVALID_DOCID;
+ goto error_exit;
+ }
+ }
+
+ if (table->skip_alter_undo) {
+ if (trx->fts_trx == NULL) {
+ trx->fts_trx = fts_trx_create(trx);
+ }
+
+ fts_trx_table_t ftt;
+ ftt.table = table;
+ ftt.fts_trx = trx->fts_trx;
+
+ fts_add_doc_from_tuple(&ftt, doc_id, node->row);
+ } else {
+ /* Pass NULL for the columns affected, since an INSERT affects
+ all FTS indexes. */
+ fts_trx_add_op(trx, table, doc_id, FTS_INSERT, NULL);
+ }
+ }
+
+ thr->stop_no_error();
+
+ if (table->is_system_db) {
+ srv_stats.n_system_rows_inserted.inc(size_t(trx->id));
+ } else {
+ srv_stats.n_rows_inserted.inc(size_t(trx->id));
+ }
+
+ /* Not protected by dict_sys.mutex for performance
+ reasons, we would rather get garbage in stat_n_rows (which is
+ just an estimate anyway) than protecting the following code
+ with a latch. */
+ dict_table_n_rows_inc(table);
+
+ if (prebuilt->clust_index_was_generated) {
+ /* set row id to prebuilt */
+ memcpy(prebuilt->row_id, node->sys_buf, DATA_ROW_ID_LEN);
+ }
+
+ dict_stats_update_if_needed(table, *trx);
+ trx->op_info = "";
+
+ if (blob_heap != NULL) {
+ mem_heap_free(blob_heap);
+ }
+
+ return(err);
+}
+
+/*********************************************************************//**
+Builds a dummy query graph used in selects. */
+void
+row_prebuild_sel_graph(
+/*===================*/
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
+ handle */
+{
+ sel_node_t* node;
+
+ ut_ad(prebuilt && prebuilt->trx);
+
+ if (prebuilt->sel_graph == NULL) {
+
+ node = sel_node_create(prebuilt->heap);
+
+ prebuilt->sel_graph = static_cast<que_fork_t*>(
+ que_node_get_parent(
+ pars_complete_graph_for_exec(
+ static_cast<sel_node_t*>(node),
+ prebuilt->trx, prebuilt->heap,
+ prebuilt)));
+
+ prebuilt->sel_graph->state = QUE_FORK_ACTIVE;
+ }
+}
+
+/*********************************************************************//**
+Creates an query graph node of 'update' type to be used in the MySQL
+interface.
+@return own: update node */
+upd_node_t*
+row_create_update_node_for_mysql(
+/*=============================*/
+ dict_table_t* table, /*!< in: table to update */
+ mem_heap_t* heap) /*!< in: mem heap from which allocated */
+{
+ upd_node_t* node;
+
+ DBUG_ENTER("row_create_update_node_for_mysql");
+
+ node = upd_node_create(heap);
+
+ node->in_mysql_interface = true;
+ node->is_delete = NO_DELETE;
+ node->searched_update = FALSE;
+ node->select = NULL;
+ node->pcur = btr_pcur_create_for_mysql();
+
+ DBUG_PRINT("info", ("node: %p, pcur: %p", node, node->pcur));
+
+ node->table = table;
+
+ node->update = upd_create(dict_table_get_n_cols(table)
+ + dict_table_get_n_v_cols(table), heap);
+
+ node->update_n_fields = dict_table_get_n_cols(table);
+
+ UT_LIST_INIT(node->columns, &sym_node_t::col_var_list);
+
+ node->has_clust_rec_x_lock = TRUE;
+ node->cmpl_info = 0;
+
+ node->table_sym = NULL;
+ node->col_assign_list = NULL;
+
+ DBUG_RETURN(node);
+}
+
+/*********************************************************************//**
+Gets pointer to a prebuilt update vector used in updates. If the update
+graph has not yet been built in the prebuilt struct, then this function
+first builds it.
+@return prebuilt update vector */
+upd_t*
+row_get_prebuilt_update_vector(
+/*===========================*/
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
+ handle */
+{
+ if (prebuilt->upd_node == NULL) {
+
+ /* Not called before for this handle: create an update node
+ and query graph to the prebuilt struct */
+
+ prebuilt->upd_node = row_create_update_node_for_mysql(
+ prebuilt->table, prebuilt->heap);
+
+ prebuilt->upd_graph = static_cast<que_fork_t*>(
+ que_node_get_parent(
+ pars_complete_graph_for_exec(
+ prebuilt->upd_node,
+ prebuilt->trx, prebuilt->heap,
+ prebuilt)));
+
+ prebuilt->upd_graph->state = QUE_FORK_ACTIVE;
+ }
+
+ return(prebuilt->upd_node->update);
+}
+
+/********************************************************************
+Handle an update of a column that has an FTS index. */
+static
+void
+row_fts_do_update(
+/*==============*/
+ trx_t* trx, /* in: transaction */
+ dict_table_t* table, /* in: Table with FTS index */
+ doc_id_t old_doc_id, /* in: old document id */
+ doc_id_t new_doc_id) /* in: new document id */
+{
+ if(trx->fts_next_doc_id) {
+ fts_trx_add_op(trx, table, old_doc_id, FTS_DELETE, NULL);
+ if(new_doc_id != FTS_NULL_DOC_ID)
+ fts_trx_add_op(trx, table, new_doc_id, FTS_INSERT, NULL);
+ }
+}
+
+/************************************************************************
+Handles FTS matters for an update or a delete.
+NOTE: should not be called if the table does not have an FTS index. .*/
+static
+dberr_t
+row_fts_update_or_delete(
+/*=====================*/
+ row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL
+ handle */
+{
+ trx_t* trx = prebuilt->trx;
+ dict_table_t* table = prebuilt->table;
+ upd_node_t* node = prebuilt->upd_node;
+ doc_id_t old_doc_id = prebuilt->fts_doc_id;
+
+ DBUG_ENTER("row_fts_update_or_delete");
+
+ ut_a(dict_table_has_fts_index(prebuilt->table));
+
+ /* Deletes are simple; get them out of the way first. */
+ if (node->is_delete == PLAIN_DELETE) {
+ /* A delete affects all FTS indexes, so we pass NULL */
+ fts_trx_add_op(trx, table, old_doc_id, FTS_DELETE, NULL);
+ } else {
+ doc_id_t new_doc_id;
+ new_doc_id = fts_read_doc_id((byte*) &trx->fts_next_doc_id);
+
+ if (new_doc_id == 0) {
+ ib::error() << "InnoDB FTS: Doc ID cannot be 0";
+ return(DB_FTS_INVALID_DOCID);
+ }
+ row_fts_do_update(trx, table, old_doc_id, new_doc_id);
+ }
+
+ DBUG_RETURN(DB_SUCCESS);
+}
+
+/*********************************************************************//**
+Initialize the Doc ID system for FK table with FTS index */
+static
+void
+init_fts_doc_id_for_ref(
+/*====================*/
+ dict_table_t* table, /*!< in: table */
+ ulint* depth) /*!< in: recusive call depth */
+{
+ dict_foreign_t* foreign;
+
+ table->fk_max_recusive_level = 0;
+
+ (*depth)++;
+
+ /* Limit on tables involved in cascading delete/update */
+ if (*depth > FK_MAX_CASCADE_DEL) {
+ return;
+ }
+
+ /* Loop through this table's referenced list and also
+ recursively traverse each table's foreign table list */
+ for (dict_foreign_set::iterator it = table->referenced_set.begin();
+ it != table->referenced_set.end();
+ ++it) {
+
+ foreign = *it;
+
+ ut_ad(foreign->foreign_table != NULL);
+
+ if (foreign->foreign_table->fts != NULL) {
+ fts_init_doc_id(foreign->foreign_table);
+ }
+
+ if (!foreign->foreign_table->referenced_set.empty()
+ && foreign->foreign_table != table) {
+ init_fts_doc_id_for_ref(
+ foreign->foreign_table, depth);
+ }
+ }
+}
+
+/** Does an update or delete of a row for MySQL.
+@param[in,out] prebuilt prebuilt struct in MySQL handle
+@return error code or DB_SUCCESS */
+dberr_t
+row_update_for_mysql(row_prebuilt_t* prebuilt)
+{
+ trx_savept_t savept;
+ dberr_t err;
+ que_thr_t* thr;
+ dict_index_t* clust_index;
+ upd_node_t* node;
+ dict_table_t* table = prebuilt->table;
+ trx_t* trx = prebuilt->trx;
+ ulint fk_depth = 0;
+ bool got_s_lock = false;
+
+ DBUG_ENTER("row_update_for_mysql");
+
+ ut_ad(trx);
+ ut_a(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
+ ut_a(prebuilt->magic_n2 == ROW_PREBUILT_ALLOCATED);
+ ut_a(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
+ ut_ad(table->stat_initialized);
+
+ if (!table->is_readable()) {
+ return(row_mysql_get_table_status(table, trx, true));
+ }
+
+ if (high_level_read_only) {
+ return(DB_READ_ONLY);
+ }
+
+ DEBUG_SYNC_C("innodb_row_update_for_mysql_begin");
+
+ trx->op_info = "updating or deleting";
+
+ row_mysql_delay_if_needed();
+
+ init_fts_doc_id_for_ref(table, &fk_depth);
+
+ if (!table->no_rollback()) {
+ trx_start_if_not_started_xa(trx, true);
+ }
+
+ if (dict_table_is_referenced_by_foreign_key(table)) {
+ /* Share lock the data dictionary to prevent any
+ table dictionary (for foreign constraint) change.
+ This is similar to row_ins_check_foreign_constraint
+ check protect by the dictionary lock as well.
+ In the future, this can be removed once the Foreign
+ key MDL is implemented */
+ row_mysql_freeze_data_dictionary(trx);
+ init_fts_doc_id_for_ref(table, &fk_depth);
+ row_mysql_unfreeze_data_dictionary(trx);
+ }
+
+ node = prebuilt->upd_node;
+ const bool is_delete = node->is_delete == PLAIN_DELETE;
+ ut_ad(node->table == table);
+
+ clust_index = dict_table_get_first_index(table);
+
+ btr_pcur_copy_stored_position(node->pcur,
+ prebuilt->pcur->btr_cur.index
+ == clust_index
+ ? prebuilt->pcur
+ : prebuilt->clust_pcur);
+
+ ut_a(node->pcur->rel_pos == BTR_PCUR_ON);
+
+ /* MySQL seems to call rnd_pos before updating each row it
+ has cached: we can get the correct cursor position from
+ prebuilt->pcur; NOTE that we cannot build the row reference
+ from mysql_rec if the clustered index was automatically
+ generated for the table: MySQL does not know anything about
+ the row id used as the clustered index key */
+
+ savept = trx_savept_take(trx);
+
+ thr = que_fork_get_first_thr(prebuilt->upd_graph);
+
+ node->state = UPD_NODE_UPDATE_CLUSTERED;
+
+ ut_ad(!prebuilt->sql_stat_start);
+
+ thr->start_running();
+
+ ut_ad(!prebuilt->versioned_write || node->table->versioned());
+
+ if (prebuilt->versioned_write) {
+ if (node->is_delete == VERSIONED_DELETE) {
+ node->vers_make_delete(trx);
+ } else if (node->update->affects_versioned()) {
+ node->vers_make_update(trx);
+ }
+ }
+
+ for (;;) {
+ thr->run_node = node;
+ thr->prev_node = node;
+ thr->fk_cascade_depth = 0;
+
+ row_upd_step(thr);
+
+ err = trx->error_state;
+
+ if (err == DB_SUCCESS) {
+ break;
+ }
+
+ que_thr_stop_for_mysql(thr);
+
+ if (err == DB_RECORD_NOT_FOUND) {
+ trx->error_state = DB_SUCCESS;
+ goto error;
+ }
+
+ thr->lock_state= QUE_THR_LOCK_ROW;
+
+ DEBUG_SYNC(trx->mysql_thd, "row_update_for_mysql_error");
+
+ bool was_lock_wait = row_mysql_handle_errors(
+ &err, trx, thr, &savept);
+ thr->lock_state= QUE_THR_LOCK_NOLOCK;
+
+ if (!was_lock_wait) {
+ goto error;
+ }
+ }
+
+ thr->stop_no_error();
+
+ if (dict_table_has_fts_index(table)
+ && trx->fts_next_doc_id != UINT64_UNDEFINED) {
+ err = row_fts_update_or_delete(prebuilt);
+ if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+ ut_ad("unexpected error" == 0);
+ goto error;
+ }
+ }
+
+ /* Completed cascading operations (if any) */
+ if (got_s_lock) {
+ row_mysql_unfreeze_data_dictionary(trx);
+ }
+
+ bool update_statistics;
+ ut_ad(is_delete == (node->is_delete == PLAIN_DELETE));
+
+ if (is_delete) {
+ /* Not protected by dict_sys.mutex for performance
+ reasons, we would rather get garbage in stat_n_rows (which is
+ just an estimate anyway) than protecting the following code
+ with a latch. */
+ dict_table_n_rows_dec(prebuilt->table);
+
+ if (table->is_system_db) {
+ srv_stats.n_system_rows_deleted.inc(size_t(trx->id));
+ } else {
+ srv_stats.n_rows_deleted.inc(size_t(trx->id));
+ }
+
+ update_statistics = !srv_stats_include_delete_marked;
+ } else {
+ if (table->is_system_db) {
+ srv_stats.n_system_rows_updated.inc(size_t(trx->id));
+ } else {
+ srv_stats.n_rows_updated.inc(size_t(trx->id));
+ }
+
+ update_statistics
+ = !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE);
+ }
+
+ if (update_statistics) {
+ dict_stats_update_if_needed(prebuilt->table, *trx);
+ } else {
+ /* Always update the table modification counter. */
+ prebuilt->table->stat_modified_counter++;
+ }
+
+ trx->op_info = "";
+
+ DBUG_RETURN(err);
+
+error:
+ trx->op_info = "";
+ if (got_s_lock) {
+ row_mysql_unfreeze_data_dictionary(trx);
+ }
+
+ DBUG_RETURN(err);
+}
+
+/** This can only be used when the current transaction is at
+READ COMMITTED or READ UNCOMMITTED isolation level.
+Before calling this function row_search_for_mysql() must have
+initialized prebuilt->new_rec_locks to store the information which new
+record locks really were set. This function removes a newly set
+clustered index record lock under prebuilt->pcur or
+prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that
+releases the latest clustered index record lock we set.
+@param[in,out] prebuilt prebuilt struct in MySQL handle
+@param[in] has_latches_on_recs TRUE if called so that we have the
+ latches on the records under pcur
+ and clust_pcur, and we do not need
+ to reposition the cursors. */
+void
+row_unlock_for_mysql(
+ row_prebuilt_t* prebuilt,
+ ibool has_latches_on_recs)
+{
+ btr_pcur_t* pcur = prebuilt->pcur;
+ btr_pcur_t* clust_pcur = prebuilt->clust_pcur;
+ trx_t* trx = prebuilt->trx;
+
+ ut_ad(prebuilt != NULL);
+ ut_ad(trx != NULL);
+ ut_ad(trx->isolation_level <= TRX_ISO_READ_COMMITTED);
+
+ if (dict_index_is_spatial(prebuilt->index)) {
+ return;
+ }
+
+ trx->op_info = "unlock_row";
+
+ if (prebuilt->new_rec_locks >= 1) {
+
+ const rec_t* rec;
+ dict_index_t* index;
+ trx_id_t rec_trx_id;
+ mtr_t mtr;
+
+ mtr_start(&mtr);
+
+ /* Restore the cursor position and find the record */
+
+ if (!has_latches_on_recs) {
+ btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, &mtr);
+ }
+
+ rec = btr_pcur_get_rec(pcur);
+ index = btr_pcur_get_btr_cur(pcur)->index;
+
+ if (prebuilt->new_rec_locks >= 2) {
+ /* Restore the cursor position and find the record
+ in the clustered index. */
+
+ if (!has_latches_on_recs) {
+ btr_pcur_restore_position(BTR_SEARCH_LEAF,
+ clust_pcur, &mtr);
+ }
+
+ rec = btr_pcur_get_rec(clust_pcur);
+ index = btr_pcur_get_btr_cur(clust_pcur)->index;
+ }
+
+ if (!dict_index_is_clust(index)) {
+ /* This is not a clustered index record. We
+ do not know how to unlock the record. */
+ goto no_unlock;
+ }
+
+ /* If the record has been modified by this
+ transaction, do not unlock it. */
+
+ if (index->trx_id_offset) {
+ rec_trx_id = trx_read_trx_id(rec
+ + index->trx_id_offset);
+ } else {
+ mem_heap_t* heap = NULL;
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs* offsets = offsets_;
+
+ rec_offs_init(offsets_);
+ offsets = rec_get_offsets(rec, index, offsets,
+ index->n_core_fields,
+ ULINT_UNDEFINED, &heap);
+
+ rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ }
+
+ if (rec_trx_id != trx->id) {
+ /* We did not update the record: unlock it */
+
+ rec = btr_pcur_get_rec(pcur);
+
+ lock_rec_unlock(
+ trx,
+ btr_pcur_get_block(pcur),
+ rec,
+ static_cast<enum lock_mode>(
+ prebuilt->select_lock_type));
+
+ if (prebuilt->new_rec_locks >= 2) {
+ rec = btr_pcur_get_rec(clust_pcur);
+
+ lock_rec_unlock(
+ trx,
+ btr_pcur_get_block(clust_pcur),
+ rec,
+ static_cast<enum lock_mode>(
+ prebuilt->select_lock_type));
+ }
+ }
+no_unlock:
+ mtr_commit(&mtr);
+ }
+
+ trx->op_info = "";
+}
+
+/*********************************************************************//**
+Locks the data dictionary in shared mode from modifications, for performing
+foreign key check, rollback, or other operation invisible to MySQL. */
+void
+row_mysql_freeze_data_dictionary_func(
+/*==================================*/
+ trx_t* trx, /*!< in/out: transaction */
+ const char* file, /*!< in: file name */
+ unsigned line) /*!< in: line number */
+{
+ ut_a(trx->dict_operation_lock_mode == 0);
+
+ rw_lock_s_lock_inline(&dict_sys.latch, 0, file, line);
+
+ trx->dict_operation_lock_mode = RW_S_LATCH;
+}
+
+/*********************************************************************//**
+Unlocks the data dictionary shared lock. */
+void
+row_mysql_unfreeze_data_dictionary(
+/*===============================*/
+ trx_t* trx) /*!< in/out: transaction */
+{
+ ut_ad(lock_trx_has_sys_table_locks(trx) == NULL);
+
+ ut_a(trx->dict_operation_lock_mode == RW_S_LATCH);
+
+ rw_lock_s_unlock(&dict_sys.latch);
+
+ trx->dict_operation_lock_mode = 0;
+}
+
+/** Write query start time as SQL field data to a buffer. Needed by InnoDB.
+@param thd Thread object
+@param buf Buffer to hold start time data */
+void thd_get_query_start_data(THD *thd, char *buf);
+
+/** Insert history row when evaluating foreign key referential action.
+
+1. Create new dtuple_t 'row' from node->historical_row;
+2. Update its row_end to current timestamp;
+3. Insert it to a table;
+4. Update table statistics.
+
+This is used in UPDATE CASCADE/SET NULL of a system versioned referenced table.
+
+node->historical_row: dtuple_t containing pointers of row changed by refertial
+action.
+
+@param[in] thr current query thread
+@param[in] node a node which just updated a row in a foreign table
+@return DB_SUCCESS or some error */
+static dberr_t row_update_vers_insert(que_thr_t* thr, upd_node_t* node)
+{
+ trx_t* trx = thr_get_trx(thr);
+ dfield_t* row_end;
+ char row_end_data[8];
+ dict_table_t* table = node->table;
+ const unsigned zip_size = table->space->zip_size();
+ ut_ad(table->versioned());
+
+ dtuple_t* row;
+ const ulint n_cols = dict_table_get_n_cols(table);
+ const ulint n_v_cols = dict_table_get_n_v_cols(table);
+
+ ut_ad(n_cols == dtuple_get_n_fields(node->historical_row));
+ ut_ad(n_v_cols == dtuple_get_n_v_fields(node->historical_row));
+
+ row = dtuple_create_with_vcol(node->historical_heap, n_cols, n_v_cols);
+
+ dict_table_copy_types(row, table);
+
+ ins_node_t* insert_node =
+ ins_node_create(INS_DIRECT, table, node->historical_heap);
+
+ if (!insert_node) {
+ trx->error_state = DB_OUT_OF_MEMORY;
+ goto exit;
+ }
+
+ insert_node->common.parent = thr;
+ ins_node_set_new_row(insert_node, row);
+
+ ut_ad(n_cols > DATA_N_SYS_COLS);
+ // Exclude DB_ROW_ID, DB_TRX_ID, DB_ROLL_PTR
+ for (ulint i = 0; i < n_cols - DATA_N_SYS_COLS; i++) {
+ dfield_t *src= dtuple_get_nth_field(node->historical_row, i);
+ dfield_t *dst= dtuple_get_nth_field(row, i);
+ dfield_copy(dst, src);
+ if (dfield_is_ext(src)) {
+ byte *field_data
+ = static_cast<byte*>(dfield_get_data(src));
+ ulint ext_len;
+ ulint field_len = dfield_get_len(src);
+
+ ut_a(field_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+ ut_a(memcmp(field_data + field_len
+ - BTR_EXTERN_FIELD_REF_SIZE,
+ field_ref_zero,
+ BTR_EXTERN_FIELD_REF_SIZE));
+
+ byte *data = btr_copy_externally_stored_field(
+ &ext_len, field_data, zip_size, field_len,
+ node->historical_heap);
+ dfield_set_data(dst, data, ext_len);
+ }
+ }
+
+ for (ulint i = 0; i < n_v_cols; i++) {
+ dfield_t *dst= dtuple_get_nth_v_field(row, i);
+ dfield_t *src= dtuple_get_nth_v_field(node->historical_row, i);
+ dfield_copy(dst, src);
+ }
+
+ node->historical_row = NULL;
+
+ row_end = dtuple_get_nth_field(row, table->vers_end);
+ if (dict_table_get_nth_col(table, table->vers_end)->vers_native()) {
+ mach_write_to_8(row_end_data, trx->id);
+ dfield_set_data(row_end, row_end_data, 8);
+ } else {
+ thd_get_query_start_data(trx->mysql_thd, row_end_data);
+ dfield_set_data(row_end, row_end_data, 7);
+ }
+
+ for (;;) {
+ thr->run_node = insert_node;
+ thr->prev_node = insert_node;
+
+ row_ins_step(thr);
+
+ switch (trx->error_state) {
+ case DB_LOCK_WAIT:
+ que_thr_stop_for_mysql(thr);
+ lock_wait_suspend_thread(thr);
+
+ if (trx->error_state == DB_SUCCESS) {
+ continue;
+ }
+
+ /* fall through */
+ default:
+ /* Other errors are handled for the parent node. */
+ thr->fk_cascade_depth = 0;
+ goto exit;
+
+ case DB_SUCCESS:
+ srv_stats.n_rows_inserted.inc(
+ static_cast<size_t>(trx->id));
+ dict_stats_update_if_needed(table, *trx);
+ goto exit;
+ }
+ }
+exit:
+ que_graph_free_recursive(insert_node);
+ mem_heap_free(node->historical_heap);
+ node->historical_heap = NULL;
+ return trx->error_state;
+}
+
+/**********************************************************************//**
+Does a cascaded delete or set null in a foreign key operation.
+@return error code or DB_SUCCESS */
+dberr_t
+row_update_cascade_for_mysql(
+/*=========================*/
+ que_thr_t* thr, /*!< in: query thread */
+ upd_node_t* node, /*!< in: update node used in the cascade
+ or set null operation */
+ dict_table_t* table) /*!< in: table where we do the operation */
+{
+ /* Increment fk_cascade_depth to record the recursive call depth on
+ a single update/delete that affects multiple tables chained
+ together with foreign key relations. */
+
+ if (++thr->fk_cascade_depth > FK_MAX_CASCADE_DEL) {
+ return(DB_FOREIGN_EXCEED_MAX_CASCADE);
+ }
+
+ const trx_t* trx = thr_get_trx(thr);
+
+ if (table->versioned()) {
+ if (node->is_delete == PLAIN_DELETE) {
+ node->vers_make_delete(trx);
+ } else if (node->update->affects_versioned()) {
+ dberr_t err = row_update_vers_insert(thr, node);
+ if (err != DB_SUCCESS) {
+ return err;
+ }
+ node->vers_make_update(trx);
+ }
+ }
+
+ for (;;) {
+ thr->run_node = node;
+ thr->prev_node = node;
+
+ DEBUG_SYNC_C("foreign_constraint_update_cascade");
+ {
+ TABLE *mysql_table = thr->prebuilt->m_mysql_table;
+ thr->prebuilt->m_mysql_table = NULL;
+ row_upd_step(thr);
+ thr->prebuilt->m_mysql_table = mysql_table;
+ }
+
+ switch (trx->error_state) {
+ case DB_LOCK_WAIT:
+ que_thr_stop_for_mysql(thr);
+ lock_wait_suspend_thread(thr);
+
+ if (trx->error_state == DB_SUCCESS) {
+ continue;
+ }
+
+ /* fall through */
+ default:
+ /* Other errors are handled for the parent node. */
+ thr->fk_cascade_depth = 0;
+ return trx->error_state;
+
+ case DB_SUCCESS:
+ thr->fk_cascade_depth = 0;
+ bool stats;
+
+ if (node->is_delete == PLAIN_DELETE) {
+ /* Not protected by dict_sys.mutex for
+ performance reasons, we would rather
+ get garbage in stat_n_rows (which is
+ just an estimate anyway) than
+ protecting the following code with a
+ latch. */
+ dict_table_n_rows_dec(node->table);
+
+ stats = !srv_stats_include_delete_marked;
+ srv_stats.n_rows_deleted.inc(size_t(trx->id));
+ } else {
+ stats = !(node->cmpl_info
+ & UPD_NODE_NO_ORD_CHANGE);
+ srv_stats.n_rows_updated.inc(size_t(trx->id));
+ }
+
+ if (stats) {
+ dict_stats_update_if_needed(node->table, *trx);
+ } else {
+ /* Always update the table
+ modification counter. */
+ node->table->stat_modified_counter++;
+ }
+
+ return(DB_SUCCESS);
+ }
+ }
+}
+
+/*********************************************************************//**
+Locks the data dictionary exclusively for performing a table create or other
+data dictionary modification operation. */
+void
+row_mysql_lock_data_dictionary_func(
+/*================================*/
+ trx_t* trx, /*!< in/out: transaction */
+ const char* file, /*!< in: file name */
+ unsigned line) /*!< in: line number */
+{
+ ut_a(trx->dict_operation_lock_mode == 0
+ || trx->dict_operation_lock_mode == RW_X_LATCH);
+ dict_sys.lock(file, line);
+ trx->dict_operation_lock_mode = RW_X_LATCH;
+}
+
+/*********************************************************************//**
+Unlocks the data dictionary exclusive lock. */
+void
+row_mysql_unlock_data_dictionary(
+/*=============================*/
+ trx_t* trx) /*!< in/out: transaction */
+{
+ ut_ad(lock_trx_has_sys_table_locks(trx) == NULL);
+ ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
+ trx->dict_operation_lock_mode = 0;
+ dict_sys.unlock();
+}
+
+/*********************************************************************//**
+Creates a table for MySQL. On failure the transaction will be rolled back
+and the 'table' object will be freed.
+@return error code or DB_SUCCESS */
+dberr_t
+row_create_table_for_mysql(
+/*=======================*/
+ dict_table_t* table, /*!< in, own: table definition
+ (will be freed, or on DB_SUCCESS
+ added to the data dictionary cache) */
+ trx_t* trx, /*!< in/out: transaction */
+ fil_encryption_t mode, /*!< in: encryption mode */
+ uint32_t key_id) /*!< in: encryption key_id */
+{
+ tab_node_t* node;
+ mem_heap_t* heap;
+ que_thr_t* thr;
+ dberr_t err;
+
+ ut_d(dict_sys.assert_locked());
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_at_start_of_row_create_table_for_mysql",
+ goto err_exit;
+ );
+
+ trx->op_info = "creating table";
+
+ if (row_mysql_is_system_table(table->name.m_name)) {
+
+ ib::error() << "Trying to create a MySQL system table "
+ << table->name << " of type InnoDB. MySQL system"
+ " tables must be of the MyISAM type!";
+#ifndef DBUG_OFF
+err_exit:
+#endif /* !DBUG_OFF */
+ dict_mem_table_free(table);
+
+ trx->op_info = "";
+
+ return(DB_ERROR);
+ }
+
+ trx_start_if_not_started_xa(trx, true);
+
+ heap = mem_heap_create(512);
+
+ switch (trx_get_dict_operation(trx)) {
+ case TRX_DICT_OP_NONE:
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ case TRX_DICT_OP_TABLE:
+ break;
+ case TRX_DICT_OP_INDEX:
+ /* If the transaction was previously flagged as
+ TRX_DICT_OP_INDEX, we should be creating auxiliary
+ tables for full-text indexes. */
+ ut_ad(strstr(table->name.m_name, "/FTS_") != NULL);
+ }
+
+ node = tab_create_graph_create(table, heap, mode, key_id);
+
+ thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
+
+ ut_a(thr == que_fork_start_command(
+ static_cast<que_fork_t*>(que_node_get_parent(thr))));
+
+ que_run_threads(thr);
+
+ err = trx->error_state;
+
+ /* Update SYS_TABLESPACES and SYS_DATAFILES if a new file-per-table
+ tablespace was created. */
+ if (err == DB_SUCCESS && dict_table_is_file_per_table(table)) {
+ err = dict_replace_tablespace_in_dictionary(
+ table->space_id, table->name.m_name,
+ table->space->flags,
+ table->space->chain.start->name, trx);
+
+ if (err != DB_SUCCESS) {
+
+ /* We must delete the link file. */
+ RemoteDatafile::delete_link_file(table->name.m_name);
+ }
+ }
+
+ switch (err) {
+ case DB_SUCCESS:
+ break;
+ case DB_OUT_OF_FILE_SPACE:
+ trx->error_state = DB_SUCCESS;
+ trx->rollback();
+
+ ib::warn() << "Cannot create table "
+ << table->name
+ << " because tablespace full";
+
+ if (dict_table_open_on_name(table->name.m_name, TRUE, FALSE,
+ DICT_ERR_IGNORE_NONE)) {
+
+ dict_table_close_and_drop(trx, table);
+ } else {
+ dict_mem_table_free(table);
+ }
+
+ break;
+
+ case DB_UNSUPPORTED:
+ case DB_TOO_MANY_CONCURRENT_TRXS:
+ /* We already have .ibd file here. it should be deleted. */
+
+ if (dict_table_is_file_per_table(table)
+ && fil_delete_tablespace(table->space_id) != DB_SUCCESS) {
+ ib::error() << "Cannot delete the file of table "
+ << table->name;
+ }
+ /* fall through */
+
+ case DB_DUPLICATE_KEY:
+ case DB_TABLESPACE_EXISTS:
+ default:
+ trx->error_state = DB_SUCCESS;
+ trx->rollback();
+ dict_mem_table_free(table);
+ break;
+ }
+
+ que_graph_free((que_t*) que_node_get_parent(thr));
+
+ trx->op_info = "";
+
+ return(err);
+}
+
+/*********************************************************************//**
+Create an index when creating a table.
+On failure, the caller must drop the table!
+@return error number or DB_SUCCESS */
+dberr_t
+row_create_index_for_mysql(
+/*=======================*/
+ dict_index_t* index, /*!< in, own: index definition
+ (will be freed) */
+ trx_t* trx, /*!< in: transaction handle */
+ const ulint* field_lengths) /*!< in: if not NULL, must contain
+ dict_index_get_n_fields(index)
+ actual field lengths for the
+ index columns, which are
+ then checked for not being too
+ large. */
+{
+ ind_node_t* node;
+ mem_heap_t* heap;
+ que_thr_t* thr;
+ dberr_t err;
+ ulint i;
+ ulint len;
+ dict_table_t* table = index->table;
+
+ ut_d(dict_sys.assert_locked());
+
+ for (i = 0; i < index->n_def; i++) {
+ /* Check that prefix_len and actual length
+ < DICT_MAX_INDEX_COL_LEN */
+
+ len = dict_index_get_nth_field(index, i)->prefix_len;
+
+ if (field_lengths && field_lengths[i]) {
+ len = ut_max(len, field_lengths[i]);
+ }
+
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_at_create_index",
+ len = DICT_MAX_FIELD_LEN_BY_FORMAT(table) + 1;
+ );
+
+ /* Column or prefix length exceeds maximum column length */
+ if (len > (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table)) {
+ dict_mem_index_free(index);
+ return DB_TOO_BIG_INDEX_COL;
+ }
+ }
+
+ trx->op_info = "creating index";
+
+ /* For temp-table we avoid insertion into SYSTEM TABLES to
+ maintain performance and so we have separate path that directly
+ just updates dictonary cache. */
+ if (!table->is_temporary()) {
+ trx_start_if_not_started_xa(trx, true);
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ /* Note that the space id where we store the index is
+ inherited from the table in dict_build_index_def_step()
+ in dict0crea.cc. */
+
+ heap = mem_heap_create(512);
+ node = ind_create_graph_create(index, table->name.m_name,
+ heap);
+
+ thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
+
+ ut_a(thr == que_fork_start_command(
+ static_cast<que_fork_t*>(
+ que_node_get_parent(thr))));
+
+ que_run_threads(thr);
+
+ err = trx->error_state;
+
+ index = node->index;
+
+ ut_ad(!index == (err != DB_SUCCESS));
+
+ que_graph_free((que_t*) que_node_get_parent(thr));
+
+ if (index && (index->type & DICT_FTS)) {
+ err = fts_create_index_tables(trx, index, table->id);
+ }
+ } else {
+ dict_build_index_def(table, index, trx);
+
+ err = dict_index_add_to_cache(index, FIL_NULL);
+ ut_ad((index == NULL) == (err != DB_SUCCESS));
+ if (UNIV_LIKELY(err == DB_SUCCESS)) {
+ ut_ad(!index->is_instant());
+ index->n_core_null_bytes = static_cast<uint8_t>(
+ UT_BITS_IN_BYTES(unsigned(index->n_nullable)));
+
+ err = dict_create_index_tree_in_mem(index, trx);
+#ifdef BTR_CUR_HASH_ADAPT
+ ut_ad(!index->search_info->ref_count);
+#endif /* BTR_CUR_HASH_ADAPT */
+
+ if (err != DB_SUCCESS) {
+ dict_index_remove_from_cache(table, index);
+ }
+ }
+ }
+
+ trx->op_info = "";
+
+ return(err);
+}
+
+/*********************************************************************//**
+Drops a table for MySQL as a background operation. MySQL relies on Unix
+in ALTER TABLE to the fact that the table handler does not remove the
+table before all handles to it has been removed. Furhermore, the MySQL's
+call to drop table must be non-blocking. Therefore we do the drop table
+as a background operation, which is taken care of by the master thread
+in srv0srv.cc.
+@return error code or DB_SUCCESS */
+static
+dberr_t
+row_drop_table_for_mysql_in_background(
+/*===================================*/
+ const char* name) /*!< in: table name */
+{
+ dberr_t error;
+ trx_t* trx;
+
+ trx = trx_create();
+
+ /* If the original transaction was dropping a table referenced by
+ foreign keys, we must set the following to be able to drop the
+ table: */
+
+ trx->check_foreigns = false;
+
+ /* Try to drop the table in InnoDB */
+
+ error = row_drop_table_for_mysql(name, trx, SQLCOM_TRUNCATE);
+
+ trx_commit_for_mysql(trx);
+
+ trx->free();
+
+ return(error);
+}
+
+/*********************************************************************//**
+The master thread in srv0srv.cc calls this regularly to drop tables which
+we must drop in background after queries to them have ended. Such lazy
+dropping of tables is needed in ALTER TABLE on Unix.
+@return how many tables dropped + remaining tables in list */
+ulint
+row_drop_tables_for_mysql_in_background(void)
+/*=========================================*/
+{
+ row_mysql_drop_t* drop;
+ dict_table_t* table;
+ ulint n_tables;
+ ulint n_tables_dropped = 0;
+loop:
+ mutex_enter(&row_drop_list_mutex);
+
+ ut_a(row_mysql_drop_list_inited);
+next:
+ drop = UT_LIST_GET_FIRST(row_mysql_drop_list);
+
+ n_tables = UT_LIST_GET_LEN(row_mysql_drop_list);
+
+ mutex_exit(&row_drop_list_mutex);
+
+ if (drop == NULL) {
+ /* All tables dropped */
+
+ return(n_tables + n_tables_dropped);
+ }
+
+ /* On fast shutdown, just empty the list without dropping tables. */
+ table = srv_shutdown_state == SRV_SHUTDOWN_NONE || !srv_fast_shutdown
+ ? dict_table_open_on_id(drop->table_id, FALSE,
+ DICT_TABLE_OP_OPEN_ONLY_IF_CACHED)
+ : NULL;
+
+ if (!table) {
+ n_tables_dropped++;
+ mutex_enter(&row_drop_list_mutex);
+ UT_LIST_REMOVE(row_mysql_drop_list, drop);
+ MONITOR_DEC(MONITOR_BACKGROUND_DROP_TABLE);
+ ut_free(drop);
+ goto next;
+ }
+
+ ut_a(!table->can_be_evicted);
+
+ bool skip = false;
+
+ if (!table->to_be_dropped) {
+skip:
+ dict_table_close(table, FALSE, FALSE);
+
+ mutex_enter(&row_drop_list_mutex);
+ UT_LIST_REMOVE(row_mysql_drop_list, drop);
+ if (!skip) {
+ UT_LIST_ADD_LAST(row_mysql_drop_list, drop);
+ } else {
+ ut_free(drop);
+ }
+ goto next;
+ }
+
+ if (!srv_fast_shutdown && !trx_sys.any_active_transactions()) {
+ lock_mutex_enter();
+ skip = UT_LIST_GET_LEN(table->locks) != 0;
+ lock_mutex_exit();
+ if (skip) {
+ /* We cannot drop tables that are locked by XA
+ PREPARE transactions. */
+ goto skip;
+ }
+ }
+
+ char* name = mem_strdup(table->name.m_name);
+
+ dict_table_close(table, FALSE, FALSE);
+
+ dberr_t err = row_drop_table_for_mysql_in_background(name);
+
+ ut_free(name);
+
+ if (err != DB_SUCCESS) {
+ /* If the DROP fails for some table, we return, and let the
+ main thread retry later */
+ return(n_tables + n_tables_dropped);
+ }
+
+ goto loop;
+}
+
+/*********************************************************************//**
+Get the background drop list length. NOTE: the caller must own the
+drop list mutex!
+@return how many tables in list */
+ulint
+row_get_background_drop_list_len_low(void)
+/*======================================*/
+{
+ ulint len;
+
+ mutex_enter(&row_drop_list_mutex);
+
+ ut_a(row_mysql_drop_list_inited);
+
+ len = UT_LIST_GET_LEN(row_mysql_drop_list);
+
+ mutex_exit(&row_drop_list_mutex);
+
+ return(len);
+}
+
+/** Drop garbage tables during recovery. */
+void
+row_mysql_drop_garbage_tables()
+{
+ mem_heap_t* heap = mem_heap_create(FN_REFLEN);
+ btr_pcur_t pcur;
+ mtr_t mtr;
+ trx_t* trx = trx_create();
+ trx->op_info = "dropping garbage tables";
+ row_mysql_lock_data_dictionary(trx);
+
+ mtr.start();
+ btr_pcur_open_at_index_side(
+ true, dict_table_get_first_index(dict_sys.sys_tables),
+ BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
+
+ for (;;) {
+ const rec_t* rec;
+ const byte* field;
+ ulint len;
+ const char* table_name;
+
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
+ break;
+ }
+
+ rec = btr_pcur_get_rec(&pcur);
+ if (rec_get_deleted_flag(rec, 0)) {
+ continue;
+ }
+
+ field = rec_get_nth_field_old(rec, 0/*NAME*/, &len);
+ if (len == UNIV_SQL_NULL || len == 0) {
+ /* Corrupted SYS_TABLES.NAME */
+ continue;
+ }
+
+ table_name = mem_heap_strdupl(
+ heap,
+ reinterpret_cast<const char*>(field), len);
+ if (strstr(table_name, "/" TEMP_FILE_PREFIX "-") &&
+ !strstr(table_name, "/" TEMP_FILE_PREFIX "-backup-") &&
+ !strstr(table_name, "/" TEMP_FILE_PREFIX "-exchange-"))
+ {
+ btr_pcur_store_position(&pcur, &mtr);
+ btr_pcur_commit_specify_mtr(&pcur, &mtr);
+
+ if (dict_load_table(table_name,
+ DICT_ERR_IGNORE_DROP)) {
+ row_drop_table_for_mysql(table_name, trx,
+ SQLCOM_DROP_TABLE);
+ trx_commit_for_mysql(trx);
+ }
+
+ mtr.start();
+ btr_pcur_restore_position(BTR_SEARCH_LEAF,
+ &pcur, &mtr);
+ }
+
+ mem_heap_empty(heap);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr.commit();
+ row_mysql_unlock_data_dictionary(trx);
+ trx->free();
+ mem_heap_free(heap);
+}
+
+/*********************************************************************//**
+If a table is not yet in the drop list, adds the table to the list of tables
+which the master thread drops in background. We need this on Unix because in
+ALTER TABLE MySQL may call drop table even if the table has running queries on
+it. Also, if there are running foreign key checks on the table, we drop the
+table lazily.
+@return whether background DROP TABLE was scheduled for the first time */
+static
+bool
+row_add_table_to_background_drop_list(table_id_t table_id)
+{
+ row_mysql_drop_t* drop;
+ bool added = true;
+
+ mutex_enter(&row_drop_list_mutex);
+
+ ut_a(row_mysql_drop_list_inited);
+
+ /* Look if the table already is in the drop list */
+ for (drop = UT_LIST_GET_FIRST(row_mysql_drop_list);
+ drop != NULL;
+ drop = UT_LIST_GET_NEXT(row_mysql_drop_list, drop)) {
+
+ if (drop->table_id == table_id) {
+ added = false;
+ goto func_exit;
+ }
+ }
+
+ drop = static_cast<row_mysql_drop_t*>(ut_malloc_nokey(sizeof *drop));
+ drop->table_id = table_id;
+
+ UT_LIST_ADD_LAST(row_mysql_drop_list, drop);
+
+ MONITOR_INC(MONITOR_BACKGROUND_DROP_TABLE);
+func_exit:
+ mutex_exit(&row_drop_list_mutex);
+ return added;
+}
+
+/** Reassigns the table identifier of a table.
+@param[in,out] table table
+@param[in,out] trx transaction
+@param[out] new_id new table id
+@return error code or DB_SUCCESS */
+static
+dberr_t
+row_mysql_table_id_reassign(
+ dict_table_t* table,
+ trx_t* trx,
+ table_id_t* new_id)
+{
+ dberr_t err;
+ pars_info_t* info = pars_info_create();
+
+ dict_hdr_get_new_id(new_id, NULL, NULL);
+
+ pars_info_add_ull_literal(info, "old_id", table->id);
+ pars_info_add_ull_literal(info, "new_id", *new_id);
+
+ /* Note: This cannot be rolled back. Rollback would see the
+ UPDATE SYS_INDEXES as two operations: DELETE and INSERT.
+ It would invoke btr_free_if_exists() when rolling back the
+ INSERT, effectively dropping all indexes of the table. */
+ err = que_eval_sql(
+ info,
+ "PROCEDURE RENUMBER_TABLE_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLES SET ID = :new_id\n"
+ " WHERE ID = :old_id;\n"
+ "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
+ " WHERE TABLE_ID = :old_id;\n"
+ "UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n"
+ " WHERE TABLE_ID = :old_id;\n"
+ "UPDATE SYS_VIRTUAL SET TABLE_ID = :new_id\n"
+ " WHERE TABLE_ID = :old_id;\n"
+ "END;\n", FALSE, trx);
+
+ return(err);
+}
+
+/*********************************************************************//**
+Setup the pre-requisites for DISCARD TABLESPACE. It will start the transaction,
+acquire the data dictionary lock in X mode and open the table.
+@return table instance or 0 if not found. */
+static
+dict_table_t*
+row_discard_tablespace_begin(
+/*=========================*/
+ const char* name, /*!< in: table name */
+ trx_t* trx) /*!< in: transaction handle */
+{
+ trx->op_info = "discarding tablespace";
+
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
+ trx_start_if_not_started_xa(trx, true);
+
+ /* Serialize data dictionary operations with dictionary mutex:
+ this is to avoid deadlocks during data dictionary operations */
+
+ row_mysql_lock_data_dictionary(trx);
+
+ dict_table_t* table;
+
+ table = dict_table_open_on_name(
+ name, TRUE, FALSE, DICT_ERR_IGNORE_FK_NOKEY);
+
+ if (table) {
+ dict_stats_wait_bg_to_stop_using_table(table, trx);
+ ut_a(!is_system_tablespace(table->space_id));
+ ut_ad(!table->n_foreign_key_checks_running);
+ }
+
+ return(table);
+}
+
+/*********************************************************************//**
+Do the foreign key constraint checks.
+@return DB_SUCCESS or error code. */
+static
+dberr_t
+row_discard_tablespace_foreign_key_checks(
+/*======================================*/
+ const trx_t* trx, /*!< in: transaction handle */
+ const dict_table_t* table) /*!< in: table to be discarded */
+{
+
+ if (srv_read_only_mode || !trx->check_foreigns) {
+ return(DB_SUCCESS);
+ }
+
+ /* Check if the table is referenced by foreign key constraints from
+ some other table (not the table itself) */
+ dict_foreign_set::const_iterator it
+ = std::find_if(table->referenced_set.begin(),
+ table->referenced_set.end(),
+ dict_foreign_different_tables());
+
+ if (it == table->referenced_set.end()) {
+ return(DB_SUCCESS);
+ }
+
+ const dict_foreign_t* foreign = *it;
+ FILE* ef = dict_foreign_err_file;
+
+ ut_ad(foreign->foreign_table != table);
+ ut_ad(foreign->referenced_table == table);
+
+ /* We only allow discarding a referenced table if
+ FOREIGN_KEY_CHECKS is set to 0 */
+
+ mutex_enter(&dict_foreign_err_mutex);
+
+ rewind(ef);
+
+ ut_print_timestamp(ef);
+
+ fputs(" Cannot DISCARD table ", ef);
+ ut_print_name(ef, trx, table->name.m_name);
+ fputs("\n"
+ "because it is referenced by ", ef);
+ ut_print_name(ef, trx, foreign->foreign_table_name);
+ putc('\n', ef);
+
+ mutex_exit(&dict_foreign_err_mutex);
+
+ return(DB_CANNOT_DROP_CONSTRAINT);
+}
+
+/*********************************************************************//**
+Cleanup after the DISCARD TABLESPACE operation.
+@return error code. */
+static
+dberr_t
+row_discard_tablespace_end(
+/*=======================*/
+ trx_t* trx, /*!< in/out: transaction handle */
+ dict_table_t* table, /*!< in/out: table to be discarded */
+ dberr_t err) /*!< in: error code */
+{
+ if (table != 0) {
+ dict_table_close(table, TRUE, FALSE);
+ }
+
+ DBUG_EXECUTE_IF("ib_discard_before_commit_crash",
+ log_buffer_flush_to_disk();
+ DBUG_SUICIDE(););
+
+ trx_commit_for_mysql(trx);
+
+ DBUG_EXECUTE_IF("ib_discard_after_commit_crash",
+ log_buffer_flush_to_disk();
+ DBUG_SUICIDE(););
+
+ row_mysql_unlock_data_dictionary(trx);
+
+ trx->op_info = "";
+
+ return(err);
+}
+
+/*********************************************************************//**
+Do the DISCARD TABLESPACE operation.
+@return DB_SUCCESS or error code. */
+static
+dberr_t
+row_discard_tablespace(
+/*===================*/
+ trx_t* trx, /*!< in/out: transaction handle */
+ dict_table_t* table) /*!< in/out: table to be discarded */
+{
+ dberr_t err;
+
+ /* How do we prevent crashes caused by ongoing operations on
+ the table? Old operations could try to access non-existent
+ pages. MySQL will block all DML on the table using MDL and a
+ DISCARD will not start unless all existing operations on the
+ table to be discarded are completed.
+
+ 1) Acquire the data dictionary latch in X mode. To prevent any
+ internal operations that MySQL is not aware off and also for
+ the internal SQL parser.
+
+ 2) Purge and rollback: we assign a new table id for the
+ table. Since purge and rollback look for the table based on
+ the table id, they see the table as 'dropped' and discard
+ their operations.
+
+ 3) Insert buffer: we remove all entries for the tablespace in
+ the insert buffer tree. */
+
+ ibuf_delete_for_discarded_space(table->space_id);
+
+ table_id_t new_id;
+
+ /* Set the TABLESPACE DISCARD flag in the table definition
+ on disk. */
+ err = row_import_update_discarded_flag(trx, table->id, true);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ /* Update the index root pages in the system tables, on disk */
+ err = row_import_update_index_root(trx, table, true);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ /* Drop all the FTS auxiliary tables. */
+ if (dict_table_has_fts_index(table)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
+
+ fts_drop_tables(trx, table);
+ }
+
+ /* Assign a new space ID to the table definition so that purge
+ can ignore the changes. Update the system table on disk. */
+
+ err = row_mysql_table_id_reassign(table, trx, &new_id);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ /* Discard the physical file that is used for the tablespace. */
+ err = fil_delete_tablespace(table->space_id);
+ switch (err) {
+ case DB_IO_ERROR:
+ ib::warn() << "ALTER TABLE " << table->name
+ << " DISCARD TABLESPACE failed to delete file";
+ break;
+ case DB_TABLESPACE_NOT_FOUND:
+ ib::warn() << "ALTER TABLE " << table->name
+ << " DISCARD TABLESPACE failed to find tablespace";
+ break;
+ case DB_SUCCESS:
+ break;
+ default:
+ ut_error;
+ }
+
+ /* All persistent operations successful, update the
+ data dictionary memory cache. */
+
+ table->file_unreadable = true;
+ table->space = NULL;
+ table->flags2 |= DICT_TF2_DISCARDED;
+ dict_table_change_id_in_cache(table, new_id);
+
+ dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
+ if (index) index->clear_instant_alter();
+
+ /* Reset the root page numbers. */
+ for (; index; index = UT_LIST_GET_NEXT(indexes, index)) {
+ index->page = FIL_NULL;
+ }
+
+ /* If the tablespace did not already exist or we couldn't
+ write to it, we treat that as a successful DISCARD. It is
+ unusable anyway. */
+ return DB_SUCCESS;
+}
+
+/*********************************************************************//**
+Discards the tablespace of a table which stored in an .ibd file. Discarding
+means that this function renames the .ibd file and assigns a new table id for
+the table. Also the file_unreadable flag is set.
+@return error code or DB_SUCCESS */
+dberr_t
+row_discard_tablespace_for_mysql(
+/*=============================*/
+ const char* name, /*!< in: table name */
+ trx_t* trx) /*!< in: transaction handle */
+{
+ dberr_t err;
+ dict_table_t* table;
+
+ /* Open the table and start the transaction if not started. */
+
+ table = row_discard_tablespace_begin(name, trx);
+
+ if (table == 0) {
+ err = DB_TABLE_NOT_FOUND;
+ } else if (table->is_temporary()) {
+
+ ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_CANNOT_DISCARD_TEMPORARY_TABLE);
+
+ err = DB_ERROR;
+
+ } else if (table->space_id == TRX_SYS_SPACE) {
+ char table_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(
+ table_name, sizeof(table_name),
+ table->name.m_name);
+
+ ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_IN_SYSTEM_TABLESPACE, table_name);
+
+ err = DB_ERROR;
+
+ } else {
+ ut_ad(!table->n_foreign_key_checks_running);
+
+ bool fts_exist = (dict_table_has_fts_index(table)
+ || DICT_TF2_FLAG_IS_SET(
+ table, DICT_TF2_FTS_HAS_DOC_ID));
+
+ if (fts_exist) {
+ row_mysql_unlock_data_dictionary(trx);
+ fts_optimize_remove_table(table);
+ row_mysql_lock_data_dictionary(trx);
+ }
+
+ /* Do foreign key constraint checks. */
+
+ err = row_discard_tablespace_foreign_key_checks(trx, table);
+
+ if (err == DB_SUCCESS) {
+ /* Note: This cannot be rolled back.
+ Rollback would see the UPDATE SYS_INDEXES
+ as two operations: DELETE and INSERT.
+ It would invoke btr_free_if_exists()
+ when rolling back the INSERT, effectively
+ dropping all indexes of the table. */
+ err = row_discard_tablespace(trx, table);
+ }
+
+ if (fts_exist && err != DB_SUCCESS) {
+ fts_optimize_add_table(table);
+ }
+ }
+
+ return(row_discard_tablespace_end(trx, table, err));
+}
+
+/*********************************************************************//**
+Sets an exclusive lock on a table.
+@return error code or DB_SUCCESS */
+dberr_t
+row_mysql_lock_table(
+/*=================*/
+ trx_t* trx, /*!< in/out: transaction */
+ dict_table_t* table, /*!< in: table to lock */
+ enum lock_mode mode, /*!< in: LOCK_X or LOCK_S */
+ const char* op_info) /*!< in: string for trx->op_info */
+{
+ mem_heap_t* heap;
+ que_thr_t* thr;
+ dberr_t err;
+ sel_node_t* node;
+
+ ut_ad(mode == LOCK_X || mode == LOCK_S);
+
+ heap = mem_heap_create(512);
+
+ trx->op_info = op_info;
+
+ node = sel_node_create(heap);
+ thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
+ thr->graph->state = QUE_FORK_ACTIVE;
+
+ /* We use the select query graph as the dummy graph needed
+ in the lock module call */
+
+ thr = que_fork_get_first_thr(
+ static_cast<que_fork_t*>(que_node_get_parent(thr)));
+
+ thr->start_running();
+
+run_again:
+ thr->run_node = thr;
+ thr->prev_node = thr->common.parent;
+
+ err = lock_table(0, table, mode, thr);
+
+ trx->error_state = err;
+
+ if (err == DB_SUCCESS) {
+ thr->stop_no_error();
+ } else {
+ que_thr_stop_for_mysql(thr);
+
+ if (row_mysql_handle_errors(&err, trx, thr, NULL)) {
+ goto run_again;
+ }
+ }
+
+ que_graph_free(thr->graph);
+ trx->op_info = "";
+
+ return(err);
+}
+
+/** Drop ancillary FTS tables as part of dropping a table.
+@param[in,out] table Table cache entry
+@param[in,out] trx Transaction handle
+@return error code or DB_SUCCESS */
+UNIV_INLINE
+dberr_t
+row_drop_ancillary_fts_tables(
+ dict_table_t* table,
+ trx_t* trx)
+{
+ /* Drop ancillary FTS tables */
+ if (dict_table_has_fts_index(table)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
+
+ ut_ad(table->get_ref_count() == 0);
+ ut_ad(trx_is_started(trx));
+
+ dberr_t err = fts_drop_tables(trx, table);
+
+ if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+ ib::error() << " Unable to remove ancillary FTS"
+ " tables for table "
+ << table->name << " : " << err;
+
+ return(err);
+ }
+ }
+
+ /* The table->fts flag can be set on the table for which
+ the cluster index is being rebuilt. Such table might not have
+ DICT_TF2_FTS flag set. So keep this out of above
+ dict_table_has_fts_index condition */
+ if (table->fts != NULL) {
+ /* fts_que_graph_free_check_lock would try to acquire
+ dict mutex lock */
+ table->fts->dict_locked = true;
+
+ fts_free(table);
+ }
+
+ return(DB_SUCCESS);
+}
+
+/** Drop a table from the memory cache as part of dropping a table.
+@param[in] tablename A copy of table->name. Used when table == null
+@param[in,out] table Table cache entry
+@param[in,out] trx Transaction handle
+@return error code or DB_SUCCESS */
+UNIV_INLINE
+dberr_t
+row_drop_table_from_cache(
+ const char* tablename,
+ dict_table_t* table,
+ trx_t* trx)
+{
+ dberr_t err = DB_SUCCESS;
+ ut_ad(!table->is_temporary());
+
+ /* Remove the pointer to this table object from the list
+ of modified tables by the transaction because the object
+ is going to be destroyed below. */
+ trx->mod_tables.erase(table);
+
+ dict_sys.remove(table);
+
+ if (dict_load_table(tablename, DICT_ERR_IGNORE_FK_NOKEY)) {
+ ib::error() << "Not able to remove table "
+ << ut_get_name(trx, tablename)
+ << " from the dictionary cache!";
+ err = DB_ERROR;
+ }
+
+ return(err);
+}
+
+/** Drop a table for MySQL.
+If the data dictionary was not already locked by the transaction,
+the transaction will be committed. Otherwise, the data dictionary
+will remain locked.
+@param[in] name Table name
+@param[in,out] trx Transaction handle
+@param[in] sqlcom type of SQL operation
+@param[in] create_failed true=create table failed
+ because e.g. foreign key column
+@param[in] nonatomic Whether it is permitted to release
+ and reacquire dict_sys.latch
+@return error code or DB_SUCCESS */
+dberr_t
+row_drop_table_for_mysql(
+ const char* name,
+ trx_t* trx,
+ enum_sql_command sqlcom,
+ bool create_failed,
+ bool nonatomic)
+{
+ dberr_t err;
+ dict_foreign_t* foreign;
+ dict_table_t* table;
+ char* tablename = NULL;
+ bool locked_dictionary = false;
+ pars_info_t* info = NULL;
+ mem_heap_t* heap = NULL;
+
+
+ DBUG_ENTER("row_drop_table_for_mysql");
+ DBUG_PRINT("row_drop_table_for_mysql", ("table: '%s'", name));
+
+ ut_a(name != NULL);
+
+ /* Serialize data dictionary operations with dictionary mutex:
+ no deadlocks can occur then in these operations */
+
+ trx->op_info = "dropping table";
+
+ if (trx->dict_operation_lock_mode != RW_X_LATCH) {
+ /* Prevent foreign key checks etc. while we are
+ dropping the table */
+
+ row_mysql_lock_data_dictionary(trx);
+
+ locked_dictionary = true;
+ nonatomic = true;
+ }
+
+ ut_d(dict_sys.assert_locked());
+
+ table = dict_table_open_on_name(
+ name, TRUE, FALSE,
+ static_cast<dict_err_ignore_t>(
+ DICT_ERR_IGNORE_INDEX_ROOT
+ | DICT_ERR_IGNORE_CORRUPT));
+
+ if (!table) {
+ if (locked_dictionary) {
+ row_mysql_unlock_data_dictionary(trx);
+ }
+ trx->op_info = "";
+ DBUG_RETURN(DB_TABLE_NOT_FOUND);
+ }
+
+ std::vector<pfs_os_file_t> detached_handles;
+
+ const bool is_temp_name = strstr(table->name.m_name,
+ "/" TEMP_FILE_PREFIX);
+
+ if (table->is_temporary()) {
+ ut_ad(table->space == fil_system.temp_space);
+ for (dict_index_t* index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+ btr_free(page_id_t(SRV_TMP_SPACE_ID, index->page));
+ }
+ /* Remove the pointer to this table object from the list
+ of modified tables by the transaction because the object
+ is going to be destroyed below. */
+ trx->mod_tables.erase(table);
+ table->release();
+ dict_sys.remove(table);
+ err = DB_SUCCESS;
+ goto funct_exit_all_freed;
+ }
+
+ /* This function is called recursively via fts_drop_tables(). */
+ if (!trx_is_started(trx)) {
+ trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
+ }
+
+ /* Turn on this drop bit before we could release the dictionary
+ latch */
+ table->to_be_dropped = true;
+
+ if (nonatomic) {
+ /* This trx did not acquire any locks on dictionary
+ table records yet. Thus it is safe to release and
+ reacquire the data dictionary latches. */
+ if (table->fts) {
+ row_mysql_unlock_data_dictionary(trx);
+ fts_optimize_remove_table(table);
+ row_mysql_lock_data_dictionary(trx);
+ }
+
+ dict_stats_wait_bg_to_stop_using_table(table, trx);
+ }
+
+ /* make sure background stats thread is not running on the table */
+ ut_ad(!(table->stats_bg_flag & BG_STAT_IN_PROGRESS));
+ if (!table->no_rollback()) {
+ if (table->space != fil_system.sys_space) {
+ /* Delete the link file if used. */
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ RemoteDatafile::delete_link_file(name);
+ }
+ }
+
+ dict_stats_recalc_pool_del(table);
+ dict_stats_defrag_pool_del(table, NULL);
+ if (btr_defragment_active) {
+ /* During fts_drop_orphaned_tables() the
+ btr_defragment_mutex has not yet been
+ initialized by btr_defragment_init(). */
+ btr_defragment_remove_table(table);
+ }
+
+ if (UNIV_LIKELY(!strstr(name, "/" TEMP_FILE_PREFIX_INNODB))) {
+ /* Remove any persistent statistics for this table,
+ in a separate transaction. */
+ char errstr[1024];
+ err = dict_stats_drop_table(name, errstr,
+ sizeof errstr);
+ if (err != DB_SUCCESS) {
+ ib::warn() << errstr;
+ }
+ }
+ }
+
+ dict_table_prevent_eviction(table);
+ dict_table_close(table, TRUE, FALSE);
+
+ /* Check if the table is referenced by foreign key constraints from
+ some other table (not the table itself) */
+
+ if (!srv_read_only_mode && trx->check_foreigns) {
+
+ for (dict_foreign_set::iterator it
+ = table->referenced_set.begin();
+ it != table->referenced_set.end();
+ ++it) {
+
+ foreign = *it;
+
+ const bool ref_ok = sqlcom == SQLCOM_DROP_DB
+ && dict_tables_have_same_db(
+ name,
+ foreign->foreign_table_name_lookup);
+
+ /* We should allow dropping a referenced table if creating
+ that referenced table has failed for some reason. For example
+ if referenced table is created but it column types that are
+ referenced do not match. */
+ if (foreign->foreign_table != table &&
+ !create_failed && !ref_ok) {
+
+ FILE* ef = dict_foreign_err_file;
+
+ /* We only allow dropping a referenced table
+ if FOREIGN_KEY_CHECKS is set to 0 */
+
+ err = DB_CANNOT_DROP_CONSTRAINT;
+
+ mutex_enter(&dict_foreign_err_mutex);
+ rewind(ef);
+ ut_print_timestamp(ef);
+
+ fputs(" Cannot drop table ", ef);
+ ut_print_name(ef, trx, name);
+ fputs("\n"
+ "because it is referenced by ", ef);
+ ut_print_name(ef, trx,
+ foreign->foreign_table_name);
+ putc('\n', ef);
+ mutex_exit(&dict_foreign_err_mutex);
+
+ goto funct_exit;
+ }
+ }
+ }
+
+ DBUG_EXECUTE_IF("row_drop_table_add_to_background", goto defer;);
+
+ /* TODO: could we replace the counter n_foreign_key_checks_running
+ with lock checks on the table? Acquire here an exclusive lock on the
+ table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that
+ they can cope with the table having been dropped here? Foreign key
+ checks take an IS or IX lock on the table. */
+
+ if (table->n_foreign_key_checks_running > 0) {
+defer:
+ /* Rename #sql-backup to #sql-ib if table has open ref count
+ while dropping the table. This scenario can happen
+ when purge thread is waiting for dict_sys.mutex so
+ that it could close the table. But drop table acquires
+ dict_sys.mutex.
+ In the future this should use 'tmp_file_prefix'!
+ */
+ if (!is_temp_name
+ || strstr(table->name.m_name, "/#sql-backup-")) {
+ heap = mem_heap_create(FN_REFLEN);
+ const char* tmp_name
+ = dict_mem_create_temporary_tablename(
+ heap, table->name.m_name, table->id);
+ ib::info() << "Deferring DROP TABLE " << table->name
+ << "; renaming to " << tmp_name;
+ err = row_rename_table_for_mysql(
+ table->name.m_name, tmp_name, trx,
+ false, false);
+ } else {
+ err = DB_SUCCESS;
+ }
+ if (err == DB_SUCCESS) {
+ row_add_table_to_background_drop_list(table->id);
+ }
+ goto funct_exit;
+ }
+
+ /* Remove all locks that are on the table or its records, if there
+ are no references to the table but it has record locks, we release
+ the record locks unconditionally. One use case is:
+
+ CREATE TABLE t2 (PRIMARY KEY (a)) SELECT * FROM t1;
+
+ If after the user transaction has done the SELECT and there is a
+ problem in completing the CREATE TABLE operation, MySQL will drop
+ the table. InnoDB will create a new background transaction to do the
+ actual drop, the trx instance that is passed to this function. To
+ preserve existing behaviour we remove the locks but ideally we
+ shouldn't have to. There should never be record locks on a table
+ that is going to be dropped. */
+
+ if (table->get_ref_count() > 0 || table->n_rec_locks > 0
+ || lock_table_has_locks(table)) {
+ goto defer;
+ }
+
+ /* The "to_be_dropped" marks table that is to be dropped, but
+ has not been dropped, instead, was put in the background drop
+ list due to being used by concurrent DML operations. Clear it
+ here since there are no longer any concurrent activities on it,
+ and it is free to be dropped */
+ table->to_be_dropped = false;
+
+ switch (trx_get_dict_operation(trx)) {
+ case TRX_DICT_OP_NONE:
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ trx->table_id = table->id;
+ case TRX_DICT_OP_TABLE:
+ break;
+ case TRX_DICT_OP_INDEX:
+ /* If the transaction was previously flagged as
+ TRX_DICT_OP_INDEX, we should be dropping auxiliary
+ tables for full-text indexes. */
+ ut_ad(strstr(table->name.m_name, "/FTS_"));
+ }
+
+ /* Mark all indexes unavailable in the data dictionary cache
+ before starting to drop the table. */
+
+ unsigned* page_no;
+ unsigned* page_nos;
+ heap = mem_heap_create(
+ 200 + UT_LIST_GET_LEN(table->indexes) * sizeof *page_nos);
+ tablename = mem_heap_strdup(heap, name);
+
+ page_no = page_nos = static_cast<unsigned*>(
+ mem_heap_alloc(
+ heap,
+ UT_LIST_GET_LEN(table->indexes) * sizeof *page_no));
+
+ for (dict_index_t* index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+ rw_lock_x_lock(dict_index_get_lock(index));
+ /* Save the page numbers so that we can restore them
+ if the operation fails. */
+ *page_no++ = index->page;
+ /* Mark the index unusable. */
+ index->page = FIL_NULL;
+ rw_lock_x_unlock(dict_index_get_lock(index));
+ }
+
+ /* Deleting a row from SYS_INDEXES table will invoke
+ dict_drop_index_tree(). */
+ info = pars_info_create();
+
+ pars_info_add_str_literal(info, "name", name);
+
+ if (sqlcom != SQLCOM_TRUNCATE
+ && strchr(name, '/')
+ && dict_table_get_low("SYS_FOREIGN")
+ && dict_table_get_low("SYS_FOREIGN_COLS")) {
+ err = que_eval_sql(
+ info,
+ "PROCEDURE DROP_FOREIGN_PROC () IS\n"
+ "fid CHAR;\n"
+
+ "DECLARE CURSOR fk IS\n"
+ "SELECT ID FROM SYS_FOREIGN\n"
+ "WHERE FOR_NAME = :name\n"
+ "AND TO_BINARY(FOR_NAME) = TO_BINARY(:name)\n"
+ "FOR UPDATE;\n"
+
+ "BEGIN\n"
+ "OPEN fk;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH fk INTO fid;\n"
+ " IF (SQL % NOTFOUND) THEN RETURN; END IF;\n"
+ " DELETE FROM SYS_FOREIGN_COLS WHERE ID=fid;\n"
+ " DELETE FROM SYS_FOREIGN WHERE ID=fid;\n"
+ "END LOOP;\n"
+ "CLOSE fk;\n"
+ "END;\n", FALSE, trx);
+ if (err == DB_SUCCESS) {
+ info = pars_info_create();
+ pars_info_add_str_literal(info, "name", name);
+ goto do_drop;
+ }
+ } else {
+do_drop:
+ if (dict_table_get_low("SYS_VIRTUAL")) {
+ err = que_eval_sql(
+ info,
+ "PROCEDURE DROP_VIRTUAL_PROC () IS\n"
+ "tid CHAR;\n"
+
+ "BEGIN\n"
+ "SELECT ID INTO tid FROM SYS_TABLES\n"
+ "WHERE NAME = :name FOR UPDATE;\n"
+ "IF (SQL % NOTFOUND) THEN RETURN;"
+ " END IF;\n"
+ "DELETE FROM SYS_VIRTUAL"
+ " WHERE TABLE_ID = tid;\n"
+ "END;\n", FALSE, trx);
+ if (err == DB_SUCCESS) {
+ info = pars_info_create();
+ pars_info_add_str_literal(
+ info, "name", name);
+ }
+ } else {
+ err = DB_SUCCESS;
+ }
+
+ err = err == DB_SUCCESS ? que_eval_sql(
+ info,
+ "PROCEDURE DROP_TABLE_PROC () IS\n"
+ "tid CHAR;\n"
+ "iid CHAR;\n"
+
+ "DECLARE CURSOR cur_idx IS\n"
+ "SELECT ID FROM SYS_INDEXES\n"
+ "WHERE TABLE_ID = tid FOR UPDATE;\n"
+
+ "BEGIN\n"
+ "SELECT ID INTO tid FROM SYS_TABLES\n"
+ "WHERE NAME = :name FOR UPDATE;\n"
+ "IF (SQL % NOTFOUND) THEN RETURN; END IF;\n"
+
+ "OPEN cur_idx;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH cur_idx INTO iid;\n"
+ " IF (SQL % NOTFOUND) THEN EXIT; END IF;\n"
+ " DELETE FROM SYS_FIELDS\n"
+ " WHERE INDEX_ID = iid;\n"
+ " DELETE FROM SYS_INDEXES\n"
+ " WHERE ID = iid AND TABLE_ID = tid;\n"
+ "END LOOP;\n"
+ "CLOSE cur_idx;\n"
+
+ "DELETE FROM SYS_COLUMNS WHERE TABLE_ID=tid;\n"
+ "DELETE FROM SYS_TABLES WHERE NAME=:name;\n"
+
+ "END;\n", FALSE, trx) : err;
+
+ if (err == DB_SUCCESS && table->space
+ && dict_table_get_low("SYS_TABLESPACES")
+ && dict_table_get_low("SYS_DATAFILES")) {
+ info = pars_info_create();
+ pars_info_add_int4_literal(info, "id",
+ lint(table->space_id));
+ err = que_eval_sql(
+ info,
+ "PROCEDURE DROP_SPACE_PROC () IS\n"
+ "BEGIN\n"
+ "DELETE FROM SYS_TABLESPACES\n"
+ "WHERE SPACE = :id;\n"
+ "DELETE FROM SYS_DATAFILES\n"
+ "WHERE SPACE = :id;\n"
+ "END;\n", FALSE, trx);
+ }
+ }
+
+ switch (err) {
+ fil_space_t* space;
+ char* filepath;
+ case DB_SUCCESS:
+ if (!table->no_rollback()) {
+ err = row_drop_ancillary_fts_tables(table, trx);
+ if (err != DB_SUCCESS) {
+ break;
+ }
+ }
+
+ space = table->space;
+ ut_ad(!space || space->id == table->space_id);
+ /* Determine the tablespace filename before we drop
+ dict_table_t. */
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ dict_get_and_save_data_dir_path(table, true);
+ ut_ad(table->data_dir_path || !space);
+ filepath = space ? NULL : fil_make_filepath(
+ table->data_dir_path,
+ table->name.m_name, IBD,
+ table->data_dir_path != NULL);
+ } else {
+ filepath = space ? NULL : fil_make_filepath(
+ NULL, table->name.m_name, IBD, false);
+ }
+
+ /* Free the dict_table_t object. */
+ err = row_drop_table_from_cache(tablename, table, trx);
+ if (err != DB_SUCCESS) {
+ ut_free(filepath);
+ break;
+ }
+
+ /* Do not attempt to drop known-to-be-missing tablespaces,
+ nor the system tablespace. */
+ if (!space) {
+ fil_delete_file(filepath);
+ ut_free(filepath);
+ break;
+ }
+
+ ut_ad(!filepath);
+
+ if (space->id != TRX_SYS_SPACE) {
+ err = fil_delete_tablespace(space->id, false,
+ &detached_handles);
+ }
+ break;
+
+ case DB_OUT_OF_FILE_SPACE:
+ err = DB_MUST_GET_MORE_FILE_SPACE;
+ trx->error_state = err;
+ row_mysql_handle_errors(&err, trx, NULL, NULL);
+
+ /* raise error */
+ ut_error;
+ break;
+
+ case DB_TOO_MANY_CONCURRENT_TRXS:
+ /* Cannot even find a free slot for the
+ the undo log. We can directly exit here
+ and return the DB_TOO_MANY_CONCURRENT_TRXS
+ error. */
+
+ default:
+ /* This is some error we do not expect. Print
+ the error number and rollback the transaction */
+ ib::error() << "Unknown error code " << err << " while"
+ " dropping table: "
+ << ut_get_name(trx, tablename) << ".";
+
+ trx->error_state = DB_SUCCESS;
+ trx->rollback();
+ trx->error_state = DB_SUCCESS;
+
+ /* Mark all indexes available in the data dictionary
+ cache again. */
+
+ page_no = page_nos;
+
+ for (dict_index_t* index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+ rw_lock_x_lock(dict_index_get_lock(index));
+ ut_a(index->page == FIL_NULL);
+ index->page = *page_no++;
+ rw_lock_x_unlock(dict_index_get_lock(index));
+ }
+ }
+
+ if (err != DB_SUCCESS && table != NULL) {
+ /* Drop table has failed with error but as drop table is not
+ transaction safe we should mark the table as corrupted to avoid
+ unwarranted follow-up action on this table that can result
+ in more serious issues. */
+
+ table->corrupted = true;
+ for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
+ index != NULL;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+ dict_set_corrupted(index, trx, "DROP TABLE");
+ }
+ }
+
+funct_exit:
+ if (heap) {
+ mem_heap_free(heap);
+ }
+
+funct_exit_all_freed:
+ if (locked_dictionary) {
+
+ if (trx_is_started(trx)) {
+
+ trx_commit_for_mysql(trx);
+ }
+
+ /* Add the table to fts queue if drop table fails */
+ if (err != DB_SUCCESS && table->fts) {
+ fts_optimize_add_table(table);
+ }
+
+ row_mysql_unlock_data_dictionary(trx);
+ }
+
+ for (const auto& handle : detached_handles) {
+ ut_ad(handle != OS_FILE_CLOSED);
+ os_file_close(handle);
+ }
+
+ trx->op_info = "";
+
+ DBUG_RETURN(err);
+}
+
+/** Drop a table after failed CREATE TABLE. */
+dberr_t row_drop_table_after_create_fail(const char* name, trx_t* trx)
+{
+ ib::warn() << "Dropping incompletely created " << name << " table.";
+ return row_drop_table_for_mysql(name, trx, SQLCOM_DROP_DB, true);
+}
+
+/*******************************************************************//**
+Drop all foreign keys in a database, see Bug#18942.
+Called at the end of row_drop_database_for_mysql().
+@return error code or DB_SUCCESS */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+drop_all_foreign_keys_in_db(
+/*========================*/
+ const char* name, /*!< in: database name which ends to '/' */
+ trx_t* trx) /*!< in: transaction handle */
+{
+ pars_info_t* pinfo;
+ dberr_t err;
+
+ ut_a(name[strlen(name) - 1] == '/');
+
+ pinfo = pars_info_create();
+
+ pars_info_add_str_literal(pinfo, "dbname", name);
+
+/** true if for_name is not prefixed with dbname */
+#define TABLE_NOT_IN_THIS_DB \
+"SUBSTR(for_name, 0, LENGTH(:dbname)) <> :dbname"
+
+ err = que_eval_sql(pinfo,
+ "PROCEDURE DROP_ALL_FOREIGN_KEYS_PROC () IS\n"
+ "foreign_id CHAR;\n"
+ "for_name CHAR;\n"
+ "found INT;\n"
+ "DECLARE CURSOR cur IS\n"
+ "SELECT ID, FOR_NAME FROM SYS_FOREIGN\n"
+ "WHERE FOR_NAME >= :dbname\n"
+ "LOCK IN SHARE MODE\n"
+ "ORDER BY FOR_NAME;\n"
+ "BEGIN\n"
+ "found := 1;\n"
+ "OPEN cur;\n"
+ "WHILE found = 1 LOOP\n"
+ " FETCH cur INTO foreign_id, for_name;\n"
+ " IF (SQL % NOTFOUND) THEN\n"
+ " found := 0;\n"
+ " ELSIF (" TABLE_NOT_IN_THIS_DB ") THEN\n"
+ " found := 0;\n"
+ " ELSIF (1=1) THEN\n"
+ " DELETE FROM SYS_FOREIGN_COLS\n"
+ " WHERE ID = foreign_id;\n"
+ " DELETE FROM SYS_FOREIGN\n"
+ " WHERE ID = foreign_id;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE cur;\n"
+ "COMMIT WORK;\n"
+ "END;\n",
+ FALSE, /* do not reserve dict mutex,
+ we are already holding it */
+ trx);
+
+ return(err);
+}
+
+/** Drop a database for MySQL.
+@param[in] name database name which ends at '/'
+@param[in] trx transaction handle
+@param[out] found number of dropped tables/partitions
+@return error code or DB_SUCCESS */
+dberr_t
+row_drop_database_for_mysql(
+ const char* name,
+ trx_t* trx,
+ ulint* found)
+{
+ dict_table_t* table;
+ char* table_name;
+ dberr_t err = DB_SUCCESS;
+ ulint namelen = strlen(name);
+ bool is_partition = false;
+
+ ut_ad(found != NULL);
+
+ DBUG_ENTER("row_drop_database_for_mysql");
+
+ DBUG_PRINT("row_drop_database_for_mysql", ("db: '%s'", name));
+
+ ut_a(name != NULL);
+ /* Assert DB name or partition name. */
+ if (name[namelen - 1] == '#') {
+ ut_ad(name[namelen - 2] != '/');
+ is_partition = true;
+ trx->op_info = "dropping partitions";
+ } else {
+ ut_a(name[namelen - 1] == '/');
+ trx->op_info = "dropping database";
+ }
+
+ *found = 0;
+
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
+ trx_start_if_not_started_xa(trx, true);
+
+loop:
+ row_mysql_lock_data_dictionary(trx);
+
+ while ((table_name = dict_get_first_table_name_in_db(name))) {
+ /* Drop parent table if it is a fts aux table, to
+ avoid accessing dropped fts aux tables in information
+ scheam when parent table still exists.
+ Note: Drop parent table will drop fts aux tables. */
+ char* parent_table_name = NULL;
+ table_id_t table_id;
+ index_id_t index_id;
+
+ if (fts_check_aux_table(
+ table_name, &table_id, &index_id)) {
+ dict_table_t* parent_table = dict_table_open_on_id(
+ table_id, TRUE, DICT_TABLE_OP_NORMAL);
+ if (parent_table != NULL) {
+ parent_table_name = mem_strdupl(
+ parent_table->name.m_name,
+ strlen(parent_table->name.m_name));
+ dict_table_close(parent_table, TRUE, FALSE);
+ }
+ }
+
+ if (parent_table_name != NULL) {
+ ut_free(table_name);
+ table_name = parent_table_name;
+ }
+
+ ut_a(memcmp(table_name, name, namelen) == 0);
+
+ table = dict_table_open_on_name(
+ table_name, TRUE, FALSE, static_cast<dict_err_ignore_t>(
+ DICT_ERR_IGNORE_INDEX_ROOT
+ | DICT_ERR_IGNORE_CORRUPT));
+
+ if (!table) {
+ ib::error() << "Cannot load table " << table_name
+ << " from InnoDB internal data dictionary"
+ " during drop database";
+ ut_free(table_name);
+ err = DB_TABLE_NOT_FOUND;
+ break;
+
+ }
+
+ if (!table->name.is_temporary()) {
+ /* There could be orphan temp tables left from
+ interrupted alter table. Leave them, and handle
+ the rest.*/
+ if (table->can_be_evicted
+ && (name[namelen - 1] != '#')) {
+ ib::warn() << "Orphan table encountered during"
+ " DROP DATABASE. This is possible if '"
+ << table->name << ".frm' was lost.";
+ }
+
+ if (!table->is_readable() && !table->space) {
+ ib::warn() << "Missing .ibd file for table "
+ << table->name << ".";
+ }
+ }
+
+ dict_table_close(table, TRUE, FALSE);
+
+ /* The dict_table_t object must not be accessed before
+ dict_table_open() or after dict_table_close(). But this is OK
+ if we are holding, the dict_sys.mutex. */
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ /* Disable statistics on the found table. */
+ if (!dict_stats_stop_bg(table)) {
+ row_mysql_unlock_data_dictionary(trx);
+
+ os_thread_sleep(250000);
+
+ ut_free(table_name);
+
+ goto loop;
+ }
+
+ /* Wait until MySQL does not have any queries running on
+ the table */
+
+ if (table->get_ref_count() > 0) {
+ row_mysql_unlock_data_dictionary(trx);
+
+ ib::warn() << "MySQL is trying to drop database "
+ << ut_get_name(trx, name) << " though"
+ " there are still open handles to table "
+ << table->name << ".";
+
+ os_thread_sleep(1000000);
+
+ ut_free(table_name);
+
+ goto loop;
+ }
+
+ err = row_drop_table_for_mysql(
+ table_name, trx, SQLCOM_DROP_DB);
+ trx_commit_for_mysql(trx);
+
+ if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+ ib::error() << "DROP DATABASE "
+ << ut_get_name(trx, name) << " failed"
+ " with error (" << err << ") for"
+ " table " << ut_get_name(trx, table_name);
+ ut_free(table_name);
+ break;
+ }
+
+ ut_free(table_name);
+ (*found)++;
+ }
+
+ /* Partitioning does not yet support foreign keys. */
+ if (err == DB_SUCCESS && !is_partition) {
+ /* after dropping all tables try to drop all leftover
+ foreign keys in case orphaned ones exist */
+ err = drop_all_foreign_keys_in_db(name, trx);
+
+ if (err != DB_SUCCESS) {
+ const std::string& db = ut_get_name(trx, name);
+ ib::error() << "DROP DATABASE " << db << " failed with"
+ " error " << err << " while dropping all"
+ " foreign keys";
+ }
+ }
+
+ trx_commit_for_mysql(trx);
+
+ row_mysql_unlock_data_dictionary(trx);
+
+ trx->op_info = "";
+
+ DBUG_RETURN(err);
+}
+
+/****************************************************************//**
+Delete a single constraint.
+@return error code or DB_SUCCESS */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+row_delete_constraint_low(
+/*======================*/
+ const char* id, /*!< in: constraint id */
+ trx_t* trx) /*!< in: transaction handle */
+{
+ pars_info_t* info = pars_info_create();
+
+ pars_info_add_str_literal(info, "id", id);
+
+ return(que_eval_sql(info,
+ "PROCEDURE DELETE_CONSTRAINT () IS\n"
+ "BEGIN\n"
+ "DELETE FROM SYS_FOREIGN_COLS WHERE ID = :id;\n"
+ "DELETE FROM SYS_FOREIGN WHERE ID = :id;\n"
+ "END;\n"
+ , FALSE, trx));
+}
+
+/****************************************************************//**
+Delete a single constraint.
+@return error code or DB_SUCCESS */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+row_delete_constraint(
+/*==================*/
+ const char* id, /*!< in: constraint id */
+ const char* database_name, /*!< in: database name, with the
+ trailing '/' */
+ mem_heap_t* heap, /*!< in: memory heap */
+ trx_t* trx) /*!< in: transaction handle */
+{
+ dberr_t err;
+
+ /* New format constraints have ids <databasename>/<constraintname>. */
+ err = row_delete_constraint_low(
+ mem_heap_strcat(heap, database_name, id), trx);
+
+ if ((err == DB_SUCCESS) && !strchr(id, '/')) {
+ /* Old format < 4.0.18 constraints have constraint ids
+ NUMBER_NUMBER. We only try deleting them if the
+ constraint name does not contain a '/' character, otherwise
+ deleting a new format constraint named 'foo/bar' from
+ database 'baz' would remove constraint 'bar' from database
+ 'foo', if it existed. */
+
+ err = row_delete_constraint_low(id, trx);
+ }
+
+ return(err);
+}
+
+/*********************************************************************//**
+Renames a table for MySQL.
+@return error code or DB_SUCCESS */
+dberr_t
+row_rename_table_for_mysql(
+/*=======================*/
+ const char* old_name, /*!< in: old table name */
+ const char* new_name, /*!< in: new table name */
+ trx_t* trx, /*!< in/out: transaction */
+ bool commit, /*!< in: whether to commit trx */
+ bool use_fk) /*!< in: whether to parse and enforce
+ FOREIGN KEY constraints */
+{
+ dict_table_t* table = NULL;
+ dberr_t err = DB_ERROR;
+ mem_heap_t* heap = NULL;
+ const char** constraints_to_drop = NULL;
+ ulint n_constraints_to_drop = 0;
+ ibool old_is_tmp, new_is_tmp;
+ pars_info_t* info = NULL;
+ int retry;
+ bool aux_fts_rename = false;
+ char* is_part = NULL;
+
+ ut_a(old_name != NULL);
+ ut_a(new_name != NULL);
+ ut_ad(trx->state == TRX_STATE_ACTIVE);
+ const bool dict_locked = trx->dict_operation_lock_mode == RW_X_LATCH;
+ ut_ad(!commit || dict_locked);
+
+ if (high_level_read_only) {
+ return(DB_READ_ONLY);
+
+ } else if (row_mysql_is_system_table(new_name)) {
+
+ ib::error() << "Trying to create a MySQL system table "
+ << new_name << " of type InnoDB. MySQL system tables"
+ " must be of the MyISAM type!";
+
+ goto funct_exit;
+ }
+
+ trx->op_info = "renaming table";
+
+ old_is_tmp = dict_table_t::is_temporary_name(old_name);
+ new_is_tmp = dict_table_t::is_temporary_name(new_name);
+
+ table = dict_table_open_on_name(old_name, dict_locked, FALSE,
+ DICT_ERR_IGNORE_FK_NOKEY);
+
+ /* We look for pattern #P# to see if the table is partitioned
+ MySQL table. */
+#ifdef __WIN__
+ is_part = strstr((char *)old_name, (char *)"#p#");
+#else
+ is_part = strstr((char *)old_name, (char *)"#P#");
+#endif /* __WIN__ */
+
+ /* MySQL partition engine hard codes the file name
+ separator as "#P#". The text case is fixed even if
+ lower_case_table_names is set to 1 or 2. This is true
+ for sub-partition names as well. InnoDB always
+ normalises file names to lower case on Windows, this
+ can potentially cause problems when copying/moving
+ tables between platforms.
+
+ 1) If boot against an installation from Windows
+ platform, then its partition table name could
+ be all be in lower case in system tables. So we
+ will need to check lower case name when load table.
+
+ 2) If we boot an installation from other case
+ sensitive platform in Windows, we might need to
+ check the existence of table name without lowering
+ case them in the system table. */
+ if (!table &&
+ is_part &&
+ innobase_get_lower_case_table_names() == 1) {
+ char par_case_name[MAX_FULL_NAME_LEN + 1];
+#ifndef __WIN__
+ /* Check for the table using lower
+ case name, including the partition
+ separator "P" */
+ memcpy(par_case_name, old_name,
+ strlen(old_name));
+ par_case_name[strlen(old_name)] = 0;
+ innobase_casedn_str(par_case_name);
+#else
+ /* On Windows platfrom, check
+ whether there exists table name in
+ system table whose name is
+ not being normalized to lower case */
+ normalize_table_name_c_low(
+ par_case_name, old_name, FALSE);
+#endif
+ table = dict_table_open_on_name(par_case_name, dict_locked, FALSE,
+ DICT_ERR_IGNORE_FK_NOKEY);
+ }
+
+ if (!table) {
+ err = DB_TABLE_NOT_FOUND;
+ goto funct_exit;
+
+ } else if (!table->is_readable() && !table->space
+ && !(table->flags2 & DICT_TF2_DISCARDED)) {
+
+ err = DB_TABLE_NOT_FOUND;
+
+ ib::error() << "Table " << old_name << " does not have an .ibd"
+ " file in the database directory. "
+ << TROUBLESHOOTING_MSG;
+
+ goto funct_exit;
+
+ } else if (use_fk && !old_is_tmp && new_is_tmp) {
+ /* MySQL is doing an ALTER TABLE command and it renames the
+ original table to a temporary table name. We want to preserve
+ the original foreign key constraint definitions despite the
+ name change. An exception is those constraints for which
+ the ALTER TABLE contained DROP FOREIGN KEY <foreign key id>.*/
+
+ heap = mem_heap_create(100);
+
+ err = dict_foreign_parse_drop_constraints(
+ heap, trx, table, &n_constraints_to_drop,
+ &constraints_to_drop);
+
+ if (err != DB_SUCCESS) {
+ goto funct_exit;
+ }
+ }
+
+ /* Is a foreign key check running on this table? */
+ for (retry = 0; retry < 100
+ && table->n_foreign_key_checks_running > 0; ++retry) {
+ row_mysql_unlock_data_dictionary(trx);
+ os_thread_yield();
+ row_mysql_lock_data_dictionary(trx);
+ }
+
+ if (table->n_foreign_key_checks_running > 0) {
+ ib::error() << "In ALTER TABLE "
+ << ut_get_name(trx, old_name)
+ << " a FOREIGN KEY check is running. Cannot rename"
+ " table.";
+ err = DB_TABLE_IN_FK_CHECK;
+ goto funct_exit;
+ }
+
+ if (!table->is_temporary()) {
+ if (commit) {
+ dict_stats_wait_bg_to_stop_using_table(table, trx);
+ }
+
+ err = trx_undo_report_rename(trx, table);
+
+ if (err != DB_SUCCESS) {
+ goto funct_exit;
+ }
+ }
+
+ /* We use the private SQL parser of Innobase to generate the query
+ graphs needed in updating the dictionary data from system tables. */
+
+ info = pars_info_create();
+
+ pars_info_add_str_literal(info, "new_table_name", new_name);
+ pars_info_add_str_literal(info, "old_table_name", old_name);
+
+ err = que_eval_sql(info,
+ "PROCEDURE RENAME_TABLE () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLES"
+ " SET NAME = :new_table_name\n"
+ " WHERE NAME = :old_table_name;\n"
+ "END;\n"
+ , FALSE, trx);
+
+ /* Assume the caller guarantees destination name doesn't exist. */
+ ut_ad(err != DB_DUPLICATE_KEY);
+
+ /* SYS_TABLESPACES and SYS_DATAFILES need to be updated if
+ the table is in a single-table tablespace. */
+ if (err != DB_SUCCESS || !dict_table_is_file_per_table(table)) {
+ } else if (table->space) {
+ /* If old path and new path are the same means tablename
+ has not changed and only the database name holding the table
+ has changed so we need to make the complete filepath again. */
+ char* new_path = dict_tables_have_same_db(old_name, new_name)
+ ? os_file_make_new_pathname(
+ table->space->chain.start->name, new_name)
+ : fil_make_filepath(NULL, new_name, IBD, false);
+
+ info = pars_info_create();
+
+ pars_info_add_str_literal(info, "new_table_name", new_name);
+ pars_info_add_str_literal(info, "new_path_name", new_path);
+ pars_info_add_int4_literal(info, "space_id", table->space_id);
+
+ err = que_eval_sql(info,
+ "PROCEDURE RENAME_SPACE () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLESPACES"
+ " SET NAME = :new_table_name\n"
+ " WHERE SPACE = :space_id;\n"
+ "UPDATE SYS_DATAFILES"
+ " SET PATH = :new_path_name\n"
+ " WHERE SPACE = :space_id;\n"
+ "END;\n"
+ , FALSE, trx);
+
+ ut_free(new_path);
+ }
+ if (err != DB_SUCCESS) {
+ goto err_exit;
+ }
+
+ if (!new_is_tmp) {
+ /* Rename all constraints. */
+ char new_table_name[MAX_TABLE_NAME_LEN + 1];
+ char old_table_utf8[MAX_TABLE_NAME_LEN + 1];
+ uint errors = 0;
+
+ strncpy(old_table_utf8, old_name, MAX_TABLE_NAME_LEN);
+ old_table_utf8[MAX_TABLE_NAME_LEN] = '\0';
+ innobase_convert_to_system_charset(
+ strchr(old_table_utf8, '/') + 1,
+ strchr(old_name, '/') +1,
+ MAX_TABLE_NAME_LEN, &errors);
+
+ if (errors) {
+ /* Table name could not be converted from charset
+ my_charset_filename to UTF-8. This means that the
+ table name is already in UTF-8 (#mysql#50). */
+ strncpy(old_table_utf8, old_name, MAX_TABLE_NAME_LEN);
+ old_table_utf8[MAX_TABLE_NAME_LEN] = '\0';
+ }
+
+ info = pars_info_create();
+
+ pars_info_add_str_literal(info, "new_table_name", new_name);
+ pars_info_add_str_literal(info, "old_table_name", old_name);
+ pars_info_add_str_literal(info, "old_table_name_utf8",
+ old_table_utf8);
+
+ strncpy(new_table_name, new_name, MAX_TABLE_NAME_LEN);
+ new_table_name[MAX_TABLE_NAME_LEN] = '\0';
+ innobase_convert_to_system_charset(
+ strchr(new_table_name, '/') + 1,
+ strchr(new_name, '/') +1,
+ MAX_TABLE_NAME_LEN, &errors);
+
+ if (errors) {
+ /* Table name could not be converted from charset
+ my_charset_filename to UTF-8. This means that the
+ table name is already in UTF-8 (#mysql#50). */
+ strncpy(new_table_name, new_name, MAX_TABLE_NAME_LEN);
+ new_table_name[MAX_TABLE_NAME_LEN] = '\0';
+ }
+
+ pars_info_add_str_literal(info, "new_table_utf8", new_table_name);
+
+ err = que_eval_sql(
+ info,
+ "PROCEDURE RENAME_CONSTRAINT_IDS () IS\n"
+ "gen_constr_prefix CHAR;\n"
+ "new_db_name CHAR;\n"
+ "foreign_id CHAR;\n"
+ "new_foreign_id CHAR;\n"
+ "old_db_name_len INT;\n"
+ "old_t_name_len INT;\n"
+ "new_db_name_len INT;\n"
+ "id_len INT;\n"
+ "offset INT;\n"
+ "found INT;\n"
+ "BEGIN\n"
+ "found := 1;\n"
+ "old_db_name_len := INSTR(:old_table_name, '/')-1;\n"
+ "new_db_name_len := INSTR(:new_table_name, '/')-1;\n"
+ "new_db_name := SUBSTR(:new_table_name, 0,\n"
+ " new_db_name_len);\n"
+ "old_t_name_len := LENGTH(:old_table_name);\n"
+ "gen_constr_prefix := CONCAT(:old_table_name_utf8,\n"
+ " '_ibfk_');\n"
+ "WHILE found = 1 LOOP\n"
+ " SELECT ID INTO foreign_id\n"
+ " FROM SYS_FOREIGN\n"
+ " WHERE FOR_NAME = :old_table_name\n"
+ " AND TO_BINARY(FOR_NAME)\n"
+ " = TO_BINARY(:old_table_name)\n"
+ " LOCK IN SHARE MODE;\n"
+ " IF (SQL % NOTFOUND) THEN\n"
+ " found := 0;\n"
+ " ELSE\n"
+ " UPDATE SYS_FOREIGN\n"
+ " SET FOR_NAME = :new_table_name\n"
+ " WHERE ID = foreign_id;\n"
+ " id_len := LENGTH(foreign_id);\n"
+ " IF (INSTR(foreign_id, '/') > 0) THEN\n"
+ " IF (INSTR(foreign_id,\n"
+ " gen_constr_prefix) > 0)\n"
+ " THEN\n"
+ " offset := INSTR(foreign_id, '_ibfk_') - 1;\n"
+ " new_foreign_id :=\n"
+ " CONCAT(:new_table_utf8,\n"
+ " SUBSTR(foreign_id, offset,\n"
+ " id_len - offset));\n"
+ " ELSE\n"
+ " new_foreign_id :=\n"
+ " CONCAT(new_db_name,\n"
+ " SUBSTR(foreign_id,\n"
+ " old_db_name_len,\n"
+ " id_len - old_db_name_len));\n"
+ " END IF;\n"
+ " UPDATE SYS_FOREIGN\n"
+ " SET ID = new_foreign_id\n"
+ " WHERE ID = foreign_id;\n"
+ " UPDATE SYS_FOREIGN_COLS\n"
+ " SET ID = new_foreign_id\n"
+ " WHERE ID = foreign_id;\n"
+ " END IF;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "UPDATE SYS_FOREIGN SET REF_NAME = :new_table_name\n"
+ "WHERE REF_NAME = :old_table_name\n"
+ " AND TO_BINARY(REF_NAME)\n"
+ " = TO_BINARY(:old_table_name);\n"
+ "END;\n"
+ , FALSE, trx);
+
+ } else if (n_constraints_to_drop > 0) {
+ /* Drop some constraints of tmp tables. */
+
+ ulint db_name_len = dict_get_db_name_len(old_name) + 1;
+ char* db_name = mem_heap_strdupl(heap, old_name,
+ db_name_len);
+ ulint i;
+
+ for (i = 0; i < n_constraints_to_drop; i++) {
+ err = row_delete_constraint(constraints_to_drop[i],
+ db_name, heap, trx);
+
+ if (err != DB_SUCCESS) {
+ break;
+ }
+ }
+ }
+
+ if (err == DB_SUCCESS
+ && (dict_table_has_fts_index(table)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID))
+ && !dict_tables_have_same_db(old_name, new_name)) {
+ err = fts_rename_aux_tables(table, new_name, trx);
+ if (err != DB_TABLE_NOT_FOUND) {
+ aux_fts_rename = true;
+ }
+ }
+
+ if (err != DB_SUCCESS) {
+err_exit:
+ if (err == DB_DUPLICATE_KEY) {
+ ib::error() << "Possible reasons:";
+ ib::error() << "(1) Table rename would cause two"
+ " FOREIGN KEY constraints to have the same"
+ " internal name in case-insensitive"
+ " comparison.";
+ ib::error() << "(2) Table "
+ << ut_get_name(trx, new_name)
+ << " exists in the InnoDB internal data"
+ " dictionary though MySQL is trying to rename"
+ " table " << ut_get_name(trx, old_name)
+ << " to it. Have you deleted the .frm file and"
+ " not used DROP TABLE?";
+ ib::info() << TROUBLESHOOTING_MSG;
+ ib::error() << "If table "
+ << ut_get_name(trx, new_name)
+ << " is a temporary table #sql..., then"
+ " it can be that there are still queries"
+ " running on the table, and it will be dropped"
+ " automatically when the queries end. You can"
+ " drop the orphaned table inside InnoDB by"
+ " creating an InnoDB table with the same name"
+ " in another database and copying the .frm file"
+ " to the current database. Then MySQL thinks"
+ " the table exists, and DROP TABLE will"
+ " succeed.";
+ }
+ trx->error_state = DB_SUCCESS;
+ trx->rollback();
+ trx->error_state = DB_SUCCESS;
+ } else {
+ /* The following call will also rename the .ibd data file if
+ the table is stored in a single-table tablespace */
+
+ err = dict_table_rename_in_cache(
+ table, new_name, !new_is_tmp);
+ if (err != DB_SUCCESS) {
+ trx->error_state = DB_SUCCESS;
+ trx->rollback();
+ trx->error_state = DB_SUCCESS;
+ goto funct_exit;
+ }
+
+ /* In case of copy alter, template db_name and
+ table_name should be renamed only for newly
+ created table. */
+ if (table->vc_templ != NULL && !new_is_tmp) {
+ innobase_rename_vc_templ(table);
+ }
+
+ /* We only want to switch off some of the type checking in
+ an ALTER TABLE, not in a RENAME. */
+ dict_names_t fk_tables;
+
+ err = dict_load_foreigns(
+ new_name, NULL, false,
+ !old_is_tmp || trx->check_foreigns,
+ use_fk
+ ? DICT_ERR_IGNORE_NONE
+ : DICT_ERR_IGNORE_FK_NOKEY,
+ fk_tables);
+
+ if (err != DB_SUCCESS) {
+
+ if (old_is_tmp) {
+ /* In case of copy alter, ignore the
+ loading of foreign key constraint
+ when foreign_key_check is disabled */
+ ib::error_or_warn(trx->check_foreigns)
+ << "In ALTER TABLE "
+ << ut_get_name(trx, new_name)
+ << " has or is referenced in foreign"
+ " key constraints which are not"
+ " compatible with the new table"
+ " definition.";
+ if (!trx->check_foreigns) {
+ err = DB_SUCCESS;
+ goto funct_exit;
+ }
+ } else {
+ ib::error() << "In RENAME TABLE table "
+ << ut_get_name(trx, new_name)
+ << " is referenced in foreign key"
+ " constraints which are not compatible"
+ " with the new table definition.";
+ }
+
+ trx->error_state = DB_SUCCESS;
+ trx->rollback();
+ trx->error_state = DB_SUCCESS;
+ }
+
+ /* Check whether virtual column or stored column affects
+ the foreign key constraint of the table. */
+ if (dict_foreigns_has_s_base_col(
+ table->foreign_set, table)) {
+ err = DB_NO_FK_ON_S_BASE_COL;
+ ut_a(DB_SUCCESS == dict_table_rename_in_cache(
+ table, old_name, FALSE));
+ trx->error_state = DB_SUCCESS;
+ trx->rollback();
+ trx->error_state = DB_SUCCESS;
+ goto funct_exit;
+ }
+
+ /* Fill the virtual column set in foreign when
+ the table undergoes copy alter operation. */
+ dict_mem_table_free_foreign_vcol_set(table);
+ dict_mem_table_fill_foreign_vcol_set(table);
+
+ while (!fk_tables.empty()) {
+ dict_load_table(fk_tables.front(),
+ DICT_ERR_IGNORE_NONE);
+ fk_tables.pop_front();
+ }
+
+ table->data_dir_path= NULL;
+ }
+
+funct_exit:
+ if (aux_fts_rename && err != DB_SUCCESS
+ && table != NULL && (table->space != 0)) {
+
+ char* orig_name = table->name.m_name;
+ trx_t* trx_bg = trx_create();
+
+ /* If the first fts_rename fails, the trx would
+ be rolled back and committed, we can't use it any more,
+ so we have to start a new background trx here. */
+ ut_a(trx_state_eq(trx_bg, TRX_STATE_NOT_STARTED));
+ trx_bg->op_info = "Revert the failing rename "
+ "for fts aux tables";
+ trx_bg->dict_operation_lock_mode = RW_X_LATCH;
+ trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE);
+
+ /* If rename fails and table has its own tablespace,
+ we need to call fts_rename_aux_tables again to
+ revert the ibd file rename, which is not under the
+ control of trx. Also notice the parent table name
+ in cache is not changed yet. If the reverting fails,
+ the ibd data may be left in the new database, which
+ can be fixed only manually. */
+ table->name.m_name = const_cast<char*>(new_name);
+ fts_rename_aux_tables(table, old_name, trx_bg);
+ table->name.m_name = orig_name;
+
+ trx_bg->dict_operation_lock_mode = 0;
+ trx_commit_for_mysql(trx_bg);
+ trx_bg->free();
+ }
+
+ if (table != NULL) {
+ if (commit && !table->is_temporary()) {
+ table->stats_bg_flag &= byte(~BG_STAT_SHOULD_QUIT);
+ }
+ dict_table_close(table, dict_locked, FALSE);
+ }
+
+ if (commit) {
+ DEBUG_SYNC(trx->mysql_thd, "before_rename_table_commit");
+ trx_commit_for_mysql(trx);
+ }
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
+ trx->op_info = "";
+
+ return(err);
+}
+
+/*********************************************************************//**
+Scans an index for either COUNT(*) or CHECK TABLE.
+If CHECK TABLE; Checks that the index contains entries in an ascending order,
+unique constraint is not broken, and calculates the number of index entries
+in the read view of the current transaction.
+@return DB_SUCCESS or other error */
+dberr_t
+row_scan_index_for_mysql(
+/*=====================*/
+ row_prebuilt_t* prebuilt, /*!< in: prebuilt struct
+ in MySQL handle */
+ const dict_index_t* index, /*!< in: index */
+ ulint* n_rows) /*!< out: number of entries
+ seen in the consistent read */
+{
+ dtuple_t* prev_entry = NULL;
+ ulint matched_fields;
+ byte* buf;
+ dberr_t ret;
+ rec_t* rec;
+ int cmp;
+ ibool contains_null;
+ ulint i;
+ ulint cnt;
+ mem_heap_t* heap = NULL;
+ rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
+ rec_offs* offsets;
+ rec_offs_init(offsets_);
+
+ *n_rows = 0;
+
+ /* Don't support RTree Leaf level scan */
+ ut_ad(!dict_index_is_spatial(index));
+
+ if (dict_index_is_clust(index)) {
+ /* The clustered index of a table is always available.
+ During online ALTER TABLE that rebuilds the table, the
+ clustered index in the old table will have
+ index->online_log pointing to the new table. All
+ indexes of the old table will remain valid and the new
+ table will be unaccessible to MySQL until the
+ completion of the ALTER TABLE. */
+ } else if (dict_index_is_online_ddl(index)
+ || (index->type & DICT_FTS)) {
+ /* Full Text index are implemented by auxiliary tables,
+ not the B-tree. We also skip secondary indexes that are
+ being created online. */
+ return(DB_SUCCESS);
+ }
+
+ ulint bufsize = std::max<ulint>(srv_page_size,
+ prebuilt->mysql_row_len);
+ buf = static_cast<byte*>(ut_malloc_nokey(bufsize));
+ heap = mem_heap_create(100);
+
+ cnt = 1000;
+
+ ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, 0);
+loop:
+ /* Check thd->killed every 1,000 scanned rows */
+ if (--cnt == 0) {
+ if (trx_is_interrupted(prebuilt->trx)) {
+ ret = DB_INTERRUPTED;
+ goto func_exit;
+ }
+ cnt = 1000;
+ }
+
+ switch (ret) {
+ case DB_SUCCESS:
+ break;
+ case DB_DEADLOCK:
+ case DB_LOCK_TABLE_FULL:
+ case DB_LOCK_WAIT_TIMEOUT:
+ case DB_INTERRUPTED:
+ goto func_exit;
+ default:
+ ib::warn() << "CHECK TABLE on index " << index->name << " of"
+ " table " << index->table->name << " returned " << ret;
+ /* (this error is ignored by CHECK TABLE) */
+ /* fall through */
+ case DB_END_OF_INDEX:
+ ret = DB_SUCCESS;
+func_exit:
+ ut_free(buf);
+ mem_heap_free(heap);
+
+ return(ret);
+ }
+
+ *n_rows = *n_rows + 1;
+
+ /* else this code is doing handler::check() for CHECK TABLE */
+
+ /* row_search... returns the index record in buf, record origin offset
+ within buf stored in the first 4 bytes, because we have built a dummy
+ template */
+
+ rec = buf + mach_read_from_4(buf);
+
+ offsets = rec_get_offsets(rec, index, offsets_, index->n_core_fields,
+ ULINT_UNDEFINED, &heap);
+
+ if (prev_entry != NULL) {
+ matched_fields = 0;
+
+ cmp = cmp_dtuple_rec_with_match(prev_entry, rec, offsets,
+ &matched_fields);
+ contains_null = FALSE;
+
+ /* In a unique secondary index we allow equal key values if
+ they contain SQL NULLs */
+
+ for (i = 0;
+ i < dict_index_get_n_ordering_defined_by_user(index);
+ i++) {
+ if (UNIV_SQL_NULL == dfield_get_len(
+ dtuple_get_nth_field(prev_entry, i))) {
+
+ contains_null = TRUE;
+ break;
+ }
+ }
+
+ const char* msg;
+
+ if (cmp > 0) {
+ ret = DB_INDEX_CORRUPT;
+ msg = "index records in a wrong order in ";
+not_ok:
+ ib::error()
+ << msg << index->name
+ << " of table " << index->table->name
+ << ": " << *prev_entry << ", "
+ << rec_offsets_print(rec, offsets);
+ /* Continue reading */
+ } else if (dict_index_is_unique(index)
+ && !contains_null
+ && matched_fields
+ >= dict_index_get_n_ordering_defined_by_user(
+ index)) {
+ ret = DB_DUPLICATE_KEY;
+ msg = "duplicate key in ";
+ goto not_ok;
+ }
+ }
+
+ {
+ mem_heap_t* tmp_heap = NULL;
+
+ /* Empty the heap on each round. But preserve offsets[]
+ for the row_rec_to_index_entry() call, by copying them
+ into a separate memory heap when needed. */
+ if (UNIV_UNLIKELY(offsets != offsets_)) {
+ ulint size = rec_offs_get_n_alloc(offsets)
+ * sizeof *offsets;
+
+ tmp_heap = mem_heap_create(size);
+
+ offsets = static_cast<rec_offs*>(
+ mem_heap_dup(tmp_heap, offsets, size));
+ }
+
+ mem_heap_empty(heap);
+
+ prev_entry = row_rec_to_index_entry(
+ rec, index, offsets, heap);
+
+ if (UNIV_LIKELY_NULL(tmp_heap)) {
+ mem_heap_free(tmp_heap);
+ }
+ }
+
+ ret = row_search_for_mysql(
+ buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT);
+
+ goto loop;
+}
+
+/*********************************************************************//**
+Initialize this module */
+void
+row_mysql_init(void)
+/*================*/
+{
+ mutex_create(LATCH_ID_ROW_DROP_LIST, &row_drop_list_mutex);
+
+ UT_LIST_INIT(
+ row_mysql_drop_list,
+ &row_mysql_drop_t::row_mysql_drop_list);
+
+ row_mysql_drop_list_inited = true;
+}
+
+void row_mysql_close()
+{
+ ut_ad(!UT_LIST_GET_LEN(row_mysql_drop_list) ||
+ srv_force_recovery >= SRV_FORCE_NO_BACKGROUND);
+ if (row_mysql_drop_list_inited)
+ {
+ row_mysql_drop_list_inited= false;
+ mutex_free(&row_drop_list_mutex);
+
+ while (row_mysql_drop_t *drop= UT_LIST_GET_FIRST(row_mysql_drop_list))
+ {
+ UT_LIST_REMOVE(row_mysql_drop_list, drop);
+ ut_free(drop);
+ }
+ }
+}