Diffstat (limited to 'storage/innobase/fts')
-rw-r--r--  storage/innobase/fts/Makefile.query  |   18
-rw-r--r--  storage/innobase/fts/fts0ast.cc      |  815
-rw-r--r--  storage/innobase/fts/fts0blex.cc     | 2177
-rw-r--r--  storage/innobase/fts/fts0blex.l      |   74
-rw-r--r--  storage/innobase/fts/fts0config.cc   |  432
-rw-r--r--  storage/innobase/fts/fts0fts.cc      | 6316
-rw-r--r--  storage/innobase/fts/fts0opt.cc      | 3053
-rw-r--r--  storage/innobase/fts/fts0pars.cc     | 2007
-rw-r--r--  storage/innobase/fts/fts0pars.y      |  293
-rw-r--r--  storage/innobase/fts/fts0plugin.cc   |  283
-rw-r--r--  storage/innobase/fts/fts0que.cc      | 4596
-rw-r--r--  storage/innobase/fts/fts0sql.cc      |  258
-rw-r--r--  storage/innobase/fts/fts0tlex.cc     | 2169
-rw-r--r--  storage/innobase/fts/fts0tlex.l      |   69
-rwxr-xr-x  storage/innobase/fts/make_parser.sh  |   49
15 files changed, 22609 insertions, 0 deletions
diff --git a/storage/innobase/fts/Makefile.query b/storage/innobase/fts/Makefile.query
new file mode 100644
index 00000000..d91b1b92
--- /dev/null
+++ b/storage/innobase/fts/Makefile.query
@@ -0,0 +1,18 @@
+LEX=flex
+YACC=bison
+PREFIX=fts
+
+all: fts0pars.cc fts0blex.cc fts0tlex.cc
+
+fts0pars.cc: fts0pars.y
+fts0blex.cc: fts0blex.l
+fts0tlex.cc: fts0tlex.l
+
+.l.cc:
+ echo '#include "univ.i"' > $*.cc
+ $(LEX) --stdout -P$(subst lex,,$*) -o $*.cc \
+ --header-file=../include/$*.h $< >> $*.cc
+
+.y.cc:
+ $(YACC) -p $(PREFIX) -o $*.cc -d $<
+ mv $*.h ../include
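
For illustration, once the LEX/YACC/PREFIX macros are substituted, the two suffix rules above expand to roughly the following commands for fts0blex.cc and fts0pars.cc; the -P/-p options are what give the generated sources their fts0b/fts symbol prefixes, as seen in the generated scanner below:

    echo '#include "univ.i"' > fts0blex.cc
    flex --stdout -Pfts0b -o fts0blex.cc --header-file=../include/fts0blex.h fts0blex.l >> fts0blex.cc

    bison -p fts -o fts0pars.cc -d fts0pars.y
    mv fts0pars.h ../include
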
diff --git a/storage/innobase/fts/fts0ast.cc b/storage/innobase/fts/fts0ast.cc
new file mode 100644
index 00000000..bb42f7c9
--- /dev/null
+++ b/storage/innobase/fts/fts0ast.cc
@@ -0,0 +1,815 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2020, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file fts/fts0ast.cc
+Full Text Search parser helper file.
+
+Created 2007/3/16 Sunny Bains.
+***********************************************************************/
+
+#include "row0sel.h"
+#include "fts0ast.h"
+#include "fts0pars.h"
+#include "fts0fts.h"
+
+/* The FTS ast visit pass. */
+enum fts_ast_visit_pass_t {
+ FTS_PASS_FIRST, /*!< First visit pass,
+ process operators excluding
+ FTS_EXIST and FTS_IGNORE */
+ FTS_PASS_EXIST, /*!< Exist visit pass,
+ process operator FTS_EXIST */
+ FTS_PASS_IGNORE /*!< Ignore visit pass,
+ process operator FTS_IGNORE */
+};
+
+/******************************************************************//**
+Create an empty fts_ast_node_t.
+@return new node */
+static
+fts_ast_node_t*
+fts_ast_node_create(void)
+/*=====================*/
+{
+ fts_ast_node_t* node;
+
+ node = (fts_ast_node_t*) ut_zalloc_nokey(sizeof(*node));
+
+ return(node);
+}
+
+/** Track node allocations, in case there is an error during parsing. */
+static
+void
+fts_ast_state_add_node(
+ fts_ast_state_t*state, /*!< in: ast instance */
+ fts_ast_node_t* node) /*!< in: node to add to ast */
+{
+ if (!state->list.head) {
+ ut_a(!state->list.tail);
+
+ state->list.head = state->list.tail = node;
+ } else {
+ state->list.tail->next_alloc = node;
+ state->list.tail = node;
+ }
+}
+
+/******************************************************************//**
+Create an operator fts_ast_node_t.
+@return new node */
+fts_ast_node_t*
+fts_ast_create_node_oper(
+/*=====================*/
+ void* arg, /*!< in: ast state instance */
+ fts_ast_oper_t oper) /*!< in: ast operator */
+{
+ fts_ast_node_t* node = fts_ast_node_create();
+
+ node->type = FTS_AST_OPER;
+ node->oper = oper;
+
+ fts_ast_state_add_node((fts_ast_state_t*) arg, node);
+
+ return(node);
+}
+
+/******************************************************************//**
+This function takes ownership of the ptr and is responsible
+for free'ing it.
+@return new node or a node list with tokenized words */
+fts_ast_node_t*
+fts_ast_create_node_term(
+/*=====================*/
+ void* arg, /*!< in: ast state instance */
+ const fts_ast_string_t* ptr) /*!< in: ast term string */
+{
+ fts_ast_state_t* state = static_cast<fts_ast_state_t*>(arg);
+ ulint len = ptr->len;
+ ulint cur_pos = 0;
+ fts_ast_node_t* node = NULL;
+ fts_ast_node_t* node_list = NULL;
+ fts_ast_node_t* first_node = NULL;
+
+ /* Scan the incoming string and filter out any "non-word" characters */
+ while (cur_pos < len) {
+ fts_string_t str;
+ ulint cur_len;
+
+ cur_len = innobase_mysql_fts_get_token(
+ state->charset,
+ reinterpret_cast<const byte*>(ptr->str) + cur_pos,
+ reinterpret_cast<const byte*>(ptr->str) + len, &str);
+
+ if (cur_len == 0) {
+ break;
+ }
+
+ cur_pos += cur_len;
+
+ if (str.f_n_char > 0) {
+			/* If a subsequent term (i.e. any term after the
+			first one) is shorter than fts_min_token_size, or
+			any term is longer than fts_max_token_size, ignore
+			it. This keeps the behavior consistent with MyISAM. */
+ if ((first_node && (str.f_n_char < fts_min_token_size))
+ || str.f_n_char > fts_max_token_size) {
+ continue;
+ }
+
+ node = fts_ast_node_create();
+
+ node->type = FTS_AST_TERM;
+
+ node->term.ptr = fts_ast_string_create(
+ str.f_str, str.f_len);
+
+ fts_ast_state_add_node(
+ static_cast<fts_ast_state_t*>(arg), node);
+
+ if (first_node) {
+ /* There is more than one word, create
+ a list to organize them */
+ if (!node_list) {
+ node_list = fts_ast_create_node_list(
+ static_cast<fts_ast_state_t*>(
+ arg),
+ first_node);
+ }
+
+ fts_ast_add_node(node_list, node);
+ } else {
+ first_node = node;
+ }
+ }
+ }
+
+ return((node_list != NULL) ? node_list : first_node);
+}
+
+/******************************************************************//**
+Create an AST term node, makes a copy of ptr for plugin parser
+@return node */
+fts_ast_node_t*
+fts_ast_create_node_term_for_parser(
+/*================================*/
+ void* arg, /*!< in: ast state */
+ const char* ptr, /*!< in: term string */
+ const ulint len) /*!< in: term string length */
+{
+ fts_ast_node_t* node = NULL;
+
+	/* '%' as the first char is forbidden for LIKE in the internal SQL
+	parser; '%' as the last char is reserved for wildcard search. */
+ if (len == 0 || len > FTS_MAX_WORD_LEN
+ || ptr[0] == '%' || ptr[len - 1] == '%') {
+ return(NULL);
+ }
+
+ node = fts_ast_node_create();
+
+ node->type = FTS_AST_TERM;
+
+ node->term.ptr = fts_ast_string_create(
+ reinterpret_cast<const byte*>(ptr), len);
+
+ fts_ast_state_add_node(static_cast<fts_ast_state_t*>(arg), node);
+
+ return(node);
+}
+
+/******************************************************************//**
+This function takes ownership of the ptr and is responsible
+for free'ing it.
+@return new node */
+fts_ast_node_t*
+fts_ast_create_node_text(
+/*=====================*/
+ void* arg, /*!< in: ast state instance */
+ const fts_ast_string_t* ptr) /*!< in: ast text string */
+{
+ ulint len = ptr->len;
+ fts_ast_node_t* node = NULL;
+
+	/* Once we get here, the string must have at least 2 quotes ""
+	around the query string, which could be empty. Also, the query
+	string may contain 0x00, so we don't treat it as null-terminated. */
+ ut_ad(len >= 2);
+ ut_ad(ptr->str[0] == '\"' && ptr->str[len - 1] == '\"');
+
+ if (len == 2) {
+ /* If the query string contains nothing except quotes,
+ it's obviously an invalid query. */
+ return(NULL);
+ }
+
+ node = fts_ast_node_create();
+
+ /*!< We ignore the actual quotes "" */
+ len -= 2;
+
+ node->type = FTS_AST_TEXT;
+ /*!< Skip copying the first quote */
+ node->text.ptr = fts_ast_string_create(
+ reinterpret_cast<const byte*>(ptr->str + 1), len);
+ node->text.distance = ULINT_UNDEFINED;
+
+ fts_ast_state_add_node((fts_ast_state_t*) arg, node);
+
+ return(node);
+}
+
+/******************************************************************//**
+Create an AST phrase list node for plugin parser
+@return node */
+fts_ast_node_t*
+fts_ast_create_node_phrase_list(
+/*============================*/
+ void* arg) /*!< in: ast state */
+{
+ fts_ast_node_t* node = fts_ast_node_create();
+
+ node->type = FTS_AST_PARSER_PHRASE_LIST;
+
+ node->text.distance = ULINT_UNDEFINED;
+ node->list.head = node->list.tail = NULL;
+
+ fts_ast_state_add_node(static_cast<fts_ast_state_t*>(arg), node);
+
+ return(node);
+}
+
+/******************************************************************//**
+This function takes ownership of the expr and is responsible
+for free'ing it.
+@return new node */
+fts_ast_node_t*
+fts_ast_create_node_list(
+/*=====================*/
+ void* arg, /*!< in: ast state instance */
+ fts_ast_node_t* expr) /*!< in: ast expr instance */
+{
+ fts_ast_node_t* node = fts_ast_node_create();
+
+ node->type = FTS_AST_LIST;
+ node->list.head = node->list.tail = expr;
+
+ fts_ast_state_add_node((fts_ast_state_t*) arg, node);
+
+ return(node);
+}
+
+/******************************************************************//**
+Create a sub-expression list node. This function takes ownership of
+expr and is responsible for deleting it.
+@return new node */
+fts_ast_node_t*
+fts_ast_create_node_subexp_list(
+/*============================*/
+ void* arg, /*!< in: ast state instance */
+ fts_ast_node_t* expr) /*!< in: ast expr instance */
+{
+ fts_ast_node_t* node = fts_ast_node_create();
+
+ node->type = FTS_AST_SUBEXP_LIST;
+ node->list.head = node->list.tail = expr;
+
+ fts_ast_state_add_node((fts_ast_state_t*) arg, node);
+
+ return(node);
+}
+
+/******************************************************************//**
+Free an expr list node elements. */
+static
+void
+fts_ast_free_list(
+/*==============*/
+ fts_ast_node_t* node) /*!< in: ast node to free */
+{
+ ut_a(node->type == FTS_AST_LIST
+ || node->type == FTS_AST_SUBEXP_LIST
+ || node->type == FTS_AST_PARSER_PHRASE_LIST);
+
+ for (node = node->list.head;
+ node != NULL;
+ node = fts_ast_free_node(node)) {
+
+ /*!< No op */
+ }
+}
+
+/********************************************************************//**
+Free a fts_ast_node_t instance.
+@return next node to free */
+fts_ast_node_t*
+fts_ast_free_node(
+/*==============*/
+ fts_ast_node_t* node) /*!< in: the node to free */
+{
+ fts_ast_node_t* next_node;
+
+ switch (node->type) {
+ case FTS_AST_TEXT:
+ if (node->text.ptr) {
+ fts_ast_string_free(node->text.ptr);
+ node->text.ptr = NULL;
+ }
+ break;
+
+ case FTS_AST_TERM:
+ if (node->term.ptr) {
+ fts_ast_string_free(node->term.ptr);
+ node->term.ptr = NULL;
+ }
+ break;
+
+ case FTS_AST_LIST:
+ case FTS_AST_SUBEXP_LIST:
+ case FTS_AST_PARSER_PHRASE_LIST:
+ fts_ast_free_list(node);
+ node->list.head = node->list.tail = NULL;
+ break;
+
+ case FTS_AST_OPER:
+ break;
+
+ default:
+ ut_error;
+ }
+
+ /*!< Get next node before freeing the node itself */
+ next_node = node->next;
+
+ ut_free(node);
+
+ return(next_node);
+}
+
+/******************************************************************//**
+The list node takes ownership of the added node and is responsible
+for free'ing it.
+@return the list node passed in as "node" */
+fts_ast_node_t*
+fts_ast_add_node(
+/*=============*/
+ fts_ast_node_t* node, /*!< in: list instance */
+ fts_ast_node_t* elem) /*!< in: node to add to list */
+{
+ if (!elem) {
+ return(NULL);
+ }
+
+ ut_a(!elem->next);
+ ut_a(node->type == FTS_AST_LIST
+ || node->type == FTS_AST_SUBEXP_LIST
+ || node->type == FTS_AST_PARSER_PHRASE_LIST);
+
+ if (!node->list.head) {
+ ut_a(!node->list.tail);
+
+ node->list.head = node->list.tail = elem;
+ } else {
+ ut_a(node->list.tail);
+
+ node->list.tail->next = elem;
+ node->list.tail = elem;
+ }
+
+ return(node);
+}
+
+/******************************************************************//**
+Set the wildcard attribute of a term. */
+void
+fts_ast_term_set_wildcard(
+/*======================*/
+ fts_ast_node_t* node) /*!< in/out: set attribute of
+ a term node */
+{
+ if (!node) {
+ return;
+ }
+
+	/* If it's a node list, the wildcard should be set on the tail node. */
+ if (node->type == FTS_AST_LIST) {
+ ut_ad(node->list.tail != NULL);
+ node = node->list.tail;
+ }
+
+ ut_a(node->type == FTS_AST_TERM);
+ ut_a(!node->term.wildcard);
+
+ node->term.wildcard = TRUE;
+}
+
+/******************************************************************//**
+Set the proximity attribute of a text node. */
+void
+fts_ast_text_set_distance(
+/*======================*/
+ fts_ast_node_t* node, /*!< in/out: text node */
+ ulint distance) /*!< in: the text proximity
+ distance */
+{
+ if (node == NULL) {
+ return;
+ }
+
+ ut_a(node->type == FTS_AST_TEXT);
+ ut_a(node->text.distance == ULINT_UNDEFINED);
+
+ node->text.distance = distance;
+}
+
+/******************************************************************//**
+Free node and expr allocations. */
+void
+fts_ast_state_free(
+/*===============*/
+ fts_ast_state_t*state) /*!< in: ast state to free */
+{
+ fts_ast_node_t* node = state->list.head;
+
+ /* Free the nodes that were allocated during parsing. */
+ while (node) {
+ fts_ast_node_t* next = node->next_alloc;
+
+ if (node->type == FTS_AST_TEXT && node->text.ptr) {
+ fts_ast_string_free(node->text.ptr);
+ node->text.ptr = NULL;
+ } else if (node->type == FTS_AST_TERM && node->term.ptr) {
+ fts_ast_string_free(node->term.ptr);
+ node->term.ptr = NULL;
+ }
+
+ ut_free(node);
+ node = next;
+ }
+
+ state->root = state->list.head = state->list.tail = NULL;
+}
+
+/** Print the ast string
+@param[in] str string to print */
+static
+void
+fts_ast_string_print(
+ const fts_ast_string_t* ast_str)
+{
+ for (ulint i = 0; i < ast_str->len; ++i) {
+ printf("%c", ast_str->str[i]);
+ }
+
+ printf("\n");
+}
+
+/******************************************************************//**
+Print an ast node recursively. */
+static
+void
+fts_ast_node_print_recursive(
+/*=========================*/
+ fts_ast_node_t* node, /*!< in: ast node to print */
+ ulint level) /*!< in: recursive level */
+{
+ /* Print alignment blank */
+ for (ulint i = 0; i < level; i++) {
+ printf(" ");
+ }
+
+ switch (node->type) {
+ case FTS_AST_TEXT:
+ printf("TEXT: ");
+ fts_ast_string_print(node->text.ptr);
+ break;
+
+ case FTS_AST_TERM:
+ printf("TERM: ");
+ fts_ast_string_print(node->term.ptr);
+ break;
+
+ case FTS_AST_LIST:
+ printf("LIST: \n");
+
+ for (node = node->list.head; node; node = node->next) {
+ fts_ast_node_print_recursive(node, level + 1);
+ }
+ break;
+
+ case FTS_AST_SUBEXP_LIST:
+ printf("SUBEXP_LIST: \n");
+
+ for (node = node->list.head; node; node = node->next) {
+ fts_ast_node_print_recursive(node, level + 1);
+ }
+ break;
+
+ case FTS_AST_OPER:
+ printf("OPER: %d\n", node->oper);
+ break;
+
+ case FTS_AST_PARSER_PHRASE_LIST:
+ printf("PARSER_PHRASE_LIST: \n");
+
+ for (node = node->list.head; node; node = node->next) {
+ fts_ast_node_print_recursive(node, level + 1);
+ }
+ break;
+
+ default:
+ ut_error;
+ }
+}
+
+/******************************************************************//**
+Print an ast node */
+void
+fts_ast_node_print(
+/*===============*/
+ fts_ast_node_t* node) /*!< in: ast node to print */
+{
+ fts_ast_node_print_recursive(node, 0);
+}
+
+/** Check whether only union operations are involved in the node
+@param[in] node ast node to check
+@return true if the node contains only union operations, else false. */
+bool
+fts_ast_node_check_union(
+ fts_ast_node_t* node)
+{
+ if (node->type == FTS_AST_LIST
+ || node->type == FTS_AST_SUBEXP_LIST) {
+
+ for (node = node->list.head; node; node = node->next) {
+ if (!fts_ast_node_check_union(node)) {
+ return(false);
+ }
+ }
+
+ } else if (node->type == FTS_AST_PARSER_PHRASE_LIST) {
+ /* Phrase search for plugin parser */
+ return(false);
+ } else if (node->type == FTS_AST_OPER
+ && (node->oper == FTS_IGNORE
+ || node->oper == FTS_EXIST)) {
+
+ return(false);
+ } else if (node->type == FTS_AST_TEXT) {
+ /* Distance or phrase search query. */
+ return(false);
+ }
+
+ return(true);
+}
+
+/******************************************************************//**
+Traverse the AST - in-order traversal, except for the FTS_EXIST and FTS_IGNORE
+nodes, which will be ignored in the first pass of each level, and visited in a
+second and third pass after all other nodes in the same level are visited.
+@return DB_SUCCESS if all went well */
+dberr_t
+fts_ast_visit(
+/*==========*/
+ fts_ast_oper_t oper, /*!< in: current operator */
+ fts_ast_node_t* node, /*!< in: current root node */
+ fts_ast_callback visitor, /*!< in: callback function */
+ void* arg, /*!< in: arg for callback */
+ bool* has_ignore) /*!< out: true, if the operator
+					was ignored during processing;
+ currently we ignore FTS_EXIST
+ and FTS_IGNORE operators */
+{
+ dberr_t error = DB_SUCCESS;
+ fts_ast_node_t* oper_node = NULL;
+ fts_ast_node_t* start_node;
+ bool revisit = false;
+ bool will_be_ignored = false;
+ fts_ast_visit_pass_t visit_pass = FTS_PASS_FIRST;
+ const trx_t* trx = node->trx;
+
+ start_node = node->list.head;
+
+ ut_a(node->type == FTS_AST_LIST
+ || node->type == FTS_AST_SUBEXP_LIST);
+
+ if (oper == FTS_EXIST_SKIP) {
+ visit_pass = FTS_PASS_EXIST;
+ } else if (oper == FTS_IGNORE_SKIP) {
+ visit_pass = FTS_PASS_IGNORE;
+ }
+
+	/* In the first pass of the tree, at the leaf level of the
+	tree, FTS_EXIST and FTS_IGNORE operations will be ignored.
+	The processing is repeated at the level above the leaf level.
+
+ The basic idea here is that when we encounter FTS_EXIST or
+ FTS_IGNORE, we will change the operator node into FTS_EXIST_SKIP
+	or FTS_IGNORE_SKIP, and the term and text nodes under those operators
+	are ignored in the first pass. We have two passes during the revisit:
+ We process nodes with FTS_EXIST_SKIP in the exist pass, and then
+ process nodes with FTS_IGNORE_SKIP in the ignore pass.
+
+	The order must be strictly followed, or we will get wrong results.
+ For example, we have a query 'a +b -c d +e -f'.
+ first pass: process 'a' and 'd' by union;
+ exist pass: process '+b' and '+e' by intersection;
+ ignore pass: process '-c' and '-f' by difference. */
+
+ for (node = node->list.head;
+ node && (error == DB_SUCCESS);
+ node = node->next) {
+
+ switch (node->type) {
+ case FTS_AST_LIST:
+ if (visit_pass != FTS_PASS_FIRST) {
+ break;
+ }
+
+ error = fts_ast_visit(oper, node, visitor,
+ arg, &will_be_ignored);
+
+ /* If will_be_ignored is set to true, then
+ we encountered and ignored a FTS_EXIST or FTS_IGNORE
+ operator. */
+ if (will_be_ignored) {
+ revisit = true;
+				/* Remember oper for the list (e.g. '-abc&def');
+				the ignored oper comes from the previous node. */
+ node->oper = oper;
+ }
+
+ break;
+
+ case FTS_AST_OPER:
+ oper = node->oper;
+ oper_node = node;
+
+ /* Change the operator for revisit */
+ if (oper == FTS_EXIST) {
+ oper_node->oper = FTS_EXIST_SKIP;
+ } else if (oper == FTS_IGNORE) {
+ oper_node->oper = FTS_IGNORE_SKIP;
+ }
+
+ break;
+
+ default:
+ if (node->visited) {
+ continue;
+ }
+
+ ut_a(oper == FTS_NONE || !oper_node
+ || oper_node->oper == oper
+ || oper_node->oper == FTS_EXIST_SKIP
+ || oper_node->oper == FTS_IGNORE_SKIP);
+
+			if (oper == FTS_EXIST || oper == FTS_IGNORE) {
+ *has_ignore = true;
+ continue;
+ }
+
+			/* Process leaf node according to its pass. */
+ if (oper == FTS_EXIST_SKIP
+ && visit_pass == FTS_PASS_EXIST) {
+ error = visitor(FTS_EXIST, node, arg);
+ node->visited = true;
+ } else if (oper == FTS_IGNORE_SKIP
+ && visit_pass == FTS_PASS_IGNORE) {
+ error = visitor(FTS_IGNORE, node, arg);
+ node->visited = true;
+ } else if (visit_pass == FTS_PASS_FIRST) {
+ error = visitor(oper, node, arg);
+ node->visited = true;
+ }
+ }
+ }
+
+ if (trx_is_interrupted(trx)) {
+ return DB_INTERRUPTED;
+ }
+
+ if (revisit) {
+ /* Exist pass processes the skipped FTS_EXIST operation. */
+ for (node = start_node;
+ node && error == DB_SUCCESS;
+ node = node->next) {
+
+ if (node->type == FTS_AST_LIST
+ && node->oper != FTS_IGNORE) {
+ error = fts_ast_visit(FTS_EXIST_SKIP, node,
+ visitor, arg, &will_be_ignored);
+ }
+ }
+
+ /* Ignore pass processes the skipped FTS_IGNORE operation. */
+ for (node = start_node;
+ node && error == DB_SUCCESS;
+ node = node->next) {
+
+ if (node->type == FTS_AST_LIST) {
+ error = fts_ast_visit(FTS_IGNORE_SKIP, node,
+ visitor, arg, &will_be_ignored);
+ }
+ }
+ }
+
+ return(error);
+}
+
+/**
+Create an ast string object with a NUL terminator, so the allocated
+string has one more byte than len.
+@param[in] str pointer to string
+@param[in] len length of the string
+@return ast string with NUL-terminator */
+fts_ast_string_t*
+fts_ast_string_create(
+ const byte* str,
+ ulint len)
+{
+ fts_ast_string_t* ast_str;
+
+ ut_ad(len > 0);
+
+ ast_str = static_cast<fts_ast_string_t*>(
+ ut_malloc_nokey(sizeof(fts_ast_string_t)));
+
+ ast_str->str = static_cast<byte*>(ut_malloc_nokey(len + 1));
+
+ ast_str->len = len;
+ memcpy(ast_str->str, str, len);
+ ast_str->str[len] = '\0';
+
+ return(ast_str);
+}
+
+/**
+Free an ast string instance
+@param[in,out] ast_str string to free */
+void
+fts_ast_string_free(
+ fts_ast_string_t* ast_str)
+{
+ if (ast_str != NULL) {
+ ut_free(ast_str->str);
+ ut_free(ast_str);
+ }
+}
+
+/**
+Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul
+@param[in] str string to translate
+@param[in] base the base
+@return translated number */
+ulint
+fts_ast_string_to_ul(
+ const fts_ast_string_t* ast_str,
+ int base)
+{
+ return(strtoul(reinterpret_cast<const char*>(ast_str->str),
+ NULL, base));
+}
+
+#ifdef UNIV_DEBUG
+const char*
+fts_ast_node_type_get(fts_ast_type_t type)
+{
+ switch (type) {
+ case FTS_AST_OPER:
+ return("FTS_AST_OPER");
+ case FTS_AST_NUMB:
+ return("FTS_AST_NUMB");
+ case FTS_AST_TERM:
+ return("FTS_AST_TERM");
+ case FTS_AST_TEXT:
+ return("FTS_AST_TEXT");
+ case FTS_AST_LIST:
+ return("FTS_AST_LIST");
+ case FTS_AST_SUBEXP_LIST:
+ return("FTS_AST_SUBEXP_LIST");
+ case FTS_AST_PARSER_PHRASE_LIST:
+ return("FTS_AST_PARSER_PHRASE_LIST");
+ }
+ ut_ad(0);
+ return("FTS_UNKNOWN");
+}
+#endif /* UNIV_DEBUG */
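
To show how the AST helpers above are meant to be combined, here is a rough, illustrative sketch (not part of this commit). It assumes the declarations from fts0ast.h and the InnoDB build environment, an fts_ast_state_t that the query parser has already initialized, and two term nodes previously created with fts_ast_create_node_term() against that state; count_terms and example_walk are made-up names used only for this example.

#include "fts0ast.h"

/* Illustrative visitor: count the FTS_AST_TERM leaves that the walk
reaches. The signature matches the visitor(oper, node, arg) calls made
inside fts_ast_visit(). */
static dberr_t
count_terms(fts_ast_oper_t oper, fts_ast_node_t* node, void* arg)
{
	if (node->type == FTS_AST_TERM) {
		++*static_cast<ulint*>(arg);
	}

	return(DB_SUCCESS);
}

/* Build the list (a b), an implied union of two terms, and visit it. */
static ulint
example_walk(
	fts_ast_state_t*	state,	/* parser-initialized AST state */
	trx_t*			trx,	/* running transaction */
	fts_ast_node_t*		term_a,
	fts_ast_node_t*		term_b)
{
	fts_ast_node_t*	list = fts_ast_create_node_list(state, term_a);

	fts_ast_add_node(list, term_b);

	/* fts_ast_visit() reads node->trx for interruption checks,
	so attach the caller's transaction to the node being visited. */
	list->trx = trx;

	ulint	n_terms = 0;
	bool	has_ignore = false;

	fts_ast_visit(FTS_NONE, list, count_terms, &n_terms, &has_ignore);

	/* Every node was tracked in state->list by fts_ast_state_add_node(),
	so a single call frees them all. */
	fts_ast_state_free(state);

	return(n_terms);
}
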
diff --git a/storage/innobase/fts/fts0blex.cc b/storage/innobase/fts/fts0blex.cc
new file mode 100644
index 00000000..6a2b4202
--- /dev/null
+++ b/storage/innobase/fts/fts0blex.cc
@@ -0,0 +1,2177 @@
+#include "univ.i"
+#line 2 "fts0blex.cc"
+
+#line 4 "fts0blex.cc"
+
+#define YY_INT_ALIGNED short int
+
+/* A lexical scanner generated by flex */
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 6
+#define YY_FLEX_SUBMINOR_VERSION 4
+#if YY_FLEX_SUBMINOR_VERSION > 0
+#define FLEX_BETA
+#endif
+
+#ifdef yy_create_buffer
+#define fts0b_create_buffer_ALREADY_DEFINED
+#else
+#define yy_create_buffer fts0b_create_buffer
+#endif
+
+#ifdef yy_delete_buffer
+#define fts0b_delete_buffer_ALREADY_DEFINED
+#else
+#define yy_delete_buffer fts0b_delete_buffer
+#endif
+
+#ifdef yy_scan_buffer
+#define fts0b_scan_buffer_ALREADY_DEFINED
+#else
+#define yy_scan_buffer fts0b_scan_buffer
+#endif
+
+#ifdef yy_scan_string
+#define fts0b_scan_string_ALREADY_DEFINED
+#else
+#define yy_scan_string fts0b_scan_string
+#endif
+
+#ifdef yy_scan_bytes
+#define fts0b_scan_bytes_ALREADY_DEFINED
+#else
+#define yy_scan_bytes fts0b_scan_bytes
+#endif
+
+#ifdef yy_init_buffer
+#define fts0b_init_buffer_ALREADY_DEFINED
+#else
+#define yy_init_buffer fts0b_init_buffer
+#endif
+
+#ifdef yy_flush_buffer
+#define fts0b_flush_buffer_ALREADY_DEFINED
+#else
+#define yy_flush_buffer fts0b_flush_buffer
+#endif
+
+#ifdef yy_load_buffer_state
+#define fts0b_load_buffer_state_ALREADY_DEFINED
+#else
+#define yy_load_buffer_state fts0b_load_buffer_state
+#endif
+
+#ifdef yy_switch_to_buffer
+#define fts0b_switch_to_buffer_ALREADY_DEFINED
+#else
+#define yy_switch_to_buffer fts0b_switch_to_buffer
+#endif
+
+#ifdef yypush_buffer_state
+#define fts0bpush_buffer_state_ALREADY_DEFINED
+#else
+#define yypush_buffer_state fts0bpush_buffer_state
+#endif
+
+#ifdef yypop_buffer_state
+#define fts0bpop_buffer_state_ALREADY_DEFINED
+#else
+#define yypop_buffer_state fts0bpop_buffer_state
+#endif
+
+#ifdef yyensure_buffer_stack
+#define fts0bensure_buffer_stack_ALREADY_DEFINED
+#else
+#define yyensure_buffer_stack fts0bensure_buffer_stack
+#endif
+
+#ifdef yylex
+#define fts0blex_ALREADY_DEFINED
+#else
+#define yylex fts0blex
+#endif
+
+#ifdef yyrestart
+#define fts0brestart_ALREADY_DEFINED
+#else
+#define yyrestart fts0brestart
+#endif
+
+#ifdef yylex_init
+#define fts0blex_init_ALREADY_DEFINED
+#else
+#define yylex_init fts0blex_init
+#endif
+
+#ifdef yylex_init_extra
+#define fts0blex_init_extra_ALREADY_DEFINED
+#else
+#define yylex_init_extra fts0blex_init_extra
+#endif
+
+#ifdef yylex_destroy
+#define fts0blex_destroy_ALREADY_DEFINED
+#else
+#define yylex_destroy fts0blex_destroy
+#endif
+
+#ifdef yyget_debug
+#define fts0bget_debug_ALREADY_DEFINED
+#else
+#define yyget_debug fts0bget_debug
+#endif
+
+#ifdef yyset_debug
+#define fts0bset_debug_ALREADY_DEFINED
+#else
+#define yyset_debug fts0bset_debug
+#endif
+
+#ifdef yyget_extra
+#define fts0bget_extra_ALREADY_DEFINED
+#else
+#define yyget_extra fts0bget_extra
+#endif
+
+#ifdef yyset_extra
+#define fts0bset_extra_ALREADY_DEFINED
+#else
+#define yyset_extra fts0bset_extra
+#endif
+
+#ifdef yyget_in
+#define fts0bget_in_ALREADY_DEFINED
+#else
+#define yyget_in fts0bget_in
+#endif
+
+#ifdef yyset_in
+#define fts0bset_in_ALREADY_DEFINED
+#else
+#define yyset_in fts0bset_in
+#endif
+
+#ifdef yyget_out
+#define fts0bget_out_ALREADY_DEFINED
+#else
+#define yyget_out fts0bget_out
+#endif
+
+#ifdef yyset_out
+#define fts0bset_out_ALREADY_DEFINED
+#else
+#define yyset_out fts0bset_out
+#endif
+
+#ifdef yyget_leng
+#define fts0bget_leng_ALREADY_DEFINED
+#else
+#define yyget_leng fts0bget_leng
+#endif
+
+#ifdef yyget_text
+#define fts0bget_text_ALREADY_DEFINED
+#else
+#define yyget_text fts0bget_text
+#endif
+
+#ifdef yyget_lineno
+#define fts0bget_lineno_ALREADY_DEFINED
+#else
+#define yyget_lineno fts0bget_lineno
+#endif
+
+#ifdef yyset_lineno
+#define fts0bset_lineno_ALREADY_DEFINED
+#else
+#define yyset_lineno fts0bset_lineno
+#endif
+
+#ifdef yyget_column
+#define fts0bget_column_ALREADY_DEFINED
+#else
+#define yyget_column fts0bget_column
+#endif
+
+#ifdef yyset_column
+#define fts0bset_column_ALREADY_DEFINED
+#else
+#define yyset_column fts0bset_column
+#endif
+
+#ifdef yywrap
+#define fts0bwrap_ALREADY_DEFINED
+#else
+#define yywrap fts0bwrap
+#endif
+
+#ifdef yyalloc
+#define fts0balloc_ALREADY_DEFINED
+#else
+#define yyalloc fts0balloc
+#endif
+
+#ifdef yyrealloc
+#define fts0brealloc_ALREADY_DEFINED
+#else
+#define yyrealloc fts0brealloc
+#endif
+
+#ifdef yyfree
+#define fts0bfree_ALREADY_DEFINED
+#else
+#define yyfree fts0bfree
+#endif
+
+/* First, we deal with platform-specific or compiler-specific issues. */
+
+/* begin standard C headers. */
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+/* end standard C headers. */
+
+/* flex integer type definitions */
+
+#ifndef FLEXINT_H
+#define FLEXINT_H
+
+/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+
+/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
+ * if you want the limit (max/min) macros for int types.
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS 1
+#endif
+
+#include <inttypes.h>
+typedef int8_t flex_int8_t;
+typedef uint8_t flex_uint8_t;
+typedef int16_t flex_int16_t;
+typedef uint16_t flex_uint16_t;
+typedef int32_t flex_int32_t;
+typedef uint32_t flex_uint32_t;
+#else
+typedef signed char flex_int8_t;
+typedef short int flex_int16_t;
+typedef int flex_int32_t;
+typedef unsigned char flex_uint8_t;
+typedef unsigned short int flex_uint16_t;
+typedef unsigned int flex_uint32_t;
+
+/* Limits of integral types. */
+#ifndef INT8_MIN
+#define INT8_MIN (-128)
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN (-32767-1)
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN (-2147483647-1)
+#endif
+#ifndef INT8_MAX
+#define INT8_MAX (127)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX (32767)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX (2147483647)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX (255U)
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX (65535U)
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX (4294967295U)
+#endif
+
+#ifndef SIZE_MAX
+#define SIZE_MAX (~(size_t)0)
+#endif
+
+#endif /* ! C99 */
+
+#endif /* ! FLEXINT_H */
+
+/* begin standard C++ headers. */
+
+/* TODO: this is always defined, so inline it */
+#define yyconst const
+
+#if defined(__GNUC__) && __GNUC__ >= 3
+#define yynoreturn __attribute__((__noreturn__))
+#else
+#define yynoreturn
+#endif
+
+/* Returned upon end-of-file. */
+#define YY_NULL 0
+
+/* Promotes a possibly negative, possibly signed char to an
+ * integer in range [0..255] for use as an array index.
+ */
+#define YY_SC_TO_UI(c) ((YY_CHAR) (c))
+
+/* An opaque pointer. */
+#ifndef YY_TYPEDEF_YY_SCANNER_T
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void* yyscan_t;
+#endif
+
+/* For convenience, these vars (plus the bison vars far below)
+ are macros in the reentrant scanner. */
+#define yyin yyg->yyin_r
+#define yyout yyg->yyout_r
+#define yyextra yyg->yyextra_r
+#define yyleng yyg->yyleng_r
+#define yytext yyg->yytext_r
+#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno)
+#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column)
+#define yy_flex_debug yyg->yy_flex_debug_r
+
+/* Enter a start condition. This macro really ought to take a parameter,
+ * but we do it the disgusting crufty way forced on us by the ()-less
+ * definition of BEGIN.
+ */
+#define BEGIN yyg->yy_start = 1 + 2 *
+/* Translate the current start state into a value that can be later handed
+ * to BEGIN to return to the state. The YYSTATE alias is for lex
+ * compatibility.
+ */
+#define YY_START ((yyg->yy_start - 1) / 2)
+#define YYSTATE YY_START
+/* Action number for EOF rule of a given start state. */
+#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
+/* Special action meaning "start processing a new file". */
+#define YY_NEW_FILE yyrestart( yyin , yyscanner )
+#define YY_END_OF_BUFFER_CHAR 0
+
+/* Size of default input buffer. */
+#ifndef YY_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k.
+ * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
+ * Ditto for the __ia64__ case accordingly.
+ */
+#define YY_BUF_SIZE 32768
+#else
+#define YY_BUF_SIZE 16384
+#endif /* __ia64__ */
+#endif
+
+/* The state buf must be large enough to hold one state per character in the main buffer.
+ */
+#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type))
+
+#ifndef YY_TYPEDEF_YY_BUFFER_STATE
+#define YY_TYPEDEF_YY_BUFFER_STATE
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+#endif
+
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+#define EOB_ACT_CONTINUE_SCAN 0
+#define EOB_ACT_END_OF_FILE 1
+#define EOB_ACT_LAST_MATCH 2
+
+ #define YY_LESS_LINENO(n)
+ #define YY_LINENO_REWIND_TO(ptr)
+
+/* Return all but the first "n" matched characters back to the input stream. */
+#define yyless(n) \
+ do \
+ { \
+ /* Undo effects of setting up yytext. */ \
+ int yyless_macro_arg = (n); \
+ YY_LESS_LINENO(yyless_macro_arg);\
+ *yy_cp = yyg->yy_hold_char; \
+ YY_RESTORE_YY_MORE_OFFSET \
+ yyg->yy_c_buf_p = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
+ YY_DO_BEFORE_ACTION; /* set up yytext again */ \
+ } \
+ while ( 0 )
+#define unput(c) yyunput( c, yyg->yytext_ptr , yyscanner )
+
+#ifndef YY_STRUCT_YY_BUFFER_STATE
+#define YY_STRUCT_YY_BUFFER_STATE
+struct yy_buffer_state
+ {
+ FILE *yy_input_file;
+
+ char *yy_ch_buf; /* input buffer */
+ char *yy_buf_pos; /* current position in input buffer */
+
+ /* Size of input buffer in bytes, not including room for EOB
+ * characters.
+ */
+ int yy_buf_size;
+
+ /* Number of characters read into yy_ch_buf, not including EOB
+ * characters.
+ */
+ int yy_n_chars;
+
+ /* Whether we "own" the buffer - i.e., we know we created it,
+ * and can realloc() it to grow it, and should free() it to
+ * delete it.
+ */
+ int yy_is_our_buffer;
+
+ /* Whether this is an "interactive" input source; if so, and
+ * if we're using stdio for input, then we want to use getc()
+ * instead of fread(), to make sure we stop fetching input after
+ * each newline.
+ */
+ int yy_is_interactive;
+
+ /* Whether we're considered to be at the beginning of a line.
+ * If so, '^' rules will be active on the next match, otherwise
+ * not.
+ */
+ int yy_at_bol;
+
+ int yy_bs_lineno; /**< The line count. */
+ int yy_bs_column; /**< The column count. */
+
+ /* Whether to try to fill the input buffer when we reach the
+ * end of it.
+ */
+ int yy_fill_buffer;
+
+ int yy_buffer_status;
+
+#define YY_BUFFER_NEW 0
+#define YY_BUFFER_NORMAL 1
+ /* When an EOF's been seen but there's still some text to process
+ * then we mark the buffer as YY_EOF_PENDING, to indicate that we
+ * shouldn't try reading from the input source any more. We might
+ * still have a bunch of tokens to match, though, because of
+ * possible backing-up.
+ *
+ * When we actually see the EOF, we change the status to "new"
+ * (via yyrestart()), so that the user can continue scanning by
+ * just pointing yyin at a new input file.
+ */
+#define YY_BUFFER_EOF_PENDING 2
+
+ };
+#endif /* !YY_STRUCT_YY_BUFFER_STATE */
+
+/* We provide macros for accessing buffer states in case in the
+ * future we want to put the buffer states in a more general
+ * "scanner state".
+ *
+ * Returns the top of the stack, or NULL.
+ */
+#define YY_CURRENT_BUFFER ( yyg->yy_buffer_stack \
+ ? yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] \
+ : 0)
+/* Same as previous macro, but useful when we know that the buffer stack is not
+ * NULL or when we need an lvalue. For internal use only.
+ */
+#define YY_CURRENT_BUFFER_LVALUE yyg->yy_buffer_stack[yyg->yy_buffer_stack_top]
+
+void yyrestart ( FILE *input_file , yyscan_t yyscanner );
+void yy_switch_to_buffer ( YY_BUFFER_STATE new_buffer , yyscan_t yyscanner );
+YY_BUFFER_STATE yy_create_buffer ( FILE *file, int size , yyscan_t yyscanner );
+void yy_delete_buffer ( YY_BUFFER_STATE b , yyscan_t yyscanner );
+void yy_flush_buffer ( YY_BUFFER_STATE b , yyscan_t yyscanner );
+void yypush_buffer_state ( YY_BUFFER_STATE new_buffer , yyscan_t yyscanner );
+void yypop_buffer_state ( yyscan_t yyscanner );
+
+static void yyensure_buffer_stack ( yyscan_t yyscanner );
+static void yy_load_buffer_state ( yyscan_t yyscanner );
+static void yy_init_buffer ( YY_BUFFER_STATE b, FILE *file , yyscan_t yyscanner );
+#define YY_FLUSH_BUFFER yy_flush_buffer( YY_CURRENT_BUFFER , yyscanner)
+
+YY_BUFFER_STATE yy_scan_buffer ( char *base, yy_size_t size , yyscan_t yyscanner );
+YY_BUFFER_STATE yy_scan_string ( const char *yy_str , yyscan_t yyscanner );
+YY_BUFFER_STATE yy_scan_bytes ( const char *bytes, int len , yyscan_t yyscanner );
+
+void *yyalloc ( yy_size_t , yyscan_t yyscanner );
+void *yyrealloc ( void *, yy_size_t , yyscan_t yyscanner );
+void yyfree ( void * , yyscan_t yyscanner );
+
+#define yy_new_buffer yy_create_buffer
+#define yy_set_interactive(is_interactive) \
+ { \
+ if ( ! YY_CURRENT_BUFFER ){ \
+ yyensure_buffer_stack (yyscanner); \
+ YY_CURRENT_BUFFER_LVALUE = \
+ yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner); \
+ } \
+ YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
+ }
+#define yy_set_bol(at_bol) \
+ { \
+ if ( ! YY_CURRENT_BUFFER ){\
+ yyensure_buffer_stack (yyscanner); \
+ YY_CURRENT_BUFFER_LVALUE = \
+ yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner); \
+ } \
+ YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
+ }
+#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
+
+/* Begin user sect3 */
+
+#define fts0bwrap(yyscanner) (/*CONSTCOND*/1)
+#define YY_SKIP_YYWRAP
+typedef flex_uint8_t YY_CHAR;
+
+typedef int yy_state_type;
+
+#define yytext_ptr yytext_r
+
+static yy_state_type yy_get_previous_state ( yyscan_t yyscanner );
+static yy_state_type yy_try_NUL_trans ( yy_state_type current_state , yyscan_t yyscanner);
+static int yy_get_next_buffer ( yyscan_t yyscanner );
+static void yynoreturn yy_fatal_error ( const char* msg , yyscan_t yyscanner );
+
+/* Done after the current pattern has been matched and before the
+ * corresponding action - sets up yytext.
+ */
+#define YY_DO_BEFORE_ACTION \
+ yyg->yytext_ptr = yy_bp; \
+ yyleng = (int) (yy_cp - yy_bp); \
+ yyg->yy_hold_char = *yy_cp; \
+ *yy_cp = '\0'; \
+ yyg->yy_c_buf_p = yy_cp;
+#define YY_NUM_RULES 7
+#define YY_END_OF_BUFFER 8
+/* This struct is not used in this scanner,
+ but its presence is necessary. */
+struct yy_trans_info
+ {
+ flex_int32_t yy_verify;
+ flex_int32_t yy_nxt;
+ };
+static const flex_int16_t yy_accept[19] =
+ { 0,
+ 4, 4, 8, 4, 1, 6, 1, 7, 7, 2,
+ 3, 4, 1, 1, 0, 5, 3, 0
+ } ;
+
+static const YY_CHAR yy_ec[256] =
+ { 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 4, 1, 5, 1, 1, 6, 1, 1, 7,
+ 7, 7, 7, 1, 7, 1, 1, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 1, 1, 7,
+ 1, 7, 1, 7, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 7, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1
+ } ;
+
+static const YY_CHAR yy_meta[9] =
+ { 0,
+ 1, 2, 3, 4, 5, 5, 5, 1
+ } ;
+
+static const flex_int16_t yy_base[22] =
+ { 0,
+ 0, 0, 22, 0, 7, 23, 0, 14, 23, 23,
+ 7, 0, 0, 0, 5, 23, 0, 23, 11, 12,
+ 16
+ } ;
+
+static const flex_int16_t yy_def[22] =
+ { 0,
+ 18, 1, 18, 19, 19, 18, 20, 21, 18, 18,
+ 19, 19, 5, 20, 21, 18, 11, 0, 18, 18,
+ 18
+ } ;
+
+static const flex_int16_t yy_nxt[32] =
+ { 0,
+ 4, 5, 6, 7, 8, 9, 10, 11, 13, 16,
+ 14, 12, 12, 14, 17, 14, 15, 15, 16, 15,
+ 15, 18, 3, 18, 18, 18, 18, 18, 18, 18,
+ 18
+ } ;
+
+static const flex_int16_t yy_chk[32] =
+ { 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 5, 15,
+ 5, 19, 19, 20, 11, 20, 21, 21, 8, 21,
+ 21, 3, 18, 18, 18, 18, 18, 18, 18, 18,
+ 18
+ } ;
+
+/* The intent behind this definition is that it'll catch
+ * any uses of REJECT which flex missed.
+ */
+#define REJECT reject_used_but_not_detected
+#define yymore() yymore_used_but_not_detected
+#define YY_MORE_ADJ 0
+#define YY_RESTORE_YY_MORE_OFFSET
+#line 1 "fts0blex.l"
+/*****************************************************************************
+
+Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+/**
+ * @file fts/fts0blex.l
+ * FTS parser lexical analyzer
+ *
+ * Created 2007/5/9 Sunny Bains
+ */
+#line 27 "fts0blex.l"
+
+#include "fts0ast.h"
+#include "fts0pars.h"
+
+/* Required for reentrant parser */
+#define YY_DECL int fts_blexer(YYSTYPE* val, yyscan_t yyscanner)
+#define exit(A) ut_error
+
+#line 675 "fts0blex.cc"
+#define YY_NO_INPUT 1
+#line 677 "fts0blex.cc"
+
+#define INITIAL 0
+
+#ifndef YY_NO_UNISTD_H
+/* Special case for "unistd.h", since it is non-ANSI. We include it way
+ * down here because we want the user's section 1 to have been scanned first.
+ * The user has a chance to override it with an option.
+ */
+#include <unistd.h>
+#endif
+
+#ifndef YY_EXTRA_TYPE
+#define YY_EXTRA_TYPE void *
+#endif
+
+/* Holds the entire state of the reentrant scanner. */
+struct yyguts_t
+ {
+
+ /* User-defined. Not touched by flex. */
+ YY_EXTRA_TYPE yyextra_r;
+
+ /* The rest are the same as the globals declared in the non-reentrant scanner. */
+ FILE *yyin_r, *yyout_r;
+ size_t yy_buffer_stack_top; /**< index of top of stack. */
+ size_t yy_buffer_stack_max; /**< capacity of stack. */
+ YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. */
+ char yy_hold_char;
+ int yy_n_chars;
+ int yyleng_r;
+ char *yy_c_buf_p;
+ int yy_init;
+ int yy_start;
+ int yy_did_buffer_switch_on_eof;
+ int yy_start_stack_ptr;
+ int yy_start_stack_depth;
+ int *yy_start_stack;
+ yy_state_type yy_last_accepting_state;
+ char* yy_last_accepting_cpos;
+
+ int yylineno_r;
+ int yy_flex_debug_r;
+
+ char *yytext_r;
+ int yy_more_flag;
+ int yy_more_len;
+
+ }; /* end struct yyguts_t */
+
+static int yy_init_globals ( yyscan_t yyscanner );
+
+int yylex_init (yyscan_t* scanner);
+
+int yylex_init_extra ( YY_EXTRA_TYPE user_defined, yyscan_t* scanner);
+
+/* Accessor methods to globals.
+ These are made visible to non-reentrant scanners for convenience. */
+
+int yylex_destroy ( yyscan_t yyscanner );
+
+int yyget_debug ( yyscan_t yyscanner );
+
+void yyset_debug ( int debug_flag , yyscan_t yyscanner );
+
+YY_EXTRA_TYPE yyget_extra ( yyscan_t yyscanner );
+
+void yyset_extra ( YY_EXTRA_TYPE user_defined , yyscan_t yyscanner );
+
+FILE *yyget_in ( yyscan_t yyscanner );
+
+void yyset_in ( FILE * _in_str , yyscan_t yyscanner );
+
+FILE *yyget_out ( yyscan_t yyscanner );
+
+void yyset_out ( FILE * _out_str , yyscan_t yyscanner );
+
+ int yyget_leng ( yyscan_t yyscanner );
+
+char *yyget_text ( yyscan_t yyscanner );
+
+int yyget_lineno ( yyscan_t yyscanner );
+
+void yyset_lineno ( int _line_number , yyscan_t yyscanner );
+
+int yyget_column ( yyscan_t yyscanner );
+
+void yyset_column ( int _column_no , yyscan_t yyscanner );
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int yywrap ( yyscan_t yyscanner );
+#else
+extern int yywrap ( yyscan_t yyscanner );
+#endif
+#endif
+
+#ifndef YY_NO_UNPUT
+
+#endif
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy ( char *, const char *, int , yyscan_t yyscanner);
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen ( const char * , yyscan_t yyscanner);
+#endif
+
+#ifndef YY_NO_INPUT
+#ifdef __cplusplus
+static int yyinput ( yyscan_t yyscanner );
+#else
+static int input ( yyscan_t yyscanner );
+#endif
+
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k */
+#define YY_READ_BUF_SIZE 16384
+#else
+#define YY_READ_BUF_SIZE 8192
+#endif /* __ia64__ */
+#endif
+
+/* Copy whatever the last rule matched to the standard output. */
+#ifndef ECHO
+/* This used to be an fputs(), but since the string might contain NUL's,
+ * we now use fwrite().
+ */
+#define ECHO do { if (fwrite( yytext, (size_t) yyleng, 1, yyout )) {} } while (0)
+#endif
+
+/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
+ * is returned in "result".
+ */
+#ifndef YY_INPUT
+#define YY_INPUT(buf,result,max_size) \
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
+ { \
+ int c = '*'; \
+ int n; \
+ for ( n = 0; n < max_size && \
+ (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
+ buf[n] = (char) c; \
+ if ( c == '\n' ) \
+ buf[n++] = (char) c; \
+ if ( c == EOF && ferror( yyin ) ) \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ result = n; \
+ } \
+ else \
+ { \
+ errno=0; \
+ while ( (result = (int) fread(buf, 1, (yy_size_t) max_size, yyin)) == 0 && ferror(yyin)) \
+ { \
+ if( errno != EINTR) \
+ { \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ break; \
+ } \
+ errno=0; \
+ clearerr(yyin); \
+ } \
+ }\
+\
+
+#endif
+
+/* No semi-colon after return; correct usage is to write "yyterminate();" -
+ * we don't want an extra ';' after the "return" because that will cause
+ * some compilers to complain about unreachable statements.
+ */
+#ifndef yyterminate
+#define yyterminate() return YY_NULL
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Report a fatal error. */
+#ifndef YY_FATAL_ERROR
+#define YY_FATAL_ERROR(msg) yy_fatal_error( msg , yyscanner)
+#endif
+
+/* end tables serialization structures and prototypes */
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL_IS_OURS 1
+
+extern int yylex (yyscan_t yyscanner);
+
+#define YY_DECL int yylex (yyscan_t yyscanner)
+#endif /* !YY_DECL */
+
+/* Code executed at the beginning of each rule, after yytext and yyleng
+ * have been set up.
+ */
+#ifndef YY_USER_ACTION
+#define YY_USER_ACTION
+#endif
+
+/* Code executed at the end of each rule. */
+#ifndef YY_BREAK
+#define YY_BREAK /*LINTED*/break;
+#endif
+
+#define YY_RULE_SETUP \
+ YY_USER_ACTION
+
+/** The main scanner function which does all the work.
+ */
+YY_DECL
+{
+ yy_state_type yy_current_state;
+ char *yy_cp, *yy_bp;
+ int yy_act;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ if ( !yyg->yy_init )
+ {
+ yyg->yy_init = 1;
+
+#ifdef YY_USER_INIT
+ YY_USER_INIT;
+#endif
+
+ if ( ! yyg->yy_start )
+ yyg->yy_start = 1; /* first start state */
+
+ if ( ! yyin )
+ yyin = stdin;
+
+ if ( ! yyout )
+ yyout = stdout;
+
+ if ( ! YY_CURRENT_BUFFER ) {
+ yyensure_buffer_stack (yyscanner);
+ YY_CURRENT_BUFFER_LVALUE =
+ yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner);
+ }
+
+ yy_load_buffer_state( yyscanner );
+ }
+
+ {
+#line 44 "fts0blex.l"
+
+
+#line 938 "fts0blex.cc"
+
+ while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */
+ {
+ yy_cp = yyg->yy_c_buf_p;
+
+ /* Support of yytext. */
+ *yy_cp = yyg->yy_hold_char;
+
+ /* yy_bp points to the position in yy_ch_buf of the start of
+ * the current run.
+ */
+ yy_bp = yy_cp;
+
+ yy_current_state = yyg->yy_start;
+yy_match:
+ do
+ {
+ YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ;
+ if ( yy_accept[yy_current_state] )
+ {
+ yyg->yy_last_accepting_state = yy_current_state;
+ yyg->yy_last_accepting_cpos = yy_cp;
+ }
+ while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
+ {
+ yy_current_state = (int) yy_def[yy_current_state];
+ if ( yy_current_state >= 19 )
+ yy_c = yy_meta[yy_c];
+ }
+ yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
+ ++yy_cp;
+ }
+ while ( yy_current_state != 18 );
+ yy_cp = yyg->yy_last_accepting_cpos;
+ yy_current_state = yyg->yy_last_accepting_state;
+
+yy_find_action:
+ yy_act = yy_accept[yy_current_state];
+
+ YY_DO_BEFORE_ACTION;
+
+do_action: /* This label is used only to access EOF actions. */
+
+ switch ( yy_act )
+ { /* beginning of action switch */
+ case 0: /* must back up */
+ /* undo the effects of YY_DO_BEFORE_ACTION */
+ *yy_cp = yyg->yy_hold_char;
+ yy_cp = yyg->yy_last_accepting_cpos;
+ yy_current_state = yyg->yy_last_accepting_state;
+ goto yy_find_action;
+
+case 1:
+YY_RULE_SETUP
+#line 46 "fts0blex.l"
+/* Ignore whitespace */ ;
+ YY_BREAK
+case 2:
+YY_RULE_SETUP
+#line 48 "fts0blex.l"
+{
+ val->oper = fts0bget_text(yyscanner)[0];
+
+ return(val->oper);
+}
+ YY_BREAK
+case 3:
+YY_RULE_SETUP
+#line 54 "fts0blex.l"
+{
+ val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner));
+
+ return(FTS_NUMB);
+}
+ YY_BREAK
+case 4:
+YY_RULE_SETUP
+#line 60 "fts0blex.l"
+{
+ val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner));
+
+ return(FTS_TERM);
+}
+ YY_BREAK
+case 5:
+YY_RULE_SETUP
+#line 66 "fts0blex.l"
+{
+ val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner));
+
+ return(FTS_TEXT);
+}
+ YY_BREAK
+case 6:
+/* rule 6 can match eol */
+YY_RULE_SETUP
+#line 72 "fts0blex.l"
+
+ YY_BREAK
+case 7:
+YY_RULE_SETUP
+#line 74 "fts0blex.l"
+ECHO;
+ YY_BREAK
+#line 1043 "fts0blex.cc"
+case YY_STATE_EOF(INITIAL):
+ yyterminate();
+
+ case YY_END_OF_BUFFER:
+ {
+ /* Amount of text matched not including the EOB char. */
+ int yy_amount_of_matched_text = (int) (yy_cp - yyg->yytext_ptr) - 1;
+
+ /* Undo the effects of YY_DO_BEFORE_ACTION. */
+ *yy_cp = yyg->yy_hold_char;
+ YY_RESTORE_YY_MORE_OFFSET
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
+ {
+ /* We're scanning a new file or input source. It's
+ * possible that this happened because the user
+ * just pointed yyin at a new source and called
+ * yylex(). If so, then we have to assure
+ * consistency between YY_CURRENT_BUFFER and our
+ * globals. Here is the right place to do so, because
+ * this is the first action (other than possibly a
+ * back-up) that will match for the new input source.
+ */
+ yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+ YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin;
+ YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
+ }
+
+ /* Note that here we test for yy_c_buf_p "<=" to the position
+ * of the first EOB in the buffer, since yy_c_buf_p will
+ * already have been incremented past the NUL character
+ * (since all states make transitions on EOB to the
+ * end-of-buffer state). Contrast this with the test
+ * in input().
+ */
+ if ( yyg->yy_c_buf_p <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] )
+ { /* This was really a NUL. */
+ yy_state_type yy_next_state;
+
+ yyg->yy_c_buf_p = yyg->yytext_ptr + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state( yyscanner );
+
+ /* Okay, we're now positioned to make the NUL
+ * transition. We couldn't have
+ * yy_get_previous_state() go ahead and do it
+ * for us because it doesn't know how to deal
+ * with the possibility of jamming (and we don't
+ * want to build jamming into it because then it
+ * will run more slowly).
+ */
+
+ yy_next_state = yy_try_NUL_trans( yy_current_state , yyscanner);
+
+ yy_bp = yyg->yytext_ptr + YY_MORE_ADJ;
+
+ if ( yy_next_state )
+ {
+ /* Consume the NUL. */
+ yy_cp = ++yyg->yy_c_buf_p;
+ yy_current_state = yy_next_state;
+ goto yy_match;
+ }
+
+ else
+ {
+ yy_cp = yyg->yy_last_accepting_cpos;
+ yy_current_state = yyg->yy_last_accepting_state;
+ goto yy_find_action;
+ }
+ }
+
+ else switch ( yy_get_next_buffer( yyscanner ) )
+ {
+ case EOB_ACT_END_OF_FILE:
+ {
+ yyg->yy_did_buffer_switch_on_eof = 0;
+
+ if ( yywrap( yyscanner ) )
+ {
+ /* Note: because we've taken care in
+ * yy_get_next_buffer() to have set up
+ * yytext, we can now set up
+ * yy_c_buf_p so that if some total
+ * hoser (like flex itself) wants to
+ * call the scanner after we return the
+ * YY_NULL, it'll still work - another
+ * YY_NULL will get returned.
+ */
+ yyg->yy_c_buf_p = yyg->yytext_ptr + YY_MORE_ADJ;
+
+ yy_act = YY_STATE_EOF(YY_START);
+ goto do_action;
+ }
+
+ else
+ {
+ if ( ! yyg->yy_did_buffer_switch_on_eof )
+ YY_NEW_FILE;
+ }
+ break;
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ yyg->yy_c_buf_p =
+ yyg->yytext_ptr + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state( yyscanner );
+
+ yy_cp = yyg->yy_c_buf_p;
+ yy_bp = yyg->yytext_ptr + YY_MORE_ADJ;
+ goto yy_match;
+
+ case EOB_ACT_LAST_MATCH:
+ yyg->yy_c_buf_p =
+ &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars];
+
+ yy_current_state = yy_get_previous_state( yyscanner );
+
+ yy_cp = yyg->yy_c_buf_p;
+ yy_bp = yyg->yytext_ptr + YY_MORE_ADJ;
+ goto yy_find_action;
+ }
+ break;
+ }
+
+ default:
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--no action found" );
+ } /* end of action switch */
+ } /* end of scanning one token */
+ } /* end of user's declarations */
+} /* end of yylex */
+
+/* yy_get_next_buffer - try to read in a new buffer
+ *
+ * Returns a code representing an action:
+ * EOB_ACT_LAST_MATCH -
+ * EOB_ACT_CONTINUE_SCAN - continue scanning from current position
+ * EOB_ACT_END_OF_FILE - end of file
+ */
+static int yy_get_next_buffer (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
+ char *source = yyg->yytext_ptr;
+ int number_to_move, i;
+ int ret_val;
+
+ if ( yyg->yy_c_buf_p > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] )
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--end of buffer missed" );
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
+ { /* Don't try to fill the buffer, so this is an EOF. */
+ if ( yyg->yy_c_buf_p - yyg->yytext_ptr - YY_MORE_ADJ == 1 )
+ {
+ /* We matched a single character, the EOB, so
+ * treat this as a final EOF.
+ */
+ return EOB_ACT_END_OF_FILE;
+ }
+
+ else
+ {
+ /* We matched some text prior to the EOB, first
+ * process it.
+ */
+ return EOB_ACT_LAST_MATCH;
+ }
+ }
+
+ /* Try to read more data. */
+
+ /* First move last chars to start of buffer. */
+ number_to_move = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr - 1);
+
+ for ( i = 0; i < number_to_move; ++i )
+ *(dest++) = *(source++);
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
+ /* don't do the read, it's not guaranteed to return an EOF,
+ * just force an EOF
+ */
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars = 0;
+
+ else
+ {
+ int num_to_read =
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
+
+ while ( num_to_read <= 0 )
+ { /* Not enough room in the buffer - grow it. */
+
+ /* just a shorter name for the current buffer */
+ YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE;
+
+ int yy_c_buf_p_offset =
+ (int) (yyg->yy_c_buf_p - b->yy_ch_buf);
+
+ if ( b->yy_is_our_buffer )
+ {
+ int new_size = b->yy_buf_size * 2;
+
+ if ( new_size <= 0 )
+ b->yy_buf_size += b->yy_buf_size / 8;
+ else
+ b->yy_buf_size *= 2;
+
+ b->yy_ch_buf = (char *)
+ /* Include room in for 2 EOB chars. */
+ yyrealloc( (void *) b->yy_ch_buf,
+ (yy_size_t) (b->yy_buf_size + 2) , yyscanner );
+ }
+ else
+ /* Can't grow it, we don't own it. */
+ b->yy_ch_buf = NULL;
+
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR(
+ "fatal error - scanner input buffer overflow" );
+
+ yyg->yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset];
+
+ num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size -
+ number_to_move - 1;
+
+ }
+
+ if ( num_to_read > YY_READ_BUF_SIZE )
+ num_to_read = YY_READ_BUF_SIZE;
+
+ /* Read in more data. */
+ YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
+ yyg->yy_n_chars, num_to_read );
+
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
+ }
+
+ if ( yyg->yy_n_chars == 0 )
+ {
+ if ( number_to_move == YY_MORE_ADJ )
+ {
+ ret_val = EOB_ACT_END_OF_FILE;
+ yyrestart( yyin , yyscanner);
+ }
+
+ else
+ {
+ ret_val = EOB_ACT_LAST_MATCH;
+ YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
+ YY_BUFFER_EOF_PENDING;
+ }
+ }
+
+ else
+ ret_val = EOB_ACT_CONTINUE_SCAN;
+
+ if ((yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
+ /* Extend the array by 50%, plus the number we really need. */
+ int new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1);
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc(
+ (void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf, (yy_size_t) new_size , yyscanner );
+ if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
+ /* "- 2" to take care of EOB's */
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_size = (int) (new_size - 2);
+ }
+
+ yyg->yy_n_chars += number_to_move;
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] = YY_END_OF_BUFFER_CHAR;
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR;
+
+ yyg->yytext_ptr = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
+
+ return ret_val;
+}
+
+/* yy_get_previous_state - get the state just before the EOB char was reached */
+
+ static yy_state_type yy_get_previous_state (yyscan_t yyscanner)
+{
+ yy_state_type yy_current_state;
+ char *yy_cp;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ yy_current_state = yyg->yy_start;
+
+ for ( yy_cp = yyg->yytext_ptr + YY_MORE_ADJ; yy_cp < yyg->yy_c_buf_p; ++yy_cp )
+ {
+ YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
+ if ( yy_accept[yy_current_state] )
+ {
+ yyg->yy_last_accepting_state = yy_current_state;
+ yyg->yy_last_accepting_cpos = yy_cp;
+ }
+ while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
+ {
+ yy_current_state = (int) yy_def[yy_current_state];
+ if ( yy_current_state >= 19 )
+ yy_c = yy_meta[yy_c];
+ }
+ yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
+ }
+
+ return yy_current_state;
+}
+
+/* yy_try_NUL_trans - try to make a transition on the NUL character
+ *
+ * synopsis
+ * next_state = yy_try_NUL_trans( current_state );
+ */
+ static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner)
+{
+ int yy_is_jam;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */
+ char *yy_cp = yyg->yy_c_buf_p;
+
+ YY_CHAR yy_c = 1;
+ if ( yy_accept[yy_current_state] )
+ {
+ yyg->yy_last_accepting_state = yy_current_state;
+ yyg->yy_last_accepting_cpos = yy_cp;
+ }
+ while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
+ {
+ yy_current_state = (int) yy_def[yy_current_state];
+ if ( yy_current_state >= 19 )
+ yy_c = yy_meta[yy_c];
+ }
+ yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
+ yy_is_jam = (yy_current_state == 18);
+
+ (void)yyg;
+ return yy_is_jam ? 0 : yy_current_state;
+}
+
+#ifndef YY_NO_UNPUT
+
+#endif
+
+#ifndef YY_NO_INPUT
+#ifdef __cplusplus
+ static int yyinput (yyscan_t yyscanner)
+#else
+ static int input (yyscan_t yyscanner)
+#endif
+
+{
+ int c;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ *yyg->yy_c_buf_p = yyg->yy_hold_char;
+
+ if ( *yyg->yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
+ {
+ /* yy_c_buf_p now points to the character we want to return.
+ * If this occurs *before* the EOB characters, then it's a
+ * valid NUL; if not, then we've hit the end of the buffer.
+ */
+ if ( yyg->yy_c_buf_p < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] )
+ /* This was really a NUL. */
+ *yyg->yy_c_buf_p = '\0';
+
+ else
+ { /* need more input */
+ int offset = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr);
+ ++yyg->yy_c_buf_p;
+
+ switch ( yy_get_next_buffer( yyscanner ) )
+ {
+ case EOB_ACT_LAST_MATCH:
+ /* This happens because yy_g_n_b()
+ * sees that we've accumulated a
+ * token and flags that we need to
+ * try matching the token before
+ * proceeding. But for input(),
+ * there's no matching to consider.
+ * So convert the EOB_ACT_LAST_MATCH
+ * to EOB_ACT_END_OF_FILE.
+ */
+
+ /* Reset buffer status. */
+ yyrestart( yyin , yyscanner);
+
+ /*FALLTHROUGH*/
+
+ case EOB_ACT_END_OF_FILE:
+ {
+ if ( yywrap( yyscanner ) )
+ return 0;
+
+ if ( ! yyg->yy_did_buffer_switch_on_eof )
+ YY_NEW_FILE;
+#ifdef __cplusplus
+ return yyinput(yyscanner);
+#else
+ return input(yyscanner);
+#endif
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ yyg->yy_c_buf_p = yyg->yytext_ptr + offset;
+ break;
+ }
+ }
+ }
+
+ c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */
+ *yyg->yy_c_buf_p = '\0'; /* preserve yytext */
+ yyg->yy_hold_char = *++yyg->yy_c_buf_p;
+
+ return c;
+}
+#endif /* ifndef YY_NO_INPUT */
+
+/** Immediately switch to a different input stream.
+ * @param input_file A readable stream.
+ * @param yyscanner The scanner object.
+ * @note This function does not reset the start condition to @c INITIAL .
+ */
+ void yyrestart (FILE * input_file , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ if ( ! YY_CURRENT_BUFFER ){
+ yyensure_buffer_stack (yyscanner);
+ YY_CURRENT_BUFFER_LVALUE =
+ yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner);
+ }
+
+ yy_init_buffer( YY_CURRENT_BUFFER, input_file , yyscanner);
+ yy_load_buffer_state( yyscanner );
+}
+
+/** Switch to a different input buffer.
+ * @param new_buffer The new input buffer.
+ * @param yyscanner The scanner object.
+ */
+ void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ /* TODO. We should be able to replace this entire function body
+ * with
+ * yypop_buffer_state();
+ * yypush_buffer_state(new_buffer);
+ */
+ yyensure_buffer_stack (yyscanner);
+ if ( YY_CURRENT_BUFFER == new_buffer )
+ return;
+
+ if ( YY_CURRENT_BUFFER )
+ {
+ /* Flush out information for old buffer. */
+ *yyg->yy_c_buf_p = yyg->yy_hold_char;
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p;
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
+ }
+
+ YY_CURRENT_BUFFER_LVALUE = new_buffer;
+ yy_load_buffer_state( yyscanner );
+
+ /* We don't actually know whether we did this switch during
+ * EOF (yywrap()) processing, but the only time this flag
+ * is looked at is after yywrap() is called, so it's safe
+ * to go ahead and always set it.
+ */
+ yyg->yy_did_buffer_switch_on_eof = 1;
+}
+
+static void yy_load_buffer_state (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+ yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
+ yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
+ yyg->yy_hold_char = *yyg->yy_c_buf_p;
+}
+
+/** Allocate and initialize an input buffer state.
+ * @param file A readable stream.
+ * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
+ * @param yyscanner The scanner object.
+ * @return the allocated buffer state.
+ */
+ YY_BUFFER_STATE yy_create_buffer (FILE * file, int size , yyscan_t yyscanner)
+{
+ YY_BUFFER_STATE b;
+
+ b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) , yyscanner );
+ if ( ! b )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+ b->yy_buf_size = size;
+
+ /* yy_ch_buf has to be 2 characters longer than the size given because
+ * we need to put in 2 end-of-buffer characters.
+ */
+ b->yy_ch_buf = (char *) yyalloc( (yy_size_t) (b->yy_buf_size + 2) , yyscanner );
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+ b->yy_is_our_buffer = 1;
+
+ yy_init_buffer( b, file , yyscanner);
+
+ return b;
+}
+
+/** Destroy the buffer.
+ * @param b a buffer created with yy_create_buffer()
+ * @param yyscanner The scanner object.
+ */
+ void yy_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ if ( ! b )
+ return;
+
+ if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */
+ YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
+
+ if ( b->yy_is_our_buffer )
+ yyfree( (void *) b->yy_ch_buf , yyscanner );
+
+ yyfree( (void *) b , yyscanner );
+}
+
+/* Initializes or reinitializes a buffer.
+ * This function is sometimes called more than once on the same buffer,
+ * such as during a yyrestart() or at EOF.
+ */
+ static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file , yyscan_t yyscanner)
+
+{
+ int oerrno = errno;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ yy_flush_buffer( b , yyscanner);
+
+ b->yy_input_file = file;
+ b->yy_fill_buffer = 1;
+
+ /* If b is the current buffer, then yy_init_buffer was _probably_
+ * called from yyrestart() or through yy_get_next_buffer.
+ * In that case, we don't want to reset the lineno or column.
+ */
+ if (b != YY_CURRENT_BUFFER){
+ b->yy_bs_lineno = 1;
+ b->yy_bs_column = 0;
+ }
+
+ b->yy_is_interactive = 0;
+
+ errno = oerrno;
+}
+
+/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
+ * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
+ * @param yyscanner The scanner object.
+ */
+ void yy_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ if ( ! b )
+ return;
+
+ b->yy_n_chars = 0;
+
+ /* We always need two end-of-buffer characters. The first causes
+ * a transition to the end-of-buffer state. The second causes
+ * a jam in that state.
+ */
+ b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
+ b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
+
+ b->yy_buf_pos = &b->yy_ch_buf[0];
+
+ b->yy_at_bol = 1;
+ b->yy_buffer_status = YY_BUFFER_NEW;
+
+ if ( b == YY_CURRENT_BUFFER )
+ yy_load_buffer_state( yyscanner );
+}
+
+/** Pushes the new state onto the stack. The new state becomes
+ * the current state. This function will allocate the stack
+ * if necessary.
+ * @param new_buffer The new state.
+ * @param yyscanner The scanner object.
+ */
+void yypush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ if (new_buffer == NULL)
+ return;
+
+ yyensure_buffer_stack(yyscanner);
+
+ /* This block is copied from yy_switch_to_buffer. */
+ if ( YY_CURRENT_BUFFER )
+ {
+ /* Flush out information for old buffer. */
+ *yyg->yy_c_buf_p = yyg->yy_hold_char;
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p;
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
+ }
+
+ /* Only push if top exists. Otherwise, replace top. */
+ if (YY_CURRENT_BUFFER)
+ yyg->yy_buffer_stack_top++;
+ YY_CURRENT_BUFFER_LVALUE = new_buffer;
+
+ /* copied from yy_switch_to_buffer. */
+ yy_load_buffer_state( yyscanner );
+ yyg->yy_did_buffer_switch_on_eof = 1;
+}
+
+/** Removes and deletes the top of the stack, if present.
+ * The next element becomes the new top.
+ * @param yyscanner The scanner object.
+ */
+void yypop_buffer_state (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ if (!YY_CURRENT_BUFFER)
+ return;
+
+ yy_delete_buffer(YY_CURRENT_BUFFER , yyscanner);
+ YY_CURRENT_BUFFER_LVALUE = NULL;
+ if (yyg->yy_buffer_stack_top > 0)
+ --yyg->yy_buffer_stack_top;
+
+ if (YY_CURRENT_BUFFER) {
+ yy_load_buffer_state( yyscanner );
+ yyg->yy_did_buffer_switch_on_eof = 1;
+ }
+}
+
+/* Allocates the stack if it does not exist.
+ * Guarantees space for at least one push.
+ */
+static void yyensure_buffer_stack (yyscan_t yyscanner)
+{
+ yy_size_t num_to_alloc;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ if (!yyg->yy_buffer_stack) {
+
+ /* First allocation is just for 2 elements, since we don't know if this
+ * scanner will even need a stack. We use 2 instead of 1 to avoid an
+ * immediate realloc on the next call.
+ */
+ num_to_alloc = 1; /* After all that talk, this was set to 1 anyways... */
+ yyg->yy_buffer_stack = (struct yy_buffer_state**)yyalloc
+ (num_to_alloc * sizeof(struct yy_buffer_state*)
+ , yyscanner);
+ if ( ! yyg->yy_buffer_stack )
+ YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
+
+ memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*));
+
+ yyg->yy_buffer_stack_max = num_to_alloc;
+ yyg->yy_buffer_stack_top = 0;
+ return;
+ }
+
+ if (yyg->yy_buffer_stack_top >= (yyg->yy_buffer_stack_max) - 1){
+
+ /* Increase the buffer to prepare for a possible push. */
+ yy_size_t grow_size = 8 /* arbitrary grow size */;
+
+ num_to_alloc = yyg->yy_buffer_stack_max + grow_size;
+ yyg->yy_buffer_stack = (struct yy_buffer_state**)yyrealloc
+ (yyg->yy_buffer_stack,
+ num_to_alloc * sizeof(struct yy_buffer_state*)
+ , yyscanner);
+ if ( ! yyg->yy_buffer_stack )
+ YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
+
+ /* zero only the new slots.*/
+ memset(yyg->yy_buffer_stack + yyg->yy_buffer_stack_max, 0, grow_size * sizeof(struct yy_buffer_state*));
+ yyg->yy_buffer_stack_max = num_to_alloc;
+ }
+}
+
+/** Setup the input buffer state to scan directly from a user-specified character buffer.
+ * @param base the character buffer
+ * @param size the size in bytes of the character buffer
+ * @param yyscanner The scanner object.
+ * @return the newly allocated buffer state object.
+ */
+YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner)
+{
+ YY_BUFFER_STATE b;
+
+ if ( size < 2 ||
+ base[size-2] != YY_END_OF_BUFFER_CHAR ||
+ base[size-1] != YY_END_OF_BUFFER_CHAR )
+ /* They forgot to leave room for the EOB's. */
+ return NULL;
+
+ b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) , yyscanner );
+ if ( ! b )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" );
+
+ b->yy_buf_size = (int) (size - 2); /* "- 2" to take care of EOB's */
+ b->yy_buf_pos = b->yy_ch_buf = base;
+ b->yy_is_our_buffer = 0;
+ b->yy_input_file = NULL;
+ b->yy_n_chars = b->yy_buf_size;
+ b->yy_is_interactive = 0;
+ b->yy_at_bol = 1;
+ b->yy_fill_buffer = 0;
+ b->yy_buffer_status = YY_BUFFER_NEW;
+
+ yy_switch_to_buffer( b , yyscanner );
+
+ return b;
+}
+
+/** Setup the input buffer state to scan a string. The next call to yylex() will
+ * scan from a @e copy of @a str.
+ * @param yystr a NUL-terminated string to scan
+ * @param yyscanner The scanner object.
+ * @return the newly allocated buffer state object.
+ * @note If you want to scan bytes that may contain NUL values, then use
+ * yy_scan_bytes() instead.
+ */
+YY_BUFFER_STATE yy_scan_string (const char * yystr , yyscan_t yyscanner)
+{
+
+ return yy_scan_bytes( yystr, (int) strlen(yystr) , yyscanner);
+}
+
+/** Setup the input buffer state to scan the given bytes. The next call to yylex() will
+ * scan from a @e copy of @a bytes.
+ * @param yybytes the byte buffer to scan
+ * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
+ * @param yyscanner The scanner object.
+ * @return the newly allocated buffer state object.
+ */
+YY_BUFFER_STATE yy_scan_bytes (const char * yybytes, int _yybytes_len , yyscan_t yyscanner)
+{
+ YY_BUFFER_STATE b;
+ char *buf;
+ yy_size_t n;
+ int i;
+
+ /* Get memory for full buffer, including space for trailing EOB's. */
+ n = (yy_size_t) (_yybytes_len + 2);
+ buf = (char *) yyalloc( n , yyscanner );
+ if ( ! buf )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" );
+
+ for ( i = 0; i < _yybytes_len; ++i )
+ buf[i] = yybytes[i];
+
+ buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR;
+
+ b = yy_scan_buffer( buf, n , yyscanner);
+ if ( ! b )
+ YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" );
+
+ /* It's okay to grow etc. this buffer, and we should throw it
+ * away when we're done.
+ */
+ b->yy_is_our_buffer = 1;
+
+ return b;
+}
+
+#ifndef YY_EXIT_FAILURE
+#define YY_EXIT_FAILURE 2
+#endif
+
+static void yynoreturn yy_fatal_error (const char* msg , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ (void)yyg;
+ fprintf( stderr, "%s\n", msg );
+ exit( YY_EXIT_FAILURE );
+}
+
+/* Redefine yyless() so it works in section 3 code. */
+
+#undef yyless
+#define yyless(n) \
+ do \
+ { \
+ /* Undo effects of setting up yytext. */ \
+ int yyless_macro_arg = (n); \
+ YY_LESS_LINENO(yyless_macro_arg);\
+ yytext[yyleng] = yyg->yy_hold_char; \
+ yyg->yy_c_buf_p = yytext + yyless_macro_arg; \
+ yyg->yy_hold_char = *yyg->yy_c_buf_p; \
+ *yyg->yy_c_buf_p = '\0'; \
+ yyleng = yyless_macro_arg; \
+ } \
+ while ( 0 )
+
+/* Accessor methods (get/set functions) to struct members. */
+
+/** Get the user-defined data for this scanner.
+ * @param yyscanner The scanner object.
+ */
+YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yyextra;
+}
+
+/** Get the current line number.
+ * @param yyscanner The scanner object.
+ */
+int yyget_lineno (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ if (! YY_CURRENT_BUFFER)
+ return 0;
+
+ return yylineno;
+}
+
+/** Get the current column number.
+ * @param yyscanner The scanner object.
+ */
+int yyget_column (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ if (! YY_CURRENT_BUFFER)
+ return 0;
+
+ return yycolumn;
+}
+
+/** Get the input stream.
+ * @param yyscanner The scanner object.
+ */
+FILE *yyget_in (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yyin;
+}
+
+/** Get the output stream.
+ * @param yyscanner The scanner object.
+ */
+FILE *yyget_out (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yyout;
+}
+
+/** Get the length of the current token.
+ * @param yyscanner The scanner object.
+ */
+int yyget_leng (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yyleng;
+}
+
+/** Get the current token.
+ * @param yyscanner The scanner object.
+ */
+
+char *yyget_text (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yytext;
+}
+
+/** Set the user-defined data. This data is never touched by the scanner.
+ * @param user_defined The data to be associated with this scanner.
+ * @param yyscanner The scanner object.
+ */
+void yyset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ yyextra = user_defined ;
+}
+
+/** Set the current line number.
+ * @param _line_number line number
+ * @param yyscanner The scanner object.
+ */
+void yyset_lineno (int _line_number , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ /* lineno is only valid if an input buffer exists. */
+ if (! YY_CURRENT_BUFFER )
+ YY_FATAL_ERROR( "yyset_lineno called with no buffer" );
+
+ yylineno = _line_number;
+}
+
+/** Set the current column.
+ * @param _column_no column number
+ * @param yyscanner The scanner object.
+ */
+void yyset_column (int _column_no , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ /* column is only valid if an input buffer exists. */
+ if (! YY_CURRENT_BUFFER )
+ YY_FATAL_ERROR( "yyset_column called with no buffer" );
+
+ yycolumn = _column_no;
+}
+
+/** Set the input stream. This does not discard the current
+ * input buffer.
+ * @param _in_str A readable stream.
+ * @param yyscanner The scanner object.
+ * @see yy_switch_to_buffer
+ */
+void yyset_in (FILE * _in_str , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ yyin = _in_str ;
+}
+
+void yyset_out (FILE * _out_str , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ yyout = _out_str ;
+}
+
+int yyget_debug (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yy_flex_debug;
+}
+
+void yyset_debug (int _bdebug , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ yy_flex_debug = _bdebug ;
+}
+
+/* Accessor methods for yylval and yylloc */
+
+/* User-visible API */
+
+/* yylex_init is special because it creates the scanner itself, so it is
+ * the ONLY reentrant function that doesn't take the scanner as the last argument.
+ * That's why we explicitly handle the declaration, instead of using our macros.
+ */
+int yylex_init(yyscan_t* ptr_yy_globals)
+{
+ if (ptr_yy_globals == NULL){
+ errno = EINVAL;
+ return 1;
+ }
+
+ *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), NULL );
+
+ if (*ptr_yy_globals == NULL){
+ errno = ENOMEM;
+ return 1;
+ }
+
+ /* By setting to 0xAA, we expose bugs in yy_init_globals. Leave at 0x00 for releases. */
+ memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t));
+
+ return yy_init_globals ( *ptr_yy_globals );
+}
+
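+/* A minimal usage sketch (added commentary, not part of the generated
+ * scanner; the exported symbols actually carry the fts0b prefix used
+ * elsewhere in this scanner, e.g. fts0bget_text(), and the lexer entry
+ * point is renamed to fts_blexer() via YY_DECL in fts0blex.l):
+ *
+ *	yyscan_t scanner;
+ *
+ *	if (yylex_init(&scanner) != 0) {
+ *		... handle EINVAL / ENOMEM ...
+ *	}
+ *
+ *	YY_BUFFER_STATE buf = yy_scan_string("apple +banana", scanner);
+ *	... call the lexer entry point until it returns 0 at end of input ...
+ *	yy_delete_buffer(buf, scanner);
+ *	yylex_destroy(scanner);
+ */
+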
+/* yylex_init_extra has the same functionality as yylex_init, but follows the
+ * convention of taking the scanner as the last argument. Note however, that
+ * this is a *pointer* to a scanner, as it will be allocated by this call (and
+ * is the reason, too, why this function also must handle its own declaration).
+ * The user defined value in the first argument will be available to yyalloc in
+ * the yyextra field.
+ */
+int yylex_init_extra( YY_EXTRA_TYPE yy_user_defined, yyscan_t* ptr_yy_globals )
+{
+ struct yyguts_t dummy_yyguts;
+
+ yyset_extra (yy_user_defined, &dummy_yyguts);
+
+ if (ptr_yy_globals == NULL){
+ errno = EINVAL;
+ return 1;
+ }
+
+ *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), &dummy_yyguts );
+
+ if (*ptr_yy_globals == NULL){
+ errno = ENOMEM;
+ return 1;
+ }
+
+ /* By setting to 0xAA, we expose bugs in
+ yy_init_globals. Leave at 0x00 for releases. */
+ memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t));
+
+ yyset_extra (yy_user_defined, *ptr_yy_globals);
+
+ return yy_init_globals ( *ptr_yy_globals );
+}
+
+static int yy_init_globals (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ /* Initialization is the same as for the non-reentrant scanner.
+ * This function is called from yylex_destroy(), so don't allocate here.
+ */
+
+ yyg->yy_buffer_stack = NULL;
+ yyg->yy_buffer_stack_top = 0;
+ yyg->yy_buffer_stack_max = 0;
+ yyg->yy_c_buf_p = NULL;
+ yyg->yy_init = 0;
+ yyg->yy_start = 0;
+
+ yyg->yy_start_stack_ptr = 0;
+ yyg->yy_start_stack_depth = 0;
+ yyg->yy_start_stack = NULL;
+
+/* Defined in main.c */
+#ifdef YY_STDINIT
+ yyin = stdin;
+ yyout = stdout;
+#else
+ yyin = NULL;
+ yyout = NULL;
+#endif
+
+ /* For future reference: Set errno on error, since we are called by
+ * yylex_init()
+ */
+ return 0;
+}
+
+/* yylex_destroy is for both reentrant and non-reentrant scanners. */
+int yylex_destroy (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ /* Pop the buffer stack, destroying each element. */
+ while(YY_CURRENT_BUFFER){
+ yy_delete_buffer( YY_CURRENT_BUFFER , yyscanner );
+ YY_CURRENT_BUFFER_LVALUE = NULL;
+ yypop_buffer_state(yyscanner);
+ }
+
+ /* Destroy the stack itself. */
+ yyfree(yyg->yy_buffer_stack , yyscanner);
+ yyg->yy_buffer_stack = NULL;
+
+ /* Destroy the start condition stack. */
+ yyfree( yyg->yy_start_stack , yyscanner );
+ yyg->yy_start_stack = NULL;
+
+ /* Reset the globals. This is important in a non-reentrant scanner so the next time
+ * yylex() is called, initialization will occur. */
+ yy_init_globals( yyscanner);
+
+ /* Destroy the main struct (reentrant only). */
+ yyfree ( yyscanner , yyscanner );
+ yyscanner = NULL;
+ return 0;
+}
+
+/*
+ * Internal utility routines.
+ */
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy (char* s1, const char * s2, int n , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ (void)yyg;
+
+ int i;
+ for ( i = 0; i < n; ++i )
+ s1[i] = s2[i];
+}
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen (const char * s , yyscan_t yyscanner)
+{
+ int n;
+ for ( n = 0; s[n]; ++n )
+ ;
+
+ return n;
+}
+#endif
+
+void *yyalloc (yy_size_t size , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ (void)yyg;
+ return malloc(size);
+}
+
+void *yyrealloc (void * ptr, yy_size_t size , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ (void)yyg;
+
+ /* The cast to (char *) in the following accommodates both
+ * implementations that use char* generic pointers, and those
+ * that use void* generic pointers. It works with the latter
+ * because both ANSI C and C++ allow castless assignment from
+ * any pointer type to void*, and deal with argument conversions
+ * as though doing an assignment.
+ */
+ return realloc(ptr, size);
+}
+
+void yyfree (void * ptr , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ (void)yyg;
+ free( (char *) ptr ); /* see yyrealloc() for (char *) cast */
+}
+
+#define YYTABLES_NAME "yytables"
+
+#line 74 "fts0blex.l"
+
+
diff --git a/storage/innobase/fts/fts0blex.l b/storage/innobase/fts/fts0blex.l
new file mode 100644
index 00000000..cf19cd0f
--- /dev/null
+++ b/storage/innobase/fts/fts0blex.l
@@ -0,0 +1,74 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**
+ * @file fts/fts0blex.l
+ * FTS parser lexical analyzer
+ *
+ * Created 2007/5/9 Sunny Bains
+ */
+
+%{
+
+#include "fts0ast.h"
+#include "fts0pars.h"
+
+/* Required for reentrant parser */
+#define YY_DECL int fts_blexer(YYSTYPE* val, yyscan_t yyscanner)
+#define exit(A) ut_error
+
+%}
+
+%option noinput
+%option nounput
+%option noyywrap
+%option nostdinit
+%option reentrant
+%option never-interactive
+
+%%
+
+[\t ]+ /* Ignore whitespace */ ;
+
+[*()+\-<>~@] {
+ val->oper = fts0bget_text(yyscanner)[0];
+
+ return(val->oper);
+}
+
+[0-9]+ {
+ val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner));
+
+ return(FTS_NUMB);
+}
+
+[^" \n*()+\-<>~@%]* {
+ val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner));
+
+ return(FTS_TERM);
+}
+
+\"[^\"\n]*\" {
+ val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner));
+
+ return(FTS_TEXT);
+}
+
+\n
+
+%%
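+
+/* Added illustration (not part of the original rules): given the patterns
+above, a boolean-mode query string such as
+
+	+apple -"red fruit" 42
+
+is returned to the parser as the operator characters '+' and '-', the
+FTS_TERM token "apple", the quoted FTS_TEXT token "red fruit" (quotes
+included) and the purely numeric FTS_NUMB token "42". A mixed token such
+as "42abc" is matched by the longer FTS_TERM pattern instead, and the
+whitespace in between is discarded. */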
diff --git a/storage/innobase/fts/fts0config.cc b/storage/innobase/fts/fts0config.cc
new file mode 100644
index 00000000..9e2b4091
--- /dev/null
+++ b/storage/innobase/fts/fts0config.cc
@@ -0,0 +1,432 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2020, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file fts/fts0config.cc
+Full Text Search configuration table.
+
+Created 2007/5/9 Sunny Bains
+***********************************************************************/
+
+#include "trx0roll.h"
+#include "row0sel.h"
+
+#include "fts0priv.h"
+
+/******************************************************************//**
+Callback function for fetching the config value.
+@return always returns TRUE */
+static
+ibool
+fts_config_fetch_value(
+/*===================*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: pointer to
+ ib_vector_t */
+{
+ sel_node_t* node = static_cast<sel_node_t*>(row);
+ fts_string_t* value = static_cast<fts_string_t*>(user_arg);
+
+ dfield_t* dfield = que_node_get_val(node->select_list);
+ dtype_t* type = dfield_get_type(dfield);
+ ulint len = dfield_get_len(dfield);
+ void* data = dfield_get_data(dfield);
+
+ ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
+
+ if (len != UNIV_SQL_NULL) {
+ ulint max_len = ut_min(value->f_len - 1, len);
+
+ memcpy(value->f_str, data, max_len);
+ value->f_len = max_len;
+ value->f_str[value->f_len] = '\0';
+ }
+
+ return(TRUE);
+}
+
+/******************************************************************//**
+Get value from the config table. The caller must ensure that enough
+space is allocated for value to hold the column contents.
+@return DB_SUCCESS or error code */
+dberr_t
+fts_config_get_value(
+/*=================*/
+ trx_t* trx, /*!< transaction */
+ fts_table_t* fts_table, /*!< in: the indexed
+ FTS table */
+ const char* name, /*!< in: get config value for
+ this parameter name */
+ fts_string_t* value) /*!< out: value read from
+ config table */
+{
+ pars_info_t* info;
+ que_t* graph;
+ dberr_t error;
+ ulint name_len = strlen(name);
+ char table_name[MAX_FULL_NAME_LEN];
+
+ info = pars_info_create();
+
+ *value->f_str = '\0';
+ ut_a(value->f_len > 0);
+
+ pars_info_bind_function(info, "my_func", fts_config_fetch_value,
+ value);
+
+ /* The len field of value must be set to the max bytes that
+ it can hold. On a successful read, the len field will be set
+ to the actual number of bytes copied to value. */
+ pars_info_bind_varchar_literal(info, "name", (byte*) name, name_len);
+
+ fts_table->suffix = "CONFIG";
+ fts_get_table_name(fts_table, table_name);
+ pars_info_bind_id(info, true, "table_name", table_name);
+
+ graph = fts_parse_sql(
+ fts_table,
+ info,
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR c IS SELECT value FROM $table_name"
+ " WHERE key = :name;\n"
+ "BEGIN\n"
+ ""
+ "OPEN c;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH c INTO my_func();\n"
+ " IF c % NOTFOUND THEN\n"
+ " EXIT;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE c;");
+
+ trx->op_info = "getting FTS config value";
+
+ error = fts_eval_sql(trx, graph);
+
+ mutex_enter(&dict_sys.mutex);
+ que_graph_free(graph);
+ mutex_exit(&dict_sys.mutex);
+
+ return(error);
+}
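+
+/* A minimal usage sketch (added commentary; the same pattern is used by
+fts_config_get_ulint() later in this file). The caller owns the buffer and
+must set value.f_len to its capacity before the call ("my_param" below is
+a placeholder name):
+
+	fts_string_t	value;
+
+	value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
+	value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));
+
+	error = fts_config_get_value(trx, fts_table, "my_param", &value);
+
+	ut_free(value.f_str);
+*/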
+
+/*********************************************************************//**
+Create the config table name for retrieving an index-specific value.
+@return index config parameter name */
+char*
+fts_config_create_index_param_name(
+/*===============================*/
+ const char* param, /*!< in: base name of param */
+ const dict_index_t* index) /*!< in: index for config */
+{
+ ulint len;
+ char* name;
+
+ /* The format of the config name is: name_<index_id>. */
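+	/* For illustration only (hypothetical values): a param such as
+	"stopword_count" for an index whose id is written by
+	fts_write_object_id() as "123" would produce the name
+	"stopword_count_123". */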
+ len = strlen(param);
+
+ /* Caller is responsible for deleting name. */
+ name = static_cast<char*>(ut_malloc_nokey(
+ len + FTS_AUX_MIN_TABLE_ID_LENGTH + 2));
+ ::strcpy(name, param);
+ name[len] = '_';
+
+ fts_write_object_id(index->id, name + len + 1);
+
+ return(name);
+}
+
+/******************************************************************//**
+Get value specific to an FTS index from the config table. The caller
+must ensure that enough space is allocated for value to hold the
+column contents.
+@return DB_SUCCESS or error code */
+dberr_t
+fts_config_get_index_value(
+/*=======================*/
+ trx_t* trx, /*!< transaction */
+ dict_index_t* index, /*!< in: index */
+ const char* param, /*!< in: get config value for
+ this parameter name */
+ fts_string_t* value) /*!< out: value read from
+ config table */
+{
+ char* name;
+ dberr_t error;
+ fts_table_t fts_table;
+
+ FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE,
+ index->table);
+
+	/* We are responsible for freeing name. */
+ name = fts_config_create_index_param_name(param, index);
+
+ error = fts_config_get_value(trx, &fts_table, name, value);
+
+ ut_free(name);
+
+ return(error);
+}
+
+/******************************************************************//**
+Set the value in the config table for name.
+@return DB_SUCCESS or error code */
+dberr_t
+fts_config_set_value(
+/*=================*/
+ trx_t* trx, /*!< transaction */
+ fts_table_t* fts_table, /*!< in: the indexed
+ FTS table */
+	const char*	name,	/*!< in: set config value for
+				this parameter name */
+ const fts_string_t*
+ value) /*!< in: value to update */
+{
+ pars_info_t* info;
+ que_t* graph;
+ dberr_t error;
+ undo_no_t undo_no;
+ undo_no_t n_rows_updated;
+ ulint name_len = strlen(name);
+ char table_name[MAX_FULL_NAME_LEN];
+
+ info = pars_info_create();
+
+ pars_info_bind_varchar_literal(info, "name", (byte*) name, name_len);
+ pars_info_bind_varchar_literal(info, "value",
+ value->f_str, value->f_len);
+
+ const bool dict_locked = fts_table->table->fts->dict_locked;
+
+ fts_table->suffix = "CONFIG";
+ fts_get_table_name(fts_table, table_name, dict_locked);
+ pars_info_bind_id(info, true, "table_name", table_name);
+
+ graph = fts_parse_sql(
+ fts_table, info,
+ "BEGIN UPDATE $table_name SET value = :value"
+ " WHERE key = :name;");
+
+ trx->op_info = "setting FTS config value";
+
+ undo_no = trx->undo_no;
+
+ error = fts_eval_sql(trx, graph);
+
+ fts_que_graph_free_check_lock(fts_table, NULL, graph);
+
+ n_rows_updated = trx->undo_no - undo_no;
+
+ /* Check if we need to do an insert. */
+ if (n_rows_updated == 0) {
+ info = pars_info_create();
+
+ pars_info_bind_varchar_literal(
+ info, "name", (byte*) name, name_len);
+
+ pars_info_bind_varchar_literal(
+ info, "value", value->f_str, value->f_len);
+
+ fts_get_table_name(fts_table, table_name, dict_locked);
+ pars_info_bind_id(info, true, "table_name", table_name);
+
+ graph = fts_parse_sql(
+ fts_table, info,
+ "BEGIN\n"
+ "INSERT INTO $table_name VALUES(:name, :value);");
+
+ trx->op_info = "inserting FTS config value";
+
+ error = fts_eval_sql(trx, graph);
+
+ fts_que_graph_free_check_lock(fts_table, NULL, graph);
+ }
+
+ return(error);
+}
+
+/******************************************************************//**
+Set the value specific to an FTS index in the config table.
+@return DB_SUCCESS or error code */
+dberr_t
+fts_config_set_index_value(
+/*=======================*/
+ trx_t* trx, /*!< transaction */
+ dict_index_t* index, /*!< in: index */
+	const char*	param,	/*!< in: set config value for
+					this parameter name */
+	fts_string_t*	value)	/*!< in: value to write to the
+					config table */
+{
+ char* name;
+ dberr_t error;
+ fts_table_t fts_table;
+
+ FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE,
+ index->table);
+
+	/* We are responsible for freeing name. */
+ name = fts_config_create_index_param_name(param, index);
+
+ error = fts_config_set_value(trx, &fts_table, name, value);
+
+ ut_free(name);
+
+ return(error);
+}
+
+#ifdef FTS_OPTIMIZE_DEBUG
+/******************************************************************//**
+Get an ulint value from the config table.
+@return DB_SUCCESS if all OK else error code */
+dberr_t
+fts_config_get_index_ulint(
+/*=======================*/
+ trx_t* trx, /*!< in: transaction */
+ dict_index_t* index, /*!< in: FTS index */
+ const char* name, /*!< in: param name */
+ ulint* int_value) /*!< out: value */
+{
+ dberr_t error;
+ fts_string_t value;
+
+ /* We set the length of value to the max bytes it can hold. This
+ information is used by the callback that reads the value.*/
+ value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
+ value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));
+
+ error = fts_config_get_index_value(trx, index, name, &value);
+
+ if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
+ ib::error() << "(" << error << ") reading `" << name << "'";
+ } else {
+ *int_value = strtoul((char*) value.f_str, NULL, 10);
+ }
+
+ ut_free(value.f_str);
+
+ return(error);
+}
+
+/******************************************************************//**
+Set an ulint value in the config table.
+@return DB_SUCCESS if all OK else error code */
+dberr_t
+fts_config_set_index_ulint(
+/*=======================*/
+ trx_t* trx, /*!< in: transaction */
+ dict_index_t* index, /*!< in: FTS index */
+ const char* name, /*!< in: param name */
+ ulint int_value) /*!< in: value */
+{
+ dberr_t error;
+ fts_string_t value;
+
+ /* We set the length of value to the max bytes it can hold. This
+ information is used by the callback that reads the value.*/
+ value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
+ value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));
+
+ // FIXME: Get rid of snprintf
+ ut_a(FTS_MAX_INT_LEN < FTS_MAX_CONFIG_VALUE_LEN);
+
+ value.f_len = snprintf(
+ (char*) value.f_str, FTS_MAX_INT_LEN, ULINTPF, int_value);
+
+ error = fts_config_set_index_value(trx, index, name, &value);
+
+ if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
+ ib::error() << "(" << error << ") writing `" << name << "'";
+ }
+
+ ut_free(value.f_str);
+
+ return(error);
+}
+#endif /* FTS_OPTIMIZE_DEBUG */
+
+/******************************************************************//**
+Get an ulint value from the config table.
+@return DB_SUCCESS if all OK else error code */
+dberr_t
+fts_config_get_ulint(
+/*=================*/
+ trx_t* trx, /*!< in: transaction */
+ fts_table_t* fts_table, /*!< in: the indexed
+ FTS table */
+ const char* name, /*!< in: param name */
+ ulint* int_value) /*!< out: value */
+{
+ dberr_t error;
+ fts_string_t value;
+
+ /* We set the length of value to the max bytes it can hold. This
+ information is used by the callback that reads the value.*/
+ value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
+ value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));
+
+ error = fts_config_get_value(trx, fts_table, name, &value);
+
+ if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
+ ib::error() << "(" << error << ") reading `" << name << "'";
+ } else {
+ *int_value = strtoul((char*) value.f_str, NULL, 10);
+ }
+
+ ut_free(value.f_str);
+
+ return(error);
+}
+
+/******************************************************************//**
+Set an ulint value in the config table.
+@return DB_SUCCESS if all OK else error code */
+dberr_t
+fts_config_set_ulint(
+/*=================*/
+ trx_t* trx, /*!< in: transaction */
+ fts_table_t* fts_table, /*!< in: the indexed
+ FTS table */
+ const char* name, /*!< in: param name */
+ ulint int_value) /*!< in: value */
+{
+ dberr_t error;
+ fts_string_t value;
+
+ /* We set the length of value to the max bytes it can hold. This
+ information is used by the callback that reads the value.*/
+ value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
+ value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));
+
+ ut_a(FTS_MAX_INT_LEN < FTS_MAX_CONFIG_VALUE_LEN);
+
+ value.f_len = (ulint) snprintf(
+ (char*) value.f_str, FTS_MAX_INT_LEN, ULINTPF, int_value);
+
+ error = fts_config_set_value(trx, fts_table, name, &value);
+
+ if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
+ ib::error() << "(" << error << ") writing `" << name << "'";
+ }
+
+ ut_free(value.f_str);
+
+ return(error);
+}
diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc
new file mode 100644
index 00000000..96ad0570
--- /dev/null
+++ b/storage/innobase/fts/fts0fts.cc
@@ -0,0 +1,6316 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2021, Oracle and/or its affiliates.
+Copyright (c) 2016, 2021, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file fts/fts0fts.cc
+Full Text Search interface
+***********************************************************************/
+
+#include "trx0roll.h"
+#include "row0mysql.h"
+#include "row0upd.h"
+#include "dict0types.h"
+#include "dict0stats_bg.h"
+#include "row0sel.h"
+#include "fts0fts.h"
+#include "fts0priv.h"
+#include "fts0types.h"
+#include "fts0types.ic"
+#include "fts0vlc.ic"
+#include "fts0plugin.h"
+#include "dict0priv.h"
+#include "dict0stats.h"
+#include "btr0pcur.h"
+#include "sync0sync.h"
+
+static const ulint FTS_MAX_ID_LEN = 32;
+
+/** Column name from the FTS config table */
+#define FTS_MAX_CACHE_SIZE_IN_MB "cache_size_in_mb"
+
+/** Verify if an aux table name is an obsolete table
+by looking up the keyword in the obsolete table names */
+#define FTS_IS_OBSOLETE_AUX_TABLE(table_name) \
+ (strstr((table_name), "DOC_ID") != NULL \
+ || strstr((table_name), "ADDED") != NULL \
+ || strstr((table_name), "STOPWORDS") != NULL)
+
+/** This is the maximum FTS cache size for each table and is
+a configurable variable */
+ulong fts_max_cache_size;
+
+/** Whether the total memory used for FTS cache is exhausted, and we will
+need a sync to free some memory */
+bool fts_need_sync = false;
+
+/** Variable specifying the total memory allocated for FTS cache */
+ulong fts_max_total_cache_size;
+
+/** This is the FTS result cache limit for each query and is
+a configurable variable */
+size_t fts_result_cache_limit;
+
+/** Variable specifying the maximum FTS max token size */
+ulong fts_max_token_size;
+
+/** Variable specifying the minimum FTS max token size */
+ulong fts_min_token_size;
+
+
+// FIXME: testing
+static time_t elapsed_time;
+static ulint n_nodes;
+
+#ifdef FTS_CACHE_SIZE_DEBUG
+/** The cache size permissible lower limit (1K) */
+static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;
+
+/** The cache size permissible upper limit (1G) */
+static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
+#endif
+
+/** Time to sleep after DEADLOCK error before retrying operation. */
+static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000;
+
+/** InnoDB default stopword list:
+There are different versions of stopword lists; the stopwords listed
+below come from the "Google Stopword" list. Reference:
+http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list.
+The final version of the InnoDB default stopword list is still
+pending a decision */
+const char *fts_default_stopword[] =
+{
+ "a",
+ "about",
+ "an",
+ "are",
+ "as",
+ "at",
+ "be",
+ "by",
+ "com",
+ "de",
+ "en",
+ "for",
+ "from",
+ "how",
+ "i",
+ "in",
+ "is",
+ "it",
+ "la",
+ "of",
+ "on",
+ "or",
+ "that",
+ "the",
+ "this",
+ "to",
+ "was",
+ "what",
+ "when",
+ "where",
+ "who",
+ "will",
+ "with",
+ "und",
+ "the",
+ "www",
+ NULL
+};
+
+/** For storing table info when checking for orphaned tables. */
+struct fts_aux_table_t {
+ table_id_t id; /*!< Table id */
+ table_id_t parent_id; /*!< Parent table id */
+ table_id_t index_id; /*!< Table FT index id */
+ char* name; /*!< Name of the table */
+};
+
+/** FTS auxiliary table suffixes that are common to all FT indexes. */
+const char* fts_common_tables[] = {
+ "BEING_DELETED",
+ "BEING_DELETED_CACHE",
+ "CONFIG",
+ "DELETED",
+ "DELETED_CACHE",
+ NULL
+};
+
+/** FTS auxiliary INDEX split intervals. */
+const fts_index_selector_t fts_index_selector[] = {
+ { 9, "INDEX_1" },
+ { 65, "INDEX_2" },
+ { 70, "INDEX_3" },
+ { 75, "INDEX_4" },
+ { 80, "INDEX_5" },
+ { 85, "INDEX_6" },
+ { 0 , NULL }
+};
+
+/** Default config values for FTS indexes on a table. */
+static const char* fts_config_table_insert_values_sql =
+ "BEGIN\n"
+ "\n"
+ "INSERT INTO $config_table VALUES('"
+ FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n"
+ ""
+ "INSERT INTO $config_table VALUES('"
+ FTS_OPTIMIZE_LIMIT_IN_SECS "', '180');\n"
+ ""
+ "INSERT INTO $config_table VALUES ('"
+ FTS_SYNCED_DOC_ID "', '0');\n"
+ ""
+ "INSERT INTO $config_table VALUES ('"
+ FTS_TOTAL_DELETED_COUNT "', '0');\n"
+ "" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
+ "INSERT INTO $config_table VALUES ('"
+ FTS_TABLE_STATE "', '0');\n";
+
+/** FTS tokenize parameter for the plugin parser */
+struct fts_tokenize_param_t {
+ fts_doc_t* result_doc; /*!< Result doc for tokens */
+ ulint add_pos; /*!< Added position for tokens */
+};
+
+/** Run SYNC on the table, i.e., write out data from the cache to the
+FTS auxiliary INDEX table and clear the cache at the end.
+@param[in,out] sync sync state
+@param[in]	unlock_cache	whether to unlock the cache lock when writing a node
+@param[in]	wait		whether to wait when a sync is already in progress
+@return DB_SUCCESS if all OK */
+static
+dberr_t
+fts_sync(
+ fts_sync_t* sync,
+ bool unlock_cache,
+ bool wait);
+
+/****************************************************************//**
+Release all resources held by the words rb tree, e.g., the node ilist. */
+static
+void
+fts_words_free(
+/*===========*/
+ ib_rbt_t* words) /*!< in: rb tree of words */
+ MY_ATTRIBUTE((nonnull));
+#ifdef FTS_CACHE_SIZE_DEBUG
+/****************************************************************//**
+Read the max cache size parameter from the config table. */
+static
+void
+fts_update_max_cache_size(
+/*======================*/
+ fts_sync_t* sync); /*!< in: sync state */
+#endif
+
+/*********************************************************************//**
+This function fetches the document that was just inserted, right before
+we commit the transaction, then tokenizes the inserted text data and
+inserts it into the FTS auxiliary table and its cache.
+@return TRUE if successful */
+static
+ulint
+fts_add_doc_by_id(
+/*==============*/
+ fts_trx_table_t*ftt, /*!< in: FTS trx table */
+ doc_id_t doc_id, /*!< in: doc id */
+ ib_vector_t* fts_indexes MY_ATTRIBUTE((unused)));
+ /*!< in: affected fts indexes */
+/******************************************************************//**
+Update the last document id. This function could create a new
+transaction to update the last document id.
+@return DB_SUCCESS if OK */
+static
+dberr_t
+fts_update_sync_doc_id(
+/*===================*/
+ const dict_table_t* table, /*!< in: table */
+ doc_id_t doc_id, /*!< in: last document id */
+ trx_t* trx) /*!< in: update trx, or NULL */
+ MY_ATTRIBUTE((nonnull(1)));
+
+/** Tokenize a document.
+@param[in,out] doc document to tokenize
+@param[out] result tokenization result
+@param[in] parser pluggable parser */
+static
+void
+fts_tokenize_document(
+ fts_doc_t* doc,
+ fts_doc_t* result,
+ st_mysql_ftparser* parser);
+
+/** Continue to tokenize a document.
+@param[in,out] doc document to tokenize
+@param[in] add_pos add this position to all tokens from this tokenization
+@param[out] result tokenization result
+@param[in] parser pluggable parser */
+static
+void
+fts_tokenize_document_next(
+ fts_doc_t* doc,
+ ulint add_pos,
+ fts_doc_t* result,
+ st_mysql_ftparser* parser);
+
+/** Create the vector of fts_get_doc_t instances.
+@param[in,out] cache fts cache
+@return vector of fts_get_doc_t instances */
+static
+ib_vector_t*
+fts_get_docs_create(
+ fts_cache_t* cache);
+
+/** Free the FTS cache.
+@param[in,out] cache to be freed */
+static
+void
+fts_cache_destroy(fts_cache_t* cache)
+{
+ rw_lock_free(&cache->lock);
+ rw_lock_free(&cache->init_lock);
+ mutex_free(&cache->deleted_lock);
+ mutex_free(&cache->doc_id_lock);
+ os_event_destroy(cache->sync->event);
+
+ if (cache->stopword_info.cached_stopword) {
+ rbt_free(cache->stopword_info.cached_stopword);
+ }
+
+ if (cache->sync_heap->arg) {
+ mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
+ }
+
+ mem_heap_free(cache->cache_heap);
+}
+
+/** Get a character set based on precise type.
+@param prtype precise type
+@return the corresponding character set */
+UNIV_INLINE
+CHARSET_INFO*
+fts_get_charset(ulint prtype)
+{
+#ifdef UNIV_DEBUG
+ switch (prtype & DATA_MYSQL_TYPE_MASK) {
+ case MYSQL_TYPE_BIT:
+ case MYSQL_TYPE_STRING:
+ case MYSQL_TYPE_VAR_STRING:
+ case MYSQL_TYPE_TINY_BLOB:
+ case MYSQL_TYPE_MEDIUM_BLOB:
+ case MYSQL_TYPE_BLOB:
+ case MYSQL_TYPE_LONG_BLOB:
+ case MYSQL_TYPE_VARCHAR:
+ break;
+ default:
+ ut_error;
+ }
+#endif /* UNIV_DEBUG */
+
+ uint cs_num = (uint) dtype_get_charset_coll(prtype);
+
+ if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
+ return(cs);
+ }
+
+ ib::fatal() << "Unable to find charset-collation " << cs_num;
+ return(NULL);
+}
+
+/****************************************************************//**
+This function loads the default InnoDB stopword list */
+static
+void
+fts_load_default_stopword(
+/*======================*/
+ fts_stopword_t* stopword_info) /*!< in: stopword info */
+{
+ fts_string_t str;
+ mem_heap_t* heap;
+ ib_alloc_t* allocator;
+ ib_rbt_t* stop_words;
+
+ allocator = stopword_info->heap;
+ heap = static_cast<mem_heap_t*>(allocator->arg);
+
+ if (!stopword_info->cached_stopword) {
+ stopword_info->cached_stopword = rbt_create_arg_cmp(
+ sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
+ &my_charset_latin1);
+ }
+
+ stop_words = stopword_info->cached_stopword;
+
+ str.f_n_char = 0;
+
+ for (ulint i = 0; fts_default_stopword[i]; ++i) {
+ char* word;
+ fts_tokenizer_word_t new_word;
+
+ /* We are going to duplicate the value below. */
+ word = const_cast<char*>(fts_default_stopword[i]);
+
+ new_word.nodes = ib_vector_create(
+ allocator, sizeof(fts_node_t), 4);
+
+ str.f_len = strlen(word);
+ str.f_str = reinterpret_cast<byte*>(word);
+
+ fts_string_dup(&new_word.text, &str, heap);
+
+ rbt_insert(stop_words, &new_word, &new_word);
+ }
+
+ stopword_info->status = STOPWORD_FROM_DEFAULT;
+}
+
+/****************************************************************//**
+Callback function to read a single stopword value.
+@return always returns TRUE */
+static
+ibool
+fts_read_stopword(
+/*==============*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: pointer to ib_vector_t */
+{
+ ib_alloc_t* allocator;
+ fts_stopword_t* stopword_info;
+ sel_node_t* sel_node;
+ que_node_t* exp;
+ ib_rbt_t* stop_words;
+ dfield_t* dfield;
+ fts_string_t str;
+ mem_heap_t* heap;
+ ib_rbt_bound_t parent;
+
+ sel_node = static_cast<sel_node_t*>(row);
+ stopword_info = static_cast<fts_stopword_t*>(user_arg);
+
+ stop_words = stopword_info->cached_stopword;
+ allocator = static_cast<ib_alloc_t*>(stopword_info->heap);
+ heap = static_cast<mem_heap_t*>(allocator->arg);
+
+ exp = sel_node->select_list;
+
+ /* We only need to read the first column */
+ dfield = que_node_get_val(exp);
+
+ str.f_n_char = 0;
+ str.f_str = static_cast<byte*>(dfield_get_data(dfield));
+ str.f_len = dfield_get_len(dfield);
+
+	/* Only create a new node if the value does not already exist */
+ if (str.f_len != UNIV_SQL_NULL
+ && rbt_search(stop_words, &parent, &str) != 0) {
+
+ fts_tokenizer_word_t new_word;
+
+ new_word.nodes = ib_vector_create(
+ allocator, sizeof(fts_node_t), 4);
+
+ new_word.text.f_str = static_cast<byte*>(
+ mem_heap_alloc(heap, str.f_len + 1));
+
+ memcpy(new_word.text.f_str, str.f_str, str.f_len);
+
+ new_word.text.f_n_char = 0;
+ new_word.text.f_len = str.f_len;
+ new_word.text.f_str[str.f_len] = 0;
+
+ rbt_insert(stop_words, &new_word, &new_word);
+ }
+
+ return(TRUE);
+}
+
+/******************************************************************//**
+Load user-defined stopwords from the designated user table
+@return whether the operation is successful */
+static
+bool
+fts_load_user_stopword(
+/*===================*/
+ fts_t* fts, /*!< in: FTS struct */
+ const char* stopword_table_name, /*!< in: Stopword table
+ name */
+ fts_stopword_t* stopword_info) /*!< in: Stopword info */
+{
+ if (!fts->dict_locked) {
+ mutex_enter(&dict_sys.mutex);
+ }
+
+	/* Validate that the user table exists and has the right format */
+ bool ret= false;
+ stopword_info->charset = fts_valid_stopword_table(stopword_table_name);
+ if (!stopword_info->charset) {
+cleanup:
+ if (!fts->dict_locked) {
+ mutex_exit(&dict_sys.mutex);
+ }
+
+ return ret;
+ }
+
+ trx_t* trx = trx_create();
+ trx->op_info = "Load user stopword table into FTS cache";
+
+ if (!stopword_info->cached_stopword) {
+ /* Create the stopword RB tree with the stopword column
+		charset. All comparisons will use this charset */
+ stopword_info->cached_stopword = rbt_create_arg_cmp(
+ sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
+ (void*)stopword_info->charset);
+
+ }
+
+ pars_info_t* info = pars_info_create();
+
+ pars_info_bind_id(info, TRUE, "table_stopword", stopword_table_name);
+
+ pars_info_bind_function(info, "my_func", fts_read_stopword,
+ stopword_info);
+
+ que_t* graph = fts_parse_sql_no_dict_lock(
+ info,
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR c IS"
+ " SELECT value"
+ " FROM $table_stopword;\n"
+ "BEGIN\n"
+ "\n"
+ "OPEN c;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH c INTO my_func();\n"
+ " IF c % NOTFOUND THEN\n"
+ " EXIT;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE c;");
+
+ for (;;) {
+ dberr_t error = fts_eval_sql(trx, graph);
+
+ if (UNIV_LIKELY(error == DB_SUCCESS)) {
+ fts_sql_commit(trx);
+ stopword_info->status = STOPWORD_USER_TABLE;
+ break;
+ } else {
+ fts_sql_rollback(trx);
+
+ if (error == DB_LOCK_WAIT_TIMEOUT) {
+ ib::warn() << "Lock wait timeout reading user"
+ " stopword table. Retrying!";
+
+ trx->error_state = DB_SUCCESS;
+ } else {
+ ib::error() << "Error '" << error
+ << "' while reading user stopword"
+ " table.";
+ ret = FALSE;
+ break;
+ }
+ }
+ }
+
+ que_graph_free(graph);
+ trx->free();
+ ret = true;
+ goto cleanup;
+}
+
+/******************************************************************//**
+Initialize the index cache. */
+static
+void
+fts_index_cache_init(
+/*=================*/
+ ib_alloc_t* allocator, /*!< in: the allocator to use */
+ fts_index_cache_t* index_cache) /*!< in: index cache */
+{
+ ulint i;
+
+ ut_a(index_cache->words == NULL);
+
+ index_cache->words = rbt_create_arg_cmp(
+ sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
+ (void*) index_cache->charset);
+
+ ut_a(index_cache->doc_stats == NULL);
+
+ index_cache->doc_stats = ib_vector_create(
+ allocator, sizeof(fts_doc_stats_t), 4);
+
+ for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
+ ut_a(index_cache->ins_graph[i] == NULL);
+ ut_a(index_cache->sel_graph[i] == NULL);
+ }
+}
+
+/*********************************************************************//**
+Initialize FTS cache. */
+void
+fts_cache_init(
+/*===========*/
+ fts_cache_t* cache) /*!< in: cache to initialize */
+{
+ ulint i;
+
+ /* Just to make sure */
+ ut_a(cache->sync_heap->arg == NULL);
+
+ cache->sync_heap->arg = mem_heap_create(1024);
+
+ cache->total_size = 0;
+
+ mutex_enter((ib_mutex_t*) &cache->deleted_lock);
+ cache->deleted_doc_ids = ib_vector_create(
+ cache->sync_heap, sizeof(doc_id_t), 4);
+ mutex_exit((ib_mutex_t*) &cache->deleted_lock);
+
+ /* Reset the cache data for all the FTS indexes. */
+ for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
+ fts_index_cache_t* index_cache;
+
+ index_cache = static_cast<fts_index_cache_t*>(
+ ib_vector_get(cache->indexes, i));
+
+ fts_index_cache_init(cache->sync_heap, index_cache);
+ }
+}
+
+/****************************************************************//**
+Create a FTS cache. */
+fts_cache_t*
+fts_cache_create(
+/*=============*/
+ dict_table_t* table) /*!< in: table owns the FTS cache */
+{
+ mem_heap_t* heap;
+ fts_cache_t* cache;
+
+ heap = static_cast<mem_heap_t*>(mem_heap_create(512));
+
+ cache = static_cast<fts_cache_t*>(
+ mem_heap_zalloc(heap, sizeof(*cache)));
+
+ cache->cache_heap = heap;
+
+ rw_lock_create(fts_cache_rw_lock_key, &cache->lock, SYNC_FTS_CACHE);
+
+ rw_lock_create(
+ fts_cache_init_rw_lock_key, &cache->init_lock,
+ SYNC_FTS_CACHE_INIT);
+
+ mutex_create(LATCH_ID_FTS_DELETE, &cache->deleted_lock);
+
+ mutex_create(LATCH_ID_FTS_DOC_ID, &cache->doc_id_lock);
+
+ /* This is the heap used to create the cache itself. */
+ cache->self_heap = ib_heap_allocator_create(heap);
+
+ /* This is a transient heap, used for storing sync data. */
+ cache->sync_heap = ib_heap_allocator_create(heap);
+ cache->sync_heap->arg = NULL;
+
+ cache->sync = static_cast<fts_sync_t*>(
+ mem_heap_zalloc(heap, sizeof(fts_sync_t)));
+
+ cache->sync->table = table;
+ cache->sync->event = os_event_create(0);
+
+ /* Create the index cache vector that will hold the inverted indexes. */
+ cache->indexes = ib_vector_create(
+ cache->self_heap, sizeof(fts_index_cache_t), 2);
+
+ fts_cache_init(cache);
+
+ cache->stopword_info.cached_stopword = NULL;
+ cache->stopword_info.charset = NULL;
+
+ cache->stopword_info.heap = cache->self_heap;
+
+ cache->stopword_info.status = STOPWORD_NOT_INIT;
+
+ return(cache);
+}
+
+/*******************************************************************//**
+Add a newly created index into the FTS cache */
+void
+fts_add_index(
+/*==========*/
+ dict_index_t* index, /*!< FTS index to be added */
+ dict_table_t* table) /*!< table */
+{
+ fts_t* fts = table->fts;
+ fts_cache_t* cache;
+ fts_index_cache_t* index_cache;
+
+ ut_ad(fts);
+ cache = table->fts->cache;
+
+ rw_lock_x_lock(&cache->init_lock);
+
+ ib_vector_push(fts->indexes, &index);
+
+ index_cache = fts_find_index_cache(cache, index);
+
+ if (!index_cache) {
+ /* Add new index cache structure */
+ index_cache = fts_cache_index_cache_create(table, index);
+ }
+
+ rw_lock_x_unlock(&cache->init_lock);
+}
+
+/*******************************************************************//**
+Recalibrate the get_doc structures after index_cache in cache->indexes has changed */
+static
+void
+fts_reset_get_doc(
+/*==============*/
+ fts_cache_t* cache) /*!< in: FTS index cache */
+{
+ fts_get_doc_t* get_doc;
+ ulint i;
+
+ ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
+
+ ib_vector_reset(cache->get_docs);
+
+ for (i = 0; i < ib_vector_size(cache->indexes); i++) {
+ fts_index_cache_t* ind_cache;
+
+ ind_cache = static_cast<fts_index_cache_t*>(
+ ib_vector_get(cache->indexes, i));
+
+ get_doc = static_cast<fts_get_doc_t*>(
+ ib_vector_push(cache->get_docs, NULL));
+
+ memset(get_doc, 0x0, sizeof(*get_doc));
+
+ get_doc->index_cache = ind_cache;
+ get_doc->cache = cache;
+ }
+
+ ut_ad(ib_vector_size(cache->get_docs)
+ == ib_vector_size(cache->indexes));
+}
+
+/*******************************************************************//**
+Check whether an index is in the table->indexes list
+@return TRUE if it exists */
+static
+ibool
+fts_in_dict_index(
+/*==============*/
+ dict_table_t* table, /*!< in: Table */
+ dict_index_t* index_check) /*!< in: index to be checked */
+{
+ dict_index_t* index;
+
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ if (index == index_check) {
+ return(TRUE);
+ }
+ }
+
+ return(FALSE);
+}
+
+/*******************************************************************//**
+Check whether an index is in the fts->cache->indexes list
+@return TRUE if it exists */
+static
+ibool
+fts_in_index_cache(
+/*===============*/
+ dict_table_t* table, /*!< in: Table */
+ dict_index_t* index) /*!< in: index to be checked */
+{
+ ulint i;
+
+ for (i = 0; i < ib_vector_size(table->fts->cache->indexes); i++) {
+ fts_index_cache_t* index_cache;
+
+ index_cache = static_cast<fts_index_cache_t*>(
+ ib_vector_get(table->fts->cache->indexes, i));
+
+ if (index_cache->index == index) {
+ return(TRUE);
+ }
+ }
+
+ return(FALSE);
+}
+
+/*******************************************************************//**
+Check that the indexes in fts->indexes are also present in the index cache
+and the table->indexes list
+@return TRUE if all indexes match */
+ibool
+fts_check_cached_index(
+/*===================*/
+ dict_table_t* table) /*!< in: Table where indexes are dropped */
+{
+ ulint i;
+
+ if (!table->fts || !table->fts->cache) {
+ return(TRUE);
+ }
+
+ ut_a(ib_vector_size(table->fts->indexes)
+ == ib_vector_size(table->fts->cache->indexes));
+
+ for (i = 0; i < ib_vector_size(table->fts->indexes); i++) {
+ dict_index_t* index;
+
+ index = static_cast<dict_index_t*>(
+ ib_vector_getp(table->fts->indexes, i));
+
+ if (!fts_in_index_cache(table, index)) {
+ return(FALSE);
+ }
+
+ if (!fts_in_dict_index(table, index)) {
+ return(FALSE);
+ }
+ }
+
+ return(TRUE);
+}
+
+/** Clear all fts resources when there is no internal DOC_ID
+and there is no new fts index to add.
+@param[in,out] table table where fts is to be freed
+@param[in] trx transaction to drop all fts tables */
+void fts_clear_all(dict_table_t *table, trx_t *trx)
+{
+ if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) ||
+ !table->fts ||
+ !ib_vector_is_empty(table->fts->indexes))
+ return;
+
+ for (const dict_index_t *index= dict_table_get_first_index(table);
+ index; index= dict_table_get_next_index(index))
+ if (index->type & DICT_FTS)
+ return;
+
+ fts_optimize_remove_table(table);
+
+ fts_drop_tables(trx, table);
+ fts_free(table);
+ DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
+}
+
+/*******************************************************************//**
+Drop auxiliary tables related to an FTS index
+@return DB_SUCCESS or error number */
+dberr_t
+fts_drop_index(
+/*===========*/
+ dict_table_t* table, /*!< in: Table where indexes are dropped */
+ dict_index_t* index, /*!< in: Index to be dropped */
+ trx_t* trx) /*!< in: Transaction for the drop */
+{
+ ib_vector_t* indexes = table->fts->indexes;
+ dberr_t err = DB_SUCCESS;
+
+ ut_a(indexes);
+
+ if ((ib_vector_size(indexes) == 1
+ && (index == static_cast<dict_index_t*>(
+ ib_vector_getp(table->fts->indexes, 0)))
+ && DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID))
+ || ib_vector_is_empty(indexes)) {
+ doc_id_t current_doc_id;
+ doc_id_t first_doc_id;
+
+ DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
+
+ current_doc_id = table->fts->cache->next_doc_id;
+ first_doc_id = table->fts->cache->first_doc_id;
+ fts_cache_clear(table->fts->cache);
+ fts_cache_destroy(table->fts->cache);
+ table->fts->cache = fts_cache_create(table);
+ table->fts->cache->next_doc_id = current_doc_id;
+ table->fts->cache->first_doc_id = first_doc_id;
+ } else {
+ fts_cache_t* cache = table->fts->cache;
+ fts_index_cache_t* index_cache;
+
+ rw_lock_x_lock(&cache->init_lock);
+
+ index_cache = fts_find_index_cache(cache, index);
+
+ if (index_cache != NULL) {
+ if (index_cache->words) {
+ fts_words_free(index_cache->words);
+ rbt_free(index_cache->words);
+ }
+
+ ib_vector_remove(cache->indexes, *(void**) index_cache);
+ }
+
+ if (cache->get_docs) {
+ fts_reset_get_doc(cache);
+ }
+
+ rw_lock_x_unlock(&cache->init_lock);
+ }
+
+ err = fts_drop_index_tables(trx, index);
+
+ ib_vector_remove(indexes, (const void*) index);
+
+ return(err);
+}
+
+/****************************************************************//**
+Free the query graph but check whether dict_sys.mutex is already
+held */
+void
+fts_que_graph_free_check_lock(
+/*==========================*/
+ fts_table_t* fts_table, /*!< in: FTS table */
+ const fts_index_cache_t*index_cache, /*!< in: FTS index cache */
+ que_t* graph) /*!< in: query graph */
+{
+ bool has_dict = FALSE;
+
+ if (fts_table && fts_table->table) {
+ ut_ad(fts_table->table->fts);
+
+ has_dict = fts_table->table->fts->dict_locked;
+ } else if (index_cache) {
+ ut_ad(index_cache->index->table->fts);
+
+ has_dict = index_cache->index->table->fts->dict_locked;
+ }
+
+ if (!has_dict) {
+ mutex_enter(&dict_sys.mutex);
+ }
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ que_graph_free(graph);
+
+ if (!has_dict) {
+ mutex_exit(&dict_sys.mutex);
+ }
+}
+
+/****************************************************************//**
+Get the character set of an FTS index. */
+CHARSET_INFO*
+fts_index_get_charset(
+/*==================*/
+ dict_index_t* index) /*!< in: FTS index */
+{
+ CHARSET_INFO* charset = NULL;
+ dict_field_t* field;
+ ulint prtype;
+
+ field = dict_index_get_nth_field(index, 0);
+ prtype = field->col->prtype;
+
+ charset = fts_get_charset(prtype);
+
+#ifdef FTS_DEBUG
+	/* Set up charset info for this index. Please note that all
+	fields of the FTS index should have the same charset */
+	for (ulint i = 1; i < index->n_fields; i++) {
+ CHARSET_INFO* fld_charset;
+
+ field = dict_index_get_nth_field(index, i);
+ prtype = field->col->prtype;
+
+ fld_charset = fts_get_charset(prtype);
+
+ /* All FTS columns should have the same charset */
+ if (charset) {
+ ut_a(charset == fld_charset);
+ } else {
+ charset = fld_charset;
+ }
+ }
+#endif
+
+ return(charset);
+}
+
+/****************************************************************//**
+Create an FTS index cache.
+@return Index Cache */
+fts_index_cache_t*
+fts_cache_index_cache_create(
+/*=========================*/
+ dict_table_t* table, /*!< in: table with FTS index */
+ dict_index_t* index) /*!< in: FTS index */
+{
+ ulint n_bytes;
+ fts_index_cache_t* index_cache;
+ fts_cache_t* cache = table->fts->cache;
+
+ ut_a(cache != NULL);
+
+ ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
+
+ /* Must not already exist in the cache vector. */
+ ut_a(fts_find_index_cache(cache, index) == NULL);
+
+ index_cache = static_cast<fts_index_cache_t*>(
+ ib_vector_push(cache->indexes, NULL));
+
+ memset(index_cache, 0x0, sizeof(*index_cache));
+
+ index_cache->index = index;
+
+ index_cache->charset = fts_index_get_charset(index);
+
+ n_bytes = sizeof(que_t*) * FTS_NUM_AUX_INDEX;
+
+ index_cache->ins_graph = static_cast<que_t**>(
+ mem_heap_zalloc(static_cast<mem_heap_t*>(
+ cache->self_heap->arg), n_bytes));
+
+ index_cache->sel_graph = static_cast<que_t**>(
+ mem_heap_zalloc(static_cast<mem_heap_t*>(
+ cache->self_heap->arg), n_bytes));
+
+ fts_index_cache_init(cache->sync_heap, index_cache);
+
+ if (cache->get_docs) {
+ fts_reset_get_doc(cache);
+ }
+
+ return(index_cache);
+}
+
+/****************************************************************//**
+Release all resources held by the words rb tree, e.g., the node ilist. */
+static
+void
+fts_words_free(
+/*===========*/
+ ib_rbt_t* words) /*!< in: rb tree of words */
+{
+ const ib_rbt_node_t* rbt_node;
+
+ /* Free the resources held by a word. */
+ for (rbt_node = rbt_first(words);
+ rbt_node != NULL;
+ rbt_node = rbt_first(words)) {
+
+ ulint i;
+ fts_tokenizer_word_t* word;
+
+ word = rbt_value(fts_tokenizer_word_t, rbt_node);
+
+ /* Free the ilists of this word. */
+ for (i = 0; i < ib_vector_size(word->nodes); ++i) {
+
+ fts_node_t* fts_node = static_cast<fts_node_t*>(
+ ib_vector_get(word->nodes, i));
+
+ ut_free(fts_node->ilist);
+ fts_node->ilist = NULL;
+ }
+
+		/* NOTE: We are responsible for freeing the node */
+ ut_free(rbt_remove_node(words, rbt_node));
+ }
+}
+
+/** Clear cache.
+@param[in,out] cache fts cache */
+void
+fts_cache_clear(
+ fts_cache_t* cache)
+{
+ ulint i;
+
+ for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
+ ulint j;
+ fts_index_cache_t* index_cache;
+
+ index_cache = static_cast<fts_index_cache_t*>(
+ ib_vector_get(cache->indexes, i));
+
+ fts_words_free(index_cache->words);
+
+ rbt_free(index_cache->words);
+
+ index_cache->words = NULL;
+
+ for (j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
+
+ if (index_cache->ins_graph[j] != NULL) {
+
+ fts_que_graph_free_check_lock(
+ NULL, index_cache,
+ index_cache->ins_graph[j]);
+
+ index_cache->ins_graph[j] = NULL;
+ }
+
+ if (index_cache->sel_graph[j] != NULL) {
+
+ fts_que_graph_free_check_lock(
+ NULL, index_cache,
+ index_cache->sel_graph[j]);
+
+ index_cache->sel_graph[j] = NULL;
+ }
+ }
+
+ index_cache->doc_stats = NULL;
+ }
+
+ fts_need_sync = false;
+
+ cache->total_size = 0;
+
+ mutex_enter((ib_mutex_t*) &cache->deleted_lock);
+ cache->deleted_doc_ids = NULL;
+ mutex_exit((ib_mutex_t*) &cache->deleted_lock);
+
+ mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
+ cache->sync_heap->arg = NULL;
+}
+
+/*********************************************************************//**
+Search the index specific cache for a particular FTS index.
+@return the index cache else NULL */
+UNIV_INLINE
+fts_index_cache_t*
+fts_get_index_cache(
+/*================*/
+ fts_cache_t* cache, /*!< in: cache to search */
+ const dict_index_t* index) /*!< in: index to search for */
+{
+ ulint i;
+
+ ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_X)
+ || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));
+
+ for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
+ fts_index_cache_t* index_cache;
+
+ index_cache = static_cast<fts_index_cache_t*>(
+ ib_vector_get(cache->indexes, i));
+
+ if (index_cache->index == index) {
+
+ return(index_cache);
+ }
+ }
+
+ return(NULL);
+}
+
+#ifdef FTS_DEBUG
+/*********************************************************************//**
+Search the index cache for a get_doc structure.
+@return the fts_get_doc_t item else NULL */
+static
+fts_get_doc_t*
+fts_get_index_get_doc(
+/*==================*/
+ fts_cache_t* cache, /*!< in: cache to search */
+ const dict_index_t* index) /*!< in: index to search for */
+{
+ ulint i;
+
+ ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));
+
+ for (i = 0; i < ib_vector_size(cache->get_docs); ++i) {
+ fts_get_doc_t* get_doc;
+
+ get_doc = static_cast<fts_get_doc_t*>(
+ ib_vector_get(cache->get_docs, i));
+
+ if (get_doc->index_cache->index == index) {
+
+ return(get_doc);
+ }
+ }
+
+ return(NULL);
+}
+#endif
+
+/**********************************************************************//**
+Find an existing word, or if not found, create one and return it.
+@return specified word token */
+static
+fts_tokenizer_word_t*
+fts_tokenizer_word_get(
+/*===================*/
+ fts_cache_t* cache, /*!< in: cache */
+ fts_index_cache_t*
+ index_cache, /*!< in: index cache */
+ fts_string_t* text) /*!< in: node text */
+{
+ fts_tokenizer_word_t* word;
+ ib_rbt_bound_t parent;
+
+ ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
+
+ /* If it is a stopword, do not index it */
+ if (!fts_check_token(text,
+ cache->stopword_info.cached_stopword,
+ index_cache->charset)) {
+
+ return(NULL);
+ }
+
+ /* Check if we found a match, if not then add word to tree. */
+ if (rbt_search(index_cache->words, &parent, text) != 0) {
+ mem_heap_t* heap;
+ fts_tokenizer_word_t new_word;
+
+ heap = static_cast<mem_heap_t*>(cache->sync_heap->arg);
+
+ new_word.nodes = ib_vector_create(
+ cache->sync_heap, sizeof(fts_node_t), 4);
+
+ fts_string_dup(&new_word.text, text, heap);
+
+ parent.last = rbt_add_node(
+ index_cache->words, &parent, &new_word);
+
+ /* Take into account the RB tree memory use and the vector. */
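+		/* Note: the sizeof(fts_node_t) * 4 term corresponds to
+		the initial capacity of 4 elements reserved by
+		ib_vector_create() for new_word.nodes above. */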
+ cache->total_size += sizeof(new_word)
+ + sizeof(ib_rbt_node_t)
+ + text->f_len
+ + (sizeof(fts_node_t) * 4)
+ + sizeof(*new_word.nodes);
+
+ ut_ad(rbt_validate(index_cache->words));
+ }
+
+ word = rbt_value(fts_tokenizer_word_t, parent.last);
+
+ return(word);
+}
+
+/**********************************************************************//**
+Add the given doc_id/word positions to the given node's ilist. */
+void
+fts_cache_node_add_positions(
+/*=========================*/
+ fts_cache_t* cache, /*!< in: cache */
+ fts_node_t* node, /*!< in: word node */
+ doc_id_t doc_id, /*!< in: doc id */
+ ib_vector_t* positions) /*!< in: fts_token_t::positions */
+{
+ ulint i;
+ byte* ptr;
+ byte* ilist;
+ ulint enc_len;
+ ulint last_pos;
+ byte* ptr_start;
+ ulint doc_id_delta;
+
+#ifdef UNIV_DEBUG
+ if (cache) {
+ ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
+ }
+#endif /* UNIV_DEBUG */
+
+ ut_ad(doc_id >= node->last_doc_id);
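+	/* A fragment appended to the ilist has the layout
+	<doc id delta><position delta>...<position delta><0x00>, where
+	every value is variable-length encoded by fts_encode_int() and
+	each position is a delta against the previous position. For
+	example, doc_id 105 following last_doc_id 100 with positions
+	{3, 10, 24} is stored as the values 5, 3, 7, 14 followed by the
+	0x00 terminator. */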
+
+ /* Calculate the space required to store the ilist. */
+ doc_id_delta = (ulint)(doc_id - node->last_doc_id);
+ enc_len = fts_get_encoded_len(doc_id_delta);
+
+ last_pos = 0;
+ for (i = 0; i < ib_vector_size(positions); i++) {
+ ulint pos = *(static_cast<ulint*>(
+ ib_vector_get(positions, i)));
+
+ ut_ad(last_pos == 0 || pos > last_pos);
+
+ enc_len += fts_get_encoded_len(pos - last_pos);
+ last_pos = pos;
+ }
+
+ /* The 0x00 byte at the end of the token positions list. */
+ enc_len++;
+
+ if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) {
+ /* No need to allocate more space, we can fit in the new
+ data at the end of the old one. */
+ ilist = NULL;
+ ptr = node->ilist + node->ilist_size;
+ } else {
+ ulint new_size = node->ilist_size + enc_len;
+
+ /* Over-reserve space by a fixed size for small lengths and
+ by 20% for lengths >= 48 bytes. */
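+		/* For example, a required size of 40 bytes reserves 48
+		bytes, while a required size of 100 bytes reserves
+		120 bytes (100 * 6 / 5). */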
+ if (new_size < 16) {
+ new_size = 16;
+ } else if (new_size < 32) {
+ new_size = 32;
+ } else if (new_size < 48) {
+ new_size = 48;
+ } else {
+ new_size = new_size * 6 / 5;
+ }
+
+ ilist = static_cast<byte*>(ut_malloc_nokey(new_size));
+ ptr = ilist + node->ilist_size;
+
+ node->ilist_size_alloc = new_size;
+ if (cache) {
+ cache->total_size += new_size;
+ }
+ }
+
+ ptr_start = ptr;
+
+ /* Encode the new fragment. */
+ ptr += fts_encode_int(doc_id_delta, ptr);
+
+ last_pos = 0;
+ for (i = 0; i < ib_vector_size(positions); i++) {
+ ulint pos = *(static_cast<ulint*>(
+ ib_vector_get(positions, i)));
+
+ ptr += fts_encode_int(pos - last_pos, ptr);
+ last_pos = pos;
+ }
+
+ *ptr++ = 0;
+
+ ut_a(enc_len == (ulint)(ptr - ptr_start));
+
+ if (ilist) {
+ /* Copy old ilist to the start of the new one and switch the
+ new one into place in the node. */
+ if (node->ilist_size > 0) {
+ memcpy(ilist, node->ilist, node->ilist_size);
+ ut_free(node->ilist);
+ if (cache) {
+ cache->total_size -= node->ilist_size;
+ }
+ }
+
+ node->ilist = ilist;
+ }
+
+ node->ilist_size += enc_len;
+
+ if (node->first_doc_id == FTS_NULL_DOC_ID) {
+ node->first_doc_id = doc_id;
+ }
+
+ node->last_doc_id = doc_id;
+ ++node->doc_count;
+}
+
+/**********************************************************************//**
+Add document to the cache. */
+static
+void
+fts_cache_add_doc(
+/*==============*/
+ fts_cache_t* cache, /*!< in: cache */
+ fts_index_cache_t*
+ index_cache, /*!< in: index cache */
+ doc_id_t doc_id, /*!< in: doc id to add */
+ ib_rbt_t* tokens) /*!< in: document tokens */
+{
+ const ib_rbt_node_t* node;
+ ulint n_words;
+ fts_doc_stats_t* doc_stats;
+
+ if (!tokens) {
+ return;
+ }
+
+ ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
+
+ n_words = rbt_size(tokens);
+
+ for (node = rbt_first(tokens); node; node = rbt_first(tokens)) {
+
+ fts_tokenizer_word_t* word;
+ fts_node_t* fts_node = NULL;
+ fts_token_t* token = rbt_value(fts_token_t, node);
+
+ /* Find and/or add token to the cache. */
+ word = fts_tokenizer_word_get(
+ cache, index_cache, &token->text);
+
+ if (!word) {
+ ut_free(rbt_remove_node(tokens, node));
+ continue;
+ }
+
+ if (ib_vector_size(word->nodes) > 0) {
+ fts_node = static_cast<fts_node_t*>(
+ ib_vector_last(word->nodes));
+ }
+
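+		/* Start a new node if there is none yet, the last one
+		has already been synced to disk, its ilist has grown
+		past FTS_ILIST_MAX_SIZE, or doc_id would break the
+		ascending doc id order required within a node. */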
+ if (fts_node == NULL || fts_node->synced
+ || fts_node->ilist_size > FTS_ILIST_MAX_SIZE
+ || doc_id < fts_node->last_doc_id) {
+
+ fts_node = static_cast<fts_node_t*>(
+ ib_vector_push(word->nodes, NULL));
+
+ memset(fts_node, 0x0, sizeof(*fts_node));
+
+ cache->total_size += sizeof(*fts_node);
+ }
+
+ fts_cache_node_add_positions(
+ cache, fts_node, doc_id, token->positions);
+
+ ut_free(rbt_remove_node(tokens, node));
+ }
+
+ ut_a(rbt_empty(tokens));
+
+ /* Add to doc ids processed so far. */
+ doc_stats = static_cast<fts_doc_stats_t*>(
+ ib_vector_push(index_cache->doc_stats, NULL));
+
+ doc_stats->doc_id = doc_id;
+ doc_stats->word_count = n_words;
+
+ /* Add the doc stats memory usage too. */
+ cache->total_size += sizeof(*doc_stats);
+
+ if (doc_id > cache->sync->max_doc_id) {
+ cache->sync->max_doc_id = doc_id;
+ }
+}
+
+/****************************************************************//**
+Drops a table. If the table can't be found we return DB_FAIL, which
+callers treat as a non-fatal condition.
+@return DB_SUCCESS, DB_FAIL or error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_drop_table(
+/*===========*/
+ trx_t* trx, /*!< in: transaction */
+ const char* table_name) /*!< in: table to drop */
+{
+ dict_table_t* table;
+ dberr_t error = DB_SUCCESS;
+
+ /* Check that the table exists in our data dictionary.
+ Similar to regular drop table case, we will open table with
+ DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
+ table = dict_table_open_on_name(
+ table_name, TRUE, FALSE,
+ static_cast<dict_err_ignore_t>(
+ DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
+
+ if (table != 0) {
+
+ dict_table_close(table, TRUE, FALSE);
+
+ /* Pass nonatomic=false (don't allow data dict unlock),
+ because the transaction may hold locks on SYS_* tables from
+ previous calls to fts_drop_table(). */
+ error = row_drop_table_for_mysql(table_name, trx,
+ SQLCOM_DROP_DB, false, false);
+
+ if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
+ ib::error() << "Unable to drop FTS index aux table "
+ << table_name << ": " << error;
+ }
+ } else {
+ error = DB_FAIL;
+ }
+
+ return(error);
+}
+
+/****************************************************************//**
+Rename a single auxiliary table due to database name change.
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_rename_one_aux_table(
+/*=====================*/
+ const char* new_name, /*!< in: new parent tbl name */
+ const char* fts_table_old_name, /*!< in: old aux tbl name */
+ trx_t* trx) /*!< in: transaction */
+{
+ char fts_table_new_name[MAX_TABLE_NAME_LEN];
+ ulint new_db_name_len = dict_get_db_name_len(new_name);
+ ulint old_db_name_len = dict_get_db_name_len(fts_table_old_name);
+ ulint table_new_name_len = strlen(fts_table_old_name)
+ + new_db_name_len - old_db_name_len;
+
+	/* The new and old database names must differ; otherwise there
+	would be nothing to rename. */
+ ut_ad((new_db_name_len != old_db_name_len)
+ || strncmp(new_name, fts_table_old_name, old_db_name_len) != 0);
+
+ /* Get the database name from "new_name", and table name
+ from the fts_table_old_name */
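+	/* For example, when the parent table moves from database
+	"olddb" to "newdb", an aux table named "olddb/FTS_..._CONFIG"
+	becomes "newdb/FTS_..._CONFIG"; only the database prefix before
+	the '/' changes. */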
+ strncpy(fts_table_new_name, new_name, new_db_name_len);
+ strncpy(fts_table_new_name + new_db_name_len,
+ strchr(fts_table_old_name, '/'),
+ table_new_name_len - new_db_name_len);
+ fts_table_new_name[table_new_name_len] = 0;
+
+ return row_rename_table_for_mysql(
+ fts_table_old_name, fts_table_new_name, trx, false, false);
+}
+
+/****************************************************************//**
+Rename the auxiliary tables of all FTS indexes of a table. This rename
+is due to a database name change.
+@return DB_SUCCESS or error code */
+dberr_t
+fts_rename_aux_tables(
+/*==================*/
+ dict_table_t* table, /*!< in: user Table */
+ const char* new_name, /*!< in: new table name */
+ trx_t* trx) /*!< in: transaction */
+{
+ ulint i;
+ fts_table_t fts_table;
+
+ FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
+
+ dberr_t err = DB_SUCCESS;
+ char old_table_name[MAX_FULL_NAME_LEN];
+
+ /* Rename common auxiliary tables */
+ for (i = 0; fts_common_tables[i] != NULL; ++i) {
+ fts_table.suffix = fts_common_tables[i];
+ fts_get_table_name(&fts_table, old_table_name, true);
+
+ err = fts_rename_one_aux_table(new_name, old_table_name, trx);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+
+ fts_t* fts = table->fts;
+
+ /* Rename index specific auxiliary tables */
+ for (i = 0; fts->indexes != 0 && i < ib_vector_size(fts->indexes);
+ ++i) {
+ dict_index_t* index;
+
+ index = static_cast<dict_index_t*>(
+ ib_vector_getp(fts->indexes, i));
+
+ FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
+
+ for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
+ fts_table.suffix = fts_get_suffix(j);
+ fts_get_table_name(&fts_table, old_table_name, true);
+
+ err = fts_rename_one_aux_table(
+ new_name, old_table_name, trx);
+
+ DBUG_EXECUTE_IF("fts_rename_failure",
+ err = DB_DEADLOCK;
+ fts_sql_rollback(trx););
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/** Drops the common ancillary tables needed for supporting an FTS index
+on the given table. row_mysql_lock_data_dictionary must have been called
+before this.
+@param[in] trx transaction to drop fts common table
+@param[in] fts_table table with an FTS index
+@param[in] drop_orphan True if the function is used to drop
+ orphaned table
+@return DB_SUCCESS or error code */
+static dberr_t
+fts_drop_common_tables(
+ trx_t* trx,
+ fts_table_t* fts_table,
+ bool drop_orphan=false)
+{
+ ulint i;
+ dberr_t error = DB_SUCCESS;
+
+ for (i = 0; fts_common_tables[i] != NULL; ++i) {
+ dberr_t err;
+ char table_name[MAX_FULL_NAME_LEN];
+
+ fts_table->suffix = fts_common_tables[i];
+ fts_get_table_name(fts_table, table_name, true);
+
+ err = fts_drop_table(trx, table_name);
+
+ /* We only return the status of the last error. */
+ if (err != DB_SUCCESS && err != DB_FAIL) {
+ error = err;
+ }
+
+ if (drop_orphan && err == DB_FAIL) {
+ char* path = fil_make_filepath(
+ NULL, table_name, IBD, false);
+ if (path != NULL) {
+ os_file_delete_if_exists(
+ innodb_data_file_key, path, NULL);
+ ut_free(path);
+ }
+ }
+ }
+
+ return(error);
+}
+
+/****************************************************************//**
+Since we do a horizontal split on the index table, we need to drop
+all the split tables.
+@return DB_SUCCESS or error code */
+static
+dberr_t
+fts_drop_index_split_tables(
+/*========================*/
+ trx_t* trx, /*!< in: transaction */
+	dict_index_t*	index)	/*!< in: FTS index */
+{
+ ulint i;
+ fts_table_t fts_table;
+ dberr_t error = DB_SUCCESS;
+
+ FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
+
+ for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
+ dberr_t err;
+ char table_name[MAX_FULL_NAME_LEN];
+
+ fts_table.suffix = fts_get_suffix(i);
+ fts_get_table_name(&fts_table, table_name, true);
+
+ err = fts_drop_table(trx, table_name);
+
+ /* We only return the status of the last error. */
+ if (err != DB_SUCCESS && err != DB_FAIL) {
+ error = err;
+ }
+ }
+
+ return(error);
+}
+
+/****************************************************************//**
+Drops FTS auxiliary tables for an FTS index
+@return DB_SUCCESS or error code */
+dberr_t
+fts_drop_index_tables(
+/*==================*/
+ trx_t* trx, /*!< in: transaction */
+ dict_index_t* index) /*!< in: Index to drop */
+{
+ return(fts_drop_index_split_tables(trx, index));
+}
+
+/****************************************************************//**
+Drops the auxiliary tables of all FTS indexes on the given table.
+row_mysql_lock_data_dictionary must have been called before this.
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_drop_all_index_tables(
+/*======================*/
+ trx_t* trx, /*!< in: transaction */
+ fts_t* fts) /*!< in: fts instance */
+{
+ dberr_t error = DB_SUCCESS;
+
+ for (ulint i = 0;
+ fts->indexes != 0 && i < ib_vector_size(fts->indexes);
+ ++i) {
+
+ dberr_t err;
+ dict_index_t* index;
+
+ index = static_cast<dict_index_t*>(
+ ib_vector_getp(fts->indexes, i));
+
+ err = fts_drop_index_tables(trx, index);
+
+ if (err != DB_SUCCESS) {
+ error = err;
+ }
+ }
+
+ return(error);
+}
+
+/*********************************************************************//**
+Drops the ancillary tables needed for supporting an FTS index on a
+given table. row_mysql_lock_data_dictionary must have been called before
+this.
+@return DB_SUCCESS or error code */
+dberr_t
+fts_drop_tables(
+/*============*/
+ trx_t* trx, /*!< in: transaction */
+ dict_table_t* table) /*!< in: table has the FTS index */
+{
+ dberr_t error;
+ fts_table_t fts_table;
+
+ FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
+
+ /* TODO: This is not atomic and can cause problems during recovery. */
+
+ error = fts_drop_common_tables(trx, &fts_table);
+
+ if (error == DB_SUCCESS && table->fts) {
+ error = fts_drop_all_index_tables(trx, table->fts);
+ }
+
+ return(error);
+}
+
+/** Create dict_table_t object for FTS Aux tables.
+@param[in] aux_table_name FTS Aux table name
+@param[in] table table object of FTS Index
+@param[in] n_cols number of columns for FTS Aux table
+@return table object for FTS Aux table */
+static
+dict_table_t*
+fts_create_in_mem_aux_table(
+ const char* aux_table_name,
+ const dict_table_t* table,
+ ulint n_cols)
+{
+ dict_table_t* new_table = dict_mem_table_create(
+ aux_table_name, NULL, n_cols, 0, table->flags,
+ table->space_id == TRX_SYS_SPACE
+ ? 0 : table->space_id == SRV_TMP_SPACE_ID
+ ? DICT_TF2_TEMPORARY : DICT_TF2_USE_FILE_PER_TABLE);
+
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ ut_ad(table->data_dir_path != NULL);
+ new_table->data_dir_path = mem_heap_strdup(
+ new_table->heap, table->data_dir_path);
+ }
+
+ return(new_table);
+}
+
+/** Function to create one FTS common table.
+@param[in,out] trx InnoDB transaction
+@param[in] table Table that has FTS Index
+@param[in] fts_table_name FTS AUX table name
+@param[in] fts_suffix FTS AUX table suffix
+@param[in,out] heap temporary memory heap
+@return table object if created, else NULL */
+static
+dict_table_t*
+fts_create_one_common_table(
+ trx_t* trx,
+ const dict_table_t* table,
+ const char* fts_table_name,
+ const char* fts_suffix,
+ mem_heap_t* heap)
+{
+ dict_table_t* new_table;
+ dberr_t error;
+ bool is_config = strcmp(fts_suffix, "CONFIG") == 0;
+
+ if (!is_config) {
+
+ new_table = fts_create_in_mem_aux_table(
+ fts_table_name, table, FTS_DELETED_TABLE_NUM_COLS);
+
+ dict_mem_table_add_col(
+ new_table, heap, "doc_id", DATA_INT, DATA_UNSIGNED,
+ FTS_DELETED_TABLE_COL_LEN);
+ } else {
+ /* Config table has different schema. */
+ new_table = fts_create_in_mem_aux_table(
+ fts_table_name, table, FTS_CONFIG_TABLE_NUM_COLS);
+
+ dict_mem_table_add_col(
+ new_table, heap, "key", DATA_VARCHAR, 0,
+ FTS_CONFIG_TABLE_KEY_COL_LEN);
+
+ dict_mem_table_add_col(
+ new_table, heap, "value", DATA_VARCHAR, DATA_NOT_NULL,
+ FTS_CONFIG_TABLE_VALUE_COL_LEN);
+ }
+
+ dict_table_add_system_columns(new_table, heap);
+ error = row_create_table_for_mysql(new_table, trx,
+ FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
+ if (error == DB_SUCCESS) {
+
+ dict_index_t* index = dict_mem_index_create(
+ new_table, "FTS_COMMON_TABLE_IND",
+ DICT_UNIQUE|DICT_CLUSTERED, 1);
+
+ if (!is_config) {
+ dict_mem_index_add_field(index, "doc_id", 0);
+ } else {
+ dict_mem_index_add_field(index, "key", 0);
+ }
+
+ /* We save and restore trx->dict_operation because
+ row_create_index_for_mysql() changes the operation to
+ TRX_DICT_OP_TABLE. */
+ trx_dict_op_t op = trx_get_dict_operation(trx);
+
+ error = row_create_index_for_mysql(index, trx, NULL);
+
+ trx->dict_operation = op;
+ } else {
+err_exit:
+ new_table = NULL;
+ ib::warn() << "Failed to create FTS common table "
+ << fts_table_name;
+ trx->error_state = error;
+ return NULL;
+ }
+
+ if (error != DB_SUCCESS) {
+ dict_mem_table_free(new_table);
+ trx->error_state = DB_SUCCESS;
+ row_drop_table_for_mysql(fts_table_name, trx, SQLCOM_DROP_DB);
+ goto err_exit;
+ }
+
+ return(new_table);
+}
+
+/** Creates the common auxiliary tables needed for supporting an FTS index
+on the given table. row_mysql_lock_data_dictionary must have been called
+before this.
+The following tables are created.
+CREATE TABLE $FTS_PREFIX_DELETED
+ (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_DELETED_CACHE
+ (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_BEING_DELETED
+ (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
+ (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_CONFIG
+ (key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
+@param[in,out] trx transaction
+@param[in,out] table table with FTS index
+@param[in] skip_doc_id_index Skip index on doc id
+@return DB_SUCCESS if succeed */
+dberr_t
+fts_create_common_tables(
+ trx_t* trx,
+ dict_table_t* table,
+ bool skip_doc_id_index)
+{
+ dberr_t error;
+ que_t* graph;
+ fts_table_t fts_table;
+ mem_heap_t* heap = mem_heap_create(1024);
+ pars_info_t* info;
+ char fts_name[MAX_FULL_NAME_LEN];
+ char full_name[sizeof(fts_common_tables) / sizeof(char*)]
+ [MAX_FULL_NAME_LEN];
+
+ dict_index_t* index = NULL;
+ trx_dict_op_t op;
+ /* common_tables vector is used for dropping FTS common tables
+ on error condition. */
+ std::vector<dict_table_t*> common_tables;
+ std::vector<dict_table_t*>::const_iterator it;
+
+ FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
+
+ op = trx_get_dict_operation(trx);
+
+ error = fts_drop_common_tables(trx, &fts_table);
+
+ if (error != DB_SUCCESS) {
+
+ goto func_exit;
+ }
+
+ /* Create the FTS tables that are common to an FTS index. */
+ for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
+
+ fts_table.suffix = fts_common_tables[i];
+ fts_get_table_name(&fts_table, full_name[i], true);
+ dict_table_t* common_table = fts_create_one_common_table(
+ trx, table, full_name[i], fts_table.suffix, heap);
+
+ if (!common_table) {
+ trx->error_state = DB_SUCCESS;
+ error = DB_ERROR;
+ goto func_exit;
+ } else {
+ common_tables.push_back(common_table);
+ }
+
+ mem_heap_empty(heap);
+
+ DBUG_EXECUTE_IF("ib_fts_aux_table_error",
+ /* Return error after creating FTS_AUX_CONFIG table. */
+ if (i == 4) {
+ error = DB_ERROR;
+ goto func_exit;
+ }
+ );
+
+ }
+
+ /* Write the default settings to the config table. */
+ info = pars_info_create();
+
+ fts_table.suffix = "CONFIG";
+ fts_get_table_name(&fts_table, fts_name, true);
+ pars_info_bind_id(info, true, "config_table", fts_name);
+
+ graph = fts_parse_sql_no_dict_lock(
+ info, fts_config_table_insert_values_sql);
+
+ error = fts_eval_sql(trx, graph);
+
+ que_graph_free(graph);
+
+ if (error != DB_SUCCESS || skip_doc_id_index) {
+
+ goto func_exit;
+ }
+
+ index = dict_mem_index_create(table, FTS_DOC_ID_INDEX_NAME,
+ DICT_UNIQUE, 1);
+ dict_mem_index_add_field(index, FTS_DOC_ID_COL_NAME, 0);
+
+ op = trx_get_dict_operation(trx);
+
+ error = row_create_index_for_mysql(index, trx, NULL);
+
+func_exit:
+ if (error != DB_SUCCESS) {
+ for (it = common_tables.begin(); it != common_tables.end();
+ ++it) {
+ row_drop_table_for_mysql((*it)->name.m_name, trx,
+ SQLCOM_DROP_DB);
+ }
+ }
+
+ trx->dict_operation = op;
+
+ common_tables.clear();
+ mem_heap_free(heap);
+
+ return(error);
+}
+
+/** Create one FTS auxiliary index table for an FTS index.
+@param[in,out] trx transaction
+@param[in] index the index instance
+@param[in] fts_table fts_table structure
+@param[in,out] heap temporary memory heap
+@see row_merge_create_fts_sort_index()
+@return new table object if created, else NULL */
+static
+dict_table_t*
+fts_create_one_index_table(
+ trx_t* trx,
+ const dict_index_t* index,
+ const fts_table_t* fts_table,
+ mem_heap_t* heap)
+{
+ dict_field_t* field;
+ dict_table_t* new_table;
+ char table_name[MAX_FULL_NAME_LEN];
+ dberr_t error;
+ CHARSET_INFO* charset;
+
+ ut_ad(index->type & DICT_FTS);
+
+ fts_get_table_name(fts_table, table_name, true);
+
+ new_table = fts_create_in_mem_aux_table(
+ table_name, fts_table->table,
+ FTS_AUX_INDEX_TABLE_NUM_COLS);
+
+ field = dict_index_get_nth_field(index, 0);
+ charset = fts_get_charset(field->col->prtype);
+
+ dict_mem_table_add_col(new_table, heap, "word",
+ charset == &my_charset_latin1
+ ? DATA_VARCHAR : DATA_VARMYSQL,
+ field->col->prtype,
+ FTS_MAX_WORD_LEN_IN_CHAR
+ * unsigned(field->col->mbmaxlen));
+
+ dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
+ DATA_NOT_NULL | DATA_UNSIGNED,
+ FTS_INDEX_FIRST_DOC_ID_LEN);
+
+ dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT,
+ DATA_NOT_NULL | DATA_UNSIGNED,
+ FTS_INDEX_LAST_DOC_ID_LEN);
+
+ dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT,
+ DATA_NOT_NULL | DATA_UNSIGNED,
+ FTS_INDEX_DOC_COUNT_LEN);
+
+ /* The precise type calculation is as follows:
+	least significant byte: MySQL type code (not applicable for sys cols)
+ second least : DATA_NOT_NULL | DATA_BINARY_TYPE
+ third least : the MySQL charset-collation code (DATA_MTYPE_MAX) */
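+	/* Hence the value below: DATA_MTYPE_MAX is shifted into the
+	third byte as the charset-collation code, combined with the
+	DATA_UNSIGNED and DATA_NOT_NULL flags. */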
+
+ dict_mem_table_add_col(
+ new_table, heap, "ilist", DATA_BLOB,
+ (DATA_MTYPE_MAX << 16) | DATA_UNSIGNED | DATA_NOT_NULL,
+ FTS_INDEX_ILIST_LEN);
+
+ dict_table_add_system_columns(new_table, heap);
+ error = row_create_table_for_mysql(new_table, trx,
+ FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
+
+ if (error == DB_SUCCESS) {
+ dict_index_t* index = dict_mem_index_create(
+ new_table, "FTS_INDEX_TABLE_IND",
+ DICT_UNIQUE|DICT_CLUSTERED, 2);
+ dict_mem_index_add_field(index, "word", 0);
+ dict_mem_index_add_field(index, "first_doc_id", 0);
+
+ trx_dict_op_t op = trx_get_dict_operation(trx);
+
+ error = row_create_index_for_mysql(index, trx, NULL);
+
+ trx->dict_operation = op;
+ } else {
+err_exit:
+ new_table = NULL;
+ ib::warn() << "Failed to create FTS index table "
+ << table_name;
+ trx->error_state = error;
+ return NULL;
+ }
+
+ if (error != DB_SUCCESS) {
+ dict_mem_table_free(new_table);
+ trx->error_state = DB_SUCCESS;
+ row_drop_table_for_mysql(table_name, trx, SQLCOM_DROP_DB);
+ goto err_exit;
+ }
+
+ return(new_table);
+}
+
+/** Creates the column specific ancillary tables needed for supporting an
+FTS index on the given table. row_mysql_lock_data_dictionary must have
+been called before this.
+
+All FTS AUX Index tables have the following schema.
+CREATE TABLE $FTS_PREFIX_INDEX_[1-6](
+ word VARCHAR(FTS_MAX_WORD_LEN),
+ first_doc_id INT NOT NULL,
+ last_doc_id UNSIGNED NOT NULL,
+ doc_count UNSIGNED INT NOT NULL,
+ ilist VARBINARY NOT NULL,
+ UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
+@param[in,out] trx dictionary transaction
+@param[in] index fulltext index
+@param[in] id table id
+@return DB_SUCCESS or error code */
+dberr_t
+fts_create_index_tables(trx_t* trx, const dict_index_t* index, table_id_t id)
+{
+ ulint i;
+ fts_table_t fts_table;
+ dberr_t error = DB_SUCCESS;
+ mem_heap_t* heap = mem_heap_create(1024);
+
+ fts_table.type = FTS_INDEX_TABLE;
+ fts_table.index_id = index->id;
+ fts_table.table_id = id;
+ fts_table.table = index->table;
+
+ /* aux_idx_tables vector is used for dropping FTS AUX INDEX
+ tables on error condition. */
+ std::vector<dict_table_t*> aux_idx_tables;
+ std::vector<dict_table_t*>::const_iterator it;
+
+ for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) {
+ dict_table_t* new_table;
+
+ /* Create the FTS auxiliary tables that are specific
+ to an FTS index. We need to preserve the table_id %s
+ which fts_parse_sql_no_dict_lock() will fill in for us. */
+ fts_table.suffix = fts_get_suffix(i);
+
+ new_table = fts_create_one_index_table(
+ trx, index, &fts_table, heap);
+
+ if (new_table == NULL) {
+ error = DB_FAIL;
+ break;
+ } else {
+ aux_idx_tables.push_back(new_table);
+ }
+
+ mem_heap_empty(heap);
+
+ DBUG_EXECUTE_IF("ib_fts_index_table_error",
+ /* Return error after creating FTS_INDEX_5
+ aux table. */
+ if (i == 4) {
+ error = DB_FAIL;
+ break;
+ }
+ );
+ }
+
+ if (error != DB_SUCCESS) {
+
+ for (it = aux_idx_tables.begin(); it != aux_idx_tables.end();
+ ++it) {
+ row_drop_table_for_mysql((*it)->name.m_name, trx,
+ SQLCOM_DROP_DB);
+ }
+ }
+
+ aux_idx_tables.clear();
+ mem_heap_free(heap);
+
+ return(error);
+}
+
+/******************************************************************//**
+Calculate the new state of a row given the existing state and a new event.
+@return new state of row */
+static
+fts_row_state
+fts_trx_row_get_new_state(
+/*======================*/
+ fts_row_state old_state, /*!< in: existing state of row */
+ fts_row_state event) /*!< in: new event */
+{
+ /* The rules for transforming states:
+
+ I = inserted
+ M = modified
+ D = deleted
+ N = nothing
+
+ M+D -> D:
+
+ If the row existed before the transaction started and it is modified
+ during the transaction, followed by a deletion of the row, only the
+ deletion will be signaled.
+
+ M+ -> M:
+
+ If the row existed before the transaction started and it is modified
+ more than once during the transaction, only the last modification
+ will be signaled.
+
+ IM*D -> N:
+
+ If a new row is added during the transaction (and possibly modified
+ after its initial insertion) but it is deleted before the end of the
+ transaction, nothing will be signaled.
+
+ IM* -> I:
+
+ If a new row is added during the transaction and modified after its
+ initial insertion, only the addition will be signaled.
+
+ M*DI -> M:
+
+ If the row existed before the transaction started and it is deleted,
+ then re-inserted, only a modification will be signaled. Note that
+ this case is only possible if the table is using the row's primary
+ key for FTS row ids, since those can be re-inserted by the user,
+ which is not true for InnoDB generated row ids.
+
+ It is easily seen that the above rules decompose such that we do not
+ need to store the row's entire history of events. Instead, we can
+ store just one state for the row and update that when new events
+ arrive. Then we can implement the above rules as a two-dimensional
+ look-up table, and get checking of invalid combinations "for free"
+ in the process. */
+
+ /* The lookup table for transforming states. old_state is the
+ Y-axis, event is the X-axis. */
+ static const fts_row_state table[4][4] = {
+ /* I M D N */
+ /* I */ { FTS_INVALID, FTS_INSERT, FTS_NOTHING, FTS_INVALID },
+ /* M */ { FTS_INVALID, FTS_MODIFY, FTS_DELETE, FTS_INVALID },
+ /* D */ { FTS_MODIFY, FTS_INVALID, FTS_INVALID, FTS_INVALID },
+ /* N */ { FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID }
+ };
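+	/* For example, a pre-existing row that was modified
+	(old_state == FTS_MODIFY) and is then deleted (event ==
+	FTS_DELETE) maps to FTS_DELETE, implementing the M+D -> D rule
+	described above. */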
+
+ fts_row_state result;
+
+ ut_a(old_state < FTS_INVALID);
+ ut_a(event < FTS_INVALID);
+
+ result = table[(int) old_state][(int) event];
+ ut_a(result != FTS_INVALID);
+
+ return(result);
+}
+
+/******************************************************************//**
+Create a savepoint instance.
+@return savepoint instance */
+static
+fts_savepoint_t*
+fts_savepoint_create(
+/*=================*/
+	ib_vector_t*	savepoints,	/*!< in/out: vector of savepoints */
+ const char* name, /*!< in: savepoint name */
+ mem_heap_t* heap) /*!< in: heap */
+{
+ fts_savepoint_t* savepoint;
+
+ savepoint = static_cast<fts_savepoint_t*>(
+ ib_vector_push(savepoints, NULL));
+
+ memset(savepoint, 0x0, sizeof(*savepoint));
+
+ if (name) {
+ savepoint->name = mem_heap_strdup(heap, name);
+ }
+
+ savepoint->tables = rbt_create(
+ sizeof(fts_trx_table_t*), fts_trx_table_cmp);
+
+ return(savepoint);
+}
+
+/******************************************************************//**
+Create an FTS trx.
+@return FTS trx */
+fts_trx_t*
+fts_trx_create(
+/*===========*/
+ trx_t* trx) /*!< in/out: InnoDB
+ transaction */
+{
+ fts_trx_t* ftt;
+ ib_alloc_t* heap_alloc;
+ mem_heap_t* heap = mem_heap_create(1024);
+ trx_named_savept_t* savep;
+
+ ut_a(trx->fts_trx == NULL);
+
+ ftt = static_cast<fts_trx_t*>(mem_heap_alloc(heap, sizeof(fts_trx_t)));
+ ftt->trx = trx;
+ ftt->heap = heap;
+
+ heap_alloc = ib_heap_allocator_create(heap);
+
+ ftt->savepoints = static_cast<ib_vector_t*>(ib_vector_create(
+ heap_alloc, sizeof(fts_savepoint_t), 4));
+
+ ftt->last_stmt = static_cast<ib_vector_t*>(ib_vector_create(
+ heap_alloc, sizeof(fts_savepoint_t), 4));
+
+ /* Default instance has no name and no heap. */
+ fts_savepoint_create(ftt->savepoints, NULL, NULL);
+ fts_savepoint_create(ftt->last_stmt, NULL, NULL);
+
+	/* Copy savepoints that have already been set. */
+ for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
+ savep != NULL;
+ savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
+
+ fts_savepoint_take(ftt, savep->name);
+ }
+
+ return(ftt);
+}
+
+/******************************************************************//**
+Create an FTS trx table.
+@return FTS trx table */
+static
+fts_trx_table_t*
+fts_trx_table_create(
+/*=================*/
+ fts_trx_t* fts_trx, /*!< in: FTS trx */
+ dict_table_t* table) /*!< in: table */
+{
+ fts_trx_table_t* ftt;
+
+ ftt = static_cast<fts_trx_table_t*>(
+ mem_heap_alloc(fts_trx->heap, sizeof(*ftt)));
+
+ memset(ftt, 0x0, sizeof(*ftt));
+
+ ftt->table = table;
+ ftt->fts_trx = fts_trx;
+
+ ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
+
+ return(ftt);
+}
+
+/******************************************************************//**
+Clone an FTS trx table.
+@return FTS trx table */
+static
+fts_trx_table_t*
+fts_trx_table_clone(
+/*=================*/
+	const fts_trx_table_t*	ftt_src)	/*!< in: FTS trx table to clone */
+{
+ fts_trx_table_t* ftt;
+
+ ftt = static_cast<fts_trx_table_t*>(
+ mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt)));
+
+ memset(ftt, 0x0, sizeof(*ftt));
+
+ ftt->table = ftt_src->table;
+ ftt->fts_trx = ftt_src->fts_trx;
+
+ ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
+
+ /* Copy the rb tree values to the new savepoint. */
+ rbt_merge_uniq(ftt->rows, ftt_src->rows);
+
+ /* These are only added on commit. At this stage we only have
+ the updated row state. */
+ ut_a(ftt_src->added_doc_ids == NULL);
+
+ return(ftt);
+}
+
+/******************************************************************//**
+Find or create the FTS trx table instance for the given table.
+@return FTS trx table instance */
+static
+fts_trx_table_t*
+fts_trx_init(
+/*=========*/
+ trx_t* trx, /*!< in: transaction */
+ dict_table_t* table, /*!< in: FTS table instance */
+ ib_vector_t* savepoints) /*!< in: Savepoints */
+{
+ fts_trx_table_t* ftt;
+ ib_rbt_bound_t parent;
+ ib_rbt_t* tables;
+ fts_savepoint_t* savepoint;
+
+ savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
+
+ tables = savepoint->tables;
+ rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, NULL);
+
+ if (parent.result == 0) {
+ fts_trx_table_t** fttp;
+
+ fttp = rbt_value(fts_trx_table_t*, parent.last);
+ ftt = *fttp;
+ } else {
+ ftt = fts_trx_table_create(trx->fts_trx, table);
+ rbt_add_node(tables, &parent, &ftt);
+ }
+
+ ut_a(ftt->table == table);
+
+ return(ftt);
+}
+
+/******************************************************************//**
+Notify the FTS system about an operation on an FTS-indexed table. */
+static
+void
+fts_trx_table_add_op(
+/*=================*/
+ fts_trx_table_t*ftt, /*!< in: FTS trx table */
+ doc_id_t doc_id, /*!< in: doc id */
+ fts_row_state state, /*!< in: state of the row */
+ ib_vector_t* fts_indexes) /*!< in: FTS indexes affected */
+{
+ ib_rbt_t* rows;
+ ib_rbt_bound_t parent;
+
+ rows = ftt->rows;
+ rbt_search(rows, &parent, &doc_id);
+
+ /* Row id found, update state, and if new state is FTS_NOTHING,
+ we delete the row from our tree. */
+ if (parent.result == 0) {
+ fts_trx_row_t* row = rbt_value(fts_trx_row_t, parent.last);
+
+ row->state = fts_trx_row_get_new_state(row->state, state);
+
+ if (row->state == FTS_NOTHING) {
+ if (row->fts_indexes) {
+ ib_vector_free(row->fts_indexes);
+ }
+
+ ut_free(rbt_remove_node(rows, parent.last));
+ row = NULL;
+ } else if (row->fts_indexes != NULL) {
+ ib_vector_free(row->fts_indexes);
+ row->fts_indexes = fts_indexes;
+ }
+
+ } else { /* Row-id not found, create a new one. */
+ fts_trx_row_t row;
+
+ row.doc_id = doc_id;
+ row.state = state;
+ row.fts_indexes = fts_indexes;
+
+ rbt_add_node(rows, &parent, &row);
+ }
+}
+
+/******************************************************************//**
+Notify the FTS system about an operation on an FTS-indexed table. */
+void
+fts_trx_add_op(
+/*===========*/
+ trx_t* trx, /*!< in: InnoDB transaction */
+ dict_table_t* table, /*!< in: table */
+ doc_id_t doc_id, /*!< in: new doc id */
+ fts_row_state state, /*!< in: state of the row */
+ ib_vector_t* fts_indexes) /*!< in: FTS indexes affected
+ (NULL=all) */
+{
+ fts_trx_table_t* tran_ftt;
+ fts_trx_table_t* stmt_ftt;
+
+ if (!trx->fts_trx) {
+ trx->fts_trx = fts_trx_create(trx);
+ }
+
+ tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints);
+ stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt);
+
+ fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes);
+ fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes);
+}
+
+/******************************************************************//**
+Fetch callback that converts a textual document id to a binary value and
+stores it in the given place.
+@return always returns FALSE */
+static
+ibool
+fts_fetch_store_doc_id(
+/*===================*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: doc_id_t* to store
+ doc_id in */
+{
+ int n_parsed;
+ sel_node_t* node = static_cast<sel_node_t*>(row);
+ doc_id_t* doc_id = static_cast<doc_id_t*>(user_arg);
+ dfield_t* dfield = que_node_get_val(node->select_list);
+ dtype_t* type = dfield_get_type(dfield);
+ ulint len = dfield_get_len(dfield);
+
+ char buf[32];
+
+ ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
+ ut_a(len > 0 && len < sizeof(buf));
+
+ memcpy(buf, dfield_get_data(dfield), len);
+ buf[len] = '\0';
+
+ n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id);
+ ut_a(n_parsed == 1);
+
+ return(FALSE);
+}
+
+#ifdef FTS_CACHE_SIZE_DEBUG
+/******************************************************************//**
+Get the max cache size in bytes. If there is an error reading the
+value we simply print an error message here and return the default
+value to the caller.
+@return max cache size in bytes */
+static
+ulint
+fts_get_max_cache_size(
+/*===================*/
+ trx_t* trx, /*!< in: transaction */
+ fts_table_t* fts_table) /*!< in: table instance */
+{
+ dberr_t error;
+ fts_string_t value;
+ ulong cache_size_in_mb;
+
+ /* Set to the default value. */
+ cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
+
+ /* We set the length of value to the max bytes it can hold. This
+ information is used by the callback that reads the value. */
+ value.f_n_char = 0;
+ value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
+ value.f_str = ut_malloc_nokey(value.f_len + 1);
+
+ error = fts_config_get_value(
+ trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value);
+
+ if (UNIV_LIKELY(error == DB_SUCCESS)) {
+ value.f_str[value.f_len] = 0;
+ cache_size_in_mb = strtoul((char*) value.f_str, NULL, 10);
+
+ if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) {
+
+ ib::warn() << "FTS max cache size ("
+ << cache_size_in_mb << ") out of range."
+ " Minimum value is "
+ << FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
+ << "MB and the maximum value is "
+ << FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
+ << "MB, setting cache size to upper limit";
+
+ cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB;
+
+ } else if (cache_size_in_mb
+ < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) {
+
+ ib::warn() << "FTS max cache size ("
+ << cache_size_in_mb << ") out of range."
+ " Minimum value is "
+ << FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
+					<< "MB and the maximum value is "
+ << FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
+ << "MB, setting cache size to lower limit";
+
+ cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
+ }
+ } else {
+ ib::error() << "(" << error << ") reading max"
+ " cache config value from config table "
+ << fts_table->table->name;
+ }
+
+ ut_free(value.f_str);
+
+ return(cache_size_in_mb * 1024 * 1024);
+}
+#endif
+
+/*********************************************************************//**
+Update the next and last Doc ID in the CONFIG table to be the input
+"doc_id" value (+ 1). We do this after each FTS index build or
+table truncation. */
+void
+fts_update_next_doc_id(
+/*===================*/
+ trx_t* trx, /*!< in/out: transaction */
+ const dict_table_t* table, /*!< in: table */
+ doc_id_t doc_id) /*!< in: DOC ID to set */
+{
+ table->fts->cache->synced_doc_id = doc_id;
+ table->fts->cache->next_doc_id = doc_id + 1;
+
+ table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
+
+ fts_update_sync_doc_id(
+ table, table->fts->cache->synced_doc_id, trx);
+
+}
+
+/*********************************************************************//**
+Get the next available document id.
+@return DB_SUCCESS if OK */
+dberr_t
+fts_get_next_doc_id(
+/*================*/
+ const dict_table_t* table, /*!< in: table */
+ doc_id_t* doc_id) /*!< out: new document id */
+{
+ fts_cache_t* cache = table->fts->cache;
+
+ /* If the Doc ID system has not yet been initialized, we
+ will consult the CONFIG table and user table to re-establish
+ the initial value of the Doc ID */
+ if (cache->first_doc_id == FTS_NULL_DOC_ID) {
+ fts_init_doc_id(table);
+ }
+
+ if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
+ *doc_id = FTS_NULL_DOC_ID;
+ return(DB_SUCCESS);
+ }
+
+ DEBUG_SYNC_C("get_next_FTS_DOC_ID");
+ mutex_enter(&cache->doc_id_lock);
+ *doc_id = cache->next_doc_id++;
+ mutex_exit(&cache->doc_id_lock);
+
+ return(DB_SUCCESS);
+}
+
+/*********************************************************************//**
+This function fetches the Doc ID from the CONFIG table and compares it
+with the Doc ID supplied, storing the larger of the two back to the
+CONFIG table.
+@return DB_SUCCESS if OK */
+static MY_ATTRIBUTE((nonnull))
+dberr_t
+fts_cmp_set_sync_doc_id(
+/*====================*/
+ const dict_table_t* table, /*!< in: table */
+ doc_id_t cmp_doc_id, /*!< in: Doc ID to compare */
+ ibool read_only, /*!< in: TRUE if read the
+ synced_doc_id only */
+ doc_id_t* doc_id) /*!< out: larger document id
+ after comparing "cmp_doc_id"
+ to the one stored in CONFIG
+ table */
+{
+ trx_t* trx;
+ pars_info_t* info;
+ dberr_t error;
+ fts_table_t fts_table;
+ que_t* graph = NULL;
+ fts_cache_t* cache = table->fts->cache;
+ char table_name[MAX_FULL_NAME_LEN];
+retry:
+ ut_a(table->fts->doc_col != ULINT_UNDEFINED);
+
+ fts_table.suffix = "CONFIG";
+ fts_table.table_id = table->id;
+ fts_table.type = FTS_COMMON_TABLE;
+ fts_table.table = table;
+
+ trx = trx_create();
+ if (srv_read_only_mode) {
+ trx_start_internal_read_only(trx);
+ } else {
+ trx_start_internal(trx);
+ }
+
+ trx->op_info = "update the next FTS document id";
+
+ info = pars_info_create();
+
+ pars_info_bind_function(
+ info, "my_func", fts_fetch_store_doc_id, doc_id);
+
+ fts_get_table_name(&fts_table, table_name);
+ pars_info_bind_id(info, true, "config_table", table_name);
+
+ graph = fts_parse_sql(
+ &fts_table, info,
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR c IS SELECT value FROM $config_table"
+ " WHERE key = 'synced_doc_id' FOR UPDATE;\n"
+ "BEGIN\n"
+ ""
+ "OPEN c;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH c INTO my_func();\n"
+ " IF c % NOTFOUND THEN\n"
+ " EXIT;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE c;");
+
+ *doc_id = 0;
+
+ error = fts_eval_sql(trx, graph);
+
+ fts_que_graph_free_check_lock(&fts_table, NULL, graph);
+
+ // FIXME: We need to retry deadlock errors
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ }
+
+ if (read_only) {
+ /* InnoDB stores actual synced_doc_id value + 1 in
+ FTS_CONFIG table. Reduce the value by 1 while reading
+ after startup. */
+ if (*doc_id) *doc_id -= 1;
+ goto func_exit;
+ }
+
+ if (cmp_doc_id == 0 && *doc_id) {
+ cache->synced_doc_id = *doc_id - 1;
+ } else {
+ cache->synced_doc_id = ut_max(cmp_doc_id, *doc_id);
+ }
+
+ mutex_enter(&cache->doc_id_lock);
+	/* For each sync operation, we increment next_doc_id by 1
+	to mark the sync operation. */
+ if (cache->next_doc_id < cache->synced_doc_id + 1) {
+ cache->next_doc_id = cache->synced_doc_id + 1;
+ }
+ mutex_exit(&cache->doc_id_lock);
+
+ if (cmp_doc_id > *doc_id) {
+ error = fts_update_sync_doc_id(
+ table, cache->synced_doc_id, trx);
+ }
+
+ *doc_id = cache->next_doc_id;
+
+func_exit:
+
+ if (UNIV_LIKELY(error == DB_SUCCESS)) {
+ fts_sql_commit(trx);
+ } else {
+ *doc_id = 0;
+
+ ib::error() << "(" << error << ") while getting next doc id "
+ "for table " << table->name;
+ fts_sql_rollback(trx);
+
+ if (error == DB_DEADLOCK) {
+ os_thread_sleep(FTS_DEADLOCK_RETRY_WAIT);
+ goto retry;
+ }
+ }
+
+ trx->free();
+
+ return(error);
+}
+
+/*********************************************************************//**
+Update the last document id. This function could create a new
+transaction to update the last document id.
+@return DB_SUCCESS if OK */
+static
+dberr_t
+fts_update_sync_doc_id(
+/*===================*/
+ const dict_table_t* table, /*!< in: table */
+ doc_id_t doc_id, /*!< in: last document id */
+ trx_t* trx) /*!< in: update trx, or NULL */
+{
+ byte id[FTS_MAX_ID_LEN];
+ pars_info_t* info;
+ fts_table_t fts_table;
+ ulint id_len;
+ que_t* graph = NULL;
+ dberr_t error;
+ ibool local_trx = FALSE;
+ fts_cache_t* cache = table->fts->cache;
+ char fts_name[MAX_FULL_NAME_LEN];
+
+ if (srv_read_only_mode) {
+ return DB_READ_ONLY;
+ }
+
+ fts_table.suffix = "CONFIG";
+ fts_table.table_id = table->id;
+ fts_table.type = FTS_COMMON_TABLE;
+ fts_table.table = table;
+
+ if (!trx) {
+ trx = trx_create();
+ trx_start_internal(trx);
+
+ trx->op_info = "setting last FTS document id";
+ local_trx = TRUE;
+ }
+
+ info = pars_info_create();
+
+ id_len = (ulint) snprintf(
+ (char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
+
+ pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
+
+ fts_get_table_name(&fts_table, fts_name,
+ table->fts->dict_locked);
+ pars_info_bind_id(info, true, "table_name", fts_name);
+
+ graph = fts_parse_sql(
+ &fts_table, info,
+ "BEGIN"
+ " UPDATE $table_name SET value = :doc_id"
+ " WHERE key = 'synced_doc_id';");
+
+ error = fts_eval_sql(trx, graph);
+
+ fts_que_graph_free_check_lock(&fts_table, NULL, graph);
+
+ if (local_trx) {
+ if (UNIV_LIKELY(error == DB_SUCCESS)) {
+ fts_sql_commit(trx);
+ cache->synced_doc_id = doc_id;
+ } else {
+ ib::error() << "(" << error << ") while"
+ " updating last doc id for table"
+ << table->name;
+
+ fts_sql_rollback(trx);
+ }
+ trx->free();
+ }
+
+ return(error);
+}
+
+/*********************************************************************//**
+Create a new fts_doc_ids_t.
+@return new fts_doc_ids_t */
+fts_doc_ids_t*
+fts_doc_ids_create(void)
+/*====================*/
+{
+ fts_doc_ids_t* fts_doc_ids;
+ mem_heap_t* heap = mem_heap_create(512);
+
+ fts_doc_ids = static_cast<fts_doc_ids_t*>(
+ mem_heap_alloc(heap, sizeof(*fts_doc_ids)));
+
+ fts_doc_ids->self_heap = ib_heap_allocator_create(heap);
+
+ fts_doc_ids->doc_ids = static_cast<ib_vector_t*>(ib_vector_create(
+ fts_doc_ids->self_heap, sizeof(doc_id_t), 32));
+
+ return(fts_doc_ids);
+}
+
+/*********************************************************************//**
+Do commit-phase steps necessary for the insertion of a new row. */
+void
+fts_add(
+/*====*/
+ fts_trx_table_t*ftt, /*!< in: FTS trx table */
+ fts_trx_row_t* row) /*!< in: row */
+{
+ dict_table_t* table = ftt->table;
+ doc_id_t doc_id = row->doc_id;
+
+ ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
+
+ fts_add_doc_by_id(ftt, doc_id, row->fts_indexes);
+
+ mutex_enter(&table->fts->cache->deleted_lock);
+ ++table->fts->cache->added;
+ mutex_exit(&table->fts->cache->deleted_lock);
+
+ if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
+ && doc_id >= table->fts->cache->next_doc_id) {
+ table->fts->cache->next_doc_id = doc_id + 1;
+ }
+}
+
+/*********************************************************************//**
+Do commit-phase steps necessary for the deletion of a row.
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_delete(
+/*=======*/
+ fts_trx_table_t*ftt, /*!< in: FTS trx table */
+ fts_trx_row_t* row) /*!< in: row */
+{
+ que_t* graph;
+ fts_table_t fts_table;
+ dberr_t error = DB_SUCCESS;
+ doc_id_t write_doc_id;
+ dict_table_t* table = ftt->table;
+ doc_id_t doc_id = row->doc_id;
+ trx_t* trx = ftt->fts_trx->trx;
+ pars_info_t* info = pars_info_create();
+ fts_cache_t* cache = table->fts->cache;
+
+	/* We do not index documents whose Doc ID value is 0 */
+ if (doc_id == FTS_NULL_DOC_ID) {
+ ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID));
+ return(error);
+ }
+
+ ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
+
+ FTS_INIT_FTS_TABLE(&fts_table, "DELETED", FTS_COMMON_TABLE, table);
+
+ /* Convert to "storage" byte order. */
+ fts_write_doc_id((byte*) &write_doc_id, doc_id);
+ fts_bind_doc_id(info, "doc_id", &write_doc_id);
+
+ /* It is possible that we are updating a record that has not yet
+ been sync-ed into the cache since the last crash (a delete will not
+ initialize the sync). Avoid any 'added' counter accounting until the
+ FTS cache is re-established and sync-ed */
+ if (table->fts->added_synced
+ && doc_id > cache->synced_doc_id) {
+ mutex_enter(&table->fts->cache->deleted_lock);
+
+ /* The Doc ID could belong to those left in the
+ ADDED table from the last crash, so we need to check
+ whether it is less than the first_doc_id set when we
+ initialize the Doc ID system after reboot */
+ if (doc_id >= table->fts->cache->first_doc_id
+ && table->fts->cache->added > 0) {
+ --table->fts->cache->added;
+ }
+
+ mutex_exit(&table->fts->cache->deleted_lock);
+
+ /* Only if the row was really deleted. */
+ ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
+ }
+
+ /* Note the deleted document for OPTIMIZE to purge. */
+ if (error == DB_SUCCESS) {
+ char table_name[MAX_FULL_NAME_LEN];
+
+ trx->op_info = "adding doc id to FTS DELETED";
+
+ info->graph_owns_us = TRUE;
+
+ fts_table.suffix = "DELETED";
+
+ fts_get_table_name(&fts_table, table_name);
+ pars_info_bind_id(info, true, "deleted", table_name);
+
+ graph = fts_parse_sql(
+ &fts_table,
+ info,
+ "BEGIN INSERT INTO $deleted VALUES (:doc_id);");
+
+ error = fts_eval_sql(trx, graph);
+
+ fts_que_graph_free(graph);
+ } else {
+ pars_info_free(info);
+ }
+
+ /* Increment the total deleted count, this is used to calculate the
+ number of documents indexed. */
+ if (error == DB_SUCCESS) {
+ mutex_enter(&table->fts->cache->deleted_lock);
+
+ ++table->fts->cache->deleted;
+
+ mutex_exit(&table->fts->cache->deleted_lock);
+ }
+
+ return(error);
+}
+
+/*********************************************************************//**
+Do commit-phase steps necessary for the modification of a row.
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_modify(
+/*=======*/
+ fts_trx_table_t* ftt, /*!< in: FTS trx table */
+ fts_trx_row_t* row) /*!< in: row */
+{
+ dberr_t error;
+
+ ut_a(row->state == FTS_MODIFY);
+
+ error = fts_delete(ftt, row);
+
+ if (error == DB_SUCCESS) {
+ fts_add(ftt, row);
+ }
+
+ return(error);
+}
+
+/*********************************************************************//**
+The given transaction is about to be committed; do whatever is necessary
+from the FTS system's POV.
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_commit_table(
+/*=============*/
+ fts_trx_table_t* ftt) /*!< in: FTS table to commit*/
+{
+ if (srv_read_only_mode) {
+ return DB_READ_ONLY;
+ }
+
+ const ib_rbt_node_t* node;
+ ib_rbt_t* rows;
+ dberr_t error = DB_SUCCESS;
+ fts_cache_t* cache = ftt->table->fts->cache;
+ trx_t* trx = trx_create();
+
+ trx_start_internal(trx);
+
+ rows = ftt->rows;
+
+ ftt->fts_trx->trx = trx;
+
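+ /* Lazily create the get_docs vector (one fts_get_doc_t per FTS
+ index) under init_lock; the double check below avoids taking the
+ lock on the common path. */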
+ if (cache->get_docs == NULL) {
+ rw_lock_x_lock(&cache->init_lock);
+ if (cache->get_docs == NULL) {
+ cache->get_docs = fts_get_docs_create(cache);
+ }
+ rw_lock_x_unlock(&cache->init_lock);
+ }
+
+ for (node = rbt_first(rows);
+ node != NULL && error == DB_SUCCESS;
+ node = rbt_next(rows, node)) {
+
+ fts_trx_row_t* row = rbt_value(fts_trx_row_t, node);
+
+ switch (row->state) {
+ case FTS_INSERT:
+ fts_add(ftt, row);
+ break;
+
+ case FTS_MODIFY:
+ error = fts_modify(ftt, row);
+ break;
+
+ case FTS_DELETE:
+ error = fts_delete(ftt, row);
+ break;
+
+ default:
+ ut_error;
+ }
+ }
+
+ fts_sql_commit(trx);
+
+ trx->free();
+
+ return(error);
+}
+
+/*********************************************************************//**
+The given transaction is about to be committed; do whatever is necessary
+from the FTS system's POV.
+@return DB_SUCCESS or error code */
+dberr_t
+fts_commit(
+/*=======*/
+ trx_t* trx) /*!< in: transaction */
+{
+ const ib_rbt_node_t* node;
+ dberr_t error;
+ ib_rbt_t* tables;
+ fts_savepoint_t* savepoint;
+
+ savepoint = static_cast<fts_savepoint_t*>(
+ ib_vector_last(trx->fts_trx->savepoints));
+ tables = savepoint->tables;
+
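+ /* The savepoint's tables tree maps each table touched by this
+ transaction to the FTS row changes recorded against it; replay
+ them table by table. */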
+ for (node = rbt_first(tables), error = DB_SUCCESS;
+ node != NULL && error == DB_SUCCESS;
+ node = rbt_next(tables, node)) {
+
+ fts_trx_table_t** ftt;
+
+ ftt = rbt_value(fts_trx_table_t*, node);
+
+ error = fts_commit_table(*ftt);
+ }
+
+ return(error);
+}
+
+/*********************************************************************//**
+Initialize a document. */
+void
+fts_doc_init(
+/*=========*/
+ fts_doc_t* doc) /*!< in: doc to initialize */
+{
+ mem_heap_t* heap = mem_heap_create(32);
+
+ memset(doc, 0, sizeof(*doc));
+
+ doc->self_heap = ib_heap_allocator_create(heap);
+}
+
+/*********************************************************************//**
+Free document. */
+void
+fts_doc_free(
+/*=========*/
+ fts_doc_t* doc) /*!< in: document */
+{
+ mem_heap_t* heap = static_cast<mem_heap_t*>(doc->self_heap->arg);
+
+ if (doc->tokens) {
+ rbt_free(doc->tokens);
+ }
+
+ ut_d(memset(doc, 0, sizeof(*doc)));
+
+ mem_heap_free(heap);
+}
+
+/*********************************************************************//**
+Callback function for fetch that stores the text of an FTS document,
+converting each column to UTF-16.
+@return always FALSE */
+ibool
+fts_query_expansion_fetch_doc(
+/*==========================*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: fts_doc_t* */
+{
+ que_node_t* exp;
+ sel_node_t* node = static_cast<sel_node_t*>(row);
+ fts_doc_t* result_doc = static_cast<fts_doc_t*>(user_arg);
+ dfield_t* dfield;
+ ulint len;
+ ulint doc_len;
+ fts_doc_t doc;
+ CHARSET_INFO* doc_charset = NULL;
+ ulint field_no = 0;
+
+ len = 0;
+
+ fts_doc_init(&doc);
+ doc.found = TRUE;
+
+ exp = node->select_list;
+ doc_len = 0;
+
+ doc_charset = result_doc->charset;
+
+ /* Copy each indexed column content into doc->text.f_str */
+ while (exp) {
+ dfield = que_node_get_val(exp);
+ len = dfield_get_len(dfield);
+
+ /* NULL column */
+ if (len == UNIV_SQL_NULL) {
+ exp = que_node_get_next(exp);
+ continue;
+ }
+
+ if (!doc_charset) {
+ doc_charset = fts_get_charset(dfield->type.prtype);
+ }
+
+ doc.charset = doc_charset;
+
+ if (dfield_is_ext(dfield)) {
+ /* We ignore columns that are stored externally; this
+ could result in too many words to search */
+ exp = que_node_get_next(exp);
+ continue;
+ } else {
+ doc.text.f_n_char = 0;
+
+ doc.text.f_str = static_cast<byte*>(
+ dfield_get_data(dfield));
+
+ doc.text.f_len = len;
+ }
+
+ if (field_no == 0) {
+ fts_tokenize_document(&doc, result_doc,
+ result_doc->parser);
+ } else {
+ fts_tokenize_document_next(&doc, doc_len, result_doc,
+ result_doc->parser);
+ }
+
+ exp = que_node_get_next(exp);
+
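+ /* Keep a running document length; the +1 accounts for the implicit
+ separator between consecutive column values (none after the last
+ column). */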
+ doc_len += (exp) ? len + 1 : len;
+
+ field_no++;
+ }
+
+ ut_ad(doc_charset);
+
+ if (!result_doc->charset) {
+ result_doc->charset = doc_charset;
+ }
+
+ fts_doc_free(&doc);
+
+ return(FALSE);
+}
+
+/*********************************************************************//**
+Fetch and tokenize the document. */
+static
+void
+fts_fetch_doc_from_rec(
+/*===================*/
+ fts_get_doc_t* get_doc, /*!< in: FTS index's get_doc struct */
+ dict_index_t* clust_index, /*!< in: cluster index */
+ btr_pcur_t* pcur, /*!< in: cursor whose position
+ has been stored */
+ rec_offs* offsets, /*!< in: offsets */
+ fts_doc_t* doc) /*!< out: fts doc to hold parsed
+ documents */
+{
+ dict_index_t* index;
+ const rec_t* clust_rec;
+ const dict_field_t* ifield;
+ ulint clust_pos;
+ ulint doc_len = 0;
+ st_mysql_ftparser* parser;
+
+ if (!get_doc) {
+ return;
+ }
+
+ index = get_doc->index_cache->index;
+ parser = get_doc->index_cache->index->parser;
+
+ clust_rec = btr_pcur_get_rec(pcur);
+ ut_ad(!page_rec_is_comp(clust_rec)
+ || rec_get_status(clust_rec) == REC_STATUS_ORDINARY);
+
+ for (ulint i = 0; i < index->n_fields; i++) {
+ ifield = dict_index_get_nth_field(index, i);
+ clust_pos = dict_col_get_clust_pos(ifield->col, clust_index);
+
+ if (!get_doc->index_cache->charset) {
+ get_doc->index_cache->charset = fts_get_charset(
+ ifield->col->prtype);
+ }
+
+ if (rec_offs_nth_extern(offsets, clust_pos)) {
+ doc->text.f_str =
+ btr_rec_copy_externally_stored_field(
+ clust_rec, offsets,
+ btr_pcur_get_block(pcur)->zip_size(),
+ clust_pos, &doc->text.f_len,
+ static_cast<mem_heap_t*>(
+ doc->self_heap->arg));
+ } else {
+ doc->text.f_str = (byte*) rec_get_nth_field(
+ clust_rec, offsets, clust_pos,
+ &doc->text.f_len);
+ }
+
+ doc->found = TRUE;
+ doc->charset = get_doc->index_cache->charset;
+
+ /* Null Field */
+ if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
+ continue;
+ }
+
+ if (!doc_len) {
+ fts_tokenize_document(doc, NULL, parser);
+ } else {
+ fts_tokenize_document_next(doc, doc_len, NULL, parser);
+ }
+
+ doc_len += doc->text.f_len + 1;
+ }
+}
+
+/** Fetch the data from tuple and tokenize the document.
+@param[in] get_doc FTS index's get_doc struct
+@param[in] tuple tuple should be arranged in table schema order
+@param[out] doc fts doc to hold parsed documents. */
+static
+void
+fts_fetch_doc_from_tuple(
+ fts_get_doc_t* get_doc,
+ const dtuple_t* tuple,
+ fts_doc_t* doc)
+{
+ dict_index_t* index;
+ st_mysql_ftparser* parser;
+ ulint doc_len = 0;
+ ulint processed_doc = 0;
+ ulint num_field;
+
+ if (get_doc == NULL) {
+ return;
+ }
+
+ index = get_doc->index_cache->index;
+ parser = get_doc->index_cache->index->parser;
+ num_field = dict_index_get_n_fields(index);
+
+ for (ulint i = 0; i < num_field; i++) {
+ const dict_field_t* ifield;
+ const dict_col_t* col;
+ ulint pos;
+
+ ifield = dict_index_get_nth_field(index, i);
+ col = dict_field_get_col(ifield);
+ pos = dict_col_get_no(col);
+ const dfield_t* field = dtuple_get_nth_field(tuple, pos);
+
+ if (!get_doc->index_cache->charset) {
+ get_doc->index_cache->charset = fts_get_charset(
+ ifield->col->prtype);
+ }
+
+ ut_ad(!dfield_is_ext(field));
+
+ doc->text.f_str = (byte*) dfield_get_data(field);
+ doc->text.f_len = dfield_get_len(field);
+ doc->found = TRUE;
+ doc->charset = get_doc->index_cache->charset;
+
+ /* field data is NULL. */
+ if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
+ continue;
+ }
+
+ if (processed_doc == 0) {
+ fts_tokenize_document(doc, NULL, parser);
+ } else {
+ fts_tokenize_document_next(doc, doc_len, NULL, parser);
+ }
+
+ processed_doc++;
+ doc_len += doc->text.f_len + 1;
+ }
+}
+
+/** Fetch the document from the tuple, tokenize the text data and
+insert the text data into the fts auxiliary table and
+its cache. Note that the tuple's fields do not contain any information
+about externally stored fields; the tuple holds data directly
+converted from mysql.
+@param[in] ftt FTS transaction table
+@param[in] doc_id doc id
+@param[in] tuple tuple from where data can be retrieved
+ and tuple should be arranged in table
+ schema order. */
+void
+fts_add_doc_from_tuple(
+ fts_trx_table_t*ftt,
+ doc_id_t doc_id,
+ const dtuple_t* tuple)
+{
+ mtr_t mtr;
+ fts_cache_t* cache = ftt->table->fts->cache;
+
+ ut_ad(cache->get_docs);
+
+ if (!ftt->table->fts->added_synced) {
+ fts_init_index(ftt->table, FALSE);
+ }
+
+ mtr_start(&mtr);
+
+ ulint num_idx = ib_vector_size(cache->get_docs);
+
+ for (ulint i = 0; i < num_idx; ++i) {
+ fts_doc_t doc;
+ dict_table_t* table;
+ fts_get_doc_t* get_doc;
+
+ get_doc = static_cast<fts_get_doc_t*>(
+ ib_vector_get(cache->get_docs, i));
+ table = get_doc->index_cache->index->table;
+
+ fts_doc_init(&doc);
+ fts_fetch_doc_from_tuple(
+ get_doc, tuple, &doc);
+
+ if (doc.found) {
+ mtr_commit(&mtr);
+ rw_lock_x_lock(&table->fts->cache->lock);
+
+ if (table->fts->cache->stopword_info.status
+ & STOPWORD_NOT_INIT) {
+ fts_load_stopword(table, NULL, NULL,
+ true, true);
+ }
+
+ fts_cache_add_doc(
+ table->fts->cache,
+ get_doc->index_cache,
+ doc_id, doc.tokens);
+
+ rw_lock_x_unlock(&table->fts->cache->lock);
+
+ if (cache->total_size > fts_max_cache_size / 5
+ || fts_need_sync) {
+ fts_sync(cache->sync, true, false);
+ }
+
+ mtr_start(&mtr);
+
+ }
+
+ fts_doc_free(&doc);
+ }
+
+ mtr_commit(&mtr);
+}
+
+/*********************************************************************//**
+This function fetches the document inserted during the committing
+transaction, tokenizes the inserted text data and inserts it into the
+FTS auxiliary table and its cache.
+@return TRUE if successful */
+static
+ulint
+fts_add_doc_by_id(
+/*==============*/
+ fts_trx_table_t*ftt, /*!< in: FTS trx table */
+ doc_id_t doc_id, /*!< in: doc id */
+ ib_vector_t* fts_indexes MY_ATTRIBUTE((unused)))
+ /*!< in: affected fts indexes */
+{
+ mtr_t mtr;
+ mem_heap_t* heap;
+ btr_pcur_t pcur;
+ dict_table_t* table;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ fts_get_doc_t* get_doc;
+ doc_id_t temp_doc_id;
+ dict_index_t* clust_index;
+ dict_index_t* fts_id_index;
+ ibool is_id_cluster;
+ fts_cache_t* cache = ftt->table->fts->cache;
+
+ ut_ad(cache->get_docs);
+
+ /* If Doc ID has been supplied by the user, then the table
+ might not yet be sync-ed */
+
+ if (!ftt->table->fts->added_synced) {
+ fts_init_index(ftt->table, FALSE);
+ }
+
+ /* Get the first FTS index's get_doc */
+ get_doc = static_cast<fts_get_doc_t*>(
+ ib_vector_get(cache->get_docs, 0));
+ ut_ad(get_doc);
+
+ table = get_doc->index_cache->index->table;
+
+ heap = mem_heap_create(512);
+
+ clust_index = dict_table_get_first_index(table);
+ fts_id_index = table->fts_doc_id_index;
+
+ /* Check whether the index on FTS_DOC_ID is the clustered index */
+ is_id_cluster = (clust_index == fts_id_index);
+
+ mtr_start(&mtr);
+ btr_pcur_init(&pcur);
+
+ /* Search based on Doc ID. Here, we'll need to consider the case
+ when there is no primary index on Doc ID */
+ tuple = dtuple_create(heap, 1);
+ dfield = dtuple_get_nth_field(tuple, 0);
+ dfield->type.mtype = DATA_INT;
+ dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;
+
+ mach_write_to_8((byte*) &temp_doc_id, doc_id);
+ dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));
+
+ btr_pcur_open_with_no_init(
+ fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
+ &pcur, 0, &mtr);
+
+ /* If we have a match, add the data to doc structure */
+ if (btr_pcur_get_low_match(&pcur) == 1) {
+ const rec_t* rec;
+ btr_pcur_t* doc_pcur;
+ const rec_t* clust_rec;
+ btr_pcur_t clust_pcur;
+ rec_offs* offsets = NULL;
+ ulint num_idx = ib_vector_size(cache->get_docs);
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ /* Doc could be deleted */
+ if (page_rec_is_infimum(rec)
+ || rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
+
+ goto func_exit;
+ }
+
+ if (is_id_cluster) {
+ clust_rec = rec;
+ doc_pcur = &pcur;
+ } else {
+ dtuple_t* clust_ref;
+ ulint n_fields;
+
+ btr_pcur_init(&clust_pcur);
+ n_fields = dict_index_get_n_unique(clust_index);
+
+ clust_ref = dtuple_create(heap, n_fields);
+ dict_index_copy_types(clust_ref, clust_index, n_fields);
+
+ row_build_row_ref_in_tuple(
+ clust_ref, rec, fts_id_index, NULL);
+
+ btr_pcur_open_with_no_init(
+ clust_index, clust_ref, PAGE_CUR_LE,
+ BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);
+
+ doc_pcur = &clust_pcur;
+ clust_rec = btr_pcur_get_rec(&clust_pcur);
+
+ }
+
+ offsets = rec_get_offsets(clust_rec, clust_index, NULL,
+ clust_index->n_core_fields,
+ ULINT_UNDEFINED, &heap);
+
+ for (ulint i = 0; i < num_idx; ++i) {
+ fts_doc_t doc;
+ dict_table_t* table;
+ fts_get_doc_t* get_doc;
+
+ get_doc = static_cast<fts_get_doc_t*>(
+ ib_vector_get(cache->get_docs, i));
+
+ table = get_doc->index_cache->index->table;
+
+ fts_doc_init(&doc);
+
+ fts_fetch_doc_from_rec(
+ get_doc, clust_index, doc_pcur, offsets, &doc);
+
+ if (doc.found) {
+ ibool success MY_ATTRIBUTE((unused));
+
+ btr_pcur_store_position(doc_pcur, &mtr);
+ mtr_commit(&mtr);
+
+ rw_lock_x_lock(&table->fts->cache->lock);
+
+ if (table->fts->cache->stopword_info.status
+ & STOPWORD_NOT_INIT) {
+ fts_load_stopword(table, NULL,
+ NULL, true, true);
+ }
+
+ fts_cache_add_doc(
+ table->fts->cache,
+ get_doc->index_cache,
+ doc_id, doc.tokens);
+
+ bool need_sync = false;
+ if ((cache->total_size > fts_max_cache_size / 10
+ || fts_need_sync)
+ && !cache->sync->in_progress) {
+ need_sync = true;
+ }
+
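+ /* Release the cache lock first; if need_sync was set above, the
+ actual SYNC request is issued below, outside the lock. */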
+ rw_lock_x_unlock(&table->fts->cache->lock);
+
+ DBUG_EXECUTE_IF(
+ "fts_instrument_sync",
+ fts_optimize_request_sync_table(table);
+ os_event_wait(cache->sync->event);
+ );
+
+ DBUG_EXECUTE_IF(
+ "fts_instrument_sync_debug",
+ fts_sync(cache->sync, true, true);
+ );
+
+ DEBUG_SYNC_C("fts_instrument_sync_request");
+ DBUG_EXECUTE_IF(
+ "fts_instrument_sync_request",
+ fts_optimize_request_sync_table(table);
+ );
+
+ if (need_sync) {
+ fts_optimize_request_sync_table(table);
+ }
+
+ mtr_start(&mtr);
+
+ if (i < num_idx - 1) {
+
+ success = btr_pcur_restore_position(
+ BTR_SEARCH_LEAF, doc_pcur,
+ &mtr);
+
+ ut_ad(success);
+ }
+ }
+
+ fts_doc_free(&doc);
+ }
+
+ if (!is_id_cluster) {
+ btr_pcur_close(doc_pcur);
+ }
+ }
+func_exit:
+ mtr_commit(&mtr);
+
+ btr_pcur_close(&pcur);
+
+ mem_heap_free(heap);
+ return(TRUE);
+}
+
+
+/*********************************************************************//**
+Callback function to read a single ulint column.
+@return always TRUE */
+static
+ibool
+fts_read_ulint(
+/*===========*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: pointer to ulint */
+{
+ sel_node_t* sel_node = static_cast<sel_node_t*>(row);
+ ulint* value = static_cast<ulint*>(user_arg);
+ que_node_t* exp = sel_node->select_list;
+ dfield_t* dfield = que_node_get_val(exp);
+ void* data = dfield_get_data(dfield);
+
+ *value = mach_read_from_4(static_cast<const byte*>(data));
+
+ return(TRUE);
+}
+
+/*********************************************************************//**
+Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
+@return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
+doc_id_t
+fts_get_max_doc_id(
+/*===============*/
+ dict_table_t* table) /*!< in: user table */
+{
+ dict_index_t* index;
+ dict_field_t* dfield MY_ATTRIBUTE((unused)) = NULL;
+ doc_id_t doc_id = 0;
+ mtr_t mtr;
+ btr_pcur_t pcur;
+
+ index = table->fts_doc_id_index;
+
+ if (!index) {
+ return(0);
+ }
+
+ ut_ad(!index->is_instant());
+
+ dfield = dict_index_get_nth_field(index, 0);
+
+#if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
+ ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
+#endif
+
+ mtr_start(&mtr);
+
+ /* Fetch the largest value in the index */
+ btr_pcur_open_at_index_side(
+ false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
+
+ if (!page_is_empty(btr_pcur_get_page(&pcur))) {
+ const rec_t* rec = NULL;
+
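+ /* The cursor starts at the rightmost record of the index; step
+ backwards past any non-user records (e.g. the supremum) until a
+ user record is found. */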
+ do {
+ rec = btr_pcur_get_rec(&pcur);
+
+ if (page_rec_is_user_rec(rec)) {
+ break;
+ }
+ } while (btr_pcur_move_to_prev(&pcur, &mtr));
+
+ if (!rec || rec_is_metadata(rec, *index)) {
+ goto func_exit;
+ }
+
+ doc_id = fts_read_doc_id(rec);
+ }
+
+func_exit:
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ return(doc_id);
+}
+
+/*********************************************************************//**
+Fetch document with the given document id.
+@return DB_SUCCESS if OK else error */
+dberr_t
+fts_doc_fetch_by_doc_id(
+/*====================*/
+ fts_get_doc_t* get_doc, /*!< in: state */
+ doc_id_t doc_id, /*!< in: id of document to
+ fetch */
+ dict_index_t* index_to_use, /*!< in: caller supplied FTS index,
+ or NULL */
+ ulint option, /*!< in: search option: fetch the doc
+ equal to doc_id, or greater than it */
+ fts_sql_callback
+ callback, /*!< in: callback to read */
+ void* arg) /*!< in: callback arg */
+{
+ pars_info_t* info;
+ dberr_t error;
+ const char* select_str;
+ doc_id_t write_doc_id;
+ dict_index_t* index;
+ trx_t* trx = trx_create();
+ que_t* graph;
+
+ trx->op_info = "fetching indexed FTS document";
+
+ /* The FTS index can be supplied by caller directly with
+ "index_to_use", otherwise, get it from "get_doc" */
+ index = (index_to_use) ? index_to_use : get_doc->index_cache->index;
+
+ if (get_doc && get_doc->get_document_graph) {
+ info = get_doc->get_document_graph->info;
+ } else {
+ info = pars_info_create();
+ }
+
+ /* Convert to "storage" byte order. */
+ fts_write_doc_id((byte*) &write_doc_id, doc_id);
+ fts_bind_doc_id(info, "doc_id", &write_doc_id);
+ pars_info_bind_function(info, "my_func", callback, arg);
+
+ select_str = fts_get_select_columns_str(index, info, info->heap);
+ pars_info_bind_id(info, TRUE, "table_name", index->table->name.m_name);
+
+ if (!get_doc || !get_doc->get_document_graph) {
+ if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
+ graph = fts_parse_sql(
+ NULL,
+ info,
+ mem_heap_printf(info->heap,
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR c IS"
+ " SELECT %s FROM $table_name"
+ " WHERE %s = :doc_id;\n"
+ "BEGIN\n"
+ ""
+ "OPEN c;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH c INTO my_func();\n"
+ " IF c %% NOTFOUND THEN\n"
+ " EXIT;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE c;",
+ select_str, FTS_DOC_ID_COL_NAME));
+ } else {
+ ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);
+
+ /* This is used for crash recovery of a table with
+ a hidden DOC ID or FTS indexes. We will scan the
+ table to re-process user table rows whose DOC ID or
+ FTS indexed documents had not been sync-ed to disk
+ before the recent crash.
+ In the case that all fulltext indexes are dropped
+ for a table, we will keep the "hidden" FTS_DOC_ID
+ column, and this scan is to retrieve the largest
+ DOC ID being used in the table to determine the
+ appropriate next DOC ID.
+ In the case that fulltext index(es) exist, this
+ operation will re-tokenize any docs that have not
+ been sync-ed to disk, and re-prime the FTS
+ cache */
+ graph = fts_parse_sql(
+ NULL,
+ info,
+ mem_heap_printf(info->heap,
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR c IS"
+ " SELECT %s, %s FROM $table_name"
+ " WHERE %s > :doc_id;\n"
+ "BEGIN\n"
+ ""
+ "OPEN c;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH c INTO my_func();\n"
+ " IF c %% NOTFOUND THEN\n"
+ " EXIT;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE c;",
+ FTS_DOC_ID_COL_NAME,
+ select_str, FTS_DOC_ID_COL_NAME));
+ }
+ if (get_doc) {
+ get_doc->get_document_graph = graph;
+ }
+ } else {
+ graph = get_doc->get_document_graph;
+ }
+
+ error = fts_eval_sql(trx, graph);
+ fts_sql_commit(trx);
+ trx->free();
+
+ if (!get_doc) {
+ fts_que_graph_free(graph);
+ }
+
+ return(error);
+}
+
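+/* Example (sketch): a caller wanting the document with a given doc id
+could invoke fts_doc_fetch_by_doc_id() above roughly as
+	fts_doc_fetch_by_doc_id(get_doc, doc_id, NULL,
+				FTS_FETCH_DOC_BY_ID_EQUAL,
+				fts_query_expansion_fetch_doc, &result_doc);
+where result_doc is an fts_doc_t filled by the callback. The real call
+sites live elsewhere and may pass a different callback; this is an
+illustration of how the option and callback arguments fit together. */
+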
+/*********************************************************************//**
+Write out a single word's data as new entry/entries in the INDEX table.
+@return DB_SUCCESS if all OK. */
+dberr_t
+fts_write_node(
+/*===========*/
+ trx_t* trx, /*!< in: transaction */
+ que_t** graph, /*!< in: query graph */
+ fts_table_t* fts_table, /*!< in: aux table */
+ fts_string_t* word, /*!< in: word in UTF-8 */
+ fts_node_t* node) /*!< in: node columns */
+{
+ pars_info_t* info;
+ dberr_t error;
+ ib_uint32_t doc_count;
+ time_t start_time;
+ doc_id_t last_doc_id;
+ doc_id_t first_doc_id;
+ char table_name[MAX_FULL_NAME_LEN];
+
+ ut_a(node->ilist != NULL);
+
+ if (*graph) {
+ info = (*graph)->info;
+ } else {
+ info = pars_info_create();
+
+ fts_get_table_name(fts_table, table_name);
+ pars_info_bind_id(info, true, "index_table_name", table_name);
+ }
+
+ pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len);
+
+ /* Convert to "storage" byte order. */
+ fts_write_doc_id((byte*) &first_doc_id, node->first_doc_id);
+ fts_bind_doc_id(info, "first_doc_id", &first_doc_id);
+
+ /* Convert to "storage" byte order. */
+ fts_write_doc_id((byte*) &last_doc_id, node->last_doc_id);
+ fts_bind_doc_id(info, "last_doc_id", &last_doc_id);
+
+ ut_a(node->last_doc_id >= node->first_doc_id);
+
+ /* Convert to "storage" byte order. */
+ mach_write_to_4((byte*) &doc_count, node->doc_count);
+ pars_info_bind_int4_literal(
+ info, "doc_count", (const ib_uint32_t*) &doc_count);
+
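+ /* The ilist blob is the word's doc id / position data accumulated
+ for this node in the cache; it is written as-is into the ilist
+ column of the auxiliary index table. */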
+ /* Set copy_name to FALSE since it's a static. */
+ pars_info_bind_literal(
+ info, "ilist", node->ilist, node->ilist_size,
+ DATA_BLOB, DATA_BINARY_TYPE);
+
+ if (!*graph) {
+
+ *graph = fts_parse_sql(
+ fts_table,
+ info,
+ "BEGIN\n"
+ "INSERT INTO $index_table_name VALUES"
+ " (:token, :first_doc_id,"
+ " :last_doc_id, :doc_count, :ilist);");
+ }
+
+ start_time = time(NULL);
+ error = fts_eval_sql(trx, *graph);
+ elapsed_time += time(NULL) - start_time;
+ ++n_nodes;
+
+ return(error);
+}
+
+/*********************************************************************//**
+Add rows to the DELETED_CACHE table.
+@return DB_SUCCESS if all went well else error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_sync_add_deleted_cache(
+/*=======================*/
+ fts_sync_t* sync, /*!< in: sync state */
+ ib_vector_t* doc_ids) /*!< in: doc ids to add */
+{
+ ulint i;
+ pars_info_t* info;
+ que_t* graph;
+ fts_table_t fts_table;
+ char table_name[MAX_FULL_NAME_LEN];
+ doc_id_t dummy = 0;
+ dberr_t error = DB_SUCCESS;
+ ulint n_elems = ib_vector_size(doc_ids);
+
+ ut_a(ib_vector_size(doc_ids) > 0);
+
+ ib_vector_sort(doc_ids, fts_doc_id_cmp);
+
+ info = pars_info_create();
+
+ fts_bind_doc_id(info, "doc_id", &dummy);
+
+ FTS_INIT_FTS_TABLE(
+ &fts_table, "DELETED_CACHE", FTS_COMMON_TABLE, sync->table);
+
+ fts_get_table_name(&fts_table, table_name);
+ pars_info_bind_id(info, true, "table_name", table_name);
+
+ graph = fts_parse_sql(
+ &fts_table,
+ info,
+ "BEGIN INSERT INTO $table_name VALUES (:doc_id);");
+
+ for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) {
+ doc_id_t* update;
+ doc_id_t write_doc_id;
+
+ update = static_cast<doc_id_t*>(ib_vector_get(doc_ids, i));
+
+ /* Convert to "storage" byte order. */
+ fts_write_doc_id((byte*) &write_doc_id, *update);
+ fts_bind_doc_id(info, "doc_id", &write_doc_id);
+
+ error = fts_eval_sql(sync->trx, graph);
+ }
+
+ fts_que_graph_free(graph);
+
+ return(error);
+}
+
+/** Write the words and ilist to disk.
+@param[in,out] trx transaction
+@param[in] index_cache index cache
+@param[in] unlock_cache whether unlock cache when write node
+@return DB_SUCCESS if all went well else error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_sync_write_words(
+ trx_t* trx,
+ fts_index_cache_t* index_cache,
+ bool unlock_cache)
+{
+ fts_table_t fts_table;
+ ulint n_nodes = 0;
+ ulint n_words = 0;
+ const ib_rbt_node_t* rbt_node;
+ dberr_t error = DB_SUCCESS;
+ ibool print_error = FALSE;
+ dict_table_t* table = index_cache->index->table;
+
+ FTS_INIT_INDEX_TABLE(
+ &fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);
+
+ n_words = rbt_size(index_cache->words);
+
+ /* We iterate over the entire tree, even if there is an error,
+ since we want to free the memory used during caching. */
+ for (rbt_node = rbt_first(index_cache->words);
+ rbt_node;
+ rbt_node = rbt_next(index_cache->words, rbt_node)) {
+
+ ulint i;
+ ulint selected;
+ fts_tokenizer_word_t* word;
+
+ word = rbt_value(fts_tokenizer_word_t, rbt_node);
+
+ DBUG_EXECUTE_IF("fts_instrument_write_words_before_select_index",
+ os_thread_sleep(300000););
+
+ selected = fts_select_index(
+ index_cache->charset, word->text.f_str,
+ word->text.f_len);
+
+ fts_table.suffix = fts_get_suffix(selected);
+
+ /* We iterate over all the nodes even if there was an error */
+ for (i = 0; i < ib_vector_size(word->nodes); ++i) {
+
+ fts_node_t* fts_node = static_cast<fts_node_t*>(
+ ib_vector_get(word->nodes, i));
+
+ if (fts_node->synced) {
+ continue;
+ } else {
+ fts_node->synced = true;
+ }
+
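+ /* Nodes are optimistically marked as synced before the write; if
+ the SYNC is rolled back, fts_sync_index_reset() clears the flag so
+ they will be written again by the next SYNC. */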
+ /*FIXME: we need to handle the error properly. */
+ if (error == DB_SUCCESS) {
+ if (unlock_cache) {
+ rw_lock_x_unlock(
+ &table->fts->cache->lock);
+ }
+
+ error = fts_write_node(
+ trx,
+ &index_cache->ins_graph[selected],
+ &fts_table, &word->text, fts_node);
+
+ DEBUG_SYNC_C("fts_write_node");
+ DBUG_EXECUTE_IF("fts_write_node_crash",
+ DBUG_SUICIDE(););
+
+ DBUG_EXECUTE_IF("fts_instrument_sync_sleep",
+ os_thread_sleep(1000000);
+ );
+
+ if (unlock_cache) {
+ rw_lock_x_lock(
+ &table->fts->cache->lock);
+ }
+ }
+ }
+
+ n_nodes += ib_vector_size(word->nodes);
+
+ if (UNIV_UNLIKELY(error != DB_SUCCESS) && !print_error) {
+ ib::error() << "(" << error << ") writing"
+ " word node to FTS auxiliary index table "
+ << table->name;
+ print_error = TRUE;
+ }
+ }
+
+ if (UNIV_UNLIKELY(fts_enable_diag_print)) {
+ printf("Avg number of nodes: %lf\n",
+ (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
+ }
+
+ return(error);
+}
+
+/*********************************************************************//**
+Begin Sync, create transaction, acquire locks, etc. */
+static
+void
+fts_sync_begin(
+/*===========*/
+ fts_sync_t* sync) /*!< in: sync state */
+{
+ fts_cache_t* cache = sync->table->fts->cache;
+
+ n_nodes = 0;
+ elapsed_time = 0;
+
+ sync->start_time = time(NULL);
+
+ sync->trx = trx_create();
+ trx_start_internal(sync->trx);
+
+ if (UNIV_UNLIKELY(fts_enable_diag_print)) {
+ ib::info() << "FTS SYNC for table " << sync->table->name
+ << ", deleted count: "
+ << ib_vector_size(cache->deleted_doc_ids)
+ << " size: " << cache->total_size << " bytes";
+ }
+}
+
+/*********************************************************************//**
+Run SYNC on the table, i.e., write out data from the index specific
+cache to the FTS aux INDEX table and FTS aux doc id stats table.
+@return DB_SUCCESS if all OK */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_sync_index(
+/*===========*/
+ fts_sync_t* sync, /*!< in: sync state */
+ fts_index_cache_t* index_cache) /*!< in: index cache */
+{
+ trx_t* trx = sync->trx;
+
+ trx->op_info = "doing SYNC index";
+
+ if (UNIV_UNLIKELY(fts_enable_diag_print)) {
+ ib::info() << "SYNC words: " << rbt_size(index_cache->words);
+ }
+
+ ut_ad(rbt_validate(index_cache->words));
+
+ return(fts_sync_write_words(trx, index_cache, sync->unlock_cache));
+}
+
+/** Check if index cache has been synced completely
+@param[in,out] index_cache index cache
+@return true if index is synced, otherwise false. */
+static
+bool
+fts_sync_index_check(
+ fts_index_cache_t* index_cache)
+{
+ const ib_rbt_node_t* rbt_node;
+
+ for (rbt_node = rbt_first(index_cache->words);
+ rbt_node != NULL;
+ rbt_node = rbt_next(index_cache->words, rbt_node)) {
+
+ fts_tokenizer_word_t* word;
+ word = rbt_value(fts_tokenizer_word_t, rbt_node);
+
+ fts_node_t* fts_node;
+ fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
+
+ if (!fts_node->synced) {
+ return(false);
+ }
+ }
+
+ return(true);
+}
+
+/** Reset synced flag in index cache when rollback
+@param[in,out] index_cache index cache */
+static
+void
+fts_sync_index_reset(
+ fts_index_cache_t* index_cache)
+{
+ const ib_rbt_node_t* rbt_node;
+
+ for (rbt_node = rbt_first(index_cache->words);
+ rbt_node != NULL;
+ rbt_node = rbt_next(index_cache->words, rbt_node)) {
+
+ fts_tokenizer_word_t* word;
+ word = rbt_value(fts_tokenizer_word_t, rbt_node);
+
+ fts_node_t* fts_node;
+ fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
+
+ fts_node->synced = false;
+ }
+}
+
+/** Commit the SYNC, change state of processed doc ids etc.
+@param[in,out] sync sync state
+@return DB_SUCCESS if all OK */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_sync_commit(
+ fts_sync_t* sync)
+{
+ dberr_t error;
+ trx_t* trx = sync->trx;
+ fts_cache_t* cache = sync->table->fts->cache;
+ doc_id_t last_doc_id;
+
+ trx->op_info = "doing SYNC commit";
+
+ /* After each Sync, update the CONFIG table about the max doc id
+ we just sync-ed to index table */
+ error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE,
+ &last_doc_id);
+
+ /* Get the list of deleted documents that are either in the
+ cache or were headed there but were deleted before the add
+ thread got to them. */
+
+ if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) {
+
+ error = fts_sync_add_deleted_cache(
+ sync, cache->deleted_doc_ids);
+ }
+
+ /* We need to do this within the deleted lock since fts_delete() can
+ attempt to add a deleted doc id to the cache deleted id array. */
+ fts_cache_clear(cache);
+ DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
+ fts_cache_init(cache);
+ rw_lock_x_unlock(&cache->lock);
+
+ if (UNIV_LIKELY(error == DB_SUCCESS)) {
+ fts_sql_commit(trx);
+ } else {
+ fts_sql_rollback(trx);
+ ib::error() << "(" << error << ") during SYNC of "
+ "table " << sync->table->name;
+ }
+
+ if (UNIV_UNLIKELY(fts_enable_diag_print) && elapsed_time) {
+ ib::info() << "SYNC for table " << sync->table->name
+ << ": SYNC time: "
+ << (time(NULL) - sync->start_time)
+ << " secs: elapsed "
+ << static_cast<double>(n_nodes)
+ / static_cast<double>(elapsed_time)
+ << " ins/sec";
+ }
+
+ /* Avoid assertion in trx_t::free(). */
+ trx->dict_operation_lock_mode = 0;
+ trx->free();
+
+ return(error);
+}
+
+/** Rollback a sync operation
+@param[in,out] sync sync state */
+static
+void
+fts_sync_rollback(
+ fts_sync_t* sync)
+{
+ trx_t* trx = sync->trx;
+ fts_cache_t* cache = sync->table->fts->cache;
+
+ for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
+ ulint j;
+ fts_index_cache_t* index_cache;
+
+ index_cache = static_cast<fts_index_cache_t*>(
+ ib_vector_get(cache->indexes, i));
+
+ /* Reset synced flag so nodes will not be skipped
+ in the next sync, see fts_sync_write_words(). */
+ fts_sync_index_reset(index_cache);
+
+ for (j = 0; fts_index_selector[j].value; ++j) {
+
+ if (index_cache->ins_graph[j] != NULL) {
+
+ fts_que_graph_free_check_lock(
+ NULL, index_cache,
+ index_cache->ins_graph[j]);
+
+ index_cache->ins_graph[j] = NULL;
+ }
+
+ if (index_cache->sel_graph[j] != NULL) {
+
+ fts_que_graph_free_check_lock(
+ NULL, index_cache,
+ index_cache->sel_graph[j]);
+
+ index_cache->sel_graph[j] = NULL;
+ }
+ }
+ }
+
+ rw_lock_x_unlock(&cache->lock);
+
+ fts_sql_rollback(trx);
+
+ /* Avoid assertion in trx_t::free(). */
+ trx->dict_operation_lock_mode = 0;
+ trx->free();
+}
+
+/** Run SYNC on the table, i.e., write out data from the cache to the
+FTS auxiliary INDEX table and clear the cache at the end.
+@param[in,out] sync sync state
+@param[in] unlock_cache whether unlock cache lock when write node
+@param[in] wait whether wait when a sync is in progress
+@return DB_SUCCESS if all OK */
+static
+dberr_t
+fts_sync(
+ fts_sync_t* sync,
+ bool unlock_cache,
+ bool wait)
+{
+ if (srv_read_only_mode) {
+ return DB_READ_ONLY;
+ }
+
+ ulint i;
+ dberr_t error = DB_SUCCESS;
+ fts_cache_t* cache = sync->table->fts->cache;
+
+ rw_lock_x_lock(&cache->lock);
+
+ /* Check if cache is being synced.
+ Note: we release cache lock in fts_sync_write_words() to
+ avoid long wait for the lock by other threads. */
+ while (sync->in_progress) {
+ rw_lock_x_unlock(&cache->lock);
+
+ if (wait) {
+ os_event_wait(sync->event);
+ } else {
+ return(DB_SUCCESS);
+ }
+
+ rw_lock_x_lock(&cache->lock);
+ }
+
+ sync->unlock_cache = unlock_cache;
+ sync->in_progress = true;
+
+ DEBUG_SYNC_C("fts_sync_begin");
+ fts_sync_begin(sync);
+
+begin_sync:
+ if (cache->total_size > fts_max_cache_size) {
+ /* Avoid the case where the sync never finishes when
+ inserts/updates keep coming. */
+ ut_ad(sync->unlock_cache);
+ sync->unlock_cache = false;
+ }
+
+ for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
+ fts_index_cache_t* index_cache;
+
+ index_cache = static_cast<fts_index_cache_t*>(
+ ib_vector_get(cache->indexes, i));
+
+ if (index_cache->index->to_be_dropped
+ || index_cache->index->table->to_be_dropped) {
+ continue;
+ }
+
+ DBUG_EXECUTE_IF("fts_instrument_sync_before_syncing",
+ os_thread_sleep(300000););
+ error = fts_sync_index(sync, index_cache);
+
+ if (error != DB_SUCCESS) {
+ goto end_sync;
+ }
+ }
+
+ DBUG_EXECUTE_IF("fts_instrument_sync_interrupted",
+ sync->interrupted = true;
+ error = DB_INTERRUPTED;
+ goto end_sync;
+ );
+
+ /* Make sure all the caches are synced. */
+ for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
+ fts_index_cache_t* index_cache;
+
+ index_cache = static_cast<fts_index_cache_t*>(
+ ib_vector_get(cache->indexes, i));
+
+ if (index_cache->index->to_be_dropped
+ || index_cache->index->table->to_be_dropped
+ || fts_sync_index_check(index_cache)) {
+ continue;
+ }
+
+ goto begin_sync;
+ }
+
+end_sync:
+ if (error == DB_SUCCESS && !sync->interrupted) {
+ error = fts_sync_commit(sync);
+ } else {
+ fts_sync_rollback(sync);
+ }
+
+ rw_lock_x_lock(&cache->lock);
+
+ sync->interrupted = false;
+ sync->in_progress = false;
+ os_event_set(sync->event);
+ rw_lock_x_unlock(&cache->lock);
+
+ /* We need to check whether an optimize is required, for that
+ we make copies of the two variables that control the trigger. These
+ variables can change behind our back and we don't want to hold the
+ lock for longer than is needed. */
+ mutex_enter(&cache->deleted_lock);
+
+ cache->added = 0;
+ cache->deleted = 0;
+
+ mutex_exit(&cache->deleted_lock);
+
+ return(error);
+}
+
+/** Run SYNC on the table, i.e., write out data from the cache to the
+FTS auxiliary INDEX table and clear the cache at the end.
+@param[in,out] table fts table
+@param[in] wait whether wait for existing sync to finish
+@return DB_SUCCESS on success, error code on failure. */
+dberr_t fts_sync_table(dict_table_t* table, bool wait)
+{
+ dberr_t err = DB_SUCCESS;
+
+ ut_ad(table->fts);
+
+ if (table->space && table->fts->cache
+ && !dict_table_is_corrupted(table)) {
+ err = fts_sync(table->fts->cache->sync, !wait, wait);
+ }
+
+ return(err);
+}
+
+/** Check if a fts token is a stopword or less than fts_min_token_size
+or greater than fts_max_token_size.
+@param[in] token token string
+@param[in] stopwords stopwords rb tree
+@param[in] cs token charset
+@retval true if it is not stopword and length in range
+@retval false if it is a stopword or its length is not in range */
+bool
+fts_check_token(
+ const fts_string_t* token,
+ const ib_rbt_t* stopwords,
+ const CHARSET_INFO* cs)
+{
+ ut_ad(cs != NULL || stopwords == NULL);
+
+ ib_rbt_bound_t parent;
+
+ return(token->f_n_char >= fts_min_token_size
+ && token->f_n_char <= fts_max_token_size
+ && (stopwords == NULL
+ || rbt_search(stopwords, &parent, token) != 0));
+}
+
+/** Add the token and its start position to the token's list of positions.
+@param[in,out] result_doc result doc rb tree
+@param[in] str token string
+@param[in] position token position */
+static
+void
+fts_add_token(
+ fts_doc_t* result_doc,
+ fts_string_t str,
+ ulint position)
+{
+ /* Ignore strings whose number of characters is less than
+ "fts_min_token_size" or more than "fts_max_token_size" */
+
+ if (fts_check_token(&str, NULL, result_doc->charset)) {
+
+ mem_heap_t* heap;
+ fts_string_t t_str;
+ fts_token_t* token;
+ ib_rbt_bound_t parent;
+ ulint newlen;
+
+ heap = static_cast<mem_heap_t*>(result_doc->self_heap->arg);
+
+ t_str.f_n_char = str.f_n_char;
+
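+ /* Reserve enough room for a case-folded copy: some charsets can
+ grow when lower-cased (hence casedn_multiply), plus one byte for a
+ terminating NUL. */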
+ t_str.f_len = str.f_len * result_doc->charset->casedn_multiply + 1;
+
+ t_str.f_str = static_cast<byte*>(
+ mem_heap_alloc(heap, t_str.f_len));
+
+ /* For binary collations, a case sensitive search is
+ performed. Hence don't convert to lower case. */
+ if (my_binary_compare(result_doc->charset)) {
+ memcpy(t_str.f_str, str.f_str, str.f_len);
+ t_str.f_str[str.f_len]= 0;
+ newlen= str.f_len;
+ } else {
+ newlen = innobase_fts_casedn_str(
+ result_doc->charset, (char*) str.f_str, str.f_len,
+ (char*) t_str.f_str, t_str.f_len);
+ }
+
+ t_str.f_len = newlen;
+ t_str.f_str[newlen] = 0;
+
+ /* Add the word to the document statistics. If the word
+ hasn't been seen before we create a new entry for it. */
+ if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) {
+ fts_token_t new_token;
+
+ new_token.text.f_len = newlen;
+ new_token.text.f_str = t_str.f_str;
+ new_token.text.f_n_char = t_str.f_n_char;
+
+ new_token.positions = ib_vector_create(
+ result_doc->self_heap, sizeof(ulint), 32);
+
+ parent.last = rbt_add_node(
+ result_doc->tokens, &parent, &new_token);
+
+ ut_ad(rbt_validate(result_doc->tokens));
+ }
+
+ token = rbt_value(fts_token_t, parent.last);
+ ib_vector_push(token->positions, &position);
+ }
+}
+
+/********************************************************************
+Process next token from document starting at the given position, i.e., add
+the token's start position to the token's list of positions.
+@return number of characters handled in this call */
+static
+ulint
+fts_process_token(
+/*==============*/
+ fts_doc_t* doc, /* in/out: document to
+ tokenize */
+ fts_doc_t* result, /* out: if provided, save
+ result here */
+ ulint start_pos, /*!< in: start position in text */
+ ulint add_pos) /*!< in: add this position to all
+ tokens from this tokenization */
+{
+ ulint ret;
+ fts_string_t str;
+ ulint position;
+ fts_doc_t* result_doc;
+ byte buf[FTS_MAX_WORD_LEN + 1];
+
+ str.f_str = buf;
+
+ /* Determine where to save the result. */
+ result_doc = (result != NULL) ? result : doc;
+
+ /* The length of a string in characters is set here only. */
+
+ ret = innobase_mysql_fts_get_token(
+ doc->charset, doc->text.f_str + start_pos,
+ doc->text.f_str + doc->text.f_len, &str);
+
+ position = start_pos + ret - str.f_len + add_pos;
+
+ fts_add_token(result_doc, str, position);
+
+ return(ret);
+}
+
+/*************************************************************//**
+Get token char size by charset
+@return token size */
+ulint
+fts_get_token_size(
+/*===============*/
+ const CHARSET_INFO* cs, /*!< in: Character set */
+ const char* token, /*!< in: token */
+ ulint len) /*!< in: token length */
+{
+ char* start;
+ char* end;
+ ulint size = 0;
+
+ /* const_cast is for reinterpret_cast below, or it will fail. */
+ start = const_cast<char*>(token);
+ end = start + len;
+ while (start < end) {
+ int ctype;
+ int mbl;
+
+ mbl = cs->ctype(
+ &ctype,
+ reinterpret_cast<uchar*>(start),
+ reinterpret_cast<uchar*>(end));
+
+ size++;
+
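+ /* Advance by the number of bytes reported by the charset's ctype()
+ (|mbl|); if it reports zero, move one byte so the loop always makes
+ progress. */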
+ start += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
+ }
+
+ return(size);
+}
+
+/*************************************************************//**
+FTS plugin parser 'mysql_parse' callback function for document tokenization.
+Refer to 'st_mysql_ftparser_param' for more detail.
+@return always returns 0 */
+int
+fts_tokenize_document_internal(
+/*===========================*/
+ MYSQL_FTPARSER_PARAM* param, /*!< in: parser parameter */
+ const char* doc,/*!< in/out: document */
+ int len) /*!< in: document length */
+{
+ fts_string_t str;
+ byte buf[FTS_MAX_WORD_LEN + 1];
+ /* JAN: TODO: MySQL 5.7
+ MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
+ { FT_TOKEN_WORD, 0, 0, 0, 0, 0, ' ', 0 };
+ */
+ MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
+ { FT_TOKEN_WORD, 0, 0, 0, 0, ' ', 0};
+
+ ut_ad(len >= 0);
+
+ str.f_str = buf;
+
+ for (ulint i = 0, inc = 0; i < static_cast<ulint>(len); i += inc) {
+ inc = innobase_mysql_fts_get_token(
+ const_cast<CHARSET_INFO*>(param->cs),
+ (uchar*)(doc) + i,
+ (uchar*)(doc) + len,
+ &str);
+
+ if (str.f_len > 0) {
+ /* JAN: TODO: MySQL 5.7
+ bool_info.position =
+ static_cast<int>(i + inc - str.f_len);
+ ut_ad(bool_info.position >= 0);
+ */
+
+ /* Stop when add word fails */
+ if (param->mysql_add_word(
+ param,
+ reinterpret_cast<char*>(str.f_str),
+ static_cast<int>(str.f_len),
+ &bool_info)) {
+ break;
+ }
+ }
+ }
+
+ return(0);
+}
+
+/******************************************************************//**
+FTS plugin parser 'mysql_add_word' callback function for document tokenization.
+Refer to 'st_mysql_ftparser_param' for more detail.
+@return always returns 0 */
+static
+int
+fts_tokenize_add_word_for_parser(
+/*=============================*/
+ MYSQL_FTPARSER_PARAM* param, /* in: parser parameter */
+ const char* word, /* in: token word */
+ int word_len, /* in: word len */
+ MYSQL_FTPARSER_BOOLEAN_INFO*)
+{
+ fts_string_t str;
+ fts_tokenize_param_t* fts_param;
+ fts_doc_t* result_doc;
+ ulint position;
+
+ fts_param = static_cast<fts_tokenize_param_t*>(param->mysql_ftparam);
+ result_doc = fts_param->result_doc;
+ ut_ad(result_doc != NULL);
+
+ str.f_str = (byte*)(word);
+ str.f_len = ulint(word_len);
+ str.f_n_char = fts_get_token_size(
+ const_cast<CHARSET_INFO*>(param->cs), word, str.f_len);
+
+ /* JAN: TODO: MySQL 5.7 FTS
+ ut_ad(boolean_info->position >= 0);
+ position = boolean_info->position + fts_param->add_pos;
+ */
+ position = fts_param->add_pos;
+
+ fts_add_token(result_doc, str, position);
+
+ return(0);
+}
+
+/******************************************************************//**
+Parse a document using an external / user supplied parser */
+static
+void
+fts_tokenize_by_parser(
+/*===================*/
+ fts_doc_t* doc, /* in/out: document to tokenize */
+ st_mysql_ftparser* parser, /* in: plugin fts parser */
+ fts_tokenize_param_t* fts_param) /* in: fts tokenize param */
+{
+ MYSQL_FTPARSER_PARAM param;
+
+ ut_a(parser);
+
+ /* Set parameters for param */
+ param.mysql_parse = fts_tokenize_document_internal;
+ param.mysql_add_word = fts_tokenize_add_word_for_parser;
+ param.mysql_ftparam = fts_param;
+ param.cs = doc->charset;
+ param.doc = reinterpret_cast<char*>(doc->text.f_str);
+ param.length = static_cast<int>(doc->text.f_len);
+ param.mode= MYSQL_FTPARSER_SIMPLE_MODE;
+
+ PARSER_INIT(parser, &param);
+ parser->parse(&param);
+ PARSER_DEINIT(parser, &param);
+}
+
+/** Tokenize a document.
+@param[in,out] doc document to tokenize
+@param[out] result tokenization result
+@param[in] parser pluggable parser */
+static
+void
+fts_tokenize_document(
+ fts_doc_t* doc,
+ fts_doc_t* result,
+ st_mysql_ftparser* parser)
+{
+ ut_a(!doc->tokens);
+ ut_a(doc->charset);
+
+ doc->tokens = rbt_create_arg_cmp(sizeof(fts_token_t),
+ innobase_fts_text_cmp,
+ (void*) doc->charset);
+
+ if (parser != NULL) {
+ fts_tokenize_param_t fts_param;
+ fts_param.result_doc = (result != NULL) ? result : doc;
+ fts_param.add_pos = 0;
+
+ fts_tokenize_by_parser(doc, parser, &fts_param);
+ } else {
+ ulint inc;
+
+ for (ulint i = 0; i < doc->text.f_len; i += inc) {
+ inc = fts_process_token(doc, result, i, 0);
+ ut_a(inc > 0);
+ }
+ }
+}
+
+/** Continue to tokenize a document.
+@param[in,out] doc document to tokenize
+@param[in] add_pos add this position to all tokens from this tokenization
+@param[out] result tokenization result
+@param[in] parser pluggable parser */
+static
+void
+fts_tokenize_document_next(
+ fts_doc_t* doc,
+ ulint add_pos,
+ fts_doc_t* result,
+ st_mysql_ftparser* parser)
+{
+ ut_a(doc->tokens);
+
+ if (parser) {
+ fts_tokenize_param_t fts_param;
+
+ fts_param.result_doc = (result != NULL) ? result : doc;
+ fts_param.add_pos = add_pos;
+
+ fts_tokenize_by_parser(doc, parser, &fts_param);
+ } else {
+ ulint inc;
+
+ for (ulint i = 0; i < doc->text.f_len; i += inc) {
+ inc = fts_process_token(doc, result, i, add_pos);
+ ut_a(inc > 0);
+ }
+ }
+}
+
+/** Create the vector of fts_get_doc_t instances.
+@param[in,out] cache fts cache
+@return vector of fts_get_doc_t instances */
+static
+ib_vector_t*
+fts_get_docs_create(
+ fts_cache_t* cache)
+{
+ ib_vector_t* get_docs;
+
+ ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
+
+ /* We need one instance of fts_get_doc_t per index. */
+ get_docs = ib_vector_create(cache->self_heap, sizeof(fts_get_doc_t), 4);
+
+ /* Create the get_doc instance, we need one of these
+ per FTS index. */
+ for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
+
+ dict_index_t** index;
+ fts_get_doc_t* get_doc;
+
+ index = static_cast<dict_index_t**>(
+ ib_vector_get(cache->indexes, i));
+
+ get_doc = static_cast<fts_get_doc_t*>(
+ ib_vector_push(get_docs, NULL));
+
+ memset(get_doc, 0x0, sizeof(*get_doc));
+
+ get_doc->index_cache = fts_get_index_cache(cache, *index);
+ get_doc->cache = cache;
+
+ /* Must find the index cache. */
+ ut_a(get_doc->index_cache != NULL);
+ }
+
+ return(get_docs);
+}
+
+/********************************************************************
+Release any resources held by the fts_get_doc_t instances. */
+static
+void
+fts_get_docs_clear(
+/*===============*/
+ ib_vector_t* get_docs) /*!< in: Doc retrieval vector */
+{
+ ulint i;
+
+ /* Release the get doc graphs if any. */
+ for (i = 0; i < ib_vector_size(get_docs); ++i) {
+
+ fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(
+ ib_vector_get(get_docs, i));
+
+ if (get_doc->get_document_graph != NULL) {
+
+ ut_a(get_doc->index_cache);
+
+ fts_que_graph_free(get_doc->get_document_graph);
+ get_doc->get_document_graph = NULL;
+ }
+ }
+}
+
+/*********************************************************************//**
+Get the initial Doc ID by consulting the CONFIG table
+@return initial Doc ID */
+doc_id_t
+fts_init_doc_id(
+/*============*/
+ const dict_table_t* table) /*!< in: table */
+{
+ doc_id_t max_doc_id = 0;
+
+ rw_lock_x_lock(&table->fts->cache->lock);
+
+ /* Return if the table is already initialized for DOC ID */
+ if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
+ rw_lock_x_unlock(&table->fts->cache->lock);
+ return(0);
+ }
+
+ DEBUG_SYNC_C("fts_initialize_doc_id");
+
+ /* Then compare this value with the ID value stored in the CONFIG
+ table. The larger one will be our new initial Doc ID */
+ fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
+
+ /* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of
+ creating an index (and adding the doc id column). No need to recover
+ documents */
+ if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
+ fts_init_index((dict_table_t*) table, TRUE);
+ }
+
+ table->fts->added_synced = true;
+
+ table->fts->cache->first_doc_id = max_doc_id;
+
+ rw_lock_x_unlock(&table->fts->cache->lock);
+
+ ut_ad(max_doc_id > 0);
+
+ return(max_doc_id);
+}
+
+#ifdef FTS_MULT_INDEX
+/*********************************************************************//**
+Check if the index is in the affected set.
+@return TRUE if index is updated */
+static
+ibool
+fts_is_index_updated(
+/*=================*/
+ const ib_vector_t* fts_indexes, /*!< in: affected FTS indexes */
+ const fts_get_doc_t* get_doc) /*!< in: info for reading
+ document */
+{
+ ulint i;
+ dict_index_t* index = get_doc->index_cache->index;
+
+ for (i = 0; i < ib_vector_size(fts_indexes); ++i) {
+ const dict_index_t* updated_fts_index;
+
+ updated_fts_index = static_cast<const dict_index_t*>(
+ ib_vector_getp_const(fts_indexes, i));
+
+ ut_a(updated_fts_index != NULL);
+
+ if (updated_fts_index == index) {
+ return(TRUE);
+ }
+ }
+
+ return(FALSE);
+}
+#endif
+
+/*********************************************************************//**
+Fetch COUNT(*) from specified table.
+@return the number of rows in the table */
+ulint
+fts_get_rows_count(
+/*===============*/
+ fts_table_t* fts_table) /*!< in: fts table to read */
+{
+ trx_t* trx;
+ pars_info_t* info;
+ que_t* graph;
+ dberr_t error;
+ ulint count = 0;
+ char table_name[MAX_FULL_NAME_LEN];
+
+ trx = trx_create();
+ trx->op_info = "fetching FT table rows count";
+
+ info = pars_info_create();
+
+ pars_info_bind_function(info, "my_func", fts_read_ulint, &count);
+
+ fts_get_table_name(fts_table, table_name);
+ pars_info_bind_id(info, true, "table_name", table_name);
+
+ graph = fts_parse_sql(
+ fts_table,
+ info,
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR c IS"
+ " SELECT COUNT(*)"
+ " FROM $table_name;\n"
+ "BEGIN\n"
+ "\n"
+ "OPEN c;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH c INTO my_func();\n"
+ " IF c % NOTFOUND THEN\n"
+ " EXIT;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE c;");
+
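+ /* Execute the COUNT(*) graph; retry only on lock wait timeout, any
+ other error terminates the loop with a message. */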
+ for (;;) {
+ error = fts_eval_sql(trx, graph);
+
+ if (UNIV_LIKELY(error == DB_SUCCESS)) {
+ fts_sql_commit(trx);
+
+ break; /* Exit the loop. */
+ } else {
+ fts_sql_rollback(trx);
+
+ if (error == DB_LOCK_WAIT_TIMEOUT) {
+ ib::warn() << "lock wait timeout reading"
+ " FTS table. Retrying!";
+
+ trx->error_state = DB_SUCCESS;
+ } else {
+ ib::error() << "(" << error
+ << ") while reading FTS table "
+ << table_name;
+
+ break; /* Exit the loop. */
+ }
+ }
+ }
+
+ fts_que_graph_free(graph);
+
+ trx->free();
+
+ return(count);
+}
+
+#ifdef FTS_CACHE_SIZE_DEBUG
+/*********************************************************************//**
+Read the max cache size parameter from the config table. */
+static
+void
+fts_update_max_cache_size(
+/*======================*/
+ fts_sync_t* sync) /*!< in: sync state */
+{
+ trx_t* trx;
+ fts_table_t fts_table;
+
+ trx = trx_create();
+
+ FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, sync->table);
+
+ /* The size returned is in bytes. */
+ sync->max_cache_size = fts_get_max_cache_size(trx, &fts_table);
+
+ fts_sql_commit(trx);
+
+ trx->free();
+}
+#endif /* FTS_CACHE_SIZE_DEBUG */
+
+/*********************************************************************//**
+Free the modified rows of a table. */
+UNIV_INLINE
+void
+fts_trx_table_rows_free(
+/*====================*/
+ ib_rbt_t* rows) /*!< in: rbt of rows to free */
+{
+ const ib_rbt_node_t* node;
+
+ for (node = rbt_first(rows); node; node = rbt_first(rows)) {
+ fts_trx_row_t* row;
+
+ row = rbt_value(fts_trx_row_t, node);
+
+ if (row->fts_indexes != NULL) {
+ /* This vector shouldn't be using the
+ heap allocator. */
+ ut_a(row->fts_indexes->allocator->arg == NULL);
+
+ ib_vector_free(row->fts_indexes);
+ row->fts_indexes = NULL;
+ }
+
+ ut_free(rbt_remove_node(rows, node));
+ }
+
+ ut_a(rbt_empty(rows));
+ rbt_free(rows);
+}
+
+/*********************************************************************//**
+Free an FTS savepoint instance. */
+UNIV_INLINE
+void
+fts_savepoint_free(
+/*===============*/
+ fts_savepoint_t* savepoint) /*!< in: savepoint instance */
+{
+ const ib_rbt_node_t* node;
+ ib_rbt_t* tables = savepoint->tables;
+
+ /* Nothing to free! */
+ if (tables == NULL) {
+ return;
+ }
+
+ for (node = rbt_first(tables); node; node = rbt_first(tables)) {
+ fts_trx_table_t* ftt;
+ fts_trx_table_t** fttp;
+
+ fttp = rbt_value(fts_trx_table_t*, node);
+ ftt = *fttp;
+
+ /* This can be NULL if a savepoint was released. */
+ if (ftt->rows != NULL) {
+ fts_trx_table_rows_free(ftt->rows);
+ ftt->rows = NULL;
+ }
+
+ /* This can be NULL if a savepoint was released. */
+ if (ftt->added_doc_ids != NULL) {
+ fts_doc_ids_free(ftt->added_doc_ids);
+ ftt->added_doc_ids = NULL;
+ }
+
+ /* Free the graph used for adding doc ids, if any. */
+ if (ftt->docs_added_graph) {
+ fts_que_graph_free(ftt->docs_added_graph);
+ }
+
+ /* NOTE: We are responsible for free'ing the node */
+ ut_free(rbt_remove_node(tables, node));
+ }
+
+ ut_a(rbt_empty(tables));
+ rbt_free(tables);
+ savepoint->tables = NULL;
+}
+
+/*********************************************************************//**
+Free an FTS trx. */
+void
+fts_trx_free(
+/*=========*/
+ fts_trx_t* fts_trx) /* in, own: FTS trx */
+{
+ ulint i;
+
+ for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) {
+ fts_savepoint_t* savepoint;
+
+ savepoint = static_cast<fts_savepoint_t*>(
+ ib_vector_get(fts_trx->savepoints, i));
+
+ /* The default savepoint name must be NULL. */
+ if (i == 0) {
+ ut_a(savepoint->name == NULL);
+ }
+
+ fts_savepoint_free(savepoint);
+ }
+
+ for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) {
+ fts_savepoint_t* savepoint;
+
+ savepoint = static_cast<fts_savepoint_t*>(
+ ib_vector_get(fts_trx->last_stmt, i));
+
+ /* The default savepoint name must be NULL. */
+ if (i == 0) {
+ ut_a(savepoint->name == NULL);
+ }
+
+ fts_savepoint_free(savepoint);
+ }
+
+ if (fts_trx->heap) {
+ mem_heap_free(fts_trx->heap);
+ }
+}
+
+/*********************************************************************//**
+Extract the doc id from the FTS hidden column.
+@return doc id that was extracted from rec */
+doc_id_t
+fts_get_doc_id_from_row(
+/*====================*/
+ dict_table_t* table, /*!< in: table */
+ dtuple_t* row) /*!< in: row whose FTS doc id we
+ want to extract.*/
+{
+ dfield_t* field;
+ doc_id_t doc_id = 0;
+
+ ut_a(table->fts->doc_col != ULINT_UNDEFINED);
+
+ field = dtuple_get_nth_field(row, table->fts->doc_col);
+
+ ut_a(dfield_get_len(field) == sizeof(doc_id));
+ ut_a(dfield_get_type(field)->mtype == DATA_INT);
+
+ doc_id = fts_read_doc_id(
+ static_cast<const byte*>(dfield_get_data(field)));
+
+ return(doc_id);
+}
+
+/** Extract the doc id from the record that belongs to index.
+@param[in] rec record containing FTS_DOC_ID
+@param[in] index index of rec
+@param[in] offsets rec_get_offsets(rec,index)
+@return doc id that was extracted from rec */
+doc_id_t
+fts_get_doc_id_from_rec(
+ const rec_t* rec,
+ const dict_index_t* index,
+ const rec_offs* offsets)
+{
+ ulint f = dict_col_get_index_pos(
+ &index->table->cols[index->table->fts->doc_col], index);
+ ulint len;
+ doc_id_t doc_id = mach_read_from_8(
+ rec_get_nth_field(rec, offsets, f, &len));
+ ut_ad(len == 8);
+ return doc_id;
+}
+
+/*********************************************************************//**
+Search the index specific cache for a particular FTS index.
+@return the index specific cache else NULL */
+fts_index_cache_t*
+fts_find_index_cache(
+/*=================*/
+ const fts_cache_t* cache, /*!< in: cache to search */
+ const dict_index_t* index) /*!< in: index to search for */
+{
+ /* We cast away the const because our internal function takes a
+ non-const cache arg and returns a non-const pointer. */
+ return(static_cast<fts_index_cache_t*>(
+ fts_get_index_cache((fts_cache_t*) cache, index)));
+}
+
+/*********************************************************************//**
+Search cache for word.
+@return the word node vector if found else NULL */
+const ib_vector_t*
+fts_cache_find_word(
+/*================*/
+ const fts_index_cache_t*index_cache, /*!< in: cache to search */
+ const fts_string_t* text) /*!< in: word to search for */
+{
+ ib_rbt_bound_t parent;
+ const ib_vector_t* nodes = NULL;
+#ifdef UNIV_DEBUG
+ dict_table_t* table = index_cache->index->table;
+ fts_cache_t* cache = table->fts->cache;
+
+ ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
+#endif /* UNIV_DEBUG */
+
+ /* Lookup the word in the rb tree */
+ if (rbt_search(index_cache->words, &parent, text) == 0) {
+ const fts_tokenizer_word_t* word;
+
+ word = rbt_value(fts_tokenizer_word_t, parent.last);
+
+ nodes = word->nodes;
+ }
+
+ return(nodes);
+}
+
+/*********************************************************************//**
+Append deleted doc ids to vector. */
+void
+fts_cache_append_deleted_doc_ids(
+/*=============================*/
+ const fts_cache_t* cache, /*!< in: cache to use */
+ ib_vector_t* vector) /*!< in: append to this vector */
+{
+ mutex_enter(const_cast<ib_mutex_t*>(&cache->deleted_lock));
+
+ if (cache->deleted_doc_ids == NULL) {
+ mutex_exit((ib_mutex_t*) &cache->deleted_lock);
+ return;
+ }
+
+
+ for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
+ doc_id_t* update;
+
+ update = static_cast<doc_id_t*>(
+ ib_vector_get(cache->deleted_doc_ids, i));
+
+ ib_vector_push(vector, &update);
+ }
+
+ mutex_exit((ib_mutex_t*) &cache->deleted_lock);
+}
+
+/*********************************************************************//**
+Add the FTS document id hidden column. */
+void
+fts_add_doc_id_column(
+/*==================*/
+ dict_table_t* table, /*!< in/out: Table with FTS index */
+ mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */
+{
+ dict_mem_table_add_col(
+ table, heap,
+ FTS_DOC_ID_COL_NAME,
+ DATA_INT,
+ dtype_form_prtype(
+ DATA_NOT_NULL | DATA_UNSIGNED
+ | DATA_BINARY_TYPE | DATA_FTS_DOC_ID, 0),
+ sizeof(doc_id_t));
+ DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
+}
+
+/** Add new fts doc id to the update vector.
+@param[in] table the table that contains the FTS index.
+@param[in,out] ufield the fts doc id field in the update vector.
+ No new memory is allocated for this in this
+ function.
+@param[in,out] next_doc_id the fts doc id that has been added to the
+ update vector. If 0, a new fts doc id is
+ automatically generated. The memory provided
+ for this argument will be used by the update
+ vector. Ensure that the life time of this
+ memory matches that of the update vector.
+@return the fts doc id used in the update vector */
+doc_id_t
+fts_update_doc_id(
+ dict_table_t* table,
+ upd_field_t* ufield,
+ doc_id_t* next_doc_id)
+{
+ doc_id_t doc_id;
+ dberr_t error = DB_SUCCESS;
+
+ if (*next_doc_id) {
+ doc_id = *next_doc_id;
+ } else {
+ /* Get the new document id that will be added. */
+ error = fts_get_next_doc_id(table, &doc_id);
+ }
+
+ if (error == DB_SUCCESS) {
+ dict_index_t* clust_index;
+ dict_col_t* col = dict_table_get_nth_col(
+ table, table->fts->doc_col);
+
+ ufield->exp = NULL;
+
+ ufield->new_val.len = sizeof(doc_id);
+
+ clust_index = dict_table_get_first_index(table);
+
+ ufield->field_no = static_cast<unsigned>(
+ dict_col_get_clust_pos(col, clust_index))
+ & dict_index_t::MAX_N_FIELDS;
+ dict_col_copy_type(col, dfield_get_type(&ufield->new_val));
+
+		/* It is possible that we are updating a record that has
+		not yet been sync-ed since the last crash. */
+
+ /* Convert to storage byte order. */
+ ut_a(doc_id != FTS_NULL_DOC_ID);
+ fts_write_doc_id((byte*) next_doc_id, doc_id);
+
+ ufield->new_val.data = next_doc_id;
+ ufield->new_val.ext = 0;
+ }
+
+ return(doc_id);
+}
+
+/** fts_t constructor.
+@param[in] table table with FTS indexes
+@param[in,out] heap memory heap where 'this' is stored */
+fts_t::fts_t(
+ const dict_table_t* table,
+ mem_heap_t* heap)
+ :
+ added_synced(0), dict_locked(0),
+ add_wq(NULL),
+ cache(NULL),
+ doc_col(ULINT_UNDEFINED), in_queue(false), sync_message(false),
+ fts_heap(heap)
+{
+ ut_a(table->fts == NULL);
+
+ ib_alloc_t* heap_alloc = ib_heap_allocator_create(fts_heap);
+
+ indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4);
+
+ dict_table_get_all_fts_indexes(table, indexes);
+}
+
+/** fts_t destructor. */
+fts_t::~fts_t()
+{
+ ut_ad(add_wq == NULL);
+
+ if (cache != NULL) {
+ fts_cache_clear(cache);
+ fts_cache_destroy(cache);
+ cache = NULL;
+ }
+
+ /* There is no need to call ib_vector_free() on this->indexes
+ because it is stored in this->fts_heap. */
+}
+
+/*********************************************************************//**
+Create an instance of fts_t.
+@return instance of fts_t */
+fts_t*
+fts_create(
+/*=======*/
+ dict_table_t* table) /*!< in/out: table with FTS indexes */
+{
+ fts_t* fts;
+ mem_heap_t* heap;
+
+ heap = mem_heap_create(512);
+
+ fts = static_cast<fts_t*>(mem_heap_alloc(heap, sizeof(*fts)));
+
+ new(fts) fts_t(table, heap);
+
+ return(fts);
+}
+
+/*********************************************************************//**
+Free the FTS resources. */
+void
+fts_free(
+/*=====*/
+ dict_table_t* table) /*!< in/out: table with FTS indexes */
+{
+ fts_t* fts = table->fts;
+
+ fts->~fts_t();
+
+ mem_heap_free(fts->fts_heap);
+
+ table->fts = NULL;
+}
+
+/*********************************************************************//**
+Take a FTS savepoint. */
+UNIV_INLINE
+void
+fts_savepoint_copy(
+/*===============*/
+ const fts_savepoint_t* src, /*!< in: source savepoint */
+ fts_savepoint_t* dst) /*!< out: destination savepoint */
+{
+ const ib_rbt_node_t* node;
+ const ib_rbt_t* tables;
+
+ tables = src->tables;
+
+ for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
+
+ fts_trx_table_t* ftt_dst;
+ const fts_trx_table_t** ftt_src;
+
+ ftt_src = rbt_value(const fts_trx_table_t*, node);
+
+ ftt_dst = fts_trx_table_clone(*ftt_src);
+
+ rbt_insert(dst->tables, &ftt_dst, &ftt_dst);
+ }
+}
+
+/*********************************************************************//**
+Take a FTS savepoint. */
+void
+fts_savepoint_take(
+/*===============*/
+ fts_trx_t* fts_trx, /*!< in: fts transaction */
+ const char* name) /*!< in: savepoint name */
+{
+ mem_heap_t* heap;
+ fts_savepoint_t* savepoint;
+ fts_savepoint_t* last_savepoint;
+
+ ut_a(name != NULL);
+
+ heap = fts_trx->heap;
+
+ /* The implied savepoint must exist. */
+ ut_a(ib_vector_size(fts_trx->savepoints) > 0);
+
+ last_savepoint = static_cast<fts_savepoint_t*>(
+ ib_vector_last(fts_trx->savepoints));
+ savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap);
+
+ if (last_savepoint->tables != NULL) {
+ fts_savepoint_copy(last_savepoint, savepoint);
+ }
+}
+
+/*********************************************************************//**
+Lookup a savepoint instance by name.
+@return ULINT_UNDEFINED if not found */
+UNIV_INLINE
+ulint
+fts_savepoint_lookup(
+/*==================*/
+ ib_vector_t* savepoints, /*!< in: savepoints */
+ const char* name) /*!< in: savepoint name */
+{
+ ulint i;
+
+ ut_a(ib_vector_size(savepoints) > 0);
+
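+	/* Element 0 of the savepoints vector is the implied savepoint,
+	whose name is NULL, so start searching from element 1. */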
+ for (i = 1; i < ib_vector_size(savepoints); ++i) {
+ fts_savepoint_t* savepoint;
+
+ savepoint = static_cast<fts_savepoint_t*>(
+ ib_vector_get(savepoints, i));
+
+ if (strcmp(name, savepoint->name) == 0) {
+ return(i);
+ }
+ }
+
+ return(ULINT_UNDEFINED);
+}
+
+/*********************************************************************//**
+Release the savepoint data identified by name. All savepoints created
+after the named savepoint are kept. */
+void
+fts_savepoint_release(
+/*==================*/
+ trx_t* trx, /*!< in: transaction */
+ const char* name) /*!< in: savepoint name */
+{
+ ut_a(name != NULL);
+
+ ib_vector_t* savepoints = trx->fts_trx->savepoints;
+
+ ut_a(ib_vector_size(savepoints) > 0);
+
+ ulint i = fts_savepoint_lookup(savepoints, name);
+ if (i != ULINT_UNDEFINED) {
+ ut_a(i >= 1);
+
+ fts_savepoint_t* savepoint;
+ savepoint = static_cast<fts_savepoint_t*>(
+ ib_vector_get(savepoints, i));
+
+ if (i == ib_vector_size(savepoints) - 1) {
+ /* If the savepoint is the last, we save its
+ tables to the previous savepoint. */
+ fts_savepoint_t* prev_savepoint;
+ prev_savepoint = static_cast<fts_savepoint_t*>(
+ ib_vector_get(savepoints, i - 1));
+
+ ib_rbt_t* tables = savepoint->tables;
+ savepoint->tables = prev_savepoint->tables;
+ prev_savepoint->tables = tables;
+ }
+
+ fts_savepoint_free(savepoint);
+ ib_vector_remove(savepoints, *(void**)savepoint);
+
+ /* Make sure we don't delete the implied savepoint. */
+ ut_a(ib_vector_size(savepoints) > 0);
+ }
+}
+
+/**********************************************************************//**
+Refresh last statement savepoint. */
+void
+fts_savepoint_laststmt_refresh(
+/*===========================*/
+ trx_t* trx) /*!< in: transaction */
+{
+
+ fts_trx_t* fts_trx;
+ fts_savepoint_t* savepoint;
+
+ fts_trx = trx->fts_trx;
+
+ savepoint = static_cast<fts_savepoint_t*>(
+ ib_vector_pop(fts_trx->last_stmt));
+ fts_savepoint_free(savepoint);
+
+ ut_ad(ib_vector_is_empty(fts_trx->last_stmt));
+ savepoint = fts_savepoint_create(fts_trx->last_stmt, NULL, NULL);
+}
+
+/********************************************************************
+Undo the Doc ID add/delete operations of the last statement. */
+static
+void
+fts_undo_last_stmt(
+/*===============*/
+ fts_trx_table_t* s_ftt, /*!< in: Transaction FTS table */
+ fts_trx_table_t* l_ftt) /*!< in: last stmt FTS table */
+{
+ ib_rbt_t* s_rows;
+ ib_rbt_t* l_rows;
+ const ib_rbt_node_t* node;
+
+ l_rows = l_ftt->rows;
+ s_rows = s_ftt->rows;
+
+ for (node = rbt_first(l_rows);
+ node;
+ node = rbt_next(l_rows, node)) {
+ fts_trx_row_t* l_row = rbt_value(fts_trx_row_t, node);
+ ib_rbt_bound_t parent;
+
+ rbt_search(s_rows, &parent, &(l_row->doc_id));
+
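+		/* parent.result == 0 means the same doc id also exists
+		in the savepoint's row set. */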
+ if (parent.result == 0) {
+ fts_trx_row_t* s_row = rbt_value(
+ fts_trx_row_t, parent.last);
+
+ switch (l_row->state) {
+ case FTS_INSERT:
+ ut_free(rbt_remove_node(s_rows, parent.last));
+ break;
+
+ case FTS_DELETE:
+ if (s_row->state == FTS_NOTHING) {
+ s_row->state = FTS_INSERT;
+ } else if (s_row->state == FTS_DELETE) {
+ ut_free(rbt_remove_node(
+ s_rows, parent.last));
+ }
+ break;
+
+			/* FIXME: Check if FTS_MODIFY needs to be addressed */
+ case FTS_MODIFY:
+ case FTS_NOTHING:
+ break;
+ default:
+ ut_error;
+ }
+ }
+ }
+}
+
+/**********************************************************************//**
+Rollback the FTS Doc ID changes made by the last statement. */
+void
+fts_savepoint_rollback_last_stmt(
+/*=============================*/
+ trx_t* trx) /*!< in: transaction */
+{
+ ib_vector_t* savepoints;
+ fts_savepoint_t* savepoint;
+ fts_savepoint_t* last_stmt;
+ fts_trx_t* fts_trx;
+ ib_rbt_bound_t parent;
+ const ib_rbt_node_t* node;
+ ib_rbt_t* l_tables;
+ ib_rbt_t* s_tables;
+
+ fts_trx = trx->fts_trx;
+ savepoints = fts_trx->savepoints;
+
+ savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
+ last_stmt = static_cast<fts_savepoint_t*>(
+ ib_vector_last(fts_trx->last_stmt));
+
+ l_tables = last_stmt->tables;
+ s_tables = savepoint->tables;
+
+ for (node = rbt_first(l_tables);
+ node;
+ node = rbt_next(l_tables, node)) {
+
+ fts_trx_table_t** l_ftt;
+
+ l_ftt = rbt_value(fts_trx_table_t*, node);
+
+ rbt_search_cmp(
+ s_tables, &parent, &(*l_ftt)->table->id,
+ fts_trx_table_id_cmp, NULL);
+
+ if (parent.result == 0) {
+ fts_trx_table_t** s_ftt;
+
+ s_ftt = rbt_value(fts_trx_table_t*, parent.last);
+
+ fts_undo_last_stmt(*s_ftt, *l_ftt);
+ }
+ }
+}
+
+/**********************************************************************//**
+Rollback to the savepoint identified by name. */
+void
+fts_savepoint_rollback(
+/*===================*/
+ trx_t* trx, /*!< in: transaction */
+ const char* name) /*!< in: savepoint name */
+{
+ ulint i;
+ ib_vector_t* savepoints;
+
+ ut_a(name != NULL);
+
+ savepoints = trx->fts_trx->savepoints;
+
+	/* We pop all savepoints from the top of the stack up to
+	and including the instance that was found. */
+ i = fts_savepoint_lookup(savepoints, name);
+
+ if (i != ULINT_UNDEFINED) {
+ fts_savepoint_t* savepoint;
+
+ ut_a(i > 0);
+
+ while (ib_vector_size(savepoints) > i) {
+ fts_savepoint_t* savepoint;
+
+ savepoint = static_cast<fts_savepoint_t*>(
+ ib_vector_pop(savepoints));
+
+ if (savepoint->name != NULL) {
+ /* Since name was allocated on the heap, the
+ memory will be released when the transaction
+ completes. */
+ savepoint->name = NULL;
+
+ fts_savepoint_free(savepoint);
+ }
+ }
+
+		/* Pop all elements from the top of the stack that may
+		have been released. We have to be careful that we don't
+		delete the implied savepoint. */
+
+ for (savepoint = static_cast<fts_savepoint_t*>(
+ ib_vector_last(savepoints));
+ ib_vector_size(savepoints) > 1
+ && savepoint->name == NULL;
+ savepoint = static_cast<fts_savepoint_t*>(
+ ib_vector_last(savepoints))) {
+
+ ib_vector_pop(savepoints);
+ }
+
+ /* Make sure we don't delete the implied savepoint. */
+ ut_a(ib_vector_size(savepoints) > 0);
+
+ /* Restore the savepoint. */
+ fts_savepoint_take(trx->fts_trx, name);
+ }
+}
+
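+/** Check whether a tablespace name is that of an FTS auxiliary table.
+Auxiliary table names follow the InnoDB FTS naming scheme, e.g. (with
+purely illustrative hex ids):
+  db1/FTS_00000000000000a7_BEING_DELETED             (common table)
+  db1/FTS_00000000000000a7_00000000000000c3_INDEX_1  (index table)
+@param[in]	name		tablespace name to examine
+@param[out]	table_id	parent table id parsed from the name
+@param[out]	index_id	index id, set only for index tables
+@return true if the name is that of an FTS auxiliary table */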
+bool fts_check_aux_table(const char *name,
+ table_id_t *table_id,
+ index_id_t *index_id)
+{
+ ulint len= strlen(name);
+ const char* ptr;
+ const char* end= name + len;
+
+ ut_ad(len <= MAX_FULL_NAME_LEN);
+ ptr= static_cast<const char*>(memchr(name, '/', len));
+
+ if (ptr != NULL)
+ {
+ /* We will start the match after the '/' */
+ ++ptr;
+ len = end - ptr;
+ }
+
+  /* All auxiliary tables are prefixed with "FTS_" and their names
+  are always longer than 20 bytes. */
+ if (ptr && len > 20 && !memcmp(ptr, "FTS_", 4))
+ {
+ /* Skip the prefix. */
+ ptr+= 4;
+ len-= 4;
+
+ const char *table_id_ptr= ptr;
+ /* Skip the table id. */
+ ptr= static_cast<const char*>(memchr(ptr, '_', len));
+
+ if (!ptr)
+ return false;
+
+ /* Skip the underscore. */
+ ++ptr;
+ ut_ad(end > ptr);
+ len= end - ptr;
+
+ sscanf(table_id_ptr, UINT64PFx, table_id);
+ /* First search the common table suffix array. */
+ for (ulint i = 0; fts_common_tables[i]; ++i)
+ {
+ if (!strncmp(ptr, fts_common_tables[i], len))
+ return true;
+ }
+
+ /* Could be obsolete common tables. */
+ if ((len == 5 && !memcmp(ptr, "ADDED", len)) ||
+ (len == 9 && !memcmp(ptr, "STOPWORDS", len)))
+ return true;
+
+ const char* index_id_ptr= ptr;
+ /* Skip the index id. */
+ ptr= static_cast<const char*>(memchr(ptr, '_', len));
+ if (!ptr)
+ return false;
+
+ sscanf(index_id_ptr, UINT64PFx, index_id);
+
+ /* Skip the underscore. */
+ ++ptr;
+ ut_a(end > ptr);
+ len= end - ptr;
+
+ if (len > 7)
+ return false;
+
+ /* Search the FT index specific array. */
+ for (ulint i = 0; i < FTS_NUM_AUX_INDEX; ++i)
+ {
+ if (!memcmp(ptr, "INDEX_", len - 1))
+ return true;
+ }
+
+ /* Other FT index specific table(s). */
+ if (len == 6 && !memcmp(ptr, "DOC_ID", len))
+ return true;
+ }
+
+ return false;
+}
+
+typedef std::pair<table_id_t,index_id_t> fts_aux_id;
+typedef std::set<fts_aux_id> fts_space_set_t;
+
+/** Iterate over all the spaces in the space list and fetch the
+fts parent table id and index id.
+@param[in,out] fts_space_set store the list of tablespace id and
+ index id */
+static void fil_get_fts_spaces(fts_space_set_t& fts_space_set)
+{
+ mutex_enter(&fil_system.mutex);
+
+ for (fil_space_t *space= UT_LIST_GET_FIRST(fil_system.space_list);
+ space;
+ space= UT_LIST_GET_NEXT(space_list, space))
+ {
+ index_id_t index_id= 0;
+ table_id_t table_id= 0;
+
+ if (space->purpose == FIL_TYPE_TABLESPACE
+ && fts_check_aux_table(space->name, &table_id, &index_id))
+ fts_space_set.insert(std::make_pair(table_id, index_id));
+ }
+
+ mutex_exit(&fil_system.mutex);
+}
+
+/** Check whether the parent table id and index id of the FTS auxiliary
+tables exist in SYS_INDEXES. If they do, remove the entry from the set,
+so that the corresponding FTS tables are not treated as orphaned.
+@param[in,out]	fts_space_set	set of auxiliary table id/index id pairs */
+static void fts_check_orphaned_tables(fts_space_set_t& fts_space_set)
+{
+ btr_pcur_t pcur;
+ mtr_t mtr;
+ trx_t* trx = trx_create();
+ trx->op_info = "checking fts orphaned tables";
+
+ row_mysql_lock_data_dictionary(trx);
+
+ mtr.start();
+ btr_pcur_open_at_index_side(
+ true, dict_table_get_first_index(dict_sys.sys_indexes),
+ BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
+
+ do
+ {
+ const rec_t *rec;
+ const byte *tbl_field;
+ const byte *index_field;
+ ulint len;
+
+ btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ if (!btr_pcur_is_on_user_rec(&pcur))
+ break;
+
+ rec= btr_pcur_get_rec(&pcur);
+ if (rec_get_deleted_flag(rec, 0))
+ continue;
+
+ tbl_field= rec_get_nth_field_old(rec, 0, &len);
+ if (len != 8)
+ continue;
+
+ index_field= rec_get_nth_field_old(rec, 1, &len);
+ if (len != 8)
+ continue;
+
+ table_id_t table_id = mach_read_from_8(tbl_field);
+ index_id_t index_id = mach_read_from_8(index_field);
+
+ fts_space_set_t::iterator it = fts_space_set.find(
+ fts_aux_id(table_id, index_id));
+
+ if (it != fts_space_set.end())
+ fts_space_set.erase(*it);
+ else
+ {
+ it= fts_space_set.find(fts_aux_id(table_id, 0));
+ if (it != fts_space_set.end())
+ fts_space_set.erase(*it);
+ }
+ } while(!fts_space_set.empty());
+
+ btr_pcur_close(&pcur);
+ mtr.commit();
+ row_mysql_unlock_data_dictionary(trx);
+ trx->free();
+}
+
+/** Drop all FTS auxiliary index tables for the given FTS table.
+@param[in]	trx		transaction
+@param[in]	fts_table	FTS table definition holding the table id
+				and index id */
+static void fts_drop_all_aux_tables(trx_t *trx, fts_table_t *fts_table)
+{
+ char fts_table_name[MAX_FULL_NAME_LEN];
+ for (ulint i= 0;i < FTS_NUM_AUX_INDEX; i++)
+ {
+ fts_table->suffix= fts_get_suffix(i);
+ fts_get_table_name(fts_table, fts_table_name, true);
+
+ /* Drop all fts aux and common table */
+ dberr_t err= fts_drop_table(trx, fts_table_name);
+
+ if (err == DB_FAIL)
+ {
+ char *path= fil_make_filepath(NULL, fts_table_name, IBD, false);
+
+ if (path != NULL)
+ {
+ os_file_delete_if_exists(innodb_data_file_key, path , NULL);
+ ut_free(path);
+ }
+ }
+ }
+}
+
+/** Drop all orphaned FTS auxiliary tables, those that don't have
+a parent table or FTS index defined on them. */
+void fts_drop_orphaned_tables()
+{
+ fts_space_set_t fts_space_set;
+ fil_get_fts_spaces(fts_space_set);
+
+ if (fts_space_set.empty())
+ return;
+
+ fts_check_orphaned_tables(fts_space_set);
+
+ if (fts_space_set.empty())
+ return;
+
+ trx_t* trx= trx_create();
+ trx->op_info= "Drop orphaned aux FTS tables";
+ row_mysql_lock_data_dictionary(trx);
+
+ for (fts_space_set_t::iterator it = fts_space_set.begin();
+ it != fts_space_set.end(); it++)
+ {
+ fts_table_t fts_table;
+ dict_table_t *table= dict_table_open_on_id(it->first, TRUE,
+ DICT_TABLE_OP_NORMAL);
+ if (!table)
+ continue;
+
+ FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
+ fts_drop_common_tables(trx, &fts_table, true);
+
+ fts_table.type= FTS_INDEX_TABLE;
+ fts_table.index_id= it->second;
+ fts_drop_all_aux_tables(trx, &fts_table);
+
+ dict_table_close(table, true, false);
+ }
+ trx_commit_for_mysql(trx);
+ row_mysql_unlock_data_dictionary(trx);
+ trx->dict_operation_lock_mode= 0;
+ trx->free();
+}
+
+/**********************************************************************//**
+Check whether the user-supplied stopword table is of the right format.
+The caller is responsible for holding dictionary locks.
+@return the stopword column charset if the table qualifies, else NULL */
+CHARSET_INFO*
+fts_valid_stopword_table(
+/*=====================*/
+ const char* stopword_table_name) /*!< in: Stopword table
+ name */
+{
+ dict_table_t* table;
+ dict_col_t* col = NULL;
+
+ if (!stopword_table_name) {
+ return(NULL);
+ }
+
+ table = dict_table_get_low(stopword_table_name);
+
+ if (!table) {
+ ib::error() << "User stopword table " << stopword_table_name
+ << " does not exist.";
+
+ return(NULL);
+ } else {
+ if (strcmp(dict_table_get_col_name(table, 0), "value")) {
+ ib::error() << "Invalid column name for stopword"
+ " table " << stopword_table_name << ". Its"
+				" first column must be named 'value'.";
+
+ return(NULL);
+ }
+
+ col = dict_table_get_nth_col(table, 0);
+
+ if (col->mtype != DATA_VARCHAR
+ && col->mtype != DATA_VARMYSQL) {
+ ib::error() << "Invalid column type for stopword"
+ " table " << stopword_table_name << ". Its"
+ " first column must be of varchar type";
+
+ return(NULL);
+ }
+ }
+
+ ut_ad(col);
+
+ return(fts_get_charset(col->prtype));
+}
+
+/**********************************************************************//**
+This function loads the stopwords into the FTS cache. It also
+records/fetches the stopword configuration to/from the FTS config
+table, depending on whether we are creating or reloading the
+FTS index.
+@return true if load operation is successful */
+bool
+fts_load_stopword(
+/*==============*/
+ const dict_table_t*
+ table, /*!< in: Table with FTS */
+ trx_t* trx, /*!< in: Transactions */
+ const char* session_stopword_table, /*!< in: Session stopword table
+ name */
+ bool stopword_is_on, /*!< in: Whether stopword
+ option is turned on/off */
+ bool reload) /*!< in: Whether it is
+ for reloading FTS table */
+{
+ fts_table_t fts_table;
+ fts_string_t str;
+ dberr_t error = DB_SUCCESS;
+ ulint use_stopword;
+ fts_cache_t* cache;
+ const char* stopword_to_use = NULL;
+ ibool new_trx = FALSE;
+ byte str_buffer[MAX_FULL_NAME_LEN + 1];
+
+ FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, table);
+
+ cache = table->fts->cache;
+
+ if (!reload && !(cache->stopword_info.status & STOPWORD_NOT_INIT)) {
+ return true;
+ }
+
+ if (!trx) {
+ trx = trx_create();
+ if (srv_read_only_mode) {
+ trx_start_internal_read_only(trx);
+ } else {
+ trx_start_internal(trx);
+ }
+ trx->op_info = "upload FTS stopword";
+ new_trx = TRUE;
+ }
+
+ /* First check whether stopword filtering is turned off */
+ if (reload) {
+ error = fts_config_get_ulint(
+ trx, &fts_table, FTS_USE_STOPWORD, &use_stopword);
+ } else {
+ use_stopword = (ulint) stopword_is_on;
+
+ error = fts_config_set_ulint(
+ trx, &fts_table, FTS_USE_STOPWORD, use_stopword);
+ }
+
+ if (error != DB_SUCCESS) {
+ goto cleanup;
+ }
+
+	/* If stopwords are turned off, there is no need to load them
+	into the cache, but we still need to do the initialization */
+ if (!use_stopword) {
+ cache->stopword_info.status = STOPWORD_OFF;
+ goto cleanup;
+ }
+
+ if (reload) {
+ /* Fetch the stopword table name from FTS config
+ table */
+ str.f_n_char = 0;
+ str.f_str = str_buffer;
+ str.f_len = sizeof(str_buffer) - 1;
+
+ error = fts_config_get_value(
+ trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
+
+ if (error != DB_SUCCESS) {
+ goto cleanup;
+ }
+
+ if (*str.f_str) {
+ stopword_to_use = (const char*) str.f_str;
+ }
+ } else {
+ stopword_to_use = session_stopword_table;
+ }
+
+ if (stopword_to_use
+ && fts_load_user_stopword(table->fts, stopword_to_use,
+ &cache->stopword_info)) {
+ /* Save the stopword table name to the configure
+ table */
+ if (!reload) {
+ str.f_n_char = 0;
+ str.f_str = (byte*) stopword_to_use;
+ str.f_len = strlen(stopword_to_use);
+
+ error = fts_config_set_value(
+ trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
+ }
+ } else {
+ /* Load system default stopword list */
+ fts_load_default_stopword(&cache->stopword_info);
+ }
+
+cleanup:
+ if (new_trx) {
+ if (error == DB_SUCCESS) {
+ fts_sql_commit(trx);
+ } else {
+ fts_sql_rollback(trx);
+ }
+
+ trx->free();
+ }
+
+ if (!cache->stopword_info.cached_stopword) {
+ cache->stopword_info.cached_stopword = rbt_create_arg_cmp(
+ sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
+ &my_charset_latin1);
+ }
+
+ return error == DB_SUCCESS;
+}
+
+/**********************************************************************//**
+Callback function used when we initialize FTS at startup time.
+It recovers the maximum Doc ID present in the current table.
+@return always TRUE */
+static
+ibool
+fts_init_get_doc_id(
+/*================*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: fts cache */
+{
+ doc_id_t doc_id = FTS_NULL_DOC_ID;
+ sel_node_t* node = static_cast<sel_node_t*>(row);
+ que_node_t* exp = node->select_list;
+ fts_cache_t* cache = static_cast<fts_cache_t*>(user_arg);
+
+ ut_ad(ib_vector_is_empty(cache->get_docs));
+
+	/* Read the Doc ID, which is the only column in the select list */
+ if (exp) {
+ dfield_t* dfield = que_node_get_val(exp);
+ dtype_t* type = dfield_get_type(dfield);
+ void* data = dfield_get_data(dfield);
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+
+ doc_id = static_cast<doc_id_t>(mach_read_from_8(
+ static_cast<const byte*>(data)));
+
+ if (doc_id >= cache->next_doc_id) {
+ cache->next_doc_id = doc_id + 1;
+ }
+ }
+
+ return(TRUE);
+}
+
+/**********************************************************************//**
+Callback function used when we initialize FTS at startup time.
+It recovers Doc IDs that have not been sync-ed to the auxiliary
+tables and need to be brought back into the FTS index.
+@return always TRUE */
+static
+ibool
+fts_init_recover_doc(
+/*=================*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: fts cache */
+{
+
+ fts_doc_t doc;
+ ulint doc_len = 0;
+ ulint field_no = 0;
+ fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(user_arg);
+ doc_id_t doc_id = FTS_NULL_DOC_ID;
+ sel_node_t* node = static_cast<sel_node_t*>(row);
+ que_node_t* exp = node->select_list;
+ fts_cache_t* cache = get_doc->cache;
+ st_mysql_ftparser* parser = get_doc->index_cache->index->parser;
+
+ fts_doc_init(&doc);
+ doc.found = TRUE;
+
+ ut_ad(cache);
+
+ /* Copy each indexed column content into doc->text.f_str */
+ while (exp) {
+ dfield_t* dfield = que_node_get_val(exp);
+ ulint len = dfield_get_len(dfield);
+
+ if (field_no == 0) {
+ dtype_t* type = dfield_get_type(dfield);
+ void* data = dfield_get_data(dfield);
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+
+ doc_id = static_cast<doc_id_t>(mach_read_from_8(
+ static_cast<const byte*>(data)));
+
+ field_no++;
+ exp = que_node_get_next(exp);
+ continue;
+ }
+
+ if (len == UNIV_SQL_NULL) {
+ exp = que_node_get_next(exp);
+ continue;
+ }
+
+ ut_ad(get_doc);
+
+ if (!get_doc->index_cache->charset) {
+ get_doc->index_cache->charset = fts_get_charset(
+ dfield->type.prtype);
+ }
+
+ doc.charset = get_doc->index_cache->charset;
+
+ if (dfield_is_ext(dfield)) {
+ dict_table_t* table = cache->sync->table;
+
+ doc.text.f_str = btr_copy_externally_stored_field(
+ &doc.text.f_len,
+ static_cast<byte*>(dfield_get_data(dfield)),
+ table->space->zip_size(), len,
+ static_cast<mem_heap_t*>(doc.self_heap->arg));
+ } else {
+ doc.text.f_str = static_cast<byte*>(
+ dfield_get_data(dfield));
+
+ doc.text.f_len = len;
+ }
+
+ if (field_no == 1) {
+ fts_tokenize_document(&doc, NULL, parser);
+ } else {
+ fts_tokenize_document_next(&doc, doc_len, NULL, parser);
+ }
+
+ exp = que_node_get_next(exp);
+
+ doc_len += (exp) ? len + 1 : len;
+
+ field_no++;
+ }
+
+ fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens);
+
+ fts_doc_free(&doc);
+
+ cache->added++;
+
+ if (doc_id >= cache->next_doc_id) {
+ cache->next_doc_id = doc_id + 1;
+ }
+
+ return(TRUE);
+}
+
+/**********************************************************************//**
+This function brings the FTS index in sync when the FTS index is first
+used. Documents that had not yet been sync-ed to the auxiliary tables
+before the last abnormal server shutdown need to be brought back into
+the FTS cache before any further operations.
+@return TRUE if all OK */
+ibool
+fts_init_index(
+/*===========*/
+ dict_table_t* table, /*!< in: Table with FTS */
+ ibool has_cache_lock) /*!< in: Whether we already have
+ cache lock */
+{
+ dict_index_t* index;
+ doc_id_t start_doc;
+ fts_get_doc_t* get_doc = NULL;
+ fts_cache_t* cache = table->fts->cache;
+ bool need_init = false;
+
+ ut_ad(!mutex_own(&dict_sys.mutex));
+
+ /* First check cache->get_docs is initialized */
+ if (!has_cache_lock) {
+ rw_lock_x_lock(&cache->lock);
+ }
+
+ rw_lock_x_lock(&cache->init_lock);
+ if (cache->get_docs == NULL) {
+ cache->get_docs = fts_get_docs_create(cache);
+ }
+ rw_lock_x_unlock(&cache->init_lock);
+
+ if (table->fts->added_synced) {
+ goto func_exit;
+ }
+
+ need_init = true;
+
+ start_doc = cache->synced_doc_id;
+
+ if (!start_doc) {
+ fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc);
+ cache->synced_doc_id = start_doc;
+ }
+
+	/* No FTS index: this is the case when the previous FTS index
+	was dropped, and we re-initialize the Doc ID system for
+	subsequent insertions */
+ if (ib_vector_is_empty(cache->get_docs)) {
+ index = table->fts_doc_id_index;
+
+ ut_a(index);
+
+ fts_doc_fetch_by_doc_id(NULL, start_doc, index,
+ FTS_FETCH_DOC_BY_ID_LARGE,
+ fts_init_get_doc_id, cache);
+ } else {
+ if (table->fts->cache->stopword_info.status
+ & STOPWORD_NOT_INIT) {
+ fts_load_stopword(table, NULL, NULL, true, true);
+ }
+
+ for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
+ get_doc = static_cast<fts_get_doc_t*>(
+ ib_vector_get(cache->get_docs, i));
+
+ index = get_doc->index_cache->index;
+
+ fts_doc_fetch_by_doc_id(NULL, start_doc, index,
+ FTS_FETCH_DOC_BY_ID_LARGE,
+ fts_init_recover_doc, get_doc);
+ }
+ }
+
+ table->fts->added_synced = true;
+
+ fts_get_docs_clear(cache->get_docs);
+
+func_exit:
+ if (!has_cache_lock) {
+ rw_lock_x_unlock(&cache->lock);
+ }
+
+ if (need_init) {
+ mutex_enter(&dict_sys.mutex);
+ /* Register the table with the optimize thread. */
+ fts_optimize_add_table(table);
+ mutex_exit(&dict_sys.mutex);
+ }
+
+ return(TRUE);
+}
diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc
new file mode 100644
index 00000000..e3c0f8f5
--- /dev/null
+++ b/storage/innobase/fts/fts0opt.cc
@@ -0,0 +1,3053 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2018, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2016, 2020, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file fts/fts0opt.cc
+Full Text Search optimize thread
+
+Created 2007/03/27 Sunny Bains
+Completed 2011/7/10 Sunny and Jimmy Yang
+
+***********************************************************************/
+
+#include "fts0fts.h"
+#include "row0sel.h"
+#include "que0types.h"
+#include "fts0priv.h"
+#include "fts0types.h"
+#include "ut0wqueue.h"
+#include "srv0start.h"
+#include "ut0list.h"
+#include "zlib.h"
+#include "fts0opt.h"
+
+/** The FTS optimize thread's work queue. */
+ib_wqueue_t* fts_optimize_wq;
+static void fts_optimize_callback(void *);
+static void timer_callback(void*);
+static tpool::timer* timer;
+
+static tpool::task_group task_group(1);
+static tpool::task task(fts_optimize_callback,0, &task_group);
+
+/** FTS optimize thread, for MDL acquisition */
+static THD *fts_opt_thd;
+
+/** The FTS vector to store fts_slot_t */
+static ib_vector_t* fts_slots;
+
+/** Default optimize interval in secs. */
+static const ulint FTS_OPTIMIZE_INTERVAL_IN_SECS = 300;
+
+/** Set to true when the server is shutting down and the optimize thread should exit */
+static bool fts_opt_start_shutdown = false;
+
+/** Event to wait for shutdown of the optimize thread */
+static os_event_t fts_opt_shutdown_event = NULL;
+
+/** Initial size of nodes in fts_word_t. */
+static const ulint FTS_WORD_NODES_INIT_SIZE = 64;
+
+/** Last time we checked whether the system needs a sync */
+static time_t last_check_sync_time;
+
+/** FTS optimize thread message types. */
+enum fts_msg_type_t {
+ FTS_MSG_STOP, /*!< Stop optimizing and exit thread */
+
+ FTS_MSG_ADD_TABLE, /*!< Add table to the optimize thread's
+ work queue */
+
+ FTS_MSG_DEL_TABLE, /*!< Remove a table from the optimize
+ threads work queue */
+ FTS_MSG_SYNC_TABLE /*!< Sync fts cache of a table */
+};
+
+/** Compressed list of words that have been read from the FTS INDEX
+and that need to be optimized. */
+struct fts_zip_t {
+ lint status; /*!< Status of (un)/zip operation */
+
+ ulint n_words; /*!< Number of words compressed */
+
+ ulint block_sz; /*!< Size of a block in bytes */
+
+ ib_vector_t* blocks; /*!< Vector of compressed blocks */
+
+ ib_alloc_t* heap_alloc; /*!< Heap to use for allocations */
+
+ ulint pos; /*!< Offset into blocks */
+
+ ulint last_big_block; /*!< Offset of last block in the
+ blocks array that is of size
+ block_sz. Blocks beyond this offset
+ are of size FTS_MAX_WORD_LEN */
+
+ z_streamp zp; /*!< ZLib state */
+
+	fts_string_t	word;		/*!< UTF-8 string; the value of
+					the last word read from the FTS
+					INDEX table, used to discard
+					duplicates */
+
+ ulint max_words; /*!< maximum number of words to read
+					in one pass */
+};
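+
+/* Note: the compressed stream held in fts_zip_t (produced by
+fts_fetch_index_words() below) stores each word as a 16-bit length
+prefix followed by the word bytes, deflated into blocks of block_sz
+bytes; the trailing blocks appended by fts_zip_deflate_end() are only
+FTS_MAX_WORD_LEN + 1 bytes each. fts_zip_read_word() inflates the
+stream in the same order. */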
+
+/** Prepared statements used during optimize */
+struct fts_optimize_graph_t {
+ /*!< Delete a word from FTS INDEX */
+ que_t* delete_nodes_graph;
+ /*!< Insert a word into FTS INDEX */
+ que_t* write_nodes_graph;
+ /*!< COMMIT a transaction */
+ que_t* commit_graph;
+ /*!< Read the nodes from FTS_INDEX */
+ que_t* read_nodes_graph;
+};
+
+/** Used by fts_optimize() to store state. */
+struct fts_optimize_t {
+ trx_t* trx; /*!< The transaction used for all SQL */
+
+ ib_alloc_t* self_heap; /*!< Heap to use for allocations */
+
+ char* name_prefix; /*!< FTS table name prefix */
+
+	fts_table_t	fts_index_table;/*!< FTS index table definition */
+
+ /*!< Common table definition */
+ fts_table_t fts_common_table;
+
+ dict_table_t* table; /*!< Table that has to be queried */
+
+ dict_index_t* index; /*!< The FTS index to be optimized */
+
+ fts_doc_ids_t* to_delete; /*!< doc ids to delete, we check against
+ this vector and purge the matching
+ entries during the optimizing
+ process. The vector entries are
+ sorted on doc id */
+
+ ulint del_pos; /*!< Offset within to_delete vector,
+ this is used to keep track of where
+ we are up to in the vector */
+
+ ibool done; /*!< TRUE when optimize finishes */
+
+ ib_vector_t* words; /*!< Word + Nodes read from FTS_INDEX,
+ it contains instances of fts_word_t */
+
+ fts_zip_t* zip; /*!< Words read from the FTS_INDEX */
+
+	fts_optimize_graph_t	graph;	/*!< Prepared statements used
+					during optimize */
+
+ ulint n_completed; /*!< Number of FTS indexes that have
+ been optimized */
+ ibool del_list_regenerated;
+				/*!< BEING_DELETED list regenerated */
+};
+
+/** Used by the optimize, to keep state during compacting nodes. */
+struct fts_encode_t {
+ doc_id_t src_last_doc_id;/*!< Last doc id read from src node */
+ byte* src_ilist_ptr; /*!< Current ptr within src ilist */
+};
+
+/** We use this information to determine when to start the optimize
+cycle for a table. */
+struct fts_slot_t {
+ /** table, or NULL if the slot is unused */
+ dict_table_t* table;
+
+ /** whether this slot is being processed */
+ bool running;
+
+ ulint added; /*!< Number of doc ids added since the
+ last time this table was optimized */
+
+ ulint deleted; /*!< Number of doc ids deleted since the
+ last time this table was optimized */
+
+ /** time(NULL) of completing fts_optimize_table_bk() */
+ time_t last_run;
+
+ /** time(NULL) of latest successful fts_optimize_table() */
+ time_t completed;
+};
+
+/** A table remove message for the FTS optimize thread. */
+struct fts_msg_del_t {
+ dict_table_t* table; /*!< The table to remove */
+
+ os_event_t event; /*!< Event to synchronize acknowledgement
+					of receipt and processing of
+					this message by the consumer */
+};
+
+/** The FTS optimize message work queue message type. */
+struct fts_msg_t {
+ fts_msg_type_t type; /*!< Message type */
+
+ void* ptr; /*!< The message contents */
+
+ mem_heap_t* heap; /*!< The heap used to allocate this
+ message, the message consumer will
+ free the heap. */
+};
+
+/** The number of words to read and optimize in a single pass. */
+ulong fts_num_word_optimize;
+
+/** Whether to enable additional FTS diagnostic printout. */
+char fts_enable_diag_print;
+
+/** ZLib compressed block size.*/
+static ulint FTS_ZIP_BLOCK_SIZE = 1024;
+
+/** The amount of time optimizing in a single pass, in seconds. */
+static ulint fts_optimize_time_limit;
+
+/** It's defined in fts0fts.cc */
+extern const char* fts_common_tables[];
+
+/** SQL Statement for changing state of rows to be deleted from FTS Index. */
+static const char* fts_init_delete_sql =
+ "BEGIN\n"
+ "\n"
+ "INSERT INTO $BEING_DELETED\n"
+ "SELECT doc_id FROM $DELETED;\n"
+ "\n"
+ "INSERT INTO $BEING_DELETED_CACHE\n"
+ "SELECT doc_id FROM $DELETED_CACHE;\n";
+
+static const char* fts_delete_doc_ids_sql =
+ "BEGIN\n"
+ "\n"
+ "DELETE FROM $DELETED WHERE doc_id = :doc_id1;\n"
+ "DELETE FROM $DELETED_CACHE WHERE doc_id = :doc_id2;\n";
+
+static const char* fts_end_delete_sql =
+ "BEGIN\n"
+ "\n"
+ "DELETE FROM $BEING_DELETED;\n"
+ "DELETE FROM $BEING_DELETED_CACHE;\n";
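+
+/* In the statements above, the '$'-prefixed identifiers (for example
+$DELETED or $BEING_DELETED) are placeholders for the FTS auxiliary
+table names; the caller binds them to the fully qualified names with
+pars_info_bind_id() before the statement is parsed by fts_parse_sql(),
+as is done for $table_name further below. */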
+
+/**********************************************************************//**
+Initialize fts_zip_t. */
+static
+void
+fts_zip_initialize(
+/*===============*/
+ fts_zip_t* zip) /*!< out: zip instance to initialize */
+{
+ zip->pos = 0;
+ zip->n_words = 0;
+
+ zip->status = Z_OK;
+
+ zip->last_big_block = 0;
+
+ zip->word.f_len = 0;
+ *zip->word.f_str = 0;
+
+ ib_vector_reset(zip->blocks);
+
+ memset(zip->zp, 0, sizeof(*zip->zp));
+}
+
+/**********************************************************************//**
+Create an instance of fts_zip_t.
+@return a new instance of fts_zip_t */
+static
+fts_zip_t*
+fts_zip_create(
+/*===========*/
+ mem_heap_t* heap, /*!< in: heap */
+ ulint block_sz, /*!< in: size of a zip block.*/
+ ulint max_words) /*!< in: max words to read */
+{
+ fts_zip_t* zip;
+
+ zip = static_cast<fts_zip_t*>(mem_heap_zalloc(heap, sizeof(*zip)));
+
+ zip->word.f_str = static_cast<byte*>(
+ mem_heap_zalloc(heap, FTS_MAX_WORD_LEN + 1));
+
+ zip->block_sz = block_sz;
+
+ zip->heap_alloc = ib_heap_allocator_create(heap);
+
+ zip->blocks = ib_vector_create(zip->heap_alloc, sizeof(void*), 128);
+
+ zip->max_words = max_words;
+
+ zip->zp = static_cast<z_stream*>(
+ mem_heap_zalloc(heap, sizeof(*zip->zp)));
+
+ return(zip);
+}
+
+/**********************************************************************//**
+Initialize an instance of fts_zip_t. */
+static
+void
+fts_zip_init(
+/*=========*/
+
+ fts_zip_t* zip) /*!< in: zip instance to init */
+{
+ memset(zip->zp, 0, sizeof(*zip->zp));
+
+ zip->word.f_len = 0;
+ *zip->word.f_str = '\0';
+}
+
+/**********************************************************************//**
+Initialize a fts_word_t instance.
+@return the initialized instance */
+static
+fts_word_t*
+fts_word_init(
+/*==========*/
+ fts_word_t* word, /*!< in: word to initialize */
+ byte* utf8, /*!< in: UTF-8 string */
+ ulint len) /*!< in: length of string in bytes */
+{
+ mem_heap_t* heap = mem_heap_create(sizeof(fts_node_t));
+
+ memset(word, 0, sizeof(*word));
+
+ word->text.f_len = len;
+ word->text.f_str = static_cast<byte*>(mem_heap_alloc(heap, len + 1));
+
+ /* Need to copy the NUL character too. */
+ memcpy(word->text.f_str, utf8, word->text.f_len);
+ word->text.f_str[word->text.f_len] = 0;
+
+ word->heap_alloc = ib_heap_allocator_create(heap);
+
+ word->nodes = ib_vector_create(
+ word->heap_alloc, sizeof(fts_node_t), FTS_WORD_NODES_INIT_SIZE);
+
+ return(word);
+}
+
+/**********************************************************************//**
+Read the FTS INDEX row.
+@return fts_node_t instance */
+static
+fts_node_t*
+fts_optimize_read_node(
+/*===================*/
+	fts_word_t*	word,	/*!< in: word to append the node to */
+	que_node_t*	exp)	/*!< in: select list of the row */
+{
+ int i;
+ fts_node_t* node = static_cast<fts_node_t*>(
+ ib_vector_push(word->nodes, NULL));
+
+ /* Start from 1 since the first node has been read by the caller */
+ for (i = 1; exp; exp = que_node_get_next(exp), ++i) {
+
+ dfield_t* dfield = que_node_get_val(exp);
+ byte* data = static_cast<byte*>(
+ dfield_get_data(dfield));
+ ulint len = dfield_get_len(dfield);
+
+ ut_a(len != UNIV_SQL_NULL);
+
+ /* Note: The column numbers below must match the SELECT */
+ switch (i) {
+ case 1: /* DOC_COUNT */
+ node->doc_count = mach_read_from_4(data);
+ break;
+
+ case 2: /* FIRST_DOC_ID */
+ node->first_doc_id = fts_read_doc_id(data);
+ break;
+
+ case 3: /* LAST_DOC_ID */
+ node->last_doc_id = fts_read_doc_id(data);
+ break;
+
+ case 4: /* ILIST */
+ node->ilist_size_alloc = node->ilist_size = len;
+ node->ilist = static_cast<byte*>(ut_malloc_nokey(len));
+ memcpy(node->ilist, data, len);
+ break;
+
+ default:
+ ut_error;
+ }
+ }
+
+ /* Make sure all columns were read. */
+ ut_a(i == 5);
+
+ return(node);
+}
+
+/**********************************************************************//**
+Callback function to fetch the rows in an FTS INDEX record.
+@return TRUE to continue reading, FALSE once the result cache limit
+is exceeded */
+ibool
+fts_optimize_index_fetch_node(
+/*==========================*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: pointer to ib_vector_t */
+{
+ fts_word_t* word;
+ sel_node_t* sel_node = static_cast<sel_node_t*>(row);
+ fts_fetch_t* fetch = static_cast<fts_fetch_t*>(user_arg);
+ ib_vector_t* words = static_cast<ib_vector_t*>(fetch->read_arg);
+ que_node_t* exp = sel_node->select_list;
+ dfield_t* dfield = que_node_get_val(exp);
+ void* data = dfield_get_data(dfield);
+ ulint dfield_len = dfield_get_len(dfield);
+ fts_node_t* node;
+ bool is_word_init = false;
+
+ ut_a(dfield_len <= FTS_MAX_WORD_LEN);
+
+ if (ib_vector_size(words) == 0) {
+
+ word = static_cast<fts_word_t*>(ib_vector_push(words, NULL));
+ fts_word_init(word, (byte*) data, dfield_len);
+ is_word_init = true;
+ }
+
+ word = static_cast<fts_word_t*>(ib_vector_last(words));
+
+ if (dfield_len != word->text.f_len
+ || memcmp(word->text.f_str, data, dfield_len)) {
+
+ word = static_cast<fts_word_t*>(ib_vector_push(words, NULL));
+ fts_word_init(word, (byte*) data, dfield_len);
+ is_word_init = true;
+ }
+
+ node = fts_optimize_read_node(word, que_node_get_next(exp));
+
+ fetch->total_memory += node->ilist_size;
+ if (is_word_init) {
+ fetch->total_memory += sizeof(fts_word_t)
+ + sizeof(ib_alloc_t) + sizeof(ib_vector_t) + dfield_len
+ + sizeof(fts_node_t) * FTS_WORD_NODES_INIT_SIZE;
+ } else if (ib_vector_size(words) > FTS_WORD_NODES_INIT_SIZE) {
+ fetch->total_memory += sizeof(fts_node_t);
+ }
+
+ if (fetch->total_memory >= fts_result_cache_limit) {
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
+/**********************************************************************//**
+Read the rows from the FTS index.
+@return DB_SUCCESS or error code */
+dberr_t
+fts_index_fetch_nodes(
+/*==================*/
+ trx_t* trx, /*!< in: transaction */
+ que_t** graph, /*!< in: prepared statement */
+ fts_table_t* fts_table, /*!< in: table of the FTS INDEX */
+ const fts_string_t*
+ word, /*!< in: the word to fetch */
+ fts_fetch_t* fetch) /*!< in: fetch callback.*/
+{
+ pars_info_t* info;
+ dberr_t error;
+ char table_name[MAX_FULL_NAME_LEN];
+
+ trx->op_info = "fetching FTS index nodes";
+
+ if (*graph) {
+ info = (*graph)->info;
+ } else {
+ ulint selected;
+
+ info = pars_info_create();
+
+ ut_a(fts_table->type == FTS_INDEX_TABLE);
+
+ selected = fts_select_index(fts_table->charset,
+ word->f_str, word->f_len);
+
+ fts_table->suffix = fts_get_suffix(selected);
+
+ fts_get_table_name(fts_table, table_name);
+
+ pars_info_bind_id(info, true, "table_name", table_name);
+ }
+
+ pars_info_bind_function(info, "my_func", fetch->read_record, fetch);
+ pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len);
+
+ if (!*graph) {
+
+ *graph = fts_parse_sql(
+ fts_table,
+ info,
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR c IS"
+ " SELECT word, doc_count, first_doc_id, last_doc_id,"
+ " ilist\n"
+ " FROM $table_name\n"
+ " WHERE word LIKE :word\n"
+ " ORDER BY first_doc_id;\n"
+ "BEGIN\n"
+ "\n"
+ "OPEN c;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH c INTO my_func();\n"
+ " IF c % NOTFOUND THEN\n"
+ " EXIT;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE c;");
+ }
+
+ for (;;) {
+ error = fts_eval_sql(trx, *graph);
+
+ if (UNIV_LIKELY(error == DB_SUCCESS)) {
+ fts_sql_commit(trx);
+
+ break; /* Exit the loop. */
+ } else {
+ fts_sql_rollback(trx);
+
+ if (error == DB_LOCK_WAIT_TIMEOUT) {
+ ib::warn() << "lock wait timeout reading"
+ " FTS index. Retrying!";
+
+ trx->error_state = DB_SUCCESS;
+ } else {
+ ib::error() << "(" << error
+ << ") while reading FTS index.";
+
+ break; /* Exit the loop. */
+ }
+ }
+ }
+
+ return(error);
+}
+
+/**********************************************************************//**
+Read a word */
+static
+byte*
+fts_zip_read_word(
+/*==============*/
+ fts_zip_t* zip, /*!< in: Zip state + data */
+ fts_string_t* word) /*!< out: uncompressed word */
+{
+ short len = 0;
+ void* null = NULL;
+ byte* ptr = word->f_str;
+ int flush = Z_NO_FLUSH;
+
+ /* Either there was an error or we are at the Z_STREAM_END. */
+ if (zip->status != Z_OK) {
+ return(NULL);
+ }
+
+ zip->zp->next_out = reinterpret_cast<byte*>(&len);
+ zip->zp->avail_out = sizeof(len);
+
+ while (zip->status == Z_OK && zip->zp->avail_out > 0) {
+
+ /* Finished decompressing block. */
+ if (zip->zp->avail_in == 0) {
+
+ /* Free the block that's been decompressed. */
+ if (zip->pos > 0) {
+ ulint prev = zip->pos - 1;
+
+ ut_a(zip->pos < ib_vector_size(zip->blocks));
+
+ ut_free(ib_vector_getp(zip->blocks, prev));
+ ib_vector_set(zip->blocks, prev, &null);
+ }
+
+ /* Any more blocks to decompress. */
+ if (zip->pos < ib_vector_size(zip->blocks)) {
+
+ zip->zp->next_in = static_cast<byte*>(
+ ib_vector_getp(
+ zip->blocks, zip->pos));
+
+ if (zip->pos > zip->last_big_block) {
+ zip->zp->avail_in =
+ FTS_MAX_WORD_LEN;
+ } else {
+ zip->zp->avail_in =
+ static_cast<uInt>(zip->block_sz);
+ }
+
+ ++zip->pos;
+ } else {
+ flush = Z_FINISH;
+ }
+ }
+
+ switch (zip->status = inflate(zip->zp, flush)) {
+ case Z_OK:
+ if (zip->zp->avail_out == 0 && len > 0) {
+
+ ut_a(len <= FTS_MAX_WORD_LEN);
+ ptr[len] = 0;
+
+ zip->zp->next_out = ptr;
+ zip->zp->avail_out = uInt(len);
+
+ word->f_len = ulint(len);
+ len = 0;
+ }
+ break;
+
+ case Z_BUF_ERROR: /* No progress possible. */
+ case Z_STREAM_END:
+ inflateEnd(zip->zp);
+ break;
+
+ case Z_STREAM_ERROR:
+ default:
+ ut_error;
+ }
+ }
+
+ /* All blocks must be freed at end of inflate. */
+ if (zip->status != Z_OK) {
+ for (ulint i = 0; i < ib_vector_size(zip->blocks); ++i) {
+ if (ib_vector_getp(zip->blocks, i)) {
+ ut_free(ib_vector_getp(zip->blocks, i));
+ ib_vector_set(zip->blocks, i, &null);
+ }
+ }
+ }
+
+ if (ptr != NULL) {
+ ut_ad(word->f_len == strlen((char*) ptr));
+ }
+
+ return(zip->status == Z_OK || zip->status == Z_STREAM_END ? ptr : NULL);
+}
+
+/**********************************************************************//**
+Callback function to fetch and compress the word in an FTS
+INDEX record.
+@return FALSE on EOF */
+static
+ibool
+fts_fetch_index_words(
+/*==================*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: pointer to ib_vector_t */
+{
+ sel_node_t* sel_node = static_cast<sel_node_t*>(row);
+ fts_zip_t* zip = static_cast<fts_zip_t*>(user_arg);
+ que_node_t* exp = sel_node->select_list;
+ dfield_t* dfield = que_node_get_val(exp);
+
+ ut_a(dfield_get_len(dfield) <= FTS_MAX_WORD_LEN);
+
+ uint16 len = uint16(dfield_get_len(dfield));
+ void* data = dfield_get_data(dfield);
+
+ /* Skip the duplicate words. */
+ if (zip->word.f_len == len && !memcmp(zip->word.f_str, data, len)) {
+ return(TRUE);
+ }
+
+ memcpy(zip->word.f_str, data, len);
+ zip->word.f_len = len;
+
+ ut_a(zip->zp->avail_in == 0);
+ ut_a(zip->zp->next_in == NULL);
+
+ /* The string is prefixed by len. */
+ /* FIXME: This is not byte order agnostic (InnoDB data files
+ with FULLTEXT INDEX are not portable between little-endian and
+ big-endian systems!) */
+ zip->zp->next_in = reinterpret_cast<byte*>(&len);
+ zip->zp->avail_in = sizeof(len);
+
+ /* Compress the word, create output blocks as necessary. */
+ while (zip->zp->avail_in > 0) {
+
+ /* No space left in output buffer, create a new one. */
+ if (zip->zp->avail_out == 0) {
+ byte* block;
+
+ block = static_cast<byte*>(
+ ut_malloc_nokey(zip->block_sz));
+
+ ib_vector_push(zip->blocks, &block);
+
+ zip->zp->next_out = block;
+ zip->zp->avail_out = static_cast<uInt>(zip->block_sz);
+ }
+
+ switch (zip->status = deflate(zip->zp, Z_NO_FLUSH)) {
+ case Z_OK:
+ if (zip->zp->avail_in == 0) {
+ zip->zp->next_in = static_cast<byte*>(data);
+ zip->zp->avail_in = uInt(len);
+ ut_a(len <= FTS_MAX_WORD_LEN);
+ len = 0;
+ }
+ continue;
+
+ case Z_STREAM_END:
+ case Z_BUF_ERROR:
+ case Z_STREAM_ERROR:
+ default:
+ ut_error;
+ }
+ }
+
+ /* All data should have been compressed. */
+ ut_a(zip->zp->avail_in == 0);
+ zip->zp->next_in = NULL;
+
+ ++zip->n_words;
+
+ return(zip->n_words >= zip->max_words ? FALSE : TRUE);
+}
+
+/**********************************************************************//**
+Finish Zip deflate. */
+static
+void
+fts_zip_deflate_end(
+/*================*/
+ fts_zip_t* zip) /*!< in: instance that should be closed*/
+{
+ ut_a(zip->zp->avail_in == 0);
+ ut_a(zip->zp->next_in == NULL);
+
+ zip->status = deflate(zip->zp, Z_FINISH);
+
+ ut_a(ib_vector_size(zip->blocks) > 0);
+ zip->last_big_block = ib_vector_size(zip->blocks) - 1;
+
+ /* Allocate smaller block(s), since this is trailing data. */
+ while (zip->status == Z_OK) {
+ byte* block;
+
+ ut_a(zip->zp->avail_out == 0);
+
+ block = static_cast<byte*>(
+ ut_malloc_nokey(FTS_MAX_WORD_LEN + 1));
+
+ ib_vector_push(zip->blocks, &block);
+
+ zip->zp->next_out = block;
+ zip->zp->avail_out = FTS_MAX_WORD_LEN;
+
+ zip->status = deflate(zip->zp, Z_FINISH);
+ }
+
+ ut_a(zip->status == Z_STREAM_END);
+
+ zip->status = deflateEnd(zip->zp);
+ ut_a(zip->status == Z_OK);
+
+ /* Reset the ZLib data structure. */
+ memset(zip->zp, 0, sizeof(*zip->zp));
+}
+
+/**********************************************************************//**
+Read the words from the FTS INDEX.
+@return DB_SUCCESS if all OK, DB_TABLE_NOT_FOUND if no more indexes
+ to search else error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_index_fetch_words(
+/*==================*/
+ fts_optimize_t* optim, /*!< in: optimize scratch pad */
+ const fts_string_t* word, /*!< in: get words greater than this
+ word */
+ ulint n_words)/*!< in: max words to read */
+{
+ pars_info_t* info;
+ que_t* graph;
+ ulint selected;
+ fts_zip_t* zip = NULL;
+ dberr_t error = DB_SUCCESS;
+ mem_heap_t* heap = static_cast<mem_heap_t*>(optim->self_heap->arg);
+ ibool inited = FALSE;
+
+ optim->trx->op_info = "fetching FTS index words";
+
+ if (optim->zip == NULL) {
+ optim->zip = fts_zip_create(heap, FTS_ZIP_BLOCK_SIZE, n_words);
+ } else {
+ fts_zip_initialize(optim->zip);
+ }
+
+ for (selected = fts_select_index(
+ optim->fts_index_table.charset, word->f_str, word->f_len);
+ selected < FTS_NUM_AUX_INDEX;
+ selected++) {
+
+ char table_name[MAX_FULL_NAME_LEN];
+
+ optim->fts_index_table.suffix = fts_get_suffix(selected);
+
+ info = pars_info_create();
+
+ pars_info_bind_function(
+ info, "my_func", fts_fetch_index_words, optim->zip);
+
+ pars_info_bind_varchar_literal(
+ info, "word", word->f_str, word->f_len);
+
+ fts_get_table_name(&optim->fts_index_table, table_name);
+ pars_info_bind_id(info, true, "table_name", table_name);
+
+ graph = fts_parse_sql(
+ &optim->fts_index_table,
+ info,
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR c IS"
+ " SELECT word\n"
+ " FROM $table_name\n"
+ " WHERE word > :word\n"
+ " ORDER BY word;\n"
+ "BEGIN\n"
+ "\n"
+ "OPEN c;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH c INTO my_func();\n"
+ " IF c % NOTFOUND THEN\n"
+ " EXIT;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE c;");
+
+ zip = optim->zip;
+
+ for (;;) {
+ int err;
+
+ if (!inited && ((err = deflateInit(zip->zp, 9))
+ != Z_OK)) {
+ ib::error() << "ZLib deflateInit() failed: "
+ << err;
+
+ error = DB_ERROR;
+ break;
+ } else {
+ inited = TRUE;
+ error = fts_eval_sql(optim->trx, graph);
+ }
+
+ if (UNIV_LIKELY(error == DB_SUCCESS)) {
+ //FIXME fts_sql_commit(optim->trx);
+ break;
+ } else {
+ //FIXME fts_sql_rollback(optim->trx);
+
+ if (error == DB_LOCK_WAIT_TIMEOUT) {
+ ib::warn() << "Lock wait timeout"
+ " reading document. Retrying!";
+
+ /* We need to reset the ZLib state. */
+ inited = FALSE;
+ deflateEnd(zip->zp);
+ fts_zip_init(zip);
+
+ optim->trx->error_state = DB_SUCCESS;
+ } else {
+ ib::error() << "(" << error
+ << ") while reading document.";
+
+ break; /* Exit the loop. */
+ }
+ }
+ }
+
+ fts_que_graph_free(graph);
+
+ /* Check if max word to fetch is exceeded */
+ if (optim->zip->n_words >= n_words) {
+ break;
+ }
+ }
+
+ if (error == DB_SUCCESS && zip->status == Z_OK && zip->n_words > 0) {
+
+ /* All data should have been read. */
+ ut_a(zip->zp->avail_in == 0);
+
+ fts_zip_deflate_end(zip);
+ } else {
+ deflateEnd(zip->zp);
+ }
+
+ return(error);
+}
+
+/**********************************************************************//**
+Callback function to fetch the doc id from the record.
+@return always returns TRUE */
+static
+ibool
+fts_fetch_doc_ids(
+/*==============*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: pointer to ib_vector_t */
+{
+ que_node_t* exp;
+ int i = 0;
+ sel_node_t* sel_node = static_cast<sel_node_t*>(row);
+ fts_doc_ids_t* fts_doc_ids = static_cast<fts_doc_ids_t*>(user_arg);
+ doc_id_t* update = static_cast<doc_id_t*>(
+ ib_vector_push(fts_doc_ids->doc_ids, NULL));
+
+ for (exp = sel_node->select_list;
+ exp;
+ exp = que_node_get_next(exp), ++i) {
+
+ dfield_t* dfield = que_node_get_val(exp);
+ void* data = dfield_get_data(dfield);
+ ulint len = dfield_get_len(dfield);
+
+ ut_a(len != UNIV_SQL_NULL);
+
+ /* Note: The column numbers below must match the SELECT. */
+ switch (i) {
+ case 0: /* DOC_ID */
+ *update = fts_read_doc_id(
+ static_cast<byte*>(data));
+ break;
+
+ default:
+ ut_error;
+ }
+ }
+
+ return(TRUE);
+}
+
+/**********************************************************************//**
+Read the rows from an FTS common auxiliary table.
+@return DB_SUCCESS or error code */
+dberr_t
+fts_table_fetch_doc_ids(
+/*====================*/
+ trx_t* trx, /*!< in: transaction */
+ fts_table_t* fts_table, /*!< in: table */
+ fts_doc_ids_t* doc_ids) /*!< in: For collecting doc ids */
+{
+ dberr_t error;
+ que_t* graph;
+ pars_info_t* info = pars_info_create();
+ ibool alloc_bk_trx = FALSE;
+ char table_name[MAX_FULL_NAME_LEN];
+
+ ut_a(fts_table->suffix != NULL);
+ ut_a(fts_table->type == FTS_COMMON_TABLE);
+
+ if (!trx) {
+ trx = trx_create();
+ alloc_bk_trx = TRUE;
+ }
+
+ trx->op_info = "fetching FTS doc ids";
+
+ pars_info_bind_function(info, "my_func", fts_fetch_doc_ids, doc_ids);
+
+ fts_get_table_name(fts_table, table_name);
+ pars_info_bind_id(info, true, "table_name", table_name);
+
+ graph = fts_parse_sql(
+ fts_table,
+ info,
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR c IS"
+ " SELECT doc_id FROM $table_name;\n"
+ "BEGIN\n"
+ "\n"
+ "OPEN c;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH c INTO my_func();\n"
+ " IF c % NOTFOUND THEN\n"
+ " EXIT;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE c;");
+
+ error = fts_eval_sql(trx, graph);
+ fts_sql_commit(trx);
+
+ mutex_enter(&dict_sys.mutex);
+ que_graph_free(graph);
+ mutex_exit(&dict_sys.mutex);
+
+ if (error == DB_SUCCESS) {
+ ib_vector_sort(doc_ids->doc_ids, fts_doc_id_cmp);
+ }
+
+ if (alloc_bk_trx) {
+ trx->free();
+ }
+
+ return(error);
+}
+
+/**********************************************************************//**
+Do a binary search for a doc id in the array.
+@return the index if found, or the negative of the index where the
+	doc id should be inserted if not found */
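+/* For example, with array = {2, 5, 9}, lower = 0 and upper = 3
+(illustrative values): searching for 5 returns 1; searching for 7
+returns -2, i.e. 7 would be inserted at index 2; and searching for 1
+returns -1, because a negative zero cannot be represented. */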
+int
+fts_bsearch(
+/*========*/
+ doc_id_t* array, /*!< in: array to sort */
+ int lower, /*!< in: the array lower bound */
+ int upper, /*!< in: the array upper bound */
+ doc_id_t doc_id) /*!< in: the doc id to search for */
+{
+ int orig_size = upper;
+
+ if (upper == 0) {
+ /* Nothing to search */
+ return(-1);
+ } else {
+ while (lower < upper) {
+ int i = (lower + upper) >> 1;
+
+ if (doc_id > array[i]) {
+ lower = i + 1;
+ } else if (doc_id < array[i]) {
+ upper = i - 1;
+ } else {
+ return(i); /* Found. */
+ }
+ }
+ }
+
+ if (lower == upper && lower < orig_size) {
+ if (doc_id == array[lower]) {
+ return(lower);
+ } else if (lower == 0) {
+ return(-1);
+ }
+ }
+
+ /* Not found. */
+ return( (lower == 0) ? -1 : -(lower));
+}
+
+/**********************************************************************//**
+Search the to-delete array to check whether any of the doc ids within
+the [first, last] range are to be deleted.
+@return non-negative index if found; negative index indicating where
+	it should be inserted if not found */
+static
+int
+fts_optimize_lookup(
+/*================*/
+ ib_vector_t* doc_ids, /*!< in: array to search */
+ ulint lower, /*!< in: lower limit of array */
+ doc_id_t first_doc_id, /*!< in: doc id to lookup */
+ doc_id_t last_doc_id) /*!< in: doc id to lookup */
+{
+ int pos;
+ int upper = static_cast<int>(ib_vector_size(doc_ids));
+ doc_id_t* array = (doc_id_t*) doc_ids->data;
+
+ pos = fts_bsearch(array, static_cast<int>(lower), upper, first_doc_id);
+
+ ut_a(abs(pos) <= upper + 1);
+
+ if (pos < 0) {
+
+ int i = abs(pos);
+
+ /* If i is 1, it could be first_doc_id is less than
+ either the first or second array item, do a
+ double check */
+ if (i == 1 && array[0] <= last_doc_id
+ && first_doc_id < array[0]) {
+ pos = 0;
+ } else if (i < upper && array[i] <= last_doc_id) {
+
+ /* Check if the "next" doc id is within the
+ first & last doc id of the node. */
+ pos = i;
+ }
+ }
+
+ return(pos);
+}
+
+/**********************************************************************//**
+Encode the word pos list into the node
+@return DB_SUCCESS or error code*/
+static MY_ATTRIBUTE((nonnull))
+dberr_t
+fts_optimize_encode_node(
+/*=====================*/
+ fts_node_t* node, /*!< in: node to fill*/
+ doc_id_t doc_id, /*!< in: doc id to encode */
+ fts_encode_t* enc) /*!< in: encoding state.*/
+{
+ byte* dst;
+ ulint enc_len;
+ ulint pos_enc_len;
+ doc_id_t doc_id_delta;
+ dberr_t error = DB_SUCCESS;
+ byte* src = enc->src_ilist_ptr;
+
+ if (node->first_doc_id == 0) {
+ ut_a(node->last_doc_id == 0);
+
+ node->first_doc_id = doc_id;
+ }
+
+ /* Calculate the space required to store the ilist. */
+ ut_ad(doc_id > node->last_doc_id);
+ doc_id_delta = doc_id - node->last_doc_id;
+ enc_len = fts_get_encoded_len(static_cast<ulint>(doc_id_delta));
+
+ /* Calculate the size of the encoded pos array. */
+ while (*src) {
+ fts_decode_vlc(&src);
+ }
+
+ /* Skip the 0x00 byte at the end of the word positions list. */
+ ++src;
+
+ /* Number of encoded pos bytes to copy. */
+ pos_enc_len = ulint(src - enc->src_ilist_ptr);
+
+ /* Total number of bytes required for copy. */
+ enc_len += pos_enc_len;
+
+ /* Check we have enough space in the destination buffer for
+ copying the document word list. */
+ if (!node->ilist) {
+ ulint new_size;
+
+ ut_a(node->ilist_size == 0);
+
+ new_size = enc_len > FTS_ILIST_MAX_SIZE
+ ? enc_len : FTS_ILIST_MAX_SIZE;
+
+ node->ilist = static_cast<byte*>(ut_malloc_nokey(new_size));
+ node->ilist_size_alloc = new_size;
+
+ } else if ((node->ilist_size + enc_len) > node->ilist_size_alloc) {
+ ulint new_size = node->ilist_size + enc_len;
+ byte* ilist = static_cast<byte*>(ut_malloc_nokey(new_size));
+
+ memcpy(ilist, node->ilist, node->ilist_size);
+
+ ut_free(node->ilist);
+
+ node->ilist = ilist;
+ node->ilist_size_alloc = new_size;
+ }
+
+ src = enc->src_ilist_ptr;
+ dst = node->ilist + node->ilist_size;
+
+	/* Encode the doc id delta. Cast to ulint; the delta should be
+	small, so there is no loss of precision. */
+ dst += fts_encode_int((ulint) doc_id_delta, dst);
+
+ /* Copy the encoded pos array. */
+ memcpy(dst, src, pos_enc_len);
+
+ node->last_doc_id = doc_id;
+
+	/* Data copied up to here. */
+ node->ilist_size += enc_len;
+ enc->src_ilist_ptr += pos_enc_len;
+
+ ut_a(node->ilist_size <= node->ilist_size_alloc);
+
+ return(error);
+}
+
+/**********************************************************************//**
+Optimize the data contained in a node.
+@return DB_SUCCESS or error code*/
+static MY_ATTRIBUTE((nonnull))
+dberr_t
+fts_optimize_node(
+/*==============*/
+ ib_vector_t* del_vec, /*!< in: vector of doc ids to delete*/
+ int* del_pos, /*!< in: offset into above vector */
+ fts_node_t* dst_node, /*!< in: node to fill*/
+ fts_node_t* src_node, /*!< in: source node for data*/
+ fts_encode_t* enc) /*!< in: encoding state */
+{
+ ulint copied;
+ dberr_t error = DB_SUCCESS;
+ doc_id_t doc_id = enc->src_last_doc_id;
+
+ if (!enc->src_ilist_ptr) {
+ enc->src_ilist_ptr = src_node->ilist;
+ }
+
+ copied = ulint(enc->src_ilist_ptr - src_node->ilist);
+
+ /* While there is data in the source node and space to copy
+ into in the destination node. */
+ while (copied < src_node->ilist_size
+ && dst_node->ilist_size < FTS_ILIST_MAX_SIZE) {
+
+ doc_id_t delta;
+ doc_id_t del_doc_id = FTS_NULL_DOC_ID;
+
+ delta = fts_decode_vlc(&enc->src_ilist_ptr);
+
+test_again:
+		/* Check whether the doc id is in the delete list; if so,
+		we skip its entries, but we still need to track the delta
+		in order to decode the entries of the documents that
+		follow. */
+ if (*del_pos >= 0 && *del_pos < (int) ib_vector_size(del_vec)) {
+ doc_id_t* update;
+
+ update = (doc_id_t*) ib_vector_get(
+ del_vec, ulint(*del_pos));
+
+ del_doc_id = *update;
+ }
+
+ if (enc->src_ilist_ptr == src_node->ilist && doc_id == 0) {
+ ut_a(delta == src_node->first_doc_id);
+ }
+
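+		/* The doc ids in the ilist are delta encoded; rebuild the
+		absolute doc id before comparing it with the delete list. */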
+ doc_id += delta;
+
+ if (del_doc_id > 0 && doc_id == del_doc_id) {
+
+ ++*del_pos;
+
+ /* Skip the entries for this document. */
+ while (*enc->src_ilist_ptr) {
+ fts_decode_vlc(&enc->src_ilist_ptr);
+ }
+
+ /* Skip the end of word position marker. */
+ ++enc->src_ilist_ptr;
+
+ } else {
+
+			/* The doc id is already larger than del_doc_id;
+			check the next del_doc_id. */
+ if (del_doc_id > 0 && doc_id > del_doc_id) {
+ del_doc_id = 0;
+ ++*del_pos;
+ delta = 0;
+ goto test_again;
+ }
+
+ /* Decode and copy the word positions into
+ the dest node. */
+ fts_optimize_encode_node(dst_node, doc_id, enc);
+
+ ++dst_node->doc_count;
+
+ ut_a(dst_node->last_doc_id == doc_id);
+ }
+
+		/* Bytes copied so far from the source. */
+ copied = ulint(enc->src_ilist_ptr - src_node->ilist);
+ }
+
+ if (copied >= src_node->ilist_size) {
+ ut_a(doc_id == src_node->last_doc_id);
+ }
+
+ enc->src_last_doc_id = doc_id;
+
+ return(error);
+}
+
+/**********************************************************************//**
+Determine the starting pos within the deleted doc id vector for a word.
+@return delete position */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+int
+fts_optimize_deleted_pos(
+/*=====================*/
+ fts_optimize_t* optim, /*!< in: optimize state data */
+ fts_word_t* word) /*!< in: the word data to check */
+{
+ int del_pos;
+ ib_vector_t* del_vec = optim->to_delete->doc_ids;
+
+	/* Get the first and last doc ids for the word; we will use
+	these values to determine which doc ids need to be removed
+	when we coalesce the nodes. This way we reduce the number
+	of elements that need to be searched in the deleted doc ids
+	vector, and we can also remove the doc ids during the
+	coalescing phase. */
+ if (ib_vector_size(del_vec) > 0) {
+ fts_node_t* node;
+ doc_id_t last_id;
+ doc_id_t first_id;
+ ulint size = ib_vector_size(word->nodes);
+
+ node = (fts_node_t*) ib_vector_get(word->nodes, 0);
+ first_id = node->first_doc_id;
+
+ node = (fts_node_t*) ib_vector_get(word->nodes, size - 1);
+ last_id = node->last_doc_id;
+
+ ut_a(first_id <= last_id);
+
+ del_pos = fts_optimize_lookup(
+ del_vec, optim->del_pos, first_id, last_id);
+ } else {
+
+ del_pos = -1; /* Note that there is nothing to delete. */
+ }
+
+ return(del_pos);
+}
+
+#define FTS_DEBUG_PRINT
+/**********************************************************************//**
+Compact the nodes for a word; any deleted doc ids are also removed
+during the compaction pass.
+@return vector of the compacted nodes */
+static
+ib_vector_t*
+fts_optimize_word(
+/*==============*/
+ fts_optimize_t* optim, /*!< in: optimize state data */
+ fts_word_t* word) /*!< in: the word to optimize */
+{
+ fts_encode_t enc;
+ ib_vector_t* nodes;
+ ulint i = 0;
+ int del_pos;
+ fts_node_t* dst_node = NULL;
+ ib_vector_t* del_vec = optim->to_delete->doc_ids;
+ ulint size = ib_vector_size(word->nodes);
+
+ del_pos = fts_optimize_deleted_pos(optim, word);
+ nodes = ib_vector_create(word->heap_alloc, sizeof(*dst_node), 128);
+
+ enc.src_last_doc_id = 0;
+ enc.src_ilist_ptr = NULL;
+
+ while (i < size) {
+ ulint copied;
+ fts_node_t* src_node;
+
+ src_node = (fts_node_t*) ib_vector_get(word->nodes, i);
+
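+		/* Start a new destination node if there is none yet, or if
+		appending this source node would violate the ascending doc id
+		order required within a node. */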
+ if (dst_node == NULL
+ || dst_node->last_doc_id > src_node->first_doc_id) {
+
+ dst_node = static_cast<fts_node_t*>(
+ ib_vector_push(nodes, NULL));
+ memset(dst_node, 0, sizeof(*dst_node));
+ }
+
+ /* Copy from the src to the dst node. */
+ fts_optimize_node(del_vec, &del_pos, dst_node, src_node, &enc);
+
+ ut_a(enc.src_ilist_ptr != NULL);
+
+		/* Determine the number of bytes copied to dst_node. */
+ copied = ulint(enc.src_ilist_ptr - src_node->ilist);
+
+		/* Can't copy more than what's in the vlc array. */
+ ut_a(copied <= src_node->ilist_size);
+
+		/* We are done with this node; release the resources. */
+ if (copied == src_node->ilist_size) {
+
+ enc.src_last_doc_id = 0;
+ enc.src_ilist_ptr = NULL;
+
+ ut_free(src_node->ilist);
+
+ src_node->ilist = NULL;
+ src_node->ilist_size = src_node->ilist_size_alloc = 0;
+
+ src_node = NULL;
+
+ ++i; /* Get next source node to OPTIMIZE. */
+ }
+
+ if (dst_node->ilist_size >= FTS_ILIST_MAX_SIZE || i >= size) {
+
+ dst_node = NULL;
+ }
+ }
+
+ /* All dst nodes created should have been added to the vector. */
+ ut_a(dst_node == NULL);
+
+ /* Return the OPTIMIZED nodes. */
+ return(nodes);
+}
+
+/**********************************************************************//**
+Update the FTS index table. This is a delete followed by an insert.
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_optimize_write_word(
+/*====================*/
+ trx_t* trx, /*!< in: transaction */
+ fts_table_t* fts_table, /*!< in: table of FTS index */
+ fts_string_t* word, /*!< in: word data to write */
+ ib_vector_t* nodes) /*!< in: the nodes to write */
+{
+ ulint i;
+ pars_info_t* info;
+ que_t* graph;
+ ulint selected;
+ dberr_t error = DB_SUCCESS;
+ char table_name[MAX_FULL_NAME_LEN];
+
+ info = pars_info_create();
+
+ ut_ad(fts_table->charset);
+
+ pars_info_bind_varchar_literal(
+ info, "word", word->f_str, word->f_len);
+
+ selected = fts_select_index(fts_table->charset,
+ word->f_str, word->f_len);
+
+ fts_table->suffix = fts_get_suffix(selected);
+ fts_get_table_name(fts_table, table_name);
+ pars_info_bind_id(info, true, "table_name", table_name);
+
+ graph = fts_parse_sql(
+ fts_table,
+ info,
+ "BEGIN DELETE FROM $table_name WHERE word = :word;");
+
+ error = fts_eval_sql(trx, graph);
+
+ if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
+ ib::error() << "(" << error << ") during optimize,"
+ " when deleting a word from the FTS index.";
+ }
+
+ fts_que_graph_free(graph);
+ graph = NULL;
+
+ /* Even if the operation needs to be rolled back and redone,
+ we iterate over the nodes in order to free the ilist. */
+ for (i = 0; i < ib_vector_size(nodes); ++i) {
+
+ fts_node_t* node = (fts_node_t*) ib_vector_get(nodes, i);
+
+ if (error == DB_SUCCESS) {
+ /* Skip empty node. */
+ if (node->ilist == NULL) {
+ ut_ad(node->ilist_size == 0);
+ continue;
+ }
+
+ error = fts_write_node(
+ trx, &graph, fts_table, word, node);
+
+ if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
+ ib::error() << "(" << error << ")"
+ " during optimize, while adding a"
+ " word to the FTS index.";
+ }
+ }
+
+ ut_free(node->ilist);
+ node->ilist = NULL;
+ node->ilist_size = node->ilist_size_alloc = 0;
+ }
+
+ if (graph != NULL) {
+ fts_que_graph_free(graph);
+ }
+
+ return(error);
+}
+
+/**********************************************************************//**
+Free an fts_word_t instance. */
+void
+fts_word_free(
+/*==========*/
+ fts_word_t* word) /*!< in: instance to free.*/
+{
+ mem_heap_t* heap = static_cast<mem_heap_t*>(word->heap_alloc->arg);
+
+#ifdef UNIV_DEBUG
+ memset(word, 0, sizeof(*word));
+#endif /* UNIV_DEBUG */
+
+ mem_heap_free(heap);
+}
+
+/**********************************************************************//**
+Optimize the word ilist and rewrite data to the FTS index.
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_optimize_compact(
+/*=================*/
+ fts_optimize_t* optim, /*!< in: optimize state data */
+ dict_index_t* index, /*!< in: current FTS being optimized */
+ time_t start_time) /*!< in: optimize start time */
+{
+ ulint i;
+ dberr_t error = DB_SUCCESS;
+ ulint size = ib_vector_size(optim->words);
+
+ for (i = 0; i < size && error == DB_SUCCESS && !optim->done; ++i) {
+ fts_word_t* word;
+ ib_vector_t* nodes;
+ trx_t* trx = optim->trx;
+
+ word = (fts_word_t*) ib_vector_get(optim->words, i);
+
+		/* nodes is allocated from the word heap and will be destroyed
+		when the word is freed. We must, however, be careful with the
+		ilist, which needs to be freed explicitly. */
+ nodes = fts_optimize_word(optim, word);
+
+ /* Update the data on disk. */
+ error = fts_optimize_write_word(
+ trx, &optim->fts_index_table, &word->text, nodes);
+
+ if (error == DB_SUCCESS) {
+ /* Write the last word optimized to the config table,
+ we use this value for restarting optimize. */
+ error = fts_config_set_index_value(
+ optim->trx, index,
+ FTS_LAST_OPTIMIZED_WORD, &word->text);
+ }
+
+ /* Free the word that was optimized. */
+ fts_word_free(word);
+
+ ulint interval = ulint(time(NULL) - start_time);
+
+ if (fts_optimize_time_limit > 0
+ && (lint(interval) < 0
+ || interval > fts_optimize_time_limit)) {
+
+ optim->done = TRUE;
+ }
+ }
+
+ return(error);
+}
+
+/**********************************************************************//**
+Create an instance of fts_optimize_t. Also create a new
+background transaction.*/
+static
+fts_optimize_t*
+fts_optimize_create(
+/*================*/
+ dict_table_t* table) /*!< in: table with FTS indexes */
+{
+ fts_optimize_t* optim;
+ mem_heap_t* heap = mem_heap_create(128);
+
+ optim = (fts_optimize_t*) mem_heap_zalloc(heap, sizeof(*optim));
+
+ optim->self_heap = ib_heap_allocator_create(heap);
+
+ optim->to_delete = fts_doc_ids_create();
+
+ optim->words = ib_vector_create(
+ optim->self_heap, sizeof(fts_word_t), 256);
+
+ optim->table = table;
+
+ optim->trx = trx_create();
+ trx_start_internal(optim->trx);
+
+ optim->fts_common_table.table_id = table->id;
+ optim->fts_common_table.type = FTS_COMMON_TABLE;
+ optim->fts_common_table.table = table;
+
+ optim->fts_index_table.table_id = table->id;
+ optim->fts_index_table.type = FTS_INDEX_TABLE;
+ optim->fts_index_table.table = table;
+
+ /* The common prefix for all this parent table's aux tables. */
+ optim->name_prefix = fts_get_table_name_prefix(
+ &optim->fts_common_table);
+
+ return(optim);
+}
+
+#ifdef FTS_OPTIMIZE_DEBUG
+/**********************************************************************//**
+Get optimize start time of an FTS index.
+@return DB_SUCCESS if all OK else error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_optimize_get_index_start_time(
+/*==============================*/
+ trx_t* trx, /*!< in: transaction */
+ dict_index_t* index, /*!< in: FTS index */
+ time_t* start_time) /*!< out: time in secs */
+{
+ return(fts_config_get_index_ulint(
+ trx, index, FTS_OPTIMIZE_START_TIME,
+ (ulint*) start_time));
+}
+
+/**********************************************************************//**
+Set the optimize start time of an FTS index.
+@return DB_SUCCESS if all OK else error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_optimize_set_index_start_time(
+/*==============================*/
+ trx_t* trx, /*!< in: transaction */
+ dict_index_t* index, /*!< in: FTS index */
+ time_t start_time) /*!< in: start time */
+{
+ return(fts_config_set_index_ulint(
+ trx, index, FTS_OPTIMIZE_START_TIME,
+ (ulint) start_time));
+}
+
+/**********************************************************************//**
+Get optimize end time of an FTS index.
+@return DB_SUCCESS if all OK else error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_optimize_get_index_end_time(
+/*============================*/
+ trx_t* trx, /*!< in: transaction */
+ dict_index_t* index, /*!< in: FTS index */
+ time_t* end_time) /*!< out: time in secs */
+{
+ return(fts_config_get_index_ulint(
+ trx, index, FTS_OPTIMIZE_END_TIME, (ulint*) end_time));
+}
+
+/**********************************************************************//**
+Set the optimize end time of an FTS index.
+@return DB_SUCCESS if all OK else error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_optimize_set_index_end_time(
+/*============================*/
+ trx_t* trx, /*!< in: transaction */
+ dict_index_t* index, /*!< in: FTS index */
+ time_t end_time) /*!< in: end time */
+{
+ return(fts_config_set_index_ulint(
+ trx, index, FTS_OPTIMIZE_END_TIME, (ulint) end_time));
+}
+#endif
+
+/**********************************************************************//**
+Free the optimize prepared statements.*/
+static
+void
+fts_optimize_graph_free(
+/*====================*/
+ fts_optimize_graph_t* graph) /*!< in/out: The graph instances
+ to free */
+{
+ if (graph->commit_graph) {
+ que_graph_free(graph->commit_graph);
+ graph->commit_graph = NULL;
+ }
+
+ if (graph->write_nodes_graph) {
+ que_graph_free(graph->write_nodes_graph);
+ graph->write_nodes_graph = NULL;
+ }
+
+ if (graph->delete_nodes_graph) {
+ que_graph_free(graph->delete_nodes_graph);
+ graph->delete_nodes_graph = NULL;
+ }
+
+ if (graph->read_nodes_graph) {
+ que_graph_free(graph->read_nodes_graph);
+ graph->read_nodes_graph = NULL;
+ }
+}
+
+/**********************************************************************//**
+Free all optimize resources. */
+static
+void
+fts_optimize_free(
+/*==============*/
+ fts_optimize_t* optim) /*!< in: table with on FTS index */
+{
+ mem_heap_t* heap = static_cast<mem_heap_t*>(optim->self_heap->arg);
+
+ trx_commit_for_mysql(optim->trx);
+ optim->trx->free();
+ optim->trx = NULL;
+
+ fts_doc_ids_free(optim->to_delete);
+ fts_optimize_graph_free(&optim->graph);
+
+ ut_free(optim->name_prefix);
+
+ /* This will free the heap from which optim itself was allocated. */
+ mem_heap_free(heap);
+}
+
+/**********************************************************************//**
+Get the max time optimize should run in millisecs.
+@return max optimize time limit in millisecs. */
+static
+ulint
+fts_optimize_get_time_limit(
+/*========================*/
+ trx_t* trx, /*!< in: transaction */
+ fts_table_t* fts_table) /*!< in: aux table */
+{
+ ulint time_limit = 0;
+
+ fts_config_get_ulint(
+ trx, fts_table,
+ FTS_OPTIMIZE_LIMIT_IN_SECS, &time_limit);
+
+ /* FIXME: This is returning milliseconds, while the variable
+ is being stored and interpreted as seconds! */
+ return(time_limit * 1000);
+}
+
+/**********************************************************************//**
+Optimize the words of the given FTS index, starting from the given word.
+Note: this can take a very long time (hours). */
+static
+void
+fts_optimize_words(
+/*===============*/
+ fts_optimize_t* optim, /*!< in: optimize instance */
+ dict_index_t* index, /*!< in: current FTS being optimized */
+ fts_string_t* word) /*!< in: the starting word to optimize */
+{
+ fts_fetch_t fetch;
+ que_t* graph = NULL;
+ CHARSET_INFO* charset = optim->fts_index_table.charset;
+
+ ut_a(!optim->done);
+
+ /* Get the time limit from the config table. */
+ fts_optimize_time_limit = fts_optimize_get_time_limit(
+ optim->trx, &optim->fts_common_table);
+
+ const time_t start_time = time(NULL);
+
+ /* Setup the callback to use for fetching the word ilist etc. */
+ fetch.read_arg = optim->words;
+ fetch.read_record = fts_optimize_index_fetch_node;
+
+ while (!optim->done) {
+ dberr_t error;
+ trx_t* trx = optim->trx;
+ ulint selected;
+
+ ut_a(ib_vector_size(optim->words) == 0);
+
+ selected = fts_select_index(charset, word->f_str, word->f_len);
+
+ /* Read the index records to optimize. */
+ fetch.total_memory = 0;
+ error = fts_index_fetch_nodes(
+ trx, &graph, &optim->fts_index_table, word,
+ &fetch);
+ ut_ad(fetch.total_memory < fts_result_cache_limit);
+
+ if (error == DB_SUCCESS) {
+ /* There must be some nodes to read. */
+ ut_a(ib_vector_size(optim->words) > 0);
+
+ /* Optimize the nodes that were read and write
+ back to DB. */
+ error = fts_optimize_compact(optim, index, start_time);
+
+ if (error == DB_SUCCESS) {
+ fts_sql_commit(optim->trx);
+ } else {
+ fts_sql_rollback(optim->trx);
+ }
+ }
+
+ ib_vector_reset(optim->words);
+
+ if (error == DB_SUCCESS) {
+ if (!optim->done) {
+ if (!fts_zip_read_word(optim->zip, word)) {
+ optim->done = TRUE;
+ } else if (selected
+ != fts_select_index(
+ charset, word->f_str,
+ word->f_len)
+ && graph) {
+ fts_que_graph_free(graph);
+ graph = NULL;
+ }
+ }
+ } else if (error == DB_LOCK_WAIT_TIMEOUT) {
+ ib::warn() << "Lock wait timeout during optimize."
+ " Retrying!";
+
+ trx->error_state = DB_SUCCESS;
+ } else if (error == DB_DEADLOCK) {
+ ib::warn() << "Deadlock during optimize. Retrying!";
+
+ trx->error_state = DB_SUCCESS;
+ } else {
+ optim->done = TRUE; /* Exit the loop. */
+ }
+ }
+
+ if (graph != NULL) {
+ fts_que_graph_free(graph);
+ }
+}
+
+/**********************************************************************//**
+Optimize is complete. Set the completion time, and reset the optimize
+start string for this FTS index to "".
+@return DB_SUCCESS if all OK */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_optimize_index_completed(
+/*=========================*/
+ fts_optimize_t* optim, /*!< in: optimize instance */
+ dict_index_t* index) /*!< in: table with one FTS index */
+{
+ fts_string_t word;
+ dberr_t error;
+ byte buf[sizeof(ulint)];
+#ifdef FTS_OPTIMIZE_DEBUG
+ time_t end_time = time(NULL);
+
+ error = fts_optimize_set_index_end_time(optim->trx, index, end_time);
+#endif
+
+ /* If we've reached the end of the index then set the start
+ word to the empty string. */
+
+ word.f_len = 0;
+ word.f_str = buf;
+ *word.f_str = '\0';
+
+ error = fts_config_set_index_value(
+ optim->trx, index, FTS_LAST_OPTIMIZED_WORD, &word);
+
+ if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
+ ib::error() << "(" << error << ") while updating"
+ " last optimized word!";
+ }
+
+ return(error);
+}
+
+
+/**********************************************************************//**
+Read the list of words from the FTS auxiliary index that will be
+optimized in this pass.
+@return DB_SUCCESS if all OK */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_optimize_index_read_words(
+/*==========================*/
+ fts_optimize_t* optim, /*!< in: optimize instance */
+ dict_index_t* index, /*!< in: table with one FTS index */
+ fts_string_t* word) /*!< in: buffer to use */
+{
+ dberr_t error = DB_SUCCESS;
+
+ if (optim->del_list_regenerated) {
+ word->f_len = 0;
+ } else {
+
+ /* Get the last word that was optimized from
+ the config table. */
+ error = fts_config_get_index_value(
+ optim->trx, index, FTS_LAST_OPTIMIZED_WORD, word);
+ }
+
+ /* If record not found then we start from the top. */
+ if (error == DB_RECORD_NOT_FOUND) {
+ word->f_len = 0;
+ error = DB_SUCCESS;
+ }
+
+ while (error == DB_SUCCESS) {
+
+ error = fts_index_fetch_words(
+ optim, word, fts_num_word_optimize);
+
+ if (error == DB_SUCCESS) {
+ /* Reset the last optimized word to '' if no
+ more words could be read from the FTS index. */
+ if (optim->zip->n_words == 0) {
+ word->f_len = 0;
+ *word->f_str = 0;
+ }
+
+ break;
+ }
+ }
+
+ return(error);
+}
+
+/**********************************************************************//**
+Run OPTIMIZE on the given FTS index. Note: this can take a very long
+time (hours).
+@return DB_SUCCESS if all OK */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_optimize_index(
+/*===============*/
+ fts_optimize_t* optim, /*!< in: optimize instance */
+ dict_index_t* index) /*!< in: table with one FTS index */
+{
+ fts_string_t word;
+ dberr_t error;
+ byte str[FTS_MAX_WORD_LEN + 1];
+
+ /* Set the current index that we have to optimize. */
+ optim->fts_index_table.index_id = index->id;
+ optim->fts_index_table.charset = fts_index_get_charset(index);
+
+ optim->done = FALSE; /* Optimize until !done */
+
+ /* We need to read the last word optimized so that we start from
+ the next word. */
+ word.f_str = str;
+
+ /* We set the length of word to the size of str since we
+ need to pass the max len info to the fts_get_config_value() function. */
+ word.f_len = sizeof(str) - 1;
+
+ memset(word.f_str, 0x0, word.f_len);
+
+ /* Read the words that will be optimized in this pass. */
+ error = fts_optimize_index_read_words(optim, index, &word);
+
+ if (error == DB_SUCCESS) {
+ int zip_error;
+
+ ut_a(optim->zip->pos == 0);
+ ut_a(optim->zip->zp->total_in == 0);
+ ut_a(optim->zip->zp->total_out == 0);
+
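+		/* The candidate words were compressed into optim->zip when
+		they were read; initialize the zlib stream so that
+		fts_zip_read_word() can decompress them one word at a time. */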
+ zip_error = inflateInit(optim->zip->zp);
+ ut_a(zip_error == Z_OK);
+
+ word.f_len = 0;
+ word.f_str = str;
+
+ /* Read the first word to optimize from the Zip buffer. */
+ if (!fts_zip_read_word(optim->zip, &word)) {
+
+ optim->done = TRUE;
+ } else {
+ fts_optimize_words(optim, index, &word);
+ }
+
+ /* If we couldn't read any records then optimize is
+ complete. Increment the number of indexes that have
+ been optimized and set FTS index optimize state to
+ completed. */
+ if (error == DB_SUCCESS && optim->zip->n_words == 0) {
+
+ error = fts_optimize_index_completed(optim, index);
+
+ if (error == DB_SUCCESS) {
+ ++optim->n_completed;
+ }
+ }
+ }
+
+ return(error);
+}
+
+/**********************************************************************//**
+Delete the document ids from the DELETED and DELETED_CACHE auxiliary tables.
+@return DB_SUCCESS if all OK */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_optimize_purge_deleted_doc_ids(
+/*===============================*/
+ fts_optimize_t* optim) /*!< in: optimize instance */
+{
+ ulint i;
+ pars_info_t* info;
+ que_t* graph;
+ doc_id_t* update;
+ doc_id_t write_doc_id;
+ dberr_t error = DB_SUCCESS;
+ char deleted[MAX_FULL_NAME_LEN];
+ char deleted_cache[MAX_FULL_NAME_LEN];
+
+ info = pars_info_create();
+
+ ut_a(ib_vector_size(optim->to_delete->doc_ids) > 0);
+
+ update = static_cast<doc_id_t*>(
+ ib_vector_get(optim->to_delete->doc_ids, 0));
+
+ /* Convert to "storage" byte order. */
+ fts_write_doc_id((byte*) &write_doc_id, *update);
+
+	/* This initial binding is required so that the SQL parser can
+	resolve the variable names; the actual values are bound again for
+	each doc id inside the loop below. */
+ fts_bind_doc_id(info, "doc_id1", &write_doc_id);
+ fts_bind_doc_id(info, "doc_id2", &write_doc_id);
+
+	/* Make sure the following two names are consistent with the names
+	used in fts_delete_doc_ids_sql. */
+ optim->fts_common_table.suffix = fts_common_tables[3];
+ fts_get_table_name(&optim->fts_common_table, deleted);
+ pars_info_bind_id(info, true, fts_common_tables[3], deleted);
+
+ optim->fts_common_table.suffix = fts_common_tables[4];
+ fts_get_table_name(&optim->fts_common_table, deleted_cache);
+ pars_info_bind_id(info, true, fts_common_tables[4], deleted_cache);
+
+ graph = fts_parse_sql(NULL, info, fts_delete_doc_ids_sql);
+
+ /* Delete the doc ids that were copied at the start. */
+ for (i = 0; i < ib_vector_size(optim->to_delete->doc_ids); ++i) {
+
+ update = static_cast<doc_id_t*>(ib_vector_get(
+ optim->to_delete->doc_ids, i));
+
+ /* Convert to "storage" byte order. */
+ fts_write_doc_id((byte*) &write_doc_id, *update);
+
+ fts_bind_doc_id(info, "doc_id1", &write_doc_id);
+
+ fts_bind_doc_id(info, "doc_id2", &write_doc_id);
+
+ error = fts_eval_sql(optim->trx, graph);
+
+ // FIXME: Check whether delete actually succeeded!
+ if (error != DB_SUCCESS) {
+
+ fts_sql_rollback(optim->trx);
+ break;
+ }
+ }
+
+ fts_que_graph_free(graph);
+
+ return(error);
+}
+
+/**********************************************************************//**
+Delete the document ids from the BEING_DELETED and BEING_DELETED_CACHE tables.
+@return DB_SUCCESS if all OK */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_optimize_purge_deleted_doc_id_snapshot(
+/*=======================================*/
+ fts_optimize_t* optim) /*!< in: optimize instance */
+{
+ dberr_t error;
+ que_t* graph;
+ pars_info_t* info;
+ char being_deleted[MAX_FULL_NAME_LEN];
+ char being_deleted_cache[MAX_FULL_NAME_LEN];
+
+ info = pars_info_create();
+
+	/* Make sure the following two names are consistent with the names
+	used in fts_end_delete_sql. */
+ optim->fts_common_table.suffix = fts_common_tables[0];
+ fts_get_table_name(&optim->fts_common_table, being_deleted);
+ pars_info_bind_id(info, true, fts_common_tables[0], being_deleted);
+
+ optim->fts_common_table.suffix = fts_common_tables[1];
+ fts_get_table_name(&optim->fts_common_table, being_deleted_cache);
+ pars_info_bind_id(info, true, fts_common_tables[1],
+ being_deleted_cache);
+
+ /* Delete the doc ids that were copied to delete pending state at
+ the start of optimize. */
+ graph = fts_parse_sql(NULL, info, fts_end_delete_sql);
+
+ error = fts_eval_sql(optim->trx, graph);
+ fts_que_graph_free(graph);
+
+ return(error);
+}
+
+/**********************************************************************//**
+Count the doc ids that are currently in the BEING_DELETED auxiliary
+table, i.e. the snapshot of deleted doc ids taken by a previous
+optimize run.
+@return number of rows in the BEING_DELETED table */
+static
+ulint
+fts_optimize_being_deleted_count(
+/*=============================*/
+ fts_optimize_t* optim) /*!< in: optimize instance */
+{
+ fts_table_t fts_table;
+
+ FTS_INIT_FTS_TABLE(&fts_table, "BEING_DELETED", FTS_COMMON_TABLE,
+ optim->table);
+
+ return(fts_get_rows_count(&fts_table));
+}
+
+/*********************************************************************//**
+Copy the deleted doc ids that will be purged during this optimize run
+to the being deleted FTS auxiliary tables. The transaction is committed
+upon successful copy and rolled back on DB_DUPLICATE_KEY error.
+@return DB_SUCCESS if all OK */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_optimize_create_deleted_doc_id_snapshot(
+/*========================================*/
+ fts_optimize_t* optim) /*!< in: optimize instance */
+{
+ dberr_t error;
+ que_t* graph;
+ pars_info_t* info;
+ char being_deleted[MAX_FULL_NAME_LEN];
+ char deleted[MAX_FULL_NAME_LEN];
+ char being_deleted_cache[MAX_FULL_NAME_LEN];
+ char deleted_cache[MAX_FULL_NAME_LEN];
+
+ info = pars_info_create();
+
+	/* Make sure the following four names are consistent with the names
+	used in fts_init_delete_sql. */
+ optim->fts_common_table.suffix = fts_common_tables[0];
+ fts_get_table_name(&optim->fts_common_table, being_deleted);
+ pars_info_bind_id(info, true, fts_common_tables[0], being_deleted);
+
+ optim->fts_common_table.suffix = fts_common_tables[3];
+ fts_get_table_name(&optim->fts_common_table, deleted);
+ pars_info_bind_id(info, true, fts_common_tables[3], deleted);
+
+ optim->fts_common_table.suffix = fts_common_tables[1];
+ fts_get_table_name(&optim->fts_common_table, being_deleted_cache);
+ pars_info_bind_id(info, true, fts_common_tables[1],
+ being_deleted_cache);
+
+ optim->fts_common_table.suffix = fts_common_tables[4];
+ fts_get_table_name(&optim->fts_common_table, deleted_cache);
+ pars_info_bind_id(info, true, fts_common_tables[4], deleted_cache);
+
+ /* Move doc_ids that are to be deleted to state being deleted. */
+ graph = fts_parse_sql(NULL, info, fts_init_delete_sql);
+
+ error = fts_eval_sql(optim->trx, graph);
+
+ fts_que_graph_free(graph);
+
+ if (error != DB_SUCCESS) {
+ fts_sql_rollback(optim->trx);
+ } else {
+ fts_sql_commit(optim->trx);
+ }
+
+ optim->del_list_regenerated = TRUE;
+
+ return(error);
+}
+
+/*********************************************************************//**
+Read in the document ids that are to be purged during optimize. The
+transaction is committed after a successful read.
+@return DB_SUCCESS if all OK */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_optimize_read_deleted_doc_id_snapshot(
+/*======================================*/
+ fts_optimize_t* optim) /*!< in: optimize instance */
+{
+ dberr_t error;
+
+ optim->fts_common_table.suffix = "BEING_DELETED";
+
+ /* Read the doc_ids to delete. */
+ error = fts_table_fetch_doc_ids(
+ optim->trx, &optim->fts_common_table, optim->to_delete);
+
+ if (error == DB_SUCCESS) {
+
+ optim->fts_common_table.suffix = "BEING_DELETED_CACHE";
+
+ /* Read additional doc_ids to delete. */
+ error = fts_table_fetch_doc_ids(
+ optim->trx, &optim->fts_common_table, optim->to_delete);
+ }
+
+ if (error != DB_SUCCESS) {
+
+ fts_doc_ids_free(optim->to_delete);
+ optim->to_delete = NULL;
+ }
+
+ return(error);
+}
+
+/*********************************************************************//**
+Optimize all the FTS indexes, skipping those that have already been
+optimized, since the FTS auxiliary indexes are not guaranteed to be
+of the same cardinality.
+@return DB_SUCCESS if all OK */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_optimize_indexes(
+/*=================*/
+ fts_optimize_t* optim) /*!< in: optimize instance */
+{
+ ulint i;
+ dberr_t error = DB_SUCCESS;
+ fts_t* fts = optim->table->fts;
+
+ /* Optimize the FTS indexes. */
+ for (i = 0; i < ib_vector_size(fts->indexes); ++i) {
+ dict_index_t* index;
+
+#ifdef FTS_OPTIMIZE_DEBUG
+ time_t end_time;
+ time_t start_time;
+
+ /* Get the start and end optimize times for this index. */
+ error = fts_optimize_get_index_start_time(
+ optim->trx, index, &start_time);
+
+ if (error != DB_SUCCESS) {
+ break;
+ }
+
+ error = fts_optimize_get_index_end_time(
+ optim->trx, index, &end_time);
+
+ if (error != DB_SUCCESS) {
+ break;
+ }
+
+ /* Start time will be 0 only for the first time or after
+ completing the optimization of all FTS indexes. */
+ if (start_time == 0) {
+ start_time = time(NULL);
+
+ error = fts_optimize_set_index_start_time(
+ optim->trx, index, start_time);
+ }
+
+ /* Check if this index needs to be optimized or not. */
+ if (difftime(end_time, start_time) < 0) {
+ error = fts_optimize_index(optim, index);
+
+ if (error != DB_SUCCESS) {
+ break;
+ }
+ } else {
+ ++optim->n_completed;
+ }
+#endif
+ index = static_cast<dict_index_t*>(
+ ib_vector_getp(fts->indexes, i));
+ error = fts_optimize_index(optim, index);
+ }
+
+ if (error == DB_SUCCESS) {
+ fts_sql_commit(optim->trx);
+ } else {
+ fts_sql_rollback(optim->trx);
+ }
+
+ return(error);
+}
+
+/*********************************************************************//**
+Cleanup the snapshot tables and the master deleted table.
+@return DB_SUCCESS if all OK */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_optimize_purge_snapshot(
+/*========================*/
+ fts_optimize_t* optim) /*!< in: optimize instance */
+{
+ dberr_t error;
+
+ /* Delete the doc ids from the master deleted tables, that were
+ in the snapshot that was taken at the start of optimize. */
+ error = fts_optimize_purge_deleted_doc_ids(optim);
+
+ if (error == DB_SUCCESS) {
+ /* Destroy the deleted doc id snapshot. */
+ error = fts_optimize_purge_deleted_doc_id_snapshot(optim);
+ }
+
+ if (error == DB_SUCCESS) {
+ fts_sql_commit(optim->trx);
+ } else {
+ fts_sql_rollback(optim->trx);
+ }
+
+ return(error);
+}
+
+/*********************************************************************//**
+Reset the start time to 0 so that a new optimize can be started.
+@return DB_SUCCESS if all OK */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_optimize_reset_start_time(
+/*==========================*/
+ fts_optimize_t* optim) /*!< in: optimize instance */
+{
+ dberr_t error = DB_SUCCESS;
+#ifdef FTS_OPTIMIZE_DEBUG
+ fts_t* fts = optim->table->fts;
+
+ /* Optimization should have been completed for all indexes. */
+ ut_a(optim->n_completed == ib_vector_size(fts->indexes));
+
+ for (uint i = 0; i < ib_vector_size(fts->indexes); ++i) {
+ dict_index_t* index;
+
+ time_t start_time = 0;
+
+ /* Reset the start time to 0 for this index. */
+ error = fts_optimize_set_index_start_time(
+ optim->trx, index, start_time);
+
+ index = static_cast<dict_index_t*>(
+ ib_vector_getp(fts->indexes, i));
+ }
+#endif
+
+ if (error == DB_SUCCESS) {
+ fts_sql_commit(optim->trx);
+ } else {
+ fts_sql_rollback(optim->trx);
+ }
+
+ return(error);
+}
+
+/*********************************************************************//**
+Run OPTIMIZE on the given table by a background thread.
+@return DB_SUCCESS if all OK */
+static MY_ATTRIBUTE((nonnull))
+dberr_t
+fts_optimize_table_bk(
+/*==================*/
+	fts_slot_t*	slot)	/*!< in: table to optimize */
+{
+ const time_t now = time(NULL);
+ const ulint interval = ulint(now - slot->last_run);
+
+ /* Avoid optimizing tables that were optimized recently. */
+ if (slot->last_run > 0
+ && lint(interval) >= 0
+ && interval < FTS_OPTIMIZE_INTERVAL_IN_SECS) {
+
+ return(DB_SUCCESS);
+ }
+
+ dict_table_t* table = slot->table;
+ dberr_t error;
+
+ if (table->is_accessible()
+ && table->fts && table->fts->cache
+ && table->fts->cache->deleted >= FTS_OPTIMIZE_THRESHOLD) {
+ error = fts_optimize_table(table);
+
+ slot->last_run = time(NULL);
+
+ if (error == DB_SUCCESS) {
+ slot->running = false;
+ slot->completed = slot->last_run;
+ }
+ } else {
+ /* Note time this run completed. */
+ slot->last_run = now;
+ error = DB_SUCCESS;
+ }
+
+ return(error);
+}
+/*********************************************************************//**
+Run OPTIMIZE on the given table.
+@return DB_SUCCESS if all OK */
+dberr_t
+fts_optimize_table(
+/*===============*/
+	dict_table_t*	table)	/*!< in: table to optimize */
+{
+ if (srv_read_only_mode) {
+ return DB_READ_ONLY;
+ }
+
+ dberr_t error = DB_SUCCESS;
+ fts_optimize_t* optim = NULL;
+ fts_t* fts = table->fts;
+
+ if (UNIV_UNLIKELY(fts_enable_diag_print)) {
+ ib::info() << "FTS start optimize " << table->name;
+ }
+
+ optim = fts_optimize_create(table);
+
+ // FIXME: Call this only at the start of optimize, currently we
+ // rely on DB_DUPLICATE_KEY to handle corrupting the snapshot.
+
+ /* Check whether there are still records in BEING_DELETED table */
+ if (fts_optimize_being_deleted_count(optim) == 0) {
+ /* Take a snapshot of the deleted document ids, they are copied
+ to the BEING_ tables. */
+ error = fts_optimize_create_deleted_doc_id_snapshot(optim);
+ }
+
+ /* A duplicate error is OK, since we don't erase the
+ doc ids from the being deleted state until all FTS
+ indexes have been optimized. */
+ if (error == DB_DUPLICATE_KEY) {
+ error = DB_SUCCESS;
+ }
+
+ if (error == DB_SUCCESS) {
+
+ /* These document ids will be filtered out during the
+ index optimization phase. They are in the snapshot that we
+ took above, at the start of the optimize. */
+ error = fts_optimize_read_deleted_doc_id_snapshot(optim);
+
+ if (error == DB_SUCCESS) {
+
+ /* Commit the read of being deleted
+ doc ids transaction. */
+ fts_sql_commit(optim->trx);
+
+			/* We do the optimization only if there are
+			deleted records to be cleaned up. */
+ if (ib_vector_size(optim->to_delete->doc_ids) > 0) {
+ error = fts_optimize_indexes(optim);
+ }
+
+ } else {
+ ut_a(optim->to_delete == NULL);
+ }
+
+ /* Only after all indexes have been optimized can we
+ delete the (snapshot) doc ids in the pending delete,
+ and master deleted tables. */
+ if (error == DB_SUCCESS
+ && optim->n_completed == ib_vector_size(fts->indexes)) {
+
+ if (UNIV_UNLIKELY(fts_enable_diag_print)) {
+ ib::info() << "FTS_OPTIMIZE: Completed"
+ " Optimize, cleanup DELETED table";
+ }
+
+ if (ib_vector_size(optim->to_delete->doc_ids) > 0) {
+
+ /* Purge the doc ids that were in the
+ snapshot from the snapshot tables and
+ the master deleted table. */
+ error = fts_optimize_purge_snapshot(optim);
+ }
+
+ if (error == DB_SUCCESS) {
+ /* Reset the start time of all the FTS indexes
+ so that optimize can be restarted. */
+ error = fts_optimize_reset_start_time(optim);
+ }
+ }
+ }
+
+ fts_optimize_free(optim);
+
+ if (UNIV_UNLIKELY(fts_enable_diag_print)) {
+ ib::info() << "FTS end optimize " << table->name;
+ }
+
+ return(error);
+}
+
+/********************************************************************//**
+Create a message for the FTS OPTIMIZER work queue.
+@return new message instance */
+static
+fts_msg_t*
+fts_optimize_create_msg(
+/*====================*/
+ fts_msg_type_t type, /*!< in: type of message */
+ void* ptr) /*!< in: message payload */
+{
+ mem_heap_t* heap;
+ fts_msg_t* msg;
+
+ heap = mem_heap_create(sizeof(*msg) + sizeof(ib_list_node_t) + 16);
+ msg = static_cast<fts_msg_t*>(mem_heap_alloc(heap, sizeof(*msg)));
+
+ msg->ptr = ptr;
+ msg->type = type;
+ msg->heap = heap;
+
+ return(msg);
+}
+
+/** Add a message to the work queue and signal the thread pool. */
+static void add_msg(fts_msg_t *msg, bool wq_locked= false)
+{
+ ib_wqueue_add(fts_optimize_wq, msg, msg->heap, wq_locked);
+ srv_thread_pool->submit_task(&task);
+}
+
+/**
+Called by the "idle" timer. Submits the optimize task, which will only
+recalculate is_sync_needed when the queue is empty.
+*/
+static void timer_callback(void*)
+{
+ srv_thread_pool->submit_task(&task);
+}
+
+/** Add the table to the OPTIMIZER's list.
+@param[in] table table to add */
+void fts_optimize_add_table(dict_table_t* table)
+{
+ fts_msg_t* msg;
+
+ if (!fts_optimize_wq) {
+ return;
+ }
+
+ /* Make sure table with FTS index cannot be evicted */
+ dict_table_prevent_eviction(table);
+
+ msg = fts_optimize_create_msg(FTS_MSG_ADD_TABLE, table);
+
+ mutex_enter(&fts_optimize_wq->mutex);
+
+ add_msg(msg, true);
+
+ table->fts->in_queue = true;
+
+ mutex_exit(&fts_optimize_wq->mutex);
+}
+
+/**********************************************************************//**
+Remove the table from the OPTIMIZER's list. We do wait for
+acknowledgement from the consumer of the message. */
+void
+fts_optimize_remove_table(
+/*======================*/
+ dict_table_t* table) /*!< in: table to remove */
+{
+ fts_msg_t* msg;
+ os_event_t event;
+ fts_msg_del_t* remove;
+
+	/* If the optimize system is not yet initialized, return. */
+ if (!fts_optimize_wq) {
+ return;
+ }
+
+	/* The FTS optimizer thread has already exited. */
+ if (fts_opt_start_shutdown) {
+ ib::info() << "Try to remove table " << table->name
+ << " after FTS optimize thread exiting.";
+ /* If the table can't be removed then wait till
+ fts optimize thread shuts down */
+ while (fts_optimize_wq) {
+ os_thread_sleep(10000);
+ }
+ return;
+ }
+
+ mutex_enter(&fts_optimize_wq->mutex);
+
+ if (!table->fts->in_queue) {
+ mutex_exit(&fts_optimize_wq->mutex);
+ return;
+ }
+
+ msg = fts_optimize_create_msg(FTS_MSG_DEL_TABLE, NULL);
+
+ /* We will wait on this event until signalled by the consumer. */
+ event = os_event_create(0);
+
+ remove = static_cast<fts_msg_del_t*>(
+ mem_heap_alloc(msg->heap, sizeof(*remove)));
+
+ remove->table = table;
+ remove->event = event;
+ msg->ptr = remove;
+
+ ut_ad(!mutex_own(&dict_sys.mutex));
+
+ add_msg(msg, true);
+
+ mutex_exit(&fts_optimize_wq->mutex);
+
+ os_event_wait(event);
+
+ os_event_destroy(event);
+
+#ifdef UNIV_DEBUG
+ if (!fts_opt_start_shutdown) {
+ mutex_enter(&fts_optimize_wq->mutex);
+ ut_ad(!table->fts->in_queue);
+ mutex_exit(&fts_optimize_wq->mutex);
+ }
+#endif /* UNIV_DEBUG */
+}
+
+/** Send sync fts cache for the table.
+@param[in] table table to sync */
+void
+fts_optimize_request_sync_table(
+ dict_table_t* table)
+{
+	/* If the optimize system is not yet initialized, return. */
+ if (!fts_optimize_wq) {
+ return;
+ }
+
+	/* The FTS optimizer thread has already exited. */
+ if (fts_opt_start_shutdown) {
+ ib::info() << "Try to sync table " << table->name
+ << " after FTS optimize thread exiting.";
+ return;
+ }
+
+ mutex_enter(&fts_optimize_wq->mutex);
+
+ if (table->fts->sync_message) {
+		/* If the table already has a SYNC message in the
+		fts_optimize_wq queue then ignore it. */
+ mutex_exit(&fts_optimize_wq->mutex);
+ return;
+ }
+
+ fts_msg_t* msg = fts_optimize_create_msg(FTS_MSG_SYNC_TABLE, table);
+
+ add_msg(msg, true);
+
+ table->fts->sync_message = true;
+
+ mutex_exit(&fts_optimize_wq->mutex);
+}
+
+/** Add a table to fts_slots if it doesn't already exist. */
+static bool fts_optimize_new_table(dict_table_t* table)
+{
+ ut_ad(table);
+
+ ulint i;
+ fts_slot_t* slot;
+ fts_slot_t* empty = NULL;
+
+ /* Search for duplicates, also find a free slot if one exists. */
+ for (i = 0; i < ib_vector_size(fts_slots); ++i) {
+
+ slot = static_cast<fts_slot_t*>(ib_vector_get(fts_slots, i));
+
+ if (!slot->table) {
+ empty = slot;
+ } else if (slot->table == table) {
+ /* Already exists in our optimize queue. */
+ return false;
+ }
+ }
+
+ slot = empty ? empty : static_cast<fts_slot_t*>(
+ ib_vector_push(fts_slots, NULL));
+
+ memset(slot, 0x0, sizeof(*slot));
+
+ slot->table = table;
+ return true;
+}
+
+/** Remove a table from fts_slots if it exists.
+@param[in,out] table table to be removed from fts_slots */
+static bool fts_optimize_del_table(const dict_table_t* table)
+{
+ ut_ad(table);
+ for (ulint i = 0; i < ib_vector_size(fts_slots); ++i) {
+ fts_slot_t* slot;
+
+ slot = static_cast<fts_slot_t*>(ib_vector_get(fts_slots, i));
+
+ if (slot->table == table) {
+ if (UNIV_UNLIKELY(fts_enable_diag_print)) {
+ ib::info() << "FTS Optimize Removing table "
+ << table->name;
+ }
+
+ mutex_enter(&fts_optimize_wq->mutex);
+ slot->table->fts->in_queue = false;
+ mutex_exit(&fts_optimize_wq->mutex);
+ slot->table = NULL;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/**********************************************************************//**
+Calculate how many tables in fts_slots need to be optimized.
+@return no. of tables to optimize */
+static ulint fts_optimize_how_many()
+{
+ ulint n_tables = 0;
+ const time_t current_time = time(NULL);
+
+ for (ulint i = 0; i < ib_vector_size(fts_slots); ++i) {
+ const fts_slot_t* slot = static_cast<const fts_slot_t*>(
+ ib_vector_get_const(fts_slots, i));
+ if (!slot->table) {
+ continue;
+ }
+
+ const time_t end = slot->running
+ ? slot->last_run : slot->completed;
+ ulint interval = ulint(current_time - end);
+
+ if (lint(interval) < 0
+ || interval >= FTS_OPTIMIZE_INTERVAL_IN_SECS) {
+ ++n_tables;
+ }
+ }
+
+ return(n_tables);
+}
+
+/**********************************************************************//**
+Check if the total memory used by all FTS tables exceeds the maximum limit.
+@return true if a sync is needed, false otherwise */
+static bool fts_is_sync_needed()
+{
+ ulint total_memory = 0;
+ const time_t now = time(NULL);
+ double time_diff = difftime(now, last_check_sync_time);
+
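+	/* Skip the scan if a sync is already pending, or if the last
+	check was less than five seconds ago. */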
+ if (fts_need_sync || (time_diff >= 0 && time_diff < 5)) {
+ return(false);
+ }
+
+ last_check_sync_time = now;
+
+ for (ulint i = 0; i < ib_vector_size(fts_slots); ++i) {
+ const fts_slot_t* slot = static_cast<const fts_slot_t*>(
+ ib_vector_get_const(fts_slots, i));
+
+ if (!slot->table) {
+ continue;
+ }
+
+ if (slot->table->fts && slot->table->fts->cache) {
+ total_memory += slot->table->fts->cache->total_size;
+ }
+
+ if (total_memory > fts_max_total_cache_size) {
+ return(true);
+ }
+ }
+
+ return(false);
+}
+
+/** Sync fts cache of a table
+@param[in,out] table table to be synced
+@param[in] process_message processing messages from fts_optimize_wq */
+static void fts_optimize_sync_table(dict_table_t *table,
+ bool process_message= false)
+{
+ MDL_ticket* mdl_ticket= nullptr;
+ dict_table_t *sync_table= dict_acquire_mdl_shared<true>(table, fts_opt_thd,
+ &mdl_ticket);
+
+ if (!sync_table)
+ return;
+
+ if (sync_table->fts && sync_table->fts->cache && sync_table->is_accessible())
+ {
+ fts_sync_table(sync_table, false);
+ if (process_message)
+ {
+ mutex_enter(&fts_optimize_wq->mutex);
+ sync_table->fts->sync_message = false;
+ mutex_exit(&fts_optimize_wq->mutex);
+ }
+ }
+
+ DBUG_EXECUTE_IF("ib_optimize_wq_hang", os_thread_sleep(6000000););
+
+ if (mdl_ticket)
+ dict_table_close(sync_table, false, false, fts_opt_thd, mdl_ticket);
+}
+
+/**********************************************************************//**
+Optimize all FTS tables. */
+static void fts_optimize_callback(void *)
+{
+ ut_ad(!srv_read_only_mode);
+
+ if (!fts_optimize_wq) {
+ /* Possibly timer initiated callback, can come after FTS_MSG_STOP.*/
+ return;
+ }
+
+ static ulint current = 0;
+ static ibool done = FALSE;
+ static ulint n_tables = ib_vector_size(fts_slots);
+ static ulint n_optimize = 0;
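+	/* This state is kept in statics so that it survives across task
+	invocations: the callback returns when the queue is empty and is
+	re-submitted later by the timer or by add_msg(). */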
+
+ while (!done && srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) {
+ /* If there is no message in the queue and we have tables
+ to optimize then optimize the tables. */
+
+ if (!done
+ && ib_wqueue_is_empty(fts_optimize_wq)
+ && n_tables > 0
+ && n_optimize > 0) {
+ fts_slot_t* slot = static_cast<fts_slot_t*>(
+ ib_vector_get(fts_slots, current));
+
+ /* Handle the case of empty slots. */
+ if (slot->table) {
+ slot->running = true;
+ fts_optimize_table_bk(slot);
+ }
+
+ /* Wrap around the counter. */
+ if (++current >= ib_vector_size(fts_slots)) {
+ n_optimize = fts_optimize_how_many();
+ current = 0;
+ }
+
+ } else if (n_optimize == 0
+ || !ib_wqueue_is_empty(fts_optimize_wq)) {
+ fts_msg_t* msg = static_cast<fts_msg_t*>
+ (ib_wqueue_nowait(fts_optimize_wq));
+ /* Timeout ? */
+ if (msg == NULL) {
+ if (fts_is_sync_needed()) {
+ fts_need_sync = true;
+ }
+ if (n_tables)
+ timer->set_time(5000, 0);
+ return;
+ }
+
+ switch (msg->type) {
+ case FTS_MSG_STOP:
+ done = TRUE;
+ break;
+
+ case FTS_MSG_ADD_TABLE:
+ ut_a(!done);
+ if (fts_optimize_new_table(
+ static_cast<dict_table_t*>(
+ msg->ptr))) {
+ ++n_tables;
+ }
+ break;
+
+ case FTS_MSG_DEL_TABLE:
+ if (fts_optimize_del_table(
+ static_cast<fts_msg_del_t*>(
+ msg->ptr)->table)) {
+ --n_tables;
+ }
+
+ /* Signal the producer that we have
+ removed the table. */
+ os_event_set(
+ ((fts_msg_del_t*) msg->ptr)->event);
+ break;
+
+ case FTS_MSG_SYNC_TABLE:
+ DBUG_EXECUTE_IF(
+ "fts_instrument_msg_sync_sleep",
+ os_thread_sleep(300000););
+
+ fts_optimize_sync_table(
+ static_cast<dict_table_t*>(msg->ptr),
+ true);
+ break;
+
+ default:
+ ut_error;
+ }
+
+ mem_heap_free(msg->heap);
+ n_optimize = done ? 0 : fts_optimize_how_many();
+ }
+ }
+
+	/* The server is being shut down; sync the data from the FTS cache
+	to disk if needed. */
+ if (n_tables > 0) {
+ for (ulint i = 0; i < ib_vector_size(fts_slots); i++) {
+ fts_slot_t* slot = static_cast<fts_slot_t*>(
+ ib_vector_get(fts_slots, i));
+
+ if (slot->table) {
+ fts_optimize_sync_table(slot->table);
+ }
+ }
+ }
+
+ ib_vector_free(fts_slots);
+ fts_slots = NULL;
+
+ ib_wqueue_free(fts_optimize_wq);
+ fts_optimize_wq = NULL;
+
+ innobase_destroy_background_thd(fts_opt_thd);
+ ib::info() << "FTS optimize thread exiting.";
+
+ os_event_set(fts_opt_shutdown_event);
+}
+
+/**********************************************************************//**
+Startup the optimize thread and create the work queue. */
+void
+fts_optimize_init(void)
+/*===================*/
+{
+ mem_heap_t* heap;
+ ib_alloc_t* heap_alloc;
+
+ ut_ad(!srv_read_only_mode);
+
+ /* For now we only support one optimize thread. */
+ ut_a(!fts_optimize_wq);
+
+ /* Create FTS optimize work queue */
+ fts_optimize_wq = ib_wqueue_create();
+ ut_a(fts_optimize_wq != NULL);
+ timer = srv_thread_pool->create_timer(timer_callback);
+
+ /* Create FTS vector to store fts_slot_t */
+ heap = mem_heap_create(sizeof(dict_table_t*) * 64);
+ heap_alloc = ib_heap_allocator_create(heap);
+ fts_slots = ib_vector_create(heap_alloc, sizeof(fts_slot_t), 4);
+
+ fts_opt_thd = innobase_create_background_thd("InnoDB FTS optimizer");
+ /* Add fts tables to fts_slots which could be skipped
+ during dict_load_table_one() because fts_optimize_thread
+ wasn't even started. */
+ mutex_enter(&dict_sys.mutex);
+ for (dict_table_t* table = UT_LIST_GET_FIRST(dict_sys.table_LRU);
+ table != NULL;
+ table = UT_LIST_GET_NEXT(table_LRU, table)) {
+ if (!table->fts || !dict_table_has_fts_index(table)) {
+ continue;
+ }
+
+ /* fts_optimize_thread is not started yet. So there is no
+ need to acquire fts_optimize_wq->mutex for adding the fts
+ table to the fts slots. */
+ ut_ad(!table->can_be_evicted);
+ fts_optimize_new_table(table);
+ table->fts->in_queue = true;
+ }
+ mutex_exit(&dict_sys.mutex);
+
+ fts_opt_shutdown_event = os_event_create(0);
+ last_check_sync_time = time(NULL);
+}
+
+/** Shutdown fts optimize thread. */
+void
+fts_optimize_shutdown()
+{
+ ut_ad(!srv_read_only_mode);
+
+ fts_msg_t* msg;
+
+ /* If there is an ongoing activity on dictionary, such as
+ srv_master_evict_from_table_cache(), wait for it */
+ dict_mutex_enter_for_mysql();
+
+	/* Tell the FTS optimizer system that we are exiting the
+	optimizer thread; messages sent after this point will not be
+	processed. */
+ fts_opt_start_shutdown = true;
+ dict_mutex_exit_for_mysql();
+
+	/* We tell the OPTIMIZE thread to switch to the done state; we
+	can't delete the work queue here because the add thread still
+	needs to deregister the FTS tables. */
+ timer->disarm();
+ task_group.cancel_pending(&task);
+
+ msg = fts_optimize_create_msg(FTS_MSG_STOP, NULL);
+
+ add_msg(msg);
+
+ os_event_wait(fts_opt_shutdown_event);
+
+ os_event_destroy(fts_opt_shutdown_event);
+ fts_opt_thd = NULL;
+ delete timer;
+ timer = NULL;
+}
+
+/** Sync the table during commit phase
+@param[in] table table to be synced */
+void fts_sync_during_ddl(dict_table_t* table)
+{
+ mutex_enter(&fts_optimize_wq->mutex);
+ if (!table->fts->sync_message)
+ {
+ mutex_exit(&fts_optimize_wq->mutex);
+ return;
+ }
+
+ mutex_exit(&fts_optimize_wq->mutex);
+ fts_sync_table(table, false);
+
+ mutex_enter(&fts_optimize_wq->mutex);
+ table->fts->sync_message = false;
+ mutex_exit(&fts_optimize_wq->mutex);
+}
diff --git a/storage/innobase/fts/fts0pars.cc b/storage/innobase/fts/fts0pars.cc
new file mode 100644
index 00000000..56cc8d60
--- /dev/null
+++ b/storage/innobase/fts/fts0pars.cc
@@ -0,0 +1,2007 @@
+/* A Bison parser, made by GNU Bison 2.5. */
+
+/* Bison implementation for Yacc-like parsers in C
+
+ Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* As a special exception, you may create a larger work that contains
+ part or all of the Bison parser skeleton and distribute that work
+ under terms of your choice, so long as that work isn't itself a
+ parser generator using the skeleton or a modified version thereof
+ as a parser skeleton. Alternatively, if you modify or redistribute
+ the parser skeleton itself, you may (at your option) remove this
+ special exception, which will cause the skeleton and the resulting
+ Bison output files to be licensed under the GNU General Public
+ License without this special exception.
+
+ This special exception was added by the Free Software Foundation in
+ version 2.2 of Bison. */
+
+/* C LALR(1) parser skeleton written by Richard Stallman, by
+ simplifying the original so-called "semantic" parser. */
+
+/* All symbols defined below should begin with yy or YY, to avoid
+ infringing on user name space. This should be done even for local
+ variables, as they might otherwise be expanded by user macros.
+ There are some unavoidable exceptions within include files to
+ define necessary library symbols; they are noted "INFRINGES ON
+ USER NAME SPACE" below. */
+
+/* Identify Bison output. */
+#define YYBISON 1
+
+/* Bison version. */
+#define YYBISON_VERSION "2.5"
+
+/* Skeleton name. */
+#define YYSKELETON_NAME "yacc.c"
+
+/* Pure parsers. */
+#define YYPURE 1
+
+/* Push parsers. */
+#define YYPUSH 0
+
+/* Pull parsers. */
+#define YYPULL 1
+
+/* Using locations. */
+#define YYLSP_NEEDED 0
+
+/* Substitute the variable and function names. */
+#define yyparse ftsparse
+#define yylex ftslex
+#define yyerror ftserror
+#define yylval ftslval
+#define yychar ftschar
+#define yydebug ftsdebug
+#define yynerrs ftsnerrs
+
+
+/* Copy the first part of user declarations. */
+
+/* Line 268 of yacc.c */
+#line 26 "fts0pars.y"
+
+#include "ha_prototypes.h"
+#include "mem0mem.h"
+#include "fts0ast.h"
+#include "fts0blex.h"
+#include "fts0tlex.h"
+#include "fts0pars.h"
+#include <my_sys.h>
+
+extern int fts_lexer(YYSTYPE*, fts_lexer_t*);
+extern int fts_blexer(YYSTYPE*, yyscan_t);
+extern int fts_tlexer(YYSTYPE*, yyscan_t);
+
+
+
+extern int ftserror(const char* p);
+
+/* Required for reentrant parser */
+#define ftslex fts_lexer
+
+#define YYERROR_VERBOSE
+
+/* For passing an argument to yyparse() */
+#define YYPARSE_PARAM state
+#define YYLEX_PARAM ((fts_ast_state_t*) state)->lexer
+
+#define YYTOKENFREE(token) fts_ast_string_free((token))
+
+
+typedef int (*fts_scanner)(YYSTYPE* val, yyscan_t yyscanner);
+
+struct fts_lexer_t {
+ fts_scanner scanner;
+ void* yyscanner;
+};
+
+
+
+/* Line 268 of yacc.c */
+#line 115 "fts0pars.cc"
+
+/* Enabling traces. */
+#ifndef YYDEBUG
+# define YYDEBUG 0
+#endif
+
+/* Enabling verbose error messages. */
+#ifdef YYERROR_VERBOSE
+# undef YYERROR_VERBOSE
+# define YYERROR_VERBOSE 1
+#else
+# define YYERROR_VERBOSE 0
+#endif
+
+/* Enabling the token table. */
+#ifndef YYTOKEN_TABLE
+# define YYTOKEN_TABLE 0
+#endif
+
+
+/* Tokens. */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+ /* Put the tokens into the symbol table, so that GDB and other debuggers
+ know about them. */
+ enum yytokentype {
+ FTS_OPER = 258,
+ FTS_TEXT = 259,
+ FTS_TERM = 260,
+ FTS_NUMB = 261
+ };
+#endif
+
+
+
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+typedef union YYSTYPE
+{
+
+/* Line 293 of yacc.c */
+#line 61 "fts0pars.y"
+
+ int oper;
+ fts_ast_string_t* token;
+ fts_ast_node_t* node;
+
+
+
+/* Line 293 of yacc.c */
+#line 165 "fts0pars.cc"
+} YYSTYPE;
+# define YYSTYPE_IS_TRIVIAL 1
+# define yystype YYSTYPE /* obsolescent; will be withdrawn */
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+
+/* Copy the second part of user declarations. */
+
+
+/* Line 343 of yacc.c */
+#line 177 "fts0pars.cc"
+
+#ifdef short
+# undef short
+#endif
+
+#ifdef YYTYPE_UINT8
+typedef YYTYPE_UINT8 yytype_uint8;
+#else
+typedef unsigned char yytype_uint8;
+#endif
+
+#ifdef YYTYPE_INT8
+typedef YYTYPE_INT8 yytype_int8;
+#elif (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+typedef signed char yytype_int8;
+#else
+typedef short int yytype_int8;
+#endif
+
+#ifdef YYTYPE_UINT16
+typedef YYTYPE_UINT16 yytype_uint16;
+#else
+typedef unsigned short int yytype_uint16;
+#endif
+
+#ifdef YYTYPE_INT16
+typedef YYTYPE_INT16 yytype_int16;
+#else
+typedef short int yytype_int16;
+#endif
+
+#ifndef YYSIZE_T
+# ifdef __SIZE_TYPE__
+# define YYSIZE_T __SIZE_TYPE__
+# elif defined size_t
+# define YYSIZE_T size_t
+# elif ! defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+# include <stddef.h> /* INFRINGES ON USER NAME SPACE */
+# define YYSIZE_T size_t
+# else
+# define YYSIZE_T unsigned int
+# endif
+#endif
+
+#define YYSIZE_MAXIMUM ((YYSIZE_T) -1)
+
+#ifndef YY_
+# if defined YYENABLE_NLS && YYENABLE_NLS
+# if ENABLE_NLS
+# include <libintl.h> /* INFRINGES ON USER NAME SPACE */
+# define YY_(msgid) dgettext ("bison-runtime", msgid)
+# endif
+# endif
+# ifndef YY_
+# define YY_(msgid) msgid
+# endif
+#endif
+
+/* Suppress unused-variable warnings by "using" E. */
+#if ! defined lint || defined __GNUC__
+# define YYUSE(e) ((void) (e))
+#else
+# define YYUSE(e) /* empty */
+#endif
+
+/* Identity function, used to suppress warnings about constant conditions. */
+#ifndef lint
+# define YYID(n) (n)
+#else
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static int
+YYID (int yyi)
+#else
+static int
+YYID (yyi)
+ int yyi;
+#endif
+{
+ return yyi;
+}
+#endif
+
+#if ! defined yyoverflow || YYERROR_VERBOSE
+
+/* The parser invokes alloca or malloc; define the necessary symbols. */
+
+# ifdef YYSTACK_USE_ALLOCA
+# if YYSTACK_USE_ALLOCA
+# ifdef __GNUC__
+# define YYSTACK_ALLOC __builtin_alloca
+# elif defined __BUILTIN_VA_ARG_INCR
+# include <alloca.h> /* INFRINGES ON USER NAME SPACE */
+# elif defined _MSC_VER
+# include <malloc.h> /* INFRINGES ON USER NAME SPACE */
+# define alloca _alloca
+# else
+# define YYSTACK_ALLOC alloca
+# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+# ifndef EXIT_SUCCESS
+# define EXIT_SUCCESS 0
+# endif
+# endif
+# endif
+# endif
+# endif
+
+# ifdef YYSTACK_ALLOC
+ /* Pacify GCC's `empty if-body' warning. */
+# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0))
+# ifndef YYSTACK_ALLOC_MAXIMUM
+ /* The OS might guarantee only one guard page at the bottom of the stack,
+ and a page size can be as small as 4096 bytes. So we cannot safely
+ invoke alloca (N) if N exceeds 4096. Use a slightly smaller number
+ to allow for a few compiler-allocated temporary stack slots. */
+# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */
+# endif
+# else
+# define YYSTACK_ALLOC YYMALLOC
+# define YYSTACK_FREE YYFREE
+# ifndef YYSTACK_ALLOC_MAXIMUM
+# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM
+# endif
+# if (defined __cplusplus && ! defined EXIT_SUCCESS \
+ && ! ((defined YYMALLOC || defined malloc) \
+ && (defined YYFREE || defined free)))
+# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+# ifndef EXIT_SUCCESS
+# define EXIT_SUCCESS 0
+# endif
+# endif
+# ifndef YYMALLOC
+# define YYMALLOC malloc
+# if ! defined malloc && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */
+# endif
+# endif
+# ifndef YYFREE
+# define YYFREE free
+# if ! defined free && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+void free (void *); /* INFRINGES ON USER NAME SPACE */
+# endif
+# endif
+# endif
+#endif /* ! defined yyoverflow || YYERROR_VERBOSE */
+
+
+#if (! defined yyoverflow \
+ && (! defined __cplusplus \
+ || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL)))
+
+/* A type that is properly aligned for any stack member. */
+union yyalloc
+{
+ yytype_int16 yyss_alloc;
+ YYSTYPE yyvs_alloc;
+};
+
+/* The size of the maximum gap between one aligned stack and the next. */
+# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1)
+
+/* The size of an array large enough to hold all stacks, each with
+ N elements. */
+# define YYSTACK_BYTES(N) \
+ ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \
+ + YYSTACK_GAP_MAXIMUM)
+
+# define YYCOPY_NEEDED 1
+
+/* Relocate STACK from its old location to the new one. The
+ local variables YYSIZE and YYSTACKSIZE give the old and new number of
+ elements in the stack, and YYPTR gives the new location of the
+ stack. Advance YYPTR to a properly aligned location for the next
+ stack. */
+# define YYSTACK_RELOCATE(Stack_alloc, Stack) \
+ do \
+ { \
+ YYSIZE_T yynewbytes; \
+ YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \
+ Stack = &yyptr->Stack_alloc; \
+ yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \
+ yyptr += yynewbytes / sizeof (*yyptr); \
+ } \
+ while (YYID (0))
+
+#endif
+
+#if defined YYCOPY_NEEDED && YYCOPY_NEEDED
+/* Copy COUNT objects from FROM to TO. The source and destination do
+ not overlap. */
+# ifndef YYCOPY
+# if defined __GNUC__ && 1 < __GNUC__
+# define YYCOPY(To, From, Count) \
+ __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
+# else
+# define YYCOPY(To, From, Count) \
+ do \
+ { \
+ YYSIZE_T yyi; \
+ for (yyi = 0; yyi < (Count); yyi++) \
+ (To)[yyi] = (From)[yyi]; \
+ } \
+ while (YYID (0))
+# endif
+# endif
+#endif /* !YYCOPY_NEEDED */
+
+/* YYFINAL -- State number of the termination state. */
+#define YYFINAL 3
+/* YYLAST -- Last index in YYTABLE. */
+#define YYLAST 52
+
+/* YYNTOKENS -- Number of terminals. */
+#define YYNTOKENS 16
+/* YYNNTS -- Number of nonterminals. */
+#define YYNNTS 8
+/* YYNRULES -- Number of rules. */
+#define YYNRULES 24
+/* YYNSTATES -- Number of states. */
+#define YYNSTATES 33
+
+/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
+#define YYUNDEFTOK 2
+#define YYMAXUTOK 261
+
+#define YYTRANSLATE(YYX) \
+ ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
+
+/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. */
+static const yytype_uint8 yytranslate[] =
+{
+ 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 12, 13, 14, 7, 2, 8, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 10, 2, 11, 2, 15, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 9, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
+ 5, 6
+};
+
+#if YYDEBUG
+/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in
+ YYRHS. */
+static const yytype_uint8 yyprhs[] =
+{
+ 0, 0, 3, 5, 6, 9, 12, 16, 21, 23,
+ 25, 28, 32, 36, 39, 44, 47, 49, 51, 53,
+ 55, 57, 59, 61, 64
+};
+
+/* YYRHS -- A `-1'-separated list of the rules' RHS. */
+static const yytype_int8 yyrhs[] =
+{
+ 17, 0, -1, 18, -1, -1, 18, 20, -1, 18,
+ 19, -1, 12, 18, 13, -1, 21, 12, 18, 13,
+ -1, 22, -1, 23, -1, 22, 14, -1, 23, 15,
+ 6, -1, 21, 22, 14, -1, 21, 22, -1, 21,
+ 23, 15, 6, -1, 21, 23, -1, 8, -1, 7,
+ -1, 9, -1, 10, -1, 11, -1, 5, -1, 6,
+ -1, 14, 22, -1, 4, -1
+};
+
+/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
+static const yytype_uint8 yyrline[] =
+{
+ 0, 79, 79, 85, 89, 99, 111, 119, 129, 133,
+ 137, 141, 146, 152, 157, 164, 170, 174, 178, 182,
+ 186, 191, 196, 202, 207
+};
+#endif
+
+#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE
+/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
+ First, the terminals, then, starting at YYNTOKENS, nonterminals. */
+static const char *const yytname[] =
+{
+ "$end", "error", "$undefined", "FTS_OPER", "FTS_TEXT", "FTS_TERM",
+ "FTS_NUMB", "'+'", "'-'", "'~'", "'<'", "'>'", "'('", "')'", "'*'",
+ "'@'", "$accept", "query", "expr_lst", "sub_expr", "expr", "prefix",
+ "term", "text", 0
+};
+#endif
+
+# ifdef YYPRINT
+/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to
+ token YYLEX-NUM. */
+static const yytype_uint16 yytoknum[] =
+{
+ 0, 256, 257, 258, 259, 260, 261, 43, 45, 126,
+ 60, 62, 40, 41, 42, 64
+};
+# endif
+
+/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
+static const yytype_uint8 yyr1[] =
+{
+ 0, 16, 17, 18, 18, 18, 19, 19, 20, 20,
+ 20, 20, 20, 20, 20, 20, 21, 21, 21, 21,
+ 21, 22, 22, 22, 23
+};
+
+/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
+static const yytype_uint8 yyr2[] =
+{
+ 0, 2, 1, 0, 2, 2, 3, 4, 1, 1,
+ 2, 3, 3, 2, 4, 2, 1, 1, 1, 1,
+ 1, 1, 1, 2, 1
+};
+
+/* YYDEFACT[STATE-NAME] -- Default reduction number in state STATE-NUM.
+ Performed when YYTABLE doesn't specify something else to do. Zero
+ means the default is an error. */
+static const yytype_uint8 yydefact[] =
+{
+ 3, 0, 2, 1, 24, 21, 22, 17, 16, 18,
+ 19, 20, 3, 0, 5, 4, 0, 8, 9, 0,
+ 23, 3, 13, 15, 10, 0, 6, 0, 12, 0,
+ 11, 7, 14
+};
+
+/* YYDEFGOTO[NTERM-NUM]. */
+static const yytype_int8 yydefgoto[] =
+{
+ -1, 1, 2, 14, 15, 16, 17, 18
+};
+
+/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
+ STATE-NUM. */
+#define YYPACT_NINF -5
+static const yytype_int8 yypact[] =
+{
+ -5, 38, 18, -5, -5, -5, -5, -5, -5, -5,
+ -5, -5, -5, 31, -5, -5, 29, 30, 32, -4,
+ -5, -5, 34, 35, -5, 40, -5, 7, -5, 43,
+ -5, -5, -5
+};
+
+/* YYPGOTO[NTERM-NUM]. */
+static const yytype_int8 yypgoto[] =
+{
+ -5, -5, 19, -5, -5, -5, 26, 36
+};
+
+/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
+ positive, shift that token. If negative, reduce the rule which
+ number is the opposite. If YYTABLE_NINF, syntax error. */
+#define YYTABLE_NINF -1
+static const yytype_uint8 yytable[] =
+{
+ 4, 5, 6, 7, 8, 9, 10, 11, 12, 26,
+ 13, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 31, 13, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 19, 13, 4, 5, 6, 5, 6, 3, 20,
+ 27, 21, 22, 13, 24, 13, 30, 25, 28, 32,
+ 29, 0, 23
+};
+
+#define yypact_value_is_default(yystate) \
+ ((yystate) == (-5))
+
+#define yytable_value_is_error(yytable_value) \
+ YYID (0)
+
+static const yytype_int8 yycheck[] =
+{
+ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+ 14, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 12, 14, 4, 5, 6, 5, 6, 0, 13,
+ 21, 12, 16, 14, 14, 14, 6, 15, 14, 6,
+ 15, -1, 16
+};
+
+/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
+ symbol of state STATE-NUM. */
+static const yytype_uint8 yystos[] =
+{
+ 0, 17, 18, 0, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 14, 19, 20, 21, 22, 23, 18,
+ 22, 12, 22, 23, 14, 15, 13, 18, 14, 15,
+ 6, 13, 6
+};
+
+#define yyerrok (yyerrstatus = 0)
+#define yyclearin (yychar = YYEMPTY)
+#define YYEMPTY (-2)
+#define YYEOF 0
+
+#define YYACCEPT goto yyacceptlab
+#define YYABORT goto yyabortlab
+#define YYERROR goto yyerrorlab
+
+
+/* Like YYERROR except do call yyerror. This remains here temporarily
+ to ease the transition to the new meaning of YYERROR, for GCC.
+ Once GCC version 2 has supplanted version 1, this can go. However,
+ YYFAIL appears to be in use. Nevertheless, it is formally deprecated
+ in Bison 2.4.2's NEWS entry, where a plan to phase it out is
+ discussed. */
+
+#define YYFAIL goto yyerrlab
+#if defined YYFAIL
+ /* This is here to suppress warnings from the GCC cpp's
+ -Wunused-macros. Normally we don't worry about that warning, but
+ some users do, and we want to make it easy for users to remove
+ YYFAIL uses, which will produce warnings from Bison 2.5. */
+#endif
+
+#define YYRECOVERING() (!!yyerrstatus)
+
+#define YYBACKUP(Token, Value) \
+do \
+ if (yychar == YYEMPTY && yylen == 1) \
+ { \
+ yychar = (Token); \
+ yylval = (Value); \
+ YYPOPSTACK (1); \
+ goto yybackup; \
+ } \
+ else \
+ { \
+ yyerror (YY_("syntax error: cannot back up")); \
+ YYERROR; \
+ } \
+while (YYID (0))
+
+
+#define YYTERROR 1
+#define YYERRCODE 256
+
+#define YYERRCLEANUP \
+do \
+ switch (yylastchar) \
+ { \
+ case FTS_NUMB: \
+ case FTS_TEXT: \
+ case FTS_TERM: \
+ YYTOKENFREE(yylval.token); \
+ break; \
+ default: \
+ break; \
+ } \
+while (YYID (0))
+
+/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
+ If N is 0, then set CURRENT to the empty location which ends
+ the previous symbol: RHS[0] (always defined). */
+
+#define YYRHSLOC(Rhs, K) ((Rhs)[K])
+#ifndef YYLLOC_DEFAULT
+# define YYLLOC_DEFAULT(Current, Rhs, N) \
+ do \
+ if (YYID (N)) \
+ { \
+ (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \
+ (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \
+ (Current).last_line = YYRHSLOC (Rhs, N).last_line; \
+ (Current).last_column = YYRHSLOC (Rhs, N).last_column; \
+ } \
+ else \
+ { \
+ (Current).first_line = (Current).last_line = \
+ YYRHSLOC (Rhs, 0).last_line; \
+ (Current).first_column = (Current).last_column = \
+ YYRHSLOC (Rhs, 0).last_column; \
+ } \
+ while (YYID (0))
+#endif
+
+
+/* This macro is provided for backward compatibility. */
+
+#ifndef YY_LOCATION_PRINT
+# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
+#endif
+
+
+/* YYLEX -- calling `yylex' with the right arguments. */
+
+#ifdef YYLEX_PARAM
+# define YYLEX yylex (&yylval, YYLEX_PARAM)
+#else
+# define YYLEX yylex (&yylval)
+#endif
+
+/* Enable debugging if requested. */
+#if YYDEBUG
+
+# ifndef YYFPRINTF
+# include <stdio.h> /* INFRINGES ON USER NAME SPACE */
+# define YYFPRINTF fprintf
+# endif
+
+# define YYDPRINTF(Args) \
+do { \
+ if (yydebug) \
+ YYFPRINTF Args; \
+} while (YYID (0))
+
+# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \
+do { \
+ if (yydebug) \
+ { \
+ YYFPRINTF (stderr, "%s ", Title); \
+ yy_symbol_print (stderr, \
+ Type, Value); \
+ YYFPRINTF (stderr, "\n"); \
+ } \
+} while (YYID (0))
+
+
+/*--------------------------------.
+| Print this symbol on YYOUTPUT. |
+`--------------------------------*/
+
+/*ARGSUSED*/
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
+#else
+static void
+yy_symbol_value_print (yyoutput, yytype, yyvaluep)
+ FILE *yyoutput;
+ int yytype;
+ YYSTYPE const * const yyvaluep;
+#endif
+{
+ if (!yyvaluep)
+ return;
+# ifdef YYPRINT
+ if (yytype < YYNTOKENS)
+ YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
+# else
+ YYUSE (yyoutput);
+# endif
+ switch (yytype)
+ {
+ default:
+ break;
+ }
+}
+
+
+/*--------------------------------.
+| Print this symbol on YYOUTPUT. |
+`--------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
+#else
+static void
+yy_symbol_print (yyoutput, yytype, yyvaluep)
+ FILE *yyoutput;
+ int yytype;
+ YYSTYPE const * const yyvaluep;
+#endif
+{
+ if (yytype < YYNTOKENS)
+ YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
+ else
+ YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]);
+
+ yy_symbol_value_print (yyoutput, yytype, yyvaluep);
+ YYFPRINTF (yyoutput, ")");
+}
+
+/*------------------------------------------------------------------.
+| yy_stack_print -- Print the state stack from its BOTTOM up to its |
+| TOP (included). |
+`------------------------------------------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop)
+#else
+static void
+yy_stack_print (yybottom, yytop)
+ yytype_int16 *yybottom;
+ yytype_int16 *yytop;
+#endif
+{
+ YYFPRINTF (stderr, "Stack now");
+ for (; yybottom <= yytop; yybottom++)
+ {
+ int yybot = *yybottom;
+ YYFPRINTF (stderr, " %d", yybot);
+ }
+ YYFPRINTF (stderr, "\n");
+}
+
+# define YY_STACK_PRINT(Bottom, Top) \
+do { \
+ if (yydebug) \
+ yy_stack_print ((Bottom), (Top)); \
+} while (YYID (0))
+
+
+/*------------------------------------------------.
+| Report that the YYRULE is going to be reduced. |
+`------------------------------------------------*/
+
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yy_reduce_print (YYSTYPE *yyvsp, int yyrule)
+#else
+static void
+yy_reduce_print (yyvsp, yyrule)
+ YYSTYPE *yyvsp;
+ int yyrule;
+#endif
+{
+ int yynrhs = yyr2[yyrule];
+ int yyi;
+ unsigned long int yylno = yyrline[yyrule];
+ YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n",
+ yyrule - 1, yylno);
+ /* The symbols being reduced. */
+ for (yyi = 0; yyi < yynrhs; yyi++)
+ {
+ YYFPRINTF (stderr, " $%d = ", yyi + 1);
+ yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi],
+ &(yyvsp[(yyi + 1) - (yynrhs)])
+ );
+ YYFPRINTF (stderr, "\n");
+ }
+}
+
+# define YY_REDUCE_PRINT(Rule) \
+do { \
+ if (yydebug) \
+ yy_reduce_print (yyvsp, Rule); \
+} while (YYID (0))
+
+/* Nonzero means print parse trace. It is left uninitialized so that
+ multiple parsers can coexist. */
+int yydebug;
+#else /* !YYDEBUG */
+# define YYDPRINTF(Args)
+# define YY_SYMBOL_PRINT(Title, Type, Value, Location)
+# define YY_STACK_PRINT(Bottom, Top)
+# define YY_REDUCE_PRINT(Rule)
+#endif /* !YYDEBUG */
+
+
+/* YYINITDEPTH -- initial size of the parser's stacks. */
+#ifndef YYINITDEPTH
+# define YYINITDEPTH 200
+#endif
+
+/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
+ if the built-in stack extension method is used).
+
+ Do not make this value too large; the results are undefined if
+ YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH)
+ evaluated with infinite-precision integer arithmetic. */
+
+#ifndef YYMAXDEPTH
+# define YYMAXDEPTH 10000
+#endif
+
+
+#if YYERROR_VERBOSE
+
+# ifndef yystrlen
+# if defined __GLIBC__ && defined _STRING_H
+# define yystrlen strlen
+# else
+/* Return the length of YYSTR. */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static YYSIZE_T
+yystrlen (const char *yystr)
+#else
+static YYSIZE_T
+yystrlen (yystr)
+ const char *yystr;
+#endif
+{
+ YYSIZE_T yylen;
+ for (yylen = 0; yystr[yylen]; yylen++)
+ continue;
+ return yylen;
+}
+# endif
+# endif
+
+# ifndef yystpcpy
+# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE
+# define yystpcpy stpcpy
+# else
+/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in
+ YYDEST. */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static char *
+yystpcpy (char *yydest, const char *yysrc)
+#else
+static char *
+yystpcpy (yydest, yysrc)
+ char *yydest;
+ const char *yysrc;
+#endif
+{
+ char *yyd = yydest;
+ const char *yys = yysrc;
+
+ while ((*yyd++ = *yys++) != '\0')
+ continue;
+
+ return yyd - 1;
+}
+# endif
+# endif
+
+# ifndef yytnamerr
+/* Copy to YYRES the contents of YYSTR after stripping away unnecessary
+ quotes and backslashes, so that it's suitable for yyerror. The
+ heuristic is that double-quoting is unnecessary unless the string
+ contains an apostrophe, a comma, or backslash (other than
+ backslash-backslash). YYSTR is taken from yytname. If YYRES is
+ null, do not copy; instead, return the length of what the result
+ would have been. */
+static YYSIZE_T
+yytnamerr (char *yyres, const char *yystr)
+{
+ if (*yystr == '"')
+ {
+ YYSIZE_T yyn = 0;
+ char const *yyp = yystr;
+
+ for (;;)
+ switch (*++yyp)
+ {
+ case '\'':
+ case ',':
+ goto do_not_strip_quotes;
+
+ case '\\':
+ if (*++yyp != '\\')
+ goto do_not_strip_quotes;
+ /* Fall through. */
+ default:
+ if (yyres)
+ yyres[yyn] = *yyp;
+ yyn++;
+ break;
+
+ case '"':
+ if (yyres)
+ yyres[yyn] = '\0';
+ return yyn;
+ }
+ do_not_strip_quotes: ;
+ }
+
+ if (! yyres)
+ return yystrlen (yystr);
+
+ return yystpcpy (yyres, yystr) - yyres;
+}
+# endif
+
+/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message
+ about the unexpected token YYTOKEN for the state stack whose top is
+ YYSSP.
+
+ Return 0 if *YYMSG was successfully written. Return 1 if *YYMSG is
+ not large enough to hold the message. In that case, also set
+ *YYMSG_ALLOC to the required number of bytes. Return 2 if the
+ required number of bytes is too large to store. */
+static int
+yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg,
+ yytype_int16 *yyssp, int yytoken)
+{
+ YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]);
+ YYSIZE_T yysize = yysize0;
+ YYSIZE_T yysize1;
+ enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
+ /* Internationalized format string. */
+ const char *yyformat = 0;
+ /* Arguments of yyformat. */
+ char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
+ /* Number of reported tokens (one for the "unexpected", one per
+ "expected"). */
+ int yycount = 0;
+
+ /* There are many possibilities here to consider:
+ - Assume YYFAIL is not used. It's too flawed to consider. See
+ <http://lists.gnu.org/archive/html/bison-patches/2009-12/msg00024.html>
+ for details. YYERROR is fine as it does not invoke this
+ function.
+ - If this state is a consistent state with a default action, then
+ the only way this function was invoked is if the default action
+ is an error action. In that case, don't check for expected
+ tokens because there are none.
+ - The only way there can be no lookahead present (in yychar) is if
+ this state is a consistent state with a default action. Thus,
+ detecting the absence of a lookahead is sufficient to determine
+ that there is no unexpected or expected token to report. In that
+ case, just report a simple "syntax error".
+ - Don't assume there isn't a lookahead just because this state is a
+ consistent state with a default action. There might have been a
+ previous inconsistent state, consistent state with a non-default
+ action, or user semantic action that manipulated yychar.
+ - Of course, the expected token list depends on states to have
+ correct lookahead information, and it depends on the parser not
+ to perform extra reductions after fetching a lookahead from the
+ scanner and before detecting a syntax error. Thus, state merging
+ (from LALR or IELR) and default reductions corrupt the expected
+ token list. However, the list is correct for canonical LR with
+ one exception: it will still contain any token that will not be
+ accepted due to an error action in a later state.
+ */
+ if (yytoken != YYEMPTY)
+ {
+ int yyn = yypact[*yyssp];
+ yyarg[yycount++] = yytname[yytoken];
+ if (!yypact_value_is_default (yyn))
+ {
+ /* Start YYX at -YYN if negative to avoid negative indexes in
+ YYCHECK. In other words, skip the first -YYN actions for
+ this state because they are default actions. */
+ int yyxbegin = yyn < 0 ? -yyn : 0;
+ /* Stay within bounds of both yycheck and yytname. */
+ int yychecklim = YYLAST - yyn + 1;
+ int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
+ int yyx;
+
+ for (yyx = yyxbegin; yyx < yyxend; ++yyx)
+ if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR
+ && !yytable_value_is_error (yytable[yyx + yyn]))
+ {
+ if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
+ {
+ yycount = 1;
+ yysize = yysize0;
+ break;
+ }
+ yyarg[yycount++] = yytname[yyx];
+ yysize1 = yysize + yytnamerr (0, yytname[yyx]);
+ if (! (yysize <= yysize1
+ && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+ return 2;
+ yysize = yysize1;
+ }
+ }
+ }
+
+ switch (yycount)
+ {
+# define YYCASE_(N, S) \
+ case N: \
+ yyformat = S; \
+ break
+ YYCASE_(0, YY_("syntax error"));
+ YYCASE_(1, YY_("syntax error, unexpected %s"));
+ YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s"));
+ YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s"));
+ YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s"));
+ YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s"));
+# undef YYCASE_
+ }
+
+ yysize1 = yysize + yystrlen (yyformat);
+ if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+ return 2;
+ yysize = yysize1;
+
+ if (*yymsg_alloc < yysize)
+ {
+ *yymsg_alloc = 2 * yysize;
+ if (! (yysize <= *yymsg_alloc
+ && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM))
+ *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM;
+ return 1;
+ }
+
+ /* Avoid sprintf, as that infringes on the user's name space.
+ Don't have undefined behavior even if the translation
+ produced a string with the wrong number of "%s"s. */
+ {
+ char *yyp = *yymsg;
+ int yyi = 0;
+ while ((*yyp = *yyformat) != '\0')
+ if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount)
+ {
+ yyp += yytnamerr (yyp, yyarg[yyi++]);
+ yyformat += 2;
+ }
+ else
+ {
+ yyp++;
+ yyformat++;
+ }
+ }
+ return 0;
+}
+#endif /* YYERROR_VERBOSE */
+
+/*-----------------------------------------------.
+| Release the memory associated to this symbol. |
+`-----------------------------------------------*/
+
+/*ARGSUSED*/
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+static void
+yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep)
+#else
+static void
+yydestruct (yymsg, yytype, yyvaluep)
+ const char *yymsg;
+ int yytype;
+ YYSTYPE *yyvaluep;
+#endif
+{
+ YYUSE (yyvaluep);
+
+ if (!yymsg)
+ yymsg = "Deleting";
+ YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);
+
+ switch (yytype)
+ {
+
+ default:
+ break;
+ }
+}
+
+
+/* Prevent warnings from -Wmissing-prototypes. */
+#ifdef YYPARSE_PARAM
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void *YYPARSE_PARAM);
+#else
+int yyparse ();
+#endif
+#else /* ! YYPARSE_PARAM */
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void);
+#else
+int yyparse ();
+#endif
+#endif /* ! YYPARSE_PARAM */
+
+
+/*----------.
+| yyparse. |
+`----------*/
+
+#ifdef YYPARSE_PARAM
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+int
+yyparse (void *YYPARSE_PARAM)
+#else
+int
+yyparse (YYPARSE_PARAM)
+ void *YYPARSE_PARAM;
+#endif
+#else /* ! YYPARSE_PARAM */
+#if (defined __STDC__ || defined __C99__FUNC__ \
+ || defined __cplusplus || defined _MSC_VER)
+int
+yyparse (void)
+#else
+int
+yyparse ()
+
+#endif
+#endif
+{
+/* The lookahead symbol. */
+int yychar;
+/* The backup of yychar when there is an error and we're in yyerrlab. */
+int yylastchar;
+
+/* The semantic value of the lookahead symbol. */
+YYSTYPE yylval;
+
+ /* Number of syntax errors so far. */
+ int yynerrs;
+
+ int yystate;
+ /* Number of tokens to shift before error messages enabled. */
+ int yyerrstatus;
+
+ /* The stacks and their tools:
+ `yyss': related to states.
+ `yyvs': related to semantic values.
+
+ Refer to the stacks thru separate pointers, to allow yyoverflow
+ to reallocate them elsewhere. */
+
+ /* The state stack. */
+ yytype_int16 yyssa[YYINITDEPTH];
+ yytype_int16 *yyss;
+ yytype_int16 *yyssp;
+
+ /* The semantic value stack. */
+ YYSTYPE yyvsa[YYINITDEPTH];
+ YYSTYPE *yyvs;
+ YYSTYPE *yyvsp;
+
+ YYSIZE_T yystacksize;
+
+ int yyn;
+ int yyresult;
+ /* Lookahead token as an internal (translated) token number. */
+ int yytoken;
+ /* The variables used to return semantic value and location from the
+ action routines. */
+ YYSTYPE yyval;
+
+#if YYERROR_VERBOSE
+ /* Buffer for error messages, and its allocated size. */
+ char yymsgbuf[128];
+ char *yymsg = yymsgbuf;
+ YYSIZE_T yymsg_alloc = sizeof yymsgbuf;
+#endif
+
+#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N))
+
+ /* The number of symbols on the RHS of the reduced rule.
+ Keep to zero when no symbol should be popped. */
+ int yylen = 0;
+
+ yytoken = 0;
+ yyss = yyssa;
+ yyvs = yyvsa;
+ yystacksize = YYINITDEPTH;
+
+ YYDPRINTF ((stderr, "Starting parse\n"));
+
+ yystate = 0;
+ yyerrstatus = 0;
+ yynerrs = 0;
+ yychar = YYEMPTY; /* Cause a token to be read. */
+
+ /* Initialize stack pointers.
+ Waste one element of value and location stack
+ so that they stay on the same level as the state stack.
+ The wasted elements are never initialized. */
+ yyssp = yyss;
+ yyvsp = yyvs;
+
+ goto yysetstate;
+
+/*------------------------------------------------------------.
+| yynewstate -- Push a new state, which is found in yystate. |
+`------------------------------------------------------------*/
+ yynewstate:
+ /* In all cases, when you get here, the value and location stacks
+ have just been pushed. So pushing a state here evens the stacks. */
+ yyssp++;
+
+ yysetstate:
+ *yyssp = yystate;
+
+ if (yyss + yystacksize - 1 <= yyssp)
+ {
+ /* Get the current used size of the three stacks, in elements. */
+ YYSIZE_T yysize = yyssp - yyss + 1;
+
+#ifdef yyoverflow
+ {
+ /* Give user a chance to reallocate the stack. Use copies of
+ these so that the &'s don't force the real ones into
+ memory. */
+ YYSTYPE *yyvs1 = yyvs;
+ yytype_int16 *yyss1 = yyss;
+
+ /* Each stack pointer address is followed by the size of the
+ data in use in that stack, in bytes. This used to be a
+ conditional around just the two extra args, but that might
+ be undefined if yyoverflow is a macro. */
+ yyoverflow (YY_("memory exhausted"),
+ &yyss1, yysize * sizeof (*yyssp),
+ &yyvs1, yysize * sizeof (*yyvsp),
+ &yystacksize);
+
+ yyss = yyss1;
+ yyvs = yyvs1;
+ }
+#else /* no yyoverflow */
+# ifndef YYSTACK_RELOCATE
+ goto yyexhaustedlab;
+# else
+ /* Extend the stack our own way. */
+ if (YYMAXDEPTH <= yystacksize)
+ goto yyexhaustedlab;
+ yystacksize *= 2;
+ if (YYMAXDEPTH < yystacksize)
+ yystacksize = YYMAXDEPTH;
+
+ {
+ yytype_int16 *yyss1 = yyss;
+ union yyalloc *yyptr =
+ (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
+ if (! yyptr)
+ goto yyexhaustedlab;
+ YYSTACK_RELOCATE (yyss_alloc, yyss);
+ YYSTACK_RELOCATE (yyvs_alloc, yyvs);
+# undef YYSTACK_RELOCATE
+ if (yyss1 != yyssa)
+ YYSTACK_FREE (yyss1);
+ }
+# endif
+#endif /* no yyoverflow */
+
+ yyssp = yyss + yysize - 1;
+ yyvsp = yyvs + yysize - 1;
+
+ YYDPRINTF ((stderr, "Stack size increased to %lu\n",
+ (unsigned long int) yystacksize));
+
+ if (yyss + yystacksize - 1 <= yyssp)
+ YYABORT;
+ }
+
+ YYDPRINTF ((stderr, "Entering state %d\n", yystate));
+
+ if (yystate == YYFINAL)
+ YYACCEPT;
+
+ goto yybackup;
+
+/*-----------.
+| yybackup. |
+`-----------*/
+yybackup:
+
+ /* Do appropriate processing given the current state. Read a
+ lookahead token if we need one and don't already have one. */
+
+ /* First try to decide what to do without reference to lookahead token. */
+ yyn = yypact[yystate];
+ if (yypact_value_is_default (yyn))
+ goto yydefault;
+
+ /* Not known => get a lookahead token if don't already have one. */
+
+ /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */
+ if (yychar == YYEMPTY)
+ {
+ YYDPRINTF ((stderr, "Reading a token: "));
+ yychar = YYLEX;
+ }
+
+ if (yychar <= YYEOF)
+ {
+ yychar = yytoken = YYEOF;
+ YYDPRINTF ((stderr, "Now at end of input.\n"));
+ }
+ else
+ {
+ yytoken = YYTRANSLATE (yychar);
+ YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
+ }
+
+ /* If the proper action on seeing token YYTOKEN is to reduce or to
+ detect an error, take that action. */
+ yyn += yytoken;
+ if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
+ goto yydefault;
+ yyn = yytable[yyn];
+ if (yyn <= 0)
+ {
+ if (yytable_value_is_error (yyn))
+ goto yyerrlab;
+ yyn = -yyn;
+ goto yyreduce;
+ }
+
+ /* Count tokens shifted since error; after three, turn off error
+ status. */
+ if (yyerrstatus)
+ yyerrstatus--;
+
+ /* Shift the lookahead token. */
+ YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
+
+ /* Discard the shifted token. */
+ yychar = YYEMPTY;
+
+ yystate = yyn;
+ *++yyvsp = yylval;
+
+ goto yynewstate;
+
+
+/*-----------------------------------------------------------.
+| yydefault -- do the default action for the current state. |
+`-----------------------------------------------------------*/
+yydefault:
+ yyn = yydefact[yystate];
+ if (yyn == 0)
+ goto yyerrlab;
+ goto yyreduce;
+
+
+/*-----------------------------.
+| yyreduce -- Do a reduction. |
+`-----------------------------*/
+yyreduce:
+ /* yyn is the number of a rule to reduce with. */
+ yylen = yyr2[yyn];
+
+ /* If YYLEN is nonzero, implement the default value of the action:
+ `$$ = $1'.
+
+ Otherwise, the following line sets YYVAL to garbage.
+ This behavior is undocumented and Bison
+ users should not rely upon it. Assigning to YYVAL
+ unconditionally makes the parser a bit smaller, and it avoids a
+ GCC warning that YYVAL may be used uninitialized. */
+ yyval = yyvsp[1-yylen];
+
+
+ YY_REDUCE_PRINT (yyn);
+ switch (yyn)
+ {
+ case 2:
+
+/* Line 1806 of yacc.c */
+#line 79 "fts0pars.y"
+ {
+ (yyval.node) = (yyvsp[(1) - (1)].node);
+ ((fts_ast_state_t*) state)->root = (yyval.node);
+ }
+ break;
+
+ case 3:
+
+/* Line 1806 of yacc.c */
+#line 85 "fts0pars.y"
+ {
+ (yyval.node) = NULL;
+ }
+ break;
+
+ case 4:
+
+/* Line 1806 of yacc.c */
+#line 89 "fts0pars.y"
+ {
+ (yyval.node) = (yyvsp[(1) - (2)].node);
+
+ if (!(yyval.node)) {
+ (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(2) - (2)].node));
+ } else {
+ fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node));
+ }
+ }
+ break;
+
+ case 5:
+
+/* Line 1806 of yacc.c */
+#line 99 "fts0pars.y"
+ {
+ (yyval.node) = (yyvsp[(1) - (2)].node);
+ (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node));
+
+ if (!(yyval.node)) {
+ (yyval.node) = (yyvsp[(2) - (2)].node);
+ } else {
+ fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node));
+ }
+ }
+ break;
+
+ case 6:
+
+/* Line 1806 of yacc.c */
+#line 111 "fts0pars.y"
+ {
+ (yyval.node) = (yyvsp[(2) - (3)].node);
+
+ if ((yyval.node)) {
+ (yyval.node) = fts_ast_create_node_subexp_list(state, (yyval.node));
+ }
+ }
+ break;
+
+ case 7:
+
+/* Line 1806 of yacc.c */
+#line 119 "fts0pars.y"
+ {
+ (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (4)].node));
+
+ if ((yyvsp[(3) - (4)].node)) {
+ fts_ast_add_node((yyval.node),
+ fts_ast_create_node_subexp_list(state, (yyvsp[(3) - (4)].node)));
+ }
+ }
+ break;
+
+ case 8:
+
+/* Line 1806 of yacc.c */
+#line 129 "fts0pars.y"
+ {
+ (yyval.node) = (yyvsp[(1) - (1)].node);
+ }
+ break;
+
+ case 9:
+
+/* Line 1806 of yacc.c */
+#line 133 "fts0pars.y"
+ {
+ (yyval.node) = (yyvsp[(1) - (1)].node);
+ }
+ break;
+
+ case 10:
+
+/* Line 1806 of yacc.c */
+#line 137 "fts0pars.y"
+ {
+ fts_ast_term_set_wildcard((yyvsp[(1) - (2)].node));
+ }
+ break;
+
+ case 11:
+
+/* Line 1806 of yacc.c */
+#line 141 "fts0pars.y"
+ {
+ fts_ast_text_set_distance((yyvsp[(1) - (3)].node), fts_ast_string_to_ul((yyvsp[(3) - (3)].token), 10));
+ fts_ast_string_free((yyvsp[(3) - (3)].token));
+ }
+ break;
+
+ case 12:
+
+/* Line 1806 of yacc.c */
+#line 146 "fts0pars.y"
+ {
+ (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (3)].node));
+ fts_ast_add_node((yyval.node), (yyvsp[(2) - (3)].node));
+ fts_ast_term_set_wildcard((yyvsp[(2) - (3)].node));
+ }
+ break;
+
+ case 13:
+
+/* Line 1806 of yacc.c */
+#line 152 "fts0pars.y"
+ {
+ (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node));
+ fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node));
+ }
+ break;
+
+ case 14:
+
+/* Line 1806 of yacc.c */
+#line 157 "fts0pars.y"
+ {
+ (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (4)].node));
+ fts_ast_add_node((yyval.node), (yyvsp[(2) - (4)].node));
+ fts_ast_text_set_distance((yyvsp[(2) - (4)].node), fts_ast_string_to_ul((yyvsp[(4) - (4)].token), 10));
+ fts_ast_string_free((yyvsp[(4) - (4)].token));
+ }
+ break;
+
+ case 15:
+
+/* Line 1806 of yacc.c */
+#line 164 "fts0pars.y"
+ {
+ (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node));
+ fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node));
+ }
+ break;
+
+ case 16:
+
+/* Line 1806 of yacc.c */
+#line 170 "fts0pars.y"
+ {
+ (yyval.node) = fts_ast_create_node_oper(state, FTS_IGNORE);
+ }
+ break;
+
+ case 17:
+
+/* Line 1806 of yacc.c */
+#line 174 "fts0pars.y"
+ {
+ (yyval.node) = fts_ast_create_node_oper(state, FTS_EXIST);
+ }
+ break;
+
+ case 18:
+
+/* Line 1806 of yacc.c */
+#line 178 "fts0pars.y"
+ {
+ (yyval.node) = fts_ast_create_node_oper(state, FTS_NEGATE);
+ }
+ break;
+
+ case 19:
+
+/* Line 1806 of yacc.c */
+#line 182 "fts0pars.y"
+ {
+ (yyval.node) = fts_ast_create_node_oper(state, FTS_DECR_RATING);
+ }
+ break;
+
+ case 20:
+
+/* Line 1806 of yacc.c */
+#line 186 "fts0pars.y"
+ {
+ (yyval.node) = fts_ast_create_node_oper(state, FTS_INCR_RATING);
+ }
+ break;
+
+ case 21:
+
+/* Line 1806 of yacc.c */
+#line 191 "fts0pars.y"
+ {
+ (yyval.node) = fts_ast_create_node_term(state, (yyvsp[(1) - (1)].token));
+ fts_ast_string_free((yyvsp[(1) - (1)].token));
+ }
+ break;
+
+ case 22:
+
+/* Line 1806 of yacc.c */
+#line 196 "fts0pars.y"
+ {
+ (yyval.node) = fts_ast_create_node_term(state, (yyvsp[(1) - (1)].token));
+ fts_ast_string_free((yyvsp[(1) - (1)].token));
+ }
+ break;
+
+ case 23:
+
+/* Line 1806 of yacc.c */
+#line 202 "fts0pars.y"
+ {
+ (yyval.node) = (yyvsp[(2) - (2)].node);
+ }
+ break;
+
+ case 24:
+
+/* Line 1806 of yacc.c */
+#line 207 "fts0pars.y"
+ {
+ (yyval.node) = fts_ast_create_node_text(state, (yyvsp[(1) - (1)].token));
+ fts_ast_string_free((yyvsp[(1) - (1)].token));
+ }
+ break;
+
+
+
+/* Line 1806 of yacc.c */
+#line 1663 "fts0pars.cc"
+ default: break;
+ }
+ /* User semantic actions sometimes alter yychar, and that requires
+ that yytoken be updated with the new translation. We take the
+ approach of translating immediately before every use of yytoken.
+ One alternative is translating here after every semantic action,
+ but that translation would be missed if the semantic action invokes
+ YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or
+ if it invokes YYBACKUP. In the case of YYABORT or YYACCEPT, an
+ incorrect destructor might then be invoked immediately. In the
+ case of YYERROR or YYBACKUP, subsequent parser actions might lead
+ to an incorrect destructor call or verbose syntax error message
+ before the lookahead is translated. */
+ YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
+
+ YYPOPSTACK (yylen);
+ yylen = 0;
+ YY_STACK_PRINT (yyss, yyssp);
+
+ *++yyvsp = yyval;
+
+ /* Now `shift' the result of the reduction. Determine what state
+ that goes to, based on the state we popped back to and the rule
+ number reduced by. */
+
+ yyn = yyr1[yyn];
+
+ yystate = yypgoto[yyn - YYNTOKENS] + *yyssp;
+ if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp)
+ yystate = yytable[yystate];
+ else
+ yystate = yydefgoto[yyn - YYNTOKENS];
+
+ goto yynewstate;
+
+
+/*------------------------------------.
+| yyerrlab -- here on detecting error |
+`------------------------------------*/
+yyerrlab:
+ /* Backup yychar, in case we would change it. */
+ yylastchar = yychar;
+ /* Make sure we have latest lookahead translation. See comments at
+ user semantic actions for why this is necessary. */
+ yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar);
+
+ /* If not already recovering from an error, report this error. */
+ if (!yyerrstatus)
+ {
+ ++yynerrs;
+#if ! YYERROR_VERBOSE
+ yyerror (YY_("syntax error"));
+#else
+# define YYSYNTAX_ERROR yysyntax_error (&yymsg_alloc, &yymsg, \
+ yyssp, yytoken)
+ {
+ char const *yymsgp = YY_("syntax error");
+ int yysyntax_error_status;
+ yysyntax_error_status = YYSYNTAX_ERROR;
+ if (yysyntax_error_status == 0)
+ yymsgp = yymsg;
+ else if (yysyntax_error_status == 1)
+ {
+ if (yymsg != yymsgbuf)
+ YYSTACK_FREE (yymsg);
+ yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc);
+ if (!yymsg)
+ {
+ yymsg = yymsgbuf;
+ yymsg_alloc = sizeof yymsgbuf;
+ yysyntax_error_status = 2;
+ }
+ else
+ {
+ yysyntax_error_status = YYSYNTAX_ERROR;
+ yymsgp = yymsg;
+ }
+ }
+ yyerror (yymsgp);
+ if (yysyntax_error_status == 2)
+ goto yyexhaustedlab;
+ }
+# undef YYSYNTAX_ERROR
+#endif
+ }
+
+
+
+ if (yyerrstatus == 3)
+ {
+ /* If just tried and failed to reuse lookahead token after an
+ error, discard it. */
+
+ if (yychar <= YYEOF)
+ {
+ /* Return failure if at end of input. */
+ if (yychar == YYEOF)
+ {
+ /* Since we don't need the token, we have to free it first. */
+ YYERRCLEANUP;
+ YYABORT;
+ }
+ }
+ else
+ {
+ yydestruct ("Error: discarding",
+ yytoken, &yylval);
+ yychar = YYEMPTY;
+ }
+ }
+
+ /* Else will try to reuse lookahead token after shifting the error
+ token. */
+ goto yyerrlab1;
+
+
+/*---------------------------------------------------.
+| yyerrorlab -- error raised explicitly by YYERROR. |
+`---------------------------------------------------*/
+yyerrorlab:
+
+ /* Pacify compilers like GCC when the user code never invokes
+ YYERROR and the label yyerrorlab therefore never appears in user
+ code. */
+ if (/*CONSTCOND*/ 0)
+ goto yyerrorlab;
+
+ /* Do not reclaim the symbols of the rule which action triggered
+ this YYERROR. */
+ YYPOPSTACK (yylen);
+ yylen = 0;
+ YY_STACK_PRINT (yyss, yyssp);
+ yystate = *yyssp;
+ goto yyerrlab1;
+
+
+/*-------------------------------------------------------------.
+| yyerrlab1 -- common code for both syntax error and YYERROR. |
+`-------------------------------------------------------------*/
+yyerrlab1:
+ yyerrstatus = 3; /* Each real token shifted decrements this. */
+
+ for (;;)
+ {
+ yyn = yypact[yystate];
+ if (!yypact_value_is_default (yyn))
+ {
+ yyn += YYTERROR;
+ if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
+ {
+ yyn = yytable[yyn];
+ if (0 < yyn)
+ break;
+ }
+ }
+
+ /* Pop the current state because it cannot handle the error token. */
+ if (yyssp == yyss)
+ {
+ /* Since we don't need the error token, we have to free it first. */
+ YYERRCLEANUP;
+ YYABORT;
+ }
+
+
+ yydestruct ("Error: popping",
+ yystos[yystate], yyvsp);
+ YYPOPSTACK (1);
+ yystate = *yyssp;
+ YY_STACK_PRINT (yyss, yyssp);
+ }
+
+ *++yyvsp = yylval;
+
+
+ /* Shift the error token. */
+ YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
+
+ yystate = yyn;
+ goto yynewstate;
+
+
+/*-------------------------------------.
+| yyacceptlab -- YYACCEPT comes here. |
+`-------------------------------------*/
+yyacceptlab:
+ yyresult = 0;
+ goto yyreturn;
+
+/*-----------------------------------.
+| yyabortlab -- YYABORT comes here. |
+`-----------------------------------*/
+yyabortlab:
+ yyresult = 1;
+ goto yyreturn;
+
+#if !defined(yyoverflow) || YYERROR_VERBOSE
+/*-------------------------------------------------.
+| yyexhaustedlab -- memory exhaustion comes here. |
+`-------------------------------------------------*/
+yyexhaustedlab:
+ yyerror (YY_("memory exhausted"));
+ yyresult = 2;
+ /* Fall through. */
+#endif
+
+yyreturn:
+ if (yychar != YYEMPTY)
+ {
+ /* Make sure we have latest lookahead translation. See comments at
+ user semantic actions for why this is necessary. */
+ yytoken = YYTRANSLATE (yychar);
+ yydestruct ("Cleanup: discarding lookahead",
+ yytoken, &yylval);
+ }
+ /* Do not reclaim the symbols of the rule which action triggered
+ this YYABORT or YYACCEPT. */
+ YYPOPSTACK (yylen);
+ YY_STACK_PRINT (yyss, yyssp);
+ while (yyssp != yyss)
+ {
+ yydestruct ("Cleanup: popping",
+ yystos[*yyssp], yyvsp);
+ YYPOPSTACK (1);
+ }
+#ifndef yyoverflow
+ if (yyss != yyssa)
+ YYSTACK_FREE (yyss);
+#endif
+#if YYERROR_VERBOSE
+ if (yymsg != yymsgbuf)
+ YYSTACK_FREE (yymsg);
+#endif
+ /* Make sure YYID is used. */
+ return YYID (yyresult);
+}
+
+
+
+/* Line 2067 of yacc.c */
+#line 212 "fts0pars.y"
+
+
+/********************************************************************
+Report a query parse error.*/
+int
+ftserror(
+/*=====*/
+ const char* p)
+{
+ my_printf_error(ER_PARSE_ERROR, "%s", MYF(0), p);
+ return(0);
+}
+
+/********************************************************************
+Create an fts_lexer_t instance.*/
+fts_lexer_t*
+fts_lexer_create(
+/*=============*/
+ ibool boolean_mode,
+ const byte* query,
+ ulint query_len)
+{
+ fts_lexer_t* fts_lexer = static_cast<fts_lexer_t*>(
+ ut_malloc_nokey(sizeof(fts_lexer_t)));
+
+ if (boolean_mode) {
+ fts0blex_init(&fts_lexer->yyscanner);
+ fts0b_scan_bytes(
+ reinterpret_cast<const char*>(query),
+ static_cast<int>(query_len),
+ fts_lexer->yyscanner);
+ fts_lexer->scanner = fts_blexer;
+ /* FIXME: Debugging */
+ /* fts0bset_debug(1 , fts_lexer->yyscanner); */
+ } else {
+ fts0tlex_init(&fts_lexer->yyscanner);
+ fts0t_scan_bytes(
+ reinterpret_cast<const char*>(query),
+ static_cast<int>(query_len),
+ fts_lexer->yyscanner);
+ fts_lexer->scanner = fts_tlexer;
+ }
+
+ return(fts_lexer);
+}
+
+/********************************************************************
+Free an fts_lexer_t instance.*/
+void
+
+fts_lexer_free(
+/*===========*/
+ fts_lexer_t* fts_lexer)
+{
+ if (fts_lexer->scanner == fts_blexer) {
+ fts0blex_destroy(fts_lexer->yyscanner);
+ } else {
+ fts0tlex_destroy(fts_lexer->yyscanner);
+ }
+
+ ut_free(fts_lexer);
+}
+
+/********************************************************************
+Call the appropriate scanner.*/
+int
+fts_lexer(
+/*======*/
+ YYSTYPE* val,
+ fts_lexer_t* fts_lexer)
+{
+ fts_scanner func_ptr;
+
+ func_ptr = fts_lexer->scanner;
+
+ return(func_ptr(val, fts_lexer->yyscanner));
+}
+
+/********************************************************************
+Parse the query.*/
+int
+fts_parse(
+/*======*/
+ fts_ast_state_t* state)
+{
+ return(ftsparse(state));
+}
+
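For orientation, here is a hedged reading of the semantic actions in the generated parser above (the query text is an invented example; the authoritative mapping is the grammar in fts0pars.y that follows). A boolean query such as

    +apple -banana >(pear <melon) "some phrase" @4

would be decomposed roughly into

    FTS_EXIST       -> term "apple"
    FTS_IGNORE      -> term "banana"
    FTS_INCR_RATING -> subexpression list ( term "pear", FTS_DECR_RATING -> term "melon" )
    text node "some phrase" with its proximity distance set to 4 via fts_ast_text_set_distance()
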
diff --git a/storage/innobase/fts/fts0pars.y b/storage/innobase/fts/fts0pars.y
new file mode 100644
index 00000000..deebc79e
--- /dev/null
+++ b/storage/innobase/fts/fts0pars.y
@@ -0,0 +1,293 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2014, Oracle and/or its affiliates. All rights reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**
+ * @file fts/fts0pars.y
+ * FTS parser: input file for the GNU Bison parser generator
+ *
+ * Created 2007/5/9 Sunny Bains
+ */
+
+%{
+#include "ha_prototypes.h"
+#include "mem0mem.h"
+#include "fts0ast.h"
+#include "fts0blex.h"
+#include "fts0tlex.h"
+#include "fts0pars.h"
+#include <my_sys.h>
+
+extern int fts_lexer(YYSTYPE*, fts_lexer_t*);
+extern int fts_blexer(YYSTYPE*, yyscan_t);
+extern int fts_tlexer(YYSTYPE*, yyscan_t);
+
+
+
+extern int ftserror(const char* p);
+
+/* Required for reentrant parser */
+#define ftslex fts_lexer
+
+#define YYERROR_VERBOSE
+
+/* For passing an argument to yyparse() */
+#define YYPARSE_PARAM state
+#define YYLEX_PARAM ((fts_ast_state_t*) state)->lexer
+
+
+typedef int (*fts_scanner)(YYSTYPE* val, yyscan_t yyscanner);
+
+struct fts_lexer_struct {
+ fts_scanner scanner;
+ void* yyscanner;
+};
+
+%}
+
+%union {
+ int oper;
+ fts_ast_string_t* token;
+ fts_ast_node_t* node;
+};
+
+/* Enable re-entrant parser */
+%pure_parser
+
+%token<oper> FTS_OPER
+%token<token> FTS_TEXT FTS_TERM FTS_NUMB
+
+%type<node> prefix term text expr sub_expr expr_lst query
+
+%nonassoc '+' '-' '~' '<' '>'
+
+%%
+
+query : expr_lst {
+ $$ = $1;
+ ((fts_ast_state_t*) state)->root = $$;
+ }
+ ;
+
+expr_lst: /* Empty */ {
+ $$ = NULL;
+ }
+
+ | expr_lst expr {
+ $$ = $1;
+
+ if (!$$) {
+ $$ = fts_ast_create_node_list(state, $2);
+ } else {
+ fts_ast_add_node($$, $2);
+ }
+ }
+
+ | expr_lst sub_expr {
+ $$ = $1;
+ $$ = fts_ast_create_node_list(state, $1);
+
+ if (!$$) {
+ $$ = $2;
+ } else {
+ fts_ast_add_node($$, $2);
+ }
+ }
+ ;
+
+sub_expr: '(' expr_lst ')' {
+ $$ = $2;
+
+ if ($$) {
+ $$ = fts_ast_create_node_subexp_list(state, $$);
+ }
+ }
+
+ | prefix '(' expr_lst ')' {
+ $$ = fts_ast_create_node_list(state, $1);
+
+ if ($3) {
+ fts_ast_add_node($$,
+ fts_ast_create_node_subexp_list(state, $3));
+ }
+ }
+ ;
+
+expr : term {
+ $$ = $1;
+ }
+
+ | text {
+ $$ = $1;
+ }
+
+ | term '*' {
+ fts_ast_term_set_wildcard($1);
+ }
+
+ | text '@' FTS_NUMB {
+ fts_ast_text_set_distance($1, fts_ast_string_to_ul($3, 10));
+ fts_ast_string_free($3);
+ }
+
+ | prefix term '*' {
+ $$ = fts_ast_create_node_list(state, $1);
+ fts_ast_add_node($$, $2);
+ fts_ast_term_set_wildcard($2);
+ }
+
+ | prefix term {
+ $$ = fts_ast_create_node_list(state, $1);
+ fts_ast_add_node($$, $2);
+ }
+
+ | prefix text '@' FTS_NUMB {
+ $$ = fts_ast_create_node_list(state, $1);
+ fts_ast_add_node($$, $2);
+ fts_ast_text_set_distance($2, fts_ast_string_to_ul($4, 10));
+ fts_ast_string_free($4);
+ }
+
+ | prefix text {
+ $$ = fts_ast_create_node_list(state, $1);
+ fts_ast_add_node($$, $2);
+ }
+ ;
+
+prefix : '-' {
+ $$ = fts_ast_create_node_oper(state, FTS_IGNORE);
+ }
+
+ | '+' {
+ $$ = fts_ast_create_node_oper(state, FTS_EXIST);
+ }
+
+ | '~' {
+ $$ = fts_ast_create_node_oper(state, FTS_NEGATE);
+ }
+
+ | '<' {
+ $$ = fts_ast_create_node_oper(state, FTS_DECR_RATING);
+ }
+
+ | '>' {
+ $$ = fts_ast_create_node_oper(state, FTS_INCR_RATING);
+ }
+ ;
+
+term : FTS_TERM {
+ $$ = fts_ast_create_node_term(state, $1);
+ fts_ast_string_free($1);
+ }
+
+ | FTS_NUMB {
+ $$ = fts_ast_create_node_term(state, $1);
+ fts_ast_string_free($1);
+ }
+
+ /* Ignore leading '*' */
+ | '*' term {
+ $$ = $2;
+ }
+ ;
+
+text : FTS_TEXT {
+ $$ = fts_ast_create_node_text(state, $1);
+ fts_ast_string_free($1);
+ }
+ ;
+%%
+
+/********************************************************************
+Report a query parse error.*/
+int
+ftserror(
+/*=====*/
+ const char* p)
+{
+ fprintf(stderr, "%s\n", p);
+ return(0);
+}
+
+/********************************************************************
+Create an fts_lexer_t instance.*/
+fts_lexer_t*
+fts_lexer_create(
+/*=============*/
+ ibool boolean_mode,
+ const byte* query,
+ ulint query_len)
+{
+ fts_lexer_t* fts_lexer = static_cast<fts_lexer_t*>(
+ ut_malloc_nokey(sizeof(fts_lexer_t)));
+
+ if (boolean_mode) {
+ fts0blex_init(&fts_lexer->yyscanner);
+ fts0b_scan_bytes((char*) query, (int) query_len, fts_lexer->yyscanner);
+ fts_lexer->scanner = fts_blexer;
+ /* FIXME: Debugging */
+ /* fts0bset_debug(1 , fts_lexer->yyscanner); */
+ } else {
+ fts0tlex_init(&fts_lexer->yyscanner);
+ fts0t_scan_bytes((char*) query, (int) query_len, fts_lexer->yyscanner);
+ fts_lexer->scanner = fts_tlexer;
+ }
+
+ return(fts_lexer);
+}
+
+/********************************************************************
+Free an fts_lexer_t instance.*/
+void
+
+fts_lexer_free(
+/*===========*/
+ fts_lexer_t* fts_lexer)
+{
+ if (fts_lexer->scanner == fts_blexer) {
+ fts0blex_destroy(fts_lexer->yyscanner);
+ } else {
+ fts0tlex_destroy(fts_lexer->yyscanner);
+ }
+
+ ut_free(fts_lexer);
+}
+
+/********************************************************************
+Call the appropriate scanner.*/
+int
+fts_lexer(
+/*======*/
+ YYSTYPE* val,
+ fts_lexer_t* fts_lexer)
+{
+ fts_scanner func_ptr;
+
+ func_ptr = fts_lexer->scanner;
+
+ return(func_ptr(val, fts_lexer->yyscanner));
+}
+
+/********************************************************************
+Parse the query.*/
+int
+fts_parse(
+/*======*/
+ fts_ast_state_t* state)
+{
+ return(ftsparse(state));
+}
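For orientation, a minimal driver sketch for the interface defined above (fts_lexer_create(), fts_parse(), fts_lexer_free()). This is a fragment, not the engine's actual call sequence: it assumes an fts_ast_state_t that can be zero-initialized and whose lexer and root members are the ones referenced by YYLEX_PARAM and the query rule, and it assumes the fts_ast_state_free() cleanup helper from fts0ast; the real caller in fts0que.cc also sets up the charset and related fields before parsing.

    fts_ast_state_t state;
    const char*     query = "+apple -banana";

    memset(&state, 0x0, sizeof(state));

    /* TRUE selects the boolean-mode scanner (fts0blex);
    FALSE would select the plain-text scanner (fts0tlex). */
    state.lexer = fts_lexer_create(
        TRUE,
        reinterpret_cast<const byte*>(query),
        strlen(query));

    if (fts_parse(&state) == 0) {
        /* state.root now points to the AST, roughly:
        FTS_EXIST -> term "apple", FTS_IGNORE -> term "banana". */
    }

    fts_lexer_free(state.lexer);
    fts_ast_state_free(&state); /* assumed: frees the nodes tracked in state */
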
diff --git a/storage/innobase/fts/fts0plugin.cc b/storage/innobase/fts/fts0plugin.cc
new file mode 100644
index 00000000..de99d170
--- /dev/null
+++ b/storage/innobase/fts/fts0plugin.cc
@@ -0,0 +1,283 @@
+/*****************************************************************************
+
+Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file fts/fts0plugin.cc
+Full Text Search plugin support.
+
+Created 2013/06/04 Shaohua Wang
+***********************************************************************/
+
+#include "fts0ast.h"
+#include "fts0plugin.h"
+#include "fts0tokenize.h"
+
+#include "ft_global.h"
+
+/******************************************************************//**
+FTS default parser init
+@return 0 */
+static int fts_default_parser_init(MYSQL_FTPARSER_PARAM*) { return 0; }
+
+/******************************************************************//**
+FTS default parser deinit
+@return 0 */
+static int fts_default_parser_deinit(MYSQL_FTPARSER_PARAM*) { return 0; }
+
+/******************************************************************//**
+FTS default parser parse function, from ft_static.c in MYISAM.
+@return 0 if parse successfully, or return non-zero */
+static
+int
+fts_default_parser_parse(
+/*=====================*/
+ MYSQL_FTPARSER_PARAM *param) /*!< in: plugin parser param */
+{
+ return(param->mysql_parse(param, param->doc, param->length));
+}
+
+/* FTS default parser from ft_static.c in MYISAM. */
+struct st_mysql_ftparser fts_default_parser =
+{
+ MYSQL_FTPARSER_INTERFACE_VERSION,
+ fts_default_parser_parse,
+ fts_default_parser_init,
+ fts_default_parser_deinit
+};
+
+/******************************************************************//**
+Get an operator node from the token's boolean info
+@return node */
+static
+fts_ast_node_t*
+fts_query_get_oper_node(
+/*====================*/
+ MYSQL_FTPARSER_BOOLEAN_INFO* info, /*!< in: token info */
+ fts_ast_state_t* state) /*!< in/out: query parse state*/
+{
+ fts_ast_node_t* oper_node = NULL;
+
+ if (info->yesno > 0) {
+ oper_node = fts_ast_create_node_oper(state, FTS_EXIST);
+ } else if (info->yesno < 0) {
+ oper_node = fts_ast_create_node_oper(state, FTS_IGNORE);
+ } else if (info->weight_adjust > 0) {
+ oper_node = fts_ast_create_node_oper(state, FTS_INCR_RATING);
+ } else if (info->weight_adjust < 0) {
+ oper_node = fts_ast_create_node_oper(state, FTS_DECR_RATING);
+ } else if (info->wasign > 0) {
+ oper_node = fts_ast_create_node_oper(state, FTS_NEGATE);
+ }
+
+ return(oper_node);
+}
+
+/******************************************************************//**
+FTS plugin parser 'mysql_add_word' callback function for query parsing.
+Refer to 'st_mysql_ftparser_param' for more detail.
+Note:
+a. Parse logic refers to 'ftb_query_add_word' from ft_boolean_search.c in MYISAM;
+b. Parse node or tree refers to fts0pars.y.
+@return 0 if add successfully, or return non-zero. */
+static
+int
+fts_query_add_word_for_parser(
+/*==========================*/
+ MYSQL_FTPARSER_PARAM* param, /*!< in: parser param */
+ const char* word, /*!< in: token */
+ int word_len, /*!< in: token length */
+ MYSQL_FTPARSER_BOOLEAN_INFO* info) /*!< in: token info */
+{
+ fts_ast_state_t* state =
+ static_cast<fts_ast_state_t*>(param->mysql_ftparam);
+ fts_ast_node_t* cur_node = state->cur_node;
+ fts_ast_node_t* oper_node = NULL;
+ fts_ast_node_t* term_node = NULL;
+ fts_ast_node_t* node = NULL;
+
+ switch (info->type) {
+ case FT_TOKEN_STOPWORD:
+		/* We only handle stopwords inside a phrase */
+ if (cur_node->type != FTS_AST_PARSER_PHRASE_LIST) {
+ break;
+ }
+ /* fall through */
+
+ case FT_TOKEN_WORD:
+ term_node = fts_ast_create_node_term_for_parser(
+ state, word, ulint(word_len));
+
+ if (info->trunc) {
+ fts_ast_term_set_wildcard(term_node);
+ }
+
+ if (cur_node->type == FTS_AST_PARSER_PHRASE_LIST) {
+ /* Ignore operator inside phrase */
+ fts_ast_add_node(cur_node, term_node);
+ } else {
+ ut_ad(cur_node->type == FTS_AST_LIST
+ || cur_node->type == FTS_AST_SUBEXP_LIST);
+ oper_node = fts_query_get_oper_node(info, state);
+
+ if (oper_node) {
+ node = fts_ast_create_node_list(state, oper_node);
+ fts_ast_add_node(node, term_node);
+ fts_ast_add_node(cur_node, node);
+ } else {
+ fts_ast_add_node(cur_node, term_node);
+ }
+ }
+
+ break;
+
+ case FT_TOKEN_LEFT_PAREN:
+ /* Check parse error */
+ if (cur_node->type != FTS_AST_LIST
+ && cur_node->type != FTS_AST_SUBEXP_LIST) {
+ return(1);
+ }
+
+ /* Set operator */
+ oper_node = fts_query_get_oper_node(info, state);
+ if (oper_node != NULL) {
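+			/* Wrap the parenthesised group in an extra list node
+			headed by the operator; 'go_up' tells the matching
+			right parenthesis to pop past this wrapper as well. */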
+ node = fts_ast_create_node_list(state, oper_node);
+ fts_ast_add_node(cur_node, node);
+ node->go_up = true;
+ node->up_node = cur_node;
+ cur_node = node;
+ }
+
+ if (info->quot) {
+ /* Phrase node */
+ node = fts_ast_create_node_phrase_list(state);
+ } else {
+ /* Subexp list node */
+ node = fts_ast_create_node_subexp_list(state, NULL);
+ }
+
+ fts_ast_add_node(cur_node, node);
+
+ node->up_node = cur_node;
+ state->cur_node = node;
+ state->depth += 1;
+
+ break;
+
+ case FT_TOKEN_RIGHT_PAREN:
+ info->quot = 0;
+
+ if (cur_node->up_node != NULL) {
+ cur_node = cur_node->up_node;
+
+ if (cur_node->go_up) {
+ ut_a(cur_node->up_node
+ && !(cur_node->up_node->go_up));
+ cur_node = cur_node->up_node;
+ }
+ }
+
+ state->cur_node = cur_node;
+
+ if (state->depth > 0) {
+ state->depth--;
+ } else {
+ /* Parentheses mismatch */
+ return(1);
+ }
+
+ break;
+
+ case FT_TOKEN_EOF:
+ default:
+ break;
+ }
+
+ return(0);
+}
+
+/******************************************************************//**
+FTS plugin parser 'mysql_parse' callback function for query parsing.
+Refer to 'st_mysql_ftparser_param' for more detail.
+@return 0 if parsed successfully */
+static
+int
+fts_parse_query_internal(
+/*=====================*/
+ MYSQL_FTPARSER_PARAM* param, /*!< in: parser param */
+ const char* query, /*!< in: query string */
+ int len) /*!< in: query length */
+{
+ MYSQL_FTPARSER_BOOLEAN_INFO info;
+ const CHARSET_INFO* cs = param->cs;
+ uchar** start = (uchar**)(&query);
+ uchar* end = (uchar*)(query + len);
+ FT_WORD w = {NULL, 0, 0};
+
+ info.prev = ' ';
+ info.quot = 0;
+ memset(&w, 0, sizeof(w));
+	/* Note: We don't handle simple parser mode here,
+	but a user-supplied plugin parser should handle it. */
+ while (fts_get_word(cs, start, end, &w, &info)) {
+ int ret = param->mysql_add_word(
+ param,
+ reinterpret_cast<char*>(w.pos),
+ int(w.len), &info);
+ if (ret) {
+ return(ret);
+ }
+ }
+
+ return(0);
+}
+
+/******************************************************************//**
+Parse an FTS query using the plugin parser.
+@return 0 if parsed successfully, or non-zero otherwise. */
+int
+fts_parse_by_parser(
+/*================*/
+ ibool mode, /*!< in: parse boolean mode */
+ uchar* query_str, /*!< in: query string */
+ ulint query_len, /*!< in: query string length */
+ st_mysql_ftparser* parser, /*!< in: fts plugin parser */
+ fts_ast_state_t* state) /*!< in/out: parser state */
+{
+ MYSQL_FTPARSER_PARAM param;
+ int ret;
+
+ ut_ad(parser);
+
+	/* Initialize the parser param */
+ param.mysql_parse = fts_parse_query_internal;
+ param.mysql_add_word = fts_query_add_word_for_parser;
+ param.mysql_ftparam = static_cast<void*>(state);
+ param.cs = state->charset;
+ param.doc = reinterpret_cast<char*>(query_str);
+ param.length = static_cast<int>(query_len);
+ param.flags = 0;
+ param.mode = mode ?
+ MYSQL_FTPARSER_FULL_BOOLEAN_INFO :
+ MYSQL_FTPARSER_SIMPLE_MODE;
+
+ PARSER_INIT(parser, &param);
+ ret = parser->parse(&param);
+ PARSER_DEINIT(parser, &param);
+
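+	/* A non-zero state->depth here means unbalanced parentheses; OR it
+	into the return value so that it is reported as a parse error. */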
+ return(ret | state->depth);
+}
diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc
new file mode 100644
index 00000000..8e2cb838
--- /dev/null
+++ b/storage/innobase/fts/fts0que.cc
@@ -0,0 +1,4596 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2020, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, 2020, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file fts/fts0que.cc
+Full Text Search functionality.
+
+Created 2007/03/27 Sunny Bains
+Completed 2011/7/10 Sunny and Jimmy Yang
+*******************************************************/
+
+#include "dict0dict.h"
+#include "ut0rbt.h"
+#include "row0sel.h"
+#include "fts0fts.h"
+#include "fts0priv.h"
+#include "fts0ast.h"
+#include "fts0pars.h"
+#include "fts0types.h"
+#include "fts0plugin.h"
+
+#include <iomanip>
+#include <vector>
+
+#define FTS_ELEM(t, n, i, j) (t[(i) * n + (j)])
+
+#define RANK_DOWNGRADE (-1.0F)
+#define RANK_UPGRADE (1.0F)
+
+/* Maximum number of words supported in a phrase or proximity search. */
+#define MAX_PROXIMITY_ITEM 128
+
+/* Memory used by rbt itself for create and node add */
+#define SIZEOF_RBT_CREATE sizeof(ib_rbt_t) + sizeof(ib_rbt_node_t) * 2
+#define SIZEOF_RBT_NODE_ADD sizeof(ib_rbt_node_t)
+
+/* Initial byte length for 'words' in fts_ranking_t */
+#define RANKING_WORDS_INIT_LEN 4
+
+// FIXME: Need to have a generic iterator that traverses the ilist.
+
+typedef std::vector<fts_string_t, ut_allocator<fts_string_t> > word_vector_t;
+
+struct fts_word_freq_t;
+
+/** State of an FTS query. */
+struct fts_query_t {
+ mem_heap_t* heap; /*!< Heap to use for allocations */
+
+ trx_t* trx; /*!< The query transaction */
+
+ dict_index_t* index; /*!< The FTS index to search */
+ /*!< FTS auxiliary common table def */
+
+ fts_table_t fts_common_table;
+
+ fts_table_t fts_index_table;/*!< FTS auxiliary index table def */
+
+ size_t total_size; /*!< total memory size used by query */
+
+ fts_doc_ids_t* deleted; /*!< Deleted doc ids that need to be
+ filtered from the output */
+
+ fts_ast_node_t* root; /*!< Abstract syntax tree */
+
+ fts_ast_node_t* cur_node; /*!< Current tree node */
+
+ ib_rbt_t* word_map; /*!< Matched word map for
+ searching by word*/
+
+ word_vector_t* word_vector; /*!< Matched word vector for
+ searching by index */
+
+ ib_rbt_t* doc_ids; /*!< The current set of matching
+ doc ids, elements are of
+ type fts_ranking_t */
+
+ ib_rbt_t* intersection; /*!< The doc ids that were found in
+ doc_ids, this tree will become
+ the new doc_ids, elements are of type
+ fts_ranking_t */
+
+ /*!< Prepared statement to read the
+ nodes from the FTS INDEX */
+ que_t* read_nodes_graph;
+
+ fts_ast_oper_t oper; /*!< Current boolean mode operator */
+
+ /*!< TRUE if we want to collect the
+ word positions within the document */
+ ibool collect_positions;
+
+ ulint flags; /*!< Specify the full text search type,
+ such as boolean search, phrase
+ search, proximity search etc. */
+
+ ulint distance; /*!< The proximity distance of a
+ phrase search. */
+
+ /*!< These doc ids are used as a
+ boundary condition when searching the
+ FTS index rows */
+
+ doc_id_t lower_doc_id; /*!< Lowest doc id in doc_ids */
+
+ doc_id_t upper_doc_id; /*!< Highest doc id in doc_ids */
+
+ bool boolean_mode; /*!< TRUE if boolean mode query */
+
+ ib_vector_t* matched; /*!< Array of matching documents
+ (fts_match_t) to search for a phrase */
+
+ ib_vector_t** match_array; /*!< Used for proximity search, contains
+ position info for each matched word
+ in the word list */
+
+ ib_uint64_t total_docs; /*!< The total number of documents */
+
+ ulint total_words; /*!< The total number of words */
+
+ dberr_t error; /*!< Error code if any, that is
+ encountered during query processing */
+
+ ib_rbt_t* word_freqs; /*!< RB tree of word frequencies per
+ document, its elements are of type
+ fts_word_freq_t */
+
+ ib_rbt_t* wildcard_words; /*!< words with wildcard */
+
+ bool multi_exist; /*!< multiple FTS_EXIST oper */
+ byte visiting_sub_exp; /*!< count of nested
+ fts_ast_visit_sub_exp() */
+
+ st_mysql_ftparser* parser; /*!< fts plugin parser */
+};
+
+/** For phrase matching, first we collect the documents and the positions
+then we match. */
+struct fts_match_t {
+ doc_id_t doc_id; /*!< Document id */
+
+ ulint start; /*!< Start the phrase match from
+ this offset within the positions
+ vector. */
+
+ ib_vector_t* positions; /*!< Offsets of a word in a
+ document */
+};
+
+/** For matching tokens in a phrase search. We use this data structure in
+the callback that determines whether a document should be accepted or
+rejected for a phrase search. */
+struct fts_select_t {
+ doc_id_t doc_id; /*!< The document id to match */
+
+ ulint min_pos; /*!< For found to be TRUE at least
+ one position must be greater than
+ min_pos. */
+
+ ibool found; /*!< TRUE if found */
+
+ fts_word_freq_t*
+ word_freq; /*!< Word frequency instance of the
+ current word being looked up in
+ the FTS index */
+};
+
+typedef std::vector<ulint, ut_allocator<ulint> > pos_vector_t;
+
+/** This structure defines a set of ranges within the original documents,
+each of which has a minimum and a maximum position. The text in such a
+range should contain all words in the proximity search. We will need to
+count the words in such a range to make sure the count is within the
+specified distance of the proximity search */
+struct fts_proximity_t {
+	ulint		n_pos;		/*!< number of position sets; each
+					defines a range (min to max) containing
+					all matching words */
+ pos_vector_t min_pos; /*!< the minimum position (in bytes)
+ of the range */
+ pos_vector_t max_pos; /*!< the maximum position (in bytes)
+ of the range */
+};
+
+/** The match positions and tokens to match */
+struct fts_phrase_t {
+ fts_phrase_t(const dict_table_t* table)
+ :
+ found(false),
+ match(NULL),
+ tokens(NULL),
+ distance(0),
+ charset(NULL),
+ heap(NULL),
+ zip_size(table->space->zip_size()),
+ proximity_pos(NULL),
+ parser(NULL)
+ {
+ }
+
+ /** Match result */
+ ibool found;
+
+ /** Positions within text */
+ const fts_match_t* match;
+
+ /** Tokens to match */
+ const ib_vector_t* tokens;
+
+ /** For matching on proximity distance. Can be 0 for exact match */
+ ulint distance;
+
+ /** Phrase match charset */
+ CHARSET_INFO* charset;
+
+ /** Heap for word processing */
+ mem_heap_t* heap;
+
+ /** ROW_FORMAT=COMPRESSED page size, or 0 */
+ const ulint zip_size;
+
+ /** Position info for proximity search verification. Records the
+ min and max position of words matched */
+ fts_proximity_t* proximity_pos;
+
+ /** FTS plugin parser */
+ st_mysql_ftparser* parser;
+};
+
+/** Parameter passed to FTS phrase match by parser */
+struct fts_phrase_param_t {
+ fts_phrase_t* phrase; /*!< Match phrase instance */
+ ulint token_index; /*!< Index of token to match next */
+ mem_heap_t* heap; /*!< Heap for word processing */
+};
+
+/** For storing the frequency of a word/term in a document */
+struct fts_doc_freq_t {
+ doc_id_t doc_id; /*!< Document id */
+ ulint freq; /*!< Frequency of a word in a document */
+};
+
+/** To determine the word frequency per document. */
+struct fts_word_freq_t {
+ fts_string_t word; /*!< Word for which we need the freq,
+ it's allocated on the query heap */
+
+ ib_rbt_t* doc_freqs; /*!< RB Tree for storing per document
+ word frequencies. The elements are
+ of type fts_doc_freq_t */
+ ib_uint64_t doc_count; /*!< Total number of documents that
+ contain this word */
+ double idf; /*!< Inverse document frequency */
+};
+
+/********************************************************************
+Callback function to fetch the rows in an FTS INDEX record.
+@return always TRUE */
+static
+ibool
+fts_query_index_fetch_nodes(
+/*========================*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg); /*!< in: pointer to ib_vector_t */
+
+/********************************************************************
+Read and filter nodes.
+@return DB_SUCCESS or error code */
+static
+dberr_t
+fts_query_filter_doc_ids(
+/*=====================*/
+ fts_query_t* query, /*!< in: query instance */
+ const fts_string_t* word, /*!< in: the current word */
+ fts_word_freq_t* word_freq, /*!< in/out: word frequency */
+ const fts_node_t* node, /*!< in: current FTS node */
+ void* data, /*!< in: doc id ilist */
+ ulint len, /*!< in: doc id ilist size */
+ ibool calc_doc_count);/*!< in: whether to remember doc
+ count */
+
+/** Process (nested) sub-expression, create a new result set to store the
+sub-expression result by processing nodes under current sub-expression
+list. Merge the sub-expression result with that of parent expression list.
+@param[in,out] node current root node
+@param[in,out] visitor callback function
+@param[in,out] arg argument for callback
+@return DB_SUCCESS if all go well */
+static
+dberr_t
+fts_ast_visit_sub_exp(
+ fts_ast_node_t* node,
+ fts_ast_callback visitor,
+ void* arg);
+
+#if 0
+/*****************************************************************//***
+Find a doc_id in a word's ilist.
+@return TRUE if found. */
+static
+ibool
+fts_query_find_doc_id(
+/*==================*/
+ fts_select_t* select, /*!< in/out: search the doc id selected,
+ update the frequency if found. */
+ void* data, /*!< in: doc id ilist */
+ ulint len); /*!< in: doc id ilist size */
+#endif
+
+/*************************************************************//**
+This function implements a simple "blind" query expansion search:
+words in documents found in the first search pass will be used as
+search arguments to search the documents again, thus "expanding"
+the search result set.
+@return DB_SUCCESS on success, otherwise an error code */
+static
+dberr_t
+fts_expand_query(
+/*=============*/
+ dict_index_t* index, /*!< in: FTS index to search */
+ fts_query_t* query) /*!< in: query result, to be freed
+ by the client */
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+/*************************************************************//**
+This function finds documents that contain all words in a
+phrase or proximity search. For a proximity search, it also verifies
+that the words are close enough to each other, within the specified
+distance. This function is called for both phrase and proximity search.
+@return TRUE if documents are found, FALSE otherwise */
+static
+ibool
+fts_phrase_or_proximity_search(
+/*===========================*/
+ fts_query_t* query, /*!< in/out: query instance
+ query->doc_ids might be instantiated
+ with qualified doc IDs */
+ ib_vector_t* tokens); /*!< in: Tokens contain words */
+/*************************************************************//**
+This function checks whether words in result documents are close to
+each other (within proximity range as specified by "distance").
+If "distance" is MAX_ULINT, then it will find all combinations of
+positions of matching words and store min and max positions
+in the "qualified_pos" for later verification.
+@return true if words are close to each other, false if otherwise */
+static
+bool
+fts_proximity_get_positions(
+/*========================*/
+ fts_match_t** match, /*!< in: query instance */
+ ulint num_match, /*!< in: number of matching
+ items */
+ ulint distance, /*!< in: distance value
+ for proximity search */
+ fts_proximity_t* qualified_pos); /*!< out: the position info
+ records ranges containing
+ all matching words. */
+#if 0
+/********************************************************************
+Get the total number of words in a documents. */
+static
+ulint
+fts_query_terms_in_document(
+/*========================*/
+ /*!< out: DB_SUCCESS if all go well
+ else error code */
+ fts_query_t* query, /*!< in: FTS query state */
+ doc_id_t doc_id, /*!< in: the word to check */
+ ulint* total); /*!< out: total words in document */
+#endif
+
+/********************************************************************
+Compare two fts_doc_freq_t doc_ids.
+@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
+UNIV_INLINE
+int
+fts_freq_doc_id_cmp(
+/*================*/
+ const void* p1, /*!< in: id1 */
+ const void* p2) /*!< in: id2 */
+{
+ const fts_doc_freq_t* fq1 = (const fts_doc_freq_t*) p1;
+ const fts_doc_freq_t* fq2 = (const fts_doc_freq_t*) p2;
+
+ return((int) (fq1->doc_id - fq2->doc_id));
+}
+
+#if 0
+/*******************************************************************//**
+Print the table used for calculating LCS. */
+static
+void
+fts_print_lcs_table(
+/*================*/
+ const ulint* table, /*!< in: array to print */
+ ulint n_rows, /*!< in: total no. of rows */
+ ulint n_cols) /*!< in: total no. of cols */
+{
+ ulint i;
+
+ for (i = 0; i < n_rows; ++i) {
+ ulint j;
+
+ printf("\n");
+
+ for (j = 0; j < n_cols; ++j) {
+
+ printf("%2lu ", FTS_ELEM(table, n_cols, i, j));
+ }
+ }
+}
+
+/********************************************************************
+Find the longest common subsequence between the query string and
+the document. */
+static
+ulint
+fts_query_lcs(
+/*==========*/
+ /*!< out: LCS (length) between
+ two ilists */
+ const ulint* p1, /*!< in: word positions of query */
+ ulint len_p1, /*!< in: no. of elements in p1 */
+ const ulint* p2, /*!< in: word positions within document */
+ ulint len_p2) /*!< in: no. of elements in p2 */
+{
+ int i;
+ ulint len = 0;
+ ulint r = len_p1;
+ ulint c = len_p2;
+ ulint size = (r + 1) * (c + 1) * sizeof(ulint);
+ ulint* table = (ulint*) ut_malloc_nokey(size);
+
+ /* Traverse the table backwards, from the last row to the first and
+ also from the last column to the first. We compute the smaller
+	common subsequences first, then use the calculated values to determine
+ the longest common subsequence. The result will be in TABLE[0][0]. */
+ for (i = r; i >= 0; --i) {
+ int j;
+
+ for (j = c; j >= 0; --j) {
+
+ if (p1[i] == (ulint) -1 || p2[j] == (ulint) -1) {
+
+ FTS_ELEM(table, c, i, j) = 0;
+
+ } else if (p1[i] == p2[j]) {
+
+ FTS_ELEM(table, c, i, j) = FTS_ELEM(
+ table, c, i + 1, j + 1) + 1;
+
+ } else {
+
+ ulint value;
+
+ value = ut_max(
+ FTS_ELEM(table, c, i + 1, j),
+ FTS_ELEM(table, c, i, j + 1));
+
+ FTS_ELEM(table, c, i, j) = value;
+ }
+ }
+ }
+
+ len = FTS_ELEM(table, c, 0, 0);
+
+ fts_print_lcs_table(table, r, c);
+ printf("\nLen=" ULINTPF "\n", len);
+
+ ut_free(table);
+
+ return(len);
+}
+#endif
+
+/*******************************************************************//**
+Compare two fts_ranking_t instances on their rank values and doc ids, in
+descending order of rank and ascending order of doc id.
+@return 0 if p1 == p2, < 0 if p1 < p2, > 0 if p1 > p2 */
+static
+int
+fts_query_compare_rank(
+/*===================*/
+ const void* p1, /*!< in: pointer to elem */
+ const void* p2) /*!< in: pointer to elem */
+{
+ const fts_ranking_t* r1 = (const fts_ranking_t*) p1;
+ const fts_ranking_t* r2 = (const fts_ranking_t*) p2;
+
+ if (r2->rank < r1->rank) {
+ return(-1);
+ } else if (r2->rank == r1->rank) {
+
+ if (r1->doc_id < r2->doc_id) {
+ return(1);
+ } else if (r1->doc_id > r2->doc_id) {
+ return(1);
+ }
+
+ return(0);
+ }
+
+ return(1);
+}
+
+/*******************************************************************//**
+Create words in ranking */
+static
+void
+fts_ranking_words_create(
+/*=====================*/
+ fts_query_t* query, /*!< in: query instance */
+ fts_ranking_t* ranking) /*!< in: ranking instance */
+{
+ ranking->words = static_cast<byte*>(
+ mem_heap_zalloc(query->heap, RANKING_WORDS_INIT_LEN));
+ ranking->words_len = RANKING_WORDS_INIT_LEN;
+}
+
+/*
+The optimization here is to use a char array (bitmap) to replace the words
+rb tree in fts_ranking_t.
+
+It can save a lot of memory, except in some cases of QUERY EXPANSION.
+
+'word_map' is used as a word dictionary, in which the key is a word and the
+value is a number. In 'fts_ranking_words_add', we first check if the word is
+in 'word_map'. If not, we add it to 'word_map' and assign it a position
+(actually a number). Then we set the corresponding bit to '1' at that
+position in the char array 'words'.
+
+'word_vector' is a useful backup of 'word_map': we can get a word by its
+position more quickly than by searching for its value in 'word_map'. We use
+'word_vector' in 'fts_query_calculate_ranking' and 'fts_expand_query'. In
+these two functions, we need to scan the bitmap 'words', get a word whenever
+a bit is '1', and then look up the word_freq by that word.
+*/
+
+/*******************************************************************//**
+Add a word into ranking */
+static
+void
+fts_ranking_words_add(
+/*==================*/
+ fts_query_t* query, /*!< in: query instance */
+ fts_ranking_t* ranking, /*!< in: ranking instance */
+ const fts_string_t* word) /*!< in: term/word to add */
+{
+ ulint pos;
+ ulint byte_offset;
+ ulint bit_offset;
+ ib_rbt_bound_t parent;
+
+ /* Note: we suppose the word map and vector are append-only. */
+ ut_ad(query->word_vector->size() == rbt_size(query->word_map));
+
+ /* We use ib_rbt to simulate a map, f_n_char means position. */
+ if (rbt_search(query->word_map, &parent, word) == 0) {
+ fts_string_t* result_word;
+
+ result_word = rbt_value(fts_string_t, parent.last);
+ pos = result_word->f_n_char;
+ ut_ad(pos < rbt_size(query->word_map));
+ } else {
+ /* Add the word to map. */
+ fts_string_t new_word;
+
+ pos = rbt_size(query->word_map);
+
+ fts_string_dup(&new_word, word, query->heap);
+ new_word.f_n_char = pos;
+
+ rbt_add_node(query->word_map, &parent, &new_word);
+ ut_ad(rbt_validate(query->word_map));
+ query->word_vector->push_back(new_word);
+ }
+
+ /* Check words len */
+ byte_offset = pos / CHAR_BIT;
+ if (byte_offset >= ranking->words_len) {
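+		/* Grow the bitmap by doubling its length until the byte
+		offset fits; the old bits are copied below and the new tail
+		stays zeroed (mem_heap_zalloc). */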
+ byte* words = ranking->words;
+ ulint words_len = ranking->words_len;
+
+ while (byte_offset >= words_len) {
+ words_len *= 2;
+ }
+
+ ranking->words = static_cast<byte*>(
+ mem_heap_zalloc(query->heap, words_len));
+ memcpy(ranking->words, words, ranking->words_len);
+ ranking->words_len = words_len;
+ }
+
+ /* Set ranking words */
+ ut_ad(byte_offset < ranking->words_len);
+ bit_offset = pos % CHAR_BIT;
+ ranking->words[byte_offset] = static_cast<byte>(
+ ranking->words[byte_offset] | 1 << bit_offset);
+}
+
+/*******************************************************************//**
+Get a word from a ranking
+@return true if it's successful */
+static
+bool
+fts_ranking_words_get_next(
+/*=======================*/
+ const fts_query_t* query, /*!< in: query instance */
+ fts_ranking_t* ranking,/*!< in: ranking instance */
+ ulint* pos, /*!< in/out: word start pos */
+ fts_string_t* word) /*!< in/out: term/word to add */
+{
+ bool ret = false;
+ ulint max_pos = ranking->words_len * CHAR_BIT;
+
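+	/* A set bit's position in the bitmap doubles as the word's index in
+	query->word_vector, so scanning the bitmap yields the words. */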
+ /* Search for next word */
+ while (*pos < max_pos) {
+ ulint byte_offset = *pos / CHAR_BIT;
+ ulint bit_offset = *pos % CHAR_BIT;
+
+ if (ranking->words[byte_offset] & (1 << bit_offset)) {
+ ret = true;
+ break;
+ }
+
+ *pos += 1;
+ };
+
+ /* Get next word from word vector */
+ if (ret) {
+ ut_ad(*pos < query->word_vector->size());
+ *word = query->word_vector->at((size_t)*pos);
+ *pos += 1;
+ }
+
+ return ret;
+}
+
+/*******************************************************************//**
+Add a word, if it doesn't already exist, to the term freq RB tree. We store
+a copy of the word, duplicated on the query heap.
+@return pointer to word */
+static
+fts_word_freq_t*
+fts_query_add_word_freq(
+/*====================*/
+ fts_query_t* query, /*!< in: query instance */
+ const fts_string_t* word) /*!< in: term/word to add */
+{
+ ib_rbt_bound_t parent;
+
+ /* Lookup the word in our rb tree and add if it doesn't exist. */
+ if (rbt_search(query->word_freqs, &parent, word) != 0) {
+ fts_word_freq_t word_freq;
+
+ memset(&word_freq, 0, sizeof(word_freq));
+
+ fts_string_dup(&word_freq.word, word, query->heap);
+
+ word_freq.doc_count = 0;
+
+ word_freq.doc_freqs = rbt_create(
+ sizeof(fts_doc_freq_t), fts_freq_doc_id_cmp);
+
+ parent.last = rbt_add_node(
+ query->word_freqs, &parent, &word_freq);
+
+ query->total_size += word->f_len
+ + SIZEOF_RBT_CREATE
+ + SIZEOF_RBT_NODE_ADD
+ + sizeof(fts_word_freq_t);
+ }
+
+ return(rbt_value(fts_word_freq_t, parent.last));
+}
+
+/*******************************************************************//**
+Add a doc id, if it doesn't already exist, to the doc freq RB tree.
+@return pointer to the doc freq entry */
+static
+fts_doc_freq_t*
+fts_query_add_doc_freq(
+/*===================*/
+ fts_query_t* query, /*!< in: query instance */
+ ib_rbt_t* doc_freqs, /*!< in: rb tree of fts_doc_freq_t */
+ doc_id_t doc_id) /*!< in: doc id to add */
+{
+ ib_rbt_bound_t parent;
+
+ /* Lookup the doc id in our rb tree and add if it doesn't exist. */
+ if (rbt_search(doc_freqs, &parent, &doc_id) != 0) {
+ fts_doc_freq_t doc_freq;
+
+ memset(&doc_freq, 0, sizeof(doc_freq));
+
+ doc_freq.freq = 0;
+ doc_freq.doc_id = doc_id;
+
+ parent.last = rbt_add_node(doc_freqs, &parent, &doc_freq);
+
+ query->total_size += SIZEOF_RBT_NODE_ADD
+ + sizeof(fts_doc_freq_t);
+ }
+
+ return(rbt_value(fts_doc_freq_t, parent.last));
+}
+
+/*******************************************************************//**
+Add the doc id to the query set only if it's not in the
+deleted array. */
+static
+void
+fts_query_union_doc_id(
+/*===================*/
+ fts_query_t* query, /*!< in: query instance */
+ doc_id_t doc_id, /*!< in: the doc id to add */
+ fts_rank_t rank) /*!< in: if non-zero, it is the
+ rank associated with the doc_id */
+{
+ ib_rbt_bound_t parent;
+ ulint size = ib_vector_size(query->deleted->doc_ids);
+ doc_id_t* updates = (doc_id_t*) query->deleted->doc_ids->data;
+
+ /* Check if the doc id is deleted and it's not already in our set. */
+ if (fts_bsearch(updates, 0, static_cast<int>(size), doc_id) < 0
+ && rbt_search(query->doc_ids, &parent, &doc_id) != 0) {
+
+ fts_ranking_t ranking;
+
+ ranking.rank = rank;
+ ranking.doc_id = doc_id;
+ fts_ranking_words_create(query, &ranking);
+
+ rbt_add_node(query->doc_ids, &parent, &ranking);
+
+ query->total_size += SIZEOF_RBT_NODE_ADD
+ + sizeof(fts_ranking_t) + RANKING_WORDS_INIT_LEN;
+ }
+}
+
+/*******************************************************************//**
+Remove the doc id from the query set only if it's not in the
+deleted set. */
+static
+void
+fts_query_remove_doc_id(
+/*====================*/
+ fts_query_t* query, /*!< in: query instance */
+ doc_id_t doc_id) /*!< in: the doc id to add */
+{
+ ib_rbt_bound_t parent;
+ ulint size = ib_vector_size(query->deleted->doc_ids);
+ doc_id_t* updates = (doc_id_t*) query->deleted->doc_ids->data;
+
+ /* Check if the doc id is deleted and it's in our set. */
+ if (fts_bsearch(updates, 0, static_cast<int>(size), doc_id) < 0
+ && rbt_search(query->doc_ids, &parent, &doc_id) == 0) {
+ ut_free(rbt_remove_node(query->doc_ids, parent.last));
+
+ ut_ad(query->total_size >=
+ SIZEOF_RBT_NODE_ADD + sizeof(fts_ranking_t));
+ query->total_size -= SIZEOF_RBT_NODE_ADD
+ + sizeof(fts_ranking_t);
+ }
+}
+
+/*******************************************************************//**
+Find the doc id in the query set (but not in the deleted set) and
+artificially downgrade or upgrade its ranking by a fixed value, possibly
+taking the ranking below or above its normal range of 0 to 1. This is used
+for boolean search operators such as the negation operator, which makes a
+word's contribution to the row's relevance negative */
+static
+void
+fts_query_change_ranking(
+/*====================*/
+ fts_query_t* query, /*!< in: query instance */
+ doc_id_t doc_id, /*!< in: the doc id to add */
+ ibool downgrade) /*!< in: Whether to downgrade ranking */
+{
+ ib_rbt_bound_t parent;
+ ulint size = ib_vector_size(query->deleted->doc_ids);
+ doc_id_t* updates = (doc_id_t*) query->deleted->doc_ids->data;
+
+ /* Check if the doc id is deleted and it's in our set. */
+ if (fts_bsearch(updates, 0, static_cast<int>(size), doc_id) < 0
+ && rbt_search(query->doc_ids, &parent, &doc_id) == 0) {
+
+ fts_ranking_t* ranking;
+
+ ranking = rbt_value(fts_ranking_t, parent.last);
+
+ ranking->rank += downgrade ? RANK_DOWNGRADE : RANK_UPGRADE;
+
+		/* Clamp the rank to [-1.0, 1.0] after adjustment by
+		RANK_DOWNGRADE (-1.0) or RANK_UPGRADE (1.0) */
+ if (ranking->rank >= 1.0F) {
+ ranking->rank = 1.0F;
+ } else if (ranking->rank <= -1.0F) {
+ ranking->rank = -1.0F;
+ }
+ }
+}
+
+/*******************************************************************//**
+Check the doc id in the query set only if it's not in the
+deleted array. The doc ids that were found are stored in
+another rb tree (fts_query_t::intersection). */
+static
+void
+fts_query_intersect_doc_id(
+/*=======================*/
+ fts_query_t* query, /*!< in: query instance */
+ doc_id_t doc_id, /*!< in: the doc id to add */
+ fts_rank_t rank) /*!< in: if non-zero, it is the
+ rank associated with the doc_id */
+{
+ ib_rbt_bound_t parent;
+ ulint size = ib_vector_size(query->deleted->doc_ids);
+ doc_id_t* updates = (doc_id_t*) query->deleted->doc_ids->data;
+ fts_ranking_t* ranking= NULL;
+
+	/* There are three types of intersect:
+	1. '+a': doc_ids is empty; add the doc into the intersection if it
+	matches 'a'.
+	2. 'a +b': docs matching 'a' are in doc_ids; add the doc into the
+	intersection if it matches 'b'. If the doc is also in doc_ids, then
+	adjust the doc's rank, and add 'a' to the doc's words.
+	3. '+a +b': docs matching '+a' are in doc_ids; add the doc into the
+	intersection if it matches 'b' and it's in doc_ids (multi_exist = true). */
+
+ /* Check if the doc id is deleted and it's in our set */
+ if (fts_bsearch(updates, 0, static_cast<int>(size), doc_id) < 0) {
+ fts_ranking_t new_ranking;
+
+ if (rbt_search(query->doc_ids, &parent, &doc_id) != 0) {
+ if (query->multi_exist) {
+ return;
+ } else {
+ new_ranking.words = NULL;
+ }
+ } else {
+ ranking = rbt_value(fts_ranking_t, parent.last);
+
+ /* We've just checked the doc id before */
+ if (ranking->words == NULL) {
+ ut_ad(rbt_search(query->intersection, &parent,
+ ranking) == 0);
+ return;
+ }
+
+ /* Merge rank */
+ rank += ranking->rank;
+ if (rank >= 1.0F) {
+ rank = 1.0F;
+ } else if (rank <= -1.0F) {
+ rank = -1.0F;
+ }
+
+ /* Take words */
+ new_ranking.words = ranking->words;
+ new_ranking.words_len = ranking->words_len;
+ }
+
+ new_ranking.rank = rank;
+ new_ranking.doc_id = doc_id;
+
+ if (rbt_search(query->intersection, &parent,
+ &new_ranking) != 0) {
+ if (new_ranking.words == NULL) {
+ fts_ranking_words_create(query, &new_ranking);
+
+ query->total_size += RANKING_WORDS_INIT_LEN;
+ } else {
+ /* Note that the intersection has taken
+ ownership of the ranking data. */
+ ranking->words = NULL;
+ }
+
+ rbt_add_node(query->intersection,
+ &parent, &new_ranking);
+
+ query->total_size += SIZEOF_RBT_NODE_ADD
+ + sizeof(fts_ranking_t);
+ }
+ }
+}
+
+/*******************************************************************//**
+Free the document ranking rb tree. */
+static
+void
+fts_query_free_doc_ids(
+/*===================*/
+ fts_query_t* query, /*!< in: query instance */
+ ib_rbt_t* doc_ids) /*!< in: rb tree to free */
+{
+ const ib_rbt_node_t* node;
+
+ for (node = rbt_first(doc_ids); node; node = rbt_first(doc_ids)) {
+
+ fts_ranking_t* ranking;
+
+ ranking = rbt_value(fts_ranking_t, node);
+
+ if (ranking->words) {
+ ranking->words = NULL;
+ }
+
+ ut_free(rbt_remove_node(doc_ids, node));
+
+ ut_ad(query->total_size >=
+ SIZEOF_RBT_NODE_ADD + sizeof(fts_ranking_t));
+ query->total_size -= SIZEOF_RBT_NODE_ADD
+ + sizeof(fts_ranking_t);
+ }
+
+ rbt_free(doc_ids);
+
+ ut_ad(query->total_size >= SIZEOF_RBT_CREATE);
+ query->total_size -= SIZEOF_RBT_CREATE;
+}
+
+/*******************************************************************//**
+Add the word to the document's "list" of matching words from
+the query. We make a copy of the word on the query heap. */
+static
+void
+fts_query_add_word_to_document(
+/*===========================*/
+ fts_query_t* query, /*!< in: query to update */
+ doc_id_t doc_id, /*!< in: the document to update */
+ const fts_string_t* word) /*!< in: the token to add */
+{
+ ib_rbt_bound_t parent;
+ fts_ranking_t* ranking = NULL;
+
+ if (query->flags == FTS_OPT_RANKING) {
+ return;
+ }
+
+ /* First we search the intersection RB tree as it could have
+ taken ownership of the words rb tree instance. */
+ if (query->intersection
+ && rbt_search(query->intersection, &parent, &doc_id) == 0) {
+
+ ranking = rbt_value(fts_ranking_t, parent.last);
+ }
+
+ if (ranking == NULL
+ && rbt_search(query->doc_ids, &parent, &doc_id) == 0) {
+
+ ranking = rbt_value(fts_ranking_t, parent.last);
+ }
+
+ if (ranking != NULL) {
+ fts_ranking_words_add(query, ranking, word);
+ }
+}
+
+/*******************************************************************//**
+Check the node ilist. */
+static
+void
+fts_query_check_node(
+/*=================*/
+ fts_query_t* query, /*!< in: query to update */
+ const fts_string_t* token, /*!< in: the token to search */
+ const fts_node_t* node) /*!< in: node to check */
+{
+	/* Skip nodes whose doc ids are out of range. */
+ if (query->oper == FTS_EXIST
+ && ((query->upper_doc_id > 0
+ && node->first_doc_id > query->upper_doc_id)
+ || (query->lower_doc_id > 0
+ && node->last_doc_id < query->lower_doc_id))) {
+
+ /* Ignore */
+
+ } else {
+ int ret;
+ ib_rbt_bound_t parent;
+ ulint ilist_size = node->ilist_size;
+ fts_word_freq_t*word_freqs;
+
+ /* The word must exist. */
+ ret = rbt_search(query->word_freqs, &parent, token);
+ ut_a(ret == 0);
+
+ word_freqs = rbt_value(fts_word_freq_t, parent.last);
+
+ query->error = fts_query_filter_doc_ids(
+ query, token, word_freqs, node,
+ node->ilist, ilist_size, TRUE);
+ }
+}
+
+/*****************************************************************//**
+Search index cache for word with wildcard match.
+@return number of words matched */
+static
+ulint
+fts_cache_find_wildcard(
+/*====================*/
+ fts_query_t* query, /*!< in: query instance */
+ const fts_index_cache_t*index_cache, /*!< in: cache to search */
+ const fts_string_t* token) /*!< in: token to search */
+{
+ ib_rbt_bound_t parent;
+ const ib_vector_t* nodes = NULL;
+ fts_string_t srch_text;
+ byte term[FTS_MAX_WORD_LEN + 1];
+ ulint num_word = 0;
+
+ srch_text.f_len = (token->f_str[token->f_len - 1] == '%')
+ ? token->f_len - 1
+ : token->f_len;
+
+ strncpy((char*) term, (char*) token->f_str, srch_text.f_len);
+ term[srch_text.f_len] = '\0';
+ srch_text.f_str = term;
+
+ /* Lookup the word in the rb tree */
+ if (rbt_search_cmp(index_cache->words, &parent, &srch_text, NULL,
+ innobase_fts_text_cmp_prefix) == 0) {
+ const fts_tokenizer_word_t* word;
+ ulint i;
+ const ib_rbt_node_t* cur_node;
+ ibool forward = FALSE;
+
+ word = rbt_value(fts_tokenizer_word_t, parent.last);
+ cur_node = parent.last;
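+		/* Scan backwards from the matched node first; once the prefix
+		no longer matches, restart from parent.last and scan forwards
+		(via 'cont_search') so that every word with this prefix is
+		visited. */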
+
+ while (innobase_fts_text_cmp_prefix(
+ index_cache->charset, &srch_text, &word->text) == 0) {
+
+ nodes = word->nodes;
+
+ for (i = 0; nodes && i < ib_vector_size(nodes); ++i) {
+ int ret;
+ const fts_node_t* node;
+ ib_rbt_bound_t freq_parent;
+ fts_word_freq_t* word_freqs;
+
+ node = static_cast<const fts_node_t*>(
+ ib_vector_get_const(nodes, i));
+
+ ret = rbt_search(query->word_freqs,
+ &freq_parent,
+ &srch_text);
+
+ ut_a(ret == 0);
+
+ word_freqs = rbt_value(
+ fts_word_freq_t,
+ freq_parent.last);
+
+ query->error = fts_query_filter_doc_ids(
+ query, &srch_text,
+ word_freqs, node,
+ node->ilist, node->ilist_size, TRUE);
+
+ if (query->error != DB_SUCCESS) {
+ return(0);
+ }
+ }
+
+ num_word++;
+
+ if (!forward) {
+ cur_node = rbt_prev(
+ index_cache->words, cur_node);
+ } else {
+cont_search:
+ cur_node = rbt_next(
+ index_cache->words, cur_node);
+ }
+
+ if (!cur_node) {
+ break;
+ }
+
+ word = rbt_value(fts_tokenizer_word_t, cur_node);
+ }
+
+ if (!forward) {
+ forward = TRUE;
+ cur_node = parent.last;
+ goto cont_search;
+ }
+ }
+
+ return(num_word);
+}
+
+/*****************************************************************//**
+Set difference.
+@return DB_SUCCESS if all go well */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_query_difference(
+/*=================*/
+ fts_query_t* query, /*!< in: query instance */
+ const fts_string_t* token) /*!< in: token to search */
+{
+ ulint n_doc_ids= 0;
+ trx_t* trx = query->trx;
+ dict_table_t* table = query->index->table;
+
+ ut_a(query->oper == FTS_IGNORE);
+
+#ifdef FTS_INTERNAL_DIAG_PRINT
+ {
+ ib::info out;
+ out << "DIFFERENCE: Searching: '";
+ out.write(token->f_str, token->f_len);
+ out << "'";
+ }
+#endif
+
+ if (query->doc_ids) {
+ n_doc_ids = rbt_size(query->doc_ids);
+ }
+
+	/* There is nothing we can subtract from an empty set. */
+ if (query->doc_ids && !rbt_empty(query->doc_ids)) {
+ ulint i;
+ fts_fetch_t fetch;
+ const ib_vector_t* nodes;
+ const fts_index_cache_t*index_cache;
+ que_t* graph = NULL;
+ fts_cache_t* cache = table->fts->cache;
+ dberr_t error;
+
+ rw_lock_x_lock(&cache->lock);
+
+ index_cache = fts_find_index_cache(cache, query->index);
+
+ /* Must find the index cache */
+ ut_a(index_cache != NULL);
+
+ /* Search the cache for a matching word first. */
+ if (query->cur_node->term.wildcard
+ && query->flags != FTS_PROXIMITY
+ && query->flags != FTS_PHRASE) {
+ fts_cache_find_wildcard(query, index_cache, token);
+ } else {
+ nodes = fts_cache_find_word(index_cache, token);
+
+ for (i = 0; nodes && i < ib_vector_size(nodes)
+ && query->error == DB_SUCCESS; ++i) {
+ const fts_node_t* node;
+
+ node = static_cast<const fts_node_t*>(
+ ib_vector_get_const(nodes, i));
+
+ fts_query_check_node(query, token, node);
+ }
+ }
+
+ rw_lock_x_unlock(&cache->lock);
+
+ /* error is passed by 'query->error' */
+ if (query->error != DB_SUCCESS) {
+ ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT);
+ return(query->error);
+ }
+
+ /* Setup the callback args for filtering and
+ consolidating the ilist. */
+ fetch.read_arg = query;
+ fetch.read_record = fts_query_index_fetch_nodes;
+
+ error = fts_index_fetch_nodes(
+ trx, &graph, &query->fts_index_table, token, &fetch);
+
+ /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */
+ ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS));
+ if (error != DB_SUCCESS) {
+ query->error = error;
+ }
+
+ fts_que_graph_free(graph);
+ }
+
+ /* The size can't increase. */
+ ut_a(rbt_size(query->doc_ids) <= n_doc_ids);
+
+ return(query->error);
+}
+
+/*****************************************************************//**
+Intersect the token doc ids with the current set.
+@return DB_SUCCESS if all go well */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_query_intersect(
+/*================*/
+ fts_query_t* query, /*!< in: query instance */
+ const fts_string_t* token) /*!< in: the token to search */
+{
+ trx_t* trx = query->trx;
+ dict_table_t* table = query->index->table;
+
+ ut_a(query->oper == FTS_EXIST);
+
+#ifdef FTS_INTERNAL_DIAG_PRINT
+ {
+ ib::info out;
+ out << "INTERSECT: Searching: '";
+ out.write(token->f_str, token->f_len);
+ out << "'";
+ }
+#endif
+
+	/* If the current doc id set is already empty and multi_exist is
+	true, we know in advance that the intersection will be empty, so
+	skip the search. */
+ if (!(rbt_empty(query->doc_ids) && query->multi_exist)) {
+ ulint n_doc_ids = 0;
+ ulint i;
+ fts_fetch_t fetch;
+ const ib_vector_t* nodes;
+ const fts_index_cache_t*index_cache;
+ que_t* graph = NULL;
+ fts_cache_t* cache = table->fts->cache;
+ dberr_t error;
+
+ ut_a(!query->intersection);
+
+ n_doc_ids = rbt_size(query->doc_ids);
+
+ /* Create the rb tree that will hold the doc ids of
+ the intersection. */
+ query->intersection = rbt_create(
+ sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
+
+ query->total_size += SIZEOF_RBT_CREATE;
+
+ /* This is to avoid decompressing the ilist if the
+ node's ilist doc ids are out of range. */
+ if (!rbt_empty(query->doc_ids) && query->multi_exist) {
+ const ib_rbt_node_t* node;
+ doc_id_t* doc_id;
+
+ node = rbt_first(query->doc_ids);
+ doc_id = rbt_value(doc_id_t, node);
+ query->lower_doc_id = *doc_id;
+
+ node = rbt_last(query->doc_ids);
+ doc_id = rbt_value(doc_id_t, node);
+ query->upper_doc_id = *doc_id;
+
+ } else {
+ query->lower_doc_id = 0;
+ query->upper_doc_id = 0;
+ }
+
+ /* Search the cache for a matching word first. */
+
+ rw_lock_x_lock(&cache->lock);
+
+ /* Search for the index specific cache. */
+ index_cache = fts_find_index_cache(cache, query->index);
+
+ /* Must find the index cache. */
+ ut_a(index_cache != NULL);
+
+ if (query->cur_node->term.wildcard) {
+ /* Wildcard search the index cache */
+ fts_cache_find_wildcard(query, index_cache, token);
+ } else {
+ nodes = fts_cache_find_word(index_cache, token);
+
+ for (i = 0; nodes && i < ib_vector_size(nodes)
+ && query->error == DB_SUCCESS; ++i) {
+ const fts_node_t* node;
+
+ node = static_cast<const fts_node_t*>(
+ ib_vector_get_const(nodes, i));
+
+ fts_query_check_node(query, token, node);
+ }
+ }
+
+ rw_lock_x_unlock(&cache->lock);
+
+ /* error is passed by 'query->error' */
+ if (query->error != DB_SUCCESS) {
+ ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT);
+ return(query->error);
+ }
+
+ /* Setup the callback args for filtering and
+ consolidating the ilist. */
+ fetch.read_arg = query;
+ fetch.read_record = fts_query_index_fetch_nodes;
+
+ error = fts_index_fetch_nodes(
+ trx, &graph, &query->fts_index_table, token, &fetch);
+
+ /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */
+ ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS));
+ if (error != DB_SUCCESS) {
+ query->error = error;
+ }
+
+ fts_que_graph_free(graph);
+
+ if (query->error == DB_SUCCESS) {
+			/* Make the intersection (rb tree) the current doc id
+ set and free the old set. */
+ fts_query_free_doc_ids(query, query->doc_ids);
+ query->doc_ids = query->intersection;
+ query->intersection = NULL;
+
+ ut_a(!query->multi_exist || (query->multi_exist
+ && rbt_size(query->doc_ids) <= n_doc_ids));
+ }
+ }
+
+ return(query->error);
+}
+
+/*****************************************************************//**
+Query index cache.
+@return DB_SUCCESS if all go well */
+static
+dberr_t
+fts_query_cache(
+/*============*/
+ fts_query_t* query, /*!< in/out: query instance */
+ const fts_string_t* token) /*!< in: token to search */
+{
+ const fts_index_cache_t*index_cache;
+ dict_table_t* table = query->index->table;
+ fts_cache_t* cache = table->fts->cache;
+
+ /* Search the cache for a matching word first. */
+ rw_lock_x_lock(&cache->lock);
+
+ /* Search for the index specific cache. */
+ index_cache = fts_find_index_cache(cache, query->index);
+
+ /* Must find the index cache. */
+ ut_a(index_cache != NULL);
+
+ if (query->cur_node->term.wildcard
+ && query->flags != FTS_PROXIMITY
+ && query->flags != FTS_PHRASE) {
+ /* Wildcard search the index cache */
+ fts_cache_find_wildcard(query, index_cache, token);
+ } else {
+ const ib_vector_t* nodes;
+ ulint i;
+
+ nodes = fts_cache_find_word(index_cache, token);
+
+ for (i = 0; nodes && i < ib_vector_size(nodes)
+ && query->error == DB_SUCCESS; ++i) {
+ const fts_node_t* node;
+
+ node = static_cast<const fts_node_t*>(
+ ib_vector_get_const(nodes, i));
+
+ fts_query_check_node(query, token, node);
+ }
+ }
+
+ rw_lock_x_unlock(&cache->lock);
+
+ return(query->error);
+}
+
+/*****************************************************************//**
+Set union.
+@return DB_SUCCESS if all go well */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_query_union(
+/*============*/
+ fts_query_t* query, /*!< in: query instance */
+ fts_string_t* token) /*!< in: token to search */
+{
+ fts_fetch_t fetch;
+ ulint n_doc_ids = 0;
+ trx_t* trx = query->trx;
+ que_t* graph = NULL;
+ dberr_t error;
+
+ ut_a(query->oper == FTS_NONE || query->oper == FTS_DECR_RATING ||
+ query->oper == FTS_NEGATE || query->oper == FTS_INCR_RATING);
+
+#ifdef FTS_INTERNAL_DIAG_PRINT
+ {
+ ib::info out;
+ out << "UNION: Searching: '";
+ out.write(token->f_str, token->f_len);
+ out << "'";
+ }
+#endif
+
+ if (query->doc_ids) {
+ n_doc_ids = rbt_size(query->doc_ids);
+ }
+
+ if (token->f_len == 0) {
+ return(query->error);
+ }
+
+ fts_query_cache(query, token);
+
+ /* Setup the callback args for filtering and
+ consolidating the ilist. */
+ fetch.read_arg = query;
+ fetch.read_record = fts_query_index_fetch_nodes;
+
+ /* Read the nodes from disk. */
+ error = fts_index_fetch_nodes(
+ trx, &graph, &query->fts_index_table, token, &fetch);
+
+ /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */
+ ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS));
+ if (error != DB_SUCCESS) {
+ query->error = error;
+ }
+
+ fts_que_graph_free(graph);
+
+ if (query->error == DB_SUCCESS) {
+
+ /* The size can't decrease. */
+ ut_a(rbt_size(query->doc_ids) >= n_doc_ids);
+
+		/* Calculate the number of doc ids that were added to
+ the current doc id set. */
+ if (query->doc_ids) {
+ n_doc_ids = rbt_size(query->doc_ids) - n_doc_ids;
+ }
+ }
+
+ return(query->error);
+}
+
+/*****************************************************************//**
+Depending upon the current query operator, process the doc id.
+@return DB_SUCCESS if all goes well,
+or DB_FTS_EXCEED_RESULT_CACHE_LIMIT */
+static
+dberr_t
+fts_query_process_doc_id(
+/*=====================*/
+ fts_query_t* query, /*!< in: query instance */
+ doc_id_t doc_id, /*!< in: doc id to process */
+ fts_rank_t rank) /*!< in: if non-zero, it is the
+ rank associated with the doc_id */
+{
+ if (query->flags == FTS_OPT_RANKING) {
+ return(DB_SUCCESS);
+ }
+
+ switch (query->oper) {
+ case FTS_NONE:
+ fts_query_union_doc_id(query, doc_id, rank);
+ break;
+
+ case FTS_EXIST:
+ fts_query_intersect_doc_id(query, doc_id, rank);
+ break;
+
+ case FTS_IGNORE:
+ fts_query_remove_doc_id(query, doc_id);
+ break;
+
+ case FTS_NEGATE:
+ fts_query_change_ranking(query, doc_id, TRUE);
+ break;
+
+ case FTS_DECR_RATING:
+ fts_query_union_doc_id(query, doc_id, rank);
+ fts_query_change_ranking(query, doc_id, TRUE);
+ break;
+
+ case FTS_INCR_RATING:
+ fts_query_union_doc_id(query, doc_id, rank);
+ fts_query_change_ranking(query, doc_id, FALSE);
+ break;
+
+ default:
+ ut_error;
+ }
+
+ if (query->total_size > fts_result_cache_limit) {
+ return(DB_FTS_EXCEED_RESULT_CACHE_LIMIT);
+ } else {
+ return(DB_SUCCESS);
+ }
+}
+
+/*****************************************************************//**
+Merge two result sets. */
+static
+dberr_t
+fts_merge_doc_ids(
+/*==============*/
+ fts_query_t* query, /*!< in,out: query instance */
+ const ib_rbt_t* doc_ids) /*!< in: result set to merge */
+{
+ const ib_rbt_node_t* node;
+
+ DBUG_ENTER("fts_merge_doc_ids");
+
+ ut_a(!query->intersection);
+
+ /* To process FTS_EXIST operation (intersection), we need
+ to create a new result set for fts_query_intersect(). */
+ if (query->oper == FTS_EXIST) {
+
+ query->intersection = rbt_create(
+ sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
+
+ query->total_size += SIZEOF_RBT_CREATE;
+ }
+
+ /* Merge the elements to the result set. */
+ for (node = rbt_first(doc_ids); node; node = rbt_next(doc_ids, node)) {
+ fts_ranking_t* ranking;
+ ulint pos = 0;
+ fts_string_t word;
+
+ ranking = rbt_value(fts_ranking_t, node);
+
+ query->error = fts_query_process_doc_id(
+ query, ranking->doc_id, ranking->rank);
+
+ if (query->error != DB_SUCCESS) {
+ DBUG_RETURN(query->error);
+ }
+
+ /* Merge words. Don't need to take operator into account. */
+ ut_a(ranking->words);
+ while (fts_ranking_words_get_next(query, ranking, &pos, &word)) {
+ fts_query_add_word_to_document(query, ranking->doc_id,
+ &word);
+ }
+ }
+
+ /* If it is an intersection operation, reset query->doc_ids
+ to query->intersection and free the old result list. */
+ if (query->oper == FTS_EXIST && query->intersection != NULL) {
+ fts_query_free_doc_ids(query, query->doc_ids);
+ query->doc_ids = query->intersection;
+ query->intersection = NULL;
+ }
+
+ DBUG_RETURN(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Skip non-whitespace in a string. Move ptr to the next word boundary.
+@return pointer to first whitespace character or end */
+UNIV_INLINE
+byte*
+fts_query_skip_word(
+/*================*/
+ byte* ptr, /*!< in: start of scan */
+ const byte* end) /*!< in: pointer to end of string */
+{
+ /* TODO: Does this have to be UTF-8 too ? */
+ while (ptr < end && !(ispunct(*ptr) || isspace(*ptr))) {
+ ++ptr;
+ }
+
+ return(ptr);
+}
+
+/*****************************************************************//**
+Check whether the remaining terms in the phrase match the text.
+@return TRUE if matched else FALSE */
+static
+ibool
+fts_query_match_phrase_terms(
+/*=========================*/
+ fts_phrase_t* phrase, /*!< in: phrase to match */
+ byte** start, /*!< in/out: text to search, we can't
+					make this const because we need to
+ first convert the string to
+ lowercase */
+ const byte* end, /*!< in: pointer to the end of
+ the string to search */
+ mem_heap_t* heap) /*!< in: heap */
+{
+ ulint i;
+ byte* ptr = *start;
+ const ib_vector_t* tokens = phrase->tokens;
+ ulint distance = phrase->distance;
+
+ /* We check only from the second term onwards, since the first
+ must have matched otherwise we wouldn't be here. */
+ for (i = 1; ptr < end && i < ib_vector_size(tokens); /* No op */) {
+ fts_string_t match;
+ fts_string_t cmp_str;
+ const fts_string_t* token;
+ int result;
+ ulint ret;
+
+ ret = innobase_mysql_fts_get_token(
+ phrase->charset, ptr,
+ const_cast<byte*>(end), &match);
+
+ if (match.f_len > 0) {
+ /* Get next token to match. */
+ token = static_cast<const fts_string_t*>(
+ ib_vector_get_const(tokens, i));
+
+ fts_string_dup(&cmp_str, &match, heap);
+
+ result = innobase_fts_text_case_cmp(
+ phrase->charset, token, &cmp_str);
+
+ /* Skip the rest of the tokens if this one doesn't
+ match and the proximity distance is exceeded. */
+ if (result
+ && (distance == ULINT_UNDEFINED
+ || distance == 0)) {
+
+ break;
+ }
+
+			/* This token matched; move to the next token. */
+ if (result == 0) {
+ /* Advance the text to search by the length
+ of the last token. */
+ ptr += ret;
+
+ /* Advance to the next token. */
+ ++i;
+ } else {
+
+ ut_a(distance != ULINT_UNDEFINED);
+
+ ptr = fts_query_skip_word(ptr, end);
+ }
+
+ /* Distance can be 0 for exact matches. */
+ if (distance != ULINT_UNDEFINED && distance > 0) {
+ --distance;
+ }
+ } else {
+ ptr += ret;
+ }
+ }
+
+ *start = ptr;
+
+ /* Can't be greater than the number of elements. */
+ ut_a(i <= ib_vector_size(tokens));
+
+ /* This is the case for multiple words. */
+ if (i == ib_vector_size(tokens)) {
+ phrase->found = TRUE;
+ }
+
+ return(phrase->found);
+}
+
+/*****************************************************************//**
+Callback function to count the number of words in position ranges,
+and see whether the word count is within the specified "phrase->distance"
+@return true if the number of words is within the "distance" */
+static
+bool
+fts_proximity_is_word_in_range(
+/*===========================*/
+ const fts_phrase_t*
+ phrase, /*!< in: phrase with the search info */
+ byte* start, /*!< in: text to search */
+ ulint total_len) /*!< in: length of text */
+{
+ fts_proximity_t* proximity_pos = phrase->proximity_pos;
+
+ ut_ad(proximity_pos->n_pos == proximity_pos->min_pos.size());
+ ut_ad(proximity_pos->n_pos == proximity_pos->max_pos.size());
+
+ /* Search each matched position pair (with min and max positions)
+ and count the number of words in the range */
+ for (ulint i = 0; i < proximity_pos->n_pos; i++) {
+ ulint cur_pos = proximity_pos->min_pos[i];
+ ulint n_word = 0;
+
+ ut_ad(proximity_pos->max_pos[i] <= total_len);
+
+ /* Walk through words in the range and count them */
+ while (cur_pos <= proximity_pos->max_pos[i]) {
+ ulint len;
+ fts_string_t str;
+
+ len = innobase_mysql_fts_get_token(
+ phrase->charset,
+ start + cur_pos,
+ start + total_len, &str);
+
+ if (len == 0) {
+ break;
+ }
+
+ /* Advances position with "len" bytes */
+ cur_pos += len;
+
+ /* Record the number of words */
+ if (str.f_n_char > 0) {
+ n_word++;
+ }
+
+ if (n_word > phrase->distance) {
+ break;
+ }
+ }
+
+ /* Check if the number of words is less than specified
+ "distance" */
+ if (n_word && n_word <= phrase->distance) {
+ return(true);
+ }
+ }
+
+ return(false);
+}
+
+/*****************************************************************//**
+FTS plugin parser 'mysql_add_word' callback function for phrase match
+Refer to 'st_mysql_ftparser_param' for more detail.
+@return 0 if match, or return non-zero */
+static
+int
+fts_query_match_phrase_add_word_for_parser(
+/*=======================================*/
+ MYSQL_FTPARSER_PARAM* param, /*!< in: parser param */
+ const char* word, /*!< in: token */
+ int word_len, /*!< in: token length */
+ MYSQL_FTPARSER_BOOLEAN_INFO*)
+{
+ fts_phrase_param_t* phrase_param;
+ fts_phrase_t* phrase;
+ const ib_vector_t* tokens;
+ fts_string_t match;
+ fts_string_t cmp_str;
+ const fts_string_t* token;
+ int result;
+ mem_heap_t* heap;
+
+ phrase_param = static_cast<fts_phrase_param_t*>(param->mysql_ftparam);
+ heap = phrase_param->heap;
+ phrase = phrase_param->phrase;
+ tokens = phrase->tokens;
+
+	/* In case the plugin parser doesn't check the return value */
+ if (phrase_param->token_index == ib_vector_size(tokens)) {
+ return(1);
+ }
+
+ match.f_str = (uchar *)(word);
+ match.f_len = ulint(word_len);
+ match.f_n_char= fts_get_token_size(phrase->charset, word, match.f_len);
+
+ if (match.f_len > 0) {
+ /* Get next token to match. */
+ ut_a(phrase_param->token_index < ib_vector_size(tokens));
+ token = static_cast<const fts_string_t*>(
+ ib_vector_get_const(tokens, phrase_param->token_index));
+
+ fts_string_dup(&cmp_str, &match, heap);
+
+ result = innobase_fts_text_case_cmp(
+ phrase->charset, token, &cmp_str);
+
+ if (result == 0) {
+ phrase_param->token_index++;
+ } else {
+ return(1);
+ }
+ }
+
+ /* Can't be greater than the number of elements. */
+ ut_a(phrase_param->token_index <= ib_vector_size(tokens));
+
+ /* This is the case for multiple words. */
+ if (phrase_param->token_index == ib_vector_size(tokens)) {
+ phrase->found = TRUE;
+ }
+
+ return(static_cast<int>(phrase->found));
+}
+
+/*****************************************************************//**
+Check whether the terms in the phrase match the text.
+@return TRUE if matched else FALSE */
+static
+ibool
+fts_query_match_phrase_terms_by_parser(
+/*===================================*/
+ fts_phrase_param_t* phrase_param, /* in/out: phrase param */
+ st_mysql_ftparser* parser, /* in: plugin fts parser */
+ byte* text, /* in: text to check */
+ ulint len) /* in: text length */
+{
+ MYSQL_FTPARSER_PARAM param;
+
+ ut_a(parser);
+
+	/* Set the parameters for the parser */
+ param.mysql_parse = fts_tokenize_document_internal;
+ param.mysql_add_word = fts_query_match_phrase_add_word_for_parser;
+ param.mysql_ftparam = phrase_param;
+ param.cs = phrase_param->phrase->charset;
+ param.doc = reinterpret_cast<char*>(text);
+ param.length = static_cast<int>(len);
+ param.mode= MYSQL_FTPARSER_WITH_STOPWORDS;
+
+ PARSER_INIT(parser, &param);
+ parser->parse(&param);
+ PARSER_DEINIT(parser, &param);
+
+ return(phrase_param->phrase->found);
+}
+
+/*****************************************************************//**
+Match a phrase against the document text fetched by fts_query_fetch_document.
+@return TRUE if matched else FALSE */
+static
+ibool
+fts_query_match_phrase(
+/*===================*/
+ fts_phrase_t* phrase, /*!< in: phrase to match */
+ byte* start, /*!< in: text to search, we can't make
+				this const because we need to first
+ convert the string to lowercase */
+ ulint cur_len, /*!< in: length of text */
+ ulint prev_len, /*!< in: total length for searched
+ doc fields*/
+ mem_heap_t* heap) /* heap */
+{
+ ulint i;
+ const fts_string_t* first;
+ const byte* end = start + cur_len;
+ const ib_vector_t* tokens = phrase->tokens;
+ const ib_vector_t* positions = phrase->match->positions;
+
+ ut_a(!phrase->found);
+ ut_a(phrase->match->doc_id > 0);
+ ut_a(ib_vector_size(tokens) > 0);
+ ut_a(ib_vector_size(positions) > 0);
+
+ first = static_cast<const fts_string_t*>(
+ ib_vector_get_const(tokens, 0));
+
+ ut_a(phrase->match->start < ib_vector_size(positions));
+
+ for (i = phrase->match->start; i < ib_vector_size(positions); ++i) {
+ ulint pos;
+ byte* ptr = start;
+
+ pos = *(ulint*) ib_vector_get_const(positions, i);
+
+ if (pos == ULINT_UNDEFINED) {
+ break;
+ }
+
+ if (pos < prev_len) {
+ continue;
+ }
+
+ /* Document positions are calculated from the beginning
+ of the first field, need to save the length for each
+		searched field to adjust the doc position when searching
+		phrases. */
+ pos -= prev_len;
+ ptr = start + pos;
+
+ /* Within limits ? */
+ if (ptr >= end) {
+ break;
+ }
+
+ if (phrase->parser) {
+ fts_phrase_param_t phrase_param;
+
+ phrase_param.phrase = phrase;
+ phrase_param.token_index = 0;
+ phrase_param.heap = heap;
+
+ if (fts_query_match_phrase_terms_by_parser(
+ &phrase_param,
+ phrase->parser,
+ ptr,
+ ulint(end - ptr))) {
+ break;
+ }
+ } else {
+ fts_string_t match;
+ fts_string_t cmp_str;
+ ulint ret;
+
+ match.f_str = ptr;
+ ret = innobase_mysql_fts_get_token(
+ phrase->charset, start + pos,
+ const_cast<byte*>(end), &match);
+
+ if (match.f_len == 0) {
+ break;
+ }
+
+ fts_string_dup(&cmp_str, &match, heap);
+
+ if (innobase_fts_text_case_cmp(
+ phrase->charset, first, &cmp_str) == 0) {
+
+ /* This is the case for the single word
+ in the phrase. */
+ if (ib_vector_size(phrase->tokens) == 1) {
+ phrase->found = TRUE;
+ break;
+ }
+
+ ptr += ret;
+
+ /* Match the remaining terms in the phrase. */
+ if (fts_query_match_phrase_terms(phrase, &ptr,
+ end, heap)) {
+ break;
+ }
+ }
+ }
+ }
+
+ return(phrase->found);
+}
+
+/*****************************************************************//**
+Callback function to fetch and search the document.
+@return whether the phrase is found */
+static
+ibool
+fts_query_fetch_document(
+/*=====================*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: fts_doc_t* */
+{
+
+ que_node_t* exp;
+ sel_node_t* node = static_cast<sel_node_t*>(row);
+ fts_phrase_t* phrase = static_cast<fts_phrase_t*>(user_arg);
+ ulint prev_len = 0;
+ ulint total_len = 0;
+ byte* document_text = NULL;
+
+ exp = node->select_list;
+
+ phrase->found = FALSE;
+
+ /* For proximity search, we will need to get the whole document
+ from all fields, so first count the total length of the document
+ from all the fields */
+ if (phrase->proximity_pos) {
+ while (exp) {
+ ulint field_len;
+ dfield_t* dfield = que_node_get_val(exp);
+ byte* data = static_cast<byte*>(
+ dfield_get_data(dfield));
+
+ if (dfield_is_ext(dfield)) {
+ ulint local_len = dfield_get_len(dfield);
+
+ local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+
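+				/* The externally stored part's length
+				is kept in the 20-byte BLOB pointer at
+				the end of the local prefix, as an
+				8-byte integer at offset BTR_EXTERN_LEN;
+				read its low 32 bits here. */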
+ field_len = mach_read_from_4(
+ data + local_len + BTR_EXTERN_LEN + 4);
+ } else {
+ field_len = dfield_get_len(dfield);
+ }
+
+ if (field_len != UNIV_SQL_NULL) {
+ total_len += field_len + 1;
+ }
+
+ exp = que_node_get_next(exp);
+ }
+
+ document_text = static_cast<byte*>(mem_heap_zalloc(
+ phrase->heap, total_len));
+
+ if (!document_text) {
+ return(FALSE);
+ }
+ }
+
+ exp = node->select_list;
+
+ while (exp) {
+ dfield_t* dfield = que_node_get_val(exp);
+ byte* data = static_cast<byte*>(
+ dfield_get_data(dfield));
+ ulint cur_len;
+
+ if (dfield_is_ext(dfield)) {
+ data = btr_copy_externally_stored_field(
+ &cur_len, data, phrase->zip_size,
+ dfield_get_len(dfield), phrase->heap);
+ } else {
+ cur_len = dfield_get_len(dfield);
+ }
+
+ if (cur_len != UNIV_SQL_NULL && cur_len != 0) {
+ if (phrase->proximity_pos) {
+ ut_ad(prev_len + cur_len <= total_len);
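+				/* Concatenate all the searched fields
+				into document_text; the buffer was
+				zero-filled, so the extra byte skipped
+				below separates the fields. */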
+ memcpy(document_text + prev_len, data, cur_len);
+ } else {
+ /* For phrase search */
+ phrase->found =
+ fts_query_match_phrase(
+ phrase,
+ static_cast<byte*>(data),
+ cur_len, prev_len,
+ phrase->heap);
+ }
+
+ /* Document positions are calculated from the beginning
+ of the first field, need to save the length for each
+			searched field to adjust the doc position when
+			searching phrases. */
+ prev_len += cur_len + 1;
+ }
+
+ if (phrase->found) {
+ break;
+ }
+
+ exp = que_node_get_next(exp);
+ }
+
+ if (phrase->proximity_pos) {
+ ut_ad(prev_len <= total_len);
+
+ phrase->found = fts_proximity_is_word_in_range(
+ phrase, document_text, total_len);
+ }
+
+ return(phrase->found);
+}
+
+#if 0
+/********************************************************************
+Callback function to check whether a record was found or not. */
+static
+ibool
+fts_query_select(
+/*=============*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: fts_doc_t* */
+{
+ int i;
+ que_node_t* exp;
+ sel_node_t* node = row;
+ fts_select_t* select = user_arg;
+
+ ut_a(select->word_freq);
+ ut_a(select->word_freq->doc_freqs);
+
+ exp = node->select_list;
+
+ for (i = 0; exp && !select->found; ++i) {
+ dfield_t* dfield = que_node_get_val(exp);
+ void* data = dfield_get_data(dfield);
+ ulint len = dfield_get_len(dfield);
+
+ switch (i) {
+ case 0: /* DOC_COUNT */
+ if (len != UNIV_SQL_NULL && len != 0) {
+
+ select->word_freq->doc_count +=
+ mach_read_from_4(data);
+ }
+ break;
+
+ case 1: /* ILIST */
+ if (len != UNIV_SQL_NULL && len != 0) {
+
+ fts_query_find_doc_id(select, data, len);
+ }
+ break;
+
+ default:
+ ut_error;
+ }
+
+ exp = que_node_get_next(exp);
+ }
+
+ return(FALSE);
+}
+
+/********************************************************************
+Read the rows from the FTS index that match the word and where the
+doc id is between the first and last doc id.
+@return DB_SUCCESS if all go well else error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_query_find_term(
+/*================*/
+ fts_query_t* query, /*!< in: FTS query state */
+ que_t** graph, /*!< in: prepared statement */
+ const fts_string_t* word, /*!< in: the word to fetch */
+ doc_id_t doc_id, /*!< in: doc id to match */
+ ulint* min_pos,/*!< in/out: pos found must be
+ greater than this minimum value. */
+ ibool* found) /*!< out: TRUE if found else FALSE */
+{
+ pars_info_t* info;
+ dberr_t error;
+ fts_select_t select;
+ doc_id_t match_doc_id;
+ trx_t* trx = query->trx;
+ char table_name[MAX_FULL_NAME_LEN];
+
+ trx->op_info = "fetching FTS index matching nodes";
+
+ if (*graph) {
+ info = (*graph)->info;
+ } else {
+ ulint selected;
+
+ info = pars_info_create();
+
+ selected = fts_select_index(*word->f_str);
+ query->fts_index_table.suffix = fts_get_suffix(selected);
+
+ fts_get_table_name(&query->fts_index_table, table_name);
+ pars_info_bind_id(info, true, "index_table_name", table_name);
+ }
+
+ select.found = FALSE;
+ select.doc_id = doc_id;
+ select.min_pos = *min_pos;
+ select.word_freq = fts_query_add_word_freq(query, word->f_str);
+
+ pars_info_bind_function(info, "my_func", fts_query_select, &select);
+ pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len);
+
+ /* Convert to "storage" byte order. */
+ fts_write_doc_id((byte*) &match_doc_id, doc_id);
+
+ fts_bind_doc_id(info, "min_doc_id", &match_doc_id);
+
+ fts_bind_doc_id(info, "max_doc_id", &match_doc_id);
+
+ if (!*graph) {
+
+ *graph = fts_parse_sql(
+ &query->fts_index_table,
+ info,
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR c IS"
+ " SELECT doc_count, ilist\n"
+ " FROM $index_table_name\n"
+ " WHERE word LIKE :word AND"
+ " first_doc_id <= :min_doc_id AND"
+ " last_doc_id >= :max_doc_id\n"
+ " ORDER BY first_doc_id;\n"
+ "BEGIN\n"
+ "\n"
+ "OPEN c;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH c INTO my_func();\n"
+ " IF c % NOTFOUND THEN\n"
+ " EXIT;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE c;");
+ }
+
+ for (;;) {
+ error = fts_eval_sql(trx, *graph);
+
+ if (error == DB_SUCCESS) {
+
+ break; /* Exit the loop. */
+ } else {
+
+ if (error == DB_LOCK_WAIT_TIMEOUT) {
+ ib::warn() << "lock wait timeout reading FTS"
+ " index. Retrying!";
+
+ trx->error_state = DB_SUCCESS;
+ } else {
+ ib::error() << error
+ << " while reading FTS index.";
+
+ break; /* Exit the loop. */
+ }
+ }
+ }
+
+ /* Value to return */
+ *found = select.found;
+
+ if (*found) {
+ *min_pos = select.min_pos;
+ }
+
+ return(error);
+}
+
+/********************************************************************
+Callback aggregator for int columns. */
+static
+ibool
+fts_query_sum(
+/*==========*/
+ /*!< out: always returns TRUE */
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: ulint* */
+{
+
+ que_node_t* exp;
+ sel_node_t* node = row;
+ ulint* total = user_arg;
+
+ exp = node->select_list;
+
+ while (exp) {
+ dfield_t* dfield = que_node_get_val(exp);
+ void* data = dfield_get_data(dfield);
+ ulint len = dfield_get_len(dfield);
+
+ if (len != UNIV_SQL_NULL && len != 0) {
+ *total += mach_read_from_4(data);
+ }
+
+ exp = que_node_get_next(exp);
+ }
+
+ return(TRUE);
+}
+
+/********************************************************************
+Calculate the total number of documents that contain a particular word (term).
+@return DB_SUCCESS if all go well else error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_query_total_docs_containing_term(
+/*=================================*/
+ fts_query_t* query, /*!< in: FTS query state */
+ const fts_string_t* word, /*!< in: the word to check */
+ ulint* total) /*!< out: documents containing word */
+{
+ pars_info_t* info;
+ dberr_t error;
+ que_t* graph;
+ ulint selected;
+ trx_t* trx = query->trx;
+	char			table_name[MAX_FULL_NAME_LEN];
+
+ trx->op_info = "fetching FTS index document count";
+
+ *total = 0;
+
+ info = pars_info_create();
+
+ pars_info_bind_function(info, "my_func", fts_query_sum, total);
+ pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len);
+
+ selected = fts_select_index(*word->f_str);
+
+ query->fts_index_table.suffix = fts_get_suffix(selected);
+
+ fts_get_table_name(&query->fts_index_table, table_name);
+
+ pars_info_bind_id(info, true, "index_table_name", table_name);
+
+ graph = fts_parse_sql(
+ &query->fts_index_table,
+ info,
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR c IS"
+ " SELECT doc_count\n"
+ " FROM $index_table_name\n"
+ " WHERE word = :word"
+ " ORDER BY first_doc_id;\n"
+ "BEGIN\n"
+ "\n"
+ "OPEN c;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH c INTO my_func();\n"
+ " IF c % NOTFOUND THEN\n"
+ " EXIT;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE c;");
+
+ for (;;) {
+ error = fts_eval_sql(trx, graph);
+
+ if (error == DB_SUCCESS) {
+
+ break; /* Exit the loop. */
+ } else {
+
+ if (error == DB_LOCK_WAIT_TIMEOUT) {
+ ib::warn() << "lock wait timeout reading FTS"
+ " index. Retrying!";
+
+ trx->error_state = DB_SUCCESS;
+ } else {
+ ib::error() << error
+ << " while reading FTS index.";
+
+ break; /* Exit the loop. */
+ }
+ }
+ }
+
+ fts_que_graph_free(graph);
+
+ return(error);
+}
+
+/********************************************************************
+Get the total number of words in a document.
+@return DB_SUCCESS if all go well else error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_query_terms_in_document(
+/*========================*/
+ fts_query_t* query, /*!< in: FTS query state */
+	doc_id_t	doc_id,		/*!< in: the document to check */
+ ulint* total) /*!< out: total words in document */
+{
+ pars_info_t* info;
+ dberr_t error;
+ que_t* graph;
+ doc_id_t read_doc_id;
+ trx_t* trx = query->trx;
+ char table_name[MAX_FULL_NAME_LEN];
+
+ trx->op_info = "fetching FTS document term count";
+
+ *total = 0;
+
+ info = pars_info_create();
+
+ pars_info_bind_function(info, "my_func", fts_query_sum, total);
+
+ /* Convert to "storage" byte order. */
+ fts_write_doc_id((byte*) &read_doc_id, doc_id);
+ fts_bind_doc_id(info, "doc_id", &read_doc_id);
+
+ query->fts_index_table.suffix = "DOC_ID";
+
+ fts_get_table_name(&query->fts_index_table, table_name);
+
+ pars_info_bind_id(info, true, "index_table_name", table_name);
+
+ graph = fts_parse_sql(
+ &query->fts_index_table,
+ info,
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR c IS"
+ " SELECT count\n"
+ " FROM $index_table_name\n"
+		" WHERE doc_id = :doc_id;\n"
+		"BEGIN\n"
+ "\n"
+ "OPEN c;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH c INTO my_func();\n"
+ " IF c % NOTFOUND THEN\n"
+ " EXIT;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE c;");
+
+ for (;;) {
+ error = fts_eval_sql(trx, graph);
+
+ if (error == DB_SUCCESS) {
+
+ break; /* Exit the loop. */
+ } else {
+
+ if (error == DB_LOCK_WAIT_TIMEOUT) {
+ ib::warn() << "lock wait timeout reading FTS"
+ " doc id table. Retrying!";
+
+ trx->error_state = DB_SUCCESS;
+ } else {
+ ib::error() << error << " while reading FTS"
+ " doc id table.";
+
+ break; /* Exit the loop. */
+ }
+ }
+ }
+
+ fts_que_graph_free(graph);
+
+ return(error);
+}
+#endif
+
+/*****************************************************************//**
+Retrieve the document and match the phrase tokens.
+@return DB_SUCCESS or error code */
+MY_ATTRIBUTE((nonnull(1,2,3,6), warn_unused_result))
+static
+dberr_t
+fts_query_match_document(
+/*=====================*/
+ ib_vector_t* tokens, /*!< in: phrase tokens */
+ fts_get_doc_t* get_doc, /*!< in: table and prepared statements */
+ fts_match_t* match, /*!< in: doc id and positions */
+ ulint distance, /*!< in: proximity distance */
+ st_mysql_ftparser* parser, /*!< in: fts plugin parser */
+ ibool* found) /*!< out: TRUE if phrase found */
+{
+ dberr_t error;
+ fts_phrase_t phrase(get_doc->index_cache->index->table);
+
+ phrase.match = match; /* Positions to match */
+ phrase.tokens = tokens; /* Tokens to match */
+ phrase.distance = distance;
+ phrase.charset = get_doc->index_cache->charset;
+ phrase.heap = mem_heap_create(512);
+ phrase.parser = parser;
+
+ *found = phrase.found = FALSE;
+
+ error = fts_doc_fetch_by_doc_id(
+ get_doc, match->doc_id, NULL, FTS_FETCH_DOC_BY_ID_EQUAL,
+ fts_query_fetch_document, &phrase);
+
+ if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
+ ib::error() << "(" << error << ") matching document.";
+ } else {
+ *found = phrase.found;
+ }
+
+ mem_heap_free(phrase.heap);
+
+ return(error);
+}
+
+/*****************************************************************//**
+This function fetches the original documents and counts the words
+between the matching words to check that they are within the
+specified proximity distance.
+@return true if the words are within the specified distance */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+bool
+fts_query_is_in_proximity_range(
+/*============================*/
+ const fts_query_t* query, /*!< in: query instance */
+	fts_match_t**		match,		/*!< in: matched doc ids and positions */
+ fts_proximity_t* qualified_pos) /*!< in: position info for
+ qualified ranges */
+{
+ fts_get_doc_t get_doc;
+ fts_cache_t* cache = query->index->table->fts->cache;
+ dberr_t err;
+
+ memset(&get_doc, 0x0, sizeof(get_doc));
+
+ rw_lock_x_lock(&cache->lock);
+ get_doc.index_cache = fts_find_index_cache(cache, query->index);
+ rw_lock_x_unlock(&cache->lock);
+ ut_a(get_doc.index_cache != NULL);
+
+ fts_phrase_t phrase(get_doc.index_cache->index->table);
+
+ phrase.distance = query->distance;
+ phrase.charset = get_doc.index_cache->charset;
+ phrase.heap = mem_heap_create(512);
+ phrase.proximity_pos = qualified_pos;
+ phrase.found = FALSE;
+
+ err = fts_doc_fetch_by_doc_id(
+ &get_doc, match[0]->doc_id, NULL, FTS_FETCH_DOC_BY_ID_EQUAL,
+ fts_query_fetch_document, &phrase);
+
+ if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+ ib::error() << "(" << err << ") in verification"
+ " phase of proximity search";
+ }
+
+ /* Free the prepared statement. */
+ if (get_doc.get_document_graph) {
+ fts_que_graph_free(get_doc.get_document_graph);
+ get_doc.get_document_graph = NULL;
+ }
+
+ mem_heap_free(phrase.heap);
+
+ return(err == DB_SUCCESS && phrase.found);
+}
+
+/*****************************************************************//**
+Iterate over the matched document ids and search for the
+actual phrase in the text.
+@return DB_SUCCESS if all OK */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_query_search_phrase(
+/*====================*/
+ fts_query_t* query, /*!< in: query instance */
+ ib_vector_t* orig_tokens, /*!< in: tokens to search,
+ with any stopwords in the
+ original phrase */
+	ib_vector_t*	tokens)		/*!< in: tokens that do
+ not include stopwords and
+ can be used to calculate
+ ranking */
+{
+ ulint i;
+ fts_get_doc_t get_doc;
+ ulint n_matched;
+ fts_cache_t* cache = query->index->table->fts->cache;
+
+ n_matched = ib_vector_size(query->matched);
+
+ /* Setup the doc retrieval infrastructure. */
+ memset(&get_doc, 0x0, sizeof(get_doc));
+
+ rw_lock_x_lock(&cache->lock);
+
+ get_doc.index_cache = fts_find_index_cache(cache, query->index);
+
+ /* Must find the index cache */
+ ut_a(get_doc.index_cache != NULL);
+
+ rw_lock_x_unlock(&cache->lock);
+
+#ifdef FTS_INTERNAL_DIAG_PRINT
+ ib::info() << "Start phrase search";
+#endif
+
+ /* Read the document from disk and do the actual
+ match, matching documents will be added to the current
+ doc id set. */
+ for (i = 0; i < n_matched && query->error == DB_SUCCESS; ++i) {
+ fts_match_t* match;
+ ibool found = FALSE;
+
+ match = static_cast<fts_match_t*>(
+ ib_vector_get(query->matched, i));
+
+ /* Skip the document ids that were filtered out by
+ an earlier pass. */
+ if (match->doc_id != 0) {
+
+ query->error = fts_query_match_document(
+ orig_tokens, &get_doc, match,
+ query->distance, query->parser, &found);
+
+ if (query->error == DB_SUCCESS && found) {
+ ulint z;
+
+ query->error = fts_query_process_doc_id(query,
+ match->doc_id, 0);
+ if (query->error != DB_SUCCESS) {
+ goto func_exit;
+ }
+
+ for (z = 0; z < ib_vector_size(tokens); z++) {
+ fts_string_t* token;
+ token = static_cast<fts_string_t*>(
+ ib_vector_get(tokens, z));
+ fts_query_add_word_to_document(
+ query, match->doc_id, token);
+ }
+ }
+ }
+ }
+
+func_exit:
+ /* Free the prepared statement. */
+ if (get_doc.get_document_graph) {
+ fts_que_graph_free(get_doc.get_document_graph);
+ get_doc.get_document_graph = NULL;
+ }
+
+ return(query->error);
+}
+
+/** Split the phrase into tokens
+@param[in,out] query query instance
+@param[in] node query node to search
+@param[in,out] tokens token vector
+@param[in,out]	orig_tokens	original node tokens, including stopwords
+@param[in,out] heap mem heap */
+static
+void
+fts_query_phrase_split(
+ fts_query_t* query,
+ const fts_ast_node_t* node,
+ ib_vector_t* tokens,
+ ib_vector_t* orig_tokens,
+ mem_heap_t* heap)
+{
+ fts_string_t phrase;
+ ulint len = 0;
+ ulint cur_pos = 0;
+ fts_ast_node_t* term_node = NULL;
+
+ if (node->type == FTS_AST_TEXT) {
+ phrase.f_str = node->text.ptr->str;
+ phrase.f_len = node->text.ptr->len;
+ len = phrase.f_len;
+ } else {
+ ut_ad(node->type == FTS_AST_PARSER_PHRASE_LIST);
+ phrase.f_str = NULL;
+ phrase.f_len = 0;
+ term_node = node->list.head;
+ }
+
+ while (true) {
+ fts_cache_t* cache = query->index->table->fts->cache;
+ ulint cur_len;
+ fts_string_t result_str;
+
+ if (node->type == FTS_AST_TEXT) {
+ if (cur_pos >= len) {
+ break;
+ }
+
+ cur_len = innobase_mysql_fts_get_token(
+ query->fts_index_table.charset,
+ reinterpret_cast<const byte*>(phrase.f_str)
+ + cur_pos,
+ reinterpret_cast<const byte*>(phrase.f_str)
+ + len,
+ &result_str);
+
+ if (cur_len == 0) {
+ break;
+ }
+
+ cur_pos += cur_len;
+ } else {
+ ut_ad(node->type == FTS_AST_PARSER_PHRASE_LIST);
+ /* Term node in parser phrase list */
+ if (term_node == NULL) {
+ break;
+ }
+
+ ut_a(term_node->type == FTS_AST_TERM);
+ result_str.f_str = term_node->term.ptr->str;
+ result_str.f_len = term_node->term.ptr->len;
+ result_str.f_n_char = fts_get_token_size(
+ query->fts_index_table.charset,
+ reinterpret_cast<char*>(result_str.f_str),
+ result_str.f_len);
+
+ term_node = term_node->next;
+ }
+
+ if (result_str.f_n_char == 0) {
+ continue;
+ }
+
+ fts_string_t* token = static_cast<fts_string_t*>(
+ ib_vector_push(tokens, NULL));
+ fts_string_dup(token, &result_str, heap);
+
+ if (fts_check_token(
+ &result_str,
+ cache->stopword_info.cached_stopword,
+ query->fts_index_table.charset)) {
+ /* Add the word to the RB tree so that we can
+			calculate its frequency within a document. */
+ fts_query_add_word_freq(query, token);
+ } else {
+ ib_vector_pop(tokens);
+ }
+
+		/* We store all words, including stopwords, in the
+		"orig_tokens" vector, but skip any leading words that
+		are stopwords. */
+ if (!ib_vector_is_empty(tokens)) {
+ fts_string_t* orig_token = static_cast<fts_string_t*>(
+ ib_vector_push(orig_tokens, NULL));
+
+ orig_token->f_str = token->f_str;
+ orig_token->f_len = token->f_len;
+ }
+ }
+}
+
+/*****************************************************************//**
+Text/Phrase search.
+@return DB_SUCCESS or error code */
+static MY_ATTRIBUTE((warn_unused_result))
+dberr_t
+fts_query_phrase_search(
+/*====================*/
+ fts_query_t* query, /*!< in: query instance */
+ const fts_ast_node_t* node) /*!< in: node to search */
+{
+ ib_vector_t* tokens;
+ ib_vector_t* orig_tokens;
+ mem_heap_t* heap = mem_heap_create(sizeof(fts_string_t));
+ ib_alloc_t* heap_alloc;
+ ulint num_token;
+
+ heap_alloc = ib_heap_allocator_create(heap);
+
+ tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4);
+ orig_tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4);
+
+ if (query->distance != ULINT_UNDEFINED && query->distance > 0) {
+ query->flags = FTS_PROXIMITY;
+ } else {
+ query->flags = FTS_PHRASE;
+ }
+
+ /* Split the phrase into tokens. */
+ fts_query_phrase_split(query, node, tokens, orig_tokens, heap);
+
+ num_token = ib_vector_size(tokens);
+ if (num_token > MAX_PROXIMITY_ITEM) {
+ query->error = DB_FTS_TOO_MANY_WORDS_IN_PHRASE;
+ goto func_exit;
+ }
+
+ ut_ad(ib_vector_size(orig_tokens) >= num_token);
+
+ /* Ignore empty strings. */
+ if (num_token > 0) {
+ fts_string_t* token = NULL;
+ fts_fetch_t fetch;
+ trx_t* trx = query->trx;
+ fts_ast_oper_t oper = query->oper;
+ que_t* graph = NULL;
+ ulint i;
+ dberr_t error;
+
+ /* Create the vector for storing matching document ids
+ and the positions of the first token of the phrase. */
+ if (!query->matched) {
+ ib_alloc_t* heap_alloc;
+
+ heap_alloc = ib_heap_allocator_create(heap);
+
+ if (!(query->flags & FTS_PROXIMITY)
+ && !(query->flags & FTS_PHRASE)) {
+ query->matched = ib_vector_create(
+ heap_alloc, sizeof(fts_match_t),
+ 64);
+ } else {
+ ut_a(num_token <= MAX_PROXIMITY_ITEM);
+ query->match_array =
+ (ib_vector_t**) mem_heap_alloc(
+ heap,
+ num_token *
+ sizeof(query->matched));
+
+ for (i = 0; i < num_token; i++) {
+ query->match_array[i] =
+ ib_vector_create(
+ heap_alloc, sizeof(fts_match_t),
+ 64);
+ }
+
+ query->matched = query->match_array[0];
+ }
+ }
+
+ /* Setup the callback args for filtering and consolidating
+ the ilist. */
+ fetch.read_arg = query;
+ fetch.read_record = fts_query_index_fetch_nodes;
+
+ for (i = 0; i < num_token; i++) {
+ /* Search for the first word from the phrase. */
+ token = static_cast<fts_string_t*>(
+ ib_vector_get(tokens, i));
+
+ if (query->flags & FTS_PROXIMITY
+ || query->flags & FTS_PHRASE) {
+ query->matched = query->match_array[i];
+ }
+
+ error = fts_index_fetch_nodes(
+ trx, &graph, &query->fts_index_table,
+ token, &fetch);
+
+ /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */
+ ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS));
+ if (error != DB_SUCCESS) {
+ query->error = error;
+ }
+
+ fts_que_graph_free(graph);
+ graph = NULL;
+
+ fts_query_cache(query, token);
+
+ if (!(query->flags & FTS_PHRASE)
+ && !(query->flags & FTS_PROXIMITY)) {
+ break;
+ }
+
+			/* If any of the tokens can't be found,
+			there is no need to continue the match. */
+ if (ib_vector_is_empty(query->match_array[i])
+ || query->error != DB_SUCCESS) {
+ goto func_exit;
+ }
+ }
+
+ /* Just a single word, no need to fetch the original
+ documents to do phrase matching */
+ if (ib_vector_size(orig_tokens) == 1
+ && !ib_vector_is_empty(query->match_array[0])) {
+ fts_match_t* match;
+ ulint n_matched;
+
+ n_matched = ib_vector_size(query->match_array[0]);
+
+ for (i = 0; i < n_matched; i++) {
+ match = static_cast<fts_match_t*>(
+ ib_vector_get(
+ query->match_array[0], i));
+
+ query->error = fts_query_process_doc_id(
+ query, match->doc_id, 0);
+ if (query->error != DB_SUCCESS) {
+ goto func_exit;
+ }
+
+ fts_query_add_word_to_document(
+ query, match->doc_id, token);
+ }
+ query->oper = oper;
+ goto func_exit;
+ }
+
+		/* If we are doing a proximity search, verify that all
+		the words are within the specified distance. */
+ if (query->flags & FTS_PROXIMITY) {
+ fts_phrase_or_proximity_search(query, tokens);
+ } else {
+ ibool matched;
+
+ /* Phrase Search case:
+ We filter out the doc ids that don't contain
+ all the tokens in the phrase. It's cheaper to
+ search the ilist than bringing the documents in
+ and then doing a search through the text. Isolated
+ testing shows this also helps in mitigating disruption
+ of the buffer cache. */
+ matched = fts_phrase_or_proximity_search(query, tokens);
+ query->matched = query->match_array[0];
+
+ /* Read the actual text in and search for the phrase. */
+ if (matched) {
+ ut_ad(query->error == DB_SUCCESS);
+ query->error = fts_query_search_phrase(
+ query, orig_tokens, tokens);
+ }
+ }
+
+ /* Restore original operation. */
+ query->oper = oper;
+
+ if (query->error != DB_SUCCESS) {
+ goto func_exit;
+ }
+ }
+
+func_exit:
+ mem_heap_free(heap);
+
+ /* Don't need it anymore. */
+ query->matched = NULL;
+
+ return(query->error);
+}
+
+/*****************************************************************//**
+Find the word and evaluate.
+@return DB_SUCCESS if all go well */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_query_execute(
+/*==============*/
+ fts_query_t* query, /*!< in: query instance */
+ fts_string_t* token) /*!< in: token to search */
+{
+ switch (query->oper) {
+ case FTS_NONE:
+ case FTS_NEGATE:
+ case FTS_INCR_RATING:
+ case FTS_DECR_RATING:
+ query->error = fts_query_union(query, token);
+ break;
+
+ case FTS_EXIST:
+ query->error = fts_query_intersect(query, token);
+ break;
+
+ case FTS_IGNORE:
+ query->error = fts_query_difference(query, token);
+ break;
+
+ default:
+ ut_error;
+ }
+
+ return(query->error);
+}
+
+/*****************************************************************//**
+Create a wildcard string. It's the responsibility of the caller to
+free the byte* pointer. It's allocated using ut_malloc_nokey().
+@return ptr to allocated memory */
+static
+byte*
+fts_query_get_token(
+/*================*/
+ fts_ast_node_t* node, /*!< in: the current sub tree */
+ fts_string_t* token) /*!< in: token to create */
+{
+ ulint str_len;
+ byte* new_ptr = NULL;
+
+ str_len = node->term.ptr->len;
+
+ ut_a(node->type == FTS_AST_TERM);
+
+ token->f_len = str_len;
+ token->f_str = node->term.ptr->str;
+
+ if (node->term.wildcard) {
+
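+		/* Append a trailing '%' so that the wildcard term is
+		matched as a prefix pattern against the index words,
+		e.g. the term "abc" becomes "abc%". */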
+ token->f_str = static_cast<byte*>(ut_malloc_nokey(str_len + 2));
+ token->f_len = str_len + 1;
+
+ memcpy(token->f_str, node->term.ptr->str, str_len);
+
+ token->f_str[str_len] = '%';
+ token->f_str[token->f_len] = 0;
+
+ new_ptr = token->f_str;
+ }
+
+ return(new_ptr);
+}
+
+static dberr_t fts_ast_visit_sub_exp(fts_ast_node_t*, fts_ast_callback, void*);
+
+/*****************************************************************//**
+Visit every node of the AST. */
+static
+dberr_t
+fts_query_visitor(
+/*==============*/
+ fts_ast_oper_t oper, /*!< in: current operator */
+ fts_ast_node_t* node, /*!< in: The root of the current subtree*/
+ void* arg) /*!< in: callback arg*/
+{
+ byte* ptr;
+ fts_string_t token;
+ fts_query_t* query = static_cast<fts_query_t*>(arg);
+
+ ut_a(node);
+ DBUG_ENTER("fts_query_visitor");
+ DBUG_PRINT("fts", ("nodetype: %s", fts_ast_node_type_get(node->type)));
+
+ token.f_n_char = 0;
+ query->oper = oper;
+ query->cur_node = node;
+
+ switch (node->type) {
+ case FTS_AST_TEXT:
+ case FTS_AST_PARSER_PHRASE_LIST:
+
+ if (query->oper == FTS_EXIST) {
+ ut_ad(query->intersection == NULL);
+ query->intersection = rbt_create(
+ sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
+
+ query->total_size += SIZEOF_RBT_CREATE;
+ }
+
+ /* Set the current proximity distance. */
+ query->distance = node->text.distance;
+
+ /* Force collection of doc ids and the positions. */
+ query->collect_positions = TRUE;
+
+ query->error = fts_query_phrase_search(query, node);
+
+ query->collect_positions = FALSE;
+
+ if (query->oper == FTS_EXIST) {
+ fts_query_free_doc_ids(query, query->doc_ids);
+ query->doc_ids = query->intersection;
+ query->intersection = NULL;
+ }
+
+ break;
+
+ case FTS_AST_TERM:
+ token.f_str = node->term.ptr->str;
+ token.f_len = node->term.ptr->len;
+
+ /* Collect wildcard words for QUERY EXPANSION. */
+ if (node->term.wildcard && query->wildcard_words != NULL) {
+ ib_rbt_bound_t parent;
+
+ if (rbt_search(query->wildcard_words, &parent, &token)
+ != 0) {
+ fts_string_t word;
+
+ fts_string_dup(&word, &token, query->heap);
+ rbt_add_node(query->wildcard_words, &parent,
+ &word);
+ }
+ }
+
+		/* Add the word to our RB tree that will be used to
+		calculate this term's per-document frequency. */
+ fts_query_add_word_freq(query, &token);
+
+ ptr = fts_query_get_token(node, &token);
+ query->error = fts_query_execute(query, &token);
+
+ if (ptr) {
+ ut_free(ptr);
+ }
+
+ break;
+
+ case FTS_AST_SUBEXP_LIST:
+ query->error = fts_ast_visit_sub_exp(node, fts_query_visitor, arg);
+ break;
+
+ default:
+ ut_error;
+ }
+
+ if (query->oper == FTS_EXIST) {
+ query->multi_exist = true;
+ }
+
+ DBUG_RETURN(query->error);
+}
+
+/** Process (nested) sub-expression, create a new result set to store the
+sub-expression result by processing nodes under current sub-expression
+list. Merge the sub-expression result with that of parent expression list.
+@param[in,out] node current root node
+@param[in,out] visitor callback function
+@param[in,out] arg argument for callback
+@return DB_SUCCESS if all go well */
+static
+dberr_t
+fts_ast_visit_sub_exp(
+ fts_ast_node_t* node,
+ fts_ast_callback visitor,
+ void* arg)
+{
+ fts_ast_oper_t cur_oper;
+ fts_query_t* query = static_cast<fts_query_t*>(arg);
+ ib_rbt_t* parent_doc_ids;
+ ib_rbt_t* subexpr_doc_ids;
+ dberr_t error = DB_SUCCESS;
+ bool will_be_ignored = false;
+ bool multi_exist;
+
+ DBUG_ENTER("fts_ast_visit_sub_exp");
+
+ ut_a(node->type == FTS_AST_SUBEXP_LIST);
+
+ /* To avoid stack overflow, we limit the mutual recursion
+ depth between fts_ast_visit(), fts_query_visitor() and
+ fts_ast_visit_sub_exp(). */
+ if (query->visiting_sub_exp++ > 31) {
+ query->error = DB_OUT_OF_MEMORY;
+ DBUG_RETURN(query->error);
+ }
+
+ cur_oper = query->oper;
+
+ /* Save current result set */
+ parent_doc_ids = query->doc_ids;
+
+ /* Create new result set to store the sub-expression result. We
+ will merge this result set with the parent after processing. */
+ query->doc_ids = rbt_create(sizeof(fts_ranking_t),
+ fts_ranking_doc_id_cmp);
+
+ query->total_size += SIZEOF_RBT_CREATE;
+
+ multi_exist = query->multi_exist;
+ query->multi_exist = false;
+ /* Process nodes in current sub-expression and store its
+ result set in query->doc_ids we created above. */
+ error = fts_ast_visit(FTS_NONE, node, visitor,
+ arg, &will_be_ignored);
+
+ /* Reinstate parent node state */
+ query->multi_exist = multi_exist;
+ query->oper = cur_oper;
+ query->visiting_sub_exp--;
+
+ /* Merge the sub-expression result with the parent result set. */
+ subexpr_doc_ids = query->doc_ids;
+ query->doc_ids = parent_doc_ids;
+ if (error == DB_SUCCESS) {
+ error = fts_merge_doc_ids(query, subexpr_doc_ids);
+ }
+
+ /* Free current result set. Result already merged into parent. */
+ fts_query_free_doc_ids(query, subexpr_doc_ids);
+
+ DBUG_RETURN(error);
+}
+
+#if 0
+/*****************************************************************//**
+Check if the doc id exists in the ilist.
+@return TRUE if doc id found */
+static
+ulint
+fts_query_find_doc_id(
+/*==================*/
+ fts_select_t* select, /*!< in/out: contains the doc id to
+ find, we update the word freq if
+ document found */
+ void* data, /*!< in: doc id ilist */
+ ulint len) /*!< in: doc id ilist size */
+{
+ byte* ptr = data;
+ doc_id_t doc_id = 0;
+ ulint decoded = 0;
+
+ /* Decode the ilist and search for selected doc_id. We also
+ calculate the frequency of the word in the document if found. */
+ while (decoded < len && !select->found) {
+ ulint freq = 0;
+ ulint min_pos = 0;
+ ulint last_pos = 0;
+ ulint pos = fts_decode_vlc(&ptr);
+
+ /* Add the delta. */
+ doc_id += pos;
+
+ while (*ptr) {
+ ++freq;
+ last_pos += fts_decode_vlc(&ptr);
+
+ /* Only if min_pos is not set and the current
+ term exists in a position greater than the
+ min_pos of the previous term. */
+ if (min_pos == 0 && last_pos > select->min_pos) {
+ min_pos = last_pos;
+ }
+ }
+
+ /* Skip the end of word position marker. */
+ ++ptr;
+
+ /* Bytes decoded so far. */
+ decoded = ptr - (byte*) data;
+
+ /* A word may exist in the document but we only consider a
+ match if it exists in a position that is greater than the
+ position of the previous term. */
+ if (doc_id == select->doc_id && min_pos > 0) {
+ fts_doc_freq_t* doc_freq;
+
+ /* Add the doc id to the doc freq rb tree, if
+ the doc id doesn't exist it will be created. */
+ doc_freq = fts_query_add_doc_freq(
+ select->word_freq->doc_freqs, doc_id);
+
+ /* Avoid duplicating the frequency tally */
+ if (doc_freq->freq == 0) {
+ doc_freq->freq = freq;
+ }
+
+ select->found = TRUE;
+ select->min_pos = min_pos;
+ }
+ }
+
+ return(select->found);
+}
+#endif
+
+/*****************************************************************//**
+Read and filter nodes.
+@return DB_SUCCESS if all go well,
+or return DB_FTS_EXCEED_RESULT_CACHE_LIMIT */
+static
+dberr_t
+fts_query_filter_doc_ids(
+/*=====================*/
+ fts_query_t* query, /*!< in: query instance */
+ const fts_string_t* word, /*!< in: the current word */
+ fts_word_freq_t* word_freq, /*!< in/out: word frequency */
+ const fts_node_t* node, /*!< in: current FTS node */
+ void* data, /*!< in: doc id ilist */
+ ulint len, /*!< in: doc id ilist size */
+ ibool calc_doc_count) /*!< in: whether to remember doc count */
+{
+ byte* ptr = static_cast<byte*>(data);
+ doc_id_t doc_id = 0;
+ ulint decoded = 0;
+ ib_rbt_t* doc_freqs = word_freq->doc_freqs;
+
+ /* Decode the ilist and add the doc ids to the query doc_id set. */
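+	/* Illustrative layout (hypothetical values): an ilist for doc
+	ids 5 and 9 is the VLC-encoded sequence
+	<5> <position deltas> <0> <4> <position deltas> <0>; the first
+	value is the absolute first_doc_id, later doc ids are stored as
+	deltas, and each per-document position list is a run of
+	cumulative position deltas terminated by a zero byte. */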
+ while (decoded < len) {
+ ulint freq = 0;
+ fts_doc_freq_t* doc_freq;
+ fts_match_t* match = NULL;
+ ulint last_pos = 0;
+ ulint pos = fts_decode_vlc(&ptr);
+
+ /* Some sanity checks. */
+ if (doc_id == 0) {
+ ut_a(pos == node->first_doc_id);
+ }
+
+ /* Add the delta. */
+ doc_id += pos;
+
+ if (calc_doc_count) {
+ word_freq->doc_count++;
+ }
+
+ /* We simply collect the matching instances here. */
+ if (query->collect_positions) {
+ ib_alloc_t* heap_alloc;
+
+ /* Create a new fts_match_t instance. */
+ match = static_cast<fts_match_t*>(
+ ib_vector_push(query->matched, NULL));
+
+ match->start = 0;
+ match->doc_id = doc_id;
+ heap_alloc = ib_vector_allocator(query->matched);
+
+ /* Allocate from the same heap as the
+ parent container. */
+ match->positions = ib_vector_create(
+ heap_alloc, sizeof(ulint), 64);
+
+ query->total_size += sizeof(fts_match_t)
+ + sizeof(ib_vector_t)
+ + sizeof(ulint) * 64;
+ }
+
+ /* Unpack the positions within the document. */
+ while (*ptr) {
+ last_pos += fts_decode_vlc(&ptr);
+
+ /* Collect the matching word positions, for phrase
+ matching later. */
+ if (query->collect_positions) {
+ ib_vector_push(match->positions, &last_pos);
+ }
+
+ ++freq;
+ }
+
+ /* End of list marker. */
+ last_pos = (ulint) -1;
+
+ if (query->collect_positions) {
+ ut_a(match != NULL);
+ ib_vector_push(match->positions, &last_pos);
+ }
+
+ /* Add the doc id to the doc freq rb tree, if the doc id
+ doesn't exist it will be created. */
+ doc_freq = fts_query_add_doc_freq(query, doc_freqs, doc_id);
+
+ /* Avoid duplicating frequency tally. */
+ if (doc_freq->freq == 0) {
+ doc_freq->freq = freq;
+ }
+
+ /* Skip the end of word position marker. */
+ ++ptr;
+
+ /* Bytes decoded so far */
+ decoded = ulint(ptr - (byte*) data);
+
+ /* We simply collect the matching documents and the
+ positions here and match later. */
+ if (!query->collect_positions) {
+ /* We ignore error here and will check it later */
+ fts_query_process_doc_id(query, doc_id, 0);
+
+ /* Add the word to the document's matched RB tree. */
+ fts_query_add_word_to_document(query, doc_id, word);
+ }
+ }
+
+ /* Some sanity checks. */
+ ut_a(doc_id == node->last_doc_id);
+
+ if (query->total_size > fts_result_cache_limit) {
+ return(DB_FTS_EXCEED_RESULT_CACHE_LIMIT);
+ } else {
+ return(DB_SUCCESS);
+ }
+}
+
+/*****************************************************************//**
+Read the FTS INDEX row.
+@return DB_SUCCESS if all go well. */
+static
+dberr_t
+fts_query_read_node(
+/*================*/
+ fts_query_t* query, /*!< in: query instance */
+ const fts_string_t* word, /*!< in: current word */
+ que_node_t* exp) /*!< in: query graph node */
+{
+ int i;
+ int ret;
+ fts_node_t node;
+ ib_rbt_bound_t parent;
+ fts_word_freq_t* word_freq;
+ ibool skip = FALSE;
+ fts_string_t term;
+ byte buf[FTS_MAX_WORD_LEN + 1];
+ dberr_t error = DB_SUCCESS;
+
+ ut_a(query->cur_node->type == FTS_AST_TERM
+ || query->cur_node->type == FTS_AST_TEXT
+ || query->cur_node->type == FTS_AST_PARSER_PHRASE_LIST);
+
+ memset(&node, 0, sizeof(node));
+ term.f_str = buf;
+
+	/* Consider the wildcard search case: the word frequency is
+	created for the search string, not the actual word, so we need
+	to record the frequency on behalf of the search string. */
+ if (query->cur_node->type == FTS_AST_TERM
+ && query->cur_node->term.wildcard) {
+
+ term.f_len = query->cur_node->term.ptr->len;
+ ut_ad(FTS_MAX_WORD_LEN >= term.f_len);
+ memcpy(term.f_str, query->cur_node->term.ptr->str, term.f_len);
+ } else {
+ term.f_len = word->f_len;
+ ut_ad(FTS_MAX_WORD_LEN >= word->f_len);
+ memcpy(term.f_str, word->f_str, word->f_len);
+ }
+
+ /* Lookup the word in our rb tree, it must exist. */
+ ret = rbt_search(query->word_freqs, &parent, &term);
+
+ ut_a(ret == 0);
+
+ word_freq = rbt_value(fts_word_freq_t, parent.last);
+
+ /* Start from 1 since the first column has been read by the caller.
+	Also, we rely on the order of the projected columns to filter
+	out ilists that are out of range, and we always want to read
+	the doc_count irrespective of the suitability of the row. */
+
+ for (i = 1; exp && !skip; exp = que_node_get_next(exp), ++i) {
+
+ dfield_t* dfield = que_node_get_val(exp);
+ byte* data = static_cast<byte*>(
+ dfield_get_data(dfield));
+ ulint len = dfield_get_len(dfield);
+
+ ut_a(len != UNIV_SQL_NULL);
+
+ /* Note: The column numbers below must match the SELECT. */
+
+ switch (i) {
+ case 1: /* DOC_COUNT */
+ word_freq->doc_count += mach_read_from_4(data);
+ break;
+
+ case 2: /* FIRST_DOC_ID */
+ node.first_doc_id = fts_read_doc_id(data);
+
+			/* Skip nodes whose doc ids are out of range. */
+ if (query->oper == FTS_EXIST
+ && query->upper_doc_id > 0
+ && node.first_doc_id > query->upper_doc_id) {
+ skip = TRUE;
+ }
+ break;
+
+ case 3: /* LAST_DOC_ID */
+ node.last_doc_id = fts_read_doc_id(data);
+
+			/* Skip nodes whose doc ids are out of range. */
+ if (query->oper == FTS_EXIST
+ && query->lower_doc_id > 0
+ && node.last_doc_id < query->lower_doc_id) {
+ skip = TRUE;
+ }
+ break;
+
+ case 4: /* ILIST */
+
+ error = fts_query_filter_doc_ids(
+ query, &word_freq->word, word_freq,
+ &node, data, len, FALSE);
+
+ break;
+
+ default:
+ ut_error;
+ }
+ }
+
+ if (!skip) {
+ /* Make sure all columns were read. */
+
+ ut_a(i == 5);
+ }
+
+ return error;
+}
+
+/*****************************************************************//**
+Callback function to fetch the rows in an FTS INDEX record.
+@return always returns TRUE */
+static
+ibool
+fts_query_index_fetch_nodes(
+/*========================*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: pointer to fts_fetch_t */
+{
+ fts_string_t key;
+ sel_node_t* sel_node = static_cast<sel_node_t*>(row);
+ fts_fetch_t* fetch = static_cast<fts_fetch_t*>(user_arg);
+ fts_query_t* query = static_cast<fts_query_t*>(fetch->read_arg);
+ que_node_t* exp = sel_node->select_list;
+ dfield_t* dfield = que_node_get_val(exp);
+ void* data = dfield_get_data(dfield);
+ ulint dfield_len = dfield_get_len(dfield);
+
+ key.f_str = static_cast<byte*>(data);
+ key.f_len = dfield_len;
+
+ ut_a(dfield_len <= FTS_MAX_WORD_LEN);
+
+ /* Note: we pass error out by 'query->error' */
+ query->error = fts_query_read_node(query, &key, que_node_get_next(exp));
+
+ if (query->error != DB_SUCCESS) {
+ ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT);
+ return(FALSE);
+ } else {
+ return(TRUE);
+ }
+}
+
+/*****************************************************************//**
+Calculate the inverse document frequency (IDF) for all the terms. */
+static
+void
+fts_query_calculate_idf(
+/*====================*/
+ fts_query_t* query) /*!< in: Query state */
+{
+ const ib_rbt_node_t* node;
+ ib_uint64_t total_docs = query->total_docs;
+
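+	/* Standard inverse document frequency:
+	idf = log10(total_docs / doc_count). For example, a term that
+	occurs in 10 out of 1000 documents gets idf = log10(100) = 2. */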
+	/* Compute the IDF value for each term in the query. */
+ for (node = rbt_first(query->word_freqs);
+ node;
+ node = rbt_next(query->word_freqs, node)) {
+
+ fts_word_freq_t* word_freq;
+
+ word_freq = rbt_value(fts_word_freq_t, node);
+
+ if (word_freq->doc_count > 0) {
+ if (total_docs == word_freq->doc_count) {
+				/* The query processor assumes that
+				ranking > 0 if we find a match. Since
+				log10(1) = 0, we cannot let the IDF
+				become zero when a word occurs in all
+				documents, so make it an arbitrary,
+				very small positive number. */
+ word_freq->idf = log10(1.0001);
+ } else {
+ word_freq->idf = log10(
+ static_cast<double>(total_docs)
+ / static_cast<double>(
+ word_freq->doc_count));
+ }
+ }
+ }
+}
+
+/*****************************************************************//**
+Calculate the ranking of the document. */
+static
+void
+fts_query_calculate_ranking(
+/*========================*/
+ const fts_query_t* query, /*!< in: query state */
+ fts_ranking_t* ranking) /*!< in: Document to rank */
+{
+ ulint pos = 0;
+ fts_string_t word;
+
+ /* At this stage, ranking->rank should not exceed the 1.0
+ bound */
+ ut_ad(ranking->rank <= 1.0 && ranking->rank >= -1.0);
+ ut_ad(rbt_size(query->word_map) == query->word_vector->size());
+
+ while (fts_ranking_words_get_next(query, ranking, &pos, &word)) {
+ int ret;
+ ib_rbt_bound_t parent;
+ double weight;
+ fts_doc_freq_t* doc_freq;
+ fts_word_freq_t* word_freq;
+
+ ret = rbt_search(query->word_freqs, &parent, &word);
+
+ /* It must exist. */
+ ut_a(ret == 0);
+
+ word_freq = rbt_value(fts_word_freq_t, parent.last);
+
+ ret = rbt_search(
+ word_freq->doc_freqs, &parent, &ranking->doc_id);
+
+ /* It must exist. */
+ ut_a(ret == 0);
+
+ doc_freq = rbt_value(fts_doc_freq_t, parent.last);
+
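+		/* Each matching term contributes freq * idf * idf to the
+		document rank: weight = freq * idf here, multiplied by
+		idf once more below. */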
+ weight = (double) doc_freq->freq * word_freq->idf;
+
+ ranking->rank += (fts_rank_t) (weight * word_freq->idf);
+ }
+}
+
+/*****************************************************************//**
+Add ranking to the result set. */
+static
+void
+fts_query_add_ranking(
+/*==================*/
+ fts_query_t* query, /*!< in: query state */
+ ib_rbt_t* ranking_tree, /*!< in: ranking tree */
+ const fts_ranking_t* new_ranking) /*!< in: ranking of a document */
+{
+ ib_rbt_bound_t parent;
+
+ /* Lookup the ranking in our rb tree and add if it doesn't exist. */
+ if (rbt_search(ranking_tree, &parent, new_ranking) == 0) {
+ fts_ranking_t* ranking;
+
+ ranking = rbt_value(fts_ranking_t, parent.last);
+
+ ranking->rank += new_ranking->rank;
+
+ ut_a(ranking->words == NULL);
+ } else {
+ rbt_add_node(ranking_tree, &parent, new_ranking);
+
+ query->total_size += SIZEOF_RBT_NODE_ADD
+ + sizeof(fts_ranking_t);
+ }
+}
+
+/*****************************************************************//**
+Retrieve the FTS Relevance Ranking result for doc with doc_id
+@return the relevance ranking value, 0 if no ranking value
+present. */
+float
+fts_retrieve_ranking(
+/*=================*/
+ fts_result_t* result, /*!< in: FTS result structure */
+ doc_id_t doc_id) /*!< in: doc_id of the item to retrieve */
+{
+ ib_rbt_bound_t parent;
+ fts_ranking_t new_ranking;
+
+ DBUG_ENTER("fts_retrieve_ranking");
+
+ if (!result || !result->rankings_by_id) {
+ DBUG_RETURN(0);
+ }
+
+ new_ranking.doc_id = doc_id;
+
+ /* Lookup the ranking in our rb tree */
+ if (rbt_search(result->rankings_by_id, &parent, &new_ranking) == 0) {
+ fts_ranking_t* ranking;
+
+ ranking = rbt_value(fts_ranking_t, parent.last);
+
+ DBUG_RETURN(ranking->rank);
+ }
+
+ DBUG_RETURN(0);
+}
+
+/*****************************************************************//**
+Create the result and copy the data to it. */
+static
+fts_result_t*
+fts_query_prepare_result(
+/*=====================*/
+ fts_query_t* query, /*!< in: Query state */
+ fts_result_t* result) /*!< in: result this can contain
+ data from a previous search on
+ another FTS index */
+{
+ const ib_rbt_node_t* node;
+ bool result_is_null = false;
+
+ DBUG_ENTER("fts_query_prepare_result");
+
+ if (result == NULL) {
+ result = static_cast<fts_result_t*>(
+ ut_zalloc_nokey(sizeof(*result)));
+
+ result->rankings_by_id = rbt_create(
+ sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
+
+ query->total_size += sizeof(fts_result_t) + SIZEOF_RBT_CREATE;
+ result_is_null = true;
+ }
+
+ if (query->flags == FTS_OPT_RANKING) {
+ fts_word_freq_t* word_freq;
+ ulint size = ib_vector_size(query->deleted->doc_ids);
+ doc_id_t* updates =
+ (doc_id_t*) query->deleted->doc_ids->data;
+
+ node = rbt_first(query->word_freqs);
+ ut_ad(node);
+ word_freq = rbt_value(fts_word_freq_t, node);
+
+ for (node = rbt_first(word_freq->doc_freqs);
+ node;
+ node = rbt_next(word_freq->doc_freqs, node)) {
+ fts_doc_freq_t* doc_freq;
+ fts_ranking_t ranking;
+
+ doc_freq = rbt_value(fts_doc_freq_t, node);
+
+ /* Don't put deleted docs into result */
+ if (fts_bsearch(updates, 0, static_cast<int>(size),
+ doc_freq->doc_id) >= 0) {
+ /* one less matching doc count */
+ --word_freq->doc_count;
+ continue;
+ }
+
+ ranking.doc_id = doc_freq->doc_id;
+ ranking.rank = static_cast<fts_rank_t>(doc_freq->freq);
+ ranking.words = NULL;
+
+ fts_query_add_ranking(query, result->rankings_by_id,
+ &ranking);
+
+ if (query->total_size > fts_result_cache_limit) {
+ query->error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT;
+ fts_query_free_result(result);
+ DBUG_RETURN(NULL);
+ }
+ }
+
+ /* Calculate IDF only after we exclude the deleted items */
+ fts_query_calculate_idf(query);
+
+ node = rbt_first(query->word_freqs);
+ word_freq = rbt_value(fts_word_freq_t, node);
+
+ /* Calculate the ranking for each doc */
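+		/* In this optimized single-term path the stored rank is
+		the raw term frequency, so multiplying by idf twice gives
+		the same freq * idf * idf formula used in
+		fts_query_calculate_ranking(). */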
+ for (node = rbt_first(result->rankings_by_id);
+ node != NULL;
+ node = rbt_next(result->rankings_by_id, node)) {
+
+ fts_ranking_t* ranking;
+
+ ranking = rbt_value(fts_ranking_t, node);
+
+ ranking->rank = static_cast<fts_rank_t>(
+ ranking->rank * word_freq->idf * word_freq->idf);
+ }
+
+ DBUG_RETURN(result);
+ }
+
+ ut_a(rbt_size(query->doc_ids) > 0);
+
+ for (node = rbt_first(query->doc_ids);
+ node;
+ node = rbt_next(query->doc_ids, node)) {
+
+ fts_ranking_t* ranking;
+
+ ranking = rbt_value(fts_ranking_t, node);
+ fts_query_calculate_ranking(query, ranking);
+
+		// FIXME: I think we may require this information to improve the
+ // ranking of doc ids which have more word matches from
+ // different FTS indexes.
+
+ /* We don't need these anymore free the resources. */
+ ranking->words = NULL;
+
+ if (!result_is_null) {
+ fts_query_add_ranking(query, result->rankings_by_id, ranking);
+
+ if (query->total_size > fts_result_cache_limit) {
+ query->error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT;
+ fts_query_free_result(result);
+ DBUG_RETURN(NULL);
+ }
+ }
+ }
+
+ if (result_is_null) {
+ /* Use doc_ids directly */
+ rbt_free(result->rankings_by_id);
+ result->rankings_by_id = query->doc_ids;
+ query->doc_ids = NULL;
+ }
+
+ DBUG_RETURN(result);
+}
+
+/*****************************************************************//**
+Get the result of the query. Calculate the similarity coefficient. */
+static
+fts_result_t*
+fts_query_get_result(
+/*=================*/
+ fts_query_t* query, /*!< in: query instance */
+ fts_result_t* result) /*!< in: result */
+{
+ DBUG_ENTER("fts_query_get_result");
+
+ if (rbt_size(query->doc_ids) > 0 || query->flags == FTS_OPT_RANKING) {
+ /* Copy the doc ids to the result. */
+ result = fts_query_prepare_result(query, result);
+ } else {
+ /* Create an empty result instance. */
+ result = static_cast<fts_result_t*>(
+ ut_zalloc_nokey(sizeof(*result)));
+ }
+
+ DBUG_RETURN(result);
+}
+
+/*****************************************************************//**
+FTS Query free resources and reset. */
+static
+void
+fts_query_free(
+/*===========*/
+ fts_query_t* query) /*!< in: query instance to free*/
+{
+
+ if (query->read_nodes_graph) {
+ fts_que_graph_free(query->read_nodes_graph);
+ }
+
+ if (query->root) {
+ fts_ast_free_node(query->root);
+ }
+
+ if (query->deleted) {
+ fts_doc_ids_free(query->deleted);
+ }
+
+ if (query->intersection) {
+ fts_query_free_doc_ids(query, query->intersection);
+ }
+
+ if (query->doc_ids) {
+ fts_query_free_doc_ids(query, query->doc_ids);
+ }
+
+ if (query->word_freqs) {
+ const ib_rbt_node_t* node;
+
+ /* We need to free any instances of fts_doc_freq_t that we
+ may have allocated. */
+ for (node = rbt_first(query->word_freqs);
+ node;
+ node = rbt_next(query->word_freqs, node)) {
+
+ fts_word_freq_t* word_freq;
+
+ word_freq = rbt_value(fts_word_freq_t, node);
+
+ /* We need to cast away the const. */
+ rbt_free(word_freq->doc_freqs);
+ }
+
+ rbt_free(query->word_freqs);
+ }
+
+ if (query->wildcard_words != NULL) {
+ rbt_free(query->wildcard_words);
+ }
+
+ ut_a(!query->intersection);
+
+ if (query->word_map) {
+ rbt_free(query->word_map);
+ }
+
+ if (query->word_vector != NULL) {
+ UT_DELETE(query->word_vector);
+ }
+
+ if (query->heap) {
+ mem_heap_free(query->heap);
+ }
+
+ memset(query, 0, sizeof(*query));
+}
+
+/*****************************************************************//**
+Parse the query using flex/bison or plugin parser.
+@return parse tree node. */
+static
+fts_ast_node_t*
+fts_query_parse(
+/*============*/
+ fts_query_t* query, /*!< in: query instance */
+ byte* query_str, /*!< in: query string */
+ ulint query_len) /*!< in: query string length */
+{
+ int error;
+ fts_ast_state_t state;
+ bool mode = query->boolean_mode;
+ DBUG_ENTER("fts_query_parse");
+
+ memset(&state, 0x0, sizeof(state));
+
+ state.charset = query->fts_index_table.charset;
+
+ DBUG_EXECUTE_IF("fts_instrument_query_disable_parser",
+ query->parser = NULL;);
+
+ if (query->parser) {
+ state.root = state.cur_node =
+ fts_ast_create_node_list(&state, NULL);
+ error = fts_parse_by_parser(mode, query_str, query_len,
+ query->parser, &state);
+ } else {
+ /* Setup the scanner to use, this depends on the mode flag. */
+ state.lexer = fts_lexer_create(mode, query_str, query_len);
+ state.charset = query->fts_index_table.charset;
+ error = fts_parse(&state);
+ fts_lexer_free(state.lexer);
+ state.lexer = NULL;
+ }
+
+ /* Error during parsing ? */
+ if (error) {
+ /* Free the nodes that were allocated during parsing. */
+ fts_ast_state_free(&state);
+ } else {
+ query->root = state.root;
+
+ if (UNIV_UNLIKELY(fts_enable_diag_print) && query->root) {
+ fts_ast_node_print(query->root);
+ }
+ }
+
+ DBUG_RETURN(state.root);
+}
+
+/*******************************************************************//**
+FTS Query optimization
+Set FTS_OPT_RANKING if it is a simple term query */
+static
+void
+fts_query_can_optimize(
+/*===================*/
+ fts_query_t* query, /*!< in/out: query instance */
+ uint flags) /*!< In: FTS search mode */
+{
+ fts_ast_node_t* node = query->root;
+
+ if (flags & FTS_EXPAND) {
+ return;
+ }
+
+	/* Check whether the query is a single term without an operator. */
+ ut_ad(node->type == FTS_AST_LIST);
+ node = node->list.head;
+ if (node != NULL && node->type == FTS_AST_TERM && node->next == NULL) {
+ query->flags = FTS_OPT_RANKING;
+ }
+}
+
+/** FTS Query entry point.
+@param[in,out] trx transaction
+@param[in] index fts index to search
+@param[in] flags FTS search mode
+@param[in] query_str FTS query
+@param[in] query_len FTS query string len in bytes
+@param[in,out] result result doc ids
+@return DB_SUCCESS if successful otherwise error code */
+dberr_t
+fts_query(
+ trx_t* trx,
+ dict_index_t* index,
+ uint flags,
+ const byte* query_str,
+ ulint query_len,
+ fts_result_t** result)
+{
+ fts_query_t query;
+ dberr_t error = DB_SUCCESS;
+ byte* lc_query_str;
+ ulint lc_query_str_len;
+ ulint result_len;
+ bool boolean_mode;
+ trx_t* query_trx; /* FIXME: use provided trx */
+ CHARSET_INFO* charset;
+ ulint start_time_ms;
+ bool will_be_ignored = false;
+
+ boolean_mode = flags & FTS_BOOL;
+
+ *result = NULL;
+ memset(&query, 0x0, sizeof(query));
+ query_trx = trx_create();
+ query_trx->op_info = "FTS query";
+
+ start_time_ms = ut_time_ms();
+
+ query.trx = query_trx;
+ query.index = index;
+ query.boolean_mode = boolean_mode;
+ query.deleted = fts_doc_ids_create();
+ query.cur_node = NULL;
+
+ query.fts_common_table.type = FTS_COMMON_TABLE;
+ query.fts_common_table.table_id = index->table->id;
+ query.fts_common_table.table = index->table;
+
+ charset = fts_index_get_charset(index);
+
+ query.fts_index_table.type = FTS_INDEX_TABLE;
+ query.fts_index_table.index_id = index->id;
+ query.fts_index_table.table_id = index->table->id;
+ query.fts_index_table.charset = charset;
+ query.fts_index_table.table = index->table;
+
+ query.word_map = rbt_create_arg_cmp(
+ sizeof(fts_string_t), innobase_fts_text_cmp, (void*)charset);
+ query.word_vector = UT_NEW_NOKEY(word_vector_t());
+ query.error = DB_SUCCESS;
+
+ /* Setup the RB tree that will be used to collect per term
+ statistics. */
+ query.word_freqs = rbt_create_arg_cmp(
+ sizeof(fts_word_freq_t), innobase_fts_text_cmp,
+ (void*) charset);
+
+ if (flags & FTS_EXPAND) {
+ query.wildcard_words = rbt_create_arg_cmp(
+ sizeof(fts_string_t), innobase_fts_text_cmp, (void *)charset);
+ }
+
+ query.total_size += SIZEOF_RBT_CREATE;
+
+ query.total_docs = dict_table_get_n_rows(index->table);
+
+ query.fts_common_table.suffix = "DELETED";
+
+ /* Read the deleted doc_ids, we need these for filtering. */
+ error = fts_table_fetch_doc_ids(
+ NULL, &query.fts_common_table, query.deleted);
+
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ }
+
+ query.fts_common_table.suffix = "DELETED_CACHE";
+
+ error = fts_table_fetch_doc_ids(
+ NULL, &query.fts_common_table, query.deleted);
+
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ }
+
+ /* Get the deleted doc ids that are in the cache. */
+ fts_cache_append_deleted_doc_ids(
+ index->table->fts->cache, query.deleted->doc_ids);
+ DEBUG_SYNC_C("fts_deleted_doc_ids_append");
+
+ /* Sort the vector so that we can do a binary search over the ids. */
+ ib_vector_sort(query.deleted->doc_ids, fts_doc_id_cmp);
+
+ /* Convert the query string to lower case before parsing. We own
+ the ut_malloc'ed result and so remember to free it before return. */
+
+ lc_query_str_len = query_len * charset->casedn_multiply + 1;
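+	/* casedn_multiply is the worst-case growth factor of the string
+	when it is converted to lower case in this charset; the extra
+	byte is for the terminating NUL. */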
+ lc_query_str = static_cast<byte*>(ut_malloc_nokey(lc_query_str_len));
+
+ /* For binary collations, a case sensitive search is
+ performed. Hence don't convert to lower case. */
+ if (my_binary_compare(charset)) {
+ memcpy(lc_query_str, query_str, query_len);
+ lc_query_str[query_len]= 0;
+ result_len= query_len;
+ } else {
+ result_len = innobase_fts_casedn_str(
+ charset, (char*)( query_str), query_len,
+ (char*)(lc_query_str), lc_query_str_len);
+ }
+
+ ut_ad(result_len < lc_query_str_len);
+
+ lc_query_str[result_len] = 0;
+
+ query.heap = mem_heap_create(128);
+
+ /* Create the rb tree for the doc id (current) set. */
+ query.doc_ids = rbt_create(
+ sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
+ query.parser = index->parser;
+
+ query.total_size += SIZEOF_RBT_CREATE;
+
+ /* Parse the input query string. */
+ if (fts_query_parse(&query, lc_query_str, result_len)) {
+ fts_ast_node_t* ast = query.root;
+ ast->trx = trx;
+
+ /* Optimize query to check if it's a single term */
+ fts_query_can_optimize(&query, flags);
+
+ DBUG_EXECUTE_IF("fts_instrument_result_cache_limit",
+ fts_result_cache_limit = 2048;
+ );
+
+ /* Traverse the Abstract Syntax Tree (AST) and execute
+ the query. */
+ query.error = fts_ast_visit(
+ FTS_NONE, ast, fts_query_visitor,
+ &query, &will_be_ignored);
+ if (query.error == DB_INTERRUPTED) {
+ error = DB_INTERRUPTED;
+ ut_free(lc_query_str);
+ goto func_exit;
+ }
+
+ /* If query expansion is requested, extend the search
+ with first search pass result */
+ if (query.error == DB_SUCCESS && (flags & FTS_EXPAND)) {
+ query.error = fts_expand_query(index, &query);
+ }
+
+ /* Calculate the inverse document frequency of the terms. */
+ if (query.error == DB_SUCCESS
+ && query.flags != FTS_OPT_RANKING) {
+ fts_query_calculate_idf(&query);
+ }
+
+ /* Copy the result from the query state, so that we can
+ return it to the caller. */
+ if (query.error == DB_SUCCESS) {
+ *result = fts_query_get_result(&query, *result);
+ }
+
+ error = query.error;
+ } else {
+ /* still return an empty result set */
+ *result = static_cast<fts_result_t*>(
+ ut_zalloc_nokey(sizeof(**result)));
+ }
+
+ if (trx_is_interrupted(trx)) {
+ error = DB_INTERRUPTED;
+ ut_free(lc_query_str);
+ if (*result) {
+ fts_query_free_result(*result);
+ }
+ goto func_exit;
+ }
+
+ ut_free(lc_query_str);
+
+ if (UNIV_UNLIKELY(fts_enable_diag_print) && (*result)) {
+ ulint diff_time = ut_time_ms() - start_time_ms;
+
+ ib::info() << "FTS Search Processing time: "
+ << diff_time / 1000 << " secs: " << diff_time % 1000
+ << " millisec: row(s) "
+ << ((*result)->rankings_by_id
+ ? lint(rbt_size((*result)->rankings_by_id))
+ : -1);
+
+ /* Log memory consumption & result size */
+ ib::info() << "Full Search Memory: " << query.total_size
+ << " (bytes), Row: "
+ << ((*result)->rankings_by_id
+ ? rbt_size((*result)->rankings_by_id)
+ : 0)
+ << ".";
+ }
+
+func_exit:
+ fts_query_free(&query);
+
+ query_trx->free();
+
+ return(error);
+}
+
+/*****************************************************************//**
+FTS Query free result, returned by fts_query(). */
+void
+fts_query_free_result(
+/*==================*/
+ fts_result_t* result) /*!< in: result instance to free.*/
+{
+ if (result) {
+ if (result->rankings_by_id != NULL) {
+ rbt_free(result->rankings_by_id);
+ result->rankings_by_id = NULL;
+ }
+ if (result->rankings_by_rank != NULL) {
+ rbt_free(result->rankings_by_rank);
+ result->rankings_by_rank = NULL;
+ }
+
+ ut_free(result);
+ result = NULL;
+ }
+}
+
+/*****************************************************************//**
+FTS Query sort result, returned by fts_query() on fts_ranking_t::rank. */
+void
+fts_query_sort_result_on_rank(
+/*==========================*/
+ fts_result_t* result) /*!< out: result instance to sort.*/
+{
+ const ib_rbt_node_t* node;
+ ib_rbt_t* ranked;
+
+ ut_a(result->rankings_by_id != NULL);
+ if (result->rankings_by_rank) {
+ rbt_free(result->rankings_by_rank);
+ }
+
+ ranked = rbt_create(sizeof(fts_ranking_t), fts_query_compare_rank);
+
+ /* We need to free any instances of fts_doc_freq_t that we
+ may have allocated. */
+ for (node = rbt_first(result->rankings_by_id);
+ node;
+ node = rbt_next(result->rankings_by_id, node)) {
+
+ fts_ranking_t* ranking;
+
+ ranking = rbt_value(fts_ranking_t, node);
+
+ ut_a(ranking->words == NULL);
+
+ rbt_insert(ranked, ranking, ranking);
+ }
+
+ /* Reset the current node too. */
+ result->current = NULL;
+ result->rankings_by_rank = ranked;
+}
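+
+/* Illustrative sketch, not from the upstream sources: once
+fts_query_sort_result_on_rank() has built rankings_by_rank, a caller can
+walk the result in rank order the same way the id-ordered tree is walked
+above. The helper name below is hypothetical, and it assumes fts_ranking_t
+exposes the rank value alongside doc_id. */
+static void
+fts_result_print_by_rank(
+/*=====================*/
+	const fts_result_t*	result)	/*!< in: rank-sorted result */
+{
+	for (const ib_rbt_node_t* node = rbt_first(result->rankings_by_rank);
+	     node;
+	     node = rbt_next(result->rankings_by_rank, node)) {
+
+		fts_ranking_t*	ranking = rbt_value(fts_ranking_t, node);
+
+		ib::info() << "doc_id: " << ranking->doc_id
+			<< ", rank: " << ranking->rank;
+	}
+}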
+
+/*******************************************************************//**
+A debug function to print result doc_id set. */
+static
+void
+fts_print_doc_id(
+/*=============*/
+ fts_query_t* query) /*!< in : tree that stores doc_ids.*/
+{
+ const ib_rbt_node_t* node;
+
+ /* Iterate each member of the doc_id set */
+ for (node = rbt_first(query->doc_ids);
+ node;
+ node = rbt_next(query->doc_ids, node)) {
+ fts_ranking_t* ranking;
+ ranking = rbt_value(fts_ranking_t, node);
+
+ ib::info() << "doc_ids info, doc_id: " << ranking->doc_id;
+
+ ulint pos = 0;
+ fts_string_t word;
+
+ while (fts_ranking_words_get_next(query, ranking, &pos, &word)) {
+ ib::info() << "doc_ids info, value: " << word.f_str;
+ }
+ }
+}
+
+/*************************************************************//**
+This function implements a simple "blind" query expansion search:
+words in documents found in the first search pass will be used as
+search arguments to search the documents again, thus "expanding"
+the search result set.
+@return DB_SUCCESS if success, otherwise the error code */
+static MY_ATTRIBUTE((nonnull, warn_unused_result))
+dberr_t
+fts_expand_query(
+/*=============*/
+ dict_index_t* index, /*!< in: FTS index to search */
+ fts_query_t* query) /*!< in: FTS query instance */
+{
+ const ib_rbt_node_t* node;
+ const ib_rbt_node_t* token_node;
+ fts_doc_t result_doc;
+ dberr_t error = DB_SUCCESS;
+ const fts_index_cache_t*index_cache;
+
+ /* If no doc is found in first search pass, return */
+ if (!rbt_size(query->doc_ids)) {
+ return(error);
+ }
+
+ /* Init "result_doc", to hold words from the first search pass */
+ fts_doc_init(&result_doc);
+
+ rw_lock_x_lock(&index->table->fts->cache->lock);
+ index_cache = fts_find_index_cache(index->table->fts->cache, index);
+ rw_lock_x_unlock(&index->table->fts->cache->lock);
+
+ ut_a(index_cache);
+
+ result_doc.tokens = rbt_create_arg_cmp(
+ sizeof(fts_token_t), innobase_fts_text_cmp,
+ (void*) index_cache->charset);
+
+ result_doc.charset = index_cache->charset;
+ result_doc.parser = index_cache->index->parser;
+
+ query->total_size += SIZEOF_RBT_CREATE;
+
+ if (UNIV_UNLIKELY(fts_enable_diag_print)) {
+ fts_print_doc_id(query);
+ }
+
+ for (node = rbt_first(query->doc_ids);
+ node;
+ node = rbt_next(query->doc_ids, node)) {
+
+ fts_ranking_t* ranking;
+ ulint prev_token_size;
+ ulint estimate_size;
+
+ prev_token_size = rbt_size(result_doc.tokens);
+
+ ranking = rbt_value(fts_ranking_t, node);
+
+		/* Fetch the documents with the doc_id from the
+		result of the first search pass. Since we do not
+		store a document-to-word mapping, we need to
+		fetch the original documents and parse them.
+		Future optimization could be done here if we
+		support some form of document-to-word mapping. */
+ fts_doc_fetch_by_doc_id(NULL, ranking->doc_id, index,
+ FTS_FETCH_DOC_BY_ID_EQUAL,
+ fts_query_expansion_fetch_doc,
+ &result_doc);
+
+ /* Estimate memory used, see fts_process_token and fts_token_t.
+ We ignore token size here. */
+ estimate_size = (rbt_size(result_doc.tokens) - prev_token_size)
+ * (SIZEOF_RBT_NODE_ADD + sizeof(fts_token_t)
+ + sizeof(ib_vector_t) + sizeof(ulint) * 32);
+ query->total_size += estimate_size;
+
+ if (query->total_size > fts_result_cache_limit) {
+ error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT;
+ goto func_exit;
+ }
+ }
+
+ /* Remove words that have already been searched in the first pass */
+ for (ulint i = 0; i < query->word_vector->size(); i++) {
+ fts_string_t word = query->word_vector->at(i);
+ ib_rbt_bound_t parent;
+
+ if (query->wildcard_words
+ && rbt_search(query->wildcard_words, &parent, &word) == 0) {
+			/* If it is a wildcard word, remove words
+			that have it as a prefix. */
+ while (rbt_search_cmp(result_doc.tokens,
+ &parent, &word, NULL,
+ innobase_fts_text_cmp_prefix)
+ == 0) {
+ ut_free(rbt_remove_node(result_doc.tokens,
+ parent.last));
+ }
+ } else {
+ /* We don't check return value, because the word may
+ have been deleted by a previous wildcard word as its
+ prefix, e.g. ('g * good'). */
+ rbt_delete(result_doc.tokens, &word);
+ }
+ }
+
+ /* Search the table the second time with expanded search list */
+ for (token_node = rbt_first(result_doc.tokens);
+ token_node;
+ token_node = rbt_next(result_doc.tokens, token_node)) {
+ fts_token_t* mytoken;
+ mytoken = rbt_value(fts_token_t, token_node);
+
+		/* A '%' at the end is treated as a prefix search,
+		which can cause an assertion failure, so we skip it. */
+ if (mytoken->text.f_str[mytoken->text.f_len - 1] == '%') {
+ continue;
+ }
+
+ ut_ad(mytoken->text.f_str[mytoken->text.f_len] == 0);
+ fts_query_add_word_freq(query, &mytoken->text);
+ error = fts_query_union(query, &mytoken->text);
+
+ if (error != DB_SUCCESS) {
+ break;
+ }
+ }
+
+func_exit:
+ fts_doc_free(&result_doc);
+
+ return(error);
+}
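+
+/* Worked example of the blind expansion above (illustrative values only):
+if the first pass for "database" matches documents 3 and 7, those two
+documents are fetched and re-tokenized, yielding for instance the tokens
+{"database", "index", "btree"}. "database" is then removed because it was
+already searched in the first pass, and a second pass unions the results
+for "index" and "btree" into query->doc_ids. */
+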
+/*************************************************************//**
+This function finds documents that contain all the words of a
+phrase or proximity search. For a proximity search, it also verifies
+that the words are within the specified distance of each other.
+This function is called for both phrase and proximity searches.
+@return TRUE if documents are found, FALSE otherwise */
+static
+ibool
+fts_phrase_or_proximity_search(
+/*===========================*/
+ fts_query_t* query, /*!< in/out: query instance.
+ query->doc_ids might be instantiated
+ with qualified doc IDs */
+ ib_vector_t* tokens) /*!< in: Tokens contain words */
+{
+ ulint n_matched;
+ ulint i;
+ ibool matched = FALSE;
+ ulint num_token = ib_vector_size(tokens);
+ fts_match_t* match[MAX_PROXIMITY_ITEM];
+ ibool end_list = FALSE;
+
+ /* Number of matched documents for the first token */
+ n_matched = ib_vector_size(query->match_array[0]);
+
+	/* We have a match list for each word; we shall
+	walk through the lists and find the common documents that
+	contain all the matching words. */
+ for (i = 0; i < n_matched; i++) {
+ ulint j;
+ ulint k = 0;
+ fts_proximity_t qualified_pos;
+
+ match[0] = static_cast<fts_match_t*>(
+ ib_vector_get(query->match_array[0], i));
+
+		/* For the remaining match lists for the tokens (words),
+		we check whether there is a document with the same
+		doc id. */
+ for (j = 1; j < num_token; j++) {
+ match[j] = static_cast<fts_match_t*>(
+ ib_vector_get(query->match_array[j], k));
+
+ while (match[j]->doc_id < match[0]->doc_id
+ && k < ib_vector_size(query->match_array[j])) {
+ match[j] = static_cast<fts_match_t*>(
+ ib_vector_get(
+ query->match_array[j], k));
+ k++;
+ }
+
+ if (match[j]->doc_id > match[0]->doc_id) {
+ /* no match */
+ if (query->flags & FTS_PHRASE) {
+ match[0]->doc_id = 0;
+ }
+ break;
+ }
+
+ if (k == ib_vector_size(query->match_array[j])) {
+ end_list = TRUE;
+
+ if (query->flags & FTS_PHRASE) {
+ ulint s;
+ /* Since i is the last doc id in the
+ match_array[j], remove all doc ids > i
+ from the match_array[0]. */
+ fts_match_t* match_temp;
+ for (s = i + 1; s < n_matched; s++) {
+ match_temp = static_cast<
+ fts_match_t*>(ib_vector_get(
+ query->match_array[0], s));
+ match_temp->doc_id = 0;
+ }
+
+ if (match[j]->doc_id !=
+ match[0]->doc_id) {
+ /* no match */
+ match[0]->doc_id = 0;
+ }
+ }
+
+ if (match[j]->doc_id != match[0]->doc_id) {
+ goto func_exit;
+ }
+ }
+
+			/* FIXME: A better solution would be a counter array
+			that remembers each run's last position, so we don't
+			reset it here every time. */
+ k = 0;
+ }
+
+ if (j != num_token) {
+ continue;
+ }
+
+ /* For this matching doc, we need to further
+ verify whether the words in the doc are close
+ to each other, and within the distance specified
+ in the proximity search */
+ if (query->flags & FTS_PHRASE) {
+ matched = TRUE;
+ } else if (fts_proximity_get_positions(
+ match, num_token, ULINT_MAX, &qualified_pos)) {
+
+			/* Fetch the original documents and count the
+			words between the matching words, to check that
+			they are within the specified distance. */
+ if (fts_query_is_in_proximity_range(
+ query, match, &qualified_pos)) {
+ /* If so, mark we find a matching doc */
+ query->error = fts_query_process_doc_id(
+ query, match[0]->doc_id, 0);
+ if (query->error != DB_SUCCESS) {
+ matched = FALSE;
+ goto func_exit;
+ }
+
+ matched = TRUE;
+ for (ulint z = 0; z < num_token; z++) {
+ fts_string_t* token;
+ token = static_cast<fts_string_t*>(
+ ib_vector_get(tokens, z));
+ fts_query_add_word_to_document(
+ query, match[0]->doc_id, token);
+ }
+ }
+ }
+
+ if (end_list) {
+ break;
+ }
+ }
+
+func_exit:
+ return(matched);
+}
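+
+/* Worked example of the intersection walk above (illustrative values only):
+with two tokens whose match lists contain, by doc_id, {2, 5, 9} and {5, 9},
+doc 2 is skipped because the second list starts at 5 (> 2); docs 5 and 9
+appear in both lists, so each is then checked either as a phrase match or,
+via fts_proximity_get_positions(), against the proximity distance. */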
+
+/*************************************************************//**
+This function checks whether words in result documents are close to
+each other (within proximity range as specified by "distance").
+If "distance" is MAX_ULINT, then it will find all combinations of
+positions of matching words and store min and max positions
+in the "qualified_pos" for later verification.
+@return true if words are close to each other, false if otherwise */
+static
+bool
+fts_proximity_get_positions(
+/*========================*/
+	fts_match_t**	match,		/*!< in: match list for each word */
+ ulint num_match, /*!< in: number of matching
+ items */
+ ulint distance, /*!< in: distance value
+ for proximity search */
+ fts_proximity_t* qualified_pos) /*!< out: the position info
+ records ranges containing
+ all matching words. */
+{
+ ulint i;
+ ulint idx[MAX_PROXIMITY_ITEM];
+ ulint num_pos[MAX_PROXIMITY_ITEM];
+ ulint min_idx;
+
+ qualified_pos->n_pos = 0;
+
+ ut_a(num_match <= MAX_PROXIMITY_ITEM);
+
+	/* Each word could appear multiple times in a doc. So
+	we need to walk through each word's position list, and find
+	the closest distance between different words to see if
+	they are within the proximity distance. */
+
+	/* Assuming each word's position list is sorted, we
+	just walk through all the words' lists,
+	similar to the merge phase of a merge sort. */
+ for (i = 0; i < num_match; i++) {
+ /* idx is the current position we are checking
+ for a particular word */
+ idx[i] = 0;
+
+ /* Number of positions for this word */
+ num_pos[i] = ib_vector_size(match[i]->positions);
+ }
+
+ /* Start with the first word */
+ min_idx = 0;
+
+ while (idx[min_idx] < num_pos[min_idx]) {
+ ulint position[MAX_PROXIMITY_ITEM];
+ ulint min_pos = ULINT_MAX;
+ ulint max_pos = 0;
+
+ /* Check positions in each word position list, and
+ record the max/min position */
+ for (i = 0; i < num_match; i++) {
+ position[i] = *(ulint*) ib_vector_get_const(
+ match[i]->positions, idx[i]);
+
+ if (position[i] == ULINT_UNDEFINED) {
+ break;
+ }
+
+ if (position[i] < min_pos) {
+ min_pos = position[i];
+ min_idx = i;
+ }
+
+ if (position[i] > max_pos) {
+ max_pos = position[i];
+ }
+ }
+
+ /* If max and min position are within range, we
+ find a good match */
+ if (max_pos - min_pos <= distance
+ && (i >= num_match || position[i] != ULINT_UNDEFINED)) {
+			/* The charset may use a variable-length
+			character encoding; record the min_pos and
+			max_pos, as we will need to verify the actual
+			number of characters later. */
+ qualified_pos->min_pos.push_back(min_pos);
+ qualified_pos->max_pos.push_back(max_pos);
+ qualified_pos->n_pos++;
+ }
+
+		/* Otherwise, move to the next position in the
+		list for the word with the smallest position. */
+ idx[min_idx]++;
+ }
+
+ return(qualified_pos->n_pos != 0);
+}
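+
+/* Worked example of the merge walk above (illustrative values only): with
+two words whose sorted position lists are {3, 17} and {5, 40} and a
+distance of 10, the first round sees positions (3, 5) with a spread of 2,
+so the range (3, 5) is recorded and the first list advances; the following
+rounds see (17, 5) and (17, 40), whose spreads of 12 and 23 exceed the
+distance, so nothing more is recorded and the function returns true with
+one qualified range. */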
diff --git a/storage/innobase/fts/fts0sql.cc b/storage/innobase/fts/fts0sql.cc
new file mode 100644
index 00000000..180500f6
--- /dev/null
+++ b/storage/innobase/fts/fts0sql.cc
@@ -0,0 +1,258 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2019, 2020, MariaDB Corporation.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file fts/fts0sql.cc
+Full Text Search functionality.
+
+Created 2007-03-27 Sunny Bains
+*******************************************************/
+
+#include "que0que.h"
+#include "trx0roll.h"
+#include "pars0pars.h"
+#include "dict0dict.h"
+#include "fts0types.h"
+#include "fts0priv.h"
+
+/** SQL statements for creating the ancillary FTS tables. */
+
+/** Preamble to all SQL statements. */
+static const char* fts_sql_begin=
+ "PROCEDURE P() IS\n";
+
+/** Postamble to non-committing SQL statements. */
+static const char* fts_sql_end=
+ "\n"
+ "END;\n";
+
+/******************************************************************//**
+Get the table id.
+@return number of bytes written */
+int
+fts_get_table_id(
+/*=============*/
+ const fts_table_t*
+ fts_table, /*!< in: FTS Auxiliary table */
+ char* table_id) /*!< out: table id, must be at least
+ FTS_AUX_MIN_TABLE_ID_LENGTH bytes
+ long */
+{
+ int len;
+
+ ut_a(fts_table->table != NULL);
+
+ switch (fts_table->type) {
+ case FTS_COMMON_TABLE:
+ len = fts_write_object_id(fts_table->table_id, table_id);
+ break;
+
+ case FTS_INDEX_TABLE:
+
+ len = fts_write_object_id(fts_table->table_id, table_id);
+
+ table_id[len] = '_';
+ ++len;
+ table_id += len;
+
+ len += fts_write_object_id(fts_table->index_id, table_id);
+ break;
+
+ default:
+ ut_error;
+ }
+
+ ut_a(len >= 16);
+ ut_a(len < FTS_AUX_MIN_TABLE_ID_LENGTH);
+
+ return(len);
+}
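+
+/* Illustrative example only: per the assertions above, the id written by
+fts_write_object_id() is a fixed-width string of at least 16 characters,
+so a common table gets an id such as "0000000000000015", while an index
+table gets "<table id>_<index id>" with the two ids joined by an
+underscore. The exact digits here are made up. */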
+
+/** Construct the name prefix of an internal FTS table for the given table.
+@param[in] fts_table metadata on fulltext-indexed table
+@return the prefix, must be freed with ut_free() */
+char* fts_get_table_name_prefix(const fts_table_t* fts_table)
+{
+ char table_id[FTS_AUX_MIN_TABLE_ID_LENGTH];
+ const size_t table_id_len = size_t(fts_get_table_id(fts_table,
+ table_id)) + 1;
+ mutex_enter(&dict_sys.mutex);
+ /* Include the separator as well. */
+ const size_t dbname_len = fts_table->table->name.dblen() + 1;
+ ut_ad(dbname_len > 1);
+ const size_t prefix_name_len = dbname_len + 4 + table_id_len;
+ char* prefix_name = static_cast<char*>(
+ ut_malloc_nokey(prefix_name_len));
+ memcpy(prefix_name, fts_table->table->name.m_name, dbname_len);
+ mutex_exit(&dict_sys.mutex);
+ memcpy(prefix_name + dbname_len, "FTS_", 4);
+ memcpy(prefix_name + dbname_len + 4, table_id, table_id_len);
+ return prefix_name;
+}
+
+/** Construct the name of an internal FTS table for the given table.
+@param[in] fts_table metadata on fulltext-indexed table
+@param[out] table_name a name up to MAX_FULL_NAME_LEN
+@param[in] dict_locked whether dict_sys.mutex is being held */
+void fts_get_table_name(const fts_table_t* fts_table, char* table_name,
+ bool dict_locked)
+{
+ if (!dict_locked) {
+ mutex_enter(&dict_sys.mutex);
+ }
+ ut_ad(mutex_own(&dict_sys.mutex));
+ /* Include the separator as well. */
+ const size_t dbname_len = fts_table->table->name.dblen() + 1;
+ ut_ad(dbname_len > 1);
+ memcpy(table_name, fts_table->table->name.m_name, dbname_len);
+ if (!dict_locked) {
+ mutex_exit(&dict_sys.mutex);
+ }
+ memcpy(table_name += dbname_len, "FTS_", 4);
+ table_name += 4;
+ table_name += fts_get_table_id(fts_table, table_name);
+ *table_name++ = '_';
+ strcpy(table_name, fts_table->suffix);
+}
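+
+/* Illustrative sketch, not from the upstream sources: how a caller
+typically obtains the fully qualified name of a common auxiliary table.
+It assumes the FTS_INIT_FTS_TABLE() helper macro that the FTS module uses
+to fill in an fts_table_t; the function name is hypothetical. */
+static void
+fts_example_deleted_table_name(
+/*===========================*/
+	dict_table_t*	table,		/*!< in: fulltext-indexed table */
+	char*		table_name)	/*!< out: buffer of at least
+					MAX_FULL_NAME_LEN bytes */
+{
+	fts_table_t	fts_table;
+
+	FTS_INIT_FTS_TABLE(&fts_table, "DELETED", FTS_COMMON_TABLE, table);
+
+	/* Produces e.g. "db/FTS_<table id>_DELETED". */
+	fts_get_table_name(&fts_table, table_name, false);
+}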
+
+/******************************************************************//**
+Parse an SQL string.
+@return query graph */
+que_t*
+fts_parse_sql(
+/*==========*/
+	fts_table_t*	fts_table,	/*!< in: FTS auxiliary table info */
+ pars_info_t* info, /*!< in: info struct, or NULL */
+ const char* sql) /*!< in: SQL string to evaluate */
+{
+ char* str;
+ que_t* graph;
+ ibool dict_locked;
+
+ str = ut_str3cat(fts_sql_begin, sql, fts_sql_end);
+
+ dict_locked = (fts_table && fts_table->table->fts
+ && fts_table->table->fts->dict_locked);
+
+ if (!dict_locked) {
+ ut_ad(!mutex_own(&dict_sys.mutex));
+
+ /* The InnoDB SQL parser is not re-entrant. */
+ mutex_enter(&dict_sys.mutex);
+ }
+
+ graph = pars_sql(info, str);
+ ut_a(graph);
+
+ if (!dict_locked) {
+ mutex_exit(&dict_sys.mutex);
+ }
+
+ ut_free(str);
+
+ return(graph);
+}
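+
+/* Illustrative example only (the statement text is made up): a caller
+passing the string
+
+	BEGIN DELETE FROM my_aux_table;
+
+ends up handing pars_sql() the concatenation
+
+	PROCEDURE P() IS
+	BEGIN DELETE FROM my_aux_table;
+	END;
+
+built from fts_sql_begin and fts_sql_end above. */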
+
+/******************************************************************//**
+Parse an SQL string.
+@return query graph */
+que_t*
+fts_parse_sql_no_dict_lock(
+/*=======================*/
+ pars_info_t* info, /*!< in: info struct, or NULL */
+ const char* sql) /*!< in: SQL string to evaluate */
+{
+ char* str;
+ que_t* graph;
+
+ ut_ad(mutex_own(&dict_sys.mutex));
+
+ str = ut_str3cat(fts_sql_begin, sql, fts_sql_end);
+
+ graph = pars_sql(info, str);
+ ut_a(graph);
+
+ ut_free(str);
+
+ return(graph);
+}
+
+/******************************************************************//**
+Evaluate an SQL query graph.
+@return DB_SUCCESS or error code */
+dberr_t
+fts_eval_sql(
+/*=========*/
+ trx_t* trx, /*!< in: transaction */
+ que_t* graph) /*!< in: Query graph to evaluate */
+{
+ que_thr_t* thr;
+
+ graph->trx = trx;
+ graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
+
+ ut_a(thr = que_fork_start_command(graph));
+
+ que_run_threads(thr);
+
+ return(trx->error_state);
+}
+
+/******************************************************************//**
+Construct the column specification part of the SQL string for selecting the
+indexed FTS columns for the given table. Adds the necessary bound
+ids to the given 'info' and returns the SQL string. Examples:
+
+One indexed column named "text":
+
+ "$sel0",
+ info/ids: sel0 -> "text"
+
+Two indexed columns named "subject" and "content":
+
+ "$sel0, $sel1",
+ info/ids: sel0 -> "subject", sel1 -> "content",
+@return heap-allocated column specification string */
+const char*
+fts_get_select_columns_str(
+/*=======================*/
+ dict_index_t* index, /*!< in: index */
+ pars_info_t* info, /*!< in/out: parser info */
+ mem_heap_t* heap) /*!< in: memory heap */
+{
+ ulint i;
+ const char* str = "";
+
+ for (i = 0; i < index->n_user_defined_cols; i++) {
+ char* sel_str;
+
+ dict_field_t* field = dict_index_get_nth_field(index, i);
+
+ sel_str = mem_heap_printf(heap, "sel%lu", (ulong) i);
+
+ /* Set copy_name to TRUE since it's dynamic. */
+ pars_info_bind_id(info, TRUE, sel_str, field->name);
+
+ str = mem_heap_printf(
+ heap, "%s%s$%s", str, (*str) ? ", " : "", sel_str);
+ }
+
+ return(str);
+}
diff --git a/storage/innobase/fts/fts0tlex.cc b/storage/innobase/fts/fts0tlex.cc
new file mode 100644
index 00000000..29f73f23
--- /dev/null
+++ b/storage/innobase/fts/fts0tlex.cc
@@ -0,0 +1,2169 @@
+#include "univ.i"
+#line 2 "fts0tlex.cc"
+
+#line 4 "fts0tlex.cc"
+
+#define YY_INT_ALIGNED short int
+
+/* A lexical scanner generated by flex */
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 6
+#define YY_FLEX_SUBMINOR_VERSION 4
+#if YY_FLEX_SUBMINOR_VERSION > 0
+#define FLEX_BETA
+#endif
+
+#ifdef yy_create_buffer
+#define fts0t_create_buffer_ALREADY_DEFINED
+#else
+#define yy_create_buffer fts0t_create_buffer
+#endif
+
+#ifdef yy_delete_buffer
+#define fts0t_delete_buffer_ALREADY_DEFINED
+#else
+#define yy_delete_buffer fts0t_delete_buffer
+#endif
+
+#ifdef yy_scan_buffer
+#define fts0t_scan_buffer_ALREADY_DEFINED
+#else
+#define yy_scan_buffer fts0t_scan_buffer
+#endif
+
+#ifdef yy_scan_string
+#define fts0t_scan_string_ALREADY_DEFINED
+#else
+#define yy_scan_string fts0t_scan_string
+#endif
+
+#ifdef yy_scan_bytes
+#define fts0t_scan_bytes_ALREADY_DEFINED
+#else
+#define yy_scan_bytes fts0t_scan_bytes
+#endif
+
+#ifdef yy_init_buffer
+#define fts0t_init_buffer_ALREADY_DEFINED
+#else
+#define yy_init_buffer fts0t_init_buffer
+#endif
+
+#ifdef yy_flush_buffer
+#define fts0t_flush_buffer_ALREADY_DEFINED
+#else
+#define yy_flush_buffer fts0t_flush_buffer
+#endif
+
+#ifdef yy_load_buffer_state
+#define fts0t_load_buffer_state_ALREADY_DEFINED
+#else
+#define yy_load_buffer_state fts0t_load_buffer_state
+#endif
+
+#ifdef yy_switch_to_buffer
+#define fts0t_switch_to_buffer_ALREADY_DEFINED
+#else
+#define yy_switch_to_buffer fts0t_switch_to_buffer
+#endif
+
+#ifdef yypush_buffer_state
+#define fts0tpush_buffer_state_ALREADY_DEFINED
+#else
+#define yypush_buffer_state fts0tpush_buffer_state
+#endif
+
+#ifdef yypop_buffer_state
+#define fts0tpop_buffer_state_ALREADY_DEFINED
+#else
+#define yypop_buffer_state fts0tpop_buffer_state
+#endif
+
+#ifdef yyensure_buffer_stack
+#define fts0tensure_buffer_stack_ALREADY_DEFINED
+#else
+#define yyensure_buffer_stack fts0tensure_buffer_stack
+#endif
+
+#ifdef yylex
+#define fts0tlex_ALREADY_DEFINED
+#else
+#define yylex fts0tlex
+#endif
+
+#ifdef yyrestart
+#define fts0trestart_ALREADY_DEFINED
+#else
+#define yyrestart fts0trestart
+#endif
+
+#ifdef yylex_init
+#define fts0tlex_init_ALREADY_DEFINED
+#else
+#define yylex_init fts0tlex_init
+#endif
+
+#ifdef yylex_init_extra
+#define fts0tlex_init_extra_ALREADY_DEFINED
+#else
+#define yylex_init_extra fts0tlex_init_extra
+#endif
+
+#ifdef yylex_destroy
+#define fts0tlex_destroy_ALREADY_DEFINED
+#else
+#define yylex_destroy fts0tlex_destroy
+#endif
+
+#ifdef yyget_debug
+#define fts0tget_debug_ALREADY_DEFINED
+#else
+#define yyget_debug fts0tget_debug
+#endif
+
+#ifdef yyset_debug
+#define fts0tset_debug_ALREADY_DEFINED
+#else
+#define yyset_debug fts0tset_debug
+#endif
+
+#ifdef yyget_extra
+#define fts0tget_extra_ALREADY_DEFINED
+#else
+#define yyget_extra fts0tget_extra
+#endif
+
+#ifdef yyset_extra
+#define fts0tset_extra_ALREADY_DEFINED
+#else
+#define yyset_extra fts0tset_extra
+#endif
+
+#ifdef yyget_in
+#define fts0tget_in_ALREADY_DEFINED
+#else
+#define yyget_in fts0tget_in
+#endif
+
+#ifdef yyset_in
+#define fts0tset_in_ALREADY_DEFINED
+#else
+#define yyset_in fts0tset_in
+#endif
+
+#ifdef yyget_out
+#define fts0tget_out_ALREADY_DEFINED
+#else
+#define yyget_out fts0tget_out
+#endif
+
+#ifdef yyset_out
+#define fts0tset_out_ALREADY_DEFINED
+#else
+#define yyset_out fts0tset_out
+#endif
+
+#ifdef yyget_leng
+#define fts0tget_leng_ALREADY_DEFINED
+#else
+#define yyget_leng fts0tget_leng
+#endif
+
+#ifdef yyget_text
+#define fts0tget_text_ALREADY_DEFINED
+#else
+#define yyget_text fts0tget_text
+#endif
+
+#ifdef yyget_lineno
+#define fts0tget_lineno_ALREADY_DEFINED
+#else
+#define yyget_lineno fts0tget_lineno
+#endif
+
+#ifdef yyset_lineno
+#define fts0tset_lineno_ALREADY_DEFINED
+#else
+#define yyset_lineno fts0tset_lineno
+#endif
+
+#ifdef yyget_column
+#define fts0tget_column_ALREADY_DEFINED
+#else
+#define yyget_column fts0tget_column
+#endif
+
+#ifdef yyset_column
+#define fts0tset_column_ALREADY_DEFINED
+#else
+#define yyset_column fts0tset_column
+#endif
+
+#ifdef yywrap
+#define fts0twrap_ALREADY_DEFINED
+#else
+#define yywrap fts0twrap
+#endif
+
+#ifdef yyalloc
+#define fts0talloc_ALREADY_DEFINED
+#else
+#define yyalloc fts0talloc
+#endif
+
+#ifdef yyrealloc
+#define fts0trealloc_ALREADY_DEFINED
+#else
+#define yyrealloc fts0trealloc
+#endif
+
+#ifdef yyfree
+#define fts0tfree_ALREADY_DEFINED
+#else
+#define yyfree fts0tfree
+#endif
+
+/* First, we deal with platform-specific or compiler-specific issues. */
+
+/* begin standard C headers. */
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+/* end standard C headers. */
+
+/* flex integer type definitions */
+
+#ifndef FLEXINT_H
+#define FLEXINT_H
+
+/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+
+/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
+ * if you want the limit (max/min) macros for int types.
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS 1
+#endif
+
+#include <inttypes.h>
+typedef int8_t flex_int8_t;
+typedef uint8_t flex_uint8_t;
+typedef int16_t flex_int16_t;
+typedef uint16_t flex_uint16_t;
+typedef int32_t flex_int32_t;
+typedef uint32_t flex_uint32_t;
+#else
+typedef signed char flex_int8_t;
+typedef short int flex_int16_t;
+typedef int flex_int32_t;
+typedef unsigned char flex_uint8_t;
+typedef unsigned short int flex_uint16_t;
+typedef unsigned int flex_uint32_t;
+
+/* Limits of integral types. */
+#ifndef INT8_MIN
+#define INT8_MIN (-128)
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN (-32767-1)
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN (-2147483647-1)
+#endif
+#ifndef INT8_MAX
+#define INT8_MAX (127)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX (32767)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX (2147483647)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX (255U)
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX (65535U)
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX (4294967295U)
+#endif
+
+#ifndef SIZE_MAX
+#define SIZE_MAX (~(size_t)0)
+#endif
+
+#endif /* ! C99 */
+
+#endif /* ! FLEXINT_H */
+
+/* begin standard C++ headers. */
+
+/* TODO: this is always defined, so inline it */
+#define yyconst const
+
+#if defined(__GNUC__) && __GNUC__ >= 3
+#define yynoreturn __attribute__((__noreturn__))
+#else
+#define yynoreturn
+#endif
+
+/* Returned upon end-of-file. */
+#define YY_NULL 0
+
+/* Promotes a possibly negative, possibly signed char to an
+ * integer in range [0..255] for use as an array index.
+ */
+#define YY_SC_TO_UI(c) ((YY_CHAR) (c))
+
+/* An opaque pointer. */
+#ifndef YY_TYPEDEF_YY_SCANNER_T
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void* yyscan_t;
+#endif
+
+/* For convenience, these vars (plus the bison vars far below)
+ are macros in the reentrant scanner. */
+#define yyin yyg->yyin_r
+#define yyout yyg->yyout_r
+#define yyextra yyg->yyextra_r
+#define yyleng yyg->yyleng_r
+#define yytext yyg->yytext_r
+#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno)
+#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column)
+#define yy_flex_debug yyg->yy_flex_debug_r
+
+/* Enter a start condition. This macro really ought to take a parameter,
+ * but we do it the disgusting crufty way forced on us by the ()-less
+ * definition of BEGIN.
+ */
+#define BEGIN yyg->yy_start = 1 + 2 *
+/* Translate the current start state into a value that can be later handed
+ * to BEGIN to return to the state. The YYSTATE alias is for lex
+ * compatibility.
+ */
+#define YY_START ((yyg->yy_start - 1) / 2)
+#define YYSTATE YY_START
+/* Action number for EOF rule of a given start state. */
+#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
+/* Special action meaning "start processing a new file". */
+#define YY_NEW_FILE yyrestart( yyin , yyscanner )
+#define YY_END_OF_BUFFER_CHAR 0
+
+/* Size of default input buffer. */
+#ifndef YY_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k.
+ * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
+ * Ditto for the __ia64__ case accordingly.
+ */
+#define YY_BUF_SIZE 32768
+#else
+#define YY_BUF_SIZE 16384
+#endif /* __ia64__ */
+#endif
+
+/* The state buf must be large enough to hold one state per character in the main buffer.
+ */
+#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type))
+
+#ifndef YY_TYPEDEF_YY_BUFFER_STATE
+#define YY_TYPEDEF_YY_BUFFER_STATE
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+#endif
+
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+#define EOB_ACT_CONTINUE_SCAN 0
+#define EOB_ACT_END_OF_FILE 1
+#define EOB_ACT_LAST_MATCH 2
+
+ #define YY_LESS_LINENO(n)
+ #define YY_LINENO_REWIND_TO(ptr)
+
+/* Return all but the first "n" matched characters back to the input stream. */
+#define yyless(n) \
+ do \
+ { \
+ /* Undo effects of setting up yytext. */ \
+ int yyless_macro_arg = (n); \
+ YY_LESS_LINENO(yyless_macro_arg);\
+ *yy_cp = yyg->yy_hold_char; \
+ YY_RESTORE_YY_MORE_OFFSET \
+ yyg->yy_c_buf_p = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
+ YY_DO_BEFORE_ACTION; /* set up yytext again */ \
+ } \
+ while ( 0 )
+#define unput(c) yyunput( c, yyg->yytext_ptr , yyscanner )
+
+#ifndef YY_STRUCT_YY_BUFFER_STATE
+#define YY_STRUCT_YY_BUFFER_STATE
+struct yy_buffer_state
+ {
+ FILE *yy_input_file;
+
+ char *yy_ch_buf; /* input buffer */
+ char *yy_buf_pos; /* current position in input buffer */
+
+ /* Size of input buffer in bytes, not including room for EOB
+ * characters.
+ */
+ int yy_buf_size;
+
+ /* Number of characters read into yy_ch_buf, not including EOB
+ * characters.
+ */
+ int yy_n_chars;
+
+ /* Whether we "own" the buffer - i.e., we know we created it,
+ * and can realloc() it to grow it, and should free() it to
+ * delete it.
+ */
+ int yy_is_our_buffer;
+
+ /* Whether this is an "interactive" input source; if so, and
+ * if we're using stdio for input, then we want to use getc()
+ * instead of fread(), to make sure we stop fetching input after
+ * each newline.
+ */
+ int yy_is_interactive;
+
+ /* Whether we're considered to be at the beginning of a line.
+ * If so, '^' rules will be active on the next match, otherwise
+ * not.
+ */
+ int yy_at_bol;
+
+ int yy_bs_lineno; /**< The line count. */
+ int yy_bs_column; /**< The column count. */
+
+ /* Whether to try to fill the input buffer when we reach the
+ * end of it.
+ */
+ int yy_fill_buffer;
+
+ int yy_buffer_status;
+
+#define YY_BUFFER_NEW 0
+#define YY_BUFFER_NORMAL 1
+ /* When an EOF's been seen but there's still some text to process
+ * then we mark the buffer as YY_EOF_PENDING, to indicate that we
+ * shouldn't try reading from the input source any more. We might
+ * still have a bunch of tokens to match, though, because of
+ * possible backing-up.
+ *
+ * When we actually see the EOF, we change the status to "new"
+ * (via yyrestart()), so that the user can continue scanning by
+ * just pointing yyin at a new input file.
+ */
+#define YY_BUFFER_EOF_PENDING 2
+
+ };
+#endif /* !YY_STRUCT_YY_BUFFER_STATE */
+
+/* We provide macros for accessing buffer states in case in the
+ * future we want to put the buffer states in a more general
+ * "scanner state".
+ *
+ * Returns the top of the stack, or NULL.
+ */
+#define YY_CURRENT_BUFFER ( yyg->yy_buffer_stack \
+ ? yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] \
+ : NULL)
+/* Same as previous macro, but useful when we know that the buffer stack is not
+ * NULL or when we need an lvalue. For internal use only.
+ */
+#define YY_CURRENT_BUFFER_LVALUE yyg->yy_buffer_stack[yyg->yy_buffer_stack_top]
+
+void yyrestart ( FILE *input_file , yyscan_t yyscanner );
+void yy_switch_to_buffer ( YY_BUFFER_STATE new_buffer , yyscan_t yyscanner );
+YY_BUFFER_STATE yy_create_buffer ( FILE *file, int size , yyscan_t yyscanner );
+void yy_delete_buffer ( YY_BUFFER_STATE b , yyscan_t yyscanner );
+void yy_flush_buffer ( YY_BUFFER_STATE b , yyscan_t yyscanner );
+void yypush_buffer_state ( YY_BUFFER_STATE new_buffer , yyscan_t yyscanner );
+void yypop_buffer_state ( yyscan_t yyscanner );
+
+static void yyensure_buffer_stack ( yyscan_t yyscanner );
+static void yy_load_buffer_state ( yyscan_t yyscanner );
+static void yy_init_buffer ( YY_BUFFER_STATE b, FILE *file , yyscan_t yyscanner );
+#define YY_FLUSH_BUFFER yy_flush_buffer( YY_CURRENT_BUFFER , yyscanner)
+
+YY_BUFFER_STATE yy_scan_buffer ( char *base, yy_size_t size , yyscan_t yyscanner );
+YY_BUFFER_STATE yy_scan_string ( const char *yy_str , yyscan_t yyscanner );
+YY_BUFFER_STATE yy_scan_bytes ( const char *bytes, int len , yyscan_t yyscanner );
+
+void *yyalloc ( yy_size_t , yyscan_t yyscanner );
+void *yyrealloc ( void *, yy_size_t , yyscan_t yyscanner );
+void yyfree ( void * , yyscan_t yyscanner );
+
+#define yy_new_buffer yy_create_buffer
+#define yy_set_interactive(is_interactive) \
+ { \
+ if ( ! YY_CURRENT_BUFFER ){ \
+ yyensure_buffer_stack (yyscanner); \
+ YY_CURRENT_BUFFER_LVALUE = \
+ yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner); \
+ } \
+ YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
+ }
+#define yy_set_bol(at_bol) \
+ { \
+ if ( ! YY_CURRENT_BUFFER ){\
+ yyensure_buffer_stack (yyscanner); \
+ YY_CURRENT_BUFFER_LVALUE = \
+ yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner); \
+ } \
+ YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
+ }
+#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
+
+/* Begin user sect3 */
+
+#define fts0twrap(yyscanner) (/*CONSTCOND*/1)
+#define YY_SKIP_YYWRAP
+typedef flex_uint8_t YY_CHAR;
+
+typedef int yy_state_type;
+
+#define yytext_ptr yytext_r
+
+static yy_state_type yy_get_previous_state ( yyscan_t yyscanner );
+static yy_state_type yy_try_NUL_trans ( yy_state_type current_state , yyscan_t yyscanner);
+static int yy_get_next_buffer ( yyscan_t yyscanner );
+static void yynoreturn yy_fatal_error ( const char* msg , yyscan_t yyscanner );
+
+/* Done after the current pattern has been matched and before the
+ * corresponding action - sets up yytext.
+ */
+#define YY_DO_BEFORE_ACTION \
+ yyg->yytext_ptr = yy_bp; \
+ yyleng = (int) (yy_cp - yy_bp); \
+ yyg->yy_hold_char = *yy_cp; \
+ *yy_cp = '\0'; \
+ yyg->yy_c_buf_p = yy_cp;
+#define YY_NUM_RULES 7
+#define YY_END_OF_BUFFER 8
+/* This struct is not used in this scanner,
+ but its presence is necessary. */
+struct yy_trans_info
+ {
+ flex_int32_t yy_verify;
+ flex_int32_t yy_nxt;
+ };
+static const flex_int16_t yy_accept[17] =
+ { 0,
+ 4, 4, 8, 4, 1, 6, 1, 5, 5, 2,
+ 4, 1, 1, 0, 3, 0
+ } ;
+
+static const YY_CHAR yy_ec[256] =
+ { 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 4, 1, 5, 1, 1, 6, 1, 1, 1,
+ 1, 7, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1
+ } ;
+
+static const YY_CHAR yy_meta[8] =
+ { 0,
+ 1, 2, 3, 4, 5, 5, 1
+ } ;
+
+static const flex_int16_t yy_base[20] =
+ { 0,
+ 0, 0, 18, 0, 6, 21, 0, 9, 21, 0,
+ 0, 0, 0, 4, 21, 21, 10, 11, 15
+ } ;
+
+static const flex_int16_t yy_def[20] =
+ { 0,
+ 16, 1, 16, 17, 17, 16, 18, 19, 16, 17,
+ 17, 5, 18, 19, 16, 0, 16, 16, 16
+ } ;
+
+static const flex_int16_t yy_nxt[29] =
+ { 0,
+ 4, 5, 6, 7, 8, 9, 10, 12, 15, 13,
+ 11, 11, 13, 15, 13, 14, 14, 16, 14, 14,
+ 3, 16, 16, 16, 16, 16, 16, 16
+ } ;
+
+static const flex_int16_t yy_chk[29] =
+ { 0,
+ 1, 1, 1, 1, 1, 1, 1, 5, 14, 5,
+ 17, 17, 18, 8, 18, 19, 19, 3, 19, 19,
+ 16, 16, 16, 16, 16, 16, 16, 16
+ } ;
+
+/* The intent behind this definition is that it'll catch
+ * any uses of REJECT which flex missed.
+ */
+#define REJECT reject_used_but_not_detected
+#define yymore() yymore_used_but_not_detected
+#define YY_MORE_ADJ 0
+#define YY_RESTORE_YY_MORE_OFFSET
+#line 1 "fts0tlex.l"
+/*****************************************************************************
+
+Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+/**
+ * @file fts/fts0tlex.l
+ * FTS parser lexical analyzer
+ *
+ * Created 2007/5/9 Sunny Bains
+ */
+#line 27 "fts0tlex.l"
+
+#include "fts0ast.h"
+#include "fts0pars.h"
+
+/* Required for reentrant parser */
+#define YY_DECL int fts_tlexer(YYSTYPE* val, yyscan_t yyscanner)
+#define exit(A) ut_error
+
+#line 671 "fts0tlex.cc"
+#define YY_NO_INPUT 1
+#line 673 "fts0tlex.cc"
+
+#define INITIAL 0
+
+#ifndef YY_NO_UNISTD_H
+/* Special case for "unistd.h", since it is non-ANSI. We include it way
+ * down here because we want the user's section 1 to have been scanned first.
+ * The user has a chance to override it with an option.
+ */
+#include <unistd.h>
+#endif
+
+#ifndef YY_EXTRA_TYPE
+#define YY_EXTRA_TYPE void *
+#endif
+
+/* Holds the entire state of the reentrant scanner. */
+struct yyguts_t
+ {
+
+ /* User-defined. Not touched by flex. */
+ YY_EXTRA_TYPE yyextra_r;
+
+ /* The rest are the same as the globals declared in the non-reentrant scanner. */
+ FILE *yyin_r, *yyout_r;
+ size_t yy_buffer_stack_top; /**< index of top of stack. */
+ size_t yy_buffer_stack_max; /**< capacity of stack. */
+ YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. */
+ char yy_hold_char;
+ int yy_n_chars;
+ int yyleng_r;
+ char *yy_c_buf_p;
+ int yy_init;
+ int yy_start;
+ int yy_did_buffer_switch_on_eof;
+ int yy_start_stack_ptr;
+ int yy_start_stack_depth;
+ int *yy_start_stack;
+ yy_state_type yy_last_accepting_state;
+ char* yy_last_accepting_cpos;
+
+ int yylineno_r;
+ int yy_flex_debug_r;
+
+ char *yytext_r;
+ int yy_more_flag;
+ int yy_more_len;
+
+ }; /* end struct yyguts_t */
+
+static int yy_init_globals ( yyscan_t yyscanner );
+
+int yylex_init (yyscan_t* scanner);
+
+int yylex_init_extra ( YY_EXTRA_TYPE user_defined, yyscan_t* scanner);
+
+/* Accessor methods to globals.
+ These are made visible to non-reentrant scanners for convenience. */
+
+int yylex_destroy ( yyscan_t yyscanner );
+
+int yyget_debug ( yyscan_t yyscanner );
+
+void yyset_debug ( int debug_flag , yyscan_t yyscanner );
+
+YY_EXTRA_TYPE yyget_extra ( yyscan_t yyscanner );
+
+void yyset_extra ( YY_EXTRA_TYPE user_defined , yyscan_t yyscanner );
+
+FILE *yyget_in ( yyscan_t yyscanner );
+
+void yyset_in ( FILE * _in_str , yyscan_t yyscanner );
+
+FILE *yyget_out ( yyscan_t yyscanner );
+
+void yyset_out ( FILE * _out_str , yyscan_t yyscanner );
+
+ int yyget_leng ( yyscan_t yyscanner );
+
+char *yyget_text ( yyscan_t yyscanner );
+
+int yyget_lineno ( yyscan_t yyscanner );
+
+void yyset_lineno ( int _line_number , yyscan_t yyscanner );
+
+int yyget_column ( yyscan_t yyscanner );
+
+void yyset_column ( int _column_no , yyscan_t yyscanner );
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int yywrap ( yyscan_t yyscanner );
+#else
+extern int yywrap ( yyscan_t yyscanner );
+#endif
+#endif
+
+#ifndef YY_NO_UNPUT
+
+#endif
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy ( char *, const char *, int , yyscan_t yyscanner);
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen ( const char * , yyscan_t yyscanner);
+#endif
+
+#ifndef YY_NO_INPUT
+#ifdef __cplusplus
+static int yyinput ( yyscan_t yyscanner );
+#else
+static int input ( yyscan_t yyscanner );
+#endif
+
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k */
+#define YY_READ_BUF_SIZE 16384
+#else
+#define YY_READ_BUF_SIZE 8192
+#endif /* __ia64__ */
+#endif
+
+/* Copy whatever the last rule matched to the standard output. */
+#ifndef ECHO
+/* This used to be an fputs(), but since the string might contain NUL's,
+ * we now use fwrite().
+ */
+#define ECHO do { if (fwrite( yytext, (size_t) yyleng, 1, yyout )) {} } while (0)
+#endif
+
+/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
+ * is returned in "result".
+ */
+#ifndef YY_INPUT
+#define YY_INPUT(buf,result,max_size) \
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
+ { \
+ int c = '*'; \
+ int n; \
+ for ( n = 0; n < max_size && \
+ (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
+ buf[n] = (char) c; \
+ if ( c == '\n' ) \
+ buf[n++] = (char) c; \
+ if ( c == EOF && ferror( yyin ) ) \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ result = n; \
+ } \
+ else \
+ { \
+ errno=0; \
+ while ( (result = (int) fread(buf, 1, (yy_size_t) max_size, yyin)) == 0 && ferror(yyin)) \
+ { \
+ if( errno != EINTR) \
+ { \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ break; \
+ } \
+ errno=0; \
+ clearerr(yyin); \
+ } \
+ }\
+\
+
+#endif
+
+/* No semi-colon after return; correct usage is to write "yyterminate();" -
+ * we don't want an extra ';' after the "return" because that will cause
+ * some compilers to complain about unreachable statements.
+ */
+#ifndef yyterminate
+#define yyterminate() return YY_NULL
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Report a fatal error. */
+#ifndef YY_FATAL_ERROR
+#define YY_FATAL_ERROR(msg) yy_fatal_error( msg , yyscanner)
+#endif
+
+/* end tables serialization structures and prototypes */
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL_IS_OURS 1
+
+extern int yylex (yyscan_t yyscanner);
+
+#define YY_DECL int yylex (yyscan_t yyscanner)
+#endif /* !YY_DECL */
+
+/* Code executed at the beginning of each rule, after yytext and yyleng
+ * have been set up.
+ */
+#ifndef YY_USER_ACTION
+#define YY_USER_ACTION
+#endif
+
+/* Code executed at the end of each rule. */
+#ifndef YY_BREAK
+#define YY_BREAK /*LINTED*/break;
+#endif
+
+#define YY_RULE_SETUP \
+ YY_USER_ACTION
+
+/** The main scanner function which does all the work.
+ */
+YY_DECL
+{
+ yy_state_type yy_current_state;
+ char *yy_cp, *yy_bp;
+ int yy_act;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ if ( !yyg->yy_init )
+ {
+ yyg->yy_init = 1;
+
+#ifdef YY_USER_INIT
+ YY_USER_INIT;
+#endif
+
+ if ( ! yyg->yy_start )
+ yyg->yy_start = 1; /* first start state */
+
+ if ( ! yyin )
+ yyin = stdin;
+
+ if ( ! yyout )
+ yyout = stdout;
+
+ if ( ! YY_CURRENT_BUFFER ) {
+ yyensure_buffer_stack (yyscanner);
+ YY_CURRENT_BUFFER_LVALUE =
+ yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner);
+ }
+
+ yy_load_buffer_state( yyscanner );
+ }
+
+ {
+#line 45 "fts0tlex.l"
+
+
+#line 934 "fts0tlex.cc"
+
+ while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */
+ {
+ yy_cp = yyg->yy_c_buf_p;
+
+ /* Support of yytext. */
+ *yy_cp = yyg->yy_hold_char;
+
+ /* yy_bp points to the position in yy_ch_buf of the start of
+ * the current run.
+ */
+ yy_bp = yy_cp;
+
+ yy_current_state = yyg->yy_start;
+yy_match:
+ do
+ {
+ YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ;
+ if ( yy_accept[yy_current_state] )
+ {
+ yyg->yy_last_accepting_state = yy_current_state;
+ yyg->yy_last_accepting_cpos = yy_cp;
+ }
+ while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
+ {
+ yy_current_state = (int) yy_def[yy_current_state];
+ if ( yy_current_state >= 17 )
+ yy_c = yy_meta[yy_c];
+ }
+ yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
+ ++yy_cp;
+ }
+ while ( yy_current_state != 16 );
+ yy_cp = yyg->yy_last_accepting_cpos;
+ yy_current_state = yyg->yy_last_accepting_state;
+
+yy_find_action:
+ yy_act = yy_accept[yy_current_state];
+
+ YY_DO_BEFORE_ACTION;
+
+do_action: /* This label is used only to access EOF actions. */
+
+ switch ( yy_act )
+ { /* beginning of action switch */
+ case 0: /* must back up */
+ /* undo the effects of YY_DO_BEFORE_ACTION */
+ *yy_cp = yyg->yy_hold_char;
+ yy_cp = yyg->yy_last_accepting_cpos;
+ yy_current_state = yyg->yy_last_accepting_state;
+ goto yy_find_action;
+
+case 1:
+YY_RULE_SETUP
+#line 47 "fts0tlex.l"
+/* Ignore whitespace */ ;
+ YY_BREAK
+case 2:
+YY_RULE_SETUP
+#line 49 "fts0tlex.l"
+{
+ val->oper = fts0tget_text(yyscanner)[0];
+
+ return(val->oper);
+}
+ YY_BREAK
+case 3:
+YY_RULE_SETUP
+#line 55 "fts0tlex.l"
+{
+ val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner));
+
+ return(FTS_TEXT);
+}
+ YY_BREAK
+case 4:
+YY_RULE_SETUP
+#line 61 "fts0tlex.l"
+{
+ val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner));
+
+ return(FTS_TERM);
+}
+ YY_BREAK
+case 5:
+YY_RULE_SETUP
+#line 66 "fts0tlex.l"
+;
+ YY_BREAK
+case 6:
+/* rule 6 can match eol */
+YY_RULE_SETUP
+#line 67 "fts0tlex.l"
+
+ YY_BREAK
+case 7:
+YY_RULE_SETUP
+#line 69 "fts0tlex.l"
+ECHO;
+ YY_BREAK
+#line 1035 "fts0tlex.cc"
+case YY_STATE_EOF(INITIAL):
+ yyterminate();
+
+ case YY_END_OF_BUFFER:
+ {
+ /* Amount of text matched not including the EOB char. */
+ int yy_amount_of_matched_text = (int) (yy_cp - yyg->yytext_ptr) - 1;
+
+ /* Undo the effects of YY_DO_BEFORE_ACTION. */
+ *yy_cp = yyg->yy_hold_char;
+ YY_RESTORE_YY_MORE_OFFSET
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
+ {
+ /* We're scanning a new file or input source. It's
+ * possible that this happened because the user
+ * just pointed yyin at a new source and called
+ * yylex(). If so, then we have to assure
+ * consistency between YY_CURRENT_BUFFER and our
+ * globals. Here is the right place to do so, because
+ * this is the first action (other than possibly a
+ * back-up) that will match for the new input source.
+ */
+ yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+ YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin;
+ YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
+ }
+
+ /* Note that here we test for yy_c_buf_p "<=" to the position
+ * of the first EOB in the buffer, since yy_c_buf_p will
+ * already have been incremented past the NUL character
+ * (since all states make transitions on EOB to the
+ * end-of-buffer state). Contrast this with the test
+ * in input().
+ */
+ if ( yyg->yy_c_buf_p <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] )
+ { /* This was really a NUL. */
+ yy_state_type yy_next_state;
+
+ yyg->yy_c_buf_p = yyg->yytext_ptr + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state( yyscanner );
+
+ /* Okay, we're now positioned to make the NUL
+ * transition. We couldn't have
+ * yy_get_previous_state() go ahead and do it
+ * for us because it doesn't know how to deal
+ * with the possibility of jamming (and we don't
+ * want to build jamming into it because then it
+ * will run more slowly).
+ */
+
+ yy_next_state = yy_try_NUL_trans( yy_current_state , yyscanner);
+
+ yy_bp = yyg->yytext_ptr + YY_MORE_ADJ;
+
+ if ( yy_next_state )
+ {
+ /* Consume the NUL. */
+ yy_cp = ++yyg->yy_c_buf_p;
+ yy_current_state = yy_next_state;
+ goto yy_match;
+ }
+
+ else
+ {
+ yy_cp = yyg->yy_last_accepting_cpos;
+ yy_current_state = yyg->yy_last_accepting_state;
+ goto yy_find_action;
+ }
+ }
+
+ else switch ( yy_get_next_buffer( yyscanner ) )
+ {
+ case EOB_ACT_END_OF_FILE:
+ {
+ yyg->yy_did_buffer_switch_on_eof = 0;
+
+ if ( yywrap( yyscanner ) )
+ {
+ /* Note: because we've taken care in
+ * yy_get_next_buffer() to have set up
+ * yytext, we can now set up
+ * yy_c_buf_p so that if some total
+ * hoser (like flex itself) wants to
+ * call the scanner after we return the
+ * YY_NULL, it'll still work - another
+ * YY_NULL will get returned.
+ */
+ yyg->yy_c_buf_p = yyg->yytext_ptr + YY_MORE_ADJ;
+
+ yy_act = YY_STATE_EOF(YY_START);
+ goto do_action;
+ }
+
+ else
+ {
+ if ( ! yyg->yy_did_buffer_switch_on_eof )
+ YY_NEW_FILE;
+ }
+ break;
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ yyg->yy_c_buf_p =
+ yyg->yytext_ptr + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state( yyscanner );
+
+ yy_cp = yyg->yy_c_buf_p;
+ yy_bp = yyg->yytext_ptr + YY_MORE_ADJ;
+ goto yy_match;
+
+ case EOB_ACT_LAST_MATCH:
+ yyg->yy_c_buf_p =
+ &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars];
+
+ yy_current_state = yy_get_previous_state( yyscanner );
+
+ yy_cp = yyg->yy_c_buf_p;
+ yy_bp = yyg->yytext_ptr + YY_MORE_ADJ;
+ goto yy_find_action;
+ }
+ break;
+ }
+
+ default:
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--no action found" );
+ } /* end of action switch */
+ } /* end of scanning one token */
+ } /* end of user's declarations */
+} /* end of yylex */
+
+/* yy_get_next_buffer - try to read in a new buffer
+ *
+ * Returns a code representing an action:
+ * EOB_ACT_LAST_MATCH -
+ * EOB_ACT_CONTINUE_SCAN - continue scanning from current position
+ * EOB_ACT_END_OF_FILE - end of file
+ */
+static int yy_get_next_buffer (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
+ char *source = yyg->yytext_ptr;
+ int number_to_move, i;
+ int ret_val;
+
+ if ( yyg->yy_c_buf_p > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] )
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--end of buffer missed" );
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
+ { /* Don't try to fill the buffer, so this is an EOF. */
+ if ( yyg->yy_c_buf_p - yyg->yytext_ptr - YY_MORE_ADJ == 1 )
+ {
+ /* We matched a single character, the EOB, so
+ * treat this as a final EOF.
+ */
+ return EOB_ACT_END_OF_FILE;
+ }
+
+ else
+ {
+ /* We matched some text prior to the EOB, first
+ * process it.
+ */
+ return EOB_ACT_LAST_MATCH;
+ }
+ }
+
+ /* Try to read more data. */
+
+ /* First move last chars to start of buffer. */
+ number_to_move = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr - 1);
+
+ for ( i = 0; i < number_to_move; ++i )
+ *(dest++) = *(source++);
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
+ /* don't do the read, it's not guaranteed to return an EOF,
+ * just force an EOF
+ */
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars = 0;
+
+ else
+ {
+ int num_to_read =
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
+
+ while ( num_to_read <= 0 )
+ { /* Not enough room in the buffer - grow it. */
+
+ /* just a shorter name for the current buffer */
+ YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE;
+
+ int yy_c_buf_p_offset =
+ (int) (yyg->yy_c_buf_p - b->yy_ch_buf);
+
+ if ( b->yy_is_our_buffer )
+ {
+ int new_size = b->yy_buf_size * 2;
+
+ if ( new_size <= 0 )
+ b->yy_buf_size += b->yy_buf_size / 8;
+ else
+ b->yy_buf_size *= 2;
+
+ b->yy_ch_buf = (char *)
+ /* Include room in for 2 EOB chars. */
+ yyrealloc( (void *) b->yy_ch_buf,
+ (yy_size_t) (b->yy_buf_size + 2) , yyscanner );
+ }
+ else
+ /* Can't grow it, we don't own it. */
+ b->yy_ch_buf = NULL;
+
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR(
+ "fatal error - scanner input buffer overflow" );
+
+ yyg->yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset];
+
+ num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size -
+ number_to_move - 1;
+
+ }
+
+ if ( num_to_read > YY_READ_BUF_SIZE )
+ num_to_read = YY_READ_BUF_SIZE;
+
+ /* Read in more data. */
+ YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
+ yyg->yy_n_chars, num_to_read );
+
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
+ }
+
+ if ( yyg->yy_n_chars == 0 )
+ {
+ if ( number_to_move == YY_MORE_ADJ )
+ {
+ ret_val = EOB_ACT_END_OF_FILE;
+ yyrestart( yyin , yyscanner);
+ }
+
+ else
+ {
+ ret_val = EOB_ACT_LAST_MATCH;
+ YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
+ YY_BUFFER_EOF_PENDING;
+ }
+ }
+
+ else
+ ret_val = EOB_ACT_CONTINUE_SCAN;
+
+ if ((yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
+ /* Extend the array by 50%, plus the number we really need. */
+ int new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1);
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc(
+ (void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf, (yy_size_t) new_size , yyscanner );
+ if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
+ /* "- 2" to take care of EOB's */
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_size = (int) (new_size - 2);
+ }
+
+ yyg->yy_n_chars += number_to_move;
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] = YY_END_OF_BUFFER_CHAR;
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR;
+
+ yyg->yytext_ptr = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
+
+ return ret_val;
+}
+
+/* yy_get_previous_state - get the state just before the EOB char was reached */
+
+ static yy_state_type yy_get_previous_state (yyscan_t yyscanner)
+{
+ yy_state_type yy_current_state;
+ char *yy_cp;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ yy_current_state = yyg->yy_start;
+
+ for ( yy_cp = yyg->yytext_ptr + YY_MORE_ADJ; yy_cp < yyg->yy_c_buf_p; ++yy_cp )
+ {
+ YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
+ if ( yy_accept[yy_current_state] )
+ {
+ yyg->yy_last_accepting_state = yy_current_state;
+ yyg->yy_last_accepting_cpos = yy_cp;
+ }
+ while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
+ {
+ yy_current_state = (int) yy_def[yy_current_state];
+ if ( yy_current_state >= 17 )
+ yy_c = yy_meta[yy_c];
+ }
+ yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
+ }
+
+ return yy_current_state;
+}
+
+/* yy_try_NUL_trans - try to make a transition on the NUL character
+ *
+ * synopsis
+ * next_state = yy_try_NUL_trans( current_state );
+ */
+ static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner)
+{
+ int yy_is_jam;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */
+ char *yy_cp = yyg->yy_c_buf_p;
+
+ YY_CHAR yy_c = 1;
+ if ( yy_accept[yy_current_state] )
+ {
+ yyg->yy_last_accepting_state = yy_current_state;
+ yyg->yy_last_accepting_cpos = yy_cp;
+ }
+ while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
+ {
+ yy_current_state = (int) yy_def[yy_current_state];
+ if ( yy_current_state >= 17 )
+ yy_c = yy_meta[yy_c];
+ }
+ yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c];
+ yy_is_jam = (yy_current_state == 16);
+
+ (void)yyg;
+ return yy_is_jam ? 0 : yy_current_state;
+}
+
+#ifndef YY_NO_UNPUT
+
+#endif
+
+#ifndef YY_NO_INPUT
+#ifdef __cplusplus
+ static int yyinput (yyscan_t yyscanner)
+#else
+ static int input (yyscan_t yyscanner)
+#endif
+
+{
+ int c;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ *yyg->yy_c_buf_p = yyg->yy_hold_char;
+
+ if ( *yyg->yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
+ {
+ /* yy_c_buf_p now points to the character we want to return.
+ * If this occurs *before* the EOB characters, then it's a
+ * valid NUL; if not, then we've hit the end of the buffer.
+ */
+ if ( yyg->yy_c_buf_p < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] )
+ /* This was really a NUL. */
+ *yyg->yy_c_buf_p = '\0';
+
+ else
+ { /* need more input */
+ int offset = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr);
+ ++yyg->yy_c_buf_p;
+
+ switch ( yy_get_next_buffer( yyscanner ) )
+ {
+ case EOB_ACT_LAST_MATCH:
+					/* This happens because yy_get_next_buffer()
+ * sees that we've accumulated a
+ * token and flags that we need to
+ * try matching the token before
+ * proceeding. But for input(),
+ * there's no matching to consider.
+ * So convert the EOB_ACT_LAST_MATCH
+ * to EOB_ACT_END_OF_FILE.
+ */
+
+ /* Reset buffer status. */
+ yyrestart( yyin , yyscanner);
+
+ /*FALLTHROUGH*/
+
+ case EOB_ACT_END_OF_FILE:
+ {
+ if ( yywrap( yyscanner ) )
+ return 0;
+
+ if ( ! yyg->yy_did_buffer_switch_on_eof )
+ YY_NEW_FILE;
+#ifdef __cplusplus
+ return yyinput(yyscanner);
+#else
+ return input(yyscanner);
+#endif
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ yyg->yy_c_buf_p = yyg->yytext_ptr + offset;
+ break;
+ }
+ }
+ }
+
+ c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */
+ *yyg->yy_c_buf_p = '\0'; /* preserve yytext */
+ yyg->yy_hold_char = *++yyg->yy_c_buf_p;
+
+ return c;
+}
+#endif /* ifndef YY_NO_INPUT */
+
+/** Immediately switch to a different input stream.
+ * @param input_file A readable stream.
+ * @param yyscanner The scanner object.
+ * @note This function does not reset the start condition to @c INITIAL .
+ */
+ void yyrestart (FILE * input_file , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ if ( ! YY_CURRENT_BUFFER ){
+ yyensure_buffer_stack (yyscanner);
+ YY_CURRENT_BUFFER_LVALUE =
+ yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner);
+ }
+
+ yy_init_buffer( YY_CURRENT_BUFFER, input_file , yyscanner);
+ yy_load_buffer_state( yyscanner );
+}
+
+/** Switch to a different input buffer.
+ * @param new_buffer The new input buffer.
+ * @param yyscanner The scanner object.
+ */
+ void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ /* TODO. We should be able to replace this entire function body
+ * with
+ * yypop_buffer_state();
+ * yypush_buffer_state(new_buffer);
+ */
+ yyensure_buffer_stack (yyscanner);
+ if ( YY_CURRENT_BUFFER == new_buffer )
+ return;
+
+ if ( YY_CURRENT_BUFFER )
+ {
+ /* Flush out information for old buffer. */
+ *yyg->yy_c_buf_p = yyg->yy_hold_char;
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p;
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
+ }
+
+ YY_CURRENT_BUFFER_LVALUE = new_buffer;
+ yy_load_buffer_state( yyscanner );
+
+ /* We don't actually know whether we did this switch during
+ * EOF (yywrap()) processing, but the only time this flag
+ * is looked at is after yywrap() is called, so it's safe
+ * to go ahead and always set it.
+ */
+ yyg->yy_did_buffer_switch_on_eof = 1;
+}
+
+static void yy_load_buffer_state (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+ yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
+ yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
+ yyg->yy_hold_char = *yyg->yy_c_buf_p;
+}
+
+/** Allocate and initialize an input buffer state.
+ * @param file A readable stream.
+ * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
+ * @param yyscanner The scanner object.
+ * @return the allocated buffer state.
+ */
+ YY_BUFFER_STATE yy_create_buffer (FILE * file, int size , yyscan_t yyscanner)
+{
+ YY_BUFFER_STATE b;
+
+ b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) , yyscanner );
+ if ( ! b )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+ b->yy_buf_size = size;
+
+ /* yy_ch_buf has to be 2 characters longer than the size given because
+ * we need to put in 2 end-of-buffer characters.
+ */
+ b->yy_ch_buf = (char *) yyalloc( (yy_size_t) (b->yy_buf_size + 2) , yyscanner );
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+ b->yy_is_our_buffer = 1;
+
+ yy_init_buffer( b, file , yyscanner);
+
+ return b;
+}
+
+/** Destroy the buffer.
+ * @param b a buffer created with yy_create_buffer()
+ * @param yyscanner The scanner object.
+ */
+ void yy_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ if ( ! b )
+ return;
+
+ if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */
+ YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
+
+ if ( b->yy_is_our_buffer )
+ yyfree( (void *) b->yy_ch_buf , yyscanner );
+
+ yyfree( (void *) b , yyscanner );
+}
+
+/* Initializes or reinitializes a buffer.
+ * This function is sometimes called more than once on the same buffer,
+ * such as during a yyrestart() or at EOF.
+ */
+ static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file , yyscan_t yyscanner)
+
+{
+ int oerrno = errno;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ yy_flush_buffer( b , yyscanner);
+
+ b->yy_input_file = file;
+ b->yy_fill_buffer = 1;
+
+ /* If b is the current buffer, then yy_init_buffer was _probably_
+ * called from yyrestart() or through yy_get_next_buffer.
+ * In that case, we don't want to reset the lineno or column.
+ */
+ if (b != YY_CURRENT_BUFFER){
+ b->yy_bs_lineno = 1;
+ b->yy_bs_column = 0;
+ }
+
+ b->yy_is_interactive = 0;
+
+ errno = oerrno;
+}
+
+/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
+ * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
+ * @param yyscanner The scanner object.
+ */
+ void yy_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ if ( ! b )
+ return;
+
+ b->yy_n_chars = 0;
+
+ /* We always need two end-of-buffer characters. The first causes
+ * a transition to the end-of-buffer state. The second causes
+ * a jam in that state.
+ */
+ b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
+ b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
+
+ b->yy_buf_pos = &b->yy_ch_buf[0];
+
+ b->yy_at_bol = 1;
+ b->yy_buffer_status = YY_BUFFER_NEW;
+
+ if ( b == YY_CURRENT_BUFFER )
+ yy_load_buffer_state( yyscanner );
+}
+
+/** Pushes the new state onto the stack. The new state becomes
+ * the current state. This function will allocate the stack
+ * if necessary.
+ * @param new_buffer The new state.
+ * @param yyscanner The scanner object.
+ */
+void yypush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ if (new_buffer == NULL)
+ return;
+
+ yyensure_buffer_stack(yyscanner);
+
+ /* This block is copied from yy_switch_to_buffer. */
+ if ( YY_CURRENT_BUFFER )
+ {
+ /* Flush out information for old buffer. */
+ *yyg->yy_c_buf_p = yyg->yy_hold_char;
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p;
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
+ }
+
+ /* Only push if top exists. Otherwise, replace top. */
+ if (YY_CURRENT_BUFFER)
+ yyg->yy_buffer_stack_top++;
+ YY_CURRENT_BUFFER_LVALUE = new_buffer;
+
+ /* copied from yy_switch_to_buffer. */
+ yy_load_buffer_state( yyscanner );
+ yyg->yy_did_buffer_switch_on_eof = 1;
+}
+
+/** Removes and deletes the top of the stack, if present.
+ * The next element becomes the new top.
+ * @param yyscanner The scanner object.
+ */
+void yypop_buffer_state (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ if (!YY_CURRENT_BUFFER)
+ return;
+
+ yy_delete_buffer(YY_CURRENT_BUFFER , yyscanner);
+ YY_CURRENT_BUFFER_LVALUE = NULL;
+ if (yyg->yy_buffer_stack_top > 0)
+ --yyg->yy_buffer_stack_top;
+
+ if (YY_CURRENT_BUFFER) {
+ yy_load_buffer_state( yyscanner );
+ yyg->yy_did_buffer_switch_on_eof = 1;
+ }
+}
+
+/* Allocates the stack if it does not exist.
+ * Guarantees space for at least one push.
+ */
+static void yyensure_buffer_stack (yyscan_t yyscanner)
+{
+ yy_size_t num_to_alloc;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ if (!yyg->yy_buffer_stack) {
+
+ /* First allocation is just for 2 elements, since we don't know if this
+ * scanner will even need a stack. We use 2 instead of 1 to avoid an
+ * immediate realloc on the next call.
+ */
+ num_to_alloc = 1; /* After all that talk, this was set to 1 anyways... */
+ yyg->yy_buffer_stack = (struct yy_buffer_state**)yyalloc
+ (num_to_alloc * sizeof(struct yy_buffer_state*)
+ , yyscanner);
+ if ( ! yyg->yy_buffer_stack )
+ YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
+
+ memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*));
+
+ yyg->yy_buffer_stack_max = num_to_alloc;
+ yyg->yy_buffer_stack_top = 0;
+ return;
+ }
+
+ if (yyg->yy_buffer_stack_top >= (yyg->yy_buffer_stack_max) - 1){
+
+ /* Increase the buffer to prepare for a possible push. */
+ yy_size_t grow_size = 8 /* arbitrary grow size */;
+
+ num_to_alloc = yyg->yy_buffer_stack_max + grow_size;
+ yyg->yy_buffer_stack = (struct yy_buffer_state**)yyrealloc
+ (yyg->yy_buffer_stack,
+ num_to_alloc * sizeof(struct yy_buffer_state*)
+ , yyscanner);
+ if ( ! yyg->yy_buffer_stack )
+ YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
+
+ /* zero only the new slots.*/
+ memset(yyg->yy_buffer_stack + yyg->yy_buffer_stack_max, 0, grow_size * sizeof(struct yy_buffer_state*));
+ yyg->yy_buffer_stack_max = num_to_alloc;
+ }
+}
+
+/** Setup the input buffer state to scan directly from a user-specified character buffer.
+ * @param base the character buffer
+ * @param size the size in bytes of the character buffer
+ * @param yyscanner The scanner object.
+ * @return the newly allocated buffer state object.
+ */
+YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner)
+{
+ YY_BUFFER_STATE b;
+
+ if ( size < 2 ||
+ base[size-2] != YY_END_OF_BUFFER_CHAR ||
+ base[size-1] != YY_END_OF_BUFFER_CHAR )
+ /* They forgot to leave room for the EOB's. */
+ return NULL;
+
+ b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) , yyscanner );
+ if ( ! b )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" );
+
+ b->yy_buf_size = (int) (size - 2); /* "- 2" to take care of EOB's */
+ b->yy_buf_pos = b->yy_ch_buf = base;
+ b->yy_is_our_buffer = 0;
+ b->yy_input_file = NULL;
+ b->yy_n_chars = b->yy_buf_size;
+ b->yy_is_interactive = 0;
+ b->yy_at_bol = 1;
+ b->yy_fill_buffer = 0;
+ b->yy_buffer_status = YY_BUFFER_NEW;
+
+ yy_switch_to_buffer( b , yyscanner );
+
+ return b;
+}
+
+/** Setup the input buffer state to scan a string. The next call to yylex() will
+ * scan from a @e copy of @a str.
+ * @param yystr a NUL-terminated string to scan
+ * @param yyscanner The scanner object.
+ * @return the newly allocated buffer state object.
+ * @note If you want to scan bytes that may contain NUL values, then use
+ * yy_scan_bytes() instead.
+ */
+YY_BUFFER_STATE yy_scan_string (const char * yystr , yyscan_t yyscanner)
+{
+
+ return yy_scan_bytes( yystr, (int) strlen(yystr) , yyscanner);
+}
+
+/** Setup the input buffer state to scan the given bytes. The next call to yylex() will
+ * scan from a @e copy of @a bytes.
+ * @param yybytes the byte buffer to scan
+ * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
+ * @param yyscanner The scanner object.
+ * @return the newly allocated buffer state object.
+ */
+YY_BUFFER_STATE yy_scan_bytes (const char * yybytes, int _yybytes_len , yyscan_t yyscanner)
+{
+ YY_BUFFER_STATE b;
+ char *buf;
+ yy_size_t n;
+ int i;
+
+ /* Get memory for full buffer, including space for trailing EOB's. */
+ n = (yy_size_t) (_yybytes_len + 2);
+ buf = (char *) yyalloc( n , yyscanner );
+ if ( ! buf )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" );
+
+ for ( i = 0; i < _yybytes_len; ++i )
+ buf[i] = yybytes[i];
+
+ buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR;
+
+ b = yy_scan_buffer( buf, n , yyscanner);
+ if ( ! b )
+ YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" );
+
+ /* It's okay to grow etc. this buffer, and we should throw it
+ * away when we're done.
+ */
+ b->yy_is_our_buffer = 1;
+
+ return b;
+}
+
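+/* Note on the three scan helpers above: yy_scan_string() and
+ * yy_scan_bytes() copy the caller's data into a buffer the scanner owns
+ * (yy_is_our_buffer = 1), so the scanner may later grow or free it.
+ * yy_scan_buffer() scans the caller's memory in place and therefore
+ * requires two trailing YY_END_OF_BUFFER_CHAR bytes, returning NULL when
+ * they are missing.
+ */
+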
+#ifndef YY_EXIT_FAILURE
+#define YY_EXIT_FAILURE 2
+#endif
+
+static void yynoreturn yy_fatal_error (const char* msg , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ (void)yyg;
+ fprintf( stderr, "%s\n", msg );
+ exit( YY_EXIT_FAILURE );
+}
+
+/* Redefine yyless() so it works in section 3 code. */
+
+#undef yyless
+#define yyless(n) \
+ do \
+ { \
+ /* Undo effects of setting up yytext. */ \
+ int yyless_macro_arg = (n); \
+ YY_LESS_LINENO(yyless_macro_arg);\
+ yytext[yyleng] = yyg->yy_hold_char; \
+ yyg->yy_c_buf_p = yytext + yyless_macro_arg; \
+ yyg->yy_hold_char = *yyg->yy_c_buf_p; \
+ *yyg->yy_c_buf_p = '\0'; \
+ yyleng = yyless_macro_arg; \
+ } \
+ while ( 0 )
+
+/* Accessor methods (get/set functions) to struct members. */
+
+/** Get the user-defined data for this scanner.
+ * @param yyscanner The scanner object.
+ */
+YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yyextra;
+}
+
+/** Get the current line number.
+ * @param yyscanner The scanner object.
+ */
+int yyget_lineno (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ if (! YY_CURRENT_BUFFER)
+ return 0;
+
+ return yylineno;
+}
+
+/** Get the current column number.
+ * @param yyscanner The scanner object.
+ */
+int yyget_column (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ if (! YY_CURRENT_BUFFER)
+ return 0;
+
+ return yycolumn;
+}
+
+/** Get the input stream.
+ * @param yyscanner The scanner object.
+ */
+FILE *yyget_in (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yyin;
+}
+
+/** Get the output stream.
+ * @param yyscanner The scanner object.
+ */
+FILE *yyget_out (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yyout;
+}
+
+/** Get the length of the current token.
+ * @param yyscanner The scanner object.
+ */
+int yyget_leng (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yyleng;
+}
+
+/** Get the current token.
+ * @param yyscanner The scanner object.
+ */
+
+char *yyget_text (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yytext;
+}
+
+/** Set the user-defined data. This data is never touched by the scanner.
+ * @param user_defined The data to be associated with this scanner.
+ * @param yyscanner The scanner object.
+ */
+void yyset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ yyextra = user_defined ;
+}
+
+/** Set the current line number.
+ * @param _line_number line number
+ * @param yyscanner The scanner object.
+ */
+void yyset_lineno (int _line_number , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ /* lineno is only valid if an input buffer exists. */
+ if (! YY_CURRENT_BUFFER )
+ YY_FATAL_ERROR( "yyset_lineno called with no buffer" );
+
+ yylineno = _line_number;
+}
+
+/** Set the current column.
+ * @param _column_no column number
+ * @param yyscanner The scanner object.
+ */
+void yyset_column (int _column_no , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ /* column is only valid if an input buffer exists. */
+ if (! YY_CURRENT_BUFFER )
+ YY_FATAL_ERROR( "yyset_column called with no buffer" );
+
+ yycolumn = _column_no;
+}
+
+/** Set the input stream. This does not discard the current
+ * input buffer.
+ * @param _in_str A readable stream.
+ * @param yyscanner The scanner object.
+ * @see yy_switch_to_buffer
+ */
+void yyset_in (FILE * _in_str , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ yyin = _in_str ;
+}
+
+void yyset_out (FILE * _out_str , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ yyout = _out_str ;
+}
+
+int yyget_debug (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ return yy_flex_debug;
+}
+
+void yyset_debug (int _bdebug , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ yy_flex_debug = _bdebug ;
+}
+
+/* Accessor methods for yylval and yylloc */
+
+/* User-visible API */
+
+/* yylex_init is special because it creates the scanner itself, so it is
+ * the ONLY reentrant function that doesn't take the scanner as the last argument.
+ * That's why we explicitly handle the declaration, instead of using our macros.
+ */
+int yylex_init(yyscan_t* ptr_yy_globals)
+{
+ if (ptr_yy_globals == NULL){
+ errno = EINVAL;
+ return 1;
+ }
+
+ *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), NULL );
+
+ if (*ptr_yy_globals == NULL){
+ errno = ENOMEM;
+ return 1;
+ }
+
+ /* By setting to 0xAA, we expose bugs in yy_init_globals. Leave at 0x00 for releases. */
+ memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t));
+
+ return yy_init_globals ( *ptr_yy_globals );
+}
+
+/* yylex_init_extra has the same functionality as yylex_init, but follows the
+ * convention of taking the scanner as the last argument.  Note, however, that
+ * the argument is a *pointer* to a scanner, since the scanner is allocated by
+ * this call (which is also why this function must handle its own declaration).
+ * The user-defined value in the first argument will be available to yyalloc in
+ * the yyextra field.
+ */
+int yylex_init_extra( YY_EXTRA_TYPE yy_user_defined, yyscan_t* ptr_yy_globals )
+{
+ struct yyguts_t dummy_yyguts;
+
+ yyset_extra (yy_user_defined, &dummy_yyguts);
+
+ if (ptr_yy_globals == NULL){
+ errno = EINVAL;
+ return 1;
+ }
+
+ *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), &dummy_yyguts );
+
+ if (*ptr_yy_globals == NULL){
+ errno = ENOMEM;
+ return 1;
+ }
+
+ /* By setting to 0xAA, we expose bugs in
+ yy_init_globals. Leave at 0x00 for releases. */
+ memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t));
+
+ yyset_extra (yy_user_defined, *ptr_yy_globals);
+
+ return yy_init_globals ( *ptr_yy_globals );
+}
+
+static int yy_init_globals (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ /* Initialization is the same as for the non-reentrant scanner.
+ * This function is called from yylex_destroy(), so don't allocate here.
+ */
+
+ yyg->yy_buffer_stack = NULL;
+ yyg->yy_buffer_stack_top = 0;
+ yyg->yy_buffer_stack_max = 0;
+ yyg->yy_c_buf_p = NULL;
+ yyg->yy_init = 0;
+ yyg->yy_start = 0;
+
+ yyg->yy_start_stack_ptr = 0;
+ yyg->yy_start_stack_depth = 0;
+ yyg->yy_start_stack = NULL;
+
+/* Defined in main.c */
+#ifdef YY_STDINIT
+ yyin = stdin;
+ yyout = stdout;
+#else
+ yyin = NULL;
+ yyout = NULL;
+#endif
+
+ /* For future reference: Set errno on error, since we are called by
+ * yylex_init()
+ */
+ return 0;
+}
+
+/* yylex_destroy is for both reentrant and non-reentrant scanners. */
+int yylex_destroy (yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+ /* Pop the buffer stack, destroying each element. */
+ while(YY_CURRENT_BUFFER){
+ yy_delete_buffer( YY_CURRENT_BUFFER , yyscanner );
+ YY_CURRENT_BUFFER_LVALUE = NULL;
+ yypop_buffer_state(yyscanner);
+ }
+
+ /* Destroy the stack itself. */
+ yyfree(yyg->yy_buffer_stack , yyscanner);
+ yyg->yy_buffer_stack = NULL;
+
+ /* Destroy the start condition stack. */
+ yyfree( yyg->yy_start_stack , yyscanner );
+ yyg->yy_start_stack = NULL;
+
+ /* Reset the globals. This is important in a non-reentrant scanner so the next time
+ * yylex() is called, initialization will occur. */
+ yy_init_globals( yyscanner);
+
+ /* Destroy the main struct (reentrant only). */
+ yyfree ( yyscanner , yyscanner );
+ yyscanner = NULL;
+ return 0;
+}
+
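+/* A minimal end-to-end sketch of the reentrant API (illustrative only, not
+ * copied from the InnoDB call sites; once Makefile.query/make_parser.sh
+ * apply the flex -P option these entry points carry the fts0t prefix,
+ * e.g. fts0tlex_init, fts0t_scan_bytes, fts0tlex_destroy):
+ *
+ *	yyscan_t scanner;
+ *
+ *	if (yylex_init(&scanner) == 0) {
+ *		YY_BUFFER_STATE buf = yy_scan_bytes(query, len, scanner);
+ *		... call the tokenizer until it returns 0; with YY_DECL
+ *		    redefined in fts0tlex.l that entry point is
+ *		    fts_tlexer(&val, scanner), not yylex() ...
+ *		yy_delete_buffer(buf, scanner);
+ *		yylex_destroy(scanner);
+ *	}
+ */
+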
+/*
+ * Internal utility routines.
+ */
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy (char* s1, const char * s2, int n , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ (void)yyg;
+
+ int i;
+ for ( i = 0; i < n; ++i )
+ s1[i] = s2[i];
+}
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen (const char * s , yyscan_t yyscanner)
+{
+ int n;
+ for ( n = 0; s[n]; ++n )
+ ;
+
+ return n;
+}
+#endif
+
+void *yyalloc (yy_size_t size , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ (void)yyg;
+ return malloc(size);
+}
+
+void *yyrealloc (void * ptr, yy_size_t size , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ (void)yyg;
+
+ /* The cast to (char *) in the following accommodates both
+ * implementations that use char* generic pointers, and those
+ * that use void* generic pointers. It works with the latter
+ * because both ANSI C and C++ allow castless assignment from
+ * any pointer type to void*, and deal with argument conversions
+ * as though doing an assignment.
+ */
+ return realloc(ptr, size);
+}
+
+void yyfree (void * ptr , yyscan_t yyscanner)
+{
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+ (void)yyg;
+ free( (char *) ptr ); /* see yyrealloc() for (char *) cast */
+}
+
+#define YYTABLES_NAME "yytables"
+
+#line 69 "fts0tlex.l"
+
+
diff --git a/storage/innobase/fts/fts0tlex.l b/storage/innobase/fts/fts0tlex.l
new file mode 100644
index 00000000..e19e907f
--- /dev/null
+++ b/storage/innobase/fts/fts0tlex.l
@@ -0,0 +1,69 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**
+ * @file fts/fts0tlex.l
+ * FTS parser lexical analyzer
+ *
+ * Created 2007/5/9 Sunny Bains
+ */
+
+%{
+
+#include "fts0ast.h"
+#include "fts0pars.h"
+
+/* Required for reentrant parser */
+#define YY_DECL int fts_tlexer(YYSTYPE* val, yyscan_t yyscanner)
+#define exit(A) ut_error
+
+%}
+
+%option noinput
+%option nounput
+%option noyywrap
+%option nostdinit
+%option reentrant
+%option never-interactive
+
+
+%%
+
+[\t ]+ /* Ignore whitespace */ ;
+
+[*] {
+ val->oper = fts0tget_text(yyscanner)[0];
+
+ return(val->oper);
+}
+
+\"[^\"\n]*\" {
+	val->token = fts_ast_string_create(
+		reinterpret_cast<const byte*>(fts0tget_text(yyscanner)),
+		fts0tget_leng(yyscanner));
+
+ return(FTS_TEXT);
+}
+
+[^" \n\%]* {
+	val->token = fts_ast_string_create(
+		reinterpret_cast<const byte*>(fts0tget_text(yyscanner)),
+		fts0tget_leng(yyscanner));
+
+ return(FTS_TERM);
+}
+. ;
+\n
+
+%%
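+
+/*
+ * Token summary for the rules above (illustrative): whitespace is skipped,
+ * a lone '*' is returned as a single-character operator, a double-quoted
+ * run becomes FTS_TEXT (quotes included in the token text), and any other
+ * run of characters excluding space, '"', newline and '%' becomes
+ * FTS_TERM.  For example, the input
+ *
+ *	apple "ice cream" *
+ *
+ * reaches the parser as FTS_TERM("apple"), FTS_TEXT("\"ice cream\"") and
+ * the operator '*'.
+ */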
diff --git a/storage/innobase/fts/make_parser.sh b/storage/innobase/fts/make_parser.sh
new file mode 100755
index 00000000..6b82c5ba
--- /dev/null
+++ b/storage/innobase/fts/make_parser.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+#
+# Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+
+TMPF=t.$$
+
+make -f Makefile.query
+
+echo '#include "univ.i"' > $TMPF
+
+# This is to avoid compiler warnings about unused parameters.
+# FIXME: the gcc extension "MY_ATTRIBUTE" causes compilation errors on the
+# Windows platform. Quote them out for now.
+sed -e '
+s/^\(static.*void.*yy_fatal_error.*msg.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/;
+s/^\(static.*void.*yy_flex_strncpy.*n.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/;
+s/^\(static.*int.*yy_flex_strlen.*s.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/;
+s/^\(\(static\|void\).*fts0[bt]alloc.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/;
+s/^\(\(static\|void\).*fts0[bt]realloc.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/;
+s/^\(\(static\|void\).*fts0[bt]free.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/;
+' < fts0blex.cc >> $TMPF
+
+mv $TMPF fts0blex.cc
+
+echo '#include "univ.i"' > $TMPF
+
+sed -e '
+s/^\(static.*void.*yy_fatal_error.*msg.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/;
+s/^\(static.*void.*yy_flex_strncpy.*n.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/;
+s/^\(static.*int.*yy_flex_strlen.*s.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/;
+s/^\(\(static\|void\).*fts0[bt]alloc.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/;
+s/^\(\(static\|void\).*fts0[bt]realloc.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/;
+s/^\(\(static\|void\).*fts0[bt]free.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/;
+' < fts0tlex.cc >> $TMPF
+
+mv $TMPF fts0tlex.cc
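+
+# For reference, the substitutions above only annotate the trailing yyscanner
+# parameter of the matched declarations; roughly (illustrative):
+#
+#   before: static void yynoreturn yy_fatal_error (const char* msg , yyscan_t yyscanner)
+#   after:  static void yynoreturn yy_fatal_error (const char* msg , yyscan_t yyscanner MY_ATTRIBUTE((unused)))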