diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:00:34 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 18:00:34 +0000 |
commit | 3f619478f796eddbba6e39502fe941b285dd97b1 (patch) | |
tree | e2c7b5777f728320e5b5542b6213fd3591ba51e2 /storage/innobase/fts | |
parent | Initial commit. (diff) | |
download | mariadb-3f619478f796eddbba6e39502fe941b285dd97b1.tar.xz mariadb-3f619478f796eddbba6e39502fe941b285dd97b1.zip |
Adding upstream version 1:10.11.6. (refs: upstream/1%10.11.6, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/innobase/fts')
-rw-r--r-- | storage/innobase/fts/Makefile.query | 18 | ||||
-rw-r--r-- | storage/innobase/fts/fts0ast.cc | 816 | ||||
-rw-r--r-- | storage/innobase/fts/fts0blex.cc | 2177 | ||||
-rw-r--r-- | storage/innobase/fts/fts0blex.l | 74 | ||||
-rw-r--r-- | storage/innobase/fts/fts0config.cc | 428 | ||||
-rw-r--r-- | storage/innobase/fts/fts0fts.cc | 6182 | ||||
-rw-r--r-- | storage/innobase/fts/fts0opt.cc | 3054 | ||||
-rw-r--r-- | storage/innobase/fts/fts0pars.cc | 2007 | ||||
-rw-r--r-- | storage/innobase/fts/fts0pars.y | 293 | ||||
-rw-r--r-- | storage/innobase/fts/fts0plugin.cc | 283 | ||||
-rw-r--r-- | storage/innobase/fts/fts0que.cc | 4612 | ||||
-rw-r--r-- | storage/innobase/fts/fts0sql.cc | 208 | ||||
-rw-r--r-- | storage/innobase/fts/fts0tlex.cc | 2169 | ||||
-rw-r--r-- | storage/innobase/fts/fts0tlex.l | 69 | ||||
-rwxr-xr-x | storage/innobase/fts/make_parser.sh | 49 |
15 files changed, 22439 insertions, 0 deletions
diff --git a/storage/innobase/fts/Makefile.query b/storage/innobase/fts/Makefile.query new file mode 100644 index 00000000..d91b1b92 --- /dev/null +++ b/storage/innobase/fts/Makefile.query @@ -0,0 +1,18 @@ +LEX=flex +YACC=bison +PREFIX=fts + +all: fts0pars.cc fts0blex.cc fts0tlex.cc + +fts0pars.cc: fts0pars.y +fts0blex.cc: fts0blex.l +fts0tlex.cc: fts0tlex.l + +.l.cc: + echo '#include "univ.i"' > $*.cc + $(LEX) --stdout -P$(subst lex,,$*) -o $*.cc \ + --header-file=../include/$*.h $< >> $*.cc + +.y.cc: + $(YACC) -p $(PREFIX) -o $*.cc -d $< + mv $*.h ../include diff --git a/storage/innobase/fts/fts0ast.cc b/storage/innobase/fts/fts0ast.cc new file mode 100644 index 00000000..74d02d63 --- /dev/null +++ b/storage/innobase/fts/fts0ast.cc @@ -0,0 +1,816 @@ +/***************************************************************************** + +Copyright (c) 2007, 2020, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2018, 2022, MariaDB Corporation. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file fts/fts0ast.cc +Full Text Search parser helper file. + +Created 2007/3/16 Sunny Bains. 
+***********************************************************************/ + +#include "row0sel.h" +#include "fts0ast.h" +#include "fts0pars.h" +#include "fts0fts.h" +#include "trx0trx.h" + +/* The pass that fts_ast_visit() is currently making over a list. */ +enum fts_ast_visit_pass_t { + FTS_PASS_FIRST, /*!< First visit pass, + process operators excluding + FTS_EXIST and FTS_IGNORE */ + FTS_PASS_EXIST, /*!< Exist visit pass, + process operator FTS_EXIST */ + FTS_PASS_IGNORE /*!< Ignore visit pass, + process operator FTS_IGNORE */ +}; + +/******************************************************************//** +Create an empty fts_ast_node_t. +@return new node allocated with ut_zalloc_nokey() */ +static +fts_ast_node_t* +fts_ast_node_create(void) +/*=====================*/ +{ + fts_ast_node_t* node; + + node = (fts_ast_node_t*) ut_zalloc_nokey(sizeof(*node)); + + return(node); +} + +/** Track node allocations, in case there is an error during parsing. +The node becomes the new tail of state->list; nodes on that list are +chained through their next_alloc member. */ +static +void +fts_ast_state_add_node( + fts_ast_state_t*state, /*!< in: ast instance */ + fts_ast_node_t* node) /*!< in: node to add to ast */ +{ + if (!state->list.head) { + ut_a(!state->list.tail); + + state->list.head = state->list.tail = node; + } else { + state->list.tail->next_alloc = node; + state->list.tail = node; + } +} + +/******************************************************************//** +Create a operator fts_ast_node_t. 
+@return new node */ +fts_ast_node_t* +fts_ast_create_node_oper( +/*=====================*/ + void* arg, /*!< in: ast state instance */ + fts_ast_oper_t oper) /*!< in: ast operator */ +{ + fts_ast_node_t* node = fts_ast_node_create(); + + node->type = FTS_AST_OPER; + node->oper = oper; + + fts_ast_state_add_node((fts_ast_state_t*) arg, node); + + return(node); +} + +/******************************************************************//** +Split ptr into tokens and create one term node per accepted token. +ptr is only read: each token is copied with fts_ast_string_create() and +ptr itself is neither modified nor freed by this function. +@return new node, a node list with tokenized words, or NULL if no +token was accepted */ +fts_ast_node_t* +fts_ast_create_node_term( +/*=====================*/ + void* arg, /*!< in: ast state instance */ + const fts_ast_string_t* ptr) /*!< in: ast term string */ +{ + fts_ast_state_t* state = static_cast<fts_ast_state_t*>(arg); + ulint len = ptr->len; + ulint cur_pos = 0; + fts_ast_node_t* node = NULL; + fts_ast_node_t* node_list = NULL; + fts_ast_node_t* first_node = NULL; + + /* Scan the incoming string and filter out any "non-word" characters */ + while (cur_pos < len) { + fts_string_t str; + ulint cur_len; + + cur_len = innobase_mysql_fts_get_token( + state->charset, + reinterpret_cast<const byte*>(ptr->str) + cur_pos, + reinterpret_cast<const byte*>(ptr->str) + len, &str); + + if (cur_len == 0) { + break; + } + + cur_pos += cur_len; + + if (str.f_n_char > 0) { + /* If the subsequent term (after the first one)'s size + is less than fts_min_token_size or the term is greater + than fts_max_token_size, we shall ignore that. 
This is + to be consistent with MyISAM behavior */ + if ((first_node && (str.f_n_char < fts_min_token_size)) + || str.f_n_char > fts_max_token_size) { + continue; + } + + node = fts_ast_node_create(); + + node->type = FTS_AST_TERM; + + node->term.ptr = fts_ast_string_create( + str.f_str, str.f_len); + + fts_ast_state_add_node( + static_cast<fts_ast_state_t*>(arg), node); + + if (first_node) { + /* There is more than one word, create + a list to organize them */ + if (!node_list) { + node_list = fts_ast_create_node_list( + static_cast<fts_ast_state_t*>( + arg), + first_node); + } + + fts_ast_add_node(node_list, node); + } else { + first_node = node; + } + } + } + + return((node_list != NULL) ? node_list : first_node); +} + +/******************************************************************//** +Create an AST term node, makes a copy of ptr for plugin parser +@return node, or NULL if len is 0, len exceeds FTS_MAX_WORD_LEN, or +ptr starts or ends with '%' */ +fts_ast_node_t* +fts_ast_create_node_term_for_parser( +/*================================*/ + void* arg, /*!< in: ast state */ + const char* ptr, /*!< in: term string */ + const ulint len) /*!< in: term string length */ +{ + fts_ast_node_t* node = NULL; + + /* '%' as first char is forbidden for LIKE in internal SQL parser; + '%' as last char is reserved for wildcard search;*/ + if (len == 0 || len > FTS_MAX_WORD_LEN + || ptr[0] == '%' || ptr[len - 1] == '%') { + return(NULL); + } + + node = fts_ast_node_create(); + + node->type = FTS_AST_TERM; + + node->term.ptr = fts_ast_string_create( + reinterpret_cast<const byte*>(ptr), len); + + fts_ast_state_add_node(static_cast<fts_ast_state_t*>(arg), node); + + return(node); +} + +/******************************************************************//** +This function takes ownership of the ptr and is responsible +for free'ing it. 
+@return new node */ +fts_ast_node_t* +fts_ast_create_node_text( +/*=====================*/ + void* arg, /*!< in: ast state instance */ + const fts_ast_string_t* ptr) /*!< in: ast text string */ +{ + ulint len = ptr->len; + fts_ast_node_t* node = NULL; + + /* Once we come here, the string must have at least 2 quotes "" + around the query string, which could be empty. Also the query + string may contain 0x00 in it, we don't treat it as null-terminated. */ + ut_ad(len >= 2); + ut_ad(ptr->str[0] == '\"' && ptr->str[len - 1] == '\"'); + + if (len == 2) { + /* If the query string contains nothing except quotes, + it's obviously an invalid query. */ + return(NULL); + } + + node = fts_ast_node_create(); + + /* Exclude the two enclosing quotes "" from the copied length */ + len -= 2; + + node->type = FTS_AST_TEXT; + /* Start copying after the opening quote */ + node->text.ptr = fts_ast_string_create( + reinterpret_cast<const byte*>(ptr->str + 1), len); + node->text.distance = ULINT_UNDEFINED; + + fts_ast_state_add_node((fts_ast_state_t*) arg, node); + + return(node); +} + +/******************************************************************//** +Create an AST phrase list node for plugin parser +@return new phrase list node whose list is empty and whose distance +is ULINT_UNDEFINED */ +fts_ast_node_t* +fts_ast_create_node_phrase_list( +/*============================*/ + void* arg) /*!< in: ast state */ +{ + fts_ast_node_t* node = fts_ast_node_create(); + + node->type = FTS_AST_PARSER_PHRASE_LIST; + + node->text.distance = ULINT_UNDEFINED; + node->list.head = node->list.tail = NULL; + + fts_ast_state_add_node(static_cast<fts_ast_state_t*>(arg), node); + + return(node); +} + +/******************************************************************//** +This function takes ownership of the expr and is responsible +for free'ing it. 
+@return new node */ +fts_ast_node_t* +fts_ast_create_node_list( +/*=====================*/ + void* arg, /*!< in: ast state instance */ + fts_ast_node_t* expr) /*!< in: ast expr instance */ +{ + fts_ast_node_t* node = fts_ast_node_create(); + + node->type = FTS_AST_LIST; + node->list.head = node->list.tail = expr; + + fts_ast_state_add_node((fts_ast_state_t*) arg, node); + + return(node); +} + +/******************************************************************//** +Create a sub-expression list node with expr as both its head and tail. +This function takes ownership of expr and is responsible for deleting it. +@return new node */ +fts_ast_node_t* +fts_ast_create_node_subexp_list( +/*============================*/ + void* arg, /*!< in: ast state instance */ + fts_ast_node_t* expr) /*!< in: ast expr instance */ +{ + fts_ast_node_t* node = fts_ast_node_create(); + + node->type = FTS_AST_SUBEXP_LIST; + node->list.head = node->list.tail = expr; + + fts_ast_state_add_node((fts_ast_state_t*) arg, node); + + return(node); +} + +/******************************************************************//** +Free the elements of an expr list node; the list node itself is not +freed here. */ +static +void +fts_ast_free_list( +/*==============*/ + fts_ast_node_t* node) /*!< in: list node whose elements are freed */ +{ + ut_a(node->type == FTS_AST_LIST + || node->type == FTS_AST_SUBEXP_LIST + || node->type == FTS_AST_PARSER_PHRASE_LIST); + + for (node = node->list.head; + node != NULL; + node = fts_ast_free_node(node)) { + + /* No op: fts_ast_free_node() frees the current element + and returns the next one */ + } +} + +/********************************************************************//** +Free a fts_ast_node_t instance. 
+@return next node to free */ +fts_ast_node_t* +fts_ast_free_node( +/*==============*/ + fts_ast_node_t* node) /*!< in: the node to free */ +{ + fts_ast_node_t* next_node; + + switch (node->type) { + case FTS_AST_TEXT: + if (node->text.ptr) { + fts_ast_string_free(node->text.ptr); + node->text.ptr = NULL; + } + break; + + case FTS_AST_TERM: + if (node->term.ptr) { + fts_ast_string_free(node->term.ptr); + node->term.ptr = NULL; + } + break; + + case FTS_AST_LIST: + case FTS_AST_SUBEXP_LIST: + case FTS_AST_PARSER_PHRASE_LIST: + fts_ast_free_list(node); + node->list.head = node->list.tail = NULL; + break; + + case FTS_AST_OPER: + break; + + default: + ut_error; + } + + /* Get next node before freeing the node itself */ + next_node = node->next; + + ut_free(node); + + return(next_node); +} + +/******************************************************************//** +Append elem to the tail of the list node. elem must not already be +linked to a following node (elem->next must be NULL). +@return the list passed in as "node", or NULL if elem is NULL */ +fts_ast_node_t* +fts_ast_add_node( +/*=============*/ + fts_ast_node_t* node, /*!< in: list instance */ + fts_ast_node_t* elem) /*!< in: node to add to list */ +{ + if (!elem) { + return(NULL); + } + + ut_a(!elem->next); + ut_a(node->type == FTS_AST_LIST + || node->type == FTS_AST_SUBEXP_LIST + || node->type == FTS_AST_PARSER_PHRASE_LIST); + + if (!node->list.head) { + ut_a(!node->list.tail); + + node->list.head = node->list.tail = elem; + } else { + ut_a(node->list.tail); + + node->list.tail->next = elem; + node->list.tail = elem; + } + + return(node); +} + +/******************************************************************//** +Set the wildcard attribute of a term. 
*/ +void +fts_ast_term_set_wildcard( +/*======================*/ + fts_ast_node_t* node) /*!< in/out: set attribute of + a term node */ +{ + if (!node) { + return; + } + + /* If it's a node list, the wildcard should be set to the tail node */ + if (node->type == FTS_AST_LIST) { + ut_ad(node->list.tail != NULL); + node = node->list.tail; + } + + ut_a(node->type == FTS_AST_TERM); + ut_a(!node->term.wildcard); + + node->term.wildcard = TRUE; +} + +/******************************************************************//** +Set the proximity attribute of a text node. A NULL node is ignored; +the distance may only be set once (it must still be ULINT_UNDEFINED). */ +void +fts_ast_text_set_distance( +/*======================*/ + fts_ast_node_t* node, /*!< in/out: text node */ + ulint distance) /*!< in: the text proximity + distance */ +{ + if (node == NULL) { + return; + } + + ut_a(node->type == FTS_AST_TEXT); + ut_a(node->text.distance == ULINT_UNDEFINED); + + node->text.distance = distance; +} + +/******************************************************************//** +Free every node on the state's allocation list (chained through +next_alloc) together with the strings held by TEXT and TERM nodes, +then reset the state's root and list pointers to NULL. */ +void +fts_ast_state_free( +/*===============*/ + fts_ast_state_t*state) /*!< in: ast state to free */ +{ + fts_ast_node_t* node = state->list.head; + + /* Free the nodes that were allocated during parsing. 
*/ + while (node) { + fts_ast_node_t* next = node->next_alloc; + + if (node->type == FTS_AST_TEXT && node->text.ptr) { + fts_ast_string_free(node->text.ptr); + node->text.ptr = NULL; + } else if (node->type == FTS_AST_TERM && node->term.ptr) { + fts_ast_string_free(node->term.ptr); + node->term.ptr = NULL; + } + + ut_free(node); + node = next; + } + + state->root = state->list.head = state->list.tail = NULL; +} + +/** Print the ast string byte by byte (len bytes, so embedded NUL bytes +do not end the output), followed by a newline +@param[in] ast_str string to print */ +static +void +fts_ast_string_print( + const fts_ast_string_t* ast_str) +{ + for (ulint i = 0; i < ast_str->len; ++i) { + printf("%c", ast_str->str[i]); + } + + printf("\n"); +} + +/******************************************************************//** +Print an ast node recursively. 
*/ +static +void +fts_ast_node_print_recursive( +/*=========================*/ + fts_ast_node_t* node, /*!< in: ast node to print */ + ulint level) /*!< in: nesting depth, used for indentation */ +{ + /* Indent the output according to the nesting level */ + for (ulint i = 0; i < level; i++) { + printf(" "); + } + + switch (node->type) { + case FTS_AST_TEXT: + printf("TEXT: "); + fts_ast_string_print(node->text.ptr); + break; + + case FTS_AST_TERM: + printf("TERM: "); + fts_ast_string_print(node->term.ptr); + break; + + case FTS_AST_LIST: + printf("LIST: \n"); + + for (node = node->list.head; node; node = node->next) { + fts_ast_node_print_recursive(node, level + 1); + } + break; + + case FTS_AST_SUBEXP_LIST: + printf("SUBEXP_LIST: \n"); + + for (node = node->list.head; node; node = node->next) { + fts_ast_node_print_recursive(node, level + 1); + } + break; + + case FTS_AST_OPER: + printf("OPER: %d\n", node->oper); + break; + + case FTS_AST_PARSER_PHRASE_LIST: + printf("PARSER_PHRASE_LIST: \n"); + + for (node = node->list.head; node; node = node->next) { + fts_ast_node_print_recursive(node, level + 1); + } + break; + + default: + ut_error; + } +} + +/******************************************************************//** +Print an ast node and, for list nodes, all of their elements, using +printf() */ +void +fts_ast_node_print( +/*===============*/ + fts_ast_node_t* node) /*!< in: ast node to print */ +{ + fts_ast_node_print_recursive(node, 0); +} + +/** Check only union operation involved in the node +@param[in] node ast node to check +@return true if the node contains only union else false. 
*/ +bool +fts_ast_node_check_union( + fts_ast_node_t* node) +{ + if (node->type == FTS_AST_LIST + || node->type == FTS_AST_SUBEXP_LIST) { + + for (node = node->list.head; node; node = node->next) { + if (!fts_ast_node_check_union(node)) { + return(false); + } + } + + } else if (node->type == FTS_AST_PARSER_PHRASE_LIST) { + /* Phrase search for plugin parser */ + return(false); + } else if (node->type == FTS_AST_OPER + && (node->oper == FTS_IGNORE + || node->oper == FTS_EXIST)) { + + return(false); + } else if (node->type == FTS_AST_TEXT) { + /* Distance or phrase search query. */ + return(false); + } + + return(true); +} + +/******************************************************************//** +Traverse the AST - in-order traversal, except for the FTS_EXIST and FTS_IGNORE +nodes, which will be ignored in the first pass of each level, and visited in a +second and third pass after all other nodes in the same level are visited. +@return DB_SUCCESS if all went well, DB_INTERRUPTED if the transaction +was interrupted, otherwise the error returned by the visitor */ +dberr_t +fts_ast_visit( +/*==========*/ + fts_ast_oper_t oper, /*!< in: current operator */ + fts_ast_node_t* node, /*!< in: current root node, must be a + list or sub-expression list */ + fts_ast_callback visitor, /*!< in: callback function */ + void* arg, /*!< in: arg for callback */ + bool* has_ignore) /*!< out: set to true if an operator + was ignored during processing + (never reset to false here); + currently we ignore FTS_EXIST + and FTS_IGNORE operators */ +{ + dberr_t error = DB_SUCCESS; + fts_ast_node_t* oper_node = NULL; + fts_ast_node_t* start_node; + bool revisit = false; + bool will_be_ignored = false; + fts_ast_visit_pass_t visit_pass = FTS_PASS_FIRST; + const trx_t* trx = node->trx; + + start_node = node->list.head; + + ut_a(node->type == FTS_AST_LIST + || node->type == FTS_AST_SUBEXP_LIST); + + if (oper == FTS_EXIST_SKIP) { + visit_pass = FTS_PASS_EXIST; + } else if (oper == FTS_IGNORE_SKIP) { + visit_pass = FTS_PASS_IGNORE; + } + + /* In the first pass of the tree, at the leaf level of the + tree, FTS_EXIST and FTS_IGNORE operation will be ignored. 
+ It will be repeated at the level above the leaf level. + + The basic idea here is that when we encounter FTS_EXIST or + FTS_IGNORE, we will change the operator node into FTS_EXIST_SKIP + or FTS_IGNORE_SKIP, and term node & text node with the operators + are ignored in the first pass. We have two passes during the revisit: + We process nodes with FTS_EXIST_SKIP in the exist pass, and then + process nodes with FTS_IGNORE_SKIP in the ignore pass. + + The order should be strictly followed, or we will get wrong results. + For example, we have a query 'a +b -c d +e -f'. + first pass: process 'a' and 'd' by union; + exist pass: process '+b' and '+e' by intersection; + ignore pass: process '-c' and '-f' by difference. */ + + for (node = node->list.head; + node && (error == DB_SUCCESS); + node = node->next) { + + switch (node->type) { + case FTS_AST_LIST: + if (visit_pass != FTS_PASS_FIRST) { + break; + } + + error = fts_ast_visit(oper, node, visitor, + arg, &will_be_ignored); + + /* If will_be_ignored is set to true, then + we encountered and ignored a FTS_EXIST or FTS_IGNORE + operator. 
*/ + if (will_be_ignored) { + revisit = true; + /* Remember oper for list in case '-abc&def', + ignored oper is from previous node of list.*/ + node->oper = oper; + } + + break; + + case FTS_AST_OPER: + oper = node->oper; + oper_node = node; + + /* Change the operator for revisit */ + if (oper == FTS_EXIST) { + oper_node->oper = FTS_EXIST_SKIP; + } else if (oper == FTS_IGNORE) { + oper_node->oper = FTS_IGNORE_SKIP; + } + + break; + + default: + if (node->visited) { + continue; + } + + ut_a(oper == FTS_NONE || !oper_node + || oper_node->oper == oper + || oper_node->oper == FTS_EXIST_SKIP + || oper_node->oper == FTS_IGNORE_SKIP); + + if (oper== FTS_EXIST || oper == FTS_IGNORE) { + *has_ignore = true; + continue; + } + + /* Process leaf node according to its pass. */ + if (oper == FTS_EXIST_SKIP + && visit_pass == FTS_PASS_EXIST) { + error = visitor(FTS_EXIST, node, arg); + node->visited = true; + } else if (oper == FTS_IGNORE_SKIP + && visit_pass == FTS_PASS_IGNORE) { + error = visitor(FTS_IGNORE, node, arg); + node->visited = true; + } else if (visit_pass == FTS_PASS_FIRST) { + error = visitor(oper, node, arg); + node->visited = true; + } + } + } + + if (trx_is_interrupted(trx)) { + return DB_INTERRUPTED; + } + + if (revisit) { + /* Exist pass processes the skipped FTS_EXIST operation. */ + for (node = start_node; + node && error == DB_SUCCESS; + node = node->next) { + + if (node->type == FTS_AST_LIST + && node->oper != FTS_IGNORE) { + error = fts_ast_visit(FTS_EXIST_SKIP, node, + visitor, arg, &will_be_ignored); + } + } + + /* Ignore pass processes the skipped FTS_IGNORE operation. 
*/ + for (node = start_node; + node && error == DB_SUCCESS; + node = node->next) { + + if (node->type == FTS_AST_LIST) { + error = fts_ast_visit(FTS_IGNORE_SKIP, node, + visitor, arg, &will_be_ignored); + } + } + } + + return(error); +} + +/** +Create an ast string object, with NUL-terminator, so the string +has one more byte than len. The len bytes at str are copied. +@param[in] str pointer to the bytes to copy +@param[in] len length of the string +@return ast string with NUL-terminator */ +fts_ast_string_t* +fts_ast_string_create( + const byte* str, + ulint len) +{ + fts_ast_string_t* ast_str; + + ut_ad(len > 0); + + ast_str = static_cast<fts_ast_string_t*>( + ut_malloc_nokey(sizeof(fts_ast_string_t))); + + ast_str->str = static_cast<byte*>(ut_malloc_nokey(len + 1)); + + ast_str->len = len; + memcpy(ast_str->str, str, len); + ast_str->str[len] = '\0'; + + return(ast_str); +} + +/** +Free an ast string instance and its byte buffer; NULL is ignored +@param[in,out] ast_str string to free */ +void +fts_ast_string_free( + fts_ast_string_t* ast_str) +{ + if (ast_str != NULL) { + ut_free(ast_str->str); + ut_free(ast_str); + } +} + +/** +Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul +@param[in] ast_str string to translate +@param[in] base the base +@return translated number */ +ulint +fts_ast_string_to_ul( + const fts_ast_string_t* ast_str, + int base) +{ + return(strtoul(reinterpret_cast<const char*>(ast_str->str), + NULL, base)); +} + +#ifdef UNIV_DEBUG /* debug-only helper: map an ast node type to the name of its enumerator, "FTS_UNKNOWN" if the value is not recognized */ +const char* +fts_ast_node_type_get(fts_ast_type_t type) +{ + switch (type) { + case FTS_AST_OPER: + return("FTS_AST_OPER"); + case FTS_AST_NUMB: + return("FTS_AST_NUMB"); + case FTS_AST_TERM: + return("FTS_AST_TERM"); + case FTS_AST_TEXT: + return("FTS_AST_TEXT"); + case FTS_AST_LIST: + return("FTS_AST_LIST"); + case FTS_AST_SUBEXP_LIST: + return("FTS_AST_SUBEXP_LIST"); + case FTS_AST_PARSER_PHRASE_LIST: + return("FTS_AST_PARSER_PHRASE_LIST"); + } + ut_ad(0); + return("FTS_UNKNOWN"); +} +#endif /* UNIV_DEBUG */ diff --git a/storage/innobase/fts/fts0blex.cc 
b/storage/innobase/fts/fts0blex.cc new file mode 100644 index 00000000..6a2b4202 --- /dev/null +++ b/storage/innobase/fts/fts0blex.cc @@ -0,0 +1,2177 @@ +#include "univ.i" +#line 2 "fts0blex.cc" + +#line 4 "fts0blex.cc" + +#define YY_INT_ALIGNED short int + +/* A lexical scanner generated by flex */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 6 +#define YY_FLEX_SUBMINOR_VERSION 4 +#if YY_FLEX_SUBMINOR_VERSION > 0 +#define FLEX_BETA +#endif + +#ifdef yy_create_buffer +#define fts0b_create_buffer_ALREADY_DEFINED +#else +#define yy_create_buffer fts0b_create_buffer +#endif + +#ifdef yy_delete_buffer +#define fts0b_delete_buffer_ALREADY_DEFINED +#else +#define yy_delete_buffer fts0b_delete_buffer +#endif + +#ifdef yy_scan_buffer +#define fts0b_scan_buffer_ALREADY_DEFINED +#else +#define yy_scan_buffer fts0b_scan_buffer +#endif + +#ifdef yy_scan_string +#define fts0b_scan_string_ALREADY_DEFINED +#else +#define yy_scan_string fts0b_scan_string +#endif + +#ifdef yy_scan_bytes +#define fts0b_scan_bytes_ALREADY_DEFINED +#else +#define yy_scan_bytes fts0b_scan_bytes +#endif + +#ifdef yy_init_buffer +#define fts0b_init_buffer_ALREADY_DEFINED +#else +#define yy_init_buffer fts0b_init_buffer +#endif + +#ifdef yy_flush_buffer +#define fts0b_flush_buffer_ALREADY_DEFINED +#else +#define yy_flush_buffer fts0b_flush_buffer +#endif + +#ifdef yy_load_buffer_state +#define fts0b_load_buffer_state_ALREADY_DEFINED +#else +#define yy_load_buffer_state fts0b_load_buffer_state +#endif + +#ifdef yy_switch_to_buffer +#define fts0b_switch_to_buffer_ALREADY_DEFINED +#else +#define yy_switch_to_buffer fts0b_switch_to_buffer +#endif + +#ifdef yypush_buffer_state +#define fts0bpush_buffer_state_ALREADY_DEFINED +#else +#define yypush_buffer_state fts0bpush_buffer_state +#endif + +#ifdef yypop_buffer_state +#define fts0bpop_buffer_state_ALREADY_DEFINED +#else +#define yypop_buffer_state fts0bpop_buffer_state +#endif + +#ifdef yyensure_buffer_stack +#define 
fts0bensure_buffer_stack_ALREADY_DEFINED +#else +#define yyensure_buffer_stack fts0bensure_buffer_stack +#endif + +#ifdef yylex +#define fts0blex_ALREADY_DEFINED +#else +#define yylex fts0blex +#endif + +#ifdef yyrestart +#define fts0brestart_ALREADY_DEFINED +#else +#define yyrestart fts0brestart +#endif + +#ifdef yylex_init +#define fts0blex_init_ALREADY_DEFINED +#else +#define yylex_init fts0blex_init +#endif + +#ifdef yylex_init_extra +#define fts0blex_init_extra_ALREADY_DEFINED +#else +#define yylex_init_extra fts0blex_init_extra +#endif + +#ifdef yylex_destroy +#define fts0blex_destroy_ALREADY_DEFINED +#else +#define yylex_destroy fts0blex_destroy +#endif + +#ifdef yyget_debug +#define fts0bget_debug_ALREADY_DEFINED +#else +#define yyget_debug fts0bget_debug +#endif + +#ifdef yyset_debug +#define fts0bset_debug_ALREADY_DEFINED +#else +#define yyset_debug fts0bset_debug +#endif + +#ifdef yyget_extra +#define fts0bget_extra_ALREADY_DEFINED +#else +#define yyget_extra fts0bget_extra +#endif + +#ifdef yyset_extra +#define fts0bset_extra_ALREADY_DEFINED +#else +#define yyset_extra fts0bset_extra +#endif + +#ifdef yyget_in +#define fts0bget_in_ALREADY_DEFINED +#else +#define yyget_in fts0bget_in +#endif + +#ifdef yyset_in +#define fts0bset_in_ALREADY_DEFINED +#else +#define yyset_in fts0bset_in +#endif + +#ifdef yyget_out +#define fts0bget_out_ALREADY_DEFINED +#else +#define yyget_out fts0bget_out +#endif + +#ifdef yyset_out +#define fts0bset_out_ALREADY_DEFINED +#else +#define yyset_out fts0bset_out +#endif + +#ifdef yyget_leng +#define fts0bget_leng_ALREADY_DEFINED +#else +#define yyget_leng fts0bget_leng +#endif + +#ifdef yyget_text +#define fts0bget_text_ALREADY_DEFINED +#else +#define yyget_text fts0bget_text +#endif + +#ifdef yyget_lineno +#define fts0bget_lineno_ALREADY_DEFINED +#else +#define yyget_lineno fts0bget_lineno +#endif + +#ifdef yyset_lineno +#define fts0bset_lineno_ALREADY_DEFINED +#else +#define yyset_lineno fts0bset_lineno +#endif + +#ifdef 
yyget_column +#define fts0bget_column_ALREADY_DEFINED +#else +#define yyget_column fts0bget_column +#endif + +#ifdef yyset_column +#define fts0bset_column_ALREADY_DEFINED +#else +#define yyset_column fts0bset_column +#endif + +#ifdef yywrap +#define fts0bwrap_ALREADY_DEFINED +#else +#define yywrap fts0bwrap +#endif + +#ifdef yyalloc +#define fts0balloc_ALREADY_DEFINED +#else +#define yyalloc fts0balloc +#endif + +#ifdef yyrealloc +#define fts0brealloc_ALREADY_DEFINED +#else +#define yyrealloc fts0brealloc +#endif + +#ifdef yyfree +#define fts0bfree_ALREADY_DEFINED +#else +#define yyfree fts0bfree +#endif + +/* First, we deal with platform-specific or compiler-specific issues. */ + +/* begin standard C headers. */ +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +/* end standard C headers. */ + +/* flex integer type definitions */ + +#ifndef FLEXINT_H +#define FLEXINT_H + +/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ + +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. + */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include <inttypes.h> +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; + +/* Limits of integral types. 
*/ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#ifndef SIZE_MAX +#define SIZE_MAX (~(size_t)0) +#endif + +#endif /* ! C99 */ + +#endif /* ! FLEXINT_H */ + +/* begin standard C++ headers. */ + +/* TODO: this is always defined, so inline it */ +#define yyconst const + +#if defined(__GNUC__) && __GNUC__ >= 3 +#define yynoreturn __attribute__((__noreturn__)) +#else +#define yynoreturn +#endif + +/* Returned upon end-of-file. */ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an + * integer in range [0..255] for use as an array index. + */ +#define YY_SC_TO_UI(c) ((YY_CHAR) (c)) + +/* An opaque pointer. */ +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void* yyscan_t; +#endif + +/* For convenience, these vars (plus the bison vars far below) + are macros in the reentrant scanner. */ +#define yyin yyg->yyin_r +#define yyout yyg->yyout_r +#define yyextra yyg->yyextra_r +#define yyleng yyg->yyleng_r +#define yytext yyg->yytext_r +#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) +#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) +#define yy_flex_debug yyg->yy_flex_debug_r + +/* Enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN yyg->yy_start = 1 + 2 * +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. 
+ */ +#define YY_START ((yyg->yy_start - 1) / 2) +#define YYSTATE YY_START +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE yyrestart( yyin , yyscanner ) +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. */ +#ifndef YY_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k. + * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. + * Ditto for the __ia64__ case accordingly. + */ +#define YY_BUF_SIZE 32768 +#else +#define YY_BUF_SIZE 16384 +#endif /* __ia64__ */ +#endif + +/* The state buf must be large enough to hold one state per character in the main buffer. + */ +#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef size_t yy_size_t; +#endif + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + + #define YY_LESS_LINENO(n) + #define YY_LINENO_REWIND_TO(ptr) + +/* Return all but the first "n" matched characters back to the input stream. */ +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. 
*/ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + *yy_cp = yyg->yy_hold_char; \ + YY_RESTORE_YY_MORE_OFFSET \ + yyg->yy_c_buf_p = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) +#define unput(c) yyunput( c, yyg->yytext_ptr , yyscanner ) + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + int yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. */ + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; + +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. 
+ * + * When we actually see the EOF, we change the status to "new" + * (via yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + * + * Returns the top of the stack, or NULL. + */ +#define YY_CURRENT_BUFFER ( yyg->yy_buffer_stack \ + ? yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] \ + : 0) +/* Same as previous macro, but useful when we know that the buffer stack is not + * NULL or when we need an lvalue. For internal use only. + */ +#define YY_CURRENT_BUFFER_LVALUE yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] + +void yyrestart ( FILE *input_file , yyscan_t yyscanner ); +void yy_switch_to_buffer ( YY_BUFFER_STATE new_buffer , yyscan_t yyscanner ); +YY_BUFFER_STATE yy_create_buffer ( FILE *file, int size , yyscan_t yyscanner ); +void yy_delete_buffer ( YY_BUFFER_STATE b , yyscan_t yyscanner ); +void yy_flush_buffer ( YY_BUFFER_STATE b , yyscan_t yyscanner ); +void yypush_buffer_state ( YY_BUFFER_STATE new_buffer , yyscan_t yyscanner ); +void yypop_buffer_state ( yyscan_t yyscanner ); + +static void yyensure_buffer_stack ( yyscan_t yyscanner ); +static void yy_load_buffer_state ( yyscan_t yyscanner ); +static void yy_init_buffer ( YY_BUFFER_STATE b, FILE *file , yyscan_t yyscanner ); +#define YY_FLUSH_BUFFER yy_flush_buffer( YY_CURRENT_BUFFER , yyscanner) + +YY_BUFFER_STATE yy_scan_buffer ( char *base, yy_size_t size , yyscan_t yyscanner ); +YY_BUFFER_STATE yy_scan_string ( const char *yy_str , yyscan_t yyscanner ); +YY_BUFFER_STATE yy_scan_bytes ( const char *bytes, int len , yyscan_t yyscanner ); + +void *yyalloc ( yy_size_t , yyscan_t yyscanner ); +void *yyrealloc ( void *, yy_size_t , yyscan_t yyscanner ); +void yyfree ( void * , yyscan_t yyscanner ); + +#define yy_new_buffer 
yy_create_buffer +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! YY_CURRENT_BUFFER ){ \ + yyensure_buffer_stack (yyscanner); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ + } +#define yy_set_bol(at_bol) \ + { \ + if ( ! YY_CURRENT_BUFFER ){\ + yyensure_buffer_stack (yyscanner); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ + } +#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) + +/* Begin user sect3 */ + +#define fts0bwrap(yyscanner) (/*CONSTCOND*/1) +#define YY_SKIP_YYWRAP +typedef flex_uint8_t YY_CHAR; + +typedef int yy_state_type; + +#define yytext_ptr yytext_r + +static yy_state_type yy_get_previous_state ( yyscan_t yyscanner ); +static yy_state_type yy_try_NUL_trans ( yy_state_type current_state , yyscan_t yyscanner); +static int yy_get_next_buffer ( yyscan_t yyscanner ); +static void yynoreturn yy_fatal_error ( const char* msg , yyscan_t yyscanner ); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. + */ +#define YY_DO_BEFORE_ACTION \ + yyg->yytext_ptr = yy_bp; \ + yyleng = (int) (yy_cp - yy_bp); \ + yyg->yy_hold_char = *yy_cp; \ + *yy_cp = '\0'; \ + yyg->yy_c_buf_p = yy_cp; +#define YY_NUM_RULES 7 +#define YY_END_OF_BUFFER 8 +/* This struct is not used in this scanner, + but its presence is necessary. 
*/ +struct yy_trans_info + { + flex_int32_t yy_verify; + flex_int32_t yy_nxt; + }; +static const flex_int16_t yy_accept[19] = + { 0, + 4, 4, 8, 4, 1, 6, 1, 7, 7, 2, + 3, 4, 1, 1, 0, 5, 3, 0 + } ; + +static const YY_CHAR yy_ec[256] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 4, 1, 5, 1, 1, 6, 1, 1, 7, + 7, 7, 7, 1, 7, 1, 1, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 1, 1, 7, + 1, 7, 1, 7, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + } ; + +static const YY_CHAR yy_meta[9] = + { 0, + 1, 2, 3, 4, 5, 5, 5, 1 + } ; + +static const flex_int16_t yy_base[22] = + { 0, + 0, 0, 22, 0, 7, 23, 0, 14, 23, 23, + 7, 0, 0, 0, 5, 23, 0, 23, 11, 12, + 16 + } ; + +static const flex_int16_t yy_def[22] = + { 0, + 18, 1, 18, 19, 19, 18, 20, 21, 18, 18, + 19, 19, 5, 20, 21, 18, 11, 0, 18, 18, + 18 + } ; + +static const flex_int16_t yy_nxt[32] = + { 0, + 4, 5, 6, 7, 8, 9, 10, 11, 13, 16, + 14, 12, 12, 14, 17, 14, 15, 15, 16, 15, + 15, 18, 3, 18, 18, 18, 18, 18, 18, 18, + 18 + } ; + +static const flex_int16_t yy_chk[32] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 5, 15, + 5, 19, 19, 20, 11, 20, 21, 21, 8, 21, + 21, 3, 18, 18, 18, 18, 18, 18, 18, 18, + 18 + } ; + +/* The intent behind this definition is that it'll catch + * any uses of REJECT which flex missed. 
+ */ +#define REJECT reject_used_but_not_detected +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +#line 1 "fts0blex.l" +/***************************************************************************** + +Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ +/** + * @file fts/fts0blex.l + * FTS parser lexical analyzer + * + * Created 2007/5/9 Sunny Bains + */ +#line 27 "fts0blex.l" + +#include "fts0ast.h" +#include "fts0pars.h" + +/* Required for reentrant parser */ +#define YY_DECL int fts_blexer(YYSTYPE* val, yyscan_t yyscanner) +#define exit(A) ut_error + +#line 675 "fts0blex.cc" +#define YY_NO_INPUT 1 +#line 677 "fts0blex.cc" + +#define INITIAL 0 + +#ifndef YY_NO_UNISTD_H +/* Special case for "unistd.h", since it is non-ANSI. We include it way + * down here because we want the user's section 1 to have been scanned first. + * The user has a chance to override it with an option. + */ +#include <unistd.h> +#endif + +#ifndef YY_EXTRA_TYPE +#define YY_EXTRA_TYPE void * +#endif + +/* Holds the entire state of the reentrant scanner. */ +struct yyguts_t + { + + /* User-defined. Not touched by flex. 
*/ + YY_EXTRA_TYPE yyextra_r; + + /* The rest are the same as the globals declared in the non-reentrant scanner. */ + FILE *yyin_r, *yyout_r; + size_t yy_buffer_stack_top; /**< index of top of stack. */ + size_t yy_buffer_stack_max; /**< capacity of stack. */ + YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. */ + char yy_hold_char; + int yy_n_chars; + int yyleng_r; + char *yy_c_buf_p; + int yy_init; + int yy_start; + int yy_did_buffer_switch_on_eof; + int yy_start_stack_ptr; + int yy_start_stack_depth; + int *yy_start_stack; + yy_state_type yy_last_accepting_state; + char* yy_last_accepting_cpos; + + int yylineno_r; + int yy_flex_debug_r; + + char *yytext_r; + int yy_more_flag; + int yy_more_len; + + }; /* end struct yyguts_t */ + +static int yy_init_globals ( yyscan_t yyscanner ); + +int yylex_init (yyscan_t* scanner); + +int yylex_init_extra ( YY_EXTRA_TYPE user_defined, yyscan_t* scanner); + +/* Accessor methods to globals. + These are made visible to non-reentrant scanners for convenience. */ + +int yylex_destroy ( yyscan_t yyscanner ); + +int yyget_debug ( yyscan_t yyscanner ); + +void yyset_debug ( int debug_flag , yyscan_t yyscanner ); + +YY_EXTRA_TYPE yyget_extra ( yyscan_t yyscanner ); + +void yyset_extra ( YY_EXTRA_TYPE user_defined , yyscan_t yyscanner ); + +FILE *yyget_in ( yyscan_t yyscanner ); + +void yyset_in ( FILE * _in_str , yyscan_t yyscanner ); + +FILE *yyget_out ( yyscan_t yyscanner ); + +void yyset_out ( FILE * _out_str , yyscan_t yyscanner ); + + int yyget_leng ( yyscan_t yyscanner ); + +char *yyget_text ( yyscan_t yyscanner ); + +int yyget_lineno ( yyscan_t yyscanner ); + +void yyset_lineno ( int _line_number , yyscan_t yyscanner ); + +int yyget_column ( yyscan_t yyscanner ); + +void yyset_column ( int _column_no , yyscan_t yyscanner ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. 
+ */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap ( yyscan_t yyscanner ); +#else +extern int yywrap ( yyscan_t yyscanner ); +#endif +#endif + +#ifndef YY_NO_UNPUT + +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy ( char *, const char *, int , yyscan_t yyscanner); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen ( const char * , yyscan_t yyscanner); +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus +static int yyinput ( yyscan_t yyscanner ); +#else +static int input ( yyscan_t yyscanner ); +#endif + +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k */ +#define YY_READ_BUF_SIZE 16384 +#else +#define YY_READ_BUF_SIZE 8192 +#endif /* __ia64__ */ +#endif + +/* Copy whatever the last rule matched to the standard output. */ +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO do { if (fwrite( yytext, (size_t) yyleng, 1, yyout )) {} } while (0) +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". 
+ */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ + { \ + int c = '*'; \ + int n; \ + for ( n = 0; n < max_size && \ + (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else \ + { \ + errno=0; \ + while ( (result = (int) fread(buf, 1, (yy_size_t) max_size, yyin)) == 0 && ferror(yyin)) \ + { \ + if( errno != EINTR) \ + { \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + break; \ + } \ + errno=0; \ + clearerr(yyin); \ + } \ + }\ +\ + +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg , yyscanner) +#endif + +/* end tables serialization structures and prototypes */ + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int yylex (yyscan_t yyscanner); + +#define YY_DECL int yylex (yyscan_t yyscanner) +#endif /* !YY_DECL */ + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. */ +#ifndef YY_BREAK +#define YY_BREAK /*LINTED*/break; +#endif + +#define YY_RULE_SETUP \ + YY_USER_ACTION + +/** The main scanner function which does all the work. 
+ */ +YY_DECL +{ + yy_state_type yy_current_state; + char *yy_cp, *yy_bp; + int yy_act; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if ( !yyg->yy_init ) + { + yyg->yy_init = 1; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + if ( ! yyg->yy_start ) + yyg->yy_start = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + + if ( ! YY_CURRENT_BUFFER ) { + yyensure_buffer_stack (yyscanner); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner); + } + + yy_load_buffer_state( yyscanner ); + } + + { +#line 44 "fts0blex.l" + + +#line 938 "fts0blex.cc" + + while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ + { + yy_cp = yyg->yy_c_buf_p; + + /* Support of yytext. */ + *yy_cp = yyg->yy_hold_char; + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. + */ + yy_bp = yy_cp; + + yy_current_state = yyg->yy_start; +yy_match: + do + { + YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ; + if ( yy_accept[yy_current_state] ) + { + yyg->yy_last_accepting_state = yy_current_state; + yyg->yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 19 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + ++yy_cp; + } + while ( yy_current_state != 18 ); + yy_cp = yyg->yy_last_accepting_cpos; + yy_current_state = yyg->yy_last_accepting_state; + +yy_find_action: + yy_act = yy_accept[yy_current_state]; + + YY_DO_BEFORE_ACTION; + +do_action: /* This label is used only to access EOF actions. 
*/ + + switch ( yy_act ) + { /* beginning of action switch */ + case 0: /* must back up */ + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = yyg->yy_hold_char; + yy_cp = yyg->yy_last_accepting_cpos; + yy_current_state = yyg->yy_last_accepting_state; + goto yy_find_action; + +case 1: +YY_RULE_SETUP +#line 46 "fts0blex.l" +/* Ignore whitespace */ ; + YY_BREAK +case 2: +YY_RULE_SETUP +#line 48 "fts0blex.l" +{ + val->oper = fts0bget_text(yyscanner)[0]; + + return(val->oper); +} + YY_BREAK +case 3: +YY_RULE_SETUP +#line 54 "fts0blex.l" +{ + val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner)); + + return(FTS_NUMB); +} + YY_BREAK +case 4: +YY_RULE_SETUP +#line 60 "fts0blex.l" +{ + val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner)); + + return(FTS_TERM); +} + YY_BREAK +case 5: +YY_RULE_SETUP +#line 66 "fts0blex.l" +{ + val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner)); + + return(FTS_TEXT); +} + YY_BREAK +case 6: +/* rule 6 can match eol */ +YY_RULE_SETUP +#line 72 "fts0blex.l" + + YY_BREAK +case 7: +YY_RULE_SETUP +#line 74 "fts0blex.l" +ECHO; + YY_BREAK +#line 1043 "fts0blex.cc" +case YY_STATE_EOF(INITIAL): + yyterminate(); + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not including the EOB char. */ + int yy_amount_of_matched_text = (int) (yy_cp - yyg->yytext_ptr) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = yyg->yy_hold_char; + YY_RESTORE_YY_MORE_OFFSET + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between YY_CURRENT_BUFFER and our + * globals. 
Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. + */ + yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). + */ + if ( yyg->yy_c_buf_p <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) + { /* This was really a NUL. */ + yy_state_type yy_next_state; + + yyg->yy_c_buf_p = yyg->yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( yyscanner ); + + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state , yyscanner); + + yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. 
*/ + yy_cp = ++yyg->yy_c_buf_p; + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = yyg->yy_last_accepting_cpos; + yy_current_state = yyg->yy_last_accepting_state; + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer( yyscanner ) ) + { + case EOB_ACT_END_OF_FILE: + { + yyg->yy_did_buffer_switch_on_eof = 0; + + if ( yywrap( yyscanner ) ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. + */ + yyg->yy_c_buf_p = yyg->yytext_ptr + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! yyg->yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + yyg->yy_c_buf_p = + yyg->yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( yyscanner ); + + yy_cp = yyg->yy_c_buf_p; + yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + yyg->yy_c_buf_p = + &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars]; + + yy_current_state = yy_get_previous_state( yyscanner ); + + yy_cp = yyg->yy_c_buf_p; + yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ + } /* end of user's declarations */ +} /* end of yylex */ + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ +static int yy_get_next_buffer (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + char *dest = 
YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; + char *source = yyg->yytext_ptr; + int number_to_move, i; + int ret_val; + + if ( yyg->yy_c_buf_p > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( yyg->yy_c_buf_p - yyg->yytext_ptr - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. + */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer. */ + number_to_move = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr - 1); + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars = 0; + + else + { + int num_to_read = + YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ + + /* just a shorter name for the current buffer */ + YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE; + + int yy_c_buf_p_offset = + (int) (yyg->yy_c_buf_p - b->yy_ch_buf); + + if ( b->yy_is_our_buffer ) + { + int new_size = b->yy_buf_size * 2; + + if ( new_size <= 0 ) + b->yy_buf_size += b->yy_buf_size / 8; + else + b->yy_buf_size *= 2; + + b->yy_ch_buf = (char *) + /* Include room in for 2 EOB chars. */ + yyrealloc( (void *) b->yy_ch_buf, + (yy_size_t) (b->yy_buf_size + 2) , yyscanner ); + } + else + /* Can't grow it, we don't own it. */ + b->yy_ch_buf = NULL; + + if ( ! 
b->yy_ch_buf ) + YY_FATAL_ERROR( + "fatal error - scanner input buffer overflow" ); + + yyg->yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; + + num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - + number_to_move - 1; + + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. */ + YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), + yyg->yy_n_chars, num_to_read ); + + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; + } + + if ( yyg->yy_n_chars == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yyrestart( yyin , yyscanner); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + if ((yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) { + /* Extend the array by 50%, plus the number we really need. */ + int new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1); + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc( + (void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf, (yy_size_t) new_size , yyscanner ); + if ( ! 
YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" ); + /* "- 2" to take care of EOB's */ + YY_CURRENT_BUFFER_LVALUE->yy_buf_size = (int) (new_size - 2); + } + + yyg->yy_n_chars += number_to_move; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] = YY_END_OF_BUFFER_CHAR; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + yyg->yytext_ptr = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; + + return ret_val; +} + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + + static yy_state_type yy_get_previous_state (yyscan_t yyscanner) +{ + yy_state_type yy_current_state; + char *yy_cp; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + yy_current_state = yyg->yy_start; + + for ( yy_cp = yyg->yytext_ptr + YY_MORE_ADJ; yy_cp < yyg->yy_c_buf_p; ++yy_cp ) + { + YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); + if ( yy_accept[yy_current_state] ) + { + yyg->yy_last_accepting_state = yy_current_state; + yyg->yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 19 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + } + + return yy_current_state; +} + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner) +{ + int yy_is_jam; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. 
*/ + char *yy_cp = yyg->yy_c_buf_p; + + YY_CHAR yy_c = 1; + if ( yy_accept[yy_current_state] ) + { + yyg->yy_last_accepting_state = yy_current_state; + yyg->yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 19 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + yy_is_jam = (yy_current_state == 18); + + (void)yyg; + return yy_is_jam ? 0 : yy_current_state; +} + +#ifndef YY_NO_UNPUT + +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus + static int yyinput (yyscan_t yyscanner) +#else + static int input (yyscan_t yyscanner) +#endif + +{ + int c; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + *yyg->yy_c_buf_p = yyg->yy_hold_char; + + if ( *yyg->yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. + */ + if ( yyg->yy_c_buf_p < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) + /* This was really a NUL. */ + *yyg->yy_c_buf_p = '\0'; + + else + { /* need more input */ + int offset = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr); + ++yyg->yy_c_buf_p; + + switch ( yy_get_next_buffer( yyscanner ) ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yyrestart( yyin , yyscanner); + + /*FALLTHROUGH*/ + + case EOB_ACT_END_OF_FILE: + { + if ( yywrap( yyscanner ) ) + return 0; + + if ( ! 
yyg->yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(yyscanner); +#else + return input(yyscanner); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + yyg->yy_c_buf_p = yyg->yytext_ptr + offset; + break; + } + } + } + + c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */ + *yyg->yy_c_buf_p = '\0'; /* preserve yytext */ + yyg->yy_hold_char = *++yyg->yy_c_buf_p; + + return c; +} +#endif /* ifndef YY_NO_INPUT */ + +/** Immediately switch to a different input stream. + * @param input_file A readable stream. + * @param yyscanner The scanner object. + * @note This function does not reset the start condition to @c INITIAL . + */ + void yyrestart (FILE * input_file , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if ( ! YY_CURRENT_BUFFER ){ + yyensure_buffer_stack (yyscanner); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner); + } + + yy_init_buffer( YY_CURRENT_BUFFER, input_file , yyscanner); + yy_load_buffer_state( yyscanner ); +} + +/** Switch to a different input buffer. + * @param new_buffer The new input buffer. + * @param yyscanner The scanner object. + */ + void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* TODO. We should be able to replace this entire function body + * with + * yypop_buffer_state(); + * yypush_buffer_state(new_buffer); + */ + yyensure_buffer_stack (yyscanner); + if ( YY_CURRENT_BUFFER == new_buffer ) + return; + + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. 
*/ + *yyg->yy_c_buf_p = yyg->yy_hold_char; + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p; + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; + } + + YY_CURRENT_BUFFER_LVALUE = new_buffer; + yy_load_buffer_state( yyscanner ); + + /* We don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. + */ + yyg->yy_did_buffer_switch_on_eof = 1; +} + +static void yy_load_buffer_state (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; + yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; + yyg->yy_hold_char = *yyg->yy_c_buf_p; +} + +/** Allocate and initialize an input buffer state. + * @param file A readable stream. + * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. + * @param yyscanner The scanner object. + * @return the allocated buffer state. + */ + YY_BUFFER_STATE yy_create_buffer (FILE * file, int size , yyscan_t yyscanner) +{ + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) , yyscanner ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (char *) yyalloc( (yy_size_t) (b->yy_buf_size + 2) , yyscanner ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_is_our_buffer = 1; + + yy_init_buffer( b, file , yyscanner); + + return b; +} + +/** Destroy the buffer. + * @param b a buffer created with yy_create_buffer() + * @param yyscanner The scanner object. 
+ */ + void yy_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if ( ! b ) + return; + + if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */ + YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; + + if ( b->yy_is_our_buffer ) + yyfree( (void *) b->yy_ch_buf , yyscanner ); + + yyfree( (void *) b , yyscanner ); +} + +/* Initializes or reinitializes a buffer. + * This function is sometimes called more than once on the same buffer, + * such as during a yyrestart() or at EOF. + */ + static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file , yyscan_t yyscanner) + +{ + int oerrno = errno; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + yy_flush_buffer( b , yyscanner); + + b->yy_input_file = file; + b->yy_fill_buffer = 1; + + /* If b is the current buffer, then yy_init_buffer was _probably_ + * called from yyrestart() or through yy_get_next_buffer. + * In that case, we don't want to reset the lineno or column. + */ + if (b != YY_CURRENT_BUFFER){ + b->yy_bs_lineno = 1; + b->yy_bs_column = 0; + } + + b->yy_is_interactive = 0; + + errno = oerrno; +} + +/** Discard all buffered characters. On the next scan, YY_INPUT will be called. + * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. + * @param yyscanner The scanner object. + */ + void yy_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + if ( ! b ) + return; + + b->yy_n_chars = 0; + + /* We always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. + */ + b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[0]; + + b->yy_at_bol = 1; + b->yy_buffer_status = YY_BUFFER_NEW; + + if ( b == YY_CURRENT_BUFFER ) + yy_load_buffer_state( yyscanner ); +} + +/** Pushes the new state onto the stack. 
The new state becomes + * the current state. This function will allocate the stack + * if necessary. + * @param new_buffer The new state. + * @param yyscanner The scanner object. + */ +void yypush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + if (new_buffer == NULL) + return; + + yyensure_buffer_stack(yyscanner); + + /* This block is copied from yy_switch_to_buffer. */ + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. */ + *yyg->yy_c_buf_p = yyg->yy_hold_char; + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p; + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; + } + + /* Only push if top exists. Otherwise, replace top. */ + if (YY_CURRENT_BUFFER) + yyg->yy_buffer_stack_top++; + YY_CURRENT_BUFFER_LVALUE = new_buffer; + + /* copied from yy_switch_to_buffer. */ + yy_load_buffer_state( yyscanner ); + yyg->yy_did_buffer_switch_on_eof = 1; +} + +/** Removes and deletes the top of the stack, if present. + * The next element becomes the new top. + * @param yyscanner The scanner object. + */ +void yypop_buffer_state (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + if (!YY_CURRENT_BUFFER) + return; + + yy_delete_buffer(YY_CURRENT_BUFFER , yyscanner); + YY_CURRENT_BUFFER_LVALUE = NULL; + if (yyg->yy_buffer_stack_top > 0) + --yyg->yy_buffer_stack_top; + + if (YY_CURRENT_BUFFER) { + yy_load_buffer_state( yyscanner ); + yyg->yy_did_buffer_switch_on_eof = 1; + } +} + +/* Allocates the stack if it does not exist. + * Guarantees space for at least one push. + */ +static void yyensure_buffer_stack (yyscan_t yyscanner) +{ + yy_size_t num_to_alloc; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if (!yyg->yy_buffer_stack) { + + /* First allocation is just for 2 elements, since we don't know if this + * scanner will even need a stack. We use 2 instead of 1 to avoid an + * immediate realloc on the next call. 
+ */ + num_to_alloc = 1; /* After all that talk, this was set to 1 anyways... */ + yyg->yy_buffer_stack = (struct yy_buffer_state**)yyalloc + (num_to_alloc * sizeof(struct yy_buffer_state*) + , yyscanner); + if ( ! yyg->yy_buffer_stack ) + YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); + + memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*)); + + yyg->yy_buffer_stack_max = num_to_alloc; + yyg->yy_buffer_stack_top = 0; + return; + } + + if (yyg->yy_buffer_stack_top >= (yyg->yy_buffer_stack_max) - 1){ + + /* Increase the buffer to prepare for a possible push. */ + yy_size_t grow_size = 8 /* arbitrary grow size */; + + num_to_alloc = yyg->yy_buffer_stack_max + grow_size; + yyg->yy_buffer_stack = (struct yy_buffer_state**)yyrealloc + (yyg->yy_buffer_stack, + num_to_alloc * sizeof(struct yy_buffer_state*) + , yyscanner); + if ( ! yyg->yy_buffer_stack ) + YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); + + /* zero only the new slots.*/ + memset(yyg->yy_buffer_stack + yyg->yy_buffer_stack_max, 0, grow_size * sizeof(struct yy_buffer_state*)); + yyg->yy_buffer_stack_max = num_to_alloc; + } +} + +/** Setup the input buffer state to scan directly from a user-specified character buffer. + * @param base the character buffer + * @param size the size in bytes of the character buffer + * @param yyscanner The scanner object. + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner) +{ + YY_BUFFER_STATE b; + + if ( size < 2 || + base[size-2] != YY_END_OF_BUFFER_CHAR || + base[size-1] != YY_END_OF_BUFFER_CHAR ) + /* They forgot to leave room for the EOB's. */ + return NULL; + + b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) , yyscanner ); + if ( ! 
b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); + + b->yy_buf_size = (int) (size - 2); /* "- 2" to take care of EOB's */ + b->yy_buf_pos = b->yy_ch_buf = base; + b->yy_is_our_buffer = 0; + b->yy_input_file = NULL; + b->yy_n_chars = b->yy_buf_size; + b->yy_is_interactive = 0; + b->yy_at_bol = 1; + b->yy_fill_buffer = 0; + b->yy_buffer_status = YY_BUFFER_NEW; + + yy_switch_to_buffer( b , yyscanner ); + + return b; +} + +/** Setup the input buffer state to scan a string. The next call to yylex() will + * scan from a @e copy of @a str. + * @param yystr a NUL-terminated string to scan + * @param yyscanner The scanner object. + * @return the newly allocated buffer state object. + * @note If you want to scan bytes that may contain NUL values, then use + * yy_scan_bytes() instead. + */ +YY_BUFFER_STATE yy_scan_string (const char * yystr , yyscan_t yyscanner) +{ + + return yy_scan_bytes( yystr, (int) strlen(yystr) , yyscanner); +} + +/** Setup the input buffer state to scan the given bytes. The next call to yylex() will + * scan from a @e copy of @a bytes. + * @param yybytes the byte buffer to scan + * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes. + * @param yyscanner The scanner object. + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE yy_scan_bytes (const char * yybytes, int _yybytes_len , yyscan_t yyscanner) +{ + YY_BUFFER_STATE b; + char *buf; + yy_size_t n; + int i; + + /* Get memory for full buffer, including space for trailing EOB's. */ + n = (yy_size_t) (_yybytes_len + 2); + buf = (char *) yyalloc( n , yyscanner ); + if ( ! buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); + + for ( i = 0; i < _yybytes_len; ++i ) + buf[i] = yybytes[i]; + + buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; + + b = yy_scan_buffer( buf, n , yyscanner); + if ( ! b ) + YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); + + /* It's okay to grow etc. 
this buffer, and we should throw it + * away when we're done. + */ + b->yy_is_our_buffer = 1; + + return b; +} + +#ifndef YY_EXIT_FAILURE +#define YY_EXIT_FAILURE 2 +#endif + +static void yynoreturn yy_fatal_error (const char* msg , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + (void)yyg; + fprintf( stderr, "%s\n", msg ); + exit( YY_EXIT_FAILURE ); +} + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + yytext[yyleng] = yyg->yy_hold_char; \ + yyg->yy_c_buf_p = yytext + yyless_macro_arg; \ + yyg->yy_hold_char = *yyg->yy_c_buf_p; \ + *yyg->yy_c_buf_p = '\0'; \ + yyleng = yyless_macro_arg; \ + } \ + while ( 0 ) + +/* Accessor methods (get/set functions) to struct members. */ + +/** Get the user-defined data for this scanner. + * @param yyscanner The scanner object. + */ +YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyextra; +} + +/** Get the current line number. + * @param yyscanner The scanner object. + */ +int yyget_lineno (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if (! YY_CURRENT_BUFFER) + return 0; + + return yylineno; +} + +/** Get the current column number. + * @param yyscanner The scanner object. + */ +int yyget_column (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if (! YY_CURRENT_BUFFER) + return 0; + + return yycolumn; +} + +/** Get the input stream. + * @param yyscanner The scanner object. + */ +FILE *yyget_in (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyin; +} + +/** Get the output stream. + * @param yyscanner The scanner object. 
+ */ +FILE *yyget_out (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyout; +} + +/** Get the length of the current token. + * @param yyscanner The scanner object. + */ +int yyget_leng (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyleng; +} + +/** Get the current token. + * @param yyscanner The scanner object. + */ + +char *yyget_text (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yytext; +} + +/** Set the user-defined data. This data is never touched by the scanner. + * @param user_defined The data to be associated with this scanner. + * @param yyscanner The scanner object. + */ +void yyset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyextra = user_defined ; +} + +/** Set the current line number. + * @param _line_number line number + * @param yyscanner The scanner object. + */ +void yyset_lineno (int _line_number , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* lineno is only valid if an input buffer exists. */ + if (! YY_CURRENT_BUFFER ) + YY_FATAL_ERROR( "yyset_lineno called with no buffer" ); + + yylineno = _line_number; +} + +/** Set the current column. + * @param _column_no column number + * @param yyscanner The scanner object. + */ +void yyset_column (int _column_no , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* column is only valid if an input buffer exists. */ + if (! YY_CURRENT_BUFFER ) + YY_FATAL_ERROR( "yyset_column called with no buffer" ); + + yycolumn = _column_no; +} + +/** Set the input stream. This does not discard the current + * input buffer. + * @param _in_str A readable stream. + * @param yyscanner The scanner object. 
+ * @see yy_switch_to_buffer + */ +void yyset_in (FILE * _in_str , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyin = _in_str ; +} + +void yyset_out (FILE * _out_str , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyout = _out_str ; +} + +int yyget_debug (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yy_flex_debug; +} + +void yyset_debug (int _bdebug , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yy_flex_debug = _bdebug ; +} + +/* Accessor methods for yylval and yylloc */ + +/* User-visible API */ + +/* yylex_init is special because it creates the scanner itself, so it is + * the ONLY reentrant function that doesn't take the scanner as the last argument. + * That's why we explicitly handle the declaration, instead of using our macros. + */ +int yylex_init(yyscan_t* ptr_yy_globals) +{ + if (ptr_yy_globals == NULL){ + errno = EINVAL; + return 1; + } + + *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), NULL ); + + if (*ptr_yy_globals == NULL){ + errno = ENOMEM; + return 1; + } + + /* By setting to 0xAA, we expose bugs in yy_init_globals. Leave at 0x00 for releases. */ + memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); + + return yy_init_globals ( *ptr_yy_globals ); +} + +/* yylex_init_extra has the same functionality as yylex_init, but follows the + * convention of taking the scanner as the last argument. Note however, that + * this is a *pointer* to a scanner, as it will be allocated by this call (and + * is the reason, too, why this function also must handle its own declaration). + * The user defined value in the first argument will be available to yyalloc in + * the yyextra field. 
+ */ +int yylex_init_extra( YY_EXTRA_TYPE yy_user_defined, yyscan_t* ptr_yy_globals ) +{ + struct yyguts_t dummy_yyguts; + + yyset_extra (yy_user_defined, &dummy_yyguts); + + if (ptr_yy_globals == NULL){ + errno = EINVAL; + return 1; + } + + *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), &dummy_yyguts ); + + if (*ptr_yy_globals == NULL){ + errno = ENOMEM; + return 1; + } + + /* By setting to 0xAA, we expose bugs in + yy_init_globals. Leave at 0x00 for releases. */ + memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); + + yyset_extra (yy_user_defined, *ptr_yy_globals); + + return yy_init_globals ( *ptr_yy_globals ); +} + +static int yy_init_globals (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + /* Initialization is the same as for the non-reentrant scanner. + * This function is called from yylex_destroy(), so don't allocate here. + */ + + yyg->yy_buffer_stack = NULL; + yyg->yy_buffer_stack_top = 0; + yyg->yy_buffer_stack_max = 0; + yyg->yy_c_buf_p = NULL; + yyg->yy_init = 0; + yyg->yy_start = 0; + + yyg->yy_start_stack_ptr = 0; + yyg->yy_start_stack_depth = 0; + yyg->yy_start_stack = NULL; + +/* Defined in main.c */ +#ifdef YY_STDINIT + yyin = stdin; + yyout = stdout; +#else + yyin = NULL; + yyout = NULL; +#endif + + /* For future reference: Set errno on error, since we are called by + * yylex_init() + */ + return 0; +} + +/* yylex_destroy is for both reentrant and non-reentrant scanners. */ +int yylex_destroy (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* Pop the buffer stack, destroying each element. */ + while(YY_CURRENT_BUFFER){ + yy_delete_buffer( YY_CURRENT_BUFFER , yyscanner ); + YY_CURRENT_BUFFER_LVALUE = NULL; + yypop_buffer_state(yyscanner); + } + + /* Destroy the stack itself. */ + yyfree(yyg->yy_buffer_stack , yyscanner); + yyg->yy_buffer_stack = NULL; + + /* Destroy the start condition stack. 
*/ + yyfree( yyg->yy_start_stack , yyscanner ); + yyg->yy_start_stack = NULL; + + /* Reset the globals. This is important in a non-reentrant scanner so the next time + * yylex() is called, initialization will occur. */ + yy_init_globals( yyscanner); + + /* Destroy the main struct (reentrant only). */ + yyfree ( yyscanner , yyscanner ); + yyscanner = NULL; + return 0; +} + +/* + * Internal utility routines. + */ + +#ifndef yytext_ptr +static void yy_flex_strncpy (char* s1, const char * s2, int n , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + (void)yyg; + + int i; + for ( i = 0; i < n; ++i ) + s1[i] = s2[i]; +} +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (const char * s , yyscan_t yyscanner) +{ + int n; + for ( n = 0; s[n]; ++n ) + ; + + return n; +} +#endif + +void *yyalloc (yy_size_t size , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + (void)yyg; + return malloc(size); +} + +void *yyrealloc (void * ptr, yy_size_t size , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + (void)yyg; + + /* The cast to (char *) in the following accommodates both + * implementations that use char* generic pointers, and those + * that use void* generic pointers. It works with the latter + * because both ANSI C and C++ allow castless assignment from + * any pointer type to void*, and deal with argument conversions + * as though doing an assignment. 
+ */ + return realloc(ptr, size); +} + +void yyfree (void * ptr , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + (void)yyg; + free( (char *) ptr ); /* see yyrealloc() for (char *) cast */ +} + +#define YYTABLES_NAME "yytables" + +#line 74 "fts0blex.l" + + diff --git a/storage/innobase/fts/fts0blex.l b/storage/innobase/fts/fts0blex.l new file mode 100644 index 00000000..cf19cd0f --- /dev/null +++ b/storage/innobase/fts/fts0blex.l @@ -0,0 +1,74 @@ +/***************************************************************************** + +Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/** + * @file fts/fts0blex.l + * FTS parser lexical analyzer + * + * Created 2007/5/9 Sunny Bains + */ + +%{ + +#include "fts0ast.h" +#include "fts0pars.h" + +/* Required for reentrant parser */ +#define YY_DECL int fts_blexer(YYSTYPE* val, yyscan_t yyscanner) +#define exit(A) ut_error + +%} + +%option noinput +%option nounput +%option noyywrap +%option nostdinit +%option reentrant +%option never-interactive + +%% + +[\t ]+ /* Ignore whitespace */ ; + +[*()+\-<>~@] { + val->oper = fts0bget_text(yyscanner)[0]; + + return(val->oper); +} + +[0-9]+ { + val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner)); + + return(FTS_NUMB); +} + +[^" \n*()+\-<>~@%]* { + val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner)); + + return(FTS_TERM); +} + +\"[^\"\n]*\" { + val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner)); + + return(FTS_TEXT); +} + +\n + +%% diff --git a/storage/innobase/fts/fts0config.cc b/storage/innobase/fts/fts0config.cc new file mode 100644 index 00000000..4566224e --- /dev/null +++ b/storage/innobase/fts/fts0config.cc @@ -0,0 +1,428 @@ +/***************************************************************************** + +Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, 2021, MariaDB Corporation. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file fts/fts0config.cc +Full Text Search configuration table. + +Created 2007/5/9 Sunny Bains +***********************************************************************/ + +#include "trx0roll.h" +#include "row0sel.h" + +#include "fts0priv.h" + +/******************************************************************//** +Callback function for fetching the config value. +@return always returns TRUE */ +static +ibool +fts_config_fetch_value( +/*===================*/ + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in/out: pointer to + fts_string_t */ +{ + sel_node_t* node = static_cast<sel_node_t*>(row); + fts_string_t* value = static_cast<fts_string_t*>(user_arg); + + dfield_t* dfield = que_node_get_val(node->select_list); + dtype_t* type = dfield_get_type(dfield); + ulint len = dfield_get_len(dfield); + void* data = dfield_get_data(dfield); + + ut_a(dtype_get_mtype(type) == DATA_VARCHAR); + + if (len != UNIV_SQL_NULL) { + ulint max_len = ut_min(value->f_len - 1, len); + + memcpy(value->f_str, data, max_len); + value->f_len = max_len; + value->f_str[value->f_len] = '\0'; + } + + return(TRUE); +} + +/******************************************************************//** +Get value from the config table. The caller must ensure that enough +space is allocated for value to hold the column contents.
+@return DB_SUCCESS or error code */ +dberr_t +fts_config_get_value( +/*=================*/ + trx_t* trx, /*!< transaction */ + fts_table_t* fts_table, /*!< in: the indexed + FTS table */ + const char* name, /*!< in: get config value for + this parameter name */ + fts_string_t* value) /*!< out: value read from + config table */ +{ + pars_info_t* info; + que_t* graph; + dberr_t error; + ulint name_len = strlen(name); + char table_name[MAX_FULL_NAME_LEN]; + + info = pars_info_create(); + + *value->f_str = '\0'; + ut_a(value->f_len > 0); + + pars_info_bind_function(info, "my_func", fts_config_fetch_value, + value); + + /* The len field of value must be set to the max bytes that + it can hold. On a successful read, the len field will be set + to the actual number of bytes copied to value. */ + pars_info_bind_varchar_literal(info, "name", (byte*) name, name_len); + + fts_table->suffix = "CONFIG"; + fts_get_table_name(fts_table, table_name); + pars_info_bind_id(info, "table_name", table_name); + + graph = fts_parse_sql( + fts_table, + info, + "DECLARE FUNCTION my_func;\n" + "DECLARE CURSOR c IS SELECT value FROM $table_name" + " WHERE key = :name;\n" + "BEGIN\n" + "" + "OPEN c;\n" + "WHILE 1 = 1 LOOP\n" + " FETCH c INTO my_func();\n" + " IF c % NOTFOUND THEN\n" + " EXIT;\n" + " END IF;\n" + "END LOOP;\n" + "CLOSE c;"); + + trx->op_info = "getting FTS config value"; + + error = fts_eval_sql(trx, graph); + que_graph_free(graph); + return(error); +} + +/*********************************************************************//** +Create the config table name for retrieving index specific value. +@return index config parameter name */ +char* +fts_config_create_index_param_name( +/*===============================*/ + const char* param, /*!< in: base name of param */ + const dict_index_t* index) /*!< in: index for config */ +{ + ulint len; + char* name; + + /* The format of the config name is: name_<index_id>. 
*/ + len = strlen(param); + + /* Caller is responsible for deleting name. */ + name = static_cast<char*>(ut_malloc_nokey( + len + FTS_AUX_MIN_TABLE_ID_LENGTH + 2)); + ::strcpy(name, param); + name[len] = '_'; + + fts_write_object_id(index->id, name + len + 1); + + return(name); +} + +/******************************************************************//** +Get value specific to an FTS index from the config table. The caller +must ensure that enough space is allocated for value to hold the +column contents. +@return DB_SUCCESS or error code */ +dberr_t +fts_config_get_index_value( +/*=======================*/ + trx_t* trx, /*!< transaction */ + dict_index_t* index, /*!< in: index */ + const char* param, /*!< in: get config value for + this parameter name */ + fts_string_t* value) /*!< out: value read from + config table */ +{ + char* name; + dberr_t error; + fts_table_t fts_table; + + FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, + index->table); + + /* We are responsible for free'ing name. */ + name = fts_config_create_index_param_name(param, index); + + error = fts_config_get_value(trx, &fts_table, name, value); + + ut_free(name); + + return(error); +} + +/******************************************************************//** +Set the value in the config table for name. 
+@return DB_SUCCESS or error code */ +dberr_t +fts_config_set_value( +/*=================*/ + trx_t* trx, /*!< transaction */ + fts_table_t* fts_table, /*!< in: the indexed + FTS table */ + const char* name, /*!< in: set config value for + this parameter name */ + const fts_string_t* + value) /*!< in: value to update */ +{ + pars_info_t* info; + que_t* graph; + dberr_t error; + undo_no_t undo_no; + undo_no_t n_rows_updated; + ulint name_len = strlen(name); + char table_name[MAX_FULL_NAME_LEN]; + + info = pars_info_create(); + + pars_info_bind_varchar_literal(info, "name", (byte*) name, name_len); + pars_info_bind_varchar_literal(info, "value", + value->f_str, value->f_len); + + const bool dict_locked = fts_table->table->fts->dict_locked; + + fts_table->suffix = "CONFIG"; + fts_get_table_name(fts_table, table_name, dict_locked); + pars_info_bind_id(info, "table_name", table_name); + + graph = fts_parse_sql( + fts_table, info, + "BEGIN UPDATE $table_name SET value = :value" + " WHERE key = :name;"); + + trx->op_info = "setting FTS config value"; + + undo_no = trx->undo_no; + + error = fts_eval_sql(trx, graph); + + que_graph_free(graph); + + n_rows_updated = trx->undo_no - undo_no; + + /* If the UPDATE did not advance trx->undo_no, do an insert instead. */ + if (n_rows_updated == 0) { + info = pars_info_create(); + + pars_info_bind_varchar_literal( + info, "name", (byte*) name, name_len); + + pars_info_bind_varchar_literal( + info, "value", value->f_str, value->f_len); + + fts_get_table_name(fts_table, table_name, dict_locked); + pars_info_bind_id(info, "table_name", table_name); + + graph = fts_parse_sql( + fts_table, info, + "BEGIN\n" + "INSERT INTO $table_name VALUES(:name, :value);"); + + trx->op_info = "inserting FTS config value"; + + error = fts_eval_sql(trx, graph); + + que_graph_free(graph); + } + + return(error); +} + +/******************************************************************//** +Set the value specific to an FTS index in the config table.
+@return DB_SUCCESS or error code */ +dberr_t +fts_config_set_index_value( +/*=======================*/ + trx_t* trx, /*!< transaction */ + dict_index_t* index, /*!< in: index */ + const char* param, /*!< in: set config value for + this parameter name */ + fts_string_t* value) /*!< in: value to write to the + config table */ +{ + char* name; + dberr_t error; + fts_table_t fts_table; + + FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, + index->table); + + /* We are responsible for free'ing name. */ + name = fts_config_create_index_param_name(param, index); + + error = fts_config_set_value(trx, &fts_table, name, value); + + ut_free(name); + + return(error); +} + +#ifdef FTS_OPTIMIZE_DEBUG +/******************************************************************//** +Get an ulint value from the config table. +@return DB_SUCCESS if all OK else error code */ +dberr_t +fts_config_get_index_ulint( +/*=======================*/ + trx_t* trx, /*!< in: transaction */ + dict_index_t* index, /*!< in: FTS index */ + const char* name, /*!< in: param name */ + ulint* int_value) /*!< out: value */ +{ + dberr_t error; + fts_string_t value; + + /* We set the length of value to the max bytes it can hold. This + information is used by the callback that reads the value.*/ + value.f_len = FTS_MAX_CONFIG_VALUE_LEN; + value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1)); + + error = fts_config_get_index_value(trx, index, name, &value); + + if (UNIV_UNLIKELY(error != DB_SUCCESS)) { + ib::error() << "(" << error << ") reading `" << name << "'"; + } else { + *int_value = strtoul((char*) value.f_str, NULL, 10); + } + + ut_free(value.f_str); + + return(error); +} + +/******************************************************************//** +Set an ulint value in the config table.
+@return DB_SUCCESS if all OK else error code */ +dberr_t +fts_config_set_index_ulint( +/*=======================*/ + trx_t* trx, /*!< in: transaction */ + dict_index_t* index, /*!< in: FTS index */ + const char* name, /*!< in: param name */ + ulint int_value) /*!< in: value */ +{ + dberr_t error; + fts_string_t value; + + /* Allocate a buffer of the maximum config value size. f_len is + overwritten below with the length returned by snprintf(). */ + value.f_len = FTS_MAX_CONFIG_VALUE_LEN; + value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1)); + + // FIXME: Get rid of snprintf + ut_a(FTS_MAX_INT_LEN < FTS_MAX_CONFIG_VALUE_LEN); + + value.f_len = snprintf( + (char*) value.f_str, FTS_MAX_INT_LEN, ULINTPF, int_value); + + error = fts_config_set_index_value(trx, index, name, &value); + + if (UNIV_UNLIKELY(error != DB_SUCCESS)) { + ib::error() << "(" << error << ") writing `" << name << "'"; + } + + ut_free(value.f_str); + + return(error); +} +#endif /* FTS_OPTIMIZE_DEBUG */ + +/******************************************************************//** +Get an ulint value from the config table. +@return DB_SUCCESS if all OK else error code */ +dberr_t +fts_config_get_ulint( +/*=================*/ + trx_t* trx, /*!< in: transaction */ + fts_table_t* fts_table, /*!< in: the indexed + FTS table */ + const char* name, /*!< in: param name */ + ulint* int_value) /*!< out: value */ +{ + dberr_t error; + fts_string_t value; + + /* We set the length of value to the max bytes it can hold.
This + information is used by the callback that reads the value.*/ + value.f_len = FTS_MAX_CONFIG_VALUE_LEN; + value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1)); + + error = fts_config_get_value(trx, fts_table, name, &value); + + if (UNIV_UNLIKELY(error != DB_SUCCESS)) { + ib::error() << "(" << error << ") reading `" << name << "'"; + } else { + *int_value = strtoul((char*) value.f_str, NULL, 10); + } + + ut_free(value.f_str); + + return(error); +} + +/******************************************************************//** +Set an ulint value in the config table. +@return DB_SUCCESS if all OK else error code */ +dberr_t +fts_config_set_ulint( +/*=================*/ + trx_t* trx, /*!< in: transaction */ + fts_table_t* fts_table, /*!< in: the indexed + FTS table */ + const char* name, /*!< in: param name */ + ulint int_value) /*!< in: value */ +{ + dberr_t error; + fts_string_t value; + + /* Allocate a buffer of the maximum config value size. f_len is + overwritten below with the length returned by snprintf(). */ + value.f_len = FTS_MAX_CONFIG_VALUE_LEN; + value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1)); + + ut_a(FTS_MAX_INT_LEN < FTS_MAX_CONFIG_VALUE_LEN); + + value.f_len = (ulint) snprintf( + (char*) value.f_str, FTS_MAX_INT_LEN, ULINTPF, int_value); + + error = fts_config_set_value(trx, fts_table, name, &value); + + if (UNIV_UNLIKELY(error != DB_SUCCESS)) { + ib::error() << "(" << error << ") writing `" << name << "'"; + } + + ut_free(value.f_str); + + return(error); +} diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc new file mode 100644 index 00000000..0775d939 --- /dev/null +++ b/storage/innobase/fts/fts0fts.cc @@ -0,0 +1,6182 @@ +/***************************************************************************** + +Copyright (c) 2011, 2021, Oracle and/or its affiliates. +Copyright (c) 2016, 2023, MariaDB Corporation.
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file fts/fts0fts.cc +Full Text Search interface +***********************************************************************/ + +#include "trx0roll.h" +#include "trx0purge.h" +#include "row0mysql.h" +#include "row0upd.h" +#include "dict0types.h" +#include "dict0stats_bg.h" +#include "row0sel.h" +#include "fts0fts.h" +#include "fts0priv.h" +#include "fts0types.h" +#include "fts0types.inl" +#include "fts0vlc.h" +#include "fts0plugin.h" +#include "dict0stats.h" +#include "btr0pcur.h" + +static const ulint FTS_MAX_ID_LEN = 32; + +/** Column name from the FTS config table */ +#define FTS_MAX_CACHE_SIZE_IN_MB "cache_size_in_mb" + +/** Verify if a aux table name is a obsolete table +by looking up the key word in the obsolete table names */ +#define FTS_IS_OBSOLETE_AUX_TABLE(table_name) \ + (strstr((table_name), "DOC_ID") != NULL \ + || strstr((table_name), "ADDED") != NULL \ + || strstr((table_name), "STOPWORDS") != NULL) + +/** This is maximum FTS cache for each table and would be +a configurable variable */ +Atomic_relaxed<size_t> fts_max_cache_size; + +/** Whether the total memory used for FTS cache is exhausted, and we will +need a sync to free some memory */ +bool fts_need_sync = false; + +/** 
Variable specifying the total memory allocated for FTS cache */
Atomic_relaxed<size_t> fts_max_total_cache_size;

/** FTS result cache limit for each query; intended to be a
configurable variable */
size_t	fts_result_cache_limit;

/** Variable specifying the maximum FTS token size */
ulong	fts_max_token_size;

/** Variable specifying the minimum FTS token size */
ulong	fts_min_token_size;


// FIXME: testing
// NOTE(review): these two counters look like leftover instrumentation;
// nothing in the visible part of the file reads or writes them.
static time_t elapsed_time;
static ulint n_nodes;

#ifdef FTS_CACHE_SIZE_DEBUG
/** The cache size permissible lower limit, in megabytes (1 MB).
The constant is expressed in MB, as its name says. */
static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;

/** The cache size permissible upper limit, in megabytes (1024 MB = 1G) */
static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
#endif

/** Time to sleep after DEADLOCK error before retrying operation. */
static const std::chrono::milliseconds FTS_DEADLOCK_RETRY_WAIT(100);

/** InnoDB default stopword list:
There are different versions of stopwords, the stop words listed
below comes from "Google Stopword" list. Reference:
http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list.
The final version of InnoDB default stopword list is still pending
for decision.
The list is terminated by a NULL entry. */
const char *fts_default_stopword[] =
{
	"a",
	"about",
	"an",
	"are",
	"as",
	"at",
	"be",
	"by",
	"com",
	"de",
	"en",
	"for",
	"from",
	"how",
	"i",
	"in",
	"is",
	"it",
	"la",
	"of",
	"on",
	"or",
	"that",
	"the",
	"this",
	"to",
	"was",
	"what",
	"when",
	"where",
	"who",
	"will",
	"with",
	"und",
	/* NOTE(review): "the" already appears earlier in this list; the
	duplicate is kept because this is an exported table whose contents
	may be visible to users. */
	"the",
	"www",
	NULL
};

/** FTS auxiliary table suffixes that are common to all FT indexes.
The list is terminated by a NULL entry. */
const char* fts_common_tables[] = {
	"BEING_DELETED",
	"BEING_DELETED_CACHE",
	"CONFIG",
	"DELETED",
	"DELETED_CACHE",
	NULL
};

/** FTS auxiliary INDEX split intervals. 
*/
/* The table ends with a { 0, NULL } sentinel entry. */
const fts_index_selector_t fts_index_selector[] = {
	{ 9, "INDEX_1" },
	{ 65, "INDEX_2" },
	{ 70, "INDEX_3" },
	{ 75, "INDEX_4" },
	{ 80, "INDEX_5" },
	{ 85, "INDEX_6" },
	{  0 , NULL	 }
};

/** Default config values for FTS indexes on a table.
This is an InnoDB SQL procedure; $config_table is bound by the caller.
It inserts one row per config key with its initial value. */
static const char* fts_config_table_insert_values_sql =
	"PROCEDURE P() IS\n"
	"BEGIN\n"
	"\n"
	"INSERT INTO $config_table VALUES('"
		FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n"
	""
	"INSERT INTO $config_table VALUES('"
		FTS_OPTIMIZE_LIMIT_IN_SECS  "', '180');\n"
	""
	"INSERT INTO $config_table VALUES ('"
		FTS_SYNCED_DOC_ID "', '0');\n"
	""
	"INSERT INTO $config_table VALUES ('"
		FTS_TOTAL_DELETED_COUNT "', '0');\n"
	"" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
	"INSERT INTO $config_table VALUES ('"
		FTS_TABLE_STATE "', '0');\n"
	"END;\n";

/** FTS tokenize parameter for plugin parser */
struct fts_tokenize_param_t {
	fts_doc_t*	result_doc;	/*!< Result doc for tokens */
	ulint		add_pos;	/*!< Added position for tokens */
};

/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out]	sync		sync state
@param[in]	unlock_cache	whether unlock cache lock when write node
@param[in]	wait		whether wait when a sync is in progress
@return DB_SUCCESS if all OK */
static
dberr_t
fts_sync(
	fts_sync_t*	sync,
	bool		unlock_cache,
	bool		wait);

/****************************************************************//**
Release all resources held by the words rb tree e.g., the node ilist. */
static
void
fts_words_free(
/*===========*/
	ib_rbt_t*	words)		/*!< in: rb tree of words */
	MY_ATTRIBUTE((nonnull));
#ifdef FTS_CACHE_SIZE_DEBUG
/****************************************************************//**
Read the max cache size parameter from the config table. 
*/
static
void
fts_update_max_cache_size(
/*======================*/
	fts_sync_t*	sync);		/*!< in: sync state */
#endif

/*********************************************************************//**
This function fetches the document just inserted right before
we commit the transaction, tokenizes the inserted text data
and inserts it into the FTS auxiliary table and its cache. */
static
void
fts_add_doc_by_id(
/*==============*/
	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
	doc_id_t	doc_id);	/*!< in: doc id */

/** Tokenize a document.
@param[in,out]	doc	document to tokenize
@param[out]	result	tokenization result
@param[in]	parser	pluggable parser */
static
void
fts_tokenize_document(
	fts_doc_t*		doc,
	fts_doc_t*		result,
	st_mysql_ftparser*	parser);

/** Continue to tokenize a document.
@param[in,out]	doc	document to tokenize
@param[in]	add_pos	add this position to all tokens from this tokenization
@param[out]	result	tokenization result
@param[in]	parser	pluggable parser */
static
void
fts_tokenize_document_next(
	fts_doc_t*		doc,
	ulint			add_pos,
	fts_doc_t*		result,
	st_mysql_ftparser*	parser);

/** Create the vector of fts_get_doc_t instances.
@param[in,out]	cache	fts cache
@return	vector of fts_get_doc_t instances */
static
ib_vector_t*
fts_get_docs_create(
	fts_cache_t*	cache);

/** Free the FTS cache.
Destroys the four cache mutexes and the sync condition variable, frees
the cached stopword tree and the sync heap if they exist, and finally
frees cache->cache_heap. fts_cache_create() allocates the cache object
itself from that heap, so the pointer is invalid after this call.
@param[in,out]	cache to be freed */
static
void
fts_cache_destroy(fts_cache_t* cache)
{
	mysql_mutex_destroy(&cache->lock);
	mysql_mutex_destroy(&cache->init_lock);
	mysql_mutex_destroy(&cache->deleted_lock);
	mysql_mutex_destroy(&cache->doc_id_lock);
	pthread_cond_destroy(&cache->sync->cond);

	if (cache->stopword_info.cached_stopword) {
		rbt_free(cache->stopword_info.cached_stopword);
	}

	/* The sync heap is transient and may already have been released. */
	if (cache->sync_heap->arg) {
		mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
	}

	/* Must be last: this heap backs the cache structure itself. */
	mem_heap_free(cache->cache_heap);
}

/** Get a character set based on precise type. 
+@param prtype precise type +@return the corresponding character set */ +UNIV_INLINE +CHARSET_INFO* +fts_get_charset(ulint prtype) +{ +#ifdef UNIV_DEBUG + switch (prtype & DATA_MYSQL_TYPE_MASK) { + case MYSQL_TYPE_BIT: + case MYSQL_TYPE_STRING: + case MYSQL_TYPE_VAR_STRING: + case MYSQL_TYPE_TINY_BLOB: + case MYSQL_TYPE_MEDIUM_BLOB: + case MYSQL_TYPE_BLOB: + case MYSQL_TYPE_LONG_BLOB: + case MYSQL_TYPE_VARCHAR: + break; + default: + ut_error; + } +#endif /* UNIV_DEBUG */ + + uint cs_num = (uint) dtype_get_charset_coll(prtype); + + if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) { + return(cs); + } + + ib::fatal() << "Unable to find charset-collation " << cs_num; + return(NULL); +} + +/****************************************************************//** +This function loads the default InnoDB stopword list */ +static +void +fts_load_default_stopword( +/*======================*/ + fts_stopword_t* stopword_info) /*!< in: stopword info */ +{ + fts_string_t str; + mem_heap_t* heap; + ib_alloc_t* allocator; + ib_rbt_t* stop_words; + + allocator = stopword_info->heap; + heap = static_cast<mem_heap_t*>(allocator->arg); + + if (!stopword_info->cached_stopword) { + stopword_info->cached_stopword = rbt_create_arg_cmp( + sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp, + &my_charset_latin1); + } + + stop_words = stopword_info->cached_stopword; + + str.f_n_char = 0; + + for (ulint i = 0; fts_default_stopword[i]; ++i) { + char* word; + fts_tokenizer_word_t new_word; + + /* We are going to duplicate the value below. 
*/ + word = const_cast<char*>(fts_default_stopword[i]); + + new_word.nodes = ib_vector_create( + allocator, sizeof(fts_node_t), 4); + + str.f_len = strlen(word); + str.f_str = reinterpret_cast<byte*>(word); + + fts_string_dup(&new_word.text, &str, heap); + + rbt_insert(stop_words, &new_word, &new_word); + } + + stopword_info->status = STOPWORD_FROM_DEFAULT; +} + +/****************************************************************//** +Callback function to read a single stopword value. +@return Always return TRUE */ +static +ibool +fts_read_stopword( +/*==============*/ + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: pointer to ib_vector_t */ +{ + ib_alloc_t* allocator; + fts_stopword_t* stopword_info; + sel_node_t* sel_node; + que_node_t* exp; + ib_rbt_t* stop_words; + dfield_t* dfield; + fts_string_t str; + mem_heap_t* heap; + ib_rbt_bound_t parent; + dict_table_t* table; + + sel_node = static_cast<sel_node_t*>(row); + table = sel_node->table_list->table; + stopword_info = static_cast<fts_stopword_t*>(user_arg); + + stop_words = stopword_info->cached_stopword; + allocator = static_cast<ib_alloc_t*>(stopword_info->heap); + heap = static_cast<mem_heap_t*>(allocator->arg); + + exp = sel_node->select_list; + + /* We only need to read the first column */ + dfield = que_node_get_val(exp); + + str.f_n_char = 0; + str.f_str = static_cast<byte*>(dfield_get_data(dfield)); + str.f_len = dfield_get_len(dfield); + exp = que_node_get_next(exp); + ut_ad(exp); + + if (table->versioned()) { + dfield = que_node_get_val(exp); + ut_ad(dfield_get_type(dfield)->vers_sys_end()); + void* data = dfield_get_data(dfield); + ulint len = dfield_get_len(dfield); + if (table->versioned_by_id()) { + ut_ad(len == sizeof trx_id_max_bytes); + if (0 != memcmp(data, trx_id_max_bytes, len)) { + return true; + } + } else { + ut_ad(len == sizeof timestamp_max_bytes); + if (0 != memcmp(data, timestamp_max_bytes, len)) { + return true; + } + } + } + ut_ad(!que_node_get_next(exp)); + + /* 
Only create new node if it is a value not already existed */ + if (str.f_len != UNIV_SQL_NULL + && rbt_search(stop_words, &parent, &str) != 0) { + + fts_tokenizer_word_t new_word; + + new_word.nodes = ib_vector_create( + allocator, sizeof(fts_node_t), 4); + + new_word.text.f_str = static_cast<byte*>( + mem_heap_alloc(heap, str.f_len + 1)); + + memcpy(new_word.text.f_str, str.f_str, str.f_len); + + new_word.text.f_n_char = 0; + new_word.text.f_len = str.f_len; + new_word.text.f_str[str.f_len] = 0; + + rbt_insert(stop_words, &new_word, &new_word); + } + + return(TRUE); +} + +/******************************************************************//** +Load user defined stopword from designated user table +@return whether the operation is successful */ +static +bool +fts_load_user_stopword( +/*===================*/ + fts_t* fts, /*!< in: FTS struct */ + const char* stopword_table_name, /*!< in: Stopword table + name */ + fts_stopword_t* stopword_info) /*!< in: Stopword info */ +{ + if (!fts->dict_locked) { + dict_sys.lock(SRW_LOCK_CALL); + } + + /* Validate the user table existence in the right format */ + bool ret= false; + const char* row_end; + stopword_info->charset = fts_valid_stopword_table(stopword_table_name, + &row_end); + if (!stopword_info->charset) { +cleanup: + if (!fts->dict_locked) { + dict_sys.unlock(); + } + + return ret; + } + + trx_t* trx = trx_create(); + trx->op_info = "Load user stopword table into FTS cache"; + + if (!stopword_info->cached_stopword) { + /* Create the stopword RB tree with the stopword column + charset. 
All comparison will use this charset */ + stopword_info->cached_stopword = rbt_create_arg_cmp( + sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp, + (void*)stopword_info->charset); + + } + + pars_info_t* info = pars_info_create(); + + pars_info_bind_id(info, "table_stopword", stopword_table_name); + pars_info_bind_id(info, "row_end", row_end); + + pars_info_bind_function(info, "my_func", fts_read_stopword, + stopword_info); + + que_t* graph = pars_sql( + info, + "PROCEDURE P() IS\n" + "DECLARE FUNCTION my_func;\n" + "DECLARE CURSOR c IS" + " SELECT value, $row_end" + " FROM $table_stopword;\n" + "BEGIN\n" + "\n" + "OPEN c;\n" + "WHILE 1 = 1 LOOP\n" + " FETCH c INTO my_func();\n" + " IF c % NOTFOUND THEN\n" + " EXIT;\n" + " END IF;\n" + "END LOOP;\n" + "CLOSE c;" + "END;\n"); + + for (;;) { + dberr_t error = fts_eval_sql(trx, graph); + + if (UNIV_LIKELY(error == DB_SUCCESS)) { + fts_sql_commit(trx); + stopword_info->status = STOPWORD_USER_TABLE; + break; + } else { + fts_sql_rollback(trx); + + if (error == DB_LOCK_WAIT_TIMEOUT) { + ib::warn() << "Lock wait timeout reading user" + " stopword table. Retrying!"; + + trx->error_state = DB_SUCCESS; + } else { + ib::error() << "Error '" << error + << "' while reading user stopword" + " table."; + ret = FALSE; + break; + } + } + } + + que_graph_free(graph); + trx->free(); + ret = true; + goto cleanup; +} + +/******************************************************************//** +Initialize the index cache. 
*/ +static +void +fts_index_cache_init( +/*=================*/ + ib_alloc_t* allocator, /*!< in: the allocator to use */ + fts_index_cache_t* index_cache) /*!< in: index cache */ +{ + ulint i; + + ut_a(index_cache->words == NULL); + + index_cache->words = rbt_create_arg_cmp( + sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp, + (void*) index_cache->charset); + + ut_a(index_cache->doc_stats == NULL); + + index_cache->doc_stats = ib_vector_create( + allocator, sizeof(fts_doc_stats_t), 4); + + for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) { + ut_a(index_cache->ins_graph[i] == NULL); + ut_a(index_cache->sel_graph[i] == NULL); + } +} + +/*********************************************************************//** +Initialize FTS cache. */ +void +fts_cache_init( +/*===========*/ + fts_cache_t* cache) /*!< in: cache to initialize */ +{ + ulint i; + + /* Just to make sure */ + ut_a(cache->sync_heap->arg == NULL); + + cache->sync_heap->arg = mem_heap_create(1024); + + cache->total_size = 0; + cache->total_size_at_sync = 0; + + mysql_mutex_lock(&cache->deleted_lock); + cache->deleted_doc_ids = ib_vector_create( + cache->sync_heap, sizeof(doc_id_t), 4); + mysql_mutex_unlock(&cache->deleted_lock); + + /* Reset the cache data for all the FTS indexes. */ + for (i = 0; i < ib_vector_size(cache->indexes); ++i) { + fts_index_cache_t* index_cache; + + index_cache = static_cast<fts_index_cache_t*>( + ib_vector_get(cache->indexes, i)); + + fts_index_cache_init(cache->sync_heap, index_cache); + } +} + +/****************************************************************//** +Create a FTS cache. 
*/ +fts_cache_t* +fts_cache_create( +/*=============*/ + dict_table_t* table) /*!< in: table owns the FTS cache */ +{ + mem_heap_t* heap; + fts_cache_t* cache; + + heap = static_cast<mem_heap_t*>(mem_heap_create(512)); + + cache = static_cast<fts_cache_t*>( + mem_heap_zalloc(heap, sizeof(*cache))); + + cache->cache_heap = heap; + + mysql_mutex_init(fts_cache_mutex_key, &cache->lock, nullptr); + mysql_mutex_init(fts_cache_init_mutex_key, &cache->init_lock, nullptr); + mysql_mutex_init(fts_delete_mutex_key, &cache->deleted_lock, nullptr); + mysql_mutex_init(fts_doc_id_mutex_key, &cache->doc_id_lock, nullptr); + + /* This is the heap used to create the cache itself. */ + cache->self_heap = ib_heap_allocator_create(heap); + + /* This is a transient heap, used for storing sync data. */ + cache->sync_heap = ib_heap_allocator_create(heap); + cache->sync_heap->arg = NULL; + + cache->sync = static_cast<fts_sync_t*>( + mem_heap_zalloc(heap, sizeof(fts_sync_t))); + + cache->sync->table = table; + pthread_cond_init(&cache->sync->cond, nullptr); + + /* Create the index cache vector that will hold the inverted indexes. 
*/ + cache->indexes = ib_vector_create( + cache->self_heap, sizeof(fts_index_cache_t), 2); + + fts_cache_init(cache); + + cache->stopword_info.cached_stopword = NULL; + cache->stopword_info.charset = NULL; + + cache->stopword_info.heap = cache->self_heap; + + cache->stopword_info.status = STOPWORD_NOT_INIT; + + return(cache); +} + +/*******************************************************************//** +Add a newly create index into FTS cache */ +void +fts_add_index( +/*==========*/ + dict_index_t* index, /*!< FTS index to be added */ + dict_table_t* table) /*!< table */ +{ + fts_t* fts = table->fts; + fts_cache_t* cache; + fts_index_cache_t* index_cache; + + ut_ad(fts); + cache = table->fts->cache; + + mysql_mutex_lock(&cache->init_lock); + + ib_vector_push(fts->indexes, &index); + + index_cache = fts_find_index_cache(cache, index); + + if (!index_cache) { + /* Add new index cache structure */ + index_cache = fts_cache_index_cache_create(table, index); + } + + mysql_mutex_unlock(&cache->init_lock); +} + +/*******************************************************************//** +recalibrate get_doc structure after index_cache in cache->indexes changed */ +static +void +fts_reset_get_doc( +/*==============*/ + fts_cache_t* cache) /*!< in: FTS index cache */ +{ + fts_get_doc_t* get_doc; + ulint i; + + mysql_mutex_assert_owner(&cache->init_lock); + + ib_vector_reset(cache->get_docs); + + for (i = 0; i < ib_vector_size(cache->indexes); i++) { + fts_index_cache_t* ind_cache; + + ind_cache = static_cast<fts_index_cache_t*>( + ib_vector_get(cache->indexes, i)); + + get_doc = static_cast<fts_get_doc_t*>( + ib_vector_push(cache->get_docs, NULL)); + + memset(get_doc, 0x0, sizeof(*get_doc)); + + get_doc->index_cache = ind_cache; + get_doc->cache = cache; + } + + ut_ad(ib_vector_size(cache->get_docs) + == ib_vector_size(cache->indexes)); +} + +/*******************************************************************//** +Check an index is in the table->indexes list +@return TRUE if 
it exists */ +static +ibool +fts_in_dict_index( +/*==============*/ + dict_table_t* table, /*!< in: Table */ + dict_index_t* index_check) /*!< in: index to be checked */ +{ + dict_index_t* index; + + for (index = dict_table_get_first_index(table); + index != NULL; + index = dict_table_get_next_index(index)) { + + if (index == index_check) { + return(TRUE); + } + } + + return(FALSE); +} + +/*******************************************************************//** +Check an index is in the fts->cache->indexes list +@return TRUE if it exists */ +static +ibool +fts_in_index_cache( +/*===============*/ + dict_table_t* table, /*!< in: Table */ + dict_index_t* index) /*!< in: index to be checked */ +{ + ulint i; + + for (i = 0; i < ib_vector_size(table->fts->cache->indexes); i++) { + fts_index_cache_t* index_cache; + + index_cache = static_cast<fts_index_cache_t*>( + ib_vector_get(table->fts->cache->indexes, i)); + + if (index_cache->index == index) { + return(TRUE); + } + } + + return(FALSE); +} + +/*******************************************************************//** +Check indexes in the fts->indexes is also present in index cache and +table->indexes list +@return TRUE if all indexes match */ +ibool +fts_check_cached_index( +/*===================*/ + dict_table_t* table) /*!< in: Table where indexes are dropped */ +{ + ulint i; + + if (!table->fts || !table->fts->cache) { + return(TRUE); + } + + ut_a(ib_vector_size(table->fts->indexes) + == ib_vector_size(table->fts->cache->indexes)); + + for (i = 0; i < ib_vector_size(table->fts->indexes); i++) { + dict_index_t* index; + + index = static_cast<dict_index_t*>( + ib_vector_getp(table->fts->indexes, i)); + + if (!fts_in_index_cache(table, index)) { + return(FALSE); + } + + if (!fts_in_dict_index(table, index)) { + return(FALSE); + } + } + + return(TRUE); +} + +/** Clear all fts resources when there is no internal DOC_ID +and there are no new fts index to add. 
+@param[in,out] table table where fts is to be freed */ +void fts_clear_all(dict_table_t *table) +{ + if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) || + !table->fts || + !ib_vector_is_empty(table->fts->indexes)) + return; + + for (const dict_index_t *index= dict_table_get_first_index(table); + index; index= dict_table_get_next_index(index)) + if (index->type & DICT_FTS) + return; + + fts_optimize_remove_table(table); + + table->fts->~fts_t(); + table->fts= nullptr; + DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS); +} + +/*******************************************************************//** +Drop auxiliary tables related to an FTS index +@return DB_SUCCESS or error number */ +dberr_t +fts_drop_index( +/*===========*/ + dict_table_t* table, /*!< in: Table where indexes are dropped */ + dict_index_t* index, /*!< in: Index to be dropped */ + trx_t* trx) /*!< in: Transaction for the drop */ +{ + ib_vector_t* indexes = table->fts->indexes; + dberr_t err = DB_SUCCESS; + + ut_a(indexes); + + if ((ib_vector_size(indexes) == 1 + && (index == static_cast<dict_index_t*>( + ib_vector_getp(table->fts->indexes, 0))) + && DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) + || ib_vector_is_empty(indexes)) { + doc_id_t current_doc_id; + doc_id_t first_doc_id; + + DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS); + + current_doc_id = table->fts->cache->next_doc_id; + first_doc_id = table->fts->cache->first_doc_id; + fts_cache_clear(table->fts->cache); + fts_cache_destroy(table->fts->cache); + table->fts->cache = fts_cache_create(table); + table->fts->cache->next_doc_id = current_doc_id; + table->fts->cache->first_doc_id = first_doc_id; + } else { + fts_cache_t* cache = table->fts->cache; + fts_index_cache_t* index_cache; + + mysql_mutex_lock(&cache->init_lock); + + index_cache = fts_find_index_cache(cache, index); + + if (index_cache != NULL) { + if (index_cache->words) { + fts_words_free(index_cache->words); + rbt_free(index_cache->words); + } + + ib_vector_remove(cache->indexes, 
*(void**) index_cache); + } + + if (cache->get_docs) { + fts_reset_get_doc(cache); + } + + mysql_mutex_unlock(&cache->init_lock); + } + + err = fts_drop_index_tables(trx, *index); + + ib_vector_remove(indexes, (const void*) index); + + return(err); +} + +/****************************************************************//** +Create an FTS index cache. */ +CHARSET_INFO* +fts_index_get_charset( +/*==================*/ + dict_index_t* index) /*!< in: FTS index */ +{ + CHARSET_INFO* charset = NULL; + dict_field_t* field; + ulint prtype; + + field = dict_index_get_nth_field(index, 0); + prtype = field->col->prtype; + + charset = fts_get_charset(prtype); + +#ifdef FTS_DEBUG + /* Set up charset info for this index. Please note all + field of the FTS index should have the same charset */ + for (i = 1; i < index->n_fields; i++) { + CHARSET_INFO* fld_charset; + + field = dict_index_get_nth_field(index, i); + prtype = field->col->prtype; + + fld_charset = fts_get_charset(prtype); + + /* All FTS columns should have the same charset */ + if (charset) { + ut_a(charset == fld_charset); + } else { + charset = fld_charset; + } + } +#endif + + return(charset); + +} +/****************************************************************//** +Create an FTS index cache. +@return Index Cache */ +fts_index_cache_t* +fts_cache_index_cache_create( +/*=========================*/ + dict_table_t* table, /*!< in: table with FTS index */ + dict_index_t* index) /*!< in: FTS index */ +{ + ulint n_bytes; + fts_index_cache_t* index_cache; + fts_cache_t* cache = table->fts->cache; + + ut_a(cache != NULL); + + mysql_mutex_assert_owner(&cache->init_lock); + + /* Must not already exist in the cache vector. 
*/ + ut_a(fts_find_index_cache(cache, index) == NULL); + + index_cache = static_cast<fts_index_cache_t*>( + ib_vector_push(cache->indexes, NULL)); + + memset(index_cache, 0x0, sizeof(*index_cache)); + + index_cache->index = index; + + index_cache->charset = fts_index_get_charset(index); + + n_bytes = sizeof(que_t*) * FTS_NUM_AUX_INDEX; + + index_cache->ins_graph = static_cast<que_t**>( + mem_heap_zalloc(static_cast<mem_heap_t*>( + cache->self_heap->arg), n_bytes)); + + index_cache->sel_graph = static_cast<que_t**>( + mem_heap_zalloc(static_cast<mem_heap_t*>( + cache->self_heap->arg), n_bytes)); + + fts_index_cache_init(cache->sync_heap, index_cache); + + if (cache->get_docs) { + fts_reset_get_doc(cache); + } + + return(index_cache); +} + +/****************************************************************//** +Release all resources help by the words rb tree e.g., the node ilist. */ +static +void +fts_words_free( +/*===========*/ + ib_rbt_t* words) /*!< in: rb tree of words */ +{ + const ib_rbt_node_t* rbt_node; + + /* Free the resources held by a word. */ + for (rbt_node = rbt_first(words); + rbt_node != NULL; + rbt_node = rbt_first(words)) { + + ulint i; + fts_tokenizer_word_t* word; + + word = rbt_value(fts_tokenizer_word_t, rbt_node); + + /* Free the ilists of this word. */ + for (i = 0; i < ib_vector_size(word->nodes); ++i) { + + fts_node_t* fts_node = static_cast<fts_node_t*>( + ib_vector_get(word->nodes, i)); + + ut_free(fts_node->ilist); + fts_node->ilist = NULL; + } + + /* NOTE: We are responsible for free'ing the node */ + ut_free(rbt_remove_node(words, rbt_node)); + } +} + +/** Clear cache. 
+@param[in,out] cache fts cache */ +void +fts_cache_clear( + fts_cache_t* cache) +{ + ulint i; + + for (i = 0; i < ib_vector_size(cache->indexes); ++i) { + ulint j; + fts_index_cache_t* index_cache; + + index_cache = static_cast<fts_index_cache_t*>( + ib_vector_get(cache->indexes, i)); + + fts_words_free(index_cache->words); + + rbt_free(index_cache->words); + + index_cache->words = NULL; + + for (j = 0; j < FTS_NUM_AUX_INDEX; ++j) { + + if (index_cache->ins_graph[j] != NULL) { + + que_graph_free(index_cache->ins_graph[j]); + + index_cache->ins_graph[j] = NULL; + } + + if (index_cache->sel_graph[j] != NULL) { + + que_graph_free(index_cache->sel_graph[j]); + + index_cache->sel_graph[j] = NULL; + } + } + + index_cache->doc_stats = NULL; + } + + fts_need_sync = false; + + cache->total_size = 0; + + mysql_mutex_lock(&cache->deleted_lock); + cache->deleted_doc_ids = NULL; + mysql_mutex_unlock(&cache->deleted_lock); + + mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg)); + cache->sync_heap->arg = NULL; +} + +/*********************************************************************//** +Search the index specific cache for a particular FTS index. +@return the index cache else NULL */ +UNIV_INLINE +fts_index_cache_t* +fts_get_index_cache( +/*================*/ + fts_cache_t* cache, /*!< in: cache to search */ + const dict_index_t* index) /*!< in: index to search for */ +{ +#ifdef SAFE_MUTEX + ut_ad(mysql_mutex_is_owner(&cache->lock) + || mysql_mutex_is_owner(&cache->init_lock)); +#endif /* SAFE_MUTEX */ + + for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) { + fts_index_cache_t* index_cache; + + index_cache = static_cast<fts_index_cache_t*>( + ib_vector_get(cache->indexes, i)); + + if (index_cache->index == index) { + + return(index_cache); + } + } + + return(NULL); +} + +#ifdef FTS_DEBUG +/*********************************************************************//** +Search the index cache for a get_doc structure. 
+@return the fts_get_doc_t item else NULL */ +static +fts_get_doc_t* +fts_get_index_get_doc( +/*==================*/ + fts_cache_t* cache, /*!< in: cache to search */ + const dict_index_t* index) /*!< in: index to search for */ +{ + ulint i; + + mysql_mutex_assert_owner(&cache->init_lock); + + for (i = 0; i < ib_vector_size(cache->get_docs); ++i) { + fts_get_doc_t* get_doc; + + get_doc = static_cast<fts_get_doc_t*>( + ib_vector_get(cache->get_docs, i)); + + if (get_doc->index_cache->index == index) { + + return(get_doc); + } + } + + return(NULL); +} +#endif + +/**********************************************************************//** +Find an existing word, or if not found, create one and return it. +@return specified word token */ +static +fts_tokenizer_word_t* +fts_tokenizer_word_get( +/*===================*/ + fts_cache_t* cache, /*!< in: cache */ + fts_index_cache_t* + index_cache, /*!< in: index cache */ + fts_string_t* text) /*!< in: node text */ +{ + fts_tokenizer_word_t* word; + ib_rbt_bound_t parent; + + mysql_mutex_assert_owner(&cache->lock); + + /* If it is a stopword, do not index it */ + if (!fts_check_token(text, + cache->stopword_info.cached_stopword, + index_cache->charset)) { + + return(NULL); + } + + /* Check if we found a match, if not then add word to tree. */ + if (rbt_search(index_cache->words, &parent, text) != 0) { + mem_heap_t* heap; + fts_tokenizer_word_t new_word; + + heap = static_cast<mem_heap_t*>(cache->sync_heap->arg); + + new_word.nodes = ib_vector_create( + cache->sync_heap, sizeof(fts_node_t), 4); + + fts_string_dup(&new_word.text, text, heap); + + parent.last = rbt_add_node( + index_cache->words, &parent, &new_word); + + /* Take into account the RB tree memory use and the vector. 
*/ + cache->total_size += sizeof(new_word) + + sizeof(ib_rbt_node_t) + + text->f_len + + (sizeof(fts_node_t) * 4) + + sizeof(*new_word.nodes); + + ut_ad(rbt_validate(index_cache->words)); + } + + word = rbt_value(fts_tokenizer_word_t, parent.last); + + return(word); +} + +/**********************************************************************//** +Add the given doc_id/word positions to the given node's ilist. */ +void +fts_cache_node_add_positions( +/*=========================*/ + fts_cache_t* cache, /*!< in: cache */ + fts_node_t* node, /*!< in: word node */ + doc_id_t doc_id, /*!< in: doc id */ + ib_vector_t* positions) /*!< in: fts_token_t::positions */ +{ + ulint i; + byte* ptr; + byte* ilist; + ulint enc_len; + ulint last_pos; + byte* ptr_start; + doc_id_t doc_id_delta; + +#ifdef SAFE_MUTEX + if (cache) { + mysql_mutex_assert_owner(&cache->lock); + } +#endif /* SAFE_MUTEX */ + + ut_ad(doc_id >= node->last_doc_id); + + /* Calculate the space required to store the ilist. */ + doc_id_delta = doc_id - node->last_doc_id; + enc_len = fts_get_encoded_len(doc_id_delta); + + last_pos = 0; + for (i = 0; i < ib_vector_size(positions); i++) { + ulint pos = *(static_cast<ulint*>( + ib_vector_get(positions, i))); + + ut_ad(last_pos == 0 || pos > last_pos); + + enc_len += fts_get_encoded_len(pos - last_pos); + last_pos = pos; + } + + /* The 0x00 byte at the end of the token positions list. */ + enc_len++; + + if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) { + /* No need to allocate more space, we can fit in the new + data at the end of the old one. */ + ilist = NULL; + ptr = node->ilist + node->ilist_size; + } else { + ulint new_size = node->ilist_size + enc_len; + + /* Over-reserve space by a fixed size for small lengths and + by 20% for lengths >= 48 bytes. 
*/ + if (new_size < 16) { + new_size = 16; + } else if (new_size < 32) { + new_size = 32; + } else if (new_size < 48) { + new_size = 48; + } else { + new_size = new_size * 6 / 5; + } + + ilist = static_cast<byte*>(ut_malloc_nokey(new_size)); + ptr = ilist + node->ilist_size; + + node->ilist_size_alloc = new_size; + if (cache) { + cache->total_size += new_size; + } + } + + ptr_start = ptr; + + /* Encode the new fragment. */ + ptr = fts_encode_int(doc_id_delta, ptr); + + last_pos = 0; + for (i = 0; i < ib_vector_size(positions); i++) { + ulint pos = *(static_cast<ulint*>( + ib_vector_get(positions, i))); + + ptr = fts_encode_int(pos - last_pos, ptr); + last_pos = pos; + } + + *ptr++ = 0; + + ut_a(enc_len == (ulint)(ptr - ptr_start)); + + if (ilist) { + /* Copy old ilist to the start of the new one and switch the + new one into place in the node. */ + if (node->ilist_size > 0) { + memcpy(ilist, node->ilist, node->ilist_size); + ut_free(node->ilist); + if (cache) { + cache->total_size -= node->ilist_size; + } + } + + node->ilist = ilist; + } + + node->ilist_size += enc_len; + + if (node->first_doc_id == FTS_NULL_DOC_ID) { + node->first_doc_id = doc_id; + } + + node->last_doc_id = doc_id; + ++node->doc_count; +} + +/**********************************************************************//** +Add document to the cache. */ +static +void +fts_cache_add_doc( +/*==============*/ + fts_cache_t* cache, /*!< in: cache */ + fts_index_cache_t* + index_cache, /*!< in: index cache */ + doc_id_t doc_id, /*!< in: doc id to add */ + ib_rbt_t* tokens) /*!< in: document tokens */ +{ + const ib_rbt_node_t* node; + ulint n_words; + fts_doc_stats_t* doc_stats; + + if (!tokens) { + return; + } + + mysql_mutex_assert_owner(&cache->lock); + + n_words = rbt_size(tokens); + + for (node = rbt_first(tokens); node; node = rbt_first(tokens)) { + + fts_tokenizer_word_t* word; + fts_node_t* fts_node = NULL; + fts_token_t* token = rbt_value(fts_token_t, node); + + /* Find and/or add token to the cache. 
*/ + word = fts_tokenizer_word_get( + cache, index_cache, &token->text); + + if (!word) { + ut_free(rbt_remove_node(tokens, node)); + continue; + } + + if (ib_vector_size(word->nodes) > 0) { + fts_node = static_cast<fts_node_t*>( + ib_vector_last(word->nodes)); + } + + if (fts_node == NULL || fts_node->synced + || fts_node->ilist_size > FTS_ILIST_MAX_SIZE + || doc_id < fts_node->last_doc_id) { + + fts_node = static_cast<fts_node_t*>( + ib_vector_push(word->nodes, NULL)); + + memset(fts_node, 0x0, sizeof(*fts_node)); + + cache->total_size += sizeof(*fts_node); + } + + fts_cache_node_add_positions( + cache, fts_node, doc_id, token->positions); + + ut_free(rbt_remove_node(tokens, node)); + } + + ut_a(rbt_empty(tokens)); + + /* Add to doc ids processed so far. */ + doc_stats = static_cast<fts_doc_stats_t*>( + ib_vector_push(index_cache->doc_stats, NULL)); + + doc_stats->doc_id = doc_id; + doc_stats->word_count = n_words; + + /* Add the doc stats memory usage too. */ + cache->total_size += sizeof(*doc_stats); + + if (doc_id > cache->sync->max_doc_id) { + cache->sync->max_doc_id = doc_id; + } +} + +/** Drop a table. 
+@param trx transaction +@param table_name FTS_ table name +@param rename whether to rename before dropping +@return error code +@retval DB_SUCCESS if the table was dropped +@retval DB_FAIL if the table did not exist */ +static dberr_t fts_drop_table(trx_t *trx, const char *table_name, bool rename) +{ + if (dict_table_t *table= dict_table_open_on_name(table_name, true, + DICT_ERR_IGNORE_TABLESPACE)) + { + table->release(); + if (rename) + { + mem_heap_t *heap= mem_heap_create(FN_REFLEN); + char *tmp= dict_mem_create_temporary_tablename(heap, table->name.m_name, + table->id); + dberr_t err= row_rename_table_for_mysql(table->name.m_name, tmp, trx, + false); + mem_heap_free(heap); + if (err != DB_SUCCESS) + { + ib::error() << "Unable to rename table " << table_name << ": " << err; + return err; + } + } + if (dberr_t err= trx->drop_table(*table)) + { + ib::error() << "Unable to drop table " << table->name << ": " << err; + return err; + } + +#ifdef UNIV_DEBUG + for (auto &p : trx->mod_tables) + { + if (p.first == table) + p.second.set_aux_table(); + } +#endif /* UNIV_DEBUG */ + return DB_SUCCESS; + } + + return DB_FAIL; +} + +/****************************************************************//** +Rename a single auxiliary table due to database name change. 
+@return DB_SUCCESS or error code */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_rename_one_aux_table( +/*=====================*/ + const char* new_name, /*!< in: new parent tbl name */ + const char* fts_table_old_name, /*!< in: old aux tbl name */ + trx_t* trx) /*!< in: transaction */ +{ + char fts_table_new_name[MAX_TABLE_NAME_LEN]; + ulint new_db_name_len = dict_get_db_name_len(new_name); + ulint old_db_name_len = dict_get_db_name_len(fts_table_old_name); + ulint table_new_name_len = strlen(fts_table_old_name) + + new_db_name_len - old_db_name_len; + + /* Check if the new and old database names are the same, if so, + nothing to do */ + ut_ad((new_db_name_len != old_db_name_len) + || strncmp(new_name, fts_table_old_name, old_db_name_len) != 0); + + /* Get the database name from "new_name", and table name + from the fts_table_old_name */ + strncpy(fts_table_new_name, new_name, new_db_name_len); + strncpy(fts_table_new_name + new_db_name_len, + strchr(fts_table_old_name, '/'), + table_new_name_len - new_db_name_len); + fts_table_new_name[table_new_name_len] = 0; + + return row_rename_table_for_mysql( + fts_table_old_name, fts_table_new_name, trx, false); +} + +/****************************************************************//** +Rename auxiliary tables for all fts index for a table. 
This(rename) +is due to database name change +@return DB_SUCCESS or error code */ +dberr_t +fts_rename_aux_tables( +/*==================*/ + dict_table_t* table, /*!< in: user Table */ + const char* new_name, /*!< in: new table name */ + trx_t* trx) /*!< in: transaction */ +{ + ulint i; + fts_table_t fts_table; + + FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table); + + dberr_t err = DB_SUCCESS; + char old_table_name[MAX_FULL_NAME_LEN]; + + /* Rename common auxiliary tables */ + for (i = 0; fts_common_tables[i] != NULL; ++i) { + fts_table.suffix = fts_common_tables[i]; + fts_get_table_name(&fts_table, old_table_name, true); + + err = fts_rename_one_aux_table(new_name, old_table_name, trx); + + if (err != DB_SUCCESS) { + return(err); + } + } + + fts_t* fts = table->fts; + + /* Rename index specific auxiliary tables */ + for (i = 0; fts->indexes != 0 && i < ib_vector_size(fts->indexes); + ++i) { + dict_index_t* index; + + index = static_cast<dict_index_t*>( + ib_vector_getp(fts->indexes, i)); + + FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index); + + for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) { + fts_table.suffix = fts_get_suffix(j); + fts_get_table_name(&fts_table, old_table_name, true); + + err = fts_rename_one_aux_table( + new_name, old_table_name, trx); + + DBUG_EXECUTE_IF("fts_rename_failure", + err = DB_DEADLOCK; + fts_sql_rollback(trx);); + + if (err != DB_SUCCESS) { + return(err); + } + } + } + + return(DB_SUCCESS); +} + +/** Lock an internal FTS_ table, before fts_drop_table() */ +static dberr_t fts_lock_table(trx_t *trx, const char *table_name) +{ + ut_ad(purge_sys.must_wait_FTS()); + + if (dict_table_t *table= dict_table_open_on_name(table_name, false, + DICT_ERR_IGNORE_TABLESPACE)) + { + dberr_t err= lock_table_for_trx(table, trx, LOCK_X); + /* Wait for purge threads to stop using the table. 
*/ + for (uint n= 15; table->get_ref_count() > 1; ) + { + if (!--n) + { + err= DB_LOCK_WAIT_TIMEOUT; + goto fail; + } + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + } +fail: + table->release(); + return err; + } + return DB_SUCCESS; +} + +/** Lock the internal FTS_ tables for an index, before fts_drop_index_tables(). +@param trx transaction +@param index fulltext index */ +dberr_t fts_lock_index_tables(trx_t *trx, const dict_index_t &index) +{ + ut_ad(index.type & DICT_FTS); + fts_table_t fts_table; + char table_name[MAX_FULL_NAME_LEN]; + FTS_INIT_INDEX_TABLE(&fts_table, nullptr, FTS_INDEX_TABLE, (&index)); + for (const fts_index_selector_t *s= fts_index_selector; s->suffix; s++) + { + fts_table.suffix= s->suffix; + fts_get_table_name(&fts_table, table_name, false); + if (dberr_t err= fts_lock_table(trx, table_name)) + return err; + } + return DB_SUCCESS; +} + +/** Lock the internal common FTS_ tables, before fts_drop_common_tables(). +@param trx transaction +@param table table containing FULLTEXT INDEX +@return DB_SUCCESS or error code */ +dberr_t fts_lock_common_tables(trx_t *trx, const dict_table_t &table) +{ + fts_table_t fts_table; + char table_name[MAX_FULL_NAME_LEN]; + + FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, (&table)); + + for (const char **suffix= fts_common_tables; *suffix; suffix++) + { + fts_table.suffix= *suffix; + fts_get_table_name(&fts_table, table_name, false); + if (dberr_t err= fts_lock_table(trx, table_name)) + return err; + } + return DB_SUCCESS; +} + +/** This function make sure that table doesn't +have any other reference count. 
+@param table_name table name */ +static void fts_table_no_ref_count(const char *table_name) +{ + dict_table_t *table= dict_table_open_on_name( + table_name, true, DICT_ERR_IGNORE_TABLESPACE); + if (!table) + return; + + while (table->get_ref_count() > 1) + { + dict_sys.unlock(); + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + dict_sys.lock(SRW_LOCK_CALL); + } + + table->release(); +} + +/** Stop the purge thread and check n_ref_count of all auxiliary +and common table associated with the fts table. +@param table parent FTS table +@param already_stopped True indicates purge threads were + already stopped*/ +void purge_sys_t::stop_FTS(const dict_table_t &table, bool already_stopped) +{ + if (!already_stopped) + purge_sys.stop_FTS(); + + dict_sys.lock(SRW_LOCK_CALL); + + fts_table_t fts_table; + char table_name[MAX_FULL_NAME_LEN]; + + FTS_INIT_FTS_TABLE(&fts_table, nullptr, FTS_COMMON_TABLE, (&table)); + + for (const char **suffix= fts_common_tables; *suffix; suffix++) + { + fts_table.suffix= *suffix; + fts_get_table_name(&fts_table, table_name, true); + fts_table_no_ref_count(table_name); + } + + if (table.fts) + { + if (auto indexes= table.fts->indexes) + { + for (ulint i= 0;i < ib_vector_size(indexes); ++i) + { + const dict_index_t *index= static_cast<const dict_index_t*>( + ib_vector_getp(indexes, i)); + FTS_INIT_INDEX_TABLE(&fts_table, nullptr, FTS_INDEX_TABLE, index); + for (const fts_index_selector_t *s= fts_index_selector; + s->suffix; s++) + { + fts_table.suffix= s->suffix; + fts_get_table_name(&fts_table, table_name, true); + fts_table_no_ref_count(table_name); + } + } + } + } + + dict_sys.unlock(); +} + +/** Lock the internal FTS_ tables for table, before fts_drop_tables(). 
+@param trx transaction +@param table table containing FULLTEXT INDEX +@return DB_SUCCESS or error code */ +dberr_t fts_lock_tables(trx_t *trx, const dict_table_t &table) +{ + if (dberr_t err= fts_lock_common_tables(trx, table)) + return err; + + if (!table.fts) + return DB_SUCCESS; + + auto indexes= table.fts->indexes; + if (!indexes) + return DB_SUCCESS; + + for (ulint i= 0; i < ib_vector_size(indexes); ++i) + if (dberr_t err= + fts_lock_index_tables(trx, *static_cast<const dict_index_t*> + (ib_vector_getp(indexes, i)))) + return err; + return DB_SUCCESS; +} + +/** Drops the common ancillary tables needed for supporting an FTS index +on the given table. +@param trx transaction to drop fts common table +@param fts_table table with an FTS index +@param rename whether to rename before dropping +@return DB_SUCCESS or error code */ +static dberr_t fts_drop_common_tables(trx_t *trx, fts_table_t *fts_table, + bool rename) +{ + dberr_t error= DB_SUCCESS; + + for (ulint i= 0; fts_common_tables[i]; ++i) + { + char table_name[MAX_FULL_NAME_LEN]; + + fts_table->suffix= fts_common_tables[i]; + fts_get_table_name(fts_table, table_name, true); + + if (dberr_t err= fts_drop_table(trx, table_name, rename)) + { + if (trx->state != TRX_STATE_ACTIVE) + return err; + /* We only return the status of the last error. 
*/ + if (err != DB_FAIL) + error= err; + } + } + + return error; +} + +/****************************************************************//** +Drops FTS auxiliary tables for an FTS index +@return DB_SUCCESS or error code */ +dberr_t fts_drop_index_tables(trx_t *trx, const dict_index_t &index) +{ + ulint i; + fts_table_t fts_table; + dberr_t error = DB_SUCCESS; + + FTS_INIT_INDEX_TABLE(&fts_table, nullptr, FTS_INDEX_TABLE, (&index)); + + for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) { + dberr_t err; + char table_name[MAX_FULL_NAME_LEN]; + + fts_table.suffix = fts_get_suffix(i); + fts_get_table_name(&fts_table, table_name, true); + + err = fts_drop_table(trx, table_name, false); + + /* We only return the status of the last error. */ + if (err != DB_SUCCESS && err != DB_FAIL) { + error = err; + } + } + + return(error); +} + +/****************************************************************//** +Drops FTS ancillary tables needed for supporting an FTS index +on the given table. +@return DB_SUCCESS or error code */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_drop_all_index_tables( +/*======================*/ + trx_t* trx, /*!< in: transaction */ + const fts_t* fts) /*!< in: fts instance */ +{ + dberr_t error= DB_SUCCESS; + auto indexes= fts->indexes; + if (!indexes) + return DB_SUCCESS; + + for (ulint i= 0; i < ib_vector_size(indexes); ++i) + if (dberr_t err= fts_drop_index_tables(trx, + *static_cast<const dict_index_t*> + (ib_vector_getp(indexes, i)))) + error= err; + return error; +} + +/** Drop the internal FTS_ tables for table. 
+@param trx transaction +@param table table containing FULLTEXT INDEX +@return DB_SUCCESS or error code */ +dberr_t fts_drop_tables(trx_t *trx, const dict_table_t &table) +{ + dberr_t error; + fts_table_t fts_table; + + FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, (&table)); + + error = fts_drop_common_tables(trx, &fts_table, false); + + if (error == DB_SUCCESS && table.fts) { + error = fts_drop_all_index_tables(trx, table.fts); + } + + return(error); +} + +/** Create dict_table_t object for FTS Aux tables. +@param[in] aux_table_name FTS Aux table name +@param[in] table table object of FTS Index +@param[in] n_cols number of columns for FTS Aux table +@return table object for FTS Aux table */ +static +dict_table_t* +fts_create_in_mem_aux_table( + const char* aux_table_name, + const dict_table_t* table, + ulint n_cols) +{ + dict_table_t* new_table = dict_table_t::create( + {aux_table_name,strlen(aux_table_name)}, + nullptr, n_cols, 0, table->flags, + table->space_id == TRX_SYS_SPACE + ? 0 : table->space_id == SRV_TMP_SPACE_ID + ? DICT_TF2_TEMPORARY : DICT_TF2_USE_FILE_PER_TABLE); + + if (DICT_TF_HAS_DATA_DIR(table->flags)) { + ut_ad(table->data_dir_path != NULL); + new_table->data_dir_path = mem_heap_strdup( + new_table->heap, table->data_dir_path); + } + + return(new_table); +} + +/** Function to create on FTS common table. 
+@param[in,out] trx InnoDB transaction +@param[in] table Table that has FTS Index +@param[in] fts_table_name FTS AUX table name +@param[in] fts_suffix FTS AUX table suffix +@param[in,out] heap temporary memory heap +@return table object if created, else NULL */ +static +dict_table_t* +fts_create_one_common_table( + trx_t* trx, + const dict_table_t* table, + const char* fts_table_name, + const char* fts_suffix, + mem_heap_t* heap) +{ + dict_table_t* new_table; + dberr_t error; + bool is_config = strcmp(fts_suffix, "CONFIG") == 0; + + if (!is_config) { + + new_table = fts_create_in_mem_aux_table( + fts_table_name, table, FTS_DELETED_TABLE_NUM_COLS); + + dict_mem_table_add_col( + new_table, heap, "doc_id", DATA_INT, DATA_UNSIGNED, + FTS_DELETED_TABLE_COL_LEN); + } else { + /* Config table has different schema. */ + new_table = fts_create_in_mem_aux_table( + fts_table_name, table, FTS_CONFIG_TABLE_NUM_COLS); + + dict_mem_table_add_col( + new_table, heap, "key", DATA_VARCHAR, 0, + FTS_CONFIG_TABLE_KEY_COL_LEN); + + dict_mem_table_add_col( + new_table, heap, "value", DATA_VARCHAR, DATA_NOT_NULL, + FTS_CONFIG_TABLE_VALUE_COL_LEN); + } + + dict_table_add_system_columns(new_table, heap); + error = row_create_table_for_mysql(new_table, trx); + + if (error == DB_SUCCESS) { + + dict_index_t* index = dict_mem_index_create( + new_table, "FTS_COMMON_TABLE_IND", + DICT_UNIQUE|DICT_CLUSTERED, 1); + + if (!is_config) { + dict_mem_index_add_field(index, "doc_id", 0); + } else { + dict_mem_index_add_field(index, "key", 0); + } + + error = row_create_index_for_mysql(index, trx, NULL, + FIL_ENCRYPTION_DEFAULT, + FIL_DEFAULT_ENCRYPTION_KEY); + if (error == DB_SUCCESS) { + return new_table; + } + } + + ib::warn() << "Failed to create FTS common table " << fts_table_name; + trx->error_state = error; + return NULL; +} + +/** Creates the common auxiliary tables needed for supporting an FTS index +on the given table. +The following tables are created. 
+CREATE TABLE $FTS_PREFIX_DELETED + (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id) +CREATE TABLE $FTS_PREFIX_DELETED_CACHE + (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id) +CREATE TABLE $FTS_PREFIX_BEING_DELETED + (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id) +CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE + (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id) +CREATE TABLE $FTS_PREFIX_CONFIG + (key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key) +@param[in,out] trx transaction +@param[in,out] table table with FTS index +@param[in] skip_doc_id_index Skip index on doc id +@return DB_SUCCESS if succeed */ +dberr_t +fts_create_common_tables( + trx_t* trx, + dict_table_t* table, + bool skip_doc_id_index) +{ + dberr_t error; + que_t* graph; + fts_table_t fts_table; + mem_heap_t* heap = mem_heap_create(1024); + pars_info_t* info; + char fts_name[MAX_FULL_NAME_LEN]; + char full_name[sizeof(fts_common_tables) / sizeof(char*)] + [MAX_FULL_NAME_LEN]; + + dict_index_t* index = NULL; + + FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table); + + error = fts_drop_common_tables(trx, &fts_table, true); + + if (error != DB_SUCCESS) { + + goto func_exit; + } + + /* Create the FTS tables that are common to an FTS index. */ + for (ulint i = 0; fts_common_tables[i] != NULL; ++i) { + + fts_table.suffix = fts_common_tables[i]; + fts_get_table_name(&fts_table, full_name[i], true); + dict_table_t* common_table = fts_create_one_common_table( + trx, table, full_name[i], fts_table.suffix, heap); + + if (!common_table) { + trx->error_state = DB_SUCCESS; + error = DB_ERROR; + goto func_exit; + } + + mem_heap_empty(heap); + } + + /* Write the default settings to the config table. 
*/ + info = pars_info_create(); + + fts_table.suffix = "CONFIG"; + fts_get_table_name(&fts_table, fts_name, true); + pars_info_bind_id(info, "config_table", fts_name); + + graph = pars_sql( + info, fts_config_table_insert_values_sql); + + error = fts_eval_sql(trx, graph); + + que_graph_free(graph); + + if (error != DB_SUCCESS || skip_doc_id_index) { + + goto func_exit; + } + + if (table->versioned()) { + index = dict_mem_index_create(table, FTS_DOC_ID_INDEX_NAME, + DICT_UNIQUE, 2); + dict_mem_index_add_field(index, FTS_DOC_ID_COL_NAME, 0); + dict_mem_index_add_field(index, table->cols[table->vers_end].name(*table), 0); + } else { + index = dict_mem_index_create(table, FTS_DOC_ID_INDEX_NAME, + DICT_UNIQUE, 1); + dict_mem_index_add_field(index, FTS_DOC_ID_COL_NAME, 0); + } + + error = row_create_index_for_mysql(index, trx, NULL, + FIL_ENCRYPTION_DEFAULT, + FIL_DEFAULT_ENCRYPTION_KEY); + +func_exit: + mem_heap_free(heap); + + return(error); +} + +/** Create one FTS auxiliary index table for an FTS index. +@param[in,out] trx transaction +@param[in] index the index instance +@param[in] fts_table fts_table structure +@param[in,out] heap temporary memory heap +@see row_merge_create_fts_sort_index() +@return DB_SUCCESS or error code */ +static +dict_table_t* +fts_create_one_index_table( + trx_t* trx, + const dict_index_t* index, + const fts_table_t* fts_table, + mem_heap_t* heap) +{ + dict_field_t* field; + dict_table_t* new_table; + char table_name[MAX_FULL_NAME_LEN]; + dberr_t error; + CHARSET_INFO* charset; + + ut_ad(index->type & DICT_FTS); + + fts_get_table_name(fts_table, table_name, true); + + new_table = fts_create_in_mem_aux_table( + table_name, fts_table->table, + FTS_AUX_INDEX_TABLE_NUM_COLS); + + field = dict_index_get_nth_field(index, 0); + charset = fts_get_charset(field->col->prtype); + + dict_mem_table_add_col(new_table, heap, "word", + charset == &my_charset_latin1 + ? 
DATA_VARCHAR : DATA_VARMYSQL, + field->col->prtype, + FTS_MAX_WORD_LEN_IN_CHAR + * unsigned(field->col->mbmaxlen)); + + dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT, + DATA_NOT_NULL | DATA_UNSIGNED, + FTS_INDEX_FIRST_DOC_ID_LEN); + + dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT, + DATA_NOT_NULL | DATA_UNSIGNED, + FTS_INDEX_LAST_DOC_ID_LEN); + + dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT, + DATA_NOT_NULL | DATA_UNSIGNED, + FTS_INDEX_DOC_COUNT_LEN); + + /* The precise type calculation is as follows: + least signficiant byte: MySQL type code (not applicable for sys cols) + second least : DATA_NOT_NULL | DATA_BINARY_TYPE + third least : the MySQL charset-collation code (DATA_MTYPE_MAX) */ + + dict_mem_table_add_col( + new_table, heap, "ilist", DATA_BLOB, + (DATA_MTYPE_MAX << 16) | DATA_UNSIGNED | DATA_NOT_NULL, + FTS_INDEX_ILIST_LEN); + + dict_table_add_system_columns(new_table, heap); + error = row_create_table_for_mysql(new_table, trx); + + if (error == DB_SUCCESS) { + dict_index_t* index = dict_mem_index_create( + new_table, "FTS_INDEX_TABLE_IND", + DICT_UNIQUE|DICT_CLUSTERED, 2); + dict_mem_index_add_field(index, "word", 0); + dict_mem_index_add_field(index, "first_doc_id", 0); + + error = row_create_index_for_mysql(index, trx, NULL, + FIL_ENCRYPTION_DEFAULT, + FIL_DEFAULT_ENCRYPTION_KEY); + + if (error == DB_SUCCESS) { + return new_table; + } + } + + ib::warn() << "Failed to create FTS index table " << table_name; + trx->error_state = error; + return NULL; +} + +/** Creates the column specific ancillary tables needed for supporting an +FTS index on the given table. + +All FTS AUX Index tables have the following schema. 
+CREAT TABLE $FTS_PREFIX_INDEX_[1-6]( + word VARCHAR(FTS_MAX_WORD_LEN), + first_doc_id INT NOT NULL, + last_doc_id UNSIGNED NOT NULL, + doc_count UNSIGNED INT NOT NULL, + ilist VARBINARY NOT NULL, + UNIQUE CLUSTERED INDEX ON (word, first_doc_id)) +@param[in,out] trx dictionary transaction +@param[in] index fulltext index +@param[in] id table id +@return DB_SUCCESS or error code */ +dberr_t +fts_create_index_tables(trx_t* trx, const dict_index_t* index, table_id_t id) +{ + ulint i; + fts_table_t fts_table; + dberr_t error = DB_SUCCESS; + mem_heap_t* heap = mem_heap_create(1024); + + fts_table.type = FTS_INDEX_TABLE; + fts_table.index_id = index->id; + fts_table.table_id = id; + fts_table.table = index->table; + + for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) { + dict_table_t* new_table; + + /* Create the FTS auxiliary tables that are specific + to an FTS index. */ + fts_table.suffix = fts_get_suffix(i); + + new_table = fts_create_one_index_table( + trx, index, &fts_table, heap); + + if (new_table == NULL) { + error = DB_FAIL; + break; + } + + mem_heap_empty(heap); + } + + mem_heap_free(heap); + + return(error); +} + +/******************************************************************//** +Calculate the new state of a row given the existing state and a new event. +@return new state of row */ +static +fts_row_state +fts_trx_row_get_new_state( +/*======================*/ + fts_row_state old_state, /*!< in: existing state of row */ + fts_row_state event) /*!< in: new event */ +{ + /* The rules for transforming states: + + I = inserted + M = modified + D = deleted + N = nothing + + M+D -> D: + + If the row existed before the transaction started and it is modified + during the transaction, followed by a deletion of the row, only the + deletion will be signaled. + + M+ -> M: + + If the row existed before the transaction started and it is modified + more than once during the transaction, only the last modification + will be signaled. 
+ + IM*D -> N: + + If a new row is added during the transaction (and possibly modified + after its initial insertion) but it is deleted before the end of the + transaction, nothing will be signaled. + + IM* -> I: + + If a new row is added during the transaction and modified after its + initial insertion, only the addition will be signaled. + + M*DI -> M: + + If the row existed before the transaction started and it is deleted, + then re-inserted, only a modification will be signaled. Note that + this case is only possible if the table is using the row's primary + key for FTS row ids, since those can be re-inserted by the user, + which is not true for InnoDB generated row ids. + + It is easily seen that the above rules decompose such that we do not + need to store the row's entire history of events. Instead, we can + store just one state for the row and update that when new events + arrive. Then we can implement the above rules as a two-dimensional + look-up table, and get checking of invalid combinations "for free" + in the process. */ + + /* The lookup table for transforming states. old_state is the + Y-axis, event is the X-axis. */ + static const fts_row_state table[4][4] = { + /* I M D N */ + /* I */ { FTS_INVALID, FTS_INSERT, FTS_NOTHING, FTS_INVALID }, + /* M */ { FTS_INVALID, FTS_MODIFY, FTS_DELETE, FTS_INVALID }, + /* D */ { FTS_MODIFY, FTS_INVALID, FTS_INVALID, FTS_INVALID }, + /* N */ { FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID } + }; + + fts_row_state result; + + ut_a(old_state < FTS_INVALID); + ut_a(event < FTS_INVALID); + + result = table[(int) old_state][(int) event]; + ut_a(result != FTS_INVALID); + + return(result); +} + +/******************************************************************//** +Create a savepoint instance. 
+@return savepoint instance */ +static +fts_savepoint_t* +fts_savepoint_create( +/*=================*/ + ib_vector_t* savepoints, /*!< out: InnoDB transaction */ + const char* name, /*!< in: savepoint name */ + mem_heap_t* heap) /*!< in: heap */ +{ + fts_savepoint_t* savepoint; + + savepoint = static_cast<fts_savepoint_t*>( + ib_vector_push(savepoints, NULL)); + + memset(savepoint, 0x0, sizeof(*savepoint)); + + if (name) { + savepoint->name = mem_heap_strdup(heap, name); + } + + savepoint->tables = rbt_create( + sizeof(fts_trx_table_t*), fts_trx_table_cmp); + + return(savepoint); +} + +/******************************************************************//** +Create an FTS trx. +@return FTS trx */ +fts_trx_t* +fts_trx_create( +/*===========*/ + trx_t* trx) /*!< in/out: InnoDB + transaction */ +{ + fts_trx_t* ftt; + ib_alloc_t* heap_alloc; + mem_heap_t* heap = mem_heap_create(1024); + trx_named_savept_t* savep; + + ut_a(trx->fts_trx == NULL); + + ftt = static_cast<fts_trx_t*>(mem_heap_alloc(heap, sizeof(fts_trx_t))); + ftt->trx = trx; + ftt->heap = heap; + + heap_alloc = ib_heap_allocator_create(heap); + + ftt->savepoints = static_cast<ib_vector_t*>(ib_vector_create( + heap_alloc, sizeof(fts_savepoint_t), 4)); + + ftt->last_stmt = static_cast<ib_vector_t*>(ib_vector_create( + heap_alloc, sizeof(fts_savepoint_t), 4)); + + /* Default instance has no name and no heap. */ + fts_savepoint_create(ftt->savepoints, NULL, NULL); + fts_savepoint_create(ftt->last_stmt, NULL, NULL); + + /* Copy savepoints that already set before. */ + for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints); + savep != NULL; + savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) { + + fts_savepoint_take(ftt, savep->name); + } + + return(ftt); +} + +/******************************************************************//** +Create an FTS trx table. 
+@return FTS trx table */ +static +fts_trx_table_t* +fts_trx_table_create( +/*=================*/ + fts_trx_t* fts_trx, /*!< in: FTS trx */ + dict_table_t* table) /*!< in: table */ +{ + fts_trx_table_t* ftt; + + ftt = static_cast<fts_trx_table_t*>( + mem_heap_zalloc(fts_trx->heap, sizeof *ftt)); + + ftt->table = table; + ftt->fts_trx = fts_trx; + + ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp); + + return(ftt); +} + +/******************************************************************//** +Clone an FTS trx table. +@return FTS trx table */ +static +fts_trx_table_t* +fts_trx_table_clone( +/*=================*/ + const fts_trx_table_t* ftt_src) /*!< in: FTS trx */ +{ + fts_trx_table_t* ftt; + + ftt = static_cast<fts_trx_table_t*>( + mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt))); + + memset(ftt, 0x0, sizeof(*ftt)); + + ftt->table = ftt_src->table; + ftt->fts_trx = ftt_src->fts_trx; + + ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp); + + /* Copy the rb tree values to the new savepoint. */ + rbt_merge_uniq(ftt->rows, ftt_src->rows); + + /* These are only added on commit. At this stage we only have + the updated row state. */ + ut_a(ftt_src->added_doc_ids == NULL); + + return(ftt); +} + +/******************************************************************//** +Initialize the FTS trx instance. 
+@return FTS trx instance */ +static +fts_trx_table_t* +fts_trx_init( +/*=========*/ + trx_t* trx, /*!< in: transaction */ + dict_table_t* table, /*!< in: FTS table instance */ + ib_vector_t* savepoints) /*!< in: Savepoints */ +{ + fts_trx_table_t* ftt; + ib_rbt_bound_t parent; + ib_rbt_t* tables; + fts_savepoint_t* savepoint; + + savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints)); + + tables = savepoint->tables; + rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, NULL); + + if (parent.result == 0) { + fts_trx_table_t** fttp; + + fttp = rbt_value(fts_trx_table_t*, parent.last); + ftt = *fttp; + } else { + ftt = fts_trx_table_create(trx->fts_trx, table); + rbt_add_node(tables, &parent, &ftt); + } + + ut_a(ftt->table == table); + + return(ftt); +} + +/******************************************************************//** +Notify the FTS system about an operation on an FTS-indexed table. */ +static +void +fts_trx_table_add_op( +/*=================*/ + fts_trx_table_t*ftt, /*!< in: FTS trx table */ + doc_id_t doc_id, /*!< in: doc id */ + fts_row_state state, /*!< in: state of the row */ + ib_vector_t* fts_indexes) /*!< in: FTS indexes affected */ +{ + ib_rbt_t* rows; + ib_rbt_bound_t parent; + + rows = ftt->rows; + rbt_search(rows, &parent, &doc_id); + + /* Row id found, update state, and if new state is FTS_NOTHING, + we delete the row from our tree. */ + if (parent.result == 0) { + fts_trx_row_t* row = rbt_value(fts_trx_row_t, parent.last); + + row->state = fts_trx_row_get_new_state(row->state, state); + + if (row->state == FTS_NOTHING) { + if (row->fts_indexes) { + ib_vector_free(row->fts_indexes); + } + + ut_free(rbt_remove_node(rows, parent.last)); + row = NULL; + } else if (row->fts_indexes != NULL) { + ib_vector_free(row->fts_indexes); + row->fts_indexes = fts_indexes; + } + + } else { /* Row-id not found, create a new one. 
*/ + fts_trx_row_t row; + + row.doc_id = doc_id; + row.state = state; + row.fts_indexes = fts_indexes; + + rbt_add_node(rows, &parent, &row); + } +} + +/******************************************************************//** +Notify the FTS system about an operation on an FTS-indexed table. */ +void +fts_trx_add_op( +/*===========*/ + trx_t* trx, /*!< in: InnoDB transaction */ + dict_table_t* table, /*!< in: table */ + doc_id_t doc_id, /*!< in: new doc id */ + fts_row_state state, /*!< in: state of the row */ + ib_vector_t* fts_indexes) /*!< in: FTS indexes affected + (NULL=all) */ +{ + fts_trx_table_t* tran_ftt; + fts_trx_table_t* stmt_ftt; + + if (!trx->fts_trx) { + trx->fts_trx = fts_trx_create(trx); + } + + tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints); + stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt); + + fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes); + fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes); +} + +/******************************************************************//** +Fetch callback that converts a textual document id to a binary value and +stores it in the given place. 
+@return always returns NULL */ +static +ibool +fts_fetch_store_doc_id( +/*===================*/ + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: doc_id_t* to store + doc_id in */ +{ + int n_parsed; + sel_node_t* node = static_cast<sel_node_t*>(row); + doc_id_t* doc_id = static_cast<doc_id_t*>(user_arg); + dfield_t* dfield = que_node_get_val(node->select_list); + dtype_t* type = dfield_get_type(dfield); + ulint len = dfield_get_len(dfield); + + char buf[32]; + + ut_a(dtype_get_mtype(type) == DATA_VARCHAR); + ut_a(len > 0 && len < sizeof(buf)); + + memcpy(buf, dfield_get_data(dfield), len); + buf[len] = '\0'; + + n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id); + ut_a(n_parsed == 1); + + return(FALSE); +} + +#ifdef FTS_CACHE_SIZE_DEBUG +/******************************************************************//** +Get the max cache size in bytes. If there is an error reading the +value we simply print an error message here and return the default +value to the caller. +@return max cache size in bytes */ +static +ulint +fts_get_max_cache_size( +/*===================*/ + trx_t* trx, /*!< in: transaction */ + fts_table_t* fts_table) /*!< in: table instance */ +{ + dberr_t error; + fts_string_t value; + ulong cache_size_in_mb; + + /* Set to the default value. */ + cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB; + + /* We set the length of value to the max bytes it can hold. This + information is used by the callback that reads the value. */ + value.f_n_char = 0; + value.f_len = FTS_MAX_CONFIG_VALUE_LEN; + value.f_str = ut_malloc_nokey(value.f_len + 1); + + error = fts_config_get_value( + trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value); + + if (UNIV_LIKELY(error == DB_SUCCESS)) { + value.f_str[value.f_len] = 0; + cache_size_in_mb = strtoul((char*) value.f_str, NULL, 10); + + if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) { + + ib::warn() << "FTS max cache size (" + << cache_size_in_mb << ") out of range." 
+ " Minimum value is " + << FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB + << "MB and the maximum value is " + << FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB + << "MB, setting cache size to upper limit"; + + cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB; + + } else if (cache_size_in_mb + < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) { + + ib::warn() << "FTS max cache size (" + << cache_size_in_mb << ") out of range." + " Minimum value is " + << FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB + << "MB and the maximum value is" + << FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB + << "MB, setting cache size to lower limit"; + + cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB; + } + } else { + ib::error() << "(" << error << ") reading max" + " cache config value from config table " + << fts_table->table->name; + } + + ut_free(value.f_str); + + return(cache_size_in_mb * 1024 * 1024); +} +#endif + +/*********************************************************************//** +Get the next available document id. +@return DB_SUCCESS if OK */ +dberr_t +fts_get_next_doc_id( +/*================*/ + const dict_table_t* table, /*!< in: table */ + doc_id_t* doc_id) /*!< out: new document id */ +{ + fts_cache_t* cache = table->fts->cache; + + /* If the Doc ID system has not yet been initialized, we + will consult the CONFIG table and user table to re-establish + the initial value of the Doc ID */ + if (cache->first_doc_id == FTS_NULL_DOC_ID) { + fts_init_doc_id(table); + } + + if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) { + *doc_id = FTS_NULL_DOC_ID; + return(DB_SUCCESS); + } + + DEBUG_SYNC_C("get_next_FTS_DOC_ID"); + mysql_mutex_lock(&cache->doc_id_lock); + *doc_id = cache->next_doc_id++; + mysql_mutex_unlock(&cache->doc_id_lock); + + return(DB_SUCCESS); +} + +/*********************************************************************//** +This function fetch the Doc ID from CONFIG table, and compare with +the Doc ID supplied. And store the larger one to the CONFIG table. 
@return DB_SUCCESS if OK */
static MY_ATTRIBUTE((nonnull))
dberr_t
fts_cmp_set_sync_doc_id(
/*====================*/
	const dict_table_t*	table,		/*!< in: table */
	doc_id_t		cmp_doc_id,	/*!< in: Doc ID to compare */
	ibool			read_only,	/*!< in: TRUE if read the
						synced_doc_id only */
	doc_id_t*		doc_id)		/*!< out: larger document id
						after comparing "cmp_doc_id"
						to the one stored in CONFIG
						table */
{
	if (srv_read_only_mode) {
		return DB_READ_ONLY;
	}

	trx_t*		trx;
	pars_info_t*	info;
	dberr_t		error;
	fts_table_t	fts_table;
	que_t*		graph = NULL;
	fts_cache_t*	cache = table->fts->cache;
	char		table_name[MAX_FULL_NAME_LEN];
	ut_a(table->fts->doc_col != ULINT_UNDEFINED);

	fts_table.suffix = "CONFIG";
	fts_table.table_id = table->id;
	fts_table.type = FTS_COMMON_TABLE;
	fts_table.table = table;

	trx= trx_create();
retry:
	/* Also reached from func_exit after DB_DEADLOCK or
	DB_LOCK_WAIT_TIMEOUT: the same trx object is started again and
	the whole read is repeated. */
	trx_start_internal(trx);

	trx->op_info = "update the next FTS document id";

	info = pars_info_create();

	/* fts_fetch_store_doc_id() parses the fetched value into
	*doc_id. */
	pars_info_bind_function(
		info, "my_func", fts_fetch_store_doc_id, doc_id);

	fts_get_table_name(&fts_table, table_name);
	pars_info_bind_id(info, "config_table", table_name);

	graph = fts_parse_sql(
		&fts_table, info,
		"DECLARE FUNCTION my_func;\n"
		"DECLARE CURSOR c IS SELECT value FROM $config_table"
		" WHERE key = 'synced_doc_id' FOR UPDATE;\n"
		"BEGIN\n"
		""
		"OPEN c;\n"
		"WHILE 1 = 1 LOOP\n"
		" FETCH c INTO my_func();\n"
		" IF c % NOTFOUND THEN\n"
		" EXIT;\n"
		" END IF;\n"
		"END LOOP;\n"
		"CLOSE c;");

	/* Stays 0 if the cursor returns no row. */
	*doc_id = 0;

	error = fts_eval_sql(trx, graph);

	que_graph_free(graph);

	/* NOTE(review): this spot used to carry a FIXME asking for
	deadlock retries; func_exit below retries DB_DEADLOCK and
	DB_LOCK_WAIT_TIMEOUT. */
	if (error != DB_SUCCESS) {
		goto func_exit;
	}

	if (read_only) {
		/* InnoDB stores actual synced_doc_id value + 1 in
		FTS_CONFIG table. Reduce the value by 1 while reading
		after startup. */
		if (*doc_id) *doc_id -= 1;
		goto func_exit;
	}

	/* With no Doc ID to compare against, take the stored value
	(minus the +1 offset); otherwise keep the larger of the two. */
	if (cmp_doc_id == 0 && *doc_id) {
		cache->synced_doc_id = *doc_id - 1;
	} else {
		cache->synced_doc_id = ut_max(cmp_doc_id, *doc_id);
	}

	mysql_mutex_lock(&cache->doc_id_lock);
	/* For each sync operation, we will add next_doc_id by 1,
	so to mark a sync operation */
	if (cache->next_doc_id < cache->synced_doc_id + 1) {
		cache->next_doc_id = cache->synced_doc_id + 1;
	}
	mysql_mutex_unlock(&cache->doc_id_lock);

	/* Write back only when the supplied Doc ID is not smaller
	than the stored value; the update runs inside this trx. */
	if (cmp_doc_id && cmp_doc_id >= *doc_id) {
		error = fts_update_sync_doc_id(
			table, cache->synced_doc_id, trx);
	}

	*doc_id = cache->next_doc_id;

func_exit:

	if (UNIV_LIKELY(error == DB_SUCCESS)) {
		fts_sql_commit(trx);
	} else {
		/* On any failure the output is reset to 0. */
		*doc_id = 0;

		ib::error() << "(" << error << ") while getting next doc id "
			"for table " << table->name;
		fts_sql_rollback(trx);

		if (error == DB_DEADLOCK || error == DB_LOCK_WAIT_TIMEOUT) {
			DEBUG_SYNC_C("fts_cmp_set_sync_doc_id_retry");
			std::this_thread::sleep_for(FTS_DEADLOCK_RETRY_WAIT);
			goto retry;
		}
	}

	trx->free();

	return(error);
}

/** Update the last document id. This function could create a new
transaction to update the last document id.
+@param table table to be updated +@param doc_id last document id +@param trx update trx or null +@retval DB_SUCCESS if OK */ +dberr_t +fts_update_sync_doc_id( + const dict_table_t* table, + doc_id_t doc_id, + trx_t* trx) +{ + byte id[FTS_MAX_ID_LEN]; + pars_info_t* info; + fts_table_t fts_table; + ulint id_len; + que_t* graph = NULL; + dberr_t error; + ibool local_trx = FALSE; + fts_cache_t* cache = table->fts->cache; + char fts_name[MAX_FULL_NAME_LEN]; + + if (srv_read_only_mode) { + return DB_READ_ONLY; + } + + fts_table.suffix = "CONFIG"; + fts_table.table_id = table->id; + fts_table.type = FTS_COMMON_TABLE; + fts_table.table = table; + + if (!trx) { + trx = trx_create(); + trx_start_internal(trx); + + trx->op_info = "setting last FTS document id"; + local_trx = TRUE; + } + + info = pars_info_create(); + + id_len = (ulint) snprintf( + (char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1); + + pars_info_bind_varchar_literal(info, "doc_id", id, id_len); + + fts_get_table_name(&fts_table, fts_name, + table->fts->dict_locked); + pars_info_bind_id(info, "table_name", fts_name); + + graph = fts_parse_sql( + &fts_table, info, + "BEGIN" + " UPDATE $table_name SET value = :doc_id" + " WHERE key = 'synced_doc_id';"); + + error = fts_eval_sql(trx, graph); + + que_graph_free(graph); + + if (local_trx) { + if (UNIV_LIKELY(error == DB_SUCCESS)) { + fts_sql_commit(trx); + cache->synced_doc_id = doc_id; + } else { + ib::error() << "(" << error << ") while" + " updating last doc id for table" + << table->name; + + fts_sql_rollback(trx); + } + trx->free(); + } + + return(error); +} + +/*********************************************************************//** +Create a new fts_doc_ids_t. 
+@return new fts_doc_ids_t */ +fts_doc_ids_t* +fts_doc_ids_create(void) +/*====================*/ +{ + fts_doc_ids_t* fts_doc_ids; + mem_heap_t* heap = mem_heap_create(512); + + fts_doc_ids = static_cast<fts_doc_ids_t*>( + mem_heap_alloc(heap, sizeof(*fts_doc_ids))); + + fts_doc_ids->self_heap = ib_heap_allocator_create(heap); + + fts_doc_ids->doc_ids = static_cast<ib_vector_t*>(ib_vector_create( + fts_doc_ids->self_heap, sizeof(doc_id_t), 32)); + + return(fts_doc_ids); +} + +/*********************************************************************//** +Do commit-phase steps necessary for the insertion of a new row. */ +void +fts_add( +/*====*/ + fts_trx_table_t*ftt, /*!< in: FTS trx table */ + fts_trx_row_t* row) /*!< in: row */ +{ + dict_table_t* table = ftt->table; + doc_id_t doc_id = row->doc_id; + + ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY); + + fts_add_doc_by_id(ftt, doc_id); + + mysql_mutex_lock(&table->fts->cache->deleted_lock); + ++table->fts->cache->added; + mysql_mutex_unlock(&table->fts->cache->deleted_lock); + + if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) + && doc_id >= table->fts->cache->next_doc_id) { + table->fts->cache->next_doc_id = doc_id + 1; + } +} + +/*********************************************************************//** +Do commit-phase steps necessary for the deletion of a row. 
+@return DB_SUCCESS or error code */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_delete( +/*=======*/ + fts_trx_table_t*ftt, /*!< in: FTS trx table */ + fts_trx_row_t* row) /*!< in: row */ +{ + que_t* graph; + fts_table_t fts_table; + doc_id_t write_doc_id; + dict_table_t* table = ftt->table; + doc_id_t doc_id = row->doc_id; + trx_t* trx = ftt->fts_trx->trx; + pars_info_t* info = pars_info_create(); + fts_cache_t* cache = table->fts->cache; + + /* we do not index Documents whose Doc ID value is 0 */ + if (doc_id == FTS_NULL_DOC_ID) { + ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)); + return DB_SUCCESS; + } + + ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY); + + FTS_INIT_FTS_TABLE(&fts_table, "DELETED", FTS_COMMON_TABLE, table); + + /* Convert to "storage" byte order. */ + fts_write_doc_id((byte*) &write_doc_id, doc_id); + fts_bind_doc_id(info, "doc_id", &write_doc_id); + + /* It is possible we update a record that has not yet been sync-ed + into cache from last crash (delete Doc will not initialize the + sync). Avoid any added counter accounting until the FTS cache + is re-established and sync-ed */ + if (table->fts->added_synced + && doc_id > cache->synced_doc_id) { + mysql_mutex_lock(&table->fts->cache->deleted_lock); + + /* The Doc ID could belong to those left in + ADDED table from last crash. So need to check + if it is less than first_doc_id when we initialize + the Doc ID system after reboot */ + if (doc_id >= table->fts->cache->first_doc_id + && table->fts->cache->added > 0) { + --table->fts->cache->added; + } + + mysql_mutex_unlock(&table->fts->cache->deleted_lock); + + /* Only if the row was really deleted. */ + ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY); + } + + /* Note the deleted document for OPTIMIZE to purge. 
*/ + char table_name[MAX_FULL_NAME_LEN]; + + trx->op_info = "adding doc id to FTS DELETED"; + + fts_table.suffix = "DELETED"; + + fts_get_table_name(&fts_table, table_name); + pars_info_bind_id(info, "deleted", table_name); + + graph = fts_parse_sql(&fts_table, info, + "BEGIN INSERT INTO $deleted VALUES (:doc_id);"); + + dberr_t error = fts_eval_sql(trx, graph); + que_graph_free(graph); + + /* Increment the total deleted count, this is used to calculate the + number of documents indexed. */ + if (error == DB_SUCCESS) { + mysql_mutex_lock(&table->fts->cache->deleted_lock); + + ++table->fts->cache->deleted; + + mysql_mutex_unlock(&table->fts->cache->deleted_lock); + } + + return(error); +} + +/*********************************************************************//** +Do commit-phase steps necessary for the modification of a row. +@return DB_SUCCESS or error code */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_modify( +/*=======*/ + fts_trx_table_t* ftt, /*!< in: FTS trx table */ + fts_trx_row_t* row) /*!< in: row */ +{ + dberr_t error; + + ut_a(row->state == FTS_MODIFY); + + error = fts_delete(ftt, row); + + if (error == DB_SUCCESS) { + fts_add(ftt, row); + } + + return(error); +} + +/*********************************************************************//** +The given transaction is about to be committed; do whatever is necessary +from the FTS system's POV. 
+@return DB_SUCCESS or error code */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_commit_table( +/*=============*/ + fts_trx_table_t* ftt) /*!< in: FTS table to commit*/ +{ + if (srv_read_only_mode) { + return DB_READ_ONLY; + } + + const ib_rbt_node_t* node; + ib_rbt_t* rows; + dberr_t error = DB_SUCCESS; + fts_cache_t* cache = ftt->table->fts->cache; + trx_t* trx = trx_create(); + + trx_start_internal(trx); + + rows = ftt->rows; + + ftt->fts_trx->trx = trx; + + if (cache->get_docs == NULL) { + mysql_mutex_lock(&cache->init_lock); + if (cache->get_docs == NULL) { + cache->get_docs = fts_get_docs_create(cache); + } + mysql_mutex_unlock(&cache->init_lock); + } + + for (node = rbt_first(rows); + node != NULL && error == DB_SUCCESS; + node = rbt_next(rows, node)) { + + fts_trx_row_t* row = rbt_value(fts_trx_row_t, node); + + switch (row->state) { + case FTS_INSERT: + fts_add(ftt, row); + break; + + case FTS_MODIFY: + error = fts_modify(ftt, row); + break; + + case FTS_DELETE: + error = fts_delete(ftt, row); + break; + + default: + ut_error; + } + } + + fts_sql_commit(trx); + + trx->free(); + + return(error); +} + +/*********************************************************************//** +The given transaction is about to be committed; do whatever is necessary +from the FTS system's POV. 
+@return DB_SUCCESS or error code */ +dberr_t +fts_commit( +/*=======*/ + trx_t* trx) /*!< in: transaction */ +{ + const ib_rbt_node_t* node; + dberr_t error; + ib_rbt_t* tables; + fts_savepoint_t* savepoint; + + savepoint = static_cast<fts_savepoint_t*>( + ib_vector_last(trx->fts_trx->savepoints)); + tables = savepoint->tables; + + for (node = rbt_first(tables), error = DB_SUCCESS; + node != NULL && error == DB_SUCCESS; + node = rbt_next(tables, node)) { + + fts_trx_table_t** ftt; + + ftt = rbt_value(fts_trx_table_t*, node); + + error = fts_commit_table(*ftt); + } + + return(error); +} + +/*********************************************************************//** +Initialize a document. */ +void +fts_doc_init( +/*=========*/ + fts_doc_t* doc) /*!< in: doc to initialize */ +{ + mem_heap_t* heap = mem_heap_create(32); + + memset(doc, 0, sizeof(*doc)); + + doc->self_heap = ib_heap_allocator_create(heap); +} + +/*********************************************************************//** +Free document. */ +void +fts_doc_free( +/*=========*/ + fts_doc_t* doc) /*!< in: document */ +{ + mem_heap_t* heap = static_cast<mem_heap_t*>(doc->self_heap->arg); + + if (doc->tokens) { + rbt_free(doc->tokens); + } + + ut_d(memset(doc, 0, sizeof(*doc))); + + mem_heap_free(heap); +} + +/*********************************************************************//** +Callback function for fetch that stores the text of an FTS document, +converting each column to UTF-16. 
+@return always FALSE */ +ibool +fts_query_expansion_fetch_doc( +/*==========================*/ + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: fts_doc_t* */ +{ + que_node_t* exp; + sel_node_t* node = static_cast<sel_node_t*>(row); + fts_doc_t* result_doc = static_cast<fts_doc_t*>(user_arg); + dfield_t* dfield; + ulint len; + ulint doc_len; + fts_doc_t doc; + CHARSET_INFO* doc_charset = NULL; + ulint field_no = 0; + + len = 0; + + fts_doc_init(&doc); + doc.found = TRUE; + + exp = node->select_list; + doc_len = 0; + + doc_charset = result_doc->charset; + + /* Copy each indexed column content into doc->text.f_str */ + while (exp) { + dfield = que_node_get_val(exp); + len = dfield_get_len(dfield); + + /* NULL column */ + if (len == UNIV_SQL_NULL) { + exp = que_node_get_next(exp); + continue; + } + + if (!doc_charset) { + doc_charset = fts_get_charset(dfield->type.prtype); + } + + doc.charset = doc_charset; + + if (dfield_is_ext(dfield)) { + /* We ignore columns that are stored externally, this + could result in too many words to search */ + exp = que_node_get_next(exp); + continue; + } else { + doc.text.f_n_char = 0; + + doc.text.f_str = static_cast<byte*>( + dfield_get_data(dfield)); + + doc.text.f_len = len; + } + + if (field_no == 0) { + fts_tokenize_document(&doc, result_doc, + result_doc->parser); + } else { + fts_tokenize_document_next(&doc, doc_len, result_doc, + result_doc->parser); + } + + exp = que_node_get_next(exp); + + doc_len += (exp) ? len + 1 : len; + + field_no++; + } + + ut_ad(doc_charset); + + if (!result_doc->charset) { + result_doc->charset = doc_charset; + } + + fts_doc_free(&doc); + + return(FALSE); +} + +/*********************************************************************//** +fetch and tokenize the document. 
*/
static
void
fts_fetch_doc_from_rec(
/*===================*/
	fts_get_doc_t*	get_doc,	/*!< in: FTS index's get_doc struct */
	dict_index_t*	clust_index,	/*!< in: cluster index */
	btr_pcur_t*	pcur,		/*!< in: cursor whose position
					has been stored */
	rec_offs*	offsets,	/*!< in: offsets */
	fts_doc_t*	doc)		/*!< out: fts doc to hold parsed
					documents */
{
	if (get_doc == NULL) {
		return;
	}

	dict_index_t*		fts_index = get_doc->index_cache->index;
	st_mysql_ftparser*	ft_parser = fts_index->parser;
	const rec_t*		rec = btr_pcur_get_rec(pcur);

	ut_ad(!page_rec_is_comp(rec)
	      || rec_get_status(rec) == REC_STATUS_ORDINARY);

	/* Position passed to the tokenizer for the next column; it is
	zero until the first non-empty column has been tokenized. */
	ulint	offset = 0;

	for (ulint i = 0; i < fts_index->n_fields; i++) {
		const dict_field_t*	idx_field =
			dict_index_get_nth_field(fts_index, i);
		const ulint		pos = dict_col_get_clust_pos(
			idx_field->col, clust_index);

		/* The charset is taken from the first column seen. */
		if (!get_doc->index_cache->charset) {
			get_doc->index_cache->charset = fts_get_charset(
				idx_field->col->prtype);
		}

		if (!rec_offs_nth_extern(offsets, pos)) {
			doc->text.f_str = (byte*) rec_get_nth_field(
				rec, offsets, pos,
				&doc->text.f_len);
		} else {
			/* Externally stored column: copy it into the
			document's own heap. */
			doc->text.f_str =
				btr_rec_copy_externally_stored_field(
					rec, offsets,
					btr_pcur_get_block(pcur)->zip_size(),
					pos, &doc->text.f_len,
					static_cast<mem_heap_t*>(
						doc->self_heap->arg));
		}

		doc->found = TRUE;
		doc->charset = get_doc->index_cache->charset;

		/* NULL and empty columns contribute nothing. */
		const bool	has_text = doc->text.f_len != UNIV_SQL_NULL
			&& doc->text.f_len != 0;

		if (!has_text) {
			continue;
		}

		if (offset == 0) {
			fts_tokenize_document(doc, NULL, ft_parser);
		} else {
			fts_tokenize_document_next(doc, offset, NULL,
						   ft_parser);
		}

		offset += doc->text.f_len + 1;
	}
}

/** Fetch the data from tuple and tokenize the document.
+@param[in] get_doc FTS index's get_doc struct +@param[in] tuple tuple should be arranged in table schema order +@param[out] doc fts doc to hold parsed documents. */ +static +void +fts_fetch_doc_from_tuple( + fts_get_doc_t* get_doc, + const dtuple_t* tuple, + fts_doc_t* doc) +{ + dict_index_t* index; + st_mysql_ftparser* parser; + ulint doc_len = 0; + ulint processed_doc = 0; + ulint num_field; + + if (get_doc == NULL) { + return; + } + + index = get_doc->index_cache->index; + parser = get_doc->index_cache->index->parser; + num_field = dict_index_get_n_fields(index); + + for (ulint i = 0; i < num_field; i++) { + const dict_field_t* ifield; + const dict_col_t* col; + ulint pos; + + ifield = dict_index_get_nth_field(index, i); + col = dict_field_get_col(ifield); + pos = dict_col_get_no(col); + const dfield_t* field = dtuple_get_nth_field(tuple, pos); + + if (!get_doc->index_cache->charset) { + get_doc->index_cache->charset = fts_get_charset( + ifield->col->prtype); + } + + ut_ad(!dfield_is_ext(field)); + + doc->text.f_str = (byte*) dfield_get_data(field); + doc->text.f_len = dfield_get_len(field); + doc->found = TRUE; + doc->charset = get_doc->index_cache->charset; + + /* field data is NULL. */ + if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) { + continue; + } + + if (processed_doc == 0) { + fts_tokenize_document(doc, NULL, parser); + } else { + fts_tokenize_document_next(doc, doc_len, NULL, parser); + } + + processed_doc++; + doc_len += doc->text.f_len + 1; + } +} + +/** Fetch the document from tuple, tokenize the text data and +insert the text data into fts auxiliary table and +its cache. Moreover this tuple fields doesn't contain any information +about externally stored field. This tuple contains data directly +converted from mysql. +@param[in] ftt FTS transaction table +@param[in] doc_id doc id +@param[in] tuple tuple from where data can be retrieved + and tuple should be arranged in table + schema order. 
*/
void
fts_add_doc_from_tuple(
	fts_trx_table_t*ftt,
	doc_id_t	doc_id,
	const dtuple_t*	tuple)
{
	mtr_t		mtr;
	fts_cache_t*	trx_cache = ftt->table->fts->cache;

	ut_ad(trx_cache->get_docs);

	if (!ftt->table->fts->added_synced) {
		fts_init_index(ftt->table, FALSE);
	}

	mtr_start(&mtr);

	const ulint	n_indexes = ib_vector_size(trx_cache->get_docs);

	/* Tokenize the tuple once per FTS index and feed the tokens
	into that index's cache. */
	for (ulint idx = 0; idx < n_indexes; ++idx) {
		fts_get_doc_t*	cur_get_doc = static_cast<fts_get_doc_t*>(
			ib_vector_get(trx_cache->get_docs, idx));
		dict_table_t*	idx_table =
			cur_get_doc->index_cache->index->table;
		fts_doc_t	parsed;

		fts_doc_init(&parsed);
		fts_fetch_doc_from_tuple(cur_get_doc, tuple, &parsed);

		if (parsed.found) {
			/* The mini-transaction is closed while the cache
			is updated and reopened afterwards. */
			mtr_commit(&mtr);
			mysql_mutex_lock(&idx_table->fts->cache->lock);

			if (idx_table->fts->cache->stopword_info.status
			    & STOPWORD_NOT_INIT) {
				fts_load_stopword(idx_table, NULL, NULL,
						  true, true);
			}

			fts_cache_add_doc(
				idx_table->fts->cache,
				cur_get_doc->index_cache,
				doc_id, parsed.tokens);

			mysql_mutex_unlock(&idx_table->fts->cache->lock);

			const bool	sync_now =
				trx_cache->total_size > fts_max_cache_size / 5
				|| fts_need_sync;

			if (sync_now) {
				fts_sync(trx_cache->sync, true, false);
			}

			mtr_start(&mtr);
		}

		fts_doc_free(&parsed);
	}

	mtr_commit(&mtr);
}

/*********************************************************************//**
This function fetches the document inserted during the committing
transaction, and tokenize the inserted text data and insert into
FTS auxiliary table and its cache.
*/
static
void
fts_add_doc_by_id(
/*==============*/
	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
	doc_id_t	doc_id)		/*!< in: doc id */
{
	mtr_t		mtr;
	mem_heap_t*	heap;
	btr_pcur_t	pcur;
	dict_table_t*	table;
	dtuple_t*	tuple;
	dfield_t*	dfield;
	fts_get_doc_t*	get_doc;
	doc_id_t	temp_doc_id;
	dict_index_t*	clust_index;
	dict_index_t*	fts_id_index;
	ibool		is_id_cluster;
	fts_cache_t*	cache = ftt->table->fts->cache;

	ut_ad(cache->get_docs);

	/* If Doc ID has been supplied by the user, then the table
	might not yet be sync-ed */

	if (!ftt->table->fts->added_synced) {
		fts_init_index(ftt->table, FALSE);
	}

	/* Get the first FTS index's get_doc */
	get_doc = static_cast<fts_get_doc_t*>(
		ib_vector_get(cache->get_docs, 0));
	ut_ad(get_doc);

	table = get_doc->index_cache->index->table;

	/* Scratch heap for the search tuples and record offsets;
	released at func_exit. */
	heap = mem_heap_create(512);

	clust_index = dict_table_get_first_index(table);
	fts_id_index = table->fts_doc_id_index;

	/* Check whether the index on FTS_DOC_ID is cluster index */
	is_id_cluster = (clust_index == fts_id_index);

	mtr_start(&mtr);

	/* Search based on Doc ID. Here, we'll need to consider the case
	when there is no primary index on Doc ID */
	const ulint	n_uniq = table->fts_n_uniq();
	tuple = dtuple_create(heap, n_uniq);
	dfield = dtuple_get_nth_field(tuple, 0);
	dfield->type.mtype = DATA_INT;
	dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;

	/* The search key holds the Doc ID as written by
	mach_write_to_8(). */
	mach_write_to_8((byte*) &temp_doc_id, doc_id);
	dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));
	pcur.btr_cur.page_cur.index = fts_id_index;

	/* For a versioned table the second key field is the row-end
	column; it is set to the maximum value. */
	if (n_uniq == 2) {
		ut_ad(table->versioned());
		ut_ad(fts_id_index->fields[1].col->vers_sys_end());
		dfield = dtuple_get_nth_field(tuple, 1);
		dfield->type.mtype = fts_id_index->fields[1].col->mtype;
		dfield->type.prtype = fts_id_index->fields[1].col->prtype;
		if (table->versioned_by_id()) {
			dfield_set_data(dfield, trx_id_max_bytes,
					sizeof(trx_id_max_bytes));
		} else {
			dfield_set_data(dfield, timestamp_max_bytes,
					sizeof(timestamp_max_bytes));
		}
	}

	/* If we have a match, add the data to doc structure */
	if (btr_pcur_open_with_no_init(tuple, PAGE_CUR_LE,
				       BTR_SEARCH_LEAF, &pcur, &mtr)
	    == DB_SUCCESS
	    && btr_pcur_get_low_match(&pcur) == n_uniq) {
		const rec_t*	rec;
		btr_pcur_t*	doc_pcur;
		const rec_t*	clust_rec;
		btr_pcur_t	clust_pcur;
		rec_offs*	offsets = NULL;
		ulint		num_idx = ib_vector_size(cache->get_docs);

		rec = btr_pcur_get_rec(&pcur);

		/* Doc could be deleted */
		if (page_rec_is_infimum(rec)
		    || rec_get_deleted_flag(rec, dict_table_is_comp(table))) {

			goto func_exit;
		}

		if (is_id_cluster) {
			clust_rec = rec;
			doc_pcur = &pcur;
		} else {
			/* The Doc ID index is a secondary index: build a
			row reference from its record and look up the
			clustered index record. */
			dtuple_t*	clust_ref;
			ulint		n_fields;

			n_fields = dict_index_get_n_unique(clust_index);

			clust_ref = dtuple_create(heap, n_fields);
			dict_index_copy_types(clust_ref, clust_index, n_fields);

			row_build_row_ref_in_tuple(
				clust_ref, rec, fts_id_index, NULL);
			clust_pcur.btr_cur.page_cur.index = clust_index;

			if (btr_pcur_open_with_no_init(clust_ref,
						       PAGE_CUR_LE,
						       BTR_SEARCH_LEAF,
						       &clust_pcur, &mtr)
			    != DB_SUCCESS) {
				goto func_exit;
			}

			doc_pcur = &clust_pcur;
			clust_rec = btr_pcur_get_rec(&clust_pcur);
		}

		offsets = rec_get_offsets(clust_rec, clust_index, NULL,
					  clust_index->n_core_fields,
					  ULINT_UNDEFINED, &heap);

		/* Tokenize the record once per FTS index. The locals
		below shadow the function-level table and get_doc. */
		for (ulint i = 0; i < num_idx; ++i) {
			fts_doc_t	doc;
			dict_table_t*	table;
			fts_get_doc_t*	get_doc;

			get_doc = static_cast<fts_get_doc_t*>(
				ib_vector_get(cache->get_docs, i));

			table = get_doc->index_cache->index->table;

			fts_doc_init(&doc);

			fts_fetch_doc_from_rec(
				get_doc, clust_index, doc_pcur, offsets, &doc);

			if (doc.found) {

				/* Remember the cursor position and close
				the mini-transaction before taking the
				cache lock; it is reopened below. */
				btr_pcur_store_position(doc_pcur, &mtr);
				mtr_commit(&mtr);

				mysql_mutex_lock(&table->fts->cache->lock);

				if (table->fts->cache->stopword_info.status
				    & STOPWORD_NOT_INIT) {
					fts_load_stopword(table, NULL,
							  NULL, true, true);
				}

				fts_cache_add_doc(
					table->fts->cache,
					get_doc->index_cache,
					doc_id, doc.tokens);

				/* Request a sync when none is in progress
				and either fts_need_sync is set or the cache
				grew by more than a tenth of
				fts_max_cache_size since the last request. */
				bool	need_sync = !cache->sync->in_progress
					&& (fts_need_sync
					    || (cache->total_size
						- cache->total_size_at_sync)
					    > fts_max_cache_size / 10);
				if (need_sync) {
					cache->total_size_at_sync =
						cache->total_size;
				}

				mysql_mutex_unlock(&table->fts->cache->lock);

				DBUG_EXECUTE_IF(
					"fts_instrument_sync",
					fts_optimize_request_sync_table(table);
					mysql_mutex_lock(&cache->lock);
					if (cache->sync->in_progress)
						my_cond_wait(
							&cache->sync->cond,
							&cache->lock.m_mutex);
					mysql_mutex_unlock(&cache->lock);
				);

				DBUG_EXECUTE_IF(
					"fts_instrument_sync_debug",
					fts_sync(cache->sync, true, true);
				);

				DEBUG_SYNC_C("fts_instrument_sync_request");
				DBUG_EXECUTE_IF(
					"fts_instrument_sync_request",
					fts_optimize_request_sync_table(table);
				);

				if (need_sync) {
					fts_optimize_request_sync_table(table);
				}

				mtr_start(&mtr);

				/* More indexes to process: go back to the
				stored position. If the record cannot be
				found unchanged, end the loop after this
				iteration. */
				if (i < num_idx - 1) {
					if (doc_pcur->restore_position(
					      BTR_SEARCH_LEAF, &mtr)
					    != btr_pcur_t::SAME_ALL) {
						ut_ad("invalid state" == 0);
						i = num_idx - 1;
					}
				}
			}

			fts_doc_free(&doc);
		}

		/* In this branch doc_pcur points at clust_pcur, which
		goes out of scope here. */
		if (!is_id_cluster) {
			ut_free(doc_pcur->old_rec_buf);
		}
	}
func_exit:
	mtr_commit(&mtr);

	ut_free(pcur.old_rec_buf);

	mem_heap_free(heap);
}


/*********************************************************************//**
Callback function to read a single ulint column. The column value is
read with mach_read_from_4().
@return always TRUE */
static
ibool
fts_read_ulint(
/*===========*/
	void*		row,		/*!< in: sel_node_t* */
	void*		user_arg)	/*!< in: pointer to ulint */
{
	sel_node_t*	sel_node = static_cast<sel_node_t*>(row);
	ulint*		value = static_cast<ulint*>(user_arg);
	que_node_t*	exp = sel_node->select_list;
	dfield_t*	dfield = que_node_get_val(exp);
	void*		data = dfield_get_data(dfield);

	*value = mach_read_from_4(static_cast<const byte*>(data));

	return(TRUE);
}

/*********************************************************************//**
Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
@return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
doc_id_t
fts_get_max_doc_id(
/*===============*/
	dict_table_t*	table)		/*!< in: user table */
{
	dict_index_t*	index;
	dict_field_t*	dfield MY_ATTRIBUTE((unused)) = NULL;
	doc_id_t	doc_id = 0;
	mtr_t		mtr;
	btr_pcur_t	pcur;

	index = table->fts_doc_id_index;

	if (!index) {
		return(0);
	}

	ut_ad(!index->is_instant());

	/* Only used by the disabled assertion below. */
	dfield = dict_index_get_nth_field(index, 0);

#if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
	ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
#endif

	mtr.start();

	/* fetch the largest indexes value */
	if (pcur.open_leaf(false, index, BTR_SEARCH_LEAF, &mtr) == DB_SUCCESS
	    && !page_is_empty(btr_pcur_get_page(&pcur))) {
		const rec_t*	rec = NULL;
		constexpr ulint	doc_id_len= 8;

		/* Walk backwards to the first user record. With a
		two-column key (versioned table), keep going until a
		record whose second key field holds the maximum value. */
		do {
			rec = btr_pcur_get_rec(&pcur);

			if (!page_rec_is_user_rec(rec)) {
				continue;
			}

			if (index->n_uniq == 1) {
				break;
			}

			ut_ad(table->versioned());
			ut_ad(index->n_uniq == 2);

			/* The second key field starts right after the
			8-byte Doc ID. */
			const byte *data = rec + doc_id_len;
			if (table->versioned_by_id()) {
				if (0 == memcmp(data, trx_id_max_bytes,
						sizeof trx_id_max_bytes)) {
					break;
				}
			} else {
				if (0 == memcmp(data, timestamp_max_bytes,
						sizeof timestamp_max_bytes)) {
					break;
				}
			}
		} while (btr_pcur_move_to_prev(&pcur, &mtr));

		if (!rec || rec_is_metadata(rec, *index)) {
			goto func_exit;
		}

		doc_id = fts_read_doc_id(rec);
	}

func_exit:
	mtr.commit();
	return(doc_id);
}

/*********************************************************************//**
Fetch document with the given document id. When get_doc is supplied,
the parsed query graph is stored in get_doc->get_document_graph and
reused by later calls; otherwise the graph is freed before returning.
@return DB_SUCCESS if OK else error */
dberr_t
fts_doc_fetch_by_doc_id(
/*====================*/
	fts_get_doc_t*	get_doc,	/*!< in: state */
	doc_id_t	doc_id,		/*!< in: id of document to
					fetch */
	dict_index_t*	index_to_use,	/*!< in: caller supplied FTS index,
					or NULL */
	ulint		option,		/*!< in: search option, if it is
					greater than doc_id or equal */
	fts_sql_callback
			callback,	/*!< in: callback to read */
	void*		arg)		/*!< in: callback arg */
{
	pars_info_t*	info;
	dberr_t		error;
	const char*	select_str;
	doc_id_t	write_doc_id;
	dict_index_t*	index;
	trx_t*		trx = trx_create();
	que_t*		graph;

	trx->op_info = "fetching indexed FTS document";

	/* The FTS index can be supplied by caller directly with
	"index_to_use", otherwise, get it from "get_doc" */
	index = (index_to_use) ? index_to_use : get_doc->index_cache->index;

	/* Reuse the bind info of a cached graph when there is one. */
	if (get_doc && get_doc->get_document_graph) {
		info = get_doc->get_document_graph->info;
	} else {
		info = pars_info_create();
	}

	/* Convert to "storage" byte order. */
	fts_write_doc_id((byte*) &write_doc_id, doc_id);
	fts_bind_doc_id(info, "doc_id", &write_doc_id);
	pars_info_bind_function(info, "my_func", callback, arg);

	select_str = fts_get_select_columns_str(index, info, info->heap);
	pars_info_bind_id(info, "table_name", index->table->name.m_name);

	if (!get_doc || !get_doc->get_document_graph) {
		if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
			graph = fts_parse_sql(
				NULL,
				info,
				mem_heap_printf(info->heap,
					"DECLARE FUNCTION my_func;\n"
					"DECLARE CURSOR c IS"
					" SELECT %s FROM $table_name"
					" WHERE %s = :doc_id;\n"
					"BEGIN\n"
					""
					"OPEN c;\n"
					"WHILE 1 = 1 LOOP\n"
					" FETCH c INTO my_func();\n"
					" IF c %% NOTFOUND THEN\n"
					" EXIT;\n"
					" END IF;\n"
					"END LOOP;\n"
					"CLOSE c;",
					select_str, FTS_DOC_ID_COL_NAME));
		} else {
			ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);

			/* This is used for crash recovery of table with
			hidden DOC ID or FTS indexes. We will scan the table
			to re-processing user table rows whose DOC ID or
			FTS indexed documents have not been sync-ed to disc
			during recent crash.
			In the case that all fulltext indexes are dropped
			for a table, we will keep the "hidden" FTS_DOC_ID
			column, and this scan is to retrieve the largest
			DOC ID being used in the table to determine the
			appropriate next DOC ID.
			In the case of there exists fulltext index(es), this
			operation will re-tokenize any docs that have not
			been sync-ed to the disk, and re-prime the FTS
			cached */
			graph = fts_parse_sql(
				NULL,
				info,
				mem_heap_printf(info->heap,
					"DECLARE FUNCTION my_func;\n"
					"DECLARE CURSOR c IS"
					" SELECT %s, %s FROM $table_name"
					" WHERE %s > :doc_id;\n"
					"BEGIN\n"
					""
					"OPEN c;\n"
					"WHILE 1 = 1 LOOP\n"
					" FETCH c INTO my_func();\n"
					" IF c %% NOTFOUND THEN\n"
					" EXIT;\n"
					" END IF;\n"
					"END LOOP;\n"
					"CLOSE c;",
					FTS_DOC_ID_COL_NAME,
					select_str, FTS_DOC_ID_COL_NAME));
		}
		if (get_doc) {
			get_doc->get_document_graph = graph;
		}
	} else {
		graph = get_doc->get_document_graph;
	}

	error = fts_eval_sql(trx, graph);
	fts_sql_commit(trx);
	trx->free();

	/* Without a get_doc there is nowhere to keep the graph. */
	if (!get_doc) {
		que_graph_free(graph);
	}

	return(error);
}

/*********************************************************************//**
Write out a single word's data as new entry/entries in the INDEX table.
The parsed INSERT graph is stored in *graph on first use and reused
afterwards.
@return DB_SUCCESS if all OK. */
dberr_t
fts_write_node(
/*===========*/
	trx_t*		trx,			/*!< in: transaction */
	que_t**		graph,			/*!< in: query graph */
	fts_table_t*	fts_table,		/*!< in: aux table */
	fts_string_t*	word,			/*!< in: word in UTF-8 */
	fts_node_t*	node)			/*!< in: node columns */
{
	pars_info_t*	info;
	dberr_t		error;
	ib_uint32_t	doc_count;
	time_t		start_time;
	doc_id_t	last_doc_id;
	doc_id_t	first_doc_id;
	char		table_name[MAX_FULL_NAME_LEN];

	ut_a(node->ilist != NULL);

	/* The table name is bound only when a new bind info is
	created; an existing graph already carries it. */
	if (*graph) {
		info = (*graph)->info;
	} else {
		info = pars_info_create();

		fts_get_table_name(fts_table, table_name);
		pars_info_bind_id(info, "index_table_name", table_name);
	}

	pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len);

	/* Convert to "storage" byte order. */
	fts_write_doc_id((byte*) &first_doc_id, node->first_doc_id);
	fts_bind_doc_id(info, "first_doc_id", &first_doc_id);

	/* Convert to "storage" byte order. */
	fts_write_doc_id((byte*) &last_doc_id, node->last_doc_id);
	fts_bind_doc_id(info, "last_doc_id", &last_doc_id);

	ut_a(node->last_doc_id >= node->first_doc_id);

	/* Convert to "storage" byte order. */
	mach_write_to_4((byte*) &doc_count, node->doc_count);
	pars_info_bind_int4_literal(
		info, "doc_count", (const ib_uint32_t*) &doc_count);

	/* Set copy_name to FALSE since it's a static. */
	/* NOTE(review): the call below takes no copy_name argument, so
	the remark above looks stale -- confirm. */
	pars_info_bind_literal(
		info, "ilist", node->ilist, node->ilist_size,
		DATA_BLOB, DATA_BINARY_TYPE);

	if (!*graph) {

		*graph = fts_parse_sql(
			fts_table,
			info,
			"BEGIN\n"
			"INSERT INTO $index_table_name VALUES"
			" (:token, :first_doc_id,"
			" :last_doc_id, :doc_count, :ilist);");
	}

	/* elapsed_time and n_nodes are not declared in this function;
	they accumulate across calls. */
	start_time = time(NULL);
	error = fts_eval_sql(trx, *graph);
	elapsed_time += time(NULL) - start_time;
	++n_nodes;

	return(error);
}

/*********************************************************************//**
Add rows to the DELETED_CACHE table. The doc ids are sorted in place
and inserted one at a time through a single parsed graph.
@return DB_SUCCESS if all went well else error code*/
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
fts_sync_add_deleted_cache(
/*=======================*/
	fts_sync_t*	sync,			/*!< in: sync state */
	ib_vector_t*	doc_ids)		/*!< in: doc ids to add */
{
	ulint		i;
	pars_info_t*	info;
	que_t*		graph;
	fts_table_t	fts_table;
	char		table_name[MAX_FULL_NAME_LEN];
	doc_id_t	dummy = 0;
	dberr_t		error = DB_SUCCESS;
	ulint		n_elems = ib_vector_size(doc_ids);

	ut_a(ib_vector_size(doc_ids) > 0);

	ib_vector_sort(doc_ids, fts_doc_id_cmp);

	info = pars_info_create();

	/* Placeholder binding so that :doc_id exists when the
	statement is parsed; it is rebound for every row below. */
	fts_bind_doc_id(info, "doc_id", &dummy);

	FTS_INIT_FTS_TABLE(
		&fts_table, "DELETED_CACHE", FTS_COMMON_TABLE, sync->table);

	fts_get_table_name(&fts_table, table_name);
	pars_info_bind_id(info, "table_name", table_name);

	graph = fts_parse_sql(
		&fts_table,
		info,
		"BEGIN INSERT INTO $table_name VALUES (:doc_id);");

	/* Stop at the first failed insert. */
	for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) {
		doc_id_t*	update;
		doc_id_t	write_doc_id;

		update = static_cast<doc_id_t*>(ib_vector_get(doc_ids, i));

		/* Convert to "storage" byte order. */
		fts_write_doc_id((byte*) &write_doc_id, *update);
		fts_bind_doc_id(info, "doc_id", &write_doc_id);

		error = fts_eval_sql(sync->trx, graph);
	}

	que_graph_free(graph);

	return(error);
}

/** Write the words and ilist to disk.
After the first failed write no further nodes are written, but the
remaining nodes are still marked as synced and only the first error is
logged.
@param[in,out]	trx		transaction
@param[in]	index_cache	index cache
@param[in]	unlock_cache	whether unlock cache when write node
@return DB_SUCCESS if all went well else error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
fts_sync_write_words(
	trx_t*			trx,
	fts_index_cache_t*	index_cache,
	bool			unlock_cache)
{
	fts_table_t	fts_table;
	ulint		n_nodes = 0;
	ulint		n_words = 0;
	const ib_rbt_node_t*	rbt_node;
	dberr_t		error = DB_SUCCESS;
	ibool		print_error = FALSE;
	dict_table_t*	table = index_cache->index->table;

	FTS_INIT_INDEX_TABLE(
		&fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);

	n_words = rbt_size(index_cache->words);

	/* We iterate over the entire tree, even if there is an error,
	since we want to free the memory used during caching. */
	for (rbt_node = rbt_first(index_cache->words);
	     rbt_node;
	     rbt_node = rbt_next(index_cache->words, rbt_node)) {

		ulint			i;
		ulint			selected;
		fts_tokenizer_word_t*	word;

		word = rbt_value(fts_tokenizer_word_t, rbt_node);

		DBUG_EXECUTE_IF(
			"fts_instrument_write_words_before_select_index",
			std::this_thread::sleep_for(
				std::chrono::milliseconds(300)););

		/* Pick the auxiliary index table for this word; the
		same number selects the cached insert graph below. */
		selected = fts_select_index(
			index_cache->charset, word->text.f_str,
			word->text.f_len);

		fts_table.suffix = fts_get_suffix(selected);

		/* We iterate over all the nodes even if there was an error */
		for (i = 0; i < ib_vector_size(word->nodes); ++i) {

			fts_node_t* fts_node = static_cast<fts_node_t*>(
				ib_vector_get(word->nodes, i));

			if (fts_node->synced) {
				continue;
			} else {
				fts_node->synced = true;
			}

			/*FIXME: we need to handle the error properly. */
			if (error == DB_SUCCESS) {
				/* When requested, the cache lock is
				released around the write and taken again
				afterwards. */
				if (unlock_cache) {
					mysql_mutex_unlock(
						&table->fts->cache->lock);
				}

				error = fts_write_node(
					trx,
					&index_cache->ins_graph[selected],
					&fts_table, &word->text, fts_node);

				DEBUG_SYNC_C("fts_write_node");
				DBUG_EXECUTE_IF("fts_write_node_crash",
					DBUG_SUICIDE(););

				DBUG_EXECUTE_IF(
					"fts_instrument_sync_sleep",
					std::this_thread::sleep_for(
						std::chrono::seconds(1)););

				if (unlock_cache) {
					mysql_mutex_lock(
						&table->fts->cache->lock);
				}
			}
		}

		/* Counts every node of the word, including nodes that
		were already synced. */
		n_nodes += ib_vector_size(word->nodes);

		if (UNIV_UNLIKELY(error != DB_SUCCESS) && !print_error) {
			ib::error() << "(" << error << ") writing"
				" word node to FTS auxiliary index table "
				<< table->name;
			print_error = TRUE;
		}
	}

	if (UNIV_UNLIKELY(fts_enable_diag_print)) {
		printf("Avg number of nodes: %lf\n",
		       (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
	}

	return(error);
}

/*********************************************************************//**
Begin Sync, create transaction, acquire locks, etc.
*/ +static +void +fts_sync_begin( +/*===========*/ + fts_sync_t* sync) /*!< in: sync state */ +{ + fts_cache_t* cache = sync->table->fts->cache; + + n_nodes = 0; + elapsed_time = 0; + + sync->start_time = time(NULL); + + sync->trx = trx_create(); + trx_start_internal(sync->trx); + + if (UNIV_UNLIKELY(fts_enable_diag_print)) { + ib::info() << "FTS SYNC for table " << sync->table->name + << ", deleted count: " + << ib_vector_size(cache->deleted_doc_ids) + << " size: " << ib::bytes_iec{cache->total_size}; + } +} + +/*********************************************************************//** +Run SYNC on the table, i.e., write out data from the index specific +cache to the FTS aux INDEX table and FTS aux doc id stats table. +@return DB_SUCCESS if all OK */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_sync_index( +/*===========*/ + fts_sync_t* sync, /*!< in: sync state */ + fts_index_cache_t* index_cache) /*!< in: index cache */ +{ + trx_t* trx = sync->trx; + + trx->op_info = "doing SYNC index"; + + if (UNIV_UNLIKELY(fts_enable_diag_print)) { + ib::info() << "SYNC words: " << rbt_size(index_cache->words); + } + + ut_ad(rbt_validate(index_cache->words)); + + return(fts_sync_write_words(trx, index_cache, sync->unlock_cache)); +} + +/** Check if index cache has been synced completely +@param[in,out] index_cache index cache +@return true if index is synced, otherwise false. 
*/ +static +bool +fts_sync_index_check( + fts_index_cache_t* index_cache) +{ + const ib_rbt_node_t* rbt_node; + + for (rbt_node = rbt_first(index_cache->words); + rbt_node != NULL; + rbt_node = rbt_next(index_cache->words, rbt_node)) { + + fts_tokenizer_word_t* word; + word = rbt_value(fts_tokenizer_word_t, rbt_node); + + /* Only the last node of each word is inspected. */ + fts_node_t* fts_node; + fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes)); + + if (!fts_node->synced) { + return(false); + } + } + + return(true); +} + +/** Reset synced flag in index cache when rollback +@param[in,out] index_cache index cache */ +static +void +fts_sync_index_reset( + fts_index_cache_t* index_cache) +{ + const ib_rbt_node_t* rbt_node; + + for (rbt_node = rbt_first(index_cache->words); + rbt_node != NULL; + rbt_node = rbt_next(index_cache->words, rbt_node)) { + + fts_tokenizer_word_t* word; + word = rbt_value(fts_tokenizer_word_t, rbt_node); + + /* Only the last node of each word has its flag cleared. */ + fts_node_t* fts_node; + fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes)); + + fts_node->synced = false; + } +} + +/** Commit the SYNC, change state of processed doc ids etc. +The cache is cleared and re-initialized, cache->lock is released, and the +SYNC transaction is committed (or rolled back on error) and freed. +@param[in,out] sync sync state +@return DB_SUCCESS if all OK */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_sync_commit( + fts_sync_t* sync) +{ + dberr_t error; + trx_t* trx = sync->trx; + fts_cache_t* cache = sync->table->fts->cache; + doc_id_t last_doc_id; + + trx->op_info = "doing SYNC commit"; + + /* After each Sync, update the CONFIG table about the max doc id + we just sync-ed to index table */ + error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE, + &last_doc_id); + + /* Get the list of deleted documents that are either in the + cache or were headed there but were deleted before the add + thread got to them. 
*/ + + if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) { + + error = fts_sync_add_deleted_cache( + sync, cache->deleted_doc_ids); + } + + /* We need to do this within the deleted lock since fts_delete() can + attempt to add a deleted doc id to the cache deleted id array. */ + /* NOTE(review): the mutex released below is cache->lock, which fts_sync() acquired; confirm which lock the comment above refers to. */ + fts_cache_clear(cache); + DEBUG_SYNC_C("fts_deleted_doc_ids_clear"); + fts_cache_init(cache); + mysql_mutex_unlock(&cache->lock); + + if (UNIV_LIKELY(error == DB_SUCCESS)) { + fts_sql_commit(trx); + } else { + fts_sql_rollback(trx); + ib::error() << "(" << error << ") during SYNC of " + "table " << sync->table->name; + } + + /* The rate is printed only when elapsed_time is non-zero, which also avoids a division by zero. */ + if (UNIV_UNLIKELY(fts_enable_diag_print) && elapsed_time) { + ib::info() << "SYNC for table " << sync->table->name + << ": SYNC time: " + << (time(NULL) - sync->start_time) + << " secs: elapsed " + << static_cast<double>(n_nodes) + / static_cast<double>(elapsed_time) + << " ins/sec"; + } + + /* Avoid assertion in trx_t::free(). */ + trx->dict_operation_lock_mode = false; + trx->free(); + + return(error); +} + +/** Rollback a sync operation +Releases cache->lock, rolls back and frees the SYNC transaction. +@param[in,out] sync sync state */ +static +void +fts_sync_rollback( + fts_sync_t* sync) +{ + trx_t* trx = sync->trx; + fts_cache_t* cache = sync->table->fts->cache; + + for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) { + ulint j; + fts_index_cache_t* index_cache; + + index_cache = static_cast<fts_index_cache_t*>( + ib_vector_get(cache->indexes, i)); + + /* Reset synced flag so nodes will not be skipped + in the next sync, see fts_sync_write_words(). 
*/ + fts_sync_index_reset(index_cache); + + /* Drop the cached insert and select graphs of every auxiliary index table. */ + for (j = 0; fts_index_selector[j].value; ++j) { + + if (index_cache->ins_graph[j] != NULL) { + + que_graph_free(index_cache->ins_graph[j]); + + index_cache->ins_graph[j] = NULL; + } + + if (index_cache->sel_graph[j] != NULL) { + + que_graph_free(index_cache->sel_graph[j]); + + index_cache->sel_graph[j] = NULL; + } + } + } + + mysql_mutex_unlock(&cache->lock); + + fts_sql_rollback(trx); + + /* Avoid assertion in trx_t::free(). */ + trx->dict_operation_lock_mode = false; + trx->free(); +} + +/** Run SYNC on the table, i.e., write out data from the cache to the +FTS auxiliary INDEX table and clear the cache at the end. +@param[in,out] sync sync state +@param[in] unlock_cache whether unlock cache lock when write node +@param[in] wait whether wait when a sync is in progress +@return DB_SUCCESS if all OK */ +static +dberr_t +fts_sync( + fts_sync_t* sync, + bool unlock_cache, + bool wait) +{ + if (srv_read_only_mode) { + return DB_READ_ONLY; + } + + ulint i; + dberr_t error = DB_SUCCESS; + fts_cache_t* cache = sync->table->fts->cache; + + mysql_mutex_lock(&cache->lock); + + /* Nothing is cached, so there is nothing to write. */ + if (cache->total_size == 0) { + mysql_mutex_unlock(&cache->lock); + return DB_SUCCESS; + } + + /* Check if cache is being synced. + Note: we release cache lock in fts_sync_write_words() to + avoid long wait for the lock by other threads. */ + if (sync->in_progress) { + if (!wait) { + mysql_mutex_unlock(&cache->lock); + return(DB_SUCCESS); + } + do { + my_cond_wait(&sync->cond, &cache->lock.m_mutex); + } while (sync->in_progress); + } + + sync->unlock_cache = unlock_cache; + sync->in_progress = true; + + DEBUG_SYNC_C("fts_sync_begin"); + fts_sync_begin(sync); + +begin_sync: + const size_t fts_cache_size= fts_max_cache_size; + if (cache->total_size > fts_cache_size) { + /* Avoid the case: sync never finish when + insert/update keeps coming. 
*/ + /* NOTE(review): this debug assertion also runs when the function was entered with unlock_cache == false or when begin_sync is re-entered after the flag was cleared; confirm that cannot happen. */ + ut_ad(sync->unlock_cache); + sync->unlock_cache = false; + ib::warn() << "Total InnoDB FTS size " + << cache->total_size << " for the table " + << cache->sync->table->name + << " exceeds the innodb_ft_cache_size " + << fts_cache_size; + } + + for (i = 0; i < ib_vector_size(cache->indexes); ++i) { + fts_index_cache_t* index_cache; + + index_cache = static_cast<fts_index_cache_t*>( + ib_vector_get(cache->indexes, i)); + + if (index_cache->index->to_be_dropped) { + continue; + } + + DBUG_EXECUTE_IF("fts_instrument_sync_before_syncing", + std::this_thread::sleep_for( + std::chrono::milliseconds(300));); + error = fts_sync_index(sync, index_cache); + + if (error != DB_SUCCESS) { + goto end_sync; + } + + if (!sync->unlock_cache + && cache->total_size < fts_max_cache_size) { + /* Reset the unlock cache if the value + is less than innodb_ft_cache_size */ + sync->unlock_cache = true; + } + } + + DBUG_EXECUTE_IF("fts_instrument_sync_interrupted", + sync->interrupted = true; + error = DB_INTERRUPTED; + goto end_sync; + ); + + /* Make sure all the caches are synced. */ + for (i = 0; i < ib_vector_size(cache->indexes); ++i) { + fts_index_cache_t* index_cache; + + index_cache = static_cast<fts_index_cache_t*>( + ib_vector_get(cache->indexes, i)); + + if (index_cache->index->to_be_dropped + || fts_sync_index_check(index_cache)) { + continue; + } + + /* An index still has unsynced words: run another pass. */ + goto begin_sync; + } + +end_sync: + /* Both fts_sync_commit() and fts_sync_rollback() release cache->lock, so it is re-acquired below before the state is updated. */ + if (error == DB_SUCCESS && !sync->interrupted) { + error = fts_sync_commit(sync); + } else { + fts_sync_rollback(sync); + } + + mysql_mutex_lock(&cache->lock); + ut_ad(sync->in_progress); + sync->interrupted = false; + sync->in_progress = false; + pthread_cond_broadcast(&sync->cond); + mysql_mutex_unlock(&cache->lock); + + /* We need to check whether an optimize is required, for that + we make copies of the two variables that control the trigger. These + variables can change behind our back and we don't want to hold the + lock for longer than is needed. 
*/ + mysql_mutex_lock(&cache->deleted_lock); + + /* NOTE(review): the comment above mentions copying the counters, but the code below only resets them to zero. */ + cache->added = 0; + cache->deleted = 0; + + mysql_mutex_unlock(&cache->deleted_lock); + + return(error); +} + +/** Run SYNC on the table, i.e., write out data from the cache to the +FTS auxiliary INDEX table and clear the cache at the end. +The cache lock is released while nodes are written only when wait is false. +@param[in,out] table fts table +@param[in] wait whether wait for existing sync to finish +@return DB_SUCCESS on success, error code on failure. */ +dberr_t fts_sync_table(dict_table_t* table, bool wait) +{ + ut_ad(table->fts); + + return table->space && !table->corrupted && table->fts->cache + ? fts_sync(table->fts->cache->sync, !wait, wait) + : DB_SUCCESS; +} + +/** Check if a fts token is a stopword or less than fts_min_token_size +or greater than fts_max_token_size. +@param[in] token token string +@param[in] stopwords stopwords rb tree, or NULL to skip the stopword check +@param[in] cs token charset +@retval true if it is not stopword and length in range +@retval false if it is stopword or length not in range */ +bool +fts_check_token( + const fts_string_t* token, + const ib_rbt_t* stopwords, + const CHARSET_INFO* cs) +{ + ut_ad(cs != NULL || stopwords == NULL); + + ib_rbt_bound_t parent; + + return(token->f_n_char >= fts_min_token_size + && token->f_n_char <= fts_max_token_size + && (stopwords == NULL + || rbt_search(stopwords, &parent, token) != 0)); +} + +/** Add the token and its start position to the token's list of positions. 
+@param[in,out] result_doc result doc rb tree +@param[in] str token string +@param[in] position token position */ +static +void +fts_add_token( + fts_doc_t* result_doc, + fts_string_t str, + ulint position) +{ + /* Ignore string whose character number is less than + "fts_min_token_size" or more than "fts_max_token_size" */ + /* Stopwords are not filtered here: NULL is passed as the stopword tree. */ + + if (fts_check_token(&str, NULL, result_doc->charset)) { + + mem_heap_t* heap; + fts_string_t t_str; + fts_token_t* token; + ib_rbt_bound_t parent; + ulint newlen; + + heap = static_cast<mem_heap_t*>(result_doc->self_heap->arg); + + t_str.f_n_char = str.f_n_char; + + /* Reserve room for the lower-cased form plus a terminating NUL byte. */ + t_str.f_len = str.f_len * result_doc->charset->casedn_multiply() + 1; + + t_str.f_str = static_cast<byte*>( + mem_heap_alloc(heap, t_str.f_len)); + + /* For binary collations, a case sensitive search is + performed. Hence don't convert to lower case. */ + if (my_binary_compare(result_doc->charset)) { + memcpy(t_str.f_str, str.f_str, str.f_len); + t_str.f_str[str.f_len]= 0; + newlen= str.f_len; + } else { + newlen = innobase_fts_casedn_str( + result_doc->charset, (char*) str.f_str, str.f_len, + (char*) t_str.f_str, t_str.f_len); + } + + t_str.f_len = newlen; + t_str.f_str[newlen] = 0; + + /* Add the word to the document statistics. If the word + hasn't been seen before we create a new entry for it. 
*/ + if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) { + fts_token_t new_token; + + new_token.text.f_len = newlen; + new_token.text.f_str = t_str.f_str; + new_token.text.f_n_char = t_str.f_n_char; + + new_token.positions = ib_vector_create( + result_doc->self_heap, sizeof(ulint), 32); + + parent.last = rbt_add_node( + result_doc->tokens, &parent, &new_token); + + ut_ad(rbt_validate(result_doc->tokens)); + } + + /* parent.last now refers to the existing or the newly added token entry. */ + token = rbt_value(fts_token_t, parent.last); + ib_vector_push(token->positions, &position); + } +} + +/******************************************************************** +Process next token from document starting at the given position, i.e., add +the token's start position to the token's list of positions. +@return number of characters handled in this call */ +static +ulint +fts_process_token( +/*==============*/ + fts_doc_t* doc, /* in/out: document to + tokenize */ + fts_doc_t* result, /* out: if provided, save + result here */ + ulint start_pos, /*!< in: start position in text */ + ulint add_pos) /*!< in: add this position to all + tokens from this tokenization */ +{ + ulint ret; + fts_string_t str; + ulint position; + fts_doc_t* result_doc; + byte buf[FTS_MAX_WORD_LEN + 1]; + + str.f_str = buf; + + /* Determine where to save the result. */ + result_doc = (result != NULL) ? result : doc; + + /* The length of a string in characters is set here only. 
*/ + + ret = innobase_mysql_fts_get_token( + doc->charset, doc->text.f_str + start_pos, + doc->text.f_str + doc->text.f_len, &str); + + /* The token ends where the scan stopped, so its start is the scan end minus the token length, shifted by add_pos. */ + position = start_pos + ret - str.f_len + add_pos; + + fts_add_token(result_doc, str, position); + + return(ret); +} + +/*************************************************************//** +Get token char size by charset +@return token size */ +ulint +fts_get_token_size( +/*===============*/ + const CHARSET_INFO* cs, /*!< in: Character set */ + const char* token, /*!< in: token */ + ulint len) /*!< in: token length */ +{ + char* start; + char* end; + ulint size = 0; + + /* const_cast is for reinterpret_cast below, or it will fail. */ + start = const_cast<char*>(token); + end = start + len; + while (start < end) { + int ctype; + int mbl; + + mbl = cs->ctype( + &ctype, + reinterpret_cast<uchar*>(start), + reinterpret_cast<uchar*>(end)); + + size++; + + /* Advance by the magnitude of the reported length, and by one byte when it is zero, so that the loop always makes progress. */ + start += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1); + } + + return(size); +} + +/*************************************************************//** +FTS plugin parser 'mysql_parse' callback function for document tokenize. +Refer to 'st_mysql_ftparser_param' for more detail. 
+@return always returns 0 */ +int +fts_tokenize_document_internal( +/*===========================*/ + MYSQL_FTPARSER_PARAM* param, /*!< in: parser parameter */ + const char* doc,/*!< in/out: document */ + int len) /*!< in: document length */ +{ + fts_string_t str; + byte buf[FTS_MAX_WORD_LEN + 1]; + /* JAN: TODO: MySQL 5.7 + MYSQL_FTPARSER_BOOLEAN_INFO bool_info = + { FT_TOKEN_WORD, 0, 0, 0, 0, 0, ' ', 0 }; + */ + MYSQL_FTPARSER_BOOLEAN_INFO bool_info = + { FT_TOKEN_WORD, 0, 0, 0, 0, ' ', 0}; + + ut_ad(len >= 0); + + str.f_str = buf; + + for (ulint i = 0, inc = 0; i < static_cast<ulint>(len); i += inc) { + inc = innobase_mysql_fts_get_token( + const_cast<CHARSET_INFO*>(param->cs), + (uchar*)(doc) + i, + (uchar*)(doc) + len, + &str); + + if (str.f_len > 0) { + /* JAN: TODO: MySQL 5.7 + bool_info.position = + static_cast<int>(i + inc - str.f_len); + ut_ad(bool_info.position >= 0); + */ + + /* Stop when add word fails */ + if (param->mysql_add_word( + param, + reinterpret_cast<char*>(str.f_str), + static_cast<int>(str.f_len), + &bool_info)) { + break; + } + } + } + + return(0); +} + +/******************************************************************//** +FTS plugin parser 'myql_add_word' callback function for document tokenize. +Refer to 'st_mysql_ftparser_param' for more detail. 
+@return always returns 0 */ +static +int +fts_tokenize_add_word_for_parser( +/*=============================*/ + MYSQL_FTPARSER_PARAM* param, /* in: parser paramter */ + const char* word, /* in: token word */ + int word_len, /* in: word len */ + MYSQL_FTPARSER_BOOLEAN_INFO*) +{ + fts_string_t str; + fts_tokenize_param_t* fts_param; + fts_doc_t* result_doc; + ulint position; + + fts_param = static_cast<fts_tokenize_param_t*>(param->mysql_ftparam); + result_doc = fts_param->result_doc; + ut_ad(result_doc != NULL); + + str.f_str = (byte*)(word); + str.f_len = ulint(word_len); + str.f_n_char = fts_get_token_size( + const_cast<CHARSET_INFO*>(param->cs), word, str.f_len); + + /* JAN: TODO: MySQL 5.7 FTS + ut_ad(boolean_info->position >= 0); + position = boolean_info->position + fts_param->add_pos; + */ + position = fts_param->add_pos++; + + fts_add_token(result_doc, str, position); + + return(0); +} + +/******************************************************************//** +Parse a document using an external / user supplied parser */ +static +void +fts_tokenize_by_parser( +/*===================*/ + fts_doc_t* doc, /* in/out: document to tokenize */ + st_mysql_ftparser* parser, /* in: plugin fts parser */ + fts_tokenize_param_t* fts_param) /* in: fts tokenize param */ +{ + MYSQL_FTPARSER_PARAM param; + + ut_a(parser); + + /* Set paramters for param */ + param.mysql_parse = fts_tokenize_document_internal; + param.mysql_add_word = fts_tokenize_add_word_for_parser; + param.mysql_ftparam = fts_param; + param.cs = doc->charset; + param.doc = reinterpret_cast<char*>(doc->text.f_str); + param.length = static_cast<int>(doc->text.f_len); + param.mode= MYSQL_FTPARSER_SIMPLE_MODE; + + PARSER_INIT(parser, ¶m); + parser->parse(¶m); + PARSER_DEINIT(parser, ¶m); +} + +/** Tokenize a document. 
+@param[in,out] doc document to tokenize +@param[out] result tokenization result +@param[in] parser pluggable parser */ +static +void +fts_tokenize_document( + fts_doc_t* doc, + fts_doc_t* result, + st_mysql_ftparser* parser) +{ + ut_a(!doc->tokens); + ut_a(doc->charset); + + doc->tokens = rbt_create_arg_cmp(sizeof(fts_token_t), + innobase_fts_text_cmp, + (void*) doc->charset); + + if (parser != NULL) { + fts_tokenize_param_t fts_param; + fts_param.result_doc = (result != NULL) ? result : doc; + fts_param.add_pos = 0; + + fts_tokenize_by_parser(doc, parser, &fts_param); + } else { + ulint inc; + + for (ulint i = 0; i < doc->text.f_len; i += inc) { + inc = fts_process_token(doc, result, i, 0); + /* A zero advance would never terminate this loop. */ + ut_a(inc > 0); + } + } +} + +/** Continue to tokenize a document. +@param[in,out] doc document to tokenize +@param[in] add_pos add this position to all tokens from this tokenization +@param[out] result tokenization result +@param[in] parser pluggable parser */ +static +void +fts_tokenize_document_next( + fts_doc_t* doc, + ulint add_pos, + fts_doc_t* result, + st_mysql_ftparser* parser) +{ + /* Unlike fts_tokenize_document(), the token tree must already exist. */ + ut_a(doc->tokens); + + if (parser) { + fts_tokenize_param_t fts_param; + + fts_param.result_doc = (result != NULL) ? result : doc; + fts_param.add_pos = add_pos; + + fts_tokenize_by_parser(doc, parser, &fts_param); + } else { + ulint inc; + + for (ulint i = 0; i < doc->text.f_len; i += inc) { + inc = fts_process_token(doc, result, i, add_pos); + ut_a(inc > 0); + } + } +} + +/** Create the vector of fts_get_doc_t instances. +@param[in,out] cache fts cache +@return vector of fts_get_doc_t instances */ +static +ib_vector_t* +fts_get_docs_create( + fts_cache_t* cache) +{ + ib_vector_t* get_docs; + + mysql_mutex_assert_owner(&cache->init_lock); + + /* We need one instance of fts_get_doc_t per index. */ + get_docs = ib_vector_create(cache->self_heap, sizeof(fts_get_doc_t), 4); + + /* Create the get_doc instance, we need one of these + per FTS index. 
*/ + for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) { + + dict_index_t** index; + fts_get_doc_t* get_doc; + + index = static_cast<dict_index_t**>( + ib_vector_get(cache->indexes, i)); + + /* Pushing NULL reserves a new slot, which is zero-filled below. */ + get_doc = static_cast<fts_get_doc_t*>( + ib_vector_push(get_docs, NULL)); + + memset(get_doc, 0x0, sizeof(*get_doc)); + + get_doc->index_cache = fts_get_index_cache(cache, *index); + get_doc->cache = cache; + + /* Must find the index cache. */ + ut_a(get_doc->index_cache != NULL); + } + + return(get_docs); +} + +/******************************************************************** +Release any resources held by the fts_get_doc_t instances. */ +static +void +fts_get_docs_clear( +/*===============*/ + ib_vector_t* get_docs) /*!< in: Doc retrieval vector */ +{ + ulint i; + + /* Release the get doc graphs if any. */ + for (i = 0; i < ib_vector_size(get_docs); ++i) { + + fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>( + ib_vector_get(get_docs, i)); + + if (get_doc->get_document_graph != NULL) { + + ut_a(get_doc->index_cache); + + que_graph_free(get_doc->get_document_graph); + get_doc->get_document_graph = NULL; + } + } +} + +/*********************************************************************//** +Get the initial Doc ID by consulting the CONFIG table +@return initial Doc ID, or 0 if the table was already initialized */ +doc_id_t +fts_init_doc_id( +/*============*/ + const dict_table_t* table) /*!< in: table */ +{ + doc_id_t max_doc_id = 0; + + mysql_mutex_lock(&table->fts->cache->lock); + + /* Return if the table is already initialized for DOC ID */ + if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) { + mysql_mutex_unlock(&table->fts->cache->lock); + return(0); + } + + DEBUG_SYNC_C("fts_initialize_doc_id"); + + /* Then compare this value with the ID value stored in the CONFIG + table. The larger one will be our new initial Doc ID */ + /* NOTE(review): the dberr_t result of this call is not checked here. */ + fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id); + + /* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of + creating index (and add doc id column. 
No need to recover + documents */ + if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) { + fts_init_index((dict_table_t*) table, TRUE); + } + + table->fts->added_synced = true; + + table->fts->cache->first_doc_id = max_doc_id; + + mysql_mutex_unlock(&table->fts->cache->lock); + + ut_ad(max_doc_id > 0); + + return(max_doc_id); +} + +#ifdef FTS_MULT_INDEX +/*********************************************************************//** +Check if the index is in the affected set. +@return TRUE if index is updated */ +static +ibool +fts_is_index_updated( +/*=================*/ + const ib_vector_t* fts_indexes, /*!< in: affected FTS indexes */ + const fts_get_doc_t* get_doc) /*!< in: info for reading + document */ +{ + ulint i; + dict_index_t* index = get_doc->index_cache->index; + + /* Linear search by pointer identity. */ + for (i = 0; i < ib_vector_size(fts_indexes); ++i) { + const dict_index_t* updated_fts_index; + + updated_fts_index = static_cast<const dict_index_t*>( + ib_vector_getp_const(fts_indexes, i)); + + ut_a(updated_fts_index != NULL); + + if (updated_fts_index == index) { + return(TRUE); + } + } + + return(FALSE); +} +#endif + +/*********************************************************************//** +Fetch COUNT(*) from specified table. 
+@return the number of rows in the table */ +ulint +fts_get_rows_count( +/*===============*/ + fts_table_t* fts_table) /*!< in: fts table to read */ +{ + trx_t* trx; + pars_info_t* info; + que_t* graph; + dberr_t error; + ulint count = 0; + char table_name[MAX_FULL_NAME_LEN]; + + trx = trx_create(); + trx->op_info = "fetching FT table rows count"; + + info = pars_info_create(); + + /* fts_read_ulint is bound as the fetch callback, with &count as its argument. */ + pars_info_bind_function(info, "my_func", fts_read_ulint, &count); + + fts_get_table_name(fts_table, table_name); + pars_info_bind_id(info, "table_name", table_name); + + graph = fts_parse_sql( + fts_table, + info, + "DECLARE FUNCTION my_func;\n" + "DECLARE CURSOR c IS" + " SELECT COUNT(*)" + " FROM $table_name;\n" + "BEGIN\n" + "\n" + "OPEN c;\n" + "WHILE 1 = 1 LOOP\n" + " FETCH c INTO my_func();\n" + " IF c % NOTFOUND THEN\n" + " EXIT;\n" + " END IF;\n" + "END LOOP;\n" + "CLOSE c;"); + + /* Retry only on lock wait timeout; any other error is logged and ends the loop, returning the current value of count. */ + for (;;) { + error = fts_eval_sql(trx, graph); + + if (UNIV_LIKELY(error == DB_SUCCESS)) { + fts_sql_commit(trx); + + break; /* Exit the loop. */ + } else { + fts_sql_rollback(trx); + + if (error == DB_LOCK_WAIT_TIMEOUT) { + ib::warn() << "lock wait timeout reading" + " FTS table. Retrying!"; + + trx->error_state = DB_SUCCESS; + } else { + ib::error() << "(" << error + << ") while reading FTS table " + << table_name; + + break; /* Exit the loop. */ + } + } + } + + que_graph_free(graph); + + trx->free(); + + return(count); +} + +#ifdef FTS_CACHE_SIZE_DEBUG +/*********************************************************************//** +Read the max cache size parameter from the config table. */ +static +void +fts_update_max_cache_size( +/*======================*/ + fts_sync_t* sync) /*!< in: sync state */ +{ + trx_t* trx; + fts_table_t fts_table; + + trx = trx_create(); + + FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, sync->table); + + /* The size returned is in bytes. 
*/ + sync->max_cache_size = fts_get_max_cache_size(trx, &fts_table); + + fts_sql_commit(trx); + + trx->free(); +} +#endif /* FTS_CACHE_SIZE_DEBUG */ + +/*********************************************************************//** +Free the modified rows of a table. */ +UNIV_INLINE +void +fts_trx_table_rows_free( +/*====================*/ + ib_rbt_t* rows) /*!< in: rbt of rows to free */ +{ + const ib_rbt_node_t* node; + + /* Each pass removes the current first node, so the loop re-reads rbt_first() until the tree is empty. */ + for (node = rbt_first(rows); node; node = rbt_first(rows)) { + fts_trx_row_t* row; + + row = rbt_value(fts_trx_row_t, node); + + if (row->fts_indexes != NULL) { + /* This vector shouldn't be using the + heap allocator. */ + ut_a(row->fts_indexes->allocator->arg == NULL); + + ib_vector_free(row->fts_indexes); + row->fts_indexes = NULL; + } + + ut_free(rbt_remove_node(rows, node)); + } + + ut_a(rbt_empty(rows)); + rbt_free(rows); +} + +/*********************************************************************//** +Free an FTS savepoint instance. */ +UNIV_INLINE +void +fts_savepoint_free( +/*===============*/ + fts_savepoint_t* savepoint) /*!< in: savepoint instance */ +{ + const ib_rbt_node_t* node; + ib_rbt_t* tables = savepoint->tables; + + /* Nothing to free! */ + if (tables == NULL) { + return; + } + + for (node = rbt_first(tables); node; node = rbt_first(tables)) { + fts_trx_table_t* ftt; + fts_trx_table_t** fttp; + + /* The tree stores pointers to fts_trx_table_t. */ + fttp = rbt_value(fts_trx_table_t*, node); + ftt = *fttp; + + /* This can be NULL if a savepoint was released. */ + if (ftt->rows != NULL) { + fts_trx_table_rows_free(ftt->rows); + ftt->rows = NULL; + } + + /* This can be NULL if a savepoint was released. */ + if (ftt->added_doc_ids != NULL) { + fts_doc_ids_free(ftt->added_doc_ids); + ftt->added_doc_ids = NULL; + } + + /* Free the docs-added query graph, if one was created. 
*/ + if (ftt->docs_added_graph) { + que_graph_free(ftt->docs_added_graph); + } + + /* NOTE: We are responsible for free'ing the node */ + ut_free(rbt_remove_node(tables, node)); + } + + ut_a(rbt_empty(tables)); + rbt_free(tables); + savepoint->tables = NULL; +} + +/*********************************************************************//** +Free an FTS trx. +Both the savepoints and the last_stmt vectors are released, then the heap. */ +void +fts_trx_free( +/*=========*/ + fts_trx_t* fts_trx) /* in, own: FTS trx */ +{ + ulint i; + + for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) { + fts_savepoint_t* savepoint; + + savepoint = static_cast<fts_savepoint_t*>( + ib_vector_get(fts_trx->savepoints, i)); + + /* The default savepoint name must be NULL. */ + if (i == 0) { + ut_a(savepoint->name == NULL); + } + + fts_savepoint_free(savepoint); + } + + for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) { + fts_savepoint_t* savepoint; + + savepoint = static_cast<fts_savepoint_t*>( + ib_vector_get(fts_trx->last_stmt, i)); + + /* The default savepoint name must be NULL. */ + if (i == 0) { + ut_a(savepoint->name == NULL); + } + + fts_savepoint_free(savepoint); + } + + if (fts_trx->heap) { + mem_heap_free(fts_trx->heap); + } +} + +/*********************************************************************//** +Extract the doc id from the FTS hidden column. +@return doc id that was extracted from rec */ +doc_id_t +fts_get_doc_id_from_row( +/*====================*/ + dict_table_t* table, /*!< in: table */ + dtuple_t* row) /*!< in: row whose FTS doc id we + want to extract.*/ +{ + dfield_t* field; + doc_id_t doc_id = 0; + + ut_a(table->fts->doc_col != ULINT_UNDEFINED); + + field = dtuple_get_nth_field(row, table->fts->doc_col); + + /* The hidden column must hold an integer of exactly sizeof(doc_id_t) bytes. */ + ut_a(dfield_get_len(field) == sizeof(doc_id)); + ut_a(dfield_get_type(field)->mtype == DATA_INT); + + doc_id = fts_read_doc_id( + static_cast<const byte*>(dfield_get_data(field))); + + return(doc_id); +} + +/** Extract the doc id from the record that belongs to index. 
+@param[in] rec record containing FTS_DOC_ID +@param[in] index index of rec +@param[in] offsets rec_get_offsets(rec,index) +@return doc id that was extracted from rec */ +doc_id_t +fts_get_doc_id_from_rec( + const rec_t* rec, + const dict_index_t* index, + const rec_offs* offsets) +{ + ulint f = dict_col_get_index_pos( + &index->table->cols[index->table->fts->doc_col], index); + ulint len; + doc_id_t doc_id = mach_read_from_8( + rec_get_nth_field(rec, offsets, f, &len)); + ut_ad(len == 8); + return doc_id; +} + +/*********************************************************************//** +Search the index specific cache for a particular FTS index. +@return the index specific cache else NULL */ +fts_index_cache_t* +fts_find_index_cache( +/*=================*/ + const fts_cache_t* cache, /*!< in: cache to search */ + const dict_index_t* index) /*!< in: index to search for */ +{ + /* We cast away the const because our internal function, takes + non-const cache arg and returns a non-const pointer. */ + return(static_cast<fts_index_cache_t*>( + fts_get_index_cache((fts_cache_t*) cache, index))); +} + +/*********************************************************************//** +Search cache for word. +@return the word node vector if found else NULL */ +const ib_vector_t* +fts_cache_find_word( +/*================*/ + const fts_index_cache_t*index_cache, /*!< in: cache to search */ + const fts_string_t* text) /*!< in: word to search for */ +{ + ib_rbt_bound_t parent; + const ib_vector_t* nodes = NULL; + + mysql_mutex_assert_owner(&index_cache->index->table->fts->cache->lock); + + /* Lookup the word in the rb tree */ + if (rbt_search(index_cache->words, &parent, text) == 0) { + const fts_tokenizer_word_t* word; + + word = rbt_value(fts_tokenizer_word_t, parent.last); + + nodes = word->nodes; + } + + return(nodes); +} + +/*********************************************************************//** +Append deleted doc ids to vector. 
*/ +void +fts_cache_append_deleted_doc_ids( +/*=============================*/ + fts_cache_t* cache, /*!< in: cache to use */ + ib_vector_t* vector) /*!< in: append to this vector */ +{ + mysql_mutex_lock(&cache->deleted_lock); + + if (cache->deleted_doc_ids) + for (ulint i= 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) + { + doc_id_t *update= static_cast<doc_id_t*>( + ib_vector_get(cache->deleted_doc_ids, i)); + ib_vector_push(vector, &update); + } + + mysql_mutex_unlock(&cache->deleted_lock); +} + +/*********************************************************************//** +Add the FTS document id hidden column. */ +void +fts_add_doc_id_column( +/*==================*/ + dict_table_t* table, /*!< in/out: Table with FTS index */ + mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */ +{ + dict_mem_table_add_col( + table, heap, + FTS_DOC_ID_COL_NAME, + DATA_INT, + dtype_form_prtype( + DATA_NOT_NULL | DATA_UNSIGNED + | DATA_BINARY_TYPE | DATA_FTS_DOC_ID, 0), + sizeof(doc_id_t)); + DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID); +} + +/** Add new fts doc id to the update vector. +@param[in] table the table that contains the FTS index. +@param[in,out] ufield the fts doc id field in the update vector. + No new memory is allocated for this in this + function. +@param[in,out] next_doc_id the fts doc id that has been added to the + update vector. If 0, a new fts doc id is + automatically generated. The memory provided + for this argument will be used by the update + vector. Ensure that the life time of this + memory matches that of the update vector. +@return the fts doc id used in the update vector */ +doc_id_t +fts_update_doc_id( + dict_table_t* table, + upd_field_t* ufield, + doc_id_t* next_doc_id) +{ + doc_id_t doc_id; + dberr_t error = DB_SUCCESS; + + if (*next_doc_id) { + doc_id = *next_doc_id; + } else { + /* Get the new document id that will be added. 
*/ + error = fts_get_next_doc_id(table, &doc_id); + } + + if (error == DB_SUCCESS) { + dict_index_t* clust_index; + dict_col_t* col = dict_table_get_nth_col( + table, table->fts->doc_col); + + ufield->exp = NULL; + + ufield->new_val.len = sizeof(doc_id); + + clust_index = dict_table_get_first_index(table); + + ufield->field_no = static_cast<unsigned>( + dict_col_get_clust_pos(col, clust_index)) + & dict_index_t::MAX_N_FIELDS; + dict_col_copy_type(col, dfield_get_type(&ufield->new_val)); + + /* It is possible we update record that has + not yet be sync-ed from last crash. */ + + /* Convert to storage byte order. */ + ut_a(doc_id != FTS_NULL_DOC_ID); + fts_write_doc_id((byte*) next_doc_id, doc_id); + + ufield->new_val.data = next_doc_id; + ufield->new_val.ext = 0; + } + + return(doc_id); +} + +/** fts_t constructor. +@param[in] table table with FTS indexes +@param[in,out] heap memory heap where 'this' is stored */ +fts_t::fts_t( + const dict_table_t* table, + mem_heap_t* heap) + : + added_synced(0), dict_locked(0), + add_wq(NULL), + cache(NULL), + doc_col(ULINT_UNDEFINED), in_queue(false), sync_message(false), + fts_heap(heap) +{ + ut_a(table->fts == NULL); + + ib_alloc_t* heap_alloc = ib_heap_allocator_create(fts_heap); + + indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4); + + dict_table_get_all_fts_indexes(table, indexes); +} + +/** fts_t destructor. */ +fts_t::~fts_t() +{ + ut_ad(add_wq == NULL); + + if (cache) { + fts_cache_clear(cache); + fts_cache_destroy(cache); + } + + /* There is no need to call ib_vector_free() on this->indexes + because it is stored in this->fts_heap. */ + mem_heap_free(fts_heap); +} + +/*********************************************************************//** +Create an instance of fts_t. 
+@return instance of fts_t */ +fts_t* +fts_create( +/*=======*/ + dict_table_t* table) /*!< in/out: table with FTS indexes */ +{ + fts_t* fts; + mem_heap_t* heap; + + heap = mem_heap_create(512); + + fts = static_cast<fts_t*>(mem_heap_alloc(heap, sizeof(*fts))); + + new(fts) fts_t(table, heap); + + return(fts); +} + +/*********************************************************************//** +Take a FTS savepoint. */ +UNIV_INLINE +void +fts_savepoint_copy( +/*===============*/ + const fts_savepoint_t* src, /*!< in: source savepoint */ + fts_savepoint_t* dst) /*!< out: destination savepoint */ +{ + const ib_rbt_node_t* node; + const ib_rbt_t* tables; + + tables = src->tables; + + for (node = rbt_first(tables); node; node = rbt_next(tables, node)) { + + fts_trx_table_t* ftt_dst; + const fts_trx_table_t** ftt_src; + + ftt_src = rbt_value(const fts_trx_table_t*, node); + + ftt_dst = fts_trx_table_clone(*ftt_src); + + rbt_insert(dst->tables, &ftt_dst, &ftt_dst); + } +} + +/*********************************************************************//** +Take a FTS savepoint. */ +void +fts_savepoint_take( +/*===============*/ + fts_trx_t* fts_trx, /*!< in: fts transaction */ + const char* name) /*!< in: savepoint name */ +{ + mem_heap_t* heap; + fts_savepoint_t* savepoint; + fts_savepoint_t* last_savepoint; + + ut_a(name != NULL); + + heap = fts_trx->heap; + + /* The implied savepoint must exist. */ + ut_a(ib_vector_size(fts_trx->savepoints) > 0); + + last_savepoint = static_cast<fts_savepoint_t*>( + ib_vector_last(fts_trx->savepoints)); + savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap); + + if (last_savepoint->tables != NULL) { + fts_savepoint_copy(last_savepoint, savepoint); + } +} + +/*********************************************************************//** +Lookup a savepoint instance by name. 
+@return ULINT_UNDEFINED if not found */ +UNIV_INLINE +ulint +fts_savepoint_lookup( +/*==================*/ + ib_vector_t* savepoints, /*!< in: savepoints */ + const char* name) /*!< in: savepoint name */ +{ + ulint i; + + ut_a(ib_vector_size(savepoints) > 0); + + for (i = 1; i < ib_vector_size(savepoints); ++i) { + fts_savepoint_t* savepoint; + + savepoint = static_cast<fts_savepoint_t*>( + ib_vector_get(savepoints, i)); + + if (strcmp(name, savepoint->name) == 0) { + return(i); + } + } + + return(ULINT_UNDEFINED); +} + +/*********************************************************************//** +Release the savepoint data identified by name. All savepoints created +after the named savepoint are kept. +@return DB_SUCCESS or error code */ +void +fts_savepoint_release( +/*==================*/ + trx_t* trx, /*!< in: transaction */ + const char* name) /*!< in: savepoint name */ +{ + ut_a(name != NULL); + + ib_vector_t* savepoints = trx->fts_trx->savepoints; + + ut_a(ib_vector_size(savepoints) > 0); + + ulint i = fts_savepoint_lookup(savepoints, name); + if (i != ULINT_UNDEFINED) { + ut_a(i >= 1); + + fts_savepoint_t* savepoint; + savepoint = static_cast<fts_savepoint_t*>( + ib_vector_get(savepoints, i)); + + if (i == ib_vector_size(savepoints) - 1) { + /* If the savepoint is the last, we save its + tables to the previous savepoint. */ + fts_savepoint_t* prev_savepoint; + prev_savepoint = static_cast<fts_savepoint_t*>( + ib_vector_get(savepoints, i - 1)); + + ib_rbt_t* tables = savepoint->tables; + savepoint->tables = prev_savepoint->tables; + prev_savepoint->tables = tables; + } + + fts_savepoint_free(savepoint); + ib_vector_remove(savepoints, *(void**)savepoint); + + /* Make sure we don't delete the implied savepoint. */ + ut_a(ib_vector_size(savepoints) > 0); + } +} + +/**********************************************************************//** +Refresh last statement savepoint. 
*/ +void +fts_savepoint_laststmt_refresh( +/*===========================*/ + trx_t* trx) /*!< in: transaction */ +{ + + fts_trx_t* fts_trx; + fts_savepoint_t* savepoint; + + fts_trx = trx->fts_trx; + + savepoint = static_cast<fts_savepoint_t*>( + ib_vector_pop(fts_trx->last_stmt)); + fts_savepoint_free(savepoint); + + ut_ad(ib_vector_is_empty(fts_trx->last_stmt)); + savepoint = fts_savepoint_create(fts_trx->last_stmt, NULL, NULL); +} + +/******************************************************************** +Undo the Doc ID add/delete operations in last stmt */ +static +void +fts_undo_last_stmt( +/*===============*/ + fts_trx_table_t* s_ftt, /*!< in: Transaction FTS table */ + fts_trx_table_t* l_ftt) /*!< in: last stmt FTS table */ +{ + ib_rbt_t* s_rows; + ib_rbt_t* l_rows; + const ib_rbt_node_t* node; + + l_rows = l_ftt->rows; + s_rows = s_ftt->rows; + + for (node = rbt_first(l_rows); + node; + node = rbt_next(l_rows, node)) { + fts_trx_row_t* l_row = rbt_value(fts_trx_row_t, node); + ib_rbt_bound_t parent; + + rbt_search(s_rows, &parent, &(l_row->doc_id)); + + if (parent.result == 0) { + fts_trx_row_t* s_row = rbt_value( + fts_trx_row_t, parent.last); + + switch (l_row->state) { + case FTS_INSERT: + ut_free(rbt_remove_node(s_rows, parent.last)); + break; + + case FTS_DELETE: + if (s_row->state == FTS_NOTHING) { + s_row->state = FTS_INSERT; + } else if (s_row->state == FTS_DELETE) { + ut_free(rbt_remove_node( + s_rows, parent.last)); + } + break; + + /* FIXME: Check if FTS_MODIFY need to be addressed */ + case FTS_MODIFY: + case FTS_NOTHING: + break; + default: + ut_error; + } + } + } +} + +/**********************************************************************//** +Rollback to savepoint indentified by name. 
+@return DB_SUCCESS or error code */ +void +fts_savepoint_rollback_last_stmt( +/*=============================*/ + trx_t* trx) /*!< in: transaction */ +{ + ib_vector_t* savepoints; + fts_savepoint_t* savepoint; + fts_savepoint_t* last_stmt; + fts_trx_t* fts_trx; + ib_rbt_bound_t parent; + const ib_rbt_node_t* node; + ib_rbt_t* l_tables; + ib_rbt_t* s_tables; + + fts_trx = trx->fts_trx; + savepoints = fts_trx->savepoints; + + savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints)); + last_stmt = static_cast<fts_savepoint_t*>( + ib_vector_last(fts_trx->last_stmt)); + + l_tables = last_stmt->tables; + s_tables = savepoint->tables; + + for (node = rbt_first(l_tables); + node; + node = rbt_next(l_tables, node)) { + + fts_trx_table_t** l_ftt; + + l_ftt = rbt_value(fts_trx_table_t*, node); + + rbt_search_cmp( + s_tables, &parent, &(*l_ftt)->table->id, + fts_trx_table_id_cmp, NULL); + + if (parent.result == 0) { + fts_trx_table_t** s_ftt; + + s_ftt = rbt_value(fts_trx_table_t*, parent.last); + + fts_undo_last_stmt(*s_ftt, *l_ftt); + } + } +} + +/**********************************************************************//** +Rollback to savepoint indentified by name. +@return DB_SUCCESS or error code */ +void +fts_savepoint_rollback( +/*===================*/ + trx_t* trx, /*!< in: transaction */ + const char* name) /*!< in: savepoint name */ +{ + ulint i; + ib_vector_t* savepoints; + + ut_a(name != NULL); + + savepoints = trx->fts_trx->savepoints; + + /* We pop all savepoints from the the top of the stack up to + and including the instance that was found. 
*/ + i = fts_savepoint_lookup(savepoints, name); + + if (i != ULINT_UNDEFINED) { + fts_savepoint_t* savepoint; + + ut_a(i > 0); + + while (ib_vector_size(savepoints) > i) { + fts_savepoint_t* savepoint; + + savepoint = static_cast<fts_savepoint_t*>( + ib_vector_pop(savepoints)); + + if (savepoint->name != NULL) { + /* Since name was allocated on the heap, the + memory will be released when the transaction + completes. */ + savepoint->name = NULL; + + fts_savepoint_free(savepoint); + } + } + + /* Pop all a elements from the top of the stack that may + have been released. We have to be careful that we don't + delete the implied savepoint. */ + + for (savepoint = static_cast<fts_savepoint_t*>( + ib_vector_last(savepoints)); + ib_vector_size(savepoints) > 1 + && savepoint->name == NULL; + savepoint = static_cast<fts_savepoint_t*>( + ib_vector_last(savepoints))) { + + ib_vector_pop(savepoints); + } + + /* Make sure we don't delete the implied savepoint. */ + ut_a(ib_vector_size(savepoints) > 0); + + /* Restore the savepoint. */ + fts_savepoint_take(trx->fts_trx, name); + } +} + +bool fts_check_aux_table(const char *name, + table_id_t *table_id, + index_id_t *index_id) +{ + ulint len= strlen(name); + const char* ptr; + const char* end= name + len; + + ut_ad(len <= MAX_FULL_NAME_LEN); + ptr= static_cast<const char*>(memchr(name, '/', len)); + IF_WIN(if (!ptr) ptr= static_cast<const char*>(memchr(name, '\\', len)), ); + + if (!ptr) + return false; + + /* We will start the match after the '/' */ + ++ptr; + len= end - ptr; + + /* All auxiliary tables are prefixed with "FTS_" and the name + length will be at the very least greater than 20 bytes. */ + if (len > 24 && !memcmp(ptr, "FTS_", 4)) + { + /* Skip the prefix. */ + ptr+= 4; + len-= 4; + + const char *table_id_ptr= ptr; + /* Skip the table id. */ + ptr= static_cast<const char*>(memchr(ptr, '_', len)); + + if (!ptr) + return false; + + /* Skip the underscore. 
*/ + ++ptr; + ut_ad(end > ptr); + len= end - ptr; + + sscanf(table_id_ptr, UINT64PFx, table_id); + /* First search the common table suffix array. */ + for (ulint i = 0; fts_common_tables[i]; ++i) + { + if (!strncmp(ptr, fts_common_tables[i], len)) + return true; + } + + /* Could be obsolete common tables. */ + if ((len == 5 && !memcmp(ptr, "ADDED", len)) || + (len == 9 && !memcmp(ptr, "STOPWORDS", len))) + return true; + + const char* index_id_ptr= ptr; + /* Skip the index id. */ + ptr= static_cast<const char*>(memchr(ptr, '_', len)); + if (!ptr) + return false; + + sscanf(index_id_ptr, UINT64PFx, index_id); + + /* Skip the underscore. */ + ++ptr; + ut_a(end > ptr); + len= end - ptr; + + if (len <= 4) + return false; + + len-= 4; /* .ibd suffix */ + + if (len > 7) + return false; + + /* Search the FT index specific array. */ + for (ulint i = 0; i < FTS_NUM_AUX_INDEX; ++i) + { + if (!memcmp(ptr, "INDEX_", len - 1)) + return true; + } + + /* Other FT index specific table(s). */ + if (len == 6 && !memcmp(ptr, "DOC_ID", len)) + return true; + } + + return false; +} + +/**********************************************************************//** +Check whether user supplied stopword table is of the right format. +Caller is responsible to hold dictionary locks. 
+@param stopword_table_name table name +@param row_end name of the system-versioning end column, or "value" +@return the stopword column charset +@retval NULL if the table does not exist or qualify */ +CHARSET_INFO* +fts_valid_stopword_table( +/*=====================*/ + const char* stopword_table_name, /*!< in: Stopword table + name */ + const char** row_end) /* row_end value of system-versioned table */ +{ + dict_table_t* table; + dict_col_t* col = NULL; + + if (!stopword_table_name) { + return(NULL); + } + + table = dict_sys.load_table( + {stopword_table_name, strlen(stopword_table_name)}); + + if (!table) { + ib::error() << "User stopword table " << stopword_table_name + << " does not exist."; + + return(NULL); + } else { + if (strcmp(dict_table_get_col_name(table, 0), "value")) { + ib::error() << "Invalid column name for stopword" + " table " << stopword_table_name << ". Its" + " first column must be named as 'value'."; + + return(NULL); + } + + col = dict_table_get_nth_col(table, 0); + + if (col->mtype != DATA_VARCHAR + && col->mtype != DATA_VARMYSQL) { + ib::error() << "Invalid column type for stopword" + " table " << stopword_table_name << ". Its" + " first column must be of varchar type"; + + return(NULL); + } + } + + ut_ad(col); + ut_ad(!table->versioned() || col->ind != table->vers_end); + + if (row_end) { + *row_end = table->versioned() + ? dict_table_get_col_name(table, table->vers_end) + : "value"; /* for fts_load_user_stopword() */ + } + + return(fts_get_charset(col->prtype)); +} + +/**********************************************************************//** +This function loads the stopword into the FTS cache. It also +records/fetches stopword configuration to/from FTS configure +table, depending on whether we are creating or reloading the +FTS. 
+@return true if load operation is successful */ +bool +fts_load_stopword( +/*==============*/ + const dict_table_t* + table, /*!< in: Table with FTS */ + trx_t* trx, /*!< in: Transactions */ + const char* session_stopword_table, /*!< in: Session stopword table + name */ + bool stopword_is_on, /*!< in: Whether stopword + option is turned on/off */ + bool reload) /*!< in: Whether it is + for reloading FTS table */ +{ + fts_table_t fts_table; + fts_string_t str; + dberr_t error = DB_SUCCESS; + ulint use_stopword; + fts_cache_t* cache; + const char* stopword_to_use = NULL; + ibool new_trx = FALSE; + byte str_buffer[MAX_FULL_NAME_LEN + 1]; + + FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, table); + + cache = table->fts->cache; + + if (!reload && !(cache->stopword_info.status & STOPWORD_NOT_INIT)) { + return true; + } + + if (!trx) { + trx = trx_create(); +#ifdef UNIV_DEBUG + trx->start_line = __LINE__; + trx->start_file = __FILE__; +#endif + trx_start_internal_low(trx, !high_level_read_only); + trx->op_info = "upload FTS stopword"; + new_trx = TRUE; + } + + /* First check whether stopword filtering is turned off */ + if (reload) { + error = fts_config_get_ulint( + trx, &fts_table, FTS_USE_STOPWORD, &use_stopword); + } else { + use_stopword = (ulint) stopword_is_on; + + error = fts_config_set_ulint( + trx, &fts_table, FTS_USE_STOPWORD, use_stopword); + } + + if (error != DB_SUCCESS) { + goto cleanup; + } + + /* If stopword is turned off, no need to continue to load the + stopword into cache, but still need to do initialization */ + if (!use_stopword) { + cache->stopword_info.status = STOPWORD_OFF; + goto cleanup; + } + + if (reload) { + /* Fetch the stopword table name from FTS config + table */ + str.f_n_char = 0; + str.f_str = str_buffer; + str.f_len = sizeof(str_buffer) - 1; + + error = fts_config_get_value( + trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str); + + if (error != DB_SUCCESS) { + goto cleanup; + } + + if (*str.f_str) { + stopword_to_use = 
(const char*) str.f_str; + } + } else { + stopword_to_use = session_stopword_table; + } + + if (stopword_to_use + && fts_load_user_stopword(table->fts, stopword_to_use, + &cache->stopword_info)) { + /* Save the stopword table name to the configure + table */ + if (!reload) { + str.f_n_char = 0; + str.f_str = (byte*) stopword_to_use; + str.f_len = strlen(stopword_to_use); + + error = fts_config_set_value( + trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str); + } + } else { + /* Load system default stopword list */ + fts_load_default_stopword(&cache->stopword_info); + } + +cleanup: + if (new_trx) { + if (error == DB_SUCCESS) { + fts_sql_commit(trx); + } else { + fts_sql_rollback(trx); + } + + trx->free(); + } + + if (!cache->stopword_info.cached_stopword) { + cache->stopword_info.cached_stopword = rbt_create_arg_cmp( + sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp, + &my_charset_latin1); + } + + return error == DB_SUCCESS; +} + +/**********************************************************************//** +Callback function when we initialize the FTS at the start up +time. It recovers the maximum Doc IDs presented in the current table. 
+Tested by innodb_fts.crash_recovery +@return: always returns TRUE */ +static +ibool +fts_init_get_doc_id( +/*================*/ + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: table with fts */ +{ + doc_id_t doc_id = FTS_NULL_DOC_ID; + sel_node_t* node = static_cast<sel_node_t*>(row); + que_node_t* exp = node->select_list; + dict_table_t* table = static_cast<dict_table_t *>(user_arg); + fts_cache_t* cache = table->fts->cache; + + ut_ad(ib_vector_is_empty(cache->get_docs)); + + /* Copy each indexed column content into doc->text.f_str */ + if (exp) { + dfield_t* dfield = que_node_get_val(exp); + dtype_t* type = dfield_get_type(dfield); + void* data = dfield_get_data(dfield); + + ut_a(dtype_get_mtype(type) == DATA_INT); + + doc_id = static_cast<doc_id_t>(mach_read_from_8( + static_cast<const byte*>(data))); + + exp = que_node_get_next(que_node_get_next(exp)); + if (exp) { + ut_ad(table->versioned()); + dfield = que_node_get_val(exp); + type = dfield_get_type(dfield); + ut_ad(type->vers_sys_end()); + data = dfield_get_data(dfield); + ulint len = dfield_get_len(dfield); + if (table->versioned_by_id()) { + ut_ad(len == sizeof trx_id_max_bytes); + if (0 != memcmp(data, trx_id_max_bytes, len)) { + return true; + } + } else { + ut_ad(len == sizeof timestamp_max_bytes); + if (0 != memcmp(data, timestamp_max_bytes, len)) { + return true; + } + } + ut_ad(!(exp = que_node_get_next(exp))); + } + ut_ad(!exp); + + if (doc_id >= cache->next_doc_id) { + cache->next_doc_id = doc_id + 1; + } + } + + return(TRUE); +} + +/**********************************************************************//** +Callback function when we initialize the FTS at the start up +time. It recovers Doc IDs that have not sync-ed to the auxiliary +table, and require to bring them back into FTS index. 
+@return: always returns TRUE */ +static +ibool +fts_init_recover_doc( +/*=================*/ + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: fts cache */ +{ + + fts_doc_t doc; + ulint doc_len = 0; + ulint field_no = 0; + fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(user_arg); + doc_id_t doc_id = FTS_NULL_DOC_ID; + sel_node_t* node = static_cast<sel_node_t*>(row); + que_node_t* exp = node->select_list; + fts_cache_t* cache = get_doc->cache; + st_mysql_ftparser* parser = get_doc->index_cache->index->parser; + + fts_doc_init(&doc); + doc.found = TRUE; + + ut_ad(cache); + + /* Copy each indexed column content into doc->text.f_str */ + while (exp) { + dfield_t* dfield = que_node_get_val(exp); + ulint len = dfield_get_len(dfield); + + if (field_no == 0) { + dtype_t* type = dfield_get_type(dfield); + void* data = dfield_get_data(dfield); + + ut_a(dtype_get_mtype(type) == DATA_INT); + + doc_id = static_cast<doc_id_t>(mach_read_from_8( + static_cast<const byte*>(data))); + + field_no++; + exp = que_node_get_next(exp); + continue; + } + + if (len == UNIV_SQL_NULL) { + exp = que_node_get_next(exp); + continue; + } + + ut_ad(get_doc); + + if (!get_doc->index_cache->charset) { + get_doc->index_cache->charset = fts_get_charset( + dfield->type.prtype); + } + + doc.charset = get_doc->index_cache->charset; + + if (dfield_is_ext(dfield)) { + dict_table_t* table = cache->sync->table; + + doc.text.f_str = btr_copy_externally_stored_field( + &doc.text.f_len, + static_cast<byte*>(dfield_get_data(dfield)), + table->space->zip_size(), len, + static_cast<mem_heap_t*>(doc.self_heap->arg)); + } else { + doc.text.f_str = static_cast<byte*>( + dfield_get_data(dfield)); + + doc.text.f_len = len; + } + + if (field_no == 1) { + fts_tokenize_document(&doc, NULL, parser); + } else { + fts_tokenize_document_next(&doc, doc_len, NULL, parser); + } + + exp = que_node_get_next(exp); + + doc_len += (exp) ? 
len + 1 : len; + + field_no++; + } + + fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens); + + fts_doc_free(&doc); + + cache->added++; + + if (doc_id >= cache->next_doc_id) { + cache->next_doc_id = doc_id + 1; + } + + return(TRUE); +} + +/**********************************************************************//** +This function brings FTS index in sync when FTS index is first +used. There are documents that have not yet sync-ed to auxiliary +tables from last server abnormally shutdown, we will need to bring +such document into FTS cache before any further operations */ +void +fts_init_index( +/*===========*/ + dict_table_t* table, /*!< in: Table with FTS */ + bool has_cache_lock) /*!< in: Whether we already have + cache lock */ +{ + dict_index_t* index; + doc_id_t start_doc; + fts_get_doc_t* get_doc = NULL; + fts_cache_t* cache = table->fts->cache; + bool need_init = false; + + /* First check cache->get_docs is initialized */ + if (!has_cache_lock) { + mysql_mutex_lock(&cache->lock); + } + + mysql_mutex_lock(&cache->init_lock); + if (cache->get_docs == NULL) { + cache->get_docs = fts_get_docs_create(cache); + } + mysql_mutex_unlock(&cache->init_lock); + + if (table->fts->added_synced) { + goto func_exit; + } + + need_init = true; + + start_doc = cache->synced_doc_id; + + if (!start_doc) { + fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc); + cache->synced_doc_id = start_doc; + } + + /* No FTS index, this is the case when previous FTS index + dropped, and we re-initialize the Doc ID system for subsequent + insertion */ + if (ib_vector_is_empty(cache->get_docs)) { + index = table->fts_doc_id_index; + + ut_a(index); + + fts_doc_fetch_by_doc_id(NULL, start_doc, index, + FTS_FETCH_DOC_BY_ID_LARGE, + fts_init_get_doc_id, table); + } else { + if (table->fts->cache->stopword_info.status + & STOPWORD_NOT_INIT) { + fts_load_stopword(table, NULL, NULL, true, true); + } + + for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) { + get_doc = 
static_cast<fts_get_doc_t*>( + ib_vector_get(cache->get_docs, i)); + + index = get_doc->index_cache->index; + + fts_doc_fetch_by_doc_id(NULL, start_doc, index, + FTS_FETCH_DOC_BY_ID_LARGE, + fts_init_recover_doc, get_doc); + } + } + + table->fts->added_synced = true; + + fts_get_docs_clear(cache->get_docs); + +func_exit: + if (!has_cache_lock) { + mysql_mutex_unlock(&cache->lock); + } + + if (need_init) { + dict_sys.lock(SRW_LOCK_CALL); + /* Register the table with the optimize thread. */ + fts_optimize_add_table(table); + dict_sys.unlock(); + } +} diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc new file mode 100644 index 00000000..fe31767d --- /dev/null +++ b/storage/innobase/fts/fts0opt.cc @@ -0,0 +1,3054 @@ +/***************************************************************************** + +Copyright (c) 2007, 2018, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2016, 2022, MariaDB Corporation. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file fts/fts0opt.cc +Full Text Search optimize thread + +Created 2007/03/27 Sunny Bains +Completed 2011/7/10 Sunny and Jimmy Yang + +***********************************************************************/ + +#include "fts0fts.h" +#include "row0sel.h" +#include "que0types.h" +#include "fts0priv.h" +#include "fts0types.h" +#include "ut0wqueue.h" +#include "srv0start.h" +#include "ut0list.h" +#include "zlib.h" +#include "fts0opt.h" +#include "fts0vlc.h" +#include "wsrep.h" + +#ifdef WITH_WSREP +extern Atomic_relaxed<bool> wsrep_sst_disable_writes; +#else +constexpr bool wsrep_sst_disable_writes= false; +#endif + +/** The FTS optimize thread's work queue. */ +ib_wqueue_t* fts_optimize_wq; +static void fts_optimize_callback(void *); +static void timer_callback(void*); +static tpool::timer* timer; + +static tpool::task_group task_group(1); +static tpool::task task(fts_optimize_callback,0, &task_group); + +/** FTS optimize thread, for MDL acquisition */ +static THD *fts_opt_thd; + +/** The FTS vector to store fts_slot_t */ +static ib_vector_t* fts_slots; + +/** Default optimize interval in secs. */ +static const ulint FTS_OPTIMIZE_INTERVAL_IN_SECS = 300; + +/** Server is shutting down, so does we exiting the optimize thread */ +static bool fts_opt_start_shutdown = false; + +/** Condition variable for shutting down the optimize thread. +Protected by fts_optimize_wq->mutex. */ +static pthread_cond_t fts_opt_shutdown_cond; + +/** Initial size of nodes in fts_word_t. 
*/ +static const ulint FTS_WORD_NODES_INIT_SIZE = 64; + +/** Last time we did check whether system need a sync */ +static time_t last_check_sync_time; + +/** FTS optimize thread message types. */ +enum fts_msg_type_t { + FTS_MSG_STOP, /*!< Stop optimizing and exit thread */ + + FTS_MSG_ADD_TABLE, /*!< Add table to the optimize thread's + work queue */ + + FTS_MSG_DEL_TABLE, /*!< Remove a table from the optimize + threads work queue */ + FTS_MSG_SYNC_TABLE /*!< Sync fts cache of a table */ +}; + +/** Compressed list of words that have been read from FTS INDEX +that needs to be optimized. */ +struct fts_zip_t { + lint status; /*!< Status of (un)/zip operation */ + + ulint n_words; /*!< Number of words compressed */ + + ulint block_sz; /*!< Size of a block in bytes */ + + ib_vector_t* blocks; /*!< Vector of compressed blocks */ + + ib_alloc_t* heap_alloc; /*!< Heap to use for allocations */ + + ulint pos; /*!< Offset into blocks */ + + ulint last_big_block; /*!< Offset of last block in the + blocks array that is of size + block_sz. Blocks beyond this offset + are of size FTS_MAX_WORD_LEN */ + + z_streamp zp; /*!< ZLib state */ + + /*!< The value of the last word read + from the FTS INDEX table. This is + used to discard duplicates */ + + fts_string_t word; /*!< UTF-8 string */ + + ulint max_words; /*!< maximum number of words to read + in one pase */ +}; + +/** Prepared statemets used during optimize */ +struct fts_optimize_graph_t { + /*!< Delete a word from FTS INDEX */ + que_t* delete_nodes_graph; + /*!< Insert a word into FTS INDEX */ + que_t* write_nodes_graph; + /*!< COMMIT a transaction */ + que_t* commit_graph; + /*!< Read the nodes from FTS_INDEX */ + que_t* read_nodes_graph; +}; + +/** Used by fts_optimize() to store state. 
*/ +struct fts_optimize_t { + trx_t* trx; /*!< The transaction used for all SQL */ + + ib_alloc_t* self_heap; /*!< Heap to use for allocations */ + + char* name_prefix; /*!< FTS table name prefix */ + + fts_table_t fts_index_table;/*!< Common table definition */ + + /*!< Common table definition */ + fts_table_t fts_common_table; + + dict_table_t* table; /*!< Table that has to be queried */ + + dict_index_t* index; /*!< The FTS index to be optimized */ + + fts_doc_ids_t* to_delete; /*!< doc ids to delete, we check against + this vector and purge the matching + entries during the optimizing + process. The vector entries are + sorted on doc id */ + + ulint del_pos; /*!< Offset within to_delete vector, + this is used to keep track of where + we are up to in the vector */ + + ibool done; /*!< TRUE when optimize finishes */ + + ib_vector_t* words; /*!< Word + Nodes read from FTS_INDEX, + it contains instances of fts_word_t */ + + fts_zip_t* zip; /*!< Words read from the FTS_INDEX */ + + fts_optimize_graph_t /*!< Prepared statements used during */ + graph; /*optimize */ + + ulint n_completed; /*!< Number of FTS indexes that have + been optimized */ + ibool del_list_regenerated; + /*!< BEING_DELETED list regenarated */ +}; + +/** Used by the optimize, to keep state during compacting nodes. */ +struct fts_encode_t { + doc_id_t src_last_doc_id;/*!< Last doc id read from src node */ + byte* src_ilist_ptr; /*!< Current ptr within src ilist */ +}; + +/** We use this information to determine when to start the optimize +cycle for a table. 
*/ +struct fts_slot_t { + /** table, or NULL if the slot is unused */ + dict_table_t* table; + + /** whether this slot is being processed */ + bool running; + + ulint added; /*!< Number of doc ids added since the + last time this table was optimized */ + + ulint deleted; /*!< Number of doc ids deleted since the + last time this table was optimized */ + + /** time(NULL) of completing fts_optimize_table_bk() */ + time_t last_run; + + /** time(NULL) of latest successful fts_optimize_table() */ + time_t completed; +}; + +/** A table remove message for the FTS optimize thread. */ +struct fts_msg_del_t +{ + /** the table to remove */ + dict_table_t *table; + /** condition variable to signal message consumption */ + pthread_cond_t *cond; +}; + +/** The FTS optimize message work queue message type. */ +struct fts_msg_t { + fts_msg_type_t type; /*!< Message type */ + + void* ptr; /*!< The message contents */ + + mem_heap_t* heap; /*!< The heap used to allocate this + message, the message consumer will + free the heap. */ +}; + +/** The number of words to read and optimize in a single pass. */ +ulong fts_num_word_optimize; + +/** Whether to enable additional FTS diagnostic printout. */ +char fts_enable_diag_print; + +/** ZLib compressed block size.*/ +static ulint FTS_ZIP_BLOCK_SIZE = 1024; + +/** The amount of time optimizing in a single pass, in seconds. */ +static ulint fts_optimize_time_limit; + +/** It's defined in fts0fts.cc */ +extern const char* fts_common_tables[]; + +/** SQL Statement for changing state of rows to be deleted from FTS Index. 
*/ +static const char* fts_init_delete_sql = + "BEGIN\n" + "\n" + "INSERT INTO $BEING_DELETED\n" + "SELECT doc_id FROM $DELETED;\n" + "\n" + "INSERT INTO $BEING_DELETED_CACHE\n" + "SELECT doc_id FROM $DELETED_CACHE;\n"; + +/** SQL statement that deletes one doc id from $DELETED and one from $DELETED_CACHE. */ static const char* fts_delete_doc_ids_sql = + "BEGIN\n" + "\n" + "DELETE FROM $DELETED WHERE doc_id = :doc_id1;\n" + "DELETE FROM $DELETED_CACHE WHERE doc_id = :doc_id2;\n"; + +/** SQL statement that empties $BEING_DELETED and $BEING_DELETED_CACHE. */ static const char* fts_end_delete_sql = + "BEGIN\n" + "\n" + "DELETE FROM $BEING_DELETED;\n" + "DELETE FROM $BEING_DELETED_CACHE;\n"; + +/**********************************************************************//** +Reset an existing fts_zip_t for reuse: the position, word count, status, +last_big_block, current word, block vector and z_stream are all cleared. +block_sz and max_words are left unchanged. */ +static +void +fts_zip_initialize( +/*===============*/ + fts_zip_t* zip) /*!< in/out: zip instance to reset */ +{ + zip->pos = 0; + zip->n_words = 0; + + zip->status = Z_OK; + + zip->last_big_block = 0; + + zip->word.f_len = 0; + *zip->word.f_str = 0; + + ib_vector_reset(zip->blocks); + + memset(zip->zp, 0, sizeof(*zip->zp)); +} + +/**********************************************************************//** +Create an instance of fts_zip_t. All memory (the instance, the word buffer +of FTS_MAX_WORD_LEN + 1 bytes, the block vector and the z_stream) comes +from the given heap. +@return a new instance of fts_zip_t */ +static +fts_zip_t* +fts_zip_create( +/*===========*/ + mem_heap_t* heap, /*!< in: heap */ + ulint block_sz, /*!< in: size of a zip block.*/ + ulint max_words) /*!< in: max words to read */ +{ + fts_zip_t* zip; + + zip = static_cast<fts_zip_t*>(mem_heap_zalloc(heap, sizeof(*zip))); + + zip->word.f_str = static_cast<byte*>( + mem_heap_zalloc(heap, FTS_MAX_WORD_LEN + 1)); + + zip->block_sz = block_sz; + + zip->heap_alloc = ib_heap_allocator_create(heap); + + zip->blocks = ib_vector_create(zip->heap_alloc, sizeof(void*), 128); + + zip->max_words = max_words; + + zip->zp = static_cast<z_stream*>( + mem_heap_zalloc(heap, sizeof(*zip->zp))); + + return(zip); +} + +/**********************************************************************//** +Initialize an instance of fts_zip_t. 
*/ +static +void +fts_zip_init( +/*=========*/ + + fts_zip_t* zip) /*!< in/out: zip instance to init; only the z_stream and the current word are cleared */ +{ + memset(zip->zp, 0, sizeof(*zip->zp)); + + zip->word.f_len = 0; + *zip->word.f_str = '\0'; +} + +/**********************************************************************//** +Initialize a caller-provided fts_word_t in place: zero it, copy the text +into a newly created heap and create the nodes vector on that same heap. +@return the word pointer that was passed in */ +static +fts_word_t* +fts_word_init( +/*==========*/ + fts_word_t* word, /*!< out: word to initialize */ + byte* utf8, /*!< in: UTF-8 string */ + ulint len) /*!< in: length of string in bytes */ +{ + mem_heap_t* heap = mem_heap_create(sizeof(fts_node_t)); + + memset(word, 0, sizeof(*word)); + + word->text.f_len = len; + word->text.f_str = static_cast<byte*>(mem_heap_alloc(heap, len + 1)); + + /* Copy len bytes of text, then NUL-terminate explicitly (the buffer + holds len + 1 bytes). */ + memcpy(word->text.f_str, utf8, word->text.f_len); + word->text.f_str[word->text.f_len] = 0; + + word->heap_alloc = ib_heap_allocator_create(heap); + + word->nodes = ib_vector_create( + word->heap_alloc, sizeof(fts_node_t), FTS_WORD_NODES_INIT_SIZE); + + return(word); +} + +/**********************************************************************//** +Read the FTS INDEX row. 
+@return fts_node_t instance */ +static +fts_node_t* +fts_optimize_read_node( +/*===================*/ + fts_word_t* word, /*!< in/out: word; a new node is pushed onto word->nodes */ + que_node_t* exp) /*!< in: select list expression that follows the word column */ +{ + int i; + fts_node_t* node = static_cast<fts_node_t*>( + ib_vector_push(word->nodes, NULL)); + + /* Start from 1 since the first column (the word) has been read by the caller */ + for (i = 1; exp; exp = que_node_get_next(exp), ++i) { + + dfield_t* dfield = que_node_get_val(exp); + byte* data = static_cast<byte*>( + dfield_get_data(dfield)); + ulint len = dfield_get_len(dfield); + + ut_a(len != UNIV_SQL_NULL); + + /* Note: The column numbers below must match the SELECT */ + switch (i) { + case 1: /* DOC_COUNT */ + node->doc_count = mach_read_from_4(data); + break; + + case 2: /* FIRST_DOC_ID */ + node->first_doc_id = fts_read_doc_id(data); + break; + + case 3: /* LAST_DOC_ID */ + node->last_doc_id = fts_read_doc_id(data); + break; + + case 4: /* ILIST: copied into a separately allocated buffer of exactly len bytes */ + node->ilist_size_alloc = node->ilist_size = len; + node->ilist = static_cast<byte*>(ut_malloc_nokey(len)); + memcpy(node->ilist, data, len); + break; + + default: + ut_error; + } + } + + /* Make sure all columns were read. */ + ut_a(i == 5); + + return(node); +} + +/**********************************************************************//** +Callback function to fetch the rows in an FTS INDEX record. 
+@return always returns non-NULL */ +ibool +fts_optimize_index_fetch_node( +/*==========================*/ + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: pointer to fts_fetch_t; its read_arg is the ib_vector_t of fts_word_t */ +{ + fts_word_t* word; + sel_node_t* sel_node = static_cast<sel_node_t*>(row); + fts_fetch_t* fetch = static_cast<fts_fetch_t*>(user_arg); + ib_vector_t* words = static_cast<ib_vector_t*>(fetch->read_arg); + que_node_t* exp = sel_node->select_list; + dfield_t* dfield = que_node_get_val(exp); + void* data = dfield_get_data(dfield); + ulint dfield_len = dfield_get_len(dfield); + fts_node_t* node; + bool is_word_init = false; + + ut_a(dfield_len <= FTS_MAX_WORD_LEN); + + /* The very first row: start the first word. */ if (ib_vector_size(words) == 0) { + + word = static_cast<fts_word_t*>(ib_vector_push(words, NULL)); + fts_word_init(word, (byte*) data, dfield_len); + is_word_init = true; + } + + word = static_cast<fts_word_t*>(ib_vector_last(words)); + + /* Start a new fts_word_t when this row's word differs from the last one collected. */ if (dfield_len != word->text.f_len + || memcmp(word->text.f_str, data, dfield_len)) { + + word = static_cast<fts_word_t*>(ib_vector_push(words, NULL)); + fts_word_init(word, (byte*) data, dfield_len); + is_word_init = true; + } + + node = fts_optimize_read_node(word, que_node_get_next(exp)); + + /* Keep a running estimate of the memory used by the fetched data. */ fetch->total_memory += node->ilist_size; + if (is_word_init) { + fetch->total_memory += sizeof(fts_word_t) + + sizeof(ib_alloc_t) + sizeof(ib_vector_t) + dfield_len + + sizeof(fts_node_t) * FTS_WORD_NODES_INIT_SIZE; + } else if (ib_vector_size(words) > FTS_WORD_NODES_INIT_SIZE) { + fetch->total_memory += sizeof(fts_node_t); + } + + /* NOTE: despite the header comment, FALSE is returned once the estimate reaches fts_result_cache_limit (presumably this stops the fetch - confirm in the caller). */ if (fetch->total_memory >= fts_result_cache_limit) { + return(FALSE); + } + + return(TRUE); +} + +/**********************************************************************//** +Read the rows from the FTS inde. 
+@return DB_SUCCESS or error code */ +dberr_t +fts_index_fetch_nodes( +/*==================*/ + trx_t* trx, /*!< in: transaction */ + que_t** graph, /*!< in/out: prepared statement; parsed and stored here when *graph is NULL */ + fts_table_t* fts_table, /*!< in/out: table of the FTS INDEX; its suffix is set when the statement is parsed */ + const fts_string_t* + word, /*!< in: the word to fetch */ + fts_fetch_t* fetch) /*!< in: fetch state; fetch->read_record is bound as the cursor callback */ +{ + pars_info_t* info; + dberr_t error; + char table_name[MAX_FULL_NAME_LEN]; + + trx->op_info = "fetching FTS index nodes"; + + /* Reuse the bound info of an existing graph, otherwise bind the table name for a new one. */ if (*graph) { + info = (*graph)->info; + } else { + ulint selected; + + info = pars_info_create(); + + ut_a(fts_table->type == FTS_INDEX_TABLE); + + selected = fts_select_index(fts_table->charset, + word->f_str, word->f_len); + + fts_table->suffix = fts_get_suffix(selected); + + fts_get_table_name(fts_table, table_name); + + pars_info_bind_id(info, "table_name", table_name); + } + + pars_info_bind_function(info, "my_func", fetch->read_record, fetch); + pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len); + + if (!*graph) { + + *graph = fts_parse_sql( + fts_table, + info, + "DECLARE FUNCTION my_func;\n" + "DECLARE CURSOR c IS" + " SELECT word, doc_count, first_doc_id, last_doc_id," + " ilist\n" + " FROM $table_name\n" + " WHERE word LIKE :word\n" + " ORDER BY first_doc_id;\n" + "BEGIN\n" + "\n" + "OPEN c;\n" + "WHILE 1 = 1 LOOP\n" + " FETCH c INTO my_func();\n" + " IF c % NOTFOUND THEN\n" + " EXIT;\n" + " END IF;\n" + "END LOOP;\n" + "CLOSE c;"); + } + + /* Evaluate the statement; retry on DB_LOCK_WAIT_TIMEOUT, stop on success or any other error. */ for (;;) { + error = fts_eval_sql(trx, *graph); + + if (UNIV_LIKELY(error == DB_SUCCESS)) { + fts_sql_commit(trx); + + break; /* Exit the loop. */ + } else { + fts_sql_rollback(trx); + + if (error == DB_LOCK_WAIT_TIMEOUT) { + ib::warn() << "lock wait timeout reading" + " FTS index. Retrying!"; + + trx->error_state = DB_SUCCESS; + } else { + ib::error() << "(" << error + << ") while reading FTS index."; + + break; /* Exit the loop. 
*/ + } + } + } + + return(error); +} + +/**********************************************************************//** +Read the next word from the compressed stream. Each word is stored as a +length prefix (sizeof(short) bytes) followed by that many bytes of text; +the text is inflated into word->f_str and NUL-terminated. +@return word->f_str, or NULL when the stream status is neither Z_OK nor +Z_STREAM_END */ +static +byte* +fts_zip_read_word( +/*==============*/ + fts_zip_t* zip, /*!< in: Zip state + data */ + fts_string_t* word) /*!< out: uncompressed word */ +{ + short len = 0; + void* null = NULL; + byte* ptr = word->f_str; + int flush = Z_NO_FLUSH; + + /* Either there was an error or we are at the Z_STREAM_END. */ + if (zip->status != Z_OK) { + return(NULL); + } + + /* First inflate the length prefix into len. */ zip->zp->next_out = reinterpret_cast<byte*>(&len); + zip->zp->avail_out = sizeof(len); + + while (zip->status == Z_OK && zip->zp->avail_out > 0) { + + /* Finished decompressing block. */ + if (zip->zp->avail_in == 0) { + + /* Free the block that's been decompressed. */ + if (zip->pos > 0) { + ulint prev = zip->pos - 1; + + ut_a(zip->pos < ib_vector_size(zip->blocks)); + + ut_free(ib_vector_getp(zip->blocks, prev)); + ib_vector_set(zip->blocks, prev, &null); + } + + /* Any more blocks to decompress. */ + if (zip->pos < ib_vector_size(zip->blocks)) { + + zip->zp->next_in = static_cast<byte*>( + ib_vector_getp( + zip->blocks, zip->pos)); + + /* Blocks after last_big_block are the smaller trailing blocks. */ if (zip->pos > zip->last_big_block) { + zip->zp->avail_in = + FTS_MAX_WORD_LEN; + } else { + zip->zp->avail_in = + static_cast<uInt>(zip->block_sz); + } + + ++zip->pos; + } else { + flush = Z_FINISH; + } + } + + switch (zip->status = inflate(zip->zp, flush)) { + case Z_OK: + /* The length prefix is complete: point the output at the word buffer and inflate len bytes of text next. */ if (zip->zp->avail_out == 0 && len > 0) { + + ut_a(len <= FTS_MAX_WORD_LEN); + ptr[len] = 0; + + zip->zp->next_out = ptr; + zip->zp->avail_out = uInt(len); + + word->f_len = ulint(len); + len = 0; + } + break; + + case Z_BUF_ERROR: /* No progress possible. */ + case Z_STREAM_END: + inflateEnd(zip->zp); + break; + + case Z_STREAM_ERROR: + default: + ut_error; + } + } + + /* All blocks must be freed at end of inflate. 
*/ + if (zip->status != Z_OK) { + for (ulint i = 0; i < ib_vector_size(zip->blocks); ++i) { + if (ib_vector_getp(zip->blocks, i)) { + ut_free(ib_vector_getp(zip->blocks, i)); + ib_vector_set(zip->blocks, i, &null); + } + } + } + + if (ptr != NULL) { + ut_ad(word->f_len == strlen((char*) ptr)); + } + + return(zip->status == Z_OK || zip->status == Z_STREAM_END ? ptr : NULL); +} + +/**********************************************************************//** +Callback function to fetch and compress the word in an FTS +INDEX record. A word equal to the previously fetched one is skipped. +@return FALSE once zip->max_words words have been collected, TRUE otherwise */ +static +ibool +fts_fetch_index_words( +/*==================*/ + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: pointer to fts_zip_t */ +{ + sel_node_t* sel_node = static_cast<sel_node_t*>(row); + fts_zip_t* zip = static_cast<fts_zip_t*>(user_arg); + que_node_t* exp = sel_node->select_list; + dfield_t* dfield = que_node_get_val(exp); + + ut_a(dfield_get_len(dfield) <= FTS_MAX_WORD_LEN); + + uint16 len = uint16(dfield_get_len(dfield)); + void* data = dfield_get_data(dfield); + + /* Skip the duplicate words. */ + if (zip->word.f_len == len && !memcmp(zip->word.f_str, data, len)) { + return(TRUE); + } + + /* Remember this word so that the next row can be compared against it. */ memcpy(zip->word.f_str, data, len); + zip->word.f_len = len; + + ut_a(zip->zp->avail_in == 0); + ut_a(zip->zp->next_in == NULL); + + /* The string is prefixed by len. */ + /* FIXME: This is not byte order agnostic (InnoDB data files + with FULLTEXT INDEX are not portable between little-endian and + big-endian systems!) */ + zip->zp->next_in = reinterpret_cast<byte*>(&len); + zip->zp->avail_in = sizeof(len); + + /* Compress the word, create output blocks as necessary. */ + while (zip->zp->avail_in > 0) { + + /* No space left in output buffer, create a new one. 
*/ + if (zip->zp->avail_out == 0) { + byte* block; + + block = static_cast<byte*>( + ut_malloc_nokey(zip->block_sz)); + + ib_vector_push(zip->blocks, &block); + + zip->zp->next_out = block; + zip->zp->avail_out = static_cast<uInt>(zip->block_sz); + } + + switch (zip->status = deflate(zip->zp, Z_NO_FLUSH)) { + case Z_OK: + /* The current input has been consumed. On the first pass that was the length prefix, so feed the word bytes next; len is then zeroed so that a later pass leaves avail_in at 0 and the loop ends. */ if (zip->zp->avail_in == 0) { + zip->zp->next_in = static_cast<byte*>(data); + zip->zp->avail_in = uInt(len); + ut_a(len <= FTS_MAX_WORD_LEN); + len = 0; + } + continue; + + case Z_STREAM_END: + case Z_BUF_ERROR: + case Z_STREAM_ERROR: + default: + ut_error; + } + } + + /* All data should have been compressed. */ + ut_a(zip->zp->avail_in == 0); + zip->zp->next_in = NULL; + + ++zip->n_words; + + return(zip->n_words >= zip->max_words ? FALSE : TRUE); +} + +/**********************************************************************//** +Finish Zip deflate: flush the stream with Z_FINISH, appending smaller +trailing blocks as needed, then call deflateEnd() and clear the z_stream. */ +static +void +fts_zip_deflate_end( +/*================*/ + fts_zip_t* zip) /*!< in/out: instance that should be closed*/ +{ + ut_a(zip->zp->avail_in == 0); + ut_a(zip->zp->next_in == NULL); + + zip->status = deflate(zip->zp, Z_FINISH); + + ut_a(ib_vector_size(zip->blocks) > 0); + /* Remember the index of the last block of block_sz bytes; any block pushed below is a smaller trailing block. */ zip->last_big_block = ib_vector_size(zip->blocks) - 1; + + /* Allocate smaller block(s), since this is trailing data. */ + while (zip->status == Z_OK) { + byte* block; + + ut_a(zip->zp->avail_out == 0); + + block = static_cast<byte*>( + ut_malloc_nokey(FTS_MAX_WORD_LEN + 1)); + + ib_vector_push(zip->blocks, &block); + + zip->zp->next_out = block; + zip->zp->avail_out = FTS_MAX_WORD_LEN; + + zip->status = deflate(zip->zp, Z_FINISH); + } + + ut_a(zip->status == Z_STREAM_END); + + zip->status = deflateEnd(zip->zp); + ut_a(zip->status == Z_OK); + + /* Reset the ZLib data structure. */ + memset(zip->zp, 0, sizeof(*zip->zp)); +} + +/**********************************************************************//** +Read the words from the FTS INDEX. 
+@return DB_SUCCESS if all OK, DB_TABLE_NOT_FOUND if no more indexes + to search else error code */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_index_fetch_words( +/*==================*/ + fts_optimize_t* optim, /*!< in/out: optimize scratch pad; optim->zip receives the compressed words */ + const fts_string_t* word, /*!< in: get words greater than this + word */ + ulint n_words)/*!< in: max words to read */ +{ + pars_info_t* info; + que_t* graph; + ulint selected; + fts_zip_t* zip = NULL; + dberr_t error = DB_SUCCESS; + mem_heap_t* heap = static_cast<mem_heap_t*>(optim->self_heap->arg); + ibool inited = FALSE; + + optim->trx->op_info = "fetching FTS index words"; + + /* Create the zip state on first use, otherwise reset it for reuse. */ if (optim->zip == NULL) { + optim->zip = fts_zip_create(heap, FTS_ZIP_BLOCK_SIZE, n_words); + } else { + fts_zip_initialize(optim->zip); + } + + /* Scan the auxiliary index tables, starting with the one selected for word, until n_words words have been read or the tables are exhausted. */ for (selected = fts_select_index( + optim->fts_index_table.charset, word->f_str, word->f_len); + selected < FTS_NUM_AUX_INDEX; + selected++) { + + char table_name[MAX_FULL_NAME_LEN]; + + optim->fts_index_table.suffix = fts_get_suffix(selected); + + info = pars_info_create(); + + pars_info_bind_function( + info, "my_func", fts_fetch_index_words, optim->zip); + + pars_info_bind_varchar_literal( + info, "word", word->f_str, word->f_len); + + fts_get_table_name(&optim->fts_index_table, table_name); + pars_info_bind_id(info, "table_name", table_name); + + graph = fts_parse_sql( + &optim->fts_index_table, + info, + "DECLARE FUNCTION my_func;\n" + "DECLARE CURSOR c IS" + " SELECT word\n" + " FROM $table_name\n" + " WHERE word > :word\n" + " ORDER BY word;\n" + "BEGIN\n" + "\n" + "OPEN c;\n" + "WHILE 1 = 1 LOOP\n" + " FETCH c INTO my_func();\n" + " IF c % NOTFOUND THEN\n" + " EXIT;\n" + " END IF;\n" + "END LOOP;\n" + "CLOSE c;"); + + zip = optim->zip; + + for (;;) { + int err; + + /* deflateInit() is called only when the stream is not yet initialized (first pass, or after a lock wait timeout reset). */ if (!inited && ((err = deflateInit(zip->zp, 9)) + != Z_OK)) { + ib::error() << "ZLib deflateInit() failed: " + << err; + + error = DB_ERROR; + break; + } else { + inited = TRUE; + error = fts_eval_sql(optim->trx, 
graph); + } + + if (UNIV_LIKELY(error == DB_SUCCESS)) { + //FIXME fts_sql_commit(optim->trx); + break; + } else { + //FIXME fts_sql_rollback(optim->trx); + + if (error == DB_LOCK_WAIT_TIMEOUT) { + ib::warn() << "Lock wait timeout" + " reading document. Retrying!"; + + /* We need to reset the ZLib state. */ + inited = FALSE; + deflateEnd(zip->zp); + fts_zip_init(zip); + + optim->trx->error_state = DB_SUCCESS; + } else { + ib::error() << "(" << error + << ") while reading document."; + + break; /* Exit the loop. */ + } + } + } + + que_graph_free(graph); + + /* Stop scanning further tables once n_words words have been fetched */ + if (optim->zip->n_words >= n_words) { + break; + } + } + + if (error == DB_SUCCESS && zip->status == Z_OK && zip->n_words > 0) { + + /* All data should have been read. */ + ut_a(zip->zp->avail_in == 0); + + fts_zip_deflate_end(zip); + } else { + deflateEnd(zip->zp); + } + + return(error); +} + +/**********************************************************************//** +Callback function to fetch the doc id from the record. +@return always returns TRUE */ +static +ibool +fts_fetch_doc_ids( +/*==============*/ + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: pointer to fts_doc_ids_t; the doc id is appended to its doc_ids vector */ +{ + que_node_t* exp; + int i = 0; + sel_node_t* sel_node = static_cast<sel_node_t*>(row); + fts_doc_ids_t* fts_doc_ids = static_cast<fts_doc_ids_t*>(user_arg); + doc_id_t* update = static_cast<doc_id_t*>( + ib_vector_push(fts_doc_ids->doc_ids, NULL)); + + for (exp = sel_node->select_list; + exp; + exp = que_node_get_next(exp), ++i) { + + dfield_t* dfield = que_node_get_val(exp); + void* data = dfield_get_data(dfield); + ulint len = dfield_get_len(dfield); + + ut_a(len != UNIV_SQL_NULL); + + /* Note: The column numbers below must match the SELECT. 
*/ + switch (i) { + case 0: /* DOC_ID */ + *update = fts_read_doc_id( + static_cast<byte*>(data)); + break; + + default: + ut_error; + } + } + + return(TRUE); +} + +/**********************************************************************//** +Read the rows from a FTS common auxiliary table. On success the collected +doc ids are sorted with fts_doc_id_cmp. +@return DB_SUCCESS or error code */ +dberr_t +fts_table_fetch_doc_ids( +/*====================*/ + trx_t* trx, /*!< in: transaction, or NULL to use a temporary one that is created and freed here */ + fts_table_t* fts_table, /*!< in: table; must be of type FTS_COMMON_TABLE with a suffix set */ + fts_doc_ids_t* doc_ids) /*!< in/out: For collecting doc ids */ +{ + dberr_t error; + que_t* graph; + pars_info_t* info = pars_info_create(); + ibool alloc_bk_trx = FALSE; + char table_name[MAX_FULL_NAME_LEN]; + + ut_a(fts_table->suffix != NULL); + ut_a(fts_table->type == FTS_COMMON_TABLE); + + if (!trx) { + trx = trx_create(); + alloc_bk_trx = TRUE; + } + + trx->op_info = "fetching FTS doc ids"; + + pars_info_bind_function(info, "my_func", fts_fetch_doc_ids, doc_ids); + + fts_get_table_name(fts_table, table_name); + pars_info_bind_id(info, "table_name", table_name); + + graph = fts_parse_sql( + fts_table, + info, + "DECLARE FUNCTION my_func;\n" + "DECLARE CURSOR c IS" + " SELECT doc_id FROM $table_name;\n" + "BEGIN\n" + "\n" + "OPEN c;\n" + "WHILE 1 = 1 LOOP\n" + " FETCH c INTO my_func();\n" + " IF c % NOTFOUND THEN\n" + " EXIT;\n" + " END IF;\n" + "END LOOP;\n" + "CLOSE c;"); + + error = fts_eval_sql(trx, graph); + /* NOTE(review): the commit is issued whether or not the evaluation succeeded. */ fts_sql_commit(trx); + que_graph_free(graph); + + if (error == DB_SUCCESS) { + ib_vector_sort(doc_ids->doc_ids, fts_doc_id_cmp); + } + + if (alloc_bk_trx) { + trx->free(); + } + + return(error); +} + +/**********************************************************************//** +Do a binary search for a doc id in the array +@return +ve index if found -ve index where it should be inserted + if not found */ +int +fts_bsearch( +/*========*/ + doc_id_t* array, /*!< in: array to search */ + int lower, /*!< in: the array lower bound */ + int upper, /*!< in: the array upper bound */ + doc_id_t doc_id) 
/*!< in: the doc id to search for */ +{ + int orig_size = upper; + + if (upper == 0) { + /* Nothing to search */ + return(-1); + } else { + while (lower < upper) { + int i = (lower + upper) >> 1; + + if (doc_id > array[i]) { + lower = i + 1; + } else if (doc_id < array[i]) { + upper = i - 1; + } else { + return(i); /* Found. */ + } + } + } + + /* The loop can end with lower == upper without array[lower] having been compared; check it here if it is within the original bound. */ if (lower == upper && lower < orig_size) { + if (doc_id == array[lower]) { + return(lower); + } else if (lower == 0) { + return(-1); + } + } + + /* Not found. A position of 0 is reported as -1. */ + return( (lower == 0) ? -1 : -(lower)); +} + +/**********************************************************************//** +Search in the to delete array whether any of the doc ids within +the [first, last] range are to be deleted +@return +ve index if found -ve index where it should be inserted + if not found */ +static +int +fts_optimize_lookup( +/*================*/ + ib_vector_t* doc_ids, /*!< in: array to search */ + ulint lower, /*!< in: lower limit of array */ + doc_id_t first_doc_id, /*!< in: first doc id of the range */ + doc_id_t last_doc_id) /*!< in: last doc id of the range */ +{ + int pos; + int upper = static_cast<int>(ib_vector_size(doc_ids)); + doc_id_t* array = (doc_id_t*) doc_ids->data; + + pos = fts_bsearch(array, static_cast<int>(lower), upper, first_doc_id); + + ut_a(abs(pos) <= upper + 1); + + /* first_doc_id itself is not in the array: see whether a nearby entry still falls inside [first_doc_id, last_doc_id]. */ if (pos < 0) { + + int i = abs(pos); + + /* If i is 1, it could be first_doc_id is less than + either the first or second array item, do a + double check */ + if (i == 1 && array[0] <= last_doc_id + && first_doc_id < array[0]) { + pos = 0; + } else if (i < upper && array[i] <= last_doc_id) { + + /* Check if the "next" doc id is within the + first & last doc id of the node. 
*/ + pos = i; + } + } + + return(pos); +} + +/**********************************************************************//** +Encode the word pos list into the node: append the doc id delta followed +by the document's encoded position list (copied from enc->src_ilist_ptr) +to node->ilist, growing the buffer when needed. +@return DB_SUCCESS or error code*/ +static MY_ATTRIBUTE((nonnull)) +dberr_t +fts_optimize_encode_node( +/*=====================*/ + fts_node_t* node, /*!< in/out: node to fill*/ + doc_id_t doc_id, /*!< in: doc id to encode */ + fts_encode_t* enc) /*!< in/out: encoding state; src_ilist_ptr is advanced past the copied positions */ +{ + byte* dst; + ulint enc_len; + ulint pos_enc_len; + doc_id_t doc_id_delta; + dberr_t error = DB_SUCCESS; + const byte* src = enc->src_ilist_ptr; + + /* The first doc id written to an empty node becomes its first_doc_id. */ if (node->first_doc_id == 0) { + ut_a(node->last_doc_id == 0); + + node->first_doc_id = doc_id; + } + + /* Calculate the space required to store the ilist. */ + ut_ad(doc_id > node->last_doc_id); + doc_id_delta = doc_id - node->last_doc_id; + enc_len = fts_get_encoded_len(static_cast<ulint>(doc_id_delta)); + + /* Calculate the size of the encoded pos array. */ + while (*src) { + fts_decode_vlc(&src); + } + + /* Skip the 0x00 byte at the end of the word positions list. */ + ++src; + + /* Number of encoded pos bytes to copy. */ + pos_enc_len = ulint(src - enc->src_ilist_ptr); + + /* Total number of bytes required for copy. */ + enc_len += pos_enc_len; + + /* Check we have enough space in the destination buffer for + copying the document word list. */ + if (!node->ilist) { + ulint new_size; + + ut_a(node->ilist_size == 0); + + new_size = enc_len > FTS_ILIST_MAX_SIZE + ? 
enc_len : FTS_ILIST_MAX_SIZE; + + node->ilist = static_cast<byte*>(ut_malloc_nokey(new_size)); + node->ilist_size_alloc = new_size; + + } else if ((node->ilist_size + enc_len) > node->ilist_size_alloc) { + /* Grow to exactly the size needed and move the existing data over. */ ulint new_size = node->ilist_size + enc_len; + byte* ilist = static_cast<byte*>(ut_malloc_nokey(new_size)); + + memcpy(ilist, node->ilist, node->ilist_size); + + ut_free(node->ilist); + + node->ilist = ilist; + node->ilist_size_alloc = new_size; + } + + src = enc->src_ilist_ptr; + dst = node->ilist + node->ilist_size; + + /* Encode the doc id delta. + NOTE(review): no cast is applied on this call; the earlier remark + about casting to ulint may be stale - confirm against + fts_encode_int(). */ + dst = fts_encode_int(doc_id_delta, dst); + + /* Copy the encoded pos array. */ + memcpy(dst, src, pos_enc_len); + + node->last_doc_id = doc_id; + + /* Data copied up to here. */ + node->ilist_size += enc_len; + enc->src_ilist_ptr += pos_enc_len; + + ut_a(node->ilist_size <= node->ilist_size_alloc); + + return(error); +} + +/**********************************************************************//** +Optimize the data contained in a node. +@return DB_SUCCESS or error code*/ +static MY_ATTRIBUTE((nonnull)) +dberr_t +fts_optimize_node( +/*==============*/ + ib_vector_t* del_vec, /*!< in: vector of doc ids to delete*/ + int* del_pos, /*!< in/out: offset into above vector */ + fts_node_t* dst_node, /*!< in/out: node to fill*/ + fts_node_t* src_node, /*!< in: source node for data*/ + fts_encode_t* enc) /*!< in/out: encoding state */ +{ + ulint copied; + dberr_t error = DB_SUCCESS; + doc_id_t doc_id = enc->src_last_doc_id; + + /* A NULL source pointer means we start at the beginning of the source ilist. */ if (!enc->src_ilist_ptr) { + enc->src_ilist_ptr = src_node->ilist; + } + + copied = ulint(enc->src_ilist_ptr - src_node->ilist); + + /* While there is data in the source node and space to copy + into in the destination node. 
*/ + while (copied < src_node->ilist_size + && dst_node->ilist_size < FTS_ILIST_MAX_SIZE) { + + doc_id_t delta; + doc_id_t del_doc_id = FTS_NULL_DOC_ID; + + delta = fts_decode_vlc( + (const byte**)&enc->src_ilist_ptr); + +test_again: + /* Check whether the doc id is in the delete list, if + so then we skip the entries but we need to track the + delta for decoding the entries following this document's + entries. */ + if (*del_pos >= 0 && *del_pos < (int) ib_vector_size(del_vec)) { + doc_id_t* update; + + update = (doc_id_t*) ib_vector_get( + del_vec, ulint(*del_pos)); + + del_doc_id = *update; + } + + if (enc->src_ilist_ptr == src_node->ilist && doc_id == 0) { + ut_a(delta == src_node->first_doc_id); + } + + doc_id += delta; + + if (del_doc_id > 0 && doc_id == del_doc_id) { + + ++*del_pos; + + /* Skip the entries for this document. */ + while (*enc->src_ilist_ptr) { + fts_decode_vlc((const byte**)&enc->src_ilist_ptr); + } + + /* Skip the end of word position marker. */ + ++enc->src_ilist_ptr; + + } else { + + /* The doc id is already larger than del_doc_id: + advance to the next del_doc_id and test again */ + if (del_doc_id > 0 && doc_id > del_doc_id) { + del_doc_id = 0; + ++*del_pos; + delta = 0; /* doc_id already includes this delta; zero it so that the retry does not add it twice */ + goto test_again; + } + + /* Decode and copy the word positions into + the dest node. */ + fts_optimize_encode_node(dst_node, doc_id, enc); + + ++dst_node->doc_count; + + ut_a(dst_node->last_doc_id == doc_id); + } + + /* Bytes copied so far from source. */ + copied = ulint(enc->src_ilist_ptr - src_node->ilist); + } + + if (copied >= src_node->ilist_size) { + ut_a(doc_id == src_node->last_doc_id); + } + + /* Remember where we stopped so that a later call can resume from this source node. */ enc->src_last_doc_id = doc_id; + + return(error); +} + +/**********************************************************************//** +Determine the starting pos within the deleted doc id vector for a word. 
+@return delete position */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +int +fts_optimize_deleted_pos( +/*=====================*/ + fts_optimize_t* optim, /*!< in: optimize state data */ + fts_word_t* word) /*!< in: the word data to check */ +{ + int del_pos; + ib_vector_t* del_vec = optim->to_delete->doc_ids; + + /* Get the first and last doc ids for the word, we will use + these values to determine which doc ids need to be removed + when we coalesce the nodes. This way we can reduce the number + of elements that need to be searched in the deleted doc ids + vector and secondly we can remove the doc ids during the + coalescing phase. */ + if (ib_vector_size(del_vec) > 0) { + fts_node_t* node; + doc_id_t last_id; + doc_id_t first_id; + ulint size = ib_vector_size(word->nodes); + + /* The range is taken from the first node's first_doc_id and the last node's last_doc_id. */ node = (fts_node_t*) ib_vector_get(word->nodes, 0); + first_id = node->first_doc_id; + + node = (fts_node_t*) ib_vector_get(word->nodes, size - 1); + last_id = node->last_doc_id; + + ut_a(first_id <= last_id); + + del_pos = fts_optimize_lookup( + del_vec, optim->del_pos, first_id, last_id); + } else { + + del_pos = -1; /* Note that there is nothing to delete. */ + } + + return(del_pos); +} + +#define FTS_DEBUG_PRINT +/**********************************************************************//** +Compact the nodes for a word, we also remove any doc ids during the +compaction pass. 
+@return DB_SUCCESS or error code.*/ +static +ib_vector_t* +fts_optimize_word( +/*==============*/ + fts_optimize_t* optim, /*!< in: optimize state data */ + fts_word_t* word) /*!< in/out: the word to optimize; the ilist of each fully consumed source node is freed */ +{ + /* NOTE: the value returned is the vector of compacted nodes (see return(nodes) at the end), allocated from the word's heap; it is not a dberr_t. */ fts_encode_t enc; + ib_vector_t* nodes; + ulint i = 0; + int del_pos; + fts_node_t* dst_node = NULL; + ib_vector_t* del_vec = optim->to_delete->doc_ids; + ulint size = ib_vector_size(word->nodes); + + del_pos = fts_optimize_deleted_pos(optim, word); + nodes = ib_vector_create(word->heap_alloc, sizeof(*dst_node), 128); + + enc.src_last_doc_id = 0; + enc.src_ilist_ptr = NULL; + + while (i < size) { + ulint copied; + fts_node_t* src_node; + + src_node = (fts_node_t*) ib_vector_get(word->nodes, i); + + /* Start a new destination node when there is none, or when the current one ends after the start of this source node. */ if (dst_node == NULL + || dst_node->last_doc_id > src_node->first_doc_id) { + + dst_node = static_cast<fts_node_t*>( + ib_vector_push(nodes, NULL)); + memset(dst_node, 0, sizeof(*dst_node)); + } + + /* Copy from the src to the dst node. */ + fts_optimize_node(del_vec, &del_pos, dst_node, src_node, &enc); + + ut_a(enc.src_ilist_ptr != NULL); + + /* Determine the number of bytes consumed from src_node so far. */ + copied = ulint(enc.src_ilist_ptr - src_node->ilist); + + /* Can't copy more than what's in the vlc array. */ + ut_a(copied <= src_node->ilist_size); + + /* We are done with this node, release the resources. */ + if (copied == src_node->ilist_size) { + + enc.src_last_doc_id = 0; + enc.src_ilist_ptr = NULL; + + ut_free(src_node->ilist); + + src_node->ilist = NULL; + src_node->ilist_size = src_node->ilist_size_alloc = 0; + + src_node = NULL; + + ++i; /* Get next source node to OPTIMIZE. */ + } + + /* A full destination node, or the end of the source nodes, closes the current destination node. */ if (dst_node->ilist_size >= FTS_ILIST_MAX_SIZE || i >= size) { + + dst_node = NULL; + } + } + + /* All dst nodes created should have been added to the vector. */ + ut_a(dst_node == NULL); + + /* Return the OPTIMIZED nodes. */ + return(nodes); +} + +/**********************************************************************//** +Update the FTS index table. 
This is a delete followed by an insert. +@return DB_SUCCESS or error code */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_optimize_write_word( +/*====================*/ + trx_t* trx, /*!< in: transaction */ + fts_table_t* fts_table, /*!< in/out: table of FTS index; its suffix is set from the word */ + fts_string_t* word, /*!< in: word data to write */ + ib_vector_t* nodes) /*!< in/out: the nodes to write; each node's ilist is freed before returning */ +{ + ulint i; + pars_info_t* info; + que_t* graph; + ulint selected; + dberr_t error = DB_SUCCESS; + char table_name[MAX_FULL_NAME_LEN]; + + info = pars_info_create(); + + ut_ad(fts_table->charset); + + pars_info_bind_varchar_literal( + info, "word", word->f_str, word->f_len); + + /* Pick the auxiliary index table that holds this word. */ selected = fts_select_index(fts_table->charset, + word->f_str, word->f_len); + + fts_table->suffix = fts_get_suffix(selected); + fts_get_table_name(fts_table, table_name); + pars_info_bind_id(info, "table_name", table_name); + + /* First delete every existing row for the word. */ graph = fts_parse_sql( + fts_table, + info, + "BEGIN DELETE FROM $table_name WHERE word = :word;"); + + error = fts_eval_sql(trx, graph); + + if (UNIV_UNLIKELY(error != DB_SUCCESS)) { + ib::error() << "(" << error << ") during optimize," + " when deleting a word from the FTS index."; + } + + que_graph_free(graph); + graph = NULL; + + /* Even if the operation needs to be rolled back and redone, + we iterate over the nodes in order to free the ilist. */ + for (i = 0; i < ib_vector_size(nodes); ++i) { + + fts_node_t* node = (fts_node_t*) ib_vector_get(nodes, i); + + if (error == DB_SUCCESS) { + /* Skip empty node. 
*/ + if (node->ilist == NULL) { + ut_ad(node->ilist_size == 0); + continue; + } + + error = fts_write_node( + trx, &graph, fts_table, word, node); + + if (UNIV_UNLIKELY(error != DB_SUCCESS)) { + ib::error() << "(" << error << ")" + " during optimize, while adding a" + " word to the FTS index."; + } + } + + /* Runs for every node, including after an error, so that no ilist is left allocated. */ ut_free(node->ilist); + node->ilist = NULL; + node->ilist_size = node->ilist_size_alloc = 0; + } + + if (graph != NULL) { + que_graph_free(graph); + } + + return(error); +} + +/**********************************************************************//** +Free the resources of an fts_word_t instance: the heap referenced by +word->heap_alloc is freed. The fts_word_t object itself is not freed; +in debug builds it is zeroed first. */ +void +fts_word_free( +/*==========*/ + fts_word_t* word) /*!< in: instance to free.*/ +{ + mem_heap_t* heap = static_cast<mem_heap_t*>(word->heap_alloc->arg); + +#ifdef UNIV_DEBUG + memset(word, 0, sizeof(*word)); +#endif /* UNIV_DEBUG */ + + mem_heap_free(heap); +} + +/**********************************************************************//** +Optimize the word ilist and rewrite data to the FTS index. +@return DB_SUCCESS or error code */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_optimize_compact( +/*=================*/ + fts_optimize_t* optim, /*!< in/out: optimize state data */ + dict_index_t* index, /*!< in: current FTS being optimized */ + time_t start_time) /*!< in: optimize start time */ +{ + ulint i; + dberr_t error = DB_SUCCESS; + ulint size = ib_vector_size(optim->words); + + /* Process the words in order; stop at the first error or once optim->done is set. */ for (i = 0; i < size && error == DB_SUCCESS && !optim->done; ++i) { + fts_word_t* word; + ib_vector_t* nodes; + trx_t* trx = optim->trx; + + word = (fts_word_t*) ib_vector_get(optim->words, i); + + /* nodes is allocated from the word heap and will be destroyed + when the word is freed. We however have to be careful about + the ilist, that needs to be freed explicitly. */ + nodes = fts_optimize_word(optim, word); + + /* Update the data on disk. 
*/ + error = fts_optimize_write_word( + trx, &optim->fts_index_table, &word->text, nodes); + + if (error == DB_SUCCESS) { + /* Write the last word optimized to the config table, + we use this value for restarting optimize. */ + error = fts_config_set_index_value( + optim->trx, index, + FTS_LAST_OPTIMIZED_WORD, &word->text); + } + + /* Free the word that was optimized. */ + fts_word_free(word); + + ulint interval = ulint(time(NULL) - start_time); + + if (fts_optimize_time_limit > 0 + && (lint(interval) < 0 + || interval > fts_optimize_time_limit)) { + + optim->done = TRUE; + } + } + + return(error); +} + +/**********************************************************************//** +Create an instance of fts_optimize_t. Also create a new +background transaction.*/ +static +fts_optimize_t* +fts_optimize_create( +/*================*/ + dict_table_t* table) /*!< in: table with FTS indexes */ +{ + fts_optimize_t* optim; + mem_heap_t* heap = mem_heap_create(128); + + optim = (fts_optimize_t*) mem_heap_zalloc(heap, sizeof(*optim)); + + optim->self_heap = ib_heap_allocator_create(heap); + + optim->to_delete = fts_doc_ids_create(); + + optim->words = ib_vector_create( + optim->self_heap, sizeof(fts_word_t), 256); + + optim->table = table; + + optim->trx = trx_create(); + trx_start_internal(optim->trx); + + optim->fts_common_table.table_id = table->id; + optim->fts_common_table.type = FTS_COMMON_TABLE; + optim->fts_common_table.table = table; + + optim->fts_index_table.table_id = table->id; + optim->fts_index_table.type = FTS_INDEX_TABLE; + optim->fts_index_table.table = table; + + /* The common prefix for all this parent table's aux tables. */ + char table_id[FTS_AUX_MIN_TABLE_ID_LENGTH]; + const size_t table_id_len = 1 + + size_t(fts_get_table_id(&optim->fts_common_table, table_id)); + dict_sys.freeze(SRW_LOCK_CALL); + /* Include the separator as well. 
*/ + const size_t dbname_len = table->name.dblen() + 1; + ut_ad(dbname_len > 1); + const size_t prefix_name_len = dbname_len + 4 + table_id_len; + char* prefix_name = static_cast<char*>( + ut_malloc_nokey(prefix_name_len)); + memcpy(prefix_name, table->name.m_name, dbname_len); + dict_sys.unfreeze(); + memcpy(prefix_name + dbname_len, "FTS_", 4); + memcpy(prefix_name + dbname_len + 4, table_id, table_id_len); + optim->name_prefix =prefix_name; + + return(optim); +} + +#ifdef FTS_OPTIMIZE_DEBUG +/**********************************************************************//** +Get optimize start time of an FTS index. +@return DB_SUCCESS if all OK else error code */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_optimize_get_index_start_time( +/*==============================*/ + trx_t* trx, /*!< in: transaction */ + dict_index_t* index, /*!< in: FTS index */ + time_t* start_time) /*!< out: time in secs */ +{ + return(fts_config_get_index_ulint( + trx, index, FTS_OPTIMIZE_START_TIME, + (ulint*) start_time)); +} + +/**********************************************************************//** +Set the optimize start time of an FTS index. +@return DB_SUCCESS if all OK else error code */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_optimize_set_index_start_time( +/*==============================*/ + trx_t* trx, /*!< in: transaction */ + dict_index_t* index, /*!< in: FTS index */ + time_t start_time) /*!< in: start time */ +{ + return(fts_config_set_index_ulint( + trx, index, FTS_OPTIMIZE_START_TIME, + (ulint) start_time)); +} + +/**********************************************************************//** +Get optimize end time of an FTS index. 
+@return DB_SUCCESS if all OK else error code */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_optimize_get_index_end_time( +/*============================*/ + trx_t* trx, /*!< in: transaction */ + dict_index_t* index, /*!< in: FTS index */ + time_t* end_time) /*!< out: time in secs */ +{ + return(fts_config_get_index_ulint( + trx, index, FTS_OPTIMIZE_END_TIME, (ulint*) end_time)); +} + +/**********************************************************************//** +Set the optimize end time of an FTS index. +@return DB_SUCCESS if all OK else error code */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_optimize_set_index_end_time( +/*============================*/ + trx_t* trx, /*!< in: transaction */ + dict_index_t* index, /*!< in: FTS index */ + time_t end_time) /*!< in: end time */ +{ + return(fts_config_set_index_ulint( + trx, index, FTS_OPTIMIZE_END_TIME, (ulint) end_time)); +} +#endif + +/**********************************************************************//** +Free the optimize prepared statements.*/ +static +void +fts_optimize_graph_free( +/*====================*/ + fts_optimize_graph_t* graph) /*!< in/out: The graph instances + to free */ +{ + if (graph->commit_graph) { + que_graph_free(graph->commit_graph); + graph->commit_graph = NULL; + } + + if (graph->write_nodes_graph) { + que_graph_free(graph->write_nodes_graph); + graph->write_nodes_graph = NULL; + } + + if (graph->delete_nodes_graph) { + que_graph_free(graph->delete_nodes_graph); + graph->delete_nodes_graph = NULL; + } + + if (graph->read_nodes_graph) { + que_graph_free(graph->read_nodes_graph); + graph->read_nodes_graph = NULL; + } +} + +/**********************************************************************//** +Free all optimize resources. 
*/ +static +void +fts_optimize_free( +/*==============*/ + fts_optimize_t* optim) /*!< in: table with on FTS index */ +{ + mem_heap_t* heap = static_cast<mem_heap_t*>(optim->self_heap->arg); + + trx_commit_for_mysql(optim->trx); + optim->trx->free(); + optim->trx = NULL; + + fts_doc_ids_free(optim->to_delete); + fts_optimize_graph_free(&optim->graph); + + ut_free(optim->name_prefix); + + /* This will free the heap from which optim itself was allocated. */ + mem_heap_free(heap); +} + +/**********************************************************************//** +Get the max time optimize should run in millisecs. +@return max optimize time limit in millisecs. */ +static +ulint +fts_optimize_get_time_limit( +/*========================*/ + trx_t* trx, /*!< in: transaction */ + fts_table_t* fts_table) /*!< in: aux table */ +{ + ulint time_limit = 0; + + fts_config_get_ulint( + trx, fts_table, + FTS_OPTIMIZE_LIMIT_IN_SECS, &time_limit); + + /* FIXME: This is returning milliseconds, while the variable + is being stored and interpreted as seconds! */ + return(time_limit * 1000); +} + +/**********************************************************************//** +Run OPTIMIZE on the given table. Note: this can take a very long time +(hours). */ +static +void +fts_optimize_words( +/*===============*/ + fts_optimize_t* optim, /*!< in: optimize instance */ + dict_index_t* index, /*!< in: current FTS being optimized */ + fts_string_t* word) /*!< in: the starting word to optimize */ +{ + fts_fetch_t fetch; + que_t* graph = NULL; + CHARSET_INFO* charset = optim->fts_index_table.charset; + + ut_a(!optim->done); + + /* Get the time limit from the config table. */ + fts_optimize_time_limit = fts_optimize_get_time_limit( + optim->trx, &optim->fts_common_table); + + const time_t start_time = time(NULL); + + /* Setup the callback to use for fetching the word ilist etc. 
*/ + fetch.read_arg = optim->words; + fetch.read_record = fts_optimize_index_fetch_node; + + while (!optim->done) { + dberr_t error; + trx_t* trx = optim->trx; + ulint selected; + + ut_a(ib_vector_size(optim->words) == 0); + + selected = fts_select_index(charset, word->f_str, word->f_len); + + /* Read the index records to optimize. */ + fetch.total_memory = 0; + error = fts_index_fetch_nodes( + trx, &graph, &optim->fts_index_table, word, + &fetch); + ut_ad(fetch.total_memory < fts_result_cache_limit); + + if (error == DB_SUCCESS) { + /* There must be some nodes to read. */ + ut_a(ib_vector_size(optim->words) > 0); + + /* Optimize the nodes that were read and write + back to DB. */ + error = fts_optimize_compact(optim, index, start_time); + + if (error == DB_SUCCESS) { + fts_sql_commit(optim->trx); + } else { + fts_sql_rollback(optim->trx); + } + } + + ib_vector_reset(optim->words); + + if (error == DB_SUCCESS) { + if (!optim->done) { + if (!fts_zip_read_word(optim->zip, word)) { + optim->done = TRUE; + } else if (selected + != fts_select_index( + charset, word->f_str, + word->f_len) + && graph) { + que_graph_free(graph); + graph = NULL; + } + } + } else if (error == DB_LOCK_WAIT_TIMEOUT) { + ib::warn() << "Lock wait timeout during optimize." + " Retrying!"; + + trx->error_state = DB_SUCCESS; + } else if (error == DB_DEADLOCK) { + ib::warn() << "Deadlock during optimize. Retrying!"; + + trx->error_state = DB_SUCCESS; + } else { + optim->done = TRUE; /* Exit the loop. */ + } + } + + if (graph != NULL) { + que_graph_free(graph); + } +} + +/**********************************************************************//** +Optimize is complete. Set the completion time, and reset the optimize +start string for this FTS index to "". 
+@return DB_SUCCESS if all OK */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_optimize_index_completed( +/*=========================*/ + fts_optimize_t* optim, /*!< in: optimize instance */ + dict_index_t* index) /*!< in: table with one FTS index */ +{ + fts_string_t word; + dberr_t error; + byte buf[sizeof(ulint)]; +#ifdef FTS_OPTIMIZE_DEBUG + time_t end_time = time(NULL); + + error = fts_optimize_set_index_end_time(optim->trx, index, end_time); +#endif + + /* If we've reached the end of the index then set the start + word to the empty string. */ + + word.f_len = 0; + word.f_str = buf; + *word.f_str = '\0'; + + error = fts_config_set_index_value( + optim->trx, index, FTS_LAST_OPTIMIZED_WORD, &word); + + if (UNIV_UNLIKELY(error != DB_SUCCESS)) { + ib::error() << "(" << error << ") while updating" + " last optimized word!"; + } + + return(error); +} + + +/**********************************************************************//** +Read the list of words from the FTS auxiliary index that will be +optimized in this pass. +@return DB_SUCCESS if all OK */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_optimize_index_read_words( +/*==========================*/ + fts_optimize_t* optim, /*!< in: optimize instance */ + dict_index_t* index, /*!< in: table with one FTS index */ + fts_string_t* word) /*!< in: buffer to use */ +{ + dberr_t error = DB_SUCCESS; + + if (optim->del_list_regenerated) { + word->f_len = 0; + } else { + + /* Get the last word that was optimized from + the config table. */ + error = fts_config_get_index_value( + optim->trx, index, FTS_LAST_OPTIMIZED_WORD, word); + } + + /* If record not found then we start from the top. 
*/ + if (error == DB_RECORD_NOT_FOUND) { + word->f_len = 0; + error = DB_SUCCESS; + } + + while (error == DB_SUCCESS) { + + error = fts_index_fetch_words( + optim, word, fts_num_word_optimize); + + if (error == DB_SUCCESS) { + /* Reset the last optimized word to '' if no + more words could be read from the FTS index. */ + if (optim->zip->n_words == 0) { + word->f_len = 0; + *word->f_str = 0; + } + + break; + } + } + + return(error); +} + +/**********************************************************************//** +Run OPTIMIZE on the given FTS index. Note: this can take a very long +time (hours). +@return DB_SUCCESS if all OK */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_optimize_index( +/*===============*/ + fts_optimize_t* optim, /*!< in: optimize instance */ + dict_index_t* index) /*!< in: table with one FTS index */ +{ + fts_string_t word; + dberr_t error; + byte str[FTS_MAX_WORD_LEN + 1]; + + /* Set the current index that we have to optimize. */ + optim->fts_index_table.index_id = index->id; + optim->fts_index_table.charset = fts_index_get_charset(index); + + optim->done = FALSE; /* Optimize until !done */ + + /* We need to read the last word optimized so that we start from + the next word. */ + word.f_str = str; + + /* We set the length of word to the size of str since we + need to pass the max len info to the fts_get_config_value() function. */ + word.f_len = sizeof(str) - 1; + + memset(word.f_str, 0x0, word.f_len); + + /* Read the words that will be optimized in this pass. */ + error = fts_optimize_index_read_words(optim, index, &word); + + if (error == DB_SUCCESS) { + int zip_error; + + ut_a(optim->zip->pos == 0); + ut_a(optim->zip->zp->total_in == 0); + ut_a(optim->zip->zp->total_out == 0); + + zip_error = inflateInit(optim->zip->zp); + ut_a(zip_error == Z_OK); + + word.f_len = 0; + word.f_str = str; + + /* Read the first word to optimize from the Zip buffer. 
*/ + if (!fts_zip_read_word(optim->zip, &word)) { + + optim->done = TRUE; + } else { + fts_optimize_words(optim, index, &word); + } + + /* If we couldn't read any records then optimize is + complete. Increment the number of indexes that have + been optimized and set FTS index optimize state to + completed. */ + if (error == DB_SUCCESS && optim->zip->n_words == 0) { + + error = fts_optimize_index_completed(optim, index); + + if (error == DB_SUCCESS) { + ++optim->n_completed; + } + } + } + + return(error); +} + +/**********************************************************************//** +Delete the document ids in the delete, and delete cache tables. +@return DB_SUCCESS if all OK */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_optimize_purge_deleted_doc_ids( +/*===============================*/ + fts_optimize_t* optim) /*!< in: optimize instance */ +{ + ulint i; + pars_info_t* info; + que_t* graph; + doc_id_t* update; + doc_id_t write_doc_id; + dberr_t error = DB_SUCCESS; + char deleted[MAX_FULL_NAME_LEN]; + char deleted_cache[MAX_FULL_NAME_LEN]; + + info = pars_info_create(); + + ut_a(ib_vector_size(optim->to_delete->doc_ids) > 0); + + update = static_cast<doc_id_t*>( + ib_vector_get(optim->to_delete->doc_ids, 0)); + + /* Convert to "storage" byte order. */ + fts_write_doc_id((byte*) &write_doc_id, *update); + + /* This is required for the SQL parser to work. It must be able + to find the following variables. So we do it twice. 
*/ + fts_bind_doc_id(info, "doc_id1", &write_doc_id); + fts_bind_doc_id(info, "doc_id2", &write_doc_id); + + /* Make sure the following two names are consistent with the name + used in the fts_delete_doc_ids_sql */ + optim->fts_common_table.suffix = fts_common_tables[3]; + fts_get_table_name(&optim->fts_common_table, deleted); + pars_info_bind_id(info, fts_common_tables[3], deleted); + + optim->fts_common_table.suffix = fts_common_tables[4]; + fts_get_table_name(&optim->fts_common_table, deleted_cache); + pars_info_bind_id(info, fts_common_tables[4], deleted_cache); + + graph = fts_parse_sql(NULL, info, fts_delete_doc_ids_sql); + + /* Delete the doc ids that were copied at the start. */ + for (i = 0; i < ib_vector_size(optim->to_delete->doc_ids); ++i) { + + update = static_cast<doc_id_t*>(ib_vector_get( + optim->to_delete->doc_ids, i)); + + /* Convert to "storage" byte order. */ + fts_write_doc_id((byte*) &write_doc_id, *update); + + fts_bind_doc_id(info, "doc_id1", &write_doc_id); + + fts_bind_doc_id(info, "doc_id2", &write_doc_id); + + error = fts_eval_sql(optim->trx, graph); + + // FIXME: Check whether delete actually succeeded! + if (error != DB_SUCCESS) { + + fts_sql_rollback(optim->trx); + break; + } + } + + que_graph_free(graph); + + return(error); +} + +/**********************************************************************//** +Delete the document ids in the pending delete, and delete tables. 
+@return DB_SUCCESS if all OK */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_optimize_purge_deleted_doc_id_snapshot( +/*=======================================*/ + fts_optimize_t* optim) /*!< in: optimize instance */ +{ + dberr_t error; + que_t* graph; + pars_info_t* info; + char being_deleted[MAX_FULL_NAME_LEN]; + char being_deleted_cache[MAX_FULL_NAME_LEN]; + + info = pars_info_create(); + + /* Make sure the following two names are consistent with the name + used in the fts_end_delete_sql */ + optim->fts_common_table.suffix = fts_common_tables[0]; + fts_get_table_name(&optim->fts_common_table, being_deleted); + pars_info_bind_id(info, fts_common_tables[0], being_deleted); + + optim->fts_common_table.suffix = fts_common_tables[1]; + fts_get_table_name(&optim->fts_common_table, being_deleted_cache); + pars_info_bind_id(info, fts_common_tables[1], being_deleted_cache); + + /* Delete the doc ids that were copied to delete pending state at + the start of optimize. */ + graph = fts_parse_sql(NULL, info, fts_end_delete_sql); + + error = fts_eval_sql(optim->trx, graph); + que_graph_free(graph); + + return(error); +} + +/**********************************************************************//** +Copy the deleted doc ids that will be purged during this optimize run +to the being deleted FTS auxiliary tables. The transaction is committed +upon successfull copy and rolled back on DB_DUPLICATE_KEY error. +@return DB_SUCCESS if all OK */ +static +ulint +fts_optimize_being_deleted_count( +/*=============================*/ + fts_optimize_t* optim) /*!< in: optimize instance */ +{ + fts_table_t fts_table; + + FTS_INIT_FTS_TABLE(&fts_table, "BEING_DELETED", FTS_COMMON_TABLE, + optim->table); + + return(fts_get_rows_count(&fts_table)); +} + +/*********************************************************************//** +Copy the deleted doc ids that will be purged during this optimize run +to the being deleted FTS auxiliary tables. 
The transaction is committed upon successful copy and rolled back on any
error, including DB_DUPLICATE_KEY.
@return DB_SUCCESS if all OK */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
fts_optimize_create_deleted_doc_id_snapshot(
/*========================================*/
	fts_optimize_t*	optim)	/*!< in: optimize instance */
{
	char		being_deleted[MAX_FULL_NAME_LEN];
	char		deleted[MAX_FULL_NAME_LEN];
	char		being_deleted_cache[MAX_FULL_NAME_LEN];
	char		deleted_cache[MAX_FULL_NAME_LEN];
	pars_info_t*	info = pars_info_create();

	/* Resolve one common aux table name and bind it under its suffix.
	The four suffixes must stay consistent with the names used in
	fts_init_delete_sql. */
	auto bind_table = [&](ulint which, char* name) {
		optim->fts_common_table.suffix = fts_common_tables[which];
		fts_get_table_name(&optim->fts_common_table, name);
		pars_info_bind_id(info, fts_common_tables[which], name);
	};

	bind_table(0, being_deleted);
	bind_table(3, deleted);
	bind_table(1, being_deleted_cache);
	bind_table(4, deleted_cache);

	/* Move doc_ids that are to be deleted to state being deleted. */
	que_t*	graph = fts_parse_sql(NULL, info, fts_init_delete_sql);

	const dberr_t	error = fts_eval_sql(optim->trx, graph);

	que_graph_free(graph);

	if (error == DB_SUCCESS) {
		fts_sql_commit(optim->trx);
	} else {
		fts_sql_rollback(optim->trx);
	}

	/* Set regardless of the outcome of the copy. */
	optim->del_list_regenerated = TRUE;

	return(error);
}

/*********************************************************************//**
Read in the document ids that are to be purged during optimize.
The +transaction is committed upon successfully read. +@return DB_SUCCESS if all OK */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_optimize_read_deleted_doc_id_snapshot( +/*======================================*/ + fts_optimize_t* optim) /*!< in: optimize instance */ +{ + dberr_t error; + + optim->fts_common_table.suffix = "BEING_DELETED"; + + /* Read the doc_ids to delete. */ + error = fts_table_fetch_doc_ids( + optim->trx, &optim->fts_common_table, optim->to_delete); + + if (error == DB_SUCCESS) { + + optim->fts_common_table.suffix = "BEING_DELETED_CACHE"; + + /* Read additional doc_ids to delete. */ + error = fts_table_fetch_doc_ids( + optim->trx, &optim->fts_common_table, optim->to_delete); + } + + if (error != DB_SUCCESS) { + + fts_doc_ids_free(optim->to_delete); + optim->to_delete = NULL; + } + + return(error); +} + +/*********************************************************************//** +Optimze all the FTS indexes, skipping those that have already been +optimized, since the FTS auxiliary indexes are not guaranteed to be +of the same cardinality. +@return DB_SUCCESS if all OK */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_optimize_indexes( +/*=================*/ + fts_optimize_t* optim) /*!< in: optimize instance */ +{ + ulint i; + dberr_t error = DB_SUCCESS; + fts_t* fts = optim->table->fts; + + /* Optimize the FTS indexes. */ + for (i = 0; i < ib_vector_size(fts->indexes); ++i) { + dict_index_t* index; + +#ifdef FTS_OPTIMIZE_DEBUG + time_t end_time; + time_t start_time; + + /* Get the start and end optimize times for this index. */ + error = fts_optimize_get_index_start_time( + optim->trx, index, &start_time); + + if (error != DB_SUCCESS) { + break; + } + + error = fts_optimize_get_index_end_time( + optim->trx, index, &end_time); + + if (error != DB_SUCCESS) { + break; + } + + /* Start time will be 0 only for the first time or after + completing the optimization of all FTS indexes. 
*/ + if (start_time == 0) { + start_time = time(NULL); + + error = fts_optimize_set_index_start_time( + optim->trx, index, start_time); + } + + /* Check if this index needs to be optimized or not. */ + if (difftime(end_time, start_time) < 0) { + error = fts_optimize_index(optim, index); + + if (error != DB_SUCCESS) { + break; + } + } else { + ++optim->n_completed; + } +#endif + index = static_cast<dict_index_t*>( + ib_vector_getp(fts->indexes, i)); + error = fts_optimize_index(optim, index); + } + + if (error == DB_SUCCESS) { + fts_sql_commit(optim->trx); + } else { + fts_sql_rollback(optim->trx); + } + + return(error); +} + +/*********************************************************************//** +Cleanup the snapshot tables and the master deleted table. +@return DB_SUCCESS if all OK */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_optimize_purge_snapshot( +/*========================*/ + fts_optimize_t* optim) /*!< in: optimize instance */ +{ + dberr_t error; + + /* Delete the doc ids from the master deleted tables, that were + in the snapshot that was taken at the start of optimize. */ + error = fts_optimize_purge_deleted_doc_ids(optim); + + if (error == DB_SUCCESS) { + /* Destroy the deleted doc id snapshot. */ + error = fts_optimize_purge_deleted_doc_id_snapshot(optim); + } + + if (error == DB_SUCCESS) { + fts_sql_commit(optim->trx); + } else { + fts_sql_rollback(optim->trx); + } + + return(error); +} + +/*********************************************************************//** +Reset the start time to 0 so that a new optimize can be started. +@return DB_SUCCESS if all OK */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_optimize_reset_start_time( +/*==========================*/ + fts_optimize_t* optim) /*!< in: optimize instance */ +{ + dberr_t error = DB_SUCCESS; +#ifdef FTS_OPTIMIZE_DEBUG + fts_t* fts = optim->table->fts; + + /* Optimization should have been completed for all indexes. 
*/ + ut_a(optim->n_completed == ib_vector_size(fts->indexes)); + + for (uint i = 0; i < ib_vector_size(fts->indexes); ++i) { + dict_index_t* index; + + time_t start_time = 0; + + /* Reset the start time to 0 for this index. */ + error = fts_optimize_set_index_start_time( + optim->trx, index, start_time); + + index = static_cast<dict_index_t*>( + ib_vector_getp(fts->indexes, i)); + } +#endif + + if (error == DB_SUCCESS) { + fts_sql_commit(optim->trx); + } else { + fts_sql_rollback(optim->trx); + } + + return(error); +} + +/*********************************************************************//** +Run OPTIMIZE on the given table by a background thread. +@return DB_SUCCESS if all OK */ +static MY_ATTRIBUTE((nonnull)) +dberr_t +fts_optimize_table_bk( +/*==================*/ + fts_slot_t* slot) /*!< in: table to optimiza */ +{ + const time_t now = time(NULL); + const ulint interval = ulint(now - slot->last_run); + + /* Avoid optimizing tables that were optimized recently. */ + if (slot->last_run > 0 + && lint(interval) >= 0 + && interval < FTS_OPTIMIZE_INTERVAL_IN_SECS) { + + return(DB_SUCCESS); + } + + dict_table_t* table = slot->table; + dberr_t error; + + if (table->is_accessible() + && table->fts && table->fts->cache + && table->fts->cache->deleted >= FTS_OPTIMIZE_THRESHOLD) { + error = fts_optimize_table(table); + + slot->last_run = time(NULL); + + if (error == DB_SUCCESS) { + slot->running = false; + slot->completed = slot->last_run; + } + } else { + /* Note time this run completed. */ + slot->last_run = now; + error = DB_SUCCESS; + } + + return(error); +} +/*********************************************************************//** +Run OPTIMIZE on the given table. 
+@return DB_SUCCESS if all OK */ +dberr_t +fts_optimize_table( +/*===============*/ + dict_table_t* table) /*!< in: table to optimiza */ +{ + if (srv_read_only_mode) { + return DB_READ_ONLY; + } + + dberr_t error = DB_SUCCESS; + fts_optimize_t* optim = NULL; + fts_t* fts = table->fts; + + if (UNIV_UNLIKELY(fts_enable_diag_print)) { + ib::info() << "FTS start optimize " << table->name; + } + + optim = fts_optimize_create(table); + + // FIXME: Call this only at the start of optimize, currently we + // rely on DB_DUPLICATE_KEY to handle corrupting the snapshot. + + /* Check whether there are still records in BEING_DELETED table */ + if (fts_optimize_being_deleted_count(optim) == 0) { + /* Take a snapshot of the deleted document ids, they are copied + to the BEING_ tables. */ + error = fts_optimize_create_deleted_doc_id_snapshot(optim); + } + + /* A duplicate error is OK, since we don't erase the + doc ids from the being deleted state until all FTS + indexes have been optimized. */ + if (error == DB_DUPLICATE_KEY) { + error = DB_SUCCESS; + } + + if (error == DB_SUCCESS) { + + /* These document ids will be filtered out during the + index optimization phase. They are in the snapshot that we + took above, at the start of the optimize. */ + error = fts_optimize_read_deleted_doc_id_snapshot(optim); + + if (error == DB_SUCCESS) { + + /* Commit the read of being deleted + doc ids transaction. */ + fts_sql_commit(optim->trx); + + /* We would do optimization only if there + are deleted records to be cleaned up */ + if (ib_vector_size(optim->to_delete->doc_ids) > 0) { + error = fts_optimize_indexes(optim); + } + + } else { + ut_a(optim->to_delete == NULL); + } + + /* Only after all indexes have been optimized can we + delete the (snapshot) doc ids in the pending delete, + and master deleted tables. 
*/ + if (error == DB_SUCCESS + && optim->n_completed == ib_vector_size(fts->indexes)) { + + if (UNIV_UNLIKELY(fts_enable_diag_print)) { + ib::info() << "FTS_OPTIMIZE: Completed" + " Optimize, cleanup DELETED table"; + } + + if (ib_vector_size(optim->to_delete->doc_ids) > 0) { + + /* Purge the doc ids that were in the + snapshot from the snapshot tables and + the master deleted table. */ + error = fts_optimize_purge_snapshot(optim); + } + + if (error == DB_SUCCESS) { + /* Reset the start time of all the FTS indexes + so that optimize can be restarted. */ + error = fts_optimize_reset_start_time(optim); + } + } + } + + fts_optimize_free(optim); + + if (UNIV_UNLIKELY(fts_enable_diag_print)) { + ib::info() << "FTS end optimize " << table->name; + } + + return(error); +} + +/********************************************************************//** +Add the table to add to the OPTIMIZER's list. +@return new message instance */ +static +fts_msg_t* +fts_optimize_create_msg( +/*====================*/ + fts_msg_type_t type, /*!< in: type of message */ + void* ptr) /*!< in: message payload */ +{ + mem_heap_t* heap; + fts_msg_t* msg; + + heap = mem_heap_create(sizeof(*msg) + sizeof(ib_list_node_t) + 16); + msg = static_cast<fts_msg_t*>(mem_heap_alloc(heap, sizeof(*msg))); + + msg->ptr = ptr; + msg->type = type; + msg->heap = heap; + + return(msg); +} + +/** Add message to wqueue, signal thread pool*/ +static void add_msg(fts_msg_t *msg) +{ + ib_wqueue_add(fts_optimize_wq, msg, msg->heap, true); + srv_thread_pool->submit_task(&task); +} + +/** +Called by "idle" timer. Submits optimize task, which +will only recalculate is_sync_needed, in case the queue is empty. +*/ +static void timer_callback(void*) +{ + srv_thread_pool->submit_task(&task); +} + +/** Add the table to add to the OPTIMIZER's list. 
+@param[in] table table to add */ +void fts_optimize_add_table(dict_table_t* table) +{ + fts_msg_t* msg; + + if (!fts_optimize_wq) { + return; + } + + /* Make sure table with FTS index cannot be evicted */ + dict_sys.prevent_eviction(table); + + msg = fts_optimize_create_msg(FTS_MSG_ADD_TABLE, table); + + mysql_mutex_lock(&fts_optimize_wq->mutex); + + add_msg(msg); + + table->fts->in_queue = true; + + mysql_mutex_unlock(&fts_optimize_wq->mutex); +} + +/**********************************************************************//** +Remove the table from the OPTIMIZER's list. We do wait for +acknowledgement from the consumer of the message. */ +void +fts_optimize_remove_table( +/*======================*/ + dict_table_t* table) /*!< in: table to remove */ +{ + if (!fts_optimize_wq) + return; + + if (fts_opt_start_shutdown) + { + ib::info() << "Try to remove table " << table->name + << " after FTS optimize thread exiting."; + while (fts_optimize_wq) + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + return; + } + + mysql_mutex_lock(&fts_optimize_wq->mutex); + + if (table->fts->in_queue) + { + fts_msg_t *msg= fts_optimize_create_msg(FTS_MSG_DEL_TABLE, nullptr); + pthread_cond_t cond; + pthread_cond_init(&cond, nullptr); + msg->ptr= new(mem_heap_alloc(msg->heap, sizeof(fts_msg_del_t))) + fts_msg_del_t{table, &cond}; + add_msg(msg); + my_cond_wait(&cond, &fts_optimize_wq->mutex.m_mutex); + pthread_cond_destroy(&cond); + ut_ad(!table->fts->in_queue); + } + + mysql_mutex_unlock(&fts_optimize_wq->mutex); +} + +/** Send sync fts cache for the table. 
+@param[in] table table to sync */ +void +fts_optimize_request_sync_table( + dict_table_t* table) +{ + /* if the optimize system not yet initialized, return */ + if (!fts_optimize_wq) { + return; + } + + mysql_mutex_lock(&fts_optimize_wq->mutex); + + /* FTS optimizer thread is already exited */ + if (fts_opt_start_shutdown) { + ib::info() << "Try to sync table " << table->name + << " after FTS optimize thread exiting."; + } else if (table->fts->sync_message) { + /* If the table already has SYNC message in + fts_optimize_wq queue then ignore it */ + } else { + add_msg(fts_optimize_create_msg(FTS_MSG_SYNC_TABLE, table)); + table->fts->sync_message = true; + DBUG_EXECUTE_IF("fts_optimize_wq_count_check", + DBUG_ASSERT(fts_optimize_wq->length <= 1000);); + } + + mysql_mutex_unlock(&fts_optimize_wq->mutex); +} + +/** Add a table to fts_slots if it doesn't already exist. */ +static bool fts_optimize_new_table(dict_table_t* table) +{ + ut_ad(table); + + ulint i; + fts_slot_t* slot; + fts_slot_t* empty = NULL; + + /* Search for duplicates, also find a free slot if one exists. */ + for (i = 0; i < ib_vector_size(fts_slots); ++i) { + + slot = static_cast<fts_slot_t*>(ib_vector_get(fts_slots, i)); + + if (!slot->table) { + empty = slot; + } else if (slot->table == table) { + /* Already exists in our optimize queue. */ + return false; + } + } + + slot = empty ? empty : static_cast<fts_slot_t*>( + ib_vector_push(fts_slots, NULL)); + + memset(slot, 0x0, sizeof(*slot)); + + slot->table = table; + return true; +} + +/** Remove a table from fts_slots if it exists. 
+@param remove table to be removed from fts_slots */ +static bool fts_optimize_del_table(fts_msg_del_t *remove) +{ + const dict_table_t* table = remove->table; + ut_ad(table); + for (ulint i = 0; i < ib_vector_size(fts_slots); ++i) { + fts_slot_t* slot; + + slot = static_cast<fts_slot_t*>(ib_vector_get(fts_slots, i)); + + if (slot->table == table) { + if (UNIV_UNLIKELY(fts_enable_diag_print)) { + ib::info() << "FTS Optimize Removing table " + << table->name; + } + + mysql_mutex_lock(&fts_optimize_wq->mutex); + table->fts->in_queue = false; + pthread_cond_signal(remove->cond); + mysql_mutex_unlock(&fts_optimize_wq->mutex); + slot->table = NULL; + return true; + } + } + + mysql_mutex_lock(&fts_optimize_wq->mutex); + pthread_cond_signal(remove->cond); + mysql_mutex_unlock(&fts_optimize_wq->mutex); + return false; +} + +/**********************************************************************//** +Calculate how many tables in fts_slots need to be optimized. +@return no. of tables to optimize */ +static ulint fts_optimize_how_many() +{ + ulint n_tables = 0; + const time_t current_time = time(NULL); + + for (ulint i = 0; i < ib_vector_size(fts_slots); ++i) { + const fts_slot_t* slot = static_cast<const fts_slot_t*>( + ib_vector_get_const(fts_slots, i)); + if (!slot->table) { + continue; + } + + const time_t end = slot->running + ? slot->last_run : slot->completed; + ulint interval = ulint(current_time - end); + + if (lint(interval) < 0 + || interval >= FTS_OPTIMIZE_INTERVAL_IN_SECS) { + ++n_tables; + } + } + + return(n_tables); +} + +/**********************************************************************//** +Check if the total memory used by all FTS table exceeds the maximum limit. 
+The check is skipped, and false is returned, while fts_need_sync is already
+set or when the previous check was made less than 5 seconds ago.
+@return true if a sync is needed, false otherwise */
+static bool fts_is_sync_needed()
+{
+	const time_t	now = time(NULL);
+	const double	elapsed = difftime(now, last_check_sync_time);
+
+	if (fts_need_sync) {
+		return false;
+	}
+
+	/* A negative difference (last check later than "now") does not
+	throttle the check. */
+	if (elapsed >= 0 && elapsed < 5) {
+		return false;
+	}
+
+	last_check_sync_time = now;
+
+	ulint	cache_bytes = 0;
+
+	for (ulint pos = 0; pos < ib_vector_size(fts_slots); ++pos) {
+		const fts_slot_t*	cur = static_cast<const fts_slot_t*>(
+			ib_vector_get_const(fts_slots, pos));
+
+		if (cur->table) {
+			if (cur->table->fts && cur->table->fts->cache) {
+				cache_bytes +=
+					cur->table->fts->cache->total_size;
+			}
+
+			/* Stop at the first slot at which the running total
+			is above the limit. */
+			if (cache_bytes > fts_max_total_cache_size) {
+				return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+/** Sync fts cache of a table
+@param[in,out]	table		table to be synced
+@param[in]	process_message	processing messages from fts_optimize_wq;
+				when set, the sync_message flag of the table
+				is cleared after the sync */
+static void fts_optimize_sync_table(dict_table_t *table,
+                                    bool process_message= false)
+{
+  MDL_ticket *ticket= nullptr;
+  dict_table_t *acquired= dict_acquire_mdl_shared<true>(table, fts_opt_thd,
+                                                        &ticket);
+
+  /* Nothing to sync if the table could not be acquired. */
+  if (!acquired)
+    return;
+
+  const bool can_sync= acquired->fts && acquired->fts->cache &&
+    acquired->is_accessible();
+
+  if (can_sync)
+  {
+    fts_sync_table(acquired, false);
+
+    if (process_message)
+    {
+      /* sync_message is changed only while holding the work queue mutex. */
+      mysql_mutex_lock(&fts_optimize_wq->mutex);
+      acquired->fts->sync_message = false;
+      mysql_mutex_unlock(&fts_optimize_wq->mutex);
+    }
+  }
+
+  DBUG_EXECUTE_IF("ib_optimize_wq_hang",
+                  std::this_thread::sleep_for(std::chrono::seconds(6)););
+
+  /* The table is closed only when an MDL ticket was handed out. */
+  if (ticket)
+    dict_table_close(acquired, false, fts_opt_thd, ticket);
+}
+
+/**********************************************************************//**
+Optimize all FTS tables.
+Callback that drains the messages queued in fts_optimize_wq and, while the
+queue is empty, optimizes the tables registered in fts_slots, one slot per
+loop iteration. Its progress is kept in function-local statics and therefore
+carries over from one invocation to the next.
+The unnamed void* argument is unused and the function returns nothing. */
+static void fts_optimize_callback(void *)
+{
+	ut_ad(!srv_read_only_mode);
+
+	static ulint	current;	/* fts_slots index to optimize next */
+	static bool	done;		/* set when FTS_MSG_STOP is processed */
+	static ulint	n_optimize;	/* tables due, as of the last recount */
+
+	if (!fts_optimize_wq || done) {
+		/* Possibly timer initiated callback, can come after FTS_MSG_STOP.*/
+		return;
+	}
+	/* Initialized only once, the first time execution gets here;
+	afterwards maintained by the ADD/DEL message handlers below. */
+	static ulint	n_tables = ib_vector_size(fts_slots);
+
+	while (!done && srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) {
+		/* If there is no message in the queue and we have tables
+		to optimize then optimize the tables. */
+
+		if (!done
+		    && ib_wqueue_is_empty(fts_optimize_wq)
+		    && n_tables > 0
+		    && n_optimize > 0) {
+
+			/* The queue is empty but we have tables
+			to optimize. No table is optimized while
+			wsrep_sst_disable_writes is set. */
+			if (UNIV_UNLIKELY(wsrep_sst_disable_writes)) {
+retry_later:	/* also the target of the gotos in the message branch */
+				if (fts_is_sync_needed()) {
+					fts_need_sync = true;
+				}
+				if (n_tables) {
+					timer->set_time(5000, 0); /* re-arm;
+					NOTE(review): 5000 is presumably in
+					milliseconds - confirm against the
+					timer API */
+				}
+				return;
+			}
+
+			fts_slot_t* slot = static_cast<fts_slot_t*>(
+				ib_vector_get(fts_slots, current));
+
+			/* Handle the case of empty slots. */
+			if (slot->table) {
+				slot->running = true;
+				fts_optimize_table_bk(slot);
+			}
+
+			/* Wrap around the counter, and recount the tables
+			that are due once every slot has been visited. */
+			if (++current >= ib_vector_size(fts_slots)) {
+				n_optimize = fts_optimize_how_many();
+				current = 0;
+			}
+		} else if (n_optimize == 0
+			   || !ib_wqueue_is_empty(fts_optimize_wq)) {
+			fts_msg_t* msg = static_cast<fts_msg_t*>
+				(ib_wqueue_nowait(fts_optimize_wq));
+			/* Timeout ? Nothing could be dequeued: check whether
+			a sync is needed, re-arm the timer if there are
+			tables, and return. */
+			if (!msg) {
+				goto retry_later;
+			}
+
+			switch (msg->type) {
+			case FTS_MSG_STOP:
+				done = true;
+				break;
+
+			case FTS_MSG_ADD_TABLE:
+				ut_a(!done);
+				if (fts_optimize_new_table(
+					static_cast<dict_table_t*>(
+					msg->ptr))) {
+					++n_tables;
+				}
+				break;
+
+			case FTS_MSG_DEL_TABLE:
+				if (fts_optimize_del_table(
+					static_cast<fts_msg_del_t*>(
+					msg->ptr))) {
+					--n_tables;
+				}
+				break;
+
+			case FTS_MSG_SYNC_TABLE:
+				if (UNIV_UNLIKELY(wsrep_sst_disable_writes)) {
+					add_msg(msg); /* queue the message
+					again; this path returns without
+					reaching mem_heap_free() below */
+					goto retry_later;
+				}
+
+				DBUG_EXECUTE_IF(
+					"fts_instrument_msg_sync_sleep",
+					std::this_thread::sleep_for(
+						std::chrono::milliseconds(
+							300)););
+
+				fts_optimize_sync_table(
+					static_cast<dict_table_t*>(msg->ptr),
+					true);
+				break;
+
+			default:
+				ut_error;
+			}
+
+			mem_heap_free(msg->heap); /* the message has been
+			handled; NOTE(review): msg itself presumably lives
+			in this heap - confirm */
+			n_optimize = done ? 0 : fts_optimize_how_many();
+		}
+	}
+
+	/* Server is being shutdown, sync the data from FTS cache to disk
+	if needed */
+	if (n_tables > 0) {
+		for (ulint i = 0; i < ib_vector_size(fts_slots); i++) {
+			fts_slot_t* slot = static_cast<fts_slot_t*>(
+				ib_vector_get(fts_slots, i));
+
+			if (slot->table) {
+				fts_optimize_sync_table(slot->table);
+			}
+		}
+	}
+	/* Release the slots and announce it: fts_optimize_shutdown() waits
+	on fts_opt_shutdown_cond until fts_slots has become NULL. */
+	ib_vector_free(fts_slots);
+	mysql_mutex_lock(&fts_optimize_wq->mutex);
+	fts_slots = NULL;
+	pthread_cond_broadcast(&fts_opt_shutdown_cond);
+	mysql_mutex_unlock(&fts_optimize_wq->mutex);
+
+	ib::info() << "FTS optimize thread exiting.";
+}
+
+/**********************************************************************//**
+Startup the optimize thread and create the work queue.
+Also creates the timer, the fts_slots vector and the background THD, and
+registers every table of dict_sys.table_LRU that has an FTS index.
+Must not be called again while fts_optimize_wq exists (asserted). */
+void
+fts_optimize_init(void)
+/*===================*/
+{
+	mem_heap_t*	heap;
+	ib_alloc_t*	heap_alloc;
+
+	ut_ad(!srv_read_only_mode);
+
+	/* For now we only support one optimize thread. */
+	ut_a(!fts_optimize_wq);
+
+	/* Create FTS optimize work queue */
+	fts_optimize_wq = ib_wqueue_create();
+	timer = srv_thread_pool->create_timer(timer_callback);
+
+	/* Create FTS vector to store fts_slot_t */
+	heap = mem_heap_create(sizeof(dict_table_t*) * 64);
+	heap_alloc = ib_heap_allocator_create(heap);
+	fts_slots = ib_vector_create(heap_alloc, sizeof(fts_slot_t), 4);
+
+	fts_opt_thd = innobase_create_background_thd("InnoDB FTS optimizer");
+	/* Add fts tables to fts_slots which could be skipped
+	during dict_load_table_one() because fts_optimize_thread
+	wasn't even started. */
+	dict_sys.freeze(SRW_LOCK_CALL);
+	for (dict_table_t* table = UT_LIST_GET_FIRST(dict_sys.table_LRU);
+	     table != NULL;
+	     table = UT_LIST_GET_NEXT(table_LRU, table)) {
+		if (!table->fts || !dict_table_has_fts_index(table)) {
+			continue;
+		}
+
+		/* fts_optimize_thread is not started yet. So there is no
+		need to acquire fts_optimize_wq->mutex for adding the fts
+		table to the fts slots. */
+		ut_ad(!table->can_be_evicted);
+		fts_optimize_new_table(table);
+		table->fts->in_queue = true;
+	}
+	dict_sys.unfreeze();
+
+	pthread_cond_init(&fts_opt_shutdown_cond, nullptr);
+	last_check_sync_time = time(NULL);
+}
+
+/** Shutdown fts optimize thread.
+Marks the shutdown as started, posts FTS_MSG_STOP and waits until
+fts_optimize_callback() has released fts_slots; then destroys the background
+THD, the condition variable, the work queue and the timer. */
+void
+fts_optimize_shutdown()
+{
+	ut_ad(!srv_read_only_mode);
+
+	/* If there is an ongoing activity on dictionary, such as
+	srv_master_evict_from_table_cache(), wait for it */
+	dict_sys.freeze(SRW_LOCK_CALL);
+	mysql_mutex_lock(&fts_optimize_wq->mutex);
+	/* Tell the FTS optimizer system that we are exiting from the
+	optimizer thread; messages sent thereafter will not be
+	processed */
+	fts_opt_start_shutdown = true;
+	dict_sys.unfreeze();
+
+	/* We tell the OPTIMIZE thread to switch to state done, we
+	can't delete the work queue here because the add thread needs
+	to deregister the FTS tables. */
+	timer->disarm();
+	task_group.cancel_pending(&task);
+
+	add_msg(fts_optimize_create_msg(FTS_MSG_STOP, nullptr));
+	/* fts_optimize_callback() sets fts_slots to NULL and broadcasts
+	fts_opt_shutdown_cond when it is finished. The wait is done on
+	the work queue mutex, which is still held here. */
+	while (fts_slots) {
+		my_cond_wait(&fts_opt_shutdown_cond,
+			     &fts_optimize_wq->mutex.m_mutex);
+	}
+
+	destroy_background_thd(fts_opt_thd);
+	fts_opt_thd = NULL;
+	pthread_cond_destroy(&fts_opt_shutdown_cond);
+	mysql_mutex_unlock(&fts_optimize_wq->mutex);
+
+	ib_wqueue_free(fts_optimize_wq);
+	fts_optimize_wq = NULL;
+
+	delete timer;
+	timer = NULL;
+}
+
+/** Sync the table during commit phase
+Does nothing if the optimize work queue does not exist, or if no SYNC message
+is pending for the table (fts->sync_message is not set). Otherwise the table
+is synced here and the flag is cleared. The flag is read and written only
+while holding fts_optimize_wq->mutex; the sync itself runs without it.
+@param[in]	table	table to be synced */
+void fts_sync_during_ddl(dict_table_t* table)
+{
+  if (!fts_optimize_wq)
+    return;
+  mysql_mutex_lock(&fts_optimize_wq->mutex);
+  const auto sync_message= table->fts->sync_message;
+  mysql_mutex_unlock(&fts_optimize_wq->mutex);
+  if (!sync_message)
+    return;
+
+  fts_sync_table(table, false);
+
+  mysql_mutex_lock(&fts_optimize_wq->mutex);
+  table->fts->sync_message = false;
+  mysql_mutex_unlock(&fts_optimize_wq->mutex);
+}
diff --git a/storage/innobase/fts/fts0pars.cc b/storage/innobase/fts/fts0pars.cc
new file mode 100644
index 00000000..cb51784a
--- /dev/null
+++ b/storage/innobase/fts/fts0pars.cc
@@ -0,0 +1,2007 @@
+/* A Bison parser, made by GNU Bison 2.5. */
+
+/* Bison implementation for Yacc-like parsers in C
+
+ Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +/* C LALR(1) parser skeleton written by Richard Stallman, by + simplifying the original so-called "semantic" parser. */ + +/* All symbols defined below should begin with yy or YY, to avoid + infringing on user name space. This should be done even for local + variables, as they might otherwise be expanded by user macros. + There are some unavoidable exceptions within include files to + define necessary library symbols; they are noted "INFRINGES ON + USER NAME SPACE" below. */ + +/* Identify Bison output. */ +#define YYBISON 1 + +/* Bison version. */ +#define YYBISON_VERSION "2.5" + +/* Skeleton name. */ +#define YYSKELETON_NAME "yacc.c" + +/* Pure parsers. */ +#define YYPURE 1 + +/* Push parsers. */ +#define YYPUSH 0 + +/* Pull parsers. */ +#define YYPULL 1 + +/* Using locations. */ +#define YYLSP_NEEDED 0 + +/* Substitute the variable and function names. */ +#define yyparse ftsparse +#define yylex ftslex +#define yyerror ftserror +#define yylval ftslval +#define yychar ftschar +#define yydebug ftsdebug +#define yynerrs ftsnerrs + + +/* Copy the first part of user declarations. 
*/ + +/* Line 268 of yacc.c */ +#line 26 "fts0pars.y" + +#include "ha_prototypes.h" +#include "mem0mem.h" +#include "fts0ast.h" +#include "fts0blex.h" +#include "fts0tlex.h" +#include "fts0pars.h" +#include <my_sys.h> +extern int fts_lexer(YYSTYPE*, fts_lexer_t*); +extern int fts_blexer(YYSTYPE*, yyscan_t); +extern int fts_tlexer(YYSTYPE*, yyscan_t); +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wpragmas" +# pragma GCC diagnostic ignored "-Wunknown-warning-option" +# pragma GCC diagnostic ignored "-Wunused-but-set-variable" +#endif +extern int ftserror(const char* p); +/* Required for reentrant parser */ +#define ftslex fts_lexer + +#define YYERROR_VERBOSE + +/* For passing an argument to yyparse() */ +#define YYPARSE_PARAM state +#define YYLEX_PARAM ((fts_ast_state_t*) state)->lexer + +#define YYTOKENFREE(token) fts_ast_string_free((token)) + + +typedef int (*fts_scanner)(YYSTYPE* val, yyscan_t yyscanner); + +struct fts_lexer_t { + fts_scanner scanner; + void* yyscanner; +}; + + + +/* Line 268 of yacc.c */ +#line 115 "fts0pars.cc" + +/* Enabling traces. */ +#ifndef YYDEBUG +# define YYDEBUG 0 +#endif + +/* Enabling verbose error messages. */ +#ifdef YYERROR_VERBOSE +# undef YYERROR_VERBOSE +# define YYERROR_VERBOSE 1 +#else +# define YYERROR_VERBOSE 0 +#endif + +/* Enabling the token table. */ +#ifndef YYTOKEN_TABLE +# define YYTOKEN_TABLE 0 +#endif + + +/* Tokens. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + /* Put the tokens into the symbol table, so that GDB and other debuggers + know about them. */ + enum yytokentype { + FTS_OPER = 258, + FTS_TEXT = 259, + FTS_TERM = 260, + FTS_NUMB = 261 + }; +#endif + + + +#if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED +typedef union YYSTYPE +{ + +/* Line 293 of yacc.c */ +#line 61 "fts0pars.y" + + int oper; + fts_ast_string_t* token; + fts_ast_node_t* node; + + + +/* Line 293 of yacc.c */ +#line 165 "fts0pars.cc" +} YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define yystype YYSTYPE /* obsolescent; will be withdrawn */ +# define YYSTYPE_IS_DECLARED 1 +#endif + + +/* Copy the second part of user declarations. */ + + +/* Line 343 of yacc.c */ +#line 177 "fts0pars.cc" + +#ifdef short +# undef short +#endif + +#ifdef YYTYPE_UINT8 +typedef YYTYPE_UINT8 yytype_uint8; +#else +typedef unsigned char yytype_uint8; +#endif + +#ifdef YYTYPE_INT8 +typedef YYTYPE_INT8 yytype_int8; +#elif (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +typedef signed char yytype_int8; +#else +typedef short int yytype_int8; +#endif + +#ifdef YYTYPE_UINT16 +typedef YYTYPE_UINT16 yytype_uint16; +#else +typedef unsigned short int yytype_uint16; +#endif + +#ifdef YYTYPE_INT16 +typedef YYTYPE_INT16 yytype_int16; +#else +typedef short int yytype_int16; +#endif + +#ifndef YYSIZE_T +# ifdef __SIZE_TYPE__ +# define YYSIZE_T __SIZE_TYPE__ +# elif defined size_t +# define YYSIZE_T size_t +# elif ! defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +# include <stddef.h> /* INFRINGES ON USER NAME SPACE */ +# define YYSIZE_T size_t +# else +# define YYSIZE_T unsigned int +# endif +#endif + +#define YYSIZE_MAXIMUM ((YYSIZE_T) -1) + +#ifndef YY_ +# if defined YYENABLE_NLS && YYENABLE_NLS +# if ENABLE_NLS +# include <libintl.h> /* INFRINGES ON USER NAME SPACE */ +# define YY_(msgid) dgettext ("bison-runtime", msgid) +# endif +# endif +# ifndef YY_ +# define YY_(msgid) msgid +# endif +#endif + +/* Suppress unused-variable warnings by "using" E. */ +#if ! 
defined lint || defined __GNUC__ +# define YYUSE(e) ((void) (e)) +#else +# define YYUSE(e) /* empty */ +#endif + +/* Identity function, used to suppress warnings about constant conditions. */ +#ifndef lint +# define YYID(n) (n) +#else +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static int +YYID (int yyi) +#else +static int +YYID (yyi) + int yyi; +#endif +{ + return yyi; +} +#endif + +#if ! defined yyoverflow || YYERROR_VERBOSE + +/* The parser invokes alloca or malloc; define the necessary symbols. */ + +# ifdef YYSTACK_USE_ALLOCA +# if YYSTACK_USE_ALLOCA +# ifdef __GNUC__ +# define YYSTACK_ALLOC __builtin_alloca +# elif defined __BUILTIN_VA_ARG_INCR +# include <alloca.h> /* INFRINGES ON USER NAME SPACE */ +# elif defined _MSC_VER +# include <malloc.h> /* INFRINGES ON USER NAME SPACE */ +# define alloca _alloca +# else +# define YYSTACK_ALLOC alloca +# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# endif +# endif +# endif + +# ifdef YYSTACK_ALLOC + /* Pacify GCC's `empty if-body' warning. */ +# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0)) +# ifndef YYSTACK_ALLOC_MAXIMUM + /* The OS might guarantee only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + invoke alloca (N) if N exceeds 4096. Use a slightly smaller number + to allow for a few compiler-allocated temporary stack slots. */ +# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ +# endif +# else +# define YYSTACK_ALLOC YYMALLOC +# define YYSTACK_FREE YYFREE +# ifndef YYSTACK_ALLOC_MAXIMUM +# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM +# endif +# if (defined __cplusplus && ! defined EXIT_SUCCESS \ + && ! 
((defined YYMALLOC || defined malloc) \ + && (defined YYFREE || defined free))) +# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# ifndef YYMALLOC +# define YYMALLOC malloc +# if ! defined malloc && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# ifndef YYFREE +# define YYFREE free +# if ! defined free && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +void free (void *); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# endif +#endif /* ! defined yyoverflow || YYERROR_VERBOSE */ + + +#if (! defined yyoverflow \ + && (! defined __cplusplus \ + || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) + +/* A type that is properly aligned for any stack member. */ +union yyalloc +{ + yytype_int16 yyss_alloc; + YYSTYPE yyvs_alloc; +}; + +/* The size of the maximum gap between one aligned stack and the next. */ +# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) + +/* The size of an array large to enough to hold all stacks, each with + N elements. */ +# define YYSTACK_BYTES(N) \ + ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \ + + YYSTACK_GAP_MAXIMUM) + +# define YYCOPY_NEEDED 1 + +/* Relocate STACK from its old location to the new one. The + local variables YYSIZE and YYSTACKSIZE give the old and new number of + elements in the stack, and YYPTR gives the new location of the + stack. Advance YYPTR to a properly aligned location for the next + stack. 
*/ +# define YYSTACK_RELOCATE(Stack_alloc, Stack) \ + do \ + { \ + YYSIZE_T yynewbytes; \ + YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \ + Stack = &yyptr->Stack_alloc; \ + yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ + yyptr += yynewbytes / sizeof (*yyptr); \ + } \ + while (YYID (0)) + +#endif + +#if defined YYCOPY_NEEDED && YYCOPY_NEEDED +/* Copy COUNT objects from FROM to TO. The source and destination do + not overlap. */ +# ifndef YYCOPY +# if defined __GNUC__ && 1 < __GNUC__ +# define YYCOPY(To, From, Count) \ + __builtin_memcpy (To, From, (Count) * sizeof (*(From))) +# else +# define YYCOPY(To, From, Count) \ + do \ + { \ + YYSIZE_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (To)[yyi] = (From)[yyi]; \ + } \ + while (YYID (0)) +# endif +# endif +#endif /* !YYCOPY_NEEDED */ + +/* YYFINAL -- State number of the termination state. */ +#define YYFINAL 3 +/* YYLAST -- Last index in YYTABLE. */ +#define YYLAST 52 + +/* YYNTOKENS -- Number of terminals. */ +#define YYNTOKENS 16 +/* YYNNTS -- Number of nonterminals. */ +#define YYNNTS 8 +/* YYNRULES -- Number of rules. */ +#define YYNRULES 24 +/* YYNRULES -- Number of states. */ +#define YYNSTATES 33 + +/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ +#define YYUNDEFTOK 2 +#define YYMAXUTOK 261 + +#define YYTRANSLATE(YYX) \ + ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) + +/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. 
*/ +static const yytype_uint8 yytranslate[] = +{ + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 12, 13, 14, 7, 2, 8, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 10, 2, 11, 2, 15, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 9, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, + 5, 6 +}; + +#if YYDEBUG +/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in + YYRHS. */ +static const yytype_uint8 yyprhs[] = +{ + 0, 0, 3, 5, 6, 9, 12, 16, 21, 23, + 25, 28, 32, 36, 39, 44, 47, 49, 51, 53, + 55, 57, 59, 61, 64 +}; + +/* YYRHS -- A `-1'-separated list of the rules' RHS. */ +static const yytype_int8 yyrhs[] = +{ + 17, 0, -1, 18, -1, -1, 18, 20, -1, 18, + 19, -1, 12, 18, 13, -1, 21, 12, 18, 13, + -1, 22, -1, 23, -1, 22, 14, -1, 23, 15, + 6, -1, 21, 22, 14, -1, 21, 22, -1, 21, + 23, 15, 6, -1, 21, 23, -1, 8, -1, 7, + -1, 9, -1, 10, -1, 11, -1, 5, -1, 6, + -1, 14, 22, -1, 4, -1 +}; + +/* YYRLINE[YYN] -- source line where rule number YYN was defined. */ +static const yytype_uint8 yyrline[] = +{ + 0, 79, 79, 85, 89, 99, 111, 119, 129, 133, + 137, 141, 146, 152, 157, 164, 170, 174, 178, 182, + 186, 191, 196, 202, 207 +}; +#endif + +#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE +/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. + First, the terminals, then, starting at YYNTOKENS, nonterminals. 
*/ +static const char *const yytname[] = +{ + "$end", "error", "$undefined", "FTS_OPER", "FTS_TEXT", "FTS_TERM", + "FTS_NUMB", "'+'", "'-'", "'~'", "'<'", "'>'", "'('", "')'", "'*'", + "'@'", "$accept", "query", "expr_lst", "sub_expr", "expr", "prefix", + "term", "text", 0 +}; +#endif + +# ifdef YYPRINT +/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to + token YYLEX-NUM. */ +static const yytype_uint16 yytoknum[] = +{ + 0, 256, 257, 258, 259, 260, 261, 43, 45, 126, + 60, 62, 40, 41, 42, 64 +}; +# endif + +/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ +static const yytype_uint8 yyr1[] = +{ + 0, 16, 17, 18, 18, 18, 19, 19, 20, 20, + 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, + 21, 22, 22, 22, 23 +}; + +/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */ +static const yytype_uint8 yyr2[] = +{ + 0, 2, 1, 0, 2, 2, 3, 4, 1, 1, + 2, 3, 3, 2, 4, 2, 1, 1, 1, 1, + 1, 1, 1, 2, 1 +}; + +/* YYDEFACT[STATE-NAME] -- Default reduction number in state STATE-NUM. + Performed when YYTABLE doesn't specify something else to do. Zero + means the default is an error. */ +static const yytype_uint8 yydefact[] = +{ + 3, 0, 2, 1, 24, 21, 22, 17, 16, 18, + 19, 20, 3, 0, 5, 4, 0, 8, 9, 0, + 23, 3, 13, 15, 10, 0, 6, 0, 12, 0, + 11, 7, 14 +}; + +/* YYDEFGOTO[NTERM-NUM]. */ +static const yytype_int8 yydefgoto[] = +{ + -1, 1, 2, 14, 15, 16, 17, 18 +}; + +/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing + STATE-NUM. */ +#define YYPACT_NINF -5 +static const yytype_int8 yypact[] = +{ + -5, 38, 18, -5, -5, -5, -5, -5, -5, -5, + -5, -5, -5, 31, -5, -5, 29, 30, 32, -4, + -5, -5, 34, 35, -5, 40, -5, 7, -5, 43, + -5, -5, -5 +}; + +/* YYPGOTO[NTERM-NUM]. */ +static const yytype_int8 yypgoto[] = +{ + -5, -5, 19, -5, -5, -5, 26, 36 +}; + +/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If + positive, shift that token. If negative, reduce the rule which + number is the opposite. If YYTABLE_NINF, syntax error. 
*/ +#define YYTABLE_NINF -1 +static const yytype_uint8 yytable[] = +{ + 4, 5, 6, 7, 8, 9, 10, 11, 12, 26, + 13, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 31, 13, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 19, 13, 4, 5, 6, 5, 6, 3, 20, + 27, 21, 22, 13, 24, 13, 30, 25, 28, 32, + 29, 0, 23 +}; + +#define yypact_value_is_default(yystate) \ + ((yystate) == (-5)) + +#define yytable_value_is_error(yytable_value) \ + YYID (0) + +static const yytype_int8 yycheck[] = +{ + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 12, 14, 4, 5, 6, 5, 6, 0, 13, + 21, 12, 16, 14, 14, 14, 6, 15, 14, 6, + 15, -1, 16 +}; + +/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing + symbol of state STATE-NUM. */ +static const yytype_uint8 yystos[] = +{ + 0, 17, 18, 0, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 14, 19, 20, 21, 22, 23, 18, + 22, 12, 22, 23, 14, 15, 13, 18, 14, 15, + 6, 13, 6 +}; + +#define yyerrok (yyerrstatus = 0) +#define yyclearin (yychar = YYEMPTY) +#define YYEMPTY (-2) +#define YYEOF 0 + +#define YYACCEPT goto yyacceptlab +#define YYABORT goto yyabortlab +#define YYERROR goto yyerrorlab + + +/* Like YYERROR except do call yyerror. This remains here temporarily + to ease the transition to the new meaning of YYERROR, for GCC. + Once GCC version 2 has supplanted version 1, this can go. However, + YYFAIL appears to be in use. Nevertheless, it is formally deprecated + in Bison 2.4.2's NEWS entry, where a plan to phase it out is + discussed. */ + +#define YYFAIL goto yyerrlab +#if defined YYFAIL + /* This is here to suppress warnings from the GCC cpp's + -Wunused-macros. Normally we don't worry about that warning, but + some users do, and we want to make it easy for users to remove + YYFAIL uses, which will produce warnings from Bison 2.5. 
*/ +#endif + +#define YYRECOVERING() (!!yyerrstatus) + +#define YYBACKUP(Token, Value) \ +do \ + if (yychar == YYEMPTY && yylen == 1) \ + { \ + yychar = (Token); \ + yylval = (Value); \ + YYPOPSTACK (1); \ + goto yybackup; \ + } \ + else \ + { \ + yyerror (YY_("syntax error: cannot back up")); \ + YYERROR; \ + } \ +while (YYID (0)) + + +#define YYTERROR 1 +#define YYERRCODE 256 + +#define YYERRCLEANUP \ +do \ + switch (yylastchar) \ + { \ + case FTS_NUMB: \ + case FTS_TEXT: \ + case FTS_TERM: \ + YYTOKENFREE(yylval.token); \ + break; \ + default: \ + break; \ + } \ +while (YYID (0)) + +/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N]. + If N is 0, then set CURRENT to the empty location which ends + the previous symbol: RHS[0] (always defined). */ + +#define YYRHSLOC(Rhs, K) ((Rhs)[K]) +#ifndef YYLLOC_DEFAULT +# define YYLLOC_DEFAULT(Current, Rhs, N) \ + do \ + if (YYID (N)) \ + { \ + (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ + (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ + (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ + (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ + } \ + else \ + { \ + (Current).first_line = (Current).last_line = \ + YYRHSLOC (Rhs, 0).last_line; \ + (Current).first_column = (Current).last_column = \ + YYRHSLOC (Rhs, 0).last_column; \ + } \ + while (YYID (0)) +#endif + + +/* This macro is provided for backward compatibility. */ + +#ifndef YY_LOCATION_PRINT +# define YY_LOCATION_PRINT(File, Loc) ((void) 0) +#endif + + +/* YYLEX -- calling `yylex' with the right arguments. */ + +#ifdef YYLEX_PARAM +# define YYLEX yylex (&yylval, YYLEX_PARAM) +#else +# define YYLEX yylex (&yylval) +#endif + +/* Enable debugging if requested. 
*/ +#if YYDEBUG + +# ifndef YYFPRINTF +# include <stdio.h> /* INFRINGES ON USER NAME SPACE */ +# define YYFPRINTF fprintf +# endif + +# define YYDPRINTF(Args) \ +do { \ + if (yydebug) \ + YYFPRINTF Args; \ +} while (YYID (0)) + +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ +do { \ + if (yydebug) \ + { \ + YYFPRINTF (stderr, "%s ", Title); \ + yy_symbol_print (stderr, \ + Type, Value); \ + YYFPRINTF (stderr, "\n"); \ + } \ +} while (YYID (0)) + + +/*--------------------------------. +| Print this symbol on YYOUTPUT. | +`--------------------------------*/ + +/*ARGSUSED*/ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep) +#else +static void +yy_symbol_value_print (yyoutput, yytype, yyvaluep) + FILE *yyoutput; + int yytype; + YYSTYPE const * const yyvaluep; +#endif +{ + if (!yyvaluep) + return; +# ifdef YYPRINT + if (yytype < YYNTOKENS) + YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); +# else + YYUSE (yyoutput); +# endif + switch (yytype) + { + default: + break; + } +} + + +/*--------------------------------. +| Print this symbol on YYOUTPUT. | +`--------------------------------*/ + +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep) +#else +static void +yy_symbol_print (yyoutput, yytype, yyvaluep) + FILE *yyoutput; + int yytype; + YYSTYPE const * const yyvaluep; +#endif +{ + if (yytype < YYNTOKENS) + YYFPRINTF (yyoutput, "token %s (", yytname[yytype]); + else + YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]); + + yy_symbol_value_print (yyoutput, yytype, yyvaluep); + YYFPRINTF (yyoutput, ")"); +} + +/*------------------------------------------------------------------. +| yy_stack_print -- Print the state stack from its BOTTOM up to its | +| TOP (included). 
| +`------------------------------------------------------------------*/ + +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop) +#else +static void +yy_stack_print (yybottom, yytop) + yytype_int16 *yybottom; + yytype_int16 *yytop; +#endif +{ + YYFPRINTF (stderr, "Stack now"); + for (; yybottom <= yytop; yybottom++) + { + int yybot = *yybottom; + YYFPRINTF (stderr, " %d", yybot); + } + YYFPRINTF (stderr, "\n"); +} + +# define YY_STACK_PRINT(Bottom, Top) \ +do { \ + if (yydebug) \ + yy_stack_print ((Bottom), (Top)); \ +} while (YYID (0)) + + +/*------------------------------------------------. +| Report that the YYRULE is going to be reduced. | +`------------------------------------------------*/ + +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yy_reduce_print (YYSTYPE *yyvsp, int yyrule) +#else +static void +yy_reduce_print (yyvsp, yyrule) + YYSTYPE *yyvsp; + int yyrule; +#endif +{ + int yynrhs = yyr2[yyrule]; + int yyi; + unsigned long int yylno = yyrline[yyrule]; + YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n", + yyrule - 1, yylno); + /* The symbols being reduced. */ + for (yyi = 0; yyi < yynrhs; yyi++) + { + YYFPRINTF (stderr, " $%d = ", yyi + 1); + yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi], + &(yyvsp[(yyi + 1) - (yynrhs)]) + ); + YYFPRINTF (stderr, "\n"); + } +} + +# define YY_REDUCE_PRINT(Rule) \ +do { \ + if (yydebug) \ + yy_reduce_print (yyvsp, Rule); \ +} while (YYID (0)) + +/* Nonzero means print parse trace. It is left uninitialized so that + multiple parsers can coexist. */ +int yydebug; +#else /* !YYDEBUG */ +# define YYDPRINTF(Args) +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) +# define YY_STACK_PRINT(Bottom, Top) +# define YY_REDUCE_PRINT(Rule) +#endif /* !YYDEBUG */ + + +/* YYINITDEPTH -- initial size of the parser's stacks. 
*/ +#ifndef YYINITDEPTH +# define YYINITDEPTH 200 +#endif + +/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only + if the built-in stack extension method is used). + + Do not make this value too large; the results are undefined if + YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) + evaluated with infinite-precision integer arithmetic. */ + +#ifndef YYMAXDEPTH +# define YYMAXDEPTH 10000 +#endif + + +#if YYERROR_VERBOSE + +# ifndef yystrlen +# if defined __GLIBC__ && defined _STRING_H +# define yystrlen strlen +# else +/* Return the length of YYSTR. */ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static YYSIZE_T +yystrlen (const char *yystr) +#else +static YYSIZE_T +yystrlen (yystr) + const char *yystr; +#endif +{ + YYSIZE_T yylen; + for (yylen = 0; yystr[yylen]; yylen++) + continue; + return yylen; +} +# endif +# endif + +# ifndef yystpcpy +# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE +# define yystpcpy stpcpy +# else +/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in + YYDEST. */ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static char * +yystpcpy (char *yydest, const char *yysrc) +#else +static char * +yystpcpy (yydest, yysrc) + char *yydest; + const char *yysrc; +#endif +{ + char *yyd = yydest; + const char *yys = yysrc; + + while ((*yyd++ = *yys++) != '\0') + continue; + + return yyd - 1; +} +# endif +# endif + +# ifndef yytnamerr +/* Copy to YYRES the contents of YYSTR after stripping away unnecessary + quotes and backslashes, so that it's suitable for yyerror. The + heuristic is that double-quoting is unnecessary unless the string + contains an apostrophe, a comma, or backslash (other than + backslash-backslash). YYSTR is taken from yytname. If YYRES is + null, do not copy; instead, return the length of what the result + would have been. 
*/ +static YYSIZE_T +yytnamerr (char *yyres, const char *yystr) +{ + if (*yystr == '"') + { + YYSIZE_T yyn = 0; + char const *yyp = yystr; + + for (;;) + switch (*++yyp) + { + case '\'': + case ',': + goto do_not_strip_quotes; + + case '\\': + if (*++yyp != '\\') + goto do_not_strip_quotes; + /* Fall through. */ + default: + if (yyres) + yyres[yyn] = *yyp; + yyn++; + break; + + case '"': + if (yyres) + yyres[yyn] = '\0'; + return yyn; + } + do_not_strip_quotes: ; + } + + if (! yyres) + return yystrlen (yystr); + + return yystpcpy (yyres, yystr) - yyres; +} +# endif + +/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message + about the unexpected token YYTOKEN for the state stack whose top is + YYSSP. + + Return 0 if *YYMSG was successfully written. Return 1 if *YYMSG is + not large enough to hold the message. In that case, also set + *YYMSG_ALLOC to the required number of bytes. Return 2 if the + required number of bytes is too large to store. */ +static int +yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, + yytype_int16 *yyssp, int yytoken) +{ + YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]); + YYSIZE_T yysize = yysize0; + YYSIZE_T yysize1; + enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; + /* Internationalized format string. */ + const char *yyformat = 0; + /* Arguments of yyformat. */ + char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; + /* Number of reported tokens (one for the "unexpected", one per + "expected"). */ + int yycount = 0; + + /* There are many possibilities here to consider: + - Assume YYFAIL is not used. It's too flawed to consider. See + <http://lists.gnu.org/archive/html/bison-patches/2009-12/msg00024.html> + for details. YYERROR is fine as it does not invoke this + function. + - If this state is a consistent state with a default action, then + the only way this function was invoked is if the default action + is an error action. In that case, don't check for expected + tokens because there are none. 
+ - The only way there can be no lookahead present (in yychar) is if + this state is a consistent state with a default action. Thus, + detecting the absence of a lookahead is sufficient to determine + that there is no unexpected or expected token to report. In that + case, just report a simple "syntax error". + - Don't assume there isn't a lookahead just because this state is a + consistent state with a default action. There might have been a + previous inconsistent state, consistent state with a non-default + action, or user semantic action that manipulated yychar. + - Of course, the expected token list depends on states to have + correct lookahead information, and it depends on the parser not + to perform extra reductions after fetching a lookahead from the + scanner and before detecting a syntax error. Thus, state merging + (from LALR or IELR) and default reductions corrupt the expected + token list. However, the list is correct for canonical LR with + one exception: it will still contain any token that will not be + accepted due to an error action in a later state. + */ + if (yytoken != YYEMPTY) + { + int yyn = yypact[*yyssp]; + yyarg[yycount++] = yytname[yytoken]; + if (!yypact_value_is_default (yyn)) + { + /* Start YYX at -YYN if negative to avoid negative indexes in + YYCHECK. In other words, skip the first -YYN actions for + this state because they are default actions. */ + int yyxbegin = yyn < 0 ? -yyn : 0; + /* Stay within bounds of both yycheck and yytname. */ + int yychecklim = YYLAST - yyn + 1; + int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; + int yyx; + + for (yyx = yyxbegin; yyx < yyxend; ++yyx) + if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR + && !yytable_value_is_error (yytable[yyx + yyn])) + { + if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM) + { + yycount = 1; + yysize = yysize0; + break; + } + yyarg[yycount++] = yytname[yyx]; + yysize1 = yysize + yytnamerr (0, yytname[yyx]); + if (! 
(yysize <= yysize1 + && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) + return 2; + yysize = yysize1; + } + } + } + + switch (yycount) + { +# define YYCASE_(N, S) \ + case N: \ + yyformat = S; \ + break + YYCASE_(0, YY_("syntax error")); + YYCASE_(1, YY_("syntax error, unexpected %s")); + YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s")); + YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s")); + YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s")); + YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s")); +# undef YYCASE_ + } + + yysize1 = yysize + yystrlen (yyformat); + if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) + return 2; + yysize = yysize1; + + if (*yymsg_alloc < yysize) + { + *yymsg_alloc = 2 * yysize; + if (! (yysize <= *yymsg_alloc + && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM)) + *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM; + return 1; + } + + /* Avoid sprintf, as that infringes on the user's name space. + Don't have undefined behavior even if the translation + produced a string with the wrong number of "%s"s. */ + { + char *yyp = *yymsg; + int yyi = 0; + while ((*yyp = *yyformat) != '\0') + if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) + { + yyp += yytnamerr (yyp, yyarg[yyi++]); + yyformat += 2; + } + else + { + yyp++; + yyformat++; + } + } + return 0; +} +#endif /* YYERROR_VERBOSE */ + +/*-----------------------------------------------. +| Release the memory associated to this symbol. 
| +`-----------------------------------------------*/ + +/*ARGSUSED*/ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep) +#else +static void +yydestruct (yymsg, yytype, yyvaluep) + const char *yymsg; + int yytype; + YYSTYPE *yyvaluep; +#endif +{ + YYUSE (yyvaluep); + + if (!yymsg) + yymsg = "Deleting"; + YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); + + switch (yytype) + { + + default: + break; + } +} + + +/* Prevent warnings from -Wmissing-prototypes. */ +#ifdef YYPARSE_PARAM +#if defined __STDC__ || defined __cplusplus +int yyparse (void *YYPARSE_PARAM); +#else +int yyparse (); +#endif +#else /* ! YYPARSE_PARAM */ +#if defined __STDC__ || defined __cplusplus +int yyparse (void); +#else +int yyparse (); +#endif +#endif /* ! YYPARSE_PARAM */ + + +/*----------. +| yyparse. | +`----------*/ + +#ifdef YYPARSE_PARAM +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +int +yyparse (void *YYPARSE_PARAM) +#else +int +yyparse (YYPARSE_PARAM) + void *YYPARSE_PARAM; +#endif +#else /* ! YYPARSE_PARAM */ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +int +yyparse (void) +#else +int +yyparse () + +#endif +#endif +{ +/* The lookahead symbol. */ +int yychar; +/* The backup of yychar when there is an error and we're in yyerrlab. */ +int yylastchar; + +/* The semantic value of the lookahead symbol. */ +YYSTYPE yylval; + + /* Number of syntax errors so far. */ + int yynerrs; + + int yystate; + /* Number of tokens to shift before error messages enabled. */ + int yyerrstatus; + + /* The stacks and their tools: + `yyss': related to states. + `yyvs': related to semantic values. + + Refer to the stacks thru separate pointers, to allow yyoverflow + to reallocate them elsewhere. */ + + /* The state stack. 
*/ + yytype_int16 yyssa[YYINITDEPTH]; + yytype_int16 *yyss; + yytype_int16 *yyssp; + + /* The semantic value stack. */ + YYSTYPE yyvsa[YYINITDEPTH]; + YYSTYPE *yyvs; + YYSTYPE *yyvsp; + + YYSIZE_T yystacksize; + + int yyn; + int yyresult; + /* Lookahead token as an internal (translated) token number. */ + int yytoken; + /* The variables used to return semantic value and location from the + action routines. */ + YYSTYPE yyval; + +#if YYERROR_VERBOSE + /* Buffer for error messages, and its allocated size. */ + char yymsgbuf[128]; + char *yymsg = yymsgbuf; + YYSIZE_T yymsg_alloc = sizeof yymsgbuf; +#endif + +#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) + + /* The number of symbols on the RHS of the reduced rule. + Keep to zero when no symbol should be popped. */ + int yylen = 0; + + yytoken = 0; + yyss = yyssa; + yyvs = yyvsa; + yystacksize = YYINITDEPTH; + + YYDPRINTF ((stderr, "Starting parse\n")); + + yystate = 0; + yyerrstatus = 0; + yynerrs = 0; + yychar = YYEMPTY; /* Cause a token to be read. */ + + /* Initialize stack pointers. + Waste one element of value and location stack + so that they stay on the same level as the state stack. + The wasted elements are never initialized. */ + yyssp = yyss; + yyvsp = yyvs; + + goto yysetstate; + +/*------------------------------------------------------------. +| yynewstate -- Push a new state, which is found in yystate. | +`------------------------------------------------------------*/ + yynewstate: + /* In all cases, when you get here, the value and location stacks + have just been pushed. So pushing a state here evens the stacks. */ + yyssp++; + + yysetstate: + *yyssp = yystate; + + if (yyss + yystacksize - 1 <= yyssp) + { + /* Get the current used size of the three stacks, in elements. */ + YYSIZE_T yysize = yyssp - yyss + 1; + +#ifdef yyoverflow + { + /* Give user a chance to reallocate the stack. Use copies of + these so that the &'s don't force the real ones into + memory. 
*/ + YYSTYPE *yyvs1 = yyvs; + yytype_int16 *yyss1 = yyss; + + /* Each stack pointer address is followed by the size of the + data in use in that stack, in bytes. This used to be a + conditional around just the two extra args, but that might + be undefined if yyoverflow is a macro. */ + yyoverflow (YY_("memory exhausted"), + &yyss1, yysize * sizeof (*yyssp), + &yyvs1, yysize * sizeof (*yyvsp), + &yystacksize); + + yyss = yyss1; + yyvs = yyvs1; + } +#else /* no yyoverflow */ +# ifndef YYSTACK_RELOCATE + goto yyexhaustedlab; +# else + /* Extend the stack our own way. */ + if (YYMAXDEPTH <= yystacksize) + goto yyexhaustedlab; + yystacksize *= 2; + if (YYMAXDEPTH < yystacksize) + yystacksize = YYMAXDEPTH; + + { + yytype_int16 *yyss1 = yyss; + union yyalloc *yyptr = + (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); + if (! yyptr) + goto yyexhaustedlab; + YYSTACK_RELOCATE (yyss_alloc, yyss); + YYSTACK_RELOCATE (yyvs_alloc, yyvs); +# undef YYSTACK_RELOCATE + if (yyss1 != yyssa) + YYSTACK_FREE (yyss1); + } +# endif +#endif /* no yyoverflow */ + + yyssp = yyss + yysize - 1; + yyvsp = yyvs + yysize - 1; + + YYDPRINTF ((stderr, "Stack size increased to %lu\n", + (unsigned long int) yystacksize)); + + if (yyss + yystacksize - 1 <= yyssp) + YYABORT; + } + + YYDPRINTF ((stderr, "Entering state %d\n", yystate)); + + if (yystate == YYFINAL) + YYACCEPT; + + goto yybackup; + +/*-----------. +| yybackup. | +`-----------*/ +yybackup: + + /* Do appropriate processing given the current state. Read a + lookahead token if we need one and don't already have one. */ + + /* First try to decide what to do without reference to lookahead token. */ + yyn = yypact[yystate]; + if (yypact_value_is_default (yyn)) + goto yydefault; + + /* Not known => get a lookahead token if don't already have one. */ + + /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. 
*/ + if (yychar == YYEMPTY) + { + YYDPRINTF ((stderr, "Reading a token: ")); + yychar = YYLEX; + } + + if (yychar <= YYEOF) + { + yychar = yytoken = YYEOF; + YYDPRINTF ((stderr, "Now at end of input.\n")); + } + else + { + yytoken = YYTRANSLATE (yychar); + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); + } + + /* If the proper action on seeing token YYTOKEN is to reduce or to + detect an error, take that action. */ + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) + goto yydefault; + yyn = yytable[yyn]; + if (yyn <= 0) + { + if (yytable_value_is_error (yyn)) + goto yyerrlab; + yyn = -yyn; + goto yyreduce; + } + + /* Count tokens shifted since error; after three, turn off error + status. */ + if (yyerrstatus) + yyerrstatus--; + + /* Shift the lookahead token. */ + YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); + + /* Discard the shifted token. */ + yychar = YYEMPTY; + + yystate = yyn; + *++yyvsp = yylval; + + goto yynewstate; + + +/*-----------------------------------------------------------. +| yydefault -- do the default action for the current state. | +`-----------------------------------------------------------*/ +yydefault: + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyerrlab; + goto yyreduce; + + +/*-----------------------------. +| yyreduce -- Do a reduction. | +`-----------------------------*/ +yyreduce: + /* yyn is the number of a rule to reduce with. */ + yylen = yyr2[yyn]; + + /* If YYLEN is nonzero, implement the default value of the action: + `$$ = $1'. + + Otherwise, the following line sets YYVAL to garbage. + This behavior is undocumented and Bison + users should not rely upon it. Assigning to YYVAL + unconditionally makes the parser a bit smaller, and it avoids a + GCC warning that YYVAL may be used uninitialized. 
*/ + yyval = yyvsp[1-yylen]; + + + YY_REDUCE_PRINT (yyn); + switch (yyn) + { + case 2: + +/* Line 1806 of yacc.c */ +#line 79 "fts0pars.y" + { + (yyval.node) = (yyvsp[(1) - (1)].node); + ((fts_ast_state_t*) state)->root = (yyval.node); + } + break; + + case 3: + +/* Line 1806 of yacc.c */ +#line 85 "fts0pars.y" + { + (yyval.node) = NULL; + } + break; + + case 4: + +/* Line 1806 of yacc.c */ +#line 89 "fts0pars.y" + { + (yyval.node) = (yyvsp[(1) - (2)].node); + + if (!(yyval.node)) { + (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(2) - (2)].node)); + } else { + fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node)); + } + } + break; + + case 5: + +/* Line 1806 of yacc.c */ +#line 99 "fts0pars.y" + { + (yyval.node) = (yyvsp[(1) - (2)].node); + (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node)); + + if (!(yyval.node)) { + (yyval.node) = (yyvsp[(2) - (2)].node); + } else { + fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node)); + } + } + break; + + case 6: + +/* Line 1806 of yacc.c */ +#line 111 "fts0pars.y" + { + (yyval.node) = (yyvsp[(2) - (3)].node); + + if ((yyval.node)) { + (yyval.node) = fts_ast_create_node_subexp_list(state, (yyval.node)); + } + } + break; + + case 7: + +/* Line 1806 of yacc.c */ +#line 119 "fts0pars.y" + { + (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (4)].node)); + + if ((yyvsp[(3) - (4)].node)) { + fts_ast_add_node((yyval.node), + fts_ast_create_node_subexp_list(state, (yyvsp[(3) - (4)].node))); + } + } + break; + + case 8: + +/* Line 1806 of yacc.c */ +#line 129 "fts0pars.y" + { + (yyval.node) = (yyvsp[(1) - (1)].node); + } + break; + + case 9: + +/* Line 1806 of yacc.c */ +#line 133 "fts0pars.y" + { + (yyval.node) = (yyvsp[(1) - (1)].node); + } + break; + + case 10: + +/* Line 1806 of yacc.c */ +#line 137 "fts0pars.y" + { + fts_ast_term_set_wildcard((yyvsp[(1) - (2)].node)); + } + break; + + case 11: + +/* Line 1806 of yacc.c */ +#line 141 "fts0pars.y" + { + 
fts_ast_text_set_distance((yyvsp[(1) - (3)].node), fts_ast_string_to_ul((yyvsp[(3) - (3)].token), 10)); + fts_ast_string_free((yyvsp[(3) - (3)].token)); + } + break; + + case 12: + +/* Line 1806 of yacc.c */ +#line 146 "fts0pars.y" + { + (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (3)].node)); + fts_ast_add_node((yyval.node), (yyvsp[(2) - (3)].node)); + fts_ast_term_set_wildcard((yyvsp[(2) - (3)].node)); + } + break; + + case 13: + +/* Line 1806 of yacc.c */ +#line 152 "fts0pars.y" + { + (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node)); + fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node)); + } + break; + + case 14: + +/* Line 1806 of yacc.c */ +#line 157 "fts0pars.y" + { + (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (4)].node)); + fts_ast_add_node((yyval.node), (yyvsp[(2) - (4)].node)); + fts_ast_text_set_distance((yyvsp[(2) - (4)].node), fts_ast_string_to_ul((yyvsp[(4) - (4)].token), 10)); + fts_ast_string_free((yyvsp[(4) - (4)].token)); + } + break; + + case 15: + +/* Line 1806 of yacc.c */ +#line 164 "fts0pars.y" + { + (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node)); + fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node)); + } + break; + + case 16: + +/* Line 1806 of yacc.c */ +#line 170 "fts0pars.y" + { + (yyval.node) = fts_ast_create_node_oper(state, FTS_IGNORE); + } + break; + + case 17: + +/* Line 1806 of yacc.c */ +#line 174 "fts0pars.y" + { + (yyval.node) = fts_ast_create_node_oper(state, FTS_EXIST); + } + break; + + case 18: + +/* Line 1806 of yacc.c */ +#line 178 "fts0pars.y" + { + (yyval.node) = fts_ast_create_node_oper(state, FTS_NEGATE); + } + break; + + case 19: + +/* Line 1806 of yacc.c */ +#line 182 "fts0pars.y" + { + (yyval.node) = fts_ast_create_node_oper(state, FTS_DECR_RATING); + } + break; + + case 20: + +/* Line 1806 of yacc.c */ +#line 186 "fts0pars.y" + { + (yyval.node) = fts_ast_create_node_oper(state, FTS_INCR_RATING); + } + break; + + case 21: + +/* 
Line 1806 of yacc.c */ +#line 191 "fts0pars.y" + { + (yyval.node) = fts_ast_create_node_term(state, (yyvsp[(1) - (1)].token)); + fts_ast_string_free((yyvsp[(1) - (1)].token)); + } + break; + + case 22: + +/* Line 1806 of yacc.c */ +#line 196 "fts0pars.y" + { + (yyval.node) = fts_ast_create_node_term(state, (yyvsp[(1) - (1)].token)); + fts_ast_string_free((yyvsp[(1) - (1)].token)); + } + break; + + case 23: + +/* Line 1806 of yacc.c */ +#line 202 "fts0pars.y" + { + (yyval.node) = (yyvsp[(2) - (2)].node); + } + break; + + case 24: + +/* Line 1806 of yacc.c */ +#line 207 "fts0pars.y" + { + (yyval.node) = fts_ast_create_node_text(state, (yyvsp[(1) - (1)].token)); + fts_ast_string_free((yyvsp[(1) - (1)].token)); + } + break; + + + +/* Line 1806 of yacc.c */ +#line 1663 "fts0pars.cc" + default: break; + } + /* User semantic actions sometimes alter yychar, and that requires + that yytoken be updated with the new translation. We take the + approach of translating immediately before every use of yytoken. + One alternative is translating here after every semantic action, + but that translation would be missed if the semantic action invokes + YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or + if it invokes YYBACKUP. In the case of YYABORT or YYACCEPT, an + incorrect destructor might then be invoked immediately. In the + case of YYERROR or YYBACKUP, subsequent parser actions might lead + to an incorrect destructor call or verbose syntax error message + before the lookahead is translated. */ + YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); + + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + + *++yyvsp = yyval; + + /* Now `shift' the result of the reduction. Determine what state + that goes to, based on the state we popped back to and the rule + number reduced by. 
*/ + + yyn = yyr1[yyn]; + + yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; + if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) + yystate = yytable[yystate]; + else + yystate = yydefgoto[yyn - YYNTOKENS]; + + goto yynewstate; + + +/*------------------------------------. +| yyerrlab -- here on detecting error | +`------------------------------------*/ +yyerrlab: + /* Backup yychar, in case we would change it. */ + yylastchar = yychar; + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar); + + /* If not already recovering from an error, report this error. */ + if (!yyerrstatus) + { + ++yynerrs; +#if ! YYERROR_VERBOSE + yyerror (YY_("syntax error")); +#else +# define YYSYNTAX_ERROR yysyntax_error (&yymsg_alloc, &yymsg, \ + yyssp, yytoken) + { + char const *yymsgp = YY_("syntax error"); + int yysyntax_error_status; + yysyntax_error_status = YYSYNTAX_ERROR; + if (yysyntax_error_status == 0) + yymsgp = yymsg; + else if (yysyntax_error_status == 1) + { + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); + yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc); + if (!yymsg) + { + yymsg = yymsgbuf; + yymsg_alloc = sizeof yymsgbuf; + yysyntax_error_status = 2; + } + else + { + yysyntax_error_status = YYSYNTAX_ERROR; + yymsgp = yymsg; + } + } + yyerror (yymsgp); + if (yysyntax_error_status == 2) + goto yyexhaustedlab; + } +# undef YYSYNTAX_ERROR +#endif + } + + + + if (yyerrstatus == 3) + { + /* If just tried and failed to reuse lookahead token after an + error, discard it. */ + + if (yychar <= YYEOF) + { + /* Return failure if at end of input. */ + if (yychar == YYEOF) + { + /* Since we don't need the token, we have to free it first. 
*/ + YYERRCLEANUP; + YYABORT; + } + } + else + { + yydestruct ("Error: discarding", + yytoken, &yylval); + yychar = YYEMPTY; + } + } + + /* Else will try to reuse lookahead token after shifting the error + token. */ + goto yyerrlab1; + + +/*---------------------------------------------------. +| yyerrorlab -- error raised explicitly by YYERROR. | +`---------------------------------------------------*/ +yyerrorlab: + + /* Pacify compilers like GCC when the user code never invokes + YYERROR and the label yyerrorlab therefore never appears in user + code. */ + if (/*CONSTCOND*/ 0) + goto yyerrorlab; + + /* Do not reclaim the symbols of the rule which action triggered + this YYERROR. */ + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + yystate = *yyssp; + goto yyerrlab1; + + +/*-------------------------------------------------------------. +| yyerrlab1 -- common code for both syntax error and YYERROR. | +`-------------------------------------------------------------*/ +yyerrlab1: + yyerrstatus = 3; /* Each real token shifted decrements this. */ + + for (;;) + { + yyn = yypact[yystate]; + if (!yypact_value_is_default (yyn)) + { + yyn += YYTERROR; + if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) + { + yyn = yytable[yyn]; + if (0 < yyn) + break; + } + } + + /* Pop the current state because it cannot handle the error token. */ + if (yyssp == yyss) + { + /* Since we don't need the error token, we have to free it first. */ + YYERRCLEANUP; + YYABORT; + } + + + yydestruct ("Error: popping", + yystos[yystate], yyvsp); + YYPOPSTACK (1); + yystate = *yyssp; + YY_STACK_PRINT (yyss, yyssp); + } + + *++yyvsp = yylval; + + + /* Shift the error token. */ + YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); + + yystate = yyn; + goto yynewstate; + + +/*-------------------------------------. +| yyacceptlab -- YYACCEPT comes here. 
| +`-------------------------------------*/ +yyacceptlab: + yyresult = 0; + goto yyreturn; + +/*-----------------------------------. +| yyabortlab -- YYABORT comes here. | +`-----------------------------------*/ +yyabortlab: + yyresult = 1; + goto yyreturn; + +#if !defined(yyoverflow) || YYERROR_VERBOSE +/*-------------------------------------------------. +| yyexhaustedlab -- memory exhaustion comes here. | +`-------------------------------------------------*/ +yyexhaustedlab: + yyerror (YY_("memory exhausted")); + yyresult = 2; + /* Fall through. */ +#endif + +yyreturn: + if (yychar != YYEMPTY) + { + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = YYTRANSLATE (yychar); + yydestruct ("Cleanup: discarding lookahead", + yytoken, &yylval); + } + /* Do not reclaim the symbols of the rule which action triggered + this YYABORT or YYACCEPT. */ + YYPOPSTACK (yylen); + YY_STACK_PRINT (yyss, yyssp); + while (yyssp != yyss) + { + yydestruct ("Cleanup: popping", + yystos[*yyssp], yyvsp); + YYPOPSTACK (1); + } +#ifndef yyoverflow + if (yyss != yyssa) + YYSTACK_FREE (yyss); +#endif +#if YYERROR_VERBOSE + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); +#endif + /* Make sure YYID is used. 
*/ + return YYID (yyresult); +} + + + +/* Line 2067 of yacc.c */ +#line 212 "fts0pars.y" + + +/******************************************************************** +Report a parser error message through my_printf_error() with code ER_PARSE_ERROR; always returns 0. */ +int +ftserror( +/*=====*/ + const char* p) +{ + my_printf_error(ER_PARSE_ERROR, "%s", MYF(0), p); + return(0); +} + +/******************************************************************** +Create a fts_lexer_t instance. The fts0b scanner is initialised when boolean_mode is set, the fts0t scanner otherwise; either one is given query_len bytes starting at query. The instance is allocated with ut_malloc_nokey().*/ +fts_lexer_t* +fts_lexer_create( +/*=============*/ + ibool boolean_mode, + const byte* query, + ulint query_len) +{ + fts_lexer_t* fts_lexer = static_cast<fts_lexer_t*>( + ut_malloc_nokey(sizeof(fts_lexer_t))); + + if (boolean_mode) { + fts0blex_init(&fts_lexer->yyscanner); + fts0b_scan_bytes( + reinterpret_cast<const char*>(query), + static_cast<int>(query_len), + fts_lexer->yyscanner); + fts_lexer->scanner = fts_blexer; + /* FIXME: Debugging */ + /* fts0bset_debug(1 , fts_lexer->yyscanner); */ + } else { + fts0tlex_init(&fts_lexer->yyscanner); + fts0t_scan_bytes( + reinterpret_cast<const char*>(query), + static_cast<int>(query_len), + fts_lexer->yyscanner); + fts_lexer->scanner = fts_tlexer; + } + + return(fts_lexer); +} + +/******************************************************************** +Free an fts_lexer_t instance: destroy the scanner that matches fts_lexer->scanner, then release the struct with ut_free().*/ +void + +fts_lexer_free( +/*===========*/ + fts_lexer_t* fts_lexer) +{ + if (fts_lexer->scanner == fts_blexer) { + fts0blex_destroy(fts_lexer->yyscanner); + } else { + fts0tlex_destroy(fts_lexer->yyscanner); + } + + ut_free(fts_lexer); +} + +/******************************************************************** +Call the appropriate scanner: invoke fts_lexer->scanner with val and fts_lexer->yyscanner and return its result.*/ +int +fts_lexer( +/*======*/ + YYSTYPE* val, + fts_lexer_t* fts_lexer) +{ + fts_scanner func_ptr; + + func_ptr = fts_lexer->scanner; + + return(func_ptr(val, fts_lexer->yyscanner)); +} + +/******************************************************************** +Parse the query by calling ftsparse(state); returns its result.*/ +int +fts_parse( +/*======*/ + fts_ast_state_t* state) +{ + return(ftsparse(state)); +} + diff --git a/storage/innobase/fts/fts0pars.y 
b/storage/innobase/fts/fts0pars.y new file mode 100644 index 00000000..903c7280 --- /dev/null +++ b/storage/innobase/fts/fts0pars.y @@ -0,0 +1,293 @@ +/***************************************************************************** + +Copyright (c) 2007, 2014, Oracle and/or its affiliates. All rights reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/** + * @file fts/fts0pars.y + * FTS parser: input file for the GNU Bison parser generator + * + * Created 2007/5/9 Sunny Bains + */ + +%{ +#include "ha_prototypes.h" +#include "mem0mem.h" +#include "fts0ast.h" +#include "fts0blex.h" +#include "fts0tlex.h" +#include "fts0pars.h" +#include <my_sys.h> +extern int fts_lexer(YYSTYPE*, fts_lexer_t*); +extern int fts_blexer(YYSTYPE*, yyscan_t); +extern int fts_tlexer(YYSTYPE*, yyscan_t); +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wpragmas" +# pragma GCC diagnostic ignored "-Wunknown-warning-option" +# pragma GCC diagnostic ignored "-Wunused-but-set-variable" +#endif +extern int ftserror(const char* p); +/* Required for reentrant parser */ +#define ftslex fts_lexer + +#define YYERROR_VERBOSE + +/* For passing an argument to yyparse() */ +#define YYPARSE_PARAM state +#define YYLEX_PARAM ((fts_ast_state_t*) state)->lexer + + +typedef int (*fts_scanner)(YYSTYPE* val, yyscan_t yyscanner); + 
+struct fts_lexer_struct { + fts_scanner scanner; /* scanner entry point called by fts_lexer() */ + void* yyscanner; /* scanner state passed to that entry point */ +}; + +%} + +%union { + int oper; + fts_ast_string_t* token; + fts_ast_node_t* node; +}; + +/* Enable re-entrant parser */ +%pure_parser + +%token<oper> FTS_OPER +%token<token> FTS_TEXT FTS_TERM FTS_NUMB + +%type<node> prefix term text expr sub_expr expr_lst query + +%nonassoc '+' '-' '~' '<' '>' + +%% + +/* query: start rule; stores the expression list as the root in the parser state. */ +query : expr_lst { + $$ = $1; + ((fts_ast_state_t*) state)->root = $$; + } + ; + +/* expr_lst: possibly empty sequence of expr and sub_expr; NULL when empty. */ +expr_lst: /* Empty */ { + $$ = NULL; + } + + | expr_lst expr { + $$ = $1; + + if (!$$) { + $$ = fts_ast_create_node_list(state, $2); + } else { + fts_ast_add_node($$, $2); + } + } + + | expr_lst sub_expr { + $$ = $1; /* NOTE(review): dead store, overwritten by the next statement before it is read - confirm intent */ + $$ = fts_ast_create_node_list(state, $1); + + if (!$$) { /* NOTE(review): only taken if fts_ast_create_node_list() can return NULL - confirm */ + $$ = $2; + } else { + fts_ast_add_node($$, $2); + } + } + ; + +/* sub_expr: parenthesised expression list, optionally preceded by a prefix operator. */ +sub_expr: '(' expr_lst ')' { + $$ = $2; + + if ($$) { + $$ = fts_ast_create_node_subexp_list(state, $$); + } + } + + | prefix '(' expr_lst ')' { + $$ = fts_ast_create_node_list(state, $1); + + if ($3) { + fts_ast_add_node($$, + fts_ast_create_node_subexp_list(state, $3)); + } + } + ; + +/* expr: a term or a text, optionally with a prefix operator, a trailing wildcard or a distance. */ +expr : term { + $$ = $1; + } + + | text { + $$ = $1; + } + + | term '*' { + fts_ast_term_set_wildcard($1); /* $$ keeps the default value $1 */ + } + + | text '@' FTS_NUMB { + fts_ast_text_set_distance($1, fts_ast_string_to_ul($3, 10)); /* $$ keeps the default value $1 */ + fts_ast_string_free($3); + } + + | prefix term '*' { + $$ = fts_ast_create_node_list(state, $1); + fts_ast_add_node($$, $2); + fts_ast_term_set_wildcard($2); + } + + | prefix term { + $$ = fts_ast_create_node_list(state, $1); + fts_ast_add_node($$, $2); + } + + | prefix text '@' FTS_NUMB { + $$ = fts_ast_create_node_list(state, $1); + fts_ast_add_node($$, $2); + fts_ast_text_set_distance($2, fts_ast_string_to_ul($4, 10)); + fts_ast_string_free($4); + } + + | prefix text { + $$ = fts_ast_create_node_list(state, $1); + fts_ast_add_node($$, $2); + } + ; + +/* prefix: one operator character, turned into an operator node. */ +prefix : '-' { + $$ = fts_ast_create_node_oper(state, FTS_IGNORE); + } + + | '+' { + $$ = fts_ast_create_node_oper(state, FTS_EXIST); + } + + | '~' { 
+ $$ = fts_ast_create_node_oper(state, FTS_NEGATE); + } + + | '<' { + $$ = fts_ast_create_node_oper(state, FTS_DECR_RATING); + } + + | '>' { + $$ = fts_ast_create_node_oper(state, FTS_INCR_RATING); + } + ; + +term : FTS_TERM { + $$ = fts_ast_create_node_term(state, $1); + fts_ast_string_free($1); + } + + | FTS_NUMB { + $$ = fts_ast_create_node_term(state, $1); + fts_ast_string_free($1); + } + + /* Ignore leading '*' */ + | '*' term { + $$ = $2; + } + ; + +text : FTS_TEXT { + $$ = fts_ast_create_node_text(state, $1); + fts_ast_string_free($1); + } + ; +%% + +/******************************************************************** +Report a parser error message through my_printf_error() with code ER_PARSE_ERROR (kept in sync with the generated fts0pars.cc); always returns 0. */ +int +ftserror( +/*=====*/ + const char* p) +{ + my_printf_error(ER_PARSE_ERROR, "%s", MYF(0), p); + return(0); +} + +/******************************************************************** +Create a fts_lexer_t instance. The fts0b scanner is initialised when boolean_mode is set, the fts0t scanner otherwise; either one is given query_len bytes starting at query. The casts keep the const qualifier of query, as in the generated fts0pars.cc. The instance is allocated with ut_malloc_nokey().*/ +fts_lexer_t* +fts_lexer_create( +/*=============*/ + ibool boolean_mode, + const byte* query, + ulint query_len) +{ + fts_lexer_t* fts_lexer = static_cast<fts_lexer_t*>( + ut_malloc_nokey(sizeof(fts_lexer_t))); + + if (boolean_mode) { + fts0blex_init(&fts_lexer->yyscanner); + fts0b_scan_bytes( + reinterpret_cast<const char*>(query), + static_cast<int>(query_len), + fts_lexer->yyscanner); + fts_lexer->scanner = fts_blexer; + /* FIXME: Debugging */ + /* fts0bset_debug(1 , fts_lexer->yyscanner); */ + } else { + fts0tlex_init(&fts_lexer->yyscanner); + fts0t_scan_bytes( + reinterpret_cast<const char*>(query), + static_cast<int>(query_len), + fts_lexer->yyscanner); + fts_lexer->scanner = fts_tlexer; + } + + return(fts_lexer); +} + +/******************************************************************** +Free an fts_lexer_t instance: destroy the scanner that matches fts_lexer->scanner, then release the struct with ut_free().*/ +void + +fts_lexer_free( +/*===========*/ + fts_lexer_t* fts_lexer) +{ + if (fts_lexer->scanner == fts_blexer) { + fts0blex_destroy(fts_lexer->yyscanner); + } else { + fts0tlex_destroy(fts_lexer->yyscanner); + } + + ut_free(fts_lexer); +} + +/******************************************************************** +Call the appropriate scanner: invoke fts_lexer->scanner with val and fts_lexer->yyscanner and return its result.*/ +int +fts_lexer( +/*======*/ + YYSTYPE* val, + fts_lexer_t* 
fts_lexer) +{ + fts_scanner func_ptr; + + func_ptr = fts_lexer->scanner; + + return(func_ptr(val, fts_lexer->yyscanner)); +} + +/******************************************************************** +Parse the query.*/ +int +fts_parse( +/*======*/ + fts_ast_state_t* state) +{ + return(ftsparse(state)); +} diff --git a/storage/innobase/fts/fts0plugin.cc b/storage/innobase/fts/fts0plugin.cc new file mode 100644 index 00000000..de99d170 --- /dev/null +++ b/storage/innobase/fts/fts0plugin.cc @@ -0,0 +1,283 @@ +/***************************************************************************** + +Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file fts/fts0plugin.cc +Full Text Search plugin support. 
+ +Created 2013/06/04 Shaohua Wang +***********************************************************************/ + +#include "fts0ast.h" +#include "fts0plugin.h" +#include "fts0tokenize.h" + +#include "ft_global.h" + +/******************************************************************//** +FTS default parser init +@return 0 */ +static int fts_default_parser_init(MYSQL_FTPARSER_PARAM*) { return 0; } + +/******************************************************************//** +FTS default parser deinit +@return 0 */ +static int fts_default_parser_deinit(MYSQL_FTPARSER_PARAM*) { return 0; } + +/******************************************************************//** +FTS default parser parse from ft_static.c in MYISAM. +@return 0 if parse successfully, or return non-zero */ +static +int +fts_default_parser_parse( +/*=====================*/ + MYSQL_FTPARSER_PARAM *param) /*!< in: plugin parser param */ +{ + return(param->mysql_parse(param, param->doc, param->length)); +} + +/* FTS default parser from ft_static.c in MYISAM. 
*/ +struct st_mysql_ftparser fts_default_parser = +{ + MYSQL_FTPARSER_INTERFACE_VERSION, + fts_default_parser_parse, + fts_default_parser_init, + fts_default_parser_deinit +}; + +/******************************************************************//** +Get a operator node from token boolean info +@return node */ +static +fts_ast_node_t* +fts_query_get_oper_node( +/*====================*/ + MYSQL_FTPARSER_BOOLEAN_INFO* info, /*!< in: token info */ + fts_ast_state_t* state) /*!< in/out: query parse state*/ +{ + fts_ast_node_t* oper_node = NULL; + + if (info->yesno > 0) { + oper_node = fts_ast_create_node_oper(state, FTS_EXIST); + } else if (info->yesno < 0) { + oper_node = fts_ast_create_node_oper(state, FTS_IGNORE); + } else if (info->weight_adjust > 0) { + oper_node = fts_ast_create_node_oper(state, FTS_INCR_RATING); + } else if (info->weight_adjust < 0) { + oper_node = fts_ast_create_node_oper(state, FTS_DECR_RATING); + } else if (info->wasign > 0) { + oper_node = fts_ast_create_node_oper(state, FTS_NEGATE); + } + + return(oper_node); +} + +/******************************************************************//** +FTS plugin parser 'myql_add_word' callback function for query parse. +Refer to 'st_mysql_ftparser_param' for more detail. +Note: +a. Parse logic refers to 'ftb_query_add_word' from ft_boolean_search.c in MYISAM; +b. Parse node or tree refers to fts0pars.y. +@return 0 if add successfully, or return non-zero. 
*/ +static +int +fts_query_add_word_for_parser( +/*==========================*/ + MYSQL_FTPARSER_PARAM* param, /*!< in: parser param */ + const char* word, /*!< in: token */ + int word_len, /*!< in: token length */ + MYSQL_FTPARSER_BOOLEAN_INFO* info) /*!< in: token info */ +{ + fts_ast_state_t* state = + static_cast<fts_ast_state_t*>(param->mysql_ftparam); + fts_ast_node_t* cur_node = state->cur_node; + fts_ast_node_t* oper_node = NULL; + fts_ast_node_t* term_node = NULL; + fts_ast_node_t* node = NULL; + + switch (info->type) { + case FT_TOKEN_STOPWORD: + /* We only handler stopword in phrase */ + if (cur_node->type != FTS_AST_PARSER_PHRASE_LIST) { + break; + } + /* fall through */ + + case FT_TOKEN_WORD: + term_node = fts_ast_create_node_term_for_parser( + state, word, ulint(word_len)); + + if (info->trunc) { + fts_ast_term_set_wildcard(term_node); + } + + if (cur_node->type == FTS_AST_PARSER_PHRASE_LIST) { + /* Ignore operator inside phrase */ + fts_ast_add_node(cur_node, term_node); + } else { + ut_ad(cur_node->type == FTS_AST_LIST + || cur_node->type == FTS_AST_SUBEXP_LIST); + oper_node = fts_query_get_oper_node(info, state); + + if (oper_node) { + node = fts_ast_create_node_list(state, oper_node); + fts_ast_add_node(node, term_node); + fts_ast_add_node(cur_node, node); + } else { + fts_ast_add_node(cur_node, term_node); + } + } + + break; + + case FT_TOKEN_LEFT_PAREN: + /* Check parse error */ + if (cur_node->type != FTS_AST_LIST + && cur_node->type != FTS_AST_SUBEXP_LIST) { + return(1); + } + + /* Set operator */ + oper_node = fts_query_get_oper_node(info, state); + if (oper_node != NULL) { + node = fts_ast_create_node_list(state, oper_node); + fts_ast_add_node(cur_node, node); + node->go_up = true; + node->up_node = cur_node; + cur_node = node; + } + + if (info->quot) { + /* Phrase node */ + node = fts_ast_create_node_phrase_list(state); + } else { + /* Subexp list node */ + node = fts_ast_create_node_subexp_list(state, NULL); + } + + 
fts_ast_add_node(cur_node, node); + + node->up_node = cur_node; + state->cur_node = node; + state->depth += 1; + + break; + + case FT_TOKEN_RIGHT_PAREN: + info->quot = 0; + + if (cur_node->up_node != NULL) { + cur_node = cur_node->up_node; + + if (cur_node->go_up) { + ut_a(cur_node->up_node + && !(cur_node->up_node->go_up)); + cur_node = cur_node->up_node; + } + } + + state->cur_node = cur_node; + + if (state->depth > 0) { + state->depth--; + } else { + /* Parentheses mismatch */ + return(1); + } + + break; + + case FT_TOKEN_EOF: + default: + break; + } + + return(0); +} + +/******************************************************************//** +FTS plugin parser 'myql_parser' callback function for query parse. +Refer to 'st_mysql_ftparser_param' for more detail. +@return 0 if parse successfully */ +static +int +fts_parse_query_internal( +/*=====================*/ + MYSQL_FTPARSER_PARAM* param, /*!< in: parser param */ + const char* query, /*!< in: query string */ + int len) /*!< in: query length */ +{ + MYSQL_FTPARSER_BOOLEAN_INFO info; + const CHARSET_INFO* cs = param->cs; + uchar** start = (uchar**)(&query); + uchar* end = (uchar*)(query + len); + FT_WORD w = {NULL, 0, 0}; + + info.prev = ' '; + info.quot = 0; + memset(&w, 0, sizeof(w)); + /* Note: We don't handle simple parser mode here, + but user supplied plugin parser should handler it. */ + while (fts_get_word(cs, start, end, &w, &info)) { + int ret = param->mysql_add_word( + param, + reinterpret_cast<char*>(w.pos), + int(w.len), &info); + if (ret) { + return(ret); + } + } + + return(0); +} + +/******************************************************************//** +fts parse query by plugin parser. +@return 0 if parse successfully, or return non-zero. 
*/ +int +fts_parse_by_parser( +/*================*/ + ibool mode, /*!< in: parse boolean mode */ + uchar* query_str, /*!< in: query string */ + ulint query_len, /*!< in: query string length */ + st_mysql_ftparser* parser, /*!< in: fts plugin parser */ + fts_ast_state_t* state) /*!< in/out: parser state */ +{ + MYSQL_FTPARSER_PARAM param; + int ret; + + ut_ad(parser); + + /* Initial parser param */ + param.mysql_parse = fts_parse_query_internal; + param.mysql_add_word = fts_query_add_word_for_parser; + param.mysql_ftparam = static_cast<void*>(state); + param.cs = state->charset; + param.doc = reinterpret_cast<char*>(query_str); + param.length = static_cast<int>(query_len); + param.flags = 0; + param.mode = mode ? + MYSQL_FTPARSER_FULL_BOOLEAN_INFO : + MYSQL_FTPARSER_SIMPLE_MODE; + + PARSER_INIT(parser, ¶m); + ret = parser->parse(¶m); + PARSER_DEINIT(parser, ¶m); + + return(ret | state->depth); +} diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc new file mode 100644 index 00000000..9c92a117 --- /dev/null +++ b/storage/innobase/fts/fts0que.cc @@ -0,0 +1,4612 @@ +/***************************************************************************** + +Copyright (c) 2007, 2020, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, 2021, MariaDB Corporation. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file fts/fts0que.cc +Full Text Search functionality. + +Created 2007/03/27 Sunny Bains +Completed 2011/7/10 Sunny and Jimmy Yang +*******************************************************/ + +#include "dict0dict.h" +#include "ut0rbt.h" +#include "row0sel.h" +#include "fts0fts.h" +#include "fts0priv.h" +#include "fts0ast.h" +#include "fts0pars.h" +#include "fts0types.h" +#include "fts0plugin.h" +#include "fts0vlc.h" + +#include <iomanip> +#include <vector> + +#define FTS_ELEM(t, n, i, j) (t[(i) * n + (j)]) + +#define RANK_DOWNGRADE (-1.0F) +#define RANK_UPGRADE (1.0F) + +/* Maximum number of words supported in a phrase or proximity search. */ +#define MAX_PROXIMITY_ITEM 128 + +/* Memory used by rbt itself for create and node add */ +#define SIZEOF_RBT_CREATE sizeof(ib_rbt_t) + sizeof(ib_rbt_node_t) * 2 +#define SIZEOF_RBT_NODE_ADD sizeof(ib_rbt_node_t) + +/*Initial byte length for 'words' in fts_ranking_t */ +#define RANKING_WORDS_INIT_LEN 4 + +// FIXME: Need to have a generic iterator that traverses the ilist. + +typedef std::vector<fts_string_t, ut_allocator<fts_string_t> > word_vector_t; + +struct fts_word_freq_t; + +/** State of an FTS query. 
*/ +struct fts_query_t { + mem_heap_t* heap; /*!< Heap to use for allocations */ + + trx_t* trx; /*!< The query transaction */ + + dict_index_t* index; /*!< The FTS index to search */ + /*!< FTS auxiliary common table def */ + + fts_table_t fts_common_table; + + fts_table_t fts_index_table;/*!< FTS auxiliary index table def */ + + size_t total_size; /*!< total memory size used by query */ + + fts_doc_ids_t* deleted; /*!< Deleted doc ids that need to be + filtered from the output */ + + fts_ast_node_t* root; /*!< Abstract syntax tree */ + + fts_ast_node_t* cur_node; /*!< Current tree node */ + + ib_rbt_t* word_map; /*!< Matched word map for + searching by word*/ + + word_vector_t* word_vector; /*!< Matched word vector for + searching by index */ + + ib_rbt_t* doc_ids; /*!< The current set of matching + doc ids, elements are of + type fts_ranking_t */ + + ib_rbt_t* intersection; /*!< The doc ids that were found in + doc_ids, this tree will become + the new doc_ids, elements are of type + fts_ranking_t */ + + /*!< Prepared statement to read the + nodes from the FTS INDEX */ + que_t* read_nodes_graph; + + fts_ast_oper_t oper; /*!< Current boolean mode operator */ + + /*!< TRUE if we want to collect the + word positions within the document */ + ibool collect_positions; + + ulint flags; /*!< Specify the full text search type, + such as boolean search, phrase + search, proximity search etc. */ + + ulint distance; /*!< The proximity distance of a + phrase search. 
*/ + + /*!< These doc ids are used as a + boundary condition when searching the + FTS index rows */ + + doc_id_t lower_doc_id; /*!< Lowest doc id in doc_ids */ + + doc_id_t upper_doc_id; /*!< Highest doc id in doc_ids */ + + bool boolean_mode; /*!< TRUE if boolean mode query */ + + ib_vector_t* matched; /*!< Array of matching documents + (fts_match_t) to search for a phrase */ + + ib_vector_t** match_array; /*!< Used for proximity search, contains + position info for each matched word + in the word list */ + + ib_uint64_t total_docs; /*!< The total number of documents */ + + ulint total_words; /*!< The total number of words */ + + dberr_t error; /*!< Error code if any, that is + encountered during query processing */ + + ib_rbt_t* word_freqs; /*!< RB tree of word frequencies per + document, its elements are of type + fts_word_freq_t */ + + ib_rbt_t* wildcard_words; /*!< words with wildcard */ + + bool multi_exist; /*!< multiple FTS_EXIST oper */ + byte visiting_sub_exp; /*!< count of nested + fts_ast_visit_sub_exp() */ + + st_mysql_ftparser* parser; /*!< fts plugin parser */ +}; + +/** For phrase matching, first we collect the documents and the positions +then we match. */ +struct fts_match_t { + doc_id_t doc_id; /*!< Document id */ + + ulint start; /*!< Start the phrase match from + this offset within the positions + vector. */ + + ib_vector_t* positions; /*!< Offsets of a word in a + document */ +}; + +/** For matching tokens in a phrase search. We use this data structure in +the callback that determines whether a document should be accepted or +rejected for a phrase search. */ +struct fts_select_t { + doc_id_t doc_id; /*!< The document id to match */ + + ulint min_pos; /*!< For found to be TRUE at least + one position must be greater than + min_pos. 
*/ + + ibool found; /*!< TRUE if found */ + + fts_word_freq_t* + word_freq; /*!< Word frequency instance of the + current word being looked up in + the FTS index */ +}; + +typedef std::vector<ulint, ut_allocator<ulint> > pos_vector_t; + +/** structure defines a set of ranges for original documents, each of which +has a minimum position and maximum position. Text in such range should +contain all words in the proximity search. We will need to count the +words in such range to make sure it is less than the specified distance +of the proximity search */ +struct fts_proximity_t { + ulint n_pos; /*!< number of position set, defines + a range (min to max) containing all + matching words */ + pos_vector_t min_pos; /*!< the minimum position (in bytes) + of the range */ + pos_vector_t max_pos; /*!< the maximum position (in bytes) + of the range */ +}; + +/** The match positions and tokesn to match */ +struct fts_phrase_t { + fts_phrase_t(const dict_table_t* table) + : + found(false), + match(NULL), + tokens(NULL), + distance(0), + charset(NULL), + heap(NULL), + zip_size(table->space->zip_size()), + proximity_pos(NULL), + parser(NULL) + { + } + + /** Match result */ + ibool found; + + /** Positions within text */ + const fts_match_t* match; + + /** Tokens to match */ + const ib_vector_t* tokens; + + /** For matching on proximity distance. Can be 0 for exact match */ + ulint distance; + + /** Phrase match charset */ + CHARSET_INFO* charset; + + /** Heap for word processing */ + mem_heap_t* heap; + + /** ROW_FORMAT=COMPRESSED page size, or 0 */ + const ulint zip_size; + + /** Position info for proximity search verification. 
Records the + min and max position of words matched */ + fts_proximity_t* proximity_pos; + + /** FTS plugin parser */ + st_mysql_ftparser* parser; +}; + +/** Paramter passed to fts phrase match by parser */ +struct fts_phrase_param_t { + fts_phrase_t* phrase; /*!< Match phrase instance */ + ulint token_index; /*!< Index of token to match next */ + mem_heap_t* heap; /*!< Heap for word processing */ +}; + +/** For storing the frequncy of a word/term in a document */ +struct fts_doc_freq_t { + doc_id_t doc_id; /*!< Document id */ + ulint freq; /*!< Frequency of a word in a document */ +}; + +/** To determine the word frequency per document. */ +struct fts_word_freq_t { + fts_string_t word; /*!< Word for which we need the freq, + it's allocated on the query heap */ + + ib_rbt_t* doc_freqs; /*!< RB Tree for storing per document + word frequencies. The elements are + of type fts_doc_freq_t */ + ib_uint64_t doc_count; /*!< Total number of documents that + contain this word */ + double idf; /*!< Inverse document frequency */ +}; + +/******************************************************************** +Callback function to fetch the rows in an FTS INDEX record. +@return always TRUE */ +static +ibool +fts_query_index_fetch_nodes( +/*========================*/ + void* row, /*!< in: sel_node_t* */ + void* user_arg); /*!< in: pointer to ib_vector_t */ + +/******************************************************************** +Read and filter nodes. 
+@return fts_node_t instance */ +static +dberr_t +fts_query_filter_doc_ids( +/*=====================*/ + fts_query_t* query, /*!< in: query instance */ + const fts_string_t* word, /*!< in: the current word */ + fts_word_freq_t* word_freq, /*!< in/out: word frequency */ + const fts_node_t* node, /*!< in: current FTS node */ + void* data, /*!< in: doc id ilist */ + ulint len, /*!< in: doc id ilist size */ + ibool calc_doc_count);/*!< in: whether to remember doc + count */ + +/** Process (nested) sub-expression, create a new result set to store the +sub-expression result by processing nodes under current sub-expression +list. Merge the sub-expression result with that of parent expression list. +@param[in,out] node current root node +@param[in,out] visitor callback function +@param[in,out] arg argument for callback +@return DB_SUCCESS if all go well */ +static +dberr_t +fts_ast_visit_sub_exp( + fts_ast_node_t* node, + fts_ast_callback visitor, + void* arg); + +#if 0 +/*****************************************************************//*** +Find a doc_id in a word's ilist. +@return TRUE if found. */ +static +ibool +fts_query_find_doc_id( +/*==================*/ + fts_select_t* select, /*!< in/out: search the doc id selected, + update the frequency if found. */ + void* data, /*!< in: doc id ilist */ + ulint len); /*!< in: doc id ilist size */ +#endif + +/*************************************************************//** +This function implements a simple "blind" query expansion search: +words in documents found in the first search pass will be used as +search arguments to search the document again, thus "expand" +the search result set. 
+@return DB_SUCCESS if success, otherwise the error code */ +static +dberr_t +fts_expand_query( +/*=============*/ + dict_index_t* index, /*!< in: FTS index to search */ + fts_query_t* query) /*!< in: query result, to be freed + by the client */ + MY_ATTRIBUTE((nonnull, warn_unused_result)); +/*************************************************************//** +This function finds documents that contain all words in a +phrase or proximity search. And if proximity search, verify +the words are close enough to each other, as in specified distance. +This function is called for phrase and proximity search. +@return TRUE if documents are found, FALSE if otherwise */ +static +ibool +fts_phrase_or_proximity_search( +/*===========================*/ + fts_query_t* query, /*!< in/out: query instance + query->doc_ids might be instantiated + with qualified doc IDs */ + ib_vector_t* tokens); /*!< in: Tokens contain words */ +/*************************************************************//** +This function checks whether words in result documents are close to +each other (within proximity range as specified by "distance"). +If "distance" is MAX_ULINT, then it will find all combinations of +positions of matching words and store min and max positions +in the "qualified_pos" for later verification. +@return true if words are close to each other, false if otherwise */ +static +bool +fts_proximity_get_positions( +/*========================*/ + fts_match_t** match, /*!< in: query instance */ + ulint num_match, /*!< in: number of matching + items */ + ulint distance, /*!< in: distance value + for proximity search */ + fts_proximity_t* qualified_pos); /*!< out: the position info + records ranges containing + all matching words. */ +#if 0 +/******************************************************************** +Get the total number of words in a documents. 
*/ +static +ulint +fts_query_terms_in_document( +/*========================*/ + /*!< out: DB_SUCCESS if all go well + else error code */ + fts_query_t* query, /*!< in: FTS query state */ + doc_id_t doc_id, /*!< in: the word to check */ + ulint* total); /*!< out: total words in document */ +#endif + +/******************************************************************** +Compare two fts_doc_freq_t doc_ids. +@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ +UNIV_INLINE +int +fts_freq_doc_id_cmp( +/*================*/ + const void* p1, /*!< in: id1 */ + const void* p2) /*!< in: id2 */ +{ + const fts_doc_freq_t* fq1 = (const fts_doc_freq_t*) p1; + const fts_doc_freq_t* fq2 = (const fts_doc_freq_t*) p2; + + return((int) (fq1->doc_id - fq2->doc_id)); +} + +#if 0 +/*******************************************************************//** +Print the table used for calculating LCS. */ +static +void +fts_print_lcs_table( +/*================*/ + const ulint* table, /*!< in: array to print */ + ulint n_rows, /*!< in: total no. of rows */ + ulint n_cols) /*!< in: total no. of cols */ +{ + ulint i; + + for (i = 0; i < n_rows; ++i) { + ulint j; + + printf("\n"); + + for (j = 0; j < n_cols; ++j) { + + printf("%2lu ", FTS_ELEM(table, n_cols, i, j)); + } + } +} + +/******************************************************************** +Find the longest common subsequence between the query string and +the document. */ +static +ulint +fts_query_lcs( +/*==========*/ + /*!< out: LCS (length) between + two ilists */ + const ulint* p1, /*!< in: word positions of query */ + ulint len_p1, /*!< in: no. of elements in p1 */ + const ulint* p2, /*!< in: word positions within document */ + ulint len_p2) /*!< in: no. 
of elements in p2 */ +{ + int i; + ulint len = 0; + ulint r = len_p1; + ulint c = len_p2; + ulint size = (r + 1) * (c + 1) * sizeof(ulint); + ulint* table = (ulint*) ut_malloc_nokey(size); + + /* Traverse the table backwards, from the last row to the first and + also from the last column to the first. We compute the smaller + common subsequeces first, then use the caluclated values to determine + the longest common subsequence. The result will be in TABLE[0][0]. */ + for (i = r; i >= 0; --i) { + int j; + + for (j = c; j >= 0; --j) { + + if (p1[i] == (ulint) -1 || p2[j] == (ulint) -1) { + + FTS_ELEM(table, c, i, j) = 0; + + } else if (p1[i] == p2[j]) { + + FTS_ELEM(table, c, i, j) = FTS_ELEM( + table, c, i + 1, j + 1) + 1; + + } else { + + ulint value; + + value = ut_max( + FTS_ELEM(table, c, i + 1, j), + FTS_ELEM(table, c, i, j + 1)); + + FTS_ELEM(table, c, i, j) = value; + } + } + } + + len = FTS_ELEM(table, c, 0, 0); + + fts_print_lcs_table(table, r, c); + printf("\nLen=" ULINTPF "\n", len); + + ut_free(table); + + return(len); +} +#endif + +/*******************************************************************//** +Compare two fts_ranking_t instance on their rank value and doc ids in +descending order on the rank and ascending order on doc id. 
+@return 0 if p1 == p2, < 0 if p1 < p2, > 0 if p1 > p2 */ +static +int +fts_query_compare_rank( +/*===================*/ + const void* p1, /*!< in: pointer to elem */ + const void* p2) /*!< in: pointer to elem */ +{ + const fts_ranking_t* r1 = (const fts_ranking_t*) p1; + const fts_ranking_t* r2 = (const fts_ranking_t*) p2; + + if (r2->rank < r1->rank) { + return(-1); + } else if (r2->rank == r1->rank) { + + if (r1->doc_id < r2->doc_id) { + return(1); + } else if (r1->doc_id > r2->doc_id) { + return(1); + } + + return(0); + } + + return(1); +} + +/*******************************************************************//** +Create words in ranking */ +static +void +fts_ranking_words_create( +/*=====================*/ + fts_query_t* query, /*!< in: query instance */ + fts_ranking_t* ranking) /*!< in: ranking instance */ +{ + ranking->words = static_cast<byte*>( + mem_heap_zalloc(query->heap, RANKING_WORDS_INIT_LEN)); + ranking->words_len = RANKING_WORDS_INIT_LEN; +} + +/* +The optimization here is using a char array(bitmap) to replace words rb tree +in fts_ranking_t. + +It can save lots of memory except in some cases of QUERY EXPANSION. + +'word_map' is used as a word dictionary, in which the key is a word, the value +is a number. In 'fts_ranking_words_add', we first check if the word is in 'word_map'. +if not, we add it into 'word_map', and give it a position(actually a number). +then we set the corresponding bit to '1' at the position in the char array 'words'. + +'word_vector' is a useful backup of 'word_map', and we can get a word by its position, +more quickly than searching by value in 'word_map'. we use 'word_vector' +in 'fts_query_calculate_ranking' and 'fts_expand_query'. In the two functions, we need +to scan the bitmap 'words', and get a word when a bit is '1', then we get word_freq +by the word. 
+*/ + +/*******************************************************************//** +Add a word into ranking */ +static +void +fts_ranking_words_add( +/*==================*/ + fts_query_t* query, /*!< in: query instance */ + fts_ranking_t* ranking, /*!< in: ranking instance */ + const fts_string_t* word) /*!< in: term/word to add */ +{ + ulint pos; + ulint byte_offset; + ulint bit_offset; + ib_rbt_bound_t parent; + + /* Note: we suppose the word map and vector are append-only. */ + ut_ad(query->word_vector->size() == rbt_size(query->word_map)); + + /* We use ib_rbt to simulate a map, f_n_char means position. */ + if (rbt_search(query->word_map, &parent, word) == 0) { + fts_string_t* result_word; + + result_word = rbt_value(fts_string_t, parent.last); + pos = result_word->f_n_char; + ut_ad(pos < rbt_size(query->word_map)); + } else { + /* Add the word to map. */ + fts_string_t new_word; + + pos = rbt_size(query->word_map); + + fts_string_dup(&new_word, word, query->heap); + new_word.f_n_char = pos; + + rbt_add_node(query->word_map, &parent, &new_word); + ut_ad(rbt_validate(query->word_map)); + query->word_vector->push_back(new_word); + } + + /* Check words len */ + byte_offset = pos / CHAR_BIT; + if (byte_offset >= ranking->words_len) { + byte* words = ranking->words; + ulint words_len = ranking->words_len; + + while (byte_offset >= words_len) { + words_len *= 2; + } + + ranking->words = static_cast<byte*>( + mem_heap_zalloc(query->heap, words_len)); + memcpy(ranking->words, words, ranking->words_len); + ranking->words_len = words_len; + } + + /* Set ranking words */ + ut_ad(byte_offset < ranking->words_len); + bit_offset = pos % CHAR_BIT; + ranking->words[byte_offset] = static_cast<byte>( + ranking->words[byte_offset] | 1 << bit_offset); +} + +/*******************************************************************//** +Get a word from a ranking +@return true if it's successful */ +static +bool +fts_ranking_words_get_next( +/*=======================*/ + const fts_query_t* 
query, /*!< in: query instance */ + fts_ranking_t* ranking,/*!< in: ranking instance */ + ulint* pos, /*!< in/out: word start pos */ + fts_string_t* word) /*!< in/out: term/word to add */ +{ + bool ret = false; + ulint max_pos = ranking->words_len * CHAR_BIT; + + /* Search for next word */ + while (*pos < max_pos) { + ulint byte_offset = *pos / CHAR_BIT; + ulint bit_offset = *pos % CHAR_BIT; + + if (ranking->words[byte_offset] & (1 << bit_offset)) { + ret = true; + break; + } + + *pos += 1; + }; + + /* Get next word from word vector */ + if (ret) { + ut_ad(*pos < query->word_vector->size()); + *word = query->word_vector->at((size_t)*pos); + *pos += 1; + } + + return ret; +} + +/*******************************************************************//** +Add a word if it doesn't exist, to the term freq RB tree. We store +a pointer to the word that is passed in as the argument. +@return pointer to word */ +static +fts_word_freq_t* +fts_query_add_word_freq( +/*====================*/ + fts_query_t* query, /*!< in: query instance */ + const fts_string_t* word) /*!< in: term/word to add */ +{ + ib_rbt_bound_t parent; + + /* Lookup the word in our rb tree and add if it doesn't exist. */ + if (rbt_search(query->word_freqs, &parent, word) != 0) { + fts_word_freq_t word_freq; + + memset(&word_freq, 0, sizeof(word_freq)); + + fts_string_dup(&word_freq.word, word, query->heap); + + word_freq.doc_count = 0; + + word_freq.doc_freqs = rbt_create( + sizeof(fts_doc_freq_t), fts_freq_doc_id_cmp); + + parent.last = rbt_add_node( + query->word_freqs, &parent, &word_freq); + + query->total_size += word->f_len + + SIZEOF_RBT_CREATE + + SIZEOF_RBT_NODE_ADD + + sizeof(fts_word_freq_t); + } + + return(rbt_value(fts_word_freq_t, parent.last)); +} + +/*******************************************************************//** +Add a doc id if it doesn't exist, to the doc freq RB tree. 
+@return pointer to word */ +static +fts_doc_freq_t* +fts_query_add_doc_freq( +/*===================*/ + fts_query_t* query, /*!< in: query instance */ + ib_rbt_t* doc_freqs, /*!< in: rb tree of fts_doc_freq_t */ + doc_id_t doc_id) /*!< in: doc id to add */ +{ + ib_rbt_bound_t parent; + + /* Lookup the doc id in our rb tree and add if it doesn't exist. */ + if (rbt_search(doc_freqs, &parent, &doc_id) != 0) { + fts_doc_freq_t doc_freq; + + memset(&doc_freq, 0, sizeof(doc_freq)); + + doc_freq.freq = 0; + doc_freq.doc_id = doc_id; + + parent.last = rbt_add_node(doc_freqs, &parent, &doc_freq); + + query->total_size += SIZEOF_RBT_NODE_ADD + + sizeof(fts_doc_freq_t); + } + + return(rbt_value(fts_doc_freq_t, parent.last)); +} + +/*******************************************************************//** +Add the doc id to the query set only if it's not in the +deleted array. */ +static +void +fts_query_union_doc_id( +/*===================*/ + fts_query_t* query, /*!< in: query instance */ + doc_id_t doc_id, /*!< in: the doc id to add */ + fts_rank_t rank) /*!< in: if non-zero, it is the + rank associated with the doc_id */ +{ + ib_rbt_bound_t parent; + ulint size = ib_vector_size(query->deleted->doc_ids); + doc_id_t* updates = (doc_id_t*) query->deleted->doc_ids->data; + + /* Check if the doc id is deleted and it's not already in our set. */ + if (fts_bsearch(updates, 0, static_cast<int>(size), doc_id) < 0 + && rbt_search(query->doc_ids, &parent, &doc_id) != 0) { + + fts_ranking_t ranking; + + ranking.rank = rank; + ranking.doc_id = doc_id; + fts_ranking_words_create(query, &ranking); + + rbt_add_node(query->doc_ids, &parent, &ranking); + + query->total_size += SIZEOF_RBT_NODE_ADD + + sizeof(fts_ranking_t) + RANKING_WORDS_INIT_LEN; + } +} + +/*******************************************************************//** +Remove the doc id from the query set only if it's not in the +deleted set. 
*/ +static +void +fts_query_remove_doc_id( +/*====================*/ + fts_query_t* query, /*!< in: query instance */ + doc_id_t doc_id) /*!< in: the doc id to add */ +{ + ib_rbt_bound_t parent; + ulint size = ib_vector_size(query->deleted->doc_ids); + doc_id_t* updates = (doc_id_t*) query->deleted->doc_ids->data; + + /* Check if the doc id is deleted and it's in our set. */ + if (fts_bsearch(updates, 0, static_cast<int>(size), doc_id) < 0 + && rbt_search(query->doc_ids, &parent, &doc_id) == 0) { + ut_free(rbt_remove_node(query->doc_ids, parent.last)); + + ut_ad(query->total_size >= + SIZEOF_RBT_NODE_ADD + sizeof(fts_ranking_t)); + query->total_size -= SIZEOF_RBT_NODE_ADD + + sizeof(fts_ranking_t); + } +} + +/*******************************************************************//** +Find the doc id in the query set but not in the deleted set, artificialy +downgrade or upgrade its ranking by a value and make/initialize its ranking +under or above its normal range 0 to 1. This is used for Boolean Search +operator such as Negation operator, which makes word's contribution to the +row's relevance to be negative */ +static +void +fts_query_change_ranking( +/*====================*/ + fts_query_t* query, /*!< in: query instance */ + doc_id_t doc_id, /*!< in: the doc id to add */ + ibool downgrade) /*!< in: Whether to downgrade ranking */ +{ + ib_rbt_bound_t parent; + ulint size = ib_vector_size(query->deleted->doc_ids); + doc_id_t* updates = (doc_id_t*) query->deleted->doc_ids->data; + + /* Check if the doc id is deleted and it's in our set. */ + if (fts_bsearch(updates, 0, static_cast<int>(size), doc_id) < 0 + && rbt_search(query->doc_ids, &parent, &doc_id) == 0) { + + fts_ranking_t* ranking; + + ranking = rbt_value(fts_ranking_t, parent.last); + + ranking->rank += downgrade ? 
RANK_DOWNGRADE : RANK_UPGRADE; + + /* Allow at most 2 adjustment by RANK_DOWNGRADE (-0.5) + and RANK_UPGRADE (0.5) */ + if (ranking->rank >= 1.0F) { + ranking->rank = 1.0F; + } else if (ranking->rank <= -1.0F) { + ranking->rank = -1.0F; + } + } +} + +/*******************************************************************//** +Check the doc id in the query set only if it's not in the +deleted array. The doc ids that were found are stored in +another rb tree (fts_query_t::intersect). */ +static +void +fts_query_intersect_doc_id( +/*=======================*/ + fts_query_t* query, /*!< in: query instance */ + doc_id_t doc_id, /*!< in: the doc id to add */ + fts_rank_t rank) /*!< in: if non-zero, it is the + rank associated with the doc_id */ +{ + ib_rbt_bound_t parent; + ulint size = ib_vector_size(query->deleted->doc_ids); + doc_id_t* updates = (doc_id_t*) query->deleted->doc_ids->data; + fts_ranking_t* ranking= NULL; + + /* There are three types of intersect: + 1. '+a': doc_ids is empty, add doc into intersect if it matches 'a'. + 2. 'a +b': docs match 'a' is in doc_ids, add doc into intersect + if it matches 'b'. if the doc is also in doc_ids, then change the + doc's rank, and add 'a' in doc's words. + 3. '+a +b': docs matching '+a' is in doc_ids, add doc into intsersect + if it matches 'b' and it's in doc_ids.(multi_exist = true). 
*/ + + /* Check if the doc id is deleted and it's in our set */ + if (fts_bsearch(updates, 0, static_cast<int>(size), doc_id) < 0) { + fts_ranking_t new_ranking; + + if (rbt_search(query->doc_ids, &parent, &doc_id) != 0) { + if (query->multi_exist) { + return; + } else { + new_ranking.words = NULL; + } + } else { + ranking = rbt_value(fts_ranking_t, parent.last); + + /* We've just checked the doc id before */ + if (ranking->words == NULL) { + ut_ad(rbt_search(query->intersection, &parent, + ranking) == 0); + return; + } + + /* Merge rank */ + rank += ranking->rank; + if (rank >= 1.0F) { + rank = 1.0F; + } else if (rank <= -1.0F) { + rank = -1.0F; + } + + /* Take words */ + new_ranking.words = ranking->words; + new_ranking.words_len = ranking->words_len; + } + + new_ranking.rank = rank; + new_ranking.doc_id = doc_id; + + if (rbt_search(query->intersection, &parent, + &new_ranking) != 0) { + if (new_ranking.words == NULL) { + fts_ranking_words_create(query, &new_ranking); + + query->total_size += RANKING_WORDS_INIT_LEN; + } else { + /* Note that the intersection has taken + ownership of the ranking data. */ + ranking->words = NULL; + } + + rbt_add_node(query->intersection, + &parent, &new_ranking); + + query->total_size += SIZEOF_RBT_NODE_ADD + + sizeof(fts_ranking_t); + } + } +} + +/*******************************************************************//** +Free the document ranking rb tree. 
*/ +static +void +fts_query_free_doc_ids( +/*===================*/ + fts_query_t* query, /*!< in: query instance */ + ib_rbt_t* doc_ids) /*!< in: rb tree to free */ +{ + const ib_rbt_node_t* node; + + for (node = rbt_first(doc_ids); node; node = rbt_first(doc_ids)) { + + fts_ranking_t* ranking; + + ranking = rbt_value(fts_ranking_t, node); + + if (ranking->words) { + ranking->words = NULL; + } + + ut_free(rbt_remove_node(doc_ids, node)); + + ut_ad(query->total_size >= + SIZEOF_RBT_NODE_ADD + sizeof(fts_ranking_t)); + query->total_size -= SIZEOF_RBT_NODE_ADD + + sizeof(fts_ranking_t); + } + + rbt_free(doc_ids); + + ut_ad(query->total_size >= SIZEOF_RBT_CREATE); + query->total_size -= SIZEOF_RBT_CREATE; +} + +/*******************************************************************//** +Add the word to the documents "list" of matching words from +the query. We make a copy of the word from the query heap. */ +static +void +fts_query_add_word_to_document( +/*===========================*/ + fts_query_t* query, /*!< in: query to update */ + doc_id_t doc_id, /*!< in: the document to update */ + const fts_string_t* word) /*!< in: the token to add */ +{ + ib_rbt_bound_t parent; + fts_ranking_t* ranking = NULL; + + if (query->flags == FTS_OPT_RANKING) { + return; + } + + /* First we search the intersection RB tree as it could have + taken ownership of the words rb tree instance. */ + if (query->intersection + && rbt_search(query->intersection, &parent, &doc_id) == 0) { + + ranking = rbt_value(fts_ranking_t, parent.last); + } + + if (ranking == NULL + && rbt_search(query->doc_ids, &parent, &doc_id) == 0) { + + ranking = rbt_value(fts_ranking_t, parent.last); + } + + if (ranking != NULL) { + fts_ranking_words_add(query, ranking, word); + } +} + +/*******************************************************************//** +Check the node ilist. 
*/ +static +void +fts_query_check_node( +/*=================*/ + fts_query_t* query, /*!< in: query to update */ + const fts_string_t* token, /*!< in: the token to search */ + const fts_node_t* node) /*!< in: node to check */ +{ + /* Skip nodes whose doc ids are out range. */ + if (query->oper == FTS_EXIST + && ((query->upper_doc_id > 0 + && node->first_doc_id > query->upper_doc_id) + || (query->lower_doc_id > 0 + && node->last_doc_id < query->lower_doc_id))) { + + /* Ignore */ + + } else { + int ret; + ib_rbt_bound_t parent; + ulint ilist_size = node->ilist_size; + fts_word_freq_t*word_freqs; + + /* The word must exist. */ + ret = rbt_search(query->word_freqs, &parent, token); + ut_a(ret == 0); + + word_freqs = rbt_value(fts_word_freq_t, parent.last); + + query->error = fts_query_filter_doc_ids( + query, token, word_freqs, node, + node->ilist, ilist_size, TRUE); + } +} + +/*****************************************************************//** +Search index cache for word with wildcard match. +@return number of words matched */ +static +ulint +fts_cache_find_wildcard( +/*====================*/ + fts_query_t* query, /*!< in: query instance */ + const fts_index_cache_t*index_cache, /*!< in: cache to search */ + const fts_string_t* token) /*!< in: token to search */ +{ + ib_rbt_bound_t parent; + const ib_vector_t* nodes = NULL; + fts_string_t srch_text; + byte term[FTS_MAX_WORD_LEN + 1]; + ulint num_word = 0; + + srch_text.f_len = (token->f_str[token->f_len - 1] == '%') + ? 
token->f_len - 1 + : token->f_len; + + strncpy((char*) term, (char*) token->f_str, srch_text.f_len); + term[srch_text.f_len] = '\0'; + srch_text.f_str = term; + + /* Lookup the word in the rb tree */ + if (rbt_search_cmp(index_cache->words, &parent, &srch_text, NULL, + innobase_fts_text_cmp_prefix) == 0) { + const fts_tokenizer_word_t* word; + ulint i; + const ib_rbt_node_t* cur_node; + ibool forward = FALSE; + + word = rbt_value(fts_tokenizer_word_t, parent.last); + cur_node = parent.last; + + while (innobase_fts_text_cmp_prefix( + index_cache->charset, &srch_text, &word->text) == 0) { + + nodes = word->nodes; + + for (i = 0; nodes && i < ib_vector_size(nodes); ++i) { + int ret; + const fts_node_t* node; + ib_rbt_bound_t freq_parent; + fts_word_freq_t* word_freqs; + + node = static_cast<const fts_node_t*>( + ib_vector_get_const(nodes, i)); + + ret = rbt_search(query->word_freqs, + &freq_parent, + &srch_text); + + ut_a(ret == 0); + + word_freqs = rbt_value( + fts_word_freq_t, + freq_parent.last); + + query->error = fts_query_filter_doc_ids( + query, &srch_text, + word_freqs, node, + node->ilist, node->ilist_size, TRUE); + + if (query->error != DB_SUCCESS) { + return(0); + } + } + + num_word++; + + if (!forward) { + cur_node = rbt_prev( + index_cache->words, cur_node); + } else { +cont_search: + cur_node = rbt_next( + index_cache->words, cur_node); + } + + if (!cur_node) { + break; + } + + word = rbt_value(fts_tokenizer_word_t, cur_node); + } + + if (!forward) { + forward = TRUE; + cur_node = parent.last; + goto cont_search; + } + } + + return(num_word); +} + +/*****************************************************************//** +Set difference. 
+@return DB_SUCCESS if all go well */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_query_difference( +/*=================*/ + fts_query_t* query, /*!< in: query instance */ + const fts_string_t* token) /*!< in: token to search */ +{ + ulint n_doc_ids= 0; + trx_t* trx = query->trx; + dict_table_t* table = query->index->table; + + ut_a(query->oper == FTS_IGNORE); + +#ifdef FTS_INTERNAL_DIAG_PRINT + { + ib::info out; + out << "DIFFERENCE: Searching: '"; + out.write(token->f_str, token->f_len); + out << "'"; + } +#endif + + if (query->doc_ids) { + n_doc_ids = rbt_size(query->doc_ids); + } + + /* There is nothing we can substract from an empty set. */ + if (query->doc_ids && !rbt_empty(query->doc_ids)) { + ulint i; + fts_fetch_t fetch; + const ib_vector_t* nodes; + const fts_index_cache_t*index_cache; + que_t* graph = NULL; + fts_cache_t* cache = table->fts->cache; + dberr_t error; + + mysql_mutex_lock(&cache->lock); + + index_cache = fts_find_index_cache(cache, query->index); + + /* Must find the index cache */ + ut_a(index_cache != NULL); + + /* Search the cache for a matching word first. */ + if (query->cur_node->term.wildcard + && query->flags != FTS_PROXIMITY + && query->flags != FTS_PHRASE) { + fts_cache_find_wildcard(query, index_cache, token); + } else { + nodes = fts_cache_find_word(index_cache, token); + + for (i = 0; nodes && i < ib_vector_size(nodes) + && query->error == DB_SUCCESS; ++i) { + const fts_node_t* node; + + node = static_cast<const fts_node_t*>( + ib_vector_get_const(nodes, i)); + + fts_query_check_node(query, token, node); + } + } + + mysql_mutex_unlock(&cache->lock); + + /* error is passed by 'query->error' */ + if (query->error != DB_SUCCESS) { + ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT); + return(query->error); + } + + /* Setup the callback args for filtering and + consolidating the ilist. 
*/ + fetch.read_arg = query; + fetch.read_record = fts_query_index_fetch_nodes; + + error = fts_index_fetch_nodes( + trx, &graph, &query->fts_index_table, token, &fetch); + + /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */ + ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS)); + if (error != DB_SUCCESS) { + query->error = error; + } + + que_graph_free(graph); + } + + /* The size can't increase. */ + ut_a(rbt_size(query->doc_ids) <= n_doc_ids); + + return(query->error); +} + +/* Free the query intersection +@param query query instance */ +static void fts_query_free_intersection(fts_query_t* query) +{ + fts_query_free_doc_ids(query, query->intersection); + query->intersection = NULL; +} + +/*****************************************************************//** +Intersect the token doc ids with the current set. +@return DB_SUCCESS if all go well */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_query_intersect( +/*================*/ + fts_query_t* query, /*!< in: query instance */ + const fts_string_t* token) /*!< in: the token to search */ +{ + trx_t* trx = query->trx; + dict_table_t* table = query->index->table; + + ut_a(query->oper == FTS_EXIST); + +#ifdef FTS_INTERNAL_DIAG_PRINT + { + ib::info out; + out << "INTERSECT: Searching: '"; + out.write(token->f_str, token->f_len); + out << "'"; + } +#endif + + /* If the words set is not empty and multi exist is true, + we know the intersection set is empty in advance. */ + if (!(rbt_empty(query->doc_ids) && query->multi_exist)) { + ulint n_doc_ids = 0; + ulint i; + fts_fetch_t fetch; + const ib_vector_t* nodes; + const fts_index_cache_t*index_cache; + que_t* graph = NULL; + fts_cache_t* cache = table->fts->cache; + dberr_t error; + + ut_a(!query->intersection); + + n_doc_ids = rbt_size(query->doc_ids); + + /* Create the rb tree that will hold the doc ids of + the intersection. 
*/ + query->intersection = rbt_create( + sizeof(fts_ranking_t), fts_ranking_doc_id_cmp); + + query->total_size += SIZEOF_RBT_CREATE; + + /* This is to avoid decompressing the ilist if the + node's ilist doc ids are out of range. */ + if (!rbt_empty(query->doc_ids) && query->multi_exist) { + const ib_rbt_node_t* node; + doc_id_t* doc_id; + + node = rbt_first(query->doc_ids); + doc_id = rbt_value(doc_id_t, node); + query->lower_doc_id = *doc_id; + + node = rbt_last(query->doc_ids); + doc_id = rbt_value(doc_id_t, node); + query->upper_doc_id = *doc_id; + + } else { + query->lower_doc_id = 0; + query->upper_doc_id = 0; + } + + /* Search the cache for a matching word first. */ + + mysql_mutex_lock(&cache->lock); + + /* Search for the index specific cache. */ + index_cache = fts_find_index_cache(cache, query->index); + + /* Must find the index cache. */ + ut_a(index_cache != NULL); + + if (query->cur_node->term.wildcard) { + /* Wildcard search the index cache */ + fts_cache_find_wildcard(query, index_cache, token); + } else { + nodes = fts_cache_find_word(index_cache, token); + + for (i = 0; nodes && i < ib_vector_size(nodes) + && query->error == DB_SUCCESS; ++i) { + const fts_node_t* node; + + node = static_cast<const fts_node_t*>( + ib_vector_get_const(nodes, i)); + + fts_query_check_node(query, token, node); + } + } + + mysql_mutex_unlock(&cache->lock); + + /* error is passed by 'query->error' */ + if (query->error != DB_SUCCESS) { + ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT); + fts_query_free_intersection(query); + return(query->error); + } + + /* Setup the callback args for filtering and + consolidating the ilist. 
*/ + fetch.read_arg = query; + fetch.read_record = fts_query_index_fetch_nodes; + + error = fts_index_fetch_nodes( + trx, &graph, &query->fts_index_table, token, &fetch); + + /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */ + ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS)); + if (error != DB_SUCCESS) { + query->error = error; + } + + que_graph_free(graph); + + if (query->error == DB_SUCCESS) { + /* Make the intesection (rb tree) the current doc id + set and free the old set. */ + fts_query_free_doc_ids(query, query->doc_ids); + query->doc_ids = query->intersection; + query->intersection = NULL; + + ut_a(!query->multi_exist || (query->multi_exist + && rbt_size(query->doc_ids) <= n_doc_ids)); + } else if (query->intersection) { + fts_query_free_intersection(query); + } + } + + return(query->error); +} + +/*****************************************************************//** +Query index cache. +@return DB_SUCCESS if all go well */ +static +dberr_t +fts_query_cache( +/*============*/ + fts_query_t* query, /*!< in/out: query instance */ + const fts_string_t* token) /*!< in: token to search */ +{ + const fts_index_cache_t*index_cache; + dict_table_t* table = query->index->table; + fts_cache_t* cache = table->fts->cache; + + /* Search the cache for a matching word first. */ + mysql_mutex_lock(&cache->lock); + + /* Search for the index specific cache. */ + index_cache = fts_find_index_cache(cache, query->index); + + /* Must find the index cache. 
*/ + ut_a(index_cache != NULL); + + if (query->cur_node->term.wildcard + && query->flags != FTS_PROXIMITY + && query->flags != FTS_PHRASE) { + /* Wildcard search the index cache */ + fts_cache_find_wildcard(query, index_cache, token); + } else { + const ib_vector_t* nodes; + ulint i; + + nodes = fts_cache_find_word(index_cache, token); + + for (i = 0; nodes && i < ib_vector_size(nodes) + && query->error == DB_SUCCESS; ++i) { + const fts_node_t* node; + + node = static_cast<const fts_node_t*>( + ib_vector_get_const(nodes, i)); + + fts_query_check_node(query, token, node); + } + } + + mysql_mutex_unlock(&cache->lock); + + return(query->error); +} + +/*****************************************************************//** +Set union. +@return DB_SUCCESS if all go well */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_query_union( +/*============*/ + fts_query_t* query, /*!< in: query instance */ + fts_string_t* token) /*!< in: token to search */ +{ + fts_fetch_t fetch; + ulint n_doc_ids = 0; + trx_t* trx = query->trx; + que_t* graph = NULL; + dberr_t error; + + ut_a(query->oper == FTS_NONE || query->oper == FTS_DECR_RATING || + query->oper == FTS_NEGATE || query->oper == FTS_INCR_RATING); + +#ifdef FTS_INTERNAL_DIAG_PRINT + { + ib::info out; + out << "UNION: Searching: '"; + out.write(token->f_str, token->f_len); + out << "'"; + } +#endif + + if (query->doc_ids) { + n_doc_ids = rbt_size(query->doc_ids); + } + + if (token->f_len == 0) { + return(query->error); + } + + fts_query_cache(query, token); + + /* Setup the callback args for filtering and + consolidating the ilist. */ + fetch.read_arg = query; + fetch.read_record = fts_query_index_fetch_nodes; + + /* Read the nodes from disk. 
*/ + error = fts_index_fetch_nodes( + trx, &graph, &query->fts_index_table, token, &fetch); + + /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */ + ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS)); + if (error != DB_SUCCESS) { + query->error = error; + } + + que_graph_free(graph); + + if (query->error == DB_SUCCESS) { + + /* The size can't decrease. */ + ut_a(rbt_size(query->doc_ids) >= n_doc_ids); + + /* Calulate the number of doc ids that were added to + the current doc id set. */ + if (query->doc_ids) { + n_doc_ids = rbt_size(query->doc_ids) - n_doc_ids; + } + } + + return(query->error); +} + +/*****************************************************************//** +Depending upon the current query operator process the doc id. +return DB_SUCCESS if all go well +or return DB_FTS_EXCEED_RESULT_CACHE_LIMIT */ +static +dberr_t +fts_query_process_doc_id( +/*=====================*/ + fts_query_t* query, /*!< in: query instance */ + doc_id_t doc_id, /*!< in: doc id to process */ + fts_rank_t rank) /*!< in: if non-zero, it is the + rank associated with the doc_id */ +{ + if (query->flags == FTS_OPT_RANKING) { + return(DB_SUCCESS); + } + + switch (query->oper) { + case FTS_NONE: + fts_query_union_doc_id(query, doc_id, rank); + break; + + case FTS_EXIST: + fts_query_intersect_doc_id(query, doc_id, rank); + break; + + case FTS_IGNORE: + fts_query_remove_doc_id(query, doc_id); + break; + + case FTS_NEGATE: + fts_query_change_ranking(query, doc_id, TRUE); + break; + + case FTS_DECR_RATING: + fts_query_union_doc_id(query, doc_id, rank); + fts_query_change_ranking(query, doc_id, TRUE); + break; + + case FTS_INCR_RATING: + fts_query_union_doc_id(query, doc_id, rank); + fts_query_change_ranking(query, doc_id, FALSE); + break; + + default: + ut_error; + } + + if (query->total_size > fts_result_cache_limit) { + return(DB_FTS_EXCEED_RESULT_CACHE_LIMIT); + } else { + return(DB_SUCCESS); + } +} + 
+/*****************************************************************//** +Merge two result sets. */ +static +dberr_t +fts_merge_doc_ids( +/*==============*/ + fts_query_t* query, /*!< in,out: query instance */ + const ib_rbt_t* doc_ids) /*!< in: result set to merge */ +{ + const ib_rbt_node_t* node; + + DBUG_ENTER("fts_merge_doc_ids"); + + ut_a(!query->intersection); + + /* To process FTS_EXIST operation (intersection), we need + to create a new result set for fts_query_intersect(). */ + if (query->oper == FTS_EXIST) { + + query->intersection = rbt_create( + sizeof(fts_ranking_t), fts_ranking_doc_id_cmp); + + query->total_size += SIZEOF_RBT_CREATE; + } + + /* Merge the elements to the result set. */ + for (node = rbt_first(doc_ids); node; node = rbt_next(doc_ids, node)) { + fts_ranking_t* ranking; + ulint pos = 0; + fts_string_t word; + + ranking = rbt_value(fts_ranking_t, node); + + query->error = fts_query_process_doc_id( + query, ranking->doc_id, ranking->rank); + + if (query->error != DB_SUCCESS) { + if (query->intersection) { + ut_a(query->oper == FTS_EXIST); + fts_query_free_intersection(query); + } + DBUG_RETURN(query->error); + } + + /* Merge words. Don't need to take operator into account. */ + ut_a(ranking->words); + while (fts_ranking_words_get_next(query, ranking, &pos, &word)) { + fts_query_add_word_to_document(query, ranking->doc_id, + &word); + } + } + + /* If it is an intersection operation, reset query->doc_ids + to query->intersection and free the old result list. */ + if (query->oper == FTS_EXIST && query->intersection != NULL) { + fts_query_free_doc_ids(query, query->doc_ids); + query->doc_ids = query->intersection; + query->intersection = NULL; + } + + DBUG_RETURN(DB_SUCCESS); +} + +/*****************************************************************//** +Skip non-whitespace in a string. Move ptr to the next word boundary. 
+@return pointer to first whitespace character or end */ +UNIV_INLINE +byte* +fts_query_skip_word( +/*================*/ + byte* ptr, /*!< in: start of scan */ + const byte* end) /*!< in: pointer to end of string */ +{ + /* TODO: Does this have to be UTF-8 too ? */ + while (ptr < end && !(ispunct(*ptr) || isspace(*ptr))) { + ++ptr; + } + + return(ptr); +} + +/*****************************************************************//** +Check whether the remaining terms in the phrase match the text. +@return TRUE if matched else FALSE */ +static +ibool +fts_query_match_phrase_terms( +/*=========================*/ + fts_phrase_t* phrase, /*!< in: phrase to match */ + byte** start, /*!< in/out: text to search, we can't + make this const becase we need to + first convert the string to + lowercase */ + const byte* end, /*!< in: pointer to the end of + the string to search */ + mem_heap_t* heap) /*!< in: heap */ +{ + ulint i; + byte* ptr = *start; + const ib_vector_t* tokens = phrase->tokens; + ulint distance = phrase->distance; + + /* We check only from the second term onwards, since the first + must have matched otherwise we wouldn't be here. */ + for (i = 1; ptr < end && i < ib_vector_size(tokens); /* No op */) { + fts_string_t match; + fts_string_t cmp_str; + const fts_string_t* token; + int result; + ulint ret; + + ret = innobase_mysql_fts_get_token( + phrase->charset, ptr, + const_cast<byte*>(end), &match); + + if (match.f_len > 0) { + /* Get next token to match. */ + token = static_cast<const fts_string_t*>( + ib_vector_get_const(tokens, i)); + + fts_string_dup(&cmp_str, &match, heap); + + result = innobase_fts_text_case_cmp( + phrase->charset, token, &cmp_str); + + /* Skip the rest of the tokens if this one doesn't + match and the proximity distance is exceeded. */ + if (result + && (distance == ULINT_UNDEFINED + || distance == 0)) { + + break; + } + + /* This token matched move to the next token. 
*/ + if (result == 0) { + /* Advance the text to search by the length + of the last token. */ + ptr += ret; + + /* Advance to the next token. */ + ++i; + } else { + + ut_a(distance != ULINT_UNDEFINED); + + ptr = fts_query_skip_word(ptr, end); + } + + /* Distance can be 0 for exact matches. */ + if (distance != ULINT_UNDEFINED && distance > 0) { + --distance; + } + } else { + ptr += ret; + } + } + + *start = ptr; + + /* Can't be greater than the number of elements. */ + ut_a(i <= ib_vector_size(tokens)); + + /* This is the case for multiple words. */ + if (i == ib_vector_size(tokens)) { + phrase->found = TRUE; + } + + return(phrase->found); +} + +/*****************************************************************//** +Callback function to count the number of words in position ranges, +and see whether the word count is in specified "phrase->distance" +@return true if the number of characters is less than the "distance" */ +static +bool +fts_proximity_is_word_in_range( +/*===========================*/ + const fts_phrase_t* + phrase, /*!< in: phrase with the search info */ + byte* start, /*!< in: text to search */ + ulint total_len) /*!< in: length of text */ +{ + fts_proximity_t* proximity_pos = phrase->proximity_pos; + + ut_ad(proximity_pos->n_pos == proximity_pos->min_pos.size()); + ut_ad(proximity_pos->n_pos == proximity_pos->max_pos.size()); + + /* Search each matched position pair (with min and max positions) + and count the number of words in the range */ + for (ulint i = 0; i < proximity_pos->n_pos; i++) { + ulint cur_pos = proximity_pos->min_pos[i]; + ulint n_word = 0; + + ut_ad(proximity_pos->max_pos[i] <= total_len); + + /* Walk through words in the range and count them */ + while (cur_pos <= proximity_pos->max_pos[i]) { + ulint len; + fts_string_t str; + + len = innobase_mysql_fts_get_token( + phrase->charset, + start + cur_pos, + start + total_len, &str); + + if (len == 0) { + break; + } + + /* Advances position with "len" bytes */ + cur_pos += len; + + /* 
Record the number of words */ + if (str.f_n_char > 0) { + n_word++; + } + + if (n_word > phrase->distance) { + break; + } + } + + /* Check if the number of words is less than specified + "distance" */ + if (n_word && n_word <= phrase->distance) { + return(true); + } + } + + return(false); +} + +/*****************************************************************//** +FTS plugin parser 'myql_add_word' callback function for phrase match +Refer to 'st_mysql_ftparser_param' for more detail. +@return 0 if match, or return non-zero */ +static +int +fts_query_match_phrase_add_word_for_parser( +/*=======================================*/ + MYSQL_FTPARSER_PARAM* param, /*!< in: parser param */ + const char* word, /*!< in: token */ + int word_len, /*!< in: token length */ + MYSQL_FTPARSER_BOOLEAN_INFO*) +{ + fts_phrase_param_t* phrase_param; + fts_phrase_t* phrase; + const ib_vector_t* tokens; + fts_string_t match; + fts_string_t cmp_str; + const fts_string_t* token; + int result; + mem_heap_t* heap; + + phrase_param = static_cast<fts_phrase_param_t*>(param->mysql_ftparam); + heap = phrase_param->heap; + phrase = phrase_param->phrase; + tokens = phrase->tokens; + + /* In case plugin parser doesn't check return value */ + if (phrase_param->token_index == ib_vector_size(tokens)) { + return(1); + } + + match.f_str = (uchar *)(word); + match.f_len = ulint(word_len); + match.f_n_char= fts_get_token_size(phrase->charset, word, match.f_len); + + if (match.f_len > 0) { + /* Get next token to match. */ + ut_a(phrase_param->token_index < ib_vector_size(tokens)); + token = static_cast<const fts_string_t*>( + ib_vector_get_const(tokens, phrase_param->token_index)); + + fts_string_dup(&cmp_str, &match, heap); + + result = innobase_fts_text_case_cmp( + phrase->charset, token, &cmp_str); + + if (result == 0) { + phrase_param->token_index++; + } else { + return(1); + } + } + + /* Can't be greater than the number of elements. 
*/ + ut_a(phrase_param->token_index <= ib_vector_size(tokens)); + + /* This is the case for multiple words. */ + if (phrase_param->token_index == ib_vector_size(tokens)) { + phrase->found = TRUE; + } + + return(static_cast<int>(phrase->found)); +} + +/*****************************************************************//** +Check whether the terms in the phrase match the text. +@return TRUE if matched else FALSE */ +static +ibool +fts_query_match_phrase_terms_by_parser( +/*===================================*/ + fts_phrase_param_t* phrase_param, /* in/out: phrase param */ + st_mysql_ftparser* parser, /* in: plugin fts parser */ + byte* text, /* in: text to check */ + ulint len) /* in: text length */ +{ + MYSQL_FTPARSER_PARAM param; + + ut_a(parser); + + /* Set paramters for param */ + param.mysql_parse = fts_tokenize_document_internal; + param.mysql_add_word = fts_query_match_phrase_add_word_for_parser; + param.mysql_ftparam = phrase_param; + param.cs = phrase_param->phrase->charset; + param.doc = reinterpret_cast<char*>(text); + param.length = static_cast<int>(len); + param.mode= MYSQL_FTPARSER_WITH_STOPWORDS; + + PARSER_INIT(parser, ¶m); + parser->parse(¶m); + PARSER_DEINIT(parser, ¶m); + + return(phrase_param->phrase->found); +} + +/*****************************************************************//** +Callback function to fetch and search the document. 
+@return TRUE if matched else FALSE */ +static +ibool +fts_query_match_phrase( +/*===================*/ + fts_phrase_t* phrase, /*!< in: phrase to match */ + byte* start, /*!< in: text to search, we can't make + this const becase we need to first + convert the string to lowercase */ + ulint cur_len, /*!< in: length of text */ + ulint prev_len, /*!< in: total length for searched + doc fields*/ + mem_heap_t* heap) /* heap */ +{ + ulint i; + const fts_string_t* first; + const byte* end = start + cur_len; + const ib_vector_t* tokens = phrase->tokens; + const ib_vector_t* positions = phrase->match->positions; + + ut_a(!phrase->found); + ut_a(phrase->match->doc_id > 0); + ut_a(ib_vector_size(tokens) > 0); + ut_a(ib_vector_size(positions) > 0); + + first = static_cast<const fts_string_t*>( + ib_vector_get_const(tokens, 0)); + + ut_a(phrase->match->start < ib_vector_size(positions)); + + for (i = phrase->match->start; i < ib_vector_size(positions); ++i) { + ulint pos; + byte* ptr = start; + + pos = *(ulint*) ib_vector_get_const(positions, i); + + if (pos == ULINT_UNDEFINED) { + break; + } + + if (pos < prev_len) { + continue; + } + + /* Document positions are calculated from the beginning + of the first field, need to save the length for each + searched field to adjust the doc position when search + phrases. */ + pos -= prev_len; + ptr = start + pos; + + /* Within limits ? 
*/ + if (ptr >= end) { + break; + } + + if (phrase->parser) { + fts_phrase_param_t phrase_param; + + phrase_param.phrase = phrase; + phrase_param.token_index = 0; + phrase_param.heap = heap; + + if (fts_query_match_phrase_terms_by_parser( + &phrase_param, + phrase->parser, + ptr, + ulint(end - ptr))) { + break; + } + } else { + fts_string_t match; + fts_string_t cmp_str; + ulint ret; + + match.f_str = ptr; + ret = innobase_mysql_fts_get_token( + phrase->charset, start + pos, + const_cast<byte*>(end), &match); + + if (match.f_len == 0) { + break; + } + + fts_string_dup(&cmp_str, &match, heap); + + if (innobase_fts_text_case_cmp( + phrase->charset, first, &cmp_str) == 0) { + + /* This is the case for the single word + in the phrase. */ + if (ib_vector_size(phrase->tokens) == 1) { + phrase->found = TRUE; + break; + } + + ptr += ret; + + /* Match the remaining terms in the phrase. */ + if (fts_query_match_phrase_terms(phrase, &ptr, + end, heap)) { + break; + } + } + } + } + + return(phrase->found); +} + +/*****************************************************************//** +Callback function to fetch and search the document. 
+@return whether the phrase is found */ +static +ibool +fts_query_fetch_document( +/*=====================*/ + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: fts_doc_t* */ +{ + + que_node_t* exp; + sel_node_t* node = static_cast<sel_node_t*>(row); + fts_phrase_t* phrase = static_cast<fts_phrase_t*>(user_arg); + ulint prev_len = 0; + ulint total_len = 0; + byte* document_text = NULL; + + exp = node->select_list; + + phrase->found = FALSE; + + /* For proximity search, we will need to get the whole document + from all fields, so first count the total length of the document + from all the fields */ + if (phrase->proximity_pos) { + while (exp) { + ulint field_len; + dfield_t* dfield = que_node_get_val(exp); + byte* data = static_cast<byte*>( + dfield_get_data(dfield)); + + if (dfield_is_ext(dfield)) { + ulint local_len = dfield_get_len(dfield); + + local_len -= BTR_EXTERN_FIELD_REF_SIZE; + + field_len = mach_read_from_4( + data + local_len + BTR_EXTERN_LEN + 4); + } else { + field_len = dfield_get_len(dfield); + } + + if (field_len != UNIV_SQL_NULL) { + total_len += field_len + 1; + } + + exp = que_node_get_next(exp); + } + + document_text = static_cast<byte*>(mem_heap_zalloc( + phrase->heap, total_len)); + + if (!document_text) { + return(FALSE); + } + } + + exp = node->select_list; + + while (exp) { + dfield_t* dfield = que_node_get_val(exp); + byte* data = static_cast<byte*>( + dfield_get_data(dfield)); + ulint cur_len; + + if (dfield_is_ext(dfield)) { + data = btr_copy_externally_stored_field( + &cur_len, data, phrase->zip_size, + dfield_get_len(dfield), phrase->heap); + } else { + cur_len = dfield_get_len(dfield); + } + + if (cur_len != UNIV_SQL_NULL && cur_len != 0) { + if (phrase->proximity_pos) { + ut_ad(prev_len + cur_len <= total_len); + memcpy(document_text + prev_len, data, cur_len); + } else { + /* For phrase search */ + phrase->found = + fts_query_match_phrase( + phrase, + static_cast<byte*>(data), + cur_len, prev_len, + phrase->heap); + } + 
+ /* Document positions are calculated from the beginning + of the first field, need to save the length for each + searched field to adjust the doc position when search + phrases. */ + prev_len += cur_len + 1; + } + + if (phrase->found) { + break; + } + + exp = que_node_get_next(exp); + } + + if (phrase->proximity_pos) { + ut_ad(prev_len <= total_len); + + phrase->found = fts_proximity_is_word_in_range( + phrase, document_text, total_len); + } + + return(phrase->found); +} + +#if 0 +/******************************************************************** +Callback function to check whether a record was found or not. */ +static +ibool +fts_query_select( +/*=============*/ + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: fts_doc_t* */ +{ + int i; + que_node_t* exp; + sel_node_t* node = row; + fts_select_t* select = user_arg; + + ut_a(select->word_freq); + ut_a(select->word_freq->doc_freqs); + + exp = node->select_list; + + for (i = 0; exp && !select->found; ++i) { + dfield_t* dfield = que_node_get_val(exp); + void* data = dfield_get_data(dfield); + ulint len = dfield_get_len(dfield); + + switch (i) { + case 0: /* DOC_COUNT */ + if (len != UNIV_SQL_NULL && len != 0) { + + select->word_freq->doc_count += + mach_read_from_4(data); + } + break; + + case 1: /* ILIST */ + if (len != UNIV_SQL_NULL && len != 0) { + + fts_query_find_doc_id(select, data, len); + } + break; + + default: + ut_error; + } + + exp = que_node_get_next(exp); + } + + return(FALSE); +} + +/******************************************************************** +Read the rows from the FTS index, that match word and where the +doc id is between first and last doc id. 
+@return DB_SUCCESS if all go well else error code */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_query_find_term( +/*================*/ + fts_query_t* query, /*!< in: FTS query state */ + que_t** graph, /*!< in: prepared statement */ + const fts_string_t* word, /*!< in: the word to fetch */ + doc_id_t doc_id, /*!< in: doc id to match */ + ulint* min_pos,/*!< in/out: pos found must be + greater than this minimum value. */ + ibool* found) /*!< out: TRUE if found else FALSE */ +{ + pars_info_t* info; + dberr_t error; + fts_select_t select; + doc_id_t match_doc_id; + trx_t* trx = query->trx; + char table_name[MAX_FULL_NAME_LEN]; + + trx->op_info = "fetching FTS index matching nodes"; + + if (*graph) { + info = (*graph)->info; + } else { + ulint selected; + + info = pars_info_create(); + + selected = fts_select_index(*word->f_str); + query->fts_index_table.suffix = fts_get_suffix(selected); + + fts_get_table_name(&query->fts_index_table, table_name); + pars_info_bind_id(info, "index_table_name", table_name); + } + + select.found = FALSE; + select.doc_id = doc_id; + select.min_pos = *min_pos; + select.word_freq = fts_query_add_word_freq(query, word->f_str); + + pars_info_bind_function(info, "my_func", fts_query_select, &select); + pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len); + + /* Convert to "storage" byte order. 
*/ + fts_write_doc_id((byte*) &match_doc_id, doc_id); + + fts_bind_doc_id(info, "min_doc_id", &match_doc_id); + + fts_bind_doc_id(info, "max_doc_id", &match_doc_id); + + if (!*graph) { + + *graph = fts_parse_sql( + &query->fts_index_table, + info, + "DECLARE FUNCTION my_func;\n" + "DECLARE CURSOR c IS" + " SELECT doc_count, ilist\n" + " FROM $index_table_name\n" + " WHERE word LIKE :word AND" + " first_doc_id <= :min_doc_id AND" + " last_doc_id >= :max_doc_id\n" + " ORDER BY first_doc_id;\n" + "BEGIN\n" + "\n" + "OPEN c;\n" + "WHILE 1 = 1 LOOP\n" + " FETCH c INTO my_func();\n" + " IF c % NOTFOUND THEN\n" + " EXIT;\n" + " END IF;\n" + "END LOOP;\n" + "CLOSE c;"); + } + + for (;;) { + error = fts_eval_sql(trx, *graph); + + if (error == DB_SUCCESS) { + + break; /* Exit the loop. */ + } else { + + if (error == DB_LOCK_WAIT_TIMEOUT) { + ib::warn() << "lock wait timeout reading FTS" + " index. Retrying!"; + + trx->error_state = DB_SUCCESS; + } else { + ib::error() << error + << " while reading FTS index."; + + break; /* Exit the loop. */ + } + } + } + + /* Value to return */ + *found = select.found; + + if (*found) { + *min_pos = select.min_pos; + } + + return(error); +} + +/******************************************************************** +Callback aggregator for int columns. */ +static +ibool +fts_query_sum( +/*==========*/ + /*!< out: always returns TRUE */ + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: ulint* */ +{ + + que_node_t* exp; + sel_node_t* node = row; + ulint* total = user_arg; + + exp = node->select_list; + + while (exp) { + dfield_t* dfield = que_node_get_val(exp); + void* data = dfield_get_data(dfield); + ulint len = dfield_get_len(dfield); + + if (len != UNIV_SQL_NULL && len != 0) { + *total += mach_read_from_4(data); + } + + exp = que_node_get_next(exp); + } + + return(TRUE); +} + +/******************************************************************** +Calculate the total documents that contain a particular word (term). 
+@return DB_SUCCESS if all go well else error code */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_query_total_docs_containing_term( +/*=================================*/ + fts_query_t* query, /*!< in: FTS query state */ + const fts_string_t* word, /*!< in: the word to check */ + ulint* total) /*!< out: documents containing word */ +{ + pars_info_t* info; + dberr_t error; + que_t* graph; + ulint selected; + trx_t* trx = query->trx; + char table_name[MAX_FULL_NAME_LEN] + + trx->op_info = "fetching FTS index document count"; + + *total = 0; + + info = pars_info_create(); + + pars_info_bind_function(info, "my_func", fts_query_sum, total); + pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len); + + selected = fts_select_index(*word->f_str); + + query->fts_index_table.suffix = fts_get_suffix(selected); + + fts_get_table_name(&query->fts_index_table, table_name); + + pars_info_bind_id(info, "index_table_name", table_name); + + graph = fts_parse_sql( + &query->fts_index_table, + info, + "DECLARE FUNCTION my_func;\n" + "DECLARE CURSOR c IS" + " SELECT doc_count\n" + " FROM $index_table_name\n" + " WHERE word = :word" + " ORDER BY first_doc_id;\n" + "BEGIN\n" + "\n" + "OPEN c;\n" + "WHILE 1 = 1 LOOP\n" + " FETCH c INTO my_func();\n" + " IF c % NOTFOUND THEN\n" + " EXIT;\n" + " END IF;\n" + "END LOOP;\n" + "CLOSE c;"); + + for (;;) { + error = fts_eval_sql(trx, graph); + + if (error == DB_SUCCESS) { + + break; /* Exit the loop. */ + } else { + + if (error == DB_LOCK_WAIT_TIMEOUT) { + ib::warn() << "lock wait timeout reading FTS" + " index. Retrying!"; + + trx->error_state = DB_SUCCESS; + } else { + ib::error() << error + << " while reading FTS index."; + + break; /* Exit the loop. */ + } + } + } + + que_graph_free(graph); + + return(error); +} + +/******************************************************************** +Get the total number of words in a documents. 
+@return DB_SUCCESS if all go well else error code */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_query_terms_in_document( +/*========================*/ + fts_query_t* query, /*!< in: FTS query state */ + doc_id_t doc_id, /*!< in: the word to check */ + ulint* total) /*!< out: total words in document */ +{ + pars_info_t* info; + dberr_t error; + que_t* graph; + doc_id_t read_doc_id; + trx_t* trx = query->trx; + char table_name[MAX_FULL_NAME_LEN]; + + trx->op_info = "fetching FTS document term count"; + + *total = 0; + + info = pars_info_create(); + + pars_info_bind_function(info, "my_func", fts_query_sum, total); + + /* Convert to "storage" byte order. */ + fts_write_doc_id((byte*) &read_doc_id, doc_id); + fts_bind_doc_id(info, "doc_id", &read_doc_id); + + query->fts_index_table.suffix = "DOC_ID"; + + fts_get_table_name(&query->fts_index_table, table_name); + + pars_info_bind_id(info, "index_table_name", table_name); + + graph = fts_parse_sql( + &query->fts_index_table, + info, + "DECLARE FUNCTION my_func;\n" + "DECLARE CURSOR c IS" + " SELECT count\n" + " FROM $index_table_name\n" + " WHERE doc_id = :doc_id" + " BEGIN\n" + "\n" + "OPEN c;\n" + "WHILE 1 = 1 LOOP\n" + " FETCH c INTO my_func();\n" + " IF c % NOTFOUND THEN\n" + " EXIT;\n" + " END IF;\n" + "END LOOP;\n" + "CLOSE c;"); + + for (;;) { + error = fts_eval_sql(trx, graph); + + if (error == DB_SUCCESS) { + + break; /* Exit the loop. */ + } else { + + if (error == DB_LOCK_WAIT_TIMEOUT) { + ib::warn() << "lock wait timeout reading FTS" + " doc id table. Retrying!"; + + trx->error_state = DB_SUCCESS; + } else { + ib::error() << error << " while reading FTS" + " doc id table."; + + break; /* Exit the loop. */ + } + } + } + + que_graph_free(graph); + + return(error); +} +#endif + +/*****************************************************************//** +Retrieve the document and match the phrase tokens. 
+@return DB_SUCCESS or error code */ +MY_ATTRIBUTE((nonnull(1,2,3,6), warn_unused_result)) +static +dberr_t +fts_query_match_document( +/*=====================*/ + ib_vector_t* tokens, /*!< in: phrase tokens */ + fts_get_doc_t* get_doc, /*!< in: table and prepared statements */ + fts_match_t* match, /*!< in: doc id and positions */ + ulint distance, /*!< in: proximity distance */ + st_mysql_ftparser* parser, /*!< in: fts plugin parser */ + ibool* found) /*!< out: TRUE if phrase found */ +{ + dberr_t error; + fts_phrase_t phrase(get_doc->index_cache->index->table); + + phrase.match = match; /* Positions to match */ + phrase.tokens = tokens; /* Tokens to match */ + phrase.distance = distance; + phrase.charset = get_doc->index_cache->charset; + phrase.heap = mem_heap_create(512); + phrase.parser = parser; + + *found = phrase.found = FALSE; + + error = fts_doc_fetch_by_doc_id( + get_doc, match->doc_id, NULL, FTS_FETCH_DOC_BY_ID_EQUAL, + fts_query_fetch_document, &phrase); + + if (UNIV_UNLIKELY(error != DB_SUCCESS)) { + ib::error() << "(" << error << ") matching document."; + } else { + *found = phrase.found; + } + + mem_heap_free(phrase.heap); + + return(error); +} + +/*****************************************************************//** +This function fetches the original documents and count the +words in between matching words to see that is in specified distance +@return DB_SUCCESS if all OK */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +bool +fts_query_is_in_proximity_range( +/*============================*/ + const fts_query_t* query, /*!< in: query instance */ + fts_match_t** match, /*!< in: query instance */ + fts_proximity_t* qualified_pos) /*!< in: position info for + qualified ranges */ +{ + fts_get_doc_t get_doc; + fts_cache_t* cache = query->index->table->fts->cache; + dberr_t err; + + memset(&get_doc, 0x0, sizeof(get_doc)); + + mysql_mutex_lock(&cache->lock); + get_doc.index_cache = fts_find_index_cache(cache, query->index); + 
mysql_mutex_unlock(&cache->lock); + ut_a(get_doc.index_cache != NULL); + + fts_phrase_t phrase(get_doc.index_cache->index->table); + + phrase.distance = query->distance; + phrase.charset = get_doc.index_cache->charset; + phrase.heap = mem_heap_create(512); + phrase.proximity_pos = qualified_pos; + phrase.found = FALSE; + + err = fts_doc_fetch_by_doc_id( + &get_doc, match[0]->doc_id, NULL, FTS_FETCH_DOC_BY_ID_EQUAL, + fts_query_fetch_document, &phrase); + + if (UNIV_UNLIKELY(err != DB_SUCCESS)) { + ib::error() << "(" << err << ") in verification" + " phase of proximity search"; + } + + /* Free the prepared statement. */ + if (get_doc.get_document_graph) { + que_graph_free(get_doc.get_document_graph); + get_doc.get_document_graph = NULL; + } + + mem_heap_free(phrase.heap); + + return(err == DB_SUCCESS && phrase.found); +} + +/*****************************************************************//** +Iterate over the matched document ids and search the for the +actual phrase in the text. +@return DB_SUCCESS if all OK */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_query_search_phrase( +/*====================*/ + fts_query_t* query, /*!< in: query instance */ + ib_vector_t* orig_tokens, /*!< in: tokens to search, + with any stopwords in the + original phrase */ + ib_vector_t* tokens) /*!< in: tokens that does + not include stopwords and + can be used to calculate + ranking */ +{ + ulint i; + fts_get_doc_t get_doc; + ulint n_matched; + fts_cache_t* cache = query->index->table->fts->cache; + + n_matched = ib_vector_size(query->matched); + + /* Setup the doc retrieval infrastructure. 
*/ + memset(&get_doc, 0x0, sizeof(get_doc)); + + mysql_mutex_lock(&cache->lock); + + get_doc.index_cache = fts_find_index_cache(cache, query->index); + + /* Must find the index cache */ + ut_a(get_doc.index_cache != NULL); + + mysql_mutex_unlock(&cache->lock); + +#ifdef FTS_INTERNAL_DIAG_PRINT + ib::info() << "Start phrase search"; +#endif + + /* Read the document from disk and do the actual + match, matching documents will be added to the current + doc id set. */ + for (i = 0; i < n_matched && query->error == DB_SUCCESS; ++i) { + fts_match_t* match; + ibool found = FALSE; + + match = static_cast<fts_match_t*>( + ib_vector_get(query->matched, i)); + + /* Skip the document ids that were filtered out by + an earlier pass. */ + if (match->doc_id != 0) { + + query->error = fts_query_match_document( + orig_tokens, &get_doc, match, + query->distance, query->parser, &found); + + if (query->error == DB_SUCCESS && found) { + ulint z; + + query->error = fts_query_process_doc_id(query, + match->doc_id, 0); + if (query->error != DB_SUCCESS) { + goto func_exit; + } + + for (z = 0; z < ib_vector_size(tokens); z++) { + fts_string_t* token; + token = static_cast<fts_string_t*>( + ib_vector_get(tokens, z)); + fts_query_add_word_to_document( + query, match->doc_id, token); + } + } + } + } + +func_exit: + /* Free the prepared statement. 
*/ + if (get_doc.get_document_graph) { + que_graph_free(get_doc.get_document_graph); + get_doc.get_document_graph = NULL; + } + + return(query->error); +} + +/** Split the phrase into tokens +@param[in,out] query query instance +@param[in] node query node to search +@param[in,out] tokens token vector +@param[in,out] orig_tokens original node tokens include stopword +@param[in,out] heap mem heap */ +static +void +fts_query_phrase_split( + fts_query_t* query, + const fts_ast_node_t* node, + ib_vector_t* tokens, + ib_vector_t* orig_tokens, + mem_heap_t* heap) +{ + fts_string_t phrase; + ulint len = 0; + ulint cur_pos = 0; + fts_ast_node_t* term_node = NULL; + + if (node->type == FTS_AST_TEXT) { + phrase.f_str = node->text.ptr->str; + phrase.f_len = node->text.ptr->len; + len = phrase.f_len; + } else { + ut_ad(node->type == FTS_AST_PARSER_PHRASE_LIST); + phrase.f_str = NULL; + phrase.f_len = 0; + term_node = node->list.head; + } + + while (true) { + fts_cache_t* cache = query->index->table->fts->cache; + ulint cur_len; + fts_string_t result_str; + + if (node->type == FTS_AST_TEXT) { + if (cur_pos >= len) { + break; + } + + cur_len = innobase_mysql_fts_get_token( + query->fts_index_table.charset, + reinterpret_cast<const byte*>(phrase.f_str) + + cur_pos, + reinterpret_cast<const byte*>(phrase.f_str) + + len, + &result_str); + + if (cur_len == 0) { + break; + } + + cur_pos += cur_len; + } else { + ut_ad(node->type == FTS_AST_PARSER_PHRASE_LIST); + /* Term node in parser phrase list */ + if (term_node == NULL) { + break; + } + + ut_a(term_node->type == FTS_AST_TERM); + result_str.f_str = term_node->term.ptr->str; + result_str.f_len = term_node->term.ptr->len; + result_str.f_n_char = fts_get_token_size( + query->fts_index_table.charset, + reinterpret_cast<char*>(result_str.f_str), + result_str.f_len); + + term_node = term_node->next; + } + + if (result_str.f_n_char == 0) { + continue; + } + + fts_string_t* token = static_cast<fts_string_t*>( + ib_vector_push(tokens, 
NULL)); + fts_string_dup(token, &result_str, heap); + + if (fts_check_token( + &result_str, + cache->stopword_info.cached_stopword, + query->fts_index_table.charset)) { + /* Add the word to the RB tree so that we can + calculate it's frequencey within a document. */ + fts_query_add_word_freq(query, token); + } else { + ib_vector_pop(tokens); + } + + /* we will start to store all words including stopwords + in the "orig_tokens" vector, but skip any leading words + that are stopwords */ + if (!ib_vector_is_empty(tokens)) { + fts_string_t* orig_token = static_cast<fts_string_t*>( + ib_vector_push(orig_tokens, NULL)); + + orig_token->f_str = token->f_str; + orig_token->f_len = token->f_len; + } + } +} + +/*****************************************************************//** +Text/Phrase search. +@return DB_SUCCESS or error code */ +static MY_ATTRIBUTE((warn_unused_result)) +dberr_t +fts_query_phrase_search( +/*====================*/ + fts_query_t* query, /*!< in: query instance */ + const fts_ast_node_t* node) /*!< in: node to search */ +{ + ib_vector_t* tokens; + ib_vector_t* orig_tokens; + mem_heap_t* heap = mem_heap_create(sizeof(fts_string_t)); + ib_alloc_t* heap_alloc; + ulint num_token; + + heap_alloc = ib_heap_allocator_create(heap); + + tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4); + orig_tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4); + + if (query->distance != ULINT_UNDEFINED && query->distance > 0) { + query->flags = FTS_PROXIMITY; + } else { + query->flags = FTS_PHRASE; + } + + /* Split the phrase into tokens. */ + fts_query_phrase_split(query, node, tokens, orig_tokens, heap); + + num_token = ib_vector_size(tokens); + if (num_token > MAX_PROXIMITY_ITEM) { + query->error = DB_FTS_TOO_MANY_WORDS_IN_PHRASE; + goto func_exit; + } + + ut_ad(ib_vector_size(orig_tokens) >= num_token); + + /* Ignore empty strings. 
*/ + if (num_token > 0) { + fts_string_t* token = NULL; + fts_fetch_t fetch; + trx_t* trx = query->trx; + fts_ast_oper_t oper = query->oper; + que_t* graph = NULL; + ulint i; + dberr_t error; + + /* Create the vector for storing matching document ids + and the positions of the first token of the phrase. */ + if (!query->matched) { + ib_alloc_t* heap_alloc; + + heap_alloc = ib_heap_allocator_create(heap); + + if (!(query->flags & FTS_PROXIMITY) + && !(query->flags & FTS_PHRASE)) { + query->matched = ib_vector_create( + heap_alloc, sizeof(fts_match_t), + 64); + } else { + ut_a(num_token <= MAX_PROXIMITY_ITEM); + query->match_array = + (ib_vector_t**) mem_heap_alloc( + heap, + num_token * + sizeof(query->matched)); + + for (i = 0; i < num_token; i++) { + query->match_array[i] = + ib_vector_create( + heap_alloc, sizeof(fts_match_t), + 64); + } + + query->matched = query->match_array[0]; + } + } + + /* Setup the callback args for filtering and consolidating + the ilist. */ + fetch.read_arg = query; + fetch.read_record = fts_query_index_fetch_nodes; + + for (i = 0; i < num_token; i++) { + /* Search for the first word from the phrase. 
*/ + token = static_cast<fts_string_t*>( + ib_vector_get(tokens, i)); + + if (query->flags & FTS_PROXIMITY + || query->flags & FTS_PHRASE) { + query->matched = query->match_array[i]; + } + + error = fts_index_fetch_nodes( + trx, &graph, &query->fts_index_table, + token, &fetch); + + /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */ + ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS)); + if (error != DB_SUCCESS) { + query->error = error; + } + + que_graph_free(graph); + graph = NULL; + + fts_query_cache(query, token); + + if (!(query->flags & FTS_PHRASE) + && !(query->flags & FTS_PROXIMITY)) { + break; + } + + /* If any of the token can't be found, + no need to continue match */ + if (ib_vector_is_empty(query->match_array[i]) + || query->error != DB_SUCCESS) { + goto func_exit; + } + } + + /* Just a single word, no need to fetch the original + documents to do phrase matching */ + if (ib_vector_size(orig_tokens) == 1 + && !ib_vector_is_empty(query->match_array[0])) { + fts_match_t* match; + ulint n_matched; + + n_matched = ib_vector_size(query->match_array[0]); + + for (i = 0; i < n_matched; i++) { + match = static_cast<fts_match_t*>( + ib_vector_get( + query->match_array[0], i)); + + query->error = fts_query_process_doc_id( + query, match->doc_id, 0); + if (query->error != DB_SUCCESS) { + goto func_exit; + } + + fts_query_add_word_to_document( + query, match->doc_id, token); + } + query->oper = oper; + goto func_exit; + } + + /* If we are doing proximity search, verify the distance + between all words, and check they are in specified distance. */ + if (query->flags & FTS_PROXIMITY) { + fts_phrase_or_proximity_search(query, tokens); + } else { + ibool matched; + + /* Phrase Search case: + We filter out the doc ids that don't contain + all the tokens in the phrase. It's cheaper to + search the ilist than bringing the documents in + and then doing a search through the text. 
Isolated + testing shows this also helps in mitigating disruption + of the buffer cache. */ + matched = fts_phrase_or_proximity_search(query, tokens); + query->matched = query->match_array[0]; + + /* Read the actual text in and search for the phrase. */ + if (matched) { + ut_ad(query->error == DB_SUCCESS); + query->error = fts_query_search_phrase( + query, orig_tokens, tokens); + } + } + + /* Restore original operation. */ + query->oper = oper; + + if (query->error != DB_SUCCESS) { + goto func_exit; + } + } + +func_exit: + mem_heap_free(heap); + + /* Don't need it anymore. */ + query->matched = NULL; + + return(query->error); +} + +/*****************************************************************//** +Find the word and evaluate. +@return DB_SUCCESS if all go well */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_query_execute( +/*==============*/ + fts_query_t* query, /*!< in: query instance */ + fts_string_t* token) /*!< in: token to search */ +{ + switch (query->oper) { + case FTS_NONE: + case FTS_NEGATE: + case FTS_INCR_RATING: + case FTS_DECR_RATING: + query->error = fts_query_union(query, token); + break; + + case FTS_EXIST: + query->error = fts_query_intersect(query, token); + break; + + case FTS_IGNORE: + query->error = fts_query_difference(query, token); + break; + + default: + ut_error; + } + + return(query->error); +} + +/*****************************************************************//** +Create a wildcard string. It's the responsibility of the caller to +free the byte* pointer. It's allocated using ut_malloc_nokey(). 
+@return ptr to allocated memory */ +static +byte* +fts_query_get_token( +/*================*/ + fts_ast_node_t* node, /*!< in: the current sub tree */ + fts_string_t* token) /*!< in: token to create */ +{ + ulint str_len; + byte* new_ptr = NULL; + + str_len = node->term.ptr->len; + + ut_a(node->type == FTS_AST_TERM); + + token->f_len = str_len; + token->f_str = node->term.ptr->str; + + if (node->term.wildcard) { + + token->f_str = static_cast<byte*>(ut_malloc_nokey(str_len + 2)); + token->f_len = str_len + 1; + + memcpy(token->f_str, node->term.ptr->str, str_len); + + token->f_str[str_len] = '%'; + token->f_str[token->f_len] = 0; + + new_ptr = token->f_str; + } + + return(new_ptr); +} + +static dberr_t fts_ast_visit_sub_exp(fts_ast_node_t*, fts_ast_callback, void*); + +/*****************************************************************//** +Visit every node of the AST. */ +static +dberr_t +fts_query_visitor( +/*==============*/ + fts_ast_oper_t oper, /*!< in: current operator */ + fts_ast_node_t* node, /*!< in: The root of the current subtree*/ + void* arg) /*!< in: callback arg*/ +{ + byte* ptr; + fts_string_t token; + fts_query_t* query = static_cast<fts_query_t*>(arg); + + ut_a(node); + DBUG_ENTER("fts_query_visitor"); + DBUG_PRINT("fts", ("nodetype: %s", fts_ast_node_type_get(node->type))); + + token.f_n_char = 0; + query->oper = oper; + query->cur_node = node; + + switch (node->type) { + case FTS_AST_TEXT: + case FTS_AST_PARSER_PHRASE_LIST: + + if (query->oper == FTS_EXIST) { + ut_ad(query->intersection == NULL); + query->intersection = rbt_create( + sizeof(fts_ranking_t), fts_ranking_doc_id_cmp); + + query->total_size += SIZEOF_RBT_CREATE; + } + + /* Set the current proximity distance. */ + query->distance = node->text.distance; + + /* Force collection of doc ids and the positions. 
*/ + query->collect_positions = TRUE; + + query->error = fts_query_phrase_search(query, node); + + query->collect_positions = FALSE; + + if (query->oper == FTS_EXIST) { + fts_query_free_doc_ids(query, query->doc_ids); + query->doc_ids = query->intersection; + query->intersection = NULL; + } + + break; + + case FTS_AST_TERM: + token.f_str = node->term.ptr->str; + token.f_len = node->term.ptr->len; + + /* Collect wildcard words for QUERY EXPANSION. */ + if (node->term.wildcard && query->wildcard_words != NULL) { + ib_rbt_bound_t parent; + + if (rbt_search(query->wildcard_words, &parent, &token) + != 0) { + fts_string_t word; + + fts_string_dup(&word, &token, query->heap); + rbt_add_node(query->wildcard_words, &parent, + &word); + } + } + + /* Add the word to our RB tree that will be used to + calculate this terms per document frequency. */ + fts_query_add_word_freq(query, &token); + + ptr = fts_query_get_token(node, &token); + query->error = fts_query_execute(query, &token); + + if (ptr) { + ut_free(ptr); + } + + break; + + case FTS_AST_SUBEXP_LIST: + query->error = fts_ast_visit_sub_exp(node, fts_query_visitor, arg); + break; + + default: + ut_error; + } + + if (query->oper == FTS_EXIST) { + query->multi_exist = true; + } + + DBUG_RETURN(query->error); +} + +/** Process (nested) sub-expression, create a new result set to store the +sub-expression result by processing nodes under current sub-expression +list. Merge the sub-expression result with that of parent expression list. 
+@param[in,out] node current root node +@param[in,out] visitor callback function +@param[in,out] arg argument for callback +@return DB_SUCCESS if all go well */ +static +dberr_t +fts_ast_visit_sub_exp( + fts_ast_node_t* node, + fts_ast_callback visitor, + void* arg) +{ + fts_ast_oper_t cur_oper; + fts_query_t* query = static_cast<fts_query_t*>(arg); + ib_rbt_t* parent_doc_ids; + ib_rbt_t* subexpr_doc_ids; + dberr_t error = DB_SUCCESS; + bool will_be_ignored = false; + bool multi_exist; + + DBUG_ENTER("fts_ast_visit_sub_exp"); + + ut_a(node->type == FTS_AST_SUBEXP_LIST); + + /* To avoid stack overflow, we limit the mutual recursion + depth between fts_ast_visit(), fts_query_visitor() and + fts_ast_visit_sub_exp(). */ + if (query->visiting_sub_exp++ > 31) { + query->error = DB_OUT_OF_MEMORY; + DBUG_RETURN(query->error); + } + + cur_oper = query->oper; + + /* Save current result set */ + parent_doc_ids = query->doc_ids; + + /* Create new result set to store the sub-expression result. We + will merge this result set with the parent after processing. */ + query->doc_ids = rbt_create(sizeof(fts_ranking_t), + fts_ranking_doc_id_cmp); + + query->total_size += SIZEOF_RBT_CREATE; + + multi_exist = query->multi_exist; + query->multi_exist = false; + /* Process nodes in current sub-expression and store its + result set in query->doc_ids we created above. */ + error = fts_ast_visit(FTS_NONE, node, visitor, + arg, &will_be_ignored); + + /* Reinstate parent node state */ + query->multi_exist = multi_exist; + query->oper = cur_oper; + query->visiting_sub_exp--; + + /* Merge the sub-expression result with the parent result set. */ + subexpr_doc_ids = query->doc_ids; + query->doc_ids = parent_doc_ids; + if (error == DB_SUCCESS) { + error = fts_merge_doc_ids(query, subexpr_doc_ids); + } + + /* Free current result set. Result already merged into parent. 
*/ + fts_query_free_doc_ids(query, subexpr_doc_ids); + + DBUG_RETURN(error); +} + +#if 0 +/*****************************************************************//*** +Check if the doc id exists in the ilist. +@return TRUE if doc id found */ +static +ulint +fts_query_find_doc_id( +/*==================*/ + fts_select_t* select, /*!< in/out: contains the doc id to + find, we update the word freq if + document found */ + void* data, /*!< in: doc id ilist */ + ulint len) /*!< in: doc id ilist size */ +{ + byte* ptr = data; + doc_id_t doc_id = 0; + ulint decoded = 0; + + /* Decode the ilist and search for selected doc_id. We also + calculate the frequency of the word in the document if found. */ + while (decoded < len && !select->found) { + ulint freq = 0; + ulint min_pos = 0; + ulint last_pos = 0; + ulint pos = fts_decode_vlc(&ptr); + + /* Add the delta. */ + doc_id += pos; + + while (*ptr) { + ++freq; + last_pos += fts_decode_vlc(&ptr); + + /* Only if min_pos is not set and the current + term exists in a position greater than the + min_pos of the previous term. */ + if (min_pos == 0 && last_pos > select->min_pos) { + min_pos = last_pos; + } + } + + /* Skip the end of word position marker. */ + ++ptr; + + /* Bytes decoded so far. */ + decoded = ptr - (byte*) data; + + /* A word may exist in the document but we only consider a + match if it exists in a position that is greater than the + position of the previous term. */ + if (doc_id == select->doc_id && min_pos > 0) { + fts_doc_freq_t* doc_freq; + + /* Add the doc id to the doc freq rb tree, if + the doc id doesn't exist it will be created. */ + doc_freq = fts_query_add_doc_freq( + select->word_freq->doc_freqs, doc_id); + + /* Avoid duplicating the frequency tally */ + if (doc_freq->freq == 0) { + doc_freq->freq = freq; + } + + select->found = TRUE; + select->min_pos = min_pos; + } + } + + return(select->found); +} +#endif + +/*****************************************************************//** +Read and filter nodes. 
+@return DB_SUCCESS if all go well, +or return DB_FTS_EXCEED_RESULT_CACHE_LIMIT */ +static +dberr_t +fts_query_filter_doc_ids( +/*=====================*/ + fts_query_t* query, /*!< in: query instance */ + const fts_string_t* word, /*!< in: the current word */ + fts_word_freq_t* word_freq, /*!< in/out: word frequency */ + const fts_node_t* node, /*!< in: current FTS node */ + void* data, /*!< in: doc id ilist */ + ulint len, /*!< in: doc id ilist size */ + ibool calc_doc_count) /*!< in: whether to remember doc count */ +{ + const byte* ptr = static_cast<byte*>(data); + doc_id_t doc_id = 0; + ulint decoded = 0; + ib_rbt_t* doc_freqs = word_freq->doc_freqs; + + /* Decode the ilist and add the doc ids to the query doc_id set. */ + while (decoded < len) { + ulint freq = 0; + fts_doc_freq_t* doc_freq; + fts_match_t* match = NULL; + doc_id_t last_pos = 0; + doc_id_t pos = fts_decode_vlc(&ptr); + + /* Some sanity checks. */ + if (doc_id == 0) { + ut_a(pos == node->first_doc_id); + } + + /* Add the delta. */ + doc_id += pos; + + if (calc_doc_count) { + word_freq->doc_count++; + } + + /* We simply collect the matching instances here. */ + if (query->collect_positions) { + ib_alloc_t* heap_alloc; + + /* Create a new fts_match_t instance. */ + match = static_cast<fts_match_t*>( + ib_vector_push(query->matched, NULL)); + + match->start = 0; + match->doc_id = doc_id; + heap_alloc = ib_vector_allocator(query->matched); + + /* Allocate from the same heap as the + parent container. */ + match->positions = ib_vector_create( + heap_alloc, sizeof(ulint), 64); + + query->total_size += sizeof(fts_match_t) + + sizeof(ib_vector_t) + + sizeof(ulint) * 64; + } + + /* Unpack the positions within the document. */ + while (*ptr) { + last_pos += fts_decode_vlc(&ptr); + + /* Collect the matching word positions, for phrase + matching later. */ + if (query->collect_positions) { + ib_vector_push(match->positions, &last_pos); + } + + ++freq; + } + + /* End of list marker. 
*/ + last_pos = (ulint) -1; + + if (query->collect_positions) { + ut_a(match != NULL); + ib_vector_push(match->positions, &last_pos); + } + + /* Add the doc id to the doc freq rb tree, if the doc id + doesn't exist it will be created. */ + doc_freq = fts_query_add_doc_freq(query, doc_freqs, doc_id); + + /* Avoid duplicating frequency tally. */ + if (doc_freq->freq == 0) { + doc_freq->freq = freq; + } + + /* Skip the end of word position marker. */ + ++ptr; + + /* Bytes decoded so far */ + decoded = ulint(ptr - (byte*) data); + + /* We simply collect the matching documents and the + positions here and match later. */ + if (!query->collect_positions) { + /* We ignore error here and will check it later */ + fts_query_process_doc_id(query, doc_id, 0); + + /* Add the word to the document's matched RB tree. */ + fts_query_add_word_to_document(query, doc_id, word); + } + } + + /* Some sanity checks. */ + ut_a(doc_id == node->last_doc_id); + + if (query->total_size > fts_result_cache_limit) { + return(DB_FTS_EXCEED_RESULT_CACHE_LIMIT); + } else { + return(DB_SUCCESS); + } +} + +/*****************************************************************//** +Read the FTS INDEX row. +@return DB_SUCCESS if all go well. */ +static +dberr_t +fts_query_read_node( +/*================*/ + fts_query_t* query, /*!< in: query instance */ + const fts_string_t* word, /*!< in: current word */ + que_node_t* exp) /*!< in: query graph node */ +{ + int i; + int ret; + fts_node_t node; + ib_rbt_bound_t parent; + fts_word_freq_t* word_freq; + ibool skip = FALSE; + fts_string_t term; + byte buf[FTS_MAX_WORD_LEN + 1]; + dberr_t error = DB_SUCCESS; + + ut_a(query->cur_node->type == FTS_AST_TERM + || query->cur_node->type == FTS_AST_TEXT + || query->cur_node->type == FTS_AST_PARSER_PHRASE_LIST); + + memset(&node, 0, sizeof(node)); + term.f_str = buf; + + /* Need to consider the wildcard search case, the word frequency + is created on the search string not the actual word. 
So we need + to assign the frequency on search string behalf. */ + if (query->cur_node->type == FTS_AST_TERM + && query->cur_node->term.wildcard) { + + term.f_len = query->cur_node->term.ptr->len; + ut_ad(FTS_MAX_WORD_LEN >= term.f_len); + memcpy(term.f_str, query->cur_node->term.ptr->str, term.f_len); + } else { + term.f_len = word->f_len; + ut_ad(FTS_MAX_WORD_LEN >= word->f_len); + memcpy(term.f_str, word->f_str, word->f_len); + } + + /* Lookup the word in our rb tree, it must exist. */ + ret = rbt_search(query->word_freqs, &parent, &term); + + ut_a(ret == 0); + + word_freq = rbt_value(fts_word_freq_t, parent.last); + + /* Start from 1 since the first column has been read by the caller. + Also, we rely on the order of the columns projected, to filter + out ilists that are out of range and we always want to read + the doc_count irrespective of the suitablility of the row. */ + + for (i = 1; exp && !skip; exp = que_node_get_next(exp), ++i) { + + dfield_t* dfield = que_node_get_val(exp); + byte* data = static_cast<byte*>( + dfield_get_data(dfield)); + ulint len = dfield_get_len(dfield); + + ut_a(len != UNIV_SQL_NULL); + + /* Note: The column numbers below must match the SELECT. */ + + switch (i) { + case 1: /* DOC_COUNT */ + word_freq->doc_count += mach_read_from_4(data); + break; + + case 2: /* FIRST_DOC_ID */ + node.first_doc_id = fts_read_doc_id(data); + + /* Skip nodes whose doc ids are out range. */ + if (query->oper == FTS_EXIST + && query->upper_doc_id > 0 + && node.first_doc_id > query->upper_doc_id) { + skip = TRUE; + } + break; + + case 3: /* LAST_DOC_ID */ + node.last_doc_id = fts_read_doc_id(data); + + /* Skip nodes whose doc ids are out range. 
*/ + if (query->oper == FTS_EXIST + && query->lower_doc_id > 0 + && node.last_doc_id < query->lower_doc_id) { + skip = TRUE; + } + break; + + case 4: /* ILIST */ + + error = fts_query_filter_doc_ids( + query, &word_freq->word, word_freq, + &node, data, len, FALSE); + + break; + + default: + ut_error; + } + } + + if (!skip) { + /* Make sure all columns were read. */ + + ut_a(i == 5); + } + + return error; +} + +/*****************************************************************//** +Callback function to fetch the rows in an FTS INDEX record. +@return always returns TRUE */ +static +ibool +fts_query_index_fetch_nodes( +/*========================*/ + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: pointer to fts_fetch_t */ +{ + fts_string_t key; + sel_node_t* sel_node = static_cast<sel_node_t*>(row); + fts_fetch_t* fetch = static_cast<fts_fetch_t*>(user_arg); + fts_query_t* query = static_cast<fts_query_t*>(fetch->read_arg); + que_node_t* exp = sel_node->select_list; + dfield_t* dfield = que_node_get_val(exp); + void* data = dfield_get_data(dfield); + ulint dfield_len = dfield_get_len(dfield); + + key.f_str = static_cast<byte*>(data); + key.f_len = dfield_len; + + ut_a(dfield_len <= FTS_MAX_WORD_LEN); + + /* Note: we pass error out by 'query->error' */ + query->error = fts_query_read_node(query, &key, que_node_get_next(exp)); + + if (query->error != DB_SUCCESS) { + ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT); + return(FALSE); + } else { + return(TRUE); + } +} + +/*****************************************************************//** +Calculate the inverse document frequency (IDF) for all the terms. */ +static +void +fts_query_calculate_idf( +/*====================*/ + fts_query_t* query) /*!< in: Query state */ +{ + const ib_rbt_node_t* node; + ib_uint64_t total_docs = query->total_docs; + + /* We need to free any instances of fts_doc_freq_t that we + may have allocated. 
*/ + for (node = rbt_first(query->word_freqs); + node; + node = rbt_next(query->word_freqs, node)) { + + fts_word_freq_t* word_freq; + + word_freq = rbt_value(fts_word_freq_t, node); + + if (word_freq->doc_count > 0) { + if (total_docs == word_freq->doc_count) { + /* QP assume ranking > 0 if we find + a match. Since Log10(1) = 0, we cannot + make IDF a zero value if do find a + word in all documents. So let's make + it an arbitrary very small number */ + word_freq->idf = log10(1.0001); + } else { + word_freq->idf = log10( + static_cast<double>(total_docs) + / static_cast<double>( + word_freq->doc_count)); + } + } + } +} + +/*****************************************************************//** +Calculate the ranking of the document. */ +static +void +fts_query_calculate_ranking( +/*========================*/ + const fts_query_t* query, /*!< in: query state */ + fts_ranking_t* ranking) /*!< in: Document to rank */ +{ + ulint pos = 0; + fts_string_t word; + + /* At this stage, ranking->rank should not exceed the 1.0 + bound */ + ut_ad(ranking->rank <= 1.0 && ranking->rank >= -1.0); + ut_ad(rbt_size(query->word_map) == query->word_vector->size()); + + while (fts_ranking_words_get_next(query, ranking, &pos, &word)) { + int ret; + ib_rbt_bound_t parent; + double weight; + fts_doc_freq_t* doc_freq; + fts_word_freq_t* word_freq; + + ret = rbt_search(query->word_freqs, &parent, &word); + + /* It must exist. */ + ut_a(ret == 0); + + word_freq = rbt_value(fts_word_freq_t, parent.last); + + ret = rbt_search( + word_freq->doc_freqs, &parent, &ranking->doc_id); + + /* It must exist. */ + ut_a(ret == 0); + + doc_freq = rbt_value(fts_doc_freq_t, parent.last); + + weight = (double) doc_freq->freq * word_freq->idf; + + ranking->rank += (fts_rank_t) (weight * word_freq->idf); + } +} + +/*****************************************************************//** +Add ranking to the result set. 
*/ +static +void +fts_query_add_ranking( +/*==================*/ + fts_query_t* query, /*!< in: query state */ + ib_rbt_t* ranking_tree, /*!< in: ranking tree */ + const fts_ranking_t* new_ranking) /*!< in: ranking of a document */ +{ + ib_rbt_bound_t parent; + + /* Lookup the ranking in our rb tree and add if it doesn't exist. */ + if (rbt_search(ranking_tree, &parent, new_ranking) == 0) { + fts_ranking_t* ranking; + + ranking = rbt_value(fts_ranking_t, parent.last); + + ranking->rank += new_ranking->rank; + + ut_a(ranking->words == NULL); + } else { + rbt_add_node(ranking_tree, &parent, new_ranking); + + query->total_size += SIZEOF_RBT_NODE_ADD + + sizeof(fts_ranking_t); + } +} + +/*****************************************************************//** +Retrieve the FTS Relevance Ranking result for doc with doc_id +@return the relevance ranking value, 0 if no ranking value +present. */ +float +fts_retrieve_ranking( +/*=================*/ + fts_result_t* result, /*!< in: FTS result structure */ + doc_id_t doc_id) /*!< in: doc_id of the item to retrieve */ +{ + ib_rbt_bound_t parent; + fts_ranking_t new_ranking; + + DBUG_ENTER("fts_retrieve_ranking"); + + if (!result || !result->rankings_by_id) { + DBUG_RETURN(0); + } + + new_ranking.doc_id = doc_id; + + /* Lookup the ranking in our rb tree */ + if (rbt_search(result->rankings_by_id, &parent, &new_ranking) == 0) { + fts_ranking_t* ranking; + + ranking = rbt_value(fts_ranking_t, parent.last); + + DBUG_RETURN(ranking->rank); + } + + DBUG_RETURN(0); +} + +/*****************************************************************//** +Create the result and copy the data to it. 
*/ +static +fts_result_t* +fts_query_prepare_result( +/*=====================*/ + fts_query_t* query, /*!< in: Query state */ + fts_result_t* result) /*!< in: result this can contain + data from a previous search on + another FTS index */ +{ + const ib_rbt_node_t* node; + bool result_is_null = false; + + DBUG_ENTER("fts_query_prepare_result"); + + if (result == NULL) { + result = static_cast<fts_result_t*>( + ut_zalloc_nokey(sizeof(*result))); + + result->rankings_by_id = rbt_create( + sizeof(fts_ranking_t), fts_ranking_doc_id_cmp); + + query->total_size += sizeof(fts_result_t) + SIZEOF_RBT_CREATE; + result_is_null = true; + } + + if (query->flags == FTS_OPT_RANKING) { + fts_word_freq_t* word_freq; + ulint size = ib_vector_size(query->deleted->doc_ids); + doc_id_t* updates = + (doc_id_t*) query->deleted->doc_ids->data; + + node = rbt_first(query->word_freqs); + ut_ad(node); + word_freq = rbt_value(fts_word_freq_t, node); + + for (node = rbt_first(word_freq->doc_freqs); + node; + node = rbt_next(word_freq->doc_freqs, node)) { + fts_doc_freq_t* doc_freq; + fts_ranking_t ranking; + + doc_freq = rbt_value(fts_doc_freq_t, node); + + /* Don't put deleted docs into result */ + if (fts_bsearch(updates, 0, static_cast<int>(size), + doc_freq->doc_id) >= 0) { + /* one less matching doc count */ + --word_freq->doc_count; + continue; + } + + ranking.doc_id = doc_freq->doc_id; + ranking.rank = static_cast<fts_rank_t>(doc_freq->freq); + ranking.words = NULL; + + fts_query_add_ranking(query, result->rankings_by_id, + &ranking); + + if (query->total_size > fts_result_cache_limit) { + query->error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT; + fts_query_free_result(result); + DBUG_RETURN(NULL); + } + } + + /* Calculate IDF only after we exclude the deleted items */ + fts_query_calculate_idf(query); + + node = rbt_first(query->word_freqs); + word_freq = rbt_value(fts_word_freq_t, node); + + /* Calculate the ranking for each doc */ + for (node = rbt_first(result->rankings_by_id); + node != 
NULL; + node = rbt_next(result->rankings_by_id, node)) { + + fts_ranking_t* ranking; + + ranking = rbt_value(fts_ranking_t, node); + + ranking->rank = static_cast<fts_rank_t>( + ranking->rank * word_freq->idf * word_freq->idf); + } + + DBUG_RETURN(result); + } + + ut_a(rbt_size(query->doc_ids) > 0); + + for (node = rbt_first(query->doc_ids); + node; + node = rbt_next(query->doc_ids, node)) { + + fts_ranking_t* ranking; + + ranking = rbt_value(fts_ranking_t, node); + fts_query_calculate_ranking(query, ranking); + + // FIXME: I think we may requre this information to improve the + // ranking of doc ids which have more word matches from + // different FTS indexes. + + /* We don't need these anymore free the resources. */ + ranking->words = NULL; + + if (!result_is_null) { + fts_query_add_ranking(query, result->rankings_by_id, ranking); + + if (query->total_size > fts_result_cache_limit) { + query->error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT; + fts_query_free_result(result); + DBUG_RETURN(NULL); + } + } + } + + if (result_is_null) { + /* Use doc_ids directly */ + rbt_free(result->rankings_by_id); + result->rankings_by_id = query->doc_ids; + query->doc_ids = NULL; + } + + DBUG_RETURN(result); +} + +/*****************************************************************//** +Get the result of the query. Calculate the similarity coefficient. */ +static +fts_result_t* +fts_query_get_result( +/*=================*/ + fts_query_t* query, /*!< in: query instance */ + fts_result_t* result) /*!< in: result */ +{ + DBUG_ENTER("fts_query_get_result"); + + if (rbt_size(query->doc_ids) > 0 || query->flags == FTS_OPT_RANKING) { + /* Copy the doc ids to the result. */ + result = fts_query_prepare_result(query, result); + } else { + /* Create an empty result instance. */ + result = static_cast<fts_result_t*>( + ut_zalloc_nokey(sizeof(*result))); + } + + DBUG_RETURN(result); +} + +/*****************************************************************//** +FTS Query free resources and reset. 
*/
static
void
fts_query_free(
/*===========*/
	fts_query_t*	query)	/*!< in/out: query instance to free; every
				member that is set is released and the
				whole struct is zero-filled on return */
{

	if (query->read_nodes_graph) {
		que_graph_free(query->read_nodes_graph);
	}

	if (query->root) {
		fts_ast_free_node(query->root);
	}

	if (query->deleted) {
		fts_doc_ids_free(query->deleted);
	}

	if (query->intersection) {
		fts_query_free_doc_ids(query, query->intersection);

		/* Forget the tree that was just released. Without this
		the assertion further down would fire on the stale pointer
		whenever an intersection set was still pending, e.g. when
		the query is torn down after an error. */
		query->intersection = NULL;
	}

	if (query->doc_ids) {
		fts_query_free_doc_ids(query, query->doc_ids);
	}

	if (query->word_freqs) {
		const ib_rbt_node_t*	node;

		/* Each fts_word_freq_t owns a doc_freqs tree of its own;
		release those before the tree that holds them. */
		for (node = rbt_first(query->word_freqs);
		     node;
		     node = rbt_next(query->word_freqs, node)) {

			fts_word_freq_t*	word_freq;

			word_freq = rbt_value(fts_word_freq_t, node);

			rbt_free(word_freq->doc_freqs);
		}

		rbt_free(query->word_freqs);
	}

	if (query->wildcard_words != NULL) {
		rbt_free(query->wildcard_words);
	}

	/* No intersection set may be referenced past this point. */
	ut_a(!query->intersection);

	if (query->word_map) {
		rbt_free(query->word_map);
	}

	if (query->word_vector != NULL) {
		UT_DELETE(query->word_vector);
	}

	if (query->heap) {
		mem_heap_free(query->heap);
	}

	/* Leave no dangling pointers behind in the caller's struct. */
	memset(query, 0, sizeof(*query));
}

/*****************************************************************//**
Parse the query using flex/bison or plugin parser.
@return parse tree node.
*/ +static +fts_ast_node_t* +fts_query_parse( +/*============*/ + fts_query_t* query, /*!< in: query instance */ + byte* query_str, /*!< in: query string */ + ulint query_len) /*!< in: query string length */ +{ + int error; + fts_ast_state_t state; + bool mode = query->boolean_mode; + DBUG_ENTER("fts_query_parse"); + + memset(&state, 0x0, sizeof(state)); + + state.charset = query->fts_index_table.charset; + + DBUG_EXECUTE_IF("fts_instrument_query_disable_parser", + query->parser = NULL;); + + if (query->parser) { + state.root = state.cur_node = + fts_ast_create_node_list(&state, NULL); + error = fts_parse_by_parser(mode, query_str, query_len, + query->parser, &state); + } else { + /* Setup the scanner to use, this depends on the mode flag. */ + state.lexer = fts_lexer_create(mode, query_str, query_len); + state.charset = query->fts_index_table.charset; + error = fts_parse(&state); + fts_lexer_free(state.lexer); + state.lexer = NULL; + } + + /* Error during parsing ? */ + if (error) { + /* Free the nodes that were allocated during parsing. */ + fts_ast_state_free(&state); + } else { + query->root = state.root; + + if (UNIV_UNLIKELY(fts_enable_diag_print) && query->root) { + fts_ast_node_print(query->root); + } + } + + DBUG_RETURN(state.root); +} + +/*******************************************************************//** +FTS Query optimization +Set FTS_OPT_RANKING if it is a simple term query */ +static +void +fts_query_can_optimize( +/*===================*/ + fts_query_t* query, /*!< in/out: query instance */ + uint flags) /*!< In: FTS search mode */ +{ + fts_ast_node_t* node = query->root; + + if (flags & FTS_EXPAND) { + return; + } + + /* Check if it has only a term without oper */ + ut_ad(node->type == FTS_AST_LIST); + node = node->list.head; + if (node != NULL && node->type == FTS_AST_TERM && node->next == NULL) { + query->flags = FTS_OPT_RANKING; + } +} + +/** FTS Query entry point. 
+@param[in,out] trx transaction +@param[in] index fts index to search +@param[in] flags FTS search mode +@param[in] query_str FTS query +@param[in] query_len FTS query string len in bytes +@param[in,out] result result doc ids +@return DB_SUCCESS if successful otherwise error code */ +dberr_t +fts_query( + trx_t* trx, + dict_index_t* index, + uint flags, + const byte* query_str, + ulint query_len, + fts_result_t** result) +{ + fts_query_t query; + dberr_t error = DB_SUCCESS; + byte* lc_query_str; + ulint lc_query_str_len; + ulint result_len; + bool boolean_mode; + trx_t* query_trx; /* FIXME: use provided trx */ + CHARSET_INFO* charset; + ulint start_time_ms; + bool will_be_ignored = false; + + boolean_mode = flags & FTS_BOOL; + + *result = NULL; + memset(&query, 0x0, sizeof(query)); + query_trx = trx_create(); + query_trx->op_info = "FTS query"; + + start_time_ms = ut_time_ms(); + + query.trx = query_trx; + query.index = index; + query.boolean_mode = boolean_mode; + query.deleted = fts_doc_ids_create(); + query.cur_node = NULL; + + query.fts_common_table.type = FTS_COMMON_TABLE; + query.fts_common_table.table_id = index->table->id; + query.fts_common_table.table = index->table; + + charset = fts_index_get_charset(index); + + query.fts_index_table.type = FTS_INDEX_TABLE; + query.fts_index_table.index_id = index->id; + query.fts_index_table.table_id = index->table->id; + query.fts_index_table.charset = charset; + query.fts_index_table.table = index->table; + + query.word_map = rbt_create_arg_cmp( + sizeof(fts_string_t), innobase_fts_text_cmp, (void*)charset); + query.word_vector = UT_NEW_NOKEY(word_vector_t()); + query.error = DB_SUCCESS; + + /* Setup the RB tree that will be used to collect per term + statistics. 
*/ + query.word_freqs = rbt_create_arg_cmp( + sizeof(fts_word_freq_t), innobase_fts_text_cmp, + (void*) charset); + + if (flags & FTS_EXPAND) { + query.wildcard_words = rbt_create_arg_cmp( + sizeof(fts_string_t), innobase_fts_text_cmp, (void *)charset); + } + + query.total_size += SIZEOF_RBT_CREATE; + + query.total_docs = dict_table_get_n_rows(index->table); + + query.fts_common_table.suffix = "DELETED"; + + /* Read the deleted doc_ids, we need these for filtering. */ + error = fts_table_fetch_doc_ids( + NULL, &query.fts_common_table, query.deleted); + + if (error != DB_SUCCESS) { + goto func_exit; + } + + query.fts_common_table.suffix = "DELETED_CACHE"; + + error = fts_table_fetch_doc_ids( + NULL, &query.fts_common_table, query.deleted); + + if (error != DB_SUCCESS) { + goto func_exit; + } + + /* Get the deleted doc ids that are in the cache. */ + fts_cache_append_deleted_doc_ids( + index->table->fts->cache, query.deleted->doc_ids); + DEBUG_SYNC_C("fts_deleted_doc_ids_append"); + + /* Sort the vector so that we can do a binary search over the ids. */ + ib_vector_sort(query.deleted->doc_ids, fts_doc_id_cmp); + + /* Convert the query string to lower case before parsing. We own + the ut_malloc'ed result and so remember to free it before return. */ + + lc_query_str_len = query_len * charset->casedn_multiply() + 1; + lc_query_str = static_cast<byte*>(ut_malloc_nokey(lc_query_str_len)); + + /* For binary collations, a case sensitive search is + performed. Hence don't convert to lower case. */ + if (my_binary_compare(charset)) { + memcpy(lc_query_str, query_str, query_len); + lc_query_str[query_len]= 0; + result_len= query_len; + } else { + result_len = innobase_fts_casedn_str( + charset, (char*)( query_str), query_len, + (char*)(lc_query_str), lc_query_str_len); + } + + ut_ad(result_len < lc_query_str_len); + + lc_query_str[result_len] = 0; + + query.heap = mem_heap_create(128); + + /* Create the rb tree for the doc id (current) set. 
*/ + query.doc_ids = rbt_create( + sizeof(fts_ranking_t), fts_ranking_doc_id_cmp); + query.parser = index->parser; + + query.total_size += SIZEOF_RBT_CREATE; + + /* Parse the input query string. */ + if (fts_query_parse(&query, lc_query_str, result_len)) { + fts_ast_node_t* ast = query.root; + ast->trx = trx; + + /* Optimize query to check if it's a single term */ + fts_query_can_optimize(&query, flags); + + DBUG_EXECUTE_IF("fts_instrument_result_cache_limit", + fts_result_cache_limit = 2048; + ); + + /* Traverse the Abstract Syntax Tree (AST) and execute + the query. */ + query.error = fts_ast_visit( + FTS_NONE, ast, fts_query_visitor, + &query, &will_be_ignored); + if (query.error == DB_INTERRUPTED) { + error = DB_INTERRUPTED; + ut_free(lc_query_str); + goto func_exit; + } + + /* If query expansion is requested, extend the search + with first search pass result */ + if (query.error == DB_SUCCESS && (flags & FTS_EXPAND)) { + query.error = fts_expand_query(index, &query); + } + + /* Calculate the inverse document frequency of the terms. */ + if (query.error == DB_SUCCESS + && query.flags != FTS_OPT_RANKING) { + fts_query_calculate_idf(&query); + } + + /* Copy the result from the query state, so that we can + return it to the caller. */ + if (query.error == DB_SUCCESS) { + *result = fts_query_get_result(&query, *result); + } + + error = query.error; + } else { + /* still return an empty result set */ + *result = static_cast<fts_result_t*>( + ut_zalloc_nokey(sizeof(**result))); + } + + if (trx_is_interrupted(trx)) { + error = DB_INTERRUPTED; + ut_free(lc_query_str); + if (*result) { + fts_query_free_result(*result); + } + goto func_exit; + } + + ut_free(lc_query_str); + + if (UNIV_UNLIKELY(fts_enable_diag_print) && (*result)) { + ulint diff_time = ut_time_ms() - start_time_ms; + + ib::info() << "FTS Search Processing time: " + << diff_time / 1000 << " secs: " << diff_time % 1000 + << " millisec: row(s) " + << ((*result)->rankings_by_id + ? 
lint(rbt_size((*result)->rankings_by_id)) + : -1); + + /* Log memory consumption & result size */ + ib::info() << "Full Search Memory: " << query.total_size + << " (bytes), Row: " + << ((*result)->rankings_by_id + ? rbt_size((*result)->rankings_by_id) + : 0) + << "."; + } + +func_exit: + fts_query_free(&query); + + query_trx->free(); + + return(error); +} + +/*****************************************************************//** +FTS Query free result, returned by fts_query(). */ +void +fts_query_free_result( +/*==================*/ + fts_result_t* result) /*!< in: result instance to free.*/ +{ + if (result) { + if (result->rankings_by_id != NULL) { + rbt_free(result->rankings_by_id); + result->rankings_by_id = NULL; + } + if (result->rankings_by_rank != NULL) { + rbt_free(result->rankings_by_rank); + result->rankings_by_rank = NULL; + } + + ut_free(result); + result = NULL; + } +} + +/*****************************************************************//** +FTS Query sort result, returned by fts_query() on fts_ranking_t::rank. */ +void +fts_query_sort_result_on_rank( +/*==========================*/ + fts_result_t* result) /*!< out: result instance to sort.*/ +{ + const ib_rbt_node_t* node; + ib_rbt_t* ranked; + + ut_a(result->rankings_by_id != NULL); + if (result->rankings_by_rank) { + rbt_free(result->rankings_by_rank); + } + + ranked = rbt_create(sizeof(fts_ranking_t), fts_query_compare_rank); + + /* We need to free any instances of fts_doc_freq_t that we + may have allocated. */ + for (node = rbt_first(result->rankings_by_id); + node; + node = rbt_next(result->rankings_by_id, node)) { + + fts_ranking_t* ranking; + + ranking = rbt_value(fts_ranking_t, node); + + ut_a(ranking->words == NULL); + + rbt_insert(ranked, ranking, ranking); + } + + /* Reset the current node too. */ + result->current = NULL; + result->rankings_by_rank = ranked; +} + +/*******************************************************************//** +A debug function to print result doc_id set. 
*/ +static +void +fts_print_doc_id( +/*=============*/ + fts_query_t* query) /*!< in : tree that stores doc_ids.*/ +{ + const ib_rbt_node_t* node; + + /* Iterate each member of the doc_id set */ + for (node = rbt_first(query->doc_ids); + node; + node = rbt_next(query->doc_ids, node)) { + fts_ranking_t* ranking; + ranking = rbt_value(fts_ranking_t, node); + + ib::info() << "doc_ids info, doc_id: " << ranking->doc_id; + + ulint pos = 0; + fts_string_t word; + + while (fts_ranking_words_get_next(query, ranking, &pos, &word)) { + ib::info() << "doc_ids info, value: " << word.f_str; + } + } +} + +/*************************************************************//** +This function implements a simple "blind" query expansion search: +words in documents found in the first search pass will be used as +search arguments to search the document again, thus "expand" +the search result set. +@return DB_SUCCESS if success, otherwise the error code */ +static MY_ATTRIBUTE((nonnull, warn_unused_result)) +dberr_t +fts_expand_query( +/*=============*/ + dict_index_t* index, /*!< in: FTS index to search */ + fts_query_t* query) /*!< in: FTS query instance */ +{ + const ib_rbt_node_t* node; + const ib_rbt_node_t* token_node; + fts_doc_t result_doc; + dberr_t error = DB_SUCCESS; + const fts_index_cache_t*index_cache; + + /* If no doc is found in first search pass, return */ + if (!rbt_size(query->doc_ids)) { + return(error); + } + + /* Init "result_doc", to hold words from the first search pass */ + fts_doc_init(&result_doc); + + mysql_mutex_lock(&index->table->fts->cache->lock); + index_cache = fts_find_index_cache(index->table->fts->cache, index); + mysql_mutex_unlock(&index->table->fts->cache->lock); + + ut_a(index_cache); + + result_doc.tokens = rbt_create_arg_cmp( + sizeof(fts_token_t), innobase_fts_text_cmp, + (void*) index_cache->charset); + + result_doc.charset = index_cache->charset; + result_doc.parser = index_cache->index->parser; + + query->total_size += SIZEOF_RBT_CREATE; + + 
if (UNIV_UNLIKELY(fts_enable_diag_print)) { + fts_print_doc_id(query); + } + + for (node = rbt_first(query->doc_ids); + node; + node = rbt_next(query->doc_ids, node)) { + + fts_ranking_t* ranking; + ulint prev_token_size; + ulint estimate_size; + + prev_token_size = rbt_size(result_doc.tokens); + + ranking = rbt_value(fts_ranking_t, node); + + /* Fetch the documents with the doc_id from the + result of first seach pass. Since we do not + store document-to-word mapping, we need to + fetch the original document and parse them. + Future optimization could be done here if we + support some forms of document-to-word mapping */ + fts_doc_fetch_by_doc_id(NULL, ranking->doc_id, index, + FTS_FETCH_DOC_BY_ID_EQUAL, + fts_query_expansion_fetch_doc, + &result_doc); + + /* Estimate memory used, see fts_process_token and fts_token_t. + We ignore token size here. */ + estimate_size = (rbt_size(result_doc.tokens) - prev_token_size) + * (SIZEOF_RBT_NODE_ADD + sizeof(fts_token_t) + + sizeof(ib_vector_t) + sizeof(ulint) * 32); + query->total_size += estimate_size; + + if (query->total_size > fts_result_cache_limit) { + error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT; + goto func_exit; + } + } + + /* Remove words that have already been searched in the first pass */ + for (ulint i = 0; i < query->word_vector->size(); i++) { + fts_string_t word = query->word_vector->at(i); + ib_rbt_bound_t parent; + + if (query->wildcard_words + && rbt_search(query->wildcard_words, &parent, &word) == 0) { + /* If it's a wildcard word, remove words having + it as prefix. */ + while (rbt_search_cmp(result_doc.tokens, + &parent, &word, NULL, + innobase_fts_text_cmp_prefix) + == 0) { + ut_free(rbt_remove_node(result_doc.tokens, + parent.last)); + } + } else { + /* We don't check return value, because the word may + have been deleted by a previous wildcard word as its + prefix, e.g. ('g * good'). 
*/ + rbt_delete(result_doc.tokens, &word); + } + } + + /* Search the table the second time with expanded search list */ + for (token_node = rbt_first(result_doc.tokens); + token_node; + token_node = rbt_next(result_doc.tokens, token_node)) { + fts_token_t* mytoken; + mytoken = rbt_value(fts_token_t, token_node); + + /* '%' in the end is treated as prefix search, + it can cause assert failure, so we skip it. */ + if (mytoken->text.f_str[mytoken->text.f_len - 1] == '%') { + continue; + } + + ut_ad(mytoken->text.f_str[mytoken->text.f_len] == 0); + fts_query_add_word_freq(query, &mytoken->text); + error = fts_query_union(query, &mytoken->text); + + if (error != DB_SUCCESS) { + break; + } + } + +func_exit: + fts_doc_free(&result_doc); + + return(error); +} +/*************************************************************//** +This function finds documents that contain all words in a +phrase or proximity search. And if proximity search, verify +the words are close enough to each other, as in specified distance. +This function is called for phrase and proximity search. +@return TRUE if documents are found, FALSE if otherwise */ +static +ibool +fts_phrase_or_proximity_search( +/*===========================*/ + fts_query_t* query, /*!< in/out: query instance. + query->doc_ids might be instantiated + with qualified doc IDs */ + ib_vector_t* tokens) /*!< in: Tokens contain words */ +{ + ulint n_matched; + ulint i; + ibool matched = FALSE; + ulint num_token = ib_vector_size(tokens); + fts_match_t* match[MAX_PROXIMITY_ITEM]; + ibool end_list = FALSE; + + /* Number of matched documents for the first token */ + n_matched = ib_vector_size(query->match_array[0]); + + /* We have a set of match list for each word, we shall + walk through the list and find common documents that + contain all the matching words. 
*/ + for (i = 0; i < n_matched; i++) { + ulint j; + ulint k = 0; + fts_proximity_t qualified_pos; + + match[0] = static_cast<fts_match_t*>( + ib_vector_get(query->match_array[0], i)); + + /* For remaining match list for the token(word), we + try to see if there is a document with the same + doc id */ + for (j = 1; j < num_token; j++) { + match[j] = static_cast<fts_match_t*>( + ib_vector_get(query->match_array[j], k)); + + while (match[j]->doc_id < match[0]->doc_id + && k < ib_vector_size(query->match_array[j])) { + match[j] = static_cast<fts_match_t*>( + ib_vector_get( + query->match_array[j], k)); + k++; + } + + if (match[j]->doc_id > match[0]->doc_id) { + /* no match */ + if (query->flags & FTS_PHRASE) { + match[0]->doc_id = 0; + } + break; + } + + if (k == ib_vector_size(query->match_array[j])) { + end_list = TRUE; + + if (query->flags & FTS_PHRASE) { + ulint s; + /* Since i is the last doc id in the + match_array[j], remove all doc ids > i + from the match_array[0]. */ + fts_match_t* match_temp; + for (s = i + 1; s < n_matched; s++) { + match_temp = static_cast< + fts_match_t*>(ib_vector_get( + query->match_array[0], s)); + match_temp->doc_id = 0; + } + + if (match[j]->doc_id != + match[0]->doc_id) { + /* no match */ + match[0]->doc_id = 0; + } + } + + if (match[j]->doc_id != match[0]->doc_id) { + goto func_exit; + } + } + + /* FIXME: A better solution will be a counter array + remember each run's last position. 
So we don't + reset it here very time */ + k = 0; + } + + if (j != num_token) { + continue; + } + + /* For this matching doc, we need to further + verify whether the words in the doc are close + to each other, and within the distance specified + in the proximity search */ + if (query->flags & FTS_PHRASE) { + matched = TRUE; + } else if (fts_proximity_get_positions( + match, num_token, ULINT_MAX, &qualified_pos)) { + + /* Fetch the original documents and count the + words in between matching words to see that is in + specified distance */ + if (fts_query_is_in_proximity_range( + query, match, &qualified_pos)) { + /* If so, mark we find a matching doc */ + query->error = fts_query_process_doc_id( + query, match[0]->doc_id, 0); + if (query->error != DB_SUCCESS) { + matched = FALSE; + goto func_exit; + } + + matched = TRUE; + for (ulint z = 0; z < num_token; z++) { + fts_string_t* token; + token = static_cast<fts_string_t*>( + ib_vector_get(tokens, z)); + fts_query_add_word_to_document( + query, match[0]->doc_id, token); + } + } + } + + if (end_list) { + break; + } + } + +func_exit: + return(matched); +} + +/*************************************************************//** +This function checks whether words in result documents are close to +each other (within proximity range as specified by "distance"). +If "distance" is MAX_ULINT, then it will find all combinations of +positions of matching words and store min and max positions +in the "qualified_pos" for later verification. +@return true if words are close to each other, false if otherwise */ +static +bool +fts_proximity_get_positions( +/*========================*/ + fts_match_t** match, /*!< in: query instance */ + ulint num_match, /*!< in: number of matching + items */ + ulint distance, /*!< in: distance value + for proximity search */ + fts_proximity_t* qualified_pos) /*!< out: the position info + records ranges containing + all matching words. 
*/ +{ + ulint i; + ulint idx[MAX_PROXIMITY_ITEM]; + ulint num_pos[MAX_PROXIMITY_ITEM]; + ulint min_idx; + + qualified_pos->n_pos = 0; + + ut_a(num_match <= MAX_PROXIMITY_ITEM); + + /* Each word could appear multiple times in a doc. So + we need to walk through each word's position list, and find + closest distance between different words to see if + they are in the proximity distance. */ + + /* Assume each word's position list is sorted, we + will just do a walk through to all words' lists + similar to a the merge phase of a merge sort */ + for (i = 0; i < num_match; i++) { + /* idx is the current position we are checking + for a particular word */ + idx[i] = 0; + + /* Number of positions for this word */ + num_pos[i] = ib_vector_size(match[i]->positions); + } + + /* Start with the first word */ + min_idx = 0; + + while (idx[min_idx] < num_pos[min_idx]) { + ulint position[MAX_PROXIMITY_ITEM]; + ulint min_pos = ULINT_MAX; + ulint max_pos = 0; + + /* Check positions in each word position list, and + record the max/min position */ + for (i = 0; i < num_match; i++) { + position[i] = *(ulint*) ib_vector_get_const( + match[i]->positions, idx[i]); + + if (position[i] == ULINT_UNDEFINED) { + break; + } + + if (position[i] < min_pos) { + min_pos = position[i]; + min_idx = i; + } + + if (position[i] > max_pos) { + max_pos = position[i]; + } + } + + /* If max and min position are within range, we + find a good match */ + if (max_pos - min_pos <= distance + && (i >= num_match || position[i] != ULINT_UNDEFINED)) { + /* The charset has variable character + length encoding, record the min_pos and + max_pos, we will need to verify the actual + number of characters */ + qualified_pos->min_pos.push_back(min_pos); + qualified_pos->max_pos.push_back(max_pos); + qualified_pos->n_pos++; + } + + /* Otherwise, move to the next position is the + list for the word with the smallest position */ + idx[min_idx]++; + } + + return(qualified_pos->n_pos != 0); +} diff --git 
a/storage/innobase/fts/fts0sql.cc b/storage/innobase/fts/fts0sql.cc new file mode 100644 index 00000000..1970f6f5 --- /dev/null +++ b/storage/innobase/fts/fts0sql.cc @@ -0,0 +1,208 @@ +/***************************************************************************** + +Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2019, 2021, MariaDB Corporation. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file fts/fts0sql.cc +Full Text Search functionality. + +Created 2007-03-27 Sunny Bains +*******************************************************/ + +#include "que0que.h" +#include "trx0roll.h" +#include "pars0pars.h" +#include "dict0dict.h" +#include "fts0types.h" +#include "fts0priv.h" + +/** SQL statements for creating the ancillary FTS tables. */ + +/** Preamble to all SQL statements. */ +static const char* fts_sql_begin= + "PROCEDURE P() IS\n"; + +/** Postamble to non-committing SQL statements. */ +static const char* fts_sql_end= + "\n" + "END;\n"; + +/******************************************************************//** +Get the table id. 
+@return number of bytes written */ +int +fts_get_table_id( +/*=============*/ + const fts_table_t* + fts_table, /*!< in: FTS Auxiliary table */ + char* table_id) /*!< out: table id, must be at least + FTS_AUX_MIN_TABLE_ID_LENGTH bytes + long */ +{ + int len; + + ut_a(fts_table->table != NULL); + + switch (fts_table->type) { + case FTS_COMMON_TABLE: + len = fts_write_object_id(fts_table->table_id, table_id); + break; + + case FTS_INDEX_TABLE: + + len = fts_write_object_id(fts_table->table_id, table_id); + + table_id[len] = '_'; + ++len; + table_id += len; + + len += fts_write_object_id(fts_table->index_id, table_id); + break; + + default: + ut_error; + } + + ut_a(len >= 16); + ut_a(len < FTS_AUX_MIN_TABLE_ID_LENGTH); + + return(len); +} + +/** Construct the name of an internal FTS table for the given table. +@param[in] fts_table metadata on fulltext-indexed table +@param[out] table_name a name up to MAX_FULL_NAME_LEN +@param[in] dict_locked whether dict_sys.latch is being held */ +void fts_get_table_name(const fts_table_t* fts_table, char* table_name, + bool dict_locked) +{ + if (!dict_locked) { + dict_sys.freeze(SRW_LOCK_CALL); + } + ut_ad(dict_sys.frozen()); + /* Include the separator as well. */ + const size_t dbname_len = fts_table->table->name.dblen() + 1; + ut_ad(dbname_len > 1); + memcpy(table_name, fts_table->table->name.m_name, dbname_len); + if (!dict_locked) { + dict_sys.unfreeze(); + } + memcpy(table_name += dbname_len, "FTS_", 4); + table_name += 4; + table_name += fts_get_table_id(fts_table, table_name); + *table_name++ = '_'; + strcpy(table_name, fts_table->suffix); +} + +/******************************************************************//** +Parse an SQL string. 
+@return query graph */ +que_t* +fts_parse_sql( +/*==========*/ + fts_table_t* fts_table, /*!< in: FTS auxiliarry table info */ + pars_info_t* info, /*!< in: info struct, or NULL */ + const char* sql) /*!< in: SQL string to evaluate */ +{ + char* str; + que_t* graph; + ibool dict_locked; + + str = ut_str3cat(fts_sql_begin, sql, fts_sql_end); + + dict_locked = (fts_table && fts_table->table->fts + && fts_table->table->fts->dict_locked); + + if (!dict_locked) { + /* The InnoDB SQL parser is not re-entrant. */ + dict_sys.lock(SRW_LOCK_CALL); + } + + graph = pars_sql(info, str); + ut_a(graph); + + if (!dict_locked) { + dict_sys.unlock(); + } + + ut_free(str); + + return(graph); +} + +/******************************************************************//** +Evaluate an SQL query graph. +@return DB_SUCCESS or error code */ +dberr_t +fts_eval_sql( +/*=========*/ + trx_t* trx, /*!< in: transaction */ + que_t* graph) /*!< in: Query graph to evaluate */ +{ + que_thr_t* thr; + + graph->trx = trx; + + ut_a(thr = que_fork_start_command(graph)); + + que_run_threads(thr); + + return(trx->error_state); +} + +/******************************************************************//** +Construct the column specification part of the SQL string for selecting the +indexed FTS columns for the given table. Adds the necessary bound +ids to the given 'info' and returns the SQL string. 
Examples: + +One indexed column named "text": + + "$sel0", + info/ids: sel0 -> "text" + +Two indexed columns named "subject" and "content": + + "$sel0, $sel1", + info/ids: sel0 -> "subject", sel1 -> "content", +@return heap-allocated WHERE string */ +const char* +fts_get_select_columns_str( +/*=======================*/ + dict_index_t* index, /*!< in: index */ + pars_info_t* info, /*!< in/out: parser info */ + mem_heap_t* heap) /*!< in: memory heap */ +{ + ulint i; + const char* str = ""; + + for (i = 0; i < index->n_user_defined_cols; i++) { + char* sel_str; + + dict_field_t* field = dict_index_get_nth_field(index, i); + + sel_str = mem_heap_printf(heap, "sel%lu", (ulong) i); + + /* Set copy_name to TRUE since it's dynamic. */ + pars_info_bind_id(info, sel_str, field->name); + + str = mem_heap_printf( + heap, "%s%s$%s", str, (*str) ? ", " : "", sel_str); + } + + return(str); +} diff --git a/storage/innobase/fts/fts0tlex.cc b/storage/innobase/fts/fts0tlex.cc new file mode 100644 index 00000000..29f73f23 --- /dev/null +++ b/storage/innobase/fts/fts0tlex.cc @@ -0,0 +1,2169 @@ +#include "univ.i" +#line 2 "fts0tlex.cc" + +#line 4 "fts0tlex.cc" + +#define YY_INT_ALIGNED short int + +/* A lexical scanner generated by flex */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 6 +#define YY_FLEX_SUBMINOR_VERSION 4 +#if YY_FLEX_SUBMINOR_VERSION > 0 +#define FLEX_BETA +#endif + +#ifdef yy_create_buffer +#define fts0t_create_buffer_ALREADY_DEFINED +#else +#define yy_create_buffer fts0t_create_buffer +#endif + +#ifdef yy_delete_buffer +#define fts0t_delete_buffer_ALREADY_DEFINED +#else +#define yy_delete_buffer fts0t_delete_buffer +#endif + +#ifdef yy_scan_buffer +#define fts0t_scan_buffer_ALREADY_DEFINED +#else +#define yy_scan_buffer fts0t_scan_buffer +#endif + +#ifdef yy_scan_string +#define fts0t_scan_string_ALREADY_DEFINED +#else +#define yy_scan_string fts0t_scan_string +#endif + +#ifdef yy_scan_bytes +#define 
fts0t_scan_bytes_ALREADY_DEFINED +#else +#define yy_scan_bytes fts0t_scan_bytes +#endif + +#ifdef yy_init_buffer +#define fts0t_init_buffer_ALREADY_DEFINED +#else +#define yy_init_buffer fts0t_init_buffer +#endif + +#ifdef yy_flush_buffer +#define fts0t_flush_buffer_ALREADY_DEFINED +#else +#define yy_flush_buffer fts0t_flush_buffer +#endif + +#ifdef yy_load_buffer_state +#define fts0t_load_buffer_state_ALREADY_DEFINED +#else +#define yy_load_buffer_state fts0t_load_buffer_state +#endif + +#ifdef yy_switch_to_buffer +#define fts0t_switch_to_buffer_ALREADY_DEFINED +#else +#define yy_switch_to_buffer fts0t_switch_to_buffer +#endif + +#ifdef yypush_buffer_state +#define fts0tpush_buffer_state_ALREADY_DEFINED +#else +#define yypush_buffer_state fts0tpush_buffer_state +#endif + +#ifdef yypop_buffer_state +#define fts0tpop_buffer_state_ALREADY_DEFINED +#else +#define yypop_buffer_state fts0tpop_buffer_state +#endif + +#ifdef yyensure_buffer_stack +#define fts0tensure_buffer_stack_ALREADY_DEFINED +#else +#define yyensure_buffer_stack fts0tensure_buffer_stack +#endif + +#ifdef yylex +#define fts0tlex_ALREADY_DEFINED +#else +#define yylex fts0tlex +#endif + +#ifdef yyrestart +#define fts0trestart_ALREADY_DEFINED +#else +#define yyrestart fts0trestart +#endif + +#ifdef yylex_init +#define fts0tlex_init_ALREADY_DEFINED +#else +#define yylex_init fts0tlex_init +#endif + +#ifdef yylex_init_extra +#define fts0tlex_init_extra_ALREADY_DEFINED +#else +#define yylex_init_extra fts0tlex_init_extra +#endif + +#ifdef yylex_destroy +#define fts0tlex_destroy_ALREADY_DEFINED +#else +#define yylex_destroy fts0tlex_destroy +#endif + +#ifdef yyget_debug +#define fts0tget_debug_ALREADY_DEFINED +#else +#define yyget_debug fts0tget_debug +#endif + +#ifdef yyset_debug +#define fts0tset_debug_ALREADY_DEFINED +#else +#define yyset_debug fts0tset_debug +#endif + +#ifdef yyget_extra +#define fts0tget_extra_ALREADY_DEFINED +#else +#define yyget_extra fts0tget_extra +#endif + +#ifdef yyset_extra 
+#define fts0tset_extra_ALREADY_DEFINED +#else +#define yyset_extra fts0tset_extra +#endif + +#ifdef yyget_in +#define fts0tget_in_ALREADY_DEFINED +#else +#define yyget_in fts0tget_in +#endif + +#ifdef yyset_in +#define fts0tset_in_ALREADY_DEFINED +#else +#define yyset_in fts0tset_in +#endif + +#ifdef yyget_out +#define fts0tget_out_ALREADY_DEFINED +#else +#define yyget_out fts0tget_out +#endif + +#ifdef yyset_out +#define fts0tset_out_ALREADY_DEFINED +#else +#define yyset_out fts0tset_out +#endif + +#ifdef yyget_leng +#define fts0tget_leng_ALREADY_DEFINED +#else +#define yyget_leng fts0tget_leng +#endif + +#ifdef yyget_text +#define fts0tget_text_ALREADY_DEFINED +#else +#define yyget_text fts0tget_text +#endif + +#ifdef yyget_lineno +#define fts0tget_lineno_ALREADY_DEFINED +#else +#define yyget_lineno fts0tget_lineno +#endif + +#ifdef yyset_lineno +#define fts0tset_lineno_ALREADY_DEFINED +#else +#define yyset_lineno fts0tset_lineno +#endif + +#ifdef yyget_column +#define fts0tget_column_ALREADY_DEFINED +#else +#define yyget_column fts0tget_column +#endif + +#ifdef yyset_column +#define fts0tset_column_ALREADY_DEFINED +#else +#define yyset_column fts0tset_column +#endif + +#ifdef yywrap +#define fts0twrap_ALREADY_DEFINED +#else +#define yywrap fts0twrap +#endif + +#ifdef yyalloc +#define fts0talloc_ALREADY_DEFINED +#else +#define yyalloc fts0talloc +#endif + +#ifdef yyrealloc +#define fts0trealloc_ALREADY_DEFINED +#else +#define yyrealloc fts0trealloc +#endif + +#ifdef yyfree +#define fts0tfree_ALREADY_DEFINED +#else +#define yyfree fts0tfree +#endif + +/* First, we deal with platform-specific or compiler-specific issues. */ + +/* begin standard C headers. */ +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +/* end standard C headers. */ + +/* flex integer type definitions */ + +#ifndef FLEXINT_H +#define FLEXINT_H + +/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. 
*/ + +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. + */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include <inttypes.h> +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; + +/* Limits of integral types. */ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#ifndef SIZE_MAX +#define SIZE_MAX (~(size_t)0) +#endif + +#endif /* ! C99 */ + +#endif /* ! FLEXINT_H */ + +/* begin standard C++ headers. */ + +/* TODO: this is always defined, so inline it */ +#define yyconst const + +#if defined(__GNUC__) && __GNUC__ >= 3 +#define yynoreturn __attribute__((__noreturn__)) +#else +#define yynoreturn +#endif + +/* Returned upon end-of-file. */ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an + * integer in range [0..255] for use as an array index. + */ +#define YY_SC_TO_UI(c) ((YY_CHAR) (c)) + +/* An opaque pointer. 
*/ +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void* yyscan_t; +#endif + +/* For convenience, these vars (plus the bison vars far below) + are macros in the reentrant scanner. */ +#define yyin yyg->yyin_r +#define yyout yyg->yyout_r +#define yyextra yyg->yyextra_r +#define yyleng yyg->yyleng_r +#define yytext yyg->yytext_r +#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) +#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) +#define yy_flex_debug yyg->yy_flex_debug_r + +/* Enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN yyg->yy_start = 1 + 2 * +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. + */ +#define YY_START ((yyg->yy_start - 1) / 2) +#define YYSTATE YY_START +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE yyrestart( yyin , yyscanner ) +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. */ +#ifndef YY_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k. + * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. + * Ditto for the __ia64__ case accordingly. + */ +#define YY_BUF_SIZE 32768 +#else +#define YY_BUF_SIZE 16384 +#endif /* __ia64__ */ +#endif + +/* The state buf must be large enough to hold one state per character in the main buffer. 
+ */ +#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef size_t yy_size_t; +#endif + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + + #define YY_LESS_LINENO(n) + #define YY_LINENO_REWIND_TO(ptr) + +/* Return all but the first "n" matched characters back to the input stream. */ +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + *yy_cp = yyg->yy_hold_char; \ + YY_RESTORE_YY_MORE_OFFSET \ + yyg->yy_c_buf_p = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) +#define unput(c) yyunput( c, yyg->yytext_ptr , yyscanner ) + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + int yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. 
+ */ + int yy_at_bol; + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. */ + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; + +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. + * + * When we actually see the EOF, we change the status to "new" + * (via yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + * + * Returns the top of the stack, or NULL. + */ +#define YY_CURRENT_BUFFER ( yyg->yy_buffer_stack \ + ? yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] \ + : NULL) +/* Same as previous macro, but useful when we know that the buffer stack is not + * NULL or when we need an lvalue. For internal use only. 
+ */ +#define YY_CURRENT_BUFFER_LVALUE yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] + +void yyrestart ( FILE *input_file , yyscan_t yyscanner ); +void yy_switch_to_buffer ( YY_BUFFER_STATE new_buffer , yyscan_t yyscanner ); +YY_BUFFER_STATE yy_create_buffer ( FILE *file, int size , yyscan_t yyscanner ); +void yy_delete_buffer ( YY_BUFFER_STATE b , yyscan_t yyscanner ); +void yy_flush_buffer ( YY_BUFFER_STATE b , yyscan_t yyscanner ); +void yypush_buffer_state ( YY_BUFFER_STATE new_buffer , yyscan_t yyscanner ); +void yypop_buffer_state ( yyscan_t yyscanner ); + +static void yyensure_buffer_stack ( yyscan_t yyscanner ); +static void yy_load_buffer_state ( yyscan_t yyscanner ); +static void yy_init_buffer ( YY_BUFFER_STATE b, FILE *file , yyscan_t yyscanner ); +#define YY_FLUSH_BUFFER yy_flush_buffer( YY_CURRENT_BUFFER , yyscanner) + +YY_BUFFER_STATE yy_scan_buffer ( char *base, yy_size_t size , yyscan_t yyscanner ); +YY_BUFFER_STATE yy_scan_string ( const char *yy_str , yyscan_t yyscanner ); +YY_BUFFER_STATE yy_scan_bytes ( const char *bytes, int len , yyscan_t yyscanner ); + +void *yyalloc ( yy_size_t , yyscan_t yyscanner ); +void *yyrealloc ( void *, yy_size_t , yyscan_t yyscanner ); +void yyfree ( void * , yyscan_t yyscanner ); + +#define yy_new_buffer yy_create_buffer +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! YY_CURRENT_BUFFER ){ \ + yyensure_buffer_stack (yyscanner); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ + } +#define yy_set_bol(at_bol) \ + { \ + if ( ! 
YY_CURRENT_BUFFER ){\ + yyensure_buffer_stack (yyscanner); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ + } +#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) + +/* Begin user sect3 */ + +#define fts0twrap(yyscanner) (/*CONSTCOND*/1) +#define YY_SKIP_YYWRAP +typedef flex_uint8_t YY_CHAR; + +typedef int yy_state_type; + +#define yytext_ptr yytext_r + +static yy_state_type yy_get_previous_state ( yyscan_t yyscanner ); +static yy_state_type yy_try_NUL_trans ( yy_state_type current_state , yyscan_t yyscanner); +static int yy_get_next_buffer ( yyscan_t yyscanner ); +static void yynoreturn yy_fatal_error ( const char* msg , yyscan_t yyscanner ); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. + */ +#define YY_DO_BEFORE_ACTION \ + yyg->yytext_ptr = yy_bp; \ + yyleng = (int) (yy_cp - yy_bp); \ + yyg->yy_hold_char = *yy_cp; \ + *yy_cp = '\0'; \ + yyg->yy_c_buf_p = yy_cp; +#define YY_NUM_RULES 7 +#define YY_END_OF_BUFFER 8 +/* This struct is not used in this scanner, + but its presence is necessary. 
*/ +struct yy_trans_info + { + flex_int32_t yy_verify; + flex_int32_t yy_nxt; + }; +static const flex_int16_t yy_accept[17] = + { 0, + 4, 4, 8, 4, 1, 6, 1, 5, 5, 2, + 4, 1, 1, 0, 3, 0 + } ; + +static const YY_CHAR yy_ec[256] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 4, 1, 5, 1, 1, 6, 1, 1, 1, + 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + } ; + +static const YY_CHAR yy_meta[8] = + { 0, + 1, 2, 3, 4, 5, 5, 1 + } ; + +static const flex_int16_t yy_base[20] = + { 0, + 0, 0, 18, 0, 6, 21, 0, 9, 21, 0, + 0, 0, 0, 4, 21, 21, 10, 11, 15 + } ; + +static const flex_int16_t yy_def[20] = + { 0, + 16, 1, 16, 17, 17, 16, 18, 19, 16, 17, + 17, 5, 18, 19, 16, 0, 16, 16, 16 + } ; + +static const flex_int16_t yy_nxt[29] = + { 0, + 4, 5, 6, 7, 8, 9, 10, 12, 15, 13, + 11, 11, 13, 15, 13, 14, 14, 16, 14, 14, + 3, 16, 16, 16, 16, 16, 16, 16 + } ; + +static const flex_int16_t yy_chk[29] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 5, 14, 5, + 17, 17, 18, 8, 18, 19, 19, 3, 19, 19, + 16, 16, 16, 16, 16, 16, 16, 16 + } ; + +/* The intent behind this definition is that it'll catch + * any uses of REJECT which flex missed. 
+ */ +#define REJECT reject_used_but_not_detected +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +#line 1 "fts0tlex.l" +/***************************************************************************** + +Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ +/** + * @file fts/fts0tlex.l + * FTS parser lexical analyzer + * + * Created 2007/5/9 Sunny Bains + */ +#line 27 "fts0tlex.l" + +#include "fts0ast.h" +#include "fts0pars.h" + +/* Required for reentrant parser */ +#define YY_DECL int fts_tlexer(YYSTYPE* val, yyscan_t yyscanner) +#define exit(A) ut_error + +#line 671 "fts0tlex.cc" +#define YY_NO_INPUT 1 +#line 673 "fts0tlex.cc" + +#define INITIAL 0 + +#ifndef YY_NO_UNISTD_H +/* Special case for "unistd.h", since it is non-ANSI. We include it way + * down here because we want the user's section 1 to have been scanned first. + * The user has a chance to override it with an option. + */ +#include <unistd.h> +#endif + +#ifndef YY_EXTRA_TYPE +#define YY_EXTRA_TYPE void * +#endif + +/* Holds the entire state of the reentrant scanner. */ +struct yyguts_t + { + + /* User-defined. Not touched by flex. 
*/ + YY_EXTRA_TYPE yyextra_r; + + /* The rest are the same as the globals declared in the non-reentrant scanner. */ + FILE *yyin_r, *yyout_r; + size_t yy_buffer_stack_top; /**< index of top of stack. */ + size_t yy_buffer_stack_max; /**< capacity of stack. */ + YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. */ + char yy_hold_char; + int yy_n_chars; + int yyleng_r; + char *yy_c_buf_p; + int yy_init; + int yy_start; + int yy_did_buffer_switch_on_eof; + int yy_start_stack_ptr; + int yy_start_stack_depth; + int *yy_start_stack; + yy_state_type yy_last_accepting_state; + char* yy_last_accepting_cpos; + + int yylineno_r; + int yy_flex_debug_r; + + char *yytext_r; + int yy_more_flag; + int yy_more_len; + + }; /* end struct yyguts_t */ + +static int yy_init_globals ( yyscan_t yyscanner ); + +int yylex_init (yyscan_t* scanner); + +int yylex_init_extra ( YY_EXTRA_TYPE user_defined, yyscan_t* scanner); + +/* Accessor methods to globals. + These are made visible to non-reentrant scanners for convenience. */ + +int yylex_destroy ( yyscan_t yyscanner ); + +int yyget_debug ( yyscan_t yyscanner ); + +void yyset_debug ( int debug_flag , yyscan_t yyscanner ); + +YY_EXTRA_TYPE yyget_extra ( yyscan_t yyscanner ); + +void yyset_extra ( YY_EXTRA_TYPE user_defined , yyscan_t yyscanner ); + +FILE *yyget_in ( yyscan_t yyscanner ); + +void yyset_in ( FILE * _in_str , yyscan_t yyscanner ); + +FILE *yyget_out ( yyscan_t yyscanner ); + +void yyset_out ( FILE * _out_str , yyscan_t yyscanner ); + + int yyget_leng ( yyscan_t yyscanner ); + +char *yyget_text ( yyscan_t yyscanner ); + +int yyget_lineno ( yyscan_t yyscanner ); + +void yyset_lineno ( int _line_number , yyscan_t yyscanner ); + +int yyget_column ( yyscan_t yyscanner ); + +void yyset_column ( int _column_no , yyscan_t yyscanner ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. 
+ */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap ( yyscan_t yyscanner ); +#else +extern int yywrap ( yyscan_t yyscanner ); +#endif +#endif + +#ifndef YY_NO_UNPUT + +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy ( char *, const char *, int , yyscan_t yyscanner); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen ( const char * , yyscan_t yyscanner); +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus +static int yyinput ( yyscan_t yyscanner ); +#else +static int input ( yyscan_t yyscanner ); +#endif + +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k */ +#define YY_READ_BUF_SIZE 16384 +#else +#define YY_READ_BUF_SIZE 8192 +#endif /* __ia64__ */ +#endif + +/* Copy whatever the last rule matched to the standard output. */ +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO do { if (fwrite( yytext, (size_t) yyleng, 1, yyout )) {} } while (0) +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". 
+ */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ + { \ + int c = '*'; \ + int n; \ + for ( n = 0; n < max_size && \ + (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else \ + { \ + errno=0; \ + while ( (result = (int) fread(buf, 1, (yy_size_t) max_size, yyin)) == 0 && ferror(yyin)) \ + { \ + if( errno != EINTR) \ + { \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + break; \ + } \ + errno=0; \ + clearerr(yyin); \ + } \ + }\ +\ + +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg , yyscanner) +#endif + +/* end tables serialization structures and prototypes */ + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int yylex (yyscan_t yyscanner); + +#define YY_DECL int yylex (yyscan_t yyscanner) +#endif /* !YY_DECL */ + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. */ +#ifndef YY_BREAK +#define YY_BREAK /*LINTED*/break; +#endif + +#define YY_RULE_SETUP \ + YY_USER_ACTION + +/** The main scanner function which does all the work. 
+ */ +YY_DECL +{ + yy_state_type yy_current_state; + char *yy_cp, *yy_bp; + int yy_act; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if ( !yyg->yy_init ) + { + yyg->yy_init = 1; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + if ( ! yyg->yy_start ) + yyg->yy_start = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + + if ( ! YY_CURRENT_BUFFER ) { + yyensure_buffer_stack (yyscanner); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner); + } + + yy_load_buffer_state( yyscanner ); + } + + { +#line 45 "fts0tlex.l" + + +#line 934 "fts0tlex.cc" + + while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ + { + yy_cp = yyg->yy_c_buf_p; + + /* Support of yytext. */ + *yy_cp = yyg->yy_hold_char; + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. + */ + yy_bp = yy_cp; + + yy_current_state = yyg->yy_start; +yy_match: + do + { + YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ; + if ( yy_accept[yy_current_state] ) + { + yyg->yy_last_accepting_state = yy_current_state; + yyg->yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 17 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + ++yy_cp; + } + while ( yy_current_state != 16 ); + yy_cp = yyg->yy_last_accepting_cpos; + yy_current_state = yyg->yy_last_accepting_state; + +yy_find_action: + yy_act = yy_accept[yy_current_state]; + + YY_DO_BEFORE_ACTION; + +do_action: /* This label is used only to access EOF actions. 
*/ + + switch ( yy_act ) + { /* beginning of action switch */ + case 0: /* must back up */ + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = yyg->yy_hold_char; + yy_cp = yyg->yy_last_accepting_cpos; + yy_current_state = yyg->yy_last_accepting_state; + goto yy_find_action; + +case 1: +YY_RULE_SETUP +#line 47 "fts0tlex.l" +/* Ignore whitespace */ ; + YY_BREAK +case 2: +YY_RULE_SETUP +#line 49 "fts0tlex.l" +{ + val->oper = fts0tget_text(yyscanner)[0]; + + return(val->oper); +} + YY_BREAK +case 3: +YY_RULE_SETUP +#line 55 "fts0tlex.l" +{ + val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner)); + + return(FTS_TEXT); +} + YY_BREAK +case 4: +YY_RULE_SETUP +#line 61 "fts0tlex.l" +{ + val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner)); + + return(FTS_TERM); +} + YY_BREAK +case 5: +YY_RULE_SETUP +#line 66 "fts0tlex.l" +; + YY_BREAK +case 6: +/* rule 6 can match eol */ +YY_RULE_SETUP +#line 67 "fts0tlex.l" + + YY_BREAK +case 7: +YY_RULE_SETUP +#line 69 "fts0tlex.l" +ECHO; + YY_BREAK +#line 1035 "fts0tlex.cc" +case YY_STATE_EOF(INITIAL): + yyterminate(); + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not including the EOB char. */ + int yy_amount_of_matched_text = (int) (yy_cp - yyg->yytext_ptr) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = yyg->yy_hold_char; + YY_RESTORE_YY_MORE_OFFSET + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between YY_CURRENT_BUFFER and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. 
+ */ + yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). + */ + if ( yyg->yy_c_buf_p <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) + { /* This was really a NUL. */ + yy_state_type yy_next_state; + + yyg->yy_c_buf_p = yyg->yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( yyscanner ); + + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state , yyscanner); + + yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. */ + yy_cp = ++yyg->yy_c_buf_p; + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = yyg->yy_last_accepting_cpos; + yy_current_state = yyg->yy_last_accepting_state; + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer( yyscanner ) ) + { + case EOB_ACT_END_OF_FILE: + { + yyg->yy_did_buffer_switch_on_eof = 0; + + if ( yywrap( yyscanner ) ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. 
+ */ + yyg->yy_c_buf_p = yyg->yytext_ptr + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! yyg->yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + yyg->yy_c_buf_p = + yyg->yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( yyscanner ); + + yy_cp = yyg->yy_c_buf_p; + yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + yyg->yy_c_buf_p = + &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars]; + + yy_current_state = yy_get_previous_state( yyscanner ); + + yy_cp = yyg->yy_c_buf_p; + yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ + } /* end of user's declarations */ +} /* end of yylex */ + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ +static int yy_get_next_buffer (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; + char *source = yyg->yytext_ptr; + int number_to_move, i; + int ret_val; + + if ( yyg->yy_c_buf_p > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( yyg->yy_c_buf_p - yyg->yytext_ptr - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. 
+ */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer. */ + number_to_move = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr - 1); + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars = 0; + + else + { + int num_to_read = + YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ + + /* just a shorter name for the current buffer */ + YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE; + + int yy_c_buf_p_offset = + (int) (yyg->yy_c_buf_p - b->yy_ch_buf); + + if ( b->yy_is_our_buffer ) + { + int new_size = b->yy_buf_size * 2; + + if ( new_size <= 0 ) + b->yy_buf_size += b->yy_buf_size / 8; + else + b->yy_buf_size *= 2; + + b->yy_ch_buf = (char *) + /* Include room in for 2 EOB chars. */ + yyrealloc( (void *) b->yy_ch_buf, + (yy_size_t) (b->yy_buf_size + 2) , yyscanner ); + } + else + /* Can't grow it, we don't own it. */ + b->yy_ch_buf = NULL; + + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( + "fatal error - scanner input buffer overflow" ); + + yyg->yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; + + num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - + number_to_move - 1; + + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. 
*/ + YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), + yyg->yy_n_chars, num_to_read ); + + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; + } + + if ( yyg->yy_n_chars == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yyrestart( yyin , yyscanner); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + if ((yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) { + /* Extend the array by 50%, plus the number we really need. */ + int new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1); + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc( + (void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf, (yy_size_t) new_size , yyscanner ); + if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" ); + /* "- 2" to take care of EOB's */ + YY_CURRENT_BUFFER_LVALUE->yy_buf_size = (int) (new_size - 2); + } + + yyg->yy_n_chars += number_to_move; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] = YY_END_OF_BUFFER_CHAR; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + yyg->yytext_ptr = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; + + return ret_val; +} + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + + static yy_state_type yy_get_previous_state (yyscan_t yyscanner) +{ + yy_state_type yy_current_state; + char *yy_cp; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + yy_current_state = yyg->yy_start; + + for ( yy_cp = yyg->yytext_ptr + YY_MORE_ADJ; yy_cp < yyg->yy_c_buf_p; ++yy_cp ) + { + YY_CHAR yy_c = (*yy_cp ? 
yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); + if ( yy_accept[yy_current_state] ) + { + yyg->yy_last_accepting_state = yy_current_state; + yyg->yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 17 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + } + + return yy_current_state; +} + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner) +{ + int yy_is_jam; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */ + char *yy_cp = yyg->yy_c_buf_p; + + YY_CHAR yy_c = 1; + if ( yy_accept[yy_current_state] ) + { + yyg->yy_last_accepting_state = yy_current_state; + yyg->yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 17 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + yy_is_jam = (yy_current_state == 16); + + (void)yyg; + return yy_is_jam ? 0 : yy_current_state; +} + +#ifndef YY_NO_UNPUT + +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus + static int yyinput (yyscan_t yyscanner) +#else + static int input (yyscan_t yyscanner) +#endif + +{ + int c; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + *yyg->yy_c_buf_p = yyg->yy_hold_char; + + if ( *yyg->yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. 
+ */ + if ( yyg->yy_c_buf_p < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) + /* This was really a NUL. */ + *yyg->yy_c_buf_p = '\0'; + + else + { /* need more input */ + int offset = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr); + ++yyg->yy_c_buf_p; + + switch ( yy_get_next_buffer( yyscanner ) ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yyrestart( yyin , yyscanner); + + /*FALLTHROUGH*/ + + case EOB_ACT_END_OF_FILE: + { + if ( yywrap( yyscanner ) ) + return 0; + + if ( ! yyg->yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(yyscanner); +#else + return input(yyscanner); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + yyg->yy_c_buf_p = yyg->yytext_ptr + offset; + break; + } + } + } + + c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */ + *yyg->yy_c_buf_p = '\0'; /* preserve yytext */ + yyg->yy_hold_char = *++yyg->yy_c_buf_p; + + return c; +} +#endif /* ifndef YY_NO_INPUT */ + +/** Immediately switch to a different input stream. + * @param input_file A readable stream. + * @param yyscanner The scanner object. + * @note This function does not reset the start condition to @c INITIAL . + */ + void yyrestart (FILE * input_file , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if ( ! YY_CURRENT_BUFFER ){ + yyensure_buffer_stack (yyscanner); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner); + } + + yy_init_buffer( YY_CURRENT_BUFFER, input_file , yyscanner); + yy_load_buffer_state( yyscanner ); +} + +/** Switch to a different input buffer. + * @param new_buffer The new input buffer. + * @param yyscanner The scanner object. 
+ */ + void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* TODO. We should be able to replace this entire function body + * with + * yypop_buffer_state(); + * yypush_buffer_state(new_buffer); + */ + yyensure_buffer_stack (yyscanner); + if ( YY_CURRENT_BUFFER == new_buffer ) + return; + + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. */ + *yyg->yy_c_buf_p = yyg->yy_hold_char; + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p; + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; + } + + YY_CURRENT_BUFFER_LVALUE = new_buffer; + yy_load_buffer_state( yyscanner ); + + /* We don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. + */ + yyg->yy_did_buffer_switch_on_eof = 1; +} + +static void yy_load_buffer_state (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; + yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; + yyg->yy_hold_char = *yyg->yy_c_buf_p; +} + +/** Allocate and initialize an input buffer state. + * @param file A readable stream. + * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. + * @param yyscanner The scanner object. + * @return the allocated buffer state. + */ + YY_BUFFER_STATE yy_create_buffer (FILE * file, int size , yyscan_t yyscanner) +{ + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) , yyscanner ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. 
+ */ + b->yy_ch_buf = (char *) yyalloc( (yy_size_t) (b->yy_buf_size + 2) , yyscanner ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_is_our_buffer = 1; + + yy_init_buffer( b, file , yyscanner); + + return b; +} + +/** Destroy the buffer. + * @param b a buffer created with yy_create_buffer() + * @param yyscanner The scanner object. + */ + void yy_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if ( ! b ) + return; + + if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */ + YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; + + if ( b->yy_is_our_buffer ) + yyfree( (void *) b->yy_ch_buf , yyscanner ); + + yyfree( (void *) b , yyscanner ); +} + +/* Initializes or reinitializes a buffer. + * This function is sometimes called more than once on the same buffer, + * such as during a yyrestart() or at EOF. + */ + static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file , yyscan_t yyscanner) + +{ + int oerrno = errno; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + yy_flush_buffer( b , yyscanner); + + b->yy_input_file = file; + b->yy_fill_buffer = 1; + + /* If b is the current buffer, then yy_init_buffer was _probably_ + * called from yyrestart() or through yy_get_next_buffer. + * In that case, we don't want to reset the lineno or column. + */ + if (b != YY_CURRENT_BUFFER){ + b->yy_bs_lineno = 1; + b->yy_bs_column = 0; + } + + b->yy_is_interactive = 0; + + errno = oerrno; +} + +/** Discard all buffered characters. On the next scan, YY_INPUT will be called. + * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. + * @param yyscanner The scanner object. + */ + void yy_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + if ( ! b ) + return; + + b->yy_n_chars = 0; + + /* We always need two end-of-buffer characters. 
The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. + */ + b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[0]; + + b->yy_at_bol = 1; + b->yy_buffer_status = YY_BUFFER_NEW; + + if ( b == YY_CURRENT_BUFFER ) + yy_load_buffer_state( yyscanner ); +} + +/** Pushes the new state onto the stack. The new state becomes + * the current state. This function will allocate the stack + * if necessary. + * @param new_buffer The new state. + * @param yyscanner The scanner object. + */ +void yypush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + if (new_buffer == NULL) + return; + + yyensure_buffer_stack(yyscanner); + + /* This block is copied from yy_switch_to_buffer. */ + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. */ + *yyg->yy_c_buf_p = yyg->yy_hold_char; + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p; + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; + } + + /* Only push if top exists. Otherwise, replace top. */ + if (YY_CURRENT_BUFFER) + yyg->yy_buffer_stack_top++; + YY_CURRENT_BUFFER_LVALUE = new_buffer; + + /* copied from yy_switch_to_buffer. */ + yy_load_buffer_state( yyscanner ); + yyg->yy_did_buffer_switch_on_eof = 1; +} + +/** Removes and deletes the top of the stack, if present. + * The next element becomes the new top. + * @param yyscanner The scanner object. + */ +void yypop_buffer_state (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + if (!YY_CURRENT_BUFFER) + return; + + yy_delete_buffer(YY_CURRENT_BUFFER , yyscanner); + YY_CURRENT_BUFFER_LVALUE = NULL; + if (yyg->yy_buffer_stack_top > 0) + --yyg->yy_buffer_stack_top; + + if (YY_CURRENT_BUFFER) { + yy_load_buffer_state( yyscanner ); + yyg->yy_did_buffer_switch_on_eof = 1; + } +} + +/* Allocates the stack if it does not exist. 
+ * Guarantees space for at least one push. + */ +static void yyensure_buffer_stack (yyscan_t yyscanner) +{ + yy_size_t num_to_alloc; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if (!yyg->yy_buffer_stack) { + + /* First allocation is just for 2 elements, since we don't know if this + * scanner will even need a stack. We use 2 instead of 1 to avoid an + * immediate realloc on the next call. + */ + num_to_alloc = 1; /* After all that talk, this was set to 1 anyways... */ + yyg->yy_buffer_stack = (struct yy_buffer_state**)yyalloc + (num_to_alloc * sizeof(struct yy_buffer_state*) + , yyscanner); + if ( ! yyg->yy_buffer_stack ) + YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); + + memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*)); + + yyg->yy_buffer_stack_max = num_to_alloc; + yyg->yy_buffer_stack_top = 0; + return; + } + + if (yyg->yy_buffer_stack_top >= (yyg->yy_buffer_stack_max) - 1){ + + /* Increase the buffer to prepare for a possible push. */ + yy_size_t grow_size = 8 /* arbitrary grow size */; + + num_to_alloc = yyg->yy_buffer_stack_max + grow_size; + yyg->yy_buffer_stack = (struct yy_buffer_state**)yyrealloc + (yyg->yy_buffer_stack, + num_to_alloc * sizeof(struct yy_buffer_state*) + , yyscanner); + if ( ! yyg->yy_buffer_stack ) + YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); + + /* zero only the new slots.*/ + memset(yyg->yy_buffer_stack + yyg->yy_buffer_stack_max, 0, grow_size * sizeof(struct yy_buffer_state*)); + yyg->yy_buffer_stack_max = num_to_alloc; + } +} + +/** Setup the input buffer state to scan directly from a user-specified character buffer. + * @param base the character buffer + * @param size the size in bytes of the character buffer + * @param yyscanner The scanner object. + * @return the newly allocated buffer state object. 
+ */ +YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner) +{ + YY_BUFFER_STATE b; + + if ( size < 2 || + base[size-2] != YY_END_OF_BUFFER_CHAR || + base[size-1] != YY_END_OF_BUFFER_CHAR ) + /* They forgot to leave room for the EOB's. */ + return NULL; + + b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) , yyscanner ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); + + b->yy_buf_size = (int) (size - 2); /* "- 2" to take care of EOB's */ + b->yy_buf_pos = b->yy_ch_buf = base; + b->yy_is_our_buffer = 0; + b->yy_input_file = NULL; + b->yy_n_chars = b->yy_buf_size; + b->yy_is_interactive = 0; + b->yy_at_bol = 1; + b->yy_fill_buffer = 0; + b->yy_buffer_status = YY_BUFFER_NEW; + + yy_switch_to_buffer( b , yyscanner ); + + return b; +} + +/** Setup the input buffer state to scan a string. The next call to yylex() will + * scan from a @e copy of @a str. + * @param yystr a NUL-terminated string to scan + * @param yyscanner The scanner object. + * @return the newly allocated buffer state object. + * @note If you want to scan bytes that may contain NUL values, then use + * yy_scan_bytes() instead. + */ +YY_BUFFER_STATE yy_scan_string (const char * yystr , yyscan_t yyscanner) +{ + + return yy_scan_bytes( yystr, (int) strlen(yystr) , yyscanner); +} + +/** Setup the input buffer state to scan the given bytes. The next call to yylex() will + * scan from a @e copy of @a bytes. + * @param yybytes the byte buffer to scan + * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes. + * @param yyscanner The scanner object. + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE yy_scan_bytes (const char * yybytes, int _yybytes_len , yyscan_t yyscanner) +{ + YY_BUFFER_STATE b; + char *buf; + yy_size_t n; + int i; + + /* Get memory for full buffer, including space for trailing EOB's. 
*/ + n = (yy_size_t) (_yybytes_len + 2); + buf = (char *) yyalloc( n , yyscanner ); + if ( ! buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); + + for ( i = 0; i < _yybytes_len; ++i ) + buf[i] = yybytes[i]; + + buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; + + b = yy_scan_buffer( buf, n , yyscanner); + if ( ! b ) + YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); + + /* It's okay to grow etc. this buffer, and we should throw it + * away when we're done. + */ + b->yy_is_our_buffer = 1; + + return b; +} + +#ifndef YY_EXIT_FAILURE +#define YY_EXIT_FAILURE 2 +#endif + +static void yynoreturn yy_fatal_error (const char* msg , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + (void)yyg; + fprintf( stderr, "%s\n", msg ); + exit( YY_EXIT_FAILURE ); +} + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + yytext[yyleng] = yyg->yy_hold_char; \ + yyg->yy_c_buf_p = yytext + yyless_macro_arg; \ + yyg->yy_hold_char = *yyg->yy_c_buf_p; \ + *yyg->yy_c_buf_p = '\0'; \ + yyleng = yyless_macro_arg; \ + } \ + while ( 0 ) + +/* Accessor methods (get/set functions) to struct members. */ + +/** Get the user-defined data for this scanner. + * @param yyscanner The scanner object. + */ +YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyextra; +} + +/** Get the current line number. + * @param yyscanner The scanner object. + */ +int yyget_lineno (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if (! YY_CURRENT_BUFFER) + return 0; + + return yylineno; +} + +/** Get the current column number. + * @param yyscanner The scanner object. + */ +int yyget_column (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if (! 
YY_CURRENT_BUFFER) + return 0; + + return yycolumn; +} + +/** Get the input stream. + * @param yyscanner The scanner object. + */ +FILE *yyget_in (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyin; +} + +/** Get the output stream. + * @param yyscanner The scanner object. + */ +FILE *yyget_out (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyout; +} + +/** Get the length of the current token. + * @param yyscanner The scanner object. + */ +int yyget_leng (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyleng; +} + +/** Get the current token. + * @param yyscanner The scanner object. + */ + +char *yyget_text (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yytext; +} + +/** Set the user-defined data. This data is never touched by the scanner. + * @param user_defined The data to be associated with this scanner. + * @param yyscanner The scanner object. + */ +void yyset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyextra = user_defined ; +} + +/** Set the current line number. + * @param _line_number line number + * @param yyscanner The scanner object. + */ +void yyset_lineno (int _line_number , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* lineno is only valid if an input buffer exists. */ + if (! YY_CURRENT_BUFFER ) + YY_FATAL_ERROR( "yyset_lineno called with no buffer" ); + + yylineno = _line_number; +} + +/** Set the current column. + * @param _column_no column number + * @param yyscanner The scanner object. + */ +void yyset_column (int _column_no , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* column is only valid if an input buffer exists. */ + if (! 
YY_CURRENT_BUFFER ) + YY_FATAL_ERROR( "yyset_column called with no buffer" ); + + yycolumn = _column_no; +} + +/** Set the input stream. This does not discard the current + * input buffer. + * @param _in_str A readable stream. + * @param yyscanner The scanner object. + * @see yy_switch_to_buffer + */ +void yyset_in (FILE * _in_str , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyin = _in_str ; +} + +void yyset_out (FILE * _out_str , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyout = _out_str ; +} + +int yyget_debug (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yy_flex_debug; +} + +void yyset_debug (int _bdebug , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yy_flex_debug = _bdebug ; +} + +/* Accessor methods for yylval and yylloc */ + +/* User-visible API */ + +/* yylex_init is special because it creates the scanner itself, so it is + * the ONLY reentrant function that doesn't take the scanner as the last argument. + * That's why we explicitly handle the declaration, instead of using our macros. + */ +int yylex_init(yyscan_t* ptr_yy_globals) +{ + if (ptr_yy_globals == NULL){ + errno = EINVAL; + return 1; + } + + *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), NULL ); + + if (*ptr_yy_globals == NULL){ + errno = ENOMEM; + return 1; + } + + /* By setting to 0xAA, we expose bugs in yy_init_globals. Leave at 0x00 for releases. */ + memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); + + return yy_init_globals ( *ptr_yy_globals ); +} + +/* yylex_init_extra has the same functionality as yylex_init, but follows the + * convention of taking the scanner as the last argument. Note however, that + * this is a *pointer* to a scanner, as it will be allocated by this call (and + * is the reason, too, why this function also must handle its own declaration). 
+ * The user defined value in the first argument will be available to yyalloc in + * the yyextra field. + */ +int yylex_init_extra( YY_EXTRA_TYPE yy_user_defined, yyscan_t* ptr_yy_globals ) +{ + struct yyguts_t dummy_yyguts; + + yyset_extra (yy_user_defined, &dummy_yyguts); + + if (ptr_yy_globals == NULL){ + errno = EINVAL; + return 1; + } + + *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), &dummy_yyguts ); + + if (*ptr_yy_globals == NULL){ + errno = ENOMEM; + return 1; + } + + /* By setting to 0xAA, we expose bugs in + yy_init_globals. Leave at 0x00 for releases. */ + memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); + + yyset_extra (yy_user_defined, *ptr_yy_globals); + + return yy_init_globals ( *ptr_yy_globals ); +} + +static int yy_init_globals (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + /* Initialization is the same as for the non-reentrant scanner. + * This function is called from yylex_destroy(), so don't allocate here. + */ + + yyg->yy_buffer_stack = NULL; + yyg->yy_buffer_stack_top = 0; + yyg->yy_buffer_stack_max = 0; + yyg->yy_c_buf_p = NULL; + yyg->yy_init = 0; + yyg->yy_start = 0; + + yyg->yy_start_stack_ptr = 0; + yyg->yy_start_stack_depth = 0; + yyg->yy_start_stack = NULL; + +/* Defined in main.c */ +#ifdef YY_STDINIT + yyin = stdin; + yyout = stdout; +#else + yyin = NULL; + yyout = NULL; +#endif + + /* For future reference: Set errno on error, since we are called by + * yylex_init() + */ + return 0; +} + +/* yylex_destroy is for both reentrant and non-reentrant scanners. */ +int yylex_destroy (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* Pop the buffer stack, destroying each element. */ + while(YY_CURRENT_BUFFER){ + yy_delete_buffer( YY_CURRENT_BUFFER , yyscanner ); + YY_CURRENT_BUFFER_LVALUE = NULL; + yypop_buffer_state(yyscanner); + } + + /* Destroy the stack itself. 
*/ + yyfree(yyg->yy_buffer_stack , yyscanner); + yyg->yy_buffer_stack = NULL; + + /* Destroy the start condition stack. */ + yyfree( yyg->yy_start_stack , yyscanner ); + yyg->yy_start_stack = NULL; + + /* Reset the globals. This is important in a non-reentrant scanner so the next time + * yylex() is called, initialization will occur. */ + yy_init_globals( yyscanner); + + /* Destroy the main struct (reentrant only). */ + yyfree ( yyscanner , yyscanner ); + yyscanner = NULL; + return 0; +} + +/* + * Internal utility routines. + */ + +#ifndef yytext_ptr +static void yy_flex_strncpy (char* s1, const char * s2, int n , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + (void)yyg; + + int i; + for ( i = 0; i < n; ++i ) + s1[i] = s2[i]; +} +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (const char * s , yyscan_t yyscanner) +{ + int n; + for ( n = 0; s[n]; ++n ) + ; + + return n; +} +#endif + +void *yyalloc (yy_size_t size , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + (void)yyg; + return malloc(size); +} + +void *yyrealloc (void * ptr, yy_size_t size , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + (void)yyg; + + /* The cast to (char *) in the following accommodates both + * implementations that use char* generic pointers, and those + * that use void* generic pointers. It works with the latter + * because both ANSI C and C++ allow castless assignment from + * any pointer type to void*, and deal with argument conversions + * as though doing an assignment. 
+ */ + return realloc(ptr, size); +} + +void yyfree (void * ptr , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + (void)yyg; + free( (char *) ptr ); /* see yyrealloc() for (char *) cast */ +} + +#define YYTABLES_NAME "yytables" + +#line 69 "fts0tlex.l" + + diff --git a/storage/innobase/fts/fts0tlex.l b/storage/innobase/fts/fts0tlex.l new file mode 100644 index 00000000..e19e907f --- /dev/null +++ b/storage/innobase/fts/fts0tlex.l @@ -0,0 +1,69 @@ +/***************************************************************************** + +Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/** + * @file fts/fts0tlex.l + * FTS parser lexical analyzer + * + * Created 2007/5/9 Sunny Bains + */ + +%{ + +#include "fts0ast.h" +#include "fts0pars.h" + +/* Required for reentrant parser */ +#define YY_DECL int fts_tlexer(YYSTYPE* val, yyscan_t yyscanner) +#define exit(A) ut_error + +%} + +%option noinput +%option nounput +%option noyywrap +%option nostdinit +%option reentrant +%option never-interactive + + +%% + +[\t ]+ /* Ignore whitespace */ ; + +[*] { + val->oper = fts0tget_text(yyscanner)[0]; + + return(val->oper); +} + +\"[^\"\n]*\" { + val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner)); + + return(FTS_TEXT); +} + +[^" \n\%]* { + val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner)); + + return(FTS_TERM); +} +. ; +\n + +%% diff --git a/storage/innobase/fts/make_parser.sh b/storage/innobase/fts/make_parser.sh new file mode 100755 index 00000000..6b82c5ba --- /dev/null +++ b/storage/innobase/fts/make_parser.sh @@ -0,0 +1,49 @@ +#!/bin/sh +# +# Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + + +TMPF=t.$$ + +make -f Makefile.query + +echo '#include "univ.i"' > $TMPF + +# This is to avoid compiler warning about unused parameters. +# FIXME: gcc extension "MY_ATTRIBUTE" causing compilation errors on windows +# platform. Quote them out for now. +sed -e ' +s/^\(static.*void.*yy_fatal_error.*msg.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/; +s/^\(static.*void.*yy_flex_strncpy.*n.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/; +s/^\(static.*int.*yy_flex_strlen.*s.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/; +s/^\(\(static\|void\).*fts0[bt]alloc.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/; +s/^\(\(static\|void\).*fts0[bt]realloc.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/; +s/^\(\(static\|void\).*fts0[bt]free.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/; +' < fts0blex.cc >> $TMPF + +mv $TMPF fts0blex.cc + +echo '#include "univ.i"' > $TMPF + +sed -e ' +s/^\(static.*void.*yy_fatal_error.*msg.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/; +s/^\(static.*void.*yy_flex_strncpy.*n.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/; +s/^\(static.*int.*yy_flex_strlen.*s.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/; +s/^\(\(static\|void\).*fts0[bt]alloc.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/; +s/^\(\(static\|void\).*fts0[bt]realloc.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/; +s/^\(\(static\|void\).*fts0[bt]free.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/; +' < fts0tlex.cc >> $TMPF + +mv $TMPF fts0tlex.cc |