summaryrefslogtreecommitdiffstats
path: root/storage/innobase/include/fts0fts.h
blob: 1d2b409be017f25d7213ed3147428203526fac16 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
/*****************************************************************************

Copyright (c) 2011, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2016, 2022, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA

*****************************************************************************/

/******************************************************************//**
@file include/fts0fts.h
Full text search header file

Created 2011/09/02 Sunny Bains
***********************************************************************/

#pragma once

#include "data0type.h"
#include "data0types.h"
#include "mem0mem.h"
#include "rem0types.h"
#include "row0types.h"
#include "trx0types.h"
#include "ut0vec.h"
#include "ut0rbt.h"
#include "ut0wqueue.h"
#include "que0types.h"
#include "ft_global.h"
#include "mysql/plugin_ftparser.h"

/** "NULL" value of a document id. */
#define FTS_NULL_DOC_ID			0

/** FTS hidden column that is used to map to and from the row */
#define FTS_DOC_ID_COL_NAME		"FTS_DOC_ID"

/** The name of the index created by FTS */
#define FTS_DOC_ID_INDEX_NAME		"FTS_DOC_ID_INDEX"

#define FTS_DOC_ID_INDEX_NAME_LEN	16

/** Doc ID is a 8 byte value */
#define FTS_DOC_ID_LEN			8

/** The number of fields to sort when we build FT index with
FIC. Three fields are sorted: (word, doc_id, position) */
#define FTS_NUM_FIELDS_SORT		3

/** Maximum number of rows in a table, smaller than which, we will
optimize using a 4 byte Doc ID for FIC merge sort to reduce sort size */
#define MAX_DOC_ID_OPT_VAL		1073741824

/** Document id type. */
typedef ib_id_t doc_id_t;

/** doc_id_t printf format */
#define FTS_DOC_ID_FORMAT	IB_ID_FMT

/** Convert document id to the InnoDB (BIG ENDIAN) storage format. */
#define fts_write_doc_id(d, s)	mach_write_to_8(d, s)

/** Read a document id to internal format. */
#define fts_read_doc_id(s)	mach_read_from_8(s)

/** Bind the doc id to a variable */
#define fts_bind_doc_id(i, n, v) pars_info_bind_int8_literal(i, n, v)

/** Defines for FTS query mode, they have the same values as
those defined in mysql file ft_global.h */
#define FTS_NL		0
#define FTS_BOOL	1
#define FTS_SORTED	2
#define FTS_EXPAND	4
#define FTS_NO_RANKING	8
#define FTS_PROXIMITY	16
#define FTS_PHRASE	32
#define FTS_OPT_RANKING	64

#define FTS_INDEX_TABLE_IND_NAME	"FTS_INDEX_TABLE_IND"

/** The number of FTS index partitions for a fulltext index */
#define FTS_NUM_AUX_INDEX		6

/** Threshold where our optimize thread automatically kicks in */
#define FTS_OPTIMIZE_THRESHOLD		10000000

/** Maximum possible Fulltext word length in bytes (assuming mbmaxlen=4) */
#define FTS_MAX_WORD_LEN		(HA_FT_MAXCHARLEN * 4)

/** Maximum possible Fulltext word length (in characters) */
#define FTS_MAX_WORD_LEN_IN_CHAR	HA_FT_MAXCHARLEN

/** Number of columns in FTS AUX Tables */
#define FTS_DELETED_TABLE_NUM_COLS	1
#define FTS_CONFIG_TABLE_NUM_COLS	2
#define FTS_AUX_INDEX_TABLE_NUM_COLS	5

/** DELETED_TABLE(doc_id BIGINT UNSIGNED) */
#define FTS_DELETED_TABLE_COL_LEN	8
/** CONFIG_TABLE(key CHAR(50), value CHAR(200)) */
#define FTS_CONFIG_TABLE_KEY_COL_LEN	50
#define FTS_CONFIG_TABLE_VALUE_COL_LEN	200

#define FTS_INDEX_FIRST_DOC_ID_LEN	8
#define FTS_INDEX_LAST_DOC_ID_LEN	8
#define FTS_INDEX_DOC_COUNT_LEN		4
/* BLOB COLUMN, 0 means VARIABLE SIZE */
#define FTS_INDEX_ILIST_LEN		0


/** Variable specifying the FTS parallel sort degree */
extern ulong		fts_sort_pll_degree;

/** Variable specifying the number of words to optimize for each optimize table
call */
extern ulong		fts_num_word_optimize;

/** Variable specifying whether we do additional FTS diagnostic printout
in the log */
extern char		fts_enable_diag_print;

/** FTS rank type, which will be between 0 .. 1 inclusive */
typedef float 		fts_rank_t;

/** State of a row within a transaction, as seen by FTS.

FTS_NOTHING means that the row can be forgotten as far as the FTS system
is concerned; FTS_INVALID is an internal value that marks invalid states.

NOTE: Neither the order nor the numeric values may be changed:
fts_trx_row_get_new_state depends on them being exactly as they are. */
enum fts_row_state {
	FTS_INSERT	= 0,
	FTS_MODIFY	= 1,
	FTS_DELETE	= 2,
	FTS_NOTHING	= 3,
	FTS_INVALID	= 4
};

/** The kinds of FTS auxiliary tables. */
enum fts_table_type_t {
	/** FTS auxiliary table that is specific to a particular
	FTS index on a table */
	FTS_INDEX_TABLE = 0,

	/** FTS auxiliary table that is common for all FTS indexes
	on a table */
	FTS_COMMON_TABLE = 1
};

struct fts_doc_t;
struct fts_cache_t;
struct fts_token_t;
struct fts_doc_ids_t;
struct fts_index_cache_t;

/** Compare two DOC_ID. */
int fts_doc_id_cmp(const void *p1, const void *p2)
  __attribute__((nonnull, warn_unused_result));

/** Initialize the "fts_table" for internal query into FTS auxiliary
tables.

Fills in suffix, type, table_id and table; index_id and charset are not
touched by this macro.

Every argument is parenthesized in the expansion, so that expressions such
as *ptr or a conditional can be passed safely. fts_table and m_table are
expanded more than once and must therefore be free of side effects.

NOTE(review): the expansion still ends with a semicolon after "while (0)".
It is kept so that existing call sites are unaffected, but it means the
macro cannot be used as the body of an unbraced if that has an else branch.

@param fts_table	pointer to the fts_table_t to fill in
@param m_suffix		value assigned to fts_table->suffix
@param m_type		value assigned to fts_table->type
@param m_table		pointer to the table; m_table->id is assigned to
			fts_table->table_id and the pointer itself to
			fts_table->table */
#define FTS_INIT_FTS_TABLE(fts_table, m_suffix, m_type, m_table)\
do {								\
	(fts_table)->suffix = (m_suffix);			\
	(fts_table)->type = (m_type);				\
	(fts_table)->table_id = (m_table)->id;			\
	(fts_table)->table = (m_table);				\
} while (0);

/** Initialize the "fts_table" for internal query into the FTS auxiliary
tables of one index.

Fills in suffix, type, table_id, table and index_id; charset is not touched
by this macro.

Every argument is parenthesized in the expansion, so that expressions such
as *ptr or a conditional can be passed safely. fts_table and m_index are
expanded more than once and must therefore be free of side effects.

NOTE(review): the expansion still ends with a semicolon after "while (0)".
It is kept so that existing call sites are unaffected, but it means the
macro cannot be used as the body of an unbraced if that has an else branch.

@param fts_table	pointer to the fts_table_t to fill in
@param m_suffix		value assigned to fts_table->suffix
@param m_type		value assigned to fts_table->type
@param m_index		pointer to the index; m_index->id is assigned to
			fts_table->index_id, m_index->table to
			fts_table->table and m_index->table->id to
			fts_table->table_id */
#define FTS_INIT_INDEX_TABLE(fts_table, m_suffix, m_type, m_index)\
do {								\
	(fts_table)->suffix = (m_suffix);			\
	(fts_table)->type = (m_type);				\
	(fts_table)->table_id = (m_index)->table->id;		\
	(fts_table)->table = (m_index)->table;			\
	(fts_table)->index_id = (m_index)->id;			\
} while (0);

/** Information about changes in a single transaction affecting
the FTS system. */
struct fts_trx_t {
	trx_t*		trx;		/*!< InnoDB transaction */

	ib_vector_t*	savepoints;	/*!< Active savepoints; there must
					always be at least one element, the
					implied savepoint */
	ib_vector_t*	last_stmt;	/*!< presumably the savepoint data of
					the last statement (compare
					fts_savepoint_laststmt_refresh());
					TODO confirm in fts0fts.cc */

	mem_heap_t*	heap;		/*!< memory heap; presumably the one
					this object and its vectors are
					allocated from -- TODO confirm */
};

/** Information required for transaction savepoint handling. */
struct fts_savepoint_t {
	char*		name;		/*!< Name of the savepoint. The first
					entry, which is the default instance,
					always has name NULL */

	ib_rbt_t*	tables;		/*!< Modified FTS tables */
};

/** Information about changed rows in a transaction for a single table. */
struct fts_trx_table_t {
	dict_table_t*	table;		/*!< table */

	fts_trx_t*	fts_trx;	/*!< link to parent */

	ib_rbt_t*	rows;		/*!< rows changed; indexed by doc-id,
					cells are fts_trx_row_t* */

	fts_doc_ids_t*	added_doc_ids;	/*!< list of added doc ids (NULL until
					the first addition) */

					/** for adding doc ids */
	que_t*		docs_added_graph;
};

/** Information about one changed row in a transaction. */
struct fts_trx_row_t {
	doc_id_t	doc_id;		/*!< Id of the inserted, updated or
					deleted document */

	fts_row_state	state;		/*!< state of the row, one of
					fts_row_state */

	ib_vector_t*	fts_indexes;	/*!< The indexes that are affected;
					presumably NULL means all, as for the
					fts_indexes parameter of
					fts_trx_add_op() -- TODO confirm */
};

/** List of document ids that were added during a transaction. This
list is passed on to a background 'Add' thread and OPTIMIZE, so it
needs its own memory heap. Use fts_doc_ids_create() to create and
fts_doc_ids_free() to free an instance. */
struct fts_doc_ids_t {
	ib_vector_t*	doc_ids;	/*!< document ids (each element is
					of type doc_id_t). */

	ib_alloc_t*	self_heap;	/*!< Allocator used to create an
					instance of this type and the
					doc_ids vector; fts_doc_ids_free()
					frees the memory heap stored in
					self_heap->arg */
};

// FIXME: Get rid of this if possible.
/** Since MySQL's character set support for Unicode is woefully inadequate
(it supports basic operations like isalpha etc. only for 8-bit characters),
we have to implement our own. We use UTF-16 without surrogate processing
as our in-memory format. This typedef is a single such character. */
typedef unsigned short ib_uc_t;

/** A UTF-16 or UTF-8 string. */
struct fts_string_t {
	byte*		f_str;		/*!< string, not necessarily terminated
					in any way */
	ulint		f_len;		/*!< Length of the string in bytes */
	ulint		f_n_char;	/*!< Number of characters */
};

/** Query ranked doc ids. */
struct fts_ranking_t {
	doc_id_t	doc_id;		/*!< Document id */

	fts_rank_t	rank;		/*!< Rank is between 0 .. 1 */

	byte*		words;		/*!< this contains the words
					that were queried
					and found in this document */
	ulint		words_len;	/*!< length of words; presumably in
					bytes -- TODO confirm */
};

/** Query result, as returned by fts_query() and released with
fts_query_free_result(). */
struct fts_result_t {
	ib_rbt_node_t*	current;	/*!< Current element */

	ib_rbt_t*	rankings_by_id;	/*!< RB tree of type fts_ranking_t
					indexed by doc id */
	ib_rbt_t*	rankings_by_rank;/*!< RB tree of type fts_ranking_t
					indexed by rank */
};

/** This is used to generate the FTS auxiliary table name, we need the
table id and the index id to generate the column specific FTS auxiliary
table name. Instances can be filled in with FTS_INIT_FTS_TABLE() or
FTS_INIT_INDEX_TABLE(). */
struct fts_table_t {
	fts_table_type_t
			type;		/*!< The auxiliary table type */

	table_id_t	table_id;	/*!< The table id */

	index_id_t	index_id;	/*!< The index id; assigned by
					FTS_INIT_INDEX_TABLE() but not by
					FTS_INIT_FTS_TABLE() */

	const char*	suffix;		/*!< The suffix of the fts auxiliary
					table name, can be NULL, not used
					everywhere (yet) */
	const dict_table_t*
			table;		/*!< Parent table */
	CHARSET_INFO*	charset;	/*!< charset info if it is for FTS
					index auxiliary table; not assigned
					by either of the FTS_INIT_ macros */
};

/** The state of the FTS subsystem for one table. An instance is created
with fts_create(). */
class fts_t {
public:
	/** fts_t constructor.
	@param[in]	table	table with FTS indexes
	@param[in,out]	heap	memory heap where 'this' is stored */
	fts_t(
		const dict_table_t*	table,
		mem_heap_t*		heap);

	/** fts_t destructor. */
	~fts_t();

	/** Whether the ADDED table records have been synced after crash
	recovery */
	unsigned	added_synced:1;
	/** Whether the table holds dict_sys.latch */
	unsigned	dict_locked:1;

	/** Work queue for scheduling jobs for the FTS 'Add' thread, or NULL
	if the thread has not yet been created. Each work item is a
	fts_trx_doc_ids_t*. */
	ib_wqueue_t*	add_wq;

	/** FTS memory buffer for this table, or NULL if the table has no FTS
	index. */
	fts_cache_t*	cache;

	/** FTS doc id hidden column number in the CLUSTERED index. */
	ulint		doc_col;

	/** Vector of FTS indexes, this is mainly for caching purposes. */
	ib_vector_t*	indexes;

	/** Whether the table exists in fts_optimize_wq;
	protected by fts_optimize_wq mutex */
	bool		in_queue;

	/** Whether the sync message exists in fts_optimize_wq;
	protected by fts_optimize_wq mutex */
	bool		sync_message;

	/** Heap for fts_t allocation. */
	mem_heap_t*	fts_heap;
};

struct fts_stopword_t;

/** status bits for fts_stopword_t status field. */
#define STOPWORD_NOT_INIT               0x1
#define STOPWORD_OFF                    0x2
#define STOPWORD_FROM_DEFAULT           0x4
#define STOPWORD_USER_TABLE             0x8

extern const char*	fts_default_stopword[];

/** Variable specifying the maximum FTS cache size for each table */
extern Atomic_relaxed<size_t> fts_max_cache_size;

/** Variable specifying the total memory allocated for FTS cache */
extern Atomic_relaxed<size_t> fts_max_total_cache_size;

/** Variable specifying the FTS result cache limit for each query */
extern size_t		fts_result_cache_limit;

/** Variable specifying the maximum FTS token size */
extern ulong		fts_max_token_size;

/** Variable specifying the minimum FTS token size */
extern ulong		fts_min_token_size;

/** Whether the total memory used for FTS cache is exhausted, and we will
need a sync to free some memory */
extern bool		fts_need_sync;

/******************************************************************//**
Create a FTS cache. */
fts_cache_t*
fts_cache_create(
/*=============*/
	dict_table_t*	table);			/*!< table owns the FTS cache */

/******************************************************************//**
Create a FTS index cache.
@return Index Cache */
fts_index_cache_t*
fts_cache_index_cache_create(
/*=========================*/
	dict_table_t*	table,			/*!< in: table with FTS index */
	dict_index_t*	index);			/*!< in: FTS index */

/******************************************************************//**
Get the next available document id. This function creates a new
transaction to generate the document id.
@return DB_SUCCESS if OK */
dberr_t
fts_get_next_doc_id(
/*================*/
	const dict_table_t*	table,	/*!< in: table */
	doc_id_t*		doc_id);/*!< out: new document id */

/******************************************************************//**
Create a new fts_doc_ids_t.
@return new fts_doc_ids_t. */
fts_doc_ids_t*
fts_doc_ids_create(void);
/*=====================*/

/** Free a fts_doc_ids_t by freeing the memory heap that is stored in
doc_ids->self_heap->arg.
@param[in]	doc_ids	object to free; it is dereferenced unconditionally,
			so it must not be NULL */
inline void fts_doc_ids_free(fts_doc_ids_t* doc_ids)
{
	mem_heap_t* const heap =
		static_cast<mem_heap_t*>(doc_ids->self_heap->arg);

	mem_heap_free(heap);
}

/** Sort an array of doc_id */
void fts_doc_ids_sort(ib_vector_t *doc_ids);

/******************************************************************//**
Notify the FTS system about an operation on an FTS-indexed table. */
void
fts_trx_add_op(
/*===========*/
	trx_t*		trx,			/*!< in: InnoDB transaction */
	dict_table_t*	table,			/*!< in: table */
	doc_id_t	doc_id,			/*!< in: doc id */
	fts_row_state	state,			/*!< in: state of the row */
	ib_vector_t*	fts_indexes);		/*!< in: FTS indexes affected
						(NULL=all) */

/******************************************************************//**
Free an FTS trx. */
void
fts_trx_free(
/*=========*/
	fts_trx_t*	fts_trx);		/*!< in, own: FTS trx */

/** Creates the common auxiliary tables needed for supporting an FTS index
on the given table.
The following tables are created.
CREATE TABLE $FTS_PREFIX_DELETED
	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
CREATE TABLE $FTS_PREFIX_DELETED_CACHE
	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
CREATE TABLE $FTS_PREFIX_BEING_DELETED
	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
CREATE TABLE $FTS_PREFIX_CONFIG
	(key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
@param[in,out]	trx			transaction
@param[in]	table			table with FTS index
@param[in]	skip_doc_id_index	Skip index on doc id
@return DB_SUCCESS if succeed */
dberr_t
fts_create_common_tables(
	trx_t*		trx,
	dict_table_t*	table,
	bool		skip_doc_id_index)
	MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Creates the column specific ancillary tables needed for supporting an
FTS index on the given table.

All FTS AUX Index tables have the following schema.
CREATE TABLE $FTS_PREFIX_INDEX_[1-6](
	word		VARCHAR(FTS_MAX_WORD_LEN),
	first_doc_id	INT NOT NULL,
	last_doc_id	UNSIGNED NOT NULL,
	doc_count	UNSIGNED INT NOT NULL,
	ilist		VARBINARY NOT NULL,
	UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
@param[in,out]	trx	dictionary transaction
@param[in]	index	fulltext index
@param[in]	id	table id
@return DB_SUCCESS or error code */
dberr_t
fts_create_index_tables(trx_t* trx, const dict_index_t* index, table_id_t id)
	MY_ATTRIBUTE((nonnull, warn_unused_result));
/******************************************************************//**
Add the FTS document id hidden column. */
void
fts_add_doc_id_column(
/*==================*/
	dict_table_t*	table,	/*!< in/out: Table with FTS index */
	mem_heap_t*	heap);	/*!< in: temporary memory heap, or NULL */

/** Lock the internal FTS_ tables for an index, before fts_drop_index_tables().
@param trx   transaction
@param index fulltext index */
dberr_t fts_lock_index_tables(trx_t *trx, const dict_index_t &index);

/** Lock the internal common FTS_ tables, before fts_drop_common_tables().
@param trx    transaction
@param table  table containing FULLTEXT INDEX
@return DB_SUCCESS or error code */
dberr_t fts_lock_common_tables(trx_t *trx, const dict_table_t &table);

/** Lock the internal FTS_ tables for table, before fts_drop_tables().
@param trx    transaction
@param table  table containing FULLTEXT INDEX
@return DB_SUCCESS or error code */
dberr_t fts_lock_tables(trx_t *trx, const dict_table_t &table);

/** Drop the internal FTS_ tables for table.
@param trx    transaction
@param table  table containing FULLTEXT INDEX
@return DB_SUCCESS or error code */
dberr_t fts_drop_tables(trx_t *trx, const dict_table_t &table);

/******************************************************************//**
The given transaction is about to be committed; do whatever is necessary
from the FTS system's POV.
@return DB_SUCCESS or error code */
dberr_t
fts_commit(
/*=======*/
	trx_t*		trx)			/*!< in: transaction */
	MY_ATTRIBUTE((warn_unused_result));

/** FTS Query entry point.
@param[in,out]	trx		transaction
@param[in]	index		fts index to search
@param[in]	flags		FTS search mode
@param[in]	query_str	FTS query
@param[in]	query_len	FTS query string len in bytes
@param[in,out]	result		result doc ids
@return DB_SUCCESS if successful otherwise error code */
dberr_t
fts_query(
	trx_t*		trx,
	dict_index_t*	index,
	uint		flags,
	const byte*	query_str,
	ulint		query_len,
	fts_result_t**	result)
	MY_ATTRIBUTE((warn_unused_result));

/******************************************************************//**
Retrieve the FTS Relevance Ranking result for doc with doc_id
@return the relevance ranking value. */
float
fts_retrieve_ranking(
/*=================*/
	fts_result_t*	result,			/*!< in: FTS result structure */
	doc_id_t	doc_id);		/*!< in: the interested document
						doc_id */

/******************************************************************//**
FTS Query sort result, returned by fts_query() on fts_ranking_t::rank. */
void
fts_query_sort_result_on_rank(
/*==========================*/
	fts_result_t*	result);		/*!< out: result instance
						to sort.*/

/******************************************************************//**
FTS Query free result, returned by fts_query(). */
void
fts_query_free_result(
/*==================*/
	fts_result_t*	result);		/*!< in: result instance
						to free.*/

/******************************************************************//**
Extract the doc id from the FTS hidden column. */
doc_id_t
fts_get_doc_id_from_row(
/*====================*/
	dict_table_t*	table,			/*!< in: table */
	dtuple_t*	row);			/*!< in: row whose FTS doc id we
						want to extract.*/

/** Extract the doc id from the record that belongs to index.
@param[in]	rec	record containing FTS_DOC_ID
@param[in]	index	index of rec
@param[in]	offsets	rec_get_offsets(rec,index)
@return doc id that was extracted from rec */
doc_id_t
fts_get_doc_id_from_rec(
	const rec_t*		rec,
	const dict_index_t*	index,
	const rec_offs*		offsets);

/** Add new fts doc id to the update vector.
@param[in]	table		the table that contains the FTS index.
@param[in,out]	ufield		the fts doc id field in the update vector.
				No new memory is allocated for this in this
				function.
@param[in,out]	next_doc_id	the fts doc id that has been added to the
				update vector.  If 0, a new fts doc id is
				automatically generated.  The memory provided
				for this argument will be used by the update
				vector. Ensure that the life time of this
				memory matches that of the update vector.
@return the fts doc id used in the update vector */
doc_id_t
fts_update_doc_id(
	dict_table_t*	table,
	upd_field_t*	ufield,
	doc_id_t*	next_doc_id);

/******************************************************************//**
FTS initialize. */
void
fts_startup(void);
/*==============*/

/******************************************************************//**
Create an instance of fts_t.
@return instance of fts_t */
fts_t*
fts_create(
/*=======*/
	dict_table_t*	table);			/*!< out: table with FTS
						indexes */

/*********************************************************************//**
Run OPTIMIZE on the given table.
@return DB_SUCCESS if all OK */
dberr_t
fts_optimize_table(
/*===============*/
	dict_table_t*	table);			/*!< in: table to optimize */

/**********************************************************************//**
Startup the optimize thread and create the work queue. */
void
fts_optimize_init(void);
/*====================*/

/****************************************************************//**
Drops index ancillary tables for a FTS index
@return DB_SUCCESS or error code */
dberr_t fts_drop_index_tables(trx_t *trx, const dict_index_t &index)
	MY_ATTRIBUTE((warn_unused_result));

/** Add the table to the OPTIMIZER's list.
@param[in]	table	table to add */
void
fts_optimize_add_table(
	dict_table_t*	table);

/******************************************************************//**
Remove the table from the OPTIMIZER's list. We do wait for
acknowledgement from the consumer of the message. */
void
fts_optimize_remove_table(
/*======================*/
	dict_table_t*	table);			/*!< in: table to remove */

/** Shutdown fts optimize thread. */
void
fts_optimize_shutdown();

/** Send sync fts cache for the table.
@param[in]	table	table to sync */
void
fts_optimize_request_sync_table(
	dict_table_t*	table);

/**********************************************************************//**
Take a FTS savepoint. */
void
fts_savepoint_take(
/*===============*/
	fts_trx_t*	fts_trx,		/*!< in: fts transaction */
	const char*	name);			/*!< in: savepoint name */

/**********************************************************************//**
Refresh last statement savepoint. */
void
fts_savepoint_laststmt_refresh(
/*===========================*/
	trx_t*		trx);			/*!< in: transaction */

/**********************************************************************//**
Release the savepoint data identified by name. */
void
fts_savepoint_release(
/*==================*/
	trx_t*		trx,			/*!< in: transaction */
	const char*	name);			/*!< in: savepoint name */

/** Clear cache.
@param[in,out]	cache	fts cache */
void
fts_cache_clear(
	fts_cache_t*	cache);

/*********************************************************************//**
Initialize things in cache. */
void
fts_cache_init(
/*===========*/
	fts_cache_t*	cache);			/*!< in: cache */

/*********************************************************************//**
Rollback to and including savepoint identified by name. */
void
fts_savepoint_rollback(
/*===================*/
	trx_t*		trx,			/*!< in: transaction */
	const char*	name);			/*!< in: savepoint name */

/*********************************************************************//**
Rollback the last statement savepoint (this function takes no savepoint
name). */
void
fts_savepoint_rollback_last_stmt(
/*=============================*/
	trx_t*		trx);			/*!< in: transaction */

/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out]	table		fts table
@param[in]	wait		whether to wait for existing sync to finish
@return DB_SUCCESS on success, error code on failure. */
dberr_t fts_sync_table(dict_table_t* table, bool wait = true);

/****************************************************************//**
Get the character set of an FTS index.
@return the character set of the index */
CHARSET_INFO*
fts_index_get_charset(
/*==================*/
	dict_index_t*		index);		/*!< in: FTS index */

/*********************************************************************//**
Get the initial Doc ID by consulting the CONFIG table
@return initial Doc ID */
doc_id_t
fts_init_doc_id(
/*============*/
	const dict_table_t*		table);	/*!< in: table */

/******************************************************************//**
Compare two character strings according to their charset. */
extern
int
innobase_fts_text_cmp(
/*==================*/
	const void*	cs,			/*!< in: Character set */
	const void*	p1,			/*!< in: key */
	const void*	p2);			/*!< in: node */

/******************************************************************//**
Makes all characters in a string lower case. */
extern
size_t
innobase_fts_casedn_str(
/*====================*/
        CHARSET_INFO*	cs,			/*!< in: Character set */
	char*		src,			/*!< in: string to put in
						lower case */
	size_t		src_len,		/*!< in: input string length */
	char*		dst,			/*!< in: buffer for result
						string */
	size_t		dst_len);		/*!< in: buffer size */


/******************************************************************//**
Compare two character strings according to their charset. */
extern
int
innobase_fts_text_cmp_prefix(
/*=========================*/
	const void*	cs,			/*!< in: Character set */
	const void*	p1,			/*!< in: key */
	const void*	p2);			/*!< in: node */

/*************************************************************//**
Get the next token from the given string and store it in *token. */
extern
ulint
innobase_mysql_fts_get_token(
/*=========================*/
	CHARSET_INFO*	charset,		/*!< in: Character set */
	const byte*	start,			/*!< in: start of text */
	const byte*	end,			/*!< in: one character past
						end of text */
	fts_string_t*	token);			/*!< out: token's text */

/*************************************************************//**
Get token char size by charset
@return the number of token char size */
ulint
fts_get_token_size(
/*===============*/
	const CHARSET_INFO*	cs,		/*!< in: Character set */
	const char*		token,		/*!< in: token */
	ulint			len);		/*!< in: token length */

/*************************************************************//**
FULLTEXT tokenizer internal in MYSQL_FTPARSER_SIMPLE_MODE
@return 0 if tokenized successfully */
int
fts_tokenize_document_internal(
/*===========================*/
	MYSQL_FTPARSER_PARAM*	param,	/*!< in: parser parameter */
	const char*			doc,	/*!< in: document to tokenize */
	int			len);	/*!< in: document length */

/*********************************************************************//**
Fetch COUNT(*) from specified table.
@return the number of rows in the table */
ulint
fts_get_rows_count(
/*===============*/
	fts_table_t*	fts_table);		/*!< in: fts table to read */

/*************************************************************//**
Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
@return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
doc_id_t
fts_get_max_doc_id(
/*===============*/
	dict_table_t*	table);			/*!< in: user table */

/** Check whether a stopword table is in the right format.
@param stopword_table_name   table name
@param row_end   name of the system-versioning end column, or "value"
@return the stopword column charset
@retval NULL if the table does not exist or qualify */
CHARSET_INFO *fts_valid_stopword_table(const char *stopword_table_name,
                                       const char **row_end= NULL);

/****************************************************************//**
This function loads specified stopword into FTS cache
@return true if success */
bool
fts_load_stopword(
/*==============*/
	const dict_table_t*
			table,			/*!< in: Table with FTS */
	trx_t*		trx,			/*!< in: Transaction */
	const char*	session_stopword_table,	/*!< in: Session stopword table
						name */
	bool		stopword_is_on,		/*!< in: Whether stopword
						option is turned on/off */
	bool		reload);		/*!< in: Whether it is during
						reload of FTS table */

/****************************************************************//**
Read the rows from the FTS index
@return DB_SUCCESS if OK */
dberr_t
fts_table_fetch_doc_ids(
/*====================*/
	trx_t*		trx,			/*!< in: transaction */
	fts_table_t*	fts_table,		/*!< in: aux table */
	fts_doc_ids_t*	doc_ids);		/*!< in: For collecting
						doc ids */
/****************************************************************//**
This function brings FTS index in sync when FTS index is first
used. There may be documents that have not yet been synced to the auxiliary
tables since the last abnormal server shutdown; such documents need to be
brought into the FTS cache before any further operations */
void
fts_init_index(
/*===========*/
	dict_table_t*	table,			/*!< in: Table with FTS */
	bool		has_cache_lock);	/*!< in: Whether we already
						have cache lock */
/*******************************************************************//**
Add a newly created index to the FTS cache */
void
fts_add_index(
/*==========*/
	dict_index_t*	index,			/*!< FTS index to be added */
	dict_table_t*	table);			/*!< table */

/*******************************************************************//**
Drop auxiliary tables related to an FTS index
@return DB_SUCCESS or error number */
dberr_t
fts_drop_index(
/*===========*/
	dict_table_t*	table,	/*!< in: Table where indexes are dropped */
	dict_index_t*	index,	/*!< in: Index to be dropped */
	trx_t*		trx);	/*!< in: Transaction for the drop */

/****************************************************************//**
Rename auxiliary tables for all fts index for a table
@return DB_SUCCESS or error code */
dberr_t
fts_rename_aux_tables(
/*==================*/
	dict_table_t*	table,		/*!< in: user Table */
	const char*	new_name,	/*!< in: new table name */
	trx_t*		trx);		/*!< in: transaction */

/*******************************************************************//**
Check that the indexes in fts->indexes are also present in the index cache
and in the table->indexes list
@return TRUE if all indexes match */
ibool
fts_check_cached_index(
/*===================*/
	dict_table_t*	table);  /*!< in: Table where indexes are dropped */

/** Fetch the document from tuple, tokenize the text data and
insert the text data into fts auxiliary table and
its cache. Moreover, the fields of this tuple do not contain any information
about externally stored fields. This tuple contains data directly
converted from mysql.
@param[in]     ftt     FTS transaction table
@param[in]     doc_id  doc id
@param[in]     tuple   tuple from where data can be retrieved
                       and tuple should be arranged in table
                       schema order. */
void
fts_add_doc_from_tuple(
	fts_trx_table_t*ftt,
	doc_id_t        doc_id,
	const dtuple_t* tuple);

/** Create an FTS trx.
@param[in,out] trx     InnoDB Transaction
@return FTS transaction. */
fts_trx_t*
fts_trx_create(
	trx_t*  trx);

/** Clear all fts resources when there is no internal DOC_ID
and there are no new fts index to add.
@param[in,out]  table   table  where fts is to be freed */
void fts_clear_all(dict_table_t *table);

/** Check whether the given name is fts auxiliary table
and fetch the parent table id and index id
@param[in]	name		table name
@param[in,out]	table_id	parent table id
@param[in,out]	index_id	index id
@return true if it is an auxiliary table */
bool fts_check_aux_table(const char *name,
                         table_id_t *table_id,
                         index_id_t *index_id);

/** Update the last document id. This function could create a new
transaction to update the last document id.
@param	table	table to be updated
@param	doc_id	last document id
@param	trx	update trx or null
@retval DB_SUCCESS if OK */
dberr_t
fts_update_sync_doc_id(const dict_table_t *table,
		       doc_id_t  doc_id,
		       trx_t *trx)
	MY_ATTRIBUTE((nonnull(1)));

/** Sync the table during commit phase
@param[in]	table	table to be synced */
void fts_sync_during_ddl(dict_table_t* table);