diff options
Diffstat (limited to 'storage/innobase/include/fts0types.ic')
-rw-r--r-- | storage/innobase/include/fts0types.ic | 231 |
1 files changed, 231 insertions, 0 deletions
diff --git a/storage/innobase/include/fts0types.ic b/storage/innobase/include/fts0types.ic new file mode 100644 index 00000000..facc1e5c --- /dev/null +++ b/storage/innobase/include/fts0types.ic @@ -0,0 +1,231 @@ +/***************************************************************************** + +Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, 2020, MariaDB Corporation. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fts0types.ic +Full text search types. + +Created 2007-03-27 Sunny Bains +*******************************************************/ + +#ifndef INNOBASE_FTS0TYPES_IC +#define INNOBASE_FTS0TYPES_IC + +/******************************************************************//** +Duplicate a string. +@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ +UNIV_INLINE +void +fts_string_dup( +/*===========*/ + fts_string_t* dst, /*!< in: dup to here */ + const fts_string_t* src, /*!< in: src string */ + mem_heap_t* heap) /*!< in: heap to use */ +{ + dst->f_str = (byte*)mem_heap_alloc(heap, src->f_len + 1); + memcpy(dst->f_str, src->f_str, src->f_len); + + dst->f_len = src->f_len; + dst->f_str[src->f_len] = 0; + dst->f_n_char = src->f_n_char; +} + +/******************************************************************//** +Compare two fts_trx_row_t doc_ids. +@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ +UNIV_INLINE +int +fts_trx_row_doc_id_cmp( +/*===================*/ + const void* p1, /*!< in: id1 */ + const void* p2) /*!< in: id2 */ +{ + const fts_trx_row_t* tr1 = (const fts_trx_row_t*) p1; + const fts_trx_row_t* tr2 = (const fts_trx_row_t*) p2; + + return((int)(tr1->doc_id - tr2->doc_id)); +} + +/******************************************************************//** +Compare two fts_ranking_t doc_ids. +@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ +UNIV_INLINE +int +fts_ranking_doc_id_cmp( +/*===================*/ + const void* p1, /*!< in: id1 */ + const void* p2) /*!< in: id2 */ +{ + const fts_ranking_t* rk1 = (const fts_ranking_t*) p1; + const fts_ranking_t* rk2 = (const fts_ranking_t*) p2; + + return((int)(rk1->doc_id - rk2->doc_id)); +} + +/******************************************************************//** +Compare two doc_ids. +@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ +UNIV_INLINE +int fts_doc_id_cmp( +/*==================*/ + const void* p1, /*!< in: id1 */ + const void* p2) /*!< in: id2 */ +{ + const doc_id_t* up1 = static_cast<const doc_id_t*>(p1); + const doc_id_t* up2 = static_cast<const doc_id_t*>(p2); + + return static_cast<int>(*up1 - *up2); +} + +/******************************************************************//** +Get the first character's code position for FTS index partition */ +extern +ulint +innobase_strnxfrm( +/*==============*/ + const CHARSET_INFO* cs, /*!< in: Character set */ + const uchar* p2, /*!< in: string */ + const ulint len2); /*!< in: string length */ + +/** Check if fts index charset is cjk +@param[in] cs charset +@retval true if the charset is cjk +@retval false if not. */ +inline bool fts_is_charset_cjk(const CHARSET_INFO* cs) +{ + switch (cs->number) { + case 24: /* my_charset_gb2312_chinese_ci */ + case 28: /* my_charset_gbk_chinese_ci */ + case 1: /* my_charset_big5_chinese_ci */ + case 12: /* my_charset_ujis_japanese_ci */ + case 13: /* my_charset_sjis_japanese_ci */ + case 95: /* my_charset_cp932_japanese_ci */ + case 97: /* my_charset_eucjpms_japanese_ci */ + case 19: /* my_charset_euckr_korean_ci */ + return true; + default: + return false; + } +} + +/** Select the FTS auxiliary index for the given character by range. +@param[in] cs charset +@param[in] str string +@param[in] len string length +@retval the index to use for the string */ +UNIV_INLINE +ulint +fts_select_index_by_range( + const CHARSET_INFO* cs, + const byte* str, + ulint len) +{ + ulint selected = 0; + ulint value = innobase_strnxfrm(cs, str, len); + + while (fts_index_selector[selected].value != 0) { + + if (fts_index_selector[selected].value == value) { + + return(selected); + + } else if (fts_index_selector[selected].value > value) { + + return(selected > 0 ? selected - 1 : 0); + } + + ++selected; + } + + ut_ad(selected > 1); + + return(selected - 1); +} + +/** Select the FTS auxiliary index for the given character by hash. +@param[in] cs charset +@param[in] str string +@param[in] len string length +@retval the index to use for the string */ +UNIV_INLINE +ulint +fts_select_index_by_hash( + const CHARSET_INFO* cs, + const byte* str, + ulint len) +{ + ulong nr1 = 1; + ulong nr2 = 4; + + ut_ad(!(str == NULL && len > 0)); + + if (str == NULL || len == 0) { + return 0; + } + + /* Get the first char */ + /* JAN: TODO: MySQL 5.7 had + char_len = my_mbcharlen_ptr(cs, reinterpret_cast<const char*>(str), + reinterpret_cast<const char*>(str + len)); + */ + size_t char_len = size_t(cs->charlen(str, str + len)); + + ut_ad(char_len <= len); + + /* Get collation hash code */ + my_ci_hash_sort(cs, str, char_len, &nr1, &nr2); + + return(nr1 % FTS_NUM_AUX_INDEX); +} + +/** Select the FTS auxiliary index for the given character. +@param[in] cs charset +@param[in] str string +@param[in] len string length in bytes +@retval the index to use for the string */ +UNIV_INLINE +ulint +fts_select_index( + const CHARSET_INFO* cs, + const byte* str, + ulint len) +{ + ulint selected; + + if (fts_is_charset_cjk(cs)) { + selected = fts_select_index_by_hash(cs, str, len); + } else { + selected = fts_select_index_by_range(cs, str, len); + } + + return(selected); +} + +/******************************************************************//** +Return the selected FTS aux index suffix. */ +UNIV_INLINE +const char* +fts_get_suffix( +/*===========*/ + ulint selected) /*!< in: selected index */ +{ + return(fts_index_selector[selected].suffix); +} + +#endif /* INNOBASE_FTS0TYPES_IC */ |