summary | refs | log | tree | commit | diff | stats
path: root/storage/myisam/ft_stopwords.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/myisam/ft_stopwords.c')
-rw-r--r-- storage/myisam/ft_stopwords.c 147
1 files changed, 147 insertions, 0 deletions
diff --git a/storage/myisam/ft_stopwords.c b/storage/myisam/ft_stopwords.c
new file mode 100644
index 00000000..34c445cc
--- /dev/null
+++ b/storage/myisam/ft_stopwords.c
@@ -0,0 +1,147 @@
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
+
+/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
+
+#include "ftdefs.h"
+#include "my_compare.h"
+
+
+/*
+  Character set used when comparing and parsing stopwords.
+  NULL until ft_init_stopwords() picks one.
+*/
+static CHARSET_INFO *ft_stopword_cs = NULL;
+
+
+/* One stopword: a pointer to its bytes plus an explicit length. */
+typedef struct st_ft_stopwords
+{
+  const char *pos;   /* first byte of the word */
+  uint       len;    /* length of the word in bytes */
+} FT_STOPWORD;
+
+/*
+  Tree of FT_STOPWORD elements holding every known stopword.
+  Allocated by ft_init_stopwords(), released by ft_free_stopwords().
+*/
+static TREE *stopwords3 = NULL;
+
+/*
+  Comparison callback for the stopword tree.
+
+  Compares the two words with ha_compare_text() using the charset stored
+  in ft_stopword_cs and returns its result unchanged.  cmp_arg is unused.
+*/
+static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)),
+                           FT_STOPWORD *w1, FT_STOPWORD *w2)
+{
+  uchar *left=  (uchar *) w1->pos;
+  uchar *right= (uchar *) w2->pos;
+
+  return ha_compare_text(ft_stopword_cs,
+                         left,  w1->len,
+                         right, w2->len, 0);
+}
+
+/*
+  Element-free callback for the stopword tree.
+
+  Releases the word text with my_free() when the tree asks for an element
+  to be freed (action == free_free); every other action is ignored.
+  Always returns 0.  arg is unused.
+*/
+static int FT_STOPWORD_free(FT_STOPWORD *w, TREE_FREE action,
+                            void *arg __attribute__((unused)))
+{
+  if (action != free_free)
+    return 0;
+
+  my_free((void*) w->pos);
+  return 0;
+}
+
+/*
+  Insert one NUL-terminated word into the stopword tree.
+
+  The tree element stores the pointer w itself, not a copy of the text.
+  Words shorter than ft_min_word_len are skipped and reported as success.
+
+  RETURN
+    0   word inserted, or skipped because it is too short
+    1   w is NULL, or tree_insert() returned NULL
+*/
+static int ft_add_stopword(const char *w)
+{
+  FT_STOPWORD sw;
+
+  if (w == NULL)
+    return 1;
+
+  sw.pos= w;
+  sw.len= (uint) strlen(w);
+  if (sw.len < ft_min_word_len)
+    return 0;
+
+  return tree_insert(stopwords3, &sw, 0, stopwords3->custom_arg) == NULL;
+}
+
+/*
+  Build the global stopword tree.
+
+  On the first call the tree (stopwords3) is allocated and initialised and
+  the comparison charset (ft_stopword_cs) is chosen.  The element-free
+  callback is registered only when ft_stopword_file is set at that moment,
+  because only file-loaded words are heap copies.
+
+  Then:
+    - if ft_stopword_file is an empty string, no words are loaded;
+    - if ft_stopword_file names a file, the whole file is read into memory
+      and split into words with ft_simple_get_word();
+    - otherwise the words in ft_precompiled_stopwords are added and
+      ft_stopword_file is set to "(built-in)".
+
+  RETURN
+    0    success
+    -1   out of memory, the file could not be opened, sized or read,
+         or a word could not be added
+*/
+int ft_init_stopwords()
+{
+  DBUG_ENTER("ft_init_stopwords");
+  if (!stopwords3)
+  {
+    if (!(stopwords3=(TREE *)my_malloc(mi_key_memory_ft_stopwords,
+                                       sizeof(TREE), MYF(0))))
+      DBUG_RETURN(-1);
+    init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp,
+              (ft_stopword_file ? (tree_element_free)&FT_STOPWORD_free : 0),
+              NULL, MYF(0));
+    /*
+      Stopword engine currently does not support tricky
+      character sets UCS2, UTF16, UTF32.
+      Use latin1 to compare stopwords in case of these character sets.
+      It's also fine to use latin1 with the built-in stopwords.
+    */
+    ft_stopword_cs= default_charset_info->mbminlen == 1 ?
+                    default_charset_info : &my_charset_latin1;
+  }
+
+  if (ft_stopword_file)
+  {
+    File fd;
+    size_t len;
+    uchar *buffer, *start, *end;
+    FT_WORD w;
+    char *word;
+    int error=-1;
+
+    if (!*ft_stopword_file)
+      DBUG_RETURN(0);
+
+    if ((fd=my_open(ft_stopword_file, O_RDONLY, MYF(MY_WME))) == -1)
+      DBUG_RETURN(-1);
+
+    /*
+      Find the file size by seeking to the end.  my_seek() reports failure
+      with an all-ones value (MY_FILEPOS_ERROR); after the cast that is
+      (size_t) -1, which would make len+1 wrap to 0 below.
+    */
+    len=(size_t)my_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME));
+    if (len == (size_t) -1)
+      goto err0;
+    /* Rewinding must land on offset 0, otherwise the read would be short. */
+    if (my_seek(fd, 0L, MY_SEEK_SET, MYF(MY_WME)) != 0)
+      goto err0;
+
+    if (!(start= buffer= my_malloc(mi_key_memory_ft_stopwords, len+1,
+                                   MYF(MY_WME))))
+      goto err0;
+
+    /*
+      my_read() returns (size_t) -1 (MY_FILE_ERROR) on failure.  Using that
+      as a length would put `end` far outside the buffer.
+    */
+    len=my_read(fd, buffer, len, MYF(MY_WME));
+    if (len == (size_t) -1)
+      goto err1;
+    end=start+len;
+
+    while (ft_simple_get_word(ft_stopword_cs, &start, end, &w, TRUE))
+    {
+      /*
+        ft_add_stopword() silently ignores words shorter than
+        ft_min_word_len without taking ownership of them, so do not
+        allocate a copy that nobody would free.
+        NOTE(review): ft_simple_get_word() presumably filters these out
+        already - confirm.
+      */
+      if (w.len < ft_min_word_len)
+        continue;
+
+      word= my_strndup(mi_key_memory_ft_stopwords,
+                       (char*) w.pos, w.len, MYF(0));
+      if (ft_add_stopword(word))
+      {
+        /* Not stored in the tree (or NULL): release the copy ourselves. */
+        my_free(word);
+        goto err1;
+      }
+    }
+    error=0;
+err1:
+    my_free(buffer);
+err0:
+    my_close(fd, MYF(MY_WME));
+    DBUG_RETURN(error);
+  }
+  else
+  {
+    /* compatibility mode: to be removed */
+    char **sws=(char **)ft_precompiled_stopwords;
+
+    for (;*sws;sws++)
+    {
+      if (ft_add_stopword(*sws))
+        DBUG_RETURN(-1);
+    }
+    ft_stopword_file="(built-in)"; /* for SHOW VARIABLES */
+  }
+  DBUG_RETURN(0);
+}
+
+/*
+  Check whether a word is in the stopword tree.
+
+  word need not be NUL-terminated; len gives its length in bytes.
+  Dereferences stopwords3, so ft_init_stopwords() must have created the
+  tree before this is called.
+
+  RETURN
+    non-zero  the word was found in the tree
+    0         the word was not found
+*/
+int is_stopword(const char *word, size_t len)
+{
+  FT_STOPWORD key;
+  void *found;
+
+  key.pos= word;
+  key.len= (uint) len;
+  found= tree_search(stopwords3, &key, stopwords3->custom_arg);
+
+  return found != NULL;
+}
+
+
+/*
+  Release the stopword tree, if one exists, and clear ft_stopword_file.
+
+  The tree elements are destroyed with delete_tree(), then the TREE object
+  itself is freed and the global pointer reset.  ft_stopword_file is
+  cleared whether or not a tree existed.
+*/
+void ft_free_stopwords()
+{
+  DBUG_ENTER("ft_free_stopwords");
+
+  if (stopwords3 != NULL)
+  {
+    delete_tree(stopwords3, 0); /* purecov: inspected */
+    my_free(stopwords3);
+    stopwords3= NULL;
+  }
+
+  ft_stopword_file= NULL;
+  DBUG_VOID_RETURN;
+}