summaryrefslogtreecommitdiffstats
path: root/storage/mroonga/vendor/groonga/plugins
diff options
context:
space:
mode:
Diffstat (limited to 'storage/mroonga/vendor/groonga/plugins')
-rw-r--r--storage/mroonga/vendor/groonga/plugins/CMakeLists.txt30
-rw-r--r--storage/mroonga/vendor/groonga/plugins/Makefile.am19
-rw-r--r--storage/mroonga/vendor/groonga/plugins/expression_rewriters/CMakeLists.txt26
-rw-r--r--storage/mroonga/vendor/groonga/plugins/expression_rewriters/Makefile.am9
-rw-r--r--storage/mroonga/vendor/groonga/plugins/expression_rewriters/optimizer.rb147
-rw-r--r--storage/mroonga/vendor/groonga/plugins/expression_rewriters/sources.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/functions/CMakeLists.txt141
-rw-r--r--storage/mroonga/vendor/groonga/plugins/functions/Makefile.am33
-rw-r--r--storage/mroonga/vendor/groonga/plugins/functions/index_column.c266
-rw-r--r--storage/mroonga/vendor/groonga/plugins/functions/index_column_sources.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/functions/math.c142
-rw-r--r--storage/mroonga/vendor/groonga/plugins/functions/math_sources.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/functions/number.c187
-rw-r--r--storage/mroonga/vendor/groonga/plugins/functions/number_sources.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/functions/string.c299
-rw-r--r--storage/mroonga/vendor/groonga/plugins/functions/string_sources.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/functions/time.c376
-rw-r--r--storage/mroonga/vendor/groonga/plugins/functions/time_sources.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/functions/vector.c401
-rw-r--r--storage/mroonga/vendor/groonga/plugins/functions/vector_sources.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/query_expanders/CMakeLists.txt38
-rw-r--r--storage/mroonga/vendor/groonga/plugins/query_expanders/Makefile.am20
-rw-r--r--storage/mroonga/vendor/groonga/plugins/query_expanders/tsv.c314
-rw-r--r--storage/mroonga/vendor/groonga/plugins/query_expanders/tsv_sources.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/ruby/CMakeLists.txt24
-rw-r--r--storage/mroonga/vendor/groonga/plugins/ruby/Makefile.am9
-rw-r--r--storage/mroonga/vendor/groonga/plugins/ruby/eval.rb36
-rw-r--r--storage/mroonga/vendor/groonga/plugins/ruby/sources.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/ruby_scripts.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/sharding.rb11
-rw-r--r--storage/mroonga/vendor/groonga/plugins/sharding/CMakeLists.txt24
-rw-r--r--storage/mroonga/vendor/groonga/plugins/sharding/Makefile.am9
-rw-r--r--storage/mroonga/vendor/groonga/plugins/sharding/logical_count.rb169
-rw-r--r--storage/mroonga/vendor/groonga/plugins/sharding/logical_enumerator.rb317
-rw-r--r--storage/mroonga/vendor/groonga/plugins/sharding/logical_parameters.rb44
-rw-r--r--storage/mroonga/vendor/groonga/plugins/sharding/logical_range_filter.rb642
-rw-r--r--storage/mroonga/vendor/groonga/plugins/sharding/logical_select.rb975
-rw-r--r--storage/mroonga/vendor/groonga/plugins/sharding/logical_shard_list.rb28
-rw-r--r--storage/mroonga/vendor/groonga/plugins/sharding/logical_table_remove.rb345
-rw-r--r--storage/mroonga/vendor/groonga/plugins/sharding/parameters.rb10
-rw-r--r--storage/mroonga/vendor/groonga/plugins/sharding/range_expression_builder.rb88
-rw-r--r--storage/mroonga/vendor/groonga/plugins/sharding/sources.am10
-rw-r--r--storage/mroonga/vendor/groonga/plugins/suggest/CMakeLists.txt36
-rw-r--r--storage/mroonga/vendor/groonga/plugins/suggest/Makefile.am24
-rw-r--r--storage/mroonga/vendor/groonga/plugins/suggest/sources.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/suggest/suggest.c1035
-rw-r--r--storage/mroonga/vendor/groonga/plugins/token_filters/CMakeLists.txt63
-rw-r--r--storage/mroonga/vendor/groonga/plugins/token_filters/Makefile.am28
-rw-r--r--storage/mroonga/vendor/groonga/plugins/token_filters/stem.c279
-rw-r--r--storage/mroonga/vendor/groonga/plugins/token_filters/stem_sources.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/token_filters/stop_word.c159
-rw-r--r--storage/mroonga/vendor/groonga/plugins/token_filters/stop_word_sources.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/tokenizers/CMakeLists.txt76
-rw-r--r--storage/mroonga/vendor/groonga/plugins/tokenizers/Makefile.am33
-rw-r--r--storage/mroonga/vendor/groonga/plugins/tokenizers/kytea.cpp358
-rw-r--r--storage/mroonga/vendor/groonga/plugins/tokenizers/kytea_sources.am2
-rw-r--r--storage/mroonga/vendor/groonga/plugins/tokenizers/mecab.c660
-rw-r--r--storage/mroonga/vendor/groonga/plugins/tokenizers/mecab_sources.am2
58 files changed, 7970 insertions, 0 deletions
diff --git a/storage/mroonga/vendor/groonga/plugins/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/CMakeLists.txt
new file mode 100644
index 00000000..d7a5f22e
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/CMakeLists.txt
@@ -0,0 +1,30 @@
+# Copyright(C) 2012-2016 Brazil
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+add_subdirectory(suggest)
+add_subdirectory(tokenizers)
+add_subdirectory(query_expanders)
+add_subdirectory(ruby)
+add_subdirectory(token_filters)
+add_subdirectory(sharding)
+add_subdirectory(functions)
+
+if(NOT GRN_EMBED)
+ if(GRN_WITH_MRUBY)
+ read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/ruby_scripts.am RUBY_SCRIPTS)
+ install(FILES ${RUBY_SCRIPTS}
+ DESTINATION "${GRN_RELATIVE_PLUGINS_DIR}")
+ endif()
+endif()
diff --git a/storage/mroonga/vendor/groonga/plugins/Makefile.am b/storage/mroonga/vendor/groonga/plugins/Makefile.am
new file mode 100644
index 00000000..6c98a0cc
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/Makefile.am
@@ -0,0 +1,19 @@
+SUBDIRS = \
+ tokenizers \
+ suggest \
+ query_expanders \
+ ruby \
+ token_filters \
+ sharding \
+ functions \
+ expression_rewriters
+
+EXTRA_DIST = \
+ CMakeLists.txt
+
+if WITH_MRUBY
+dist_plugins_DATA = \
+ $(ruby_scripts)
+endif
+
+include ruby_scripts.am
diff --git a/storage/mroonga/vendor/groonga/plugins/expression_rewriters/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/expression_rewriters/CMakeLists.txt
new file mode 100644
index 00000000..aabec442
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/expression_rewriters/CMakeLists.txt
@@ -0,0 +1,26 @@
+# Copyright(C) 2015 Brazil
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+if(NOT GRN_EMBED)
+ if(GRN_WITH_MRUBY)
+ set(GRN_RELATIVE_EXPRESSION_REWRITER_PLUGINS_DIR
+ "${GRN_RELATIVE_PLUGINS_DIR}/expression_rewriters")
+
+ read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/sources.am
+ EXPRESSION_REWRITERS)
+ install(FILES ${EXPRESSION_REWRITERS}
+ DESTINATION "${GRN_RELATIVE_SEXPRESSION_REWRITER_PLUGINS_DIR}")
+ endif()
+endif()
diff --git a/storage/mroonga/vendor/groonga/plugins/expression_rewriters/Makefile.am b/storage/mroonga/vendor/groonga/plugins/expression_rewriters/Makefile.am
new file mode 100644
index 00000000..60a032ac
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/expression_rewriters/Makefile.am
@@ -0,0 +1,9 @@
+EXTRA_DIST = \
+ CMakeLists.txt
+
+if WITH_MRUBY
+dist_expression_rewriter_plugins_DATA = \
+ $(expression_rewriters)
+endif
+
+include sources.am
diff --git a/storage/mroonga/vendor/groonga/plugins/expression_rewriters/optimizer.rb b/storage/mroonga/vendor/groonga/plugins/expression_rewriters/optimizer.rb
new file mode 100644
index 00000000..3dfee681
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/expression_rewriters/optimizer.rb
@@ -0,0 +1,147 @@
+module Groonga
+ module ExpressionRewriters
+ class Optimizer < ExpressionRewriter
+ register "optimizer"
+
+ def rewrite
+ builder = ExpressionTreeBuilder.new(@expression)
+ root_node = builder.build
+
+ variable = @expression[0]
+ table = context[variable.domain]
+ optimized_root_node = optimize_node(table, root_node)
+
+ rewritten = Expression.create(table)
+ optimized_root_node.build(rewritten)
+ rewritten
+ end
+
+ private
+ def optimize_node(table, node)
+ case node
+ when ExpressionTree::LogicalOperation
+ optimized_sub_nodes = node.nodes.collect do |sub_node|
+ optimize_node(table, sub_node)
+ end
+ case node.operator
+ when Operator::AND
+ optimized_sub_nodes =
+ optimize_and_sub_nodes(table, optimized_sub_nodes)
+ end
+ ExpressionTree::LogicalOperation.new(node.operator,
+ optimized_sub_nodes)
+ when ExpressionTree::BinaryOperation
+ optimized_left = optimize_node(table, node.left)
+ optimized_right = optimize_node(table, node.right)
+ if optimized_left.is_a?(ExpressionTree::Constant) and
+ optimized_right.is_a?(ExpressionTree::Variable)
+ ExpressionTree::BinaryOperation.new(node.operator,
+ optimized_right,
+ optimized_left)
+ elsif node.left == optimized_left and node.right == optimized_right
+ node
+ else
+ ExpressionTree::BinaryOperation.new(node.operator,
+ optimized_left,
+ optimized_right)
+ end
+ else
+ node
+ end
+ end
+
+ def optimize_and_sub_nodes(table, sub_nodes)
+ grouped_sub_nodes = sub_nodes.group_by do |sub_node|
+ case sub_node
+ when ExpressionTree::BinaryOperation
+ if sub_node.left.is_a?(ExpressionTree::Variable)
+ sub_node.left.column
+ else
+ nil
+ end
+ else
+ nil
+ end
+ end
+
+ optimized_nodes = []
+ grouped_sub_nodes.each do |column, grouped_nodes|
+ if column
+ grouped_nodes = optimize_grouped_nodes(column, grouped_nodes)
+ end
+ optimized_nodes.concat(grouped_nodes)
+ end
+
+ optimized_nodes.sort_by do |node|
+ node.estimate_size(table)
+ end
+ end
+
+ COMPARISON_OPERATORS = [
+ Operator::EQUAL,
+ Operator::NOT_EQUAL,
+ Operator::LESS,
+ Operator::GREATER,
+ Operator::LESS_EQUAL,
+ Operator::GREATER_EQUAL,
+ ]
+ def optimize_grouped_nodes(column, grouped_nodes)
+ target_nodes, done_nodes = grouped_nodes.partition do |node|
+ node.is_a?(ExpressionTree::BinaryOperation) and
+ COMPARISON_OPERATORS.include?(node.operator) and
+ node.right.is_a?(ExpressionTree::Constant)
+ end
+
+ # TODO: target_nodes = remove_needless_nodes(target_nodes)
+ # e.g.: x < 1 && x < 3 -> x < 1: (x < 3) is meaningless
+
+ if target_nodes.size == 2
+ between_node = try_optimize_between(column, target_nodes)
+ if between_node
+ done_nodes << between_node
+ else
+ done_nodes.concat(target_nodes)
+ end
+ else
+ done_nodes.concat(target_nodes)
+ end
+
+ done_nodes
+ end
+
+ def try_optimize_between(column, target_nodes)
+ greater_node = nil
+ less_node = nil
+ target_nodes.each do |node|
+ case node.operator
+ when Operator::GREATER, Operator::GREATER_EQUAL
+ greater_node = node
+ when Operator::LESS, Operator::LESS_EQUAL
+ less_node = node
+ end
+ end
+ return nil if greater_node.nil? or less_node.nil?
+
+ between = ExpressionTree::Procedure.new(context["between"])
+ if greater_node.operator == Operator::GREATER
+ greater_border = "exclude"
+ else
+ greater_border = "include"
+ end
+ if less_node.operator == Operator::LESS
+ less_border = "exclude"
+ else
+ less_border = "include"
+ end
+ arguments = [
+ ExpressionTree::Variable.new(column),
+ greater_node.right,
+ ExpressionTree::Constant.new(greater_border),
+ less_node.right,
+ ExpressionTree::Constant.new(less_border),
+ ]
+ ExpressionTree::FunctionCall.new(between, arguments)
+ end
+ end
+ end
+end
diff --git a/storage/mroonga/vendor/groonga/plugins/expression_rewriters/sources.am b/storage/mroonga/vendor/groonga/plugins/expression_rewriters/sources.am
new file mode 100644
index 00000000..7670bed6
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/expression_rewriters/sources.am
@@ -0,0 +1,2 @@
+expression_rewriters = \
+ optimizer.rb
diff --git a/storage/mroonga/vendor/groonga/plugins/functions/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/functions/CMakeLists.txt
new file mode 100644
index 00000000..65a6d2c3
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/functions/CMakeLists.txt
@@ -0,0 +1,141 @@
+# Copyright(C) 2015-2017 Brazil
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+include_directories(
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+ )
+
+set(GRN_FUNCTIONS_PLUGIN_DIR "${GRN_RELATIVE_PLUGINS_DIR}/functions")
+
+read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/vector_sources.am
+ VECTOR_SOURCES)
+set_source_files_properties(${VECTOR_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}")
+if(GRN_EMBED)
+ add_library(vector_functions STATIC ${VECTOR_SOURCES})
+ set_target_properties(
+ vector_functions
+ PROPERTIES
+ POSITION_INDEPENDENT_CODE ON)
+else()
+ add_library(vector_functions MODULE ${VECTOR_SOURCES})
+ set_target_properties(vector_functions PROPERTIES
+ PREFIX ""
+ OUTPUT_NAME "vector")
+ install(TARGETS vector_functions DESTINATION "${GRN_FUNCTIONS_PLUGIN_DIR}")
+endif()
+target_link_libraries(vector_functions libgroonga)
+
+read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/string_sources.am
+ STRING_SOURCES)
+set_source_files_properties(${STRING_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}")
+if(GRN_EMBED)
+ add_library(string_functions STATIC ${STRING_SOURCES})
+ set_target_properties(
+ string_functions
+ PROPERTIES
+ POSITION_INDEPENDENT_CODE ON)
+else()
+ add_library(string_functions MODULE ${STRING_SOURCES})
+ set_target_properties(string_functions PROPERTIES
+ PREFIX ""
+ OUTPUT_NAME "string")
+ install(TARGETS string_functions DESTINATION "${GRN_FUNCTIONS_PLUGIN_DIR}")
+endif()
+target_link_libraries(string_functions libgroonga)
+
+read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/number_sources.am
+ NUMBER_SOURCES)
+set_source_files_properties(${NUMBER_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}")
+if(GRN_EMBED)
+ add_library(number_functions STATIC ${NUMBER_SOURCES})
+ set_target_properties(
+ number_functions
+ PROPERTIES
+ POSITION_INDEPENDENT_CODE ON)
+else()
+ add_library(number_functions MODULE ${NUMBER_SOURCES})
+ set_target_properties(number_functions PROPERTIES
+ PREFIX ""
+ OUTPUT_NAME "number")
+ install(TARGETS number_functions DESTINATION "${GRN_FUNCTIONS_PLUGIN_DIR}")
+endif()
+target_link_libraries(number_functions libgroonga "${M_LIBS}")
+
+read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/time_sources.am
+ TIME_SOURCES)
+set_source_files_properties(${TIME_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}")
+if(GRN_EMBED)
+ add_library(time_functions STATIC ${TIME_SOURCES})
+ set_target_properties(
+ time_functions
+ PROPERTIES
+ POSITION_INDEPENDENT_CODE ON)
+else()
+ add_library(time_functions MODULE ${TIME_SOURCES})
+ set_target_properties(time_functions PROPERTIES
+ PREFIX ""
+ OUTPUT_NAME "time")
+ install(TARGETS time_functions DESTINATION "${GRN_FUNCTIONS_PLUGIN_DIR}")
+endif()
+target_link_libraries(time_functions libgroonga)
+
+read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/index_column_sources.am
+ INDEX_COLUMN_SOURCES)
+set_source_files_properties(${INDEX_COLUMN_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}")
+if(GRN_EMBED)
+ add_library(index_column_functions STATIC ${INDEX_COLUMN_SOURCES})
+ set_target_properties(
+ index_column_functions
+ PROPERTIES
+ POSITION_INDEPENDENT_CODE ON)
+else()
+ add_library(index_column_functions MODULE ${INDEX_COLUMN_SOURCES})
+ set_target_properties(index_column_functions PROPERTIES
+ PREFIX ""
+ OUTPUT_NAME "index_column")
+ install(TARGETS index_column_functions
+ DESTINATION "${GRN_FUNCTIONS_PLUGIN_DIR}")
+endif()
+target_link_libraries(index_column_functions libgroonga)
+
+read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/math_sources.am
+ MATH_SOURCES)
+set_source_files_properties(${MATH_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}")
+if(GRN_EMBED)
+ add_library(math_functions STATIC ${MATH_SOURCES})
+ set_target_properties(
+ math_functions
+ PROPERTIES
+ POSITION_INDEPENDENT_CODE ON)
+else()
+ add_library(math_functions MODULE ${MATH_SOURCES})
+ set_target_properties(math_functions PROPERTIES
+ PREFIX ""
+ OUTPUT_NAME "math")
+ install(TARGETS math_functions DESTINATION "${GRN_FUNCTIONS_PLUGIN_DIR}")
+endif()
+target_link_libraries(math_functions libgroonga)
diff --git a/storage/mroonga/vendor/groonga/plugins/functions/Makefile.am b/storage/mroonga/vendor/groonga/plugins/functions/Makefile.am
new file mode 100644
index 00000000..f57ee031
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/functions/Makefile.am
@@ -0,0 +1,33 @@
+EXTRA_DIST = \
+ CMakeLists.txt
+
+AM_CPPFLAGS = \
+ -I$(top_builddir) \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/lib
+
+AM_LDFLAGS = \
+ -avoid-version \
+ -module \
+ -no-undefined
+
+LIBS = \
+ $(top_builddir)/lib/libgroonga.la
+
+function_plugins_LTLIBRARIES =
+function_plugins_LTLIBRARIES += vector.la
+function_plugins_LTLIBRARIES += string.la
+function_plugins_LTLIBRARIES += number.la
+function_plugins_LTLIBRARIES += time.la
+function_plugins_LTLIBRARIES += index_column.la
+function_plugins_LTLIBRARIES += math.la
+
+include vector_sources.am
+include string_sources.am
+include number_sources.am
+include time_sources.am
+include index_column_sources.am
+include math_sources.am
+
+number_la_LIBADD = -lm
+math_la_LIBADD = -lm
diff --git a/storage/mroonga/vendor/groonga/plugins/functions/index_column.c b/storage/mroonga/vendor/groonga/plugins/functions/index_column.c
new file mode 100644
index 00000000..05010074
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/functions/index_column.c
@@ -0,0 +1,266 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+ Copyright(C) 2017 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+#ifdef GRN_EMBEDDED
+# define GRN_PLUGIN_FUNCTION_TAG functions_time
+#endif
+
+#include <groonga/plugin.h>
+
+static grn_rc
+selector_index_column_df_ratio_between(grn_ctx *ctx,
+ grn_obj *table,
+ grn_obj *index,
+ int n_args,
+ grn_obj **args,
+ grn_obj *res,
+ grn_operator op)
+{
+ grn_rc rc = GRN_SUCCESS;
+ grn_obj *index_column;
+ grn_ii *ii;
+ double min;
+ double max;
+ grn_obj *source_table;
+ unsigned int n_documents;
+ grn_posting posting;
+
+ if ((n_args - 1) != 3) {
+ GRN_PLUGIN_ERROR(ctx,
+ GRN_INVALID_ARGUMENT,
+ "index_column_df_ratio_between(): "
+ "wrong number of arguments (%d for 3)", n_args - 1);
+ rc = ctx->rc;
+ goto exit;
+ }
+
+ index_column = args[1];
+ ii = (grn_ii *)index_column;
+ min = GRN_FLOAT_VALUE(args[2]);
+ max = GRN_FLOAT_VALUE(args[3]);
+
+ source_table = grn_ctx_at(ctx, grn_obj_get_range(ctx, index_column));
+ n_documents = grn_table_size(ctx, source_table);
+ memset(&posting, 0, sizeof(grn_posting));
+ posting.sid = 1;
+
+ if (op == GRN_OP_AND) {
+ GRN_TABLE_EACH_BEGIN(ctx, res, cursor, record_id) {
+ void *key;
+ grn_id term_id;
+ uint32_t n_match_documents;
+ double df_ratio;
+
+ grn_table_cursor_get_key(ctx, cursor, &key);
+ term_id = *(grn_id *)key;
+ n_match_documents = grn_ii_estimate_size(ctx, ii, term_id);
+ if (n_match_documents > n_documents) {
+ n_match_documents = n_documents;
+ }
+ df_ratio = (double)n_match_documents / (double)n_documents;
+ if (min <= df_ratio && df_ratio <= max) {
+ posting.rid = term_id;
+ grn_ii_posting_add(ctx, &posting, (grn_hash *)res, op);
+ }
+ } GRN_TABLE_EACH_END(ctx, cursor);
+ grn_ii_resolve_sel_and(ctx, (grn_hash *)res, op);
+ } else {
+ GRN_TABLE_EACH_BEGIN(ctx, table, cursor, term_id) {
+ uint32_t n_match_documents;
+ double df_ratio;
+
+ n_match_documents = grn_ii_estimate_size(ctx, ii, term_id);
+ if (n_match_documents > n_documents) {
+ n_match_documents = n_documents;
+ }
+ df_ratio = (double)n_match_documents / (double)n_documents;
+ {
+ void *key;
+ int key_size;
+ key_size = grn_table_cursor_get_key(ctx, cursor, &key);
+ }
+ if (min <= df_ratio && df_ratio <= max) {
+ posting.rid = term_id;
+ grn_ii_posting_add(ctx, &posting, (grn_hash *)res, op);
+ }
+ } GRN_TABLE_EACH_END(ctx, cursor);
+ }
+
+exit :
+ return rc;
+}
+
+static grn_obj *
+func_index_column_df_ratio(grn_ctx *ctx,
+ int n_args,
+ grn_obj **args,
+ grn_user_data *user_data)
+{
+ grn_obj *term_table;
+ grn_obj *index_column_name;
+ grn_obj *index_column;
+ grn_ii *ii;
+ grn_id term_id;
+
+ if (n_args != 1) {
+ GRN_PLUGIN_ERROR(ctx,
+ GRN_INVALID_ARGUMENT,
+ "index_column_df_ratio(): "
+ "wrong number of arguments (%d for 1)", n_args - 1);
+ return NULL;
+ }
+
+ {
+ grn_obj *expr;
+ grn_obj *variable;
+
+ expr = grn_plugin_proc_get_caller(ctx, user_data);
+ if (!expr) {
+ GRN_PLUGIN_ERROR(ctx,
+ GRN_INVALID_ARGUMENT,
+ "index_column_df_ratio(): "
+ "called directly");
+ return NULL;
+ }
+
+ variable = grn_expr_get_var_by_offset(ctx, expr, 0);
+ if (!variable) {
+ GRN_PLUGIN_ERROR(ctx,
+ GRN_INVALID_ARGUMENT,
+ "index_column_df_ratio(): "
+ "caller expression must have target record information");
+ return NULL;
+ }
+
+ term_table = grn_ctx_at(ctx, variable->header.domain);
+ term_id = GRN_RECORD_VALUE(variable);
+ while (GRN_TRUE) {
+ grn_obj *key_type;
+
+ key_type = grn_ctx_at(ctx, term_table->header.domain);
+ if (!grn_obj_is_table(ctx, key_type)) {
+ break;
+ }
+
+ grn_table_get_key(ctx, term_table, term_id, &term_id, sizeof(grn_id));
+ term_table = key_type;
+ }
+ }
+
+ index_column_name = args[0];
+ if (!grn_obj_is_text_family_bulk(ctx, index_column_name)) {
+ grn_obj inspected;
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, index_column_name);
+ GRN_PLUGIN_ERROR(ctx,
+ GRN_INVALID_ARGUMENT,
+ "index_column_df_ratio(): "
+ "the first argument must be index column name: %.*s",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+
+ index_column = grn_obj_column(ctx,
+ term_table,
+ GRN_TEXT_VALUE(index_column_name),
+ GRN_TEXT_LEN(index_column_name));
+ if (!index_column) {
+ GRN_PLUGIN_ERROR(ctx,
+ GRN_INVALID_ARGUMENT,
+ "index_column_df_ratio(): "
+ "nonexistent object: <%.*s>",
+ (int)GRN_TEXT_LEN(index_column_name),
+ GRN_TEXT_VALUE(index_column_name));
+ return NULL;
+ }
+
+ if (!grn_obj_is_index_column(ctx, index_column)) {
+ grn_obj inspected;
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, index_column);
+ GRN_PLUGIN_ERROR(ctx,
+ GRN_INVALID_ARGUMENT,
+ "index_column_df_ratio(): "
+ "the first argument must be index column: %.*s",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ if (grn_obj_is_accessor(ctx, index_column)) {
+ grn_obj_unlink(ctx, index_column);
+ }
+ return NULL;
+ }
+
+ ii = (grn_ii *)index_column;
+
+ {
+ grn_obj *source_table;
+ unsigned int n_documents;
+ uint32_t n_match_documents;
+ double df_ratio;
+ grn_obj *df_ratio_value;
+
+ source_table = grn_ctx_at(ctx, grn_obj_get_range(ctx, index_column));
+ n_documents = grn_table_size(ctx, source_table);
+ n_match_documents = grn_ii_estimate_size(ctx, ii, term_id);
+ if (n_match_documents > n_documents) {
+ n_match_documents = n_documents;
+ }
+ df_ratio = (double)n_match_documents / (double)n_documents;
+
+ df_ratio_value = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_FLOAT, 0);
+ if (!df_ratio_value) {
+ return NULL;
+ }
+ GRN_FLOAT_SET(ctx, df_ratio_value, df_ratio);
+ return df_ratio_value;
+ }
+}
+
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+ return ctx->rc;
+}
+
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+ grn_obj *selector_proc;
+
+ selector_proc = grn_proc_create(ctx, "index_column_df_ratio_between", -1,
+ GRN_PROC_FUNCTION,
+ NULL, NULL, NULL, 0, NULL);
+ grn_proc_set_selector(ctx, selector_proc,
+ selector_index_column_df_ratio_between);
+ grn_proc_set_selector_operator(ctx, selector_proc, GRN_OP_NOP);
+
+ grn_proc_create(ctx, "index_column_df_ratio", -1,
+ GRN_PROC_FUNCTION,
+ func_index_column_df_ratio, NULL, NULL, 0, NULL);
+
+ return ctx->rc;
+}
+
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+ return GRN_SUCCESS;
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/functions/index_column_sources.am b/storage/mroonga/vendor/groonga/plugins/functions/index_column_sources.am
new file mode 100644
index 00000000..261907bc
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/functions/index_column_sources.am
@@ -0,0 +1,2 @@
+index_column_la_SOURCES = \
+ index_column.c
diff --git a/storage/mroonga/vendor/groonga/plugins/functions/math.c b/storage/mroonga/vendor/groonga/plugins/functions/math.c
new file mode 100644
index 00000000..a6a9e260
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/functions/math.c
@@ -0,0 +1,142 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+ Copyright(C) 2017 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+#ifdef GRN_EMBEDDED
+# define GRN_PLUGIN_FUNCTION_TAG functions_math
+#endif
+
+#include <groonga/plugin.h>
+
+#include <math.h>
+#include <stdlib.h>
+
+static grn_obj *
+func_math_abs(grn_ctx *ctx, int n_args, grn_obj **args,
+ grn_user_data *user_data)
+{
+ grn_obj *number;
+ grn_obj *grn_abs_number = NULL;
+
+ if (n_args != 1) {
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "math_abs(): wrong number of arguments (%d for 1)",
+ n_args);
+ return NULL;
+ }
+
+ number = args[0];
+ if (!(number->header.type == GRN_BULK &&
+ grn_type_id_is_number_family(ctx, number->header.domain))) {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, number);
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "math_abs(): the first argument must be a number: "
+ "<%.*s>",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+
+#define ABS_AS_IS(return_type, to_type, getter, setter) { \
+ grn_abs_number = grn_plugin_proc_alloc(ctx, \
+ user_data, \
+ (return_type), \
+ 0); \
+ if (!grn_abs_number) { \
+ return NULL; \
+ } \
+ setter(ctx, grn_abs_number, getter(number)); \
+ }
+#define ABS_CONVERT_TYPE(func, return_type, to_type, getter, setter) { \
+ grn_abs_number = grn_plugin_proc_alloc(ctx, \
+ user_data, \
+ (return_type), \
+ 0); \
+ if (!grn_abs_number) { \
+ return NULL; \
+ } else { \
+ to_type abs_number_raw = (to_type)(func)(getter(number)); \
+ setter(ctx, grn_abs_number, abs_number_raw); \
+ } \
+ }
+
+ switch (number->header.domain) {
+ case GRN_DB_INT8:
+ ABS_CONVERT_TYPE(abs, GRN_DB_UINT8, uint8_t, GRN_INT8_VALUE, GRN_UINT8_SET);
+ break;
+ case GRN_DB_UINT8:
+ ABS_AS_IS(GRN_DB_UINT8, uint8_t, GRN_UINT8_VALUE, GRN_UINT8_SET);
+ break;
+ case GRN_DB_INT16:
+ ABS_CONVERT_TYPE(abs, GRN_DB_UINT16, uint16_t, GRN_INT16_VALUE, GRN_UINT16_SET);
+ break;
+ case GRN_DB_UINT16:
+ ABS_AS_IS(GRN_DB_UINT16, uint16_t, GRN_UINT16_VALUE, GRN_UINT16_SET);
+ break;
+ case GRN_DB_INT32:
+ ABS_CONVERT_TYPE(labs, GRN_DB_UINT32, uint32_t, GRN_INT32_VALUE, GRN_UINT32_SET);
+ break;
+ case GRN_DB_UINT32:
+ ABS_AS_IS(GRN_DB_UINT32, uint32_t, GRN_UINT32_VALUE, GRN_UINT32_SET);
+ break;
+ case GRN_DB_INT64:
+ ABS_CONVERT_TYPE(llabs, GRN_DB_UINT64, uint64_t, GRN_INT64_VALUE, GRN_UINT64_SET);
+ break;
+ case GRN_DB_UINT64:
+ ABS_AS_IS(GRN_DB_UINT64, uint64_t, GRN_UINT64_VALUE, GRN_UINT64_SET);
+ break;
+ case GRN_DB_FLOAT:
+ ABS_CONVERT_TYPE(fabs, GRN_DB_FLOAT, double, GRN_FLOAT_VALUE, GRN_FLOAT_SET);
+ break;
+ default :
+ break;
+ }
+#undef ABS_CONVERT_TYPE
+#undef ABS_AS_IS
+
+ return grn_abs_number;
+}
+
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+ return ctx->rc;
+}
+
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+ grn_rc rc = GRN_SUCCESS;
+
+ grn_proc_create(ctx,
+ "math_abs", -1,
+ GRN_PROC_FUNCTION,
+ func_math_abs,
+ NULL, NULL, 0, NULL);
+
+ return rc;
+}
+
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+ return GRN_SUCCESS;
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/functions/math_sources.am b/storage/mroonga/vendor/groonga/plugins/functions/math_sources.am
new file mode 100644
index 00000000..8c14ca74
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/functions/math_sources.am
@@ -0,0 +1,2 @@
+math_la_SOURCES = \
+ math.c
diff --git a/storage/mroonga/vendor/groonga/plugins/functions/number.c b/storage/mroonga/vendor/groonga/plugins/functions/number.c
new file mode 100644
index 00000000..7cdfc0e1
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/functions/number.c
@@ -0,0 +1,187 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+ Copyright(C) 2016 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+#ifdef GRN_EMBEDDED
+# define GRN_PLUGIN_FUNCTION_TAG functions_number
+#endif
+
+#include <groonga/plugin.h>
+
+#include <math.h>
+
+static grn_obj *
+func_number_classify(grn_ctx *ctx, int n_args, grn_obj **args,
+ grn_user_data *user_data)
+{
+ grn_obj *number;
+ grn_obj *interval;
+ grn_obj casted_interval;
+ grn_obj *classed_number;
+
+ if (n_args != 2) {
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "number_classify(): wrong number of arguments (%d for 2)",
+ n_args);
+ return NULL;
+ }
+
+ number = args[0];
+ if (!(number->header.type == GRN_BULK &&
+ grn_type_id_is_number_family(ctx, number->header.domain))) {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, number);
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "number_classify(): the first argument must be a number: "
+ "<%.*s>",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+
+ interval = args[1];
+ if (!(interval->header.type == GRN_BULK &&
+ grn_type_id_is_number_family(ctx, interval->header.domain))) {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, interval);
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "number_classify(): the second argument must be a number: "
+ "<%.*s>",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+
+ classed_number = grn_plugin_proc_alloc(ctx,
+ user_data,
+ number->header.domain,
+ 0);
+ if (!classed_number) {
+ return NULL;
+ }
+
+ GRN_VALUE_FIX_SIZE_INIT(&casted_interval, 0, number->header.domain);
+ grn_obj_cast(ctx, interval, &casted_interval, GRN_FALSE);
+
+#define CLASSIFY_RAW(type, getter, setter, classifier) { \
+ type number_raw; \
+ type interval_raw; \
+ type class_raw; \
+ type classed_number_raw; \
+ \
+ number_raw = getter(number); \
+ interval_raw = getter(&casted_interval); \
+ class_raw = classifier(number_raw, interval_raw); \
+ classed_number_raw = class_raw * interval_raw; \
+ setter(ctx, classed_number, classed_number_raw); \
+ }
+
+#define CLASSIFIER_INT(number_raw, interval_raw) \
+ (number_raw) < 0 ? \
+ ((((number_raw) + 1) / (interval_raw)) - 1) : \
+ (((number_raw) / (interval_raw)))
+
+#define CLASSIFY_INT(type, getter, setter) \
+ CLASSIFY_RAW(type, getter, setter, CLASSIFIER_INT)
+
+#define CLASSIFIER_UINT(number_raw, interval_raw) \
+ ((number_raw) / (interval_raw))
+
+#define CLASSIFY_UINT(type, getter, setter) \
+ CLASSIFY_RAW(type, getter, setter, CLASSIFIER_UINT)
+
+#define CLASSIFIER_FLOAT(number_raw, interval_raw) \
+ floor((number_raw) / (interval_raw))
+
+#define CLASSIFY_FLOAT(getter, setter) \
+ CLASSIFY_RAW(double, getter, setter, CLASSIFIER_FLOAT)
+
+ switch (number->header.domain) {
+ case GRN_DB_INT8 :
+ CLASSIFY_INT(int8_t, GRN_INT8_VALUE, GRN_INT8_SET);
+ break;
+ case GRN_DB_UINT8 :
+ CLASSIFY_UINT(uint8_t, GRN_UINT8_VALUE, GRN_UINT8_SET);
+ break;
+ case GRN_DB_INT16 :
+ CLASSIFY_INT(int16_t, GRN_INT16_VALUE, GRN_INT16_SET);
+ break;
+ case GRN_DB_UINT16 :
+ CLASSIFY_UINT(uint16_t, GRN_UINT16_VALUE, GRN_UINT16_SET);
+ break;
+ case GRN_DB_INT32 :
+ CLASSIFY_INT(int32_t, GRN_INT32_VALUE, GRN_INT32_SET);
+ break;
+ case GRN_DB_UINT32 :
+ CLASSIFY_UINT(uint32_t, GRN_UINT32_VALUE, GRN_UINT32_SET);
+ break;
+ case GRN_DB_INT64 :
+ CLASSIFY_INT(int64_t, GRN_INT64_VALUE, GRN_INT64_SET);
+ break;
+ case GRN_DB_UINT64 :
+ CLASSIFY_UINT(uint64_t, GRN_UINT64_VALUE, GRN_UINT64_SET);
+ break;
+ case GRN_DB_FLOAT :
+ CLASSIFY_FLOAT(GRN_FLOAT_VALUE, GRN_FLOAT_SET);
+ break;
+ default :
+ break;
+ }
+#undef CLASSIFY_FLOAT
+#undef CLASSIFIER_FLAOT
+#undef CLASSIFY_UINT
+#undef CLASSIFIER_UINT
+#undef CLASSIFY_INT
+#undef CLASSIFIER_INT
+#undef CLASSIFY_RAW
+
+ GRN_OBJ_FIN(ctx, &casted_interval);
+
+ return classed_number;
+}
+
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+ return ctx->rc;
+}
+
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+ grn_rc rc = GRN_SUCCESS;
+
+ grn_proc_create(ctx,
+ "number_classify", -1,
+ GRN_PROC_FUNCTION,
+ func_number_classify,
+ NULL, NULL, 0, NULL);
+
+ return rc;
+}
+
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+ return GRN_SUCCESS;
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/functions/number_sources.am b/storage/mroonga/vendor/groonga/plugins/functions/number_sources.am
new file mode 100644
index 00000000..b3d9483b
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/functions/number_sources.am
@@ -0,0 +1,2 @@
+number_la_SOURCES = \
+ number.c
diff --git a/storage/mroonga/vendor/groonga/plugins/functions/string.c b/storage/mroonga/vendor/groonga/plugins/functions/string.c
new file mode 100644
index 00000000..0af2d6ab
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/functions/string.c
@@ -0,0 +1,299 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+ Copyright(C) 2016 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+#ifdef GRN_EMBEDDED
+# define GRN_PLUGIN_FUNCTION_TAG functions_string
+#endif
+
+#include <groonga/plugin.h>
+
+/*
+ * func_string_length() returns the number of characters in a string.
+ * If the string contains an invalid byte sequence, this function returns the
+ * number of characters before the invalid byte sequence.
+ */
+static grn_obj *
+func_string_length(grn_ctx *ctx, int n_args, grn_obj **args,
+ grn_user_data *user_data)
+{
+ grn_obj *target;
+ unsigned int length = 0;
+ grn_obj *grn_length;
+
+ if (n_args != 1) {
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "string_length(): wrong number of arguments (%d for 1)",
+ n_args);
+ return NULL;
+ }
+
+ target = args[0];
+ if (!(target->header.type == GRN_BULK &&
+ ((target->header.domain == GRN_DB_SHORT_TEXT) ||
+ (target->header.domain == GRN_DB_TEXT) ||
+ (target->header.domain == GRN_DB_LONG_TEXT)))) {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, target);
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "string_length(): target object must be a text bulk: "
+ "<%.*s>",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+
+ {
+ const char *s = GRN_TEXT_VALUE(target);
+ const char *e = GRN_TEXT_VALUE(target) + GRN_TEXT_LEN(target);
+ const char *p;
+ unsigned int cl = 0;
+ for (p = s; p < e && (cl = grn_charlen(ctx, p, e)); p += cl) {
+ length++;
+ }
+ }
+
+ grn_length = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_UINT32, 0);
+ if (!grn_length) {
+ return NULL;
+ }
+
+ GRN_UINT32_SET(ctx, grn_length, length);
+
+ return grn_length;
+}
+
+static grn_obj *
+func_string_substring(grn_ctx *ctx, int n_args, grn_obj **args,
+ grn_user_data *user_data)
+{
+ grn_obj *target;
+ grn_obj *from_raw;
+ grn_obj *length_raw = NULL;
+ int64_t from = 0;
+ int64_t length = -1;
+ const char *start = NULL;
+ const char *end = NULL;
+ grn_obj *substring;
+
+ if (n_args < 2) {
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "string_substring(): wrong number of arguments (%d for 2..3)",
+ n_args);
+ return NULL;
+ }
+
+ target = args[0];
+ from_raw = args[1];
+ if (n_args == 3) {
+ length_raw = args[2];
+ }
+
+ if (!(target->header.type == GRN_BULK &&
+ grn_type_id_is_text_family(ctx, target->header.domain))) {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, target);
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "string_substring(): target object must be a text bulk: "
+ "<%.*s>",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+
+ /* TODO: extract as grn_func_arg_int64() */
+ if (!grn_type_id_is_number_family(ctx, from_raw->header.domain)) {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, from_raw);
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "string_substring(): from must be a number: <%.*s>",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+ if (from_raw->header.domain == GRN_DB_INT32) {
+ from = GRN_INT32_VALUE(from_raw);
+ } else if (from_raw->header.domain == GRN_DB_INT64) {
+ from = GRN_INT64_VALUE(from_raw);
+ } else {
+ grn_obj buffer;
+ grn_rc rc;
+
+ GRN_INT64_INIT(&buffer, 0);
+ rc = grn_obj_cast(ctx, from_raw, &buffer, GRN_FALSE);
+ if (rc == GRN_SUCCESS) {
+ from = GRN_INT64_VALUE(&buffer);
+ }
+ GRN_OBJ_FIN(ctx, &buffer);
+
+ if (rc != GRN_SUCCESS) {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, from_raw);
+ GRN_PLUGIN_ERROR(ctx, rc,
+ "string_substring(): "
+ "failed to cast from value to number: <%.*s>",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+ }
+
+ if (length_raw) {
+ /* TODO: extract as grn_func_arg_int64() */
+ if (!grn_type_id_is_number_family(ctx, length_raw->header.domain)) {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, length_raw);
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "string_substring(): length must be a number: <%.*s>",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+ if (length_raw->header.domain == GRN_DB_INT32) {
+ length = GRN_INT32_VALUE(length_raw);
+ } else if (length_raw->header.domain == GRN_DB_INT64) {
+ length = GRN_INT64_VALUE(length_raw);
+ } else {
+ grn_obj buffer;
+ grn_rc rc;
+
+ GRN_INT64_INIT(&buffer, 0);
+ rc = grn_obj_cast(ctx, length_raw, &buffer, GRN_FALSE);
+ if (rc == GRN_SUCCESS) {
+ length = GRN_INT64_VALUE(&buffer);
+ }
+ GRN_OBJ_FIN(ctx, &buffer);
+
+ if (rc != GRN_SUCCESS) {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, length_raw);
+ GRN_PLUGIN_ERROR(ctx, rc,
+ "string_substring(): "
+ "failed to cast length value to number: <%.*s>",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+ }
+ }
+
+ substring = grn_plugin_proc_alloc(ctx, user_data, target->header.domain, 0);
+ if (!substring) {
+ return NULL;
+ }
+
+ GRN_BULK_REWIND(substring);
+
+ if (GRN_TEXT_LEN(target) == 0) {
+ return substring;
+ }
+ if (length == 0) {
+ return substring;
+ }
+
+ while (from < 0) {
+ from += GRN_TEXT_LEN(target);
+ }
+
+ {
+ const char *p;
+
+ start = NULL;
+ p = GRN_TEXT_VALUE(target);
+ end = p + GRN_TEXT_LEN(target);
+
+ if (from == 0) {
+ start = p;
+ } else {
+ unsigned int char_length = 0;
+ int64_t n_chars = 0;
+
+ for (;
+ p < end && (char_length = grn_charlen(ctx, p, end));
+ p += char_length, n_chars++) {
+ if (n_chars == from) {
+ start = p;
+ break;
+ }
+ }
+ }
+
+ if (start && length > 0) {
+ unsigned int char_length = 0;
+ int64_t n_chars = 0;
+
+ for (;
+ p < end && (char_length = grn_charlen(ctx, p, end));
+ p += char_length, n_chars++) {
+ if (n_chars == length) {
+ end = p;
+ break;
+ }
+ }
+ }
+ }
+
+ if (start) {
+ GRN_TEXT_SET(ctx, substring, start, end - start);
+ }
+
+ return substring;
+}
+
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+ return ctx->rc;
+}
+
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+ grn_rc rc = GRN_SUCCESS;
+
+ grn_proc_create(ctx, "string_length", -1, GRN_PROC_FUNCTION, func_string_length,
+ NULL, NULL, 0, NULL);
+
+ grn_proc_create(ctx, "string_substring", -1, GRN_PROC_FUNCTION, func_string_substring,
+ NULL, NULL, 0, NULL);
+
+ return rc;
+}
+
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+ return GRN_SUCCESS;
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/functions/string_sources.am b/storage/mroonga/vendor/groonga/plugins/functions/string_sources.am
new file mode 100644
index 00000000..3477e58a
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/functions/string_sources.am
@@ -0,0 +1,2 @@
+string_la_SOURCES = \
+ string.c
diff --git a/storage/mroonga/vendor/groonga/plugins/functions/time.c b/storage/mroonga/vendor/groonga/plugins/functions/time.c
new file mode 100644
index 00000000..f82ea872
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/functions/time.c
@@ -0,0 +1,376 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+ Copyright(C) 2016 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+#ifdef GRN_EMBEDDED
+# define GRN_PLUGIN_FUNCTION_TAG functions_time
+#endif
+
+#include <groonga/plugin.h>
+
+#include <math.h>
+
+typedef enum {
+ GRN_TIME_CLASSIFY_UNIT_SECOND,
+ GRN_TIME_CLASSIFY_UNIT_MINUTE,
+ GRN_TIME_CLASSIFY_UNIT_HOUR,
+ GRN_TIME_CLASSIFY_UNIT_DAY,
+ GRN_TIME_CLASSIFY_UNIT_WEEK,
+ GRN_TIME_CLASSIFY_UNIT_MONTH,
+ GRN_TIME_CLASSIFY_UNIT_YEAR
+} grn_time_classify_unit;
+
+static grn_obj *
+func_time_classify_raw(grn_ctx *ctx,
+ int n_args,
+ grn_obj **args,
+ grn_user_data *user_data,
+ const char *function_name,
+ grn_time_classify_unit unit)
+{
+ grn_obj *time;
+ uint32_t interval_raw = 1;
+ grn_obj *classed_time;
+ grn_bool accept_interval = GRN_TRUE;
+
+ switch (unit) {
+ case GRN_TIME_CLASSIFY_UNIT_SECOND :
+ case GRN_TIME_CLASSIFY_UNIT_MINUTE :
+ case GRN_TIME_CLASSIFY_UNIT_HOUR :
+ accept_interval = GRN_TRUE;
+ break;
+ case GRN_TIME_CLASSIFY_UNIT_DAY :
+ case GRN_TIME_CLASSIFY_UNIT_WEEK :
+ accept_interval = GRN_FALSE;
+ break;
+ case GRN_TIME_CLASSIFY_UNIT_MONTH :
+ case GRN_TIME_CLASSIFY_UNIT_YEAR :
+ accept_interval = GRN_TRUE;
+ break;
+ }
+
+ if (accept_interval) {
+ if (!(n_args == 1 || n_args == 2)) {
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "%s(): "
+ "wrong number of arguments (%d for 1..2)",
+ function_name,
+ n_args);
+ return NULL;
+ }
+ } else {
+ if (n_args != 1) {
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "%s(): "
+ "wrong number of arguments (%d for 1)",
+ function_name,
+ n_args);
+ return NULL;
+ }
+ }
+
+ time = args[0];
+ if (!(time->header.type == GRN_BULK &&
+ time->header.domain == GRN_DB_TIME)) {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, time);
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "%s(): "
+ "the first argument must be a time: "
+ "<%.*s>",
+ function_name,
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+
+ if (n_args == 2) {
+ grn_obj *interval;
+ grn_obj casted_interval;
+
+ interval = args[1];
+ if (!(interval->header.type == GRN_BULK &&
+ grn_type_id_is_number_family(ctx, interval->header.domain))) {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, interval);
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "%s(): "
+ "the second argument must be a number: "
+ "<%.*s>",
+ function_name,
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+
+ GRN_VALUE_FIX_SIZE_INIT(&casted_interval, 0, GRN_DB_UINT32);
+ grn_obj_cast(ctx, interval, &casted_interval, GRN_FALSE);
+ interval_raw = GRN_UINT32_VALUE(&casted_interval);
+ GRN_OBJ_FIN(ctx, &casted_interval);
+
+ if (interval_raw == 0) {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, interval);
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "%s(): "
+ "the second argument must not be zero: "
+ "<%.*s>",
+ function_name,
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+ }
+
+ {
+ int64_t time_raw;
+ struct tm tm;
+ int64_t classed_time_raw;
+
+ time_raw = GRN_TIME_VALUE(time);
+ if (!grn_time_to_tm(ctx, time_raw, &tm)) {
+ return NULL;
+ }
+
+ switch (unit) {
+ case GRN_TIME_CLASSIFY_UNIT_SECOND :
+ tm.tm_sec = (tm.tm_sec / interval_raw) * interval_raw;
+ break;
+ case GRN_TIME_CLASSIFY_UNIT_MINUTE :
+ tm.tm_min = (tm.tm_min / interval_raw) * interval_raw;
+ tm.tm_sec = 0;
+ break;
+ case GRN_TIME_CLASSIFY_UNIT_HOUR :
+ tm.tm_hour = (tm.tm_hour / interval_raw) * interval_raw;
+ tm.tm_min = 0;
+ tm.tm_sec = 0;
+ break;
+ case GRN_TIME_CLASSIFY_UNIT_DAY :
+ tm.tm_hour = 0;
+ tm.tm_min = 0;
+ tm.tm_sec = 0;
+ break;
+ case GRN_TIME_CLASSIFY_UNIT_WEEK :
+ if ((tm.tm_mday - tm.tm_wday) >= 0) {
+ tm.tm_mday -= tm.tm_wday;
+ } else {
+ int n_underflowed_mday = -(tm.tm_mday - tm.tm_wday);
+ int mday;
+ int max_mday = 31;
+
+ if (tm.tm_mon == 0) {
+ tm.tm_year--;
+ tm.tm_mon = 11;
+ } else {
+ tm.tm_mon--;
+ }
+
+ for (mday = max_mday; mday > n_underflowed_mday; mday--) {
+ int64_t unused;
+ tm.tm_mday = mday;
+ if (grn_time_from_tm(ctx, &unused, &tm)) {
+ break;
+ }
+ }
+ tm.tm_mday -= n_underflowed_mday;
+ }
+ tm.tm_hour = 0;
+ tm.tm_min = 0;
+ tm.tm_sec = 0;
+ break;
+ case GRN_TIME_CLASSIFY_UNIT_MONTH :
+ tm.tm_mon = (tm.tm_mon / interval_raw) * interval_raw;
+ tm.tm_mday = 1;
+ tm.tm_hour = 0;
+ tm.tm_min = 0;
+ tm.tm_sec = 0;
+ break;
+ case GRN_TIME_CLASSIFY_UNIT_YEAR :
+ tm.tm_year = (((1900 + tm.tm_year) / interval_raw) * interval_raw) - 1900;
+ tm.tm_mon = 0;
+ tm.tm_mday = 1;
+ tm.tm_hour = 0;
+ tm.tm_min = 0;
+ tm.tm_sec = 0;
+ break;
+ }
+
+ if (!grn_time_from_tm(ctx, &classed_time_raw, &tm)) {
+ return NULL;
+ }
+
+ classed_time = grn_plugin_proc_alloc(ctx,
+ user_data,
+ time->header.domain,
+ 0);
+ if (!classed_time) {
+ return NULL;
+ }
+ GRN_TIME_SET(ctx, classed_time, classed_time_raw);
+
+ return classed_time;
+ }
+}
+
+static grn_obj *
+func_time_classify_second(grn_ctx *ctx, int n_args, grn_obj **args,
+ grn_user_data *user_data)
+{
+ return func_time_classify_raw(ctx,
+ n_args,
+ args,
+ user_data,
+ "time_classify_second",
+ GRN_TIME_CLASSIFY_UNIT_SECOND);
+}
+
+static grn_obj *
+func_time_classify_minute(grn_ctx *ctx, int n_args, grn_obj **args,
+ grn_user_data *user_data)
+{
+ return func_time_classify_raw(ctx,
+ n_args,
+ args,
+ user_data,
+ "time_classify_minute",
+ GRN_TIME_CLASSIFY_UNIT_MINUTE);
+}
+
+static grn_obj *
+func_time_classify_hour(grn_ctx *ctx, int n_args, grn_obj **args,
+ grn_user_data *user_data)
+{
+ return func_time_classify_raw(ctx,
+ n_args,
+ args,
+ user_data,
+ "time_classify_hour",
+ GRN_TIME_CLASSIFY_UNIT_HOUR);
+}
+
+static grn_obj *
+func_time_classify_day(grn_ctx *ctx, int n_args, grn_obj **args,
+ grn_user_data *user_data)
+{
+ return func_time_classify_raw(ctx,
+ n_args,
+ args,
+ user_data,
+ "time_classify_day",
+ GRN_TIME_CLASSIFY_UNIT_DAY);
+}
+
+static grn_obj *
+func_time_classify_week(grn_ctx *ctx, int n_args, grn_obj **args,
+ grn_user_data *user_data)
+{
+ return func_time_classify_raw(ctx,
+ n_args,
+ args,
+ user_data,
+ "time_classify_week",
+ GRN_TIME_CLASSIFY_UNIT_WEEK);
+}
+
+static grn_obj *
+func_time_classify_month(grn_ctx *ctx, int n_args, grn_obj **args,
+ grn_user_data *user_data)
+{
+ return func_time_classify_raw(ctx,
+ n_args,
+ args,
+ user_data,
+ "time_classify_month",
+ GRN_TIME_CLASSIFY_UNIT_MONTH);
+}
+
+static grn_obj *
+func_time_classify_year(grn_ctx *ctx, int n_args, grn_obj **args,
+ grn_user_data *user_data)
+{
+ return func_time_classify_raw(ctx,
+ n_args,
+ args,
+ user_data,
+ "time_classify_year",
+ GRN_TIME_CLASSIFY_UNIT_YEAR);
+}
+
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+ return ctx->rc;
+}
+
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+ grn_rc rc = GRN_SUCCESS;
+
+ grn_proc_create(ctx,
+ "time_classify_second", -1,
+ GRN_PROC_FUNCTION,
+ func_time_classify_second,
+ NULL, NULL, 0, NULL);
+ grn_proc_create(ctx,
+ "time_classify_minute", -1,
+ GRN_PROC_FUNCTION,
+ func_time_classify_minute,
+ NULL, NULL, 0, NULL);
+ grn_proc_create(ctx,
+ "time_classify_hour", -1,
+ GRN_PROC_FUNCTION,
+ func_time_classify_hour,
+ NULL, NULL, 0, NULL);
+ grn_proc_create(ctx,
+ "time_classify_day", -1,
+ GRN_PROC_FUNCTION,
+ func_time_classify_day,
+ NULL, NULL, 0, NULL);
+ grn_proc_create(ctx,
+ "time_classify_week", -1,
+ GRN_PROC_FUNCTION,
+ func_time_classify_week,
+ NULL, NULL, 0, NULL);
+ grn_proc_create(ctx,
+ "time_classify_month", -1,
+ GRN_PROC_FUNCTION,
+ func_time_classify_month,
+ NULL, NULL, 0, NULL);
+ grn_proc_create(ctx,
+ "time_classify_year", -1,
+ GRN_PROC_FUNCTION,
+ func_time_classify_year,
+ NULL, NULL, 0, NULL);
+
+ return rc;
+}
+
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+ return GRN_SUCCESS;
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/functions/time_sources.am b/storage/mroonga/vendor/groonga/plugins/functions/time_sources.am
new file mode 100644
index 00000000..2c55a570
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/functions/time_sources.am
@@ -0,0 +1,2 @@
+time_la_SOURCES = \
+ time.c
diff --git a/storage/mroonga/vendor/groonga/plugins/functions/vector.c b/storage/mroonga/vendor/groonga/plugins/functions/vector.c
new file mode 100644
index 00000000..1104b313
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/functions/vector.c
@@ -0,0 +1,401 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+ Copyright(C) 2015-2017 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+#ifdef GRN_EMBEDDED
+# define GRN_PLUGIN_FUNCTION_TAG functions_vector
+#endif
+
+#include <groonga/plugin.h>
+
+static grn_obj *
+func_vector_size(grn_ctx *ctx, int n_args, grn_obj **args,
+ grn_user_data *user_data)
+{
+ grn_obj *target;
+ unsigned int size;
+ grn_obj *grn_size;
+
+ if (n_args != 1) {
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "vector_size(): wrong number of arguments (%d for 1)",
+ n_args);
+ return NULL;
+ }
+
+ target = args[0];
+ switch (target->header.type) {
+ case GRN_VECTOR :
+ case GRN_PVECTOR :
+ case GRN_UVECTOR :
+ size = grn_vector_size(ctx, target);
+ break;
+ default :
+ {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, target, &inspected);
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "vector_size(): target object must be vector: <%.*s>",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+ break;
+ }
+
+ grn_size = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_UINT32, 0);
+ if (!grn_size) {
+ return NULL;
+ }
+
+ GRN_UINT32_SET(ctx, grn_size, size);
+
+ return grn_size;
+}
+
+static grn_obj *
+func_vector_slice(grn_ctx *ctx, int n_args, grn_obj **args,
+ grn_user_data *user_data)
+{
+ grn_obj *target;
+ grn_obj *from_raw = NULL;
+ grn_obj *length_raw = NULL;
+ int64_t from = 0;
+ int64_t length = -1;
+ uint32_t to = 0;
+ uint32_t size = 0;
+ grn_obj *slice;
+
+ if (n_args < 2 || n_args > 3) {
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "vector_slice(): wrong number of arguments (%d for 2..3)",
+ n_args);
+ return NULL;
+ }
+
+ target = args[0];
+ from_raw = args[1];
+ if (n_args == 3) {
+ length_raw = args[2];
+ }
+ switch (target->header.type) {
+ case GRN_VECTOR :
+ case GRN_PVECTOR :
+ case GRN_UVECTOR :
+ size = grn_vector_size(ctx, target);
+ break;
+ default :
+ {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, target, &inspected);
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "vector_slice(): target object must be vector: <%.*s>",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+ break;
+ }
+
+ if (!grn_type_id_is_number_family(ctx, from_raw->header.domain)) {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, from_raw);
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "vector_slice(): from must be a number: <%.*s>",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+ if (from_raw->header.domain == GRN_DB_INT32) {
+ from = GRN_INT32_VALUE(from_raw);
+ } else if (from_raw->header.domain == GRN_DB_INT64) {
+ from = GRN_INT64_VALUE(from_raw);
+ } else {
+ grn_obj buffer;
+ grn_rc rc;
+
+ GRN_INT64_INIT(&buffer, 0);
+ rc = grn_obj_cast(ctx, from_raw, &buffer, GRN_FALSE);
+ if (rc == GRN_SUCCESS) {
+ from = GRN_INT64_VALUE(&buffer);
+ }
+ GRN_OBJ_FIN(ctx, &buffer);
+
+ if (rc != GRN_SUCCESS) {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, from_raw);
+ GRN_PLUGIN_ERROR(ctx, rc,
+ "vector_slice(): "
+ "failed to cast from value to number: <%.*s>",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+ }
+
+ if (length_raw) {
+ if (!grn_type_id_is_number_family(ctx, length_raw->header.domain)) {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, length_raw);
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "vector_slice(): length must be a number: <%.*s>",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+ if (length_raw->header.domain == GRN_DB_INT32) {
+ length = GRN_INT32_VALUE(length_raw);
+ } else if (length_raw->header.domain == GRN_DB_INT64) {
+ length = GRN_INT64_VALUE(length_raw);
+ } else {
+ grn_obj buffer;
+ grn_rc rc;
+
+ GRN_INT64_INIT(&buffer, 0);
+ rc = grn_obj_cast(ctx, length_raw, &buffer, GRN_FALSE);
+ if (rc == GRN_SUCCESS) {
+ length = GRN_INT64_VALUE(&buffer);
+ }
+ GRN_OBJ_FIN(ctx, &buffer);
+
+ if (rc != GRN_SUCCESS) {
+ grn_obj inspected;
+
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, length_raw);
+ GRN_PLUGIN_ERROR(ctx, rc,
+ "vector_slice(): "
+ "failed to cast length value to number: <%.*s>",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+ }
+ }
+
+ slice = grn_plugin_proc_alloc(ctx, user_data, target->header.domain, GRN_OBJ_VECTOR);
+ if (!slice) {
+ return NULL;
+ }
+
+ if (target->header.flags & GRN_OBJ_WITH_WEIGHT) {
+ slice->header.flags |= GRN_OBJ_WITH_WEIGHT;
+ }
+
+ if (length < 0) {
+ length = size + length + 1;
+ }
+
+ if (length > size) {
+ length = size;
+ }
+
+ if (length <= 0) {
+ return slice;
+ }
+
+ while (from < 0) {
+ from += size;
+ }
+
+ to = from + length;
+ if (to > size) {
+ to = size;
+ }
+
+ switch (target->header.type) {
+ case GRN_VECTOR :
+ {
+ unsigned int i;
+ for (i = from; i < to; i++) {
+ const char *content;
+ unsigned int content_length;
+ unsigned int weight;
+ grn_id domain;
+ content_length = grn_vector_get_element(ctx, target, i,
+ &content, &weight, &domain);
+ grn_vector_add_element(ctx, slice,
+ content, content_length, weight, domain);
+ }
+ }
+ break;
+ case GRN_PVECTOR :
+ {
+ unsigned int i;
+ for (i = from; i < to; i++) {
+ grn_obj *element = GRN_PTR_VALUE_AT(target, i);
+ GRN_PTR_PUT(ctx, slice, element);
+ }
+ }
+ break;
+ case GRN_UVECTOR :
+ {
+ grn_obj *domain;
+
+ domain = grn_ctx_at(ctx, target->header.domain);
+ if (grn_obj_is_table(ctx, domain)) {
+ unsigned int i;
+ for (i = from; i < to; i++) {
+ grn_id id;
+ unsigned int weight;
+ id = grn_uvector_get_element(ctx, target, i, &weight);
+ grn_uvector_add_element(ctx, slice, id, weight);
+ }
+ } else {
+#define PUT_SLICE_VALUES(type) do { \
+ unsigned int i; \
+ for (i = from; i < to; i++) { \
+ GRN_ ## type ## _PUT(ctx, \
+ slice, \
+ GRN_ ## type ## _VALUE_AT(target, i)); \
+ } \
+ } while (GRN_FALSE)
+ switch (target->header.domain) {
+ case GRN_DB_BOOL :
+ PUT_SLICE_VALUES(BOOL);
+ break;
+ case GRN_DB_INT8 :
+ PUT_SLICE_VALUES(INT8);
+ break;
+ case GRN_DB_UINT8 :
+ PUT_SLICE_VALUES(UINT8);
+ break;
+ case GRN_DB_INT16 :
+ PUT_SLICE_VALUES(INT16);
+ break;
+ case GRN_DB_UINT16 :
+ PUT_SLICE_VALUES(UINT16);
+ break;
+ case GRN_DB_INT32 :
+ PUT_SLICE_VALUES(INT32);
+ break;
+ case GRN_DB_UINT32 :
+ PUT_SLICE_VALUES(UINT32);
+ break;
+ case GRN_DB_INT64 :
+ PUT_SLICE_VALUES(INT64);
+ break;
+ case GRN_DB_UINT64 :
+ PUT_SLICE_VALUES(UINT64);
+ break;
+ case GRN_DB_FLOAT :
+ PUT_SLICE_VALUES(FLOAT);
+ break;
+ case GRN_DB_TIME :
+ PUT_SLICE_VALUES(TIME);
+ break;
+ }
+ }
+ }
+ break;
+#undef PUT_SLICE_VALUES
+ }
+
+ return slice;
+}
+
+static grn_obj *
+func_vector_new(grn_ctx *ctx, int n_args, grn_obj **args,
+ grn_user_data *user_data)
+{
+ grn_obj *vector = NULL;
+ int i;
+
+ if (n_args == 0) {
+ return grn_plugin_proc_alloc(ctx, user_data, GRN_DB_UINT32, GRN_OBJ_VECTOR);
+ }
+
+ vector = grn_plugin_proc_alloc(ctx,
+ user_data,
+ args[0]->header.domain,
+ GRN_OBJ_VECTOR);
+ if (!vector) {
+ return NULL;
+ }
+
+ for (i = 0; i < n_args; i++) {
+ grn_obj *element = args[i];
+ switch (vector->header.type) {
+ case GRN_VECTOR :
+ grn_vector_add_element(ctx,
+ vector,
+ GRN_BULK_HEAD(element),
+ GRN_BULK_VSIZE(element),
+ 0,
+ element->header.domain);
+ break;
+ case GRN_UVECTOR :
+ grn_bulk_write(ctx,
+ vector,
+ GRN_BULK_HEAD(element),
+ GRN_BULK_VSIZE(element));
+ break;
+ case GRN_PVECTOR :
+ GRN_PTR_PUT(ctx, vector, element);
+ break;
+ default :
+ break;
+ }
+ }
+
+ return vector;
+}
+
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+ return ctx->rc;
+}
+
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+ grn_rc rc = GRN_SUCCESS;
+
+ grn_proc_create(ctx, "vector_size", -1, GRN_PROC_FUNCTION, func_vector_size,
+ NULL, NULL, 0, NULL);
+
+ grn_proc_create(ctx, "vector_slice", -1, GRN_PROC_FUNCTION, func_vector_slice,
+ NULL, NULL, 0, NULL);
+
+ grn_proc_create(ctx, "vector_new", -1, GRN_PROC_FUNCTION, func_vector_new,
+ NULL, NULL, 0, NULL);
+
+ return rc;
+}
+
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+ return GRN_SUCCESS;
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/functions/vector_sources.am b/storage/mroonga/vendor/groonga/plugins/functions/vector_sources.am
new file mode 100644
index 00000000..1d98e651
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/functions/vector_sources.am
@@ -0,0 +1,2 @@
+vector_la_SOURCES = \
+ vector.c
diff --git a/storage/mroonga/vendor/groonga/plugins/query_expanders/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/query_expanders/CMakeLists.txt
new file mode 100644
index 00000000..c2f04cb8
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/query_expanders/CMakeLists.txt
@@ -0,0 +1,38 @@
+# Copyright(C) 2012-2013 Brazil
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+include_directories(
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+ )
+
+set(QUERY_EXPANDERS_DIR "${GRN_RELATIVE_PLUGINS_DIR}/query_expanders")
+read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/tsv_sources.am TSV_SOURCES)
+set_source_files_properties(${TSV_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}")
+if(GRN_EMBED)
+ add_library(tsv_query_expander STATIC ${TSV_SOURCES})
+ set_target_properties(
+ tsv_query_expander
+ PROPERTIES
+ POSITION_INDEPENDENT_CODE ON)
+else()
+ add_library(tsv_query_expander MODULE ${TSV_SOURCES})
+ set_target_properties(tsv_query_expander PROPERTIES
+ PREFIX ""
+ OUTPUT_NAME "tsv")
+ install(TARGETS tsv_query_expander DESTINATION "${QUERY_EXPANDERS_DIR}")
+endif()
+target_link_libraries(tsv_query_expander libgroonga)
diff --git a/storage/mroonga/vendor/groonga/plugins/query_expanders/Makefile.am b/storage/mroonga/vendor/groonga/plugins/query_expanders/Makefile.am
new file mode 100644
index 00000000..96c0911a
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/query_expanders/Makefile.am
@@ -0,0 +1,20 @@
+EXTRA_DIST = \
+ CMakeLists.txt
+
+AM_CPPFLAGS = \
+ -I$(top_builddir) \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/lib
+
+AM_LDFLAGS = \
+ -avoid-version \
+ -module \
+ -no-undefined
+
+LIBS = \
+ $(top_builddir)/lib/libgroonga.la
+
+query_expander_plugins_LTLIBRARIES =
+query_expander_plugins_LTLIBRARIES += tsv.la
+
+include tsv_sources.am
diff --git a/storage/mroonga/vendor/groonga/plugins/query_expanders/tsv.c b/storage/mroonga/vendor/groonga/plugins/query_expanders/tsv.c
new file mode 100644
index 00000000..5d5deec6
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/query_expanders/tsv.c
@@ -0,0 +1,314 @@
+/* -*- c-basic-offset: 2 -*- */
+/* Copyright(C) 2012-2015 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+#ifdef GRN_EMBEDDED
+# define GRN_PLUGIN_FUNCTION_TAG query_expanders_tsv
+#endif
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <groonga/plugin.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef WIN32
+# include <windows.h>
+# include <share.h>
+#endif /* WIN32 */
+
+#define MAX_SYNONYM_BYTES 4096
+
+static grn_hash *synonyms = NULL;
+
+#ifdef WIN32
+static char win32_synonyms_file[MAX_PATH] = "";
+const char *
+get_system_synonyms_file(void)
+{
+ if (win32_synonyms_file[0] == '\0') {
+ const char *base_dir;
+ const char *relative_path = GRN_QUERY_EXPANDER_TSV_RELATIVE_SYNONYMS_FILE;
+ size_t base_dir_length;
+
+ base_dir = grn_plugin_windows_base_dir();
+ base_dir_length = strlen(base_dir);
+ grn_strcpy(win32_synonyms_file, MAX_PATH, base_dir);
+ grn_strcat(win32_synonyms_file, MAX_PATH, "/");
+ grn_strcat(win32_synonyms_file, MAX_PATH, relative_path);
+ }
+ return win32_synonyms_file;
+}
+
+#else /* WIN32 */
+const char *
+get_system_synonyms_file(void)
+{
+ return GRN_QUERY_EXPANDER_TSV_SYNONYMS_FILE;
+}
+#endif /* WIN32 */
+
+static grn_bool
+is_comment_mark(char character)
+{
+ return character == '#';
+}
+
+static grn_encoding
+detect_coding_part(grn_ctx *ctx, const char *line, size_t line_length)
+{
+ grn_encoding encoding = GRN_ENC_NONE;
+ grn_obj null_terminated_line_buffer;
+ const char *c_line;
+ const char *coding_part_keyword = "coding: ";
+ const char *coding_part;
+ const char *encoding_name;
+
+ GRN_TEXT_INIT(&null_terminated_line_buffer, 0);
+ GRN_TEXT_PUT(ctx, &null_terminated_line_buffer, line, line_length);
+ GRN_TEXT_PUTC(ctx, &null_terminated_line_buffer, '\0');
+
+ c_line = GRN_TEXT_VALUE(&null_terminated_line_buffer);
+ coding_part = strstr(c_line, coding_part_keyword);
+ if (coding_part) {
+ encoding_name = coding_part + strlen(coding_part_keyword);
+ if (grn_strncasecmp(encoding_name, "utf-8", strlen("utf-8")) == 0 ||
+ grn_strncasecmp(encoding_name, "utf8", strlen("utf8")) == 0) {
+ encoding = GRN_ENC_UTF8;
+ } else if (grn_strncasecmp(encoding_name, "sjis", strlen("sjis")) == 0 ||
+ grn_strncasecmp(encoding_name, "Shift_JIS", strlen("Shift_JIS")) == 0) {
+ encoding = GRN_ENC_SJIS;
+ } else if (grn_strncasecmp(encoding_name, "EUC-JP", strlen("EUC-JP")) == 0 ||
+ grn_strncasecmp(encoding_name, "euc_jp", strlen("euc_jp")) == 0) {
+ encoding = GRN_ENC_EUC_JP;
+ } else if (grn_strncasecmp(encoding_name, "latin1", strlen("latin1")) == 0) {
+ encoding = GRN_ENC_LATIN1;
+ } else if (grn_strncasecmp(encoding_name, "KOI8-R", strlen("KOI8-R")) == 0 ||
+ grn_strncasecmp(encoding_name, "koi8r", strlen("koi8r")) == 0) {
+ encoding = GRN_ENC_KOI8R;
+ }
+ } else {
+ encoding = ctx->encoding;
+ }
+ GRN_OBJ_FIN(ctx, &null_terminated_line_buffer);
+
+ return encoding;
+}
+
+static grn_encoding
+guess_encoding(grn_ctx *ctx, const char **line, size_t *line_length)
+{
+ const char bom[] = {0xef, 0xbb, 0xbf};
+ size_t bom_length = sizeof(bom);
+
+ if (*line_length >= bom_length && memcmp(*line, bom, bom_length) == 0) {
+ *line += bom_length;
+ *line_length -= bom_length;
+ return GRN_ENC_UTF8;
+ }
+
+ if (!is_comment_mark((*line)[0])) {
+ return ctx->encoding;
+ }
+
+ return detect_coding_part(ctx, (*line) + 1, (*line_length) - 1);
+}
+
+static void
+parse_synonyms_file_line(grn_ctx *ctx, const char *line, size_t line_length,
+ grn_obj *key, grn_obj *value)
+{
+ size_t i = 0;
+
+ if (is_comment_mark(line[i])) {
+ return;
+ }
+
+ while (i < line_length) {
+ char character = line[i];
+ i++;
+ if (character == '\t') {
+ break;
+ }
+ GRN_TEXT_PUTC(ctx, key, character);
+ }
+
+ if (i == line_length) {
+ return;
+ }
+
+ GRN_TEXT_PUTS(ctx, value, "((");
+ while (i < line_length) {
+ char character = line[i];
+ i++;
+ if (character == '\t') {
+ GRN_TEXT_PUTS(ctx, value, ") OR (");
+ } else {
+ GRN_TEXT_PUTC(ctx, value, character);
+ }
+ }
+ GRN_TEXT_PUTS(ctx, value, "))");
+
+ {
+ grn_id id;
+ void *value_location = NULL;
+
+ id = grn_hash_add(ctx, synonyms, GRN_TEXT_VALUE(key), GRN_TEXT_LEN(key),
+ &value_location, NULL);
+ if (id == GRN_ID_NIL) {
+ GRN_PLUGIN_LOG(ctx, GRN_LOG_WARNING,
+ "[plugin][query-expander][tsv] "
+ "failed to register key: <%.*s>",
+ (int)GRN_TEXT_LEN(key), GRN_TEXT_VALUE(key));
+ return;
+ }
+
+ if (GRN_TEXT_LEN(value) <= MAX_SYNONYM_BYTES - 1) {
+ GRN_TEXT_PUTC(ctx, value, '\0');
+ } else {
+ grn_bulk_truncate(ctx, value, MAX_SYNONYM_BYTES - 1);
+ GRN_TEXT_PUTC(ctx, value, '\0');
+ }
+ grn_memcpy(value_location, GRN_TEXT_VALUE(value), GRN_TEXT_LEN(value));
+ }
+}
+
+static void
+load_synonyms(grn_ctx *ctx)
+{
+ static char path_env[GRN_ENV_BUFFER_SIZE];
+ const char *path;
+ grn_file_reader *file_reader;
+ int number_of_lines;
+ grn_encoding encoding;
+ grn_obj line, key, value;
+
+ grn_getenv("GRN_QUERY_EXPANDER_TSV_SYNONYMS_FILE",
+ path_env,
+ GRN_ENV_BUFFER_SIZE);
+ if (path_env[0]) {
+ path = path_env;
+ } else {
+ path = get_system_synonyms_file();
+ }
+ file_reader = grn_file_reader_open(ctx, path);
+ if (!file_reader) {
+ GRN_LOG(ctx, GRN_LOG_WARNING,
+ "[plugin][query-expander][tsv] "
+ "synonyms file doesn't exist: <%s>",
+ path);
+ return;
+ }
+
+ GRN_TEXT_INIT(&line, 0);
+ GRN_TEXT_INIT(&key, 0);
+ GRN_TEXT_INIT(&value, 0);
+ grn_bulk_reserve(ctx, &value, MAX_SYNONYM_BYTES);
+ number_of_lines = 0;
+ while (grn_file_reader_read_line(ctx, file_reader, &line) == GRN_SUCCESS) {
+ const char *line_value = GRN_TEXT_VALUE(&line);
+ size_t line_length = GRN_TEXT_LEN(&line);
+
+ if (line_length > 0 && line_value[line_length - 1] == '\n') {
+ if (line_length > 1 && line_value[line_length - 2] == '\r') {
+ line_length -= 2;
+ } else {
+ line_length -= 1;
+ }
+ }
+ number_of_lines++;
+ if (number_of_lines == 1) {
+ encoding = guess_encoding(ctx, &line_value, &line_length);
+ }
+ GRN_BULK_REWIND(&key);
+ GRN_BULK_REWIND(&value);
+ parse_synonyms_file_line(ctx, line_value, line_length, &key, &value);
+ GRN_BULK_REWIND(&line);
+ }
+ GRN_OBJ_FIN(ctx, &line);
+ GRN_OBJ_FIN(ctx, &key);
+ GRN_OBJ_FIN(ctx, &value);
+
+ grn_file_reader_close(ctx, file_reader);
+}
+
+static grn_obj *
+func_query_expander_tsv(grn_ctx *ctx, int nargs, grn_obj **args,
+ grn_user_data *user_data)
+{
+ grn_rc rc = GRN_END_OF_DATA;
+ grn_id id;
+ grn_obj *term, *expanded_term;
+ void *value;
+ grn_obj *rc_object;
+
+ term = args[0];
+ expanded_term = args[1];
+ id = grn_hash_get(ctx, synonyms,
+ GRN_TEXT_VALUE(term), GRN_TEXT_LEN(term),
+ &value);
+ if (id != GRN_ID_NIL) {
+ const char *query = value;
+ GRN_TEXT_PUTS(ctx, expanded_term, query);
+ rc = GRN_SUCCESS;
+ }
+
+ rc_object = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_INT32, 0);
+ if (rc_object) {
+ GRN_INT32_SET(ctx, rc_object, rc);
+ }
+
+ return rc_object;
+}
+
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+ if (!synonyms) {
+ synonyms = grn_hash_create(ctx, NULL,
+ GRN_TABLE_MAX_KEY_SIZE,
+ MAX_SYNONYM_BYTES,
+ GRN_OBJ_TABLE_HASH_KEY | GRN_OBJ_KEY_VAR_SIZE);
+ if (!synonyms) {
+ return ctx->rc;
+ }
+ load_synonyms(ctx);
+ }
+ return ctx->rc;
+}
+
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+ grn_proc_create(ctx, "QueryExpanderTSV", strlen("QueryExpanderTSV"),
+ GRN_PROC_FUNCTION,
+ func_query_expander_tsv, NULL, NULL,
+ 0, NULL);
+ return GRN_SUCCESS;
+}
+
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+ if (synonyms) {
+ grn_hash_close(ctx, synonyms);
+ synonyms = NULL;
+ }
+ return GRN_SUCCESS;
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/query_expanders/tsv_sources.am b/storage/mroonga/vendor/groonga/plugins/query_expanders/tsv_sources.am
new file mode 100644
index 00000000..f1bdabed
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/query_expanders/tsv_sources.am
@@ -0,0 +1,2 @@
+tsv_la_SOURCES = \
+ tsv.c
diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/ruby/CMakeLists.txt
new file mode 100644
index 00000000..a2bcccd1
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/ruby/CMakeLists.txt
@@ -0,0 +1,24 @@
+# Copyright(C) 2013-2016 Brazil
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+if(NOT GRN_EMBED)
+ if(GRN_WITH_MRUBY)
+ set(GRN_RELATIVE_RUBY_PLUGINS_DIR "${GRN_RELATIVE_PLUGINS_DIR}/ruby")
+
+ read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/sources.am RUBY_SCRIPTS)
+ install(FILES ${RUBY_SCRIPTS}
+ DESTINATION "${GRN_RELATIVE_RUBY_PLUGINS_DIR}")
+ endif()
+endif()
diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/Makefile.am b/storage/mroonga/vendor/groonga/plugins/ruby/Makefile.am
new file mode 100644
index 00000000..a4949727
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/ruby/Makefile.am
@@ -0,0 +1,9 @@
+EXTRA_DIST = \
+ CMakeLists.txt
+
+if WITH_MRUBY
+dist_ruby_plugins_DATA = \
+ $(ruby_scripts)
+endif
+
+include sources.am
diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/eval.rb b/storage/mroonga/vendor/groonga/plugins/ruby/eval.rb
new file mode 100644
index 00000000..e7619cf2
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/ruby/eval.rb
@@ -0,0 +1,36 @@
+module Groonga
+ module Ruby
+ class EvalCommand < Command
+ register("ruby_eval",
+ [
+ "script",
+ ])
+
+ def run_body(input)
+ script = input[:script]
+ unless script.is_a?(String)
+ message = "script must be a string: <#{script.inspect}>"
+ raise Groonga::InvalidArgument, message
+ end
+
+ eval_context = EvalContext.new
+ begin
+ result = eval_context.eval(script)
+ rescue Exception => error
+ writer.map("result", 1) do
+ writer.write("exception")
+ writer.map("exception", 1) do
+ writer.write("message")
+ writer.write(error.message)
+ end
+ end
+ else
+ writer.map("result", 1) do
+ writer.write("value")
+ writer.write(result)
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/storage/mroonga/vendor/groonga/plugins/ruby/sources.am b/storage/mroonga/vendor/groonga/plugins/ruby/sources.am
new file mode 100644
index 00000000..f8938291
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/ruby/sources.am
@@ -0,0 +1,2 @@
+ruby_scripts = \
+ eval.rb
diff --git a/storage/mroonga/vendor/groonga/plugins/ruby_scripts.am b/storage/mroonga/vendor/groonga/plugins/ruby_scripts.am
new file mode 100644
index 00000000..0262dbb9
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/ruby_scripts.am
@@ -0,0 +1,2 @@
+ruby_scripts = \
+ sharding.rb
diff --git a/storage/mroonga/vendor/groonga/plugins/sharding.rb b/storage/mroonga/vendor/groonga/plugins/sharding.rb
new file mode 100644
index 00000000..86401c1f
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/sharding.rb
@@ -0,0 +1,11 @@
+require "sharding/parameters"
+require "sharding/range_expression_builder"
+require "sharding/logical_enumerator"
+
+require "sharding/logical_parameters"
+
+require "sharding/logical_count"
+require "sharding/logical_range_filter"
+require "sharding/logical_select"
+require "sharding/logical_shard_list"
+require "sharding/logical_table_remove"
diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/sharding/CMakeLists.txt
new file mode 100644
index 00000000..1131520f
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/sharding/CMakeLists.txt
@@ -0,0 +1,24 @@
+# Copyright(C) 2015 Brazil
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+if(NOT GRN_EMBED)
+ if(GRN_WITH_MRUBY)
+ set(GRN_RELATIVE_SHARDING_PLUGINS_DIR "${GRN_RELATIVE_PLUGINS_DIR}/sharding")
+
+ read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/sources.am SHARDING_SCRIPTS)
+ install(FILES ${SHARDING_SCRIPTS}
+ DESTINATION "${GRN_RELATIVE_SHARDING_PLUGINS_DIR}")
+ endif()
+endif()
diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/Makefile.am b/storage/mroonga/vendor/groonga/plugins/sharding/Makefile.am
new file mode 100644
index 00000000..8104ab6d
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/sharding/Makefile.am
@@ -0,0 +1,9 @@
+EXTRA_DIST = \
+ CMakeLists.txt
+
+if WITH_MRUBY
+dist_sharding_plugins_DATA = \
+ $(sharding_scripts)
+endif
+
+include sources.am
diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/logical_count.rb b/storage/mroonga/vendor/groonga/plugins/sharding/logical_count.rb
new file mode 100644
index 00000000..8bdd77ef
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/sharding/logical_count.rb
@@ -0,0 +1,169 @@
+module Groonga
+ module Sharding
+ class LogicalCountCommand < Command
+ register("logical_count",
+ [
+ "logical_table",
+ "shard_key",
+ "min",
+ "min_border",
+ "max",
+ "max_border",
+ "filter",
+ ])
+
+ def run_body(input)
+ enumerator = LogicalEnumerator.new("logical_count", input)
+ filter = input[:filter]
+
+ total = 0
+ enumerator.each do |shard, shard_range|
+ total += count_n_records(filter, shard, shard_range,
+ enumerator.target_range)
+ end
+ writer.write(total)
+ end
+
+ private
+ def cache_key(input)
+ key = "logical_count\0"
+ key << "#{input[:logical_table]}\0"
+ key << "#{input[:shard_key]}\0"
+ key << "#{input[:min]}\0"
+ key << "#{input[:min_border]}\0"
+ key << "#{input[:max]}\0"
+ key << "#{input[:max_border]}\0"
+ key << "#{input[:filter]}\0"
+ key
+ end
+
+ def log_use_range_index(use, table_name, line, method)
+ message = "[logical_count]"
+ if use
+ message << "[range-index]"
+ else
+ message << "[select]"
+ end
+ message << " <#{table_name}>"
+ Context.instance.logger.log(Logger::Level::DEBUG,
+ __FILE__,
+ line,
+ method.to_s,
+ message)
+ end
+
+ def count_n_records(filter, shard, shard_range, target_range)
+ cover_type = target_range.cover_type(shard_range)
+ return 0 if cover_type == :none
+
+ shard_key = shard.key
+ if shard_key.nil?
+ message = "[logical_count] shard_key doesn't exist: " +
+ "<#{shard.key_name}>"
+ raise InvalidArgument, message
+ end
+ table = shard.table
+ table_name = shard.table_name
+
+ expression_builder = RangeExpressionBuilder.new(shard_key,
+ target_range)
+ expression_builder.filter = filter
+ if cover_type == :all
+ log_use_range_index(false, table_name, __LINE__, __method__)
+ if filter.nil?
+ return table.size
+ else
+ return filtered_count_n_records(table) do |expression|
+ expression_builder.build_all(expression)
+ end
+ end
+ end
+
+ range_index = nil
+ if filter.nil?
+ index_info = shard_key.find_index(Operator::LESS)
+ if index_info
+ range_index = index_info.index
+ end
+ end
+
+ use_range_index = (!range_index.nil?)
+ log_use_range_index(use_range_index, table_name, __LINE__, __method__)
+
+ case cover_type
+ when :partial_min
+ if range_index
+ count_n_records_in_range(range_index,
+ target_range.min, target_range.min_border,
+ nil, nil)
+ else
+ filtered_count_n_records(table) do |expression|
+ expression_builder.build_partial_min(expression)
+ end
+ end
+ when :partial_max
+ if range_index
+ count_n_records_in_range(range_index,
+ nil, nil,
+ target_range.max, target_range.max_border)
+ else
+ filtered_count_n_records(table) do |expression|
+ expression_builder.build_partial_max(expression)
+ end
+ end
+ when :partial_min_and_max
+ if range_index
+ count_n_records_in_range(range_index,
+ target_range.min, target_range.min_border,
+ target_range.max, target_range.max_border)
+ else
+ filtered_count_n_records(table) do |expression|
+ expression_builder.build_partial_min_and_max(expression)
+ end
+ end
+ end
+ end
+
+ def filtered_count_n_records(table)
+ expression = nil
+ filtered_table = nil
+
+ begin
+ expression = Expression.create(table)
+ yield(expression)
+ filtered_table = table.select(expression)
+ filtered_table.size
+ ensure
+ filtered_table.close if filtered_table
+ expression.close if expression
+ end
+ end
+
+ def count_n_records_in_range(range_index,
+ min, min_border, max, max_border)
+ flags = TableCursorFlags::BY_KEY
+ case min_border
+ when :include
+ flags |= TableCursorFlags::GE
+ when :exclude
+ flags |= TableCursorFlags::GT
+ end
+ case max_border
+ when :include
+ flags |= TableCursorFlags::LE
+ when :exclude
+ flags |= TableCursorFlags::LT
+ end
+
+ TableCursor.open(range_index.table,
+ :min => min,
+ :max => max,
+ :flags => flags) do |table_cursor|
+ IndexCursor.open(table_cursor, range_index) do |index_cursor|
+ index_cursor.count
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/logical_enumerator.rb b/storage/mroonga/vendor/groonga/plugins/sharding/logical_enumerator.rb
new file mode 100644
index 00000000..d05a220f
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/sharding/logical_enumerator.rb
@@ -0,0 +1,317 @@
+module Groonga
+ module Sharding
+ class LogicalEnumerator
+ include Enumerable
+
+ attr_reader :target_range
+ attr_reader :logical_table
+ attr_reader :shard_key_name
+ def initialize(command_name, input, options={})
+ @command_name = command_name
+ @input = input
+ @options = options
+ initialize_parameters
+ end
+
+ def each(&block)
+ each_internal(:ascending, &block)
+ end
+
+ def reverse_each(&block)
+ each_internal(:descending, &block)
+ end
+
+ private
+ def each_internal(order)
+ context = Context.instance
+ each_shard_with_around(order) do |prev_shard, current_shard, next_shard|
+ shard_range_data = current_shard.range_data
+ shard_range = nil
+
+ if shard_range_data.day.nil?
+ if order == :ascending
+ if next_shard
+ next_shard_range_data = next_shard.range_data
+ else
+ next_shard_range_data = nil
+ end
+ else
+ if prev_shard
+ next_shard_range_data = prev_shard.range_data
+ else
+ next_shard_range_data = nil
+ end
+ end
+ max_day = compute_month_shard_max_day(shard_range_data.year,
+ shard_range_data.month,
+ next_shard_range_data)
+ shard_range = MonthShardRange.new(shard_range_data.year,
+ shard_range_data.month,
+ max_day)
+ else
+ shard_range = DayShardRange.new(shard_range_data.year,
+ shard_range_data.month,
+ shard_range_data.day)
+ end
+
+ yield(current_shard, shard_range)
+ end
+ end
+
+ def each_shard_with_around(order)
+ context = Context.instance
+ prefix = "#{@logical_table}_"
+
+ shards = [nil]
+ context.database.each_name(:prefix => prefix,
+ :order_by => :key,
+ :order => order) do |name|
+ shard_range_raw = name[prefix.size..-1]
+
+ case shard_range_raw
+ when /\A(\d{4})(\d{2})\z/
+ shard_range_data = ShardRangeData.new($1.to_i, $2.to_i, nil)
+ when /\A(\d{4})(\d{2})(\d{2})\z/
+ shard_range_data = ShardRangeData.new($1.to_i, $2.to_i, $3.to_i)
+ else
+ next
+ end
+
+ shards << Shard.new(name, @shard_key_name, shard_range_data)
+ next if shards.size < 3
+ yield(*shards)
+ shards.shift
+ end
+
+ if shards.size == 2
+ yield(shards[0], shards[1], nil)
+ end
+ end
+
+ private
+ def initialize_parameters
+ @logical_table = @input[:logical_table]
+ if @logical_table.nil?
+ raise InvalidArgument, "[#{@command_name}] logical_table is missing"
+ end
+
+ @shard_key_name = @input[:shard_key]
+ if @shard_key_name.nil?
+ require_shard_key = @options[:require_shard_key]
+ require_shard_key = true if require_shard_key.nil?
+ if require_shard_key
+ raise InvalidArgument, "[#{@command_name}] shard_key is missing"
+ end
+ end
+
+ @target_range = TargetRange.new(@command_name, @input)
+ end
+
+ def compute_month_shard_max_day(year, month, next_shard_range)
+ return nil if next_shard_range.nil?
+
+ return nil if month != next_shard_range.month
+
+ next_shard_range.day
+ end
+
+ class Shard
+ attr_reader :table_name, :key_name, :range_data
+ def initialize(table_name, key_name, range_data)
+ @table_name = table_name
+ @key_name = key_name
+ @range_data = range_data
+ end
+
+ def table
+ @table ||= Context.instance[@table_name]
+ end
+
+ def full_key_name
+ "#{@table_name}.#{@key_name}"
+ end
+
+ def key
+ @key ||= Context.instance[full_key_name]
+ end
+ end
+
+ class ShardRangeData
+ attr_reader :year, :month, :day
+ def initialize(year, month, day)
+ @year = year
+ @month = month
+ @day = day
+ end
+
+ def to_suffix
+ if @day.nil?
+ "_%04d%02d" % [@year, @month]
+ else
+ "_%04d%02d%02d" % [@year, @month, @day]
+ end
+ end
+ end
+
+ class DayShardRange
+ attr_reader :year, :month, :day
+ def initialize(year, month, day)
+ @year = year
+ @month = month
+ @day = day
+ end
+
+ def least_over_time
+ next_day = Time.local(@year, @month, @day) + (60 * 60 * 24)
+ while next_day.day == @day # For leap second
+ next_day += 1
+ end
+ next_day
+ end
+
+ def min_time
+ Time.local(@year, @month, @day)
+ end
+
+ def include?(time)
+ @year == time.year and
+ @month == time.month and
+ @day == time.day
+ end
+ end
+
+ class MonthShardRange
+ attr_reader :year, :month, :max_day
+ def initialize(year, month, max_day)
+ @year = year
+ @month = month
+ @max_day = max_day
+ end
+
+ def least_over_time
+ if @max_day.nil?
+ if @month == 12
+ Time.local(@year + 1, 1, 1)
+ else
+ Time.local(@year, @month + 1, 1)
+ end
+ else
+ Time.local(@year, @month, @max_day)
+ end
+ end
+
+ def min_time
+ Time.local(@year, @month, 1)
+ end
+
+ def include?(time)
+ return false unless @year == time.year
+ return false unless @month == time.month
+
+ if @max_day.nil?
+ true
+ else
+ time.day <= @max_day
+ end
+ end
+ end
+
+ class TargetRange
+ attr_reader :min, :min_border
+ attr_reader :max, :max_border
+ def initialize(command_name, input)
+ @command_name = command_name
+ @input = input
+ @min = parse_value(:min)
+ @min_border = parse_border(:min_border)
+ @max = parse_value(:max)
+ @max_border = parse_border(:max_border)
+ end
+
+ def cover_type(shard_range)
+ return :all if @min.nil? and @max.nil?
+
+ if @min and @max
+ return :none unless in_min?(shard_range)
+ return :none unless in_max?(shard_range)
+ min_partial_p = in_min_partial?(shard_range)
+ max_partial_p = in_max_partial?(shard_range)
+ if min_partial_p and max_partial_p
+ :partial_min_and_max
+ elsif min_partial_p
+ :partial_min
+ elsif max_partial_p
+ :partial_max
+ else
+ :all
+ end
+ elsif @min
+ return :none unless in_min?(shard_range)
+ if in_min_partial?(shard_range)
+ :partial_min
+ else
+ :all
+ end
+ else
+ return :none unless in_max?(shard_range)
+ if in_max_partial?(shard_range)
+ :partial_max
+ else
+ :all
+ end
+ end
+ end
+
+ private
+ def parse_value(name)
+ value = @input[name]
+ return nil if value.nil?
+
+ Converter.convert(value, Time)
+ end
+
+ def parse_border(name)
+ border = @input[name]
+ return :include if border.nil?
+
+ case border
+ when "include"
+ :include
+ when "exclude"
+ :exclude
+ else
+ message =
+ "[#{@command_name}] #{name} must be \"include\" or \"exclude\": " +
+ "<#{border}>"
+ raise InvalidArgument, message
+ end
+ end
+
+ def in_min?(shard_range)
+ @min < shard_range.least_over_time
+ end
+
+ def in_min_partial?(shard_range)
+ return false unless shard_range.include?(@min)
+
+ return true if @min_border == :exclude
+
+ shard_range.min_time != @min
+ end
+
+ def in_max?(shard_range)
+ max_base_time = shard_range.min_time
+ if @max_border == :include
+ @max >= max_base_time
+ else
+ @max > max_base_time
+ end
+ end
+
+ def in_max_partial?(shard_range)
+ shard_range.include?(@max)
+ end
+ end
+ end
+ end
+end
diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/logical_parameters.rb b/storage/mroonga/vendor/groonga/plugins/sharding/logical_parameters.rb
new file mode 100644
index 00000000..75ff569b
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/sharding/logical_parameters.rb
@@ -0,0 +1,44 @@
+module Groonga
+ module Sharding
+ class LogicalParametersCommand < Command
+ register("logical_parameters",
+ [
+ "range_index",
+ ])
+
+ def run_body(input)
+ range_index = parse_range_index(input[:range_index])
+
+ parameters = [
+ :range_index,
+ ]
+ writer.map("parameters", parameters.size) do
+ parameters.each do |name|
+ writer.write(name.to_s)
+ writer.write(Parameters.__send__(name))
+ end
+ end
+
+ Parameters.range_index = range_index if range_index
+ end
+
+ private
+ def parse_range_index(value)
+ case value
+ when nil
+ nil
+ when "auto"
+ :auto
+ when "always"
+ :always
+ when "never"
+ :never
+ else
+ message = "[logical_parameters][range_index] "
+ message << "must be auto, always or never: <#{value}>"
+ raise InvalidArgument, message
+ end
+ end
+ end
+ end
+end
diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/logical_range_filter.rb b/storage/mroonga/vendor/groonga/plugins/sharding/logical_range_filter.rb
new file mode 100644
index 00000000..1c8f8644
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/sharding/logical_range_filter.rb
@@ -0,0 +1,642 @@
+module Groonga
+ module Sharding
+ class LogicalRangeFilterCommand < Command
+ register("logical_range_filter",
+ [
+ "logical_table",
+ "shard_key",
+ "min",
+ "min_border",
+ "max",
+ "max_border",
+ "order",
+ "filter",
+ "offset",
+ "limit",
+ "output_columns",
+ "use_range_index",
+ ])
+
+ def run_body(input)
+ output_columns = input[:output_columns] || "_key, *"
+
+ context = ExecuteContext.new(input)
+ begin
+ executor = Executor.new(context)
+ executor.execute
+
+ result_sets = context.result_sets
+ n_elements = 1 # for columns
+ result_sets.each do |result_set|
+ n_elements += result_set.size
+ end
+
+ writer.array("RESULTSET", n_elements) do
+ first_result_set = result_sets.first
+ if first_result_set
+ writer.write_table_columns(first_result_set, output_columns)
+ end
+ limit = context.limit
+ if limit < 0
+ n_records = result_sets.inject(0) do |n, result_set|
+ n + result_set.size
+ end
+ limit = n_records + limit + 1
+ end
+ options = {}
+ result_sets.each do |result_set|
+ options[:limit] = limit
+ writer.write_table_records(result_set, output_columns, options)
+ limit -= result_set.size
+ break if limit <= 0
+ end
+ end
+ ensure
+ context.close
+ end
+ end
+
+ private
+ def cache_key(input)
+ key = "logical_range_filter\0"
+ key << "#{input[:logical_table]}\0"
+ key << "#{input[:shard_key]}\0"
+ key << "#{input[:min]}\0"
+ key << "#{input[:min_border]}\0"
+ key << "#{input[:max]}\0"
+ key << "#{input[:max_border]}\0"
+ key << "#{input[:order]}\0"
+ key << "#{input[:filter]}\0"
+ key << "#{input[:offset]}\0"
+ key << "#{input[:limit]}\0"
+ key << "#{input[:output_columns]}\0"
+ key << "#{input[:use_range_index]}\0"
+ key
+ end
+
+ class ExecuteContext
+ attr_reader :use_range_index
+ attr_reader :enumerator
+ attr_reader :order
+ attr_reader :filter
+ attr_reader :offset
+ attr_reader :limit
+ attr_accessor :current_offset
+ attr_accessor :current_limit
+ attr_reader :result_sets
+ attr_reader :unsorted_result_sets
+ attr_reader :threshold
+ def initialize(input)
+ @input = input
+ @use_range_index = parse_use_range_index(@input[:use_range_index])
+ @enumerator = LogicalEnumerator.new("logical_range_filter", @input)
+ @order = parse_order(@input, :order)
+ @filter = @input[:filter]
+ @offset = (@input[:offset] || 0).to_i
+ @limit = (@input[:limit] || 10).to_i
+
+ @current_offset = @offset
+ @current_limit = @limit
+
+ @result_sets = []
+ @unsorted_result_sets = []
+
+ @threshold = compute_threshold
+ end
+
+ def close
+ @unsorted_result_sets.each do |result_set|
+ result_set.close if result_set.temporary?
+ end
+ @result_sets.each do |result_set|
+ result_set.close if result_set.temporary?
+ end
+ end
+
+ private
+ def parse_use_range_index(use_range_index)
+ case use_range_index
+ when "yes"
+ true
+ when "no"
+ false
+ else
+ nil
+ end
+ end
+
+ def parse_order(input, name)
+ order = input[name]
+ return :ascending if order.nil?
+
+ case order
+ when "ascending"
+ :ascending
+ when "descending"
+ :descending
+ else
+ message =
+ "[logical_range_filter] #{name} must be " +
+ "\"ascending\" or \"descending\": <#{order}>"
+ raise InvalidArgument, message
+ end
+ end
+
+ def compute_threshold
+ threshold_env = ENV["GRN_LOGICAL_RANGE_FILTER_THRESHOLD"]
+ default_threshold = 0.2
+ (threshold_env || default_threshold).to_f
+ end
+ end
+
+ class Executor
+ def initialize(context)
+ @context = context
+ end
+
+ def execute
+ first_shard = nil
+ enumerator = @context.enumerator
+ target_range = enumerator.target_range
+ if @context.order == :descending
+ each_method = :reverse_each
+ else
+ each_method = :each
+ end
+ enumerator.send(each_method) do |shard, shard_range|
+ first_shard ||= shard
+ shard_executor = ShardExecutor.new(@context, shard, shard_range)
+ shard_executor.execute
+ break if @context.current_limit == 0
+ end
+ if first_shard.nil?
+ message =
+ "[logical_range_filter] no shard exists: " +
+ "logical_table: <#{enumerator.logical_table}>: " +
+ "shard_key: <#{enumerator.shard_key_name}>"
+ raise InvalidArgument, message
+ end
+ if @context.result_sets.empty?
+ result_set = HashTable.create(:flags => ObjectFlags::WITH_SUBREC,
+ :key_type => first_shard.table)
+ @context.result_sets << result_set
+ end
+ end
+ end
+
+ class ShardExecutor
+ def initialize(context, shard, shard_range)
+ @context = context
+ @shard = shard
+ @shard_range = shard_range
+
+ @filter = @context.filter
+ @result_sets = @context.result_sets
+ @unsorted_result_sets = @context.unsorted_result_sets
+
+ @target_range = @context.enumerator.target_range
+
+ @cover_type = @target_range.cover_type(@shard_range)
+ end
+
+ def execute
+ return if @cover_type == :none
+ return if @shard.table.empty?
+
+ shard_key = @shard.key
+ if shard_key.nil?
+ message = "[logical_range_filter] shard_key doesn't exist: " +
+ "<#{@shard.key_name}>"
+ raise InvalidArgument, message
+ end
+
+ expression_builder = RangeExpressionBuilder.new(shard_key,
+ @target_range)
+ expression_builder.filter = @filter
+
+ index_info = shard_key.find_index(Operator::LESS)
+ if index_info
+ range_index = index_info.index
+ unless use_range_index?(range_index, expression_builder)
+ range_index = nil
+ end
+ else
+ range_index = nil
+ end
+
+ execute_filter(range_index, expression_builder)
+ end
+
+ private
+ def decide_use_range_index(use, reason, line, method)
+ message = "[logical_range_filter]"
+ if use
+ message << "[range-index] "
+ else
+ message << "[select] "
+ end
+ message << "<#{@shard.table_name}>: "
+ message << reason
+ Context.instance.logger.log(Logger::Level::DEBUG,
+ __FILE__,
+ line,
+ method.to_s,
+ message)
+
+ use
+ end
+
+ def use_range_index?(range_index, expression_builder)
+ use_range_index_parameter_message =
+ "force by use_range_index parameter"
+ case @context.use_range_index
+ when true
+ return decide_use_range_index(true,
+ use_range_index_parameter_message,
+ __LINE__, __method__)
+ when false
+ return decide_use_range_index(false,
+ use_range_index_parameter_message,
+ __LINE__, __method__)
+ end
+
+ range_index_logical_parameter_message =
+ "force by range_index logical parameter"
+ case Parameters.range_index
+ when :always
+ return decide_use_range_index(true,
+ range_index_logical_parameter_message,
+ __LINE__, __method__)
+ when :never
+ return decide_use_range_index(false,
+ range_index_logical_parameter_message,
+ __LINE__, __method__)
+ end
+
+ current_limit = @context.current_limit
+ if current_limit < 0
+ reason = "limit is negative: <#{current_limit}>"
+ return decide_use_range_index(false, reason,
+ __LINE__, __method__)
+ end
+
+ required_n_records = @context.current_offset + current_limit
+ max_n_records = @shard.table.size
+ if max_n_records <= required_n_records
+ reason = "the number of required records (#{required_n_records}) "
+ reason << ">= "
+ reason << "the number of records in shard (#{max_n_records})"
+ return decide_use_range_index(false, reason,
+ __LINE__, __method__)
+ end
+
+ threshold = @context.threshold
+ if threshold <= 0.0
+ reason = "threshold is negative: <#{threshold}>"
+ return decide_use_range_index(true, reason,
+ __LINE__, __method__)
+ end
+ if threshold >= 1.0
+ reason = "threshold (#{threshold}) >= 1.0"
+ return decide_use_range_index(false, reason,
+ __LINE__, __method__)
+ end
+
+ table = @shard.table
+ estimated_n_records = 0
+ case @cover_type
+ when :all
+ if @filter
+ create_expression(table) do |expression|
+ expression_builder.build_all(expression)
+ unless range_index_available_expression?(expression,
+ __LINE__, __method__)
+ return false
+ end
+ estimated_n_records = expression.estimate_size(table)
+ end
+ else
+ estimated_n_records = max_n_records
+ end
+ when :partial_min
+ create_expression(table) do |expression|
+ expression_builder.build_partial_min(expression)
+ unless range_index_available_expression?(expression,
+ __LINE__, __method__)
+ return false
+ end
+ estimated_n_records = expression.estimate_size(table)
+ end
+ when :partial_max
+ create_expression(table) do |expression|
+ expression_builder.build_partial_max(expression)
+ unless range_index_available_expression?(expression,
+ __LINE__, __method__)
+ return false
+ end
+ estimated_n_records = expression.estimate_size(table)
+ end
+ when :partial_min_and_max
+ create_expression(table) do |expression|
+ expression_builder.build_partial_min_and_max(expression)
+ unless range_index_available_expression?(expression,
+ __LINE__, __method__)
+ return false
+ end
+ estimated_n_records = expression.estimate_size(table)
+ end
+ end
+
+ if estimated_n_records <= required_n_records
+ reason = "the number of required records (#{required_n_records}) "
+ reason << ">= "
+ reason << "the number of estimated records (#{estimated_n_records})"
+ return decide_use_range_index(false, reason,
+ __LINE__, __method__)
+ end
+
+ hit_ratio = estimated_n_records / max_n_records.to_f
+ use_range_index_by_hit_ratio = (hit_ratio >= threshold)
+ if use_range_index_by_hit_ratio
+ relation = ">="
+ else
+ relation = "<"
+ end
+ reason = "hit ratio "
+ reason << "(#{hit_ratio}=#{estimated_n_records}/#{max_n_records}) "
+ reason << "#{relation} threshold (#{threshold})"
+ decide_use_range_index(use_range_index_by_hit_ratio, reason,
+ __LINE__, __method__)
+ end
+
+ def range_index_available_expression?(expression, line, method_name)
+ nested_reference_vector_column_accessor =
+ find_nested_reference_vector_column_accessor(expression)
+ if nested_reference_vector_column_accessor
+ reason = "nested reference vector column accessor can't be used: "
+ reason << "<#{nested_reference_vector_column_accessor.name}>"
+ return decide_use_range_index(false, reason, line, method_name)
+ end
+
+ selector_only_procedure = find_selector_only_procedure(expression)
+ if selector_only_procedure
+ reason = "selector only procedure can't be used: "
+ reason << "<#{selector_only_procedure.name}>"
+ return decide_use_range_index(false, reason, line, method_name)
+ end
+
+ true
+ end
+
+ def find_nested_reference_vector_column_accessor(expression)
+ expression.codes.each do |code|
+ value = code.value
+ next unless value.is_a?(Accessor)
+
+ sub_accessor = value
+ while sub_accessor.have_next?
+ object = sub_accessor.object
+ return value if object.is_a?(Column) and object.vector?
+ sub_accessor = sub_accessor.next
+ end
+ end
+ nil
+ end
+
+ def find_selector_only_procedure(expression)
+ expression.codes.each do |code|
+ value = code.value
+ return value if value.is_a?(Procedure) and value.selector_only?
+ end
+ nil
+ end
+
+ def execute_filter(range_index, expression_builder)
+ case @cover_type
+ when :all
+ filter_shard_all(range_index, expression_builder)
+ when :partial_min
+ if range_index
+ filter_by_range(range_index, expression_builder,
+ @target_range.min, @target_range.min_border,
+ nil, nil)
+ else
+ filter_table do |expression|
+ expression_builder.build_partial_min(expression)
+ end
+ end
+ when :partial_max
+ if range_index
+ filter_by_range(range_index, expression_builder,
+ nil, nil,
+ @target_range.max, @target_range.max_border)
+ else
+ filter_table do |expression|
+ expression_builder.build_partial_max(expression)
+ end
+ end
+ when :partial_min_and_max
+ if range_index
+ filter_by_range(range_index, expression_builder,
+ @target_range.min, @target_range.min_border,
+ @target_range.max, @target_range.max_border)
+ else
+ filter_table do |expression|
+ expression_builder.build_partial_min_and_max(expression)
+ end
+ end
+ end
+ end
+
+ def filter_shard_all(range_index, expression_builder)
+ table = @shard.table
+ if @filter.nil?
+ if table.size <= @context.current_offset
+ @context.current_offset -= table.size
+ return
+ end
+ if range_index
+ filter_by_range(range_index, expression_builder,
+ nil, nil,
+ nil, nil)
+ else
+ sort_result_set(table)
+ end
+ else
+ if range_index
+ filter_by_range(range_index, expression_builder,
+ nil, nil,
+ nil, nil)
+ else
+ filter_table do |expression|
+ expression_builder.build_all(expression)
+ end
+ end
+ end
+ end
+
+ def create_expression(table)
+ expression = Expression.create(table)
+ begin
+ yield(expression)
+ ensure
+ expression.close
+ end
+ end
+
+ def filter_by_range(range_index, expression_builder,
+ min, min_border, max, max_border)
+ lexicon = range_index.domain
+ data_table = range_index.range
+ flags = build_range_search_flags(min_border, max_border)
+
+ result_set = HashTable.create(:flags => ObjectFlags::WITH_SUBREC,
+ :key_type => data_table)
+ n_matched_records = 0
+ begin
+ TableCursor.open(lexicon,
+ :min => min,
+ :max => max,
+ :flags => flags) do |table_cursor|
+ options = {
+ :offset => @context.current_offset,
+ }
+ current_limit = @context.current_limit
+ if current_limit < 0
+ options[:limit] = data_table.size
+ else
+ options[:limit] = current_limit
+ end
+ max_n_unmatched_records =
+ compute_max_n_unmatched_records(data_table.size,
+ options[:limit])
+ options[:max_n_unmatched_records] = max_n_unmatched_records
+ if @filter
+ create_expression(data_table) do |expression|
+ expression.parse(@filter)
+ options[:expression] = expression
+ IndexCursor.open(table_cursor, range_index) do |index_cursor|
+ n_matched_records = index_cursor.select(result_set, options)
+ end
+ end
+ else
+ IndexCursor.open(table_cursor, range_index) do |index_cursor|
+ n_matched_records = index_cursor.select(result_set, options)
+ end
+ end
+ if n_matched_records == -1
+ result_set.close
+ fallback_message =
+ "fallback because there are too much unmatched records: "
+ fallback_message << "<#{max_n_unmatched_records}>"
+ decide_use_range_index(false,
+ fallback_message,
+ __LINE__, __method__)
+ execute_filter(nil, expression_builder)
+ return
+ end
+ end
+ rescue
+ result_set.close
+ raise
+ end
+
+ if n_matched_records <= @context.current_offset
+ @context.current_offset -= n_matched_records
+ result_set.close
+ return
+ end
+
+ if @context.current_offset > 0
+ @context.current_offset = 0
+ end
+ if @context.current_limit > 0
+ @context.current_limit -= result_set.size
+ end
+ @result_sets << result_set
+ end
+
+ def build_range_search_flags(min_border, max_border)
+ flags = TableCursorFlags::BY_KEY
+ case @context.order
+ when :ascending
+ flags |= TableCursorFlags::ASCENDING
+ when :descending
+ flags |= TableCursorFlags::DESCENDING
+ end
+ case min_border
+ when :include
+ flags |= TableCursorFlags::GE
+ when :exclude
+ flags |= TableCursorFlags::GT
+ end
+ case max_border
+ when :include
+ flags |= TableCursorFlags::LE
+ when :exclude
+ flags |= TableCursorFlags::LT
+ end
+ flags
+ end
+
+ def compute_max_n_unmatched_records(data_table_size, limit)
+ max_n_unmatched_records = limit * 100
+ max_n_sample_records = data_table_size
+ if max_n_sample_records > 10000
+ sample_ratio = 1 / (Math.log(data_table_size) ** 2)
+ max_n_sample_records = (max_n_sample_records * sample_ratio).ceil
+ end
+ if max_n_unmatched_records > max_n_sample_records
+ max_n_unmatched_records = max_n_sample_records
+ end
+ max_n_unmatched_records
+ end
+
+ def filter_table
+ table = @shard.table
+ create_expression(table) do |expression|
+ yield(expression)
+ result_set = table.select(expression)
+ sort_result_set(result_set)
+ end
+ end
+
+ def sort_result_set(result_set)
+ if result_set.empty?
+ result_set.close if result_set.temporary?
+ return
+ end
+
+ if result_set.size <= @context.current_offset
+ @context.current_offset -= result_set.size
+ result_set.close if result_set.temporary?
+ return
+ end
+
+ @unsorted_result_sets << result_set if result_set.temporary?
+ sort_keys = [
+ {
+ :key => @context.enumerator.shard_key_name,
+ :order => @context.order,
+ },
+ ]
+ if @context.current_limit > 0
+ limit = @context.current_limit
+ else
+ limit = result_set.size
+ end
+ sorted_result_set = result_set.sort(sort_keys,
+ :offset => @context.current_offset,
+ :limit => limit)
+ @result_sets << sorted_result_set
+ if @context.current_offset > 0
+ @context.current_offset = 0
+ end
+ if @context.current_limit > 0
+ @context.current_limit -= sorted_result_set.size
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/logical_select.rb b/storage/mroonga/vendor/groonga/plugins/sharding/logical_select.rb
new file mode 100644
index 00000000..07ebf9e8
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/sharding/logical_select.rb
@@ -0,0 +1,975 @@
+module Groonga
+ module Sharding
+ class LogicalSelectCommand < Command
+ register("logical_select",
+ [
+ "logical_table",
+ "shard_key",
+ "min",
+ "min_border",
+ "max",
+ "max_border",
+ "filter",
+ # Deprecated since 6.1.5. Use sort_keys instead.
+ "sortby",
+ "output_columns",
+ "offset",
+ "limit",
+ "drilldown",
+ # Deprecated since 6.1.5. Use drilldown_sort_keys instead.
+ "drilldown_sortby",
+ "drilldown_output_columns",
+ "drilldown_offset",
+ "drilldown_limit",
+ "drilldown_calc_types",
+ "drilldown_calc_target",
+ "sort_keys",
+ "drilldown_sort_keys",
+ "match_columns",
+ "query",
+ "drilldown_filter",
+ ])
+
+ def run_body(input)
+ context = ExecuteContext.new(input)
+ begin
+ executor = Executor.new(context)
+ executor.execute
+
+ n_results = 1
+ n_plain_drilldowns = context.plain_drilldown.n_result_sets
+ n_labeled_drilldowns = context.labeled_drilldowns.n_result_sets
+ if n_plain_drilldowns > 0
+ n_results += n_plain_drilldowns
+ elsif
+ if n_labeled_drilldowns > 0
+ n_results += 1
+ end
+ end
+
+ writer.array("RESULT", n_results) do
+ write_records(writer, context)
+ if n_plain_drilldowns > 0
+ write_plain_drilldowns(writer, context)
+ elsif n_labeled_drilldowns > 0
+ write_labeled_drilldowns(writer, context)
+ end
+ end
+ ensure
+ context.close
+ end
+ end
+
+ private
+ def cache_key(input)
+ sort_keys = input[:sort_keys] || input[:sortby]
+ drilldown_sort_keys =
+ input[:drilldown_sort_keys] || input[:drilldown_sortby]
+ key = "logical_select\0"
+ key << "#{input[:logical_table]}\0"
+ key << "#{input[:shard_key]}\0"
+ key << "#{input[:min]}\0"
+ key << "#{input[:min_border]}\0"
+ key << "#{input[:max]}\0"
+ key << "#{input[:max_border]}\0"
+ key << "#{input[:filter]}\0"
+ key << "#{sort_keys}\0"
+ key << "#{input[:output_columns]}\0"
+ key << "#{input[:offset]}\0"
+ key << "#{input[:limit]}\0"
+ key << "#{input[:drilldown]}\0"
+ key << "#{drilldown_sort_keys}\0"
+ key << "#{input[:match_columns]}\0"
+ key << "#{input[:query]}\0"
+ key << "#{input[:drilldown_output_columns]}\0"
+ key << "#{input[:drilldown_offset]}\0"
+ key << "#{input[:drilldown_limit]}\0"
+ key << "#{input[:drilldown_calc_types]}\0"
+ key << "#{input[:drilldown_calc_target]}\0"
+ key << "#{input[:drilldown_filter]}\0"
+ labeled_drilldowns = LabeledDrilldowns.parse(input).sort_by(&:label)
+ labeled_drilldowns.each do |drilldown|
+ key << "#{drilldown.label}\0"
+ key << "#{drilldown.keys.join(',')}\0"
+ key << "#{drilldown.output_columns}\0"
+ key << "#{drilldown.offset}\0"
+ key << "#{drilldown.limit}\0"
+ key << "#{drilldown.calc_types}\0"
+ key << "#{drilldown.calc_target_name}\0"
+ key << "#{drilldown.filter}\0"
+ cache_key_dynamic_columns(key, drilldown.dynamic_columns)
+ end
+ dynamic_columns = DynamicColumns.parse(input)
+ cache_key_dynamic_columns(key, dynamic_columns)
+ key
+ end
+
+ def cache_key_dynamic_columns(key, dynamic_columns)
+ [
+ :initial,
+ :filtered,
+ :output
+ ].each do |stage|
+ target_dynamic_columns = dynamic_columns.__send__("each_#{stage}").to_a
+ target_dynamic_columns.sort_by(&:label).each do |dynamic_column|
+ key << "#{dynamic_column.label}\0"
+ key << "#{dynamic_column.stage}\0"
+ key << "#{dynamic_column.type}\0"
+ key << "#{dynamic_column.flags}\0"
+ key << "#{dynamic_column.value}\0"
+ key << "#{dynamic_column.window_sort_keys.join(',')}\0"
+ key << "#{dynamic_column.window_group_keys.join(',')}\0"
+ end
+ end
+ end
+
+ def write_records(writer, context)
+ result_sets = context.result_sets
+
+ n_hits = 0
+ n_elements = 2 # for N hits and columns
+ result_sets.each do |result_set|
+ n_hits += result_set.size
+ n_elements += result_set.size
+ end
+
+ output_columns = context.output_columns
+
+ writer.array("RESULTSET", n_elements) do
+ writer.array("NHITS", 1) do
+ writer.write(n_hits)
+ end
+ first_result_set = result_sets.first
+ if first_result_set
+ writer.write_table_columns(first_result_set, output_columns)
+ end
+
+ current_offset = context.offset
+ current_offset += n_hits if current_offset < 0
+ current_limit = context.limit
+ current_limit += n_hits + 1 if current_limit < 0
+ options = {
+ :offset => current_offset,
+ :limit => current_limit,
+ }
+ result_sets.each do |result_set|
+ if result_set.size > current_offset
+ writer.write_table_records(result_set, output_columns, options)
+ current_limit -= result_set.size
+ end
+ if current_offset > 0
+ current_offset = [current_offset - result_set.size, 0].max
+ end
+ break if current_limit <= 0
+ options[:offset] = current_offset
+ options[:limit] = current_limit
+ end
+ end
+ end
+
+ def write_plain_drilldowns(writer, execute_context)
+ plain_drilldown = execute_context.plain_drilldown
+
+ drilldowns = plain_drilldown.result_sets
+ output_columns = plain_drilldown.output_columns
+ options = {
+ :offset => plain_drilldown.offset,
+ :limit => plain_drilldown.limit,
+ }
+
+ drilldowns.each do |drilldown|
+ n_elements = 2 # for N hits and columns
+ n_elements += drilldown.size
+ writer.array("RESULTSET", n_elements) do
+ writer.array("NHITS", 1) do
+ writer.write(drilldown.size)
+ end
+ writer.write_table_columns(drilldown, output_columns)
+ writer.write_table_records(drilldown, output_columns,
+ options)
+ end
+ end
+ end
+
+ def write_labeled_drilldowns(writer, execute_context)
+ labeled_drilldowns = execute_context.labeled_drilldowns
+ is_command_version1 = (context.command_version == 1)
+
+ writer.map("DRILLDOWNS", labeled_drilldowns.n_result_sets) do
+ labeled_drilldowns.each do |drilldown|
+ writer.write(drilldown.label)
+
+ result_set = drilldown.result_set
+ n_elements = 2 # for N hits and columns
+ n_elements += result_set.size
+ output_columns = drilldown.output_columns
+ options = {
+ :offset => drilldown.offset,
+ :limit => drilldown.limit,
+ }
+
+ writer.array("RESULTSET", n_elements) do
+ writer.array("NHITS", 1) do
+ writer.write(result_set.size)
+ end
+ writer.write_table_columns(result_set, output_columns)
+ if is_command_version1 and drilldown.need_command_version2?
+ context.with_command_version(2) do
+ writer.write_table_records(result_set,
+ drilldown.output_columns_v2,
+ options)
+ end
+ else
+ writer.write_table_records(result_set, output_columns, options)
+ end
+ end
+ end
+ end
+ end
+
+ class LabeledArgumentParser
+ def initialize(parameters)
+ @parameters = parameters
+ end
+
+ def parse(prefix_pattern)
+ pattern = /\A#{prefix_pattern}\[(.+?)\]\.(.+)\z/
+ labeled_arguments = {}
+ @parameters.each do |key, value|
+ match_data = pattern.match(key)
+ next if match_data.nil?
+ labeled_argument = (labeled_arguments[match_data[1]] ||= {})
+ labeled_argument[match_data[2]] = value
+ end
+ labeled_arguments
+ end
+ end
+
+ module KeysParsable
+ private
+ def parse_keys(raw_keys)
+ return [] if raw_keys.nil?
+
+ raw_keys.strip.split(/ *, */)
+ end
+ end
+
+ module Calculatable
+ def calc_target(table)
+ return nil if @calc_target_name.nil?
+ table.find_column(@calc_target_name)
+ end
+
+ private
+ def parse_calc_types(raw_types)
+ return TableGroupFlags::CALC_COUNT if raw_types.nil?
+
+ types = 0
+ raw_types.strip.split(/ *, */).each do |name|
+ case name
+ when "COUNT"
+ types |= TableGroupFlags::CALC_COUNT
+ when "MAX"
+ types |= TableGroupFlags::CALC_MAX
+ when "MIN"
+ types |= TableGroupFlags::CALC_MIN
+ when "SUM"
+ types |= TableGroupFlags::CALC_SUM
+ when "AVG"
+ types |= TableGroupFlags::CALC_AVG
+ when "NONE"
+ # Do nothing
+ else
+ raise InvalidArgument, "invalid drilldown calc type: <#{name}>"
+ end
+ end
+ types
+ end
+ end
+
+ class ExecuteContext
+ include KeysParsable
+
+ attr_reader :enumerator
+ attr_reader :match_columns
+ attr_reader :query
+ attr_reader :filter
+ attr_reader :offset
+ attr_reader :limit
+ attr_reader :sort_keys
+ attr_reader :output_columns
+ attr_reader :dynamic_columns
+ attr_reader :result_sets
+ attr_reader :unsorted_result_sets
+ attr_reader :plain_drilldown
+ attr_reader :labeled_drilldowns
+ attr_reader :temporary_tables
+ attr_reader :expressions
+ def initialize(input)
+ @input = input
+ @enumerator = LogicalEnumerator.new("logical_select", @input)
+ @match_columns = @input[:match_columns]
+ @query = @input[:query]
+ @filter = @input[:filter]
+ @offset = (@input[:offset] || 0).to_i
+ @limit = (@input[:limit] || 10).to_i
+ @sort_keys = parse_keys(@input[:sort_keys] || @input[:sortby])
+ @output_columns = @input[:output_columns] || "_id, _key, *"
+
+ @dynamic_columns = DynamicColumns.parse(@input)
+
+ @result_sets = []
+ @unsorted_result_sets = []
+
+ @plain_drilldown = PlainDrilldownExecuteContext.new(@input)
+ @labeled_drilldowns = LabeledDrilldowns.parse(@input)
+
+ @temporary_tables = []
+
+ @expressions = []
+ end
+
+ def close
+ @result_sets.each do |result_set|
+ result_set.close if result_set.temporary?
+ end
+ @unsorted_result_sets.each do |result_set|
+ result_set.close if result_set.temporary?
+ end
+
+ @plain_drilldown.close
+ @labeled_drilldowns.close
+
+ @dynamic_columns.close
+
+ @temporary_tables.each do |table|
+ table.close
+ end
+
+ @expressions.each do |expression|
+ expression.close
+ end
+ end
+ end
+
+ class DynamicColumns
+ class << self
+ def parse(input)
+ parser = LabeledArgumentParser.new(input)
+ columns = parser.parse(/columns?/)
+
+ initial_contexts = []
+ filtered_contexts = []
+ output_contexts = []
+ columns.each do |label, parameters|
+ contexts = nil
+ case parameters["stage"]
+ when "initial"
+ contexts = initial_contexts
+ when "filtered"
+ contexts = filtered_contexts
+ when "output"
+ contexts = output_contexts
+ else
+ next
+ end
+ contexts << DynamicColumnExecuteContext.new(label, parameters)
+ end
+
+ new(initial_contexts,
+ filtered_contexts,
+ output_contexts)
+ end
+ end
+
+ def initialize(initial_contexts,
+ filtered_contexts,
+ output_contexts)
+ @initial_contexts = initial_contexts
+ @filtered_contexts = filtered_contexts
+ @output_contexts = output_contexts
+ end
+
+ def each_initial(&block)
+ @initial_contexts.each(&block)
+ end
+
+ def each_filtered(&block)
+ @filtered_contexts.each(&block)
+ end
+
+ def each_output(&block)
+ @output_contexts.each(&block)
+ end
+
+ def close
+ @initial_contexts.each do |context|
+ context.close
+ end
+ @filtered_contexts.each do |context|
+ context.close
+ end
+ @output_contexts.each do |context|
+ context.close
+ end
+ end
+ end
+
+ class DynamicColumnExecuteContext
+ include KeysParsable
+
+ attr_reader :label
+ attr_reader :stage
+ attr_reader :type
+ attr_reader :flags
+ attr_reader :value
+ attr_reader :window_sort_keys
+ attr_reader :window_group_keys
+ def initialize(label, parameters)
+ @label = label
+ @stage = parameters["stage"]
+ @type = parse_type(parameters["type"])
+ @flags = parse_flags(parameters["flags"] || "COLUMN_SCALAR")
+ @value = parameters["value"]
+ @window_sort_keys = parse_keys(parameters["window.sort_keys"])
+ @window_group_keys = parse_keys(parameters["window.group_keys"])
+ end
+
+ def close
+ end
+
+ def apply(table, condition=nil)
+ column = table.create_column(@label, @flags, @type)
+ return if table.empty?
+
+ expression = Expression.create(table)
+ begin
+ expression.parse(@value)
+ if @window_sort_keys.empty? and @window_group_keys.empty?
+ expression.condition = condition if condition
+ table.apply_expression(column, expression)
+ else
+ table.apply_window_function(column, expression,
+ :sort_keys => @window_sort_keys,
+ :group_keys => @window_group_keys)
+ end
+ ensure
+ expression.close
+ end
+ end
+
+ private
+ def parse_type(type_raw)
+ return nil if type_raw.nil?
+
+ type = Context.instance[type_raw]
+ if type.nil?
+ message = "#{error_message_tag} unknown type: <#{type_raw}>"
+ raise InvalidArgument, message
+ end
+
+ case type
+ when Type, Table
+ type
+ else
+ message = "#{error_message_tag} invalid type: #{type.grn_inspect}"
+ raise InvalidArgument, message
+ end
+ end
+
+ def parse_flags(flags_raw)
+ Column.parse_flags(error_message_tag, flags_raw)
+ end
+
+ def error_message_tag
+ "[logical_select][columns][#{@stage}][#{@label}]"
+ end
+ end
+
+ class PlainDrilldownExecuteContext
+ include KeysParsable
+ include Calculatable
+
+ attr_reader :keys
+ attr_reader :offset
+ attr_reader :limit
+ attr_reader :sort_keys
+ attr_reader :output_columns
+ attr_reader :calc_target_name
+ attr_reader :calc_types
+ attr_reader :filter
+ attr_reader :result_sets
+ attr_reader :unsorted_result_sets
+ attr_reader :temporary_tables
+ attr_reader :expressions
+ def initialize(input)
+ @input = input
+ @keys = parse_keys(@input[:drilldown])
+ @offset = (@input[:drilldown_offset] || 0).to_i
+ @limit = (@input[:drilldown_limit] || 10).to_i
+ @sort_keys = parse_keys(@input[:drilldown_sort_keys] ||
+ @input[:drilldown_sortby])
+ @output_columns = @input[:drilldown_output_columns]
+ @output_columns ||= "_key, _nsubrecs"
+ @calc_target_name = @input[:drilldown_calc_target]
+ @calc_types = parse_calc_types(@input[:drilldown_calc_types])
+ @filter = @input[:drilldown_filter]
+
+ @result_sets = []
+ @unsorted_result_sets = []
+
+ @temporary_tables = []
+
+ @expressions = []
+ end
+
+ def close
+ @result_sets.each do |result_set|
+ result_set.close
+ end
+ @unsorted_result_sets.each do |result_set|
+ result_set.close
+ end
+
+ @temporary_tables.each do |table|
+ table.close
+ end
+
+ @expressions.each do |expression|
+ expression.close
+ end
+ end
+
+ def have_keys?
+ @keys.size > 0
+ end
+
+ def n_result_sets
+ @result_sets.size
+ end
+ end
+
+ class LabeledDrilldowns
+ include Enumerable
+ include TSort
+
+ class << self
+ def parse(input)
+ parser = LabeledArgumentParser.new(input)
+ drilldowns = parser.parse(/drilldowns?/)
+
+ contexts = {}
+ drilldowns.each do |label, parameters|
+ next if parameters["keys"].nil?
+ context = LabeledDrilldownExecuteContext.new(label, parameters)
+ contexts[label] = context
+ end
+
+ new(contexts)
+ end
+ end
+
+ def initialize(contexts)
+ @contexts = contexts
+ @dependencies = {}
+ @contexts.each do |label, context|
+ if context.table
+ depended_context = @contexts[context.table]
+ if depended_context.nil?
+ raise "Unknown drilldown: <#{context.table}>"
+ end
+ @dependencies[label] = [depended_context]
+ else
+ @dependencies[label] = []
+ end
+ end
+ end
+
+ def close
+ @contexts.each_value do |context|
+ context.close
+ end
+ end
+
+ def [](label)
+ @contexts[label]
+ end
+
+ def have_keys?
+ not @contexts.empty?
+ end
+
+ def n_result_sets
+ @contexts.size
+ end
+
+ def each(&block)
+ @contexts.each_value(&block)
+ end
+
+ def tsort_each_node(&block)
+ @contexts.each_value(&block)
+ end
+
+ def tsort_each_child(context, &block)
+ @dependencies[context.label].each(&block)
+ end
+ end
+
+ class LabeledDrilldownExecuteContext
+ include KeysParsable
+ include Calculatable
+
+ attr_reader :label
+ attr_reader :keys
+ attr_reader :offset
+ attr_reader :limit
+ attr_reader :sort_keys
+ attr_reader :output_columns
+ attr_reader :calc_target_name
+ attr_reader :calc_types
+ attr_reader :filter
+ attr_reader :table
+ attr_reader :dynamic_columns
+ attr_accessor :result_set
+ attr_accessor :unsorted_result_set
+ attr_reader :temporary_tables
+ attr_reader :expressions
+ def initialize(label, parameters)
+ @label = label
+ @keys = parse_keys(parameters["keys"])
+ @offset = (parameters["offset"] || 0).to_i
+ @limit = (parameters["limit"] || 10).to_i
+ @sort_keys = parse_keys(parameters["sort_keys"] ||
+ parameters["sortby"])
+ @output_columns = parameters["output_columns"]
+ @output_columns ||= "_key, _nsubrecs"
+ @calc_target_name = parameters["calc_target"]
+ @calc_types = parse_calc_types(parameters["calc_types"])
+ @filter = parameters["filter"]
+ @table = parameters["table"]
+
+ @dynamic_columns = DynamicColumns.parse(parameters)
+
+ @result_set = nil
+ @unsorted_result_set = nil
+
+ @temporary_tables = []
+
+ @expressions = []
+ end
+
+ def close
+ @result_set.close if @result_set
+ @unsorted_result_set.close if @unsorted_result_set
+
+ @dynamic_columns.close
+
+ @temporary_tables.each do |table|
+ table.close
+ end
+
+ @expressions.each do |expression|
+ expression.close
+ end
+ end
+
+ def need_command_version2?
+ /[.\[]/ === @output_columns
+ end
+
+ def output_columns_v2
+ columns = @output_columns.strip.split(/ *, */)
+ converted_columns = columns.collect do |column|
+ match_data = /\A_value\.(.+)\z/.match(column)
+ if match_data.nil?
+ column
+ else
+ nth_key = keys.index(match_data[1])
+ if nth_key
+ "_key[#{nth_key}]"
+ else
+ column
+ end
+ end
+ end
+ converted_columns.join(",")
+ end
+ end
+
+ class Executor
+ def initialize(context)
+ @context = context
+ end
+
+ def execute
+ execute_search
+ if @context.plain_drilldown.have_keys?
+ execute_plain_drilldown
+ elsif @context.labeled_drilldowns.have_keys?
+ execute_labeled_drilldowns
+ end
+ end
+
+ private
+ def execute_search
+ first_shard = nil
+ enumerator = @context.enumerator
+ enumerator.each do |shard, shard_range|
+ first_shard ||= shard
+ shard_executor = ShardExecutor.new(@context, shard, shard_range)
+ shard_executor.execute
+ end
+ if first_shard.nil?
+ message =
+ "[logical_select] no shard exists: " +
+ "logical_table: <#{enumerator.logical_table}>: " +
+ "shard_key: <#{enumerator.shard_key_name}>"
+ raise InvalidArgument, message
+ end
+ if @context.result_sets.empty?
+ result_set = HashTable.create(:flags => ObjectFlags::WITH_SUBREC,
+ :key_type => first_shard.table)
+ @context.dynamic_columns.each_initial do |dynamic_column|
+ dynamic_column.apply(result_set)
+ end
+ @context.dynamic_columns.each_filtered do |dynamic_column|
+ dynamic_column.apply(result_set)
+ end
+ @context.result_sets << result_set
+ end
+ end
+
+ def execute_plain_drilldown
+ drilldown = @context.plain_drilldown
+ group_result = TableGroupResult.new
+ begin
+ group_result.key_begin = 0
+ group_result.key_end = 0
+ group_result.limit = 1
+ group_result.flags = drilldown.calc_types
+ drilldown.keys.each do |key|
+ @context.result_sets.each do |result_set|
+ with_calc_target(group_result,
+ drilldown.calc_target(result_set)) do
+ result_set.group([key], group_result)
+ end
+ end
+ result_set = group_result.table
+ result_set = apply_drilldown_filter(drilldown, result_set)
+ if drilldown.sort_keys.empty?
+ drilldown.result_sets << result_set
+ else
+ drilldown.result_sets << result_set.sort(drilldown.sort_keys)
+ drilldown.unsorted_result_sets << result_set
+ end
+ group_result.table = nil
+ end
+ ensure
+ group_result.close
+ end
+ end
+
+ def execute_labeled_drilldowns
+ drilldowns = @context.labeled_drilldowns
+
+ drilldowns.tsort_each do |drilldown|
+ group_result = TableGroupResult.new
+ keys = drilldown.keys
+ begin
+ group_result.key_begin = 0
+ group_result.key_end = keys.size - 1
+ if keys.size > 1
+ group_result.max_n_sub_records = 1
+ end
+ group_result.limit = 1
+ group_result.flags = drilldown.calc_types
+ if drilldown.table
+ target_table = drilldowns[drilldown.table].result_set
+ with_calc_target(group_result,
+ drilldown.calc_target(target_table)) do
+ target_table.group(keys, group_result)
+ end
+ else
+ @context.result_sets.each do |result_set|
+ with_calc_target(group_result,
+ drilldown.calc_target(result_set)) do
+ result_set.group(keys, group_result)
+ end
+ end
+ end
+ result_set = group_result.table
+ drilldown.dynamic_columns.each_initial do |dynamic_column|
+ dynamic_column.apply(result_set)
+ end
+ result_set = apply_drilldown_filter(drilldown, result_set)
+ if drilldown.sort_keys.empty?
+ drilldown.result_set = result_set
+ else
+ drilldown.result_set = result_set.sort(drilldown.sort_keys)
+ drilldown.unsorted_result_set = result_set
+ end
+ group_result.table = nil
+ ensure
+ group_result.close
+ end
+ end
+ end
+
+ def with_calc_target(group_result, calc_target)
+ group_result.calc_target = calc_target
+ begin
+ yield
+ ensure
+ calc_target.close if calc_target
+ group_result.calc_target = nil
+ end
+ end
+
+ def apply_drilldown_filter(drilldown, result_set)
+ filter = drilldown.filter
+ return result_set if filter.nil?
+
+ expression = Expression.create(result_set)
+ drilldown.expressions << expression
+ expression.parse(filter)
+ filtered_result_set = result_set.select(expression)
+ drilldown.temporary_tables << result_set
+ filtered_result_set
+ end
+ end
+
+ class ShardExecutor
+ def initialize(context, shard, shard_range)
+ @context = context
+ @shard = shard
+ @shard_range = shard_range
+
+ @target_table = @shard.table
+
+ @match_columns = @context.match_columns
+ @query = @context.query
+ @filter = @context.filter
+ @sort_keys = @context.sort_keys
+ @result_sets = @context.result_sets
+ @unsorted_result_sets = @context.unsorted_result_sets
+
+ @target_range = @context.enumerator.target_range
+
+ @cover_type = @target_range.cover_type(@shard_range)
+ end
+
+ def execute
+ return if @cover_type == :none
+ return if @target_table.empty?
+
+ shard_key = @shard.key
+ if shard_key.nil?
+ message = "[logical_select] shard_key doesn't exist: " +
+ "<#{@shard.key_name}>"
+ raise InvalidArgument, message
+ end
+
+ @context.dynamic_columns.each_initial do |dynamic_column|
+ if @target_table == @shard.table
+ @target_table = create_all_match_table(@target_table)
+ @context.temporary_tables << @target_table
+ end
+ dynamic_column.apply(@target_table)
+ end
+
+ create_expression_builder(shard_key) do |expression_builder|
+ case @cover_type
+ when :all
+ filter_shard_all(expression_builder)
+ when :partial_min
+ filter_table do |expression|
+ expression_builder.build_partial_min(expression)
+ end
+ when :partial_max
+ filter_table do |expression|
+ expression_builder.build_partial_max(expression)
+ end
+ when :partial_min_and_max
+ filter_table do |expression|
+ expression_builder.build_partial_min_and_max(expression)
+ end
+ end
+ end
+ end
+
+ private
+ def filter_shard_all(expression_builder)
+ if @query.nil? and @filter.nil?
+ add_result_set(@target_table, nil)
+ @context.temporary_tables.delete(@target_table)
+ else
+ filter_table do |expression|
+ expression_builder.build_all(expression)
+ end
+ end
+ end
+
+ def create_expression(table)
+ expression = Expression.create(table)
+ @context.expressions << expression
+ expression
+ end
+
+ def create_expression_builder(shard_key)
+ expression_builder = RangeExpressionBuilder.new(shard_key,
+ @target_range)
+ expression_builder.match_columns = @match_columns
+ expression_builder.query = @query
+ expression_builder.filter = @filter
+ begin
+ yield(expression_builder)
+ ensure
+ expression = expression_builder.match_columns_expression
+ @context.expressions << expression if expression
+ end
+ end
+
+ def filter_table
+ table = @target_table
+ expression = create_expression(table)
+ yield(expression)
+ add_result_set(table.select(expression), expression)
+ end
+
+ def add_result_set(result_set, condition)
+ if result_set.empty?
+ result_set.close
+ return
+ end
+
+ @context.dynamic_columns.each_filtered do |dynamic_column|
+ if result_set == @shard.table
+ @context.temporary_tables << result_set
+ result_set = create_all_match_table(result_set)
+ end
+ dynamic_column.apply(result_set, condition)
+ end
+
+ if @sort_keys.empty?
+ @result_sets << result_set
+ else
+ @unsorted_result_sets << result_set
+ sorted_result_set = result_set.sort(@sort_keys)
+ @result_sets << sorted_result_set
+ end
+ end
+
+ def create_all_match_table(table)
+ expression = Expression.create(table)
+ begin
+ expression.append_constant(true, Operator::PUSH, 1)
+ table.select(expression)
+ ensure
+ expression.close
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/logical_shard_list.rb b/storage/mroonga/vendor/groonga/plugins/sharding/logical_shard_list.rb
new file mode 100644
index 00000000..b8ef3f76
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/sharding/logical_shard_list.rb
@@ -0,0 +1,28 @@
+module Groonga
+ module Sharding
+ class LogicalShardListCommand < Command
+ register("logical_shard_list",
+ [
+ "logical_table",
+ ])
+
+ def run_body(input)
+ enumerator = LogicalEnumerator.new("logical_shard_list",
+ input,
+ :require_shard_key => false)
+ shard_names = enumerator.collect do |current_shard, shard_range|
+ current_shard.table_name
+ end
+
+ writer.array("shards", shard_names.size) do
+ shard_names.each do |shard_name|
+ writer.map("shard", 1) do
+ writer.write("name")
+ writer.write(shard_name)
+ end
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/logical_table_remove.rb b/storage/mroonga/vendor/groonga/plugins/sharding/logical_table_remove.rb
new file mode 100644
index 00000000..3353d6c3
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/sharding/logical_table_remove.rb
@@ -0,0 +1,345 @@
+module Groonga
+ module Sharding
+ class LogicalTableRemoveCommand < Command
+ register("logical_table_remove",
+ [
+ "logical_table",
+ "shard_key",
+ "min",
+ "min_border",
+ "max",
+ "max_border",
+ "dependent",
+ "force",
+ ])
+
+ def run_body(input)
+ @dependent = (input[:dependent] == "yes")
+ @force = (input[:force] == "yes")
+
+ enumerator = LogicalEnumerator.new("logical_table_remove", input)
+
+ success = true
+ enumerator.each do |shard, shard_range|
+ remove_shard(shard, shard_range, enumerator.target_range)
+ end
+ writer.write(success)
+ end
+
+ private
+ def remove_shard(shard, shard_range, target_range)
+ cover_type = target_range.cover_type(shard_range)
+ return if cover_type == :none
+
+ shard_key = shard.key
+ if shard_key.nil?
+ if @force
+ context.clear_error
+ else
+ message =
+ "[logical_table_remove] shard_key doesn't exist: " +
+ "<#{shard.key_name}>"
+ raise InvalidArgument, message
+ end
+ end
+ table = shard.table
+
+ if cover_type == :all or ((table.nil? or shard_key.nil?) and @force)
+ remove_table(shard, table)
+ return
+ end
+
+ expression_builder = RangeExpressionBuilder.new(shard_key,
+ target_range)
+ case cover_type
+ when :partial_min
+ remove_records(table) do |expression|
+ expression_builder.build_partial_min(expression)
+ end
+ remove_table(shard, table) if table.empty?
+ when :partial_max
+ remove_records(table) do |expression|
+ expression_builder.build_partial_max(expression)
+ end
+ remove_table(shard, table) if table.empty?
+ when :partial_min_and_max
+ remove_records(table) do |expression|
+ expression_builder.build_partial_min_and_max(expression)
+ end
+ remove_table(shard, table) if table.empty?
+ end
+ end
+
+ def collect_referenced_table_ids_from_index_ids(index_ids,
+ referenced_table_ids)
+ database = context.database
+ index_ids.each do |index_id|
+ index = context[index_id]
+ if index.nil?
+ context.clear_error
+ index_name = database[index_id]
+ lexicon_name = index_name.split(".", 2)[0]
+ lexicon_id = database[lexicon_name]
+ referenced_table_ids << lexicon_id if lexicon_id
+ else
+ referenced_table_ids << index.domain_id
+ end
+ end
+ end
+
+ def collect_referenced_table_ids_from_column_name(column_name,
+ referenced_table_ids)
+ database = context.database
+ column_id = database[column_name]
+ database.each_raw do |id, cursor|
+ next if ID.builtin?(id)
+ next if id == column_id
+
+ context.open_temporary(id) do |object|
+ if object.nil?
+ context.clear_error
+ next
+ end
+
+ case object
+ when IndexColumn
+ if object.source_ids.include?(column_id)
+ collect_referenced_table_ids_from_index_ids([id],
+ referenced_table_ids)
+ end
+ end
+ end
+ end
+ end
+
+ def collect_referenced_table_ids_from_column(column,
+ referenced_table_ids)
+ range = column.range
+ case range
+ when nil
+ context.clear_error
+ when Table
+ referenced_table_ids << range.id
+ collect_referenced_table_ids_from_index_ids(range.index_ids,
+ referenced_table_ids)
+ end
+ collect_referenced_table_ids_from_index_ids(column.index_ids,
+ referenced_table_ids)
+ end
+
+ def collect_referenced_table_ids_from_column_names(column_names)
+ referenced_table_ids = []
+ column_names.each do |column_name|
+ column = context[column_name]
+ if column.nil?
+ context.clear_error
+ collect_referenced_table_ids_from_column_name(column_name,
+ referenced_table_ids)
+ else
+ collect_referenced_table_ids_from_column(column,
+ referenced_table_ids)
+ end
+ end
+ referenced_table_ids
+ end
+
+ def collect_referenced_table_ids(shard, table)
+ return [] unless @dependent
+
+ column_names = nil
+ if table
+ begin
+ column_names = table.columns.collect(&:name)
+ rescue
+ context.clear_error
+ end
+ end
+ if column_names.nil?
+ prefix = "#{shard.table_name}."
+ column_names = []
+ context.database.each_name(:prefix => prefix) do |column_name|
+ column_names << column_name
+ end
+ end
+
+ collect_referenced_table_ids_from_column_names(column_names)
+ end
+
+ def remove_table(shard, table)
+ if table.nil?
+ unless @force
+ if context.rc == Context::RC::SUCCESS.to_i
+ error_class = InvalidArgument
+ else
+ rc = Context::RC.find(context.rc)
+ error_class = rc.error_class
+ end
+ message = "[logical_table_remove] table is broken: " +
+ "<#{shard.table_name}>: #{context.error_message}"
+ raise error_class, message
+ end
+ context.clear_error
+ end
+
+ referenced_table_ids = collect_referenced_table_ids(shard, table)
+
+ if table.nil?
+ remove_table_force(shard.table_name)
+ else
+ options = {:dependent => @dependent}
+ if @force
+ begin
+ table.remove(options)
+ rescue
+ context.clear_error
+ table.close
+ remove_table_force(shard.table_name)
+ end
+ else
+ table.remove(options)
+ end
+ end
+
+ remove_referenced_tables(shard, referenced_table_ids)
+ end
+
+ def remove_table_force(table_name)
+ database = context.database
+
+ prefix = "#{table_name}."
+ database.each_raw(:prefix => prefix) do |id, cursor|
+ column = context[id]
+ if column.nil?
+ context.clear_error
+ column_name = cursor.key
+ remove_column_force(column_name)
+ table = context[table_name]
+ if table.nil?
+ context.clear_error
+ else
+ table.close
+ end
+ else
+ remove_column(column)
+ end
+ end
+
+ table_id = database[table_name]
+ return if table_id.nil?
+
+ database.each_raw do |id, cursor|
+ next if ID.builtin?(id)
+ next if id == table_id
+
+ context.open_temporary(id) do |object|
+ if object.nil?
+ context.clear_error
+ next
+ end
+
+ case object
+ when Table
+ if object.domain_id == table_id
+ begin
+ object.remove(:dependent => @dependent)
+ rescue
+ context.clear_error
+ reference_table_name = object.name
+ object.close
+ remove_table_force(reference_table_name)
+ end
+ end
+ when Column
+ if object.range_id == table_id
+ remove_column(object)
+ end
+ end
+ end
+ end
+
+ Object.remove_force(table_name)
+ end
+
+ def remove_column(column)
+ begin
+ column.remove(:dependent => @dependent)
+ rescue
+ context.clear_error
+ column_name = column.name
+ column.close
+ remove_column_force(column_name)
+ end
+ end
+
+ def remove_column_force(column_name)
+ database = context.database
+
+ column_id = database[column_name]
+
+ column = context[column_id]
+ if column.nil?
+ context.clear_error
+ else
+ column.index_ids.each do |id|
+ index_column = context[id]
+ if index_column.nil?
+ context.clear_error
+ index_column_name = database[id]
+ remove_column_force(index_column_name)
+ else
+ remove_column(index_column)
+ end
+ end
+ column.close
+ end
+
+ Object.remove_force(column_name)
+ end
+
+ def remove_referenced_tables(shard, referenced_table_ids)
+ return if referenced_table_ids.empty?
+
+ database = context.database
+ shard_suffix = shard.range_data.to_suffix
+ referenced_table_ids.uniq.each do |referenced_table_id|
+ referenced_table_name = database[referenced_table_id]
+ next if referenced_table_name.nil?
+ next unless referenced_table_name.end_with?(shard_suffix)
+
+ referenced_table = context[referenced_table_id]
+ if referenced_table.nil?
+ context.clear_error
+ if @force
+ Object.remove_force(referenced_table_name)
+ end
+ next
+ end
+
+ if @force
+ begin
+ referenced_table.remove(:dependent => @dependent)
+ rescue
+ context.clear_error
+ referenced_table.close
+ remove_table_force(referenced_table_name)
+ end
+ else
+ referenced_table.remove(:dependent => @dependent)
+ end
+ end
+ end
+
+ def remove_records(table)
+ expression = nil
+
+ begin
+ expression = Expression.create(table)
+ yield(expression)
+ table.delete(:expression => expression)
+ ensure
+ expression.close if expression
+ end
+ end
+ end
+ end
+end
diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/parameters.rb b/storage/mroonga/vendor/groonga/plugins/sharding/parameters.rb
new file mode 100644
index 00000000..b09a9d6c
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/sharding/parameters.rb
@@ -0,0 +1,10 @@
+module Groonga
+ module Sharding
+ module Parameters
+ @range_index = :auto
+ class << self
+ attr_accessor :range_index
+ end
+ end
+ end
+end
diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/range_expression_builder.rb b/storage/mroonga/vendor/groonga/plugins/sharding/range_expression_builder.rb
new file mode 100644
index 00000000..cc80735d
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/sharding/range_expression_builder.rb
@@ -0,0 +1,88 @@
+module Groonga
+ module Sharding
+ class RangeExpressionBuilder
+ attr_reader :match_columns_expression
+
+ attr_writer :match_columns
+ attr_writer :query
+ attr_writer :filter
+
+ def initialize(key, target_range)
+ @key = key
+ @target_range = target_range
+ @match_columns_expression = nil
+ @match_columns = nil
+ @query = nil
+ @filter = nil
+ end
+
+ def build_all(expression)
+ build_condition(expression)
+ end
+
+ def build_partial_min(expression)
+ expression.append_object(@key, Operator::PUSH, 1)
+ expression.append_operator(Operator::GET_VALUE, 1)
+ expression.append_constant(@target_range.min, Operator::PUSH, 1)
+ if @target_range.min_border == :include
+ expression.append_operator(Operator::GREATER_EQUAL, 2)
+ else
+ expression.append_operator(Operator::GREATER, 2)
+ end
+ build_condition(expression)
+ end
+
+ def build_partial_max(expression)
+ expression.append_object(@key, Operator::PUSH, 1)
+ expression.append_operator(Operator::GET_VALUE, 1)
+ expression.append_constant(@target_range.max, Operator::PUSH, 1)
+ if @target_range.max_border == :include
+ expression.append_operator(Operator::LESS_EQUAL, 2)
+ else
+ expression.append_operator(Operator::LESS, 2)
+ end
+ build_condition(expression)
+ end
+
+ def build_partial_min_and_max(expression)
+ between = Groonga::Context.instance["between"]
+ expression.append_object(between, Operator::PUSH, 1)
+ expression.append_object(@key, Operator::PUSH, 1)
+ expression.append_operator(Operator::GET_VALUE, 1)
+ expression.append_constant(@target_range.min, Operator::PUSH, 1)
+ expression.append_constant(@target_range.min_border,
+ Operator::PUSH, 1)
+ expression.append_constant(@target_range.max, Operator::PUSH, 1)
+ expression.append_constant(@target_range.max_border,
+ Operator::PUSH, 1)
+ expression.append_operator(Operator::CALL, 5)
+ build_condition(expression)
+ end
+
+ private
+ def build_condition(expression)
+ if @query
+ is_empty = expression.empty?
+ if @match_columns
+ table = Context.instance[expression[0].domain]
+ @match_columns_expression = Expression.create(table)
+ @match_columns_expression.parse(@match_columns)
+ end
+ flags = Expression::SYNTAX_QUERY |
+ Expression::ALLOW_PRAGMA |
+ Expression::ALLOW_COLUMN
+ expression.parse(@query,
+ default_column: @match_columns_expression,
+ flags: flags)
+ expression.append_operator(Operator::AND, 2) unless is_empty
+ end
+
+ if @filter
+ is_empty = expression.empty?
+ expression.parse(@filter)
+ expression.append_operator(Operator::AND, 2) unless is_empty
+ end
+ end
+ end
+ end
+end
diff --git a/storage/mroonga/vendor/groonga/plugins/sharding/sources.am b/storage/mroonga/vendor/groonga/plugins/sharding/sources.am
new file mode 100644
index 00000000..df2b6d02
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/sharding/sources.am
@@ -0,0 +1,10 @@
+sharding_scripts = \
+ logical_count.rb \
+ logical_enumerator.rb \
+ logical_parameters.rb \
+ logical_range_filter.rb \
+ logical_select.rb \
+ logical_shard_list.rb \
+ logical_table_remove.rb \
+ parameters.rb \
+ range_expression_builder.rb
diff --git a/storage/mroonga/vendor/groonga/plugins/suggest/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/suggest/CMakeLists.txt
new file mode 100644
index 00000000..8b287e65
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/suggest/CMakeLists.txt
@@ -0,0 +1,36 @@
+# Copyright(C) 2012-2013 Brazil
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+include_directories(
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+ ${MRUBY_INCLUDE_DIRS}
+ ${MESSAGE_PACK_INCLUDE_DIRS})
+
+read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/sources.am SUGGEST_SOURCES)
+set_source_files_properties(${SUGGEST_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}")
+if(GRN_EMBED)
+ add_library(suggest STATIC ${SUGGEST_SOURCES})
+ set_target_properties(
+ suggest
+ PROPERTIES
+ POSITION_INDEPENDENT_CODE ON)
+else()
+ add_library(suggest MODULE ${SUGGEST_SOURCES})
+ set_target_properties(suggest PROPERTIES PREFIX "")
+ install(TARGETS suggest DESTINATION "${GRN_RELATIVE_PLUGINS_DIR}/suggest")
+endif()
+target_link_libraries(suggest libgroonga)
diff --git a/storage/mroonga/vendor/groonga/plugins/suggest/Makefile.am b/storage/mroonga/vendor/groonga/plugins/suggest/Makefile.am
new file mode 100644
index 00000000..7f321b6c
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/suggest/Makefile.am
@@ -0,0 +1,24 @@
+EXTRA_DIST = \
+ CMakeLists.txt
+
+AM_CFLAGS = \
+ $(MESSAGE_PACK_CFLAGS) \
+ $(MRUBY_CFLAGS)
+
+AM_CPPFLAGS = \
+ -I$(top_builddir) \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/lib
+
+AM_LDFLAGS = \
+ -avoid-version \
+ -module \
+ -no-undefined
+
+LIBS = \
+ $(top_builddir)/lib/libgroonga.la \
+ $(MESSAGE_PACK_LIBS)
+
+suggest_plugins_LTLIBRARIES = suggest.la
+
+include sources.am
diff --git a/storage/mroonga/vendor/groonga/plugins/suggest/sources.am b/storage/mroonga/vendor/groonga/plugins/suggest/sources.am
new file mode 100644
index 00000000..798a431a
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/suggest/sources.am
@@ -0,0 +1,2 @@
+suggest_la_SOURCES = \
+ suggest.c
diff --git a/storage/mroonga/vendor/groonga/plugins/suggest/suggest.c b/storage/mroonga/vendor/groonga/plugins/suggest/suggest.c
new file mode 100644
index 00000000..7f64f3c1
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/suggest/suggest.c
@@ -0,0 +1,1035 @@
+/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */
+/* Copyright(C) 2010-2014 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+#ifdef GRN_EMBEDDED
+# define GRN_PLUGIN_FUNCTION_TAG suggest_suggest
+#endif
+
+#include <string.h>
+
+#include "grn_ctx.h"
+#include "grn_db.h"
+#include "grn_ii.h"
+#include "grn_token_cursor.h"
+#include "grn_output.h"
+#include <groonga/plugin.h>
+
+#define VAR GRN_PROC_GET_VAR_BY_OFFSET
+#define CONST_STR_LEN(x) x, x ? sizeof(x) - 1 : 0
+#define TEXT_VALUE_LEN(x) GRN_TEXT_VALUE(x), GRN_TEXT_LEN(x)
+
+#define MIN_LEARN_DISTANCE (60 * GRN_TIME_USEC_PER_SEC)
+
+#define COMPLETE 1
+#define CORRECT 2
+#define SUGGEST 4
+
+typedef enum {
+ GRN_SUGGEST_SEARCH_YES,
+ GRN_SUGGEST_SEARCH_NO,
+ GRN_SUGGEST_SEARCH_AUTO
+} grn_suggest_search_mode;
+
+typedef struct {
+ grn_obj *post_event;
+ grn_obj *post_type;
+ grn_obj *post_item;
+ grn_obj *seq;
+ grn_obj *post_time;
+ grn_obj *pairs;
+
+ int learn_distance_in_seconds;
+
+ grn_id post_event_id;
+ grn_id post_type_id;
+ grn_id post_item_id;
+ grn_id seq_id;
+ int64_t post_time_value;
+
+ grn_obj *seqs;
+ grn_obj *seqs_events;
+ grn_obj *events;
+ grn_obj *events_item;
+ grn_obj *events_type;
+ grn_obj *events_time;
+ grn_obj *event_types;
+ grn_obj *items;
+ grn_obj *items_freq;
+ grn_obj *items_freq2;
+ grn_obj *items_last;
+ grn_obj *pairs_pre;
+ grn_obj *pairs_post;
+ grn_obj *pairs_freq0;
+ grn_obj *pairs_freq1;
+ grn_obj *pairs_freq2;
+
+ grn_obj dataset_name;
+
+ grn_obj *configuration;
+
+ grn_obj weight;
+ grn_obj pre_events;
+
+ uint64_t key_prefix;
+ grn_obj pre_item;
+} grn_suggest_learner;
+
+static int
+grn_parse_suggest_types(grn_obj *text)
+{
+ const char *nptr = GRN_TEXT_VALUE(text);
+ const char *end = GRN_BULK_CURR(text);
+ int types = 0;
+ while (nptr < end) {
+ if (*nptr == '|') {
+ nptr += 1;
+ continue;
+ }
+ {
+ const char string[] = "complete";
+ size_t length = sizeof(string) - 1;
+ if (nptr + length <= end && memcmp(nptr, string, length) == 0) {
+ types |= COMPLETE;
+ nptr += length;
+ continue;
+ }
+ }
+ {
+ const char string[] = "correct";
+ size_t length = sizeof(string) - 1;
+ if (nptr + length <= end && memcmp(nptr, string, length) == 0) {
+ types |= CORRECT;
+ nptr += length;
+ continue;
+ }
+ }
+ {
+ const char string[] = "suggest";
+ size_t length = sizeof(string) - 1;
+ if (nptr + length <= end && memcmp(nptr, string, length) == 0) {
+ types |= SUGGEST;
+ nptr += length;
+ continue;
+ }
+ }
+ break;
+ }
+ return types;
+}
+
+static double
+cooccurrence_search(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, grn_id id,
+ grn_obj *res, int query_type, int frequency_threshold,
+ double conditional_probability_threshold)
+{
+ double max_score = 0.0;
+ if (id) {
+ grn_ii_cursor *c;
+ grn_obj *co = grn_obj_column(ctx, items, CONST_STR_LEN("co"));
+ grn_obj *pairs = grn_ctx_at(ctx, grn_obj_get_range(ctx, co));
+ grn_obj *items_freq = grn_obj_column(ctx, items, CONST_STR_LEN("freq"));
+ grn_obj *items_freq2 = grn_obj_column(ctx, items, CONST_STR_LEN("freq2"));
+ grn_obj *pairs_freq, *pairs_post = grn_obj_column(ctx, pairs, CONST_STR_LEN("post"));
+ switch (query_type) {
+ case COMPLETE :
+ pairs_freq = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq0"));
+ break;
+ case CORRECT :
+ pairs_freq = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq1"));
+ break;
+ case SUGGEST :
+ pairs_freq = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq2"));
+ break;
+ default :
+ return max_score;
+ }
+ if ((c = grn_ii_cursor_open(ctx, (grn_ii *)co, id, GRN_ID_NIL, GRN_ID_MAX,
+ ((grn_ii *)co)->n_elements - 1, 0))) {
+ grn_posting *p;
+ grn_obj post, pair_freq, item_freq, item_freq2, item_boost;
+ GRN_RECORD_INIT(&post, 0, grn_obj_id(ctx, items));
+ GRN_INT32_INIT(&pair_freq, 0);
+ GRN_INT32_INIT(&item_freq, 0);
+ GRN_INT32_INIT(&item_freq2, 0);
+ GRN_INT32_INIT(&item_boost, 0);
+ while ((p = grn_ii_cursor_next(ctx, c))) {
+ grn_id post_id;
+ int pfreq, ifreq, ifreq2, boost;
+ double conditional_probability;
+ GRN_BULK_REWIND(&post);
+ GRN_BULK_REWIND(&pair_freq);
+ GRN_BULK_REWIND(&item_freq);
+ GRN_BULK_REWIND(&item_freq2);
+ GRN_BULK_REWIND(&item_boost);
+ grn_obj_get_value(ctx, pairs_post, p->rid, &post);
+ grn_obj_get_value(ctx, pairs_freq, p->rid, &pair_freq);
+ post_id = GRN_RECORD_VALUE(&post);
+ grn_obj_get_value(ctx, items_freq, post_id, &item_freq);
+ grn_obj_get_value(ctx, items_freq2, post_id, &item_freq2);
+ grn_obj_get_value(ctx, items_boost, post_id, &item_boost);
+ pfreq = GRN_INT32_VALUE(&pair_freq);
+ ifreq = GRN_INT32_VALUE(&item_freq);
+ ifreq2 = GRN_INT32_VALUE(&item_freq2);
+ if (ifreq2 > 0) {
+ conditional_probability = (double)pfreq / (double)ifreq2;
+ } else {
+ conditional_probability = 0.0;
+ }
+ boost = GRN_INT32_VALUE(&item_boost);
+ if (pfreq >= frequency_threshold && ifreq >= frequency_threshold &&
+ conditional_probability >= conditional_probability_threshold &&
+ boost >= 0) {
+ grn_rset_recinfo *ri;
+ void *value;
+ double score = pfreq;
+ int added;
+ if (max_score < score + boost) { max_score = score + boost; }
+ /* put any formula if desired */
+ if (grn_hash_add(ctx, (grn_hash *)res,
+ &post_id, sizeof(grn_id), &value, &added)) {
+ ri = value;
+ ri->score += score;
+ if (added) {
+ ri->score += boost;
+ }
+ }
+ }
+ }
+ GRN_OBJ_FIN(ctx, &post);
+ GRN_OBJ_FIN(ctx, &pair_freq);
+ GRN_OBJ_FIN(ctx, &item_freq);
+ GRN_OBJ_FIN(ctx, &item_freq2);
+ GRN_OBJ_FIN(ctx, &item_boost);
+ grn_ii_cursor_close(ctx, c);
+ }
+ }
+ return max_score;
+}
+
+#define DEFAULT_LIMIT 10
+#define DEFAULT_SORTBY "-_score"
+#define DEFAULT_OUTPUT_COLUMNS "_key,_score"
+#define DEFAULT_FREQUENCY_THRESHOLD 100
+#define DEFAULT_CONDITIONAL_PROBABILITY_THRESHOLD 0.2
+
+static void
+output(grn_ctx *ctx, grn_obj *table, grn_obj *res, grn_id tid,
+ grn_obj *sortby, grn_obj *output_columns, int offset, int limit)
+{
+ grn_obj *sorted;
+ if ((sorted = grn_table_create(ctx, NULL, 0, NULL, GRN_OBJ_TABLE_NO_KEY, NULL, res))) {
+ uint32_t nkeys;
+ grn_obj_format format;
+ grn_table_sort_key *keys;
+ const char *sortby_val = GRN_TEXT_VALUE(sortby);
+ unsigned int sortby_len = GRN_TEXT_LEN(sortby);
+ const char *oc_val = GRN_TEXT_VALUE(output_columns);
+ unsigned int oc_len = GRN_TEXT_LEN(output_columns);
+ if (!sortby_val || !sortby_len) {
+ sortby_val = DEFAULT_SORTBY;
+ sortby_len = sizeof(DEFAULT_SORTBY) - 1;
+ }
+ if (!oc_val || !oc_len) {
+ oc_val = DEFAULT_OUTPUT_COLUMNS;
+ oc_len = sizeof(DEFAULT_OUTPUT_COLUMNS) - 1;
+ }
+ if ((keys = grn_table_sort_key_from_str(ctx, sortby_val, sortby_len, res, &nkeys))) {
+ grn_table_sort(ctx, res, offset, limit, sorted, keys, nkeys);
+ GRN_QUERY_LOG(ctx, GRN_QUERY_LOG_SIZE,
+ ":", "sort(%d)", limit);
+ GRN_OBJ_FORMAT_INIT(&format, grn_table_size(ctx, res), 0, limit, offset);
+ format.flags =
+ GRN_OBJ_FORMAT_WITH_COLUMN_NAMES|
+ GRN_OBJ_FORMAT_XML_ELEMENT_RESULTSET;
+ grn_obj_columns(ctx, sorted, oc_val, oc_len, &format.columns);
+ GRN_OUTPUT_OBJ(sorted, &format);
+ GRN_OBJ_FORMAT_FIN(ctx, &format);
+ grn_table_sort_key_close(ctx, keys, nkeys);
+ }
+ grn_obj_unlink(ctx, sorted);
+ } else {
+ ERR(GRN_UNKNOWN_ERROR, "cannot create temporary sort table.");
+ }
+}
+
+static inline void
+complete_add_item(grn_ctx *ctx, grn_id id, grn_obj *res, int frequency_threshold,
+ grn_obj *items_freq, grn_obj *items_boost,
+ grn_obj *item_freq, grn_obj *item_boost)
+{
+ GRN_BULK_REWIND(item_freq);
+ GRN_BULK_REWIND(item_boost);
+ grn_obj_get_value(ctx, items_freq, id, item_freq);
+ grn_obj_get_value(ctx, items_boost, id, item_boost);
+ if (GRN_INT32_VALUE(item_boost) >= 0) {
+ double score;
+ score = 1 +
+ GRN_INT32_VALUE(item_freq) +
+ GRN_INT32_VALUE(item_boost);
+ if (score >= frequency_threshold) {
+ void *value;
+ if (grn_hash_add(ctx, (grn_hash *)res, &id, sizeof(grn_id),
+ &value, NULL)) {
+ grn_rset_recinfo *ri;
+ ri = value;
+ ri->score += score;
+ }
+ }
+ }
+}
+
+static void
+complete(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost, grn_obj *col,
+ grn_obj *query, grn_obj *sortby,
+ grn_obj *output_columns, int offset, int limit,
+ int frequency_threshold, double conditional_probability_threshold,
+ grn_suggest_search_mode prefix_search_mode)
+{
+ grn_obj *res;
+ grn_obj *items_freq = grn_obj_column(ctx, items, CONST_STR_LEN("freq"));
+ grn_obj item_freq, item_boost;
+ GRN_INT32_INIT(&item_freq, 0);
+ GRN_INT32_INIT(&item_boost, 0);
+ if ((res = grn_table_create(ctx, NULL, 0, NULL,
+ GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, items, NULL))) {
+ grn_id tid = grn_table_get(ctx, items, TEXT_VALUE_LEN(query));
+ if (GRN_TEXT_LEN(query)) {
+ grn_table_cursor *cur;
+ /* RK search + prefix search */
+ grn_obj *index;
+ /* FIXME: support index selection */
+ if (grn_column_index(ctx, col, GRN_OP_PREFIX, &index, 1, NULL)) {
+ if ((cur = grn_table_cursor_open(ctx, grn_ctx_at(ctx, index->header.domain),
+ GRN_TEXT_VALUE(query),
+ GRN_TEXT_LEN(query),
+ NULL, 0, 0, -1,
+ GRN_CURSOR_PREFIX|GRN_CURSOR_RK))) {
+ grn_id id;
+ while ((id = grn_table_cursor_next(ctx, cur))) {
+ grn_ii_cursor *icur;
+ if ((icur = grn_ii_cursor_open(ctx, (grn_ii *)index, id,
+ GRN_ID_NIL, GRN_ID_MAX, 1, 0))) {
+ grn_posting *p;
+ while ((p = grn_ii_cursor_next(ctx, icur))) {
+ complete_add_item(ctx, p->rid, res, frequency_threshold,
+ items_freq, items_boost,
+ &item_freq, &item_boost);
+ }
+ grn_ii_cursor_close(ctx, icur);
+ }
+ }
+ grn_table_cursor_close(ctx, cur);
+ } else {
+ ERR(GRN_UNKNOWN_ERROR, "cannot open cursor for prefix RK search.");
+ }
+ } else {
+ ERR(GRN_UNKNOWN_ERROR, "cannot find index for prefix RK search.");
+ }
+ cooccurrence_search(ctx, items, items_boost, tid, res, COMPLETE,
+ frequency_threshold,
+ conditional_probability_threshold);
+ if (((prefix_search_mode == GRN_SUGGEST_SEARCH_YES) ||
+ (prefix_search_mode == GRN_SUGGEST_SEARCH_AUTO &&
+ !grn_table_size(ctx, res))) &&
+ (cur = grn_table_cursor_open(ctx, items,
+ GRN_TEXT_VALUE(query),
+ GRN_TEXT_LEN(query),
+ NULL, 0, 0, -1, GRN_CURSOR_PREFIX))) {
+ grn_id id;
+ while ((id = grn_table_cursor_next(ctx, cur))) {
+ complete_add_item(ctx, id, res, frequency_threshold,
+ items_freq, items_boost, &item_freq, &item_boost);
+ }
+ grn_table_cursor_close(ctx, cur);
+ }
+ }
+ output(ctx, items, res, tid, sortby, output_columns, offset, limit);
+ grn_obj_close(ctx, res);
+ } else {
+ ERR(GRN_UNKNOWN_ERROR, "cannot create temporary table.");
+ }
+ GRN_OBJ_FIN(ctx, &item_boost);
+ GRN_OBJ_FIN(ctx, &item_freq);
+}
+
+static void
+correct(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost,
+ grn_obj *query, grn_obj *sortby,
+ grn_obj *output_columns, int offset, int limit,
+ int frequency_threshold, double conditional_probability_threshold,
+ grn_suggest_search_mode similar_search_mode)
+{
+ grn_obj *res;
+ grn_obj *items_freq2 = grn_obj_column(ctx, items, CONST_STR_LEN("freq2"));
+ grn_obj item_freq2, item_boost;
+ GRN_INT32_INIT(&item_freq2, 0);
+ GRN_INT32_INIT(&item_boost, 0);
+ if ((res = grn_table_create(ctx, NULL, 0, NULL,
+ GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, items, NULL))) {
+ grn_id tid = grn_table_get(ctx, items, TEXT_VALUE_LEN(query));
+ double max_score;
+ max_score = cooccurrence_search(ctx, items, items_boost, tid, res, CORRECT,
+ frequency_threshold,
+ conditional_probability_threshold);
+ GRN_QUERY_LOG(ctx, GRN_QUERY_LOG_SCORE,
+ ":", "cooccur(%f)", max_score);
+ if (GRN_TEXT_LEN(query) &&
+ ((similar_search_mode == GRN_SUGGEST_SEARCH_YES) ||
+ (similar_search_mode == GRN_SUGGEST_SEARCH_AUTO &&
+ max_score < frequency_threshold))) {
+ grn_obj *key, *index;
+ if ((key = grn_obj_column(ctx, items,
+ GRN_COLUMN_NAME_KEY,
+ GRN_COLUMN_NAME_KEY_LEN))) {
+ if (grn_column_index(ctx, key, GRN_OP_MATCH, &index, 1, NULL)) {
+ grn_select_optarg optarg;
+ memset(&optarg, 0, sizeof(grn_select_optarg));
+ optarg.mode = GRN_OP_SIMILAR;
+ optarg.similarity_threshold = 0;
+ optarg.max_size = 2;
+ grn_ii_select(ctx, (grn_ii *)index, TEXT_VALUE_LEN(query),
+ (grn_hash *)res, GRN_OP_OR, &optarg);
+ grn_obj_unlink(ctx, index);
+ GRN_QUERY_LOG(ctx, GRN_QUERY_LOG_SIZE,
+ ":", "similar(%d)", grn_table_size(ctx, res));
+ {
+ grn_hash_cursor *hc = grn_hash_cursor_open(ctx, (grn_hash *)res, NULL,
+ 0, NULL, 0, 0, -1, 0);
+ if (hc) {
+ while (grn_hash_cursor_next(ctx, hc)) {
+ void *key, *value;
+ if (grn_hash_cursor_get_key_value(ctx, hc, &key, NULL, &value)) {
+ grn_id *rp;
+ rp = key;
+ GRN_BULK_REWIND(&item_freq2);
+ GRN_BULK_REWIND(&item_boost);
+ grn_obj_get_value(ctx, items_freq2, *rp, &item_freq2);
+ grn_obj_get_value(ctx, items_boost, *rp, &item_boost);
+ if (GRN_INT32_VALUE(&item_boost) >= 0) {
+ double score;
+ grn_rset_recinfo *ri;
+ score = 1 +
+ (GRN_INT32_VALUE(&item_freq2) >> 4) +
+ GRN_INT32_VALUE(&item_boost);
+ ri = value;
+ ri->score += score;
+ if (score >= frequency_threshold) { continue; }
+ }
+ /* score < frequency_threshold || item_boost < 0 */
+ grn_hash_cursor_delete(ctx, hc, NULL);
+ }
+ }
+ grn_hash_cursor_close(ctx, hc);
+ }
+ }
+ GRN_QUERY_LOG(ctx, GRN_QUERY_LOG_SIZE,
+ ":", "filter(%d)", grn_table_size(ctx, res));
+ {
+ /* exec _score -= edit_distance(_key, "query string") for all records */
+ grn_obj *var;
+ grn_obj *expr;
+
+ GRN_EXPR_CREATE_FOR_QUERY(ctx, res, expr, var);
+ if (expr) {
+ grn_table_cursor *tc;
+ grn_obj *score = grn_obj_column(ctx, res,
+ GRN_COLUMN_NAME_SCORE,
+ GRN_COLUMN_NAME_SCORE_LEN);
+ grn_obj *key = grn_obj_column(ctx, res,
+ GRN_COLUMN_NAME_KEY,
+ GRN_COLUMN_NAME_KEY_LEN);
+ grn_expr_append_obj(ctx, expr,
+ score,
+ GRN_OP_GET_VALUE, 1);
+ grn_expr_append_obj(ctx, expr,
+ grn_ctx_get(ctx, CONST_STR_LEN("edit_distance")),
+ GRN_OP_PUSH, 1);
+ grn_expr_append_obj(ctx, expr,
+ key,
+ GRN_OP_GET_VALUE, 1);
+ grn_expr_append_const(ctx, expr, query, GRN_OP_PUSH, 1);
+ grn_expr_append_op(ctx, expr, GRN_OP_CALL, 2);
+ grn_expr_append_op(ctx, expr, GRN_OP_MINUS_ASSIGN, 2);
+
+ if ((tc = grn_table_cursor_open(ctx, res, NULL, 0, NULL, 0, 0, -1, 0))) {
+ grn_id id;
+ grn_obj score_value;
+ GRN_FLOAT_INIT(&score_value, 0);
+ while ((id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) {
+ GRN_RECORD_SET(ctx, var, id);
+ grn_expr_exec(ctx, expr, 0);
+ GRN_BULK_REWIND(&score_value);
+ grn_obj_get_value(ctx, score, id, &score_value);
+ if (GRN_FLOAT_VALUE(&score_value) < frequency_threshold) {
+ grn_table_cursor_delete(ctx, tc);
+ }
+ }
+ grn_obj_unlink(ctx, &score_value);
+ grn_table_cursor_close(ctx, tc);
+ }
+ grn_obj_unlink(ctx, score);
+ grn_obj_unlink(ctx, key);
+ grn_obj_unlink(ctx, expr);
+ } else {
+ ERR(GRN_UNKNOWN_ERROR,
+ "error on building expr. for calicurating edit distance");
+ }
+ }
+ }
+ grn_obj_unlink(ctx, key);
+ }
+ }
+ output(ctx, items, res, tid, sortby, output_columns, offset, limit);
+ grn_obj_close(ctx, res);
+ } else {
+ ERR(GRN_UNKNOWN_ERROR, "cannot create temporary table.");
+ }
+ GRN_OBJ_FIN(ctx, &item_boost);
+ GRN_OBJ_FIN(ctx, &item_freq2);
+}
+
+static void
+suggest(grn_ctx *ctx, grn_obj *items, grn_obj *items_boost,
+ grn_obj *query, grn_obj *sortby,
+ grn_obj *output_columns, int offset, int limit,
+ int frequency_threshold, double conditional_probability_threshold)
+{
+ grn_obj *res;
+ if ((res = grn_table_create(ctx, NULL, 0, NULL,
+ GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, items, NULL))) {
+ grn_id tid = grn_table_get(ctx, items, TEXT_VALUE_LEN(query));
+ cooccurrence_search(ctx, items, items_boost, tid, res, SUGGEST,
+ frequency_threshold, conditional_probability_threshold);
+ output(ctx, items, res, tid, sortby, output_columns, offset, limit);
+ grn_obj_close(ctx, res);
+ } else {
+ ERR(GRN_UNKNOWN_ERROR, "cannot create temporary table.");
+ }
+}
+
+static grn_suggest_search_mode
+parse_search_mode(grn_ctx *ctx, grn_obj *mode_text)
+{
+ grn_suggest_search_mode mode;
+ int mode_length;
+
+ mode_length = GRN_TEXT_LEN(mode_text);
+ if (mode_length == 3 &&
+ grn_strncasecmp("yes", GRN_TEXT_VALUE(mode_text), 3) == 0) {
+ mode = GRN_SUGGEST_SEARCH_YES;
+ } else if (mode_length == 2 &&
+ grn_strncasecmp("no", GRN_TEXT_VALUE(mode_text), 2) == 0) {
+ mode = GRN_SUGGEST_SEARCH_NO;
+ } else {
+ mode = GRN_SUGGEST_SEARCH_AUTO;
+ }
+
+ return mode;
+}
+
+static grn_obj *
+command_suggest(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+ grn_obj *items, *col, *items_boost;
+ int types;
+ int offset = 0;
+ int limit = DEFAULT_LIMIT;
+ int frequency_threshold = DEFAULT_FREQUENCY_THRESHOLD;
+ double conditional_probability_threshold =
+ DEFAULT_CONDITIONAL_PROBABILITY_THRESHOLD;
+ grn_suggest_search_mode prefix_search_mode;
+ grn_suggest_search_mode similar_search_mode;
+
+ types = grn_parse_suggest_types(VAR(0));
+ if (GRN_TEXT_LEN(VAR(6)) > 0) {
+ offset = grn_atoi(GRN_TEXT_VALUE(VAR(6)), GRN_BULK_CURR(VAR(6)), NULL);
+ }
+ if (GRN_TEXT_LEN(VAR(7)) > 0) {
+ limit = grn_atoi(GRN_TEXT_VALUE(VAR(7)), GRN_BULK_CURR(VAR(7)), NULL);
+ }
+ if (GRN_TEXT_LEN(VAR(8)) > 0) {
+ frequency_threshold = grn_atoi(GRN_TEXT_VALUE(VAR(8)), GRN_BULK_CURR(VAR(8)), NULL);
+ }
+ if (GRN_TEXT_LEN(VAR(9)) > 0) {
+ GRN_TEXT_PUTC(ctx, VAR(9), '\0');
+ conditional_probability_threshold = strtod(GRN_TEXT_VALUE(VAR(9)), NULL);
+ }
+
+ prefix_search_mode = parse_search_mode(ctx, VAR(10));
+ similar_search_mode = parse_search_mode(ctx, VAR(11));
+
+ if ((items = grn_ctx_get(ctx, TEXT_VALUE_LEN(VAR(1))))) {
+ if ((items_boost = grn_obj_column(ctx, items, CONST_STR_LEN("boost")))) {
+ int n_outputs = 0;
+ if (types & COMPLETE) {
+ n_outputs++;
+ }
+ if (types & CORRECT) {
+ n_outputs++;
+ }
+ if (types & SUGGEST) {
+ n_outputs++;
+ }
+ GRN_OUTPUT_MAP_OPEN("RESULT_SET", n_outputs);
+
+ if (types & COMPLETE) {
+ if ((col = grn_obj_column(ctx, items, TEXT_VALUE_LEN(VAR(2))))) {
+ GRN_OUTPUT_CSTR("complete");
+ complete(ctx, items, items_boost, col, VAR(3), VAR(4),
+ VAR(5), offset, limit,
+ frequency_threshold, conditional_probability_threshold,
+ prefix_search_mode);
+ } else {
+ ERR(GRN_INVALID_ARGUMENT, "invalid column.");
+ }
+ }
+ if (types & CORRECT) {
+ GRN_OUTPUT_CSTR("correct");
+ correct(ctx, items, items_boost, VAR(3), VAR(4),
+ VAR(5), offset, limit,
+ frequency_threshold, conditional_probability_threshold,
+ similar_search_mode);
+ }
+ if (types & SUGGEST) {
+ GRN_OUTPUT_CSTR("suggest");
+ suggest(ctx, items, items_boost, VAR(3), VAR(4),
+ VAR(5), offset, limit,
+ frequency_threshold, conditional_probability_threshold);
+ }
+ GRN_OUTPUT_MAP_CLOSE();
+ } else {
+ ERR(GRN_INVALID_ARGUMENT, "nonexistent column: <%.*s.boost>",
+ (int)GRN_TEXT_LEN(VAR(1)), GRN_TEXT_VALUE(VAR(1)));
+ }
+ grn_obj_unlink(ctx, items);
+ } else {
+ ERR(GRN_INVALID_ARGUMENT, "nonexistent table: <%.*s>",
+ (int)GRN_TEXT_LEN(VAR(1)), GRN_TEXT_VALUE(VAR(1)));
+ }
+ return NULL;
+}
+
+static void
+learner_init_values(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ learner->post_event_id = GRN_RECORD_VALUE(learner->post_event);
+ learner->post_type_id = GRN_RECORD_VALUE(learner->post_type);
+ learner->post_item_id = GRN_RECORD_VALUE(learner->post_item);
+ learner->seq_id = GRN_RECORD_VALUE(learner->seq);
+ learner->post_time_value = GRN_TIME_VALUE(learner->post_time);
+}
+
+static void
+learner_init(grn_ctx *ctx, grn_suggest_learner *learner,
+ grn_obj *post_event, grn_obj *post_type, grn_obj *post_item,
+ grn_obj *seq, grn_obj *post_time, grn_obj *pairs)
+{
+ learner->post_event = post_event;
+ learner->post_type = post_type;
+ learner->post_item = post_item;
+ learner->seq = seq;
+ learner->post_time = post_time;
+ learner->pairs = pairs;
+
+ learner->learn_distance_in_seconds = 0;
+
+ learner_init_values(ctx, learner);
+}
+
+static void
+learner_init_columns(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ grn_id events_id, event_types_id;
+ grn_obj *seqs, *events, *post_item, *items, *pairs;
+
+ learner->seqs = seqs = grn_ctx_at(ctx, GRN_OBJ_GET_DOMAIN(learner->seq));
+ learner->seqs_events = grn_obj_column(ctx, seqs, CONST_STR_LEN("events"));
+
+ events_id = grn_obj_get_range(ctx, learner->seqs_events);
+ learner->events = events = grn_ctx_at(ctx, events_id);
+ learner->events_item = grn_obj_column(ctx, events, CONST_STR_LEN("item"));
+ learner->events_type = grn_obj_column(ctx, events, CONST_STR_LEN("type"));
+ learner->events_time = grn_obj_column(ctx, events, CONST_STR_LEN("time"));
+
+ event_types_id = grn_obj_get_range(ctx, learner->events_type);
+ learner->event_types = grn_obj_column(ctx, events, CONST_STR_LEN("time"));
+
+ post_item = learner->post_item;
+ learner->items = items = grn_ctx_at(ctx, GRN_OBJ_GET_DOMAIN(post_item));
+ learner->items_freq = grn_obj_column(ctx, items, CONST_STR_LEN("freq"));
+ learner->items_freq2 = grn_obj_column(ctx, items, CONST_STR_LEN("freq2"));
+ learner->items_last = grn_obj_column(ctx, items, CONST_STR_LEN("last"));
+
+ pairs = learner->pairs;
+ learner->pairs_pre = grn_obj_column(ctx, pairs, CONST_STR_LEN("pre"));
+ learner->pairs_post = grn_obj_column(ctx, pairs, CONST_STR_LEN("post"));
+ learner->pairs_freq0 = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq0"));
+ learner->pairs_freq1 = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq1"));
+ learner->pairs_freq2 = grn_obj_column(ctx, pairs, CONST_STR_LEN("freq2"));
+}
+
+static void
+learner_fin_columns(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ grn_obj_unlink(ctx, learner->seqs);
+ grn_obj_unlink(ctx, learner->seqs_events);
+
+ grn_obj_unlink(ctx, learner->events);
+ grn_obj_unlink(ctx, learner->events_item);
+ grn_obj_unlink(ctx, learner->events_type);
+ grn_obj_unlink(ctx, learner->events_time);
+
+ grn_obj_unlink(ctx, learner->event_types);
+
+ grn_obj_unlink(ctx, learner->items);
+ grn_obj_unlink(ctx, learner->items_freq);
+ grn_obj_unlink(ctx, learner->items_freq2);
+ grn_obj_unlink(ctx, learner->items_last);
+
+ grn_obj_unlink(ctx, learner->pairs_pre);
+ grn_obj_unlink(ctx, learner->pairs_post);
+ grn_obj_unlink(ctx, learner->pairs_freq0);
+ grn_obj_unlink(ctx, learner->pairs_freq1);
+ grn_obj_unlink(ctx, learner->pairs_freq2);
+}
+
+static void
+learner_init_weight(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ grn_obj *weight_column = NULL;
+ unsigned int weight = 1;
+
+ if (learner->configuration) {
+ weight_column = grn_obj_column(ctx,
+ learner->configuration,
+ CONST_STR_LEN("weight"));
+ }
+ if (weight_column) {
+ grn_id id;
+ id = grn_table_get(ctx, learner->configuration,
+ GRN_TEXT_VALUE(&(learner->dataset_name)),
+ GRN_TEXT_LEN(&(learner->dataset_name)));
+ if (id != GRN_ID_NIL) {
+ grn_obj weight_value;
+ GRN_UINT32_INIT(&weight_value, 0);
+ grn_obj_get_value(ctx, weight_column, id, &weight_value);
+ weight = GRN_UINT32_VALUE(&weight_value);
+ GRN_OBJ_FIN(ctx, &weight_value);
+ }
+ grn_obj_unlink(ctx, weight_column);
+ }
+
+ GRN_UINT32_INIT(&(learner->weight), 0);
+ GRN_UINT32_SET(ctx, &(learner->weight), weight);
+}
+
+static void
+learner_init_dataset_name(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ char events_name[GRN_TABLE_MAX_KEY_SIZE];
+ unsigned int events_name_size;
+ unsigned int events_name_prefix_size;
+
+ events_name_size = grn_obj_name(ctx, learner->events,
+ events_name, GRN_TABLE_MAX_KEY_SIZE);
+ GRN_TEXT_INIT(&(learner->dataset_name), 0);
+ events_name_prefix_size = strlen("event_");
+ if (events_name_size > events_name_prefix_size) {
+ GRN_TEXT_PUT(ctx,
+ &(learner->dataset_name),
+ events_name + events_name_prefix_size,
+ events_name_size - events_name_prefix_size);
+ }
+}
+
+static void
+learner_fin_dataset_name(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ GRN_OBJ_FIN(ctx, &(learner->dataset_name));
+}
+
+static void
+learner_init_configuration(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ learner->configuration = grn_ctx_get(ctx, "configuration", -1);
+}
+
+static void
+learner_fin_configuration(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ if (learner->configuration) {
+ grn_obj_unlink(ctx, learner->configuration);
+ }
+}
+
+static void
+learner_init_buffers(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ learner_init_weight(ctx, learner);
+ GRN_RECORD_INIT(&(learner->pre_events), 0, grn_obj_id(ctx, learner->events));
+}
+
+static void
+learner_fin_buffers(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ grn_obj_unlink(ctx, &(learner->weight));
+ grn_obj_unlink(ctx, &(learner->pre_events));
+}
+
+static void
+learner_init_submit_learn(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ grn_id items_id;
+
+ learner->key_prefix = ((uint64_t)learner->post_item_id) << 32;
+
+ items_id = grn_obj_get_range(ctx, learner->events_item);
+ GRN_RECORD_INIT(&(learner->pre_item), 0, items_id);
+
+ grn_obj_get_value(ctx, learner->seqs_events, learner->seq_id,
+ &(learner->pre_events));
+}
+
+static void
+learner_fin_submit_learn(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ grn_obj_unlink(ctx, &(learner->pre_item));
+ GRN_BULK_REWIND(&(learner->pre_events));
+}
+
+static grn_bool
+learner_is_valid_input(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ return learner->post_event_id && learner->post_item_id && learner->seq_id;
+}
+
+static void
+learner_increment(grn_ctx *ctx, grn_suggest_learner *learner,
+ grn_obj *column, grn_id record_id)
+{
+ grn_obj_set_value(ctx, column, record_id, &(learner->weight), GRN_OBJ_INCR);
+}
+
+static void
+learner_increment_item_freq(grn_ctx *ctx, grn_suggest_learner *learner,
+ grn_obj *column)
+{
+ learner_increment(ctx, learner, column, learner->post_item_id);
+}
+
+static void
+learner_set_last_post_time(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ grn_obj_set_value(ctx, learner->items_last, learner->post_item_id,
+ learner->post_time, GRN_OBJ_SET);
+}
+
+static void
+learner_learn_for_complete_and_correcnt(grn_ctx *ctx,
+ grn_suggest_learner *learner)
+{
+ grn_obj *pre_item, *post_item, *pre_events;
+ grn_obj pre_type, pre_time;
+ grn_id *ep, *es;
+ uint64_t key;
+ int64_t post_time_value;
+
+ pre_item = &(learner->pre_item);
+ post_item = learner->post_item;
+ pre_events = &(learner->pre_events);
+ post_time_value = learner->post_time_value;
+ GRN_RECORD_INIT(&pre_type, 0, grn_obj_get_range(ctx, learner->events_type));
+ GRN_TIME_INIT(&pre_time, 0);
+ ep = (grn_id *)GRN_BULK_CURR(pre_events);
+ es = (grn_id *)GRN_BULK_HEAD(pre_events);
+ while (es < ep--) {
+ grn_id pair_id;
+ int added;
+ int64_t learn_distance;
+
+ GRN_BULK_REWIND(&pre_type);
+ GRN_BULK_REWIND(&pre_time);
+ GRN_BULK_REWIND(pre_item);
+ grn_obj_get_value(ctx, learner->events_type, *ep, &pre_type);
+ grn_obj_get_value(ctx, learner->events_time, *ep, &pre_time);
+ grn_obj_get_value(ctx, learner->events_item, *ep, pre_item);
+ learn_distance = post_time_value - GRN_TIME_VALUE(&pre_time);
+ if (learn_distance >= MIN_LEARN_DISTANCE) {
+ learner->learn_distance_in_seconds =
+ (int)(learn_distance / GRN_TIME_USEC_PER_SEC);
+ break;
+ }
+ key = learner->key_prefix + GRN_RECORD_VALUE(pre_item);
+ pair_id = grn_table_add(ctx, learner->pairs, &key, sizeof(uint64_t),
+ &added);
+ if (added) {
+ grn_obj_set_value(ctx, learner->pairs_pre, pair_id, pre_item,
+ GRN_OBJ_SET);
+ grn_obj_set_value(ctx, learner->pairs_post, pair_id, post_item,
+ GRN_OBJ_SET);
+ }
+ if (GRN_RECORD_VALUE(&pre_type)) {
+ learner_increment(ctx, learner, learner->pairs_freq1, pair_id);
+ break;
+ } else {
+ learner_increment(ctx, learner, learner->pairs_freq0, pair_id);
+ }
+ }
+ GRN_OBJ_FIN(ctx, &pre_type);
+ GRN_OBJ_FIN(ctx, &pre_time);
+}
+
+static void
+learner_learn_for_suggest(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ char keybuf[GRN_TABLE_MAX_KEY_SIZE];
+ int keylen = grn_table_get_key(ctx, learner->items, learner->post_item_id,
+ keybuf, GRN_TABLE_MAX_KEY_SIZE);
+ unsigned int token_flags = 0;
+ grn_token_cursor *token_cursor =
+ grn_token_cursor_open(ctx, learner->items, keybuf, keylen,
+ GRN_TOKEN_ADD, token_flags);
+ if (token_cursor) {
+ grn_id tid;
+ grn_obj *pre_item = &(learner->pre_item);
+ grn_obj *post_item = learner->post_item;
+ grn_hash *token_ids = NULL;
+ while ((tid = grn_token_cursor_next(ctx, token_cursor)) && tid != learner->post_item_id) {
+ uint64_t key;
+ int added;
+ grn_id pair_id;
+ key = learner->key_prefix + tid;
+ pair_id = grn_table_add(ctx, learner->pairs, &key, sizeof(uint64_t),
+ &added);
+ if (added) {
+ GRN_RECORD_SET(ctx, pre_item, tid);
+ grn_obj_set_value(ctx, learner->pairs_pre, pair_id,
+ pre_item, GRN_OBJ_SET);
+ grn_obj_set_value(ctx, learner->pairs_post, pair_id,
+ post_item, GRN_OBJ_SET);
+ }
+ if (!token_ids) {
+ token_ids = grn_hash_create(ctx, NULL, sizeof(grn_id), 0,
+ GRN_OBJ_TABLE_HASH_KEY|GRN_HASH_TINY);
+ }
+ if (token_ids) {
+ int token_added;
+ grn_hash_add(ctx, token_ids, &tid, sizeof(grn_id), NULL, &token_added);
+ if (token_added) {
+ learner_increment(ctx, learner, learner->pairs_freq2, pair_id);
+ }
+ }
+ }
+ if (token_ids) {
+ grn_hash_close(ctx, token_ids);
+ }
+ grn_token_cursor_close(ctx, token_cursor);
+ }
+}
+
+static void
+learner_append_post_event(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ GRN_RECORD_SET(ctx, &(learner->pre_events), learner->post_event_id);
+ grn_obj_set_value(ctx, learner->seqs_events, learner->seq_id,
+ &(learner->pre_events), GRN_OBJ_APPEND);
+}
+
+static void
+learner_learn(grn_ctx *ctx, grn_suggest_learner *learner)
+{
+ if (learner_is_valid_input(ctx, learner)) {
+ learner_init_columns(ctx, learner);
+ learner_init_dataset_name(ctx, learner);
+ learner_init_configuration(ctx, learner);
+ learner_init_buffers(ctx, learner);
+ learner_increment_item_freq(ctx, learner, learner->items_freq);
+ learner_set_last_post_time(ctx, learner);
+ if (learner->post_type_id) {
+ learner_init_submit_learn(ctx, learner);
+ learner_increment_item_freq(ctx, learner, learner->items_freq2);
+ learner_learn_for_complete_and_correcnt(ctx, learner);
+ learner_learn_for_suggest(ctx, learner);
+ learner_fin_submit_learn(ctx, learner);
+ }
+ learner_append_post_event(ctx, learner);
+ learner_fin_buffers(ctx, learner);
+ learner_fin_configuration(ctx, learner);
+ learner_fin_dataset_name(ctx, learner);
+ learner_fin_columns(ctx, learner);
+ }
+}
+
+static grn_obj *
+func_suggest_preparer(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+ int learn_distance_in_seconds = 0;
+ grn_obj *obj;
+ if (nargs == 6) {
+ grn_obj *post_event = args[0];
+ grn_obj *post_type = args[1];
+ grn_obj *post_item = args[2];
+ grn_obj *seq = args[3];
+ grn_obj *post_time = args[4];
+ grn_obj *pairs = args[5];
+ grn_suggest_learner learner;
+ learner_init(ctx, &learner,
+ post_event, post_type, post_item, seq, post_time, pairs);
+ learner_learn(ctx, &learner);
+ learn_distance_in_seconds = learner.learn_distance_in_seconds;
+ }
+ if ((obj = GRN_PROC_ALLOC(GRN_DB_UINT32, 0))) {
+ GRN_UINT32_SET(ctx, obj, learn_distance_in_seconds);
+ }
+ return obj;
+}
+
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+ return GRN_SUCCESS;
+}
+
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+ grn_expr_var vars[12];
+
+ grn_plugin_expr_var_init(ctx, &vars[0], "types", -1);
+ grn_plugin_expr_var_init(ctx, &vars[1], "table", -1);
+ grn_plugin_expr_var_init(ctx, &vars[2], "column", -1);
+ grn_plugin_expr_var_init(ctx, &vars[3], "query", -1);
+ grn_plugin_expr_var_init(ctx, &vars[4], "sortby", -1);
+ grn_plugin_expr_var_init(ctx, &vars[5], "output_columns", -1);
+ grn_plugin_expr_var_init(ctx, &vars[6], "offset", -1);
+ grn_plugin_expr_var_init(ctx, &vars[7], "limit", -1);
+ grn_plugin_expr_var_init(ctx, &vars[8], "frequency_threshold", -1);
+ grn_plugin_expr_var_init(ctx, &vars[9], "conditional_probability_threshold", -1);
+ grn_plugin_expr_var_init(ctx, &vars[10], "prefix_search", -1);
+ grn_plugin_expr_var_init(ctx, &vars[11], "similar_search", -1);
+ grn_plugin_command_create(ctx, "suggest", -1, command_suggest, 12, vars);
+
+ grn_proc_create(ctx, CONST_STR_LEN("suggest_preparer"), GRN_PROC_FUNCTION,
+ func_suggest_preparer, NULL, NULL, 0, NULL);
+ return ctx->rc;
+}
+
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+ return GRN_SUCCESS;
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/token_filters/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/token_filters/CMakeLists.txt
new file mode 100644
index 00000000..4aa7d09b
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/token_filters/CMakeLists.txt
@@ -0,0 +1,63 @@
+# Copyright(C) 2014 Brazil
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+include_directories(
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+ )
+
+set(TOKEN_FILTERS_DIR "${GRN_RELATIVE_PLUGINS_DIR}/token_filters")
+
+read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/stop_word_sources.am
+ STOP_WORD_SOURCES)
+set_source_files_properties(${STOP_WORD_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}")
+if(GRN_EMBED)
+ add_library(stop_word_token_filter STATIC ${STOP_WORD_SOURCES})
+ set_target_properties(
+ stop_word_token_filter
+ PROPERTIES
+ POSITION_INDEPENDENT_CODE ON)
+else()
+ add_library(stop_word_token_filter MODULE ${STOP_WORD_SOURCES})
+ set_target_properties(stop_word_token_filter PROPERTIES
+ PREFIX ""
+ OUTPUT_NAME "stop_word")
+ install(TARGETS stop_word_token_filter DESTINATION "${TOKEN_FILTERS_DIR}")
+endif()
+target_link_libraries(stop_word_token_filter libgroonga)
+
+if(GRN_WITH_LIBSTEMMER)
+ read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/stem_sources.am STEM_SOURCES)
+ include_directories(${LIBSTEMMER_INCLUDE_DIRS})
+ link_directories(${LIBSTEMMER_LIBRARY_DIRS})
+ set_source_files_properties(${STEM_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}")
+ if(GRN_EMBED)
+ add_library(stem_token_filter STATIC ${STEM_SOURCES})
+ set_target_properties(
+ stem_token_filter
+ PROPERTIES
+ POSITION_INDEPENDENT_CODE ON)
+ else()
+ add_library(stem_token_filter MODULE ${STEM_SOURCES})
+ set_target_properties(stem_token_filter PROPERTIES
+ PREFIX ""
+ OUTPUT_NAME "stem")
+ install(TARGETS stem_token_filter DESTINATION "${TOKEN_FILTERS_DIR}")
+ endif()
+ target_link_libraries(stem_token_filter libgroonga ${LIBSTEMMER_LIBRARIES})
+endif()
diff --git a/storage/mroonga/vendor/groonga/plugins/token_filters/Makefile.am b/storage/mroonga/vendor/groonga/plugins/token_filters/Makefile.am
new file mode 100644
index 00000000..c63bef7a
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/token_filters/Makefile.am
@@ -0,0 +1,28 @@
+EXTRA_DIST = \
+ CMakeLists.txt
+
+AM_CPPFLAGS = \
+ -I$(top_builddir) \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/lib
+
+AM_LDFLAGS = \
+ -avoid-version \
+ -module \
+ -no-undefined
+
+LIBS = \
+ $(top_builddir)/lib/libgroonga.la
+
+token_filter_plugins_LTLIBRARIES =
+token_filter_plugins_LTLIBRARIES += stop_word.la
+if WITH_LIBSTEMMER
+token_filter_plugins_LTLIBRARIES += stem.la
+endif
+
+include stop_word_sources.am
+
+include stem_sources.am
+stem_la_CPPFLAGS = $(AM_CPPFLAGS) $(LIBSTEMMER_CFLAGS)
+stem_la_LIBADD = $(LIBS) $(LIBSTEMMER_LIBS)
+stem_la_LDFLAGS = $(AM_LDFLAGS) $(LIBSTEMMER_LDFLAGS)
diff --git a/storage/mroonga/vendor/groonga/plugins/token_filters/stem.c b/storage/mroonga/vendor/groonga/plugins/token_filters/stem.c
new file mode 100644
index 00000000..2144eb09
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/token_filters/stem.c
@@ -0,0 +1,279 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+ Copyright(C) 2014 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+#ifdef GRN_EMBEDDED
+# define GRN_PLUGIN_FUNCTION_TAG token_filters_stem
+#endif
+
+#include <grn_str.h>
+
+#include <groonga.h>
+#include <groonga/token_filter.h>
+
+#include <ctype.h>
+#include <string.h>
+
+#include <libstemmer.h>
+
+typedef struct {
+ struct sb_stemmer *stemmer;
+ grn_tokenizer_token token;
+ grn_obj buffer;
+} grn_stem_token_filter;
+
+static void *
+stem_init(grn_ctx *ctx, grn_obj *table, grn_token_mode mode)
+{
+ grn_stem_token_filter *token_filter;
+
+ token_filter = GRN_PLUGIN_MALLOC(ctx, sizeof(grn_stem_token_filter));
+ if (!token_filter) {
+ GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
+ "[token-filter][stem] "
+ "failed to allocate grn_stem_token_filter");
+ return NULL;
+ }
+
+ {
+ /* TODO: Support other languages. */
+ const char *algorithm = "english";
+ const char *encoding = "UTF_8";
+ token_filter->stemmer = sb_stemmer_new(algorithm, encoding);
+ if (!token_filter->stemmer) {
+ GRN_PLUGIN_FREE(ctx, token_filter);
+ GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
+ "[token-filter][stem] "
+ "failed to create stemmer: "
+ "algorithm=<%s>, encoding=<%s>",
+ algorithm, encoding);
+ return NULL;
+ }
+ }
+ grn_tokenizer_token_init(ctx, &(token_filter->token));
+ GRN_TEXT_INIT(&(token_filter->buffer), 0);
+
+ return token_filter;
+}
+
+static grn_bool
+is_stemmable(grn_obj *data, grn_bool *is_all_upper)
+{
+ const char *current, *end;
+ grn_bool have_lower = GRN_FALSE;
+ grn_bool have_upper = GRN_FALSE;
+
+ *is_all_upper = GRN_FALSE;
+
+ switch (data->header.domain) {
+ case GRN_DB_SHORT_TEXT :
+ case GRN_DB_TEXT :
+ case GRN_DB_LONG_TEXT :
+ break;
+ default :
+ return GRN_FALSE;
+ }
+
+ current = GRN_TEXT_VALUE(data);
+ end = current + GRN_TEXT_LEN(data);
+
+ for (; current < end; current++) {
+ if (islower((unsigned char)*current)) {
+ have_lower = GRN_TRUE;
+ continue;
+ }
+ if (isupper((unsigned char)*current)) {
+ have_upper = GRN_TRUE;
+ continue;
+ }
+ if (isdigit((unsigned char)*current)) {
+ continue;
+ }
+ switch (*current) {
+ case '-' :
+ case '\'' :
+ break;
+ default :
+ return GRN_FALSE;
+ }
+ }
+
+ if (!have_lower && have_upper) {
+ *is_all_upper = GRN_TRUE;
+ }
+
+ return GRN_TRUE;
+}
+
+static void
+normalize(grn_ctx *ctx,
+ const char *string, unsigned int length,
+ grn_obj *normalized)
+{
+ const char *current, *end;
+ const char *unwritten;
+
+ current = unwritten = string;
+ end = current + length;
+
+ for (; current < end; current++) {
+ if (isupper((unsigned char)*current)) {
+ if (current > unwritten) {
+ GRN_TEXT_PUT(ctx, normalized, unwritten, current - unwritten);
+ }
+ GRN_TEXT_PUTC(ctx, normalized, tolower((unsigned char)*current));
+ unwritten = current + 1;
+ }
+ }
+
+ if (current != unwritten) {
+ GRN_TEXT_PUT(ctx, normalized, unwritten, current - unwritten);
+ }
+}
+
+static void
+unnormalize(grn_ctx *ctx,
+ const char *string, unsigned int length,
+ grn_obj *normalized)
+{
+ const char *current, *end;
+ const char *unwritten;
+
+ current = unwritten = string;
+ end = current + length;
+
+ for (; current < end; current++) {
+ if (islower((unsigned char)*current)) {
+ if (current > unwritten) {
+ GRN_TEXT_PUT(ctx, normalized, unwritten, current - unwritten);
+ }
+ GRN_TEXT_PUTC(ctx, normalized, toupper((unsigned char)*current));
+ unwritten = current + 1;
+ }
+ }
+
+ if (current != unwritten) {
+ GRN_TEXT_PUT(ctx, normalized, unwritten, current - unwritten);
+ }
+}
+
+static void
+stem_filter(grn_ctx *ctx,
+ grn_token *current_token,
+ grn_token *next_token,
+ void *user_data)
+{
+ grn_stem_token_filter *token_filter = user_data;
+ grn_obj *data;
+ grn_bool is_all_upper = GRN_FALSE;
+
+ if (GRN_CTX_GET_ENCODING(ctx) != GRN_ENC_UTF8) {
+ return;
+ }
+
+ data = grn_token_get_data(ctx, current_token);
+ if (!is_stemmable(data, &is_all_upper)) {
+ return;
+ }
+
+ {
+ const sb_symbol *stemmed;
+
+ if (is_all_upper) {
+ grn_obj *buffer;
+ buffer = &(token_filter->buffer);
+ GRN_BULK_REWIND(buffer);
+ normalize(ctx,
+ GRN_TEXT_VALUE(data),
+ GRN_TEXT_LEN(data),
+ buffer);
+ stemmed = sb_stemmer_stem(token_filter->stemmer,
+ GRN_TEXT_VALUE(buffer), GRN_TEXT_LEN(buffer));
+ if (stemmed) {
+ GRN_BULK_REWIND(buffer);
+ unnormalize(ctx,
+ stemmed,
+ sb_stemmer_length(token_filter->stemmer),
+ buffer);
+ grn_token_set_data(ctx, next_token,
+ GRN_TEXT_VALUE(buffer), GRN_TEXT_LEN(buffer));
+ } else {
+ GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
+ "[token-filter][stem] "
+ "failed to allocate memory for stemmed word: <%.*s> "
+ "(normalized: <%.*s>)",
+ (int)GRN_TEXT_LEN(data), GRN_TEXT_VALUE(data),
+ (int)GRN_TEXT_LEN(buffer), GRN_TEXT_VALUE(buffer));
+ }
+ } else {
+ stemmed = sb_stemmer_stem(token_filter->stemmer,
+ GRN_TEXT_VALUE(data), GRN_TEXT_LEN(data));
+ if (stemmed) {
+ grn_token_set_data(ctx, next_token,
+ stemmed,
+ sb_stemmer_length(token_filter->stemmer));
+ } else {
+ GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
+ "[token-filter][stem] "
+ "failed to allocate memory for stemmed word: <%.*s>",
+ (int)GRN_TEXT_LEN(data), GRN_TEXT_VALUE(data));
+ }
+ }
+ }
+}
+
+static void
+stem_fin(grn_ctx *ctx, void *user_data)
+{
+ grn_stem_token_filter *token_filter = user_data;
+ if (!token_filter) {
+ return;
+ }
+
+ grn_tokenizer_token_fin(ctx, &(token_filter->token));
+ if (token_filter->stemmer) {
+ sb_stemmer_delete(token_filter->stemmer);
+ }
+ GRN_OBJ_FIN(ctx, &(token_filter->buffer));
+ GRN_PLUGIN_FREE(ctx, token_filter);
+}
+
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+ return ctx->rc;
+}
+
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+ grn_rc rc;
+
+ rc = grn_token_filter_register(ctx,
+ "TokenFilterStem", -1,
+ stem_init,
+ stem_filter,
+ stem_fin);
+
+ return rc;
+}
+
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+ return GRN_SUCCESS;
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/token_filters/stem_sources.am b/storage/mroonga/vendor/groonga/plugins/token_filters/stem_sources.am
new file mode 100644
index 00000000..d02a3952
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/token_filters/stem_sources.am
@@ -0,0 +1,2 @@
+stem_la_SOURCES = \
+ stem.c
diff --git a/storage/mroonga/vendor/groonga/plugins/token_filters/stop_word.c b/storage/mroonga/vendor/groonga/plugins/token_filters/stop_word.c
new file mode 100644
index 00000000..a06d772f
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/token_filters/stop_word.c
@@ -0,0 +1,159 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+ Copyright(C) 2014 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+#ifdef GRN_EMBEDDED
+# define GRN_PLUGIN_FUNCTION_TAG token_filters_stop_word
+#endif
+
+#include <grn_str.h>
+
+#include <groonga.h>
+#include <groonga/token_filter.h>
+
+#include <string.h>
+
+#define COLUMN_NAME "is_stop_word"
+
+typedef struct {
+ grn_obj *table;
+ grn_token_mode mode;
+ grn_obj *column;
+ grn_obj value;
+ grn_tokenizer_token token;
+} grn_stop_word_token_filter;
+
+static void *
+stop_word_init(grn_ctx *ctx, grn_obj *table, grn_token_mode mode)
+{
+ grn_stop_word_token_filter *token_filter;
+
+ if (mode != GRN_TOKEN_GET) {
+ return NULL;
+ }
+
+ token_filter = GRN_PLUGIN_MALLOC(ctx, sizeof(grn_stop_word_token_filter));
+ if (!token_filter) {
+ GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
+ "[token-filter][stop-word] "
+ "failed to allocate grn_stop_word_token_filter");
+ return NULL;
+ }
+
+ token_filter->table = table;
+ token_filter->mode = mode;
+ token_filter->column = grn_obj_column(ctx,
+ token_filter->table,
+ COLUMN_NAME,
+ strlen(COLUMN_NAME));
+ if (!token_filter->column) {
+ char table_name[GRN_TABLE_MAX_KEY_SIZE];
+ unsigned int table_name_size;
+
+ table_name_size = grn_obj_name(ctx,
+ token_filter->table,
+ table_name,
+ GRN_TABLE_MAX_KEY_SIZE);
+ GRN_PLUGIN_ERROR(ctx, GRN_TOKEN_FILTER_ERROR,
+ "[token-filter][stop-word] "
+ "column for judging stop word doesn't exit: <%.*s.%s>",
+ table_name_size,
+ table_name,
+ COLUMN_NAME);
+ GRN_PLUGIN_FREE(ctx, token_filter);
+ return NULL;
+ }
+
+ GRN_BOOL_INIT(&(token_filter->value), 0);
+ grn_tokenizer_token_init(ctx, &(token_filter->token));
+
+ return token_filter;
+}
+
+static void
+stop_word_filter(grn_ctx *ctx,
+ grn_token *current_token,
+ grn_token *next_token,
+ void *user_data)
+{
+ grn_stop_word_token_filter *token_filter = user_data;
+ grn_id id;
+ grn_obj *data;
+
+ if (!token_filter) {
+ return;
+ }
+
+ data = grn_token_get_data(ctx, current_token);
+ id = grn_table_get(ctx,
+ token_filter->table,
+ GRN_TEXT_VALUE(data),
+ GRN_TEXT_LEN(data));
+ if (id != GRN_ID_NIL) {
+ GRN_BULK_REWIND(&(token_filter->value));
+ grn_obj_get_value(ctx,
+ token_filter->column,
+ id,
+ &(token_filter->value));
+ if (GRN_BOOL_VALUE(&(token_filter->value))) {
+ grn_tokenizer_status status;
+ status = grn_token_get_status(ctx, current_token);
+ status |= GRN_TOKEN_SKIP;
+ grn_token_set_status(ctx, next_token, status);
+ }
+ }
+}
+
+static void
+stop_word_fin(grn_ctx *ctx, void *user_data)
+{
+ grn_stop_word_token_filter *token_filter = user_data;
+ if (!token_filter) {
+ return;
+ }
+
+ grn_tokenizer_token_fin(ctx, &(token_filter->token));
+ grn_obj_unlink(ctx, token_filter->column);
+ grn_obj_unlink(ctx, &(token_filter->value));
+ GRN_PLUGIN_FREE(ctx, token_filter);
+}
+
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+ return ctx->rc;
+}
+
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+ grn_rc rc;
+
+ rc = grn_token_filter_register(ctx,
+ "TokenFilterStopWord", -1,
+ stop_word_init,
+ stop_word_filter,
+ stop_word_fin);
+
+ return rc;
+}
+
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+ return GRN_SUCCESS;
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/token_filters/stop_word_sources.am b/storage/mroonga/vendor/groonga/plugins/token_filters/stop_word_sources.am
new file mode 100644
index 00000000..bab89551
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/token_filters/stop_word_sources.am
@@ -0,0 +1,2 @@
+stop_word_la_SOURCES = \
+ stop_word.c
diff --git a/storage/mroonga/vendor/groonga/plugins/tokenizers/CMakeLists.txt b/storage/mroonga/vendor/groonga/plugins/tokenizers/CMakeLists.txt
new file mode 100644
index 00000000..26aadc4e
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/tokenizers/CMakeLists.txt
@@ -0,0 +1,76 @@
+# Copyright(C) 2012-2013 Brazil
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
+include_directories(
+ ${CMAKE_CURRENT_SOURCE_DIR}/../../lib
+ )
+
+set(TOKENIZERS_DIR "${GRN_RELATIVE_PLUGINS_DIR}/tokenizers")
+if(GRN_WITH_MECAB)
+ read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/mecab_sources.am MECAB_SOURCES)
+ include_directories(${MECAB_INCLUDE_DIRS})
+ link_directories(${MECAB_LIBRARY_DIRS})
+ if(GRN_WITH_BUNDLED_MECAB)
+ set(GRN_BUNDLED_MECAB_RELATIVE_RC_PATH "${CONFIG_DIR}/mecabrc")
+ set(MECAB_COMPILE_DEFINITIONS
+ "GRN_WITH_BUNDLED_MECAB"
+ "GRN_BUNDLED_MECAB_RELATIVE_RC_PATH=\"${GRN_BUNDLED_MECAB_RELATIVE_RC_PATH}\""
+ "GRN_BUNDLED_MECAB_RC_PATH=\"${CMAKE_INSTALL_PREFIX}/${GRN_BUNDLED_MECAB_RELATIVE_RC_PATH}\"")
+ set_source_files_properties(${MECAB_SOURCES}
+ PROPERTIES
+ COMPILE_DEFINITIONS
+ "${MECAB_COMPILE_DEFINITIONS}")
+ endif()
+ set_source_files_properties(${MECAB_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_C_COMPILE_FLAGS}")
+ if(GRN_EMBED)
+ add_library(mecab_tokenizer STATIC ${MECAB_SOURCES})
+ set_target_properties(
+ mecab_tokenizer
+ PROPERTIES
+ POSITION_INDEPENDENT_CODE ON)
+ else()
+ add_library(mecab_tokenizer MODULE ${MECAB_SOURCES})
+ set_target_properties(mecab_tokenizer PROPERTIES
+ PREFIX ""
+ OUTPUT_NAME "mecab")
+ install(TARGETS mecab_tokenizer DESTINATION "${TOKENIZERS_DIR}")
+ endif()
+ target_link_libraries(mecab_tokenizer libgroonga ${MECAB_LIBRARIES})
+endif()
+
+if(GRN_WITH_KYTEA)
+ read_file_list(${CMAKE_CURRENT_SOURCE_DIR}/kytea_sources.am KYTEA_SOURCES)
+ include_directories(${KYTEA_INCLUDE_DIRS})
+ link_directories(${KYTEA_LIBRARY_DIRS})
+ set_source_files_properties(${KYTEA_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${GRN_CXX_COMPILE_FLAGS}")
+ if(GRN_EMBED)
+ add_library(kytea_tokenizer STATIC ${KYTEA_SOURCES})
+ set_target_properties(
+ kytea_tokenizer
+ PROPERTIES
+ POSITION_INDEPENDENT_CODE ON)
+ else()
+ add_library(kytea_tokenizer MODULE ${KYTEA_SOURCES})
+ set_target_properties(kytea_tokenizer PROPERTIES
+ PREFIX ""
+ OUTPUT_NAME "kytea")
+ install(TARGETS kytea_tokenizer DESTINATION "${TOKENIZERS_DIR}")
+ endif()
+ target_link_libraries(kytea_tokenizer libgroonga ${KYTEA_LIBRARIES})
+endif()
diff --git a/storage/mroonga/vendor/groonga/plugins/tokenizers/Makefile.am b/storage/mroonga/vendor/groonga/plugins/tokenizers/Makefile.am
new file mode 100644
index 00000000..9e10612b
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/tokenizers/Makefile.am
@@ -0,0 +1,33 @@
+EXTRA_DIST = \
+ CMakeLists.txt
+
+AM_CPPFLAGS = \
+ -I$(top_builddir) \
+ -I$(top_srcdir)/include \
+ -I$(top_srcdir)/lib
+
+AM_LDFLAGS = \
+ -avoid-version \
+ -module \
+ -no-undefined
+
+LIBS = \
+ $(top_builddir)/lib/libgroonga.la
+
+tokenizer_plugins_LTLIBRARIES =
+if WITH_MECAB
+tokenizer_plugins_LTLIBRARIES += mecab.la
+endif
+if WITH_KYTEA
+tokenizer_plugins_LTLIBRARIES += kytea.la
+endif
+
+include mecab_sources.am
+mecab_la_CPPFLAGS = $(AM_CPPFLAGS) $(MECAB_CPPFLAGS)
+mecab_la_LIBADD = $(LIBS) $(MECAB_LIBS)
+mecab_la_LDFLAGS = $(AM_LDFLAGS) $(MECAB_LDFLAGS)
+
+include kytea_sources.am
+kytea_la_CPPFLAGS = $(AM_CPPFLAGS) $(KYTEA_CFLAGS)
+kytea_la_LIBADD = $(LIBS) $(KYTEA_LIBS)
+kytea_la_LDFLAGS = $(AM_LDFLAGS) $(KYTEA_LDFLAGS)
diff --git a/storage/mroonga/vendor/groonga/plugins/tokenizers/kytea.cpp b/storage/mroonga/vendor/groonga/plugins/tokenizers/kytea.cpp
new file mode 100644
index 00000000..76d827c0
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/tokenizers/kytea.cpp
@@ -0,0 +1,358 @@
+/* -*- c-basic-offset: 2 -*- */
+/* Copyright(C) 2012 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+#ifdef GRN_EMBEDDED
+# define GRN_PLUGIN_FUNCTION_TAG tokenizers_kytea
+#endif
+
+#include <groonga/tokenizer.h>
+
+#include <kytea/kytea.h>
+#include <kytea/string-util.h>
+
+#include <string.h>
+
+#include <string>
+#include <vector>
+
+namespace {
+
+grn_plugin_mutex *kytea_mutex = NULL;
+kytea::KyteaConfig *kytea_config = NULL;
+kytea::Kytea *kytea_tagger = NULL;
+kytea::StringUtil *kytea_util = NULL;
+
+void kytea_init(grn_ctx *ctx);
+void kytea_fin(grn_ctx *ctx);
+
+void kytea_init(grn_ctx *ctx) {
+ if (kytea_mutex || kytea_config || kytea_tagger || kytea_util) {
+ GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+ "[tokenizer][kytea] "
+ "TokenKytea is already initialized");
+ return;
+ }
+
+ kytea_mutex = grn_plugin_mutex_open(ctx);
+ if (!kytea_mutex) {
+ kytea_fin(ctx);
+ GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
+ "[tokenizer][kytea] "
+ "grn_plugin_mutex_open() failed");
+ return;
+ }
+
+ kytea::KyteaConfig * const config = static_cast<kytea::KyteaConfig *>(
+ GRN_PLUGIN_MALLOC(ctx, sizeof(kytea::KyteaConfig)));
+ if (!config) {
+ kytea_fin(ctx);
+ GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
+ "[tokenizer][kytea] "
+ "memory allocation to kytea::KyteaConfig failed");
+ return;
+ }
+
+ try {
+ new (config) kytea::KyteaConfig;
+ kytea_config = config;
+ try {
+ kytea_config->setDebug(0);
+ kytea_config->setOnTraining(false);
+ kytea_config->parseRunCommandLine(0, NULL);
+ } catch (...) {
+ kytea_fin(ctx);
+ GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+ "[tokenizer][kytea] "
+ "kytea::KyteaConfig settings failed");
+ return;
+ }
+ } catch (...) {
+ GRN_PLUGIN_FREE(ctx, config);
+ kytea_fin(ctx);
+ GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+ "[tokenizer][kytea] "
+ "kytea::KyteaConfig initialization failed");
+ return;
+ }
+
+ kytea::Kytea * const tagger = static_cast<kytea::Kytea *>(
+ GRN_PLUGIN_MALLOC(ctx, sizeof(kytea::Kytea)));
+ if (!tagger) {
+ kytea_fin(ctx);
+ GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
+ "[tokenizer][kytea] "
+ "memory allocation to kytea::Kytea failed");
+ return;
+ }
+
+ try {
+ new (tagger) kytea::Kytea;
+ kytea_tagger = tagger;
+ try {
+ kytea_tagger->readModel(kytea_config->getModelFile().c_str());
+ } catch (...) {
+ kytea_fin(ctx);
+ GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+ "[tokenizer][kytea] "
+ "kytea::Kytea::readModel() failed");
+ return;
+ }
+ } catch (...) {
+ GRN_PLUGIN_FREE(ctx, tagger);
+ kytea_fin(ctx);
+ GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+ "[tokenizer][kytea] "
+ "kytea::Kytea initialization failed");
+ return;
+ }
+
+ try {
+ kytea_util = kytea_tagger->getStringUtil();
+ } catch (...) {
+ kytea_fin(ctx);
+ GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+ "[tokenizer][kytea] "
+ "kytea::Kytea::getStringUtil() failed");
+ return;
+ }
+}
+
+void kytea_fin(grn_ctx *ctx) {
+ kytea_util = NULL;
+
+ if (kytea_tagger) {
+ kytea_tagger->~Kytea();
+ GRN_PLUGIN_FREE(ctx, kytea_tagger);
+ kytea_tagger = NULL;
+ }
+
+ if (kytea_config) {
+ kytea_config->~KyteaConfig();
+ GRN_PLUGIN_FREE(ctx, kytea_config);
+ kytea_config = NULL;
+ }
+
+ if (kytea_mutex) {
+ grn_plugin_mutex_close(ctx, kytea_mutex);
+ kytea_mutex = NULL;
+ }
+}
+
+struct grn_tokenizer_kytea {
+ grn_tokenizer_query *query;
+ kytea::KyteaSentence sentence;
+ std::vector<std::string> tokens;
+ std::size_t id;
+ grn_tokenizer_token token;
+ const char *rest_query_string;
+ unsigned int rest_query_string_length;
+
+ grn_tokenizer_kytea() :
+ query(NULL),
+ sentence(),
+ tokens(),
+ id(0),
+ token(),
+ rest_query_string(NULL)
+ {
+ }
+ ~grn_tokenizer_kytea() {}
+};
+
+void grn_tokenizer_kytea_init(grn_ctx *ctx, grn_tokenizer_kytea *tokenizer) {
+ new (tokenizer) grn_tokenizer_kytea;
+ grn_tokenizer_token_init(ctx, &tokenizer->token);
+}
+
+void grn_tokenizer_kytea_fin(grn_ctx *ctx, grn_tokenizer_kytea *tokenizer) {
+ grn_tokenizer_token_fin(ctx, &tokenizer->token);
+ if (tokenizer->query) {
+ grn_tokenizer_query_close(ctx, tokenizer->query);
+ }
+ tokenizer->~grn_tokenizer_kytea();
+}
+
+grn_obj *grn_kytea_init(grn_ctx *ctx, int num_args, grn_obj **args,
+ grn_user_data *user_data) {
+ unsigned int normalizer_flags = 0;
+ grn_tokenizer_query * const query =
+ grn_tokenizer_query_open(ctx, num_args, args, normalizer_flags);
+ if (!query) {
+ return NULL;
+ }
+
+ grn_tokenizer_kytea * const tokenizer = static_cast<grn_tokenizer_kytea *>(
+ GRN_PLUGIN_MALLOC(ctx, sizeof(grn_tokenizer_kytea)));
+ if (!tokenizer) {
+ grn_tokenizer_query_close(ctx, query);
+ GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
+ "[tokenizer][kytea] "
+ "memory allocation to grn_tokenizer_kytea failed");
+ return NULL;
+ }
+
+ try {
+ grn_tokenizer_kytea_init(ctx, tokenizer);
+ } catch (...) {
+ grn_tokenizer_query_close(ctx, query);
+ GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+ "[tokenizer][kytea] "
+ "tokenizer initialization failed");
+ return NULL;
+ }
+
+ tokenizer->query = query;
+
+ grn_obj *normalized_query = query->normalized_query;
+ const char *normalized_string;
+ unsigned int normalized_string_length;
+ grn_string_get_normalized(ctx,
+ normalized_query,
+ &normalized_string,
+ &normalized_string_length,
+ NULL);
+ if (tokenizer->query->have_tokenized_delimiter) {
+ tokenizer->rest_query_string = normalized_string;
+ tokenizer->rest_query_string_length = normalized_string_length;
+ } else {
+ grn_plugin_mutex_lock(ctx, kytea_mutex);
+ try {
+ const std::string str(normalized_string, normalized_string_length);
+ const kytea::KyteaString &surface_str = kytea_util->mapString(str);
+ const kytea::KyteaString &normalized_str = kytea_util->normalize(surface_str);
+ tokenizer->sentence = kytea::KyteaSentence(surface_str, normalized_str);
+ kytea_tagger->calculateWS(tokenizer->sentence);
+ } catch (...) {
+ grn_plugin_mutex_unlock(ctx, kytea_mutex);
+ GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+ "[tokenizer][kytea] "
+ "tokenization failed");
+ return NULL;
+ }
+ grn_plugin_mutex_unlock(ctx, kytea_mutex);
+
+ try {
+ for (std::size_t i = 0; i < tokenizer->sentence.words.size(); ++i) {
+ const std::string &token =
+ kytea_util->showString(tokenizer->sentence.words[i].surface);
+ const char *ptr = token.c_str();
+ unsigned int left = static_cast<unsigned int>(token.length());
+ while (left > 0) {
+ const int char_length =
+ grn_tokenizer_charlen(ctx, ptr, left, query->encoding);
+ if ((char_length == 0) ||
+ (grn_tokenizer_isspace(ctx, ptr, left, query->encoding) != 0)) {
+ break;
+ }
+ ptr += char_length;
+ left -= char_length;
+ }
+ if (left == 0) {
+ tokenizer->tokens.push_back(token);
+ }
+ }
+ } catch (...) {
+ GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+ "[tokenizer][kytea] "
+ "adjustment failed");
+ return NULL;
+ }
+ }
+
+ user_data->ptr = tokenizer;
+ return NULL;
+}
+
+grn_obj *grn_kytea_next(grn_ctx *ctx, int num_args, grn_obj **args,
+ grn_user_data *user_data) {
+ grn_tokenizer_kytea * const tokenizer =
+ static_cast<grn_tokenizer_kytea *>(user_data->ptr);
+
+ if (tokenizer->query->have_tokenized_delimiter) {
+ unsigned int rest_query_string_length =
+ tokenizer->rest_query_string_length;
+ const char *rest_query_string =
+ grn_tokenizer_tokenized_delimiter_next(ctx,
+ &(tokenizer->token),
+ tokenizer->rest_query_string,
+ rest_query_string_length,
+ tokenizer->query->encoding);
+ if (rest_query_string) {
+ tokenizer->rest_query_string_length -=
+ rest_query_string - tokenizer->rest_query_string;
+ }
+ tokenizer->rest_query_string = rest_query_string;
+ } else {
+ const grn_tokenizer_status status =
+ ((tokenizer->id + 1) < tokenizer->tokens.size()) ?
+ GRN_TOKENIZER_CONTINUE : GRN_TOKENIZER_LAST;
+ if (tokenizer->id < tokenizer->tokens.size()) {
+ const std::string &token = tokenizer->tokens[tokenizer->id++];
+ grn_tokenizer_token_push(ctx, &tokenizer->token,
+ token.c_str(), token.length(), status);
+ } else {
+ grn_tokenizer_token_push(ctx, &tokenizer->token, "", 0, status);
+ }
+ }
+
+ return NULL;
+}
+
+grn_obj *grn_kytea_fin(grn_ctx *ctx, int num_args, grn_obj **args,
+ grn_user_data *user_data) {
+ grn_tokenizer_kytea * const tokenizer =
+ static_cast<grn_tokenizer_kytea *>(user_data->ptr);
+ if (tokenizer) {
+ grn_tokenizer_kytea_fin(ctx, tokenizer);
+ GRN_PLUGIN_FREE(ctx, tokenizer);
+ }
+ return NULL;
+}
+
+} // namespace
+
+extern "C" {
+
+/*
+ GRN_PLUGIN_INIT() is called to initialize this plugin. Note that an error
+ code must be set in `ctx->rc' on failure.
+ */
+grn_rc GRN_PLUGIN_INIT(grn_ctx *ctx) {
+ kytea_init(ctx);
+ return ctx->rc;
+}
+
+/*
+ GRN_PLUGIN_REGISTER() registers this plugin to the database associated with
+ `ctx'. The registration requires the plugin name and the functions to be
+ called for tokenization.
+ */
+grn_rc GRN_PLUGIN_REGISTER(grn_ctx *ctx) {
+ return grn_tokenizer_register(ctx, "TokenKytea", 10, grn_kytea_init,
+ grn_kytea_next, grn_kytea_fin);
+}
+
+/*
+ GRN_PLUGIN_FIN() is called to finalize the plugin that was initialized by
+ GRN_PLUGIN_INIT().
+ */
+grn_rc GRN_PLUGIN_FIN(grn_ctx *ctx) {
+ kytea_fin(ctx);
+ return GRN_SUCCESS;
+}
+
+} // extern "C"
diff --git a/storage/mroonga/vendor/groonga/plugins/tokenizers/kytea_sources.am b/storage/mroonga/vendor/groonga/plugins/tokenizers/kytea_sources.am
new file mode 100644
index 00000000..182f3857
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/tokenizers/kytea_sources.am
@@ -0,0 +1,2 @@
+kytea_la_SOURCES = \
+ kytea.cpp
diff --git a/storage/mroonga/vendor/groonga/plugins/tokenizers/mecab.c b/storage/mroonga/vendor/groonga/plugins/tokenizers/mecab.c
new file mode 100644
index 00000000..cabf2c94
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/tokenizers/mecab.c
@@ -0,0 +1,660 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+ Copyright(C) 2009-2016 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+#ifdef GRN_EMBEDDED
+# define GRN_PLUGIN_FUNCTION_TAG tokenizers_mecab
+#endif
+
+#include <grn_str.h>
+
+#include <groonga.h>
+#include <groonga/tokenizer.h>
+
+#include <mecab.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+static unsigned int sole_mecab_init_counter = 0;
+static mecab_t *sole_mecab = NULL;
+static grn_plugin_mutex *sole_mecab_mutex = NULL;
+static grn_encoding sole_mecab_encoding = GRN_ENC_NONE;
+
+static grn_bool grn_mecab_chunked_tokenize_enabled = GRN_FALSE;
+static int grn_mecab_chunk_size_threshold = 8192;
+
+typedef struct {
+ mecab_t *mecab;
+ grn_obj buf;
+ const char *next;
+ const char *end;
+ grn_tokenizer_query *query;
+ grn_tokenizer_token token;
+} grn_mecab_tokenizer;
+
+static const char *
+mecab_global_error_message(void)
+{
+ double version;
+
+ version = atof(mecab_version());
+ /* MeCab <= 0.993 doesn't support mecab_strerror(NULL). */
+ if (version <= 0.993) {
+ return "Unknown";
+ }
+
+ return mecab_strerror(NULL);
+}
+
+
+static grn_encoding
+translate_mecab_charset_to_grn_encoding(const char *charset)
+{
+ if (grn_strcasecmp(charset, "euc-jp") == 0) {
+ return GRN_ENC_EUC_JP;
+ } else if (grn_strcasecmp(charset, "utf-8") == 0 ||
+ grn_strcasecmp(charset, "utf8") == 0) {
+ return GRN_ENC_UTF8;
+ } else if (grn_strcasecmp(charset, "shift_jis") == 0 ||
+ grn_strcasecmp(charset, "shift-jis") == 0 ||
+ grn_strcasecmp(charset, "sjis") == 0) {
+ return GRN_ENC_SJIS;
+ }
+ return GRN_ENC_NONE;
+}
+
+static grn_encoding
+get_mecab_encoding(mecab_t *mecab)
+{
+ grn_encoding encoding = GRN_ENC_NONE;
+ const mecab_dictionary_info_t *dictionary_info;
+ dictionary_info = mecab_dictionary_info(mecab);
+ if (dictionary_info) {
+ const char *charset = dictionary_info->charset;
+ encoding = translate_mecab_charset_to_grn_encoding(charset);
+ }
+ return encoding;
+}
+
+static inline grn_bool
+is_delimiter_character(grn_ctx *ctx, const char *character, int character_bytes)
+{
+ switch (character_bytes) {
+ case 1 :
+ switch (character[0]) {
+ case ',' :
+ case '.' :
+ case '!' :
+ case '?' :
+ return GRN_TRUE;
+ default :
+ return GRN_FALSE;
+ }
+ case 3 :
+ switch ((unsigned char)(character[0])) {
+ case 0xE3 :
+ switch ((unsigned char)(character[1])) {
+ case 0x80 :
+ switch ((unsigned char)(character[2])) {
+ case 0x81 : /* U+3001 (0xE3 0x80 0x81 in UTF-8) IDEOGRAPHIC COMMA */
+ case 0x82 : /* U+3002 (0xE3 0x80 0x82 in UTF-8) IDEOGRAPHIC FULL STOP */
+ return GRN_TRUE;
+ default :
+ return GRN_FALSE;
+ }
+ default :
+ return GRN_FALSE;
+ }
+ return GRN_FALSE;
+ case 0xEF :
+ switch ((unsigned char)(character[1])) {
+ case 0xBC :
+ switch ((unsigned char)(character[2])) {
+ case 0x81 :
+ /* U+FF01 (0xEF 0xBC 0x81 in UTF-8) FULLWIDTH EXCLAMATION MARK */
+ case 0x9F :
+ /* U+FF1F (0xEF 0xBC 0x9F in UTF-8) FULLWIDTH QUESTION MARK */
+ return GRN_TRUE;
+ default :
+ return GRN_FALSE;
+ }
+ default :
+ return GRN_FALSE;
+ }
+ return GRN_FALSE;
+ default :
+ return GRN_FALSE;
+ }
+ default :
+ return GRN_FALSE;
+ }
+}
+
+static grn_bool
+chunked_tokenize_utf8_chunk(grn_ctx *ctx,
+ grn_mecab_tokenizer *tokenizer,
+ const char *chunk,
+ unsigned int chunk_bytes)
+{
+ const char *tokenized_chunk;
+ size_t tokenized_chunk_length;
+
+ tokenized_chunk = mecab_sparse_tostr2(tokenizer->mecab, chunk, chunk_bytes);
+ if (!tokenized_chunk) {
+ GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+ "[tokenizer][mecab][chunk] "
+ "mecab_sparse_tostr2() failed len=%d err=%s",
+ chunk_bytes,
+ mecab_strerror(tokenizer->mecab));
+ return GRN_FALSE;
+ }
+
+ if (GRN_TEXT_LEN(&(tokenizer->buf)) > 0) {
+ GRN_TEXT_PUTS(ctx, &(tokenizer->buf), " ");
+ }
+
+ tokenized_chunk_length = strlen(tokenized_chunk);
+ if (tokenized_chunk_length >= 1 &&
+ isspace((unsigned char)tokenized_chunk[tokenized_chunk_length - 1])) {
+ GRN_TEXT_PUT(ctx, &(tokenizer->buf),
+ tokenized_chunk, tokenized_chunk_length - 1);
+ } else {
+ GRN_TEXT_PUT(ctx, &(tokenizer->buf),
+ tokenized_chunk, tokenized_chunk_length);
+ }
+
+ return GRN_TRUE;
+}
+
+static grn_bool
+chunked_tokenize_utf8(grn_ctx *ctx,
+ grn_mecab_tokenizer *tokenizer,
+ const char *string,
+ unsigned int string_bytes)
+{
+ const char *chunk_start;
+ const char *current;
+ const char *last_delimiter;
+ const char *string_end = string + string_bytes;
+ grn_encoding encoding = tokenizer->query->encoding;
+
+ if (string_bytes < grn_mecab_chunk_size_threshold) {
+ return chunked_tokenize_utf8_chunk(ctx,
+ tokenizer,
+ string,
+ string_bytes);
+ }
+
+ chunk_start = current = string;
+ last_delimiter = NULL;
+ while (current < string_end) {
+ int space_bytes;
+ int character_bytes;
+ const char *current_character;
+
+ space_bytes = grn_isspace(current, encoding);
+ if (space_bytes > 0) {
+ if (chunk_start != current) {
+ grn_bool succeeded;
+ succeeded = chunked_tokenize_utf8_chunk(ctx,
+ tokenizer,
+ chunk_start,
+ current - chunk_start);
+ if (!succeeded) {
+ return succeeded;
+ }
+ }
+ current += space_bytes;
+ chunk_start = current;
+ last_delimiter = NULL;
+ continue;
+ }
+
+ character_bytes = grn_charlen_(ctx, current, string_end, encoding);
+ if (character_bytes == 0) {
+ GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+ "[tokenizer][mecab][chunk] "
+ "invalid byte sequence: position=%d",
+ (int)(current - string));
+ return GRN_FALSE;
+ }
+
+ current_character = current;
+ current += character_bytes;
+ if (is_delimiter_character(ctx, current_character, character_bytes)) {
+ last_delimiter = current;
+ }
+
+ if ((current - chunk_start) >= grn_mecab_chunk_size_threshold) {
+ grn_bool succeeded;
+ if (last_delimiter) {
+ succeeded = chunked_tokenize_utf8_chunk(ctx,
+ tokenizer,
+ chunk_start,
+ last_delimiter - chunk_start);
+ chunk_start = last_delimiter;
+ } else {
+ succeeded = chunked_tokenize_utf8_chunk(ctx,
+ tokenizer,
+ chunk_start,
+ current - chunk_start);
+ chunk_start = current;
+ }
+ if (!succeeded) {
+ return succeeded;
+ }
+ last_delimiter = NULL;
+ }
+ }
+
+ if (current == chunk_start) {
+ return GRN_TRUE;
+ } else {
+ return chunked_tokenize_utf8_chunk(ctx,
+ tokenizer,
+ chunk_start,
+ current - chunk_start);
+ }
+}
+
+static mecab_t *
+mecab_create(grn_ctx *ctx)
+{
+ mecab_t *mecab;
+ int argc = 0;
+ const char *argv[4];
+
+ argv[argc++] = "Groonga";
+ argv[argc++] = "-Owakati";
+#ifdef GRN_WITH_BUNDLED_MECAB
+ argv[argc++] = "--rcfile";
+# ifdef WIN32
+ {
+ static char windows_mecab_rc_file[PATH_MAX];
+
+ grn_strcpy(windows_mecab_rc_file,
+ PATH_MAX,
+ grn_plugin_windows_base_dir());
+ grn_strcat(windows_mecab_rc_file,
+ PATH_MAX,
+ "/");
+ grn_strcat(windows_mecab_rc_file,
+ PATH_MAX,
+ GRN_BUNDLED_MECAB_RELATIVE_RC_PATH);
+ {
+ char *c;
+ for (c = windows_mecab_rc_file; *c != '\0'; c++) {
+ if (*c == '/') {
+ *c = '\\';
+ }
+ }
+ }
+ argv[argc++] = windows_mecab_rc_file;
+ }
+# else /* WIN32 */
+ argv[argc++] = GRN_BUNDLED_MECAB_RC_PATH;
+# endif /* WIN32 */
+#endif /* GRN_WITH_BUNDLED_MECAB */
+ mecab = mecab_new(argc, (char **)argv);
+
+ if (!mecab) {
+#ifdef GRN_WITH_BUNDLED_MECAB
+ GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+ "[tokenizer][mecab] failed to create mecab_t: %s: "
+ "mecab_new(\"%s\", \"%s\", \"%s\", \"%s\")",
+ mecab_global_error_message(),
+ argv[0], argv[1], argv[2], argv[3]);
+#else /* GRN_WITH_BUNDLED_MECAB */
+ GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+ "[tokenizer][mecab] failed to create mecab_t: %s: "
+ "mecab_new(\"%s\", \"%s\")",
+ mecab_global_error_message(),
+ argv[0], argv[1]);
+#endif /* GRN_WITH_BUNDLED_MECAB */
+ }
+
+ return mecab;
+}
+
+/*
+ This function is called for a full text search query or a document to be
+ indexed. This means that both short/long strings are given.
+ The return value of this function is ignored. When an error occurs in this
+ function, `ctx->rc' is overwritten with an error code (not GRN_SUCCESS).
+ */
+static grn_obj *
+mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+ grn_mecab_tokenizer *tokenizer;
+ unsigned int normalizer_flags = 0;
+ grn_tokenizer_query *query;
+ grn_obj *normalized_query;
+ const char *normalized_string;
+ unsigned int normalized_string_length;
+
+ query = grn_tokenizer_query_open(ctx, nargs, args, normalizer_flags);
+ if (!query) {
+ return NULL;
+ }
+ if (!sole_mecab) {
+ grn_plugin_mutex_lock(ctx, sole_mecab_mutex);
+ if (!sole_mecab) {
+ sole_mecab = mecab_create(ctx);
+ if (sole_mecab) {
+ sole_mecab_encoding = get_mecab_encoding(sole_mecab);
+ }
+ }
+ grn_plugin_mutex_unlock(ctx, sole_mecab_mutex);
+ }
+ if (!sole_mecab) {
+ grn_tokenizer_query_close(ctx, query);
+ return NULL;
+ }
+
+ if (query->encoding != sole_mecab_encoding) {
+ grn_tokenizer_query_close(ctx, query);
+ GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+ "[tokenizer][mecab] "
+ "MeCab dictionary charset (%s) does not match "
+ "the table encoding: <%s>",
+ grn_encoding_to_string(sole_mecab_encoding),
+ grn_encoding_to_string(query->encoding));
+ return NULL;
+ }
+
+ if (!(tokenizer = GRN_PLUGIN_MALLOC(ctx, sizeof(grn_mecab_tokenizer)))) {
+ grn_tokenizer_query_close(ctx, query);
+ GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
+ "[tokenizer][mecab] "
+ "memory allocation to grn_mecab_tokenizer failed");
+ return NULL;
+ }
+ tokenizer->mecab = sole_mecab;
+ tokenizer->query = query;
+
+ normalized_query = query->normalized_query;
+ grn_string_get_normalized(ctx,
+ normalized_query,
+ &normalized_string,
+ &normalized_string_length,
+ NULL);
+ GRN_TEXT_INIT(&(tokenizer->buf), 0);
+ if (query->have_tokenized_delimiter) {
+ tokenizer->next = normalized_string;
+ tokenizer->end = tokenizer->next + normalized_string_length;
+ } else if (normalized_string_length == 0) {
+ tokenizer->next = "";
+ tokenizer->end = tokenizer->next;
+ } else {
+ grn_bool succeeded;
+ grn_plugin_mutex_lock(ctx, sole_mecab_mutex);
+ if (grn_mecab_chunked_tokenize_enabled &&
+ ctx->encoding == GRN_ENC_UTF8) {
+ succeeded = chunked_tokenize_utf8(ctx,
+ tokenizer,
+ normalized_string,
+ normalized_string_length);
+ } else {
+ const char *s;
+ s = mecab_sparse_tostr2(tokenizer->mecab,
+ normalized_string,
+ normalized_string_length);
+ if (!s) {
+ succeeded = GRN_FALSE;
+ GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+ "[tokenizer][mecab] "
+ "mecab_sparse_tostr() failed len=%d err=%s",
+ normalized_string_length,
+ mecab_strerror(tokenizer->mecab));
+ } else {
+ succeeded = GRN_TRUE;
+ GRN_TEXT_PUTS(ctx, &(tokenizer->buf), s);
+ }
+ }
+ grn_plugin_mutex_unlock(ctx, sole_mecab_mutex);
+ if (!succeeded) {
+ grn_tokenizer_query_close(ctx, tokenizer->query);
+ GRN_PLUGIN_FREE(ctx, tokenizer);
+ return NULL;
+ }
+ {
+ char *buf, *p;
+ unsigned int bufsize;
+
+ buf = GRN_TEXT_VALUE(&(tokenizer->buf));
+ bufsize = GRN_TEXT_LEN(&(tokenizer->buf));
+ /* A certain version of mecab returns trailing lf or spaces. */
+ for (p = buf + bufsize - 2;
+ buf <= p && isspace(*(unsigned char *)p);
+ p--) { *p = '\0'; }
+ tokenizer->next = buf;
+ tokenizer->end = p + 1;
+ }
+ }
+ user_data->ptr = tokenizer;
+
+ grn_tokenizer_token_init(ctx, &(tokenizer->token));
+
+ return NULL;
+}
+
+/*
+ This function returns tokens one by one.
+ */
+static grn_obj *
+mecab_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+ /* grn_obj *table = args[0]; */
+ grn_mecab_tokenizer *tokenizer = user_data->ptr;
+ grn_encoding encoding = tokenizer->query->encoding;
+
+ if (tokenizer->query->have_tokenized_delimiter) {
+ tokenizer->next =
+ grn_tokenizer_tokenized_delimiter_next(ctx,
+ &(tokenizer->token),
+ tokenizer->next,
+ tokenizer->end - tokenizer->next,
+ encoding);
+ } else {
+ size_t cl;
+ const char *p = tokenizer->next, *r;
+ const char *e = tokenizer->end;
+ grn_tokenizer_status status;
+
+ for (r = p; r < e; r += cl) {
+ int space_len;
+
+ space_len = grn_isspace(r, encoding);
+ if (space_len > 0 && r == p) {
+ cl = space_len;
+ p = r + cl;
+ continue;
+ }
+
+ if (!(cl = grn_charlen_(ctx, r, e, encoding))) {
+ tokenizer->next = e;
+ break;
+ }
+
+ if (space_len > 0) {
+ const char *q = r + space_len;
+ while (q < e && (space_len = grn_isspace(q, encoding))) {
+ q += space_len;
+ }
+ tokenizer->next = q;
+ break;
+ }
+ }
+
+ if (r == e || tokenizer->next == e) {
+ status = GRN_TOKENIZER_LAST;
+ } else {
+ status = GRN_TOKENIZER_CONTINUE;
+ }
+ grn_tokenizer_token_push(ctx, &(tokenizer->token), p, r - p, status);
+ }
+
+ return NULL;
+}
+
+/*
+ This function finalizes a tokenization.
+ */
+static grn_obj *
+mecab_fin(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+ grn_mecab_tokenizer *tokenizer = user_data->ptr;
+ if (!tokenizer) {
+ return NULL;
+ }
+ grn_tokenizer_token_fin(ctx, &(tokenizer->token));
+ grn_tokenizer_query_close(ctx, tokenizer->query);
+ grn_obj_unlink(ctx, &(tokenizer->buf));
+ GRN_PLUGIN_FREE(ctx, tokenizer);
+ return NULL;
+}
+
+static void
+check_mecab_dictionary_encoding(grn_ctx *ctx)
+{
+#ifdef HAVE_MECAB_DICTIONARY_INFO_T
+ mecab_t *mecab;
+ grn_encoding encoding;
+ grn_bool have_same_encoding_dictionary;
+
+ mecab = mecab_create(ctx);
+ if (!mecab) {
+ return;
+ }
+
+ encoding = GRN_CTX_GET_ENCODING(ctx);
+ have_same_encoding_dictionary = (encoding == get_mecab_encoding(mecab));
+ mecab_destroy(mecab);
+
+ if (!have_same_encoding_dictionary) {
+ GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+ "[tokenizer][mecab] "
+ "MeCab has no dictionary that uses the context encoding"
+ ": <%s>",
+ grn_encoding_to_string(encoding));
+ }
+#endif
+}
+
+/*
+ This function initializes a plugin. This function fails if there is no
+ dictionary that uses the context encoding of groonga.
+ */
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+ ++sole_mecab_init_counter;
+ if (sole_mecab_init_counter > 1)
+ {
+ return GRN_SUCCESS;
+ }
+ {
+ char env[GRN_ENV_BUFFER_SIZE];
+
+ grn_getenv("GRN_MECAB_CHUNKED_TOKENIZE_ENABLED",
+ env,
+ GRN_ENV_BUFFER_SIZE);
+ grn_mecab_chunked_tokenize_enabled = (env[0] && strcmp(env, "yes") == 0);
+ }
+
+ {
+ char env[GRN_ENV_BUFFER_SIZE];
+
+ grn_getenv("GRN_MECAB_CHUNK_SIZE_THRESHOLD",
+ env,
+ GRN_ENV_BUFFER_SIZE);
+ if (env[0]) {
+ int threshold = -1;
+ const char *end;
+ const char *rest;
+
+ end = env + strlen(env);
+ threshold = grn_atoi(env, end, &rest);
+ if (end > env && end == rest) {
+ grn_mecab_chunk_size_threshold = threshold;
+ }
+ }
+ }
+
+ sole_mecab = NULL;
+ sole_mecab_mutex = grn_plugin_mutex_open(ctx);
+ if (!sole_mecab_mutex) {
+ GRN_PLUGIN_ERROR(ctx, GRN_NO_MEMORY_AVAILABLE,
+ "[tokenizer][mecab] grn_plugin_mutex_open() failed");
+ return ctx->rc;
+ }
+
+ check_mecab_dictionary_encoding(ctx);
+ if (ctx->rc != GRN_SUCCESS) {
+ grn_plugin_mutex_close(ctx, sole_mecab_mutex);
+ sole_mecab_mutex = NULL;
+ }
+
+ return ctx->rc;
+}
+
+/*
+ This function registers a plugin to a database.
+ */
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+ grn_rc rc;
+
+ rc = grn_tokenizer_register(ctx, "TokenMecab", 10,
+ mecab_init, mecab_next, mecab_fin);
+ if (rc == GRN_SUCCESS) {
+ grn_obj *token_mecab;
+ token_mecab = grn_ctx_get(ctx, "TokenMecab", 10);
+ /* Just for backward compatibility. TokenMecab was built-in not plugin. */
+ if (token_mecab && grn_obj_id(ctx, token_mecab) != GRN_DB_MECAB) {
+ rc = GRN_FILE_CORRUPT;
+ }
+ }
+
+ return rc;
+}
+
+/*
+ This function finalizes a plugin.
+ */
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+ --sole_mecab_init_counter;
+ if (sole_mecab_init_counter > 0)
+ {
+ return GRN_SUCCESS;
+ }
+ if (sole_mecab) {
+ mecab_destroy(sole_mecab);
+ sole_mecab = NULL;
+ }
+ if (sole_mecab_mutex) {
+ grn_plugin_mutex_close(ctx, sole_mecab_mutex);
+ sole_mecab_mutex = NULL;
+ }
+
+ return GRN_SUCCESS;
+}
diff --git a/storage/mroonga/vendor/groonga/plugins/tokenizers/mecab_sources.am b/storage/mroonga/vendor/groonga/plugins/tokenizers/mecab_sources.am
new file mode 100644
index 00000000..56912727
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/tokenizers/mecab_sources.am
@@ -0,0 +1,2 @@
+mecab_la_SOURCES = \
+ mecab.c