summaryrefslogtreecommitdiffstats
path: root/storage/mroonga/vendor/groonga/plugins/functions/index_column.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/mroonga/vendor/groonga/plugins/functions/index_column.c')
-rw-r--r--storage/mroonga/vendor/groonga/plugins/functions/index_column.c266
1 files changed, 266 insertions, 0 deletions
diff --git a/storage/mroonga/vendor/groonga/plugins/functions/index_column.c b/storage/mroonga/vendor/groonga/plugins/functions/index_column.c
new file mode 100644
index 00000000..05010074
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/plugins/functions/index_column.c
@@ -0,0 +1,266 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+ Copyright(C) 2017 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+#ifdef GRN_EMBEDDED
+# define GRN_PLUGIN_FUNCTION_TAG functions_time
+#endif
+
+#include <groonga/plugin.h>
+
+static grn_rc
+selector_index_column_df_ratio_between(grn_ctx *ctx,
+ grn_obj *table,
+ grn_obj *index,
+ int n_args,
+ grn_obj **args,
+ grn_obj *res,
+ grn_operator op)
+{
+ grn_rc rc = GRN_SUCCESS;
+ grn_obj *index_column;
+ grn_ii *ii;
+ double min;
+ double max;
+ grn_obj *source_table;
+ unsigned int n_documents;
+ grn_posting posting;
+
+ if ((n_args - 1) != 3) {
+ GRN_PLUGIN_ERROR(ctx,
+ GRN_INVALID_ARGUMENT,
+ "index_column_df_ratio_between(): "
+ "wrong number of arguments (%d for 3)", n_args - 1);
+ rc = ctx->rc;
+ goto exit;
+ }
+
+ index_column = args[1];
+ ii = (grn_ii *)index_column;
+ min = GRN_FLOAT_VALUE(args[2]);
+ max = GRN_FLOAT_VALUE(args[3]);
+
+ source_table = grn_ctx_at(ctx, grn_obj_get_range(ctx, index_column));
+ n_documents = grn_table_size(ctx, source_table);
+ memset(&posting, 0, sizeof(grn_posting));
+ posting.sid = 1;
+
+ if (op == GRN_OP_AND) {
+ GRN_TABLE_EACH_BEGIN(ctx, res, cursor, record_id) {
+ void *key;
+ grn_id term_id;
+ uint32_t n_match_documents;
+ double df_ratio;
+
+ grn_table_cursor_get_key(ctx, cursor, &key);
+ term_id = *(grn_id *)key;
+ n_match_documents = grn_ii_estimate_size(ctx, ii, term_id);
+ if (n_match_documents > n_documents) {
+ n_match_documents = n_documents;
+ }
+ df_ratio = (double)n_match_documents / (double)n_documents;
+ if (min <= df_ratio && df_ratio <= max) {
+ posting.rid = term_id;
+ grn_ii_posting_add(ctx, &posting, (grn_hash *)res, op);
+ }
+ } GRN_TABLE_EACH_END(ctx, cursor);
+ grn_ii_resolve_sel_and(ctx, (grn_hash *)res, op);
+ } else {
+ GRN_TABLE_EACH_BEGIN(ctx, table, cursor, term_id) {
+ uint32_t n_match_documents;
+ double df_ratio;
+
+ n_match_documents = grn_ii_estimate_size(ctx, ii, term_id);
+ if (n_match_documents > n_documents) {
+ n_match_documents = n_documents;
+ }
+ df_ratio = (double)n_match_documents / (double)n_documents;
+ {
+ void *key;
+ int key_size;
+ key_size = grn_table_cursor_get_key(ctx, cursor, &key);
+ }
+ if (min <= df_ratio && df_ratio <= max) {
+ posting.rid = term_id;
+ grn_ii_posting_add(ctx, &posting, (grn_hash *)res, op);
+ }
+ } GRN_TABLE_EACH_END(ctx, cursor);
+ }
+
+exit :
+ return rc;
+}
+
+static grn_obj *
+func_index_column_df_ratio(grn_ctx *ctx,
+ int n_args,
+ grn_obj **args,
+ grn_user_data *user_data)
+{
+ grn_obj *term_table;
+ grn_obj *index_column_name;
+ grn_obj *index_column;
+ grn_ii *ii;
+ grn_id term_id;
+
+ if (n_args != 1) {
+ GRN_PLUGIN_ERROR(ctx,
+ GRN_INVALID_ARGUMENT,
+ "index_column_df_ratio(): "
+ "wrong number of arguments (%d for 1)", n_args - 1);
+ return NULL;
+ }
+
+ {
+ grn_obj *expr;
+ grn_obj *variable;
+
+ expr = grn_plugin_proc_get_caller(ctx, user_data);
+ if (!expr) {
+ GRN_PLUGIN_ERROR(ctx,
+ GRN_INVALID_ARGUMENT,
+ "index_column_df_ratio(): "
+ "called directly");
+ return NULL;
+ }
+
+ variable = grn_expr_get_var_by_offset(ctx, expr, 0);
+ if (!variable) {
+ GRN_PLUGIN_ERROR(ctx,
+ GRN_INVALID_ARGUMENT,
+ "index_column_df_ratio(): "
+ "caller expression must have target record information");
+ return NULL;
+ }
+
+ term_table = grn_ctx_at(ctx, variable->header.domain);
+ term_id = GRN_RECORD_VALUE(variable);
+ while (GRN_TRUE) {
+ grn_obj *key_type;
+
+ key_type = grn_ctx_at(ctx, term_table->header.domain);
+ if (!grn_obj_is_table(ctx, key_type)) {
+ break;
+ }
+
+ grn_table_get_key(ctx, term_table, term_id, &term_id, sizeof(grn_id));
+ term_table = key_type;
+ }
+ }
+
+ index_column_name = args[0];
+ if (!grn_obj_is_text_family_bulk(ctx, index_column_name)) {
+ grn_obj inspected;
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, index_column_name);
+ GRN_PLUGIN_ERROR(ctx,
+ GRN_INVALID_ARGUMENT,
+ "index_column_df_ratio(): "
+ "the first argument must be index column name: %.*s",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ return NULL;
+ }
+
+ index_column = grn_obj_column(ctx,
+ term_table,
+ GRN_TEXT_VALUE(index_column_name),
+ GRN_TEXT_LEN(index_column_name));
+ if (!index_column) {
+ GRN_PLUGIN_ERROR(ctx,
+ GRN_INVALID_ARGUMENT,
+ "index_column_df_ratio(): "
+ "nonexistent object: <%.*s>",
+ (int)GRN_TEXT_LEN(index_column_name),
+ GRN_TEXT_VALUE(index_column_name));
+ return NULL;
+ }
+
+ if (!grn_obj_is_index_column(ctx, index_column)) {
+ grn_obj inspected;
+ GRN_TEXT_INIT(&inspected, 0);
+ grn_inspect(ctx, &inspected, index_column);
+ GRN_PLUGIN_ERROR(ctx,
+ GRN_INVALID_ARGUMENT,
+ "index_column_df_ratio(): "
+ "the first argument must be index column: %.*s",
+ (int)GRN_TEXT_LEN(&inspected),
+ GRN_TEXT_VALUE(&inspected));
+ GRN_OBJ_FIN(ctx, &inspected);
+ if (grn_obj_is_accessor(ctx, index_column)) {
+ grn_obj_unlink(ctx, index_column);
+ }
+ return NULL;
+ }
+
+ ii = (grn_ii *)index_column;
+
+ {
+ grn_obj *source_table;
+ unsigned int n_documents;
+ uint32_t n_match_documents;
+ double df_ratio;
+ grn_obj *df_ratio_value;
+
+ source_table = grn_ctx_at(ctx, grn_obj_get_range(ctx, index_column));
+ n_documents = grn_table_size(ctx, source_table);
+ n_match_documents = grn_ii_estimate_size(ctx, ii, term_id);
+ if (n_match_documents > n_documents) {
+ n_match_documents = n_documents;
+ }
+ df_ratio = (double)n_match_documents / (double)n_documents;
+
+ df_ratio_value = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_FLOAT, 0);
+ if (!df_ratio_value) {
+ return NULL;
+ }
+ GRN_FLOAT_SET(ctx, df_ratio_value, df_ratio);
+ return df_ratio_value;
+ }
+}
+
+grn_rc
+GRN_PLUGIN_INIT(grn_ctx *ctx)
+{
+ return ctx->rc;
+}
+
+grn_rc
+GRN_PLUGIN_REGISTER(grn_ctx *ctx)
+{
+ grn_obj *selector_proc;
+
+ selector_proc = grn_proc_create(ctx, "index_column_df_ratio_between", -1,
+ GRN_PROC_FUNCTION,
+ NULL, NULL, NULL, 0, NULL);
+ grn_proc_set_selector(ctx, selector_proc,
+ selector_index_column_df_ratio_between);
+ grn_proc_set_selector_operator(ctx, selector_proc, GRN_OP_NOP);
+
+ grn_proc_create(ctx, "index_column_df_ratio", -1,
+ GRN_PROC_FUNCTION,
+ func_index_column_df_ratio, NULL, NULL, 0, NULL);
+
+ return ctx->rc;
+}
+
+grn_rc
+GRN_PLUGIN_FIN(grn_ctx *ctx)
+{
+ return GRN_SUCCESS;
+}