From a175314c3e5827eb193872241446f2f8f5c9d33c Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 20:07:14 +0200 Subject: Adding upstream version 1:10.5.12. Signed-off-by: Daniel Baumann --- storage/mroonga/udf/Makefile.am | 10 + storage/mroonga/udf/mrn_udf_command.cpp | 294 ++++++++++++ storage/mroonga/udf/mrn_udf_escape.cpp | 245 ++++++++++ storage/mroonga/udf/mrn_udf_highlight_html.cpp | 494 +++++++++++++++++++++ storage/mroonga/udf/mrn_udf_last_insert_grn_id.cpp | 55 +++ storage/mroonga/udf/mrn_udf_normalize.cpp | 212 +++++++++ storage/mroonga/udf/mrn_udf_query_expand.cpp | 282 ++++++++++++ storage/mroonga/udf/mrn_udf_snippet.cpp | 338 ++++++++++++++ storage/mroonga/udf/mrn_udf_snippet_html.cpp | 444 ++++++++++++++++++ storage/mroonga/udf/sources.am | 9 + 10 files changed, 2383 insertions(+) create mode 100644 storage/mroonga/udf/Makefile.am create mode 100644 storage/mroonga/udf/mrn_udf_command.cpp create mode 100644 storage/mroonga/udf/mrn_udf_escape.cpp create mode 100644 storage/mroonga/udf/mrn_udf_highlight_html.cpp create mode 100644 storage/mroonga/udf/mrn_udf_last_insert_grn_id.cpp create mode 100644 storage/mroonga/udf/mrn_udf_normalize.cpp create mode 100644 storage/mroonga/udf/mrn_udf_query_expand.cpp create mode 100644 storage/mroonga/udf/mrn_udf_snippet.cpp create mode 100644 storage/mroonga/udf/mrn_udf_snippet_html.cpp create mode 100644 storage/mroonga/udf/sources.am (limited to 'storage/mroonga/udf') diff --git a/storage/mroonga/udf/Makefile.am b/storage/mroonga/udf/Makefile.am new file mode 100644 index 00000000..f3c2966f --- /dev/null +++ b/storage/mroonga/udf/Makefile.am @@ -0,0 +1,10 @@ +AM_CPPFLAGS = \ + $(MYSQL_INCLUDES) \ + $(GROONGA_CFLAGS) \ + -I$(top_srcdir) \ + -I$(top_srcdir)/lib + +noinst_LTLIBRARIES = \ + libmrn_udf.la + +include sources.am diff --git a/storage/mroonga/udf/mrn_udf_command.cpp b/storage/mroonga/udf/mrn_udf_command.cpp new file mode 100644 index 00000000..10123c62 --- /dev/null +++ b/storage/mroonga/udf/mrn_udf_command.cpp @@ -0,0 +1,294 @@ +/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */ +/* + Copyright(C) 2010 Tetsuro IKEDA + Copyright(C) 2010-2013 Kentoku SHIBA + Copyright(C) 2011-2017 Kouhei Sutou + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +MRN_BEGIN_DECLS + +extern mrn::DatabaseManager *mrn_db_manager; +extern mrn::ContextPool *mrn_context_pool; + +struct CommandInfo +{ + grn_ctx *ctx; + grn_obj *db; + bool use_shared_db; + grn_obj command; + String result; +}; + +MRN_API my_bool mroonga_command_init(UDF_INIT *init, UDF_ARGS *args, + char *message) +{ + CommandInfo *info = NULL; + + init->ptr = NULL; + if (args->arg_count == 0) { + grn_snprintf(message, + MYSQL_ERRMSG_SIZE, + MYSQL_ERRMSG_SIZE, + "mroonga_command(): Wrong number of arguments: %u for 1..", + args->arg_count); + goto error; + } + + if ((args->arg_count % 2) == 0) { + grn_snprintf(message, + MYSQL_ERRMSG_SIZE, + MYSQL_ERRMSG_SIZE, + "mroonga_command(): The number of arguments must be odd: %u", + args->arg_count); + goto error; + } + + for (unsigned int i = 0; i < args->arg_count; ++i) { + switch (args->arg_type[i]) { + case STRING_RESULT: + // OK + break; + case REAL_RESULT: + grn_snprintf(message, + MYSQL_ERRMSG_SIZE, + MYSQL_ERRMSG_SIZE, + "mroonga_command(): Argument must be string: <%g>", + *reinterpret_cast(args->args[i])); + goto error; + break; + case INT_RESULT: + grn_snprintf(message, + MYSQL_ERRMSG_SIZE, + MYSQL_ERRMSG_SIZE, + "mroonga_command(): Argument must be string: <%lld>", + *reinterpret_cast(args->args[i])); + goto error; + break; + case DECIMAL_RESULT: + grn_snprintf(message, + MYSQL_ERRMSG_SIZE, + MYSQL_ERRMSG_SIZE, + "mroonga_command(): Argument must be string: <%.*s>", + static_cast(args->lengths[i]), + args->args[i]); + goto error; + break; + default: + grn_snprintf(message, + MYSQL_ERRMSG_SIZE, + MYSQL_ERRMSG_SIZE, + "mroonga_command(): Argument must be string: <%d>(%u)", + args->arg_type[i], + i); + goto error; + break; + } + } + init->maybe_null = 1; + init->const_item = 0; + + info = (CommandInfo *)mrn_my_malloc(sizeof(CommandInfo), + MYF(MY_WME | MY_ZEROFILL)); + if (!info) { + strcpy(message, "mroonga_command(): out of memory"); + goto error; + } + + info->ctx = mrn_context_pool->pull(); + { + const char *current_db_path = MRN_THD_DB_PATH(current_thd); + const char *action; + if (current_db_path) { + action = "open database"; + char encoded_db_path[FN_REFLEN + 1]; + uint encoded_db_path_length = + tablename_to_filename(current_db_path, + encoded_db_path, + sizeof(encoded_db_path)); + encoded_db_path[encoded_db_path_length] = '\0'; + mrn::Database *db; + int error = mrn_db_manager->open(encoded_db_path, &db); + if (error == 0) { + info->db = db->get(); + grn_ctx_use(info->ctx, info->db); + info->use_shared_db = true; + } + } else { + action = "create anonymous database"; + info->db = grn_db_create(info->ctx, NULL, NULL); + info->use_shared_db = false; + } + if (!info->db) { + grn_snprintf(message, + MYSQL_ERRMSG_SIZE, + MYSQL_ERRMSG_SIZE, + "mroonga_command(): failed to %s: %s", + action, + info->ctx->errbuf); + goto error; + } + } + GRN_TEXT_INIT(&(info->command), 0); + + init->ptr = (char *)info; + + return FALSE; + +error: + if (info) { + if (!info->use_shared_db) { + grn_obj_close(info->ctx, info->db); + } + mrn_context_pool->release(info->ctx); + my_free(info); + } + return TRUE; +} + +static void mroonga_command_escape_value(grn_ctx *ctx, + grn_obj *command, + const char *value, + unsigned long value_length) +{ + GRN_TEXT_PUTC(ctx, command, '"'); + + const char *value_current = value; + const char *value_end = value_current + value_length; + while (value_current < value_end) { + int char_length = grn_charlen(ctx, value_current, value_end); + + if (char_length == 0) { + break; + } else if (char_length == 1) { + switch (*value_current) { + case '\\': + case '"': + GRN_TEXT_PUTC(ctx, command, '\\'); + GRN_TEXT_PUTC(ctx, command, *value_current); + break; + case '\n': + GRN_TEXT_PUTS(ctx, command, "\\n"); + break; + default: + GRN_TEXT_PUTC(ctx, command, *value_current); + break; + } + } else { + GRN_TEXT_PUT(ctx, command, value_current, char_length); + } + + value_current += char_length; + } + + GRN_TEXT_PUTC(ctx, command, '"'); +} + +MRN_API char *mroonga_command(UDF_INIT *init, UDF_ARGS *args, char *result, + unsigned long *length, char *is_null, char *error) +{ + CommandInfo *info = (CommandInfo *)init->ptr; + grn_ctx *ctx = info->ctx; + int flags = 0; + + if (!args->args[0]) { + *is_null = 1; + return NULL; + } + + GRN_BULK_REWIND(&(info->command)); + GRN_TEXT_PUT(ctx, &(info->command), args->args[0], args->lengths[0]); + for (unsigned int i = 1; i < args->arg_count; i += 2) { + if (!args->args[i] || !args->args[i + 1]) { + *is_null = 1; + return NULL; + } + + const char *name = args->args[i]; + unsigned long name_length = args->lengths[i]; + GRN_TEXT_PUTS(ctx, &(info->command), " --"); + GRN_TEXT_PUT(ctx, &(info->command), name, name_length); + + const char *value = args->args[i + 1]; + unsigned long value_length = args->lengths[i + 1]; + GRN_TEXT_PUTS(ctx, &(info->command), " "); + mroonga_command_escape_value(ctx, &(info->command), value, value_length); + } + + *is_null = 0; + + grn_ctx_send(ctx, + GRN_TEXT_VALUE(&(info->command)), + GRN_TEXT_LEN(&(info->command)), + 0); + if (ctx->rc) { + my_message(ER_ERROR_ON_WRITE, ctx->errbuf, MYF(0)); + goto error; + } + + info->result.length(0); + do { + char *buffer; + unsigned int buffer_length; + grn_ctx_recv(ctx, &buffer, &buffer_length, &flags); + if (ctx->rc) { + my_message(ER_ERROR_ON_READ, ctx->errbuf, MYF(0)); + goto error; + } + if (buffer_length > 0) { + if (info->result.reserve(buffer_length)) { + my_error(ER_OUT_OF_RESOURCES, MYF(0), HA_ERR_OUT_OF_MEM); + goto error; + } + info->result.q_append(buffer, buffer_length); + } + } while (flags & GRN_CTX_MORE); + + *length = info->result.length(); + return (char *)(info->result.ptr()); + +error: + *error = 1; + return NULL; +} + +MRN_API void mroonga_command_deinit(UDF_INIT *init) +{ + CommandInfo *info = (CommandInfo *)init->ptr; + if (info) { + GRN_OBJ_FIN(info->ctx, &(info->command)); + if (!info->use_shared_db) { + grn_obj_close(info->ctx, info->db); + } + mrn_context_pool->release(info->ctx); + MRN_STRING_FREE(info->result); + my_free(info); + } +} + +MRN_END_DECLS diff --git a/storage/mroonga/udf/mrn_udf_escape.cpp b/storage/mroonga/udf/mrn_udf_escape.cpp new file mode 100644 index 00000000..72182790 --- /dev/null +++ b/storage/mroonga/udf/mrn_udf_escape.cpp @@ -0,0 +1,245 @@ +/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */ +/* + Copyright(C) 2013-2017 Kouhei Sutou + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include +#include +#include +#include +#include +#include +#include + +MRN_BEGIN_DECLS + +extern mrn::ContextPool *mrn_context_pool; + +struct EscapeInfo +{ + grn_ctx *ctx; + bool script_mode; + grn_obj target_characters; + grn_obj escaped_value; +}; + +MRN_API my_bool mroonga_escape_init(UDF_INIT *init, UDF_ARGS *args, + char *message) +{ + EscapeInfo *info = NULL; + bool script_mode = false; + + init->ptr = NULL; + if (!(1 <= args->arg_count && args->arg_count <= 2)) { + snprintf(message, + MYSQL_ERRMSG_SIZE, + "mroonga_escape(): Incorrect number of arguments: %u for 1..2", + args->arg_count); + goto error; + } + + if (args->attribute_lengths[0] == strlen("script") && + strncmp(args->attributes[0], "script", strlen("script")) == 0) { + switch (args->arg_type[0]) { + case ROW_RESULT: + snprintf(message, + MYSQL_ERRMSG_SIZE, + "mroonga_escape(): " + "The 1st script argument must be " + "string, integer or floating point: "); + goto error; + break; + default: + break; + } + script_mode = true; + } else { + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message, + "mroonga_escape(): The 1st query argument must be string"); + goto error; + } + } + if (args->arg_count == 2) { + if (args->arg_type[1] != STRING_RESULT) { + strcpy(message, + "mroonga_escape(): " + "The 2st argument must be escape target characters as string"); + goto error; + } + } + + init->maybe_null = 1; + + info = static_cast(mrn_my_malloc(sizeof(EscapeInfo), + MYF(MY_WME | MY_ZEROFILL))); + if (!info) { + strcpy(message, "mroonga_escape(): out of memory"); + goto error; + } + + info->ctx = mrn_context_pool->pull(); + info->script_mode = script_mode; + GRN_TEXT_INIT(&(info->target_characters), 0); + GRN_TEXT_INIT(&(info->escaped_value), 0); + + init->ptr = reinterpret_cast(info); + + return FALSE; + +error: + if (info) { + mrn_context_pool->release(info->ctx); + my_free(info); + } + return TRUE; +} + +static void escape(EscapeInfo *info, UDF_ARGS *args) +{ + grn_ctx *ctx = info->ctx; + + GRN_BULK_REWIND(&(info->escaped_value)); + if (info->script_mode) { + switch (args->arg_type[0]) { + case STRING_RESULT: + { + char *value = args->args[0]; + unsigned long value_length = args->lengths[0]; + GRN_TEXT_PUTC(ctx, &(info->escaped_value), '"'); + if (args->arg_count == 2) { + grn_obj special_characters; + GRN_TEXT_INIT(&special_characters, 0); + GRN_TEXT_PUT(ctx, + &special_characters, + args->args[1], + args->lengths[1]); + GRN_TEXT_PUTC(ctx, &special_characters, '\0'); + grn_expr_syntax_escape(ctx, + value, + value_length, + GRN_TEXT_VALUE(&special_characters), + '\\', + &(info->escaped_value)); + GRN_OBJ_FIN(ctx, &special_characters); + } else { + const char *special_characters = "\"\\"; + grn_expr_syntax_escape(ctx, + value, + value_length, + special_characters, + '\\', + &(info->escaped_value)); + } + GRN_TEXT_PUTC(ctx, &(info->escaped_value), '"'); + } + break; + case REAL_RESULT: + { + double value = *reinterpret_cast(args->args[0]); + grn_text_ftoa(ctx, &(info->escaped_value), value); + } + break; + case INT_RESULT: + { + longlong value = *reinterpret_cast(args->args[0]); + grn_text_lltoa(ctx, &(info->escaped_value), value); + } + break; + case DECIMAL_RESULT: + { + grn_obj value_raw; + GRN_TEXT_INIT(&value_raw, GRN_OBJ_DO_SHALLOW_COPY); + GRN_TEXT_SET(ctx, &value_raw, args->args[0], args->lengths[0]); + grn_obj value; + GRN_FLOAT_INIT(&value, 0); + if (grn_obj_cast(ctx, &value_raw, &value, GRN_FALSE) == GRN_SUCCESS) { + grn_text_ftoa(ctx, &(info->escaped_value), GRN_FLOAT_VALUE(&value)); + } else { + GRN_TEXT_PUT(ctx, + &(info->escaped_value), + args->args[0], + args->lengths[0]); + } + GRN_OBJ_FIN(ctx, &value); + GRN_OBJ_FIN(ctx, &value_raw); + } + break; + default: + break; + } + } else { + char *query = args->args[0]; + unsigned long query_length = args->lengths[0]; + if (args->arg_count == 2) { + char *target_characters = args->args[1]; + unsigned long target_characters_length = args->lengths[1]; + GRN_TEXT_PUT(ctx, &(info->target_characters), + target_characters, + target_characters_length); + GRN_TEXT_PUTC(ctx, &(info->target_characters), '\0'); + grn_expr_syntax_escape(ctx, query, query_length, + GRN_TEXT_VALUE(&(info->target_characters)), + GRN_QUERY_ESCAPE, + &(info->escaped_value)); + } else { + grn_expr_syntax_escape_query(ctx, query, query_length, + &(info->escaped_value)); + } + } +} + +MRN_API char *mroonga_escape(UDF_INIT *init, UDF_ARGS *args, char *result, + unsigned long *length, char *is_null, char *error) +{ + EscapeInfo *info = reinterpret_cast(init->ptr); + grn_ctx *ctx = info->ctx; + + if (!args->args[0]) { + *is_null = 1; + return NULL; + } + + *is_null = 0; + + escape(info, args); + + if (ctx->rc) { + my_message(ER_ERROR_ON_WRITE, ctx->errbuf, MYF(0)); + goto error; + } + + *length = GRN_TEXT_LEN(&(info->escaped_value)); + return GRN_TEXT_VALUE(&(info->escaped_value)); + +error: + *error = 1; + return NULL; +} + +MRN_API void mroonga_escape_deinit(UDF_INIT *init) +{ + EscapeInfo *info = reinterpret_cast(init->ptr); + if (info) { + grn_obj_unlink(info->ctx, &(info->target_characters)); + grn_obj_unlink(info->ctx, &(info->escaped_value)); + mrn_context_pool->release(info->ctx); + my_free(info); + } +} + +MRN_END_DECLS diff --git a/storage/mroonga/udf/mrn_udf_highlight_html.cpp b/storage/mroonga/udf/mrn_udf_highlight_html.cpp new file mode 100644 index 00000000..12f54a7d --- /dev/null +++ b/storage/mroonga/udf/mrn_udf_highlight_html.cpp @@ -0,0 +1,494 @@ +/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */ +/* + Copyright(C) 2017 Kouhei Sutou + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MRN_BEGIN_DECLS + +extern mrn::DatabaseManager *mrn_db_manager; +extern mrn::ContextPool *mrn_context_pool; + +typedef struct st_mrn_highlight_html_info +{ + grn_ctx *ctx; + grn_obj *db; + bool use_shared_db; + grn_obj *keywords; + String result_str; + struct { + bool used; + grn_obj *table; + grn_obj *default_column; + } query_mode; +} mrn_highlight_html_info; + +static my_bool mrn_highlight_html_prepare(mrn_highlight_html_info *info, + UDF_ARGS *args, + char *message, + grn_obj **keywords) +{ + MRN_DBUG_ENTER_FUNCTION(); + + grn_ctx *ctx = info->ctx; + const char *normalizer_name = "NormalizerAuto"; + grn_obj *expr = NULL; + String *result_str = &(info->result_str); + + *keywords = NULL; + + mrn::encoding::set_raw(ctx, system_charset_info); + if (system_charset_info->state & (MY_CS_BINSORT | MY_CS_CSSORT)) { + normalizer_name = NULL; + } + + *keywords = grn_table_create(ctx, NULL, 0, NULL, + GRN_OBJ_TABLE_PAT_KEY, + grn_ctx_at(ctx, GRN_DB_SHORT_TEXT), + NULL); + if (ctx->rc != GRN_SUCCESS) { + if (message) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_highlight_html(): " + "failed to create grn_pat for keywords: <%s>", + ctx->errbuf); + } + goto error; + } + if (normalizer_name) { + grn_obj_set_info(ctx, + *keywords, + GRN_INFO_NORMALIZER, + grn_ctx_get(ctx, normalizer_name, -1)); + } + + if (info->query_mode.used) { + if (!info->query_mode.table) { + grn_obj *short_text; + short_text = grn_ctx_at(info->ctx, GRN_DB_SHORT_TEXT); + info->query_mode.table = grn_table_create(info->ctx, + NULL, 0, NULL, + GRN_TABLE_HASH_KEY, + short_text, + NULL); + } + if (!info->query_mode.default_column) { + info->query_mode.default_column = + grn_obj_column(info->ctx, + info->query_mode.table, + GRN_COLUMN_NAME_KEY, + GRN_COLUMN_NAME_KEY_LEN); + } + + grn_obj *record = NULL; + GRN_EXPR_CREATE_FOR_QUERY(info->ctx, info->query_mode.table, expr, record); + if (!expr) { + if (message) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_highlight_html(): " + "failed to create expression: <%s>", + ctx->errbuf); + } + goto error; + } + + mrn::QueryParser query_parser(info->ctx, + current_thd, + expr, + info->query_mode.default_column, + 0, + NULL); + grn_rc rc = query_parser.parse(args->args[1], args->lengths[1]); + if (rc != GRN_SUCCESS) { + if (message) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_highlight_html(): " + "failed to parse query: <%s>", + ctx->errbuf); + } + goto error; + } + + { + grn_obj extracted_keywords; + GRN_PTR_INIT(&extracted_keywords, GRN_OBJ_VECTOR, GRN_ID_NIL); + grn_expr_get_keywords(ctx, expr, &extracted_keywords); + + size_t n_keywords = + GRN_BULK_VSIZE(&extracted_keywords) / sizeof(grn_obj *); + for (size_t i = 0; i < n_keywords; ++i) { + grn_obj *extracted_keyword = GRN_PTR_VALUE_AT(&extracted_keywords, i); + grn_table_add(ctx, + *keywords, + GRN_TEXT_VALUE(extracted_keyword), + GRN_TEXT_LEN(extracted_keyword), + NULL); + if (ctx->rc != GRN_SUCCESS) { + if (message) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_highlight_html(): " + "failed to add a keyword: <%.*s>: <%s>", + static_cast(GRN_TEXT_LEN(extracted_keyword)), + GRN_TEXT_VALUE(extracted_keyword), + ctx->errbuf); + GRN_OBJ_FIN(ctx, &extracted_keywords); + } + goto error; + } + } + GRN_OBJ_FIN(ctx, &extracted_keywords); + } + } else { + for (unsigned int i = 1; i < args->arg_count; ++i) { + if (!args->args[i]) { + continue; + } + grn_table_add(ctx, + *keywords, + args->args[i], + args->lengths[i], + NULL); + if (ctx->rc != GRN_SUCCESS) { + if (message) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_highlight_html(): " + "failed to add a keyword: <%.*s>: <%s>", + static_cast(args->lengths[i]), + args->args[i], + ctx->errbuf); + } + goto error; + } + } + } + + result_str->set_charset(system_charset_info); + DBUG_RETURN(FALSE); + +error: + if (expr) { + grn_obj_close(ctx, expr); + } + if (*keywords) { + grn_obj_close(ctx, *keywords); + } + DBUG_RETURN(TRUE); +} + +MRN_API my_bool mroonga_highlight_html_init(UDF_INIT *init, + UDF_ARGS *args, + char *message) +{ + MRN_DBUG_ENTER_FUNCTION(); + + mrn_highlight_html_info *info = NULL; + + init->ptr = NULL; + + if (args->arg_count < 1) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_highlight_html(): wrong number of arguments: %u for 1+", + args->arg_count); + goto error; + } + + + for (unsigned int i = 0; i < args->arg_count; ++i) { + switch (args->arg_type[i]) { + case STRING_RESULT: + /* OK */ + break; + case REAL_RESULT: + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_highlight_html(): all arguments must be string: " + "<%u>=<%g>", + i, *((double *)(args->args[i]))); + goto error; + break; + case INT_RESULT: + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_highlight_html(): all arguments must be string: " + "<%u>=<%lld>", + i, *((longlong *)(args->args[i]))); + goto error; + break; + default: + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_highlight_html(): all arguments must be string: <%u>", + i); + goto error; + break; + } + } + + init->maybe_null = 0; + + info = + reinterpret_cast( + mrn_my_malloc(sizeof(mrn_highlight_html_info), + MYF(MY_WME | MY_ZEROFILL))); + if (!info) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_highlight_html(): failed to allocate memory"); + goto error; + } + + info->ctx = mrn_context_pool->pull(); + { + const char *current_db_path = MRN_THD_DB_PATH(current_thd); + const char *action; + if (current_db_path) { + action = "open database"; + mrn::Database *db; + int error = mrn_db_manager->open(current_db_path, &db); + if (error == 0) { + info->db = db->get(); + grn_ctx_use(info->ctx, info->db); + info->use_shared_db = true; + } + } else { + action = "create anonymous database"; + info->db = grn_db_create(info->ctx, NULL, NULL); + info->use_shared_db = false; + } + if (!info->db) { + sprintf(message, + "mroonga_highlight_html(): failed to %s: %s", + action, + info->ctx->errbuf); + goto error; + } + } + + info->query_mode.used = FALSE; + + if (args->arg_count == 2 && + args->attribute_lengths[1] == strlen("query") && + strncmp(args->attributes[1], "query", strlen("query")) == 0) { + info->query_mode.used = TRUE; + info->query_mode.table = NULL; + info->query_mode.default_column = NULL; + } + + { + bool all_keywords_are_constant = TRUE; + for (unsigned int i = 1; i < args->arg_count; ++i) { + if (!args->args[i]) { + all_keywords_are_constant = FALSE; + break; + } + } + + if (all_keywords_are_constant) { + if (mrn_highlight_html_prepare(info, args, message, &(info->keywords))) { + goto error; + } + } else { + info->keywords = NULL; + } + } + + init->ptr = (char *)info; + + DBUG_RETURN(FALSE); + +error: + if (info) { + if (!info->use_shared_db) { + grn_obj_close(info->ctx, info->db); + } + mrn_context_pool->release(info->ctx); + my_free(info); + } + DBUG_RETURN(TRUE); +} + +static bool highlight_html(grn_ctx *ctx, + grn_pat *keywords, + const char *target, + size_t target_length, + String *output) +{ + MRN_DBUG_ENTER_FUNCTION(); + + grn_obj buffer; + + GRN_TEXT_INIT(&buffer, 0); + + { + const char *open_tag = ""; + size_t open_tag_length = strlen(open_tag); + const char *close_tag = ""; + size_t close_tag_length = strlen(close_tag); + + while (target_length > 0) { +#define MAX_N_HITS 16 + grn_pat_scan_hit hits[MAX_N_HITS]; + const char *rest; + size_t previous = 0; + size_t chunk_length; + + int n_hits = grn_pat_scan(ctx, + keywords, + target, + target_length, + hits, MAX_N_HITS, &rest); + for (int i = 0; i < n_hits; i++) { + if ((hits[i].offset - previous) > 0) { + grn_text_escape_xml(ctx, + &buffer, + target + previous, + hits[i].offset - previous); + } + GRN_TEXT_PUT(ctx, &buffer, open_tag, open_tag_length); + grn_text_escape_xml(ctx, + &buffer, + target + hits[i].offset, + hits[i].length); + GRN_TEXT_PUT(ctx, &buffer, close_tag, close_tag_length); + previous = hits[i].offset + hits[i].length; + } + + chunk_length = rest - target; + if ((chunk_length - previous) > 0) { + grn_text_escape_xml(ctx, + &buffer, + target + previous, + target_length - previous); + } + target_length -= chunk_length; + target = rest; +#undef MAX_N_HITS + } + } + + if (output->reserve(GRN_TEXT_LEN(&buffer))) { + my_error(ER_OUT_OF_RESOURCES, MYF(0), HA_ERR_OUT_OF_MEM); + GRN_OBJ_FIN(ctx, &buffer); + DBUG_RETURN(false); + } + + output->q_append(GRN_TEXT_VALUE(&buffer), GRN_TEXT_LEN(&buffer)); + GRN_OBJ_FIN(ctx, &buffer); + DBUG_RETURN(true); +} + +MRN_API char *mroonga_highlight_html(UDF_INIT *init, + UDF_ARGS *args, + char *result, + unsigned long *length, + char *is_null, + char *error) +{ + MRN_DBUG_ENTER_FUNCTION(); + + mrn_highlight_html_info *info = + reinterpret_cast(init->ptr); + + grn_ctx *ctx = info->ctx; + grn_obj *keywords = info->keywords; + String *result_str = &(info->result_str); + + if (!args->args[0]) { + *is_null = 1; + DBUG_RETURN(NULL); + } + + if (!keywords) { + if (mrn_highlight_html_prepare(info, args, NULL, &keywords)) { + goto error; + } + } + + *is_null = 0; + result_str->length(0); + + if (!highlight_html(ctx, + reinterpret_cast(keywords), + args->args[0], + args->lengths[0], + result_str)) { + goto error; + } + + if (!info->keywords) { + grn_rc rc = grn_obj_close(ctx, keywords); + if (rc != GRN_SUCCESS) { + my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM, + ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf); + goto error; + } + } + + *length = result_str->length(); + DBUG_RETURN((char *)result_str->ptr()); + +error: + if (!info->keywords && keywords) { + grn_obj_close(ctx, keywords); + } + + *is_null = 1; + *error = 1; + + DBUG_RETURN(NULL); +} + +MRN_API void mroonga_highlight_html_deinit(UDF_INIT *init) +{ + MRN_DBUG_ENTER_FUNCTION(); + + mrn_highlight_html_info *info = + reinterpret_cast(init->ptr); + if (!info) { + DBUG_VOID_RETURN; + } + + if (info->keywords) { + grn_obj_close(info->ctx, info->keywords); + } + if (info->query_mode.used) { + if (info->query_mode.default_column) { + grn_obj_close(info->ctx, info->query_mode.default_column); + } + if (info->query_mode.table) { + grn_obj_close(info->ctx, info->query_mode.table); + } + } + MRN_STRING_FREE(info->result_str); + if (!info->use_shared_db) { + grn_obj_close(info->ctx, info->db); + } + mrn_context_pool->release(info->ctx); + my_free(info); + + DBUG_VOID_RETURN; +} + +MRN_END_DECLS diff --git a/storage/mroonga/udf/mrn_udf_last_insert_grn_id.cpp b/storage/mroonga/udf/mrn_udf_last_insert_grn_id.cpp new file mode 100644 index 00000000..46176bc8 --- /dev/null +++ b/storage/mroonga/udf/mrn_udf_last_insert_grn_id.cpp @@ -0,0 +1,55 @@ +/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */ +/* + Copyright(C) 2010 Tetsuro IKEDA + Copyright(C) 2010-2013 Kentoku SHIBA + Copyright(C) 2011-2017 Kouhei Sutou + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include +#include +#include +#include +#include + +MRN_BEGIN_DECLS + +MRN_API my_bool last_insert_grn_id_init(UDF_INIT *init, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 0) { + strcpy(message, "last_insert_grn_id must not have arguments"); + return 1; + } + init->maybe_null = 0; + return 0; +} + +MRN_API longlong last_insert_grn_id(UDF_INIT *init, UDF_ARGS *args, char *is_null, char *error) +{ + THD *thd = current_thd; + st_mrn_slot_data *slot_data = mrn_get_slot_data(thd, false); + if (slot_data == NULL) { + return 0; + } + longlong last_insert_record_id = slot_data->last_insert_record_id; + return last_insert_record_id; +} + +MRN_API void last_insert_grn_id_deinit(UDF_INIT *init) +{ +} + +MRN_END_DECLS diff --git a/storage/mroonga/udf/mrn_udf_normalize.cpp b/storage/mroonga/udf/mrn_udf_normalize.cpp new file mode 100644 index 00000000..30362351 --- /dev/null +++ b/storage/mroonga/udf/mrn_udf_normalize.cpp @@ -0,0 +1,212 @@ +/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */ +/* + Copyright(C) 2015 Naoya Murakami + Copyright(C) 2017 Kouhei Sutou + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MRN_BEGIN_DECLS + +extern mrn::DatabaseManager *mrn_db_manager; +extern mrn::ContextPool *mrn_context_pool; + +#define DEFAULT_NORMALIZER_NAME "NormalizerAuto" + +struct st_mrn_normalize_info +{ + grn_ctx *ctx; + grn_obj *db; + bool use_shared_db; + grn_obj *normalizer; + int flags; + String result_str; +}; + +MRN_API my_bool mroonga_normalize_init(UDF_INIT *init, UDF_ARGS *args, + char *message) +{ + st_mrn_normalize_info *info = NULL; + String *result_str = NULL; + + init->ptr = NULL; + if (!(1 <= args->arg_count && args->arg_count <= 2)) { + sprintf(message, + "mroonga_normalize(): Incorrect number of arguments: %u for 1..2", + args->arg_count); + goto error; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message, + "mroonga_normalize(): The 1st argument must be query as string"); + goto error; + } + if (args->arg_count == 2) { + if (args->arg_type[1] != STRING_RESULT) { + strcpy(message, + "mroonga_normalize(): " + "The 2st argument must be normalizer name as string"); + goto error; + } + } + + init->maybe_null = 1; + + info = (st_mrn_normalize_info *)mrn_my_malloc(sizeof(st_mrn_normalize_info), + MYF(MY_WME | MY_ZEROFILL)); + if (!info) { + strcpy(message, "mroonga_normalize(): out of memory"); + goto error; + } + + info->ctx = mrn_context_pool->pull(); + { + const char *current_db_path = MRN_THD_DB_PATH(current_thd); + const char *action; + if (current_db_path) { + action = "open database"; + mrn::Database *db; + int error = mrn_db_manager->open(current_db_path, &db); + if (error == 0) { + info->db = db->get(); + grn_ctx_use(info->ctx, info->db); + info->use_shared_db = true; + } + } else { + action = "create anonymous database"; + info->db = grn_db_create(info->ctx, NULL, NULL); + info->use_shared_db = false; + } + if (!info->db) { + sprintf(message, + "mroonga_normalize(): failed to %s: %s", + action, + info->ctx->errbuf); + goto error; + } + } + + if (args->arg_count == 1) { + info->normalizer = grn_ctx_get(info->ctx, DEFAULT_NORMALIZER_NAME, -1); + } else { + info->normalizer = grn_ctx_get(info->ctx, args->args[1], args->lengths[1]); + } + if (!info->normalizer) { + sprintf(message, "mroonga_normalize(): nonexistent normalizer %.*s", + (int)args->lengths[1], args->args[1]); + goto error; + } + info->flags = 0; + + result_str = &(info->result_str); + mrn::encoding::set_raw(info->ctx, system_charset_info); + result_str->set_charset(system_charset_info); + + init->ptr = (char *)info; + + return FALSE; + +error: + if (info) { + if (!info->use_shared_db) { + grn_obj_close(info->ctx, info->db); + } + mrn_context_pool->release(info->ctx); + my_free(info); + } + return TRUE; +} + +MRN_API char *mroonga_normalize(UDF_INIT *init, UDF_ARGS *args, char *result, + unsigned long *length, char *is_null, char *error) +{ + st_mrn_normalize_info *info = (st_mrn_normalize_info *)init->ptr; + grn_ctx *ctx = info->ctx; + String *result_str = &(info->result_str); + + if (!args->args[0]) { + *is_null = 1; + return NULL; + } + + result_str->length(0); + { + char *target = args->args[0]; + unsigned int target_length = args->lengths[0]; + grn_obj *grn_string; + const char *normalized; + unsigned int normalized_length_in_bytes; + unsigned int normalized_n_characters; + + grn_string = grn_string_open(ctx, + target, target_length, + info->normalizer, info->flags); + grn_string_get_normalized(ctx, grn_string, + &normalized, + &normalized_length_in_bytes, + &normalized_n_characters); + if (result_str->reserve(normalized_length_in_bytes)) { + my_error(ER_OUT_OF_RESOURCES, MYF(0), HA_ERR_OUT_OF_MEM); + goto error; + } + result_str->q_append(normalized, normalized_length_in_bytes); + result_str->length(normalized_length_in_bytes); + grn_obj_unlink(ctx, grn_string); + } + *is_null = 0; + + if (ctx->rc) { + my_message(ER_ERROR_ON_WRITE, ctx->errbuf, MYF(0)); + goto error; + } + + *length = result_str->length(); + return (char *)result_str->ptr(); + +error: + *is_null = 1; + *error = 1; + return NULL; +} + +MRN_API void mroonga_normalize_deinit(UDF_INIT *init) +{ + st_mrn_normalize_info *info = (st_mrn_normalize_info *)init->ptr; + + if (info) { + MRN_STRING_FREE(info->result_str); + if (info->normalizer) { + grn_obj_unlink(info->ctx, info->normalizer); + } + if (!info->use_shared_db) { + grn_obj_close(info->ctx, info->db); + } + mrn_context_pool->release(info->ctx); + my_free(info); + } +} + +MRN_END_DECLS diff --git a/storage/mroonga/udf/mrn_udf_query_expand.cpp b/storage/mroonga/udf/mrn_udf_query_expand.cpp new file mode 100644 index 00000000..03bef321 --- /dev/null +++ b/storage/mroonga/udf/mrn_udf_query_expand.cpp @@ -0,0 +1,282 @@ +/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */ +/* + Copyright(C) 2017 Kouhei Sutou + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MRN_BEGIN_DECLS + +extern mrn::DatabaseManager *mrn_db_manager; +extern mrn::ContextPool *mrn_context_pool; + +namespace mrn { + struct QueryExpandInfo { + grn_ctx *ctx; + grn_obj expanded_query; + grn_obj *term_column; + grn_obj *expanded_term_column; + }; +} + +static void mrn_query_expand_info_free(mrn::QueryExpandInfo *info) +{ + MRN_DBUG_ENTER_FUNCTION(); + + if (!info) { + DBUG_VOID_RETURN; + } + + if (info->ctx) { + GRN_OBJ_FIN(info->ctx, &(info->expanded_query)); + if (grn_obj_is_accessor(info->ctx, info->expanded_term_column)) { + grn_obj_unlink(info->ctx, info->expanded_term_column); + } + if (grn_obj_is_accessor(info->ctx, info->term_column)) { + grn_obj_unlink(info->ctx, info->term_column); + } + mrn_context_pool->release(info->ctx); + } + my_free(info); + + DBUG_VOID_RETURN; +} + +MRN_API my_bool mroonga_query_expand_init(UDF_INIT *init, + UDF_ARGS *args, + char *message) +{ + mrn::QueryExpandInfo *info = NULL; + + MRN_DBUG_ENTER_FUNCTION(); + + init->ptr = NULL; + if (args->arg_count != 4) { + sprintf(message, + "mroonga_query_expand(): wrong number of arguments: %u for 4", + args->arg_count); + goto error; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message, + "mroonga_query_expand(): " + "the 1st argument must be table name as string"); + goto error; + } + if (args->arg_type[1] != STRING_RESULT) { + strcpy(message, + "mroonga_query_expand(): " + "the 2nd argument must be term column name as string"); + goto error; + } + if (args->arg_type[2] != STRING_RESULT) { + strcpy(message, + "mroonga_query_expand(): " + "the 3nd argument must be expanded term column name as string"); + goto error; + } + if (args->arg_type[3] != STRING_RESULT) { + strcpy(message, + "mroonga_query_expand(): " + "the 4th argument must be query as string"); + goto error; + } + + init->maybe_null = 1; + + info = static_cast( + mrn_my_malloc(sizeof(mrn::QueryExpandInfo), + MYF(MY_WME | MY_ZEROFILL))); + if (!info) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_query_expand(): failed to allocate memory"); + goto error; + } + + { + const char *current_db_path = MRN_THD_DB_PATH(current_thd); + if (!current_db_path) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_query_expand(): no current database"); + goto error; + } + + mrn::Database *db; + int error = mrn_db_manager->open(current_db_path, &db); + if (error != 0) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_query_expand(): failed to open database: %s", + mrn_db_manager->error_message()); + goto error; + } + info->ctx = mrn_context_pool->pull(); + grn_ctx_use(info->ctx, db->get()); + } + + GRN_TEXT_INIT(&(info->expanded_query), 0); + + { + const char *table_name = args->args[0]; + unsigned int table_name_length = args->lengths[0]; + grn_obj *table = grn_ctx_get(info->ctx, + table_name, + table_name_length); + if (!table) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_query_expand(): table doesn't exist: <%.*s>", + static_cast(table_name_length), + table_name); + goto error; + } + + const char *term_column_name = args->args[1]; + unsigned int term_column_name_length = args->lengths[1]; + info->term_column = grn_obj_column(info->ctx, + table, + term_column_name, + term_column_name_length); + if (!info->term_column) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_query_expand(): term column doesn't exist: <%.*s.%.*s>", + static_cast(table_name_length), + table_name, + static_cast(term_column_name_length), + term_column_name); + goto error; + } + + const char *expanded_term_column_name = args->args[2]; + unsigned int expanded_term_column_name_length = args->lengths[2]; + info->expanded_term_column = grn_obj_column(info->ctx, + table, + expanded_term_column_name, + expanded_term_column_name_length); + if (!info->expanded_term_column) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_query_expand(): " + "expanded term column doesn't exist: <%.*s.%.*s>", + static_cast(table_name_length), + table_name, + static_cast(expanded_term_column_name_length), + expanded_term_column_name); + goto error; + } + } + + init->ptr = reinterpret_cast(info); + + DBUG_RETURN(FALSE); + +error: + mrn_query_expand_info_free(info); + DBUG_RETURN(TRUE); +} + +static void query_expand(mrn::QueryExpandInfo *info, UDF_ARGS *args) +{ + grn_ctx *ctx = info->ctx; + const char *query = args->args[3]; + unsigned int query_length = args->lengths[3]; + + mrn::QueryParser query_parser(info->ctx, + current_thd, + NULL, + NULL, + 0, + NULL); + const char *raw_query; + size_t raw_query_length; + grn_operator default_operator; + grn_expr_flags flags; + query_parser.parse_pragma(query, + query_length, + &raw_query, + &raw_query_length, + &default_operator, + &flags); + GRN_TEXT_SET(info->ctx, + &(info->expanded_query), + query, + raw_query - query); + grn_expr_syntax_expand_query_by_table(ctx, + raw_query, + raw_query_length, + flags, + info->term_column, + info->expanded_term_column, + &(info->expanded_query)); +} + +MRN_API char *mroonga_query_expand(UDF_INIT *init, + UDF_ARGS *args, + char *result, + unsigned long *length, + char *is_null, + char *error) +{ + MRN_DBUG_ENTER_FUNCTION(); + + mrn::QueryExpandInfo *info = + reinterpret_cast(init->ptr); + grn_ctx *ctx = info->ctx; + + if (!args->args[3]) { + *is_null = 1; + DBUG_RETURN(NULL); + } + + *is_null = 0; + + query_expand(info, args); + + if (ctx->rc) { + char message[MYSQL_ERRMSG_SIZE]; + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_query_expand(): " + "failed to expand: %s", + ctx->errbuf); + my_message(ER_ERROR_ON_WRITE, message, MYF(0)); + goto error; + } + + *length = GRN_TEXT_LEN(&(info->expanded_query)); + DBUG_RETURN(GRN_TEXT_VALUE(&(info->expanded_query))); + +error: + *error = 1; + DBUG_RETURN(NULL); +} + +MRN_API void mroonga_query_expand_deinit(UDF_INIT *init) +{ + MRN_DBUG_ENTER_FUNCTION(); + mrn::QueryExpandInfo *info = + reinterpret_cast(init->ptr); + mrn_query_expand_info_free(info); + DBUG_VOID_RETURN; +} + +MRN_END_DECLS diff --git a/storage/mroonga/udf/mrn_udf_snippet.cpp b/storage/mroonga/udf/mrn_udf_snippet.cpp new file mode 100644 index 00000000..a4b37d03 --- /dev/null +++ b/storage/mroonga/udf/mrn_udf_snippet.cpp @@ -0,0 +1,338 @@ +/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */ +/* + Copyright(C) 2010 Tetsuro IKEDA + Copyright(C) 2010-2013 Kentoku SHIBA + Copyright(C) 2011-2017 Kouhei Sutou + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MRN_BEGIN_DECLS + +extern mrn::DatabaseManager *mrn_db_manager; +extern mrn::ContextPool *mrn_context_pool; + +struct st_mrn_snip_info +{ + grn_ctx *ctx; + grn_obj *db; + bool use_shared_db; + grn_obj *snippet; + String result_str; +}; + +static my_bool mrn_snippet_prepare(st_mrn_snip_info *snip_info, UDF_ARGS *args, + char *message, grn_obj **snippet) +{ + unsigned int i; + CHARSET_INFO *cs; + grn_ctx *ctx = snip_info->ctx; + long long snip_max_len; + long long snip_max_num; + long long skip_leading_spaces; + long long html_escape; + int flags = GRN_SNIP_COPY_TAG; + grn_snip_mapping *mapping = NULL; + grn_rc rc; + String *result_str = &snip_info->result_str; + + *snippet = NULL; + snip_max_len = *((long long *) args->args[1]); + snip_max_num = *((long long *) args->args[2]); + + if (args->arg_type[3] == STRING_RESULT) { + if (!(cs = get_charset_by_name(args->args[3], MYF(0)))) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "Unknown charset: <%s>", args->args[3]); + goto error; + } + } else { + uint charset_id = static_cast(*((long long *) args->args[3])); + if (!(cs = get_charset(charset_id, MYF(0)))) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "Unknown charset ID: <%u>", charset_id); + goto error; + } + } + if (!mrn::encoding::set_raw(ctx, cs)) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "Unsupported charset: <%s>", cs->name); + goto error; + } + + if (!(cs->state & (MY_CS_BINSORT | MY_CS_CSSORT))) { + flags |= GRN_SNIP_NORMALIZE; + } + + skip_leading_spaces = *((long long *) args->args[4]); + if (skip_leading_spaces) { + flags |= GRN_SNIP_SKIP_LEADING_SPACES; + } + + html_escape = *((long long *) args->args[5]); + if (html_escape) { + mapping = (grn_snip_mapping *) -1; + } + + *snippet = grn_snip_open(ctx, flags, static_cast(snip_max_len), + static_cast(snip_max_num), + "", 0, "", 0, mapping); + if (ctx->rc) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "Failed to open grn_snip: <%s>", ctx->errbuf); + goto error; + } + + for (i = 8; i < args->arg_count; i += 3) { + rc = grn_snip_add_cond(ctx, *snippet, + args->args[i], args->lengths[i], + args->args[i + 1], args->lengths[i + 1], + args->args[i + 2], args->lengths[i + 2]); + if (rc) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "Failed to add a condition to grn_snip: <%s>", ctx->errbuf); + goto error; + } + } + + result_str->set_charset(cs); + return FALSE; + +error: + if (*snippet) { + grn_obj_close(ctx, *snippet); + } + return TRUE; +} + +MRN_API my_bool mroonga_snippet_init(UDF_INIT *init, UDF_ARGS *args, char *message) +{ + uint i; + st_mrn_snip_info *snip_info = NULL; + bool can_open_snippet = TRUE; + init->ptr = NULL; + if (args->arg_count < 11 || (args->arg_count - 11) % 3) + { + sprintf(message, "Incorrect number of arguments for mroonga_snippet(): %u", + args->arg_count); + goto error; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message, "mroonga_snippet() requires string for 1st argument"); + goto error; + } + if (args->arg_type[1] != INT_RESULT) { + strcpy(message, "mroonga_snippet() requires int for 2nd argument"); + goto error; + } + if (args->arg_type[2] != INT_RESULT) { + strcpy(message, "mroonga_snippet() requires int for 3rd argument"); + goto error; + } + if ( + args->arg_type[3] != STRING_RESULT && + args->arg_type[3] != INT_RESULT + ) { + strcpy(message, + "mroonga_snippet() requires string or int for 4th argument"); + goto error; + } + if (args->arg_type[4] != INT_RESULT) { + strcpy(message, "mroonga_snippet() requires int for 5th argument"); + goto error; + } + if (args->arg_type[5] != INT_RESULT) { + strcpy(message, "mroonga_snippet() requires int for 6th argument"); + goto error; + } + for (i = 6; i < args->arg_count; i++) { + if (args->arg_type[i] != STRING_RESULT) { + sprintf(message, "mroonga_snippet() requires string for %uth argument", + i); + goto error; + } + } + init->maybe_null = 1; + + if (!(snip_info = (st_mrn_snip_info *) mrn_my_malloc(sizeof(st_mrn_snip_info), + MYF(MY_WME | MY_ZEROFILL)))) + { + strcpy(message, "mroonga_snippet() out of memory"); + goto error; + } + snip_info->ctx = mrn_context_pool->pull(); + { + const char *current_db_path = MRN_THD_DB_PATH(current_thd); + const char *action; + if (current_db_path) { + action = "open database"; + mrn::Database *db; + int error = mrn_db_manager->open(current_db_path, &db); + if (error == 0) { + snip_info->db = db->get(); + grn_ctx_use(snip_info->ctx, snip_info->db); + snip_info->use_shared_db = true; + } + } else { + action = "create anonymous database"; + snip_info->db = grn_db_create(snip_info->ctx, NULL, NULL); + snip_info->use_shared_db = false; + } + if (!snip_info->db) { + sprintf(message, + "mroonga_snippet(): failed to %s: %s", + action, + snip_info->ctx->errbuf); + goto error; + } + } + + for (i = 1; i < args->arg_count; i++) { + if (!args->args[i]) { + can_open_snippet = FALSE; + break; + } + } + if (can_open_snippet) { + if (mrn_snippet_prepare(snip_info, args, message, &snip_info->snippet)) { + goto error; + } + } + init->ptr = (char *) snip_info; + + return FALSE; + +error: + if (snip_info) { + if (!snip_info->use_shared_db) { + grn_obj_close(snip_info->ctx, snip_info->db); + } + mrn_context_pool->release(snip_info->ctx); + my_free(snip_info); + } + return TRUE; +} + +MRN_API char *mroonga_snippet(UDF_INIT *init, UDF_ARGS *args, char *result, + unsigned long *length, char *is_null, char *error) +{ + st_mrn_snip_info *snip_info = (st_mrn_snip_info *) init->ptr; + grn_ctx *ctx = snip_info->ctx; + String *result_str = &snip_info->result_str; + char *target; + unsigned int target_length; + grn_obj *snippet = NULL; + grn_rc rc; + unsigned int i, n_results, max_tagged_length, result_length; + + if (!args->args[0]) { + *is_null = 1; + return NULL; + } + *is_null = 0; + target = args->args[0]; + target_length = args->lengths[0]; + + if (!snip_info->snippet) { + for (i = 1; i < args->arg_count; i++) { + if (!args->args[i]) { + my_printf_error(ER_MRN_INVALID_NULL_VALUE_NUM, + ER_MRN_INVALID_NULL_VALUE_STR, MYF(0), + "mroonga_snippet() arguments"); + goto error; + } + } + + if (mrn_snippet_prepare(snip_info, args, NULL, &snippet)) { + goto error; + } + } else { + snippet = snip_info->snippet; + } + + rc = grn_snip_exec(ctx, snippet, target, target_length, + &n_results, &max_tagged_length); + if (rc) { + my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM, + ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf); + goto error; + } + + result_str->length(0); + if (result_str->reserve((args->lengths[6] + args->lengths[7] + + max_tagged_length) * n_results)) { + my_error(ER_OUT_OF_RESOURCES, MYF(0), HA_ERR_OUT_OF_MEM); + goto error; + } + for (i = 0; i < n_results; i++) { + result_str->q_append(args->args[6], args->lengths[6]); + rc = grn_snip_get_result(ctx, snippet, i, + (char *) result_str->ptr() + result_str->length(), + &result_length); + if (rc) { + my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM, + ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf); + goto error; + } + result_str->length(result_str->length() + result_length); + result_str->q_append(args->args[7], args->lengths[7]); + } + + if (!snip_info->snippet) { + rc = grn_obj_close(ctx, snippet); + if (rc) { + my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM, + ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf); + goto error; + } + } + + *length = result_str->length(); + return (char *) result_str->ptr(); + +error: + *error = 1; + return NULL; +} + +MRN_API void mroonga_snippet_deinit(UDF_INIT *init) +{ + st_mrn_snip_info *snip_info = (st_mrn_snip_info *) init->ptr; + if (snip_info) { + if (snip_info->snippet) { + grn_obj_close(snip_info->ctx, snip_info->snippet); + } + MRN_STRING_FREE(snip_info->result_str); + if (!snip_info->use_shared_db) { + grn_obj_close(snip_info->ctx, snip_info->db); + } + mrn_context_pool->release(snip_info->ctx); + my_free(snip_info); + } +} + +MRN_END_DECLS diff --git a/storage/mroonga/udf/mrn_udf_snippet_html.cpp b/storage/mroonga/udf/mrn_udf_snippet_html.cpp new file mode 100644 index 00000000..311c91bf --- /dev/null +++ b/storage/mroonga/udf/mrn_udf_snippet_html.cpp @@ -0,0 +1,444 @@ +/* -*- c-basic-offset: 2; indent-tabs-mode: nil -*- */ +/* + Copyright(C) 2015-2017 Kouhei Sutou + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MRN_BEGIN_DECLS + +extern mrn::DatabaseManager *mrn_db_manager; +extern mrn::ContextPool *mrn_context_pool; + +typedef struct st_mrn_snippet_html_info +{ + grn_ctx *ctx; + grn_obj *db; + bool use_shared_db; + grn_obj *snippet; + String result_str; + struct { + bool used; + grn_obj *table; + grn_obj *default_column; + } query_mode; +} mrn_snippet_html_info; + +static my_bool mrn_snippet_html_prepare(mrn_snippet_html_info *info, + UDF_ARGS *args, + char *message, + grn_obj **snippet) +{ + MRN_DBUG_ENTER_FUNCTION(); + + grn_ctx *ctx = info->ctx; + int flags = GRN_SNIP_SKIP_LEADING_SPACES; + unsigned int width = 200; + unsigned int max_n_results = 3; + const char *open_tag = ""; + const char *close_tag = ""; + grn_snip_mapping *mapping = GRN_SNIP_MAPPING_HTML_ESCAPE; + grn_obj *expr = NULL; + String *result_str = &(info->result_str); + + *snippet = NULL; + + mrn::encoding::set_raw(ctx, system_charset_info); + if (!(system_charset_info->state & (MY_CS_BINSORT | MY_CS_CSSORT))) { + flags |= GRN_SNIP_NORMALIZE; + } + + *snippet = grn_snip_open(ctx, flags, + width, max_n_results, + open_tag, strlen(open_tag), + close_tag, strlen(close_tag), + mapping); + if (ctx->rc != GRN_SUCCESS) { + if (message) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_snippet_html(): failed to open grn_snip: <%s>", + ctx->errbuf); + } + goto error; + } + + if (info->query_mode.used) { + if (!info->query_mode.table) { + grn_obj *short_text; + short_text = grn_ctx_at(info->ctx, GRN_DB_SHORT_TEXT); + info->query_mode.table = grn_table_create(info->ctx, + NULL, 0, NULL, + GRN_TABLE_HASH_KEY, + short_text, + NULL); + } + if (!info->query_mode.default_column) { + info->query_mode.default_column = + grn_obj_column(info->ctx, + info->query_mode.table, + GRN_COLUMN_NAME_KEY, + GRN_COLUMN_NAME_KEY_LEN); + } + + grn_obj *record = NULL; + GRN_EXPR_CREATE_FOR_QUERY(info->ctx, info->query_mode.table, expr, record); + if (!expr) { + if (message) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_snippet_html(): " + "failed to create expression: <%s>", + ctx->errbuf); + } + goto error; + } + + mrn::QueryParser query_parser(info->ctx, + current_thd, + expr, + info->query_mode.default_column, + 0, + NULL); + grn_rc rc = query_parser.parse(args->args[1], args->lengths[1]); + if (rc != GRN_SUCCESS) { + if (message) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_snippet_html(): " + "failed to parse query: <%s>", + ctx->errbuf); + } + goto error; + } + + rc = grn_expr_snip_add_conditions(info->ctx, + expr, + *snippet, + 0, + NULL, NULL, + NULL, NULL); + if (rc != GRN_SUCCESS) { + if (message) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_snippet_html(): " + "failed to add conditions: <%s>", + ctx->errbuf); + } + goto error; + } + } else { + unsigned int i; + for (i = 1; i < args->arg_count; ++i) { + if (!args->args[i]) { + continue; + } + grn_rc rc = grn_snip_add_cond(ctx, *snippet, + args->args[i], args->lengths[i], + NULL, 0, + NULL, 0); + if (rc != GRN_SUCCESS) { + if (message) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_snippet_html(): " + "failed to add a condition to grn_snip: <%s>", + ctx->errbuf); + } + goto error; + } + } + } + + result_str->set_charset(system_charset_info); + DBUG_RETURN(FALSE); + +error: + if (expr) { + grn_obj_close(ctx, expr); + } + if (*snippet) { + grn_obj_close(ctx, *snippet); + } + DBUG_RETURN(TRUE); +} + +MRN_API my_bool mroonga_snippet_html_init(UDF_INIT *init, + UDF_ARGS *args, + char *message) +{ + MRN_DBUG_ENTER_FUNCTION(); + + mrn_snippet_html_info *info = NULL; + + init->ptr = NULL; + + if (args->arg_count < 1) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_snippet_html(): wrong number of arguments: %u for 1+", + args->arg_count); + goto error; + } + + + for (unsigned int i = 0; i < args->arg_count; ++i) { + switch (args->arg_type[i]) { + case STRING_RESULT: + /* OK */ + break; + case REAL_RESULT: + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_snippet_html(): all arguments must be string: " + "<%u>=<%g>", + i, *((double *)(args->args[i]))); + goto error; + break; + case INT_RESULT: + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_snippet_html(): all arguments must be string: " + "<%u>=<%lld>", + i, *((longlong *)(args->args[i]))); + goto error; + break; + default: + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_snippet_html(): all arguments must be string: <%u>", + i); + goto error; + break; + } + } + + init->maybe_null = 1; + + info = (mrn_snippet_html_info *)mrn_my_malloc(sizeof(mrn_snippet_html_info), + MYF(MY_WME | MY_ZEROFILL)); + if (!info) { + snprintf(message, MYSQL_ERRMSG_SIZE, + "mroonga_snippet_html(): failed to allocate memory"); + goto error; + } + + info->ctx = mrn_context_pool->pull(); + { + const char *current_db_path = MRN_THD_DB_PATH(current_thd); + const char *action; + if (current_db_path) { + action = "open database"; + mrn::Database *db; + int error = mrn_db_manager->open(current_db_path, &db); + if (error == 0) { + info->db = db->get(); + grn_ctx_use(info->ctx, info->db); + info->use_shared_db = true; + } + } else { + action = "create anonymous database"; + info->db = grn_db_create(info->ctx, NULL, NULL); + info->use_shared_db = false; + } + if (!info->db) { + sprintf(message, + "mroonga_snippet_html(): failed to %s: %s", + action, + info->ctx->errbuf); + goto error; + } + } + + info->query_mode.used = FALSE; + + if (args->arg_count == 2 && + args->attribute_lengths[1] == strlen("query") && + strncmp(args->attributes[1], "query", strlen("query")) == 0) { + info->query_mode.used = TRUE; + info->query_mode.table = NULL; + info->query_mode.default_column = NULL; + } + + { + bool all_keywords_are_constant = TRUE; + for (unsigned int i = 1; i < args->arg_count; ++i) { + if (!args->args[i]) { + all_keywords_are_constant = FALSE; + break; + } + } + + if (all_keywords_are_constant) { + if (mrn_snippet_html_prepare(info, args, message, &(info->snippet))) { + goto error; + } + } else { + info->snippet = NULL; + } + } + + init->ptr = (char *)info; + + DBUG_RETURN(FALSE); + +error: + if (info) { + if (!info->use_shared_db) { + grn_obj_close(info->ctx, info->db); + } + mrn_context_pool->release(info->ctx); + my_free(info); + } + DBUG_RETURN(TRUE); +} + +MRN_API char *mroonga_snippet_html(UDF_INIT *init, + UDF_ARGS *args, + char *result, + unsigned long *length, + char *is_null, + char *error) +{ + MRN_DBUG_ENTER_FUNCTION(); + + mrn_snippet_html_info *info = + reinterpret_cast(init->ptr); + + grn_ctx *ctx = info->ctx; + grn_obj *snippet = info->snippet; + String *result_str = &(info->result_str); + + if (!args->args[0]) { + *is_null = 1; + DBUG_RETURN(NULL); + } + + if (!snippet) { + if (mrn_snippet_html_prepare(info, args, NULL, &snippet)) { + goto error; + } + } + + { + char *target = args->args[0]; + unsigned int target_length = args->lengths[0]; + + unsigned int n_results, max_tagged_length; + { + grn_rc rc = grn_snip_exec(ctx, snippet, target, target_length, + &n_results, &max_tagged_length); + if (rc != GRN_SUCCESS) { + my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM, + ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf); + goto error; + } + } + + *is_null = 0; + result_str->length(0); + + { + const char *start_tag = "
"; + const char *end_tag = "
"; + size_t start_tag_length = strlen(start_tag); + size_t end_tag_length = strlen(end_tag); + unsigned int max_length_per_snippet = + start_tag_length + end_tag_length + max_tagged_length; + if (result_str->reserve(max_length_per_snippet * n_results)) { + my_error(ER_OUT_OF_RESOURCES, MYF(0), HA_ERR_OUT_OF_MEM); + goto error; + } + + for (unsigned int i = 0; i < n_results; ++i) { + result_str->q_append(start_tag, start_tag_length); + + unsigned int result_length; + grn_rc rc = + grn_snip_get_result(ctx, snippet, i, + (char *)result_str->ptr() + result_str->length(), + &result_length); + if (rc) { + my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM, + ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf); + goto error; + } + result_str->length(result_str->length() + result_length); + + result_str->q_append(end_tag, end_tag_length); + } + } + + if (!info->snippet) { + grn_rc rc = grn_obj_close(ctx, snippet); + if (rc != GRN_SUCCESS) { + my_printf_error(ER_MRN_ERROR_FROM_GROONGA_NUM, + ER_MRN_ERROR_FROM_GROONGA_STR, MYF(0), ctx->errbuf); + goto error; + } + } + } + + *length = result_str->length(); + DBUG_RETURN((char *)result_str->ptr()); + +error: + if (!info->snippet && snippet) { + grn_obj_close(ctx, snippet); + } + + *is_null = 1; + *error = 1; + + DBUG_RETURN(NULL); +} + +MRN_API void mroonga_snippet_html_deinit(UDF_INIT *init) +{ + MRN_DBUG_ENTER_FUNCTION(); + + mrn_snippet_html_info *info = + reinterpret_cast(init->ptr); + if (!info) { + DBUG_VOID_RETURN; + } + + if (info->snippet) { + grn_obj_close(info->ctx, info->snippet); + } + if (info->query_mode.used) { + if (info->query_mode.default_column) { + grn_obj_close(info->ctx, info->query_mode.default_column); + } + if (info->query_mode.table) { + grn_obj_close(info->ctx, info->query_mode.table); + } + } + MRN_STRING_FREE(info->result_str); + if (!info->use_shared_db) { + grn_obj_close(info->ctx, info->db); + } + mrn_context_pool->release(info->ctx); + my_free(info); + + DBUG_VOID_RETURN; +} + +MRN_END_DECLS diff --git a/storage/mroonga/udf/sources.am b/storage/mroonga/udf/sources.am new file mode 100644 index 00000000..ac2f098e --- /dev/null +++ b/storage/mroonga/udf/sources.am @@ -0,0 +1,9 @@ +libmrn_udf_la_SOURCES = \ + mrn_udf_last_insert_grn_id.cpp \ + mrn_udf_snippet.cpp \ + mrn_udf_snippet_html.cpp \ + mrn_udf_command.cpp \ + mrn_udf_escape.cpp \ + mrn_udf_normalize.cpp \ + mrn_udf_highlight_html.cpp \ + mrn_udf_query_expand.cpp -- cgit v1.2.3