/* -*- c-basic-offset: 2 -*- */ /* Copyright(C) 2017 Kouhei Sutou This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ #include "mrn_query_parser.hpp" #include extern "C" { /* Groonga's internal functions */ int grn_atoi(const char *nptr, const char *end, const char **rest); uint grn_atoui(const char *nptr, const char *end, const char **rest); } #define MRN_CLASS_NAME "mrn::QueryParser" namespace mrn { QueryParser::QueryParser(grn_ctx *ctx, THD *thd, grn_obj *expression, grn_obj *default_column, uint n_sections, grn_obj *match_columns) : ctx_(ctx), thd_(thd), expression_(expression), default_column_(default_column), n_sections_(n_sections), match_columns_(match_columns) { } QueryParser::~QueryParser() = default; grn_rc QueryParser::parse(const char *query, size_t query_length) { MRN_DBUG_ENTER_METHOD(); const char *raw_query = NULL; size_t raw_query_length = 0; grn_operator default_operator = GRN_OP_OR; grn_expr_flags expression_flags = 0; parse_pragma(query, query_length, &raw_query, &raw_query_length, &default_operator, &expression_flags); grn_obj *default_column = default_column_; if (match_columns_) { default_column = match_columns_; } grn_rc rc = grn_expr_parse(ctx_, expression_, raw_query, raw_query_length, default_column, GRN_OP_MATCH, default_operator, expression_flags); if (rc != GRN_SUCCESS) { char error_message[MRN_MESSAGE_BUFFER_SIZE]; snprintf(error_message, MRN_MESSAGE_BUFFER_SIZE, "failed to parse fulltext search keyword: <%.*s>: <%s>", static_cast(query_length), query, ctx_->errbuf); variables::ActionOnError action = variables::get_action_on_fulltext_query_error(thd_); switch (action) { case variables::ACTION_ON_ERROR_ERROR: my_message(ER_PARSE_ERROR, error_message, MYF(0)); break; case variables::ACTION_ON_ERROR_ERROR_AND_LOG: my_message(ER_PARSE_ERROR, error_message, MYF(0)); GRN_LOG(ctx_, GRN_LOG_ERROR, "%s", error_message); break; case variables::ACTION_ON_ERROR_IGNORE: break; case variables::ACTION_ON_ERROR_IGNORE_AND_LOG: GRN_LOG(ctx_, GRN_LOG_ERROR, "%s", error_message); break; } } DBUG_RETURN(rc); } void QueryParser::parse_pragma(const char *query, size_t query_length, const char **raw_query, size_t *raw_query_length, grn_operator *default_operator, grn_expr_flags *flags) { MRN_DBUG_ENTER_METHOD(); const char *current_query = query; size_t current_query_length = query_length; *default_operator = GRN_OP_OR; if (current_query_length >= 4 && memcmp(current_query, "*SS ", 4) == 0) { *raw_query = current_query + 4; *raw_query_length = current_query_length - 4; *flags = GRN_EXPR_SYNTAX_SCRIPT; DBUG_VOID_RETURN; } bool weight_specified = false; *raw_query = query; *raw_query_length = query_length; *flags = default_expression_flags(); if (current_query_length >= 2 && current_query[0] == '*') { bool parsed = false; bool done = false; current_query++; current_query_length--; while (!done) { size_t consumed_query_length = 0; switch (current_query[0]) { case 'D': if (parse_pragma_d(current_query + 1, current_query_length - 1, default_operator, &consumed_query_length)) { parsed = true; consumed_query_length += 1; current_query += consumed_query_length; current_query_length -= consumed_query_length; } else { done = true; } break; case 'W': if (parse_pragma_w(current_query + 1, current_query_length - 1, &consumed_query_length)) { parsed = true; weight_specified = true; consumed_query_length += 1; current_query += consumed_query_length; current_query_length -= consumed_query_length; } else { done = true; } break; default: done = true; break; } } if (parsed) { *raw_query = current_query; *raw_query_length = current_query_length; } } // WORKAROUND: ignore the first '+' to support "+apple macintosh" pattern. while (*raw_query_length > 0 && (*raw_query)[0] == ' ') { (*raw_query)++; (*raw_query_length)--; } if (*raw_query_length > 0 && (*raw_query)[0] == '+') { (*raw_query)++; (*raw_query_length)--; } if (!weight_specified && match_columns_) { grn_expr_append_obj(ctx_, match_columns_, default_column_, GRN_OP_PUSH, 1); } DBUG_VOID_RETURN; } bool QueryParser::parse_pragma_w(const char *query, size_t query_length, size_t *consumed_query_length) { MRN_DBUG_ENTER_METHOD(); *consumed_query_length = 0; grn_obj section_value_buffer; GRN_UINT32_INIT(§ion_value_buffer, 0); MRN_ALLOCATE_VARIABLE_LENGTH_ARRAYS(bool, specified_sections, n_sections_); for (uint i = 0; i < n_sections_; ++i) { specified_sections[i] = false; } uint n_weights = 0; while (query_length >= 1) { if (n_weights >= 1) { if (query[0] != ',') { break; } size_t n_used_query_length = 1; *consumed_query_length += n_used_query_length; query_length -= n_used_query_length; query += n_used_query_length; if (query_length == 0) { break; } } uint section = 0; if ('1' <= query[0] && query[0] <= '9') { const char *section_start = query; const char *query_end = query + query_length; const char *query_rest; section = grn_atoui(section_start, query_end, &query_rest); if (section_start == query_rest) { break; } if (!(0 < section && section <= n_sections_)) { break; } section -= 1; specified_sections[section] = true; size_t n_used_query_length = query_rest - query; *consumed_query_length += n_used_query_length; query_length -= n_used_query_length; query += n_used_query_length; } else { break; } int weight = 1; if (query_length >= 2 && query[0] == ':') { const char *weight_start = query + 1; const char *query_end = query + query_length; const char *query_rest; weight = grn_atoi(weight_start, query_end, &query_rest); if (weight_start == query_rest) { break; } size_t n_used_query_length = query_rest - query; *consumed_query_length += n_used_query_length; query_length -= n_used_query_length; query += n_used_query_length; } n_weights++; append_section(section, §ion_value_buffer, weight, n_weights); } for (uint section = 0; section < n_sections_; ++section) { if (specified_sections[section]) { continue; } ++n_weights; int default_weight = 1; append_section(section, §ion_value_buffer, default_weight, n_weights); } MRN_FREE_VARIABLE_LENGTH_ARRAYS(specified_sections); GRN_OBJ_FIN(ctx_, §ion_value_buffer); DBUG_RETURN(n_weights > 0); } void QueryParser::append_section(uint section, grn_obj *section_value_buffer, int weight, uint n_weights) { MRN_DBUG_ENTER_METHOD(); if (!match_columns_) { DBUG_VOID_RETURN; } grn_expr_append_obj(ctx_, match_columns_, default_column_, GRN_OP_PUSH, 1); GRN_UINT32_SET(ctx_, section_value_buffer, section); grn_expr_append_const(ctx_, match_columns_, section_value_buffer, GRN_OP_PUSH, 1); grn_expr_append_op(ctx_, match_columns_, GRN_OP_GET_MEMBER, 2); if (weight != 1) { grn_expr_append_const_int(ctx_, match_columns_, weight, GRN_OP_PUSH, 1); grn_expr_append_op(ctx_, match_columns_, GRN_OP_STAR, 2); } if (n_weights >= 2) { grn_expr_append_op(ctx_, match_columns_, GRN_OP_OR, 2); } DBUG_VOID_RETURN; } bool QueryParser::parse_pragma_d(const char *query, size_t query_length, grn_operator *default_operator, size_t *consumed_query_length) { MRN_DBUG_ENTER_METHOD(); bool succeeded = true; if (query_length >= 1 && query[0] == '+') { *default_operator = GRN_OP_AND; *consumed_query_length = 1; } else if (query_length >= 1 && query[0] == '-') { *default_operator = GRN_OP_AND_NOT; *consumed_query_length = 1; } else if (query_length >= 2 && memcmp(query, "OR", 2) == 0) { *default_operator = GRN_OP_OR; *consumed_query_length = 2; } else { succeeded = false; } DBUG_RETURN(succeeded); } grn_expr_flags QueryParser::default_expression_flags() { MRN_DBUG_ENTER_METHOD(); ulonglong syntax_flags = variables::get_boolean_mode_syntax_flags(thd_); grn_expr_flags expression_flags = 0; if (syntax_flags == variables::BOOLEAN_MODE_SYNTAX_FLAG_DEFAULT) { expression_flags = GRN_EXPR_SYNTAX_QUERY | GRN_EXPR_ALLOW_LEADING_NOT; } else { if (syntax_flags & variables::BOOLEAN_MODE_SYNTAX_FLAG_SYNTAX_SCRIPT) { expression_flags |= GRN_EXPR_SYNTAX_SCRIPT; } else { expression_flags |= GRN_EXPR_SYNTAX_QUERY; } if (syntax_flags & variables::BOOLEAN_MODE_SYNTAX_FLAG_ALLOW_COLUMN) { expression_flags |= GRN_EXPR_ALLOW_COLUMN; } if (syntax_flags & variables::BOOLEAN_MODE_SYNTAX_FLAG_ALLOW_UPDATE) { expression_flags |= GRN_EXPR_ALLOW_UPDATE; } if (syntax_flags & variables::BOOLEAN_MODE_SYNTAX_FLAG_ALLOW_LEADING_NOT) { expression_flags |= GRN_EXPR_ALLOW_LEADING_NOT; } } DBUG_RETURN(expression_flags); } }