summaryrefslogtreecommitdiffstats
path: root/storage/mroonga/vendor/groonga/lib/ts/ts_expr_parser.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 18:07:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 18:07:14 +0000
commita175314c3e5827eb193872241446f2f8f5c9d33c (patch)
treecd3d60ca99ae00829c52a6ca79150a5b6e62528b /storage/mroonga/vendor/groonga/lib/ts/ts_expr_parser.c
parentInitial commit. (diff)
downloadmariadb-10.5-upstream.tar.xz
mariadb-10.5-upstream.zip
Adding upstream version 1:10.5.12.upstream/1%10.5.12upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/mroonga/vendor/groonga/lib/ts/ts_expr_parser.c')
-rw-r--r--storage/mroonga/vendor/groonga/lib/ts/ts_expr_parser.c1329
1 files changed, 1329 insertions, 0 deletions
diff --git a/storage/mroonga/vendor/groonga/lib/ts/ts_expr_parser.c b/storage/mroonga/vendor/groonga/lib/ts/ts_expr_parser.c
new file mode 100644
index 00000000..10e6d2fc
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/lib/ts/ts_expr_parser.c
@@ -0,0 +1,1329 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+ Copyright(C) 2015-2016 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+#include "ts_expr_parser.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "../grn_ctx.h"
+
+#include "ts_log.h"
+#include "ts_str.h"
+#include "ts_util.h"
+
+/*-------------------------------------------------------------
+ * grn_ts_expr_token.
+ */
+
+#define GRN_TS_EXPR_TOKEN_INIT(TYPE)\
+ memset(token, 0, sizeof(*token));\
+ token->type = GRN_TS_EXPR_ ## TYPE ## _TOKEN;\
+ token->src = src;
+/* grn_ts_expr_dummy_token_init() initializes a token. */
+static void
+grn_ts_expr_dummy_token_init(grn_ctx *ctx, grn_ts_expr_dummy_token *token,
+ grn_ts_str src)
+{
+ GRN_TS_EXPR_TOKEN_INIT(DUMMY)
+}
+
+/* grn_ts_expr_start_token_init() initializes a token. */
+static void
+grn_ts_expr_start_token_init(grn_ctx *ctx, grn_ts_expr_start_token *token,
+ grn_ts_str src)
+{
+ GRN_TS_EXPR_TOKEN_INIT(START)
+}
+
+/* grn_ts_expr_end_token_init() initializes a token. */
+static void
+grn_ts_expr_end_token_init(grn_ctx *ctx, grn_ts_expr_end_token *token,
+ grn_ts_str src)
+{
+ GRN_TS_EXPR_TOKEN_INIT(END)
+}
+
+/* grn_ts_expr_const_token_init() initializes a token. */
+static void
+grn_ts_expr_const_token_init(grn_ctx *ctx, grn_ts_expr_const_token *token,
+ grn_ts_str src)
+{
+ GRN_TS_EXPR_TOKEN_INIT(CONST);
+ grn_ts_buf_init(ctx, &token->buf);
+}
+
+/* grn_ts_expr_name_token_init() initializes a token. */
+static void
+grn_ts_expr_name_token_init(grn_ctx *ctx, grn_ts_expr_name_token *token,
+ grn_ts_str src)
+{
+ GRN_TS_EXPR_TOKEN_INIT(NAME);
+}
+
+/* grn_ts_expr_op_token_init() initializes a token. */
+static void
+grn_ts_expr_op_token_init(grn_ctx *ctx, grn_ts_expr_op_token *token,
+ grn_ts_str src)
+{
+ GRN_TS_EXPR_TOKEN_INIT(OP);
+}
+
+/* grn_ts_expr_bridge_token_init() initializes a token. */
+static void
+grn_ts_expr_bridge_token_init(grn_ctx *ctx, grn_ts_expr_bridge_token *token,
+ grn_ts_str src)
+{
+ GRN_TS_EXPR_TOKEN_INIT(BRIDGE)
+}
+
+/* grn_ts_expr_bracket_token_init() initializes a token. */
+static void
+grn_ts_expr_bracket_token_init(grn_ctx *ctx, grn_ts_expr_bracket_token *token,
+ grn_ts_str src)
+{
+ GRN_TS_EXPR_TOKEN_INIT(BRACKET)
+}
+#undef GRN_TS_EXPR_TOKEN_INIT
+
+/*
+ * grn_ts_expr_dummy_token_fin() finalizes a dummy token.
+ * There is nothing to release, so both arguments are unused.
+ */
+static void
+grn_ts_expr_dummy_token_fin(grn_ctx *ctx, grn_ts_expr_dummy_token *token)
+{
+  (void)ctx;
+  (void)token;
+}
+
+/*
+ * grn_ts_expr_start_token_fin() finalizes a start token.
+ * There is nothing to release, so both arguments are unused.
+ */
+static void
+grn_ts_expr_start_token_fin(grn_ctx *ctx, grn_ts_expr_start_token *token)
+{
+  (void)ctx;
+  (void)token;
+}
+
+/*
+ * grn_ts_expr_end_token_fin() finalizes an end token.
+ * There is nothing to release, so both arguments are unused.
+ */
+static void
+grn_ts_expr_end_token_fin(grn_ctx *ctx, grn_ts_expr_end_token *token)
+{
+  (void)ctx;
+  (void)token;
+}
+
+/*
+ * grn_ts_expr_const_token_fin() finalizes a constant token by finalizing
+ * the buffer that belongs to it.
+ */
+static void
+grn_ts_expr_const_token_fin(grn_ctx *ctx, grn_ts_expr_const_token *token)
+{
+  grn_ts_buf_fin(ctx, &(token->buf));
+}
+
+/*
+ * grn_ts_expr_name_token_fin() finalizes a name token.
+ * There is nothing to release, so both arguments are unused.
+ */
+static void
+grn_ts_expr_name_token_fin(grn_ctx *ctx, grn_ts_expr_name_token *token)
+{
+  (void)ctx;
+  (void)token;
+}
+
+/*
+ * grn_ts_expr_op_token_fin() finalizes an operator token.
+ * There is nothing to release, so both arguments are unused.
+ */
+static void
+grn_ts_expr_op_token_fin(grn_ctx *ctx, grn_ts_expr_op_token *token)
+{
+  (void)ctx;
+  (void)token;
+}
+
+/*
+ * grn_ts_expr_bridge_token_fin() finalizes a bridge token.
+ * There is nothing to release, so both arguments are unused.
+ */
+static void
+grn_ts_expr_bridge_token_fin(grn_ctx *ctx, grn_ts_expr_bridge_token *token)
+{
+  (void)ctx;
+  (void)token;
+}
+
+/*
+ * grn_ts_expr_bracket_token_fin() finalizes a bracket token.
+ * There is nothing to release, so both arguments are unused.
+ */
+static void
+grn_ts_expr_bracket_token_fin(grn_ctx *ctx, grn_ts_expr_bracket_token *token)
+{
+  (void)ctx;
+  (void)token;
+}
+
+/*
+ * GRN_TS_EXPR_TOKEN_OPEN() is the shared body of the token constructors below.
+ * It allocates one grn_ts_expr_<type>_token, initializes it from `src` with
+ * grn_ts_expr_<type>_token_init() and stores it into `*token`.
+ * If the allocation fails, GRN_TS_ERR_RETURN() is invoked with
+ * GRN_NO_MEMORY_AVAILABLE.
+ * The macro declares the local variable `opened_token` and requires `ctx`,
+ * `src` and `token` to be in scope.
+ */
+#define GRN_TS_EXPR_TOKEN_OPEN(TYPE, type)\
+  grn_ts_expr_ ## type ## _token *opened_token =\
+    GRN_MALLOCN(grn_ts_expr_ ## type ## _token, 1);\
+  if (opened_token == NULL) {\
+    GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE,\
+                      "GRN_MALLOCN failed: %" GRN_FMT_SIZE " x 1",\
+                      sizeof(grn_ts_expr_ ## type ## _token));\
+  }\
+  grn_ts_expr_ ## type ## _token_init(ctx, opened_token, src);\
+  *token = opened_token;
+/* grn_ts_expr_dummy_token_open() creates a token. */
+/*
+static grn_rc
+grn_ts_expr_dummy_token_open(grn_ctx *ctx, grn_ts_str src,
+                             grn_ts_expr_dummy_token **token)
+{
+  GRN_TS_EXPR_TOKEN_OPEN(DUMMY, dummy)
+  return GRN_SUCCESS;
+}
+*/
+
+/* grn_ts_expr_start_token_open() allocates and initializes a start token. */
+static grn_rc
+grn_ts_expr_start_token_open(grn_ctx *ctx, grn_ts_str src,
+                             grn_ts_expr_start_token **token)
+{
+  GRN_TS_EXPR_TOKEN_OPEN(START, start)
+  return GRN_SUCCESS;
+}
+
+/* grn_ts_expr_end_token_open() allocates and initializes an end token. */
+static grn_rc
+grn_ts_expr_end_token_open(grn_ctx *ctx, grn_ts_str src,
+                           grn_ts_expr_end_token **token)
+{
+  GRN_TS_EXPR_TOKEN_OPEN(END, end)
+  return GRN_SUCCESS;
+}
+
+/* grn_ts_expr_const_token_open() allocates and initializes a constant token. */
+static grn_rc
+grn_ts_expr_const_token_open(grn_ctx *ctx, grn_ts_str src,
+                             grn_ts_expr_const_token **token)
+{
+  GRN_TS_EXPR_TOKEN_OPEN(CONST, const)
+  return GRN_SUCCESS;
+}
+
+/* grn_ts_expr_name_token_open() allocates and initializes a name token. */
+static grn_rc
+grn_ts_expr_name_token_open(grn_ctx *ctx, grn_ts_str src,
+                            grn_ts_expr_name_token **token)
+{
+  GRN_TS_EXPR_TOKEN_OPEN(NAME, name)
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ts_expr_op_token_open() allocates and initializes an operator token
+ * and then stores `op_type` in it.
+ */
+static grn_rc
+grn_ts_expr_op_token_open(grn_ctx *ctx, grn_ts_str src, grn_ts_op_type op_type,
+                          grn_ts_expr_op_token **token)
+{
+  GRN_TS_EXPR_TOKEN_OPEN(OP, op)
+  opened_token->op_type = op_type;
+  return GRN_SUCCESS;
+}
+
+/* grn_ts_expr_bridge_token_open() allocates and initializes a bridge token. */
+static grn_rc
+grn_ts_expr_bridge_token_open(grn_ctx *ctx, grn_ts_str src,
+                              grn_ts_expr_bridge_token **token)
+{
+  GRN_TS_EXPR_TOKEN_OPEN(BRIDGE, bridge)
+  return GRN_SUCCESS;
+}
+
+/* grn_ts_expr_bracket_token_open() allocates and initializes a bracket token. */
+static grn_rc
+grn_ts_expr_bracket_token_open(grn_ctx *ctx, grn_ts_str src,
+                               grn_ts_expr_bracket_token **token)
+{
+  GRN_TS_EXPR_TOKEN_OPEN(BRACKET, bracket)
+  return GRN_SUCCESS;
+}
+#undef GRN_TS_EXPR_TOKEN_OPEN
+
+/*
+ * GRN_TS_EXPR_TOKEN_CLOSE_CASE() expands to the case label for
+ * GRN_TS_EXPR_<TYPE>_TOKEN, which finalizes `token` as a
+ * grn_ts_expr_<type>_token.
+ */
+#define GRN_TS_EXPR_TOKEN_CLOSE_CASE(TYPE, type)\
+  case GRN_TS_EXPR_ ## TYPE ## _TOKEN: {\
+    grn_ts_expr_ ## type ## _token_fin(ctx,\
+                                       (grn_ts_expr_ ## type ## _token *)token);\
+    break;\
+  }
+/*
+ * grn_ts_expr_token_close() destroys a token: the finalizer that matches the
+ * token type is called, and then the token itself is freed.
+ */
+static void
+grn_ts_expr_token_close(grn_ctx *ctx, grn_ts_expr_token *token)
+{
+  switch (token->type) {
+    GRN_TS_EXPR_TOKEN_CLOSE_CASE(DUMMY, dummy)
+    GRN_TS_EXPR_TOKEN_CLOSE_CASE(START, start)
+    GRN_TS_EXPR_TOKEN_CLOSE_CASE(END, end)
+    GRN_TS_EXPR_TOKEN_CLOSE_CASE(CONST, const)
+    GRN_TS_EXPR_TOKEN_CLOSE_CASE(NAME, name)
+    GRN_TS_EXPR_TOKEN_CLOSE_CASE(OP, op)
+    GRN_TS_EXPR_TOKEN_CLOSE_CASE(BRACKET, bracket)
+    GRN_TS_EXPR_TOKEN_CLOSE_CASE(BRIDGE, bridge)
+    default: {
+      /* No finalizer is called for any other type. */
+      break;
+    }
+  }
+  GRN_FREE(token);
+}
+#undef GRN_TS_EXPR_TOKEN_CLOSE_CASE
+
+/*-------------------------------------------------------------
+ * grn_ts_expr_parser.
+ */
+
+/*
+ * grn_ts_expr_parser_init() puts a parser into its empty state: all members
+ * are zero-filled, the pointer members are set to NULL and the string buffer
+ * is initialized.
+ */
+static void
+grn_ts_expr_parser_init(grn_ctx *ctx, grn_ts_expr_parser *parser)
+{
+  memset(parser, 0, sizeof(*parser));
+  parser->stack = NULL;
+  parser->dummy_tokens = NULL;
+  parser->tokens = NULL;
+  parser->builder = NULL;
+  grn_ts_buf_init(ctx, &parser->str_buf);
+}
+
+/*
+ * grn_ts_expr_parser_fin() finalizes a parser.
+ * It frees the stack, finalizes and frees the dummy tokens, closes and frees
+ * the tokens, finalizes the string buffer and closes the builder.
+ * Members that are NULL are skipped.
+ */
+static void
+grn_ts_expr_parser_fin(grn_ctx *ctx, grn_ts_expr_parser *parser)
+{
+  size_t idx;
+  if (parser->stack != NULL) {
+    GRN_FREE(parser->stack);
+  }
+  if (parser->dummy_tokens != NULL) {
+    for (idx = 0; idx < parser->n_dummy_tokens; idx++) {
+      grn_ts_expr_dummy_token_fin(ctx, parser->dummy_tokens + idx);
+    }
+    GRN_FREE(parser->dummy_tokens);
+  }
+  if (parser->tokens != NULL) {
+    for (idx = 0; idx < parser->n_tokens; idx++) {
+      grn_ts_expr_token_close(ctx, parser->tokens[idx]);
+    }
+    GRN_FREE(parser->tokens);
+  }
+  grn_ts_buf_fin(ctx, &parser->str_buf);
+  if (parser->builder != NULL) {
+    grn_ts_expr_builder_close(ctx, parser->builder);
+  }
+}
+
+/*
+ * grn_ts_expr_parser_open() creates a parser associated with `table`.
+ * GRN_INVALID_ARGUMENT is returned if `ctx` is NULL, if `table` is NULL or is
+ * not a table, or if `parser` is NULL. On success, the new parser is stored
+ * into `*parser`; on failure, `*parser` is not modified.
+ */
+grn_rc
+grn_ts_expr_parser_open(grn_ctx *ctx, grn_obj *table,
+                        grn_ts_expr_parser **parser)
+{
+  grn_rc rc;
+  grn_ts_expr_parser *created;
+  if (!ctx) {
+    return GRN_INVALID_ARGUMENT;
+  }
+  if (!(table && grn_ts_obj_is_table(ctx, table) && parser)) {
+    GRN_TS_ERR_RETURN(GRN_INVALID_ARGUMENT, "invalid argument");
+  }
+  created = GRN_MALLOCN(grn_ts_expr_parser, 1);
+  if (created == NULL) {
+    GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE,
+                      "GRN_MALLOCN failed: %" GRN_FMT_SIZE " x 1",
+                      sizeof(grn_ts_expr_parser));
+  }
+  grn_ts_expr_parser_init(ctx, created);
+  rc = grn_ts_expr_builder_open(ctx, table, &created->builder);
+  if (rc == GRN_SUCCESS) {
+    *parser = created;
+    return GRN_SUCCESS;
+  }
+  /* The builder is not available: discard the half-built parser. */
+  grn_ts_expr_parser_fin(ctx, created);
+  GRN_FREE(created);
+  return rc;
+}
+
+/*
+ * grn_ts_expr_parser_close() finalizes and frees a parser.
+ * GRN_INVALID_ARGUMENT is returned if `ctx` or `parser` is NULL.
+ */
+grn_rc
+grn_ts_expr_parser_close(grn_ctx *ctx, grn_ts_expr_parser *parser)
+{
+  if (ctx == NULL) {
+    return GRN_INVALID_ARGUMENT;
+  }
+  if (parser == NULL) {
+    GRN_TS_ERR_RETURN(GRN_INVALID_ARGUMENT, "invalid argument");
+  }
+  grn_ts_expr_parser_fin(ctx, parser);
+  GRN_FREE(parser);
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ts_expr_parser_tokenize_start() creates the start token.
+ * The token refers to the head of `str` and has a size of 0.
+ */
+static grn_rc
+grn_ts_expr_parser_tokenize_start(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                                  grn_ts_str str, grn_ts_expr_token **token)
+{
+  grn_rc rc;
+  grn_ts_str empty_str;
+  grn_ts_expr_start_token *start_token;
+  empty_str.ptr = str.ptr;
+  empty_str.size = 0;
+  rc = grn_ts_expr_start_token_open(ctx, empty_str, &start_token);
+  if (rc == GRN_SUCCESS) {
+    *token = (grn_ts_expr_token *)start_token;
+  }
+  return rc;
+}
+
+/*
+ * grn_ts_expr_parser_tokenize_end() creates the end token.
+ * The token refers to the head of `str` and has a size of 0.
+ */
+static grn_rc
+grn_ts_expr_parser_tokenize_end(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                                grn_ts_str str, grn_ts_expr_token **token)
+{
+  grn_rc rc;
+  grn_ts_str empty_str;
+  grn_ts_expr_end_token *end_token;
+  empty_str.ptr = str.ptr;
+  empty_str.size = 0;
+  rc = grn_ts_expr_end_token_open(ctx, empty_str, &end_token);
+  if (rc == GRN_SUCCESS) {
+    *token = (grn_ts_expr_token *)end_token;
+  }
+  return rc;
+}
+
+/*
+ * grn_ts_expr_parser_tokenize_number() tokenizes an Int or Float literal.
+ *
+ * The head of `str` is parsed as an integer first. If the integer is followed
+ * by '.', 'e' or 'E', or if no integer can be parsed, the head is parsed again
+ * as a floating-point number. A literal that is directly followed by a name
+ * character is rejected with GRN_INVALID_FORMAT.
+ * On success, the new constant token is stored into `*token`.
+ *
+ * NOTE(review): strtoll() and strtod() stop at the first byte they cannot
+ * use rather than at `str.size`, so `str.ptr` is assumed to be followed by a
+ * terminating byte -- confirm against the caller.
+ */
+static grn_rc
+grn_ts_expr_parser_tokenize_number(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                                   grn_ts_str str, grn_ts_expr_token **token)
+{
+  char *end;
+  grn_rc rc;
+  grn_ts_int int_value;
+  grn_ts_str token_str;
+  grn_ts_expr_const_token *new_token;
+
+  /*
+   * strtoll() is used instead of strtol() because long may be only 32 bits
+   * wide on some platforms, which would truncate large Int literals
+   * (grn_ts_int is presumably a 64-bit integer -- confirm).
+   */
+  int_value = (grn_ts_int)strtoll(str.ptr, &end, 0);
+  /*
+   * An exponent marker can be written in either case ("1e5" or "1E5"), so
+   * both of them hand the literal over to the Float branch.
+   */
+  if ((end != str.ptr) && (*end != '.') && (*end != 'e') && (*end != 'E')) {
+    if (grn_ts_byte_is_name_char(*end)) {
+      GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT,
+                        "unterminated Int literal: \"%.*s\"",
+                        (int)str.size, str.ptr);
+    }
+    token_str.ptr = str.ptr;
+    token_str.size = end - str.ptr;
+    rc = grn_ts_expr_const_token_open(ctx, token_str, &new_token);
+    if (rc != GRN_SUCCESS) {
+      return rc;
+    }
+    new_token->data_kind = GRN_TS_INT;
+    new_token->content.as_int = int_value;
+  } else {
+    grn_ts_float float_value = strtod(str.ptr, &end);
+    if (end == str.ptr) {
+      GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid number literal: \"%.*s\"",
+                        (int)str.size, str.ptr);
+    }
+    if (grn_ts_byte_is_name_char(*end)) {
+      GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT,
+                        "unterminated Float literal: \"%.*s\"",
+                        (int)str.size, str.ptr);
+    }
+    token_str.ptr = str.ptr;
+    token_str.size = end - str.ptr;
+    rc = grn_ts_expr_const_token_open(ctx, token_str, &new_token);
+    if (rc != GRN_SUCCESS) {
+      return rc;
+    }
+    new_token->data_kind = GRN_TS_FLOAT;
+    new_token->content.as_float = float_value;
+  }
+  *token = (grn_ts_expr_token *)new_token;
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ts_expr_parser_tokenize_text() tokenizes a Text literal.
+ *
+ * The literal starts at the head of `str` and ends at the first double quote
+ * that is not preceded by a backslash. GRN_INVALID_FORMAT is returned if there
+ * is no such double quote.
+ * If the literal contains backslashes, the unescaped content is stored in the
+ * buffer of the token. Otherwise, the content refers to the bytes between the
+ * quotes in `str`.
+ */
+static grn_rc
+grn_ts_expr_parser_tokenize_text(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                                 grn_ts_str str, grn_ts_expr_token **token)
+{
+  size_t pos = 1, n_escapes = 0;
+  grn_rc rc;
+  grn_ts_str token_str;
+  grn_ts_expr_const_token *text_token;
+
+  /* Find the closing double quote, skipping every escaped byte. */
+  while (pos < str.size) {
+    if (str.ptr[pos] == '"') {
+      break;
+    }
+    if (str.ptr[pos] == '\\') {
+      n_escapes++;
+      pos++;
+    }
+    pos++;
+  }
+  if (pos >= str.size) {
+    GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "no closing double quote: \"%.*s\"",
+                      (int)str.size, str.ptr);
+  }
+
+  token_str.ptr = str.ptr;
+  token_str.size = pos + 1;
+  rc = grn_ts_expr_const_token_open(ctx, token_str, &text_token);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  text_token->data_kind = GRN_TS_TEXT;
+  if (n_escapes == 0) {
+    /* Nothing to unescape: refer to the bytes between the quotes. */
+    text_token->content.as_text.ptr = token_str.ptr + 1;
+    text_token->content.as_text.size = token_str.size - 2;
+  } else {
+    char *dest;
+    const char *in = str.ptr + 1;
+    size_t out, size = token_str.size - 2 - n_escapes;
+    rc = grn_ts_buf_resize(ctx, &text_token->buf, size);
+    if (rc != GRN_SUCCESS) {
+      grn_ts_expr_token_close(ctx, (grn_ts_expr_token *)text_token);
+      return rc;
+    }
+    dest = (char *)text_token->buf.ptr;
+    for (out = 0; out < size; out++) {
+      /* A backslash is dropped and the byte after it is copied as is. */
+      if (*in == '\\') {
+        in++;
+      }
+      dest[out] = *in++;
+    }
+    text_token->content.as_text.ptr = dest;
+    text_token->content.as_text.size = size;
+  }
+  *token = (grn_ts_expr_token *)text_token;
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ts_expr_parser_tokenize_name() tokenizes a Bool literal or a name.
+ *
+ * The token consists of the first byte of `str` and the name characters that
+ * directly follow it. If grn_ts_str_is_bool() accepts the token string, a
+ * constant token of GRN_TS_BOOL is created, whose value is true if the string
+ * starts with 't'. Otherwise, a name token is created.
+ * On success, the new token is stored into `*token`.
+ */
+static grn_rc
+grn_ts_expr_parser_tokenize_name(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                                 grn_ts_str str, grn_ts_expr_token **token)
+{
+  size_t i;
+  grn_rc rc;
+  grn_ts_str token_str;
+  for (i = 1; i < str.size; i++) {
+    if (!grn_ts_byte_is_name_char(str.ptr[i])) {
+      break;
+    }
+  }
+  token_str.ptr = str.ptr;
+  token_str.size = i;
+
+  if (grn_ts_str_is_bool(token_str)) {
+    grn_ts_expr_const_token *new_token;
+    rc = grn_ts_expr_const_token_open(ctx, token_str, &new_token);
+    if (rc != GRN_SUCCESS) {
+      return rc;
+    }
+    new_token->data_kind = GRN_TS_BOOL;
+    if (token_str.ptr[0] == 't') {
+      new_token->content.as_bool = GRN_TRUE;
+    } else {
+      new_token->content.as_bool = GRN_FALSE;
+    }
+    *token = (grn_ts_expr_token *)new_token;
+    return GRN_SUCCESS;
+  } else {
+    /*
+     * The token is opened through a pointer of its own type and then cast,
+     * as the other tokenizers do, so that this call does not depend on
+     * grn_ts_expr_name_token and grn_ts_expr_token being the same type.
+     */
+    grn_ts_expr_name_token *new_token;
+    rc = grn_ts_expr_name_token_open(ctx, token_str, &new_token);
+    if (rc != GRN_SUCCESS) {
+      return rc;
+    }
+    *token = (grn_ts_expr_token *)new_token;
+    return GRN_SUCCESS;
+  }
+}
+
+/*
+ * grn_ts_expr_parser_tokenize_bridge() tokenizes a bridge.
+ * The token consists of the first byte of `str`.
+ */
+static grn_rc
+grn_ts_expr_parser_tokenize_bridge(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                                   grn_ts_str str, grn_ts_expr_token **token)
+{
+  grn_rc rc;
+  grn_ts_str bridge_str;
+  grn_ts_expr_bridge_token *bridge_token;
+  bridge_str.ptr = str.ptr;
+  bridge_str.size = 1;
+  rc = grn_ts_expr_bridge_token_open(ctx, bridge_str, &bridge_token);
+  if (rc == GRN_SUCCESS) {
+    *token = (grn_ts_expr_token *)bridge_token;
+  }
+  return rc;
+}
+
+/*
+ * grn_ts_expr_parser_tokenize_bracket() tokenizes a bracket.
+ * The token consists of the first byte of `str`.
+ */
+static grn_rc
+grn_ts_expr_parser_tokenize_bracket(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                                    grn_ts_str str,
+                                    grn_ts_expr_token **token)
+{
+  grn_rc rc;
+  grn_ts_str bracket_str;
+  grn_ts_expr_bracket_token *bracket_token;
+  bracket_str.ptr = str.ptr;
+  bracket_str.size = 1;
+  rc = grn_ts_expr_bracket_token_open(ctx, bracket_str, &bracket_token);
+  if (rc == GRN_SUCCESS) {
+    *token = (grn_ts_expr_token *)bracket_token;
+  }
+  return rc;
+}
+
+/*
+ * grn_ts_expr_parser_tokenize_sign() tokenizes an operator '+' or '-'.
+ * Each of them has two roles, and the previous token decides which one
+ * applies:
+ * '+' is GRN_TS_OP_POSITIVE (1 argument) or GRN_TS_OP_PLUS (2 arguments).
+ * '-' is GRN_TS_OP_NEGATIVE (1 argument) or GRN_TS_OP_MINUS (2 arguments).
+ * The 1-argument role is chosen after the start token, an operator or an
+ * opening bracket. The 2-argument role is chosen after a constant, a name or
+ * a closing bracket. Any other previous token is an error.
+ */
+static grn_rc
+grn_ts_expr_parser_tokenize_sign(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                                 grn_ts_str str, grn_ts_expr_token **token)
+{
+  int is_unary;
+  grn_rc rc;
+  grn_ts_str sign_str;
+  grn_ts_op_type op_type;
+  grn_ts_expr_op_token *sign_token;
+  grn_ts_expr_token *last_token = parser->tokens[parser->n_tokens - 1];
+
+  sign_str.ptr = str.ptr;
+  sign_str.size = 1;
+  switch (last_token->type) {
+    case GRN_TS_EXPR_START_TOKEN:
+    case GRN_TS_EXPR_OP_TOKEN: {
+      is_unary = 1;
+      break;
+    }
+    case GRN_TS_EXPR_CONST_TOKEN:
+    case GRN_TS_EXPR_NAME_TOKEN: {
+      is_unary = 0;
+      break;
+    }
+    case GRN_TS_EXPR_BRACKET_TOKEN: {
+      grn_ts_str bracket = last_token->src;
+      if ((bracket.ptr[0] == '(') || (bracket.ptr[0] == '[')) {
+        is_unary = 1;
+      } else if ((bracket.ptr[0] == ')') || (bracket.ptr[0] == ']')) {
+        is_unary = 0;
+      } else {
+        GRN_TS_ERR_RETURN(GRN_OBJECT_CORRUPT, "undefined bracket: \"%.*s\"",
+                          (int)bracket.size, bracket.ptr);
+      }
+      break;
+    }
+    default: {
+      GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid token sequence: %d",
+                        last_token->type);
+    }
+  }
+
+  if (sign_str.ptr[0] == '+') {
+    op_type = is_unary ? GRN_TS_OP_POSITIVE : GRN_TS_OP_PLUS;
+  } else {
+    op_type = is_unary ? GRN_TS_OP_NEGATIVE : GRN_TS_OP_MINUS;
+  }
+  rc = grn_ts_expr_op_token_open(ctx, sign_str, op_type, &sign_token);
+  if (rc == GRN_SUCCESS) {
+    *token = (grn_ts_expr_token *)sign_token;
+  }
+  return rc;
+}
+
+/*
+ * grn_ts_expr_parser_tokenize_op() tokenizes an operator at the head of `str`.
+ *
+ * '+' and '-' are delegated to grn_ts_expr_parser_tokenize_sign(). For the
+ * other operators, multi-byte spellings are tried before the 1-byte one.
+ * On success, the new operator token is stored into `*token`.
+ * GRN_INVALID_FORMAT is returned for a single '=' and for a byte that does
+ * not start any operator.
+ */
+static grn_rc
+grn_ts_expr_parser_tokenize_op(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                               grn_ts_str str, grn_ts_expr_token **token)
+{
+  grn_rc rc = GRN_SUCCESS;
+  grn_ts_str token_str = str;
+  grn_ts_op_type op_type;
+  grn_ts_expr_op_token *new_token;
+  switch (str.ptr[0]) {
+    case '+': case '-': {
+      return grn_ts_expr_parser_tokenize_sign(ctx, parser, str, token);
+    }
+    case '!': {
+      if ((str.size >= 2) && (str.ptr[1] == '=')) {
+        token_str.size = 2;
+        op_type = GRN_TS_OP_NOT_EQUAL;
+      } else {
+        token_str.size = 1;
+        op_type = GRN_TS_OP_LOGICAL_NOT;
+      }
+      rc = grn_ts_expr_op_token_open(ctx, token_str, op_type, &new_token);
+      break;
+    }
+/*
+ * This case handles `label` (TYPE_1), `label` x 2 (TYPE_2), `label` x 3
+ * (TYPE_3) and `label` followed by '=' (TYPE_EQUAL).
+ */
+#define GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE(label, TYPE_1, TYPE_2, TYPE_3,\
+                                            TYPE_EQUAL)\
+    case label: {\
+      if ((str.size >= 2) && (str.ptr[1] == '=')) {\
+        token_str.size = 2;\
+        op_type = GRN_TS_OP_ ## TYPE_EQUAL;\
+      } else if ((str.size >= 2) && (str.ptr[1] == label)) {\
+        if ((str.size >= 3) && (str.ptr[2] == label)) {\
+          token_str.size = 3;\
+          op_type = GRN_TS_OP_ ## TYPE_3;\
+        } else {\
+          token_str.size = 2;\
+          op_type = GRN_TS_OP_ ## TYPE_2;\
+        }\
+      } else {\
+        token_str.size = 1;\
+        op_type = GRN_TS_OP_ ## TYPE_1;\
+      }\
+      rc = grn_ts_expr_op_token_open(ctx, token_str, op_type, &new_token);\
+      break;\
+    }
+    GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE('<', LESS, SHIFT_ARITHMETIC_LEFT,
+                                        SHIFT_LOGICAL_LEFT, LESS_EQUAL)
+    GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE('>', GREATER, SHIFT_ARITHMETIC_RIGHT,
+                                        SHIFT_LOGICAL_RIGHT, GREATER_EQUAL)
+#undef GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE
+    case '&': {
+      if ((str.size >= 2) && (str.ptr[1] == '&')) {
+        token_str.size = 2;
+        op_type = GRN_TS_OP_LOGICAL_AND;
+      } else if ((str.size >= 2) && (str.ptr[1] == '!')) {
+        /*
+         * "&!" is the logical subtraction. This branch used to repeat the
+         * test for '&' and so could never be taken.
+         */
+        token_str.size = 2;
+        op_type = GRN_TS_OP_LOGICAL_SUB;
+      } else {
+        token_str.size = 1;
+        op_type = GRN_TS_OP_BITWISE_AND;
+      }
+      rc = grn_ts_expr_op_token_open(ctx, token_str, op_type, &new_token);
+      break;
+    }
+    case '|': {
+      if ((str.size >= 2) && (str.ptr[1] == '|')) {
+        token_str.size = 2;
+        op_type = GRN_TS_OP_LOGICAL_OR;
+      } else {
+        token_str.size = 1;
+        op_type = GRN_TS_OP_BITWISE_OR;
+      }
+      rc = grn_ts_expr_op_token_open(ctx, token_str, op_type, &new_token);
+      break;
+    }
+    case '=': {
+      if ((str.size < 2) || (str.ptr[1] != '=')) {
+        GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT,
+                          "single equal not available: =\"%.*s\"",
+                          (int)str.size, str.ptr);
+      }
+      token_str.size = 2;
+      rc = grn_ts_expr_op_token_open(ctx, token_str, GRN_TS_OP_EQUAL,
+                                     &new_token);
+      break;
+    }
+/* This case handles an operator that has only a 1-byte spelling. */
+#define GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE(label, TYPE)\
+    case label: {\
+      token_str.size = 1;\
+      rc = grn_ts_expr_op_token_open(ctx, token_str, GRN_TS_OP_ ## TYPE,\
+                                     &new_token);\
+      break;\
+    }
+    GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE('~', BITWISE_NOT)
+    GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE('^', BITWISE_XOR)
+    GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE('*', MULTIPLICATION)
+    GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE('/', DIVISION)
+    GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE('%', MODULUS)
+#undef GRN_TS_EXPR_PARSER_TOKENIZE_OP_CASE
+    case '@': {
+      if ((str.size >= 2) && (str.ptr[1] == '^')) {
+        token_str.size = 2;
+        op_type = GRN_TS_OP_PREFIX_MATCH;
+      } else if ((str.size >= 2) && (str.ptr[1] == '$')) {
+        token_str.size = 2;
+        op_type = GRN_TS_OP_SUFFIX_MATCH;
+      } else {
+        token_str.size = 1;
+        op_type = GRN_TS_OP_MATCH;
+      }
+      rc = grn_ts_expr_op_token_open(ctx, token_str, op_type, &new_token);
+      break;
+    }
+    default: {
+      GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid character: \"%.*s\"",
+                        (int)str.size, str.ptr);
+    }
+  }
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  *token = (grn_ts_expr_token *)new_token;
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ts_expr_parser_tokenize_next() extracts the next token from `str`.
+ *
+ * The start token is created if the parser has no tokens yet, and the end
+ * token is created if nothing is left after trimming `str` from the left.
+ * Otherwise, the first byte of the rest selects the tokenizer. A number
+ * prefix that starts with '+' or '-' is tokenized as a number only after the
+ * start token, an operator or an opening bracket; in the other cases, the
+ * sign is left to the operator tokenizer.
+ */
+static grn_rc
+grn_ts_expr_parser_tokenize_next(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                                 grn_ts_str str, grn_ts_expr_token **token)
+{
+  char head;
+  grn_ts_str rest;
+  if (parser->n_tokens == 0) {
+    return grn_ts_expr_parser_tokenize_start(ctx, parser, str, token);
+  }
+  rest = grn_ts_str_trim_left(str);
+  if (rest.size == 0) {
+    return grn_ts_expr_parser_tokenize_end(ctx, parser, rest, token);
+  }
+  head = rest.ptr[0];
+  if (grn_ts_str_has_number_prefix(rest)) {
+    int is_number = 1;
+    if ((head == '+') || (head == '-')) {
+      const grn_ts_expr_token *prev_token;
+      prev_token = parser->tokens[parser->n_tokens - 1];
+      if ((prev_token->type == GRN_TS_EXPR_START_TOKEN) ||
+          (prev_token->type == GRN_TS_EXPR_OP_TOKEN)) {
+        is_number = 1;
+      } else if (prev_token->type == GRN_TS_EXPR_BRACKET_TOKEN) {
+        is_number = (prev_token->src.ptr[0] == '(') ||
+                    (prev_token->src.ptr[0] == '[');
+      } else {
+        is_number = 0;
+      }
+    }
+    if (is_number) {
+      return grn_ts_expr_parser_tokenize_number(ctx, parser, rest, token);
+    }
+  }
+  if (head == '"') {
+    return grn_ts_expr_parser_tokenize_text(ctx, parser, rest, token);
+  }
+  if (grn_ts_byte_is_name_char(head)) {
+    return grn_ts_expr_parser_tokenize_name(ctx, parser, rest, token);
+  }
+  if ((head == '(') || (head == ')') || (head == '[') || (head == ']')) {
+    return grn_ts_expr_parser_tokenize_bracket(ctx, parser, rest, token);
+  }
+  if (head == '.') {
+    return grn_ts_expr_parser_tokenize_bridge(ctx, parser, rest, token);
+  }
+  return grn_ts_expr_parser_tokenize_op(ctx, parser, rest, token);
+}
+
+/*
+ * grn_ts_expr_parser_reserve_tokens() makes room for one more token.
+ * If the token buffer is full, its capacity is doubled (or set to 1 if the
+ * doubled value is 0) and the new entries are set to NULL.
+ * GRN_NO_MEMORY_AVAILABLE is reported if the reallocation fails.
+ */
+static grn_rc
+grn_ts_expr_parser_reserve_tokens(grn_ctx *ctx, grn_ts_expr_parser *parser)
+{
+  size_t pos, capacity, n_bytes;
+  grn_ts_expr_token **grown_tokens;
+  if (parser->max_n_tokens > parser->n_tokens) {
+    /* There is a free entry already. */
+    return GRN_SUCCESS;
+  }
+  capacity = parser->n_tokens * 2;
+  if (capacity == 0) {
+    capacity = 1;
+  }
+  n_bytes = sizeof(grn_ts_expr_token *) * capacity;
+  grown_tokens = (grn_ts_expr_token **)GRN_REALLOC(parser->tokens, n_bytes);
+  if (grown_tokens == NULL) {
+    GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE,
+                      "GRN_REALLOC failed: %" GRN_FMT_SIZE,
+                      n_bytes);
+  }
+  pos = parser->n_tokens;
+  while (pos < capacity) {
+    grown_tokens[pos++] = NULL;
+  }
+  parser->max_n_tokens = capacity;
+  parser->tokens = grown_tokens;
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ts_expr_parser_tokenize() tokenizes a string.
+ * Tokens are extracted from `str` one by one and appended to the tokens of
+ * the parser until the end token has been appended. The first failure stops
+ * the tokenization and its return code is returned.
+ */
+static grn_rc
+grn_ts_expr_parser_tokenize(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                            grn_ts_str str)
+{
+  grn_ts_str remaining = str;
+  const char *str_end = str.ptr + str.size;
+  GRN_TS_DEBUG("str = \"%.*s\"", (int)str.size, str.ptr);
+  for ( ; ; ) {
+    grn_ts_expr_token *next_token = NULL;
+    grn_rc rc = grn_ts_expr_parser_reserve_tokens(ctx, parser);
+    if (rc != GRN_SUCCESS) {
+      return rc;
+    }
+    rc = grn_ts_expr_parser_tokenize_next(ctx, parser, remaining, &next_token);
+    if (rc != GRN_SUCCESS) {
+      return rc;
+    }
+    if (!((next_token->type == GRN_TS_EXPR_START_TOKEN) ||
+          (next_token->type == GRN_TS_EXPR_END_TOKEN))) {
+      GRN_TS_DEBUG("token = \"%.*s\"", (int)next_token->src.size, next_token->src.ptr);
+    }
+    parser->tokens[parser->n_tokens] = next_token;
+    parser->n_tokens++;
+    if (next_token->type == GRN_TS_EXPR_END_TOKEN) {
+      return GRN_SUCCESS;
+    }
+    /* Continue with the bytes that follow the token. */
+    remaining.ptr = next_token->src.ptr + next_token->src.size;
+    remaining.size = str_end - remaining.ptr;
+  }
+}
+
+/*
+ * grn_ts_expr_parser_push_const() pushes a constant token to the builder.
+ * The data kind and the content of the token are passed along with
+ * GRN_DB_VOID.
+ */
+static grn_rc
+grn_ts_expr_parser_push_const(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                              grn_ts_expr_const_token *token)
+{
+  grn_rc rc = grn_ts_expr_builder_push_const(ctx, parser->builder,
+                                             token->data_kind, GRN_DB_VOID,
+                                             token->content);
+  return rc;
+}
+
+/*
+ * grn_ts_expr_parser_push_name() pushes a name token to the builder.
+ * The source string of the token is used as the name.
+ */
+static grn_rc
+grn_ts_expr_parser_push_name(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                             grn_ts_expr_name_token *token)
+{
+  grn_ts_str name = token->src;
+  return grn_ts_expr_builder_push_name(ctx, parser->builder, name);
+}
+
+/*
+ * grn_ts_expr_parser_push_op() pushes the operator type of an operator token
+ * to the builder.
+ */
+static grn_rc
+grn_ts_expr_parser_push_op(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                           grn_ts_expr_op_token *token)
+{
+  grn_ts_op_type op_type = token->op_type;
+  return grn_ts_expr_builder_push_op(ctx, parser->builder, op_type);
+}
+
+/*
+ * grn_ts_expr_parser_apply_one() applies a bridge or prior operator.
+ * If there is no target, this function returns GRN_END_OF_DATA.
+ *
+ * The function looks at the top of the parser stack, which must be a dummy
+ * token, and at the token just below it:
+ * - a bridge token ends the current subexpression of the builder and is
+ *   treated as having 2 arguments,
+ * - an operator token is pushed to the builder unless its precedence is less
+ *   than `precedence_threshold`, in which case there is no target,
+ * - any other token means that there is no target.
+ * The applied tokens are then replaced on the stack with one dummy token
+ * whose source string spans all of them.
+ * NOTE(review): this code does not check that `parser->dummy_tokens` has a
+ * free entry, nor that 3 tokens are stacked when 2 arguments are required;
+ * presumably the caller guarantees both -- confirm.
+ */
+// FIXME: Support a ternary operator.
+static grn_rc
+grn_ts_expr_parser_apply_one(grn_ctx *ctx, grn_ts_expr_parser *parser,
+ grn_ts_op_precedence precedence_threshold)
+{
+ grn_rc rc;
+ grn_ts_str src;
+ grn_ts_expr_token **stack = parser->stack;
+ grn_ts_expr_dummy_token *dummy_token;
+ size_t n_args, depth = parser->stack_depth;
+ if (depth < 2) { /* An argument and the token below it are required. */
+ return GRN_END_OF_DATA;
+ }
+ if (stack[depth - 1]->type != GRN_TS_EXPR_DUMMY_TOKEN) {
+ GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "argument must be dummy token");
+ }
+
+ /*
+  * Check the number of arguments.
+  * The builder is updated here, before the stack is modified below.
+  */
+ switch (stack[depth - 2]->type) {
+ case GRN_TS_EXPR_BRIDGE_TOKEN: {
+ rc = grn_ts_expr_builder_end_subexpr(ctx, parser->builder);
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+ n_args = 2;
+ break;
+ }
+ case GRN_TS_EXPR_OP_TOKEN: {
+ grn_ts_expr_op_token *op_token;
+ grn_ts_op_precedence precedence;
+ op_token = (grn_ts_expr_op_token *)stack[depth - 2];
+ precedence = grn_ts_op_get_precedence(op_token->op_type);
+ if (precedence < precedence_threshold) { /* Leave it on the stack. */
+ return GRN_END_OF_DATA;
+ }
+ rc = grn_ts_expr_parser_push_op(ctx, parser, op_token);
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+ n_args = grn_ts_op_get_n_args(op_token->op_type);
+ break;
+ }
+ default: {
+ return GRN_END_OF_DATA;
+ }
+ }
+
+ /*
+  * Concatenate the source strings.
+  * With 1 argument, the span runs from the operator to the end of the
+  * argument. With 2 arguments, it runs from the first argument to the end of
+  * the second one.
+  */
+ switch (n_args) {
+ case 1: {
+ grn_ts_expr_token *arg = stack[depth - 1];
+ src.ptr = stack[depth - 2]->src.ptr;
+ src.size = (arg->src.ptr + arg->src.size) - src.ptr;
+ break;
+ }
+ case 2: {
+ grn_ts_expr_token *args[2] = { stack[depth - 3], stack[depth - 1] };
+ src.ptr = args[0]->src.ptr;
+ src.size = (args[1]->src.ptr + args[1]->src.size) - src.ptr;
+ break;
+ }
+ default: {
+ GRN_TS_ERR_RETURN(GRN_OPERATION_NOT_SUPPORTED,
+ "invalid #arguments: %" GRN_FMT_SIZE,
+ n_args);
+ }
+ }
+
+ /*
+  * Replace the operator and argument tokens with a dummy token.
+  * n_args + 1 entries are popped and the dummy token is pushed instead.
+  */
+ dummy_token = &parser->dummy_tokens[parser->n_dummy_tokens++];
+ GRN_TS_DEBUG("dummy token: \"%.*s\"", (int)src.size, src.ptr);
+ grn_ts_expr_dummy_token_init(ctx, dummy_token, src);
+ depth -= n_args + 1;
+ stack[depth++] = dummy_token;
+ parser->stack_depth = depth;
+ return GRN_SUCCESS;
+}
+
+/*
+ * grn_ts_expr_parser_apply() applies bridges and prior operators.
+ * grn_ts_expr_parser_apply_one() is called repeatedly until it reports
+ * something other than GRN_SUCCESS. GRN_END_OF_DATA means that nothing is left
+ * to apply and is turned into GRN_SUCCESS; any other code is returned as is.
+ */
+static grn_rc
+grn_ts_expr_parser_apply(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                         grn_ts_op_precedence precedence_threshold)
+{
+  grn_rc rc;
+  do {
+    rc = grn_ts_expr_parser_apply_one(ctx, parser, precedence_threshold);
+  } while (rc == GRN_SUCCESS);
+  return (rc == GRN_END_OF_DATA) ? GRN_SUCCESS : rc;
+}
+
+/*
+ * grn_ts_expr_parser_analyze_op() analyzes an operator token.
+ * An operator with 1 argument must not follow a dummy token. For an operator
+ * with 2 arguments, the stacked operators whose precedence is not less than
+ * that of the new operator are applied first. Then, the token is pushed to
+ * the stack.
+ */
+static grn_rc
+grn_ts_expr_parser_analyze_op(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                              grn_ts_expr_op_token *token)
+{
+  switch (grn_ts_op_get_n_args(token->op_type)) {
+    case 1: {
+      grn_ts_expr_token *top_token = parser->stack[parser->stack_depth - 1];
+      if (top_token->type == GRN_TS_EXPR_DUMMY_TOKEN) {
+        GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid token sequence");
+      }
+      break;
+    }
+    case 2: {
+      grn_rc rc;
+      rc = grn_ts_expr_parser_apply(ctx, parser,
+                                    grn_ts_op_get_precedence(token->op_type));
+      if (rc != GRN_SUCCESS) {
+        return rc;
+      }
+      break;
+    }
+    default: {
+      break;
+    }
+  }
+  parser->stack[parser->stack_depth] = (grn_ts_expr_token *)token;
+  parser->stack_depth++;
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ts_expr_parser_analyze_bridge() analyzes a bridge token.
+ * A subexpression is begun on the builder and, if that succeeds, the token
+ * is pushed to the stack.
+ */
+static grn_rc
+grn_ts_expr_parser_analyze_bridge(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                                  grn_ts_expr_bridge_token *token)
+{
+  grn_rc rc = grn_ts_expr_builder_begin_subexpr(ctx, parser->builder);
+  if (rc == GRN_SUCCESS) {
+    parser->stack[parser->stack_depth] = (grn_ts_expr_token *)token;
+    parser->stack_depth++;
+  }
+  return rc;
+}
+
+/*
+ * grn_ts_expr_parser_analyze_bracket() analyzes a bracket token.
+ *
+ * '(' is pushed to the stack unless the top of the stack is a dummy token.
+ * '[' is pushed to the stack only if the top of the stack is a dummy token.
+ * ')' and ']' first apply the stacked bridges and operators, then require
+ * the matching opening bracket just below the top of the stack:
+ * - for ')', the opening bracket is replaced with a new dummy token whose
+ *   source string spans from '(' to ')', and the top of the stack is popped,
+ * - for ']', the opening bracket is replaced with the top of the stack, and
+ *   the top of the stack is popped.
+ * Every violation is reported as GRN_INVALID_FORMAT.
+ * NOTE(review): the top of the stack is read before the stack depth is
+ * checked, so the stack is presumably never empty here -- confirm.
+ */
+static grn_rc
+grn_ts_expr_parser_analyze_bracket(grn_ctx *ctx, grn_ts_expr_parser *parser,
+ grn_ts_expr_bracket_token *token)
+{
+ grn_ts_expr_token *ex_token = parser->stack[parser->stack_depth - 1];
+ switch (token->src.ptr[0]) {
+ case '(': {
+ if (ex_token->type == GRN_TS_EXPR_DUMMY_TOKEN) {
+ GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid token sequence");
+ }
+ parser->stack[parser->stack_depth++] = (grn_ts_expr_token *)token;
+ return GRN_SUCCESS;
+ }
+ case '[': {
+ if (ex_token->type != GRN_TS_EXPR_DUMMY_TOKEN) {
+ GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid token sequence");
+ }
+ parser->stack[parser->stack_depth++] = (grn_ts_expr_token *)token;
+ return GRN_SUCCESS;
+ }
+ case ')': case ']': {
+ grn_ts_expr_token *ex_ex_token;
+ grn_rc rc = grn_ts_expr_parser_apply(ctx, parser, 0); /* Threshold 0. */
+ if (rc != GRN_SUCCESS) {
+ return rc;
+ }
+ if (parser->stack_depth < 2) {
+ GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid token sequence");
+ }
+ ex_ex_token = parser->stack[parser->stack_depth - 2]; /* Below the top. */
+ if (ex_ex_token->type != GRN_TS_EXPR_BRACKET_TOKEN) {
+ GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid token sequence");
+ }
+ if (token->src.ptr[0] == ')') {
+ size_t depth = parser->stack_depth;
+ grn_ts_str src;
+ grn_ts_expr_dummy_token *dummy_token;
+ if (ex_ex_token->src.ptr[0] != '(') {
+ GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid token sequence");
+ }
+ src.ptr = ex_ex_token->src.ptr;
+ src.size = (token->src.ptr + token->src.size) - src.ptr;
+ dummy_token = &parser->dummy_tokens[parser->n_dummy_tokens++];
+ GRN_TS_DEBUG("dummy token: \"%.*s\"", (int)src.size, src.ptr);
+ grn_ts_expr_dummy_token_init(ctx, dummy_token, src);
+ parser->stack[depth - 2] = dummy_token; /* Overwrites '('. */
+ parser->stack_depth--;
+ // TODO: Apply a function.
+ } else if (token->src.ptr[0] == ']') {
+ size_t depth = parser->stack_depth;
+ if (ex_ex_token->src.ptr[0] != '[') {
+ GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "invalid token sequence");
+ }
+ parser->stack[depth - 2] = parser->stack[depth - 1]; /* Overwrites '['. */
+ parser->stack_depth--;
+ // TODO: Push a subscript operator.
+ }
+ return GRN_SUCCESS;
+ }
+ default: {
+ GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT, "undefined bracket: \"%.*s\"",
+ (int)token->src.size, token->src.ptr);
+ }
+ }
+}
+
+/*
+ * grn_ts_expr_parser_analyze_token() analyzes a token according to its type.
+ *
+ * A start token is pushed onto the stack and an end token triggers
+ * grn_ts_expr_parser_apply(). Operator, bridge and bracket tokens are handed
+ * to their dedicated analyzers. A constant or a name is passed to
+ * grn_ts_expr_parser_push_const() or grn_ts_expr_parser_push_name() and, on
+ * success, a dummy token with the same source string is pushed onto the
+ * stack in its place.
+ */
+static grn_rc
+grn_ts_expr_parser_analyze_token(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                                 grn_ts_expr_token *token)
+{
+  grn_rc rc;
+  grn_ts_expr_dummy_token *placeholder;
+
+  switch (token->type) {
+    case GRN_TS_EXPR_START_TOKEN: {
+      parser->stack[parser->stack_depth++] = token;
+      return GRN_SUCCESS;
+    }
+    case GRN_TS_EXPR_END_TOKEN: {
+      return grn_ts_expr_parser_apply(ctx, parser, 0);
+    }
+    case GRN_TS_EXPR_CONST_TOKEN: {
+      rc = grn_ts_expr_parser_push_const(ctx, parser,
+                                         (grn_ts_expr_const_token *)token);
+      break;
+    }
+    case GRN_TS_EXPR_NAME_TOKEN: {
+      rc = grn_ts_expr_parser_push_name(ctx, parser,
+                                        (grn_ts_expr_name_token *)token);
+      break;
+    }
+    case GRN_TS_EXPR_OP_TOKEN: {
+      return grn_ts_expr_parser_analyze_op(ctx, parser,
+                                           (grn_ts_expr_op_token *)token);
+    }
+    case GRN_TS_EXPR_BRIDGE_TOKEN: {
+      return grn_ts_expr_parser_analyze_bridge(
+        ctx, parser, (grn_ts_expr_bridge_token *)token);
+    }
+    case GRN_TS_EXPR_BRACKET_TOKEN: {
+      return grn_ts_expr_parser_analyze_bracket(
+        ctx, parser, (grn_ts_expr_bracket_token *)token);
+    }
+    default: {
+      GRN_TS_ERR_RETURN(GRN_OBJECT_CORRUPT, "invalid token type: %d",
+                        token->type);
+    }
+  }
+
+  /* Only constants and names reach here. */
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  placeholder = &parser->dummy_tokens[parser->n_dummy_tokens++];
+  grn_ts_expr_dummy_token_init(ctx, placeholder, token->src);
+  parser->stack[parser->stack_depth++] = placeholder;
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ts_expr_parser_analyze() analyzes the tokens stored in the parser.
+ *
+ * The dummy token array and the stack are first enlarged so that each can
+ * hold parser->n_tokens entries. Then every token is analyzed in order and
+ * the first error, if any, is returned. Analysis succeeds only if exactly
+ * two tokens remain on the stack.
+ */
+static grn_rc
+grn_ts_expr_parser_analyze(grn_ctx *ctx, grn_ts_expr_parser *parser)
+{
+  size_t token_id = 0;
+
+  /* Reserve temporary work spaces. */
+  if (parser->max_n_dummy_tokens < parser->n_tokens) {
+    size_t pool_size = sizeof(grn_ts_expr_dummy_token) * parser->n_tokens;
+    grn_ts_expr_dummy_token *pool;
+    pool = (grn_ts_expr_dummy_token *)GRN_REALLOC(parser->dummy_tokens,
+                                                  pool_size);
+    if (!pool) {
+      GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE,
+                        "GRN_REALLOC failed: %" GRN_FMT_SIZE, pool_size);
+    }
+    parser->dummy_tokens = pool;
+    parser->max_n_dummy_tokens = parser->n_tokens;
+  }
+  if (parser->stack_size < parser->n_tokens) {
+    size_t stack_bytes = sizeof(grn_ts_expr_token *) * parser->n_tokens;
+    grn_ts_expr_token **stack;
+    stack = (grn_ts_expr_token **)GRN_REALLOC(parser->stack, stack_bytes);
+    if (!stack) {
+      GRN_TS_ERR_RETURN(GRN_NO_MEMORY_AVAILABLE,
+                        "GRN_REALLOC failed: %" GRN_FMT_SIZE, stack_bytes);
+    }
+    parser->stack = stack;
+    parser->stack_size = parser->n_tokens;
+  }
+
+  /* Analyze tokens. */
+  while (token_id < parser->n_tokens) {
+    grn_rc rc = grn_ts_expr_parser_analyze_token(ctx, parser,
+                                                 parser->tokens[token_id]);
+    if (rc != GRN_SUCCESS) {
+      return rc;
+    }
+    token_id++;
+  }
+
+  /* Anything other than two remaining tokens is an error. */
+  if (parser->stack_depth != 2) {
+    GRN_TS_ERR_RETURN(GRN_INVALID_FORMAT,
+                      "tokens left in stack: %" GRN_FMT_SIZE,
+                      parser->stack_depth);
+  }
+  return GRN_SUCCESS;
+}
+
+/*
+ * grn_ts_expr_parser_clear() resets the parser so that it can parse the next
+ * string: the stack is emptied, every dummy token is finalized, every token
+ * is closed and the builder is cleared.
+ */
+static void
+grn_ts_expr_parser_clear(grn_ctx *ctx, grn_ts_expr_parser *parser)
+{
+  parser->stack_depth = 0;
+
+  if (parser->dummy_tokens) {
+    grn_ts_expr_dummy_token *dummy = parser->dummy_tokens;
+    grn_ts_expr_dummy_token *dummy_end = dummy + parser->n_dummy_tokens;
+    for ( ; dummy < dummy_end; dummy++) {
+      grn_ts_expr_dummy_token_fin(ctx, dummy);
+    }
+    parser->n_dummy_tokens = 0;
+  }
+
+  if (parser->tokens) {
+    size_t n_left = parser->n_tokens;
+    size_t token_id = 0;
+    while (n_left--) {
+      grn_ts_expr_token_close(ctx, parser->tokens[token_id++]);
+    }
+    parser->n_tokens = 0;
+  }
+
+  grn_ts_expr_builder_clear(ctx, parser->builder);
+}
+
+/*
+ * grn_ts_expr_parser_parse() parses a string with the given parser.
+ *
+ * The parser is cleared, `str` is copied into parser->str_buf followed by a
+ * terminating NUL byte, and the copy is tokenized and analyzed. On success,
+ * the return value of grn_ts_expr_builder_complete(), which receives `expr`,
+ * is returned.
+ *
+ * Returns GRN_INVALID_ARGUMENT if `ctx` or `parser` is NULL, or if `str` has
+ * a NULL pointer together with a nonzero size. An error from any step is
+ * returned as is.
+ */
+grn_rc
+grn_ts_expr_parser_parse(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                         grn_ts_str str, grn_ts_expr **expr)
+{
+  grn_rc rc;
+  if (!ctx) {
+    return GRN_INVALID_ARGUMENT;
+  }
+  if (!parser || (!str.ptr && str.size)) {
+    GRN_TS_ERR_RETURN(GRN_INVALID_ARGUMENT, "invalid argument");
+  }
+  grn_ts_expr_parser_clear(ctx, parser);
+  rc = grn_ts_buf_reserve(ctx, &parser->str_buf, str.size + 1);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  /*
+   * An empty string may come with a NULL pointer (see the check above).
+   * Skip the copy in that case because passing NULL to a memory copy
+   * function is undefined even if the length is 0.
+   */
+  if (str.size) {
+    grn_memcpy(parser->str_buf.ptr, str.ptr, str.size);
+  }
+  ((char *)parser->str_buf.ptr)[str.size] = '\0';
+  /* From here on, work on the NUL-terminated private copy. */
+  str.ptr = (const char *)parser->str_buf.ptr;
+  rc = grn_ts_expr_parser_tokenize(ctx, parser, str);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  rc = grn_ts_expr_parser_analyze(ctx, parser);
+  if (rc != GRN_SUCCESS) {
+    return rc;
+  }
+  return grn_ts_expr_builder_complete(ctx, parser->builder, expr);
+}
+
+/*
+ * grn_ts_expr_parser_split() extracts the first comma-separated expression
+ * from `str`.
+ *
+ * A comma is a delimiter only outside (), [], {} and double quotes. Inside
+ * double quotes, a backslash makes the next byte be skipped. Empty leading
+ * expressions, i.e. commas that follow the part removed by
+ * grn_ts_str_trim_left(), are skipped.
+ *
+ * On success, `first` refers to the extracted expression and `rest` refers
+ * to what follows its delimiter (or to the empty end of `str`). Both refer
+ * into the memory of `str`; nothing is copied.
+ *
+ * Returns GRN_END_OF_DATA if `str` contains no expression and
+ * GRN_INVALID_ARGUMENT if `ctx`, `first` or `rest` is NULL, or if `str` has a
+ * NULL pointer together with a nonzero size. `first` and `rest` are written
+ * only on success. `parser` is currently not used.
+ */
+grn_rc
+grn_ts_expr_parser_split(grn_ctx *ctx, grn_ts_expr_parser *parser,
+                         grn_ts_str str, grn_ts_str *first, grn_ts_str *rest)
+{
+  size_t i;
+  /* The closing byte being waited for. Meaningful only if stack.pos != 0. */
+  char stack_top = '\0';
+  grn_rc rc = GRN_SUCCESS;
+  grn_ts_buf stack;
+
+  if (!ctx) {
+    return GRN_INVALID_ARGUMENT;
+  }
+  if ((!str.ptr && str.size) || !first || !rest) {
+    GRN_TS_ERR_RETURN(GRN_INVALID_ARGUMENT, "invalid argument");
+  }
+
+  // FIXME: `stack` should be a member of `parser`.
+  grn_ts_buf_init(ctx, &stack);
+  for ( ; ; ) {
+    str = grn_ts_str_trim_left(str);
+    if (!str.size) {
+      rc = GRN_END_OF_DATA;
+      break;
+    }
+    for (i = 0; i < str.size; i++) {
+      if (stack.pos) {
+        if (str.ptr[i] == stack_top) {
+          /* Close the innermost group and restore the outer one, if any. */
+          if (--stack.pos) {
+            stack_top = ((char *)stack.ptr)[stack.pos - 1];
+          }
+          continue;
+        }
+        if (stack_top == '"') {
+          /* Skip the next byte of an escape character. */
+          if ((str.ptr[i] == '\\') && (i < (str.size - 1))) {
+            i++;
+          }
+          continue;
+        }
+      } else if (str.ptr[i] == ',') {
+        /* An expression delimiter. */
+        break;
+      }
+      /* An opening byte pushes the closing byte to wait for. */
+      switch (str.ptr[i]) {
+        case '(': {
+          stack_top = ')';
+          rc = grn_ts_buf_write(ctx, &stack, &stack_top, 1);
+          break;
+        }
+        case '[': {
+          stack_top = ']';
+          rc = grn_ts_buf_write(ctx, &stack, &stack_top, 1);
+          break;
+        }
+        case '{': {
+          stack_top = '}';
+          rc = grn_ts_buf_write(ctx, &stack, &stack_top, 1);
+          break;
+        }
+        case '"': {
+          stack_top = '"';
+          rc = grn_ts_buf_write(ctx, &stack, &stack_top, 1);
+          break;
+        }
+      }
+      if (rc != GRN_SUCCESS) {
+        break;
+      }
+    }
+    if (rc != GRN_SUCCESS) {
+      break;
+    }
+    if (i) {
+      /* Set the result. */
+      first->ptr = str.ptr;
+      first->size = i;
+      if (first->size == str.size) {
+        /* No delimiter was found: nothing is left. */
+        rest->ptr = str.ptr + str.size;
+        rest->size = 0;
+      } else {
+        /* Skip the delimiter itself. */
+        rest->ptr = str.ptr + first->size + 1;
+        rest->size = str.size - first->size - 1;
+      }
+      break;
+    }
+    /* The string starts with a delimiter: skip the empty expression. */
+    str.ptr++;
+    str.size--;
+  }
+  grn_ts_buf_fin(ctx, &stack);
+  return rc;
+}