summaryrefslogtreecommitdiffstats
path: root/source3/rpc_server/mdssvc/es_parser.y
diff options
context:
space:
mode:
Diffstat (limited to 'source3/rpc_server/mdssvc/es_parser.y')
-rw-r--r--source3/rpc_server/mdssvc/es_parser.y686
1 files changed, 686 insertions, 0 deletions
diff --git a/source3/rpc_server/mdssvc/es_parser.y b/source3/rpc_server/mdssvc/es_parser.y
new file mode 100644
index 0000000..3fbdf93
--- /dev/null
+++ b/source3/rpc_server/mdssvc/es_parser.y
@@ -0,0 +1,686 @@
+/*
+ Unix SMB/CIFS implementation.
+ Main metadata server / Spotlight routines / Elasticsearch backend
+
+ Copyright (C) Ralph Boehme 2019
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+%{
+ #include "includes.h"
+ #include "rpc_server/mdssvc/mdssvc.h"
+ #include "rpc_server/mdssvc/mdssvc_es.h"
+ #include "rpc_server/mdssvc/es_parser.tab.h"
+ #include "rpc_server/mdssvc/es_mapping.h"
+ #include "lib/util/smb_strtox.h"
+ #include <jansson.h>
+
+ /*
+ * allow building with -O3 -Wp,-D_FORTIFY_SOURCE=2
+ *
+ * /tmp/samba-testbase/.../mdssvc/es_parser.y: In function
+ * ‘mdsyylparse’:
+ * es_parser.tab.c:1124:6: error: assuming pointer wraparound
+ * does not occur when comparing P +- C1 with P +- C2
+ * [-Werror=strict-overflow]
+ *
+ * The generated code in es_parser.tab.c looks like this:
+ *
+ * if (yyss + yystacksize - 1 <= yyssp)
+ */
+ #pragma GCC diagnostic ignored "-Wstrict-overflow"
+
+ #define YYMALLOC SMB_MALLOC
+ #define YYREALLOC SMB_REALLOC
+
+ struct yy_buffer_state;
+ typedef struct yy_buffer_state *YY_BUFFER_STATE;
+ int mdsyyllex(void);
+ void mdsyylerror(char const *);
+ void *mdsyylterminate(void);
+ YY_BUFFER_STATE mdsyyl_scan_string(const char *str);
+ void mdsyyl_delete_buffer(YY_BUFFER_STATE buffer);
+
+ /* forward declarations */
+ static char *isodate_to_sldate(const char *s);
+ static char *map_expr(const struct es_attr_map *attr,
+ char op,
+ const char *val1,
+ const char *val2);
+
+ /* global vars, eg needed by the lexer */
+ struct es_parser_state {
+ TALLOC_CTX *frame;
+ json_t *kmd_map;
+ json_t *mime_map;
+ bool ignore_unknown_attribute;
+ bool ignore_unknown_type;
+ bool type_error;
+ YY_BUFFER_STATE s;
+ const char *result;
+ } *global_es_parser_state;
+%}
+
+%code provides {
+ #include <stdbool.h>
+ #include <jansson.h>
+ #include "rpc_server/mdssvc/mdssvc.h"
+
+ /* 2001-01-01T00:00:00Z - Unix Epoch = SP_RAW_TIME_OFFSET */
+ #define SP_RAW_TIME_OFFSET 978307200
+
+ int mdsyylwrap(void);
+ bool map_spotlight_to_es_query(TALLOC_CTX *mem_ctx,
+ json_t *mappings,
+ const char *path_scope,
+ const char *query_string,
+ char **_es_query);
+}
+
+%union {
+ bool bval;
+ const char *sval;
+ struct es_attr_map *attr_map;
+}
+
+%name-prefix "mdsyyl"
+%expect 1
+%error-verbose
+
+%type <sval> match expr line function value isodate
+%type <attr_map> attribute
+
+%token <sval> WORD PHRASE
+%token <bval> BOOLEAN
+%token FUNC_INRANGE
+%token DATE_ISO
+%token OBRACE CBRACE EQUAL UNEQUAL GT LT COMMA QUOTE
+%left OR
+%left AND
+%%
+
+input:
+/* empty */
+| input line
+;
+
+line:
+expr {
+ if ($1 == NULL) {
+ YYABORT;
+ }
+ if (global_es_parser_state->type_error) {
+ YYABORT;
+ }
+ global_es_parser_state->result = $1;
+}
+;
+
+expr:
+OBRACE expr CBRACE {
+ if ($2 == NULL) {
+ $$ = NULL;
+ } else {
+ $$ = talloc_asprintf(talloc_tos(), "(%s)", $2);
+ if ($$ == NULL) YYABORT;
+ }
+}
+| expr AND expr {
+ if ($1 == NULL && $3 == NULL) {
+ $$ = NULL;
+ } else if ($1 == NULL) {
+ $$ = $3;
+ } else if ($3 == NULL) {
+ $$ = $1;
+ } else {
+ $$ = talloc_asprintf(talloc_tos(), "(%s) AND (%s)", $1, $3);
+ if ($$ == NULL) YYABORT;
+ }
+}
+| expr OR expr {
+ if ($1 == NULL && $3 == NULL) {
+ $$ = NULL;
+ } else if ($1 == NULL) {
+ $$ = $3;
+ } else if ($3 == NULL) {
+ $$ = $1;
+ } else {
+ $$ = talloc_asprintf(talloc_tos(), "%s OR %s", $1, $3);
+ if ($$ == NULL) YYABORT;
+ }
+}
+| match {
+ $$ = $1;
+}
+| BOOLEAN {
+ /*
+ * We can't properly handle these in expressions, fortunately this
+ * is probably only ever used by OS X as sole element in an
+ * expression ie "False" (when Finder window selected our share
+ * but no search string entered yet). Packet traces showed that OS
+ * X Spotlight server then returns a failure (ie -1) which is what
+ * we do here too by calling YYABORT.
+ */
+ YYABORT;
+};
+
+match:
+attribute EQUAL value {
+ if ($1 == NULL) {
+ $$ = NULL;
+ } else {
+ $$ = map_expr($1, '=', $3, NULL);
+ }
+}
+| attribute UNEQUAL value {
+ if ($1 == NULL) {
+ $$ = NULL;
+ } else {
+ $$ = map_expr($1, '!', $3, NULL);
+ }
+}
+| attribute LT value {
+ if ($1 == NULL) {
+ $$ = NULL;
+ } else {
+ $$ = map_expr($1, '<', $3, NULL);
+ }
+}
+| attribute GT value {
+ if ($1 == NULL) {
+ $$ = NULL;
+ } else {
+ $$ = map_expr($1, '>', $3, NULL);
+ }
+}
+| function {
+ $$ = $1;
+}
+| match WORD {
+ $$ = $1;
+};
+
+function:
+FUNC_INRANGE OBRACE attribute COMMA WORD COMMA WORD CBRACE {
+ if ($3 == NULL) {
+ $$ = NULL;
+ } else {
+ $$ = map_expr($3, '~', $5, $7);
+ }
+};
+
+attribute:
+WORD {
+ $$ = es_map_sl_attr(global_es_parser_state->frame,
+ global_es_parser_state->kmd_map,
+ $1);
+ if ($$ == NULL &&
+ !global_es_parser_state->ignore_unknown_attribute)
+ {
+ YYABORT;
+ }
+};
+
+value:
+PHRASE {
+ $$ = $1;
+}
+| isodate {
+ $$ = $1;
+};
+
+isodate:
+DATE_ISO OBRACE WORD CBRACE {
+ $$ = isodate_to_sldate($3);
+ if ($$ == NULL) YYABORT;
+};
+
+%%
+
+/*
+ * Spotlight has two date formats:
+ * - seconds since 2001-01-01 00:00:00Z
+ * - as string "$time.iso(%Y-%m-%dT%H:%M:%SZ)"
+ * This function converts the latter to the former as string, so the parser
+ * can work on a uniform format.
+ */
+static char *isodate_to_sldate(const char *isodate)
+{
+ struct es_parser_state *s = global_es_parser_state;
+ struct tm tm = {};
+ const char *p = NULL;
+ char *tstr = NULL;
+ time_t t;
+
+ p = strptime(isodate, "%Y-%m-%dT%H:%M:%SZ", &tm);
+ if (p == NULL) {
+ DBG_ERR("strptime [%s] failed\n", isodate);
+ return NULL;
+ }
+
+ t = timegm(&tm);
+ t -= SP_RAW_TIME_OFFSET;
+
+ tstr = talloc_asprintf(s->frame, "%jd", (intmax_t)t);
+ if (tstr == NULL) {
+ return NULL;
+ }
+
+ return tstr;
+}
+
+static char *map_type(const struct es_attr_map *attr,
+ char op,
+ const char *val)
+{
+ struct es_parser_state *s = global_es_parser_state;
+ const char *mime_type_list = NULL;
+ char *esc_mime_type_list = NULL;
+ const char *not = NULL;
+ const char *end = NULL;
+ char *es = NULL;
+
+ mime_type_list = es_map_sl_type(s->mime_map, val);
+ if (mime_type_list == NULL) {
+ DBG_DEBUG("Mapping type [%s] failed\n", val);
+ if (!s->ignore_unknown_type) {
+ s->type_error = true;
+ }
+ return NULL;
+ }
+
+ esc_mime_type_list = es_escape_str(s->frame,
+ mime_type_list,
+ "* ");
+ if (esc_mime_type_list == NULL) {
+ return NULL;
+ }
+
+ switch (op) {
+ case '=':
+ not = "";
+ end = "";
+ break;
+ case '!':
+ not = "(NOT ";
+ end = ")";
+ break;
+ default:
+ DBG_ERR("Mapping type [%s] unexpected op [%c]\n", val, op);
+ return NULL;
+ }
+ es = talloc_asprintf(s->frame,
+ "%s%s:(%s)%s",
+ not,
+ attr->name,
+ esc_mime_type_list,
+ end);
+ if (es == NULL) {
+ return NULL;
+ }
+
+ return es;
+}
+
+static char *map_num(const struct es_attr_map *attr,
+ char op,
+ const char *val1,
+ const char *val2)
+{
+ struct es_parser_state *s = global_es_parser_state;
+ char *es = NULL;
+
+ switch (op) {
+ case '>':
+ es = talloc_asprintf(s->frame,
+ "%s:{%s TO *}",
+ attr->name,
+ val1);
+ break;
+ case '<':
+ es = talloc_asprintf(s->frame,
+ "%s:{* TO %s}",
+ attr->name,
+ val1);
+ break;
+ case '~':
+ es = talloc_asprintf(s->frame,
+ "%s:[%s TO %s]",
+ attr->name,
+ val1,
+ val2);
+ break;
+ case '=':
+ es = talloc_asprintf(s->frame,
+ "%s:%s",
+ attr->name,
+ val1);
+ break;
+ case '!':
+ es = talloc_asprintf(s->frame,
+ "(NOT %s:%s)",
+ attr->name,
+ val1);
+ break;
+ default:
+ DBG_ERR("Mapping num unexpected op [%c]\n", op);
+ return NULL;
+ }
+ if (es == NULL) {
+ return NULL;
+ }
+
+ return es;
+}
+
+static char *map_fts(const struct es_attr_map *attr,
+ char op,
+ const char *val)
+{
+ struct es_parser_state *s = global_es_parser_state;
+ const char *not = NULL;
+ const char *end = NULL;
+ char *esval = NULL;
+ char *es = NULL;
+
+ esval = es_escape_str(s->frame, val, "*\\\"");
+ if (esval == NULL) {
+ yyerror("es_escape_str failed");
+ return NULL;
+ }
+
+ switch (op) {
+ case '=':
+ not = "";
+ end = "";
+ break;
+ case '!':
+ not = "(NOT ";
+ end = ")";
+ break;
+ default:
+ DBG_ERR("Mapping fts [%s] unexpected op [%c]\n", val, op);
+ return NULL;
+ }
+ es = talloc_asprintf(s->frame,
+ "%s%s%s",
+ not,
+ esval,
+ end);
+ if (es == NULL) {
+ return NULL;
+ }
+ return es;
+}
+
+static char *map_str(const struct es_attr_map *attr,
+ char op,
+ const char *val)
+{
+ struct es_parser_state *s = global_es_parser_state;
+ char *esval = NULL;
+ char *es = NULL;
+ const char *not = NULL;
+ const char *end = NULL;
+
+ esval = es_escape_str(s->frame, val, "*\\\"");
+ if (esval == NULL) {
+ yyerror("es_escape_str failed");
+ return NULL;
+ }
+
+ switch (op) {
+ case '=':
+ not = "";
+ end = "";
+ break;
+ case '!':
+ not = "(NOT ";
+ end = ")";
+ break;
+ default:
+ DBG_ERR("Mapping string [%s] unexpected op [%c]\n", val, op);
+ return NULL;
+ }
+
+ es = talloc_asprintf(s->frame,
+ "%s%s:%s%s",
+ not,
+ attr->name,
+ esval,
+ end);
+ if (es == NULL) {
+ return NULL;
+ }
+ return es;
+}
+
+/*
+ * Convert Spotlight date seconds since 2001-01-01 00:00:00Z
+ * to a date string in the format %Y-%m-%dT%H:%M:%SZ.
+ */
+static char *map_sldate_to_esdate(TALLOC_CTX *mem_ctx,
+ const char *sldate)
+{
+ struct tm *tm = NULL;
+ char *esdate = NULL;
+ char buf[21];
+ size_t len;
+ time_t t;
+ int error;
+
+ t = (time_t)smb_strtoull(sldate, NULL, 10, &error, SMB_STR_STANDARD);
+ if (error != 0) {
+ DBG_ERR("smb_strtoull [%s] failed\n", sldate);
+ return NULL;
+ }
+ t += SP_RAW_TIME_OFFSET;
+
+ tm = gmtime(&t);
+ if (tm == NULL) {
+ DBG_ERR("localtime [%s] failed\n", sldate);
+ return NULL;
+ }
+
+ len = strftime(buf, sizeof(buf),
+ "%Y-%m-%dT%H:%M:%SZ", tm);
+ if (len != 20) {
+ DBG_ERR("strftime [%s] failed\n", sldate);
+ return NULL;
+ }
+
+ esdate = es_escape_str(mem_ctx, buf, NULL);
+ if (esdate == NULL) {
+ yyerror("es_escape_str failed");
+ return NULL;
+ }
+ return esdate;
+}
+
+static char *map_date(const struct es_attr_map *attr,
+ char op,
+ const char *sldate1,
+ const char *sldate2)
+{
+ struct es_parser_state *s = global_es_parser_state;
+ char *esdate1 = NULL;
+ char *esdate2 = NULL;
+ char *es = NULL;
+
+ if (op == '~' && sldate2 == NULL) {
+ DBG_ERR("Date range query, but second date is NULL\n");
+ return NULL;
+ }
+
+ esdate1 = map_sldate_to_esdate(s->frame, sldate1);
+ if (esdate1 == NULL) {
+ DBG_ERR("map_sldate_to_esdate [%s] failed\n", sldate1);
+ return NULL;
+ }
+ if (sldate2 != NULL) {
+ esdate2 = map_sldate_to_esdate(s->frame, sldate2);
+ if (esdate2 == NULL) {
+ DBG_ERR("map_sldate_to_esdate [%s] failed\n", sldate2);
+ return NULL;
+ }
+ }
+
+ switch (op) {
+ case '>':
+ es = talloc_asprintf(s->frame,
+ "%s:{%s TO *}",
+ attr->name,
+ esdate1);
+ break;
+ case '<':
+ es = talloc_asprintf(s->frame,
+ "%s:{* TO %s}",
+ attr->name,
+ esdate1);
+ break;
+ case '~':
+ es = talloc_asprintf(s->frame,
+ "%s:[%s TO %s]",
+ attr->name,
+ esdate1,
+ esdate2);
+ break;
+ case '=':
+ es = talloc_asprintf(s->frame,
+ "%s:%s",
+ attr->name,
+ esdate1);
+ break;
+ case '!':
+ es = talloc_asprintf(s->frame,
+ "(NOT %s:%s)",
+ attr->name,
+ esdate1);
+ break;
+ }
+ if (es == NULL) {
+ return NULL;
+ }
+ return es;
+}
+
+static char *map_expr(const struct es_attr_map *attr,
+ char op,
+ const char *val1,
+ const char *val2)
+{
+ char *es = NULL;
+
+ switch (attr->type) {
+ case ssmt_type:
+ es = map_type(attr, op, val1);
+ break;
+ case ssmt_num:
+ es = map_num(attr, op, val1, val2);
+ break;
+ case ssmt_fts:
+ es = map_fts(attr, op, val1);
+ break;
+ case ssmt_str:
+ es = map_str(attr, op, val1);
+ break;
+ case ssmt_date:
+ es = map_date(attr, op, val1, val2);
+ break;
+ default:
+ break;
+ }
+ if (es == NULL) {
+ DBG_DEBUG("Mapping [%s %c %s (%s)] failed\n",
+ attr->name, op, val1, val2 ? val2 : "");
+ return NULL;
+ }
+
+ return es;
+}
+
+void mdsyylerror(const char *str)
+{
+ DBG_ERR("Parser failed: %s\n", str);
+}
+
+int mdsyylwrap(void)
+{
+ return 1;
+}
+
+/**
+ * Map a Spotlight RAW query string to a ES query string
+ **/
+bool map_spotlight_to_es_query(TALLOC_CTX *mem_ctx,
+ json_t *mappings,
+ const char *path_scope,
+ const char *query_string,
+ char **_es_query)
+{
+ struct es_parser_state s = {
+ .frame = talloc_stackframe(),
+ };
+ int result;
+ char *es_query = NULL;
+
+ s.kmd_map = json_object_get(mappings, "attribute_mappings");
+ if (s.kmd_map == NULL) {
+ DBG_ERR("Failed to load attribute_mappings from JSON\n");
+ return false;
+ }
+ s.mime_map = json_object_get(mappings, "mime_mappings");
+ if (s.mime_map == NULL) {
+ DBG_ERR("Failed to load mime_mappings from JSON\n");
+ return false;
+ }
+
+ s.s = mdsyyl_scan_string(query_string);
+ if (s.s == NULL) {
+ DBG_WARNING("Failed to parse [%s]\n", query_string);
+ TALLOC_FREE(s.frame);
+ return false;
+ }
+
+ s.ignore_unknown_attribute = lp_parm_bool(GLOBAL_SECTION_SNUM,
+ "elasticsearch",
+ "ignore unknown attribute",
+ false);
+ s.ignore_unknown_type = lp_parm_bool(GLOBAL_SECTION_SNUM,
+ "elasticsearch",
+ "ignore unknown type",
+ false);
+
+ global_es_parser_state = &s;
+ result = mdsyylparse();
+ global_es_parser_state = NULL;
+ mdsyyl_delete_buffer(s.s);
+
+ if (result != 0) {
+ TALLOC_FREE(s.frame);
+ return false;
+ }
+
+ es_query = talloc_asprintf(mem_ctx,
+ "(%s) AND path.real.fulltext:\\\"%s\\\"",
+ s.result, path_scope);
+ TALLOC_FREE(s.frame);
+ if (es_query == NULL) {
+ return false;
+ }
+
+ *_es_query = es_query;
+ return true;
+}