From 8daa83a594a2e98f39d764422bfbdbc62c9efd44 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 19:20:00 +0200 Subject: Adding upstream version 2:4.20.0+dfsg. Signed-off-by: Daniel Baumann --- source3/rpc_server/mdssvc/es_mapping.c | 241 +++++++++++++++++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 source3/rpc_server/mdssvc/es_mapping.c (limited to 'source3/rpc_server/mdssvc/es_mapping.c') diff --git a/source3/rpc_server/mdssvc/es_mapping.c b/source3/rpc_server/mdssvc/es_mapping.c new file mode 100644 index 0000000..e8d181d --- /dev/null +++ b/source3/rpc_server/mdssvc/es_mapping.c @@ -0,0 +1,241 @@ +/* + Unix SMB/CIFS implementation. + Main metadata server / Spotlight routines / Elasticsearch backend + + Copyright (C) Ralph Boehme 2019 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "includes.h" +#include "es_mapping.h" + +/* + * Escaping of special characters in Lucene query syntax across HTTP and JSON + * ========================================================================== + * + * These characters in Lucene queries need escaping [1]: + * + * + - & | ! ( ) { } [ ] ^ " ~ * ? : \ / + * + * Additionally JSON requires escaping of: + * + * " \ + * + * Characters already escaped by the mdssvc client: + * + * * " \ + * + * The following table contains the resulting escaped strings, beginning with the + * search term, the corresponding Spotlight query and the final string that gets + * sent to the target Elasticsearch server. + * + * string | mdfind | http + * -------+--------+------ + * x!x x!x x\\!x + * x&x x&x x\\&x + * x+x x+x x\\+x + * x-x x-x x\\-x + * x.x x.x x\\.x + * xx x>x x\\>x + * x=x x=x x\\=x + * x?x x?x x\\?x + * x[x x[x x\\[x + * x]x x]x x\\]x + * x^x x^x x\\^x + * x{x x{x x\\{x + * x}x x}x x\\}x + * x|x x|x x\\|x + * x x x x x\\ x + * x*x x\*x x\\*x + * x\x x\\x x\\\\x + * x"x x\"x x\\\"x + * + * Special cases: + * x y It's not possible to search for terms including spaces, Spotlight + * will search for x OR y. + * x(x Search for terms including ( and ) does not work with Spotlight. + * + * [1] + */ + +static char *escape_str(TALLOC_CTX *mem_ctx, + const char *in, + const char *escape_list, + const char *escape_exceptions) +{ + char *out = NULL; + size_t in_len; + size_t new_len; + size_t in_pos; + size_t out_pos = 0; + + if (in == NULL) { + return NULL; + } + in_len = strlen(in); + + if (escape_list == NULL) { + escape_list = ""; + } + if (escape_exceptions == NULL) { + escape_exceptions = ""; + } + + /* + * Allocate enough space for the worst case: every char needs to be + * escaped and requires an additional char. + */ + new_len = (in_len * 2) + 1; + if (new_len <= in_len) { + return NULL; + } + + out = talloc_zero_array(mem_ctx, char, new_len); + if (out == NULL) { + return NULL; + } + + for (in_pos = 0, out_pos = 0; in_pos < in_len; in_pos++, out_pos++) { + if (strchr(escape_list, in[in_pos]) != NULL && + strchr(escape_exceptions, in[in_pos]) == NULL) + { + out[out_pos++] = '\\'; + } + out[out_pos] = in[in_pos]; + } + + return out; +} + +char *es_escape_str(TALLOC_CTX *mem_ctx, + const char *in, + const char *exceptions) +{ + const char *lucene_escape_list = "+-&|!(){}[]^\"~*?:\\/ "; + const char *json_escape_list = "\\\""; + char *lucene_escaped = NULL; + char *full_escaped = NULL; + + lucene_escaped = escape_str(mem_ctx, + in, + lucene_escape_list, + exceptions); + if (lucene_escaped == NULL) { + return NULL; + } + + full_escaped = escape_str(mem_ctx, + lucene_escaped, + json_escape_list, + NULL); + TALLOC_FREE(lucene_escaped); + return full_escaped; +} + +struct es_attr_map *es_map_sl_attr(TALLOC_CTX *mem_ctx, + json_t *kmd_map, + const char *sl_attr) +{ + struct es_attr_map *es_map = NULL; + const char *typestr = NULL; + enum ssm_type type = ssmt_bool; + char *es_attr = NULL; + size_t i; + int cmp; + int ret; + + static struct { + const char *typestr; + enum ssm_type typeval; + } ssmt_type_map[] = { + {"bool", ssmt_bool}, + {"num", ssmt_num}, + {"str", ssmt_str}, + {"fts", ssmt_fts}, + {"date", ssmt_date}, + {"type", ssmt_type}, + }; + + if (sl_attr == NULL) { + return NULL; + } + + ret = json_unpack(kmd_map, + "{s: {s: s}}", + sl_attr, + "type", + &typestr); + if (ret != 0) { + DBG_DEBUG("No JSON type mapping for [%s]\n", sl_attr); + return NULL; + } + + ret = json_unpack(kmd_map, + "{s: {s: s}}", + sl_attr, + "attribute", + &es_attr); + if (ret != 0) { + DBG_ERR("No JSON attribute mapping for [%s]\n", sl_attr); + return NULL; + } + + for (i = 0; i < ARRAY_SIZE(ssmt_type_map); i++) { + cmp = strcmp(typestr, ssmt_type_map[i].typestr); + if (cmp == 0) { + type = ssmt_type_map[i].typeval; + break; + } + } + if (i == ARRAY_SIZE(ssmt_type_map)) { + return NULL; + } + + es_map = talloc_zero(mem_ctx, struct es_attr_map); + if (es_map == NULL) { + return NULL; + } + es_map->type = type; + + es_map->name = es_escape_str(es_map, es_attr, NULL); + if (es_map->name == NULL) { + TALLOC_FREE(es_map); + return false; + } + + return es_map; +} + +const char *es_map_sl_type(json_t *mime_map, + const char *sl_type) +{ + const char *mime_type = NULL; + int ret; + + if (sl_type == NULL) { + return NULL; + } + + ret = json_unpack(mime_map, + "{s: s}", + sl_type, + &mime_type); + if (ret != 0) { + return NULL; + } + + return mime_type; +} -- cgit v1.2.3