/*
   Unix SMB/CIFS implementation.
   Main metadata server / Spotlight routines / Elasticsearch backend

   Copyright (C) Ralph Boehme			2019

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "includes.h"
#include "es_mapping.h"

/*
 * Escaping of special characters in Lucene query syntax across HTTP and JSON
 * ==========================================================================
 *
 * These characters in Lucene queries need escaping [1]:
 *
 *   + - & | ! ( ) { } [ ] ^ " ~ * ? : \ /
 *
 * Additionally JSON requires escaping of:
 *
 *   " \
 *
 * Characters already escaped by the mdssvc client:
 *
 *   * " \
 *
 * The following table contains the resulting escaped strings, beginning with the
 * search term, the corresponding Spotlight query and the final string that gets
 * sent to the target Elasticsearch server.
 *
 * string | mdfind | http
 * -------+--------+------
 * x!x     x!x      x\\!x
 * x&x     x&x      x\\&x
 * x+x     x+x      x\\+x
 * x-x     x-x      x\\-x
 * x.x     x.x      x\\.x
 * x<x     x<x      x\\<x
 * x>x     x>x      x\\>x
 * x=x     x=x      x\\=x
 * x?x     x?x      x\\?x
 * x[x     x[x      x\\[x
 * x]x     x]x      x\\]x
 * x^x     x^x      x\\^x
 * x{x     x{x      x\\{x
 * x}x     x}x      x\\}x
 * x|x     x|x      x\\|x
 * x x     x x      x\\ x
 * x*x     x\*x     x\\*x
 * x\x     x\\x     x\\\\x
 * x"x     x\"x     x\\\"x
 *
 * Special cases:
 * x y    It's not possible to search for terms including spaces, Spotlight
 *        will search for x OR y.
 * x(x    Search for terms including ( and ) does not work with Spotlight.
 *
 * [1] Apache Lucene classic query parser syntax documentation, section
 *     "Escaping Special Characters".
 */

/*
 * Return a copy of "in" where every character that is listed in
 * "escape_list" and not listed in "escape_exceptions" is prefixed with a
 * single backslash.
 *
 * "escape_list" and "escape_exceptions" may be NULL, which is treated like
 * an empty string. The result is allocated with talloc on "mem_ctx".
 *
 * Returns NULL if "in" is NULL, if the worst-case output size overflows
 * size_t, or if the allocation fails.
 */
static char *escape_str(TALLOC_CTX *mem_ctx,
			const char *in,
			const char *escape_list,
			const char *escape_exceptions)
{
	char *out = NULL;
	size_t in_len;
	size_t new_len;
	size_t in_pos;
	size_t out_pos;

	if (in == NULL) {
		return NULL;
	}
	in_len = strlen(in);

	if (escape_list == NULL) {
		escape_list = "";
	}
	if (escape_exceptions == NULL) {
		escape_exceptions = "";
	}

	/*
	 * Allocate enough space for the worst case: every char needs to be
	 * escaped and requires an additional char.
	 */
	new_len = (in_len * 2) + 1;
	if (new_len <= in_len) {
		/* size_t wrapped around */
		return NULL;
	}

	/*
	 * The buffer is zero-filled and one byte larger than the longest
	 * possible output, so the result is always NUL terminated.
	 */
	out = talloc_zero_array(mem_ctx, char, new_len);
	if (out == NULL) {
		return NULL;
	}

	for (in_pos = 0, out_pos = 0; in_pos < in_len; in_pos++, out_pos++) {
		if (strchr(escape_list, in[in_pos]) != NULL &&
		    strchr(escape_exceptions, in[in_pos]) == NULL)
		{
			out[out_pos++] = '\\';
		}
		out[out_pos] = in[in_pos];
	}

	return out;
}

/*
 * Escape "in" for use as a Lucene query term embedded in a JSON string.
 *
 * First the Lucene special characters (plus the space character) are
 * backslash-escaped, skipping any character listed in "exceptions" (may be
 * NULL). Then the JSON special characters '\' and '"' of that intermediate
 * result are backslash-escaped.
 *
 * The result is allocated on "mem_ctx"; the intermediate string is freed.
 * Returns NULL if "in" is NULL or an allocation fails.
 */
char *es_escape_str(TALLOC_CTX *mem_ctx,
		    const char *in,
		    const char *exceptions)
{
	const char *lucene_escape_list = "+-&|!(){}[]^\"~*?:\\/ ";
	const char *json_escape_list = "\\\"";
	char *lucene_escaped = NULL;
	char *full_escaped = NULL;

	lucene_escaped = escape_str(mem_ctx,
				    in,
				    lucene_escape_list,
				    exceptions);
	if (lucene_escaped == NULL) {
		return NULL;
	}

	full_escaped = escape_str(mem_ctx,
				  lucene_escaped,
				  json_escape_list,
				  NULL);
	TALLOC_FREE(lucene_escaped);
	return full_escaped;
}

/*
 * Look up the Spotlight attribute "sl_attr" in the JSON object "kmd_map"
 * and build the corresponding Elasticsearch attribute mapping.
 *
 * The entry for "sl_attr" must be an object with the string members "type"
 * (one of "bool", "num", "str", "fts", "date", "type") and "attribute".
 *
 * On success a struct es_attr_map allocated on "mem_ctx" is returned. Its
 * "name" member is the escaped "attribute" string, allocated as a talloc
 * child of the returned structure.
 *
 * Returns NULL if "sl_attr" is NULL, if the mapping is missing or
 * incomplete, if the type string is unknown, or if an allocation fails.
 */
struct es_attr_map *es_map_sl_attr(TALLOC_CTX *mem_ctx,
				   json_t *kmd_map,
				   const char *sl_attr)
{
	struct es_attr_map *es_map = NULL;
	const char *typestr = NULL;
	enum ssm_type type = ssmt_bool;
	/* Filled by json_unpack() with a string borrowed from kmd_map */
	const char *es_attr = NULL;
	size_t i;
	int cmp;
	int ret;

	static const struct {
		const char *typestr;
		enum ssm_type typeval;
	} ssmt_type_map[] = {
		{"bool", ssmt_bool},
		{"num", ssmt_num},
		{"str", ssmt_str},
		{"fts", ssmt_fts},
		{"date", ssmt_date},
		{"type", ssmt_type},
	};

	if (sl_attr == NULL) {
		return NULL;
	}

	ret = json_unpack(kmd_map,
			  "{s: {s: s}}",
			  sl_attr,
			  "type",
			  &typestr);
	if (ret != 0) {
		DBG_DEBUG("No JSON type mapping for [%s]\n", sl_attr);
		return NULL;
	}

	ret = json_unpack(kmd_map,
			  "{s: {s: s}}",
			  sl_attr,
			  "attribute",
			  &es_attr);
	if (ret != 0) {
		DBG_ERR("No JSON attribute mapping for [%s]\n", sl_attr);
		return NULL;
	}

	for (i = 0; i < ARRAY_SIZE(ssmt_type_map); i++) {
		cmp = strcmp(typestr, ssmt_type_map[i].typestr);
		if (cmp == 0) {
			type = ssmt_type_map[i].typeval;
			break;
		}
	}
	if (i == ARRAY_SIZE(ssmt_type_map)) {
		/* The loop ran to completion: no known type matched */
		DBG_ERR("Unknown JSON type [%s] for [%s]\n",
			typestr,
			sl_attr);
		return NULL;
	}

	es_map = talloc_zero(mem_ctx, struct es_attr_map);
	if (es_map == NULL) {
		return NULL;
	}
	es_map->type = type;

	es_map->name = es_escape_str(es_map, es_attr, NULL);
	if (es_map->name == NULL) {
		TALLOC_FREE(es_map);
		return NULL;
	}

	return es_map;
}

/*
 * Look up the Spotlight type "sl_type" in the JSON object "mime_map" and
 * return the string value stored under that key.
 *
 * The returned string is not copied here; it is the pointer handed out by
 * json_unpack().
 *
 * Returns NULL if "sl_type" is NULL or if "mime_map" has no string value
 * for that key.
 */
const char *es_map_sl_type(json_t *mime_map,
			   const char *sl_type)
{
	const char *mime_type = NULL;
	int ret;

	if (sl_type == NULL) {
		return NULL;
	}

	ret = json_unpack(mime_map,
			  "{s: s}",
			  sl_type,
			  &mime_type);
	if (ret != 0) {
		return NULL;
	}

	return mime_type;
}