diff options
Diffstat (limited to 'source3/rpc_server/mdssvc')
27 files changed, 8726 insertions, 0 deletions
diff --git a/source3/rpc_server/mdssvc/README b/source3/rpc_server/mdssvc/README new file mode 100644 index 0000000..7dff83e --- /dev/null +++ b/source3/rpc_server/mdssvc/README @@ -0,0 +1,14 @@ +Introduction: +============= +This directory contains source code for the metadata search service +aka Spotlight. + +Bison and flex: +=============== +Not yet integrated into the waf buildsystem, run these by hand: + +$ bison -d -o sparql_parser.c sparql_parser.y +$ flex -o sparql_lexer.c sparql_lexer.l + +or use the bundled Makefile. + diff --git a/source3/rpc_server/mdssvc/dalloc.c b/source3/rpc_server/mdssvc/dalloc.c new file mode 100644 index 0000000..8b79b41 --- /dev/null +++ b/source3/rpc_server/mdssvc/dalloc.c @@ -0,0 +1,404 @@ +/* + Copyright (c) Ralph Boehme 2012-2014 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "replace.h" +#include <talloc.h> +#include "dalloc.h" +#include "marshalling.h" +#include "lib/util/charset/charset.h" +#include "lib/util/talloc_stack.h" +#include "system/time.h" + +/** + * Dynamic Datastore + **/ +struct dalloc_ctx { + void **dd_talloc_array; +}; + +/* Allocate a zeroed DALLOC_CTX on mem_ctx and set its talloc name to type */ +void *_dalloc_new(TALLOC_CTX *mem_ctx, const char *type) +{ + void *p; + + p = talloc_zero(mem_ctx, DALLOC_CTX); + if (p == NULL) { + return NULL; + } + talloc_set_name_const(p, type); + + return p; +} + +/* + * Append obj to the element array of dd. + * + * If size is non-zero, a talloc copy of the size bytes at obj, named type, + * is stored instead of obj itself. If size is zero, obj is stored as is and + * is checked with _talloc_get_type_abort() against type. + * + * Returns 0 on success and -1 on allocation failure, in which case dd is + * left unchanged. + */ +int _dalloc_add_talloc_chunk(DALLOC_CTX *dd, void *obj, const char *type, size_t size) +{ + size_t array_len = talloc_array_length(dd->dd_talloc_array); + void **new_array = NULL; + void *copy = NULL; + + if (size != 0) { + /* + * Make the copy before growing the array, so a failure here + * cannot leave an uninitialised slot behind. + */ + copy = talloc_named_const(dd, size, type); + if (copy == NULL) { + return -1; + } + memcpy(copy, obj, size); + obj = copy; + } else { + _talloc_get_type_abort(obj, type, __location__); + } + + /* + * Keep the old array pointer until the realloc has succeeded, + * otherwise the existing elements would be lost on failure. + */ + new_array = talloc_realloc(dd, + dd->dd_talloc_array, + void *, + array_len + 1); + if (new_array == NULL) { + TALLOC_FREE(copy); + return -1; + } + dd->dd_talloc_array = new_array; + + if (copy != NULL) { + /* Copies stay talloc children of the array, as before */ + obj = talloc_steal(dd->dd_talloc_array, copy); + } + + dd->dd_talloc_array[array_len] = obj; + + return 0; +} + +/* Get number of elements, returns 0 if the structure is empty or not initialized */ +size_t dalloc_size(const DALLOC_CTX *d) +{ + if (d == NULL) { + return 0; + } + return talloc_array_length(d->dd_talloc_array); +} + +/* Return element at position, NULL if the position is out of range */ +void *dalloc_get_object(const DALLOC_CTX *d, int i) +{ + size_t size = dalloc_size(d); + + /* reject negative indexes explicitly instead of via int -> size_t conversion */ + if (i < 0 || (size_t)i >= size) { + return NULL; + } + + return d->dd_talloc_array[i]; +} + +/* Return typename of element at position */ +const char *dalloc_get_name(const DALLOC_CTX *d, int i) +{ + void *o = dalloc_get_object(d, i); + + if (o == NULL) { + return NULL; + } + + return talloc_get_name(o); +} + +/* + * Get pointer to value from a DALLOC object + * + * Returns pointer to object from a DALLOC object. Nested object iteration + * is supported by using the type string "DALLOC_CTX". Any other type string + * designates the requested objects type. 
+ */ +void *dalloc_get(const DALLOC_CTX *d, ...) +{ + int result = 0; + void *p = NULL; + va_list args; + const char *type; + int elem; + + va_start(args, d); + type = va_arg(args, const char *); + + while (strcmp(type, "DALLOC_CTX") == 0) { + elem = va_arg(args, int); + if (elem >= talloc_array_length(d->dd_talloc_array)) { + result = -1; + goto done; + } + d = d->dd_talloc_array[elem]; + type = va_arg(args, const char *); + } + + elem = va_arg(args, int); + if (elem >= talloc_array_length(d->dd_talloc_array)) { + result = -1; + goto done; + } + + p = talloc_check_name(d->dd_talloc_array[elem], type); + if (p == NULL) { + result = -1; + goto done; + } + +done: + va_end(args); + if (result != 0) { + p = NULL; + } + return p; +} + +void *dalloc_value_for_key(const DALLOC_CTX *d, ...) +{ + int result = 0; + void *p = NULL; + va_list args; + const char *type = NULL; + int elem; + size_t array_len; + + va_start(args, d); + type = va_arg(args, const char *); + + while (strcmp(type, "DALLOC_CTX") == 0) { + array_len = talloc_array_length(d->dd_talloc_array); + elem = va_arg(args, int); + if (elem >= array_len) { + result = -1; + goto done; + } + d = d->dd_talloc_array[elem]; + type = va_arg(args, const char *); + } + + array_len = talloc_array_length(d->dd_talloc_array); + + for (elem = 0; elem + 1 < array_len; elem += 2) { + if (strcmp(talloc_get_name(d->dd_talloc_array[elem]), "char *") != 0) { + result = -1; + goto done; + } + if (strcmp((char *)d->dd_talloc_array[elem],type) == 0) { + p = d->dd_talloc_array[elem + 1]; + break; + } + } + if (p == NULL) { + goto done; + } + + type = va_arg(args, const char *); + if (strcmp(talloc_get_name(p), type) != 0) { + p = NULL; + } + +done: + va_end(args); + if (result != 0) { + p = NULL; + } + return p; +} + +static char *dalloc_strdup(TALLOC_CTX *mem_ctx, const char *string) +{ + char *p; + + p = talloc_strdup(mem_ctx, string); + if (p == NULL) { + return NULL; + } + talloc_set_name_const(p, "char *"); + return p; +} + +int 
dalloc_stradd(DALLOC_CTX *d, const char *string) +{ + int result; + char *p; + + p = dalloc_strdup(d, string); + if (p == NULL) { + return -1; + } + + result = dalloc_add(d, p, char *); + if (result != 0) { + return -1; + } + + return 0; +} + +/* + * Return a string of level tab characters allocated on mem_ctx, or NULL if + * level is negative or the allocation fails. + */ +static char *tab_level(TALLOC_CTX *mem_ctx, int level) +{ + int i; + char *string = NULL; + + if (level < 0) { + return NULL; + } + + string = talloc_array(mem_ctx, char, level + 1); + if (string == NULL) { + /* callers check for NULL, so don't write through it here */ + return NULL; + } + + for (i = 0; i < level; i++) { + string[i] = '\t'; + } + + string[i] = '\0'; + return string; +} + +/* + * Return a human readable dump of dd and its nested elements, indented by + * nestinglevel tabs. The string is allocated on dd, NULL is returned on error. + */ +char *dalloc_dump(DALLOC_CTX *dd, int nestinglevel) +{ + const char *type; + int n, result; + uint64_t i; + sl_bool_t bl; + sl_time_t t; + struct tm *tm; + char datestring[256]; + sl_cnids_t cnids; + char *logstring, *nested_logstring; + char *tab_string1, *tab_string2; + void *p; + bool ok; + char *utf8string; + size_t utf8len; + + tab_string1 = tab_level(dd, nestinglevel); + if (tab_string1 == NULL) { + return NULL; + } + tab_string2 = tab_level(dd, nestinglevel + 1); + if (tab_string2 == NULL) { + return NULL; + } + + logstring = talloc_asprintf(dd, + "%s%s(#%zu): {\n", + tab_string1, + talloc_get_name(dd), + dalloc_size(dd)); + if (logstring == NULL) { + return NULL; + } + + for (n = 0; n < dalloc_size(dd); n++) { + type = dalloc_get_name(dd, n); + if (type == NULL) { + return NULL; + } + p = dalloc_get_object(dd, n); + if (p == NULL) { + return NULL; + } + if (strcmp(type, "DALLOC_CTX") == 0 + || strcmp(type, "sl_array_t") == 0 + || strcmp(type, "sl_filemeta_t") == 0 + || strcmp(type, "sl_dict_t") == 0) { + nested_logstring = dalloc_dump(p, nestinglevel + 1); + if (nested_logstring == NULL) { + return NULL; + } + logstring = talloc_strdup_append(logstring, + nested_logstring); + } else if (strcmp(type, "uint64_t") == 0) { + memcpy(&i, p, sizeof(uint64_t)); + logstring = talloc_asprintf_append( + logstring, + "%suint64_t: 0x%04jx\n", + tab_string2, (uintmax_t)i); + } else if (strcmp(type, "char *") == 0) { + logstring = talloc_asprintf_append( + logstring, + "%sstring: %s\n", + tab_string2, + (char 
*)p); + } else if (strcmp(type, "smb_ucs2_t *") == 0) { + ok = convert_string_talloc(talloc_tos(), + CH_UTF16LE, + CH_UTF8, + p, + talloc_get_size(p), + &utf8string, + &utf8len); + if (!ok) { + return NULL; + } + logstring = talloc_asprintf_append( + logstring, + "%sUTF16-string: %s\n", + tab_string2, + utf8string); + TALLOC_FREE(utf8string); + } else if (strcmp(type, "sl_bool_t") == 0) { + memcpy(&bl, p, sizeof(sl_bool_t)); + logstring = talloc_asprintf_append( + logstring, + "%sbool: %s\n", + tab_string2, + bl ? "true" : "false"); + } else if (strcmp(type, "sl_nil_t") == 0) { + logstring = talloc_asprintf_append( + logstring, + "%snil\n", + tab_string2); + } else if (strcmp(type, "sl_time_t") == 0) { + memcpy(&t, p, sizeof(sl_time_t)); + tm = localtime(&t.tv_sec); + if (tm == NULL) { + return NULL; + } + result = strftime(datestring, + sizeof(datestring), + "%Y-%m-%d %H:%M:%S", tm); + if (result == 0) { + return NULL; + } + logstring = talloc_asprintf_append( + logstring, + "%ssl_time_t: %s.%06lu\n", + tab_string2, + datestring, + (unsigned long)t.tv_usec); + } else if (strcmp(type, "sl_cnids_t") == 0) { + memcpy(&cnids, p, sizeof(sl_cnids_t)); + logstring = talloc_asprintf_append( + logstring, + "%sCNIDs: unkn1: 0x%" PRIx16 ", unkn2: 0x%" PRIx32 "\n", + tab_string2, + cnids.ca_unkn1, + cnids.ca_context); + if (logstring == NULL) { + return NULL; + } + if (cnids.ca_cnids) { + nested_logstring = dalloc_dump( + cnids.ca_cnids, + nestinglevel + 2); + if (!nested_logstring) { + return NULL; + } + logstring = talloc_strdup_append(logstring, + nested_logstring); + } + } else { + logstring = talloc_asprintf_append( + logstring, + "%stype: %s\n", + tab_string2, + type); + } + if (logstring == NULL) { + return NULL; + } + } + logstring = talloc_asprintf_append(logstring, + "%s}\n", + tab_string1); + if (logstring == NULL) { + return NULL; + } + return logstring; +} diff --git a/source3/rpc_server/mdssvc/dalloc.h b/source3/rpc_server/mdssvc/dalloc.h new file mode 100644 
index 0000000..69650b8 --- /dev/null +++ b/source3/rpc_server/mdssvc/dalloc.h @@ -0,0 +1,165 @@ +/* + Copyright (c) Ralph Boehme 2012-2014 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* + Typesafe, dynamic object store based on talloc + + Usage + ===== + + Define some types: + + A key/value store aka dictionary that supports retrieving elements + by key: + + typedef DALLOC_CTX dict_t; + + An ordered set that can store different objects which can be + retrieved by number: + + typedef DALLOC_CTX set_t; + + Create a dalloc object and add elements of different types: + + TALLOC_CTX *mem_ctx = talloc_new(NULL); + DALLOC_CTX *d = dalloc_new(mem_ctx); + + Store an int value in the object: + + uint64_t i = 1; + dalloc_add_copy(d, &i, uint64_t); + + Store a string: + + dalloc_stradd(d, "hello world"); + + Add a nested object: + + DALLOC_CTX *nested = dalloc_new(d); + dalloc_add(d, nested, DALLOC_CTX); + + Add an int value to the nested object, this can be fetched: + + i = 2; + dalloc_add_copy(nested, &i, uint64_t); + + Add a nested set: + + set_t *set = dalloc_zero(nested, set_t); + dalloc_add(nested, set, set_t); + + Add an int value to the set: + + i = 3; + dalloc_add_copy(set, &i, uint64_t); + + Add a dictionary (key/value store): + + dict_t *dict = dalloc_zero(nested, dict_t); + dalloc_add(nested, dict, dict_t); + + Store a string as key in the dict: + + dalloc_stradd(dict, "key"); + 
+ Add a value for the key: + + i = 4; + dalloc_add_copy(dict, &i, uint64_t); + + Fetching value references + ========================= + + You can fetch anything that is not a DALLOC_CTXs, because passing + "DALLOC_CTXs" as type to the functions dalloc_get() and + dalloc_value_for_key() tells the function to step into that object + and expect more arguments that specify which element to fetch. + + Get reference to an objects element by position: + + uint64_t *p = dalloc_get(d, "uint64_t", 0); + + p now points to the first int with a value of 1. + + Get reference to the "hello world" string: + + str = dalloc_get(d, "char *", 1); + + You can't fetch a DALLOC_CTX itself: + + nested = dalloc_get(d, "DALLOC_CTX", 2); + + But you can fetch elements from the nested DALLOC_CTX: + + p = dalloc_get(d, "DALLOC_CTX", 2, "uint64_t", 0); + + p now points to the value 2. + + You can fetch types that are typedefd DALLOC_CTXs: + + set = dalloc_get(d, "DALLOC_CTX", 2, "set_t", 1); + + Fetch int from set, must use DALLOC_CTX as type for the set: + + p = dalloc_get(d, "DALLOC_CTX", 2, "DALLOC_CTX", 1, "uint64_t", 0); + + p points to 3. + + Fetch value by key from dictionary: + + p = dalloc_value_for_key(d, "DALLOC_CTX", 2, "DALLOC_CTX", 2, "key"); + + p now points to 4. +*/ + +#ifndef DALLOC_H +#define DALLOC_H + +#include <talloc.h> + +struct dalloc_ctx; +typedef struct dalloc_ctx DALLOC_CTX; + +#define dalloc_new(mem_ctx) (DALLOC_CTX *)_dalloc_new((mem_ctx), "DALLOC_CTX") +#define dalloc_zero(mem_ctx, type) (type *)_dalloc_new((mem_ctx), #type) + +/** + * talloc a chunk for obj of required size, copy the obj into the + * chunk and add the chunk to the dalloc ctx + **/ +#define dalloc_add_copy(d, obj, type) _dalloc_add_talloc_chunk((d), (obj), #type, sizeof(type)) + +/** + * Add a pointer to a talloced object to the dalloc ctx. The object + * must be a talloc child of the dalloc ctx. 
+ **/ +#define dalloc_add(d, obj, type) _dalloc_add_talloc_chunk((d), (obj), #type, 0) + + +extern void *dalloc_get(const DALLOC_CTX *d, ...); +extern void *dalloc_value_for_key(const DALLOC_CTX *d, ...); +extern size_t dalloc_size(const DALLOC_CTX *d); +extern void *dalloc_get_object(const DALLOC_CTX *d, int i); +extern const char *dalloc_get_name(const DALLOC_CTX *d, int i); +extern int dalloc_stradd(DALLOC_CTX *d, const char *string); + +extern void *_dalloc_new(TALLOC_CTX *mem_ctx, const char *type); +extern int _dalloc_add_talloc_chunk(DALLOC_CTX *d, void *obj, const char *type, size_t size); + +extern char *dalloc_dump(DALLOC_CTX *dd, int nestinglevel); + +#endif /* DALLOC_H */ diff --git a/source3/rpc_server/mdssvc/elasticsearch_mappings.json b/source3/rpc_server/mdssvc/elasticsearch_mappings.json new file mode 100644 index 0000000..9f68a64 --- /dev/null +++ b/source3/rpc_server/mdssvc/elasticsearch_mappings.json @@ -0,0 +1,142 @@ +{ + "attribute_mappings": { + "*": { + "type": "fts", + "attribute": "" + }, + "kMDItemTextContent": { + "type": "str", + "attribute": "content" + }, + "_kMDItemGroupId": { + "type": "type", + "attribute": "file.content_type" + }, + "kMDItemContentType": { + "type": "type", + "attribute": "file.content_type" + }, + "kMDItemContentTypeTree": { + "type": "type", + "attribute": "file.content_type" + }, + "kMDItemFSContentChangeDate": { + "type": "date", + "attribute": "file.last_modified" + }, + "kMDItemFSCreationDate": { + "type": "date", + "attribute": "file.created" + }, + "kMDItemFSName": { + "type": "str", + "attribute": "file.filename" + }, + "kMDItemFSOwnerGroupID": { + "type": "str", + "attribute": "attributes.owner" + }, + "kMDItemFSOwnerUserID": { + "type": "str", + "attribute": "attributes.group" + }, + "kMDItemFSSize": { + "type": "num", + "attribute": "file.filesize" + }, + "kMDItemPath": { + "type": "str", + "attribute": "path.real" + }, + "kMDItemAttributeChangeDate": { + "type": "date", + "attribute": 
"file.last_modified" + }, + "kMDItemAuthors": { + "type": "str", + "attribute": "meta.author" + }, + "kMDItemContentCreationDate": { + "type": "date", + "attribute": "file.created" + }, + "kMDItemContentModificationDate": { + "type": "date", + "attribute": "file.last_modified" + }, + "kMDItemCreator": { + "type": "str", + "attribute": "meta.raw.creator" + }, + "kMDItemDescription": { + "type": "str", + "attribute": "meta.raw.description" + }, + "kMDItemDisplayName": { + "type": "str", + "attribute": "file.filename" + }, + "kMDItemDurationSeconds": { + "type": "num", + "attribute": "meta.raw.xmpDM:duration" + }, + "kMDItemNumberOfPages": { + "type": "num", + "attribute": "meta.raw.xmpTPg:NPages" + }, + "kMDItemTitle": { + "type": "str", + "attribute": "meta.title" + }, + "kMDItemAlbum": { + "type": "str", + "attribute": "meta.raw.xmpDM:album" + }, + "kMDItemBitsPerSample": { + "type": "num", + "attribute": "meta.raw.tiff:BitsPerSample" + }, + "kMDItemPixelHeight": { + "type": "num", + "attribute": "meta.raw.Image Height" + }, + "kMDItemPixelWidth": { + "type": "num", + "attribute": "meta.raw.Image Width" + }, + "kMDItemResolutionHeightDPI": { + "type": "num", + "attribute": "meta.raw.Y Resolution" + }, + "kMDItemResolutionWidthDPI": { + "type": "num", + "attribute": "meta.raw.X Resolution" + } + }, + "mime_mappings": { + "1": "message/rfc822", + "2": "text/x-vcard", + "6": "text/x-vcard", + "7": "video/*", + "8": "application/octet-stream", + "9": "text/directory", + "10": "audio/*", + "11": "application/pdf", + "12": "application/vnd.oasis.opendocument.presentation", + "13": "image/*", + "public.content": "message/rfc822 application/pdf application/vnd.oasis.opendocument.presentation image/* text/*", + "public.jpeg": "image/jpeg", + "public.tiff": "image/tiff", + "com.compuserve.gif": "image/gif", + "public.png": "image/png", + "com.microsoft.bmp": "image/bmp", + "public.mp3": "audio/mpeg", + "public.mpeg-4-audio": "audio/x-aac", + "public.text": "text/*", + 
"public.plain-text": "text/plain", + "public.rtf": "text/rtf", + "public.html": "text/html", + "public.xml": "text/xml", + "public.archive": "application/zip application/x-bzip application/x-bzip2 application/x-tar application/x-7z-compressed" + } +} diff --git a/source3/rpc_server/mdssvc/es_lexer.l b/source3/rpc_server/mdssvc/es_lexer.l new file mode 100644 index 0000000..4be4225 --- /dev/null +++ b/source3/rpc_server/mdssvc/es_lexer.l @@ -0,0 +1,92 @@ +/* + Unix SMB/CIFS implementation. + Main metadata server / Spotlight routines / Elasticsearch backend + + Copyright (C) Ralph Boehme 2019 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +%{ +#include "includes.h" +#include "rpc_server/mdssvc/es_parser.tab.h" + +#define YY_NO_INPUT +#define mdsyylalloc SMB_MALLOC +#define mdsyylrealloc SMB_REALLOC + +static char *strip_quote(const char *phrase); +%} + +%option nounput noyyalloc noyyrealloc prefix="mdsyyl" + +ASC [a-zA-Z0-9_\*\:\-\.] 
+U [\x80-\xbf] +U2 [\xc2-\xdf] +U3 [\xe0-\xef] +U4 [\xf0-\xf4] +SPECIAL [\!\#\$\%\&\'\(\)\+\,\.\/\;\<\=\>\?\@\[\]\^\`\{\}\|\~\\] +ESCHAR [\"\*] +BLANK [ \t\n] + +UANY {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} +UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} +UPHRASE {UANY}|{SPECIAL}|{BLANK}|\\{ESCHAR} + +%% +InRange return FUNC_INRANGE; +\$time\.iso return DATE_ISO; +false {mdsyyllval.bval = false; return BOOLEAN;} +true {mdsyyllval.bval = true; return BOOLEAN;} +\" return QUOTE; +\( return OBRACE; +\) return CBRACE; +\&\& return AND; +\|\| return OR; +\=\= return EQUAL; +\!\= return UNEQUAL; +\= return EQUAL; +\< return LT; +\> return GT; +\, return COMMA; +{UANY}+ {mdsyyllval.sval = talloc_strdup(talloc_tos(), yytext); return WORD;} +\"{UPHRASE}+\" {mdsyyllval.sval = strip_quote(yytext); return PHRASE;} +{BLANK} /* ignore */ +%% + +static char *strip_quote(const char *phrase) +{ + size_t phrase_len = 0; + char *stripped_phrase = NULL; + + if (phrase == NULL) { + return NULL; + } + + phrase_len = strlen(phrase); + if (phrase_len < 2 || + phrase[0] != '\"' || + phrase[phrase_len - 1] != '\"') + { + return talloc_strdup(talloc_tos(), phrase); + } + + phrase++; + + stripped_phrase = talloc_strndup(talloc_tos(), phrase, phrase_len - 2); + if (stripped_phrase == NULL) { + return NULL; + } + return stripped_phrase; +} diff --git a/source3/rpc_server/mdssvc/es_mapping.c b/source3/rpc_server/mdssvc/es_mapping.c new file mode 100644 index 0000000..577fc38 --- /dev/null +++ b/source3/rpc_server/mdssvc/es_mapping.c @@ -0,0 +1,241 @@ +/* + Unix SMB/CIFS implementation. + Main metadata server / Spotlight routines / Elasticsearch backend + + Copyright (C) Ralph Boehme 2019 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "includes.h" +#include "es_mapping.h" + +/* + * Escaping of special characters in Lucene query syntax across HTTP and JSON + * ========================================================================== + * + * These characters in Lucene queries need escaping [1]: + * + * + - & | ! ( ) { } [ ] ^ " ~ * ? : \ / + * + * Additionally JSON requires escaping of: + * + * " \ + * + * Characters already escaped by the mdssvc client: + * + * * " \ + * + * The following table contains the resulting escaped strings, beginning with the + * search term, the corresponding Spotlight query and the final string that gets + * sent to the target Elasticsearch server. + * + * string | mdfind | http + * -------+--------+------ + * x!x x!x x\\!x + * x&x x&x x\\&x + * x+x x+x x\\+x + * x-x x-x x\\-x + * x.x x.x x\\.x + * x<x x<x x\\<x + * x>x x>x x\\>x + * x=x x=x x\\=x + * x?x x?x x\\?x + * x[x x[x x\\[x + * x]x x]x x\\]x + * x^x x^x x\\^x + * x{x x{x x\\{x + * x}x x}x x\\}x + * x|x x|x x\\|x + * x x x x x\\ x + * x*x x\*x x\\*x + * x\x x\\x x\\\\x + * x"x x\"x x\\\"x + * + * Special cases: + * x y It's not possible to search for terms including spaces, Spotlight + * will search for x OR y. + * x(x Search for terms including ( and ) does not work with Spotlight. 
+ * + * [1] <http://lucene.apache.org/core/8_2_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters> + */ + +static char *escape_str(TALLOC_CTX *mem_ctx, + const char *in, + const char *escape_list, + const char *escape_exceptions) +{ + char *out = NULL; + size_t in_len; + size_t new_len; + size_t in_pos; + size_t out_pos = 0; + + if (in == NULL) { + return NULL; + } + in_len = strlen(in); + + if (escape_list == NULL) { + escape_list = ""; + } + if (escape_exceptions == NULL) { + escape_exceptions = ""; + } + + /* + * Allocate enough space for the worst case: every char needs to be + * escaped and requires an additional char. + */ + new_len = (in_len * 2) + 1; + if (new_len <= in_len) { + return NULL; + } + + out = talloc_zero_array(mem_ctx, char, new_len); + if (out == NULL) { + return NULL; + } + + for (in_pos = 0, out_pos = 0; in_pos < in_len; in_pos++, out_pos++) { + if (strchr(escape_list, in[in_pos]) != NULL && + strchr(escape_exceptions, in[in_pos]) == NULL) + { + out[out_pos++] = '\\'; + } + out[out_pos] = in[in_pos]; + } + + return out; +} + +char *es_escape_str(TALLOC_CTX *mem_ctx, + const char *in, + const char *exceptions) +{ + const char *lucene_escape_list = "+-&|!(){}[]^\"~*?:\\/ "; + const char *json_escape_list = "\\\""; + char *lucene_escaped = NULL; + char *full_escaped = NULL; + + lucene_escaped = escape_str(mem_ctx, + in, + lucene_escape_list, + exceptions); + if (lucene_escaped == NULL) { + return NULL; + } + + full_escaped = escape_str(mem_ctx, + lucene_escaped, + json_escape_list, + NULL); + TALLOC_FREE(lucene_escaped); + return full_escaped; +} + +/* + * Look up sl_attr in kmd_map and return an es_attr_map allocated on mem_ctx + * that holds the mapped type and the escaped attribute name. + * + * Returns NULL if sl_attr is NULL, has no "type" or "attribute" entry in + * kmd_map, uses an unknown type string, or if an allocation fails. + */ +struct es_attr_map *es_map_sl_attr(TALLOC_CTX *mem_ctx, + json_t *kmd_map, + const char *sl_attr) +{ + struct es_attr_map *es_map = NULL; + const char *typestr = NULL; + enum ssm_type type; + /* json_unpack() "s" hands out a read-only string owned by the JSON value */ + const char *es_attr = NULL; + size_t i; + int cmp; + int ret; + + static struct { + const char *typestr; + enum ssm_type typeval; + } ssmt_type_map[] = { + {"bool", 
ssmt_bool}, + {"num", ssmt_num}, + {"str", ssmt_str}, + {"fts", ssmt_fts}, + {"date", ssmt_date}, + {"type", ssmt_type}, + }; + + if (sl_attr == NULL) { + return NULL; + } + + ret = json_unpack(kmd_map, + "{s: {s: s}}", + sl_attr, + "type", + &typestr); + if (ret != 0) { + DBG_DEBUG("No JSON type mapping for [%s]\n", sl_attr); + return NULL; + } + + ret = json_unpack(kmd_map, + "{s: {s: s}}", + sl_attr, + "attribute", + &es_attr); + if (ret != 0) { + DBG_ERR("No JSON attribute mapping for [%s]\n", sl_attr); + return NULL; + } + + for (i = 0; i < ARRAY_SIZE(ssmt_type_map); i++) { + cmp = strcmp(typestr, ssmt_type_map[i].typestr); + if (cmp == 0) { + type = ssmt_type_map[i].typeval; + break; + } + } + if (i == ARRAY_SIZE(ssmt_type_map)) { + return NULL; + } + + es_map = talloc_zero(mem_ctx, struct es_attr_map); + if (es_map == NULL) { + return NULL; + } + es_map->type = type; + + es_map->name = es_escape_str(es_map, es_attr, NULL); + if (es_map->name == NULL) { + TALLOC_FREE(es_map); + /* this function returns a pointer, not a bool */ + return NULL; + } + + return es_map; +} + +const char *es_map_sl_type(json_t *mime_map, + const char *sl_type) +{ + const char *mime_type = NULL; + int ret; + + if (sl_type == NULL) { + return NULL; + } + + ret = json_unpack(mime_map, + "{s: s}", + sl_type, + &mime_type); + if (ret != 0) { + return NULL; + } + + return mime_type; +} diff --git a/source3/rpc_server/mdssvc/es_mapping.h b/source3/rpc_server/mdssvc/es_mapping.h new file mode 100644 index 0000000..29511b5 --- /dev/null +++ b/source3/rpc_server/mdssvc/es_mapping.h @@ -0,0 +1,49 @@ +/* + Unix SMB/CIFS implementation. + Main metadata server / Spotlight routines / Elasticsearch backend + + Copyright (c) Ralph Boehme 2019 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _ES_MAPPING_H_ +#define _ES_MAPPING_H_ + +#include <jansson.h> + +enum ssm_type { + ssmt_bool, /* a boolean value */ + ssmt_num, /* a numeric value */ + ssmt_str, /* a string value */ + ssmt_fts, /* a string value */ + ssmt_date, /* date values */ + ssmt_type /* kMDItemContentType, requires special mapping */ +}; + +struct es_attr_map { + enum ssm_type type; + const char *name; +}; + +char *es_escape_str(TALLOC_CTX *mem_ctx, + const char *in, + const char *exceptions); +struct es_attr_map *es_map_sl_attr(TALLOC_CTX *mem_ctx, + json_t *kmd_map, + const char *sl_attr); +const char *es_map_sl_type(json_t *mime_map, + const char *sl_type); + +#endif diff --git a/source3/rpc_server/mdssvc/es_parser.y b/source3/rpc_server/mdssvc/es_parser.y new file mode 100644 index 0000000..c154dd6 --- /dev/null +++ b/source3/rpc_server/mdssvc/es_parser.y @@ -0,0 +1,686 @@ +/* + Unix SMB/CIFS implementation. + Main metadata server / Spotlight routines / Elasticsearch backend + + Copyright (C) Ralph Boehme 2019 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
If not, see <http://www.gnu.org/licenses/>. +*/ + +%{ + #include "includes.h" + #include "rpc_server/mdssvc/mdssvc.h" + #include "rpc_server/mdssvc/mdssvc_es.h" + #include "rpc_server/mdssvc/es_parser.tab.h" + #include "rpc_server/mdssvc/es_mapping.h" + #include "lib/util/smb_strtox.h" + #include <jansson.h> + + /* + * allow building with -O3 -Wp,-D_FORTIFY_SOURCE=2 + * + * /tmp/samba-testbase/.../mdssvc/es_parser.y: In function + * ‘mdsyylparse’: + * es_parser.tab.c:1124:6: error: assuming pointer wraparound + * does not occur when comparing P +- C1 with P +- C2 + * [-Werror=strict-overflow] + * + * The generated code in es_parser.tab.c looks like this: + * + * if (yyss + yystacksize - 1 <= yyssp) + */ + #pragma GCC diagnostic ignored "-Wstrict-overflow" + + #define YYMALLOC SMB_MALLOC + #define YYREALLOC SMB_REALLOC + + struct yy_buffer_state; + typedef struct yy_buffer_state *YY_BUFFER_STATE; + int mdsyyllex(void); + void mdsyylerror(char const *); + void *mdsyylterminate(void); + YY_BUFFER_STATE mdsyyl_scan_string(const char *str); + void mdsyyl_delete_buffer(YY_BUFFER_STATE buffer); + + /* forward declarations */ + static char *isodate_to_sldate(const char *s); + static char *map_expr(const struct es_attr_map *attr, + char op, + const char *val1, + const char *val2); + + /* global vars, eg needed by the lexer */ + struct es_parser_state { + TALLOC_CTX *frame; + json_t *kmd_map; + json_t *mime_map; + bool ignore_unknown_attribute; + bool ignore_unknown_type; + bool type_error; + YY_BUFFER_STATE s; + const char *result; + } *global_es_parser_state; +%} + +%code provides { + #include <stdbool.h> + #include <jansson.h> + #include "rpc_server/mdssvc/mdssvc.h" + + /* 2001-01-01T00:00:00Z - Unix Epoch = SP_RAW_TIME_OFFSET */ + #define SP_RAW_TIME_OFFSET 978307200 + + int mdsyylwrap(void); + bool map_spotlight_to_es_query(TALLOC_CTX *mem_ctx, + json_t *mappings, + const char *path_scope, + const char *query_string, + char **_es_query); +} + +%union { + bool bval; + 
const char *sval; + struct es_attr_map *attr_map; +} + +%name-prefix "mdsyyl" +%expect 1 +%error-verbose + +%type <sval> match expr line function value isodate +%type <attr_map> attribute + +%token <sval> WORD PHRASE +%token <bval> BOOLEAN +%token FUNC_INRANGE +%token DATE_ISO +%token OBRACE CBRACE EQUAL UNEQUAL GT LT COMMA QUOTE +%left OR +%left AND +%% + +input: +/* empty */ +| input line +; + +line: +expr { + if ($1 == NULL) { + YYABORT; + } + if (global_es_parser_state->type_error) { + YYABORT; + } + global_es_parser_state->result = $1; +} +; + +expr: +OBRACE expr CBRACE { + if ($2 == NULL) { + $$ = NULL; + } else { + $$ = talloc_asprintf(talloc_tos(), "(%s)", $2); + if ($$ == NULL) YYABORT; + } +} +| expr AND expr { + if ($1 == NULL && $3 == NULL) { + $$ = NULL; + } else if ($1 == NULL) { + $$ = $3; + } else if ($3 == NULL) { + $$ = $1; + } else { + $$ = talloc_asprintf(talloc_tos(), "(%s) AND (%s)", $1, $3); + if ($$ == NULL) YYABORT; + } +} +| expr OR expr { + if ($1 == NULL && $3 == NULL) { + $$ = NULL; + } else if ($1 == NULL) { + $$ = $3; + } else if ($3 == NULL) { + $$ = $1; + } else { + $$ = talloc_asprintf(talloc_tos(), "%s OR %s", $1, $3); + if ($$ == NULL) YYABORT; + } +} +| match { + $$ = $1; +} +| BOOLEAN { + /* + * We can't properly handle these in expressions, fortunately this + * is probably only ever used by OS X as sole element in an + * expression ie "False" (when Finder window selected our share + * but no search string entered yet). Packet traces showed that OS + * X Spotlight server then returns a failure (ie -1) which is what + * we do here too by calling YYABORT. 
+ */ + YYABORT; +}; + +match: +attribute EQUAL value { + if ($1 == NULL) { + $$ = NULL; + } else { + $$ = map_expr($1, '=', $3, NULL); + } +} +| attribute UNEQUAL value { + if ($1 == NULL) { + $$ = NULL; + } else { + $$ = map_expr($1, '!', $3, NULL); + } +} +| attribute LT value { + if ($1 == NULL) { + $$ = NULL; + } else { + $$ = map_expr($1, '<', $3, NULL); + } +} +| attribute GT value { + if ($1 == NULL) { + $$ = NULL; + } else { + $$ = map_expr($1, '>', $3, NULL); + } +} +| function { + $$ = $1; +} +| match WORD { + $$ = $1; +}; + +function: +FUNC_INRANGE OBRACE attribute COMMA WORD COMMA WORD CBRACE { + if ($3 == NULL) { + $$ = NULL; + } else { + $$ = map_expr($3, '~', $5, $7); + } +}; + +attribute: +WORD { + $$ = es_map_sl_attr(global_es_parser_state->frame, + global_es_parser_state->kmd_map, + $1); + if ($$ == NULL && + !global_es_parser_state->ignore_unknown_attribute) + { + YYABORT; + } +}; + +value: +PHRASE { + $$ = $1; +} +| isodate { + $$ = $1; +}; + +isodate: +DATE_ISO OBRACE WORD CBRACE { + $$ = isodate_to_sldate($3); + if ($$ == NULL) YYABORT; +}; + +%% + +/* + * Spotlight has two date formats: + * - seconds since 2001-01-01 00:00:00Z + * - as string "$time.iso(%Y-%m-%dT%H:%M:%SZ)" + * This function converts the latter to the former as string, so the parser + * can work on a uniform format. 
+ */ +static char *isodate_to_sldate(const char *isodate) +{ + struct es_parser_state *s = global_es_parser_state; + struct tm tm; + const char *p = NULL; + char *tstr = NULL; + time_t t; + + p = strptime(isodate, "%Y-%m-%dT%H:%M:%SZ", &tm); + if (p == NULL) { + DBG_ERR("strptime [%s] failed\n", isodate); + return NULL; + } + + t = timegm(&tm); + t -= SP_RAW_TIME_OFFSET; + + tstr = talloc_asprintf(s->frame, "%jd", (intmax_t)t); + if (tstr == NULL) { + return NULL; + } + + return tstr; +} + +static char *map_type(const struct es_attr_map *attr, + char op, + const char *val) +{ + struct es_parser_state *s = global_es_parser_state; + const char *mime_type_list = NULL; + char *esc_mime_type_list = NULL; + const char *not = NULL; + const char *end = NULL; + char *es = NULL; + + mime_type_list = es_map_sl_type(s->mime_map, val); + if (mime_type_list == NULL) { + DBG_DEBUG("Mapping type [%s] failed\n", val); + if (!s->ignore_unknown_type) { + s->type_error = true; + } + return NULL; + } + + esc_mime_type_list = es_escape_str(s->frame, + mime_type_list, + "* "); + if (esc_mime_type_list == NULL) { + return NULL; + } + + switch (op) { + case '=': + not = ""; + end = ""; + break; + case '!': + not = "(NOT "; + end = ")"; + break; + default: + DBG_ERR("Mapping type [%s] unexpected op [%c]\n", val, op); + return NULL; + } + es = talloc_asprintf(s->frame, + "%s%s:(%s)%s", + not, + attr->name, + esc_mime_type_list, + end); + if (es == NULL) { + return NULL; + } + + return es; +} + +static char *map_num(const struct es_attr_map *attr, + char op, + const char *val1, + const char *val2) +{ + struct es_parser_state *s = global_es_parser_state; + char *es = NULL; + + switch (op) { + case '>': + es = talloc_asprintf(s->frame, + "%s:{%s TO *}", + attr->name, + val1); + break; + case '<': + es = talloc_asprintf(s->frame, + "%s:{* TO %s}", + attr->name, + val1); + break; + case '~': + es = talloc_asprintf(s->frame, + "%s:[%s TO %s]", + attr->name, + val1, + val2); + break; + case '=': + 
es = talloc_asprintf(s->frame, + "%s:%s", + attr->name, + val1); + break; + case '!': + es = talloc_asprintf(s->frame, + "(NOT %s:%s)", + attr->name, + val1); + break; + default: + DBG_ERR("Mapping num unexpected op [%c]\n", op); + return NULL; + } + if (es == NULL) { + return NULL; + } + + return es; +} + +static char *map_fts(const struct es_attr_map *attr, + char op, + const char *val) +{ + struct es_parser_state *s = global_es_parser_state; + const char *not = NULL; + const char *end = NULL; + char *esval = NULL; + char *es = NULL; + + esval = es_escape_str(s->frame, val, "*\\\""); + if (esval == NULL) { + yyerror("es_escape_str failed"); + return NULL; + } + + switch (op) { + case '=': + not = ""; + end = ""; + break; + case '!': + not = "(NOT "; + end = ")"; + break; + default: + DBG_ERR("Mapping fts [%s] unexpected op [%c]\n", val, op); + return NULL; + } + es = talloc_asprintf(s->frame, + "%s%s%s", + not, + esval, + end); + if (es == NULL) { + return NULL; + } + return es; +} + +static char *map_str(const struct es_attr_map *attr, + char op, + const char *val) +{ + struct es_parser_state *s = global_es_parser_state; + char *esval = NULL; + char *es = NULL; + const char *not = NULL; + const char *end = NULL; + + esval = es_escape_str(s->frame, val, "*\\\""); + if (esval == NULL) { + yyerror("es_escape_str failed"); + return NULL; + } + + switch (op) { + case '=': + not = ""; + end = ""; + break; + case '!': + not = "(NOT "; + end = ")"; + break; + default: + DBG_ERR("Mapping string [%s] unexpected op [%c]\n", val, op); + return NULL; + } + + es = talloc_asprintf(s->frame, + "%s%s:%s%s", + not, + attr->name, + esval, + end); + if (es == NULL) { + return NULL; + } + return es; +} + +/* + * Convert Spotlight date seconds since 2001-01-01 00:00:00Z + * to a date string in the format %Y-%m-%dT%H:%M:%SZ. 
+ */ +static char *map_sldate_to_esdate(TALLOC_CTX *mem_ctx, + const char *sldate) +{ + struct tm *tm = NULL; + char *esdate = NULL; + char buf[21]; + size_t len; + time_t t; + int error; + + t = (time_t)smb_strtoull(sldate, NULL, 10, &error, SMB_STR_STANDARD); + if (error != 0) { + DBG_ERR("smb_strtoull [%s] failed\n", sldate); + return NULL; + } + t += SP_RAW_TIME_OFFSET; + + tm = gmtime(&t); + if (tm == NULL) { + DBG_ERR("localtime [%s] failed\n", sldate); + return NULL; + } + + len = strftime(buf, sizeof(buf), + "%Y-%m-%dT%H:%M:%SZ", tm); + if (len != 20) { + DBG_ERR("strftime [%s] failed\n", sldate); + return NULL; + } + + esdate = es_escape_str(mem_ctx, buf, NULL); + if (esdate == NULL) { + yyerror("es_escape_str failed"); + return NULL; + } + return esdate; +} + +static char *map_date(const struct es_attr_map *attr, + char op, + const char *sldate1, + const char *sldate2) +{ + struct es_parser_state *s = global_es_parser_state; + char *esdate1 = NULL; + char *esdate2 = NULL; + char *es = NULL; + + if (op == '~' && sldate2 == NULL) { + DBG_ERR("Date range query, but second date is NULL\n"); + return NULL; + } + + esdate1 = map_sldate_to_esdate(s->frame, sldate1); + if (esdate1 == NULL) { + DBG_ERR("map_sldate_to_esdate [%s] failed\n", sldate1); + return NULL; + } + if (sldate2 != NULL) { + esdate2 = map_sldate_to_esdate(s->frame, sldate2); + if (esdate2 == NULL) { + DBG_ERR("map_sldate_to_esdate [%s] failed\n", sldate2); + return NULL; + } + } + + switch (op) { + case '>': + es = talloc_asprintf(s->frame, + "%s:{%s TO *}", + attr->name, + esdate1); + break; + case '<': + es = talloc_asprintf(s->frame, + "%s:{* TO %s}", + attr->name, + esdate1); + break; + case '~': + es = talloc_asprintf(s->frame, + "%s:[%s TO %s]", + attr->name, + esdate1, + esdate2); + break; + case '=': + es = talloc_asprintf(s->frame, + "%s:%s", + attr->name, + esdate1); + break; + case '!': + es = talloc_asprintf(s->frame, + "(NOT %s:%s)", + attr->name, + esdate1); + break; + } + if (es 
== NULL) { + return NULL; + } + return es; +} + +static char *map_expr(const struct es_attr_map *attr, + char op, + const char *val1, + const char *val2) +{ + char *es = NULL; + + switch (attr->type) { + case ssmt_type: + es = map_type(attr, op, val1); + break; + case ssmt_num: + es = map_num(attr, op, val1, val2); + break; + case ssmt_fts: + es = map_fts(attr, op, val1); + break; + case ssmt_str: + es = map_str(attr, op, val1); + break; + case ssmt_date: + es = map_date(attr, op, val1, val2); + break; + default: + break; + } + if (es == NULL) { + DBG_DEBUG("Mapping [%s %c %s (%s)] failed\n", + attr->name, op, val1, val2 ? val2 : ""); + return NULL; + } + + return es; +} + +void mdsyylerror(const char *str) +{ + DBG_ERR("Parser failed: %s\n", str); +} + +int mdsyylwrap(void) +{ + return 1; +} + +/** + * Map a Spotlight RAW query string to a ES query string + **/ +bool map_spotlight_to_es_query(TALLOC_CTX *mem_ctx, + json_t *mappings, + const char *path_scope, + const char *query_string, + char **_es_query) +{ + struct es_parser_state s = { + .frame = talloc_stackframe(), + }; + int result; + char *es_query = NULL; + + s.kmd_map = json_object_get(mappings, "attribute_mappings"); + if (s.kmd_map == NULL) { + DBG_ERR("Failed to load attribute_mappings from JSON\n"); + return false; + } + s.mime_map = json_object_get(mappings, "mime_mappings"); + if (s.mime_map == NULL) { + DBG_ERR("Failed to load mime_mappings from JSON\n"); + return false; + } + + s.s = mdsyyl_scan_string(query_string); + if (s.s == NULL) { + DBG_WARNING("Failed to parse [%s]\n", query_string); + TALLOC_FREE(s.frame); + return false; + } + + s.ignore_unknown_attribute = lp_parm_bool(GLOBAL_SECTION_SNUM, + "elasticsearch", + "ignore unknown attribute", + false); + s.ignore_unknown_type = lp_parm_bool(GLOBAL_SECTION_SNUM, + "elasticsearch", + "ignore unknown type", + false); + + global_es_parser_state = &s; + result = mdsyylparse(); + global_es_parser_state = NULL; + mdsyyl_delete_buffer(s.s); + + if 
(result != 0) { + TALLOC_FREE(s.frame); + return false; + } + + es_query = talloc_asprintf(mem_ctx, + "(%s) AND path.real.fulltext:\\\"%s\\\"", + s.result, path_scope); + TALLOC_FREE(s.frame); + if (es_query == NULL) { + return false; + } + + *_es_query = es_query; + return true; +} diff --git a/source3/rpc_server/mdssvc/es_parser_test.c b/source3/rpc_server/mdssvc/es_parser_test.c new file mode 100644 index 0000000..7d88c67 --- /dev/null +++ b/source3/rpc_server/mdssvc/es_parser_test.c @@ -0,0 +1,97 @@ +/* + Unix SMB/CIFS implementation. + Main metadata server / Spotlight routines / ES backend + + Copyright (C) Ralph Boehme 2019 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "includes.h" +#include "rpc_server/mdssvc/mdssvc.h" +#include "rpc_server/mdssvc/mdssvc_es.h" +#include "rpc_server/mdssvc/es_parser.tab.h" +#include "rpc_server/mdssvc/es_mapping.h" + +/* + * Examples: + * + * $ ./spotlight2es '_kMDItemGroupId=="11"' + * ... + * $ ./spotlight2es '*=="test*"||kMDItemTextContent=="test*"' + * ... 
+ */ + +int main(int argc, char **argv) +{ + TALLOC_CTX *mem_ctx = NULL; + json_t *mappings = NULL; + json_error_t json_error; + char *default_path = NULL; + const char *path = NULL; + const char *query_string = NULL; + const char *path_scope = NULL; + char *es_query = NULL; + bool ok; + + if (argc != 2) { + printf("usage: %s QUERY\n", argv[0]); + return 1; + } + query_string = argv[1]; + path_scope = "/foo/bar"; + + lp_load_global(get_dyn_CONFIGFILE()); + + mem_ctx = talloc_init("es_parser_test"); + if (mem_ctx == NULL) { + return 1; + } + + default_path = talloc_asprintf(mem_ctx, + "%s/mdssvc/elasticsearch_mappings.json", + get_dyn_SAMBA_DATADIR()); + if (default_path == NULL) { + TALLOC_FREE(mem_ctx); + return 1; + } + + path = lp_parm_const_string(GLOBAL_SECTION_SNUM, + "elasticsearch", + "mappings", + default_path); + if (path == NULL) { + TALLOC_FREE(mem_ctx); + return 1; + } + + mappings = json_load_file(path, 0, &json_error); + if (mappings == NULL) { + DBG_ERR("Opening mapping file [%s] failed: %s\n", + path, strerror(errno)); + TALLOC_FREE(mem_ctx); + return 1; + } + + ok = map_spotlight_to_es_query(mem_ctx, + mappings, + path_scope, + query_string, + &es_query); + printf("%s\n", ok ? es_query : "*mapping failed*"); + + json_decref(mappings); + talloc_free(mem_ctx); + return ok ? 0 : 1; +} diff --git a/source3/rpc_server/mdssvc/marshalling.c b/source3/rpc_server/mdssvc/marshalling.c new file mode 100644 index 0000000..b3e16d9 --- /dev/null +++ b/source3/rpc_server/mdssvc/marshalling.c @@ -0,0 +1,1422 @@ +/* + Unix SMB/CIFS implementation. + Main metadata server / Spotlight routines + + Copyright (C) Ralph Boehme 2012-2014 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "includes.h" +#include "dalloc.h" +#include "marshalling.h" + +#undef DBGC_CLASS +#define DBGC_CLASS DBGC_RPC_SRV + +/* + * This is used to talloc an array that will hold the table of + * contents of a marshalled Spotlight RPC (S-RPC) reply. Each ToC + * entry is 8 bytes, so we allocate space for 1024 entries which + * should be sufficient for even the largest S-RPC replies. + * + * The total buffersize for S-RPC packets is typically limited to 64k, + * so we can only store so many elements there anyway. + */ +#define MAX_SLQ_TOC 1024*8 +#define MAX_SLQ_TOCIDX 1024 +#define MAX_SLQ_COUNT 4096 +#define MAX_SL_STRLEN 1024 + +/****************************************************************************** + * RPC data marshalling and unmarshalling + ******************************************************************************/ + +/* Spotlight epoch is 1.1.2001 00:00 UTC */ +#define SPOTLIGHT_TIME_DELTA 978307200 /* Diff from UNIX epoch to Spotlight epoch */ + +#define SQ_TYPE_NULL 0x0000 +#define SQ_TYPE_COMPLEX 0x0200 +#define SQ_TYPE_INT64 0x8400 +#define SQ_TYPE_BOOL 0x0100 +#define SQ_TYPE_FLOAT 0x8500 +#define SQ_TYPE_DATA 0x0700 +#define SQ_TYPE_CNIDS 0x8700 +#define SQ_TYPE_UUID 0x0e00 +#define SQ_TYPE_DATE 0x8600 +#define SQ_TYPE_TOC 0x8800 + +#define SQ_CPX_TYPE_ARRAY 0x0a00 +#define SQ_CPX_TYPE_STRING 0x0c00 +#define SQ_CPX_TYPE_UTF16_STRING 0x1c00 +#define SQ_CPX_TYPE_DICT 0x0d00 +#define SQ_CPX_TYPE_CNIDS 0x1a00 +#define SQ_CPX_TYPE_FILEMETA 0x1b00 + +struct sl_tag { + int type; + int count; + size_t length; + size_t size; +}; + +static ssize_t 
sl_pack_loop(DALLOC_CTX *query, char *buf, + ssize_t offset, size_t bufsize, + char *toc_buf, int *toc_idx, int *count); +static ssize_t sl_unpack_loop(DALLOC_CTX *query, const char *buf, + ssize_t offset, size_t bufsize, + int count, ssize_t toc_offset, + int encoding); +static ssize_t sl_pack(DALLOC_CTX *query, char *buf, size_t bufsize); + +/****************************************************************************** + * Wrapper functions for the *VAL macros with bound checking + ******************************************************************************/ + +static ssize_t sl_push_uint64_val(char *buf, + ssize_t offset, + size_t max_offset, + uint64_t val) +{ + if (offset + 8 > max_offset) { + DEBUG(1, ("%s: offset: %zd, max_offset: %zu", + __func__, offset, max_offset)); + return -1; + } + + SBVAL(buf, offset, val); + return offset + 8; +} + +static ssize_t sl_pull_uint64_val(const char *buf, + ssize_t offset, + size_t bufsize, + uint encoding, + uint64_t *presult) +{ + uint64_t val; + + if (offset + 8 > bufsize) { + DEBUG(1,("%s: buffer overflow\n", __func__)); + return -1; + } + + if (encoding == SL_ENC_LITTLE_ENDIAN) { + val = BVAL(buf, offset); + } else { + val = RBVAL(buf, offset); + } + + *presult = val; + + return offset + 8; +} + +/* + * Returns the UTF-16 string encoding, by checking the 2-byte byte order mark. + * If there is no byte order mark, -1 is returned. 
+ */ +static int spotlight_get_utf16_string_encoding(const char *buf, ssize_t offset, + size_t query_length, int encoding) +{ + int utf16_encoding; + + /* Assumed encoding in absence of a bom is little endian */ + utf16_encoding = SL_ENC_LITTLE_ENDIAN; + + if (query_length >= 2) { + uint8_t le_bom[] = {0xff, 0xfe}; + uint8_t be_bom[] = {0xfe, 0xff}; + if (memcmp(le_bom, buf + offset, sizeof(uint16_t)) == 0) { + utf16_encoding = SL_ENC_LITTLE_ENDIAN | SL_ENC_UTF_16; + } else if (memcmp(be_bom, buf + offset, sizeof(uint16_t)) == 0) { + utf16_encoding = SL_ENC_BIG_ENDIAN | SL_ENC_UTF_16; + } + } + + return utf16_encoding; +} + +/****************************************************************************** + * marshalling functions + ******************************************************************************/ + +static inline uint64_t sl_pack_tag(uint16_t type, uint16_t size_or_count, uint32_t val) +{ + uint64_t tag = ((uint64_t)val << 32) | ((uint64_t)type << 16) | size_or_count; + return tag; +} + +static ssize_t sl_pack_float(double d, char *buf, ssize_t offset, size_t bufsize) +{ + union { + double d; + uint64_t w; + } ieee_fp_union; + + ieee_fp_union.d = d; + + offset = sl_push_uint64_val(buf, offset, bufsize, sl_pack_tag(SQ_TYPE_FLOAT, 2, 1)); + if (offset == -1) { + return -1; + } + offset = sl_push_uint64_val(buf, offset, bufsize, ieee_fp_union.w); + if (offset == -1) { + return -1; + } + + return offset; +} + +static ssize_t sl_pack_uint64(uint64_t u, char *buf, ssize_t offset, size_t bufsize) +{ + uint64_t tag; + + tag = sl_pack_tag(SQ_TYPE_INT64, 2, 1); + offset = sl_push_uint64_val(buf, offset, bufsize, tag); + if (offset == -1) { + return -1; + } + offset = sl_push_uint64_val(buf, offset, bufsize, u); + if (offset == -1) { + return -1; + } + + return offset; +} + +static ssize_t sl_pack_uint64_array(uint64_t *u, char *buf, ssize_t offset, size_t bufsize, int *toc_count) +{ + int count, i; + uint64_t tag; + + count = talloc_array_length(u); + + tag = 
sl_pack_tag(SQ_TYPE_INT64, count + 1, count); + offset = sl_push_uint64_val(buf, offset, bufsize, tag); + if (offset == -1) { + return -1; + } + + for (i = 0; i < count; i++) { + offset = sl_push_uint64_val(buf, offset, bufsize, u[i]); + if (offset == -1) { + return -1; + } + } + + if (count > 1) { + *toc_count += (count - 1); + } + + return offset; +} + +static ssize_t sl_pack_bool(sl_bool_t val, char *buf, ssize_t offset, size_t bufsize) +{ + uint64_t tag; + + tag = sl_pack_tag(SQ_TYPE_BOOL, 1, val ? 1 : 0); + offset = sl_push_uint64_val(buf, offset, bufsize, tag); + if (offset == -1) { + return -1; + } + + return offset; +} + +static ssize_t sl_pack_nil(char *buf, ssize_t offset, size_t bufsize) +{ + uint64_t tag; + + tag = sl_pack_tag(SQ_TYPE_NULL, 1, 1); + offset = sl_push_uint64_val(buf, offset, bufsize, tag); + if (offset == -1) { + return -1; + } + + return offset; +} + +static ssize_t sl_pack_date(sl_time_t t, char *buf, ssize_t offset, size_t bufsize) +{ + uint64_t data; + uint64_t tag; + union { + double d; + uint64_t w; + } ieee_fp_union; + + tag = sl_pack_tag(SQ_TYPE_DATE, 2, 1); + offset = sl_push_uint64_val(buf, offset, bufsize, tag); + if (offset == -1) { + return -1; + } + + ieee_fp_union.d = (double)(t.tv_sec - SPOTLIGHT_TIME_DELTA); + ieee_fp_union.d += (double)t.tv_usec / 1000000; + + data = ieee_fp_union.w; + offset = sl_push_uint64_val(buf, offset, bufsize, data); + if (offset == -1) { + return -1; + } + + return offset; +} + +static ssize_t sl_pack_uuid(sl_uuid_t *uuid, char *buf, ssize_t offset, size_t bufsize) +{ + uint64_t tag; + + tag = sl_pack_tag(SQ_TYPE_UUID, 3, 1); + offset = sl_push_uint64_val(buf, offset, bufsize, tag); + if (offset == -1) { + return -1; + } + + if (offset + 16 > bufsize) { + return -1; + } + memcpy(buf + offset, uuid, 16); + + return offset + 16; +} + +static ssize_t sl_pack_CNID(sl_cnids_t *cnids, char *buf, ssize_t offset, + size_t bufsize, char *toc_buf, int *toc_idx) +{ + ssize_t result; + int len, i; + int 
cnid_count = dalloc_size(cnids->ca_cnids); + uint64_t tag; + uint64_t id; + void *p; + + tag = sl_pack_tag(SQ_CPX_TYPE_CNIDS, offset / 8, 0); + result = sl_push_uint64_val(toc_buf, *toc_idx * 8, MAX_SLQ_TOC, tag); + if (result == -1) { + return -1; + } + + tag = sl_pack_tag(SQ_TYPE_COMPLEX, 1, *toc_idx + 1); + offset = sl_push_uint64_val(buf, offset, bufsize, tag); + if (offset == -1) { + return -1; + } + + *toc_idx += 1; + + len = cnid_count + 1; + if (cnid_count > 0) { + len ++; + } + + /* unknown meaning, but always 8 */ + tag = sl_pack_tag(SQ_TYPE_CNIDS, len, 8 ); + offset = sl_push_uint64_val(buf, offset, bufsize, tag); + if (offset == -1) { + return -1; + } + + if (cnid_count > 0) { + tag = sl_pack_tag(cnids->ca_unkn1, cnid_count, cnids->ca_context); + offset = sl_push_uint64_val(buf, offset, bufsize, tag); + if (offset == -1) { + return -1; + } + + for (i = 0; i < cnid_count; i++) { + p = dalloc_get_object(cnids->ca_cnids, i); + if (p == NULL) { + return -1; + } + memcpy(&id, p, sizeof(uint64_t)); + offset = sl_push_uint64_val(buf, offset, bufsize, id); + if (offset == -1) { + return -1; + } + } + } + + return offset; +} + +static ssize_t sl_pack_array(sl_array_t *array, char *buf, ssize_t offset, + size_t bufsize, char *toc_buf, int *toc_idx) +{ + ssize_t result; + int count = dalloc_size(array); + int octets = offset / 8; + uint64_t tag; + int toc_idx_save = *toc_idx; + + tag = sl_pack_tag(SQ_TYPE_COMPLEX, 1, *toc_idx + 1); + offset = sl_push_uint64_val(buf, offset, bufsize, tag); + if (offset == -1) { + return -1; + } + + *toc_idx += 1; + + offset = sl_pack_loop(array, buf, offset, bufsize - offset, toc_buf, toc_idx, &count); + + tag = sl_pack_tag(SQ_CPX_TYPE_ARRAY, octets, count); + result = sl_push_uint64_val(toc_buf, toc_idx_save * 8, MAX_SLQ_TOC, tag); + if (result == -1) { + return -1; + } + + return offset; +} + +static ssize_t sl_pack_dict(sl_array_t *dict, char *buf, ssize_t offset, + size_t bufsize, char *toc_buf, int *toc_idx, int *count) +{ + 
ssize_t result; + uint64_t tag; + + tag = sl_pack_tag(SQ_CPX_TYPE_DICT, offset / 8, + dalloc_size(dict)); + result = sl_push_uint64_val(toc_buf, *toc_idx * 8, MAX_SLQ_TOC, tag); + if (result == -1) { + return -1; + } + + tag = sl_pack_tag(SQ_TYPE_COMPLEX, 1, *toc_idx + 1); + offset = sl_push_uint64_val(buf, offset, bufsize, tag); + if (offset == -1) { + return -1; + } + + *toc_idx += 1; + + offset = sl_pack_loop(dict, buf, offset, bufsize - offset, toc_buf, toc_idx, count); + + return offset; +} + +static ssize_t sl_pack_filemeta(sl_filemeta_t *fm, char *buf, ssize_t offset, + size_t bufsize, char *toc_buf, int *toc_idx) +{ + ssize_t result; + ssize_t fmlen; + ssize_t saveoff = offset; + uint64_t tag; + + tag = sl_pack_tag(SQ_TYPE_COMPLEX, 1, *toc_idx + 1); + offset = sl_push_uint64_val(buf, offset, bufsize, tag); + if (offset == -1) { + return -1; + } + + offset += 8; + + fmlen = sl_pack(fm, buf + offset, bufsize - offset); + if (fmlen == -1) { + return -1; + } + + /* + * Check for empty filemeta array, if it's only 40 bytes, it's + * only the header but no content + */ + if (fmlen > 40) { + offset += fmlen; + } else { + fmlen = 0; + } + + /* unknown meaning, but always 8 */ + tag = sl_pack_tag(SQ_TYPE_DATA, (fmlen / 8) + 1, 8); + result = sl_push_uint64_val(buf, saveoff + 8, bufsize, tag); + if (result == -1) { + return -1; + } + + tag = sl_pack_tag(SQ_CPX_TYPE_FILEMETA, saveoff / 8, fmlen / 8); + result = sl_push_uint64_val(toc_buf, *toc_idx * 8, MAX_SLQ_TOC, tag); + if (result == -1) { + return -1; + } + + *toc_idx += 1; + + return offset; +} + +static ssize_t sl_pack_string(char *s, char *buf, ssize_t offset, size_t bufsize, + char *toc_buf, int *toc_idx) +{ + ssize_t result; + size_t len, octets, used_in_last_octet; + uint64_t tag; + + len = strlen(s); + if (len > MAX_SL_STRLEN) { + return -1; + } + octets = (len + 7) / 8; + used_in_last_octet = len % 8; + if (used_in_last_octet == 0) { + used_in_last_octet = 8; + } + + tag = sl_pack_tag(SQ_CPX_TYPE_STRING, 
offset / 8, used_in_last_octet); + result = sl_push_uint64_val(toc_buf, *toc_idx * 8, MAX_SLQ_TOC, tag); + if (result == -1) { + return -1; + } + + tag = sl_pack_tag(SQ_TYPE_COMPLEX, 1, *toc_idx + 1); + offset = sl_push_uint64_val(buf, offset, bufsize, tag); + if (offset == -1) { + return -1; + } + + *toc_idx += 1; + + tag = sl_pack_tag(SQ_TYPE_DATA, octets + 1, used_in_last_octet); + offset = sl_push_uint64_val(buf, offset, bufsize, tag); + if (offset == -1) { + return -1; + } + + if (offset + (octets * 8) > bufsize) { + return -1; + } + + memset(buf + offset, 0, octets * 8); + memcpy(buf + offset, s, len); + offset += octets * 8; + + return offset; +} + +static ssize_t sl_pack_string_as_utf16(char *s, char *buf, ssize_t offset, + size_t bufsize, char *toc_buf, int *toc_idx) +{ + ssize_t result; + int utf16_plus_bom_len, octets, used_in_last_octet; + char *utf16string = NULL; + char bom[] = { 0xff, 0xfe }; + size_t slen, utf16len; + uint64_t tag; + bool ok; + + slen = strlen(s); + if (slen > MAX_SL_STRLEN) { + return -1; + } + + ok = convert_string_talloc(talloc_tos(), + CH_UTF8, + CH_UTF16LE, + s, + slen, + &utf16string, + &utf16len); + if (!ok) { + return -1; + } + + utf16_plus_bom_len = utf16len + 2; + octets = (utf16_plus_bom_len + 7) / 8; + used_in_last_octet = utf16_plus_bom_len % 8; + if (used_in_last_octet == 0) { + used_in_last_octet = 8; + } + + tag = sl_pack_tag(SQ_CPX_TYPE_UTF16_STRING, offset / 8, used_in_last_octet); + result = sl_push_uint64_val(toc_buf, *toc_idx * 8, MAX_SLQ_TOC, tag); + if (result == -1) { + offset = -1; + goto done; + } + + tag = sl_pack_tag(SQ_TYPE_COMPLEX, 1, *toc_idx + 1); + offset = sl_push_uint64_val(buf, offset, bufsize, tag); + if (offset == -1) { + goto done; + } + + *toc_idx += 1; + + tag = sl_pack_tag(SQ_TYPE_DATA, octets + 1, used_in_last_octet); + offset = sl_push_uint64_val(buf, offset, bufsize, tag); + if (offset == -1) { + goto done; + } + + if (offset + (octets * 8) > bufsize) { + offset = -1; + goto done; + } + + 
memset(buf + offset, 0, octets * 8); + memcpy(buf + offset, &bom, sizeof(bom)); + memcpy(buf + offset + 2, utf16string, utf16len); + offset += octets * 8; + +done: + TALLOC_FREE(utf16string); + return offset; +} + +static ssize_t sl_pack_loop(DALLOC_CTX *query, char *buf, ssize_t offset, + size_t bufsize, char *toc_buf, int *toc_idx, int *count) +{ + const char *type; + int n; + uint64_t i; + sl_bool_t bl; + double d; + sl_time_t t; + void *p; + + for (n = 0; n < dalloc_size(query); n++) { + + type = dalloc_get_name(query, n); + if (type == NULL) { + return -1; + } + p = dalloc_get_object(query, n); + if (p == NULL) { + return -1; + } + + if (strcmp(type, "sl_array_t") == 0) { + offset = sl_pack_array(p, buf, offset, bufsize, + toc_buf, toc_idx); + } else if (strcmp(type, "sl_dict_t") == 0) { + offset = sl_pack_dict(p, buf, offset, bufsize, + toc_buf, toc_idx, count); + } else if (strcmp(type, "sl_filemeta_t") == 0) { + offset = sl_pack_filemeta(p, buf, offset, bufsize, + toc_buf, toc_idx); + } else if (strcmp(type, "uint64_t") == 0) { + memcpy(&i, p, sizeof(uint64_t)); + offset = sl_pack_uint64(i, buf, offset, bufsize); + } else if (strcmp(type, "uint64_t *") == 0) { + offset = sl_pack_uint64_array(p, buf, offset, + bufsize, count); + } else if (strcmp(type, "char *") == 0) { + offset = sl_pack_string(p, buf, offset, bufsize, + toc_buf, toc_idx); + } else if (strcmp(type, "smb_ucs2_t *") == 0) { + offset = sl_pack_string_as_utf16(p, buf, offset, bufsize, + toc_buf, toc_idx); + } else if (strcmp(type, "sl_bool_t") == 0) { + memcpy(&bl, p, sizeof(sl_bool_t)); + offset = sl_pack_bool(bl, buf, offset, bufsize); + } else if (strcmp(type, "double") == 0) { + memcpy(&d, p, sizeof(double)); + offset = sl_pack_float(d, buf, offset, bufsize); + } else if (strcmp(type, "sl_nil_t") == 0) { + offset = sl_pack_nil(buf, offset, bufsize); + } else if (strcmp(type, "sl_time_t") == 0) { + memcpy(&t, p, sizeof(sl_time_t)); + offset = sl_pack_date(t, buf, offset, bufsize); + } else 
if (strcmp(type, "sl_uuid_t") == 0) { + offset = sl_pack_uuid(p, buf, offset, bufsize); + } else if (strcmp(type, "sl_cnids_t") == 0) { + offset = sl_pack_CNID(p, buf, offset, + bufsize, toc_buf, toc_idx); + } else { + DEBUG(1, ("unknown type: %s", type)); + return -1; + } + if (offset == -1) { + DEBUG(1, ("error packing type: %s\n", type)); + return -1; + } + } + + return offset; +} + +/****************************************************************************** + * unmarshalling functions + ******************************************************************************/ + +static ssize_t sl_unpack_tag(const char *buf, + ssize_t offset, + size_t bufsize, + uint encoding, + struct sl_tag *tag) +{ + uint64_t val; + + if (offset + 8 > bufsize) { + DEBUG(1,("%s: buffer overflow\n", __func__)); + return -1; + } + + if (encoding == SL_ENC_LITTLE_ENDIAN) { + val = BVAL(buf, offset); + } else { + val = RBVAL(buf, offset); + } + + tag->size = (val & 0xffff) * 8; + tag->type = (val & 0xffff0000) >> 16; + tag->count = val >> 32; + tag->length = tag->count * 8; + + if (tag->size > MAX_SL_FRAGMENT_SIZE) { + DEBUG(1,("%s: size limit %zu\n", __func__, tag->size)); + return -1; + } + + if (tag->length > MAX_SL_FRAGMENT_SIZE) { + DEBUG(1,("%s: length limit %zu\n", __func__, tag->length)); + return -1; + } + + if (tag->count > MAX_SLQ_COUNT) { + DEBUG(1,("%s: count limit %d\n", __func__, tag->count)); + return -1; + } + + return offset + 8; +} + +static int sl_unpack_ints(DALLOC_CTX *query, + const char *buf, + ssize_t offset, + size_t bufsize, + int encoding) +{ + int i, result; + struct sl_tag tag; + uint64_t query_data64; + + offset = sl_unpack_tag(buf, offset, bufsize, encoding, &tag); + if (offset == -1) { + return -1; + } + + for (i = 0; i < tag.count; i++) { + offset = sl_pull_uint64_val(buf, offset, bufsize, encoding, &query_data64); + if (offset == -1) { + return -1; + } + result = dalloc_add_copy(query, &query_data64, uint64_t); + if (result != 0) { + return -1; + } + } 
+ + return tag.count; +} + +static int sl_unpack_date(DALLOC_CTX *query, + const char *buf, + ssize_t offset, + size_t bufsize, + int encoding) +{ + int i, result; + struct sl_tag tag; + uint64_t query_data64; + union { + double d; + uint64_t w; + } ieee_fp_union; + double fraction; + sl_time_t t; + + offset = sl_unpack_tag(buf, offset, bufsize, encoding, &tag); + if (offset == -1) { + return -1; + } + + for (i = 0; i < tag.count; i++) { + offset = sl_pull_uint64_val(buf, offset, bufsize, encoding, &query_data64); + if (offset == -1) { + return -1; + } + ieee_fp_union.w = query_data64; + fraction = ieee_fp_union.d - (uint64_t)ieee_fp_union.d; + + t = (sl_time_t) { + .tv_sec = ieee_fp_union.d + SPOTLIGHT_TIME_DELTA, + .tv_usec = fraction * 1000000 + }; + + result = dalloc_add_copy(query, &t, sl_time_t); + if (result != 0) { + return -1; + } + } + + return tag.count; +} + +static int sl_unpack_uuid(DALLOC_CTX *query, + const char *buf, + ssize_t offset, + size_t bufsize, + int encoding) +{ + int i, result; + sl_uuid_t uuid; + struct sl_tag tag; + + offset = sl_unpack_tag(buf, offset, bufsize, encoding, &tag); + if (offset == -1) { + return -1; + } + + for (i = 0; i < tag.count; i++) { + if (offset + 16 > bufsize) { + DEBUG(1,("%s: buffer overflow\n", __func__)); + return -1; + } + memcpy(uuid.sl_uuid, buf + offset, 16); + result = dalloc_add_copy(query, &uuid, sl_uuid_t); + if (result != 0) { + return -1; + } + offset += 16; + } + + return tag.count; +} + +static int sl_unpack_floats(DALLOC_CTX *query, + const char *buf, + ssize_t offset, + size_t bufsize, + int encoding) +{ + int i, result; + union { + double d; + uint32_t w[2]; + } ieee_fp_union; + struct sl_tag tag; + + offset = sl_unpack_tag(buf, offset, bufsize, encoding, &tag); + if (offset == -1) { + return -1; + } + + for (i = 0; i < tag.count; i++) { + if (offset + 8 > bufsize) { + DEBUG(1,("%s: buffer overflow\n", __func__)); + return -1; + } + if (encoding == SL_ENC_LITTLE_ENDIAN) { +#ifdef WORDS_BIGENDIAN 
+ ieee_fp_union.w[0] = IVAL(buf, offset + 4); + ieee_fp_union.w[1] = IVAL(buf, offset); +#else + ieee_fp_union.w[0] = IVAL(buf, offset); + ieee_fp_union.w[1] = IVAL(buf, offset + 4); +#endif + } else { +#ifdef WORDS_BIGENDIAN + ieee_fp_union.w[0] = RIVAL(buf, offset); + ieee_fp_union.w[1] = RIVAL(buf, offset + 4); +#else + ieee_fp_union.w[0] = RIVAL(buf, offset + 4); + ieee_fp_union.w[1] = RIVAL(buf, offset); +#endif + } + result = dalloc_add_copy(query, &ieee_fp_union.d, double); + if (result != 0) { + return -1; + } + offset += 8; + } + + return tag.count; +} + +static int sl_unpack_CNID(DALLOC_CTX *query, + const char *buf, + ssize_t offset, + size_t bufsize, + int length, + int encoding) +{ + int i, count, result; + uint64_t query_data64; + sl_cnids_t *cnids; + + cnids = talloc_zero(query, sl_cnids_t); + if (cnids == NULL) { + return -1; + } + cnids->ca_cnids = dalloc_new(cnids); + if (cnids->ca_cnids == NULL) { + return -1; + } + + if (length < 8) { + return -1; + } + if (length == 8) { + /* + * That's permitted, length=8 is an empty CNID array. + */ + result = dalloc_add(query, cnids, sl_cnids_t); + if (result != 0) { + return -1; + } + return 0; + } + + offset = sl_pull_uint64_val(buf, offset, bufsize, encoding, &query_data64); + if (offset == -1) { + return -1; + } + + /* + * Note: ca_unkn1 and ca_context could be taken from the tag + * type and count members, but the fields are packed + * differently in this context, so we can't use + * sl_unpack_tag(). 
+ */ + count = query_data64 & 0xffff;; + cnids->ca_unkn1 = (query_data64 & 0xffff0000) >> 16; + cnids->ca_context = query_data64 >> 32; + + for (i = 0; i < count; i++) { + offset = sl_pull_uint64_val(buf, offset, bufsize, encoding, &query_data64); + if (offset == -1) { + return -1; + } + + result = dalloc_add_copy(cnids->ca_cnids, &query_data64, uint64_t); + if (result != 0) { + return -1; + } + } + + result = dalloc_add(query, cnids, sl_cnids_t); + if (result != 0) { + return -1; + } + + return 0; +} + +static ssize_t sl_unpack_cpx(DALLOC_CTX *query, + const char *buf, + ssize_t offset, + size_t bufsize, + int cpx_query_type, + int cpx_query_count, + ssize_t toc_offset, + int encoding) +{ + int result; + ssize_t roffset = offset; + int unicode_encoding; + bool mark_exists; + char *p; + size_t slen, tmp_len; + sl_array_t *sl_array; + sl_dict_t *sl_dict; + sl_filemeta_t *sl_fm; + bool ok; + struct sl_tag tag; + + switch (cpx_query_type) { + case SQ_CPX_TYPE_ARRAY: + sl_array = dalloc_zero(query, sl_array_t); + if (sl_array == NULL) { + return -1; + } + roffset = sl_unpack_loop(sl_array, buf, offset, bufsize, + cpx_query_count, toc_offset, encoding); + if (roffset == -1) { + return -1; + } + result = dalloc_add(query, sl_array, sl_array_t); + if (result != 0) { + return -1; + } + break; + + case SQ_CPX_TYPE_DICT: + sl_dict = dalloc_zero(query, sl_dict_t); + if (sl_dict == NULL) { + return -1; + } + roffset = sl_unpack_loop(sl_dict, buf, offset, bufsize, + cpx_query_count, toc_offset, encoding); + if (roffset == -1) { + return -1; + } + result = dalloc_add(query, sl_dict, sl_dict_t); + if (result != 0) { + return -1; + } + break; + + case SQ_CPX_TYPE_STRING: + case SQ_CPX_TYPE_UTF16_STRING: + offset = sl_unpack_tag(buf, offset, bufsize, encoding, &tag); + if (offset == -1) { + return -1; + } + + if (tag.size < 16) { + DEBUG(1,("%s: string buffer too small\n", __func__)); + return -1; + } + slen = tag.size - 16 + tag.count; + if (slen > MAX_SL_FRAGMENT_SIZE) { + return 
-1; + } + + if (offset + slen > bufsize) { + DEBUG(1,("%s: buffer overflow\n", __func__)); + return -1; + } + + if (cpx_query_type == SQ_CPX_TYPE_STRING) { + p = talloc_strndup(query, buf + offset, slen); + if (p == NULL) { + return -1; + } + } else { + unicode_encoding = spotlight_get_utf16_string_encoding( + buf, offset, slen, encoding); + mark_exists = (unicode_encoding & SL_ENC_UTF_16) ? true : false; + if (unicode_encoding & SL_ENC_BIG_ENDIAN) { + DEBUG(1, ("Unsupported big endian UTF16 string")); + return -1; + } + slen -= mark_exists ? 2 : 0; + ok = convert_string_talloc( + query, + CH_UTF16LE, + CH_UTF8, + buf + offset + (mark_exists ? 2 : 0), + slen, + &p, + &tmp_len); + if (!ok) { + return -1; + } + } + + result = dalloc_stradd(query, p); + if (result != 0) { + return -1; + } + roffset += tag.size; + break; + + case SQ_CPX_TYPE_FILEMETA: + offset = sl_unpack_tag(buf, offset, bufsize, encoding, &tag); + if (offset == -1) { + return -1; + } + if (tag.size < 8) { + DBG_WARNING("size too mall: %zu\n", tag.size); + return -1; + } + + sl_fm = dalloc_zero(query, sl_filemeta_t); + if (sl_fm == NULL) { + return -1; + } + + if (tag.size >= 16) { + result = sl_unpack(sl_fm, + buf + offset, + bufsize - offset ); + if (result == -1) { + return -1; + } + } + result = dalloc_add(query, sl_fm, sl_filemeta_t); + if (result != 0) { + return -1; + } + roffset += tag.size; + break; + + case SQ_CPX_TYPE_CNIDS: + offset = sl_unpack_tag(buf, offset, bufsize, encoding, &tag); + if (offset == -1) { + return -1; + } + + result = sl_unpack_CNID(query, buf, offset, bufsize, + tag.size, encoding); + if (result == -1) { + return -1; + } + roffset += tag.size; + break; + + default: + DEBUG(1, ("unknown complex query type: %u", cpx_query_type)); + return -1; + } + + return roffset; +} + +static ssize_t sl_unpack_loop(DALLOC_CTX *query, + const char *buf, + ssize_t offset, + size_t bufsize, + int count, + ssize_t toc_offset, + int encoding) +{ + int i, toc_index, subcount; + uint64_t 
result; + + while (count > 0) { + struct sl_tag tag; + + if (offset >= toc_offset) { + return -1; + } + + result = sl_unpack_tag(buf, offset, bufsize, encoding, &tag); + if (result == -1) { + return -1; + } + + switch (tag.type) { + case SQ_TYPE_COMPLEX: { + struct sl_tag cpx_tag; + + if (tag.count < 1) { + DEBUG(1,("%s: invalid tag.count: %d\n", + __func__, tag.count)); + return -1; + } + toc_index = tag.count - 1; + if (toc_index > MAX_SLQ_TOCIDX) { + DEBUG(1,("%s: toc_index too large: %d\n", + __func__, toc_index)); + return -1; + } + result = sl_unpack_tag(buf, toc_offset + (toc_index * 8), + bufsize, encoding, &cpx_tag); + if (result == -1) { + return -1; + } + + offset = sl_unpack_cpx(query, buf, offset + 8, bufsize, cpx_tag.type, + cpx_tag.count, toc_offset, encoding); + if (offset == -1) { + return -1; + } + /* + * tag.size is not the size here, so we need + * to use the offset returned from sl_unpack_cpx() + * instead of offset += tag.size; + */ + count--; + break; + } + + case SQ_TYPE_NULL: { + sl_nil_t nil = 0; + + subcount = tag.count; + if (subcount < 1 || subcount > count) { + return -1; + } + for (i = 0; i < subcount; i++) { + result = dalloc_add_copy(query, &nil, sl_nil_t); + if (result != 0) { + return -1; + } + } + offset += tag.size; + count -= subcount; + break; + } + + case SQ_TYPE_BOOL: { + sl_bool_t b = (tag.count != 0); + + result = dalloc_add_copy(query, &b, sl_bool_t); + if (result != 0) { + return -1; + } + offset += tag.size; + count--; + break; + } + + case SQ_TYPE_INT64: + subcount = sl_unpack_ints(query, buf, offset, bufsize, encoding); + if (subcount < 1 || subcount > count) { + return -1; + } + offset += tag.size; + count -= subcount; + break; + + case SQ_TYPE_UUID: + subcount = sl_unpack_uuid(query, buf, offset, bufsize, encoding); + if (subcount < 1 || subcount > count) { + return -1; + } + offset += tag.size; + count -= subcount; + break; + + case SQ_TYPE_FLOAT: + subcount = sl_unpack_floats(query, buf, offset, bufsize, 
encoding); + if (subcount < 1 || subcount > count) { + return -1; + } + offset += tag.size; + count -= subcount; + break; + + case SQ_TYPE_DATE: + subcount = sl_unpack_date(query, buf, offset, bufsize, encoding); + if (subcount < 1 || subcount > count) { + return -1; + } + offset += tag.size; + count -= subcount; + break; + + default: + DEBUG(1, ("unknown query type: %d\n", tag.type)); + return -1; + } + } + + return offset; +} + +static ssize_t sl_pack(DALLOC_CTX *query, char *buf, size_t bufsize) +{ + ssize_t result; + char *toc_buf; + int toc_index = 0; + int toc_count = 0; + ssize_t offset, len; + uint64_t hdr; + uint32_t total_octets; + uint32_t data_octets; + uint64_t tag; + + memset(buf, 0, bufsize); + + toc_buf = talloc_zero_size(query, MAX_SLQ_TOC + 8); + if (toc_buf == NULL) { + return -1; + } + + offset = sl_pack_loop(query, buf, 16, bufsize, toc_buf + 8, &toc_index, &toc_count); + if (offset == -1 || offset < 16) { + DEBUG(10,("%s: sl_pack_loop error\n", __func__)); + return -1; + } + len = offset - 16; + + /* + * Marshalling overview: + * + * 16 bytes at the start of buf: + * + * 8 bytes byte order mark + * 4 bytes total octets + * 4 bytes table of content octets + * + * x bytes total octets * 8 from sl_pack_loop + * x bytes ToC octets * 8 from toc_buf + */ + + /* Byte-order mark - we are using little endian only for now */ + memcpy(buf, "432130dm", strlen("432130dm")); + + /* + * The data buffer and ToC buffer sizes are enocoded in number + * of octets (size / 8), plus one, because the octet encoding + * the sizes is included. + */ + data_octets = (len / 8) + 1; + total_octets = data_octets + toc_index + 1; + + hdr = total_octets; + hdr |= ((uint64_t)data_octets << 32); + + /* HDR */ + result = sl_push_uint64_val(buf, 8, bufsize, hdr); + if (result == -1) { + return -1; + } + + /* + * ToC tag with number of ToC entries plus one, the ToC tag + * header. 
+ */ + tag = sl_pack_tag(SQ_TYPE_TOC, toc_index + 1, 0); + result = sl_push_uint64_val(toc_buf, 0, MAX_SLQ_TOC, tag); + if (result == -1) { + return -1; + } + + if ((16 + len + ((toc_index + 1 ) * 8)) > bufsize) { + DEBUG(1, ("%s: exceeding size limit %zu", __func__, bufsize)); + return -1; + } + + memcpy(buf + 16 + len, toc_buf, (toc_index + 1 ) * 8); + len += 16 + (toc_index + 1 ) * 8; + + return len; +} + +/****************************************************************************** + * Global functions for packing und unpacking + ******************************************************************************/ + +NTSTATUS sl_pack_alloc(TALLOC_CTX *mem_ctx, + DALLOC_CTX *d, + struct mdssvc_blob *b, + size_t max_fragment_size) +{ + ssize_t len; + + b->spotlight_blob = talloc_zero_array(mem_ctx, + uint8_t, + max_fragment_size); + if (b->spotlight_blob == NULL) { + return NT_STATUS_NO_MEMORY; + } + + len = sl_pack(d, (char *)b->spotlight_blob, max_fragment_size); + if (len == -1) { + return NT_STATUS_DATA_ERROR; + } + + b->length = len; + b->size = len; + return NT_STATUS_OK; +} + +bool sl_unpack(DALLOC_CTX *query, const char *buf, size_t bufsize) +{ + ssize_t result; + ssize_t offset = 0; + int encoding; + uint64_t hdr; + uint32_t total_octets; + uint64_t total_bytes; + uint32_t data_octets; + uint64_t data_bytes; + uint64_t toc_offset; + struct sl_tag toc_tag; + + if (bufsize > MAX_SL_FRAGMENT_SIZE) { + return false; + } + + if (bufsize < 8) { + return false; + } + if (strncmp(buf + offset, "md031234", 8) == 0) { + encoding = SL_ENC_BIG_ENDIAN; + } else { + encoding = SL_ENC_LITTLE_ENDIAN; + } + offset += 8; + + offset = sl_pull_uint64_val(buf, offset, bufsize, encoding, &hdr); + if (offset == -1) { + return false; + } + + total_octets = hdr & UINT32_MAX; + data_octets = hdr >> 32; + + /* + * Both fields contain the number of octets of the + * corresponding buffer plus the tag octet. We adjust the + * values to match just the number of octets in the buffers. 
+ */ + if (total_octets < 1) { + return false; + } + if (data_octets < 1) { + return false; + } + total_octets--; + data_octets--; + data_bytes = ((uint64_t)data_octets) * 8; + total_bytes = ((uint64_t)total_octets) * 8; + + if (data_bytes >= total_bytes) { + DEBUG(1,("%s: data_bytes: %" PRIu64 ", total_bytes: %" PRIu64 "\n", + __func__, data_bytes, total_bytes)); + return false; + } + + if (total_bytes > (bufsize - offset)) { + return false; + } + + toc_offset = data_bytes; + + toc_offset = sl_unpack_tag(buf + offset, toc_offset, + bufsize - offset, encoding, &toc_tag); + if (toc_offset == -1) { + return false; + } + + if (toc_tag.type != SQ_TYPE_TOC) { + DEBUG(1,("%s: unknown tag type %d\n", __func__, toc_tag.type)); + return false; + } + + /* + * Check toc_tag.size even though we don't use it when unmarshalling + */ + if (toc_tag.size > MAX_SLQ_TOC) { + DEBUG(1,("%s: bad size %zu\n", __func__, toc_tag.size)); + return false; + } + if (toc_tag.size > (total_bytes - data_bytes)) { + DEBUG(1,("%s: bad size %zu\n", __func__, toc_tag.size)); + return false; + } + + if (toc_tag.count != 0) { + DEBUG(1,("%s: bad count %u\n", __func__, toc_tag.count)); + return false; + } + + /* + * We already consumed 16 bytes from the buffer (BOM and size + * tag), so we start at buf + offset. + */ + result = sl_unpack_loop(query, buf + offset, 0, bufsize - offset, + 1, toc_offset, encoding); + if (result == -1) { + DEBUG(1,("%s: sl_unpack_loop failed\n", __func__)); + return false; + } + + return true; +} diff --git a/source3/rpc_server/mdssvc/marshalling.h b/source3/rpc_server/mdssvc/marshalling.h new file mode 100644 index 0000000..2cc1b44 --- /dev/null +++ b/source3/rpc_server/mdssvc/marshalling.h @@ -0,0 +1,62 @@ +/* + Unix SMB/CIFS implementation. 
+ Main metadata server / Spotlight routines + + Copyright (C) Ralph Boehme 2015 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _MDSSVC_MARSHALLING_H +#define _MDSSVC_MARSHALLING_H + +#include "dalloc.h" +#include "libcli/util/ntstatus.h" +#include "lib/util/data_blob.h" +#include "librpc/gen_ndr/mdssvc.h" + +#define MAX_SL_FRAGMENT_SIZE 0xFFFFF + +/* Can be ored and used as flags */ +#define SL_ENC_LITTLE_ENDIAN 1 +#define SL_ENC_BIG_ENDIAN 2 +#define SL_ENC_UTF_16 4 + +typedef DALLOC_CTX sl_array_t; /* an array of elements */ +typedef DALLOC_CTX sl_dict_t; /* an array of key/value elements */ +typedef DALLOC_CTX sl_filemeta_t; /* contains one sl_array_t */ +typedef int sl_nil_t; /* a nil element */ +typedef bool sl_bool_t; +typedef struct timeval sl_time_t; +typedef struct { + char sl_uuid[16]; +} sl_uuid_t; +typedef struct { + uint16_t ca_unkn1; + uint32_t ca_context; + DALLOC_CTX *ca_cnids; +} sl_cnids_t; /* an array of CNIDs */ + +/****************************************************************************** + * Function declarations + ******************************************************************************/ + +extern NTSTATUS sl_pack_alloc(TALLOC_CTX *mem_ctx, + DALLOC_CTX *d, + struct mdssvc_blob *b, + size_t max_fragment_size); + +extern bool sl_unpack(DALLOC_CTX *query, const char *buf, size_t bufsize); + +#endif diff --git a/source3/rpc_server/mdssvc/mdssvc.c 
b/source3/rpc_server/mdssvc/mdssvc.c new file mode 100644 index 0000000..5f0ec02 --- /dev/null +++ b/source3/rpc_server/mdssvc/mdssvc.c @@ -0,0 +1,1893 @@ +/* + Unix SMB/CIFS implementation. + Main metadata server / Spotlight routines + + Copyright (C) Ralph Boehme 2012-2014 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "includes.h" +#include "smbd/proto.h" +#include "librpc/gen_ndr/auth.h" +#include "dbwrap/dbwrap.h" +#include "lib/util/dlinklist.h" +#include "lib/util/util_tdb.h" +#include "lib/util/time_basic.h" +#include "lib/dbwrap/dbwrap_rbt.h" +#include "libcli/security/dom_sid.h" +#include "libcli/security/security.h" +#include "mdssvc.h" +#include "mdssvc_noindex.h" +#ifdef HAVE_SPOTLIGHT_BACKEND_TRACKER +#include "mdssvc_tracker.h" +#endif +#ifdef HAVE_SPOTLIGHT_BACKEND_ES +#include "mdssvc_es.h" +#endif +#include "lib/global_contexts.h" + +#undef DBGC_CLASS +#define DBGC_CLASS DBGC_RPC_SRV + +struct slrpc_cmd { + const char *name; + bool (*function)(struct mds_ctx *mds_ctx, + const DALLOC_CTX *query, + DALLOC_CTX *reply); +}; + +struct slq_destroy_state { + struct tevent_context *ev; + struct sl_query *slq; +}; + +/* + * This is a static global because we may be called multiple times and + * we only want one mdssvc_ctx per connection to Tracker. + * + * The client will bind multiple times to the mdssvc RPC service, once + * for every tree connect. 
+ */ +static struct mdssvc_ctx *mdssvc_ctx = NULL; + +/* + * If these functions return an error, they hit something like a non + * recoverable talloc error. Most errors are dealt with by returning + * an error code in the Spotlight RPC reply. + */ +static bool slrpc_fetch_properties(struct mds_ctx *mds_ctx, + const DALLOC_CTX *query, DALLOC_CTX *reply); +static bool slrpc_open_query(struct mds_ctx *mds_ctx, + const DALLOC_CTX *query, DALLOC_CTX *reply); +static bool slrpc_fetch_query_results(struct mds_ctx *mds_ctx, + const DALLOC_CTX *query, DALLOC_CTX *reply); +static bool slrpc_store_attributes(struct mds_ctx *mds_ctx, + const DALLOC_CTX *query, DALLOC_CTX *reply); +static bool slrpc_fetch_attributenames(struct mds_ctx *mds_ctx, + const DALLOC_CTX *query, DALLOC_CTX *reply); +static bool slrpc_fetch_attributes(struct mds_ctx *mds_ctx, + const DALLOC_CTX *query, DALLOC_CTX *reply); +static bool slrpc_close_query(struct mds_ctx *mds_ctx, + const DALLOC_CTX *query, DALLOC_CTX *reply); + +/************************************************ + * Misc utility functions + ************************************************/ + +/** + * Add requested metadata for a query result element + * + * This could be rewritten to something more sophisticated like + * querying metadata from Tracker. + * + * If path or sp is NULL, simply add nil values for all attributes. 
+ **/ +static bool add_filemeta(struct mds_ctx *mds_ctx, + sl_array_t *reqinfo, + sl_array_t *fm_array, + const char *path, + const struct stat_ex *sp) +{ + sl_array_t *meta; + sl_nil_t nil; + int i, metacount, result; + uint64_t uint64var; + sl_time_t sl_time; + char *p; + const char *attribute; + size_t nfc_len; + const char *nfc_path = path; + size_t nfd_buf_size; + char *nfd_path = NULL; + char *dest = NULL; + size_t dest_remaining; + size_t nconv; + + metacount = dalloc_size(reqinfo); + if (metacount == 0 || path == NULL || sp == NULL) { + result = dalloc_add_copy(fm_array, &nil, sl_nil_t); + if (result != 0) { + return false; + } + return true; + } + + meta = dalloc_zero(fm_array, sl_array_t); + if (meta == NULL) { + return false; + } + + nfc_len = strlen(nfc_path); + /* + * Simple heuristic, strlen by two should give enough room for NFC to + * NFD conversion. + */ + nfd_buf_size = nfc_len * 2; + nfd_path = talloc_array(meta, char, nfd_buf_size); + if (nfd_path == NULL) { + return false; + } + dest = nfd_path; + dest_remaining = talloc_array_length(dest); + + nconv = smb_iconv(mds_ctx->ic_nfc_to_nfd, + &nfc_path, + &nfc_len, + &dest, + &dest_remaining); + if (nconv == (size_t)-1) { + return false; + } + + for (i = 0; i < metacount; i++) { + attribute = dalloc_get_object(reqinfo, i); + if (attribute == NULL) { + return false; + } + if (strcmp(attribute, "kMDItemDisplayName") == 0 + || strcmp(attribute, "kMDItemFSName") == 0) { + p = strrchr(nfd_path, '/'); + if (p) { + result = dalloc_stradd(meta, p + 1); + if (result != 0) { + return false; + } + } + } else if (strcmp(attribute, "kMDItemPath") == 0) { + result = dalloc_stradd(meta, nfd_path); + if (result != 0) { + return false; + } + } else if (strcmp(attribute, "kMDItemFSSize") == 0) { + uint64var = sp->st_ex_size; + result = dalloc_add_copy(meta, &uint64var, uint64_t); + if (result != 0) { + return false; + } + } else if (strcmp(attribute, "kMDItemFSOwnerUserID") == 0) { + uint64var = sp->st_ex_uid; + 
result = dalloc_add_copy(meta, &uint64var, uint64_t); + if (result != 0) { + return false; + } + } else if (strcmp(attribute, "kMDItemFSOwnerGroupID") == 0) { + uint64var = sp->st_ex_gid; + result = dalloc_add_copy(meta, &uint64var, uint64_t); + if (result != 0) { + return false; + } + } else if (strcmp(attribute, "kMDItemFSContentChangeDate") == 0 || + strcmp(attribute, "kMDItemContentModificationDate") == 0) + { + sl_time = convert_timespec_to_timeval(sp->st_ex_mtime); + result = dalloc_add_copy(meta, &sl_time, sl_time_t); + if (result != 0) { + return false; + } + } else { + result = dalloc_add_copy(meta, &nil, sl_nil_t); + if (result != 0) { + return false; + } + } + } + + result = dalloc_add(fm_array, meta, sl_array_t); + if (result != 0) { + return false; + } + return true; +} + +static int cnid_comp_fn(const void *p1, const void *p2) +{ + const uint64_t *cnid1 = p1, *cnid2 = p2; + if (*cnid1 == *cnid2) { + return 0; + } + if (*cnid1 < *cnid2) { + return -1; + } + return 1; +} + +/** + * Create a sorted copy of a CNID array + **/ +static bool sort_cnids(struct sl_query *slq, const DALLOC_CTX *d) +{ + uint64_t *cnids = NULL; + int i; + const void *p; + + cnids = talloc_array(slq, uint64_t, dalloc_size(d)); + if (cnids == NULL) { + return false; + } + + for (i = 0; i < dalloc_size(d); i++) { + p = dalloc_get_object(d, i); + if (p == NULL) { + return NULL; + } + memcpy(&cnids[i], p, sizeof(uint64_t)); + } + qsort(cnids, dalloc_size(d), sizeof(uint64_t), cnid_comp_fn); + + slq->cnids = cnids; + slq->cnids_num = dalloc_size(d); + + return true; +} + +/** + * Allocate result handle used in the async Tracker cursor result + * handler for storing results + **/ +static bool create_result_handle(struct sl_query *slq) +{ + sl_nil_t nil = 0; + struct sl_rslts *query_results; + int result; + + if (slq->query_results) { + DEBUG(1, ("unexpected existing result handle\n")); + return false; + } + + query_results = talloc_zero(slq, struct sl_rslts); + if (query_results == 
NULL) { + return false; + } + + /* CNIDs */ + query_results->cnids = talloc_zero(query_results, sl_cnids_t); + if (query_results->cnids == NULL) { + return false; + } + query_results->cnids->ca_cnids = dalloc_new(query_results->cnids); + if (query_results->cnids->ca_cnids == NULL) { + return false; + } + + query_results->cnids->ca_unkn1 = 0xadd; + if (slq->ctx2 > UINT32_MAX) { + DEBUG(1,("64bit ctx2 id too large: 0x%jx", (uintmax_t)slq->ctx2)); + return false; + } + query_results->cnids->ca_context = (uint32_t)slq->ctx2; + + /* FileMeta */ + query_results->fm_array = dalloc_zero(query_results, sl_array_t); + if (query_results->fm_array == NULL) { + return false; + } + + /* For some reason the list of results always starts with a nil entry */ + result = dalloc_add_copy(query_results->fm_array, &nil, sl_nil_t); + if (result != 0) { + return false; + } + + slq->query_results = query_results; + return true; +} + +static bool add_results(sl_array_t *array, struct sl_query *slq) +{ + sl_filemeta_t *fm; + uint64_t status; + int result; + bool ok; + + /* + * Taken from network traces against a macOS SMB Spotlight server: if + * the search is not finished yet in the backend macOS returns 0x23, + * otherwise 0x0. 
+ */ + if (slq->state >= SLQ_STATE_DONE) { + status = 0; + } else { + status = 0x23; + } + + /* FileMeta */ + fm = dalloc_zero(array, sl_filemeta_t); + if (fm == NULL) { + return false; + } + + result = dalloc_add_copy(array, &status, uint64_t); + if (result != 0) { + return false; + } + result = dalloc_add(array, slq->query_results->cnids, sl_cnids_t); + if (result != 0) { + return false; + } + if (slq->query_results->num_results > 0) { + result = dalloc_add(fm, slq->query_results->fm_array, sl_array_t); + if (result != 0) { + return false; + } + } + result = dalloc_add(array, fm, sl_filemeta_t); + if (result != 0) { + return false; + } + + /* This ensure the results get clean up after been sent to the client */ + talloc_move(array, &slq->query_results); + + ok = create_result_handle(slq); + if (!ok) { + DEBUG(1, ("couldn't add result handle\n")); + slq->state = SLQ_STATE_ERROR; + return false; + } + + return true; +} + +static const struct slrpc_cmd *slrpc_cmd_by_name(const char *rpccmd) +{ + size_t i; + static const struct slrpc_cmd cmds[] = { + { "fetchPropertiesForContext:", slrpc_fetch_properties}, + { "openQueryWithParams:forContext:", slrpc_open_query}, + { "fetchQueryResultsForContext:", slrpc_fetch_query_results}, + { "storeAttributes:forOIDArray:context:", slrpc_store_attributes}, + { "fetchAttributeNamesForOIDArray:context:", slrpc_fetch_attributenames}, + { "fetchAttributes:forOIDArray:context:", slrpc_fetch_attributes}, + { "fetchAllAttributes:forOIDArray:context:", slrpc_fetch_attributes}, + { "closeQueryForContext:", slrpc_close_query}, + }; + + for (i = 0; i < ARRAY_SIZE(cmds); i++) { + int cmp; + + cmp = strcmp(cmds[i].name, rpccmd); + if (cmp == 0) { + return &cmds[i]; + } + } + + return NULL; +} + +/** + * Search the list of active queries given their context ids + **/ +static struct sl_query *slq_for_ctx(struct mds_ctx *mds_ctx, + uint64_t ctx1, uint64_t ctx2) +{ + struct sl_query *q; + + for (q = mds_ctx->query_list; q; q = q->next) { + if 
((q->ctx1 == ctx1) && (q->ctx2 == ctx2)) { + return q; + } + } + + return NULL; +} + +static int slq_destructor_cb(struct sl_query *slq) +{ + SLQ_DEBUG(10, slq, "destroying"); + + /* Free all entries before freeing the slq handle! */ + TALLOC_FREE(slq->entries_ctx); + TALLOC_FREE(slq->te); + + if (slq->mds_ctx != NULL) { + DLIST_REMOVE(slq->mds_ctx->query_list, slq); + slq->mds_ctx = NULL; + } + + TALLOC_FREE(slq->backend_private); + + return 0; +} + +/** + * Remove talloc_refcounted entry from mapping db + * + * Multiple queries (via the slq handle) may reference a + * sl_inode_path_map entry, when the last reference goes away as the + * queries are closed and this gets called to remove the entry from + * the db. + **/ +static int ino_path_map_destr_cb(struct sl_inode_path_map *entry) +{ + NTSTATUS status; + TDB_DATA key; + + key = make_tdb_data((uint8_t *)&entry->ino, sizeof(entry->ino)); + + status = dbwrap_delete(entry->mds_ctx->ino_path_map, key); + if (!NT_STATUS_IS_OK(status)) { + DEBUG(1, ("Failed to delete record: %s\n", nt_errstr(status))); + return -1; + } + + DBG_DEBUG("deleted [0x%"PRIx64"] [%s]\n", entry->ino, entry->path); + return 0; +} + +/** + * Add result to inode->path mapping dbwrap rbt db + * + * This is necessary as a CNID db substitute, ie we need a way to + * simulate unique, constant numerical identifiers for paths with an + * API that supports mapping from id to path. + * + * Entries are talloc'ed of the query, using talloc_reference() if + * multiple queries returned the same result. That way we can cleanup + * entries by calling talloc_free() on the query slq handles. 
+ **/ + +static bool inode_map_add(struct sl_query *slq, + uint64_t ino, + const char *path, + struct stat_ex *st) +{ + NTSTATUS status; + struct sl_inode_path_map *entry; + TDB_DATA key, value; + void *p; + + key = make_tdb_data((uint8_t *)&ino, sizeof(ino)); + status = dbwrap_fetch(slq->mds_ctx->ino_path_map, slq, key, &value); + + if (NT_STATUS_IS_OK(status)) { + /* + * We have one db, so when different parallel queries + * return the same file, we have to refcount entries + * in the db. + */ + + if (value.dsize != sizeof(void *)) { + DEBUG(1, ("invalid dsize\n")); + return false; + } + memcpy(&p, value.dptr, sizeof(p)); + entry = talloc_get_type_abort(p, struct sl_inode_path_map); + + DEBUG(10, ("map: %s\n", entry->path)); + + entry = talloc_reference(slq->entries_ctx, entry); + if (entry == NULL) { + DEBUG(1, ("talloc_reference failed\n")); + return false; + } + return true; + } + + if (!NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) { + DEBUG(1, ("dbwrap_fetch failed %s\n", nt_errstr(status))); + return false; + } + + entry = talloc_zero(slq->entries_ctx, struct sl_inode_path_map); + if (entry == NULL) { + DEBUG(1, ("talloc failed\n")); + return false; + } + + entry->ino = ino; + entry->mds_ctx = slq->mds_ctx; + entry->st = *st; + entry->path = talloc_strdup(entry, path); + if (entry->path == NULL) { + DEBUG(1, ("talloc failed\n")); + TALLOC_FREE(entry); + return false; + } + + status = dbwrap_store(slq->mds_ctx->ino_path_map, key, + make_tdb_data((void *)&entry, sizeof(void *)), 0); + if (!NT_STATUS_IS_OK(status)) { + DEBUG(1, ("Failed to store record: %s\n", nt_errstr(status))); + TALLOC_FREE(entry); + return false; + } + + talloc_set_destructor(entry, ino_path_map_destr_cb); + + return true; +} + +bool mds_add_result(struct sl_query *slq, const char *path) +{ + struct smb_filename *smb_fname = NULL; + const char *relative = NULL; + char *fake_path = NULL; + struct stat_ex sb; + uint64_t ino64; + int result; + NTSTATUS status; + bool sub; + bool ok; + + /* + 
* We're in a tevent callback which means in the case of + * running as external RPC service we're running as root and + * not as the user. + */ + if (!become_authenticated_pipe_user(slq->mds_ctx->pipe_session_info)) { + DBG_ERR("can't become authenticated user: %d\n", + slq->mds_ctx->uid); + smb_panic("can't become authenticated user"); + } + + if (geteuid() != slq->mds_ctx->uid) { + DBG_ERR("uid mismatch: %d/%d\n", geteuid(), slq->mds_ctx->uid); + smb_panic("uid mismatch"); + } + + /* + * We've changed identity to the authenticated pipe user, so + * any function exit below must ensure we switch back + */ + + status = synthetic_pathref(talloc_tos(), + slq->mds_ctx->conn->cwd_fsp, + path, + NULL, + NULL, + 0, + 0, + &smb_fname); + if (!NT_STATUS_IS_OK(status)) { + DBG_DEBUG("synthetic_pathref [%s]: %s\n", + smb_fname_str_dbg(smb_fname), + nt_errstr(status)); + unbecome_authenticated_pipe_user(); + return true; + } + + sb = smb_fname->st; + + status = smbd_check_access_rights_fsp(slq->mds_ctx->conn->cwd_fsp, + smb_fname->fsp, + false, + FILE_READ_DATA); + unbecome_authenticated_pipe_user(); + if (!NT_STATUS_IS_OK(status)) { + TALLOC_FREE(smb_fname); + return true; + } + + /* Done with smb_fname now. */ + TALLOC_FREE(smb_fname); + + ino64 = SMB_VFS_FS_FILE_ID(slq->mds_ctx->conn, &sb); + + if (slq->cnids) { + bool found; + + /* + * Check whether the found element is in the requested + * set of IDs. 
Note that we're faking CNIDs by using + * filesystem inode numbers here + */ + found = bsearch(&ino64, + slq->cnids, + slq->cnids_num, + sizeof(uint64_t), + cnid_comp_fn); + if (!found) { + return true; + } + } + + sub = subdir_of(slq->mds_ctx->spath, + slq->mds_ctx->spath_len, + path, + &relative); + if (!sub) { + DBG_ERR("[%s] is not inside [%s]\n", + path, slq->mds_ctx->spath); + slq->state = SLQ_STATE_ERROR; + return false; + } + + /* + * Add inode number and filemeta to result set, this is what + * we return as part of the result set of a query + */ + result = dalloc_add_copy(slq->query_results->cnids->ca_cnids, + &ino64, + uint64_t); + if (result != 0) { + DBG_ERR("dalloc error\n"); + slq->state = SLQ_STATE_ERROR; + return false; + } + + fake_path = talloc_asprintf(slq, + "/%s/%s", + slq->mds_ctx->sharename, + relative); + if (fake_path == NULL) { + slq->state = SLQ_STATE_ERROR; + return false; + } + + ok = add_filemeta(slq->mds_ctx, + slq->reqinfo, + slq->query_results->fm_array, + fake_path, + &sb); + if (!ok) { + DBG_ERR("add_filemeta error\n"); + TALLOC_FREE(fake_path); + slq->state = SLQ_STATE_ERROR; + return false; + } + + ok = inode_map_add(slq, ino64, fake_path, &sb); + TALLOC_FREE(fake_path); + if (!ok) { + DEBUG(1, ("inode_map_add error\n")); + slq->state = SLQ_STATE_ERROR; + return false; + } + + slq->query_results->num_results++; + return true; +} + +/*********************************************************** + * Spotlight RPC functions + ***********************************************************/ + +static bool slrpc_fetch_properties(struct mds_ctx *mds_ctx, + const DALLOC_CTX *query, DALLOC_CTX *reply) +{ + sl_dict_t *dict; + sl_array_t *array; + char *s; + uint64_t u; + sl_bool_t b; + sl_uuid_t uuid; + int result; + + dict = dalloc_zero(reply, sl_dict_t); + if (dict == NULL) { + return false; + } + + /* kMDSStoreHasPersistentUUID = false */ + result = dalloc_stradd(dict, "kMDSStoreHasPersistentUUID"); + if (result != 0) { + return false; + } 
+ b = false; + result = dalloc_add_copy(dict, &b, sl_bool_t); + if (result != 0) { + return false; + } + + /* kMDSStoreIsBackup = false */ + result = dalloc_stradd(dict, "kMDSStoreIsBackup"); + if (result != 0) { + return false; + } + b = false; + result = dalloc_add_copy(dict, &b, sl_bool_t); + if (result != 0) { + return false; + } + + /* kMDSStoreUUID = uuid */ + result = dalloc_stradd(dict, "kMDSStoreUUID"); + if (result != 0) { + return false; + } + memcpy(uuid.sl_uuid, "fakeuuidfakeuuid", sizeof(uuid.sl_uuid)); + result = dalloc_add_copy(dict, &uuid, sl_uuid_t); + if (result != 0) { + return false; + } + + /* kMDSStoreSupportsVolFS = true */ + result = dalloc_stradd(dict, "kMDSStoreSupportsVolFS"); + if (result != 0) { + return false; + } + b = true; + result = dalloc_add_copy(dict, &b, sl_bool_t); + if (result != 0) { + return false; + } + + /* kMDSVolumeUUID = uuid */ + result = dalloc_stradd(dict, "kMDSVolumeUUID"); + if (result != 0) { + return false; + } + memcpy(uuid.sl_uuid, "fakeuuidfakeuuid", sizeof(uuid.sl_uuid)); + result = dalloc_add_copy(dict, &uuid, sl_uuid_t); + if (result != 0) { + return false; + } + + /* kMDSDiskStoreSpindleNumber = 1 (fake) */ + result = dalloc_stradd(dict, "kMDSDiskStoreSpindleNumber"); + if (result != 0) { + return false; + } + u = 1; + result = dalloc_add_copy(dict, &u, uint64_t); + if (result != 0) { + return false; + } + + /* kMDSDiskStorePolicy = 3 (whatever that means, taken from OS X) */ + result = dalloc_stradd(dict, "kMDSDiskStorePolicy"); + if (result != 0) { + return false; + } + u = 3; + result = dalloc_add_copy(dict, &u, uint64_t); + if (result != 0) { + return false; + } + + /* kMDSStoreMetaScopes array */ + result = dalloc_stradd(dict, "kMDSStoreMetaScopes"); + if (result != 0) { + return false; + } + array = dalloc_zero(dict, sl_array_t); + if (array == NULL) { + return NULL; + } + result = dalloc_stradd(array, "kMDQueryScopeComputer"); + if (result != 0) { + return false; + } + result = 
dalloc_stradd(array, "kMDQueryScopeAllIndexed"); + if (result != 0) { + return false; + } + result = dalloc_stradd(array, "kMDQueryScopeComputerIndexed"); + if (result != 0) { + return false; + } + result = dalloc_add(dict, array, sl_array_t); + if (result != 0) { + return false; + } + + /* kMDSStoreDevice = 0x1000003 (whatever that means, taken from OS X) */ + result = dalloc_stradd(dict, "kMDSStoreDevice"); + if (result != 0) { + return false; + } + u = 0x1000003; + result = dalloc_add_copy(dict, &u, uint64_t); + if (result != 0) { + return false; + } + + /* kMDSStoreSupportsTCC = true (whatever that means, taken from OS X) */ + result = dalloc_stradd(dict, "kMDSStoreSupportsTCC"); + if (result != 0) { + return false; + } + b = true; + result = dalloc_add_copy(dict, &b, sl_bool_t); + if (result != 0) { + return false; + } + + /* kMDSStorePathScopes = ["/"] (whatever that means, taken from OS X) */ + result = dalloc_stradd(dict, "kMDSStorePathScopes"); + if (result != 0) { + return false; + } + array = dalloc_zero(dict, sl_array_t); + if (array == NULL) { + return false; + } + s = talloc_strdup(dict, "/"); + if (s == NULL) { + return false; + } + talloc_set_name(s, "smb_ucs2_t *"); + result = dalloc_add(array, s, smb_ucs2_t *); + if (result != 0) { + return false; + } + result = dalloc_add(dict, array, sl_array_t); + if (result != 0) { + return false; + } + + result = dalloc_add(reply, dict, sl_dict_t); + if (result != 0) { + return false; + } + + return true; +} + +static void slq_close_timer(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval current_time, + void *private_data) +{ + struct sl_query *slq = talloc_get_type_abort( + private_data, struct sl_query); + struct mds_ctx *mds_ctx = slq->mds_ctx; + + SLQ_DEBUG(10, slq, "expired"); + + TALLOC_FREE(slq); + + if (CHECK_DEBUGLVL(10)) { + for (slq = mds_ctx->query_list; slq != NULL; slq = slq->next) { + SLQ_DEBUG(10, slq, "pending"); + } + } +} + +/** + * Translate a fake scope from the 
client like /sharename/dir + * to the real server-side path, replacing the "/sharename" part + * with the absolute server-side path of the share. + **/ +static bool mdssvc_real_scope(struct sl_query *slq, const char *fake_scope) +{ + size_t sname_len = strlen(slq->mds_ctx->sharename); + size_t fake_scope_len = strlen(fake_scope); + + if (fake_scope_len < sname_len + 1) { + DBG_ERR("Short scope [%s] for share [%s]\n", + fake_scope, slq->mds_ctx->sharename); + return false; + } + + slq->path_scope = talloc_asprintf(slq, + "%s%s", + slq->mds_ctx->spath, + fake_scope + sname_len + 1); + if (slq->path_scope == NULL) { + return false; + } + return true; +} + +/** + * Begin a search query + **/ +static bool slrpc_open_query(struct mds_ctx *mds_ctx, + const DALLOC_CTX *query, DALLOC_CTX *reply) +{ + bool ok; + uint64_t sl_result; + uint64_t *uint64p; + DALLOC_CTX *reqinfo; + sl_array_t *array, *path_scope; + sl_cnids_t *cnids; + struct sl_query *slq = NULL; + int result; + const char *querystring = NULL; + size_t querystring_len; + char *dest = NULL; + size_t dest_remaining; + size_t nconv; + char *scope = NULL; + + array = dalloc_zero(reply, sl_array_t); + if (array == NULL) { + return false; + } + + /* Allocate and initialize query object */ + slq = talloc_zero(mds_ctx, struct sl_query); + if (slq == NULL) { + return false; + } + slq->entries_ctx = talloc_named_const(slq, 0, "struct sl_query.entries_ctx"); + if (slq->entries_ctx == NULL) { + TALLOC_FREE(slq); + return false; + } + talloc_set_destructor(slq, slq_destructor_cb); + slq->state = SLQ_STATE_NEW; + slq->mds_ctx = mds_ctx; + + slq->last_used = timeval_current(); + slq->start_time = slq->last_used; + slq->expire_time = timeval_add(&slq->last_used, MAX_SL_RUNTIME, 0); + slq->te = tevent_add_timer(global_event_context(), slq, + slq->expire_time, slq_close_timer, slq); + if (slq->te == NULL) { + DEBUG(1, ("tevent_add_timer failed\n")); + goto error; + } + + querystring = dalloc_value_for_key(query, "DALLOC_CTX", 0, 
+ "DALLOC_CTX", 1, + "kMDQueryString", + "char *"); + if (querystring == NULL) { + DEBUG(1, ("missing kMDQueryString\n")); + goto error; + } + + querystring_len = talloc_array_length(querystring); + + slq->query_string = talloc_array(slq, char, querystring_len); + if (slq->query_string == NULL) { + DEBUG(1, ("out of memory\n")); + goto error; + } + dest = slq->query_string; + dest_remaining = talloc_array_length(dest); + + nconv = smb_iconv(mds_ctx->ic_nfd_to_nfc, + &querystring, + &querystring_len, + &dest, + &dest_remaining); + if (nconv == (size_t)-1) { + DBG_ERR("smb_iconv failed for: %s\n", querystring); + return false; + } + + uint64p = dalloc_get(query, "DALLOC_CTX", 0, "DALLOC_CTX", 0, + "uint64_t", 1); + if (uint64p == NULL) { + goto error; + } + slq->ctx1 = *uint64p; + uint64p = dalloc_get(query, "DALLOC_CTX", 0, "DALLOC_CTX", 0, + "uint64_t", 2); + if (uint64p == NULL) { + goto error; + } + slq->ctx2 = *uint64p; + + path_scope = dalloc_value_for_key(query, "DALLOC_CTX", 0, + "DALLOC_CTX", 1, + "kMDScopeArray", + "sl_array_t"); + if (path_scope == NULL) { + DBG_ERR("missing kMDScopeArray\n"); + goto error; + } + + scope = dalloc_get(path_scope, "char *", 0); + if (scope == NULL) { + scope = dalloc_get(path_scope, + "DALLOC_CTX", 0, + "char *", 0); + } + if (scope == NULL) { + DBG_ERR("Failed to parse kMDScopeArray\n"); + goto error; + } + + ok = mdssvc_real_scope(slq, scope); + if (!ok) { + goto error; + } + + reqinfo = dalloc_value_for_key(query, "DALLOC_CTX", 0, + "DALLOC_CTX", 1, + "kMDAttributeArray", + "sl_array_t"); + if (reqinfo == NULL) { + DBG_ERR("missing kMDAttributeArray\n"); + goto error; + } + + slq->reqinfo = talloc_steal(slq, reqinfo); + DEBUG(10, ("requested attributes: %s", dalloc_dump(reqinfo, 0))); + + cnids = dalloc_value_for_key(query, "DALLOC_CTX", 0, + "DALLOC_CTX", 1, + "kMDQueryItemArray", + "sl_array_t"); + if (cnids) { + ok = sort_cnids(slq, cnids->ca_cnids); + if (!ok) { + goto error; + } + } + + ok = 
create_result_handle(slq); + if (!ok) { + DEBUG(1, ("create_result_handle error\n")); + slq->state = SLQ_STATE_ERROR; + goto error; + } + + SLQ_DEBUG(10, slq, "new"); + + DLIST_ADD(mds_ctx->query_list, slq); + + ok = mds_ctx->backend->search_start(slq); + if (!ok) { + DBG_ERR("backend search_start failed\n"); + goto error; + } + + sl_result = 0; + result = dalloc_add_copy(array, &sl_result, uint64_t); + if (result != 0) { + goto error; + } + result = dalloc_add(reply, array, sl_array_t); + if (result != 0) { + goto error; + } + return true; + +error: + sl_result = UINT64_MAX; + TALLOC_FREE(slq); + result = dalloc_add_copy(array, &sl_result, uint64_t); + if (result != 0) { + return false; + } + result = dalloc_add(reply, array, sl_array_t); + if (result != 0) { + return false; + } + return true; +} + +/** + * Fetch results of a query + **/ +static bool slrpc_fetch_query_results(struct mds_ctx *mds_ctx, + const DALLOC_CTX *query, + DALLOC_CTX *reply) +{ + bool ok; + struct sl_query *slq = NULL; + uint64_t *uint64p, ctx1, ctx2; + uint64_t status; + sl_array_t *array; + int result; + + array = dalloc_zero(reply, sl_array_t); + if (array == NULL) { + return false; + } + + /* Get query for context */ + uint64p = dalloc_get(query, "DALLOC_CTX", 0, "DALLOC_CTX", 0, + "uint64_t", 1); + if (uint64p == NULL) { + goto error; + } + ctx1 = *uint64p; + + uint64p = dalloc_get(query, "DALLOC_CTX", 0, "DALLOC_CTX", 0, + "uint64_t", 2); + if (uint64p == NULL) { + goto error; + } + ctx2 = *uint64p; + + slq = slq_for_ctx(mds_ctx, ctx1, ctx2); + if (slq == NULL) { + DEBUG(1, ("bad context: [0x%jx,0x%jx]\n", + (uintmax_t)ctx1, (uintmax_t)ctx2)); + goto error; + } + + TALLOC_FREE(slq->te); + slq->last_used = timeval_current(); + slq->expire_time = timeval_add(&slq->last_used, MAX_SL_RUNTIME, 0); + slq->te = tevent_add_timer(global_event_context(), slq, + slq->expire_time, slq_close_timer, slq); + if (slq->te == NULL) { + DEBUG(1, ("tevent_add_timer failed\n")); + goto error; + } + + 
SLQ_DEBUG(10, slq, "fetch"); + + switch (slq->state) { + case SLQ_STATE_RUNNING: + case SLQ_STATE_RESULTS: + case SLQ_STATE_FULL: + case SLQ_STATE_DONE: + ok = add_results(array, slq); + if (!ok) { + DEBUG(1, ("error adding results\n")); + goto error; + } + if (slq->state == SLQ_STATE_FULL) { + slq->state = SLQ_STATE_RUNNING; + slq->mds_ctx->backend->search_cont(slq); + } + break; + + case SLQ_STATE_ERROR: + DEBUG(1, ("query in error state\n")); + goto error; + + default: + DEBUG(1, ("unexpected query state %d\n", slq->state)); + goto error; + } + + result = dalloc_add(reply, array, sl_array_t); + if (result != 0) { + goto error; + } + return true; + +error: + status = UINT64_MAX; + TALLOC_FREE(slq); + result = dalloc_add_copy(array, &status, uint64_t); + if (result != 0) { + return false; + } + result = dalloc_add(reply, array, sl_array_t); + if (result != 0) { + return false; + } + return true; +} + +/** + * Store metadata attributes for a CNID + **/ +static bool slrpc_store_attributes(struct mds_ctx *mds_ctx, + const DALLOC_CTX *query, DALLOC_CTX *reply) +{ + uint64_t sl_result; + sl_array_t *array; + int result; + + array = dalloc_zero(reply, sl_array_t); + if (array == NULL) { + return false; + } + + /* + * FIXME: not implemented. Used by the client for eg setting + * the modification date of the shared directory which clients + * poll indicating changes on the share and cause the client + * to refresh view. 
+ */ + + sl_result = 0; + result = dalloc_add_copy(array, &sl_result, uint64_t); + if (result != 0) { + return false; + } + result = dalloc_add(reply, array, sl_array_t); + if (result != 0) { + return false; + } + + return true; +} + +/** + * Fetch supported metadata attributes for a CNID + **/ +static bool slrpc_fetch_attributenames(struct mds_ctx *mds_ctx, + const DALLOC_CTX *query, + DALLOC_CTX *reply) +{ + uint64_t id; + sl_cnids_t *cnids; + sl_array_t *array; + uint64_t sl_result; + sl_cnids_t *replycnids; + sl_array_t *mdattrs; + sl_filemeta_t *fmeta; + int result; + void *p; + + cnids = dalloc_get(query, "DALLOC_CTX", 0, "sl_cnids_t", 1); + if (cnids == NULL) { + return false; + } + + p = dalloc_get_object(cnids->ca_cnids, 0); + if (p == NULL) { + return NULL; + } + memcpy(&id, p, sizeof(uint64_t)); + + /* Result array */ + array = dalloc_zero(reply, sl_array_t); + if (array == NULL) { + return false; + } + + result = dalloc_add(reply, array, sl_array_t); + if (result != 0) { + return false; + } + + /* Return result value 0 */ + sl_result = 0; + result = dalloc_add_copy(array, &sl_result, uint64_t); + if (result != 0) { + return false; + } + + /* Return CNID array */ + replycnids = talloc_zero(reply, sl_cnids_t); + if (replycnids == NULL) { + return false; + } + + replycnids->ca_cnids = dalloc_new(cnids); + if (replycnids->ca_cnids == NULL) { + return false; + } + + replycnids->ca_unkn1 = 0xfec; + replycnids->ca_context = cnids->ca_context; + result = dalloc_add_copy(replycnids->ca_cnids, &id, uint64_t); + if (result != 0) { + return false; + } + result = dalloc_add(array, replycnids, sl_cnids_t); + if (result != 0) { + return false; + } + + /* + * FIXME: this should return the real attributes from all + * known metadata sources (Tracker and filesystem) + */ + mdattrs = dalloc_zero(reply, sl_array_t); + if (mdattrs == NULL) { + return false; + } + + result = dalloc_stradd(mdattrs, "kMDItemFSName"); + if (result != 0) { + return false; + } + result = 
dalloc_stradd(mdattrs, "kMDItemDisplayName"); + if (result != 0) { + return false; + } + result = dalloc_stradd(mdattrs, "kMDItemFSSize"); + if (result != 0) { + return false; + } + result = dalloc_stradd(mdattrs, "kMDItemFSOwnerUserID"); + if (result != 0) { + return false; + } + result = dalloc_stradd(mdattrs, "kMDItemFSOwnerGroupID"); + if (result != 0) { + return false; + } + result = dalloc_stradd(mdattrs, "kMDItemFSContentChangeDate"); + if (result != 0) { + return false; + } + + fmeta = dalloc_zero(reply, sl_filemeta_t); + if (fmeta == NULL) { + return false; + } + result = dalloc_add(fmeta, mdattrs, sl_array_t); + if (result != 0) { + return false; + } + result = dalloc_add(array, fmeta, sl_filemeta_t); + if (result != 0) { + return false; + } + + return true; +} + +/** + * Fetch metadata attribute values for a CNID + **/ +static bool slrpc_fetch_attributes(struct mds_ctx *mds_ctx, + const DALLOC_CTX *query, DALLOC_CTX *reply) +{ + int result; + bool ok; + sl_array_t *array; + sl_cnids_t *cnids; + sl_cnids_t *replycnids; + sl_array_t *reqinfo; + uint64_t ino; + uint64_t sl_result; + sl_filemeta_t *fm; + sl_array_t *fm_array; + sl_nil_t nil; + char *path = NULL; + struct smb_filename *smb_fname = NULL; + struct stat_ex *sp = NULL; + struct sl_inode_path_map *elem = NULL; + void *p; + TDB_DATA val = tdb_null; + NTSTATUS status; + + array = dalloc_zero(reply, sl_array_t); + if (array == NULL) { + return false; + } + replycnids = talloc_zero(reply, sl_cnids_t); + if (replycnids == NULL) { + goto error; + } + replycnids->ca_cnids = dalloc_new(replycnids); + if (replycnids->ca_cnids == NULL) { + goto error; + } + fm = dalloc_zero(array, sl_filemeta_t); + if (fm == NULL) { + goto error; + } + fm_array = dalloc_zero(fm, sl_array_t); + if (fm_array == NULL) { + goto error; + } + /* For some reason the list of results always starts with a nil entry */ + result = dalloc_add_copy(fm_array, &nil, sl_nil_t); + if (result == -1) { + goto error; + } + + reqinfo = 
dalloc_get(query, "DALLOC_CTX", 0, "sl_array_t", 1); + if (reqinfo == NULL) { + goto error; + } + + cnids = dalloc_get(query, "DALLOC_CTX", 0, "sl_cnids_t", 2); + if (cnids == NULL) { + goto error; + } + p = dalloc_get_object(cnids->ca_cnids, 0); + if (p == NULL) { + goto error; + } + memcpy(&ino, p, sizeof(uint64_t)); + + replycnids->ca_unkn1 = 0xfec; + replycnids->ca_context = cnids->ca_context; + result = dalloc_add_copy(replycnids->ca_cnids, &ino, uint64_t); + if (result != 0) { + goto error; + } + + status = dbwrap_fetch(mds_ctx->ino_path_map, reply, + make_tdb_data((void*)&ino, sizeof(uint64_t)), + &val); + if (NT_STATUS_IS_OK(status)) { + if (val.dsize != sizeof(p)) { + DBG_ERR("invalid record pointer size: %zd\n", val.dsize); + TALLOC_FREE(val.dptr); + goto error; + } + + memcpy(&p, val.dptr, sizeof(p)); + elem = talloc_get_type_abort(p, struct sl_inode_path_map); + path = elem->path; + + sp = &elem->st; + } + + ok = add_filemeta(mds_ctx, reqinfo, fm_array, path, sp); + if (!ok) { + goto error; + } + + sl_result = 0; + result = dalloc_add_copy(array, &sl_result, uint64_t); + if (result != 0) { + goto error; + } + result = dalloc_add(array, replycnids, sl_cnids_t); + if (result != 0) { + goto error; + } + result = dalloc_add(fm, fm_array, sl_array_t); + if (result != 0) { + goto error; + } + result = dalloc_add(array, fm, sl_filemeta_t); + if (result != 0) { + goto error; + } + result = dalloc_add(reply, array, sl_array_t); + if (result != 0) { + goto error; + } + + TALLOC_FREE(smb_fname); + return true; + +error: + + TALLOC_FREE(smb_fname); + sl_result = UINT64_MAX; + result = dalloc_add_copy(array, &sl_result, uint64_t); + if (result != 0) { + return false; + } + result = dalloc_add(reply, array, sl_array_t); + if (result != 0) { + return false; + } + + return true; +} + +/** + * Close a query + **/ +static bool slrpc_close_query(struct mds_ctx *mds_ctx, + const DALLOC_CTX *query, DALLOC_CTX *reply) +{ + struct sl_query *slq = NULL; + uint64_t *uint64p, 
ctx1, ctx2; + sl_array_t *array; + uint64_t sl_res; + int result; + + array = dalloc_zero(reply, sl_array_t); + if (array == NULL) { + return false; + } + + /* Context */ + uint64p = dalloc_get(query, "DALLOC_CTX", 0, "DALLOC_CTX", 0, + "uint64_t", 1); + if (uint64p == NULL) { + goto done; + } + ctx1 = *uint64p; + + uint64p = dalloc_get(query, "DALLOC_CTX", 0, "DALLOC_CTX", 0, + "uint64_t", 2); + if (uint64p == NULL) { + goto done; + } + ctx2 = *uint64p; + + /* Get query for context and free it */ + slq = slq_for_ctx(mds_ctx, ctx1, ctx2); + if (slq == NULL) { + DEBUG(1, ("bad context: [0x%jx,0x%jx]\n", + (uintmax_t)ctx1, (uintmax_t)ctx2)); + goto done; + } + + SLQ_DEBUG(10, slq, "close"); + TALLOC_FREE(slq); + +done: + sl_res = UINT64_MAX; + result = dalloc_add_copy(array, &sl_res, uint64_t); + if (result != 0) { + return false; + } + result = dalloc_add(reply, array, sl_array_t); + if (result != 0) { + return false; + } + return true; +} + +static struct mdssvc_ctx *mdssvc_init(struct tevent_context *ev) +{ + bool ok; + + if (mdssvc_ctx != NULL) { + return mdssvc_ctx; + } + + mdssvc_ctx = talloc_zero(ev, struct mdssvc_ctx); + if (mdssvc_ctx == NULL) { + return NULL; + } + + mdssvc_ctx->ev_ctx = ev; + + ok = mdsscv_backend_noindex.init(mdssvc_ctx); + if (!ok) { + DBG_ERR("backend init failed\n"); + TALLOC_FREE(mdssvc_ctx); + return NULL; + } + +#ifdef HAVE_SPOTLIGHT_BACKEND_ES + ok = mdsscv_backend_es.init(mdssvc_ctx); + if (!ok) { + DBG_ERR("backend init failed\n"); + TALLOC_FREE(mdssvc_ctx); + return NULL; + } +#endif + +#ifdef HAVE_SPOTLIGHT_BACKEND_TRACKER + ok = mdsscv_backend_tracker.init(mdssvc_ctx); + if (!ok) { + DBG_ERR("backend init failed\n"); + TALLOC_FREE(mdssvc_ctx); + return NULL; + } +#endif + + return mdssvc_ctx; +} + +/** + * Init callbacks at startup + * + * This gets typically called in the main parent smbd which means we can't + * initialize our global state here. 
+ **/ +bool mds_init(struct messaging_context *msg_ctx) +{ + return true; +} + +bool mds_shutdown(void) +{ + bool ok; + + if (mdssvc_ctx == NULL) { + return false; + } + + ok = mdsscv_backend_noindex.shutdown(mdssvc_ctx); + if (!ok) { + goto fail; + } + +#ifdef HAVE_SPOTLIGHT_BACKEND_ES + ok = mdsscv_backend_es.shutdown(mdssvc_ctx); + if (!ok) { + goto fail; + } +#endif + +#ifdef HAVE_SPOTLIGHT_BACKEND_TRACKER + ok = mdsscv_backend_tracker.shutdown(mdssvc_ctx); + if (!ok) { + goto fail; + } +#endif + + ok = true; +fail: + TALLOC_FREE(mdssvc_ctx); + return ok; +} + +/** + * Tear down connections and free all resources + **/ +static int mds_ctx_destructor_cb(struct mds_ctx *mds_ctx) +{ + /* + * We need to free query_list before ino_path_map + */ + while (mds_ctx->query_list != NULL) { + /* + * slq destructor removes element from list. + * Don't use TALLOC_FREE()! + */ + talloc_free(mds_ctx->query_list); + } + TALLOC_FREE(mds_ctx->ino_path_map); + + if (mds_ctx->conn != NULL) { + SMB_VFS_DISCONNECT(mds_ctx->conn); + conn_free(mds_ctx->conn); + } + + ZERO_STRUCTP(mds_ctx); + + return 0; +} + +/** + * Initialise a context per RPC bind + * + * This ends up being called for every tcon, because the client does a + * RPC bind for every tcon, so this is acually a per tcon context. 
+ **/ +NTSTATUS mds_init_ctx(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct messaging_context *msg_ctx, + struct auth_session_info *session_info, + int snum, + const char *sharename, + const char *path, + struct mds_ctx **_mds_ctx) +{ + const struct loadparm_substitution *lp_sub = + loadparm_s3_global_substitution(); + struct smb_filename conn_basedir; + struct mds_ctx *mds_ctx; + int backend; + int ret; + bool ok; + smb_iconv_t iconv_hnd = (smb_iconv_t)-1; + NTSTATUS status; + + if (!lp_spotlight(snum)) { + return NT_STATUS_WRONG_VOLUME; + } + + mds_ctx = talloc_zero(mem_ctx, struct mds_ctx); + if (mds_ctx == NULL) { + return NT_STATUS_NO_MEMORY; + } + talloc_set_destructor(mds_ctx, mds_ctx_destructor_cb); + + mds_ctx->mdssvc_ctx = mdssvc_init(ev); + if (mds_ctx->mdssvc_ctx == NULL) { + return NT_STATUS_NO_MEMORY; + } + + backend = lp_spotlight_backend(snum); + switch (backend) { + case SPOTLIGHT_BACKEND_NOINDEX: + mds_ctx->backend = &mdsscv_backend_noindex; + break; + +#ifdef HAVE_SPOTLIGHT_BACKEND_ES + case SPOTLIGHT_BACKEND_ES: + mds_ctx->backend = &mdsscv_backend_es; + break; +#endif + +#ifdef HAVE_SPOTLIGHT_BACKEND_TRACKER + case SPOTLIGHT_BACKEND_TRACKER: + mds_ctx->backend = &mdsscv_backend_tracker; + break; +#endif + default: + DBG_ERR("Unknown backend %d\n", backend); + TALLOC_FREE(mdssvc_ctx); + status = NT_STATUS_INTERNAL_ERROR; + goto error; + } + + iconv_hnd = smb_iconv_open_ex(mds_ctx, + "UTF8-NFD", + "UTF8-NFC", + false); + if (iconv_hnd == (smb_iconv_t)-1) { + status = NT_STATUS_INTERNAL_ERROR; + goto error; + } + mds_ctx->ic_nfc_to_nfd = iconv_hnd; + + iconv_hnd = smb_iconv_open_ex(mds_ctx, + "UTF8-NFC", + "UTF8-NFD", + false); + if (iconv_hnd == (smb_iconv_t)-1) { + status = NT_STATUS_INTERNAL_ERROR; + goto error; + } + mds_ctx->ic_nfd_to_nfc = iconv_hnd; + + mds_ctx->sharename = talloc_strdup(mds_ctx, sharename); + if (mds_ctx->sharename == NULL) { + status = NT_STATUS_NO_MEMORY; + goto error; + } + + mds_ctx->spath = 
talloc_strdup(mds_ctx, path); + if (mds_ctx->spath == NULL) { + status = NT_STATUS_NO_MEMORY; + goto error; + } + mds_ctx->spath_len = strlen(path); + + mds_ctx->snum = snum; + mds_ctx->pipe_session_info = session_info; + + if (session_info->security_token->num_sids < 1) { + status = NT_STATUS_BAD_LOGON_SESSION_STATE; + goto error; + } + sid_copy(&mds_ctx->sid, &session_info->security_token->sids[0]); + mds_ctx->uid = session_info->unix_token->uid; + + mds_ctx->ino_path_map = db_open_rbt(mds_ctx); + if (mds_ctx->ino_path_map == NULL) { + DEBUG(1,("open inode map db failed\n")); + status = NT_STATUS_INTERNAL_ERROR; + goto error; + } + + status = create_conn_struct_cwd(mds_ctx, + ev, + msg_ctx, + session_info, + snum, + lp_path(talloc_tos(), lp_sub, snum), + &mds_ctx->conn); + if (!NT_STATUS_IS_OK(status)) { + DBG_ERR("failed to create conn for vfs: %s\n", + nt_errstr(status)); + goto error; + } + + conn_basedir = (struct smb_filename) { + .base_name = mds_ctx->conn->connectpath, + }; + + ret = vfs_ChDir(mds_ctx->conn, &conn_basedir); + if (ret != 0) { + DBG_ERR("vfs_ChDir [%s] failed: %s\n", + conn_basedir.base_name, strerror(errno)); + status = map_nt_error_from_unix(errno); + goto error; + } + + ok = mds_ctx->backend->connect(mds_ctx); + if (!ok) { + DBG_ERR("backend connect failed\n"); + status = NT_STATUS_CONNECTION_RESET; + goto error; + } + + *_mds_ctx = mds_ctx; + return NT_STATUS_OK; + +error: + if (mds_ctx->ic_nfc_to_nfd != NULL) { + smb_iconv_close(mds_ctx->ic_nfc_to_nfd); + } + if (mds_ctx->ic_nfd_to_nfc != NULL) { + smb_iconv_close(mds_ctx->ic_nfd_to_nfc); + } + + TALLOC_FREE(mds_ctx); + return status; +} + +/** + * Dispatch a Spotlight RPC command + **/ +bool mds_dispatch(struct mds_ctx *mds_ctx, + struct mdssvc_blob *request_blob, + struct mdssvc_blob *response_blob, + size_t max_fragment_size) +{ + bool ok; + int ret; + DALLOC_CTX *query = NULL; + DALLOC_CTX *reply = NULL; + char *rpccmd; + const struct slrpc_cmd *slcmd; + const struct smb_filename 
conn_basedir = { + .base_name = mds_ctx->conn->connectpath, + }; + NTSTATUS status; + + if (CHECK_DEBUGLVL(10)) { + const struct sl_query *slq; + + for (slq = mds_ctx->query_list; slq != NULL; slq = slq->next) { + SLQ_DEBUG(10, slq, "pending"); + } + } + + response_blob->length = 0; + + DEBUG(10, ("share path: %s\n", mds_ctx->spath)); + + query = dalloc_new(mds_ctx); + if (query == NULL) { + ok = false; + goto cleanup; + } + reply = dalloc_new(mds_ctx); + if (reply == NULL) { + ok = false; + goto cleanup; + } + + ok = sl_unpack(query, (char *)request_blob->spotlight_blob, + request_blob->length); + if (!ok) { + DEBUG(1, ("error unpacking Spotlight RPC blob\n")); + goto cleanup; + } + + DEBUG(5, ("%s", dalloc_dump(query, 0))); + + rpccmd = dalloc_get(query, "DALLOC_CTX", 0, "DALLOC_CTX", 0, + "char *", 0); + if (rpccmd == NULL) { + DEBUG(1, ("missing primary Spotlight RPC command\n")); + ok = false; + goto cleanup; + } + + DEBUG(10, ("Spotlight RPC cmd: %s\n", rpccmd)); + + slcmd = slrpc_cmd_by_name(rpccmd); + if (slcmd == NULL) { + DEBUG(1, ("unsupported primary Spotlight RPC command %s\n", + rpccmd)); + ok = false; + goto cleanup; + } + + ret = vfs_ChDir(mds_ctx->conn, &conn_basedir); + if (ret != 0) { + DBG_ERR("vfs_ChDir [%s] failed: %s\n", + conn_basedir.base_name, strerror(errno)); + ok = false; + goto cleanup; + } + + ok = slcmd->function(mds_ctx, query, reply); + if (!ok) { + goto cleanup; + } + + DBG_DEBUG("%s", dalloc_dump(reply, 0)); + + status = sl_pack_alloc(response_blob, + reply, + response_blob, + max_fragment_size); + if (!NT_STATUS_IS_OK(status)) { + DBG_ERR("sl_pack_alloc() failed\n"); + goto cleanup; + } + +cleanup: + talloc_free(query); + talloc_free(reply); + return ok; +} diff --git a/source3/rpc_server/mdssvc/mdssvc.h b/source3/rpc_server/mdssvc/mdssvc.h new file mode 100644 index 0000000..8aabf5b --- /dev/null +++ b/source3/rpc_server/mdssvc/mdssvc.h @@ -0,0 +1,169 @@ +/* + Unix SMB/CIFS implementation. 
+ Main metadata server / Spotlight routines + + Copyright (C) Ralph Boehme 2012-2014 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _MDSSVC_H +#define _MDSSVC_H + +#include "dalloc.h" +#include "marshalling.h" +#include "lib/util/dlinklist.h" +#include "librpc/gen_ndr/mdssvc.h" + +/* + * glib uses TRUE and FALSE which was redefined by "includes.h" to be + * unusable, undefine so glib can establish its own working + * replacement. 
+ */ +#undef TRUE +#undef FALSE + +#define MAX_SL_FRAGMENT_SIZE 0xFFFFF +#define MAX_SL_RESULTS 100 +#define SL_PAGESIZE 50 +#define MAX_SL_RUNTIME 30 +#define MDS_TRACKER_ASYNC_TIMEOUT_MS 250 + +#define SLQ_DEBUG(lvl, _slq, state) do { if (CHECK_DEBUGLVL(lvl)) { \ + const struct sl_query *__slq = _slq; \ + struct timeval_buf start_buf; \ + const char *start; \ + struct timeval_buf last_used_buf; \ + const char *last_used; \ + struct timeval_buf expire_buf; \ + const char *expire; \ + start = timeval_str_buf(&__slq->start_time, false, \ + true, &start_buf); \ + last_used = timeval_str_buf(&__slq->last_used, false, \ + true, &last_used_buf); \ + expire = timeval_str_buf(&__slq->expire_time, false, \ + true, &expire_buf); \ + DEBUG(lvl,("%s slq[0x%jx,0x%jx], start: %s, last_used: %s, " \ + "expires: %s, query: '%s'\n", state, \ + (uintmax_t)__slq->ctx1, (uintmax_t)__slq->ctx2, \ + start, last_used, expire, __slq->query_string)); \ +}} while(0) + +/****************************************************************************** + * Some helper stuff dealing with queries + ******************************************************************************/ + +/* query state */ +typedef enum { + SLQ_STATE_NEW, /* Query received from client */ + SLQ_STATE_RUNNING, /* Query dispatched to Tracker */ + SLQ_STATE_RESULTS, /* Async Tracker query read */ + SLQ_STATE_FULL, /* the max amount of result has beed queued */ + SLQ_STATE_DONE, /* Got all results from Tracker */ + SLQ_STATE_END, /* Query results returned to client */ + SLQ_STATE_ERROR /* an error happended somewhere */ +} slq_state_t; + +/* query structure */ +struct sl_query { + struct sl_query *prev, *next; /* list pointers */ + struct mds_ctx *mds_ctx; /* context handle */ + void *backend_private; /* search backend private data */ + slq_state_t state; /* query state */ + struct timeval start_time; /* Query start time */ + struct timeval last_used; /* Time of last result fetch */ + struct timeval expire_time; /* Query 
expiration time */ + struct tevent_timer *te; /* query timeout */ + uint64_t ctx1; /* client context 1 */ + uint64_t ctx2; /* client context 2 */ + sl_array_t *reqinfo; /* array with requested metadata */ + char *query_string; /* the Spotlight query string */ + uint64_t *cnids; /* restrict query to these CNIDs */ + size_t cnids_num; /* Size of slq_cnids array */ + const char *path_scope; /* path to directory to search */ + struct sl_rslts *query_results; /* query results */ + TALLOC_CTX *entries_ctx; /* talloc parent of the search results */ +}; + +struct sl_rslts { + int num_results; + sl_cnids_t *cnids; + sl_array_t *fm_array; +}; + +struct sl_inode_path_map { + struct mds_ctx *mds_ctx; + uint64_t ino; + char *path; + struct stat_ex st; +}; + +/* Per process state */ +struct mdssvc_ctx { + struct tevent_context *ev_ctx; + void *backend_private; +}; + +/* Per tree connect state */ +struct mds_ctx { + struct mdssvc_backend *backend; + struct mdssvc_ctx *mdssvc_ctx; + void *backend_private; + struct auth_session_info *pipe_session_info; + struct dom_sid sid; + uid_t uid; + smb_iconv_t ic_nfc_to_nfd; + smb_iconv_t ic_nfd_to_nfc; + int snum; + const char *sharename; + const char *spath; + size_t spath_len; + struct connection_struct *conn; + struct sl_query *query_list; /* list of active queries */ + struct db_context *ino_path_map; /* dbwrap rbt for storing inode->path mappings */ +}; + +struct mdssvc_backend { + bool (*init)(struct mdssvc_ctx *mdssvc_ctx); + bool (*connect)(struct mds_ctx *mds_ctx); + bool (*search_map)(struct sl_query *slq); + bool (*search_start)(struct sl_query *slq); + bool (*search_cont)(struct sl_query *slq); + bool (*shutdown)(struct mdssvc_ctx *mdssvc_ctx); +}; + +/****************************************************************************** + * Function declarations + ******************************************************************************/ + +/* + * mdssvc.c + */ +extern bool mds_init(struct messaging_context *msg_ctx); +extern bool 
mds_shutdown(void); +NTSTATUS mds_init_ctx(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct messaging_context *msg_ctx, + struct auth_session_info *session_info, + int snum, + const char *sharename, + const char *path, + struct mds_ctx **_mds_ctx); +extern bool mds_dispatch(struct mds_ctx *mds_ctx, + struct mdssvc_blob *request_blob, + struct mdssvc_blob *response_blob, + size_t max_fragment_size); +bool mds_add_result(struct sl_query *slq, const char *path); + +#endif /* _MDSSVC_H */ diff --git a/source3/rpc_server/mdssvc/mdssvc_es.c b/source3/rpc_server/mdssvc/mdssvc_es.c new file mode 100644 index 0000000..8460b48 --- /dev/null +++ b/source3/rpc_server/mdssvc/mdssvc_es.c @@ -0,0 +1,865 @@ +/* + Unix SMB/CIFS implementation. + Main metadata server / Spotlight routines / ES backend + + Copyright (C) Ralph Boehme 2019 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "includes.h" +#include "system/filesys.h" +#include "lib/util/time_basic.h" +#include "lib/tls/tls.h" +#include "lib/util/tevent_ntstatus.h" +#include "libcli/http/http.h" +#include "lib/util/tevent_unix.h" +#include "credentials.h" +#include "mdssvc.h" +#include "mdssvc_es.h" +#include "rpc_server/mdssvc/es_parser.tab.h" + +#include <jansson.h> + +#undef DBGC_CLASS +#define DBGC_CLASS DBGC_RPC_SRV + +#define MDSSVC_ELASTIC_QUERY_TEMPLATE \ + "{" \ + " \"from\": %zu," \ + " \"size\": %zu," \ + " \"_source\": [%s]," \ + " \"query\": {" \ + " \"query_string\": {" \ + " \"query\": \"%s\"" \ + " }" \ + " }" \ + "}" + +#define MDSSVC_ELASTIC_SOURCES \ + "\"path.real\"" + +static bool mdssvc_es_init(struct mdssvc_ctx *mdssvc_ctx) +{ + struct mdssvc_es_ctx *mdssvc_es_ctx = NULL; + json_error_t json_error; + char *default_path = NULL; + const char *path = NULL; + + mdssvc_es_ctx = talloc_zero(mdssvc_ctx, struct mdssvc_es_ctx); + if (mdssvc_es_ctx == NULL) { + return false; + } + mdssvc_es_ctx->mdssvc_ctx = mdssvc_ctx; + + mdssvc_es_ctx->creds = cli_credentials_init_anon(mdssvc_es_ctx); + if (mdssvc_es_ctx->creds == NULL) { + TALLOC_FREE(mdssvc_es_ctx); + return false; + } + + default_path = talloc_asprintf( + mdssvc_es_ctx, + "%s/mdssvc/elasticsearch_mappings.json", + get_dyn_SAMBA_DATADIR()); + if (default_path == NULL) { + TALLOC_FREE(mdssvc_es_ctx); + return false; + } + + path = lp_parm_const_string(GLOBAL_SECTION_SNUM, + "elasticsearch", + "mappings", + default_path); + if (path == NULL) { + TALLOC_FREE(mdssvc_es_ctx); + return false; + } + + mdssvc_es_ctx->mappings = json_load_file(path, 0, &json_error); + if (mdssvc_es_ctx->mappings == NULL) { + DBG_ERR("Opening mapping file [%s] failed: %s\n", + path, json_error.text); + TALLOC_FREE(mdssvc_es_ctx); + return false; + } + TALLOC_FREE(default_path); + + mdssvc_ctx->backend_private = mdssvc_es_ctx; + return true; +} + +static bool mdssvc_es_shutdown(struct mdssvc_ctx *mdssvc_ctx) +{ + return true; +} + 
+static struct tevent_req *mds_es_connect_send(
+			TALLOC_CTX *mem_ctx,
+			struct tevent_context *ev,
+			struct mds_es_ctx *mds_es_ctx);
+static int mds_es_connect_recv(struct tevent_req *req);
+static void mds_es_connected(struct tevent_req *subreq);
+static bool mds_es_next_search_trigger(struct mds_es_ctx *mds_es_ctx);
+static void mds_es_search_set_pending(struct sl_es_search *s);
+static void mds_es_search_unset_pending(struct sl_es_search *s);
+
+/*
+ * talloc destructor of the per tree-connect Elasticsearch state.
+ *
+ * Only touches the head of the search list: if a search is still queued
+ * there, its back pointer to this context is cleared. Always returns 0, so
+ * the free proceeds.
+ */
+static int mds_es_ctx_destructor(struct mds_es_ctx *mds_es_ctx)
+{
+	struct sl_es_search *s = mds_es_ctx->searches;
+
+	/*
+	 * The per tree-connect state mds_es_ctx (a child of mds_ctx) is about
+	 * to go away and has already freed all waiting searches. If there's a
+	 * search remaining that's when the search is already active. Reset the
+	 * mds_es_ctx pointer, so we can detect this when the search completes.
+	 */
+
+	if (s == NULL) {
+		return 0;
+	}
+
+	s->mds_es_ctx = NULL;
+
+	return 0;
+}
+
+/*
+ * Backend "connect" hook: set up the per tree-connect Elasticsearch state.
+ *
+ * Allocates a struct mds_es_ctx as talloc child of mds_ctx, publishes it as
+ * mds_ctx->backend_private and starts the asynchronous HTTP connect, which
+ * completes in mds_es_connected().
+ *
+ * Returns true once the connect request has been started (it does not wait
+ * for the connection), false if an allocation failed. On failure
+ * mds_ctx->backend_private is left NULL.
+ */
+static bool mds_es_connect(struct mds_ctx *mds_ctx)
+{
+	struct mdssvc_es_ctx *mdssvc_es_ctx = talloc_get_type_abort(
+		mds_ctx->mdssvc_ctx->backend_private, struct mdssvc_es_ctx);
+	struct mds_es_ctx *mds_es_ctx = NULL;
+	struct tevent_req *subreq = NULL;
+
+	mds_es_ctx = talloc_zero(mds_ctx, struct mds_es_ctx);
+	if (mds_es_ctx == NULL) {
+		return false;
+	}
+	*mds_es_ctx = (struct mds_es_ctx) {
+		.mdssvc_es_ctx = mdssvc_es_ctx,
+		.mds_ctx = mds_ctx,
+	};
+
+	mds_ctx->backend_private = mds_es_ctx;
+	talloc_set_destructor(mds_es_ctx, mds_es_ctx_destructor);
+
+	subreq = mds_es_connect_send(
+			mds_es_ctx,
+			mdssvc_es_ctx->mdssvc_ctx->ev_ctx,
+			mds_es_ctx);
+	if (subreq == NULL) {
+		/*
+		 * Unpublish the context before freeing it, otherwise
+		 * backend_private would be left pointing at freed memory.
+		 */
+		mds_ctx->backend_private = NULL;
+		TALLOC_FREE(mds_es_ctx);
+		return false;
+	}
+	tevent_req_set_callback(subreq, mds_es_connected, mds_es_ctx);
+	return true;
+}
+
+static void mds_es_connected(struct tevent_req *subreq)
+{
+	struct mds_es_ctx *mds_es_ctx = tevent_req_callback_data(
+		subreq, struct mds_es_ctx);
+	int ret;
+	bool ok;
+
+	ret = mds_es_connect_recv(subreq);
+	
TALLOC_FREE(subreq); + if (ret != 0) { + DBG_ERR("HTTP connect failed\n"); + return; + } + + ok = mds_es_next_search_trigger(mds_es_ctx); + if (!ok) { + DBG_ERR("mds_es_next_search_trigger failed\n"); + } + return; +} + +struct mds_es_connect_state { + struct tevent_context *ev; + struct mds_es_ctx *mds_es_ctx; + struct tevent_queue_entry *qe; + const char *server_addr; + uint16_t server_port; + struct tstream_tls_params *tls_params; +}; + +static void mds_es_http_connect_done(struct tevent_req *subreq); +static void mds_es_http_waited(struct tevent_req *subreq); + +static struct tevent_req *mds_es_connect_send( + TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct mds_es_ctx *mds_es_ctx) +{ + struct tevent_req *req = NULL; + struct tevent_req *subreq = NULL; + struct mds_es_connect_state *state = NULL; + const char *server_addr = NULL; + bool use_tls; + NTSTATUS status; + + req = tevent_req_create(mem_ctx, &state, struct mds_es_connect_state); + if (req == NULL) { + return NULL; + } + *state = (struct mds_es_connect_state) { + .ev = ev, + .mds_es_ctx = mds_es_ctx, + }; + + server_addr = lp_parm_const_string( + mds_es_ctx->mds_ctx->snum, + "elasticsearch", + "address", + "localhost"); + state->server_addr = talloc_strdup(state, server_addr); + if (tevent_req_nomem(state->server_addr, req)) { + return tevent_req_post(req, ev); + } + + state->server_port = lp_parm_int( + mds_es_ctx->mds_ctx->snum, + "elasticsearch", + "port", + 9200); + + use_tls = lp_parm_bool( + mds_es_ctx->mds_ctx->snum, + "elasticsearch", + "use tls", + false); + + DBG_DEBUG("Connecting to HTTP%s [%s] port [%"PRIu16"]\n", + use_tls ? 
"S" : "", state->server_addr, state->server_port); + + if (use_tls) { + const char *ca_file = lp__tls_cafile(); + const char *crl_file = lp__tls_crlfile(); + const char *tls_priority = lp_tls_priority(); + enum tls_verify_peer_state verify_peer = lp_tls_verify_peer(); + + status = tstream_tls_params_client(state, + ca_file, + crl_file, + tls_priority, + verify_peer, + state->server_addr, + &state->tls_params); + if (!NT_STATUS_IS_OK(status)) { + DBG_ERR("Failed tstream_tls_params_client - %s\n", + nt_errstr(status)); + tevent_req_nterror(req, status); + return tevent_req_post(req, ev); + } + } + + subreq = http_connect_send(state, + state->ev, + state->server_addr, + state->server_port, + mds_es_ctx->mdssvc_es_ctx->creds, + state->tls_params); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, mds_es_http_connect_done, req); + return req; +} + +static void mds_es_http_connect_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct mds_es_connect_state *state = tevent_req_data( + req, struct mds_es_connect_state); + int error; + + error = http_connect_recv(subreq, + state->mds_es_ctx, + &state->mds_es_ctx->http_conn); + TALLOC_FREE(subreq); + if (error != 0) { + DBG_ERR("HTTP connect failed, retrying...\n"); + + subreq = tevent_wakeup_send( + state->mds_es_ctx, + state->mds_es_ctx->mdssvc_es_ctx->mdssvc_ctx->ev_ctx, + tevent_timeval_current_ofs(10, 0)); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, + mds_es_http_waited, + req); + return; + } + + DBG_DEBUG("Connected to HTTP%s [%s] port [%"PRIu16"]\n", + state->tls_params ? 
"S" : "", + state->server_addr, state->server_port); + + tevent_req_done(req); + return; +} + +static void mds_es_http_waited(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct mds_es_connect_state *state = tevent_req_data( + req, struct mds_es_connect_state); + bool ok; + + ok = tevent_wakeup_recv(subreq); + TALLOC_FREE(subreq); + if (!ok) { + tevent_req_error(req, ETIMEDOUT); + return; + } + + subreq = mds_es_connect_send( + state->mds_es_ctx, + state->mds_es_ctx->mdssvc_es_ctx->mdssvc_ctx->ev_ctx, + state->mds_es_ctx); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, mds_es_connected, state->mds_es_ctx); +} + +static int mds_es_connect_recv(struct tevent_req *req) +{ + return tevent_req_simple_recv_unix(req); +} + +static void mds_es_reconnect_on_error(struct sl_es_search *s) +{ + struct mds_es_ctx *mds_es_ctx = s->mds_es_ctx; + struct tevent_req *subreq = NULL; + + if (s->slq != NULL) { + s->slq->state = SLQ_STATE_ERROR; + } + + DBG_WARNING("Reconnecting HTTP...\n"); + TALLOC_FREE(mds_es_ctx->http_conn); + + subreq = mds_es_connect_send( + mds_es_ctx, + mds_es_ctx->mdssvc_es_ctx->mdssvc_ctx->ev_ctx, + mds_es_ctx); + if (subreq == NULL) { + DBG_ERR("mds_es_connect_send failed\n"); + return; + } + tevent_req_set_callback(subreq, mds_es_connected, mds_es_ctx); +} + +static int search_destructor(struct sl_es_search *s) +{ + if (s->mds_es_ctx == NULL) { + return 0; + } + DLIST_REMOVE(s->mds_es_ctx->searches, s); + return 0; +} + +static struct tevent_req *mds_es_search_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct sl_es_search *s); +static int mds_es_search_recv(struct tevent_req *req); +static void mds_es_search_done(struct tevent_req *subreq); + +static bool mds_es_search(struct sl_query *slq) +{ + struct mds_es_ctx *mds_es_ctx = talloc_get_type_abort( + slq->mds_ctx->backend_private, struct mds_es_ctx); + struct sl_es_search *s = NULL; + 
bool ok; + + s = talloc_zero(slq, struct sl_es_search); + if (s == NULL) { + return false; + } + *s = (struct sl_es_search) { + .ev = mds_es_ctx->mdssvc_es_ctx->mdssvc_ctx->ev_ctx, + .mds_es_ctx = mds_es_ctx, + .slq = slq, + .size = SL_PAGESIZE, + }; + + /* 0 would mean no limit */ + s->max = lp_parm_ulonglong(s->slq->mds_ctx->snum, + "elasticsearch", + "max results", + MAX_SL_RESULTS); + + DBG_DEBUG("Spotlight query: '%s'\n", slq->query_string); + + ok = map_spotlight_to_es_query( + s, + mds_es_ctx->mdssvc_es_ctx->mappings, + slq->path_scope, + slq->query_string, + &s->es_query); + if (!ok) { + TALLOC_FREE(s); + return false; + } + DBG_DEBUG("Elasticsearch query: '%s'\n", s->es_query); + + slq->backend_private = s; + slq->state = SLQ_STATE_RUNNING; + DLIST_ADD_END(mds_es_ctx->searches, s); + talloc_set_destructor(s, search_destructor); + + return mds_es_next_search_trigger(mds_es_ctx); +} + +static bool mds_es_next_search_trigger(struct mds_es_ctx *mds_es_ctx) +{ + struct tevent_req *subreq = NULL; + struct sl_es_search *s = mds_es_ctx->searches; + + if (mds_es_ctx->http_conn == NULL) { + DBG_DEBUG("Waiting for HTTP connection...\n"); + return true; + } + if (s == NULL) { + DBG_DEBUG("No pending searches, idling...\n"); + return true; + } + if (s->pending) { + DBG_DEBUG("Search pending [%p]\n", s); + return true; + } + + subreq = mds_es_search_send(s, s->ev, s); + if (subreq == NULL) { + return false; + } + tevent_req_set_callback(subreq, mds_es_search_done, s); + mds_es_search_set_pending(s); + return true; +} + +static void mds_es_search_done(struct tevent_req *subreq) +{ + struct sl_es_search *s = tevent_req_callback_data( + subreq, struct sl_es_search); + struct mds_es_ctx *mds_es_ctx = s->mds_es_ctx; + struct sl_query *slq = s->slq; + int ret; + bool ok; + + DBG_DEBUG("Search done for search [%p]\n", s); + + mds_es_search_unset_pending(s); + + if (mds_es_ctx == NULL) { + /* + * Search connection closed by the user while s was pending. 
+ */ + TALLOC_FREE(s); + return; + } + + DLIST_REMOVE(mds_es_ctx->searches, s); + + ret = mds_es_search_recv(subreq); + TALLOC_FREE(subreq); + if (ret != 0) { + mds_es_reconnect_on_error(s); + return; + } + + if (slq == NULL) { + /* + * Closed by the user. Explicitly free "s" here because the + * talloc parent slq is already gone. + */ + TALLOC_FREE(s); + goto trigger; + } + + SLQ_DEBUG(10, slq, "search done"); + + if (s->total == 0 || s->from >= s->max) { + slq->state = SLQ_STATE_DONE; + goto trigger; + } + + if (slq->query_results->num_results >= SL_PAGESIZE) { + slq->state = SLQ_STATE_FULL; + goto trigger; + } + + /* + * Reschedule this query as there are more results waiting in the + * Elasticsearch server and the client result queue has room as + * well. But put it at the end of the list of active queries as a simple + * heuristic that should ensure all client queries are dispatched to the + * server. + */ + DLIST_ADD_END(mds_es_ctx->searches, s); + +trigger: + ok = mds_es_next_search_trigger(mds_es_ctx); + if (!ok) { + DBG_ERR("mds_es_next_search_trigger failed\n"); + } +} + +static void mds_es_search_http_send_done(struct tevent_req *subreq); +static void mds_es_search_http_read_done(struct tevent_req *subreq); + +struct mds_es_search_state { + struct tevent_context *ev; + struct sl_es_search *s; + struct tevent_queue_entry *qe; + struct http_request http_request; + struct http_request *http_response; +}; + +static int mds_es_search_pending_destructor(struct sl_es_search *s) +{ + /* + * s is a child of slq which may get freed when a user closes a + * query. To maintain the HTTP request/response sequence on the HTTP + * channel, we keep processing pending requests and free s when we + * receive the HTTP response for pending requests. 
+ */ + DBG_DEBUG("Preserving pending search [%p]\n", s); + s->slq = NULL; + return -1; +} + +static void mds_es_search_set_pending(struct sl_es_search *s) +{ + DBG_DEBUG("Set pending [%p]\n", s); + SLQ_DEBUG(10, s->slq, "pending"); + + s->pending = true; + talloc_set_destructor(s, mds_es_search_pending_destructor); +} + +static void mds_es_search_unset_pending(struct sl_es_search *s) +{ + DBG_DEBUG("Unset pending [%p]\n", s); + if (s->slq != NULL) { + SLQ_DEBUG(10, s->slq, "unset pending"); + } + + s->pending = false; + talloc_set_destructor(s, search_destructor); +} + +static struct tevent_req *mds_es_search_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + struct sl_es_search *s) +{ + struct tevent_req *req = NULL; + struct tevent_req *subreq = NULL; + struct mds_es_search_state *state = NULL; + const char *index = NULL; + char *elastic_query = NULL; + char *uri = NULL; + size_t elastic_query_len; + char *elastic_query_len_str = NULL; + char *hostname = NULL; + bool pretty = false; + + req = tevent_req_create(mem_ctx, &state, struct mds_es_search_state); + if (req == NULL) { + return NULL; + } + *state = (struct mds_es_search_state) { + .ev = ev, + .s = s, + }; + + if (!tevent_req_set_endtime(req, ev, timeval_current_ofs(60, 0))) { + return tevent_req_post(req, s->ev); + } + + index = lp_parm_const_string(s->slq->mds_ctx->snum, + "elasticsearch", + "index", + "_all"); + if (tevent_req_nomem(index, req)) { + return tevent_req_post(req, ev); + } + + if (DEBUGLVL(10)) { + pretty = true; + } + + uri = talloc_asprintf(state, + "/%s/_search%s", + index, + pretty ? 
"?pretty" : ""); + if (tevent_req_nomem(uri, req)) { + return tevent_req_post(req, ev); + } + + elastic_query = talloc_asprintf(state, + MDSSVC_ELASTIC_QUERY_TEMPLATE, + s->from, + s->size, + MDSSVC_ELASTIC_SOURCES, + s->es_query); + if (tevent_req_nomem(elastic_query, req)) { + return tevent_req_post(req, ev); + } + DBG_DEBUG("Elastic query: '%s'\n", elastic_query); + + elastic_query_len = strlen(elastic_query); + + state->http_request = (struct http_request) { + .type = HTTP_REQ_POST, + .uri = uri, + .body = data_blob_const(elastic_query, elastic_query_len), + .major = '1', + .minor = '1', + }; + + elastic_query_len_str = talloc_asprintf(state, "%zu", elastic_query_len); + if (tevent_req_nomem(elastic_query_len_str, req)) { + return tevent_req_post(req, ev); + } + + hostname = get_myname(state); + if (tevent_req_nomem(hostname, req)) { + return tevent_req_post(req, ev); + } + + http_add_header(state, &state->http_request.headers, + "Content-Type", "application/json"); + http_add_header(state, &state->http_request.headers, + "Accept", "application/json"); + http_add_header(state, &state->http_request.headers, + "User-Agent", "Samba/mdssvc"); + http_add_header(state, &state->http_request.headers, + "Host", hostname); + http_add_header(state, &state->http_request.headers, + "Content-Length", elastic_query_len_str); + + subreq = http_send_request_send(state, + ev, + s->mds_es_ctx->http_conn, + &state->http_request); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, mds_es_search_http_send_done, req); + return req; +} + +static void mds_es_search_http_send_done(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct mds_es_search_state *state = tevent_req_data( + req, struct mds_es_search_state); + NTSTATUS status; + + DBG_DEBUG("Sent out search [%p]\n", state->s); + + status = http_send_request_recv(subreq); + TALLOC_FREE(subreq); + if 
(!NT_STATUS_IS_OK(status)) {
+		tevent_req_error(req, map_errno_from_nt_status(status));
+		return;
+	}
+
+	if (state->s->mds_es_ctx == NULL || state->s->slq == NULL) {
+		tevent_req_done(req);
+		return;
+	}
+
+	subreq = http_read_response_send(state,
+					 state->ev,
+					 state->s->mds_es_ctx->http_conn,
+					 SL_PAGESIZE * 8192);
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, mds_es_search_http_read_done, req);
+}
+
+/*
+ * Completion callback for the HTTP response of one search page.
+ *
+ * Parses the JSON body, records the total hit count on the first page and
+ * passes the "path.real" value of every hit to mds_add_result(), stopping
+ * once s->max results have been consumed. On success s->from is advanced by
+ * the number of hits in the page and the query state becomes
+ * SLQ_STATE_RESULTS. Any HTTP status other than 200 and any malformed
+ * response sets SLQ_STATE_ERROR and fails the request with EINVAL.
+ *
+ * If the query or the tree connect went away while the request was in
+ * flight (slq or s->mds_es_ctx is NULL), the response is dropped and the
+ * request completes successfully.
+ */
+static void mds_es_search_http_read_done(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct mds_es_search_state *state = tevent_req_data(
+		req, struct mds_es_search_state);
+	struct sl_es_search *s = state->s;
+	struct sl_query *slq = s->slq;
+	json_t *root = NULL;
+	json_t *matches = NULL;
+	json_t *match = NULL;
+	size_t i;
+	json_error_t error;
+	size_t hits;
+	NTSTATUS status;
+	int ret;
+	bool ok;
+
+	DBG_DEBUG("Got response for search [%p]\n", s);
+
+	status = http_read_response_recv(subreq, state, &state->http_response);
+	TALLOC_FREE(subreq);
+	if (!NT_STATUS_IS_OK(status)) {
+		DBG_DEBUG("HTTP response failed: %s\n", nt_errstr(status));
+		tevent_req_error(req, map_errno_from_nt_status(status));
+		return;
+	}
+
+	if (slq == NULL || s->mds_es_ctx == NULL) {
+		tevent_req_done(req);
+		return;
+	}
+
+	switch (state->http_response->response_code) {
+	case 200:
+		break;
+	default:
+		DBG_ERR("HTTP server response: %u\n",
+			state->http_response->response_code);
+		goto fail;
+	}
+
+	DBG_DEBUG("JSON response:\n%s\n",
+		  talloc_strndup(talloc_tos(),
+				 (char *)state->http_response->body.data,
+				 state->http_response->body.length));
+
+	root = json_loadb((char *)state->http_response->body.data,
+			  state->http_response->body.length,
+			  0,
+			  &error);
+	if (root == NULL) {
+		DBG_ERR("json_loadb failed\n");
+		goto fail;
+	}
+
+	if (s->total == 0) {
+		/*
+		 * The jansson "i" format stores through an int pointer, which
+		 * must not be aimed at the size_t s->total. Unpack into a
+		 * json_int_t ("I") and convert after a range check.
+		 */
+		json_int_t total = 0;
+
+		/*
+		 * Get the total number of results the first time, format
+		 * used by Elasticsearch 7.0 or newer
+		 */
+		ret = json_unpack(root, "{s: {s: {s: I}}}",
+				  "hits", "total", "value", &total);
+		if (ret != 0) {
+			/* Format used before 7.0 */
+			ret = json_unpack(root, "{s: {s: I}}",
+					  "hits", "total", &total);
+			if (ret != 0) {
+				DBG_ERR("json_unpack failed\n");
+				goto fail;
+			}
+		}
+		if (total < 0 || (uintmax_t)total > SIZE_MAX) {
+			DBG_ERR("Invalid total: %lld\n", (long long)total);
+			goto fail;
+		}
+		s->total = (size_t)total;
+
+		DBG_DEBUG("Total: %zu\n", s->total);
+
+		if (s->total == 0) {
+			json_decref(root);
+			tevent_req_done(req);
+			return;
+		}
+	}
+
+	/* A limit of 0 means "no limit": cap at what the server has */
+	if (s->max == 0 || s->max > s->total) {
+		s->max = s->total;
+	}
+
+	ret = json_unpack(root, "{s: {s:o}}",
+			  "hits", "hits", &matches);
+	if (ret != 0 || matches == NULL) {
+		DBG_ERR("json_unpack hits failed\n");
+		goto fail;
+	}
+
+	hits = json_array_size(matches);
+	if (hits == 0) {
+		DBG_ERR("Hu?! No results?\n");
+		goto fail;
+	}
+	DBG_DEBUG("Hits: %zu\n", hits);
+
+	for (i = 0; i < hits && s->from + i < s->max; i++) {
+		const char *path = NULL;
+
+		match = json_array_get(matches, i);
+		if (match == NULL) {
+			DBG_ERR("Hu?! No value for index %zu\n", i);
+			goto fail;
+		}
+		ret = json_unpack(match,
+				  "{s: {s: {s: s}}}",
+				  "_source",
+				  "path",
+				  "real",
+				  &path);
+		if (ret != 0) {
+			DBG_ERR("Missing path.real in JSON result\n");
+			goto fail;
+		}
+
+		ok = mds_add_result(slq, path);
+		if (!ok) {
+			DBG_ERR("error adding result for path: %s\n", path);
+			goto fail;
+		}
+	}
+	json_decref(root);
+
+	s->from += hits;
+	slq->state = SLQ_STATE_RESULTS;
+	tevent_req_done(req);
+	return;
+
+fail:
+	if (root != NULL) {
+		json_decref(root);
+	}
+	slq->state = SLQ_STATE_ERROR;
+	tevent_req_error(req, EINVAL);
+	return;
+}
+
+static int mds_es_search_recv(struct tevent_req *req)
+{
+	return tevent_req_simple_recv_unix(req);
+}
+
+/*
+ * Backend "search_cont" hook: put the search belonging to slq back at the
+ * end of the list of searches and trigger the next search.
+ */
+static bool mds_es_search_cont(struct sl_query *slq)
+{
+	struct sl_es_search *s = talloc_get_type_abort(
+		slq->backend_private, struct sl_es_search);
+
+	SLQ_DEBUG(10, slq, "continue");
+	DLIST_ADD_END(s->mds_es_ctx->searches, s);
+	return mds_es_next_search_trigger(s->mds_es_ctx);
+}
+
+struct mdssvc_backend mdsscv_backend_es = {
+	.init = 
mdssvc_es_init, + .shutdown = mdssvc_es_shutdown, + .connect = mds_es_connect, + .search_start = mds_es_search, + .search_cont = mds_es_search_cont, +}; diff --git a/source3/rpc_server/mdssvc/mdssvc_es.h b/source3/rpc_server/mdssvc/mdssvc_es.h new file mode 100644 index 0000000..19797fa --- /dev/null +++ b/source3/rpc_server/mdssvc/mdssvc_es.h @@ -0,0 +1,108 @@ +/* + Unix SMB/CIFS implementation. + Main metadata server / Spotlight routines / HTTP/ES/JSON backend + + Copyright (C) Ralph Boehme 2019 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _MDSSVC_ES_H_ +#define _MDSSVC_ES_H_ + +#include <jansson.h> + +/* + * Some global state + */ +struct mdssvc_es_ctx { + struct mdssvc_ctx *mdssvc_ctx; + struct cli_credentials *creds; + json_t *mappings; +}; + +/* + * Per mdssvc RPC bind state + */ +struct mds_es_ctx { + /* + * Pointer to higher level mds_ctx + */ + struct mds_ctx *mds_ctx; + + /* + * Pointer to our global context + */ + struct mdssvc_es_ctx *mdssvc_es_ctx; + + /* + * The HTTP connection handle to the ES server + */ + struct http_conn *http_conn; + + /* + * List of pending searches + */ + struct sl_es_search *searches; +}; + +/* Per search request */ +struct sl_es_search { + /* + * List pointers + */ + struct sl_es_search *prev, *next; + + /* + * Search is being executed. Only the list head can be pending. 
+ */ + bool pending; + + /* + * Shorthand to our tevent context + */ + struct tevent_context *ev; + + /* + * Pointer to the RPC connection ctx the request is using + */ + struct mds_es_ctx *mds_es_ctx; + + /* + * The upper mdssvc.c level query context + */ + struct sl_query *slq; + + /* + * Maximum number of results we process and total number of + * results of a query. + */ + size_t total; + size_t max; + + /* + * For paging results + */ + size_t from; + size_t size; + + /* + * The translated Es query + */ + char *es_query; +}; + +extern struct mdssvc_backend mdsscv_backend_es; + +#endif /* _MDSSVC_ES_H_ */ diff --git a/source3/rpc_server/mdssvc/mdssvc_noindex.c b/source3/rpc_server/mdssvc/mdssvc_noindex.c new file mode 100644 index 0000000..ff466af --- /dev/null +++ b/source3/rpc_server/mdssvc/mdssvc_noindex.c @@ -0,0 +1,57 @@ +/* + Unix SMB/CIFS implementation. + Main metadata server / Spotlight routines / noindex backend + + Copyright (C) Ralph Boehme 2019 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "includes.h" +#include "mdssvc.h" + +static bool mdssvc_noindex_init(struct mdssvc_ctx *mdssvc_ctx) +{ + return true; +} + +static bool mdssvc_noindex_shutdown(struct mdssvc_ctx *mdssvc_ctx) +{ + return true; +} + +static bool mds_noindex_connect(struct mds_ctx *mds_ctx) +{ + return true; +} + +static bool mds_noindex_search_start(struct sl_query *slq) +{ + slq->state = SLQ_STATE_DONE; + return true; +} + +static bool mds_noindex_search_cont(struct sl_query *slq) +{ + slq->state = SLQ_STATE_DONE; + return true; +} + +struct mdssvc_backend mdsscv_backend_noindex = { + .init = mdssvc_noindex_init, + .shutdown = mdssvc_noindex_shutdown, + .connect = mds_noindex_connect, + .search_start = mds_noindex_search_start, + .search_cont = mds_noindex_search_cont, +}; diff --git a/source3/rpc_server/mdssvc/mdssvc_noindex.h b/source3/rpc_server/mdssvc/mdssvc_noindex.h new file mode 100644 index 0000000..750ee44 --- /dev/null +++ b/source3/rpc_server/mdssvc/mdssvc_noindex.h @@ -0,0 +1,26 @@ +/* + Unix SMB/CIFS implementation. + Main metadata server / Spotlight routines / noindex backend + + Copyright (C) Ralph Boehme 2019 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _MDSSVC_NOINDEX_H_ +#define _MDSSVC_NOINDEX_H_ + +extern struct mdssvc_backend mdsscv_backend_noindex; + +#endif /* _MDSSVC_NOINDEX_H_ */ diff --git a/source3/rpc_server/mdssvc/mdssvc_tracker.c b/source3/rpc_server/mdssvc/mdssvc_tracker.c new file mode 100644 index 0000000..fab8bd2 --- /dev/null +++ b/source3/rpc_server/mdssvc/mdssvc_tracker.c @@ -0,0 +1,491 @@ +/* + Unix SMB/CIFS implementation. + Main metadata server / Spotlight routines / Tracker backend + + Copyright (C) Ralph Boehme 2019 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "includes.h" +#include "lib/util/time_basic.h" +#include "mdssvc.h" +#include "mdssvc_tracker.h" +#include "lib/tevent_glib_glue.h" +#include "rpc_server/mdssvc/sparql_parser.tab.h" + +#undef DBGC_CLASS +#define DBGC_CLASS DBGC_RPC_SRV + +static struct mdssvc_tracker_ctx *mdssvc_tracker_ctx; + +/************************************************ + * Tracker async callbacks + ************************************************/ + +static void tracker_con_cb(GObject *object, + GAsyncResult *res, + gpointer user_data) +{ + struct mds_tracker_ctx *ctx = NULL; + TrackerSparqlConnection *tracker_con = NULL; + GError *error = NULL; + + tracker_con = tracker_sparql_connection_get_finish(res, &error); + if (error && g_error_matches(error, G_IO_ERROR, G_IO_ERROR_CANCELLED)) { + /* + * If the async request was cancelled, user_data will already be + * talloc_free'd, so we must be carefully checking for + * G_IO_ERROR_CANCELLED before using user_data. + */ + DBG_ERR("Tracker connection cancelled\n"); + g_error_free(error); + return; + } + /* + * Ok, we're not cancelled, we can now safely use user_data. + */ + ctx = talloc_get_type_abort(user_data, struct mds_tracker_ctx); + ctx->async_pending = false; + /* + * Check error again, above we only checked for G_IO_ERROR_CANCELLED. 
+ */ + if (error) { + DBG_ERR("Could not connect to Tracker: %s\n", error->message); + g_error_free(error); + return; + } + + ctx->tracker_con = tracker_con; + + DBG_DEBUG("connected to Tracker\n"); +} + +static void tracker_cursor_cb(GObject *object, + GAsyncResult *res, + gpointer user_data); + +static void tracker_query_cb(GObject *object, + GAsyncResult *res, + gpointer user_data) +{ + struct sl_tracker_query *tq = NULL; + struct sl_query *slq = NULL; + TrackerSparqlConnection *conn = NULL; + TrackerSparqlCursor *cursor = NULL; + GError *error = NULL; + + conn = TRACKER_SPARQL_CONNECTION(object); + + cursor = tracker_sparql_connection_query_finish(conn, res, &error); + /* + * If the async request was cancelled, user_data will already be + * talloc_free'd, so we must be carefully checking for + * G_IO_ERROR_CANCELLED before using user_data. + */ + if (error && g_error_matches(error, G_IO_ERROR, G_IO_ERROR_CANCELLED)) { + DBG_ERR("Tracker query cancelled\n"); + if (cursor != NULL) { + g_object_unref(cursor); + } + g_error_free(error); + return; + } + /* + * Ok, we're not cancelled, we can now safely use user_data. + */ + tq = talloc_get_type_abort(user_data, struct sl_tracker_query); + tq->async_pending = false; + slq = tq->slq; + /* + * Check error again, above we only checked for G_IO_ERROR_CANCELLED. 
+ */ + if (error) { + DBG_ERR("Tracker query error: %s\n", error->message); + g_error_free(error); + slq->state = SLQ_STATE_ERROR; + return; + } + + tq->cursor = cursor; + slq->state = SLQ_STATE_RESULTS; + + tracker_sparql_cursor_next_async(tq->cursor, + tq->gcancellable, + tracker_cursor_cb, + tq); + tq->async_pending = true; +} + +static char *tracker_to_unix_path(TALLOC_CTX *mem_ctx, const char *uri) +{ + GFile *f = NULL; + char *path = NULL; + char *talloc_path = NULL; + + f = g_file_new_for_uri(uri); + if (f == NULL) { + return NULL; + } + + path = g_file_get_path(f); + g_object_unref(f); + + if (path == NULL) { + return NULL; + } + + talloc_path = talloc_strdup(mem_ctx, path); + g_free(path); + if (talloc_path == NULL) { + return NULL; + } + + return talloc_path; +} + +static void tracker_cursor_cb(GObject *object, + GAsyncResult *res, + gpointer user_data) +{ + TrackerSparqlCursor *cursor = NULL; + struct sl_tracker_query *tq = NULL; + struct sl_query *slq = NULL; + const gchar *uri = NULL; + GError *error = NULL; + char *path = NULL; + gboolean more_results; + bool ok; + + cursor = TRACKER_SPARQL_CURSOR(object); + more_results = tracker_sparql_cursor_next_finish(cursor, + res, + &error); + /* + * If the async request was cancelled, user_data will already be + * talloc_free'd, so we must be carefully checking for + * G_IO_ERROR_CANCELLED before using user_data. + */ + if (error && g_error_matches(error, G_IO_ERROR, G_IO_ERROR_CANCELLED)) { + g_error_free(error); + g_object_unref(cursor); + return; + } + /* + * Ok, we're not cancelled, we can now safely use user_data. + */ + tq = talloc_get_type_abort(user_data, struct sl_tracker_query); + tq->async_pending = false; + slq = tq->slq; + /* + * Check error again, above we only checked for G_IO_ERROR_CANCELLED. 
+ */ + if (error) { + DBG_ERR("Tracker cursor: %s\n", error->message); + g_error_free(error); + slq->state = SLQ_STATE_ERROR; + return; + } + + SLQ_DEBUG(10, slq, "results"); + + if (!more_results) { + slq->state = SLQ_STATE_DONE; + + g_object_unref(tq->cursor); + tq->cursor = NULL; + + g_object_unref(tq->gcancellable); + tq->gcancellable = NULL; + return; + } + + uri = tracker_sparql_cursor_get_string(tq->cursor, 0, NULL); + if (uri == NULL) { + DBG_ERR("error fetching Tracker URI\n"); + slq->state = SLQ_STATE_ERROR; + return; + } + + path = tracker_to_unix_path(slq->query_results, uri); + if (path == NULL) { + DBG_ERR("error converting Tracker URI to path: %s\n", uri); + slq->state = SLQ_STATE_ERROR; + return; + } + + ok = mds_add_result(slq, path); + if (!ok) { + DBG_ERR("error adding result for path: %s\n", uri); + slq->state = SLQ_STATE_ERROR; + return; + } + + if (slq->query_results->num_results >= MAX_SL_RESULTS) { + slq->state = SLQ_STATE_FULL; + SLQ_DEBUG(10, slq, "full"); + return; + } + + slq->state = SLQ_STATE_RESULTS; + SLQ_DEBUG(10, slq, "cursor next"); + + tracker_sparql_cursor_next_async(tq->cursor, + tq->gcancellable, + tracker_cursor_cb, + tq); + tq->async_pending = true; +} + +/* + * This gets called once, even if the backend is not configured by the user + */ +static bool mdssvc_tracker_init(struct mdssvc_ctx *mdssvc_ctx) +{ + if (mdssvc_tracker_ctx != NULL) { + return true; + } + +#if (GLIB_MAJOR_VERSION < 3) && (GLIB_MINOR_VERSION < 36) + g_type_init(); +#endif + + mdssvc_tracker_ctx = talloc_zero(mdssvc_ctx, struct mdssvc_tracker_ctx); + if (mdssvc_tracker_ctx == NULL) { + return false; + } + mdssvc_tracker_ctx->mdssvc_ctx = mdssvc_ctx; + + return true; +} + +/* + * This gets called per mdscmd_open / tcon. This runs initialisation code that + * should only run if the tracker backend is actually used. 
+ */ +static bool mdssvc_tracker_prepare(void) +{ + if (mdssvc_tracker_ctx->gmain_ctx != NULL) { + /* + * Assuming everything is setup if gmain_ctx is. + */ + return true; + } + + mdssvc_tracker_ctx->gmain_ctx = g_main_context_new(); + if (mdssvc_tracker_ctx->gmain_ctx == NULL) { + DBG_ERR("error from g_main_context_new\n"); + TALLOC_FREE(mdssvc_tracker_ctx); + return false; + } + + mdssvc_tracker_ctx->glue = samba_tevent_glib_glue_create( + mdssvc_tracker_ctx, + mdssvc_tracker_ctx->mdssvc_ctx->ev_ctx, + mdssvc_tracker_ctx->gmain_ctx); + if (mdssvc_tracker_ctx->glue == NULL) { + DBG_ERR("samba_tevent_glib_glue_create failed\n"); + g_object_unref(mdssvc_tracker_ctx->gmain_ctx); + TALLOC_FREE(mdssvc_tracker_ctx); + return false; + } + + return true; +} + +static bool mdssvc_tracker_shutdown(struct mdssvc_ctx *mdssvc_ctx) +{ + samba_tevent_glib_glue_quit(mdssvc_tracker_ctx->glue); + TALLOC_FREE(mdssvc_tracker_ctx->glue); + + g_object_unref(mdssvc_tracker_ctx->gmain_ctx); + return true; +} + +static int mds_tracker_ctx_destructor(struct mds_tracker_ctx *ctx) +{ + /* + * Don't g_object_unref() the connection if there's an async request + * pending, it's used in the async callback and will be unreferenced + * there. 
+ */ + if (ctx->async_pending) { + g_cancellable_cancel(ctx->gcancellable); + ctx->gcancellable = NULL; + return 0; + } + + if (ctx->tracker_con == NULL) { + return 0; + } + g_object_unref(ctx->tracker_con); + ctx->tracker_con = NULL; + + return 0; +} + +static bool mds_tracker_connect(struct mds_ctx *mds_ctx) +{ + struct mds_tracker_ctx *ctx = NULL; + bool ok; + + ok = mdssvc_tracker_prepare(); + if (!ok) { + return false; + } + + ctx = talloc_zero(mds_ctx, struct mds_tracker_ctx); + if (ctx == NULL) { + return false; + } + talloc_set_destructor(ctx, mds_tracker_ctx_destructor); + + ctx->mds_ctx = mds_ctx; + + ctx->gcancellable = g_cancellable_new(); + if (ctx->gcancellable == NULL) { + DBG_ERR("error from g_cancellable_new\n"); + TALLOC_FREE(ctx); + return false; + } + + tracker_sparql_connection_get_async(ctx->gcancellable, + tracker_con_cb, + ctx); + ctx->async_pending = true; + + mds_ctx->backend_private = ctx; + + return true; +} + +static int tq_destructor(struct sl_tracker_query *tq) +{ + /* + * Don't g_object_unref() the cursor if there's an async request + * pending, it's used in the async callback and will be unreferenced + * there. 
+ */ + if (tq->async_pending) { + g_cancellable_cancel(tq->gcancellable); + tq->gcancellable = NULL; + return 0; + } + + if (tq->cursor == NULL) { + return 0; + } + g_object_unref(tq->cursor); + tq->cursor = NULL; + return 0; +} + +static bool mds_tracker_search_start(struct sl_query *slq) +{ + struct mds_tracker_ctx *tmds_ctx = talloc_get_type_abort( + slq->mds_ctx->backend_private, struct mds_tracker_ctx); + struct sl_tracker_query *tq = NULL; + char *escaped_scope = NULL; + bool ok; + + if (tmds_ctx->tracker_con == NULL) { + DBG_ERR("no connection to Tracker\n"); + return false; + } + + tq = talloc_zero(slq, struct sl_tracker_query); + if (tq == NULL) { + return false; + } + tq->slq = slq; + talloc_set_destructor(tq, tq_destructor); + + tq->gcancellable = g_cancellable_new(); + if (tq->gcancellable == NULL) { + DBG_ERR("g_cancellable_new() failed\n"); + goto error; + } + + escaped_scope = g_uri_escape_string( + slq->path_scope, + G_URI_RESERVED_CHARS_ALLOWED_IN_PATH, + TRUE); + if (escaped_scope == NULL) { + goto error; + } + + tq->path_scope = talloc_strdup(tq, escaped_scope); + g_free(escaped_scope); + escaped_scope = NULL; + if (tq->path_scope == NULL) { + goto error; + } + + slq->backend_private = tq; + + ok = map_spotlight_to_sparql_query(slq); + if (!ok) { + /* + * Two cases: + * + * 1) the query string is "false", the parser returns + * an error for that. We're supposed to return -1 + * here. 
+ * + * 2) the parsing really failed, in that case we're + * probably supposed to return -1 too, this needs + * verification though + */ + goto error; + } + + DBG_DEBUG("SPARQL query: \"%s\"\n", tq->sparql_query); + + tracker_sparql_connection_query_async(tmds_ctx->tracker_con, + tq->sparql_query, + tq->gcancellable, + tracker_query_cb, + tq); + tq->async_pending = true; + + slq->state = SLQ_STATE_RUNNING; + return true; +error: + g_object_unref(tq->gcancellable); + TALLOC_FREE(tq); + slq->backend_private = NULL; + return false; +} + +static bool mds_tracker_search_cont(struct sl_query *slq) +{ + struct sl_tracker_query *tq = talloc_get_type_abort( + slq->backend_private, struct sl_tracker_query); + + tracker_sparql_cursor_next_async(tq->cursor, + tq->gcancellable, + tracker_cursor_cb, + tq); + tq->async_pending = true; + + return true; +} + +struct mdssvc_backend mdsscv_backend_tracker = { + .init = mdssvc_tracker_init, + .shutdown = mdssvc_tracker_shutdown, + .connect = mds_tracker_connect, + .search_start = mds_tracker_search_start, + .search_cont = mds_tracker_search_cont, +}; diff --git a/source3/rpc_server/mdssvc/mdssvc_tracker.h b/source3/rpc_server/mdssvc/mdssvc_tracker.h new file mode 100644 index 0000000..54a4a33 --- /dev/null +++ b/source3/rpc_server/mdssvc/mdssvc_tracker.h @@ -0,0 +1,62 @@ +/* + Unix SMB/CIFS implementation. + Main metadata server / Spotlight routines / Tracker backend + + Copyright (C) Ralph Boehme 2019 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* allow building with --enable-developer */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-qual" +#include <gio/gio.h> +#include <tracker-sparql.h> +#pragma GCC diagnostic pop + +/* Global */ +struct mdssvc_tracker_ctx { + struct mdssvc_ctx *mdssvc_ctx; + GMainContext *gmain_ctx; + struct tevent_glib_glue *glue; +}; + +/* Per tree connect state */ +struct mds_tracker_ctx { + struct mds_ctx *mds_ctx; + GCancellable *gcancellable; + bool async_pending; + TrackerSparqlConnection *tracker_con; +}; + +/* Per query */ +struct sl_tracker_query { + struct sl_query *slq; + const char *path_scope; + const char *sparql_query; + + /* + * Notes on the lifetime of cursor: we hold a reference on the object + * and have to call g_object_unref(cursor) at the right place. This is + * either done in the talloc destructor on a struct sl_tracker_query + * talloc object when there are no tracker glib async requests + * running. Or in the glib callback after cancelling the glib async + * request. + */ + TrackerSparqlCursor *cursor; + GCancellable *gcancellable; + bool async_pending; +}; + +extern struct mdssvc_backend mdsscv_backend_tracker; diff --git a/source3/rpc_server/mdssvc/sparql_lexer.l b/source3/rpc_server/mdssvc/sparql_lexer.l new file mode 100644 index 0000000..b638350 --- /dev/null +++ b/source3/rpc_server/mdssvc/sparql_lexer.l @@ -0,0 +1,67 @@ +/* + Unix SMB/CIFS implementation. + Main metadata server / Spotlight routines + + Copyright (C) Ralph Boehme 2012-2014 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
/*
 * Lexer for Spotlight RAW query strings. Tokens are handed to the bison
 * parser through mdsyylval.
 */
%{
#include "includes.h"
#include "rpc_server/mdssvc/sparql_parser.tab.h"

#define YY_NO_INPUT
%}

%option nounput noyyalloc noyyrealloc prefix="mdsyy"

/* ASC: the single-byte characters that may appear in a WORD */
ASC     [a-zA-Z0-9_\*\:\-\.]
/* U: UTF-8 continuation byte; U2/U3/U4: lead bytes of 2/3/4 byte sequences */
U       [\x80-\xbf]
U2      [\xc2-\xdf]
U3      [\xe0-\xef]
U4      [\xf0-\xf4]

/* UANY: one WORD character, single-byte or multi-byte */
UANY    {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
/* UONLY: multi-byte sequences only (not referenced by the rules below) */
UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}

%%
InRange               return FUNC_INRANGE;
\$time\.iso           return DATE_ISO;
false                 {mdsyylval.bval = false; return BOOL;}
true                  {mdsyylval.bval = true; return BOOL;}
\"                    return QUOTE;
\(                    return OBRACE;
\)                    return CBRACE;
\&\&                  return AND;
\|\|                  return OR;
\=\=                  return EQUAL;
\!\=                  return UNEQUAL;
\=                    return EQUAL;
\<                    return LT;
\>                    return GT;
\,                    return COMMA;
{UANY}+               {mdsyylval.sval = talloc_strdup(talloc_tos(), mdsyytext); return WORD;}
[ \t\n]               /* ignore */
%%

/*
 * Allocator hooks for the scanner; they have to be supplied here because
 * the noyyalloc/noyyrealloc options above suppress flex's own versions.
 */
void *yyalloc(yy_size_t bytes)
{
	return SMB_MALLOC(bytes);
}

void *yyrealloc(void *ptr, yy_size_t bytes)
{
	return SMB_REALLOC(ptr, bytes);
}
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "replace.h" +#include "sparql_mapping.h" + +const struct sl_attr_map *sl_attr_map_by_spotlight(const char *sl_attr) +{ + static const struct sl_attr_map spotlight_sparql_attr_map[] = { + { + .spotlight_attr = "*", + .type = ssmt_fts, + .sparql_attr = "fts:match", + }, + + /* Filesystem metadata */ + { + .spotlight_attr = "kMDItemFSLabel", + .type = ssmt_num, + .sparql_attr = NULL, + }, + { + .spotlight_attr = "kMDItemDisplayName", + .type = ssmt_str, + .sparql_attr = "nfo:fileName", + }, + { + .spotlight_attr = "kMDItemFSName", + .type = ssmt_str, + .sparql_attr = "nfo:fileName", + }, + { + .spotlight_attr = "kMDItemFSContentChangeDate", + .type = ssmt_date, + .sparql_attr = "nfo:fileLastModified", + }, + { + .spotlight_attr = "kMDItemLastUsedDate", + .type = ssmt_date, + .sparql_attr = "nfo:fileLastAccessed", + }, + + /* Common metadata */ + { + .spotlight_attr = "kMDItemTextContent", + .type = ssmt_fts, + .sparql_attr = "fts:match", + }, + { + .spotlight_attr = "kMDItemContentCreationDate", + .type = ssmt_date, + .sparql_attr = "nie:contentCreated", + }, + { + .spotlight_attr = "kMDItemContentModificationDate", + .type = ssmt_date, + .sparql_attr = "nfo:fileLastModified", + }, + { + .spotlight_attr = "kMDItemAttributeChangeDate", + .type = ssmt_date, + .sparql_attr = "nfo:fileLastModified", + }, + { + .spotlight_attr = "kMDItemAuthors", + .type = ssmt_str, + .sparql_attr = "dc:creator", + }, + { + .spotlight_attr = "kMDItemCopyright", + .type = ssmt_str, + .sparql_attr = "nie:copyright", + }, + { + .spotlight_attr = "kMDItemCountry", + .type = ssmt_str, + .sparql_attr = 
"nco:country", + }, + { + .spotlight_attr = "kMDItemCreator", + .type = ssmt_str, + .sparql_attr = "dc:creator", + }, + { + .spotlight_attr = "kMDItemDurationSeconds", + .type = ssmt_num, + .sparql_attr = "nfo:duration", + }, + { + .spotlight_attr = "kMDItemNumberOfPages", + .type = ssmt_num, + .sparql_attr = "nfo:pageCount", + }, + { + .spotlight_attr = "kMDItemTitle", + .type = ssmt_str, + .sparql_attr = "nie:title", + }, + { + .spotlight_attr = "kMDItemCity", + .type = ssmt_str, + .sparql_attr = "nco:locality", + }, + { + .spotlight_attr = "kMDItemCoverage", + .type = ssmt_str, + .sparql_attr = "nco:locality", + }, + { + .spotlight_attr = "_kMDItemGroupId", + .type = ssmt_type, + .sparql_attr = NULL, + }, + { + .spotlight_attr = "kMDItemContentTypeTree", + .type = ssmt_type, + .sparql_attr = NULL, + }, + { + .spotlight_attr = "kMDItemContentType", + .type = ssmt_type, + .sparql_attr = NULL, + }, + + /* Image metadata */ + { + .spotlight_attr = "kMDItemPixelWidth", + .type = ssmt_num, + .sparql_attr = "nfo:width", + }, + { + .spotlight_attr = "kMDItemPixelHeight", + .type = ssmt_num, + .sparql_attr = "nfo:height", + }, + { + .spotlight_attr = "kMDItemColorSpace", + .type = ssmt_str, + .sparql_attr = "nexif:colorSpace", + }, + { + .spotlight_attr = "kMDItemBitsPerSample", + .type = ssmt_num, + .sparql_attr = "nfo:colorDepth", + }, + { + .spotlight_attr = "kMDItemFocalLength", + .type = ssmt_num, + .sparql_attr = "nmm:focalLength", + }, + { + .spotlight_attr = "kMDItemISOSpeed", + .type = ssmt_num, + .sparql_attr = "nmm:isoSpeed", + }, + { + .spotlight_attr = "kMDItemOrientation", + .type = ssmt_bool, + .sparql_attr = "nfo:orientation", + }, + { + .spotlight_attr = "kMDItemResolutionWidthDPI", + .type = ssmt_num, + .sparql_attr = "nfo:horizontalResolution", + }, + { + .spotlight_attr = "kMDItemResolutionHeightDPI", + .type = ssmt_num, + .sparql_attr = "nfo:verticalResolution", + }, + { + .spotlight_attr = "kMDItemExposureTimeSeconds", + .type = ssmt_num, + 
.sparql_attr = "nmm:exposureTime", + }, + + /* Audio metadata */ + { + .spotlight_attr = "kMDItemComposer", + .type = ssmt_str, + .sparql_attr = "nmm:composer", + }, + { + .spotlight_attr = "kMDItemMusicalGenre", + .type = ssmt_str, + .sparql_attr = "nfo:genre", + }, + }; + size_t i; + + for (i = 0; i < ARRAY_SIZE(spotlight_sparql_attr_map); i++) { + const struct sl_attr_map *m = &spotlight_sparql_attr_map[i]; + int cmp; + + cmp = strcmp(m->spotlight_attr, sl_attr); + if (cmp == 0) { + return m; + } + } + + return NULL; +} + +const struct sl_type_map *sl_type_map_by_spotlight(const char *sl_type) +{ + static const struct sl_type_map spotlight_sparql_type_map[] = { + { + .spotlight_type = "1", + .type = kMDTypeMapRDF, + .sparql_type = "http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#Email", + }, + { + .spotlight_type = "2", + .type = kMDTypeMapRDF, + .sparql_type = "http://www.semanticdesktop.org/ontologies/2007/03/22/nco#Contact", + }, + { + .spotlight_type = "3", + .type = kMDTypeMapNotSup, + .sparql_type = NULL, /*PrefPane*/ + }, + { + .spotlight_type = "4", + .type = kMDTypeMapRDF, + .sparql_type = "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Font", + }, + { + .spotlight_type = "5", + .type = kMDTypeMapRDF, + .sparql_type = "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Bookmark", + }, + { + .spotlight_type = "6", + .type = kMDTypeMapRDF, + .sparql_type = "http://www.semanticdesktop.org/ontologies/2007/03/22/nco#Contact", + }, + { + .spotlight_type = "7", + .type = kMDTypeMapRDF, + .sparql_type = "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Video", + }, + { + .spotlight_type = "8", + .type = kMDTypeMapRDF, + .sparql_type = "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Executable", + }, + { + .spotlight_type = "9", + .type = kMDTypeMapRDF, + .sparql_type = "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Folder", + }, + { + .spotlight_type = "10", + .type = kMDTypeMapRDF, + .sparql_type = 
"http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Audio", + }, + { + .spotlight_type = "11", + .type = kMDTypeMapMime, + .sparql_type = "application/pdf", + }, + { + .spotlight_type = "12", + .type = kMDTypeMapRDF, + .sparql_type = "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Presentation", + }, + { + .spotlight_type = "13", + .type = kMDTypeMapRDF, + .sparql_type = "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Image", + }, + { + .spotlight_type = "public.jpeg", + .type = kMDTypeMapMime, + .sparql_type = "image/jpeg", + }, + { + .spotlight_type = "public.tiff", + .type = kMDTypeMapMime, + .sparql_type = "image/tiff", + }, + { + .spotlight_type = "com.compuserve.gif", + .type = kMDTypeMapMime, + .sparql_type = "image/gif", + }, + { + .spotlight_type = "public.png", + .type = kMDTypeMapMime, + .sparql_type = "image/png", + }, + { + .spotlight_type = "com.microsoft.bmp", + .type = kMDTypeMapMime, + .sparql_type = "image/bmp", + }, + { + .spotlight_type = "public.content", + .type = kMDTypeMapRDF, + .sparql_type = "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Document", + }, + { + .spotlight_type = "public.mp3", + .type = kMDTypeMapMime, + .sparql_type = "audio/mpeg", + }, + { + .spotlight_type = "public.mpeg-4-audio", + .type = kMDTypeMapMime, + .sparql_type = "audio/x-aac", + }, + { + .spotlight_type = "com.apple.application", + .type = kMDTypeMapRDF, + .sparql_type = "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Software", + }, + { + .spotlight_type = "public.text", + .type = kMDTypeMapRDF, + .sparql_type = "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#TextDocument", + }, + { + .spotlight_type = "public.plain-text", + .type = kMDTypeMapMime, + .sparql_type = "text/plain", + }, + { + .spotlight_type = "public.rtf", + .type = kMDTypeMapMime, + .sparql_type = "text/rtf", + }, + { + .spotlight_type = "public.html", + .type = kMDTypeMapMime, + .sparql_type = "text/html", + }, + { + .spotlight_type = 
"public.xml", + .type = kMDTypeMapMime, + .sparql_type = "text/xml", + }, + { + .spotlight_type = "public.source-code", + .type = kMDTypeMapRDF, + .sparql_type = "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#SourceCode", + }, + }; + size_t i; + + for (i = 0; i < ARRAY_SIZE(spotlight_sparql_type_map); i++) { + const struct sl_type_map *m = &spotlight_sparql_type_map[i]; + int cmp; + + cmp = strcmp(m->spotlight_type, sl_type); + if (cmp == 0) { + return m; + } + } + + return NULL; +} diff --git a/source3/rpc_server/mdssvc/sparql_mapping.h b/source3/rpc_server/mdssvc/sparql_mapping.h new file mode 100644 index 0000000..496e19c --- /dev/null +++ b/source3/rpc_server/mdssvc/sparql_mapping.h @@ -0,0 +1,58 @@ +/* + Copyright (c) 2012 Ralph Boehme + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
/*
 * Types and lookup functions for mapping Spotlight attributes and types to
 * their SPARQL counterparts.
 */
#ifndef SPOTLIGHT_SPARQL_MAP_H
#define SPOTLIGHT_SPARQL_MAP_H

/* How the value of a Spotlight attribute is turned into SPARQL */
enum ssm_type {
	ssmt_bool,   /* a boolean value that doesn't require a SPARQL FILTER */
	ssmt_num,    /* a numeric value that requires a SPARQL FILTER */
	ssmt_str,    /* a string value that requires a SPARQL FILTER */
	ssmt_fts,    /* a string value that will be queried with SPARQL 'fts:match' */
	ssmt_date,   /* date values are handled in a special map function map_daterange() */
	ssmt_type    /* kMDItemContentType, requires special mapping */
};

/* One Spotlight attribute and the SPARQL attribute it maps to */
struct sl_attr_map {
	const char *spotlight_attr;
	enum ssm_type type;
	/* may be NULL, see the table in sl_attr_map_by_spotlight() */
	const char *sparql_attr;
};

enum kMDTypeMap {
	kMDTypeMapNotSup,	/* not supported */
	kMDTypeMapRDF,		/* query with rdf:type */
	kMDTypeMapMime		/* query with nie:mimeType */
};

struct sl_type_map {
	/*
	 * MD query value of attributes '_kMDItemGroupId' and
	 * 'kMDItemContentTypeTree'
	 */
	const char *spotlight_type;

	/*
	 * Whether SPARQL query must search attribute rdf:type or
	 * nie:mimeType
	 */
	enum kMDTypeMap type;

	/* the SPARQL query match string */
	const char *sparql_type;
};

/* Both lookups return NULL if the given name is not in their table. */
const struct sl_attr_map *sl_attr_map_by_spotlight(const char *sl_attr);
const struct sl_type_map *sl_type_map_by_spotlight(const char *sl_type);
#endif
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +%{ + #include "includes.h" + #include "rpc_server/mdssvc/mdssvc.h" + #include "rpc_server/mdssvc/mdssvc_tracker.h" + #include "rpc_server/mdssvc/sparql_parser.tab.h" + #include "rpc_server/mdssvc/sparql_mapping.h" + + #define YYMALLOC SMB_MALLOC + #define YYREALLOC SMB_REALLOC + + struct yy_buffer_state; + typedef struct yy_buffer_state *YY_BUFFER_STATE; + extern int mdsyylex (void); + extern void mdsyyerror (char const *); + extern void *mdsyyterminate(void); + extern YY_BUFFER_STATE mdsyy_scan_string( const char *str); + extern void mdsyy_delete_buffer ( YY_BUFFER_STATE buffer ); + + /* forward declarations */ + static const char *map_expr(const char *attr, char op, const char *val); + static const char *map_daterange(const char *dateattr, + time_t date1, time_t date2); + static time_t isodate2unix(const char *s); + + /* global vars, eg needed by the lexer */ + struct sparql_parser_state { + TALLOC_CTX *frame; + YY_BUFFER_STATE s; + char var; + const char *result; + } *global_sparql_parser_state; +%} + +%code provides { + #include <stdbool.h> + #include "rpc_server/mdssvc/mdssvc.h" + #define SPRAW_TIME_OFFSET 978307200 + extern int mdsyywrap(void); + extern bool map_spotlight_to_sparql_query(struct sl_query *slq); +} + +%union { + int ival; + const char *sval; + bool bval; + time_t tval; +} + +%name-prefix "mdsyy" +%expect 5 +%error-verbose + +%type <sval> match expr line function +%type <tval> date + +%token <sval> WORD +%token <bval> BOOL +%token FUNC_INRANGE +%token DATE_ISO +%token OBRACE CBRACE EQUAL UNEQUAL GT LT COMMA QUOTE +%left AND +%left OR +%% + +input: +/* empty */ 
+| input line +; + +line: +expr { + global_sparql_parser_state->result = $1; +} +; + +expr: +BOOL { + /* + * We can't properly handle these in expressions, fortunately this + * is probably only ever used by OS X as sole element in an + * expression ie "False" (when Finder window selected our share + * but no search string entered yet). Packet traces showed that OS + * X Spotlight server then returns a failure (ie -1) which is what + * we do here too by calling YYABORT. + */ + YYABORT; +} +/* + * We have "match OR match" and "expr OR expr", because the former is + * supposed to catch and coalesque expressions of the form + * + * MDSattribute1="hello"||MDSattribute2="hello" + * + * into a single SPARQL expression for the case where both + * MDSattribute1 and MDSattribute2 map to the same SPARQL attibute, + * which is eg the case for "*" and "kMDItemTextContent" which both + * map to SPARQL "fts:match". + */ + +| match OR match { + if (strcmp($1, $3) != 0) { + $$ = talloc_asprintf(talloc_tos(), "{ %s } UNION { %s }", $1, $3); + } else { + $$ = talloc_asprintf(talloc_tos(), "%s", $1); + } +} +| match { + $$ = $1; +} +| function { + $$ = $1; +} +| OBRACE expr CBRACE { + $$ = talloc_asprintf(talloc_tos(), "%s", $2); +} +| expr AND expr { + $$ = talloc_asprintf(talloc_tos(), "%s . 
/*
 * Parse a timestamp of the form "YYYY-MM-DDTHH:MM:SSZ" into a time_t.
 *
 * Returns (time_t)-1 if s is NULL or does not match the format. The broken
 * down time is converted with mktime().
 * NOTE(review): mktime() interprets the fields as local time although the
 * string carries a 'Z' suffix - confirm whether that is intended.
 */
static time_t isodate2unix(const char *s)
{
	/*
	 * strptime() only fills the fields named in the format, so start
	 * from a zeroed struct: mktime() must not read indeterminate
	 * values (tm_isdst in particular).
	 */
	struct tm tm = {0};
	const char *p = NULL;

	if (s == NULL) {
		return (time_t)-1;
	}

	p = strptime(s, "%Y-%m-%dT%H:%M:%SZ", &tm);
	if (p == NULL) {
		return (time_t)-1;
	}
	return mktime(&tm);
}
strftime(buf2, sizeof(buf2), "%Y-%m-%dT%H:%M:%SZ", tmp); + if (result == 0) { + return NULL; + } + + p = sl_attr_map_by_spotlight(dateattr); + if (p == NULL) { + return NULL; + } + + sparql = talloc_asprintf(talloc_tos(), + "?obj %s ?%c FILTER (?%c > '%s' && ?%c < '%s')", + p->sparql_attr, + s->var, + s->var, + buf1, + s->var, + buf2); + if (sparql == NULL) { + return NULL; + } + + s->var++; + return sparql; +} + +static char *map_type_search(const char *attr, char op, const char *val) +{ + char *result = NULL; + const char *sparqlAttr; + const struct sl_type_map *p; + + p = sl_type_map_by_spotlight(val); + if (p == NULL) { + return NULL; + } + + switch (p->type) { + case kMDTypeMapRDF: + sparqlAttr = "rdf:type"; + break; + case kMDTypeMapMime: + sparqlAttr = "nie:mimeType"; + break; + default: + return NULL; + } + + result = talloc_asprintf(talloc_tos(), "?obj %s '%s'", + sparqlAttr, + p->sparql_type); + if (result == NULL) { + return NULL; + } + + return result; +} + +static const char *map_expr(const char *attr, char op, const char *val) +{ + struct sparql_parser_state *s = global_sparql_parser_state; + int result = 0; + char *sparql = NULL; + const struct sl_attr_map *p; + time_t t; + struct tm *tmp; + char buf1[64]; + char *q; + const char *start; + + if (s->var == 'z') { + return NULL; + } + + p = sl_attr_map_by_spotlight(attr); + if (p == NULL) { + return NULL; + } + + if ((p->type != ssmt_type) && (p->sparql_attr == NULL)) { + yyerror("unsupported Spotlight attribute"); + return NULL; + } + + switch (p->type) { + case ssmt_bool: + sparql = talloc_asprintf(talloc_tos(), "?obj %s '%s'", + p->sparql_attr, val); + if (sparql == NULL) { + return NULL; + } + break; + + case ssmt_num: + sparql = talloc_asprintf(talloc_tos(), + "?obj %s ?%c FILTER(?%c %c%c '%s')", + p->sparql_attr, + s->var, + s->var, + op, + /* append '=' to '!' */ + op == '!' ? 
'=' : ' ', + val); + if (sparql == NULL) { + return NULL; + } + s->var++; + break; + + case ssmt_str: + q = talloc_strdup(talloc_tos(), ""); + if (q == NULL) { + return NULL; + } + start = val; + while (*val) { + if (*val != '*') { + val++; + continue; + } + if (val > start) { + q = talloc_strndup_append(q, start, val - start); + if (q == NULL) { + return NULL; + } + } + q = talloc_strdup_append(q, ".*"); + if (q == NULL) { + return NULL; + } + val++; + start = val; + } + if (val > start) { + q = talloc_strndup_append(q, start, val - start); + if (q == NULL) { + return NULL; + } + } + sparql = talloc_asprintf(talloc_tos(), + "?obj %s ?%c " + "FILTER(regex(?%c, '^%s$', 'i'))", + p->sparql_attr, + s->var, + s->var, + q); + TALLOC_FREE(q); + if (sparql == NULL) { + return NULL; + } + s->var++; + break; + + case ssmt_fts: + sparql = talloc_asprintf(talloc_tos(), "?obj %s '%s'", + p->sparql_attr, val); + if (sparql == NULL) { + return NULL; + } + break; + + case ssmt_date: + t = atoi(val) + SPRAW_TIME_OFFSET; + tmp = localtime(&t); + if (tmp == NULL) { + return NULL; + } + result = strftime(buf1, sizeof(buf1), + "%Y-%m-%dT%H:%M:%SZ", tmp); + if (result == 0) { + return NULL; + } + sparql = talloc_asprintf(talloc_tos(), + "?obj %s ?%c FILTER(?%c %c '%s')", + p->sparql_attr, + s->var, + s->var, + op, + buf1); + if (sparql == NULL) { + return NULL; + } + s->var++; + break; + + case ssmt_type: + sparql = map_type_search(attr, op, val); + if (sparql == NULL) { + return NULL; + } + break; + + default: + return NULL; + } + + return sparql; +} + +void mdsyyerror(const char *str) +{ + DEBUG(1, ("mdsyyerror: %s\n", str)); +} + +int mdsyywrap(void) +{ + return 1; +} + +/** + * Map a Spotlight RAW query string to a SPARQL query string + **/ +bool map_spotlight_to_sparql_query(struct sl_query *slq) +{ + struct sl_tracker_query *tq = talloc_get_type_abort( + slq->backend_private, struct sl_tracker_query); + struct sparql_parser_state s = { + .frame = talloc_stackframe(), + .var = 
'a', + }; + int result; + + s.s = mdsyy_scan_string(slq->query_string); + if (s.s == NULL) { + TALLOC_FREE(s.frame); + return false; + } + global_sparql_parser_state = &s; + result = mdsyyparse(); + global_sparql_parser_state = NULL; + mdsyy_delete_buffer(s.s); + + if (result != 0) { + TALLOC_FREE(s.frame); + return false; + } + + tq->sparql_query = talloc_asprintf(slq, + "SELECT ?url WHERE { %s . ?obj nie:url ?url . " + "FILTER(tracker:uri-is-descendant('file://%s/', ?url)) }", + s.result, tq->path_scope); + TALLOC_FREE(s.frame); + if (tq->sparql_query == NULL) { + return false; + } + + return true; +} diff --git a/source3/rpc_server/mdssvc/sparql_parser_test.c b/source3/rpc_server/mdssvc/sparql_parser_test.c new file mode 100644 index 0000000..0a0f625 --- /dev/null +++ b/source3/rpc_server/mdssvc/sparql_parser_test.c @@ -0,0 +1,47 @@ +#include "includes.h" +#include "mdssvc.h" +#include "rpc_server/mdssvc/sparql_parser.tab.h" +#include "rpc_server/mdssvc/mdssvc_tracker.h" + +/* + * Examples: + * + * $ ./spotlight2sparql '_kMDItemGroupId=="11"' + * ... + * $ ./spotlight2sparql '*=="test*"cwd||kMDItemTextContent=="test*"cwd' + * ... + */ + +int main(int argc, char **argv) +{ + struct sl_tracker_query *tq = NULL; + bool ok; + struct sl_query *slq; + + if (argc != 2) { + printf("usage: %s QUERY\n", argv[0]); + return 1; + } + + slq = talloc_zero(NULL, struct sl_query); + if (slq == NULL) { + printf("talloc error\n"); + return 1; + } + + slq->query_string = argv[1]; + slq->path_scope = "/foo/bar"; + + tq = talloc_zero(slq, struct sl_tracker_query); + if (tq == NULL) { + printf("talloc error\n"); + return 1; + } + slq->backend_private = tq; + + ok = map_spotlight_to_sparql_query(slq); + printf("%s\n", ok ? tq->sparql_query : "*mapping failed*"); + + talloc_free(slq); + return ok ? 
0 : 1; +} diff --git a/source3/rpc_server/mdssvc/srv_mdssvc_nt.c b/source3/rpc_server/mdssvc/srv_mdssvc_nt.c new file mode 100644 index 0000000..9a16624 --- /dev/null +++ b/source3/rpc_server/mdssvc/srv_mdssvc_nt.c @@ -0,0 +1,319 @@ +/* + * Unix SMB/CIFS implementation. + * RPC Pipe client / server routines for mdssvc + * Copyright (C) Ralph Boehme 2014 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "includes.h" +#include "messages.h" +#include "ntdomain.h" +#include "rpc_server/rpc_server.h" +#include "rpc_server/rpc_config.h" +#include "rpc_server/mdssvc/srv_mdssvc_nt.h" +#include "libcli/security/security_token.h" +#include "libcli/security/dom_sid.h" +#include "gen_ndr/auth.h" +#include "mdssvc.h" +#include "smbd/globals.h" + +#include "librpc/rpc/dcesrv_core.h" +#include "librpc/gen_ndr/ndr_mdssvc.h" +#include "librpc/gen_ndr/ndr_mdssvc_scompat.h" +#include "lib/global_contexts.h" + +#undef DBGC_CLASS +#define DBGC_CLASS DBGC_RPC_SRV +/* Create an mds_ctx for the share with mds_init_ctx() and wrap it in a policy handle; on any failure the handle is left zeroed and an error status is returned. */ +static NTSTATUS create_mdssvc_policy_handle(TALLOC_CTX *mem_ctx, + struct pipes_struct *p, + int snum, + const char *sharename, + const char *path, + struct policy_handle *handle) +{ + struct dcesrv_call_state *dce_call = p->dce_call; + struct auth_session_info *session_info = + dcesrv_call_session_info(dce_call); + struct mds_ctx *mds_ctx; + NTSTATUS status; + + ZERO_STRUCTP(handle); + + status = mds_init_ctx(mem_ctx, + messaging_tevent_context(p->msg_ctx), + p->msg_ctx, + session_info, + snum, + sharename, + path, + &mds_ctx); + if (!NT_STATUS_IS_OK(status)) { + DBG_DEBUG("mds_init_ctx() path [%s] failed: %s\n", + path, nt_errstr(status)); + return status; + } + + if (!create_policy_hnd(p, handle, 0, mds_ctx)) { + talloc_free(mds_ctx); + ZERO_STRUCTP(handle); + return NT_STATUS_NO_MEMORY; + } + + return NT_STATUS_OK; +} +/* mdssvc open: echo device_id, unkn2 and unkn3 back to the client, look up the share by name and, if it is valid, create a policy handle for it and return a fake share path made of a slash plus the share name. */ +void _mdssvc_open(struct pipes_struct *p, struct mdssvc_open *r) +{ + const struct loadparm_substitution *lp_sub = + loadparm_s3_global_substitution(); + int snum; + char *outpath = discard_const_p(char, r->out.share_path); + char *fake_path = NULL; + char *path; + NTSTATUS status; + + DBG_DEBUG("[%s]\n", r->in.share_name); + + *r->out.device_id = *r->in.device_id; + *r->out.unkn2 = *r->in.unkn2; + *r->out.unkn3 = *r->in.unkn3; + outpath[0] = '\0'; +/* An unknown share is not treated as a fault: the reply carries an empty share path. */ + snum = lp_servicenumber(r->in.share_name); + if (!VALID_SNUM(snum)) { + return; + } + + path = lp_path(talloc_tos(), lp_sub, snum); + 
if (path == NULL) { + DBG_ERR("Couldn't create path for %s\n", + r->in.share_name); + p->fault_state = DCERPC_FAULT_CANT_PERFORM; + return; + } +/* The client is given a slash plus the share name rather than the real share path. */ + fake_path = talloc_asprintf(p->mem_ctx, "/%s", r->in.share_name); + if (fake_path == NULL) { + DBG_ERR("Couldn't create fake share path for %s\n", + r->in.share_name); + talloc_free(path); + p->fault_state = DCERPC_FAULT_CANT_PERFORM; + return; + } + + status = create_mdssvc_policy_handle(p->mem_ctx, p, + snum, + r->in.share_name, + path, + r->out.handle); + if (NT_STATUS_EQUAL(status, NT_STATUS_WRONG_VOLUME)) { /* not a fault: reply with a zeroed handle and an empty share path */ + ZERO_STRUCTP(r->out.handle); + talloc_free(path); + talloc_free(fake_path); + return; + } + if (!NT_STATUS_IS_OK(status)) { + DBG_ERR("Couldn't create policy handle for %s\n", + r->in.share_name); + talloc_free(path); + talloc_free(fake_path); + p->fault_state = DCERPC_FAULT_CANT_PERFORM; + return; + } +/* NOTE(review): 1024 is presumably the size of the share_path output buffer - confirm against the IDL. */ + strlcpy(outpath, fake_path, 1024); + talloc_free(path); + talloc_free(fake_path); + return; +} +/* mdssvc unknown1: with a valid handle reply status 0, flags 0x6b000001 and unkn7 0; otherwise zero all outputs and raise a protocol error fault unless the handle is empty. */ +void _mdssvc_unknown1(struct pipes_struct *p, struct mdssvc_unknown1 *r) +{ + struct mds_ctx *mds_ctx; + NTSTATUS status; + + mds_ctx = find_policy_by_hnd(p, + r->in.handle, + DCESRV_HANDLE_ANY, + struct mds_ctx, + &status); + if (!NT_STATUS_IS_OK(status)) { + if (ndr_policy_handle_empty(r->in.handle)) { + p->fault_state = 0; + } else { + p->fault_state = DCERPC_NCA_S_PROTO_ERROR; + } + *r->out.status = 0; + *r->out.flags = 0; + *r->out.unkn7 = 0; + return; + } + + DEBUG(10, ("%s: path: %s\n", __func__, mds_ctx->spath)); + + *r->out.status = 0; + *r->out.flags = 0x6b000001; + *r->out.unkn7 = 0; + + return; +} +/* mdssvc cmd: after validating the handle, the caller identity and the size limits, pass the request blob to mds_dispatch() and report its outcome in unkn9. */ +void _mdssvc_cmd(struct pipes_struct *p, struct mdssvc_cmd *r) +{ + struct dcesrv_call_state *dce_call = p->dce_call; + struct auth_session_info *session_info = + dcesrv_call_session_info(dce_call); + bool ok; + struct mds_ctx *mds_ctx; + NTSTATUS status; + + mds_ctx = find_policy_by_hnd(p, + r->in.handle, + DCESRV_HANDLE_ANY, + struct mds_ctx, + &status); + if (!NT_STATUS_IS_OK(status)) { + if 
(ndr_policy_handle_empty(r->in.handle)) { + p->fault_state = 0; + } else { + p->fault_state = DCERPC_NCA_S_PROTO_ERROR; + } + r->out.response_blob->size = 0; + *r->out.fragment = 0; + *r->out.unkn9 = 0; + return; + } + + DEBUG(10, ("%s: path: %s\n", __func__, mds_ctx->spath)); +/* The SID stored in the mds_ctx must be present in the caller's security token, otherwise access is denied. */ + ok = security_token_is_sid(session_info->security_token, + &mds_ctx->sid); + if (!ok) { + struct dom_sid_buf buf; + DBG_WARNING("not the same sid: %s\n", + dom_sid_str_buf(&mds_ctx->sid, &buf)); + p->fault_state = DCERPC_FAULT_ACCESS_DENIED; + return; + } +/* The effective uid must match the one stored in the mds_ctx; a mismatch panics. */ + if (geteuid() != mds_ctx->uid) { + DEBUG(0, ("uid mismatch: %d/%d\n", geteuid(), mds_ctx->uid)); + smb_panic("uid mismatch"); + } +/* Reject requests whose blob size, blob length or requested fragment size exceed MAX_SL_FRAGMENT_SIZE. */ + if (r->in.request_blob.size > MAX_SL_FRAGMENT_SIZE) { + DEBUG(1, ("%s: request size too large\n", __func__)); + p->fault_state = DCERPC_FAULT_CANT_PERFORM; + return; + } + + if (r->in.request_blob.length > MAX_SL_FRAGMENT_SIZE) { + DEBUG(1, ("%s: request length too large\n", __func__)); + p->fault_state = DCERPC_FAULT_CANT_PERFORM; + return; + } + + if (r->in.max_fragment_size1 > MAX_SL_FRAGMENT_SIZE) { + DEBUG(1, ("%s: request fragment size too large: %u\n", + __func__, (unsigned)r->in.max_fragment_size1)); + p->fault_state = DCERPC_FAULT_CANT_PERFORM; + return; + } + + /* We currently don't use fragmentation at the mdssvc RPC layer */ + *r->out.fragment = 0; + + ok = mds_dispatch(mds_ctx, + &r->in.request_blob, + r->out.response_blob, + r->in.max_fragment_size1); + if (ok) { + *r->out.unkn9 = 0; + } else { + /* FIXME: just interpolating from AFP, needs verification */ + *r->out.unkn9 = UINT32_MAX; + } + + return; +} +/* mdssvc close: free the mds_ctx behind the handle, copy the handle to the output, close the policy handle and return status 0; an invalid handle is logged and, unless it is empty, faulted. */ +void _mdssvc_close(struct pipes_struct *p, struct mdssvc_close *r) +{ + struct mds_ctx *mds_ctx; + NTSTATUS status; + + mds_ctx = find_policy_by_hnd(p, + r->in.in_handle, + DCESRV_HANDLE_ANY, + struct mds_ctx, + &status); + if (!NT_STATUS_IS_OK(status)) { + DBG_WARNING("invalid handle\n"); + if (ndr_policy_handle_empty(r->in.in_handle)) { + p->fault_state = 0; + } else { + 
p->fault_state = DCERPC_NCA_S_PROTO_ERROR; + } + return; + } + + DBG_DEBUG("Close mdssvc handle for path: %s\n", mds_ctx->spath); + TALLOC_FREE(mds_ctx); + + *r->out.out_handle = *r->in.in_handle; + close_policy_hnd(p, r->in.in_handle); + + *r->out.status = 0; + + return; +} +/* Forward declarations; no definitions are visible in this file, presumably they come from the generated boilerplate included at the end - confirm. */ +static NTSTATUS mdssvc__op_init_server(struct dcesrv_context *dce_ctx, + const struct dcesrv_endpoint_server *ep_server); + +static NTSTATUS mdssvc__op_shutdown_server(struct dcesrv_context *dce_ctx, + const struct dcesrv_endpoint_server *ep_server); +/* NOTE(review): presumably these macros make the generated code call the wrappers below instead of its defaults - confirm in ndr_mdssvc_scompat.c. */ +#define DCESRV_INTERFACE_MDSSVC_INIT_SERVER \ + mdssvc_init_server + +#define DCESRV_INTERFACE_MDSSVC_SHUTDOWN_SERVER \ + mdssvc_shutdown_server +/* Run mds_init() with the global messaging context, then chain to mdssvc__op_init_server(); returns NT_STATUS_UNSUCCESSFUL if mds_init() fails. */ +static NTSTATUS mdssvc_init_server(struct dcesrv_context *dce_ctx, + const struct dcesrv_endpoint_server *ep_server) +{ + struct messaging_context *msg_ctx = global_messaging_context(); + bool ok; + + ok = mds_init(msg_ctx); + if (!ok) { + return NT_STATUS_UNSUCCESSFUL; + } + + return mdssvc__op_init_server(dce_ctx, ep_server); +} +/* Call mds_shutdown(), then chain to mdssvc__op_shutdown_server(). */ +static NTSTATUS mdssvc_shutdown_server(struct dcesrv_context *dce_ctx, + const struct dcesrv_endpoint_server *ep_server) +{ + mds_shutdown(); + + return mdssvc__op_shutdown_server(dce_ctx, ep_server); +} + +/* include the generated boilerplate */ +#include "librpc/gen_ndr/ndr_mdssvc_scompat.c" diff --git a/source3/rpc_server/mdssvc/srv_mdssvc_nt.h b/source3/rpc_server/mdssvc/srv_mdssvc_nt.h new file mode 100644 index 0000000..8b78f5e --- /dev/null +++ b/source3/rpc_server/mdssvc/srv_mdssvc_nt.h @@ -0,0 +1,27 @@ +/* + * Unix SMB/CIFS implementation. + * MDSSVC RPC pipe initialisation routines + * + * Copyright (C) Ralph Boehme 2014 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _SRV_MDSSVC_NT_H +#define _SRV_MDSSVC_NT_H +/* NOTE(review): srv_mdssvc_nt.c in this patch does not itself define either function - check whether these prototypes are still needed. */ +bool init_service_mdssvc(struct messaging_context *msg_ctx); +bool shutdown_service_mdssvc(void); + +#endif /* _SRV_MDSSVC_NT_H */ diff --git a/source3/rpc_server/mdssvc/test_mdsparser_es.c b/source3/rpc_server/mdssvc/test_mdsparser_es.c new file mode 100644 index 0000000..af2b8e6 --- /dev/null +++ b/source3/rpc_server/mdssvc/test_mdsparser_es.c @@ -0,0 +1,302 @@ +/* + * Unix SMB/CIFS implementation. + * Copyright (C) Ralph Boehme 2019 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "includes.h" +#include <setjmp.h> +#include <cmocka.h> +#include <jansson.h> +#include <talloc.h> +#include "lib/cmdline/cmdline.h" +#include "libcli/util/ntstatus.h" +#include "lib/util/samba_util.h" +#include "lib/torture/torture.h" +#include "lib/param/param.h" +#include "rpc_server/mdssvc/es_parser.tab.h" +/* Suffix shared by every expected query below; /foo/bar is the path_scope the test passes to the mapper. */ +#define PATH_QUERY_SUBEXPR \ + " AND path.real.fulltext:\\\"/foo/bar\\\"" +/* Pairs of a Spotlight query (mds) and the exact Elasticsearch query string (es) the mapper must produce for it. */ +static struct { + const char *mds; + const char *es; +} map[] = { + { + "*==\"samba\"", + "(samba)" PATH_QUERY_SUBEXPR + }, { + "kMDItemTextContent==\"samba\"", + "(content:samba)" PATH_QUERY_SUBEXPR + }, { + "_kMDItemGroupId==\"11\"", + "(file.content_type:(application\\\\/pdf))" PATH_QUERY_SUBEXPR + }, { + "kMDItemContentType==\"1\"", + "(file.content_type:(message\\\\/rfc822))" PATH_QUERY_SUBEXPR + }, { + "kMDItemContentType==\"public.content\"", + "(file.content_type:(message\\\\/rfc822 application\\\\/pdf application\\\\/vnd.oasis.opendocument.presentation image\\\\/* text\\\\/*))" PATH_QUERY_SUBEXPR + }, { + "kMDItemContentTypeTree==\"1\"", + "(file.content_type:(message\\\\/rfc822))" PATH_QUERY_SUBEXPR + }, { + "kMDItemFSContentChangeDate==$time.iso(2018-10-01T10:00:00Z)", + "(file.last_modified:2018\\\\-10\\\\-01T10\\\\:00\\\\:00Z)" PATH_QUERY_SUBEXPR + }, { + "kMDItemFSContentChangeDate==\"1\"", + "(file.last_modified:2001\\\\-01\\\\-01T00\\\\:00\\\\:01Z)" PATH_QUERY_SUBEXPR + }, { + "kMDItemFSCreationDate==\"1\"", + "(file.created:2001\\\\-01\\\\-01T00\\\\:00\\\\:01Z)" PATH_QUERY_SUBEXPR + }, { + "kMDItemFSName==\"samba*\"", + "(file.filename:samba*)" PATH_QUERY_SUBEXPR + }, { + "kMDItemFSOwnerGroupID==\"0\"", /* NOTE(review): the group ID query expects attributes.owner and the user ID query below expects attributes.group - looks swapped, confirm against the mappings file */ + "(attributes.owner:0)" PATH_QUERY_SUBEXPR + }, { + "kMDItemFSOwnerUserID==\"0\"", + "(attributes.group:0)" PATH_QUERY_SUBEXPR + }, { + "kMDItemFSSize==\"1\"", + "(file.filesize:1)" PATH_QUERY_SUBEXPR + }, { + "kMDItemPath==\"/foo/bar\"", + "(path.real:\\\\/foo\\\\/bar)" PATH_QUERY_SUBEXPR + }, { + "kMDItemAttributeChangeDate==\"1\"", + 
"(file.last_modified:2001\\\\-01\\\\-01T00\\\\:00\\\\:01Z)" PATH_QUERY_SUBEXPR + }, { + "kMDItemAuthors==\"Chouka\"", + "(meta.author:Chouka)" PATH_QUERY_SUBEXPR + }, { + "kMDItemContentCreationDate==\"1\"", + "(file.created:2001\\\\-01\\\\-01T00\\\\:00\\\\:01Z)" PATH_QUERY_SUBEXPR + }, { + "kMDItemContentModificationDate==\"1\"", + "(file.last_modified:2001\\\\-01\\\\-01T00\\\\:00\\\\:01Z)" PATH_QUERY_SUBEXPR + }, { + "kMDItemCreator==\"Chouka\"", + "(meta.raw.creator:Chouka)" PATH_QUERY_SUBEXPR + }, { + "kMDItemDescription==\"Dog\"", + "(meta.raw.description:Dog)" PATH_QUERY_SUBEXPR + }, { + "kMDItemDisplayName==\"Samba\"", + "(file.filename:Samba)" PATH_QUERY_SUBEXPR + }, { + "kMDItemDurationSeconds==\"1\"", + "(meta.raw.xmpDM\\\\:duration:1)" PATH_QUERY_SUBEXPR + }, { + "kMDItemNumberOfPages==\"1\"", + "(meta.raw.xmpTPg\\\\:NPages:1)" PATH_QUERY_SUBEXPR + }, { + "kMDItemTitle==\"Samba\"", + "(meta.title:Samba)" PATH_QUERY_SUBEXPR + }, { + "kMDItemAlbum==\"Red Roses for Me\"", + "(meta.raw.xmpDM\\\\:album:Red\\\\ Roses\\\\ for\\\\ Me)" PATH_QUERY_SUBEXPR + }, { + "kMDItemBitsPerSample==\"1\"", + "(meta.raw.tiff\\\\:BitsPerSample:1)" PATH_QUERY_SUBEXPR + }, { + "kMDItemPixelHeight==\"1\"", + "(meta.raw.Image\\\\ Height:1)" PATH_QUERY_SUBEXPR + }, { + "kMDItemPixelWidth==\"1\"", + "(meta.raw.Image\\\\ Width:1)" PATH_QUERY_SUBEXPR + }, { + "kMDItemResolutionHeightDPI==\"72\"", + "(meta.raw.Y\\\\ Resolution:72)" PATH_QUERY_SUBEXPR + }, { + "kMDItemResolutionWidthDPI==\"72\"", + "(meta.raw.X\\\\ Resolution:72)" PATH_QUERY_SUBEXPR + },{ + "*!=\"samba\"", + "((NOT samba))" PATH_QUERY_SUBEXPR + }, { + "kMDItemFSSize!=\"1\"", + "((NOT file.filesize:1))" PATH_QUERY_SUBEXPR + }, { + "kMDItemFSSize>\"1\"", + "(file.filesize:{1 TO *})" PATH_QUERY_SUBEXPR + }, { + "kMDItemFSSize<\"1\"", + "(file.filesize:{* TO 1})" PATH_QUERY_SUBEXPR + }, { + "kMDItemFSCreationDate!=\"1\"", + "((NOT file.created:2001\\\\-01\\\\-01T00\\\\:00\\\\:01Z))" PATH_QUERY_SUBEXPR + }, { + 
"kMDItemFSCreationDate>\"1\"", + "(file.created:{2001\\\\-01\\\\-01T00\\\\:00\\\\:01Z TO *})" PATH_QUERY_SUBEXPR + }, { + "kMDItemFSCreationDate<\"1\"", + "(file.created:{* TO 2001\\\\-01\\\\-01T00\\\\:00\\\\:01Z})" PATH_QUERY_SUBEXPR + }, { + "kMDItemFSName==\"Samba\"||kMDItemTextContent==\"Samba\"", + "(file.filename:Samba OR content:Samba)" PATH_QUERY_SUBEXPR + }, { + "kMDItemFSName==\"Samba\"&&kMDItemTextContent==\"Samba\"", + "((file.filename:Samba) AND (content:Samba))" PATH_QUERY_SUBEXPR + }, { + "InRange(kMDItemFSCreationDate,1,2)", + "(file.created:[2001\\\\-01\\\\-01T00\\\\:00\\\\:01Z TO 2001\\\\-01\\\\-01T00\\\\:00\\\\:02Z])" PATH_QUERY_SUBEXPR + }, { + "InRange(kMDItemFSSize,1,2)", + "(file.filesize:[1 TO 2])" PATH_QUERY_SUBEXPR + } +}; +/* Queries that combine the Samba term with terms such as foo or content type 666; the expected output contains only the Samba term. Checked only when the elasticsearch test mapping failures parameter is enabled. */ +static struct { + const char *mds; + const char *es; +} map_ignore_failures[] = { + { + "*==\"Samba\"||foo==\"bar\"", + "(Samba)" PATH_QUERY_SUBEXPR + }, { + "*==\"Samba\"&&foo==\"bar\"", + "(Samba)" PATH_QUERY_SUBEXPR + }, { + "*==\"Samba\"||kMDItemContentType==\"666\"", + "(Samba)" PATH_QUERY_SUBEXPR + }, { + "*==\"Samba\"&&kMDItemContentType==\"666\"", + "(Samba)" PATH_QUERY_SUBEXPR + }, { + "*==\"Samba\"||foo==\"bar\"||kMDItemContentType==\"666\"", + "(Samba)" PATH_QUERY_SUBEXPR + }, { + "*==\"Samba\"&&foo==\"bar\"&&kMDItemContentType==\"666\"", + "(Samba)" PATH_QUERY_SUBEXPR + }, { + "foo==\"bar\"||kMDItemContentType==\"666\"||*==\"Samba\"||x!=\"6\"", + "(Samba)" PATH_QUERY_SUBEXPR + }, { + "*==\"Samba\"||InRange(foo,1,2)", + "(Samba)" PATH_QUERY_SUBEXPR + }, { + "*==\"Samba\"||foo==$time.iso(2018-10-01T10:00:00Z)", + "(Samba)" PATH_QUERY_SUBEXPR + } +}; +/* Load the JSON mappings file named by the elasticsearch mappings parameter and check the table entries against map_spotlight_to_es_query(). */ +static void test_mdsparser_es(void **state) +{ + TALLOC_CTX *frame = talloc_stackframe(); + const char *path_scope = "/foo/bar"; + char *es_query = NULL; + const char *path = NULL; + json_t *mappings = NULL; + json_error_t json_error; + int i; + bool ok; + + path = lp_parm_const_string(GLOBAL_SECTION_SNUM, + "elasticsearch", + "mappings", + NULL); + 
assert_non_null(path); + + mappings = json_load_file(path, 0, &json_error); + assert_non_null(mappings); +/* Every entry in map must map successfully and match its expected string exactly. */ + for (i = 0; i < ARRAY_SIZE(map); i++) { + DBG_DEBUG("Mapping: %s\n", map[i].mds); + ok = map_spotlight_to_es_query(frame, + mappings, + path_scope, + map[i].mds, + &es_query); + assert_true(ok); + assert_string_equal(es_query, map[i].es); + } +/* The second table is only checked when the test mapping failures parameter is set. */ + if (!lp_parm_bool(GLOBAL_SECTION_SNUM, + "elasticsearch", + "test mapping failures", + false)) + { + goto done; + } + + for (i = 0; i < ARRAY_SIZE(map_ignore_failures); i++) { + DBG_DEBUG("Mapping: %s\n", map_ignore_failures[i].mds); + ok = map_spotlight_to_es_query(frame, + mappings, + path_scope, + map_ignore_failures[i].mds, + &es_query); + assert_true(ok); + assert_string_equal(es_query, map_ignore_failures[i].es); + } + +done: + json_decref(mappings); + TALLOC_FREE(frame); +} +/* Set up Samba command line handling, consume the options and run the single cmocka test with subunit output. */ +int main(int argc, const char *argv[]) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_mdsparser_es), + }; + struct poptOption long_options[] = { + POPT_AUTOHELP + POPT_COMMON_SAMBA + POPT_TABLEEND + }; + poptContext pc; + int opt; + bool ok; + TALLOC_CTX *frame = talloc_stackframe(); + + smb_init_locale(); + + ok = samba_cmdline_init(frame, + SAMBA_CMDLINE_CONFIG_CLIENT, + false /* require_smbconf */); + if (!ok) { + DBG_ERR("Failed to init cmdline parser!\n"); + TALLOC_FREE(frame); + exit(1); + } + lp_set_cmdline("log level", "1"); + + pc = samba_popt_get_context(getprogname(), + argc, + argv, + long_options, + 0); + if (pc == NULL) { + DBG_ERR("Failed to setup popt context!\n"); + TALLOC_FREE(frame); + exit(1); + } + + while ((opt = poptGetNextOpt(pc)) != -1) { + switch(opt) { + default: + fprintf(stderr, "Unknown Option: %c\n", opt); + exit(1); + } + } + + cmocka_set_message_output(CM_OUTPUT_SUBUNIT); +/* NOTE(review): pc and frame are not released before returning. */ + return cmocka_run_group_tests(tests, NULL, NULL); +} |