diff options
Diffstat (limited to 'src/plugins/fts-solr/solr-response.c')
-rw-r--r-- | src/plugins/fts-solr/solr-response.c | 372 |
1 files changed, 372 insertions, 0 deletions
diff --git a/src/plugins/fts-solr/solr-response.c b/src/plugins/fts-solr/solr-response.c new file mode 100644 index 0000000..65a6a1f --- /dev/null +++ b/src/plugins/fts-solr/solr-response.c @@ -0,0 +1,372 @@ +/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "hash.h" +#include "str.h" +#include "istream.h" +#include "solr-response.h" + +#include <expat.h> + +#define MAX_VALUE_LEN 2048 + +enum solr_xml_response_state { + SOLR_XML_RESPONSE_STATE_ROOT, + SOLR_XML_RESPONSE_STATE_RESPONSE, + SOLR_XML_RESPONSE_STATE_RESULT, + SOLR_XML_RESPONSE_STATE_DOC, + SOLR_XML_RESPONSE_STATE_CONTENT +}; + +enum solr_xml_content_state { + SOLR_XML_CONTENT_STATE_NONE = 0, + SOLR_XML_CONTENT_STATE_UID, + SOLR_XML_CONTENT_STATE_SCORE, + SOLR_XML_CONTENT_STATE_MAILBOX, + SOLR_XML_CONTENT_STATE_NAMESPACE, + SOLR_XML_CONTENT_STATE_UIDVALIDITY, + SOLR_XML_CONTENT_STATE_ERROR +}; + +struct solr_response_parser { + XML_Parser xml_parser; + struct istream *input; + + enum solr_xml_response_state state; + enum solr_xml_content_state content_state; + int depth; + string_t *buffer; + + uint32_t uid, uidvalidity; + float score; + char *mailbox, *ns; + + pool_t result_pool; + /* box_id -> solr_result */ + HASH_TABLE(char *, struct solr_result *) mailboxes; + ARRAY(struct solr_result *) results; + + bool xml_failed:1; +}; + +static int +solr_xml_parse(struct solr_response_parser *parser, + const void *data, size_t size, bool done) +{ + enum XML_Error err; + int line, col; + + if (parser->xml_failed) + return -1; + + if (XML_Parse(parser->xml_parser, data, size, done ? 1 : 0) != 0) + return 0; + + err = XML_GetErrorCode(parser->xml_parser); + if (err != XML_ERROR_FINISHED) { + line = XML_GetCurrentLineNumber(parser->xml_parser); + col = XML_GetCurrentColumnNumber(parser->xml_parser); + i_error("fts_solr: Invalid XML input at %d:%d: %s " + "(near: %.*s)", line, col, XML_ErrorString(err), + (int)I_MIN(size, 128), (const char *)data); + parser->xml_failed = TRUE; + return -1; + } + return 0; +} + +static const char *attrs_get_name(const char **attrs) +{ + for (; *attrs != NULL; attrs += 2) { + if (strcmp(attrs[0], "name") == 0) + return attrs[1]; + } + return ""; +} + +static void +solr_lookup_xml_start(void *context, const char *name, const char **attrs) +{ + struct solr_response_parser *parser = context; + const char *name_attr; + + i_assert(parser->depth >= (int)parser->state); + + parser->depth++; + if (parser->depth - 1 > (int)parser->state) { + /* skipping over unwanted elements */ + return; + } + + str_truncate(parser->buffer, 0); + + /* response -> result -> doc */ + switch (parser->state) { + case SOLR_XML_RESPONSE_STATE_ROOT: + if (strcmp(name, "response") == 0) + parser->state++; + break; + case SOLR_XML_RESPONSE_STATE_RESPONSE: + if (strcmp(name, "result") == 0) + parser->state++; + break; + case SOLR_XML_RESPONSE_STATE_RESULT: + if (strcmp(name, "doc") == 0) { + parser->state++; + parser->uid = 0; + parser->score = 0; + i_free_and_null(parser->mailbox); + i_free_and_null(parser->ns); + parser->uidvalidity = 0; + } + break; + case SOLR_XML_RESPONSE_STATE_DOC: + name_attr = attrs_get_name(attrs); + if (strcmp(name_attr, "uid") == 0) + parser->content_state = SOLR_XML_CONTENT_STATE_UID; + else if (strcmp(name_attr, "score") == 0) + parser->content_state = SOLR_XML_CONTENT_STATE_SCORE; + else if (strcmp(name_attr, "box") == 0) + parser->content_state = SOLR_XML_CONTENT_STATE_MAILBOX; + else if (strcmp(name_attr, "ns") == 0) + parser->content_state = SOLR_XML_CONTENT_STATE_NAMESPACE; + else if (strcmp(name_attr, "uidv") == 0) + parser->content_state = SOLR_XML_CONTENT_STATE_UIDVALIDITY; + else + break; + parser->state++; + break; + case SOLR_XML_RESPONSE_STATE_CONTENT: + break; + } +} + +static struct solr_result * +solr_result_get(struct solr_response_parser *parser, const char *box_id) +{ + struct solr_result *result; + char *box_id_dup; + + result = hash_table_lookup(parser->mailboxes, box_id); + if (result != NULL) + return result; + + box_id_dup = p_strdup(parser->result_pool, box_id); + result = p_new(parser->result_pool, struct solr_result, 1); + result->box_id = box_id_dup; + p_array_init(&result->uids, parser->result_pool, 32); + p_array_init(&result->scores, parser->result_pool, 32); + hash_table_insert(parser->mailboxes, box_id_dup, result); + array_push_back(&parser->results, &result); + return result; +} + +static int solr_lookup_add_doc(struct solr_response_parser *parser) +{ + struct fts_score_map *score; + struct solr_result *result; + const char *box_id; + + if (parser->uid == 0) { + i_error("fts_solr: uid missing from inside doc"); + return -1; + } + + if (parser->mailbox == NULL) { + /* looking up from a single mailbox only */ + box_id = ""; + } else if (parser->uidvalidity != 0) { + /* old style lookup */ + string_t *str = t_str_new(64); + str_printfa(str, "%u\001", parser->uidvalidity); + str_append(str, parser->mailbox); + if (parser->ns != NULL) + str_printfa(str, "\001%s", parser->ns); + box_id = str_c(str); + } else { + /* new style lookup */ + box_id = parser->mailbox; + } + result = solr_result_get(parser, box_id); + + if (seq_range_array_add(&result->uids, parser->uid)) { + /* duplicate result */ + } else if (parser->score != 0) { + score = array_append_space(&result->scores); + score->uid = parser->uid; + score->score = parser->score; + } + return 0; +} + +static void solr_lookup_xml_end(void *context, const char *name ATTR_UNUSED) +{ + struct solr_response_parser *parser = context; + string_t *buf = parser->buffer; + int ret; + + switch (parser->content_state) { + case SOLR_XML_CONTENT_STATE_NONE: + break; + case SOLR_XML_CONTENT_STATE_UID: + if (str_to_uint32(str_c(buf), &parser->uid) < 0 || + parser->uid == 0) { + i_error("fts_solr: received invalid uid '%s'", + str_c(buf)); + parser->content_state = SOLR_XML_CONTENT_STATE_ERROR; + } + break; + case SOLR_XML_CONTENT_STATE_SCORE: + parser->score = strtod(str_c(buf), NULL); + break; + case SOLR_XML_CONTENT_STATE_MAILBOX: + parser->mailbox = i_strdup(str_c(buf)); + break; + case SOLR_XML_CONTENT_STATE_NAMESPACE: + parser->ns = i_strdup(str_c(buf)); + break; + case SOLR_XML_CONTENT_STATE_UIDVALIDITY: + if (str_to_uint32(str_c(buf), &parser->uidvalidity) < 0) + i_error("fts_solr: received invalid uidvalidity"); + break; + case SOLR_XML_CONTENT_STATE_ERROR: + return; + } + + i_assert(parser->depth >= (int)parser->state); + + if (parser->state == SOLR_XML_RESPONSE_STATE_CONTENT && + parser->content_state == SOLR_XML_CONTENT_STATE_MAILBOX && + parser->mailbox == NULL) { + /* mailbox is namespace prefix */ + parser->mailbox = i_strdup(""); + } + + if (parser->depth == (int)parser->state) { + ret = 0; + if (parser->state == SOLR_XML_RESPONSE_STATE_DOC) { + T_BEGIN { + ret = solr_lookup_add_doc(parser); + } T_END; + } + parser->state--; + if (ret < 0) + parser->content_state = SOLR_XML_CONTENT_STATE_ERROR; + else + parser->content_state = SOLR_XML_CONTENT_STATE_NONE; + } + parser->depth--; +} + +static void solr_lookup_xml_data(void *context, const char *str, int len) +{ + struct solr_response_parser *parser = context; + + switch (parser->content_state) { + case SOLR_XML_CONTENT_STATE_NONE: + case SOLR_XML_CONTENT_STATE_ERROR: + /* ignore element data */ + return; + case SOLR_XML_CONTENT_STATE_UID: + case SOLR_XML_CONTENT_STATE_SCORE: + case SOLR_XML_CONTENT_STATE_MAILBOX: + case SOLR_XML_CONTENT_STATE_NAMESPACE: + case SOLR_XML_CONTENT_STATE_UIDVALIDITY: + break; + } + + if (str_len(parser->buffer) + len > MAX_VALUE_LEN) { + i_error("fts_solr: XML element data length out of range"); + parser->content_state = SOLR_XML_CONTENT_STATE_ERROR; + return; + } + + str_append_data(parser->buffer, str, len); +} + +struct solr_response_parser * +solr_response_parser_init(pool_t result_pool, struct istream *input) +{ + struct solr_response_parser *parser; + + parser = i_new(struct solr_response_parser, 1); + + parser->xml_parser = XML_ParserCreate("UTF-8"); + if (parser->xml_parser == NULL) { + i_fatal_status(FATAL_OUTOFMEM, + "fts_solr: Failed to allocate XML parser"); + } + + parser->buffer = str_new(default_pool, 256); + hash_table_create(&parser->mailboxes, default_pool, 0, + str_hash, strcmp); + + parser->result_pool = result_pool; + pool_ref(result_pool); + p_array_init(&parser->results, result_pool, 32); + + parser->input = input; + i_stream_ref(input); + + parser->xml_failed = FALSE; + XML_SetElementHandler(parser->xml_parser, + solr_lookup_xml_start, solr_lookup_xml_end); + XML_SetCharacterDataHandler(parser->xml_parser, solr_lookup_xml_data); + XML_SetUserData(parser->xml_parser, parser); + + return parser; +} + +void solr_response_parser_deinit(struct solr_response_parser **_parser) +{ + struct solr_response_parser *parser = *_parser; + + *_parser = NULL; + + if (parser == NULL) + return; + + str_free(&parser->buffer); + hash_table_destroy(&parser->mailboxes); + XML_ParserFree(parser->xml_parser); + i_stream_unref(&parser->input); + pool_unref(&parser->result_pool); + i_free(parser); +} + +int solr_response_parse(struct solr_response_parser *parser, + struct solr_result ***box_results_r) +{ + const unsigned char *data; + size_t size; + int stream_errno, ret; + + i_assert(parser->input != NULL); + i_zero(box_results_r); + + /* read payload */ + while ((ret = i_stream_read_more(parser->input, &data, &size)) > 0) { + (void)solr_xml_parse(parser, data, size, FALSE); + i_stream_skip(parser->input, size); + } + + if (ret == 0) { + /* we will be called again for more data */ + return 0; + } + + stream_errno = parser->input->stream_errno; + i_stream_unref(&parser->input); + + if (parser->content_state == SOLR_XML_CONTENT_STATE_ERROR) + return -1; + if (stream_errno != 0) + return -1; + + ret = solr_xml_parse(parser, "", 0, TRUE); + + array_append_zero(&parser->results); + *box_results_r = array_front_modifiable(&parser->results); + return (ret == 0 ? 1 : -1); +} |