summary | refs | log | tree | commit | diff | stats
path: root/src/plugins/fts-solr/solr-response.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/fts-solr/solr-response.c')
-rw-r--r-- src/plugins/fts-solr/solr-response.c | 372
1 files changed, 372 insertions, 0 deletions
diff --git a/src/plugins/fts-solr/solr-response.c b/src/plugins/fts-solr/solr-response.c
new file mode 100644
index 0000000..65a6a1f
--- /dev/null
+++ b/src/plugins/fts-solr/solr-response.c
@@ -0,0 +1,372 @@
+/* Copyright (c) 2006-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "hash.h"
+#include "str.h"
+#include "istream.h"
+#include "solr-response.h"
+
+#include <expat.h>
+
+#define MAX_VALUE_LEN 2048
+
+/* Position of the parser inside the element hierarchy of the Solr XML reply.
+   The start/end handlers compare the numeric value of the state against the
+   current element depth and move between states with ++/--, so the members
+   must stay in nesting order. */
+enum solr_xml_response_state {
+ /* nothing matched yet; waiting for the "response" element */
+ SOLR_XML_RESPONSE_STATE_ROOT,
+ /* inside "response"; waiting for a "result" element */
+ SOLR_XML_RESPONSE_STATE_RESPONSE,
+ /* inside "result"; waiting for a "doc" element */
+ SOLR_XML_RESPONSE_STATE_RESULT,
+ /* inside "doc"; waiting for a field element with a known name attribute */
+ SOLR_XML_RESPONSE_STATE_DOC,
+ /* inside a recognized field element; its text is being collected */
+ SOLR_XML_RESPONSE_STATE_CONTENT
+};
+
+/* Which field of the current doc the collected character data belongs to.
+   Selected by the start handler from the element's "name" attribute. */
+enum solr_xml_content_state {
+ /* not inside a recognized field; character data is ignored */
+ SOLR_XML_CONTENT_STATE_NONE = 0,
+ /* name="uid" */
+ SOLR_XML_CONTENT_STATE_UID,
+ /* name="score" */
+ SOLR_XML_CONTENT_STATE_SCORE,
+ /* name="box" */
+ SOLR_XML_CONTENT_STATE_MAILBOX,
+ /* name="ns" */
+ SOLR_XML_CONTENT_STATE_NAMESPACE,
+ /* name="uidv" */
+ SOLR_XML_CONTENT_STATE_UIDVALIDITY,
+ /* a value was invalid or too long; character data is ignored, the end
+    handler returns early and solr_response_parse() reports failure */
+ SOLR_XML_CONTENT_STATE_ERROR
+};
+
+/* State of one incremental parse of a Solr XML reply read from an istream. */
+struct solr_response_parser {
+ /* expat parser; this struct is registered as its user data */
+ XML_Parser xml_parser;
+ /* stream the reply is read from; a reference is held until the stream
+    has been read to its end or the parser is deinitialized */
+ struct istream *input;
+
+ enum solr_xml_response_state state;
+ enum solr_xml_content_state content_state;
+ /* number of currently open elements: incremented by the start handler
+    and decremented by the end handler */
+ int depth;
+ /* character data collected for the field element being parsed */
+ string_t *buffer;
+
+ /* values of the doc currently being parsed; reset when a new doc starts */
+ uint32_t uid, uidvalidity;
+ float score;
+ /* allocated with i_strdup(); released when the next doc starts */
+ char *mailbox, *ns;
+
+ /* pool that the results and their box_id strings are allocated from */
+ pool_t result_pool;
+ /* box_id -> solr_result */
+ HASH_TABLE(char *, struct solr_result *) mailboxes;
+ /* every result created so far, in creation order */
+ ARRAY(struct solr_result *) results;
+
+ /* set once XML parsing has failed; further parse calls return -1 */
+ bool xml_failed:1;
+};
+
+/* Feed one chunk of the reply to the expat parser; done=TRUE marks the final
+   chunk. Returns 0 on success and -1 on a parse error. After the first error
+   has been logged every later call returns -1 without parsing anything. */
+static int
+solr_xml_parse(struct solr_response_parser *parser,
+	       const void *data, size_t size, bool done)
+{
+	enum XML_Error xml_err;
+	int err_line, err_col;
+
+	if (parser->xml_failed)
+		return -1;
+
+	if (XML_Parse(parser->xml_parser, data, size, done ? 1 : 0) != 0)
+		return 0;
+
+	xml_err = XML_GetErrorCode(parser->xml_parser);
+	if (xml_err == XML_ERROR_FINISHED) {
+		/* this error code is not treated as a failure */
+		return 0;
+	}
+
+	err_line = XML_GetCurrentLineNumber(parser->xml_parser);
+	err_col = XML_GetCurrentColumnNumber(parser->xml_parser);
+	/* log at most the first 128 bytes of the offending chunk */
+	i_error("fts_solr: Invalid XML input at %d:%d: %s "
+		"(near: %.*s)", err_line, err_col, XML_ErrorString(xml_err),
+		(int)I_MIN(size, 128), (const char *)data);
+	parser->xml_failed = TRUE;
+	return -1;
+}
+
+/* Return the value of the "name" attribute from a NULL-terminated list of
+   alternating attribute names and values, or "" if the list has none. */
+static const char *attrs_get_name(const char **attrs)
+{
+	const char **attr = attrs;
+
+	while (*attr != NULL) {
+		if (strcmp(attr[0], "name") == 0)
+			return attr[1];
+		/* step over this name/value pair */
+		attr += 2;
+	}
+	return "";
+}
+
+/* expat start-element handler. Counts the element depth and advances the
+   response -> result -> doc -> field state machine, but only when the
+   expected element is opened directly below the element that belongs to the
+   current state. Deeper elements are counted and otherwise ignored.
+   context is the struct solr_response_parser; attrs is the NULL-terminated
+   name/value list searched by attrs_get_name(). */
+static void
+solr_lookup_xml_start(void *context, const char *name, const char **attrs)
+{
+ struct solr_response_parser *parser = context;
+ const char *name_attr;
+
+ i_assert(parser->depth >= (int)parser->state);
+
+ parser->depth++;
+ /* the depth before this element was opened is deeper than the current
+    state, so this element lives inside something we did not recognize */
+ if (parser->depth - 1 > (int)parser->state) {
+ /* skipping over unwanted elements */
+ return;
+ }
+
+ /* drop any character data collected so far */
+ str_truncate(parser->buffer, 0);
+
+ /* response -> result -> doc */
+ switch (parser->state) {
+ case SOLR_XML_RESPONSE_STATE_ROOT:
+ if (strcmp(name, "response") == 0)
+ parser->state++;
+ break;
+ case SOLR_XML_RESPONSE_STATE_RESPONSE:
+ if (strcmp(name, "result") == 0)
+ parser->state++;
+ break;
+ case SOLR_XML_RESPONSE_STATE_RESULT:
+ if (strcmp(name, "doc") == 0) {
+ /* a new doc begins: forget the values of the previous one */
+ parser->state++;
+ parser->uid = 0;
+ parser->score = 0;
+ i_free_and_null(parser->mailbox);
+ i_free_and_null(parser->ns);
+ parser->uidvalidity = 0;
+ }
+ break;
+ case SOLR_XML_RESPONSE_STATE_DOC:
+ /* fields are identified by their name attribute, not by the element
+    name; an unrecognized name leaves the state unchanged, so the
+    element and its contents are skipped */
+ name_attr = attrs_get_name(attrs);
+ if (strcmp(name_attr, "uid") == 0)
+ parser->content_state = SOLR_XML_CONTENT_STATE_UID;
+ else if (strcmp(name_attr, "score") == 0)
+ parser->content_state = SOLR_XML_CONTENT_STATE_SCORE;
+ else if (strcmp(name_attr, "box") == 0)
+ parser->content_state = SOLR_XML_CONTENT_STATE_MAILBOX;
+ else if (strcmp(name_attr, "ns") == 0)
+ parser->content_state = SOLR_XML_CONTENT_STATE_NAMESPACE;
+ else if (strcmp(name_attr, "uidv") == 0)
+ parser->content_state = SOLR_XML_CONTENT_STATE_UIDVALIDITY;
+ else
+ break;
+ parser->state++;
+ break;
+ case SOLR_XML_RESPONSE_STATE_CONTENT:
+ /* an element opened directly inside a recognized field: no state
+    change (the buffer has already been truncated above) */
+ break;
+ }
+}
+
+/* Return the result entry for box_id, creating and registering a new one
+   (allocated from the result pool) the first time the id is seen. */
+static struct solr_result *
+solr_result_get(struct solr_response_parser *parser, const char *box_id)
+{
+	struct solr_result *box_result;
+	char *key;
+
+	box_result = hash_table_lookup(parser->mailboxes, box_id);
+	if (box_result == NULL) {
+		/* the hash table key and result->box_id share one copy of
+		   the string */
+		key = p_strdup(parser->result_pool, box_id);
+		box_result = p_new(parser->result_pool, struct solr_result, 1);
+		box_result->box_id = key;
+		p_array_init(&box_result->uids, parser->result_pool, 32);
+		p_array_init(&box_result->scores, parser->result_pool, 32);
+		hash_table_insert(parser->mailboxes, key, box_result);
+		array_push_back(&parser->results, &box_result);
+	}
+	return box_result;
+}
+
+/* Record the doc that was just parsed in the result of its mailbox.
+   Returns 0 on success, -1 if the doc had no uid. */
+static int solr_lookup_add_doc(struct solr_response_parser *parser)
+{
+	struct solr_result *box_result;
+	struct fts_score_map *score_entry;
+	const char *box_id;
+
+	if (parser->uid == 0) {
+		i_error("fts_solr: uid missing from inside doc");
+		return -1;
+	}
+
+	if (parser->mailbox == NULL) {
+		/* looking up from a single mailbox only */
+		box_id = "";
+	} else if (parser->uidvalidity == 0) {
+		/* new style lookup */
+		box_id = parser->mailbox;
+	} else {
+		/* old style lookup: uidvalidity \001 mailbox [\001 namespace] */
+		string_t *key = t_str_new(64);
+
+		str_printfa(key, "%u\001", parser->uidvalidity);
+		str_append(key, parser->mailbox);
+		if (parser->ns != NULL)
+			str_printfa(key, "\001%s", parser->ns);
+		box_id = str_c(key);
+	}
+	box_result = solr_result_get(parser, box_id);
+
+	if (seq_range_array_add(&box_result->uids, parser->uid)) {
+		/* duplicate result */
+		return 0;
+	}
+	if (parser->score != 0) {
+		/* scores are stored only when they are non-zero */
+		score_entry = array_append_space(&box_result->scores);
+		score_entry->uid = parser->uid;
+		score_entry->score = parser->score;
+	}
+	return 0;
+}
+
+/* expat end-element handler. Converts the collected character data into the
+   field selected by content_state, and when the closing element is the one
+   that belongs to the current state, steps the state machine back by one
+   level. Closing a doc element adds the doc to the results.
+   context is the struct solr_response_parser. */
+static void solr_lookup_xml_end(void *context, const char *name ATTR_UNUSED)
+{
+	struct solr_response_parser *parser = context;
+	string_t *buf = parser->buffer;
+	int ret;
+
+	switch (parser->content_state) {
+	case SOLR_XML_CONTENT_STATE_NONE:
+		break;
+	case SOLR_XML_CONTENT_STATE_UID:
+		if (str_to_uint32(str_c(buf), &parser->uid) < 0 ||
+		    parser->uid == 0) {
+			i_error("fts_solr: received invalid uid '%s'",
+				str_c(buf));
+			/* NOTE(review): when this is the field's own end tag
+			   the block below resets content_state to NONE, so
+			   this marker does not persist by itself - confirm
+			   that relying on the later "uid missing" check is
+			   intended */
+			parser->content_state = SOLR_XML_CONTENT_STATE_ERROR;
+		}
+		break;
+	case SOLR_XML_CONTENT_STATE_SCORE:
+		parser->score = strtod(str_c(buf), NULL);
+		break;
+	case SOLR_XML_CONTENT_STATE_MAILBOX:
+		/* This handler is not limited to the field's own end tag: it
+		   also runs for elements nested inside the field and for a
+		   repeated field in the same doc. Release the earlier copy
+		   so it isn't leaked. */
+		i_free(parser->mailbox);
+		parser->mailbox = i_strdup(str_c(buf));
+		break;
+	case SOLR_XML_CONTENT_STATE_NAMESPACE:
+		/* same as for mailbox: don't leak an earlier value */
+		i_free(parser->ns);
+		parser->ns = i_strdup(str_c(buf));
+		break;
+	case SOLR_XML_CONTENT_STATE_UIDVALIDITY:
+		if (str_to_uint32(str_c(buf), &parser->uidvalidity) < 0)
+			i_error("fts_solr: received invalid uidvalidity");
+		break;
+	case SOLR_XML_CONTENT_STATE_ERROR:
+		/* parsing has failed: leave depth and state untouched */
+		return;
+	}
+
+	i_assert(parser->depth >= (int)parser->state);
+
+	/* NOTE(review): the MAILBOX case above always assigns
+	   parser->mailbox, so this looks unreachable - kept unchanged */
+	if (parser->state == SOLR_XML_RESPONSE_STATE_CONTENT &&
+	    parser->content_state == SOLR_XML_CONTENT_STATE_MAILBOX &&
+	    parser->mailbox == NULL) {
+		/* mailbox is namespace prefix */
+		parser->mailbox = i_strdup("");
+	}
+
+	if (parser->depth == (int)parser->state) {
+		/* closing the element that the current state belongs to */
+		ret = 0;
+		if (parser->state == SOLR_XML_RESPONSE_STATE_DOC) {
+			/* the doc is complete; the data stack frame covers
+			   the temporary box_id string built while adding */
+			T_BEGIN {
+				ret = solr_lookup_add_doc(parser);
+			} T_END;
+		}
+		parser->state--;
+		if (ret < 0)
+			parser->content_state = SOLR_XML_CONTENT_STATE_ERROR;
+		else
+			parser->content_state = SOLR_XML_CONTENT_STATE_NONE;
+	}
+	parser->depth--;
+}
+
+/* expat character-data handler. Appends the text to the value buffer while
+   a recognized field is being parsed; values that would grow beyond
+   MAX_VALUE_LEN bytes put the parser into the error state instead. */
+static void solr_lookup_xml_data(void *context, const char *str, int len)
+{
+	struct solr_response_parser *parser = context;
+
+	if (parser->content_state == SOLR_XML_CONTENT_STATE_NONE ||
+	    parser->content_state == SOLR_XML_CONTENT_STATE_ERROR) {
+		/* ignore element data */
+		return;
+	}
+
+	if (str_len(parser->buffer) + len <= MAX_VALUE_LEN) {
+		str_append_data(parser->buffer, str, len);
+		return;
+	}
+
+	i_error("fts_solr: XML element data length out of range");
+	parser->content_state = SOLR_XML_CONTENT_STATE_ERROR;
+}
+
+/* Create a parser that reads a Solr XML reply from input and allocates its
+   results from result_pool. A reference is taken on both the pool and the
+   stream. Failure to allocate the XML parser is fatal. */
+struct solr_response_parser *
+solr_response_parser_init(pool_t result_pool, struct istream *input)
+{
+	struct solr_response_parser *parser;
+	XML_Parser xml;
+
+	parser = i_new(struct solr_response_parser, 1);
+
+	xml = XML_ParserCreate("UTF-8");
+	parser->xml_parser = xml;
+	if (xml == NULL) {
+		i_fatal_status(FATAL_OUTOFMEM,
+			       "fts_solr: Failed to allocate XML parser");
+	}
+
+	/* parser-owned scratch state */
+	parser->buffer = str_new(default_pool, 256);
+	hash_table_create(&parser->mailboxes, default_pool, 0,
+			  str_hash, strcmp);
+
+	/* result storage lives in the caller's pool */
+	pool_ref(result_pool);
+	parser->result_pool = result_pool;
+	p_array_init(&parser->results, result_pool, 32);
+
+	i_stream_ref(input);
+	parser->input = input;
+
+	parser->xml_failed = FALSE;
+	XML_SetElementHandler(xml, solr_lookup_xml_start, solr_lookup_xml_end);
+	XML_SetCharacterDataHandler(xml, solr_lookup_xml_data);
+	XML_SetUserData(xml, parser);
+
+	return parser;
+}
+
+/* Free the parser and drop its references to the input stream and the
+   result pool. *_parser is set to NULL; a NULL parser is accepted. */
+void solr_response_parser_deinit(struct solr_response_parser **_parser)
+{
+	struct solr_response_parser *parser = *_parser;
+
+	*_parser = NULL;
+
+	if (parser == NULL)
+		return;
+
+	/* mailbox and ns of the last parsed doc are otherwise released only
+	   when another doc starts, so free them here to avoid leaking them */
+	i_free(parser->mailbox);
+	i_free(parser->ns);
+
+	str_free(&parser->buffer);
+	hash_table_destroy(&parser->mailboxes);
+	XML_ParserFree(parser->xml_parser);
+	i_stream_unref(&parser->input);
+	pool_unref(&parser->result_pool);
+	i_free(parser);
+}
+
+/* Parse whatever reply data is currently available from the input stream.
+   Returns 0 if the function needs to be called again for more data, -1 on
+   failure (stream error, invalid XML or invalid field value) and 1 when the
+   whole reply has been parsed. On completion *box_results_r points to the
+   array of results, terminated by a NULL entry. */
+int solr_response_parse(struct solr_response_parser *parser,
+			struct solr_result ***box_results_r)
+{
+	const unsigned char *chunk;
+	size_t chunk_size;
+	int read_ret, input_errno, parse_ret;
+
+	i_assert(parser->input != NULL);
+	i_zero(box_results_r);
+
+	/* read payload */
+	for (;;) {
+		read_ret = i_stream_read_more(parser->input,
+					      &chunk, &chunk_size);
+		if (read_ret <= 0)
+			break;
+		/* a parse error is remembered in the parser; keep consuming
+		   the stream regardless */
+		(void)solr_xml_parse(parser, chunk, chunk_size, FALSE);
+		i_stream_skip(parser->input, chunk_size);
+	}
+
+	if (read_ret == 0) {
+		/* we will be called again for more data */
+		return 0;
+	}
+
+	/* no more input will arrive: remember the stream's error status and
+	   let go of the stream */
+	input_errno = parser->input->stream_errno;
+	i_stream_unref(&parser->input);
+
+	if (parser->content_state == SOLR_XML_CONTENT_STATE_ERROR)
+		return -1;
+	if (input_errno != 0)
+		return -1;
+
+	/* tell the XML parser that the document is complete */
+	parse_ret = solr_xml_parse(parser, "", 0, TRUE);
+
+	array_append_zero(&parser->results);
+	*box_results_r = array_front_modifiable(&parser->results);
+	if (parse_ret != 0)
+		return -1;
+	return 1;
+}