summaryrefslogtreecommitdiffstats
path: root/src/plugins/fts/fts-parser-html.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 09:51:24 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 09:51:24 +0000
commitf7548d6d28c313cf80e6f3ef89aed16a19815df1 (patch)
treea3f6f2a3f247293bee59ecd28e8cd8ceb6ca064a /src/plugins/fts/fts-parser-html.c
parentInitial commit. (diff)
downloaddovecot-upstream.tar.xz
dovecot-upstream.zip
Adding upstream version 1:2.3.19.1+dfsg1.upstream/1%2.3.19.1+dfsg1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/plugins/fts/fts-parser-html.c')
-rw-r--r--src/plugins/fts/fts-parser-html.c64
1 files changed, 64 insertions, 0 deletions
diff --git a/src/plugins/fts/fts-parser-html.c b/src/plugins/fts/fts-parser-html.c
new file mode 100644
index 0000000..aa2078d
--- /dev/null
+++ b/src/plugins/fts/fts-parser-html.c
@@ -0,0 +1,64 @@
+/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "buffer.h"
+#include "message-parser.h"
+#include "mail-html2text.h"
+#include "fts-parser.h"
+
+struct html_fts_parser { /* state of one HTML-to-text FTS parser instance */
+ struct fts_parser parser; /* generic parser; kept as first member because the callbacks cast struct fts_parser * back to this type */
+ struct mail_html2text *html2text; /* converter created by mail_html2text_init() in try_init */
+ buffer_t *output; /* receives converted text; emptied at the start of each non-empty more() call */
+};
+
+/* Try to create an HTML FTS parser for the given context.
+
+   Returns NULL when mail_html2text_content_type_match() rejects the
+   context's content type. Otherwise allocates a html_fts_parser, sets up
+   its converter and output buffer, and returns a pointer to the generic
+   parser embedded in it. The instance is released by
+   fts_parser_html_deinit(). */
+static struct fts_parser *
+fts_parser_html_try_init(struct fts_parser_context *parser_context)
+{
+	struct html_fts_parser *html_parser = NULL;
+
+	if (mail_html2text_content_type_match(parser_context->content_type)) {
+		html_parser = i_new(struct html_fts_parser, 1);
+		/* each instance carries its own copy of the callback table */
+		html_parser->parser.v = fts_parser_html;
+		html_parser->html2text = mail_html2text_init(0);
+		html_parser->output =
+			buffer_create_dynamic(default_pool, 4096);
+	}
+
+	return html_parser == NULL ? NULL : &html_parser->parser;
+}
+
+/* Convert one block of HTML input into text, in place.
+
+   A block with size 0 is left untouched. For any other block the input is
+   fed through mail_html2text_more() and block->data / block->size are
+   redirected to the parser's output buffer. That buffer is emptied at the
+   start of every non-empty call, so the returned data is only usable until
+   the next call on the same parser. */
+static void fts_parser_html_more(struct fts_parser *_parser,
+				 struct message_block *block)
+{
+	struct html_fts_parser *html_parser =
+		(struct html_fts_parser *)_parser;
+
+	if (block->size != 0) {
+		buffer_t *out = html_parser->output;
+
+		/* discard the text produced by the previous call */
+		buffer_set_used_size(out, 0);
+		mail_html2text_more(html_parser->html2text,
+				    block->data, block->size, out);
+
+		/* hand the converted text back through the same block */
+		block->data = out->data;
+		block->size = out->used;
+	}
+	/* size == 0: finished, nothing to convert */
+}
+
+/* Destroy an HTML FTS parser created by fts_parser_html_try_init().
+
+   Releases the converter, the output buffer and the parser itself.
+   retriable_err_msg_r is never written. Always returns 1. */
+static int fts_parser_html_deinit(struct fts_parser *_parser,
+				  const char **retriable_err_msg_r ATTR_UNUSED)
+{
+	struct html_fts_parser *html_parser =
+		(struct html_fts_parser *)_parser;
+
+	/* tear down the members first, then the containing struct */
+	mail_html2text_deinit(&html_parser->html2text);
+	buffer_free(&html_parser->output);
+	i_free(html_parser);
+
+	return 1;
+}
+
+struct fts_parser_vfuncs fts_parser_html = { /* callback table for the HTML parser; try_init copies it into each new instance */
+ fts_parser_html_try_init, /* creates a parser, or returns NULL if the content type does not match */
+ fts_parser_html_more, /* converts one input block to text in place */
+ fts_parser_html_deinit, /* frees the parser; always returns 1 */
+ NULL /* no callback provided for the fourth slot */
+};