summaryrefslogtreecommitdiffstats
path: root/src/plugins/fts/fts-parser-html.c
blob: aa2078dfc80b36a32358ce3967cf3ab9390feb12 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
/* Copyright (c) 2011-2018 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "buffer.h"
#include "message-parser.h"
#include "mail-html2text.h"
#include "fts-parser.h"

/* Per-instance state of the HTML FTS parser. */
struct html_fts_parser {
	/* Generic parser part. Must stay the first member: the functions in
	   this file cast a struct fts_parser pointer straight back to
	   struct html_fts_parser. */
	struct fts_parser parser;
	/* HTML-to-text converter state, created with mail_html2text_init()
	   and released with mail_html2text_deinit(). */
	struct mail_html2text *html2text;
	/* Buffer receiving the converted text. It is emptied and refilled on
	   every non-empty fts_parser_html_more() call. */
	buffer_t *output;
};

/* Try to create an HTML parser for the given context.

   Returns NULL when mail_html2text_content_type_match() rejects the
   context's content type. Otherwise returns the generic parser part of a
   newly allocated struct html_fts_parser, which is released again by
   fts_parser_html_deinit(). */
static struct fts_parser *
fts_parser_html_try_init(struct fts_parser_context *parser_context)
{
	struct html_fts_parser *html_parser;

	if (mail_html2text_content_type_match(parser_context->content_type)) {
		html_parser = i_new(struct html_fts_parser, 1);
		/* install this file's function table */
		html_parser->parser.v = fts_parser_html;
		html_parser->html2text = mail_html2text_init(0);
		html_parser->output =
			buffer_create_dynamic(default_pool, 4096);
		return &html_parser->parser;
	}

	/* not a content type this parser takes */
	return NULL;
}

static void fts_parser_html_more(struct fts_parser *_parser,
				 struct message_block *block)
{
	struct html_fts_parser *parser = (struct html_fts_parser *)_parser;

	if (block->size == 0) {
		/* finished */
		return;
	}

	buffer_set_used_size(parser->output, 0);
	mail_html2text_more(parser->html2text, block->data, block->size,
			    parser->output);

	block->data = parser->output->data;
	block->size = parser->output->used;
}

/* Destroy a parser created by fts_parser_html_try_init().

   Releases the converter state, the output buffer and finally the parser
   structure itself. retriable_err_msg_r is never written. Always
   returns 1. */
static int fts_parser_html_deinit(struct fts_parser *_parser,
				  const char **retriable_err_msg_r ATTR_UNUSED)
{
	struct html_fts_parser *html_parser =
		(struct html_fts_parser *)_parser;

	/* free the members first, the containing struct last */
	mail_html2text_deinit(&html_parser->html2text);
	buffer_free(&html_parser->output);
	i_free(html_parser);

	return 1;
}

/* Function table of the HTML parser. The initializer is positional, so the
   entries must follow the member order of struct fts_parser_vfuncs, which is
   declared outside this file. fts_parser_html_try_init() copies this table
   into every parser it creates. */
struct fts_parser_vfuncs fts_parser_html = {
	fts_parser_html_try_init,
	fts_parser_html_more,
	fts_parser_html_deinit,
	/* Fourth member left unset. NOTE(review): presumably an optional hook
	   this parser does not need - confirm against fts-parser.h. */
	NULL
};