diff options
Diffstat (limited to 'src/lib-storage/index/index-mail-headers.c')
-rw-r--r-- | src/lib-storage/index/index-mail-headers.c | 990 |
1 files changed, 990 insertions, 0 deletions
diff --git a/src/lib-storage/index/index-mail-headers.c b/src/lib-storage/index/index-mail-headers.c new file mode 100644 index 0000000..ce23e9d --- /dev/null +++ b/src/lib-storage/index/index-mail-headers.c @@ -0,0 +1,990 @@ +/* Copyright (c) 2003-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "istream.h" +#include "array.h" +#include "buffer.h" +#include "str.h" +#include "message-date.h" +#include "message-part-data.h" +#include "message-parser.h" +#include "message-header-decode.h" +#include "istream-tee.h" +#include "istream-header-filter.h" +#include "imap-envelope.h" +#include "imap-bodystructure.h" +#include "index-storage.h" +#include "index-mail.h" + +static const struct message_parser_settings msg_parser_set = { + .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP | + MESSAGE_HEADER_PARSER_FLAG_DROP_CR, + .flags = MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK, +}; + +static void index_mail_filter_stream_destroy(struct index_mail *mail); + +static int header_line_cmp(const struct index_mail_line *l1, + const struct index_mail_line *l2) +{ + int diff; + + diff = (int)l1->field_idx - (int)l2->field_idx; + return diff != 0 ? diff : + (int)l1->line_num - (int)l2->line_num; +} + +void index_mail_parse_header_deinit(struct index_mail *mail) +{ + mail->data.header_parser_initialized = FALSE; +} + +static void index_mail_parse_header_finish(struct index_mail *mail) +{ + struct mail *_mail = &mail->mail.mail; + const struct index_mail_line *lines; + const unsigned char *header; + const uint8_t *match; + buffer_t *buf; + unsigned int i, j, count, match_idx, match_count; + bool noncontiguous; + + /* sort it first so fields are grouped together and ordered by + line number */ + array_sort(&mail->header_lines, header_line_cmp); + + lines = array_get(&mail->header_lines, &count); + match = array_get(&mail->header_match, &match_count); + header = mail->header_data->data; + buf = t_buffer_create(256); + + /* go through all the header lines we found */ + for (i = match_idx = 0; i < count; i = j) { + /* matches and header lines are both sorted, all matches + until lines[i] weren't found */ + while (match_idx < lines[i].field_idx && + match_idx < match_count) { + if (HEADER_MATCH_USABLE(mail, match[match_idx]) && + mail_cache_field_can_add(_mail->transaction->cache_trans, + _mail->seq, match_idx)) { + /* this header doesn't exist. remember that. */ + i_assert((match[match_idx] & + HEADER_MATCH_FLAG_FOUND) == 0); + index_mail_cache_add_idx(mail, match_idx, + "", 0); + } + match_idx++; + } + + if (match_idx < match_count) { + /* save index to first header line */ + i_assert(match_idx == lines[i].field_idx); + j = i + 1; + array_idx_set(&mail->header_match_lines, match_idx, &j); + match_idx++; + } + + if (!mail_cache_field_can_add(_mail->transaction->cache_trans, + _mail->seq, lines[i].field_idx)) { + /* header is already cached. skip over all the + header lines. */ + for (j = i+1; j < count; j++) { + if (lines[j].field_idx != lines[i].field_idx) + break; + } + continue; + } + + /* buffer contains: { uint32_t line_num[], 0, header texts } + noncontiguous is just a small optimization.. */ + buffer_set_used_size(buf, 0); + buffer_append(buf, &lines[i].line_num, + sizeof(lines[i].line_num)); + + noncontiguous = FALSE; + for (j = i+1; j < count; j++) { + if (lines[j].field_idx != lines[i].field_idx) + break; + + if (lines[j].start_pos != lines[j-1].end_pos) + noncontiguous = TRUE; + buffer_append(buf, &lines[j].line_num, + sizeof(lines[j].line_num)); + } + buffer_append_zero(buf, sizeof(uint32_t)); + + if (noncontiguous) { + for (; i < j; i++) { + buffer_append(buf, header + lines[i].start_pos, + lines[i].end_pos - + lines[i].start_pos); + } + i--; + } else { + buffer_append(buf, header + lines[i].start_pos, + lines[j-1].end_pos - lines[i].start_pos); + } + + index_mail_cache_add_idx(mail, lines[i].field_idx, + buf->data, buf->used); + } + + for (; match_idx < match_count; match_idx++) { + if (HEADER_MATCH_USABLE(mail, match[match_idx]) && + mail_cache_field_can_add(_mail->transaction->cache_trans, + _mail->seq, match_idx)) { + /* this header doesn't exist. remember that. */ + i_assert((match[match_idx] & + HEADER_MATCH_FLAG_FOUND) == 0); + index_mail_cache_add_idx(mail, match_idx, "", 0); + } + } + + mail->data.dont_cache_field_idx = UINT_MAX; + index_mail_parse_header_deinit(mail); +} + +static unsigned int +get_header_field_idx(struct mailbox *box, const char *field) +{ + struct mail_cache_field header_field; + + i_zero(&header_field); + header_field.type = MAIL_CACHE_FIELD_HEADER; + /* Always register with NO decision. The field should be added soon + with mail_cache_add(), which changes the decision to TEMP. Most + importantly doing it this way emits mail_cache_decision event. */ + header_field.decision = MAIL_CACHE_DECISION_NO; + T_BEGIN { + header_field.name = t_strconcat("hdr.", field, NULL); + mail_cache_register_fields(box->cache, &header_field, 1); + } T_END; + return header_field.idx; +} + +bool index_mail_want_parse_headers(struct index_mail *mail) +{ + if (mail->data.wanted_headers != NULL || + mail->data.save_bodystructure_header) + return TRUE; + + if ((mail->data.cache_fetch_fields & MAIL_FETCH_DATE) != 0 && + !mail->data.sent_date_parsed) + return TRUE; + return FALSE; +} + +static void index_mail_parse_header_register_all_wanted(struct index_mail *mail) +{ + struct mail *_mail = &mail->mail.mail; + const struct mail_cache_field *all_cache_fields; + unsigned int i, count; + + all_cache_fields = + mail_cache_register_get_list(_mail->box->cache, + pool_datastack_create(), &count); + for (i = 0; i < count; i++) { + if (strncasecmp(all_cache_fields[i].name, "hdr.", 4) != 0) + continue; + if (!mail_cache_field_want_add(_mail->transaction->cache_trans, + _mail->seq, i)) + continue; + + array_idx_set(&mail->header_match, all_cache_fields[i].idx, + &mail->header_match_value); + } +} + +void index_mail_parse_header_init(struct index_mail *mail, + struct mailbox_header_lookup_ctx *headers) +{ + struct index_mail_data *data = &mail->data; + const uint8_t *match; + unsigned int i, field_idx, match_count; + + index_mail_filter_stream_destroy(mail); + i_assert(!mail->data.header_parser_initialized); + + mail->header_seq = mail->mail.mail.seq; + if (mail->header_data == NULL) { + mail->header_data = buffer_create_dynamic(default_pool, 4096); + i_array_init(&mail->header_lines, 32); + i_array_init(&mail->header_match, 32); + i_array_init(&mail->header_match_lines, 32); + mail->header_match_value = HEADER_MATCH_SKIP_COUNT; + } else { + buffer_set_used_size(mail->header_data, 0); + array_clear(&mail->header_lines); + array_clear(&mail->header_match_lines); + + i_assert((mail->header_match_value & + (HEADER_MATCH_SKIP_COUNT-1)) == 0); + if (mail->header_match_value + HEADER_MATCH_SKIP_COUNT <= UINT8_MAX) + mail->header_match_value += HEADER_MATCH_SKIP_COUNT; + else { + /* wrapped, we'll have to clear the buffer */ + array_clear(&mail->header_match); + mail->header_match_value = HEADER_MATCH_SKIP_COUNT; + } + } + + if (headers != NULL) { + for (i = 0; i < headers->count; i++) { + array_idx_set(&mail->header_match, headers->idx[i], + &mail->header_match_value); + } + } + + if (data->wanted_headers != NULL && data->wanted_headers != headers) { + headers = data->wanted_headers; + for (i = 0; i < headers->count; i++) { + array_idx_set(&mail->header_match, headers->idx[i], + &mail->header_match_value); + } + } + + /* register also all the other headers that exist in cache file */ + T_BEGIN { + index_mail_parse_header_register_all_wanted(mail); + } T_END; + + /* if we want sent date, it doesn't mean that we also want to cache + Date: header. if we have Date field's index set at this point we + know that we want it. otherwise add it and remember that we don't + want it cached. */ + field_idx = get_header_field_idx(mail->mail.mail.box, "Date"); + match = array_get(&mail->header_match, &match_count); + if (field_idx < match_count && + match[field_idx] == mail->header_match_value) { + /* cache Date: header */ + } else if ((data->cache_fetch_fields & MAIL_FETCH_DATE) != 0 || + data->save_sent_date) { + /* parse Date: header, but don't cache it. */ + data->dont_cache_field_idx = field_idx; + array_idx_set(&mail->header_match, field_idx, + &mail->header_match_value); + } + mail->data.header_parser_initialized = TRUE; + mail->data.parse_line_num = 0; + i_zero(&mail->data.parse_line); +} + +static void index_mail_parse_finish_imap_envelope(struct index_mail *mail) +{ + struct mail *_mail = &mail->mail.mail; + const unsigned int cache_field_envelope = + mail->ibox->cache_fields[MAIL_CACHE_IMAP_ENVELOPE].idx; + string_t *str; + + str = str_new(mail->mail.data_pool, 256); + imap_envelope_write(mail->data.envelope_data, str); + mail->data.envelope = str_c(str); + mail->data.save_envelope = FALSE; + + if (mail_cache_field_can_add(_mail->transaction->cache_trans, + _mail->seq, cache_field_envelope)) { + index_mail_cache_add_idx(mail, cache_field_envelope, + str_data(str), str_len(str)); + } +} + +void index_mail_parse_header(struct message_part *part, + struct message_header_line *hdr, + struct index_mail *mail) +{ + struct mail *_mail = &mail->mail.mail; + struct index_mail_data *data = &mail->data; + unsigned int field_idx, count; + uint8_t *match; + + i_assert(data->header_parser_initialized); + + data->parse_line_num++; + + if (data->save_bodystructure_header && + !data->parsed_bodystructure_header) { + i_assert(part != NULL); + message_part_data_parse_from_header(mail->mail.data_pool, part, hdr); + } + + if (data->save_envelope) { + message_part_envelope_parse_from_header(mail->mail.data_pool, + &data->envelope_data, hdr); + + if (hdr == NULL) + index_mail_parse_finish_imap_envelope(mail); + } + + if (hdr == NULL) { + /* end of headers */ + if (mail->data.save_sent_date) + mail->data.sent_date_parsed = TRUE; + T_BEGIN { + index_mail_parse_header_finish(mail); + } T_END; + if (data->save_bodystructure_header) { + i_assert(data->parser_ctx != NULL); + data->parsed_bodystructure_header = TRUE; + } + return; + } + + if (!hdr->continued) { + T_BEGIN { + const char *cache_field_name = + t_strconcat("hdr.", hdr->name, NULL); + data->parse_line.field_idx = + mail_cache_register_lookup(_mail->box->cache, + cache_field_name); + } T_END; + } + field_idx = data->parse_line.field_idx; + match = array_get_modifiable(&mail->header_match, &count); + if (field_idx >= count || + !HEADER_MATCH_USABLE(mail, match[field_idx])) { + /* we don't want this header. */ + return; + } + + if (!hdr->continued) { + /* beginning of a line. add the header name. */ + data->parse_line.start_pos = str_len(mail->header_data); + data->parse_line.line_num = data->parse_line_num; + str_append(mail->header_data, hdr->name); + str_append_data(mail->header_data, hdr->middle, hdr->middle_len); + + /* remember that we saw this header so we don't add it to + cache as nonexistent. */ + match[field_idx] |= HEADER_MATCH_FLAG_FOUND; + } + str_append_data(mail->header_data, hdr->value, hdr->value_len); + if (!hdr->no_newline) + str_append(mail->header_data, "\n"); + if (!hdr->continues) { + data->parse_line.end_pos = str_len(mail->header_data); + array_push_back(&mail->header_lines, &data->parse_line); + } +} + +static void +index_mail_parse_part_header_cb(struct message_part *part, + struct message_header_line *hdr, + struct index_mail *mail) +{ + index_mail_parse_header(part, hdr, mail); +} + +static void +index_mail_parse_header_cb(struct message_header_line *hdr, + struct index_mail *mail) +{ + index_mail_parse_header(mail->data.parts, hdr, mail); +} + +struct istream * +index_mail_cache_parse_init(struct mail *_mail, struct istream *input) +{ + struct index_mail *mail = INDEX_MAIL(_mail); + struct istream *input2; + + i_assert(mail->data.tee_stream == NULL); + i_assert(mail->data.parser_ctx == NULL); + + /* we're doing everything for now, figure out later if we want to + save them. */ + mail->data.save_sent_date = TRUE; + mail->data.save_bodystructure_header = TRUE; + mail->data.save_bodystructure_body = TRUE; + /* Don't unnecessarily waste time generating a snippet, since it's + not as cheap as the others to generate. */ + if (index_mail_want_cache(mail, MAIL_CACHE_BODY_SNIPPET)) + mail->data.save_body_snippet = TRUE; + + mail->data.tee_stream = tee_i_stream_create(input); + input = tee_i_stream_create_child(mail->data.tee_stream); + input2 = tee_i_stream_create_child(mail->data.tee_stream); + + index_mail_parse_header_init(mail, NULL); + mail->data.parser_input = input; + mail->data.parser_ctx = + message_parser_init(mail->mail.data_pool, input, + &msg_parser_set); + i_stream_unref(&input); + return input2; +} + +static void index_mail_init_parser(struct index_mail *mail) +{ + struct index_mail_data *data = &mail->data; + struct message_part *parts; + const char *error; + + if (data->parser_ctx != NULL) { + data->parser_input = NULL; + if (message_parser_deinit_from_parts(&data->parser_ctx, &parts, &error) < 0) { + index_mail_set_message_parts_corrupted(&mail->mail.mail, error); + index_mail_parts_reset(mail); + } + if (data->parts == NULL || data->parts != parts) { + /* The previous parsing didn't finish, so we're + re-parsing the header. The new parts don't have data + filled anymore. */ + data->parsed_bodystructure_header = FALSE; + } + } + + /* make sure parsing starts from the beginning of the stream */ + i_stream_seek(mail->data.stream, 0); + if (data->parts == NULL) { + data->parser_input = data->stream; + data->parser_ctx = message_parser_init(mail->mail.data_pool, + data->stream, + &msg_parser_set); + } else { + data->parser_ctx = + message_parser_init_from_parts(data->parts, + data->stream, + &msg_parser_set); + } +} + +int index_mail_parse_headers_internal(struct index_mail *mail, + struct mailbox_header_lookup_ctx *headers) +{ + struct index_mail_data *data = &mail->data; + + i_assert(data->stream != NULL); + + index_mail_parse_header_init(mail, headers); + + if (data->parts == NULL || data->save_bodystructure_header || + (data->access_part & PARSE_BODY) != 0) { + /* initialize bodystructure parsing in case we read the whole + message. */ + index_mail_init_parser(mail); + message_parser_parse_header(data->parser_ctx, &data->hdr_size, + index_mail_parse_part_header_cb, + mail); + } else { + /* just read the header */ + i_assert(!data->save_bodystructure_body || + data->parser_ctx != NULL); + message_parse_header(data->stream, &data->hdr_size, + msg_parser_set.hdr_flags, + index_mail_parse_header_cb, mail); + } + if (index_mail_stream_check_failure(mail) < 0) { + index_mail_parse_header_deinit(mail); + return -1; + } + i_assert(!mail->data.header_parser_initialized); + data->hdr_size_set = TRUE; + data->access_part &= ENUM_NEGATE(PARSE_HDR); + return 0; +} + +int index_mail_parse_headers(struct index_mail *mail, + struct mailbox_header_lookup_ctx *headers, + const char *reason) +{ + struct index_mail_data *data = &mail->data; + struct istream *input; + uoff_t old_offset; + + old_offset = data->stream == NULL ? 0 : data->stream->v_offset; + + if (mail_get_hdr_stream_because(&mail->mail.mail, NULL, reason, &input) < 0) + return -1; + + int ret = index_mail_parse_headers_internal(mail, headers); + i_stream_seek(data->stream, old_offset); + return ret; +} + +static void +imap_envelope_parse_callback(struct message_header_line *hdr, + struct index_mail *mail) +{ + message_part_envelope_parse_from_header(mail->mail.data_pool, + &mail->data.envelope_data, hdr); + + if (hdr == NULL) + index_mail_parse_finish_imap_envelope(mail); +} + +int index_mail_headers_get_envelope(struct index_mail *mail) +{ + const unsigned int cache_field_envelope = + mail->ibox->cache_fields[MAIL_CACHE_IMAP_ENVELOPE].idx; + struct mailbox_header_lookup_ctx *header_ctx; + struct istream *stream; + uoff_t old_offset; + string_t *str; + + str = str_new(mail->mail.data_pool, 256); + if (index_mail_cache_lookup_field(mail, str, + cache_field_envelope) > 0) { + mail->data.envelope = str_c(str); + return 0; + } + str_free(&str); + + old_offset = mail->data.stream == NULL ? 0 : + mail->data.stream->v_offset; + + /* Make sure header_cache_callback() isn't also parsing the ENVELOPE. + Otherwise two callbacks are doing it and mixing up results. */ + mail->data.save_envelope = FALSE; + + header_ctx = mailbox_header_lookup_init(mail->mail.mail.box, + message_part_envelope_headers); + if (mail_get_header_stream(&mail->mail.mail, header_ctx, &stream) < 0) { + mailbox_header_lookup_unref(&header_ctx); + return -1; + } + mailbox_header_lookup_unref(&header_ctx); + + if (mail->data.envelope == NULL) { + /* we got the headers from cache - parse them to get the + envelope */ + message_parse_header(stream, NULL, msg_parser_set.hdr_flags, + imap_envelope_parse_callback, mail); + if (stream->stream_errno != 0) { + index_mail_stream_log_failure_for(mail, stream); + return -1; + } + i_assert(mail->data.envelope != NULL); + } + + if (mail->data.stream != NULL) + i_stream_seek(mail->data.stream, old_offset); + return 0; +} + +static size_t get_header_size(buffer_t *buffer, size_t pos) +{ + const unsigned char *data = buffer->data; + size_t i, size = buffer->used; + + i_assert(pos <= size); + + for (i = pos; i < size; i++) { + if (data[i] == '\n') { + if (i+1 == size || + (data[i+1] != ' ' && data[i+1] != '\t')) + return i - pos; + } + } + return size - pos; +} + +static int index_mail_header_is_parsed(struct index_mail *mail, + unsigned int field_idx) +{ + const uint8_t *match; + unsigned int count; + + match = array_get(&mail->header_match, &count); + if (field_idx < count && HEADER_MATCH_USABLE(mail, match[field_idx])) + return (match[field_idx] & HEADER_MATCH_FLAG_FOUND) != 0 ? 1 : 0; + return -1; +} + +static bool skip_header(const unsigned char **data, size_t len) +{ + const unsigned char *p = *data; + size_t i; + + for (i = 0; i < len; i++) { + if (p[i] == ':') + break; + } + if (i == len) + return FALSE; + + for (i++; i < len; i++) { + if (!IS_LWSP(p[i])) + break; + } + + *data = p + i; + return TRUE; +} + +static const char *const * +index_mail_get_parsed_header(struct index_mail *mail, unsigned int field_idx) +{ + ARRAY(const char *) header_values; + const struct index_mail_line *lines; + const unsigned char *header, *value_start, *value_end; + const unsigned int *line_idx; + const char *value; + unsigned int i, lines_count, first_line_idx; + + line_idx = array_idx(&mail->header_match_lines, field_idx); + i_assert(*line_idx != 0); + first_line_idx = *line_idx - 1; + + p_array_init(&header_values, mail->mail.data_pool, 4); + header = mail->header_data->data; + + lines = array_get(&mail->header_lines, &lines_count); + for (i = first_line_idx; i < lines_count; i++) { + if (lines[i].field_idx != lines[first_line_idx].field_idx) + break; + + /* skip header: and drop ending LF */ + value_start = header + lines[i].start_pos; + value_end = header + lines[i].end_pos; + if (skip_header(&value_start, value_end - value_start)) { + if (value_start != value_end && value_end[-1] == '\n') + value_end--; + value = message_header_strdup(mail->mail.data_pool, + value_start, + value_end - value_start); + array_push_back(&header_values, &value); + } + } + + array_append_zero(&header_values); + return array_front(&header_values); +} + +static int +index_mail_get_raw_headers(struct index_mail *mail, const char *field, + const char *const **value_r) +{ + struct mail *_mail = &mail->mail.mail; + const char *headers[2], *value; + struct mailbox_header_lookup_ctx *headers_ctx; + const unsigned char *data; + unsigned int field_idx; + string_t *dest; + size_t i, len, len2; + int ret; + ARRAY(const char *) header_values; + + i_assert(field != NULL); + + field_idx = get_header_field_idx(_mail->box, field); + + dest = t_str_new(128); + if (mail_cache_lookup_headers(_mail->transaction->cache_view, dest, + _mail->seq, &field_idx, 1) <= 0) { + /* not in cache / error - first see if it's already parsed */ + p_free(mail->mail.data_pool, dest); + if (mail->data.header_parser_initialized) { + /* don't try to parse headers recursively. we're here + because message size was wrong and istream-mail + wants to log some cached headers. */ + i_assert(mail->mail.mail.lookup_abort >= MAIL_LOOKUP_ABORT_NOT_IN_CACHE); + mail_set_aborted(&mail->mail.mail); + return -1; + } + if (mail->header_seq != mail->mail.mail.seq || + index_mail_header_is_parsed(mail, field_idx) < 0) { + /* parse */ + const char *reason = index_mail_cache_reason(_mail, + t_strdup_printf("header %s", field)); + headers[0] = field; headers[1] = NULL; + headers_ctx = mailbox_header_lookup_init(_mail->box, + headers); + ret = index_mail_parse_headers(mail, headers_ctx, reason); + mailbox_header_lookup_unref(&headers_ctx); + if (ret < 0) + return -1; + } + + if ((ret = index_mail_header_is_parsed(mail, field_idx)) <= 0) { + /* not found */ + i_assert(ret != -1); + *value_r = p_new(mail->mail.data_pool, const char *, 1); + return 0; + } + *value_r = index_mail_get_parsed_header(mail, field_idx); + return 0; + } + _mail->transaction->stats.cache_hit_count++; + data = buffer_get_data(dest, &len); + + if (len == 0) { + /* cached as nonexistent. */ + *value_r = p_new(mail->mail.data_pool, const char *, 1); + return 0; + } + + p_array_init(&header_values, mail->mail.data_pool, 4); + + /* cached. skip "header name: " parts in dest. */ + for (i = 0; i < len; i++) { + if (data[i] == ':') { + i++; + while (i < len && IS_LWSP(data[i])) i++; + + /* @UNSAFE */ + len2 = get_header_size(dest, i); + value = message_header_strdup(mail->mail.data_pool, + data + i, len2); + i += len2 + 1; + + array_push_back(&header_values, &value); + } + } + + array_append_zero(&header_values); + *value_r = array_front(&header_values); + return 0; +} + +static int unfold_header(pool_t pool, const char **_str) +{ + const char *str = *_str; + char *new_str; + unsigned int i, j; + + for (i = 0; str[i] != '\0'; i++) { + if (str[i] == '\n') + break; + } + if (str[i] == '\0') + return 0; + + /* @UNSAFE */ + new_str = p_malloc(pool, i + strlen(str+i) + 1); + memcpy(new_str, str, i); + for (j = i; str[i] != '\0'; i++) { + if (str[i] == '\n') { + new_str[j++] = ' '; + i++; + if (str[i] == '\0') + break; + + if (str[i] != ' ' && str[i] != '\t') { + /* corrupted */ + return -1; + } + } else { + new_str[j++] = str[i]; + } + } + new_str[j] = '\0'; + *_str = new_str; + return 0; +} + +static void str_replace_nuls(string_t *str) +{ + char *data = str_c_modifiable(str); + size_t i, len = str_len(str); + + for (i = 0; i < len; i++) { + if (data[i] == '\0') + data[i] = ' '; + } +} + +static int +index_mail_headers_decode(struct index_mail *mail, const char *const **_list, + unsigned int max_count) +{ + const char *const *list = *_list; + const char **decoded_list, *input; + unsigned int i, count; + string_t *str; + + count = str_array_length(list); + if (count > max_count) + count = max_count; + decoded_list = p_new(mail->mail.data_pool, const char *, count + 1); + + str = t_str_new(512); + for (i = 0; i < count; i++) { + str_truncate(str, 0); + input = list[i]; + /* unfold all lines into a single line */ + if (unfold_header(mail->mail.data_pool, &input) < 0) + return -1; + + /* decode MIME encoded-words. decoding may also add new LFs. */ + message_header_decode_utf8((const unsigned char *)input, + strlen(input), str, NULL); + if (strcmp(str_c(str), input) != 0) { + if (strlen(str_c(str)) != str_len(str)) { + /* replace NULs with spaces */ + str_replace_nuls(str); + } + input = p_strdup(mail->mail.data_pool, str_c(str)); + } + decoded_list[i] = input; + } + *_list = decoded_list; + return 0; +} + +int index_mail_get_headers(struct mail *_mail, const char *field, + bool decode_to_utf8, const char *const **value_r) +{ + struct index_mail *mail = INDEX_MAIL(_mail); + bool retry = TRUE; + int ret; + + for (;; retry = FALSE) { + if (index_mail_get_raw_headers(mail, field, value_r) < 0) + return -1; + if (**value_r == NULL) + return 0; + if (!decode_to_utf8) + return 1; + + T_BEGIN { + ret = index_mail_headers_decode(mail, value_r, UINT_MAX); + } T_END; + + if (ret < 0 && retry) { + mail_set_mail_cache_corrupted(_mail, "Broken header %s", + field); + } else { + break; + } + } + if (ret < 0) { + i_panic("BUG: Broken header %s for mail UID %u " + "wasn't fixed by re-parsing the header", + field, _mail->uid); + } + return 1; +} + +int index_mail_get_first_header(struct mail *_mail, const char *field, + bool decode_to_utf8, const char **value_r) +{ + struct index_mail *mail = INDEX_MAIL(_mail); + const char *const *list; + bool retry = TRUE; + int ret; + + for (;; retry = FALSE) { + if (index_mail_get_raw_headers(mail, field, &list) < 0) + return -1; + if (!decode_to_utf8 || list[0] == NULL) { + ret = 0; + break; + } + + T_BEGIN { + ret = index_mail_headers_decode(mail, &list, 1); + } T_END; + + if (ret < 0 && retry) { + mail_set_mail_cache_corrupted(_mail, "Broken header %s", + field); + /* retry by parsing the full header */ + } else { + break; + } + } + if (ret < 0) { + i_panic("BUG: Broken header %s for mail UID %u " + "wasn't fixed by re-parsing the header", + field, _mail->uid); + } + *value_r = list[0]; + return list[0] != NULL ? 1 : 0; +} + +static void +header_cache_callback(struct header_filter_istream *input ATTR_UNUSED, + struct message_header_line *hdr, + bool *matched ATTR_UNUSED, struct index_mail *mail) +{ + index_mail_parse_header(NULL, hdr, mail); +} + +static void index_mail_filter_stream_destroy(struct index_mail *mail) +{ + if (mail->data.filter_stream == NULL) + return; + + const unsigned char *data; + size_t size; + + /* read through the previous filter_stream. this makes sure that the + fields are added to cache, and most importantly it resets + header_parser_initialized=FALSE so we don't assert on it. */ + while (i_stream_read_more(mail->data.filter_stream, &data, &size) > 0) + i_stream_skip(mail->data.filter_stream, size); + if (mail->data.header_parser_initialized) { + /* istream failed while reading the header */ + i_assert(mail->data.filter_stream->stream_errno != 0); + index_mail_parse_header_deinit(mail); + } + i_stream_destroy(&mail->data.filter_stream); +} + +int index_mail_get_header_stream(struct mail *_mail, + struct mailbox_header_lookup_ctx *headers, + struct istream **stream_r) +{ + struct index_mail *mail = INDEX_MAIL(_mail); + struct istream *input; + string_t *dest; + + index_mail_filter_stream_destroy(mail); + + if (mail->data.save_bodystructure_header) { + /* we have to parse the header. */ + const char *reason = + index_mail_cache_reason(_mail, "bodystructure"); + mail->data.access_reason_code = "mail:header_fields"; + if (index_mail_parse_headers(mail, headers, reason) < 0) + return -1; + } + + dest = str_new(mail->mail.data_pool, 256); + if (mail_cache_lookup_headers(_mail->transaction->cache_view, dest, + _mail->seq, headers->idx, + headers->count) > 0) { + str_append(dest, "\n"); + _mail->transaction->stats.cache_hit_count++; + mail->data.filter_stream = + i_stream_create_from_data(str_data(dest), + str_len(dest)); + *stream_r = mail->data.filter_stream; + return 0; + } + /* not in cache / error */ + p_free(mail->mail.data_pool, dest); + + unsigned int first_not_found = UINT_MAX, not_found_count = 0; + for (unsigned int i = 0; i < headers->count; i++) { + if (mail_cache_field_exists(_mail->transaction->cache_view, + _mail->seq, headers->idx[i]) <= 0) { + if (not_found_count++ == 0) + first_not_found = i; + } + } + + const char *reason; + if (not_found_count == 0) + reason = "BUG: all headers seem to exist in cache"; + else { + i_assert(first_not_found != UINT_MAX); + reason = index_mail_cache_reason(_mail, t_strdup_printf( + "%u/%u headers not cached (first=%s)", + not_found_count, headers->count, headers->name[first_not_found])); + } + mail->data.access_reason_code = "mail:header_fields"; + if (mail_get_hdr_stream_because(_mail, NULL, reason, &input) < 0) + return -1; + + index_mail_parse_header_init(mail, headers); + mail->data.filter_stream = + i_stream_create_header_filter(mail->data.stream, + HEADER_FILTER_INCLUDE | + HEADER_FILTER_ADD_MISSING_EOH | + HEADER_FILTER_HIDE_BODY, + headers->name, headers->count, + header_cache_callback, mail); + *stream_r = mail->data.filter_stream; + return 0; +} |