summaryrefslogtreecommitdiffstats
path: root/src/lib-imap/imap-base-subject.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib-imap/imap-base-subject.c')
-rw-r--r--src/lib-imap/imap-base-subject.c248
1 files changed, 248 insertions, 0 deletions
diff --git a/src/lib-imap/imap-base-subject.c b/src/lib-imap/imap-base-subject.c
new file mode 100644
index 0000000..02469d3
--- /dev/null
+++ b/src/lib-imap/imap-base-subject.c
@@ -0,0 +1,248 @@
+/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
+
+/* Implemented against draft-ietf-imapext-sort-10 and
+ draft-ietf-imapext-thread-12 */
+
+#include "lib.h"
+#include "buffer.h"
+#include "charset-utf8.h"
+#include "message-header-decode.h"
+#include "imap-base-subject.h"
+
+static void pack_whitespace(buffer_t *buf)
+{
+ char *data, *dest;
+ bool last_lwsp;
+
+ data = buffer_get_modifiable_data(buf, NULL);
+
+ /* check if we need to do anything */
+ while (*data != '\0') {
+ if (*data == '\t' || *data == '\n' || *data == '\r' ||
+ (*data == ' ' && (data[1] == ' ' || data[1] == '\t')))
+ break;
+ data++;
+ }
+
+ if (*data == '\0')
+ return;
+
+ /* @UNSAFE: convert/pack the whitespace */
+ dest = data; last_lwsp = FALSE;
+ while (*data != '\0') {
+ if (*data == '\t' || *data == ' ' ||
+ *data == '\r' || *data == '\n') {
+ if (!last_lwsp) {
+ *dest++ = ' ';
+ last_lwsp = TRUE;
+ }
+ } else {
+ *dest++ = *data;
+ last_lwsp = FALSE;
+ }
+ data++;
+ }
+ *dest = '\0';
+
+ data = buffer_get_modifiable_data(buf, NULL);
+ buffer_set_used_size(buf, (size_t) (dest - data)+1);
+}
+
+static void remove_subj_trailers(buffer_t *buf, size_t start_pos,
+ bool *is_reply_or_forward_r)
+{
+ const char *data;
+ size_t orig_size, size;
+
+ /* subj-trailer = "(fwd)" / WSP */
+ data = buffer_get_data(buf, &orig_size);
+
+ if (orig_size < 1) /* size includes trailing \0 */
+ return;
+
+ for (size = orig_size-1; size > start_pos; ) {
+ if (data[size-1] == ' ')
+ size--;
+ else if (size >= 5 &&
+ memcmp(data + size - 5, "(FWD)", 5) == 0) {
+ *is_reply_or_forward_r = TRUE;
+ size -= 5;
+ } else {
+ break;
+ }
+ }
+
+ if (size != orig_size-1) {
+ buffer_set_used_size(buf, size);
+ buffer_append_c(buf, '\0');
+ }
+}
+
+static bool remove_blob(const char **datap)
+{
+ const char *data = *datap;
+
+ if (*data != '[')
+ return FALSE;
+
+ data++;
+ while (*data != '\0' && *data != '[' && *data != ']')
+ data++;
+
+ if (*data != ']')
+ return FALSE;
+
+ data++;
+ if (*data == ' ')
+ data++;
+
+ *datap = data;
+ return TRUE;
+}
+
+static bool remove_subj_leader(buffer_t *buf, size_t *start_pos,
+ bool *is_reply_or_forward_r)
+{
+ const char *data, *orig_data;
+ bool ret = FALSE;
+
+ /* subj-leader = (*subj-blob subj-refwd) / WSP
+
+ subj-blob = "[" *BLOBCHAR "]" *WSP
+ subj-refwd = ("re" / ("fw" ["d"])) *WSP [subj-blob] ":"
+
+ BLOBCHAR = %x01-5a / %x5c / %x5e-7f
+ ; any CHAR except '[' and ']' */
+ orig_data = buf->data;
+ orig_data += *start_pos;
+ data = orig_data;
+
+ if (*data == ' ') {
+ /* independent from checks below - always removed */
+ data++; orig_data++;
+ *start_pos += 1;
+ ret = TRUE;
+ }
+
+ while (*data == '[') {
+ if (!remove_blob(&data))
+ return ret;
+ }
+
+ if (str_begins(data, "RE"))
+ data += 2;
+ else if (str_begins(data, "FWD"))
+ data += 3;
+ else if (str_begins(data, "FW"))
+ data += 2;
+ else
+ return ret;
+
+ if (*data == ' ')
+ data++;
+
+ if (*data == '[' && !remove_blob(&data))
+ return ret;
+
+ if (*data != ':')
+ return ret;
+
+ data++;
+ *start_pos += (size_t)(data - orig_data);
+ *is_reply_or_forward_r = TRUE;
+ return TRUE;
+}
+
+static bool remove_blob_when_nonempty(buffer_t *buf, size_t *start_pos)
+{
+ const char *data, *orig_data;
+
+ orig_data = buf->data;
+ orig_data += *start_pos;
+ data = orig_data;
+ if (*data == '[' && remove_blob(&data) && *data != '\0') {
+ *start_pos += (size_t)(data - orig_data);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static bool remove_subj_fwd_hdr(buffer_t *buf, size_t *start_pos,
+ bool *is_reply_or_forward_r)
+{
+ const char *data = buf->data;
+ size_t size = buf->used;
+
+ /* subj-fwd = subj-fwd-hdr subject subj-fwd-trl
+ subj-fwd-hdr = "[fwd:"
+ subj-fwd-trl = "]" */
+
+ if (!str_begins(data + *start_pos, "[FWD:"))
+ return FALSE;
+
+ if (data[size-2] != ']')
+ return FALSE;
+
+ *is_reply_or_forward_r = TRUE;
+
+ buffer_set_used_size(buf, size-2);
+ buffer_append_c(buf, '\0');
+
+ *start_pos += 5;
+ return TRUE;
+}
+
+const char *imap_get_base_subject_cased(pool_t pool, const char *subject,
+ bool *is_reply_or_forward_r)
+{
+ buffer_t *buf;
+ size_t start_pos, subject_len;
+ bool found;
+
+ *is_reply_or_forward_r = FALSE;
+
+ subject_len = strlen(subject);
+ buf = buffer_create_dynamic(pool, subject_len);
+
+ /* (1) Convert any RFC 2047 encoded-words in the subject to
+ UTF-8. Convert all tabs and continuations to space.
+ Convert all multiple spaces to a single space. */
+ message_header_decode_utf8((const unsigned char *)subject, subject_len,
+ buf, uni_utf8_to_decomposed_titlecase);
+ buffer_append_c(buf, '\0');
+
+ pack_whitespace(buf);
+
+ start_pos = 0;
+ do {
+ /* (2) Remove all trailing text of the subject that matches
+ the subj-trailer ABNF, repeat until no more matches are
+ possible. */
+ remove_subj_trailers(buf, start_pos, is_reply_or_forward_r);
+
+ do {
+ /* (3) Remove all prefix text of the subject that
+ matches the subj-leader ABNF. */
+ found = remove_subj_leader(buf, &start_pos,
+ is_reply_or_forward_r);
+
+ /* (4) If there is prefix text of the subject that
+ matches the subj-blob ABNF, and removing that prefix
+ leaves a non-empty subj-base, then remove the prefix
+ text. */
+ found = remove_blob_when_nonempty(buf, &start_pos) ||
+ found;
+
+ /* (5) Repeat (3) and (4) until no matches remain. */
+ } while (found);
+
+ /* (6) If the resulting text begins with the subj-fwd-hdr ABNF
+ and ends with the subj-fwd-trl ABNF, remove the
+ subj-fwd-hdr and subj-fwd-trl and repeat from step (2). */
+ } while (remove_subj_fwd_hdr(buf, &start_pos, is_reply_or_forward_r));
+
+ /* (7) The resulting text is the "base subject" used in the
+ SORT. */
+ return (const char *)buf->data + start_pos;
+}