/* Copyright (c) 2008-2018 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "str.h"
#include "unichar.h"
#include "imap-utf7.h"

/* Modified base64 alphabet used by IMAP UTF-7: same as regular base64,
   except that ',' is used instead of '/'. */
static const char imap_b64enc[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";

/* Marker for bytes that are not part of the modified base64 alphabet. */
#define XX 0xff
/* Reverse lookup of imap_b64enc[]: maps an input byte to its 6-bit value,
   or to XX if the byte isn't in the alphabet. */
static const unsigned char imap_b64dec[256] = {
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,62, 63,XX,XX,XX,
	52,53,54,55, 56,57,58,59, 60,61,XX,XX, XX,XX,XX,XX,
	XX, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
	15,16,17,18, 19,20,21,22, 23,24,25,XX, XX,XX,XX,XX,
	XX,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
	41,42,43,44, 45,46,47,48, 49,50,51,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX,
	XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX, XX,XX,XX,XX
};

/* Append "&<modified base64 of in[0..len-1]>-" to dest. No '=' padding is
   written: a trailing group of 1 or 2 input bytes produces 2 or 3 output
   characters with the unused low bits set to zero. */
static void
mbase64_encode(string_t *dest, const unsigned char *in, size_t len)
{
	str_append_c(dest, '&');
	while (len >= 3) {
		str_append_c(dest, imap_b64enc[in[0] >> 2]);
		str_append_c(dest, imap_b64enc[((in[0] & 3) << 4) |
					       (in[1] >> 4)]);
		str_append_c(dest, imap_b64enc[((in[1] & 0x0f) << 2) |
					       ((in[2] & 0xc0) >> 6)]);
		str_append_c(dest, imap_b64enc[in[2] & 0x3f]);
		in += 3;
		len -= 3;
	}
	if (len > 0) {
		str_append_c(dest, imap_b64enc[in[0] >> 2]);
		if (len == 1)
			str_append_c(dest, imap_b64enc[(in[0] & 0x03) << 4]);
		else {
			str_append_c(dest, imap_b64enc[((in[0] & 0x03) << 4) |
						       (in[1] >> 4)]);
			str_append_c(dest, imap_b64enc[(in[1] & 0x0f) << 2]);
		}
	}
	str_append_c(dest, '-');
}

/* Return a pointer to the first character in str that can't be copied to the
   output as-is: '&', anything outside 0x20..0x7e, or escape_char. Returns
   NULL if there is no such character. An escape_char of '\0' never matches,
   because the scan stops at the terminating NUL. */
static const char *
imap_utf8_first_encode_char(const char *str, char escape_char)
{
	const char *p;

	for (p = str; *p != '\0'; p++) {
		if (*p == '&' || *p < 0x20 || *p >= 0x7f ||
		    *p == escape_char)
			return p;
	}
	return NULL;
}

/* Parse exactly two lowercase hex digits from str into *chr_r.
   Returns 0 on success, -1 if either of the two characters is not in
   [0-9a-f] (this includes a premature end of string). *chr_r is left
   untouched on failure. */
int imap_escaped_utf8_hex_to_char(const char *str, unsigned char *chr_r)
{
	unsigned int i = 0;
	unsigned char c = 0;

	/* NOTE: Only lowercase hex characters are allowed so the output is
	   reversible. */
	for (;;) {
		if (str[i] >= '0' && str[i] <= '9')
			c += str[i] - '0';
		else if (str[i] >= 'a' && str[i] <= 'f')
			c += str[i] - 'a' + 10;
		else
			return -1;
		if (++i == 2)
			break;
		/* first digit is the high nibble */
		c *= 0x10;
	}
	*chr_r = c;
	return 0;
}

/* Convert UTF-8 src to IMAP UTF-7, appending the result to dest.

   If escape_char is not '\0', each "<escape_char><2 lowercase hex digits>"
   sequence in src is replaced by the single byte the hex digits encode and
   written to dest without further encoding.

   Returns 0 on success, -1 if uni_utf8_get_char() rejects the input. On
   failure dest may already contain partially converted output. */
static int
imap_utf8_to_utf7_int(const char *src, char escape_char, string_t *dest)
{
	const char *p;
	unichar_t chr;
	uint8_t *utf16, *u;
	uint16_t u16;
	unsigned char c;

	p = imap_utf8_first_encode_char(src, escape_char);
	if (p == NULL) {
		/* no characters that need to be encoded */
		str_append(dest, src);
		return 0;
	}

	/* at least one encoded character */
	str_append_data(dest, src, p-src);
	/* Two output bytes per input byte is always enough: every input
	   character produces at most 2 UTF-16 bytes per UTF-8 byte consumed
	   (1 -> 2, 2 -> 2, 3 -> 2, 4 -> 4). */
	utf16 = t_malloc0(MALLOC_MULTIPLY(strlen(p), 2));
	while (*p != '\0') {
		if (*p == escape_char &&
		    imap_escaped_utf8_hex_to_char(p+1, &c) == 0) {
			/* escape char + 2 hex digits */
			str_append_c(dest, c);
			p += 3;
			continue;
		}
		if (*p == '&') {
			str_append(dest, "&-");
			p++;
			continue;
		}
		if (*p >= 0x20 && *p < 0x7f) {
			str_append_c(dest, *p);
			p++;
			continue;
		}

		/* Collect the whole run of characters outside 0x20..0x7e as
		   big-endian UTF-16 and emit it as one "&...-" sequence. */
		u = utf16;
		while (*p != '\0' && (*p < 0x20 || *p >= 0x7f)) {
			if (uni_utf8_get_char(p, &chr) <= 0)
				return -1;
			/* @UNSAFE */
			if (chr < UTF16_SURROGATE_BASE) {
				*u++ = chr >> 8;
				*u++ = chr & 0xff;
			} else {
				u16 = UTF16_SURROGATE_HIGH(chr);
				*u++ = u16 >> 8;
				*u++ = u16 & 0xff;
				u16 = UTF16_SURROGATE_LOW(chr);
				*u++ = u16 >> 8;
				*u++ = u16 & 0xff;
			}
			p += uni_utf8_char_bytes((unsigned char)*p);
		}
		mbase64_encode(dest, utf16, u-utf16);
	}
	return 0;
}

/* Convert UTF-8 src to IMAP UTF-7 without any escape handling.
   Returns 0 on success, -1 on invalid input. */
int imap_utf8_to_utf7(const char *src, string_t *dest)
{
	return imap_utf8_to_utf7_int(src, '\0', dest);
}

/* Like imap_utf8_to_utf7(), but "<escape_char><2 lowercase hex digits>"
   sequences in src are converted to the byte they encode. escape_char must
   not be '&'. */
int imap_escaped_utf8_to_utf7(const char *src, char escape_char,
			      string_t *dest)
{
	i_assert(escape_char != '&');
	return imap_utf8_to_utf7_int(src, escape_char, dest);
}

/* Convert UTF-8 src to IMAP UTF-7 and return the result via *dest_r.
   If nothing needs encoding, *dest_r is set to src itself; otherwise it
   points into a string created with t_str_new(). *dest_r is assigned even
   when -1 is returned. */
int t_imap_utf8_to_utf7(const char *src, const char **dest_r)
{
	string_t *str;
	int ret;

	if (imap_utf8_first_encode_char(src, '\0') == NULL) {
		*dest_r = src;
		return 0;
	}

	str = t_str_new(64);
	ret = imap_utf8_to_utf7(src, str);
	*dest_r = str_c(str);
	return ret;
}

/* Decode one character from the 4-byte ring buffer output[], starting at
   index *_pos, and append it to dest as UTF-8. len is the number of buffered
   bytes available from *_pos (4 when the ring is full).

   Returns 0 on success and -1 if the buffered data is not acceptable: odd
   byte count, an encoded NUL, an encoded printable ASCII character, or a
   broken surrogate pair. dest may have been appended to even when -1 is
   returned; callers are expected to discard it. */
static int utf16buf_to_utf8(string_t *dest, const unsigned char output[4],
			    unsigned int *_pos, unsigned int len)
{
	unsigned int pos = *_pos;
	uint16_t high, low;
	unichar_t chr;

	if (len % 2 != 0)
		return -1;

	high = (output[pos % 4] << 8) | output[(pos+1) % 4];
	if (high < UTF16_SURROGATE_HIGH_FIRST ||
	    high > UTF16_SURROGATE_HIGH_MAX) {
		/* not part of a surrogate pair: a single UTF-16 code unit */
		size_t oldlen = str_len(dest);

		if (high == 0) {
			/* Encoded NUL isn't going to work in Dovecot code,
			   even though it's technically valid. Return failure
			   so the callers don't even get a chance to handle
			   the NUL in the string inconsistently. */
			return -1;
		}
		uni_ucs4_to_utf8_c(high, dest);
		if (str_len(dest) - oldlen == 1) {
			/* printable ASCII must never be base64-encoded */
			unsigned char last = str_data(dest)[oldlen];
			if (last >= 0x20 && last < 0x7f)
				return -1;
		}
		*_pos = (pos + 2) % 4;
		return 0;
	}

	if (high > UTF16_SURROGATE_HIGH_LAST)
		return -1;
	if (len != 4) {
		/* missing the second character */
		return -1;
	}

	low = (output[(pos+2)%4] << 8) | output[(pos+3) % 4];
	if (low < UTF16_SURROGATE_LOW_FIRST || low > UTF16_SURROGATE_LOW_LAST)
		return -1;

	chr = UTF16_SURROGATE_BASE +
		(((high & UTF16_SURROGATE_MASK) << UTF16_SURROGATE_SHIFT) |
		 (low & UTF16_SURROGATE_MASK));
	uni_ucs4_to_utf8_c(chr, dest);
	/* A pair consumes all 4 buffered bytes, so the ring start position
	   (mod 4) stays the same and *_pos isn't touched. */
	return 0;
}

/* Decode a modified base64 sequence into UTF-8 and append it to dest.
   *_src must point just past the introducing '&'. On success returns 0 and
   moves *_src past the terminating '-'. On failure returns -1 and leaves
   *_src unchanged; dest may contain partial output in that case. */
static int mbase64_decode_to_utf8(string_t *dest, const char **_src)
{
	const char *src = *_src;
	unsigned char input[4], output[4];
	/* output[] is a 4-byte ring: outpos counts bytes written so far,
	   outstart is the ring index of the first not yet consumed byte. */
	unsigned int outstart = 0, outpos = 0;

	while (*src != '-') {
		/* The first two characters of each group are mandatory.
		   A NUL maps to 0xff, so end of string fails here as well. */
		input[0] = imap_b64dec[(uint8_t)src[0]];
		if (input[0] == 0xff)
			return -1;
		input[1] = imap_b64dec[(uint8_t)src[1]];
		if (input[1] == 0xff)
			return -1;

		output[outpos % 4] = (input[0] << 2) | (input[1] >> 4);
		if (++outpos % 4 == outstart) {
			if (utf16buf_to_utf8(dest, output, &outstart, 4) < 0)
				return -1;
		}

		input[2] = imap_b64dec[(uint8_t)src[2]];
		if (input[2] == 0xff) {
			if (src[2] != '-')
				return -1;

			src += 2;
			break;
		}

		output[outpos % 4] = ((input[1] << 4) & 0xff) |
			(input[2] >> 2);
		if (++outpos % 4 == outstart) {
			if (utf16buf_to_utf8(dest, output, &outstart, 4) < 0)
				return -1;
		}

		input[3] = imap_b64dec[(uint8_t)src[3]];
		if (input[3] == 0xff) {
			if (src[3] != '-')
				return -1;

			src += 3;
			break;
		}

		output[outpos % 4] = ((input[2] << 6) & 0xc0) | input[3];
		if (++outpos % 4 == outstart) {
			if (utf16buf_to_utf8(dest, output, &outstart, 4) < 0)
				return -1;
		}
		src += 4;
	}
	if (outstart != outpos % 4) {
		/* flush whatever is left in the ring buffer */
		if (utf16buf_to_utf8(dest, output, &outstart,
				     (4 + outpos - outstart) % 4) < 0)
			return -1;
	}

	/* Found the ending '-'. Make sure it's not followed by unnecessary
	   shift. Note that '&' is always escaped as "&-" so it's not an
	   unnecessary shift. */
	if (src[1] == '&' && src[2] != '-')
		return -1;

	*_src = src + 1;
	return 0;
}

/* Convert IMAP UTF-7 src to UTF-8, appending the result to dest.

   Strict mode (escape_chars is ""): any byte outside 0x20..0x7e and any
   undecodable "&...-" sequence makes the function return -1. dest may
   contain partial output in that case.

   Escaped mode (escape_chars is non-empty): nothing fails. Bytes outside
   0x20..0x7e and bytes listed in escape_chars are written as
   "<escape_chars[0]><2 lowercase hex digits>", and the '&' of an
   undecodable sequence is written as "<escape_chars[0]>26" with the rest of
   the sequence processed as ordinary input.

   Returns 0 on success, -1 on invalid input (strict mode only). */
static int
imap_utf7_to_utf8_int(const char *src, const char *escape_chars,
		      string_t *dest)
{
	const bool strict = escape_chars[0] == '\0';
	const char *p;

	for (p = src; *p != '\0'; p++) {
		if (*p < 0x20 || *p >= 0x7f) {
			if (strict)
				return -1;
			break;
		}
		if (*p == '&' || strchr(escape_chars, *p) != NULL)
			break;
	}
	if (*p == '\0') {
		/* no IMAP-UTF-7 encoded characters */
		str_append(dest, src);
		return 0;
	}

	/* at least one encoded character */
	str_append_data(dest, src, p-src);
	while (*p != '\0') {
		bool nonprintable = *p < 0x20 || *p >= 0x7f;

		if (nonprintable && strict) {
			/* The scan above stops at the first '&', so invalid
			   bytes after it must be rejected here. Otherwise
			   they'd be "escaped" using a NUL escape character,
			   which would embed a NUL into dest. */
			return -1;
		}
		if (nonprintable || strchr(escape_chars, *p) != NULL) {
			str_printfa(dest, "%c%02x", escape_chars[0],
				    (unsigned char)*p);
			p++;
		} else if (*p == '&') {
			if (*++p == '-') {
				/* "&-" is a literal '&' */
				str_append_c(dest, '&');
				p++;
			} else {
				size_t orig_size = str_len(dest);

				if (mbase64_decode_to_utf8(dest, &p) < 0) {
					if (strict)
						return -1;
					/* Drop the partial output and escape
					   the '&' (0x26). p wasn't moved by
					   the failed decode, so the rest of
					   the sequence is handled as regular
					   input. */
					str_truncate(dest, orig_size);
					str_printfa(dest, "%c26",
						    escape_chars[0]);
				}
			}
		} else {
			str_append_c(dest, *p++);
		}
	}
	return 0;
}

/* Convert IMAP UTF-7 src to UTF-8, appending the result to dest.
   Returns 0 on success, -1 if src isn't valid. */
int imap_utf7_to_utf8(const char *src, string_t *dest)
{
	return imap_utf7_to_utf8_int(src, "", dest);
}

/* Convert IMAP UTF-7 src to UTF-8, writing anything that can't be converted
   (and any character listed in escape_chars) as escape_chars[0] followed by
   two hex digits. escape_chars[0] must not be '&'. The conversion is
   asserted not to fail. */
void imap_utf7_to_utf8_escaped(const char *src, const char *escape_chars,
			       string_t *dest)
{
	i_assert(escape_chars[0] != '&');

	if (imap_utf7_to_utf8_int(src, escape_chars, dest) < 0)
		i_unreached();
}

/* Returns TRUE if src consists only of bytes in 0x20..0x7e and every '&'
   starts a sequence that imap_utf7_to_utf8() accepts. */
bool imap_utf7_is_valid(const char *src)
{
	const char *p;
	int ret;

	for (p = src; *p != '\0'; p++) {
		if (*p < 0x20 || *p >= 0x7f)
			return FALSE;
		if (*p == '&') {
			/* slow scan. The strict conversion validates
			   everything from here up to the end of the string
			   (including all the later '&' sequences), so there
			   is no need to continue scanning and re-decode the
			   tail again at every following '&'. */
			T_BEGIN {
				string_t *tmp = t_str_new(128);
				ret = imap_utf7_to_utf8(p, tmp);
			} T_END;
			return ret >= 0;
		}
	}
	return TRUE;
}