diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 16:16:13 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 16:16:13 +0000 |
commit | e90fcc54809db2591dc083f43ef54c6ec8c60847 (patch) | |
tree | f20bc206c3c2d5d59d37c46c5cf5d53a20642556 /src/rfc2047.c | |
parent | Initial commit. (diff) | |
download | exim4-e90fcc54809db2591dc083f43ef54c6ec8c60847.tar.xz exim4-e90fcc54809db2591dc083f43ef54c6ec8c60847.zip |
Adding upstream version 4.96. (refs: upstream/4.96, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/rfc2047.c')
-rw-r--r-- | src/rfc2047.c | 345 |
1 files changed, 345 insertions, 0 deletions
diff --git a/src/rfc2047.c b/src/rfc2047.c new file mode 100644 index 0000000..1ed1dd8 --- /dev/null +++ b/src/rfc2047.c @@ -0,0 +1,345 @@ +/************************************************* +* Exim - an Internet mail transport agent * +*************************************************/ + +/* Copyright (c) The Exim Maintainers 2020 - 2022 */ +/* Copyright (c) University of Cambridge 1995 - 2018 */ +/* See the file NOTICE for conditions of use and distribution. */ + +/* This file contains a function for decoding message header lines that may +contain encoded "words" according to the rules described in + + RFC-2047 at http://www.ietf.org/rfc/rfc2047.txt + +The function is a rewritten version of code created by Norihisa Washitake. +The original could be used both inside Exim (as part of a patch) or in a +freestanding form. The original contained some built-in code conversions; I +have chosen only to do code conversions if iconv() is supported by the OS. +Because there were quite a lot of hacks to be done, for a variety of reasons, +I rewrote the code. + +You can find the latest version of the original library at + + http://washitake.com/mail/exim/mime/ + +The code below is almost completely unlike the original. */ + + +#include "exim.h" + + +/************************************************* +* Do a QP conversion * +*************************************************/ + +/* This function decodes "quoted printable" into bytes. + +Arguments: + string the string that includes QP escapes + ptrptr where to return pointer to the decoded string + +Returns: the length of the decoded string, or -1 on failure +*/ + +static int +rfc2047_qpdecode(uschar *string, uschar **ptrptr) +{ +int len = 0; +uschar *ptr; + +ptr = *ptrptr = store_get(Ustrlen(string) + 1, string); /* No longer than this */ + +while (*string != 0) + { + int ch = *string++; + + if (ch == '_') *ptr++ = ' '; + else if (ch == '=') + { + int a = *string; + int b = (a == 0)? 
0 : string[1]; + if (!isxdigit(a) || !isxdigit(b)) return -1; /* Bad QP string */ + *ptr++ = ((Ustrchr(hex_digits, tolower(a)) - hex_digits) << 4) + + Ustrchr(hex_digits, tolower(b)) - hex_digits; + string += 2; + } + else if (ch == ' ' || ch == '\t') return -1; /* Whitespace is illegal */ + else *ptr++ = ch; + + len++; + } + +*ptr = 0; +return len; +} + + + +/************************************************* +* Decode next MIME word * +*************************************************/ + +/* Scan a string to see if a MIME word exists; pass back the separator +points in the string. + +Arguments: + string subject string + lencheck TRUE to enforce maximum length check + q1ptr pass back address of first question mark + q2ptr pass back address of second question mark + endptr pass back address of final ?= + dlenptr pass back length of decoded string + dptrptr pass back pointer to decoded string + +Returns: address of =? or NULL if not present +*/ + +static uschar * +decode_mimeword(uschar *string, BOOL lencheck, uschar **q1ptr, uschar **q2ptr, + uschar **endptr, size_t *dlenptr, uschar **dptrptr) +{ +uschar *mimeword; +for (;; string = mimeword + 2) + { + int encoding; + int dlen = -1; + + if ((mimeword = Ustrstr(string, "=?")) == NULL || + (*q1ptr = Ustrchr(mimeword+2, '?')) == NULL || + (*q2ptr = Ustrchr(*q1ptr+1, '?')) == NULL || + (*endptr = Ustrstr(*q2ptr+1, "?=")) == NULL) return NULL; + + /* We have found =?xxx?xxx?xxx?= in the string. Optionally check the + length, and that the second field is just one character long. If not, + continue the loop to search again. We must start just after the initial =? + because we might have found =?xxx=?xxx?xxx?xxx?=. */ + + if ((lencheck && *endptr - mimeword > 73) || *q2ptr - *q1ptr != 2) continue; + + /* Get the encoding letter, and decode the data string. 
*/ + + encoding = toupper((*q1ptr)[1]); + **endptr = 0; + if (encoding == 'B') + dlen = b64decode(*q2ptr+1, dptrptr); + else if (encoding == 'Q') + dlen = rfc2047_qpdecode(*q2ptr+1, dptrptr); + **endptr = '?'; /* restore */ + + /* If the decoding succeeded, we are done. Set the length of the decoded + string, and pass back the initial pointer. Otherwise, the loop continues. */ + + if (dlen >= 0) + { + *dlenptr = (size_t)dlen; + return mimeword; + } + } + +/* Control should never actually get here */ +} + + + +/************************************************* +* Decode and convert an RFC 2047 string * +*************************************************/ + +/* There are two functions defined here. The original one was rfc2047_decode() +and it was documented in the local_scan() interface. I needed to add an extra +argument for use by expand_string(), so I created rfc2047_decode2() for that +purpose. The original function became a stub that just supplies NULL for the +new argument (sizeptr). + +An RFC 2047-encoded string may contain one or more "words", each of the +form =?...?.?...?= with the first ... specifying the character code, the +second being Q (for quoted printable) or B for Base64 encoding. The third ... +is the actual data. + +This function first decodes each "word" into bytes from the Q or B encoding. +Then, if provided with the name of a charset encoding, and if iconv() is +available, it attempts to translate the result to the named character set. +If this fails, the binary string is returned with an error message. + +If a binary zero is encountered in the decoded string, it is replaced by the +contents of the zeroval argument. For use with Exim headers, the value must not +be 0 because they are handled as zero-terminated strings. When zeroval==0, +lenptr should not be NULL. 
+ +Arguments: + string the subject string + lencheck TRUE to enforce maximum MIME word length + target the name of the target encoding for MIME words, or NULL for + no charset translation + zeroval the value to use for binary zero bytes + lenptr if not NULL, the length of the result is returned via + this variable + sizeptr if not NULL, the length of a new store block in which the + result is built is placed here; if no new store is obtained, + the value is not changed + error for error messages; NULL if no problem; this can be set + when the yield is non-NULL if there was a charset + translation problem + +Returns: the decoded, converted string, or NULL on error; if there are + no MIME words in the string, the original string is returned +*/ + +uschar * +rfc2047_decode2(uschar *string, BOOL lencheck, const uschar *target, + int zeroval, int *lenptr, int *sizeptr, uschar **error) +{ +int size = Ustrlen(string); +size_t dlen; +uschar *dptr; +gstring *yield; +uschar *mimeword, *q1, *q2, *endword; + +*error = NULL; +mimeword = decode_mimeword(string, lencheck, &q1, &q2, &endword, &dlen, &dptr); + +if (!mimeword) + { + if (lenptr) *lenptr = size; + return string; + } + +/* Scan through the string, decoding MIME words and copying intermediate text, +building the result as we go. The result may be longer than the input if it is +translated into a multibyte code such as UTF-8. That's why we use the dynamic +string building code. */ + +yield = store_get(sizeof(gstring) + ++size, string); +yield->size = size; +yield->ptr = 0; +yield->s = US(yield + 1); + +while (mimeword) + { + + #if HAVE_ICONV + iconv_t icd = (iconv_t)(-1); + #endif + + if (mimeword != string) + yield = string_catn(yield, string, mimeword - string); +/*XXX that might have to convert an untainted string to a tainted one */ + + /* Do a charset translation if required. This is supported only on hosts + that have the iconv() function. Translation errors set error, but carry on, + using the untranslated data. 
If there is more than one error, the message + passed back refers to the final one. We use a loop to cater for the case + of long strings - the RFC puts limits on the length, but it's best to be + robust. */ + + #if HAVE_ICONV + *q1 = 0; + if (target && strcmpic(target, mimeword+2) != 0) + if ((icd = iconv_open(CS target, CS(mimeword+2))) == (iconv_t)-1) + *error = string_sprintf("iconv_open(\"%s\", \"%s\") failed: %s%s", + target, mimeword+2, strerror(errno), + (errno == EINVAL)? " (maybe unsupported conversion)" : ""); + *q1 = '?'; + #endif + + while (dlen > 0) + { + uschar *tptr = NULL; /* Stops compiler warning */ + int tlen = -1; + + #if HAVE_ICONV + uschar tbuffer[256]; + uschar *outptr = tbuffer; + size_t outleft = sizeof(tbuffer); + + /* If translation is required, go for it. */ + + if (icd != (iconv_t)(-1)) + { + (void)iconv(icd, (ICONV_ARG2_TYPE)(&dptr), &dlen, CSS &outptr, &outleft); + + /* If outptr has been adjusted, there is some output. Set up to add it to + the output buffer. The function will have adjusted dptr and dlen. If + iconv() stopped because of an error, we'll pick it up next time when + there's no output. + + If there is no output, we expect there to have been a translation + error, because we know there was at least one input byte. We leave the + value of tlen as -1, which causes the rest of the input to be copied + verbatim. */ + + if (outptr > tbuffer) + { + tptr = tbuffer; + tlen = outptr - tbuffer; + } + else + { + DEBUG(D_any) debug_printf("iconv error translating \"%.*s\" to %s: " + "%s\n", (int)(endword + 2 - mimeword), mimeword, target, strerror(errno)); + } + } + + #endif + + /* No charset translation is happening or there was a translation error; + just set up the original as the string to be added, and mark it all used. + */ + + if (tlen == -1) + { + tptr = dptr; + tlen = dlen; + dlen = 0; + } + + /* Deal with zero values; convert them if requested. 
*/ + + if (zeroval != 0) + for (int i = 0; i < tlen; i++) + if (tptr[i] == 0) tptr[i] = zeroval; + + /* Add the new string onto the result */ + + yield = string_catn(yield, tptr, tlen); + } + + #if HAVE_ICONV + if (icd != (iconv_t)(-1)) iconv_close(icd); + #endif + + /* Update string past the MIME word; skip any white space if the next thing + is another MIME word. */ + + string = endword + 2; + mimeword = decode_mimeword(string, lencheck, &q1, &q2, &endword, &dlen, &dptr); + if (mimeword) + { + uschar *s = string; + while (isspace(*s)) s++; + if (s == mimeword) string = s; + } + } + +/* Copy the remaining characters of the string, zero-terminate it, and return +the length as well if requested. */ + +yield = string_cat(yield, string); + +if (lenptr) *lenptr = yield->ptr; +if (sizeptr) *sizeptr = yield->size; +return string_from_gstring(yield); +} + + +/* This is the stub that provides the original interface without the sizeptr +argument. */ + +uschar * +rfc2047_decode(uschar *string, BOOL lencheck, const uschar *target, int zeroval, + int *lenptr, uschar **error) +{ +return rfc2047_decode2(string, lencheck, target, zeroval, lenptr, NULL, error); +} + +/* End of rfc2047.c */ |