/*++ /* NAME /* uxtext 3 /* SUMMARY /* quote/unquote text, xtext style. /* SYNOPSIS /* #include /* /* VSTRING *uxtext_quote(quoted, unquoted, special) /* VSTRING *quoted; /* const char *unquoted; /* const char *special; /* /* VSTRING *uxtext_quote_append(unquoted, quoted, special) /* VSTRING *unquoted; /* const char *quoted; /* const char *special; /* /* VSTRING *uxtext_unquote(unquoted, quoted) /* VSTRING *unquoted; /* const char *quoted; /* /* VSTRING *uxtext_unquote_append(unquoted, quoted) /* VSTRING *unquoted; /* const char *quoted; /* DESCRIPTION /* uxtext_quote() takes a null-terminated UTF8 string and /* replaces characters \, <33(10) and >126(10), as well as /* characters specified with "special" with \x{XX}, XX being /* a 2-6-digit uppercase hexadecimal equivalent. /* /* uxtext_quote_append() is like uxtext_quote(), but appends /* the conversion result to the result buffer. /* /* uxtext_unquote() performs the opposite transformation. This /* function understands lowercase, uppercase, and mixed case /* \x{XX...} sequences. The result value is the unquoted /* argument in case of success, a null pointer otherwise. /* /* uxtext_unquote_append() is like uxtext_unquote(), but appends /* the conversion result to the result buffer. /* BUGS /* This module cannot process null characters in data. /* LICENSE /* .ad /* .fi /* The Secure Mailer license must be distributed with this software. /* AUTHOR(S) /* Arnt Gulbrandsen /* /* Wietse Venema /* IBM T.J. Watson Research /* P.O. Box 704 /* Yorktown Heights, NY 10598, USA /* /* Wietse Venema /* Google, Inc. /* 111 8th Avenue /* New York, NY 10011, USA /*--*/ /* System library. */ #include #include #include /* Utility library. */ #include "msg.h" #include "vstring.h" #include "uxtext.h" /* Application-specific. */ #define STR(x) vstring_str(x) #define LEN(x) VSTRING_LEN(x) /* uxtext_quote_append - append unquoted data to quoted data */ VSTRING *uxtext_quote_append(VSTRING *quoted, const char *unquoted, const char *special) { unsigned const char *cp; int ch; for (cp = (unsigned const char *) unquoted; (ch = *cp) != 0; cp++) { /* Fix 20140709: the '\' character must always be quoted. */ if (ch != '\\' && ch > 32 && ch < 127 && (*special == 0 || strchr(special, ch) == 0)) { VSTRING_ADDCH(quoted, ch); } else { /* * had RFC6533 been written like 6531 and 6532, this else clause * would be one line long. */ int unicode = 0; int pick = 0; if (ch < 0x80) { //0000 0000 - 0000 007 F 0x xxxxxx unicode = ch; } else if ((ch & 0xe0) == 0xc0) { //0000 0080 - 0000 07 FF 110 xxxxx 10 xxxxxx unicode = (ch & 0x1f); pick = 1; } else if ((ch & 0xf0) == 0xe0) { //0000 0800 - 0000 FFFF 1110 xxxx 10 xxxxxx 10 xxxxxx unicode = (ch & 0x0f); pick = 2; } else if ((ch & 0xf8) == 0xf0) { //0001 0000 - 001 F FFFF 11110 xxx 10 xxxxxx 10 xxxxxx 10 xxxxxx unicode = (ch & 0x07); pick = 3; } else if ((ch & 0xfc) == 0xf8) { //0020 0000 - 03 FF FFFF 111110 xx 10 xxxxxx 10 xxxxxx...10 xxxxxx unicode = (ch & 0x03); pick = 4; } else if ((ch & 0xfe) == 0xfc) { //0400 0000 - 7 FFF FFFF 1111110 x 10 xxxxxx...10 xxxxxx unicode = (ch & 0x01); pick = 5; } else { return (0); } while (pick > 0) { ch = *++cp; if ((ch & 0xc0) != 0x80) return (0); unicode = unicode << 6 | (ch & 0x3f); pick--; } vstring_sprintf_append(quoted, "\\x{%02X}", unicode); } } VSTRING_TERMINATE(quoted); return (quoted); } /* uxtext_quote - unquoted data to quoted */ VSTRING *uxtext_quote(VSTRING *quoted, const char *unquoted, const char *special) { VSTRING_RESET(quoted); uxtext_quote_append(quoted, unquoted, special); return (quoted); } /* uxtext_unquote_append - quoted data to unquoted */ VSTRING *uxtext_unquote_append(VSTRING *unquoted, const char *quoted) { const unsigned char *cp; int ch; for (cp = (const unsigned char *) quoted; (ch = *cp) != 0; cp++) { if (ch == '\\' && cp[1] == 'x' && cp[2] == '{') { int unicode = 0; cp += 2; while ((ch = *++cp) != '}') { if (ISDIGIT(ch)) unicode = (unicode << 4) + (ch - '0'); else if (ch >= 'a' && ch <= 'f') unicode = (unicode << 4) + (ch - 'a' + 10); else if (ch >= 'A' && ch <= 'F') unicode = (unicode << 4) + (ch - 'A' + 10); else return (0); /* also covers the null * terminator */ if (unicode > 0x10ffff) return (0); } /* * the following block is from * https://github.com/aox/aox/blob/master/encodings/utf.cpp, with * permission by the authors. */ if (unicode < 0x80) { VSTRING_ADDCH(unquoted, (char) unicode); } else if (unicode < 0x800) { VSTRING_ADDCH(unquoted, 0xc0 | ((char) (unicode >> 6))); VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f))); } else if (unicode < 0x10000) { VSTRING_ADDCH(unquoted, 0xe0 | ((char) (unicode >> 12))); VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f)); VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f))); } else if (unicode < 0x200000) { VSTRING_ADDCH(unquoted, 0xf0 | ((char) (unicode >> 18))); VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f)); VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f)); VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f))); } else if (unicode < 0x4000000) { VSTRING_ADDCH(unquoted, 0xf8 | ((char) (unicode >> 24))); VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 18) & 0x3f)); VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f)); VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f)); VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f))); } else { VSTRING_ADDCH(unquoted, 0xfc | ((char) (unicode >> 30))); VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 24) & 0x3f)); VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 18) & 0x3f)); VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f)); VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f)); VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f))); } } else { VSTRING_ADDCH(unquoted, ch); } } VSTRING_TERMINATE(unquoted); return (unquoted); } /* uxtext_unquote - quoted data to unquoted */ VSTRING *uxtext_unquote(VSTRING *unquoted, const char *quoted) { VSTRING_RESET(unquoted); return (uxtext_unquote_append(unquoted, quoted) ? unquoted : 0); } #ifdef TEST /* * Proof-of-concept test program: convert to quoted and back. */ #include #define BUFLEN 1024 static ssize_t read_buf(VSTREAM *fp, VSTRING *buf) { ssize_t len; len = vstream_fread_buf(fp, buf, BUFLEN); VSTRING_TERMINATE(buf); return (len); } int main(int unused_argc, char **unused_argv) { VSTRING *unquoted = vstring_alloc(BUFLEN); VSTRING *quoted = vstring_alloc(100); ssize_t len; /* * Negative tests. */ if (uxtext_unquote(unquoted, "\\x{x1}") != 0) msg_warn("undetected error pattern 1"); if (uxtext_unquote(unquoted, "\\x{2x}") != 0) msg_warn("undetected error pattern 2"); if (uxtext_unquote(unquoted, "\\x{33") != 0) msg_warn("undetected error pattern 3"); /* * Positive tests. */ while ((len = read_buf(VSTREAM_IN, unquoted)) > 0) { uxtext_quote(quoted, STR(unquoted), "+="); if (uxtext_unquote(unquoted, STR(quoted)) == 0) msg_fatal("bad input: %.100s", STR(quoted)); if (LEN(unquoted) != len) msg_fatal("len %ld != unquoted len %ld", (long) len, (long) LEN(unquoted)); if (vstream_fwrite(VSTREAM_OUT, STR(unquoted), LEN(unquoted)) != LEN(unquoted)) msg_fatal("write error: %m"); } vstream_fflush(VSTREAM_OUT); vstring_free(unquoted); vstring_free(quoted); return (0); } #endif