diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 16:51:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 16:51:28 +0000 |
commit | 940b4d1848e8c70ab7642901a68594e8016caffc (patch) | |
tree | eb72f344ee6c3d9b80a7ecc079ea79e9fba8676d /tools/source/inet | |
parent | Initial commit. (diff) | |
download | libreoffice-upstream.tar.xz libreoffice-upstream.zip |
Adding upstream version 1:7.0.4.upstream/1%7.0.4upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | tools/source/inet/inetmime.cxx | 1410 | ||||
-rw-r--r-- | tools/source/inet/inetmsg.cxx | 289 | ||||
-rw-r--r-- | tools/source/inet/inetstrm.cxx | 297 |
3 files changed, 1996 insertions, 0 deletions
diff --git a/tools/source/inet/inetmime.cxx b/tools/source/inet/inetmime.cxx new file mode 100644 index 000000000..fd00fe3a4 --- /dev/null +++ b/tools/source/inet/inetmime.cxx @@ -0,0 +1,1410 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <algorithm> +#include <limits> +#include <forward_list> +#include <memory> + +#include <sal/log.hxx> +#include <rtl/ustring.hxx> +#include <rtl/strbuf.hxx> +#include <rtl/ustrbuf.hxx> +#include <rtl/tencinfo.h> +#include <tools/inetmime.hxx> +#include <rtl/character.hxx> + +namespace { + +rtl_TextEncoding getCharsetEncoding(const char * pBegin, + const char * pEnd); + +/** Check for US-ASCII white space character. + + @param nChar Some UCS-4 character. + + @return True if nChar is a US-ASCII white space character (US-ASCII + 0x09 or 0x20). + */ +bool isWhiteSpace(sal_uInt32 nChar) +{ + return nChar == '\t' || nChar == ' '; +} + +/** Get the Base 64 digit weight of a US-ASCII character. + + @param nChar Some UCS-4 character. + + @return If nChar is a US-ASCII Base 64 digit character (US-ASCII + 'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the + corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding + character (US-ASCII '='), return -1; otherwise, return -2. + */ +int getBase64Weight(sal_uInt32 nChar) +{ + return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') : + rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) : + rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) : + nChar == '+' ? 62 : + nChar == '/' ? 63 : + nChar == '=' ? -1 : -2; +} + +bool startsWithLineFolding(const sal_Unicode * pBegin, + const sal_Unicode * pEnd) +{ + DBG_ASSERT(pBegin && pBegin <= pEnd, + "startsWithLineFolding(): Bad sequence"); + + return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A + && isWhiteSpace(pBegin[2]); // CR, LF +} + +rtl_TextEncoding translateFromMIME(rtl_TextEncoding + eEncoding) +{ +#if defined(_WIN32) + return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ? + RTL_TEXTENCODING_MS_1252 : eEncoding; +#else + return eEncoding; +#endif +} + +bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding) +{ + return rtl_isOctetTextEncoding(eEncoding); +} + +std::unique_ptr<sal_Unicode[]> convertToUnicode(const char * pBegin, + const char * pEnd, + rtl_TextEncoding eEncoding, + sal_Size & rSize) +{ + if (eEncoding == RTL_TEXTENCODING_DONTKNOW) + return nullptr; + rtl_TextToUnicodeConverter hConverter + = rtl_createTextToUnicodeConverter(eEncoding); + rtl_TextToUnicodeContext hContext + = rtl_createTextToUnicodeContext(hConverter); + std::unique_ptr<sal_Unicode[]> pBuffer; + sal_uInt32 nInfo; + for (sal_Size nBufferSize = pEnd - pBegin;; + nBufferSize += nBufferSize / 3 + 1) + { + pBuffer.reset(new sal_Unicode[nBufferSize]); + sal_Size nSrcCvtBytes; + rSize = rtl_convertTextToUnicode( + hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(), + nBufferSize, + RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, + &nInfo, &nSrcCvtBytes); + if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL) + break; + pBuffer.reset(); + rtl_resetTextToUnicodeContext(hConverter, hContext); + } + rtl_destroyTextToUnicodeContext(hConverter, hContext); + rtl_destroyTextToUnicodeConverter(hConverter); + if (nInfo != 0) + { + pBuffer.reset(); + } + return pBuffer; +} + +/** Put the UTF-16 encoding of a UTF-32 character into a buffer. + + @param pBuffer Points to a buffer, must not be null. + + @param nUTF32 A UTF-32 character, must be in the range 0..0x10FFFF. + + @return A pointer past the UTF-16 characters put into the buffer + (i.e., pBuffer + 1 or pBuffer + 2). + */ +sal_Unicode * putUTF32Character(sal_Unicode * pBuffer, + sal_uInt32 nUTF32) +{ + DBG_ASSERT(rtl::isUnicodeCodePoint(nUTF32), "putUTF32Character(): Bad char"); + if (nUTF32 < 0x10000) + *pBuffer++ = sal_Unicode(nUTF32); + else + { + nUTF32 -= 0x10000; + *pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10)); + *pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF)); + } + return pBuffer; +} + +void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar) +{ + // See RFC 2279 for a discussion of UTF-8. + DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char"); + + if (nChar < 0x80) + rSink.append(char(nChar)); + else if (nChar < 0x800) + rSink.append(char(nChar >> 6 | 0xC0)) + .append(char((nChar & 0x3F) | 0x80)); + else if (nChar < 0x10000) + rSink.append(char(nChar >> 12 | 0xE0)) + .append(char((nChar >> 6 & 0x3F) | 0x80)) + .append(char((nChar & 0x3F) | 0x80)); + else if (nChar < 0x200000) + rSink.append(char(nChar >> 18 | 0xF0)) + .append(char((nChar >> 12 & 0x3F) | 0x80)) + .append(char((nChar >> 6 & 0x3F) | 0x80)) + .append(char((nChar & 0x3F) | 0x80)); + else if (nChar < 0x4000000) + rSink.append(char(nChar >> 24 | 0xF8)) + .append(char((nChar >> 18 & 0x3F) | 0x80)) + .append(char((nChar >> 12 & 0x3F) | 0x80)) + .append(char((nChar >> 6 & 0x3F) | 0x80)) + .append(char((nChar & 0x3F) | 0x80)); + else + rSink.append(char(nChar >> 30 | 0xFC)) + .append(char((nChar >> 24 & 0x3F) | 0x80)) + .append(char((nChar >> 18 & 0x3F) | 0x80)) + .append(char((nChar >> 12 & 0x3F) | 0x80)) + .append(char((nChar >> 6 & 0x3F) | 0x80)) + .append(char((nChar & 0x3F) | 0x80)); +} + +bool translateUTF8Char(const char *& rBegin, + const char * pEnd, + sal_uInt32 & rCharacter) +{ + if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80 + || static_cast< unsigned char >(*rBegin) >= 0xFE) + return false; + + int nCount; + sal_uInt32 nMin; + sal_uInt32 nUCS4; + const char * p = rBegin; + if (static_cast< unsigned char >(*p) < 0xE0) + { + nCount = 1; + nMin = 0x80; + nUCS4 = static_cast< unsigned char >(*p) & 0x1F; + } + else if (static_cast< unsigned char >(*p) < 0xF0) + { + nCount = 2; + nMin = 0x800; + nUCS4 = static_cast< unsigned char >(*p) & 0xF; + } + else if (static_cast< unsigned char >(*p) < 0xF8) + { + nCount = 3; + nMin = 0x10000; + nUCS4 = static_cast< unsigned char >(*p) & 7; + } + else if (static_cast< unsigned char >(*p) < 0xFC) + { + nCount = 4; + nMin = 0x200000; + nUCS4 = static_cast< unsigned char >(*p) & 3; + } + else + { + nCount = 5; + nMin = 0x4000000; + nUCS4 = static_cast< unsigned char >(*p) & 1; + } + ++p; + + for (; nCount-- > 0; ++p) + if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80) + nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F); + else + return false; + + if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin) + return false; + + rCharacter = nUCS4; + rBegin = p; + return true; +} + +void appendISO88591(OUStringBuffer & rText, char const * pBegin, + char const * pEnd); + +struct Parameter +{ + OString m_aAttribute; + OString m_aCharset; + OString m_aLanguage; + OString m_aValue; + sal_uInt32 m_nSection; + bool m_bExtended; + + bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort + { + int nComp = m_aAttribute.compareTo(rhs.m_aAttribute); + return nComp < 0 || + (nComp == 0 && m_nSection < rhs.m_nSection); + } + struct IsSameSection // is used to check container for duplicates with std::any_of + { + const OString& rAttribute; + const sal_uInt32 nSection; + bool operator()(const Parameter& r) const + { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; } + }; +}; + +typedef std::forward_list<Parameter> ParameterList; + +bool parseParameters(ParameterList const & rInput, + INetContentTypeParameterList * pOutput); + +// appendISO88591 + +void appendISO88591(OUStringBuffer & rText, char const * pBegin, + char const * pEnd) +{ + sal_Int32 nLength = pEnd - pBegin; + std::unique_ptr<sal_Unicode[]> pBuffer(new sal_Unicode[nLength]); + for (sal_Unicode * p = pBuffer.get(); pBegin != pEnd;) + *p++ = static_cast<unsigned char>(*pBegin++); + rText.append(pBuffer.get(), nLength); +} + +// parseParameters + +bool parseParameters(ParameterList const & rInput, + INetContentTypeParameterList * pOutput) +{ + if (pOutput) + pOutput->clear(); + + for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++) + { + if (it->m_nSection > 0 + && (itPrev == rInput.end() + || itPrev->m_nSection != it->m_nSection - 1 + || itPrev->m_aAttribute != it->m_aAttribute)) + return false; + } + + if (pOutput) + for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext) + { + bool bCharset = !it->m_aCharset.isEmpty(); + rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW; + if (bCharset) + eEncoding + = getCharsetEncoding(it->m_aCharset.getStr(), + it->m_aCharset.getStr() + + it->m_aCharset.getLength()); + OUStringBuffer aValue(64); + bool bBadEncoding = false; + itNext = it; + do + { + sal_Size nSize; + std::unique_ptr<sal_Unicode[]> pUnicode + = convertToUnicode(itNext->m_aValue.getStr(), + itNext->m_aValue.getStr() + + itNext->m_aValue.getLength(), + bCharset && it->m_bExtended ? + eEncoding : + RTL_TEXTENCODING_UTF8, + nSize); + if (!pUnicode && !(bCharset && it->m_bExtended)) + pUnicode = convertToUnicode( + itNext->m_aValue.getStr(), + itNext->m_aValue.getStr() + + itNext->m_aValue.getLength(), + RTL_TEXTENCODING_ISO_8859_1, nSize); + if (!pUnicode) + { + bBadEncoding = true; + break; + } + aValue.append(pUnicode.get(), static_cast<sal_Int32>(nSize)); + ++itNext; + } + while (itNext != rInput.end() && itNext->m_nSection != 0); + + if (bBadEncoding) + { + aValue.setLength(0); + itNext = it; + do + { + if (itNext->m_bExtended) + { + for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i) + aValue.append( + static_cast<sal_Unicode>( + static_cast<unsigned char>(itNext->m_aValue[i]) + | 0xF800)); // map to unicode corporate use sub area + } + else + { + for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i) + aValue.append( static_cast<char>(itNext->m_aValue[i]) ); + } + ++itNext; + } + while (itNext != rInput.end() && itNext->m_nSection != 0); + } + auto const ret = pOutput->insert( + {it->m_aAttribute, + {it->m_aCharset, it->m_aLanguage, aValue.makeStringAndClear(), !bBadEncoding}}); + SAL_INFO_IF(!ret.second, "tools", + "INetMIME: dropping duplicate parameter: " << it->m_aAttribute); + } + return true; +} + +/** Check whether some character is valid within an RFC 2045 <token>. + + @param nChar Some UCS-4 character. + + @return True if nChar is valid within an RFC 2047 <token> (US-ASCII + 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+', + '-', '.', '^', '_', '`', '{', '|', '}', or '~'). + */ +bool isTokenChar(sal_uInt32 nChar) +{ + static const bool aMap[128] + = { false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, true, false, true, true, true, true, true, // !"#$%&' + false, false, true, true, false, true, true, false, //()*+,-./ + true, true, true, true, true, true, true, true, //01234567 + true, true, false, false, false, false, false, false, //89:;<=>? + false, true, true, true, true, true, true, true, //@ABCDEFG + true, true, true, true, true, true, true, true, //HIJKLMNO + true, true, true, true, true, true, true, true, //PQRSTUVW + true, true, true, false, false, false, true, true, //XYZ[\]^_ + true, true, true, true, true, true, true, true, //`abcdefg + true, true, true, true, true, true, true, true, //hijklmno + true, true, true, true, true, true, true, true, //pqrstuvw + true, true, true, true, true, true, true, false //xyz{|}~ + }; + return rtl::isAscii(nChar) && aMap[nChar]; +} + +const sal_Unicode * skipComment(const sal_Unicode * pBegin, + const sal_Unicode * pEnd) +{ + DBG_ASSERT(pBegin && pBegin <= pEnd, + "skipComment(): Bad sequence"); + + if (pBegin != pEnd && *pBegin == '(') + { + sal_uInt32 nLevel = 0; + for (const sal_Unicode * p = pBegin; p != pEnd;) + switch (*p++) + { + case '(': + ++nLevel; + break; + + case ')': + if (--nLevel == 0) + return p; + break; + + case '\\': + if (p != pEnd) + ++p; + break; + } + } + return pBegin; +} + +const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode * + pBegin, + const sal_Unicode * + pEnd) +{ + DBG_ASSERT(pBegin && pBegin <= pEnd, + "skipLinearWhiteSpaceComment(): Bad sequence"); + + while (pBegin != pEnd) + switch (*pBegin) + { + case '\t': + case ' ': + ++pBegin; + break; + + case 0x0D: // CR + if (startsWithLineFolding(pBegin, pEnd)) + pBegin += 3; + else + return pBegin; + break; + + case '(': + { + const sal_Unicode * p = skipComment(pBegin, pEnd); + if (p == pBegin) + return pBegin; + pBegin = p; + break; + } + + default: + return pBegin; + } + return pBegin; +} + +const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin, + const sal_Unicode * pEnd) +{ + DBG_ASSERT(pBegin && pBegin <= pEnd, + "skipQuotedString(): Bad sequence"); + + if (pBegin != pEnd && *pBegin == '"') + for (const sal_Unicode * p = pBegin + 1; p != pEnd;) + switch (*p++) + { + case 0x0D: // CR + if (pEnd - p < 2 || *p++ != 0x0A // LF + || !isWhiteSpace(*p++)) + return pBegin; + break; + + case '"': + return p; + + case '\\': + if (p != pEnd) + ++p; + break; + } + return pBegin; +} + +sal_Unicode const * scanParameters(sal_Unicode const * pBegin, + sal_Unicode const * pEnd, + INetContentTypeParameterList * + pParameters) +{ + ParameterList aList; + sal_Unicode const * pParameterBegin = pBegin; + for (sal_Unicode const * p = pParameterBegin;;) + { + pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd); + if (pParameterBegin == pEnd || *pParameterBegin != ';') + break; + p = pParameterBegin + 1; + + sal_Unicode const * pAttributeBegin + = skipLinearWhiteSpaceComment(p, pEnd); + p = pAttributeBegin; + bool bDowncaseAttribute = false; + while (p != pEnd && isTokenChar(*p) && *p != '*') + { + bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p); + ++p; + } + if (p == pAttributeBegin) + break; + OString aAttribute(pAttributeBegin, p - pAttributeBegin, RTL_TEXTENCODING_ASCII_US); + if (bDowncaseAttribute) + aAttribute = aAttribute.toAsciiLowerCase(); + + sal_uInt32 nSection = 0; + if (p != pEnd && *p == '*') + { + ++p; + if (p != pEnd && rtl::isAsciiDigit(*p) + && !INetMIME::scanUnsigned(p, pEnd, false, nSection)) + break; + } + + bool bPresent = std::any_of(aList.begin(), aList.end(), + Parameter::IsSameSection{aAttribute, nSection}); + if (bPresent) + break; + + bool bExtended = false; + if (p != pEnd && *p == '*') + { + ++p; + bExtended = true; + } + + p = skipLinearWhiteSpaceComment(p, pEnd); + + if (p == pEnd || *p != '=') + break; + + p = skipLinearWhiteSpaceComment(p + 1, pEnd); + + OString aCharset; + OString aLanguage; + OString aValue; + if (bExtended) + { + if (nSection == 0) + { + sal_Unicode const * pCharsetBegin = p; + bool bDowncaseCharset = false; + while (p != pEnd && isTokenChar(*p) && *p != '\'') + { + bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p); + ++p; + } + if (p == pCharsetBegin) + break; + if (pParameters) + { + aCharset = OString( + pCharsetBegin, + p - pCharsetBegin, + RTL_TEXTENCODING_ASCII_US); + if (bDowncaseCharset) + aCharset = aCharset.toAsciiLowerCase(); + } + + if (p == pEnd || *p != '\'') + break; + ++p; + + sal_Unicode const * pLanguageBegin = p; + bool bDowncaseLanguage = false; + int nLetters = 0; + for (; p != pEnd; ++p) + if (rtl::isAsciiAlpha(*p)) + { + if (++nLetters > 8) + break; + bDowncaseLanguage = bDowncaseLanguage + || rtl::isAsciiUpperCase(*p); + } + else if (*p == '-') + { + if (nLetters == 0) + break; + nLetters = 0; + } + else + break; + if (nLetters == 0 || nLetters > 8) + break; + if (pParameters) + { + aLanguage = OString( + pLanguageBegin, + p - pLanguageBegin, + RTL_TEXTENCODING_ASCII_US); + if (bDowncaseLanguage) + aLanguage = aLanguage.toAsciiLowerCase(); + } + + if (p == pEnd || *p != '\'') + break; + ++p; + } + if (pParameters) + { + OStringBuffer aSink; + while (p != pEnd) + { + auto q = p; + sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd); + if (rtl::isAscii(nChar) && !isTokenChar(nChar)) + break; + p = q; + if (nChar == '%' && p + 1 < pEnd) + { + int nWeight1 = INetMIME::getHexWeight(p[0]); + int nWeight2 = INetMIME::getHexWeight(p[1]); + if (nWeight1 >= 0 && nWeight2 >= 0) + { + aSink.append(char(nWeight1 << 4 | nWeight2)); + p += 2; + continue; + } + } + writeUTF8(aSink, nChar); + } + aValue = aSink.makeStringAndClear(); + } + else + while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p))) + ++p; + } + else if (p != pEnd && *p == '"') + if (pParameters) + { + OStringBuffer aSink(256); + bool bInvalid = false; + for (++p;;) + { + if (p == pEnd) + { + bInvalid = true; + break; + } + sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd); + if (nChar == '"') + break; + else if (nChar == 0x0D) // CR + { + if (pEnd - p < 2 || *p++ != 0x0A // LF + || !isWhiteSpace(*p)) + { + bInvalid = true; + break; + } + nChar = static_cast<unsigned char>(*p++); + } + else if (nChar == '\\') + { + if (p == pEnd) + { + bInvalid = true; + break; + } + nChar = INetMIME::getUTF32Character(p, pEnd); + } + writeUTF8(aSink, nChar); + } + if (bInvalid) + break; + aValue = aSink.makeStringAndClear(); + } + else + { + sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd); + if (p == pStringEnd) + break; + p = pStringEnd; + } + else + { + sal_Unicode const * pTokenBegin = p; + while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p))) + ++p; + if (p == pTokenBegin) + break; + if (pParameters) + aValue = OString( + pTokenBegin, p - pTokenBegin, + RTL_TEXTENCODING_UTF8); + } + aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended}); + } + aList.sort(); + return parseParameters(aList, pParameters) ? pParameterBegin : pBegin; +} + +bool equalIgnoreCase(const char * pBegin1, + const char * pEnd1, + const char * pString2) +{ + DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2, + "equalIgnoreCase(): Bad sequences"); + + while (*pString2 != 0) + if (pBegin1 == pEnd1 + || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++)) + != rtl::toAsciiUpperCase( + static_cast<unsigned char>(*pString2++)))) + return false; + return pBegin1 == pEnd1; +} + +struct EncodingEntry +{ + char const * m_aName; + rtl_TextEncoding m_eEncoding; +}; + +// The source for the following table is <ftp://ftp.iana.org/in-notes/iana/ +// assignments/character-sets> as of Jan, 21 2000 12:46:00, unless otherwise +// noted: +static EncodingEntry const aEncodingMap[] + = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US }, + { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US }, + { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US }, + { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US }, + { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US }, + { "ASCII", RTL_TEXTENCODING_ASCII_US }, + { "ISO646-US", RTL_TEXTENCODING_ASCII_US }, + { "US", RTL_TEXTENCODING_ASCII_US }, + { "IBM367", RTL_TEXTENCODING_ASCII_US }, + { "CP367", RTL_TEXTENCODING_ASCII_US }, + { "CSASCII", RTL_TEXTENCODING_ASCII_US }, + { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 }, + { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 }, + { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 }, + { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 }, + { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 }, + { "L1", RTL_TEXTENCODING_ISO_8859_1 }, + { "IBM819", RTL_TEXTENCODING_ISO_8859_1 }, + { "CP819", RTL_TEXTENCODING_ISO_8859_1 }, + { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 }, + { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 }, + { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 }, + { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 }, + { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 }, + { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 }, + { "L2", RTL_TEXTENCODING_ISO_8859_2 }, + { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 }, + { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 }, + { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 }, + { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 }, + { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 }, + { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 }, + { "L3", RTL_TEXTENCODING_ISO_8859_3 }, + { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 }, + { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 }, + { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 }, + { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 }, + { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 }, + { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 }, + { "L4", RTL_TEXTENCODING_ISO_8859_4 }, + { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 }, + { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 }, + { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 }, + { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 }, + { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 }, + { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 }, + { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 }, + { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 }, + { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 }, + { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 }, + { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 }, + { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 }, + { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 }, + { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 }, + { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 }, + { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 }, + { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 }, + { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 }, + { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 }, + { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 }, + { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 }, + { "GREEK", RTL_TEXTENCODING_ISO_8859_7 }, + { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 }, + { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 }, + { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 }, + { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 }, + { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 }, + { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 }, + { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 }, + { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 }, + { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 }, + { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 }, + { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 }, + { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 }, + { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 }, + { "L5", RTL_TEXTENCODING_ISO_8859_9 }, + { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 }, + { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047 + { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 }, + { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047 + { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN }, + { "MAC", RTL_TEXTENCODING_APPLE_ROMAN }, + { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN }, + { "IBM437", RTL_TEXTENCODING_IBM_437 }, + { "CP437", RTL_TEXTENCODING_IBM_437 }, + { "437", RTL_TEXTENCODING_IBM_437 }, + { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 }, + { "IBM850", RTL_TEXTENCODING_IBM_850 }, + { "CP850", RTL_TEXTENCODING_IBM_850 }, + { "850", RTL_TEXTENCODING_IBM_850 }, + { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 }, + { "IBM860", RTL_TEXTENCODING_IBM_860 }, + { "CP860", RTL_TEXTENCODING_IBM_860 }, + { "860", RTL_TEXTENCODING_IBM_860 }, + { "CSIBM860", RTL_TEXTENCODING_IBM_860 }, + { "IBM861", RTL_TEXTENCODING_IBM_861 }, + { "CP861", RTL_TEXTENCODING_IBM_861 }, + { "861", RTL_TEXTENCODING_IBM_861 }, + { "CP-IS", RTL_TEXTENCODING_IBM_861 }, + { "CSIBM861", RTL_TEXTENCODING_IBM_861 }, + { "IBM863", RTL_TEXTENCODING_IBM_863 }, + { "CP863", RTL_TEXTENCODING_IBM_863 }, + { "863", RTL_TEXTENCODING_IBM_863 }, + { "CSIBM863", RTL_TEXTENCODING_IBM_863 }, + { "IBM865", RTL_TEXTENCODING_IBM_865 }, + { "CP865", RTL_TEXTENCODING_IBM_865 }, + { "865", RTL_TEXTENCODING_IBM_865 }, + { "CSIBM865", RTL_TEXTENCODING_IBM_865 }, + { "IBM775", RTL_TEXTENCODING_IBM_775 }, + { "CP775", RTL_TEXTENCODING_IBM_775 }, + { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 }, + { "IBM852", RTL_TEXTENCODING_IBM_852 }, + { "CP852", RTL_TEXTENCODING_IBM_852 }, + { "852", RTL_TEXTENCODING_IBM_852 }, + { "CSPCP852", RTL_TEXTENCODING_IBM_852 }, + { "IBM855", RTL_TEXTENCODING_IBM_855 }, + { "CP855", RTL_TEXTENCODING_IBM_855 }, + { "855", RTL_TEXTENCODING_IBM_855 }, + { "CSIBM855", RTL_TEXTENCODING_IBM_855 }, + { "IBM857", RTL_TEXTENCODING_IBM_857 }, + { "CP857", RTL_TEXTENCODING_IBM_857 }, + { "857", RTL_TEXTENCODING_IBM_857 }, + { "CSIBM857", RTL_TEXTENCODING_IBM_857 }, + { "IBM862", RTL_TEXTENCODING_IBM_862 }, + { "CP862", RTL_TEXTENCODING_IBM_862 }, + { "862", RTL_TEXTENCODING_IBM_862 }, + { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 }, + { "IBM864", RTL_TEXTENCODING_IBM_864 }, + { "CP864", RTL_TEXTENCODING_IBM_864 }, + { "CSIBM864", RTL_TEXTENCODING_IBM_864 }, + { "IBM866", RTL_TEXTENCODING_IBM_866 }, + { "CP866", RTL_TEXTENCODING_IBM_866 }, + { "866", RTL_TEXTENCODING_IBM_866 }, + { "CSIBM866", RTL_TEXTENCODING_IBM_866 }, + { "IBM869", RTL_TEXTENCODING_IBM_869 }, + { "CP869", RTL_TEXTENCODING_IBM_869 }, + { "869", RTL_TEXTENCODING_IBM_869 }, + { "CP-GR", RTL_TEXTENCODING_IBM_869 }, + { "CSIBM869", RTL_TEXTENCODING_IBM_869 }, + { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 }, + { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 }, + { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 }, + { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 }, + { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 }, + { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 }, + { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 }, + { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 }, + { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS }, + { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS }, + { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS }, + { "GB2312", RTL_TEXTENCODING_GB_2312 }, + { "CSGB2312", RTL_TEXTENCODING_GB_2312 }, + { "BIG5", RTL_TEXTENCODING_BIG5 }, + { "CSBIG5", RTL_TEXTENCODING_BIG5 }, + { "EUC-JP", RTL_TEXTENCODING_EUC_JP }, + { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE", + RTL_TEXTENCODING_EUC_JP }, + { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP }, + { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP }, + { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP }, + { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN }, + { "KOI8-R", RTL_TEXTENCODING_KOI8_R }, + { "CSKOI8R", RTL_TEXTENCODING_KOI8_R }, + { "UTF-7", RTL_TEXTENCODING_UTF7 }, + { "UTF-8", RTL_TEXTENCODING_UTF8 }, + { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047 + { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047 + { "EUC-KR", RTL_TEXTENCODING_EUC_KR }, + { "CSEUCKR", RTL_TEXTENCODING_EUC_KR }, + { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR }, + { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR }, + { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 }, + { "CSUCS4", RTL_TEXTENCODING_UCS4 }, + { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 }, + { "CSUNICODE", RTL_TEXTENCODING_UCS2 } }; + +rtl_TextEncoding getCharsetEncoding(char const * pBegin, + char const * pEnd) +{ + for (const EncodingEntry& i : aEncodingMap) + if (equalIgnoreCase(pBegin, pEnd, i.m_aName)) + return i.m_eEncoding; + return RTL_TEXTENCODING_DONTKNOW; +} + +} + +// INetMIME + +// static +bool INetMIME::isAtomChar(sal_uInt32 nChar) +{ + static const bool aMap[128] + = { false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, true, false, true, true, true, true, true, // !"#$%&' + false, false, true, true, false, true, false, true, //()*+,-./ + true, true, true, true, true, true, true, true, //01234567 + true, true, false, false, false, true, false, true, //89:;<=>? + false, true, true, true, true, true, true, true, //@ABCDEFG + true, true, true, true, true, true, true, true, //HIJKLMNO + true, true, true, true, true, true, true, true, //PQRSTUVW + true, true, true, false, false, false, true, true, //XYZ[\]^_ + true, true, true, true, true, true, true, true, //`abcdefg + true, true, true, true, true, true, true, true, //hijklmno + true, true, true, true, true, true, true, true, //pqrstuvw + true, true, true, true, true, true, true, false //xyz{|}~ + }; + return rtl::isAscii(nChar) && aMap[nChar]; +} + +// static +bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar) +{ + static const bool aMap[128] + = { false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, true, false, true, true, false, true, true, // !"#$%&' + false, false, false, true, true, true, true, true, //()*+,-./ + true, true, true, true, true, true, true, true, //01234567 + true, true, true, true, true, true, true, true, //89:;<=>? + true, true, true, true, true, true, true, true, //@ABCDEFG + true, true, true, true, true, true, true, true, //HIJKLMNO + true, true, true, true, true, true, true, true, //PQRSTUVW + true, true, true, true, false, true, true, true, //XYZ[\]^_ + true, true, true, true, true, true, true, true, //`abcdefg + true, true, true, true, true, true, true, true, //hijklmno + true, true, true, true, true, true, true, true, //pqrstuvw + true, true, true, false, true, true, true, false //xyz{|}~ + }; + return rtl::isAscii(nChar) && aMap[nChar]; +} + +// static +bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1, + const sal_Unicode * pEnd1, + const char * pString2) +{ + DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2, + "INetMIME::equalIgnoreCase(): Bad sequences"); + + while (*pString2 != 0) + if (pBegin1 == pEnd1 + || (rtl::toAsciiUpperCase(*pBegin1++) + != rtl::toAsciiUpperCase( + static_cast<unsigned char>(*pString2++)))) + return false; + return pBegin1 == pEnd1; +} + +// static +bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin, + const sal_Unicode * pEnd, bool bLeadingZeroes, + sal_uInt32 & rValue) +{ + sal_uInt64 nTheValue = 0; + const sal_Unicode * p = rBegin; + for ( ; p != pEnd; ++p) + { + int nWeight = getWeight(*p); + if (nWeight < 0) + break; + nTheValue = 10 * nTheValue + nWeight; + if (nTheValue > std::numeric_limits< sal_uInt32 >::max()) + return false; + } + if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1))) + return false; + rBegin = p; + rValue = sal_uInt32(nTheValue); + return true; +} + +// static +sal_Unicode const * INetMIME::scanContentType( + OUString const & rStr, OUString * pType, + OUString * pSubType, INetContentTypeParameterList * pParameters) +{ + sal_Unicode const * pBegin = rStr.getStr(); + sal_Unicode const * pEnd = pBegin + rStr.getLength(); + sal_Unicode const * p = skipLinearWhiteSpaceComment(pBegin, pEnd); + sal_Unicode const * pTypeBegin = p; + while (p != pEnd && isTokenChar(*p)) + { + ++p; + } + if (p == pTypeBegin) + return nullptr; + sal_Unicode const * pTypeEnd = p; + + p = skipLinearWhiteSpaceComment(p, pEnd); + if (p == pEnd || *p++ != '/') + return nullptr; + + p = skipLinearWhiteSpaceComment(p, pEnd); + sal_Unicode const * pSubTypeBegin = p; + while (p != pEnd && isTokenChar(*p)) + { + ++p; + } + if (p == pSubTypeBegin) + return nullptr; + sal_Unicode const * pSubTypeEnd = p; + + if (pType != nullptr) + { + *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase(); + } + if (pSubType != nullptr) + { + *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin) + .toAsciiLowerCase(); + } + + return scanParameters(p, pEnd, pParameters); +} + +// static +OUString INetMIME::decodeHeaderFieldBody(const OString& rBody) +{ + // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old + // versions of StarOffice send mails with header fields where encoded + // words can be preceded by '=', ',', '.', '"', or '(', and followed by + // '=', ',', '.', '"', ')', without any required white space in between. + // And there appear to exist some broken mailers that only encode single + // letters within words, like "Appel + // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to + // detect encoded words even when not properly surrounded by white space. + + // Non US-ASCII characters in rBody are treated as ISO-8859-1. + + // encoded-word = "=?" + // 1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E) + // ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?" + // ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==") + // / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG)) + // "?=" + + // base64 = ALPHA / DIGIT / "+" / "/" + + const char * pBegin = rBody.getStr(); + const char * pEnd = pBegin + rBody.getLength(); + + OUStringBuffer sDecoded; + const char * pCopyBegin = pBegin; + + /* bool bStartEncodedWord = true; */ + const char * pWSPBegin = pBegin; + + for (const char * p = pBegin; p != pEnd;) + { + OUString sEncodedText; + if (*p == '=' /* && bStartEncodedWord */) + { + const char * q = p + 1; + bool bEncodedWord = q != pEnd && *q++ == '?'; + + rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW; + if (bEncodedWord) + { + const char * pCharsetBegin = q; + const char * pLanguageBegin = nullptr; + int nAlphaCount = 0; + for (bool bDone = false; !bDone;) + if (q == pEnd) + { + bEncodedWord = false; + bDone = true; + } + else + { + char cChar = *q++; + switch (cChar) + { + case '*': + pLanguageBegin = q - 1; + nAlphaCount = 0; + break; + + case '-': + if (pLanguageBegin != nullptr) + { + if (nAlphaCount == 0) + pLanguageBegin = nullptr; + else + nAlphaCount = 0; + } + break; + + case '?': + if (pCharsetBegin == q - 1) + bEncodedWord = false; + else + { + eCharsetEncoding + = getCharsetEncoding( + pCharsetBegin, + pLanguageBegin == nullptr + || nAlphaCount == 0 ? + q - 1 : pLanguageBegin); + bEncodedWord = isMIMECharsetEncoding( + eCharsetEncoding); + eCharsetEncoding + = translateFromMIME(eCharsetEncoding); + } + bDone = true; + break; + + default: + if (pLanguageBegin != nullptr + && (!rtl::isAsciiAlpha( + static_cast<unsigned char>(cChar)) + || ++nAlphaCount > 8)) + pLanguageBegin = nullptr; + break; + } + } + } + + bool bEncodingB = false; + if (bEncodedWord) + { + if (q == pEnd) + bEncodedWord = false; + else + { + switch (*q++) + { + case 'B': + case 'b': + bEncodingB = true; + break; + + case 'Q': + case 'q': + bEncodingB = false; + break; + + default: + bEncodedWord = false; + break; + } + } + } + + bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?'; + + OStringBuffer sText; + if (bEncodedWord) + { + if (bEncodingB) + { + for (bool bDone = false; !bDone;) + { + if (pEnd - q < 4) + { + bEncodedWord = false; + bDone = true; + } + else + { + bool bFinal = false; + int nCount = 3; + sal_uInt32 nValue = 0; + for (int nShift = 18; nShift >= 0; nShift -= 6) + { + int nWeight = getBase64Weight(*q++); + if (nWeight == -2) + { + bEncodedWord = false; + bDone = true; + break; + } + if (nWeight == -1) + { + if (!bFinal) + { + if (nShift >= 12) + { + bEncodedWord = false; + bDone = true; + break; + } + bFinal = true; + nCount = nShift == 6 ? 1 : 2; + } + } + else + nValue |= nWeight << nShift; + } + if (bEncodedWord) + { + for (int nShift = 16; nCount-- > 0; nShift -= 8) + sText.append(char(nValue >> nShift & 0xFF)); + if (*q == '?') + { + ++q; + bDone = true; + } + if (bFinal && !bDone) + { + bEncodedWord = false; + bDone = true; + } + } + } + } + } + else + { + const char * pEncodedTextBegin = q; + const char * pEncodedTextCopyBegin = q; + for (bool bDone = false; !bDone;) + if (q == pEnd) + { + bEncodedWord = false; + bDone = true; + } + else + { + sal_uInt32 nChar = static_cast<unsigned char>(*q++); + switch (nChar) + { + case '=': + { + if (pEnd - q < 2) + { + bEncodedWord = false; + bDone = true; + break; + } + int nDigit1 = getHexWeight(q[0]); + int nDigit2 = getHexWeight(q[1]); + if (nDigit1 < 0 || nDigit2 < 0) + { + bEncodedWord = false; + bDone = true; + break; + } + sText.append(rBody.copy( + (pEncodedTextCopyBegin - pBegin), + (q - 1 - pEncodedTextCopyBegin))); + sText.append(char(nDigit1 << 4 | nDigit2)); + q += 2; + pEncodedTextCopyBegin = q; + break; + } + + case '?': + if (q - pEncodedTextBegin > 1) + sText.append(rBody.copy( + (pEncodedTextCopyBegin - pBegin), + (q - 1 - pEncodedTextCopyBegin))); + else + bEncodedWord = false; + bDone = true; + break; + + case '_': + sText.append(rBody.copy( + (pEncodedTextCopyBegin - pBegin), + (q - 1 - pEncodedTextCopyBegin))); + sText.append(' '); + pEncodedTextCopyBegin = q; + break; + + default: + if (!isVisible(nChar)) + { + bEncodedWord = false; + bDone = true; + } + break; + } + } + } + } + + bEncodedWord = bEncodedWord && q != pEnd && *q++ == '='; + + std::unique_ptr<sal_Unicode[]> pUnicodeBuffer; + sal_Size nUnicodeSize = 0; + if (bEncodedWord) + { + pUnicodeBuffer + = convertToUnicode(sText.getStr(), + sText.getStr() + sText.getLength(), + eCharsetEncoding, nUnicodeSize); + if (!pUnicodeBuffer) + bEncodedWord = false; + } + + if (bEncodedWord) + { + appendISO88591(sDecoded, pCopyBegin, pWSPBegin); + sDecoded.append( + pUnicodeBuffer.get(), + static_cast< sal_Int32 >(nUnicodeSize)); + pUnicodeBuffer.reset(); + p = q; + pCopyBegin = p; + + pWSPBegin = p; + while (p != pEnd && isWhiteSpace(*p)) + ++p; + /* bStartEncodedWord = p != pWSPBegin; */ + continue; + } + } + + if (!sEncodedText.isEmpty()) + sDecoded.append(sEncodedText); + + if (p == pEnd) + break; + + switch (*p++) + { + case '"': + /* bStartEncodedWord = true; */ + break; + + case '(': + /* bStartEncodedWord = true; */ + break; + + case ')': + /* bStartEncodedWord = false; */ + break; + + default: + { + const char * pUTF8Begin = p - 1; + const char * pUTF8End = pUTF8Begin; + sal_uInt32 nCharacter = 0; + if (translateUTF8Char(pUTF8End, pEnd, nCharacter)) + { + appendISO88591(sDecoded, pCopyBegin, p - 1); + sal_Unicode aUTF16Buf[2]; + sal_Int32 nUTF16Len = putUTF32Character(aUTF16Buf, nCharacter) - aUTF16Buf; + sDecoded.append(aUTF16Buf, nUTF16Len); + p = pUTF8End; + pCopyBegin = p; + } + /* bStartEncodedWord = false; */ + break; + } + } + pWSPBegin = p; + } + + appendISO88591(sDecoded, pCopyBegin, pEnd); + return sDecoded.makeStringAndClear(); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/tools/source/inet/inetmsg.cxx b/tools/source/inet/inetmsg.cxx new file mode 100644 index 000000000..f8912bb9d --- /dev/null +++ b/tools/source/inet/inetmsg.cxx @@ -0,0 +1,289 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <sal/types.h> +#include <tools/datetime.hxx> +#include <tools/inetmsg.hxx> +#include <comphelper/string.hxx> +#include <rtl/character.hxx> + +#include <map> + +void INetMIMEMessage::SetHeaderField_Impl ( + const OString &rName, + const OUString &rValue, + sal_uInt32 &rnIndex) +{ + SetHeaderField_Impl ( + INetMessageHeader (rName, rValue.toUtf8()), rnIndex); +} + +/* ParseDateField and local helper functions. + * + * Parses a String in (implied) GMT format into class Date and tools::Time objects. + * Four formats are accepted: + * + * [Wkd,] 1*2DIGIT Mon 2*4DIGIT 00:00:00 [GMT] (rfc1123) + * [Wkd,] 00 Mon 0000 00:00:00 [GMT]) (rfc822, rfc1123) + * Weekday, 00-Mon-00 00:00:00 [GMT] (rfc850, rfc1036) + * Wkd Mon 00 00:00:00 0000 [GMT] (ctime) + * 1*DIGIT (delta seconds) + */ + +static const char *months[12] = +{ + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" +}; + +static sal_uInt16 ParseNumber(const OString& rStr, sal_Int32& nIndex) +{ + sal_Int32 n = nIndex; + while ((n < rStr.getLength()) + && rtl::isAsciiDigit(static_cast<unsigned char>(rStr[n]))) + n++; + + OString aNum(rStr.copy(nIndex, (n - nIndex))); + nIndex = n; + + return static_cast<sal_uInt16>(aNum.toInt32()); +} + +static sal_uInt16 ParseMonth(const OString& rStr, sal_Int32& nIndex) +{ + sal_Int32 n = nIndex; + while ((n < rStr.getLength()) + && rtl::isAsciiAlpha(static_cast<unsigned char>(rStr[n]))) + n++; + + OString aMonth(rStr.copy(nIndex, 3)); + nIndex = n; + + sal_uInt16 i; + for (i = 0; i < 12; i++) + if (aMonth.equalsIgnoreAsciiCase(months[i])) break; + return (i + 1); +} + +bool INetMIMEMessage::ParseDateField ( + const OUString& rDateFieldW, DateTime& rDateTime) +{ + OString aDateField(OUStringToOString(rDateFieldW, + RTL_TEXTENCODING_ASCII_US)); + + if (aDateField.isEmpty()) return false; + + if (aDateField.indexOf(':') != -1) + { + // Some DateTime format. + sal_Int32 nIndex = 0; + + // Skip over <Wkd> or <Weekday>, leading and trailing space. + while ((nIndex < aDateField.getLength()) && + (aDateField[nIndex] == ' ')) + nIndex++; + + while ( + (nIndex < aDateField.getLength()) && + (rtl::isAsciiAlpha (static_cast<unsigned char>(aDateField[nIndex])) || + (aDateField[nIndex] == ',') )) + nIndex++; + + while ((nIndex < aDateField.getLength()) && + (aDateField[nIndex] == ' ')) + nIndex++; + + if (rtl::isAsciiAlpha (static_cast<unsigned char>(aDateField[nIndex]))) + { + // Format: ctime(). + if ((aDateField.getLength() - nIndex) < 20) return false; + + rDateTime.SetMonth (ParseMonth (aDateField, nIndex)); nIndex++; + rDateTime.SetDay (ParseNumber (aDateField, nIndex)); nIndex++; + + rDateTime.SetHour (ParseNumber (aDateField, nIndex)); nIndex++; + rDateTime.SetMin (ParseNumber (aDateField, nIndex)); nIndex++; + rDateTime.SetSec (ParseNumber (aDateField, nIndex)); nIndex++; + rDateTime.SetNanoSec (0); + + sal_uInt16 nYear = ParseNumber (aDateField, nIndex); + if (nYear < 100) nYear += 1900; + rDateTime.SetYear (nYear); + } + else + { + // Format: RFC1036 or RFC1123. + if ((aDateField.getLength() - nIndex) < 17) return false; + + rDateTime.SetDay (ParseNumber (aDateField, nIndex)); nIndex++; + rDateTime.SetMonth (ParseMonth (aDateField, nIndex)); nIndex++; + + sal_uInt16 nYear = ParseNumber (aDateField, nIndex); nIndex++; + if (nYear < 100) nYear += 1900; + rDateTime.SetYear (nYear); + + rDateTime.SetHour (ParseNumber (aDateField, nIndex)); nIndex++; + rDateTime.SetMin (ParseNumber (aDateField, nIndex)); nIndex++; + rDateTime.SetSec (ParseNumber (aDateField, nIndex)); nIndex++; + rDateTime.SetNanoSec (0); + + const char cPossiblePlusMinus = nIndex < aDateField.getLength() ? aDateField[nIndex] : 0; + if (cPossiblePlusMinus == '+' || cPossiblePlusMinus == '-') + { + // Offset from GMT: "(+|-)HHMM". + bool bEast = (aDateField[nIndex++] == '+'); + sal_uInt16 nOffset = ParseNumber (aDateField, nIndex); + if (nOffset > 0) + { + tools::Time aDiff( tools::Time::EMPTY ); + aDiff.SetHour (nOffset / 100); + aDiff.SetMin (nOffset % 100); + aDiff.SetSec (0); + aDiff.SetNanoSec (0); + + if (bEast) + rDateTime -= aDiff; + else + rDateTime += aDiff; + } + } + } + } + else if (comphelper::string::isdigitAsciiString(aDateField)) + { + // Format: delta seconds. + tools::Time aDelta (0); + aDelta.SetTime (aDateField.toInt32() * 100); + + DateTime aNow( DateTime::SYSTEM ); + aNow += aDelta; + aNow.ConvertToUTC(); + + rDateTime.SetDate (aNow.GetDate()); + rDateTime.SetTime (aNow.GetTime()); + } + else + { + // Junk. + return false; + } + + return (rDateTime.IsValidAndGregorian() && + !((rDateTime.GetSec() > 59) || + (rDateTime.GetMin() > 59) || + (rDateTime.GetHour() > 23) )); +} + +static const std::map<InetMessageMime, const char*> ImplINetMIMEMessageHeaderData = +{ + { InetMessageMime::VERSION, "MIME-Version"}, + { InetMessageMime::CONTENT_DISPOSITION, "Content-Disposition"}, + { InetMessageMime::CONTENT_TYPE, "Content-Type"}, + { InetMessageMime::CONTENT_TRANSFER_ENCODING, "Content-Transfer-Encoding"} +}; + +INetMIMEMessage::INetMIMEMessage() + : pParent(nullptr) +{ + for (sal_uInt16 i = 0; i < static_cast<int>(InetMessageMime::NUMHDR); i++) + m_nMIMEIndex[static_cast<InetMessageMime>(i)] = SAL_MAX_UINT32; +} + +INetMIMEMessage::~INetMIMEMessage() +{ +} + +void INetMIMEMessage::SetMIMEVersion (const OUString& rVersion) +{ + SetHeaderField_Impl ( + ImplINetMIMEMessageHeaderData.at(InetMessageMime::VERSION), rVersion, + m_nMIMEIndex[InetMessageMime::VERSION]); +} + +void INetMIMEMessage::SetContentDisposition (const OUString& rDisposition) +{ + SetHeaderField_Impl ( + ImplINetMIMEMessageHeaderData.at(InetMessageMime::CONTENT_DISPOSITION), rDisposition, + m_nMIMEIndex[InetMessageMime::CONTENT_DISPOSITION]); +} + +void INetMIMEMessage::SetContentType (const OUString& rType) +{ + SetHeaderField_Impl ( + ImplINetMIMEMessageHeaderData.at(InetMessageMime::CONTENT_TYPE), rType, + m_nMIMEIndex[InetMessageMime::CONTENT_TYPE]); +} + +void INetMIMEMessage::SetContentTransferEncoding ( + const OUString& rEncoding) +{ + SetHeaderField_Impl ( + ImplINetMIMEMessageHeaderData.at(InetMessageMime::CONTENT_TRANSFER_ENCODING), rEncoding, + m_nMIMEIndex[InetMessageMime::CONTENT_TRANSFER_ENCODING]); +} + +OUString INetMIMEMessage::GetDefaultContentType() +{ + if (pParent != nullptr) + { + OUString aParentCT (pParent->GetContentType()); + if (aParentCT.isEmpty()) + aParentCT = pParent->GetDefaultContentType(); + + if (aParentCT.equalsIgnoreAsciiCase("multipart/digest")) + return "message/rfc822"; + } + return "text/plain; charset=us-ascii"; +} + +void INetMIMEMessage::EnableAttachMultipartFormDataChild() +{ + // Check context. + if (IsContainer()) + return; + + // Generate a unique boundary from current time. + char sTail[16 + 1]; + tools::Time aCurTime( tools::Time::SYSTEM ); + sal_uInt64 nThis = reinterpret_cast< sal_uIntPtr >( this ); // we can be on a 64bit architecture + nThis = ( ( nThis >> 32 ) ^ nThis ) & SAL_MAX_UINT32; + sprintf (sTail, "%08X%08X", + static_cast< unsigned int >(aCurTime.GetTime()), + static_cast< unsigned int >(nThis)); + m_aBoundary = "------------_4D48"; + m_aBoundary += sTail; + + // Set header fields. + SetMIMEVersion("1.0"); + SetContentType( + OUString::fromUtf8("multipart/form-data; boundary=" + m_aBoundary)); + SetContentTransferEncoding("7bit"); +} + +void INetMIMEMessage::AttachChild(std::unique_ptr<INetMIMEMessage> pChildMsg) +{ + assert(IsContainer()); + if (IsContainer()) + { + pChildMsg->pParent = this; + aChildren.push_back( std::move(pChildMsg) ); + } +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/tools/source/inet/inetstrm.cxx b/tools/source/inet/inetstrm.cxx new file mode 100644 index 000000000..e2a0e9770 --- /dev/null +++ b/tools/source/inet/inetstrm.cxx @@ -0,0 +1,297 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <sal/config.h> + +#include <cassert> + +#include <sal/types.h> +#include <rtl/strbuf.hxx> +#include <tools/inetmsg.hxx> +#include <tools/inetstrm.hxx> + +int INetMIMEMessageStream::GetHeaderLine(char* pData, sal_uInt32 nSize) +{ + char* pWBuf = pData; + + sal_uInt32 i, n; + + if (maMsgBuffer.Tell() == 0) + { + // Insert formatted header into buffer. + n = pSourceMsg->GetHeaderCount(); + for (i = 0; i < n; i++) + { + INetMessageHeader aHeader (pSourceMsg->GetHeaderField(i)); + if (aHeader.GetValue().getLength()) + { + // NYI: Folding long lines. + maMsgBuffer.WriteOString( aHeader.GetName() ); + maMsgBuffer.WriteCharPtr( ": " ); + maMsgBuffer.WriteOString( aHeader.GetValue() ); + maMsgBuffer.WriteCharPtr( "\r\n" ); + } + } + + pMsgWrite = const_cast<char *>(static_cast<char const *>(maMsgBuffer.GetData())); + pMsgRead = pMsgWrite + maMsgBuffer.Tell(); + } + + n = pMsgRead - pMsgWrite; + if (n > 0) + { + // Move to caller. + if (nSize < n) n = nSize; + for (i = 0; i < n; i++) *pWBuf++ = *pMsgWrite++; + } + else + { + // Reset buffer. + maMsgBuffer.Seek(STREAM_SEEK_TO_BEGIN); + } + + return (pWBuf - pData); +} + +int INetMIMEMessageStream::GetBodyLine(char* pData, sal_uInt32 nSize) +{ + char* pWBuf = pData; + char* pWEnd = pData + nSize; + + if (pSourceMsg->GetDocumentLB()) + { + if (pMsgStrm == nullptr) + pMsgStrm.reset(new SvStream (pSourceMsg->GetDocumentLB())); + + sal_uInt32 nRead = pMsgStrm->ReadBytes(pWBuf, (pWEnd - pWBuf)); + pWBuf += nRead; + } + + return (pWBuf - pData); +} + +int INetMIMEMessageStream::GetMsgLine(char* pData, sal_uInt32 nSize) +{ + // Check for header or body. + if (!bHeaderGenerated) + { + if (!done) + { + // Prepare special header fields. + if (pSourceMsg->GetParent()) + { + OUString aPCT(pSourceMsg->GetParent()->GetContentType()); + if (aPCT.startsWithIgnoreAsciiCase("message/rfc822")) + pSourceMsg->SetMIMEVersion("1.0"); + else + pSourceMsg->SetMIMEVersion(OUString()); + } + else + { + pSourceMsg->SetMIMEVersion("1.0"); + } + + // Check ContentType. + OUString aContentType(pSourceMsg->GetContentType()); + if (!aContentType.isEmpty()) + { + // Determine default Content-Type. + OUString aDefaultType = pSourceMsg->GetDefaultContentType(); + + if (aDefaultType.equalsIgnoreAsciiCase(aContentType)) + { + // No need to specify default. + pSourceMsg->SetContentType(OUString()); + } + } + + // No need to specify default. + pSourceMsg->SetContentTransferEncoding(OUString()); + + // Mark we're done. + done = true; + } + + // Generate the message header. + int nRead = GetHeaderLine(pData, nSize); + if (nRead <= 0) + { + // Reset state. + done = false; + } + return nRead; + } + else + { + // Generate the message body. + if (pSourceMsg->IsContainer()) + { + // Encapsulated message body. + while (!done) + { + if (pChildStrm == nullptr) + { + INetMIMEMessage *pChild = pSourceMsg->GetChild(nChildIndex); + if (pChild) + { + // Increment child index. + nChildIndex++; + + // Create child stream. + pChildStrm.reset(new INetMIMEMessageStream(pChild, false)); + + if (pSourceMsg->IsMultipart()) + { + // Insert multipart delimiter. + OStringBuffer aDelim("--"); + aDelim.append(pSourceMsg->GetMultipartBoundary()); + aDelim.append("\r\n"); + + memcpy(pData, aDelim.getStr(), + aDelim.getLength()); + return aDelim.getLength(); + } + } + else + { + // No more parts. Mark we're done. + done = true; + nChildIndex = 0; + + if (pSourceMsg->IsMultipart()) + { + // Insert close delimiter. + OStringBuffer aDelim("--"); + aDelim.append(pSourceMsg->GetMultipartBoundary()); + aDelim.append("--\r\n"); + + memcpy(pData, aDelim.getStr(), + aDelim.getLength()); + return aDelim.getLength(); + } + } + } + else + { + // Read current child stream. + int nRead = pChildStrm->Read(pData, nSize); + if (nRead > 0) + { + return nRead; + } + else + { + // Cleanup exhausted child stream. + pChildStrm.reset(); + } + } + } + return 0; + } + else + { + // Single part message body. + if (pSourceMsg->GetDocumentLB() == nullptr) + { + // Empty message body. + return 0; + } + + // No Encoding. + return GetBodyLine(pData, nSize); + } + } +} + +namespace +{ + +const int BUFFER_SIZE = 2048; + +} + +INetMIMEMessageStream::INetMIMEMessageStream( + INetMIMEMessage *pMsg, bool headerGenerated): + pSourceMsg(pMsg), + bHeaderGenerated(headerGenerated), + mvBuffer(BUFFER_SIZE), + pMsgRead(nullptr), + pMsgWrite(nullptr), + done(false), + nChildIndex(0) +{ + assert(pMsg != nullptr); + maMsgBuffer.SetStreamCharSet(RTL_TEXTENCODING_ASCII_US); + pRead = pWrite = mvBuffer.data(); +} + +INetMIMEMessageStream::~INetMIMEMessageStream() +{ + pChildStrm.reset(); +} + +int INetMIMEMessageStream::Read(char* pData, sal_uInt32 nSize) +{ + char* pWBuf = pData; + char* pWEnd = pData + nSize; + + while (pWBuf < pWEnd) + { + // Caller's buffer not yet filled. + sal_uInt32 n = pRead - pWrite; + if (n > 0) + { + // Bytes still in buffer. + sal_uInt32 m = pWEnd - pWBuf; + if (m < n) n = m; + for (sal_uInt32 i = 0; i < n; i++) *pWBuf++ = *pWrite++; + } + else + { + // Buffer empty. Reset to <Begin-of-Buffer>. + pRead = pWrite = mvBuffer.data(); + + // Read next message line. + int nRead = GetMsgLine(mvBuffer.data(), mvBuffer.size()); + if (nRead > 0) + { + // Set read pointer. + pRead = mvBuffer.data() + nRead; + } + else + { + if (!bHeaderGenerated) + { + // Header generated. Insert empty line. + bHeaderGenerated = true; + *pRead++ = '\r'; + *pRead++ = '\n'; + } + else + { + // Body generated. + return (pWBuf - pData); + } + } + } + } + return (pWBuf - pData); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |