diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:06:44 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:06:44 +0000 |
commit | ed5640d8b587fbcfed7dd7967f3de04b37a76f26 (patch) | |
tree | 7a5f7c6c9d02226d7471cb3cc8fbbf631b415303 /tools/source/inet/inetmime.cxx | |
parent | Initial commit. (diff) | |
download | libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.tar.xz libreoffice-ed5640d8b587fbcfed7dd7967f3de04b37a76f26.zip |
Adding upstream version 4:7.4.7.upstream/4%7.4.7upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tools/source/inet/inetmime.cxx')
-rw-r--r-- | tools/source/inet/inetmime.cxx | 1380 |
1 files changed, 1380 insertions, 0 deletions
diff --git a/tools/source/inet/inetmime.cxx b/tools/source/inet/inetmime.cxx new file mode 100644 index 000000000..2a57c099c --- /dev/null +++ b/tools/source/inet/inetmime.cxx @@ -0,0 +1,1380 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <algorithm> +#include <limits> +#include <forward_list> +#include <memory> + +#include <sal/log.hxx> +#include <rtl/ustring.hxx> +#include <rtl/strbuf.hxx> +#include <rtl/ustrbuf.hxx> +#include <rtl/tencinfo.h> +#include <tools/inetmime.hxx> +#include <rtl/character.hxx> + +namespace { + +rtl_TextEncoding getCharsetEncoding(const char * pBegin, + const char * pEnd); + +/** Check for US-ASCII white space character. + + @param nChar Some UCS-4 character. + + @return True if nChar is a US-ASCII white space character (US-ASCII + 0x09 or 0x20). + */ +bool isWhiteSpace(sal_uInt32 nChar) +{ + return nChar == '\t' || nChar == ' '; +} + +/** Get the Base 64 digit weight of a US-ASCII character. + + @param nChar Some UCS-4 character. + + @return If nChar is a US-ASCII Base 64 digit character (US-ASCII + 'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the + corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding + character (US-ASCII '='), return -1; otherwise, return -2. + */ +int getBase64Weight(sal_uInt32 nChar) +{ + return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') : + rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) : + rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) : + nChar == '+' ? 62 : + nChar == '/' ? 63 : + nChar == '=' ? -1 : -2; +} + +bool startsWithLineFolding(const sal_Unicode * pBegin, + const sal_Unicode * pEnd) +{ + DBG_ASSERT(pBegin && pBegin <= pEnd, + "startsWithLineFolding(): Bad sequence"); + + return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A + && isWhiteSpace(pBegin[2]); // CR, LF +} + +rtl_TextEncoding translateFromMIME(rtl_TextEncoding + eEncoding) +{ +#if defined(_WIN32) + return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ? + RTL_TEXTENCODING_MS_1252 : eEncoding; +#else + return eEncoding; +#endif +} + +bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding) +{ + return rtl_isOctetTextEncoding(eEncoding); +} + +std::unique_ptr<sal_Unicode[]> convertToUnicode(const char * pBegin, + const char * pEnd, + rtl_TextEncoding eEncoding, + sal_Size & rSize) +{ + if (eEncoding == RTL_TEXTENCODING_DONTKNOW) + return nullptr; + rtl_TextToUnicodeConverter hConverter + = rtl_createTextToUnicodeConverter(eEncoding); + rtl_TextToUnicodeContext hContext + = rtl_createTextToUnicodeContext(hConverter); + std::unique_ptr<sal_Unicode[]> pBuffer; + sal_uInt32 nInfo; + for (sal_Size nBufferSize = pEnd - pBegin;; + nBufferSize += nBufferSize / 3 + 1) + { + pBuffer.reset(new sal_Unicode[nBufferSize]); + sal_Size nSrcCvtBytes; + rSize = rtl_convertTextToUnicode( + hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(), + nBufferSize, + RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR + | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, + &nInfo, &nSrcCvtBytes); + if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL) + break; + pBuffer.reset(); + rtl_resetTextToUnicodeContext(hConverter, hContext); + } + rtl_destroyTextToUnicodeContext(hConverter, hContext); + rtl_destroyTextToUnicodeConverter(hConverter); + if (nInfo != 0) + { + pBuffer.reset(); + } + return pBuffer; +} + +void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar) +{ + // See RFC 2279 for a discussion of UTF-8. + DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char"); + + if (nChar < 0x80) + rSink.append(char(nChar)); + else if (nChar < 0x800) + rSink.append(char(nChar >> 6 | 0xC0)) + .append(char((nChar & 0x3F) | 0x80)); + else if (nChar < 0x10000) + rSink.append(char(nChar >> 12 | 0xE0)) + .append(char((nChar >> 6 & 0x3F) | 0x80)) + .append(char((nChar & 0x3F) | 0x80)); + else if (nChar < 0x200000) + rSink.append(char(nChar >> 18 | 0xF0)) + .append(char((nChar >> 12 & 0x3F) | 0x80)) + .append(char((nChar >> 6 & 0x3F) | 0x80)) + .append(char((nChar & 0x3F) | 0x80)); + else if (nChar < 0x4000000) + rSink.append(char(nChar >> 24 | 0xF8)) + .append(char((nChar >> 18 & 0x3F) | 0x80)) + .append(char((nChar >> 12 & 0x3F) | 0x80)) + .append(char((nChar >> 6 & 0x3F) | 0x80)) + .append(char((nChar & 0x3F) | 0x80)); + else + rSink.append(char(nChar >> 30 | 0xFC)) + .append(char((nChar >> 24 & 0x3F) | 0x80)) + .append(char((nChar >> 18 & 0x3F) | 0x80)) + .append(char((nChar >> 12 & 0x3F) | 0x80)) + .append(char((nChar >> 6 & 0x3F) | 0x80)) + .append(char((nChar & 0x3F) | 0x80)); +} + +bool translateUTF8Char(const char *& rBegin, + const char * pEnd, + sal_uInt32 & rCharacter) +{ + if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80 + || static_cast< unsigned char >(*rBegin) >= 0xFE) + return false; + + int nCount; + sal_uInt32 nMin; + sal_uInt32 nUCS4; + const char * p = rBegin; + if (static_cast< unsigned char >(*p) < 0xE0) + { + nCount = 1; + nMin = 0x80; + nUCS4 = static_cast< unsigned char >(*p) & 0x1F; + } + else if (static_cast< unsigned char >(*p) < 0xF0) + { + nCount = 2; + nMin = 0x800; + nUCS4 = static_cast< unsigned char >(*p) & 0xF; + } + else if (static_cast< unsigned char >(*p) < 0xF8) + { + nCount = 3; + nMin = 0x10000; + nUCS4 = static_cast< unsigned char >(*p) & 7; + } + else if (static_cast< unsigned char >(*p) < 0xFC) + { + nCount = 4; + nMin = 0x200000; + nUCS4 = static_cast< unsigned char >(*p) & 3; + } + else + { + nCount = 5; + nMin = 0x4000000; + nUCS4 = static_cast< unsigned char >(*p) & 1; + } + ++p; + + for (; nCount-- > 0; ++p) + if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80) + nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F); + else + return false; + + if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin) + return false; + + rCharacter = nUCS4; + rBegin = p; + return true; +} + +void appendISO88591(OUStringBuffer & rText, char const * pBegin, + char const * pEnd); + +struct Parameter +{ + OString m_aAttribute; + OString m_aCharset; + OString m_aLanguage; + OString m_aValue; + sal_uInt32 m_nSection; + bool m_bExtended; + + bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort + { + int nComp = m_aAttribute.compareTo(rhs.m_aAttribute); + return nComp < 0 || + (nComp == 0 && m_nSection < rhs.m_nSection); + } + struct IsSameSection // is used to check container for duplicates with std::any_of + { + const OString& rAttribute; + const sal_uInt32 nSection; + bool operator()(const Parameter& r) const + { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; } + }; +}; + +typedef std::forward_list<Parameter> ParameterList; + +bool parseParameters(ParameterList const & rInput, + INetContentTypeParameterList * pOutput); + +// appendISO88591 + +void appendISO88591(OUStringBuffer & rText, char const * pBegin, + char const * pEnd) +{ + sal_Int32 nLength = pEnd - pBegin; + std::unique_ptr<sal_Unicode[]> pBuffer(new sal_Unicode[nLength]); + for (sal_Unicode * p = pBuffer.get(); pBegin != pEnd;) + *p++ = static_cast<unsigned char>(*pBegin++); + rText.append(pBuffer.get(), nLength); +} + +// parseParameters + +bool parseParameters(ParameterList const & rInput, + INetContentTypeParameterList * pOutput) +{ + if (pOutput) + pOutput->clear(); + + for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++) + { + if (it->m_nSection > 0 + && (itPrev == rInput.end() + || itPrev->m_nSection != it->m_nSection - 1 + || itPrev->m_aAttribute != it->m_aAttribute)) + return false; + } + + if (pOutput) + for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext) + { + bool bCharset = !it->m_aCharset.isEmpty(); + rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW; + if (bCharset) + eEncoding + = getCharsetEncoding(it->m_aCharset.getStr(), + it->m_aCharset.getStr() + + it->m_aCharset.getLength()); + OUStringBuffer aValue(64); + bool bBadEncoding = false; + itNext = it; + do + { + sal_Size nSize; + std::unique_ptr<sal_Unicode[]> pUnicode + = convertToUnicode(itNext->m_aValue.getStr(), + itNext->m_aValue.getStr() + + itNext->m_aValue.getLength(), + bCharset && it->m_bExtended ? + eEncoding : + RTL_TEXTENCODING_UTF8, + nSize); + if (!pUnicode && !(bCharset && it->m_bExtended)) + pUnicode = convertToUnicode( + itNext->m_aValue.getStr(), + itNext->m_aValue.getStr() + + itNext->m_aValue.getLength(), + RTL_TEXTENCODING_ISO_8859_1, nSize); + if (!pUnicode) + { + bBadEncoding = true; + break; + } + aValue.append(pUnicode.get(), static_cast<sal_Int32>(nSize)); + ++itNext; + } + while (itNext != rInput.end() && itNext->m_nSection != 0); + + if (bBadEncoding) + { + aValue.setLength(0); + itNext = it; + do + { + if (itNext->m_bExtended) + { + for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i) + aValue.append( + static_cast<sal_Unicode>( + static_cast<unsigned char>(itNext->m_aValue[i]) + | 0xF800)); // map to unicode corporate use sub area + } + else + { + for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i) + aValue.append( itNext->m_aValue[i] ); + } + ++itNext; + } + while (itNext != rInput.end() && itNext->m_nSection != 0); + } + auto const ret = pOutput->insert( + {it->m_aAttribute, + {it->m_aCharset, it->m_aLanguage, aValue.makeStringAndClear(), !bBadEncoding}}); + SAL_INFO_IF(!ret.second, "tools", + "INetMIME: dropping duplicate parameter: " << it->m_aAttribute); + } + return true; +} + +/** Check whether some character is valid within an RFC 2045 <token>. + + @param nChar Some UCS-4 character. + + @return True if nChar is valid within an RFC 2047 <token> (US-ASCII + 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+', + '-', '.', '^', '_', '`', '{', '|', '}', or '~'). + */ +bool isTokenChar(sal_uInt32 nChar) +{ + static const bool aMap[128] + = { false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, true, false, true, true, true, true, true, // !"#$%&' + false, false, true, true, false, true, true, false, //()*+,-./ + true, true, true, true, true, true, true, true, //01234567 + true, true, false, false, false, false, false, false, //89:;<=>? + false, true, true, true, true, true, true, true, //@ABCDEFG + true, true, true, true, true, true, true, true, //HIJKLMNO + true, true, true, true, true, true, true, true, //PQRSTUVW + true, true, true, false, false, false, true, true, //XYZ[\]^_ + true, true, true, true, true, true, true, true, //`abcdefg + true, true, true, true, true, true, true, true, //hijklmno + true, true, true, true, true, true, true, true, //pqrstuvw + true, true, true, true, true, true, true, false //xyz{|}~ + }; + return rtl::isAscii(nChar) && aMap[nChar]; +} + +const sal_Unicode * skipComment(const sal_Unicode * pBegin, + const sal_Unicode * pEnd) +{ + DBG_ASSERT(pBegin && pBegin <= pEnd, + "skipComment(): Bad sequence"); + + if (pBegin != pEnd && *pBegin == '(') + { + sal_uInt32 nLevel = 0; + for (const sal_Unicode * p = pBegin; p != pEnd;) + switch (*p++) + { + case '(': + ++nLevel; + break; + + case ')': + if (--nLevel == 0) + return p; + break; + + case '\\': + if (p != pEnd) + ++p; + break; + } + } + return pBegin; +} + +const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode * + pBegin, + const sal_Unicode * + pEnd) +{ + DBG_ASSERT(pBegin && pBegin <= pEnd, + "skipLinearWhiteSpaceComment(): Bad sequence"); + + while (pBegin != pEnd) + switch (*pBegin) + { + case '\t': + case ' ': + ++pBegin; + break; + + case 0x0D: // CR + if (startsWithLineFolding(pBegin, pEnd)) + pBegin += 3; + else + return pBegin; + break; + + case '(': + { + const sal_Unicode * p = skipComment(pBegin, pEnd); + if (p == pBegin) + return pBegin; + pBegin = p; + break; + } + + default: + return pBegin; + } + return pBegin; +} + +const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin, + const sal_Unicode * pEnd) +{ + DBG_ASSERT(pBegin && pBegin <= pEnd, + "skipQuotedString(): Bad sequence"); + + if (pBegin != pEnd && *pBegin == '"') + for (const sal_Unicode * p = pBegin + 1; p != pEnd;) + switch (*p++) + { + case 0x0D: // CR + if (pEnd - p < 2 || *p++ != 0x0A // LF + || !isWhiteSpace(*p++)) + return pBegin; + break; + + case '"': + return p; + + case '\\': + if (p != pEnd) + ++p; + break; + } + return pBegin; +} + +sal_Unicode const * scanParameters(sal_Unicode const * pBegin, + sal_Unicode const * pEnd, + INetContentTypeParameterList * + pParameters) +{ + ParameterList aList; + sal_Unicode const * pParameterBegin = pBegin; + for (sal_Unicode const * p = pParameterBegin;;) + { + pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd); + if (pParameterBegin == pEnd || *pParameterBegin != ';') + break; + p = pParameterBegin + 1; + + sal_Unicode const * pAttributeBegin + = skipLinearWhiteSpaceComment(p, pEnd); + p = pAttributeBegin; + bool bDowncaseAttribute = false; + while (p != pEnd && isTokenChar(*p) && *p != '*') + { + bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p); + ++p; + } + if (p == pAttributeBegin) + break; + OString aAttribute(pAttributeBegin, p - pAttributeBegin, RTL_TEXTENCODING_ASCII_US); + if (bDowncaseAttribute) + aAttribute = aAttribute.toAsciiLowerCase(); + + sal_uInt32 nSection = 0; + if (p != pEnd && *p == '*') + { + ++p; + if (p != pEnd && rtl::isAsciiDigit(*p) + && !INetMIME::scanUnsigned(p, pEnd, false, nSection)) + break; + } + + bool bPresent = std::any_of(aList.begin(), aList.end(), + Parameter::IsSameSection{aAttribute, nSection}); + if (bPresent) + break; + + bool bExtended = false; + if (p != pEnd && *p == '*') + { + ++p; + bExtended = true; + } + + p = skipLinearWhiteSpaceComment(p, pEnd); + + if (p == pEnd || *p != '=') + break; + + p = skipLinearWhiteSpaceComment(p + 1, pEnd); + + OString aCharset; + OString aLanguage; + OString aValue; + if (bExtended) + { + if (nSection == 0) + { + sal_Unicode const * pCharsetBegin = p; + bool bDowncaseCharset = false; + while (p != pEnd && isTokenChar(*p) && *p != '\'') + { + bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p); + ++p; + } + if (p == pCharsetBegin) + break; + if (pParameters) + { + aCharset = OString( + pCharsetBegin, + p - pCharsetBegin, + RTL_TEXTENCODING_ASCII_US); + if (bDowncaseCharset) + aCharset = aCharset.toAsciiLowerCase(); + } + + if (p == pEnd || *p != '\'') + break; + ++p; + + sal_Unicode const * pLanguageBegin = p; + bool bDowncaseLanguage = false; + int nLetters = 0; + for (; p != pEnd; ++p) + if (rtl::isAsciiAlpha(*p)) + { + if (++nLetters > 8) + break; + bDowncaseLanguage = bDowncaseLanguage + || rtl::isAsciiUpperCase(*p); + } + else if (*p == '-') + { + if (nLetters == 0) + break; + nLetters = 0; + } + else + break; + if (nLetters == 0 || nLetters > 8) + break; + if (pParameters) + { + aLanguage = OString( + pLanguageBegin, + p - pLanguageBegin, + RTL_TEXTENCODING_ASCII_US); + if (bDowncaseLanguage) + aLanguage = aLanguage.toAsciiLowerCase(); + } + + if (p == pEnd || *p != '\'') + break; + ++p; + } + if (pParameters) + { + OStringBuffer aSink; + while (p != pEnd) + { + auto q = p; + sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd); + if (rtl::isAscii(nChar) && !isTokenChar(nChar)) + break; + p = q; + if (nChar == '%' && p + 1 < pEnd) + { + int nWeight1 = INetMIME::getHexWeight(p[0]); + int nWeight2 = INetMIME::getHexWeight(p[1]); + if (nWeight1 >= 0 && nWeight2 >= 0) + { + aSink.append(char(nWeight1 << 4 | nWeight2)); + p += 2; + continue; + } + } + writeUTF8(aSink, nChar); + } + aValue = aSink.makeStringAndClear(); + } + else + while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p))) + ++p; + } + else if (p != pEnd && *p == '"') + if (pParameters) + { + OStringBuffer aSink(256); + bool bInvalid = false; + for (++p;;) + { + if (p == pEnd) + { + bInvalid = true; + break; + } + sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd); + if (nChar == '"') + break; + else if (nChar == 0x0D) // CR + { + if (pEnd - p < 2 || *p++ != 0x0A // LF + || !isWhiteSpace(*p)) + { + bInvalid = true; + break; + } + nChar = static_cast<unsigned char>(*p++); + } + else if (nChar == '\\') + { + if (p == pEnd) + { + bInvalid = true; + break; + } + nChar = INetMIME::getUTF32Character(p, pEnd); + } + writeUTF8(aSink, nChar); + } + if (bInvalid) + break; + aValue = aSink.makeStringAndClear(); + } + else + { + sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd); + if (p == pStringEnd) + break; + p = pStringEnd; + } + else + { + sal_Unicode const * pTokenBegin = p; + while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p))) + ++p; + if (p == pTokenBegin) + break; + if (pParameters) + aValue = OString( + pTokenBegin, p - pTokenBegin, + RTL_TEXTENCODING_UTF8); + } + aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended}); + } + aList.sort(); + return parseParameters(aList, pParameters) ? pParameterBegin : pBegin; +} + +bool equalIgnoreCase(const char * pBegin1, + const char * pEnd1, + const char * pString2) +{ + DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2, + "equalIgnoreCase(): Bad sequences"); + + while (*pString2 != 0) + if (pBegin1 == pEnd1 + || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++)) + != rtl::toAsciiUpperCase( + static_cast<unsigned char>(*pString2++)))) + return false; + return pBegin1 == pEnd1; +} + +struct EncodingEntry +{ + char const * m_aName; + rtl_TextEncoding m_eEncoding; +}; + +// The source for the following table is <ftp://ftp.iana.org/in-notes/iana/ +// assignments/character-sets> as of Jan, 21 2000 12:46:00, unless otherwise +// noted: +EncodingEntry const aEncodingMap[] + = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US }, + { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US }, + { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US }, + { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US }, + { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US }, + { "ASCII", RTL_TEXTENCODING_ASCII_US }, + { "ISO646-US", RTL_TEXTENCODING_ASCII_US }, + { "US", RTL_TEXTENCODING_ASCII_US }, + { "IBM367", RTL_TEXTENCODING_ASCII_US }, + { "CP367", RTL_TEXTENCODING_ASCII_US }, + { "CSASCII", RTL_TEXTENCODING_ASCII_US }, + { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 }, + { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 }, + { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 }, + { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 }, + { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 }, + { "L1", RTL_TEXTENCODING_ISO_8859_1 }, + { "IBM819", RTL_TEXTENCODING_ISO_8859_1 }, + { "CP819", RTL_TEXTENCODING_ISO_8859_1 }, + { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 }, + { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 }, + { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 }, + { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 }, + { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 }, + { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 }, + { "L2", RTL_TEXTENCODING_ISO_8859_2 }, + { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 }, + { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 }, + { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 }, + { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 }, + { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 }, + { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 }, + { "L3", RTL_TEXTENCODING_ISO_8859_3 }, + { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 }, + { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 }, + { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 }, + { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 }, + { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 }, + { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 }, + { "L4", RTL_TEXTENCODING_ISO_8859_4 }, + { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 }, + { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 }, + { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 }, + { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 }, + { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 }, + { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 }, + { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 }, + { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 }, + { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 }, + { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 }, + { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 }, + { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 }, + { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 }, + { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 }, + { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 }, + { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 }, + { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 }, + { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 }, + { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 }, + { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 }, + { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 }, + { "GREEK", RTL_TEXTENCODING_ISO_8859_7 }, + { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 }, + { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 }, + { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 }, + { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 }, + { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 }, + { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 }, + { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 }, + { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 }, + { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 }, + { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 }, + { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 }, + { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 }, + { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 }, + { "L5", RTL_TEXTENCODING_ISO_8859_9 }, + { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 }, + { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047 + { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 }, + { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047 + { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN }, + { "MAC", RTL_TEXTENCODING_APPLE_ROMAN }, + { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN }, + { "IBM437", RTL_TEXTENCODING_IBM_437 }, + { "CP437", RTL_TEXTENCODING_IBM_437 }, + { "437", RTL_TEXTENCODING_IBM_437 }, + { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 }, + { "IBM850", RTL_TEXTENCODING_IBM_850 }, + { "CP850", RTL_TEXTENCODING_IBM_850 }, + { "850", RTL_TEXTENCODING_IBM_850 }, + { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 }, + { "IBM860", RTL_TEXTENCODING_IBM_860 }, + { "CP860", RTL_TEXTENCODING_IBM_860 }, + { "860", RTL_TEXTENCODING_IBM_860 }, + { "CSIBM860", RTL_TEXTENCODING_IBM_860 }, + { "IBM861", RTL_TEXTENCODING_IBM_861 }, + { "CP861", RTL_TEXTENCODING_IBM_861 }, + { "861", RTL_TEXTENCODING_IBM_861 }, + { "CP-IS", RTL_TEXTENCODING_IBM_861 }, + { "CSIBM861", RTL_TEXTENCODING_IBM_861 }, + { "IBM863", RTL_TEXTENCODING_IBM_863 }, + { "CP863", RTL_TEXTENCODING_IBM_863 }, + { "863", RTL_TEXTENCODING_IBM_863 }, + { "CSIBM863", RTL_TEXTENCODING_IBM_863 }, + { "IBM865", RTL_TEXTENCODING_IBM_865 }, + { "CP865", RTL_TEXTENCODING_IBM_865 }, + { "865", RTL_TEXTENCODING_IBM_865 }, + { "CSIBM865", RTL_TEXTENCODING_IBM_865 }, + { "IBM775", RTL_TEXTENCODING_IBM_775 }, + { "CP775", RTL_TEXTENCODING_IBM_775 }, + { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 }, + { "IBM852", RTL_TEXTENCODING_IBM_852 }, + { "CP852", RTL_TEXTENCODING_IBM_852 }, + { "852", RTL_TEXTENCODING_IBM_852 }, + { "CSPCP852", RTL_TEXTENCODING_IBM_852 }, + { "IBM855", RTL_TEXTENCODING_IBM_855 }, + { "CP855", RTL_TEXTENCODING_IBM_855 }, + { "855", RTL_TEXTENCODING_IBM_855 }, + { "CSIBM855", RTL_TEXTENCODING_IBM_855 }, + { "IBM857", RTL_TEXTENCODING_IBM_857 }, + { "CP857", RTL_TEXTENCODING_IBM_857 }, + { "857", RTL_TEXTENCODING_IBM_857 }, + { "CSIBM857", RTL_TEXTENCODING_IBM_857 }, + { "IBM862", RTL_TEXTENCODING_IBM_862 }, + { "CP862", RTL_TEXTENCODING_IBM_862 }, + { "862", RTL_TEXTENCODING_IBM_862 }, + { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 }, + { "IBM864", RTL_TEXTENCODING_IBM_864 }, + { "CP864", RTL_TEXTENCODING_IBM_864 }, + { "CSIBM864", RTL_TEXTENCODING_IBM_864 }, + { "IBM866", RTL_TEXTENCODING_IBM_866 }, + { "CP866", RTL_TEXTENCODING_IBM_866 }, + { "866", RTL_TEXTENCODING_IBM_866 }, + { "CSIBM866", RTL_TEXTENCODING_IBM_866 }, + { "IBM869", RTL_TEXTENCODING_IBM_869 }, + { "CP869", RTL_TEXTENCODING_IBM_869 }, + { "869", RTL_TEXTENCODING_IBM_869 }, + { "CP-GR", RTL_TEXTENCODING_IBM_869 }, + { "CSIBM869", RTL_TEXTENCODING_IBM_869 }, + { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 }, + { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 }, + { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 }, + { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 }, + { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 }, + { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 }, + { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 }, + { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 }, + { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS }, + { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS }, + { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS }, + { "GB2312", RTL_TEXTENCODING_GB_2312 }, + { "CSGB2312", RTL_TEXTENCODING_GB_2312 }, + { "BIG5", RTL_TEXTENCODING_BIG5 }, + { "CSBIG5", RTL_TEXTENCODING_BIG5 }, + { "EUC-JP", RTL_TEXTENCODING_EUC_JP }, + { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE", + RTL_TEXTENCODING_EUC_JP }, + { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP }, + { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP }, + { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP }, + { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN }, + { "KOI8-R", RTL_TEXTENCODING_KOI8_R }, + { "CSKOI8R", RTL_TEXTENCODING_KOI8_R }, + { "UTF-7", RTL_TEXTENCODING_UTF7 }, + { "UTF-8", RTL_TEXTENCODING_UTF8 }, + { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047 + { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047 + { "EUC-KR", RTL_TEXTENCODING_EUC_KR }, + { "CSEUCKR", RTL_TEXTENCODING_EUC_KR }, + { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR }, + { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR }, + { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 }, + { "CSUCS4", RTL_TEXTENCODING_UCS4 }, + { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 }, + { "CSUNICODE", RTL_TEXTENCODING_UCS2 } }; + +rtl_TextEncoding getCharsetEncoding(char const * pBegin, + char const * pEnd) +{ + for (const EncodingEntry& i : aEncodingMap) + if (equalIgnoreCase(pBegin, pEnd, i.m_aName)) + return i.m_eEncoding; + return RTL_TEXTENCODING_DONTKNOW; +} + +} + +// INetMIME + +// static +bool INetMIME::isAtomChar(sal_uInt32 nChar) +{ + static const bool aMap[128] + = { false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, true, false, true, true, true, true, true, // !"#$%&' + false, false, true, true, false, true, false, true, //()*+,-./ + true, true, true, true, true, true, true, true, //01234567 + true, true, false, false, false, true, false, true, //89:;<=>? + false, true, true, true, true, true, true, true, //@ABCDEFG + true, true, true, true, true, true, true, true, //HIJKLMNO + true, true, true, true, true, true, true, true, //PQRSTUVW + true, true, true, false, false, false, true, true, //XYZ[\]^_ + true, true, true, true, true, true, true, true, //`abcdefg + true, true, true, true, true, true, true, true, //hijklmno + true, true, true, true, true, true, true, true, //pqrstuvw + true, true, true, true, true, true, true, false //xyz{|}~ + }; + return rtl::isAscii(nChar) && aMap[nChar]; +} + +// static +bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar) +{ + static const bool aMap[128] + = { false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, + false, true, false, true, true, false, true, true, // !"#$%&' + false, false, false, true, true, true, true, true, //()*+,-./ + true, true, true, true, true, true, true, true, //01234567 + true, true, true, true, true, true, true, true, //89:;<=>? + true, true, true, true, true, true, true, true, //@ABCDEFG + true, true, true, true, true, true, true, true, //HIJKLMNO + true, true, true, true, true, true, true, true, //PQRSTUVW + true, true, true, true, false, true, true, true, //XYZ[\]^_ + true, true, true, true, true, true, true, true, //`abcdefg + true, true, true, true, true, true, true, true, //hijklmno + true, true, true, true, true, true, true, true, //pqrstuvw + true, true, true, false, true, true, true, false //xyz{|}~ + }; + return rtl::isAscii(nChar) && aMap[nChar]; +} + +// static +bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1, + const sal_Unicode * pEnd1, + const char * pString2) +{ + DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2, + "INetMIME::equalIgnoreCase(): Bad sequences"); + + while (*pString2 != 0) + if (pBegin1 == pEnd1 + || (rtl::toAsciiUpperCase(*pBegin1++) + != rtl::toAsciiUpperCase( + static_cast<unsigned char>(*pString2++)))) + return false; + return pBegin1 == pEnd1; +} + +// static +bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin, + const sal_Unicode * pEnd, bool bLeadingZeroes, + sal_uInt32 & rValue) +{ + sal_uInt64 nTheValue = 0; + const sal_Unicode * p = rBegin; + for ( ; p != pEnd; ++p) + { + int nWeight = getWeight(*p); + if (nWeight < 0) + break; + nTheValue = 10 * nTheValue + nWeight; + if (nTheValue > std::numeric_limits< sal_uInt32 >::max()) + return false; + } + if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1))) + return false; + rBegin = p; + rValue = sal_uInt32(nTheValue); + return true; +} + +// static +sal_Unicode const * INetMIME::scanContentType( + std::u16string_view rStr, OUString * pType, + OUString * pSubType, INetContentTypeParameterList * pParameters) +{ + sal_Unicode const * pBegin = rStr.data(); + sal_Unicode const * pEnd = pBegin + rStr.size(); + sal_Unicode const * p = skipLinearWhiteSpaceComment(pBegin, pEnd); + sal_Unicode const * pTypeBegin = p; + while (p != pEnd && isTokenChar(*p)) + { + ++p; + } + if (p == pTypeBegin) + return nullptr; + sal_Unicode const * pTypeEnd = p; + + p = skipLinearWhiteSpaceComment(p, pEnd); + if (p == pEnd || *p++ != '/') + return nullptr; + + p = skipLinearWhiteSpaceComment(p, pEnd); + sal_Unicode const * pSubTypeBegin = p; + while (p != pEnd && isTokenChar(*p)) + { + ++p; + } + if (p == pSubTypeBegin) + return nullptr; + sal_Unicode const * pSubTypeEnd = p; + + if (pType != nullptr) + { + *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase(); + } + if (pSubType != nullptr) + { + *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin) + .toAsciiLowerCase(); + } + + return scanParameters(p, pEnd, pParameters); +} + +// static +OUString INetMIME::decodeHeaderFieldBody(const OString& rBody) +{ + // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old + // versions of StarOffice send mails with header fields where encoded + // words can be preceded by '=', ',', '.', '"', or '(', and followed by + // '=', ',', '.', '"', ')', without any required white space in between. + // And there appear to exist some broken mailers that only encode single + // letters within words, like "Appel + // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to + // detect encoded words even when not properly surrounded by white space. + + // Non US-ASCII characters in rBody are treated as ISO-8859-1. + + // encoded-word = "=?" + // 1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E) + // ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?" + // ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==") + // / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG)) + // "?=" + + // base64 = ALPHA / DIGIT / "+" / "/" + + const char * pBegin = rBody.getStr(); + const char * pEnd = pBegin + rBody.getLength(); + + OUStringBuffer sDecoded; + const char * pCopyBegin = pBegin; + + /* bool bStartEncodedWord = true; */ + const char * pWSPBegin = pBegin; + + for (const char * p = pBegin; p != pEnd;) + { + if (*p == '=' /* && bStartEncodedWord */) + { + const char * q = p + 1; + bool bEncodedWord = q != pEnd && *q++ == '?'; + + rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW; + if (bEncodedWord) + { + const char * pCharsetBegin = q; + const char * pLanguageBegin = nullptr; + int nAlphaCount = 0; + for (bool bDone = false; !bDone;) + if (q == pEnd) + { + bEncodedWord = false; + bDone = true; + } + else + { + char cChar = *q++; + switch (cChar) + { + case '*': + pLanguageBegin = q - 1; + nAlphaCount = 0; + break; + + case '-': + if (pLanguageBegin != nullptr) + { + if (nAlphaCount == 0) + pLanguageBegin = nullptr; + else + nAlphaCount = 0; + } + break; + + case '?': + if (pCharsetBegin == q - 1) + bEncodedWord = false; + else + { + eCharsetEncoding + = getCharsetEncoding( + pCharsetBegin, + pLanguageBegin == nullptr + || nAlphaCount == 0 ? + q - 1 : pLanguageBegin); + bEncodedWord = isMIMECharsetEncoding( + eCharsetEncoding); + eCharsetEncoding + = translateFromMIME(eCharsetEncoding); + } + bDone = true; + break; + + default: + if (pLanguageBegin != nullptr + && (!rtl::isAsciiAlpha( + static_cast<unsigned char>(cChar)) + || ++nAlphaCount > 8)) + pLanguageBegin = nullptr; + break; + } + } + } + + bool bEncodingB = false; + if (bEncodedWord) + { + if (q == pEnd) + bEncodedWord = false; + else + { + switch (*q++) + { + case 'B': + case 'b': + bEncodingB = true; + break; + + case 'Q': + case 'q': + bEncodingB = false; + break; + + default: + bEncodedWord = false; + break; + } + } + } + + bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?'; + + OStringBuffer sText; + if (bEncodedWord) + { + if (bEncodingB) + { + for (bool bDone = false; !bDone;) + { + if (pEnd - q < 4) + { + bEncodedWord = false; + bDone = true; + } + else + { + bool bFinal = false; + int nCount = 3; + sal_uInt32 nValue = 0; + for (int nShift = 18; nShift >= 0; nShift -= 6) + { + int nWeight = getBase64Weight(*q++); + if (nWeight == -2) + { + bEncodedWord = false; + bDone = true; + break; + } + if (nWeight == -1) + { + if (!bFinal) + { + if (nShift >= 12) + { + bEncodedWord = false; + bDone = true; + break; + } + bFinal = true; + nCount = nShift == 6 ? 1 : 2; + } + } + else + nValue |= nWeight << nShift; + } + if (bEncodedWord) + { + for (int nShift = 16; nCount-- > 0; nShift -= 8) + sText.append(char(nValue >> nShift & 0xFF)); + if (*q == '?') + { + ++q; + bDone = true; + } + if (bFinal && !bDone) + { + bEncodedWord = false; + bDone = true; + } + } + } + } + } + else + { + const char * pEncodedTextBegin = q; + const char * pEncodedTextCopyBegin = q; + for (bool bDone = false; !bDone;) + if (q == pEnd) + { + bEncodedWord = false; + bDone = true; + } + else + { + sal_uInt32 nChar = static_cast<unsigned char>(*q++); + switch (nChar) + { + case '=': + { + if (pEnd - q < 2) + { + bEncodedWord = false; + bDone = true; + break; + } + int nDigit1 = getHexWeight(q[0]); + int nDigit2 = getHexWeight(q[1]); + if (nDigit1 < 0 || nDigit2 < 0) + { + bEncodedWord = false; + bDone = true; + break; + } + sText.append(rBody.subView( + (pEncodedTextCopyBegin - pBegin), + (q - 1 - pEncodedTextCopyBegin))); + sText.append(char(nDigit1 << 4 | nDigit2)); + q += 2; + pEncodedTextCopyBegin = q; + break; + } + + case '?': + if (q - pEncodedTextBegin > 1) + sText.append(rBody.subView( + (pEncodedTextCopyBegin - pBegin), + (q - 1 - pEncodedTextCopyBegin))); + else + bEncodedWord = false; + bDone = true; + break; + + case '_': + sText.append(rBody.subView( + (pEncodedTextCopyBegin - pBegin), + (q - 1 - pEncodedTextCopyBegin))); + sText.append(' '); + pEncodedTextCopyBegin = q; + break; + + default: + if (!isVisible(nChar)) + { + bEncodedWord = false; + bDone = true; + } + break; + } + } + } + } + + bEncodedWord = bEncodedWord && q != pEnd && *q++ == '='; + + std::unique_ptr<sal_Unicode[]> pUnicodeBuffer; + sal_Size nUnicodeSize = 0; + if (bEncodedWord) + { + pUnicodeBuffer + = convertToUnicode(sText.getStr(), + sText.getStr() + sText.getLength(), + eCharsetEncoding, nUnicodeSize); + if (!pUnicodeBuffer) + bEncodedWord = false; + } + + if (bEncodedWord) + { + appendISO88591(sDecoded, pCopyBegin, pWSPBegin); + sDecoded.append( + pUnicodeBuffer.get(), + static_cast< sal_Int32 >(nUnicodeSize)); + pUnicodeBuffer.reset(); + p = q; + pCopyBegin = p; + + pWSPBegin = p; + while (p != pEnd && isWhiteSpace(*p)) + ++p; + /* bStartEncodedWord = p != pWSPBegin; */ + continue; + } + } + + if (p == pEnd) + break; + + switch (*p++) + { + case '"': + /* bStartEncodedWord = true; */ + break; + + case '(': + /* bStartEncodedWord = true; */ + break; + + case ')': + /* bStartEncodedWord = false; */ + break; + + default: + { + const char * pUTF8Begin = p - 1; + const char * pUTF8End = pUTF8Begin; + sal_uInt32 nCharacter = 0; + if (translateUTF8Char(pUTF8End, pEnd, nCharacter)) + { + appendISO88591(sDecoded, pCopyBegin, p - 1); + sDecoded.appendUtf32(nCharacter); + p = pUTF8End; + pCopyBegin = p; + } + /* bStartEncodedWord = false; */ + break; + } + } + pWSPBegin = p; + } + + appendISO88591(sDecoded, pCopyBegin, pEnd); + return sDecoded.makeStringAndClear(); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |