diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /comm/mailnews/base/src/nsMsgI18N.cpp | |
parent | Initial commit. (diff) | |
download | thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip |
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'comm/mailnews/base/src/nsMsgI18N.cpp')
-rw-r--r-- | comm/mailnews/base/src/nsMsgI18N.cpp | 403 |
1 files changed, 403 insertions, 0 deletions
diff --git a/comm/mailnews/base/src/nsMsgI18N.cpp b/comm/mailnews/base/src/nsMsgI18N.cpp new file mode 100644 index 0000000000..1c81456403 --- /dev/null +++ b/comm/mailnews/base/src/nsMsgI18N.cpp @@ -0,0 +1,403 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsICharsetConverterManager.h" +#include "mozilla/Utf8.h" +#include "nsIServiceManager.h" + +#include "nsISupports.h" +#include "nsIPrefBranch.h" +#include "nsIPrefService.h" +#include "nsIMimeConverter.h" +#include "nsMsgUtils.h" +#include "nsMsgI18N.h" +#include "nsILineInputStream.h" +#include "nsMimeTypes.h" +#include "nsString.h" +#include "prmem.h" +#include "plstr.h" +#include "nsUTF8Utils.h" +#include "nsNetUtil.h" +#include "nsCRTGlue.h" +#include "nsComponentManagerUtils.h" +#include "nsUnicharUtils.h" +#include "nsIFileStreams.h" +#include "../../intl/nsUTF7ToUnicode.h" +#include "../../intl/nsMUTF7ToUnicode.h" +#include "../../intl/nsUnicodeToMUTF7.h" + +#include <stdlib.h> +#include <tuple> + +// +// International functions necessary for composition +// + +nsresult nsMsgI18NConvertFromUnicode(const nsACString& aCharset, + const nsAString& inString, + nsACString& outString, + bool aReportUencNoMapping) { + if (inString.IsEmpty()) { + outString.Truncate(); + return NS_OK; + } + + auto encoding = mozilla::Encoding::ForLabelNoReplacement(aCharset); + if (!encoding) { + return NS_ERROR_UCONV_NOCONV; + } else if (encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING) { + // We shouldn't ever ship anything in these encodings. + return NS_ERROR_UCONV_NOCONV; + } + + nsresult rv; + std::tie(rv, std::ignore) = encoding->Encode(inString, outString); + + if (rv == NS_OK_HAD_REPLACEMENTS) { + rv = aReportUencNoMapping ? NS_ERROR_UENC_NOMAPPING : NS_OK; + } + + return rv; +} + +nsresult nsMsgI18NConvertToUnicode(const nsACString& aCharset, + const nsACString& inString, + nsAString& outString) { + if (inString.IsEmpty()) { + outString.Truncate(); + return NS_OK; + } + if (aCharset.IsEmpty()) { + // Despite its name, it also works for Latin-1. + CopyASCIItoUTF16(inString, outString); + return NS_OK; + } + + if (aCharset.Equals("UTF-8", nsCaseInsensitiveCStringComparator)) { + return UTF_8_ENCODING->DecodeWithBOMRemoval(inString, outString); + } + + // Look up Thunderbird's special aliases from charsetalias.properties. + nsresult rv; + nsCOMPtr<nsICharsetConverterManager> ccm = + do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv); + NS_ENSURE_SUCCESS(rv, rv); + + nsCString newCharset; + rv = ccm->GetCharsetAlias(PromiseFlatCString(aCharset).get(), newCharset); + NS_ENSURE_SUCCESS(rv, rv); + + if (newCharset.Equals("UTF-7", nsCaseInsensitiveCStringComparator)) { + // Special treatment for decoding UTF-7 since it's not handled by + // encoding_rs. + return CopyUTF7toUTF16(inString, outString); + } + + auto encoding = mozilla::Encoding::ForLabelNoReplacement(newCharset); + if (!encoding) return NS_ERROR_UCONV_NOCONV; + return encoding->DecodeWithoutBOMHandling(inString, outString); +} + +// This is used to decode UTF-7. No support for encoding in UTF-7. +nsresult CopyUTF7toUTF16(const nsACString& aSrc, nsAString& aDest) { + // UTF-7 encoding size cannot be larger than the size in UTF-16. + nsUTF7ToUnicode converter; + int32_t inLen = aSrc.Length(); + int32_t outLen = inLen; + aDest.SetLength(outLen); + converter.ConvertNoBuff(aSrc.BeginReading(), &inLen, aDest.BeginWriting(), + &outLen); + MOZ_ASSERT(inLen == (int32_t)aSrc.Length(), + "UTF-7 should not produce a longer output"); + aDest.SetLength(outLen); + return NS_OK; +} + +nsresult CopyUTF16toMUTF7(const nsAString& aSrc, nsACString& aDest) { +#define IMAP_UTF7_BUF_LENGTH 100 + nsUnicodeToMUTF7 converter; + static char buffer[IMAP_UTF7_BUF_LENGTH]; + const char16_t* in = aSrc.BeginReading(); + int32_t inLen = aSrc.Length(); + int32_t outLen; + aDest.Truncate(); + while (inLen > 0) { + outLen = IMAP_UTF7_BUF_LENGTH; + int32_t remaining = inLen; + converter.ConvertNoBuffNoErr(in, &remaining, buffer, &outLen); + aDest.Append(buffer, outLen); + in += remaining; + inLen -= remaining; + } + outLen = IMAP_UTF7_BUF_LENGTH; + converter.FinishNoBuff(buffer, &outLen); + if (outLen > 0) aDest.Append(buffer, outLen); + return NS_OK; +} + +// Hacky function to use for IMAP folders where the name can be in +// MUTF-7 or UTF-8. +nsresult CopyFolderNameToUTF16(const nsACString& aSrc, nsAString& aDest) { + if (NS_IsAscii(aSrc.BeginReading(), aSrc.Length())) { + // An ASCII string may not be valid MUTF-7. For example, it may contain an + // ampersand not immediately followed by a dash which is invalid MUTF-7. + // Check for validity by converting to UTF-16 and then back to MUTF-7 and + // the result should be unchanged. If the MUTF-7 is invalid, treat it as + // UTF-8. + if (NS_SUCCEEDED(CopyMUTF7toUTF16(aSrc, aDest))) { + nsAutoCString tmp; + CopyUTF16toMUTF7(aDest, tmp); + if (aSrc.Equals(tmp)) return NS_OK; + } + } + // Do if aSrc non-ASCII or if ASCII but invalid MUTF-7. + CopyUTF8toUTF16(aSrc, aDest); + return NS_OK; +} + +nsresult CopyMUTF7toUTF16(const nsACString& aSrc, nsAString& aDest) { + // MUTF-7 encoding size cannot be larger than the size in UTF-16. + nsMUTF7ToUnicode converter; + int32_t inLen = aSrc.Length(); + int32_t outLen = inLen; + aDest.SetLength(outLen); + converter.ConvertNoBuff(aSrc.BeginReading(), &inLen, aDest.BeginWriting(), + &outLen); + MOZ_ASSERT(inLen == (int32_t)aSrc.Length(), + "MUTF-7 should not produce a longer output"); + aDest.SetLength(outLen); + return NS_OK; +} + +// MIME encoder, output string should be freed by PR_FREE +// XXX : fix callers later to avoid allocation and copy +char* nsMsgI18NEncodeMimePartIIStr(const char* header, bool structured, + const char* charset, int32_t fieldnamelen, + bool usemime) { + // No MIME, convert to the outgoing mail charset. + if (!usemime) { + nsAutoCString convertedStr; + if (NS_SUCCEEDED(nsMsgI18NConvertFromUnicode( + charset ? nsDependentCString(charset) : EmptyCString(), + NS_ConvertUTF8toUTF16(header), convertedStr))) + return PL_strdup(convertedStr.get()); + else + return PL_strdup(header); + } + + nsAutoCString encodedString; + nsresult res; + nsCOMPtr<nsIMimeConverter> converter = + do_GetService("@mozilla.org/messenger/mimeconverter;1", &res); + if (NS_SUCCEEDED(res) && nullptr != converter) { + res = converter->EncodeMimePartIIStr_UTF8( + nsDependentCString(header), structured, fieldnamelen, + nsIMimeConverter::MIME_ENCODED_WORD_SIZE, encodedString); + } + + return NS_SUCCEEDED(res) ? PL_strdup(encodedString.get()) : nullptr; +} + +// Return True if a charset is stateful (e.g. JIS). +bool nsMsgI18Nstateful_charset(const char* charset) { + // TODO: use charset manager's service + return (PL_strcasecmp(charset, "ISO-2022-JP") == 0); +} + +bool nsMsgI18Nmultibyte_charset(const char* charset) { + nsresult res; + nsCOMPtr<nsICharsetConverterManager> ccm = + do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &res); + bool result = false; + + if (NS_SUCCEEDED(res)) { + nsAutoString charsetData; + res = ccm->GetCharsetData(charset, u".isMultibyte", charsetData); + if (NS_SUCCEEDED(res)) { + result = charsetData.LowerCaseEqualsLiteral("true"); + } + } + + return result; +} + +bool nsMsgI18Ncheck_data_in_charset_range(const char* charset, + const char16_t* inString) { + if (!charset || !*charset || !inString || !*inString) return true; + + bool res = true; + + auto encoding = + mozilla::Encoding::ForLabelNoReplacement(nsDependentCString(charset)); + if (!encoding) return false; + auto encoder = encoding->NewEncoder(); + + uint8_t buffer[512]; + auto src = mozilla::MakeStringSpan(inString); + auto dst = mozilla::Span(buffer); + while (true) { + uint32_t result; + size_t read; + size_t written; + std::tie(result, read, written) = + encoder->EncodeFromUTF16WithoutReplacement(src, dst, false); + if (result == mozilla::kInputEmpty) { + // All converted successfully. + break; + } else if (result != mozilla::kOutputFull) { + // Didn't use all the input but the output isn't full, hence + // there was an unencodable character. + res = false; + break; + } + src = src.From(read); + // dst = dst.From(written); // Just overwrite output since we don't need it. + } + + return res; +} + +// Simple parser to parse META charset. +// It only supports the case when the description is within one line. +const char* nsMsgI18NParseMetaCharset(nsIFile* file) { + static char charset[nsIMimeConverter::MAX_CHARSET_NAME_LENGTH + 1]; + + *charset = '\0'; + + bool isDirectory = false; + file->IsDirectory(&isDirectory); + if (isDirectory) { + NS_ERROR("file is a directory"); + return charset; + } + + nsresult rv; + nsCOMPtr<nsIFileInputStream> fileStream = + do_CreateInstance(NS_LOCALFILEINPUTSTREAM_CONTRACTID, &rv); + NS_ENSURE_SUCCESS(rv, charset); + + rv = fileStream->Init(file, PR_RDONLY, 0664, false); + nsCOMPtr<nsILineInputStream> lineStream = do_QueryInterface(fileStream, &rv); + + nsCString curLine; + bool more = true; + while (NS_SUCCEEDED(rv) && more) { + rv = lineStream->ReadLine(curLine, &more); + if (curLine.IsEmpty()) continue; + + ToUpperCase(curLine); + + if (curLine.Find("/HEAD") != -1) break; + + if (curLine.Find("META") != -1 && curLine.Find("HTTP-EQUIV") != -1 && + curLine.Find("CONTENT-TYPE") != -1 && curLine.Find("CHARSET") != -1) { + char* cp = (char*)PL_strchr(PL_strstr(curLine.get(), "CHARSET"), '='); + char* token = nullptr; + if (cp) { + char* newStr = cp + 1; + token = NS_strtok(" \"\'", &newStr); + } + if (token) { + PL_strncpy(charset, token, sizeof(charset)); + charset[sizeof(charset) - 1] = '\0'; + + // this function cannot parse a file if it is really + // encoded by one of the following charsets + // so we can say that the charset label must be incorrect for + // the .html if we actually see those charsets parsed + // and we should ignore them + if (!PL_strncasecmp("UTF-16", charset, sizeof("UTF-16") - 1) || + !PL_strncasecmp("UTF-32", charset, sizeof("UTF-32") - 1)) + charset[0] = '\0'; + + break; + } + } + } + + return charset; +} + +nsresult nsMsgI18NShrinkUTF8Str(const nsCString& inString, uint32_t aMaxLength, + nsACString& outString) { + if (inString.IsEmpty()) { + outString.Truncate(); + return NS_OK; + } + if (inString.Length() < aMaxLength) { + outString.Assign(inString); + return NS_OK; + } + NS_ASSERTION(mozilla::IsUtf8(inString), "Invalid UTF-8 string is inputted"); + const char* start = inString.get(); + const char* end = start + inString.Length(); + const char* last = start + aMaxLength; + const char* cur = start; + const char* prev = nullptr; + bool err = false; + while (cur < last) { + prev = cur; + if (!UTF8CharEnumerator::NextChar(&cur, end, &err) || err) break; + } + if (!prev || err) { + outString.Truncate(); + return NS_OK; + } + uint32_t len = prev - start; + outString.Assign(Substring(inString, 0, len)); + return NS_OK; +} + +void nsMsgI18NConvertRawBytesToUTF16(const nsCString& inString, + const nsACString& charset, + nsAString& outString) { + if (mozilla::IsUtf8(inString)) { + CopyUTF8toUTF16(inString, outString); + return; + } + + nsresult rv = nsMsgI18NConvertToUnicode(charset, inString, outString); + if (NS_SUCCEEDED(rv)) return; + + const char* cur = inString.BeginReading(); + const char* end = inString.EndReading(); + outString.Truncate(); + while (cur < end) { + char c = *cur++; + if (c & char(0x80)) + outString.Append(UCS2_REPLACEMENT_CHAR); + else + outString.Append(c); + } +} + +void nsMsgI18NConvertRawBytesToUTF8(const nsCString& inString, + const nsACString& charset, + nsACString& outString) { + if (mozilla::IsUtf8(inString)) { + outString.Assign(inString); + return; + } + + nsAutoString utf16Text; + nsresult rv = nsMsgI18NConvertToUnicode(charset, inString, utf16Text); + if (NS_SUCCEEDED(rv)) { + CopyUTF16toUTF8(utf16Text, outString); + return; + } + + // EF BF BD (UTF-8 encoding of U+FFFD) + constexpr auto utf8ReplacementChar = "\357\277\275"_ns; + const char* cur = inString.BeginReading(); + const char* end = inString.EndReading(); + outString.Truncate(); + while (cur < end) { + char c = *cur++; + if (c & char(0x80)) + outString.Append(utf8ReplacementChar); + else + outString.Append(c); + } +} |