/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsICharsetConverterManager.h"
#include "mozilla/Utf8.h"
#include "nsIServiceManager.h"

#include "nsISupports.h"
#include "nsIPrefBranch.h"
#include "nsIPrefService.h"
#include "nsIMimeConverter.h"
#include "nsMsgUtils.h"
#include "nsMsgI18N.h"
#include "nsILineInputStream.h"
#include "nsMimeTypes.h"
#include "nsString.h"
#include "prmem.h"
#include "plstr.h"
#include "nsUTF8Utils.h"
#include "nsNetUtil.h"
#include "nsCRTGlue.h"
#include "nsComponentManagerUtils.h"
#include "nsUnicharUtils.h"
#include "nsIFileStreams.h"
#include "../../intl/nsUTF7ToUnicode.h"
#include "../../intl/nsMUTF7ToUnicode.h"
#include "../../intl/nsUnicodeToMUTF7.h"

// NOTE(review): the names of the two system headers below were missing from
// the text under review. <tuple> is required by std::tie/std::ignore;
// <stdlib.h> is an assumption -- confirm against version control.
#include <stdlib.h>
#include <tuple>

//
// International functions necessary for composition
//

/**
 * Encodes a UTF-16 string into the charset named by aCharset.
 *
 * @param aCharset             Label of the target encoding.
 * @param inString             UTF-16 text to encode.
 * @param outString            Receives the encoded bytes; truncated when
 *                             inString is empty.
 * @param aReportUencNoMapping When true, unmappable characters (the encoder
 *                             reporting NS_OK_HAD_REPLACEMENTS) are surfaced
 *                             as NS_ERROR_UENC_NOMAPPING; when false they are
 *                             reported as NS_OK.
 * @return NS_ERROR_UCONV_NOCONV if the label is unknown or names UTF-16LE/BE,
 *         otherwise the (possibly remapped) result of the encoder.
 */
nsresult nsMsgI18NConvertFromUnicode(const nsACString& aCharset,
                                     const nsAString& inString,
                                     nsACString& outString,
                                     bool aReportUencNoMapping) {
  if (inString.IsEmpty()) {
    outString.Truncate();
    return NS_OK;
  }

  auto encoding = mozilla::Encoding::ForLabelNoReplacement(aCharset);
  if (!encoding) {
    return NS_ERROR_UCONV_NOCONV;
  }
  if (encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING) {
    // We shouldn't ever ship anything in these encodings.
    return NS_ERROR_UCONV_NOCONV;
  }

  nsresult rv;
  std::tie(rv, std::ignore) = encoding->Encode(inString, outString);
  if (rv == NS_OK_HAD_REPLACEMENTS) {
    rv = aReportUencNoMapping ? NS_ERROR_UENC_NOMAPPING : NS_OK;
  }
  return rv;
}

/**
 * Decodes bytes in the charset named by aCharset into UTF-16.
 *
 * An empty charset is handled by CopyASCIItoUTF16, "UTF-8" is decoded with
 * BOM removal, and "UTF-7" (after alias resolution) is routed to
 * CopyUTF7toUTF16. Every other label is resolved through the charset
 * converter manager's alias table and decoded without BOM handling.
 *
 * @param aCharset  Label of the source encoding; may be empty.
 * @param inString  Raw bytes to decode.
 * @param outString Receives the UTF-16 text; truncated when inString is empty.
 * @return NS_ERROR_UCONV_NOCONV if the resolved label is unknown, a failure
 *         code from the service/alias lookup, or the decoder's result.
 */
nsresult nsMsgI18NConvertToUnicode(const nsACString& aCharset,
                                   const nsACString& inString,
                                   nsAString& outString) {
  if (inString.IsEmpty()) {
    outString.Truncate();
    return NS_OK;
  }
  if (aCharset.IsEmpty()) {
    // Despite its name, it also works for Latin-1.
    CopyASCIItoUTF16(inString, outString);
    return NS_OK;
  }
  if (aCharset.Equals("UTF-8", nsCaseInsensitiveCStringComparator)) {
    return UTF_8_ENCODING->DecodeWithBOMRemoval(inString, outString);
  }

  // Look up Thunderbird's special aliases from charsetalias.properties.
  nsresult rv;
  nsCOMPtr<nsICharsetConverterManager> ccm =
      do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
  NS_ENSURE_SUCCESS(rv, rv);

  nsCString newCharset;
  rv = ccm->GetCharsetAlias(PromiseFlatCString(aCharset).get(), newCharset);
  NS_ENSURE_SUCCESS(rv, rv);

  if (newCharset.Equals("UTF-7", nsCaseInsensitiveCStringComparator)) {
    // Special treatment for decoding UTF-7 since it's not handled by
    // encoding_rs.
    return CopyUTF7toUTF16(inString, outString);
  }

  auto encoding = mozilla::Encoding::ForLabelNoReplacement(newCharset);
  if (!encoding) return NS_ERROR_UCONV_NOCONV;
  return encoding->DecodeWithoutBOMHandling(inString, outString);
}

/**
 * Decodes UTF-7 bytes into UTF-16. There is no support for encoding UTF-7.
 *
 * @param aSrc  UTF-7 encoded bytes.
 * @param aDest Receives the decoded UTF-16 text.
 * @return Always NS_OK; the converter's own status is not inspected.
 */
nsresult CopyUTF7toUTF16(const nsACString& aSrc, nsAString& aDest) {
  // UTF-7 encoding size cannot be larger than the size in UTF-16, so a
  // destination of the same length in code units is always large enough.
  nsUTF7ToUnicode converter;
  int32_t inLen = aSrc.Length();
  int32_t outLen = inLen;
  aDest.SetLength(outLen);
  converter.ConvertNoBuff(aSrc.BeginReading(), &inLen, aDest.BeginWriting(),
                          &outLen);
  MOZ_ASSERT(inLen == static_cast<int32_t>(aSrc.Length()),
             "UTF-7 should not produce a longer output");
  // Shrink to the number of code units the converter actually produced.
  aDest.SetLength(outLen);
  return NS_OK;
}

/**
 * Encodes UTF-16 text as modified UTF-7 (the IMAP mailbox name encoding).
 *
 * The input is converted piecewise through a small fixed-size scratch buffer
 * and each piece is appended to aDest.
 *
 * @param aSrc  UTF-16 text to encode.
 * @param aDest Receives the MUTF-7 bytes; any previous content is discarded.
 * @return Always NS_OK.
 */
nsresult CopyUTF16toMUTF7(const nsAString& aSrc, nsACString& aDest) {
  constexpr int32_t kBufLength = 100;

  nsUnicodeToMUTF7 converter;
  // Scratch space lives on the stack so that concurrent callers never share
  // (and corrupt) each other's intermediate output.
  char buffer[kBufLength];

  const char16_t* in = aSrc.BeginReading();
  int32_t inLen = aSrc.Length();
  int32_t outLen;

  aDest.Truncate();
  while (inLen > 0) {
    outLen = kBufLength;
    // In: number of code units available. Out: number actually consumed.
    int32_t remaining = inLen;
    converter.ConvertNoBuffNoErr(in, &remaining, buffer, &outLen);
    aDest.Append(buffer, outLen);
    in += remaining;
    inLen -= remaining;
  }

  // Let the converter emit whatever it still has pending.
  outLen = kBufLength;
  converter.FinishNoBuff(buffer, &outLen);
  if (outLen > 0) aDest.Append(buffer, outLen);
  return NS_OK;
}

/**
 * Hacky function to use for IMAP folders where the name can be in
 * MUTF-7 or UTF-8.
 *
 * @param aSrc  Folder name bytes, either MUTF-7 or UTF-8.
 * @param aDest Receives the UTF-16 folder name.
 * @return Always NS_OK.
 */
nsresult CopyFolderNameToUTF16(const nsACString& aSrc, nsAString& aDest) {
  if (NS_IsAscii(aSrc.BeginReading(), aSrc.Length())) {
    // An ASCII string may not be valid MUTF-7. For example, it may contain an
    // ampersand not immediately followed by a dash which is invalid MUTF-7.
    // Check for validity by converting to UTF-16 and then back to MUTF-7 and
    // the result should be unchanged. If the MUTF-7 is invalid, treat it as
    // UTF-8.
    if (NS_SUCCEEDED(CopyMUTF7toUTF16(aSrc, aDest))) {
      nsAutoCString tmp;
      CopyUTF16toMUTF7(aDest, tmp);
      if (aSrc.Equals(tmp)) return NS_OK;
    }
  }

  // Do if aSrc non-ASCII or if ASCII but invalid MUTF-7.
  CopyUTF8toUTF16(aSrc, aDest);
  return NS_OK;
}

/**
 * Decodes modified UTF-7 bytes into UTF-16.
 *
 * @param aSrc  MUTF-7 encoded bytes.
 * @param aDest Receives the decoded UTF-16 text.
 * @return Always NS_OK; the converter's own status is not inspected.
 */
nsresult CopyMUTF7toUTF16(const nsACString& aSrc, nsAString& aDest) {
  // MUTF-7 encoding size cannot be larger than the size in UTF-16, so a
  // destination of the same length in code units is always large enough.
  nsMUTF7ToUnicode converter;
  int32_t inLen = aSrc.Length();
  int32_t outLen = inLen;
  aDest.SetLength(outLen);
  converter.ConvertNoBuff(aSrc.BeginReading(), &inLen, aDest.BeginWriting(),
                          &outLen);
  MOZ_ASSERT(inLen == static_cast<int32_t>(aSrc.Length()),
             "MUTF-7 should not produce a longer output");
  // Shrink to the number of code units the converter actually produced.
  aDest.SetLength(outLen);
  return NS_OK;
}

// MIME encoder, output string should be freed by PR_FREE
// XXX : fix callers later to avoid allocation and copy
//
// When usemime is false the UTF-8 header is merely converted to `charset`
// (falling back to a copy of the unconverted header if that fails). Otherwise
// the header is encoded through the MIME converter service; nullptr is
// returned if the service is unavailable or the encoding fails.
char* nsMsgI18NEncodeMimePartIIStr(const char* header, bool structured,
                                   const char* charset, int32_t fieldnamelen,
                                   bool usemime) {
  // No MIME, convert to the outgoing mail charset.
  if (!usemime) {
    nsAutoCString convertedStr;
    if (NS_SUCCEEDED(nsMsgI18NConvertFromUnicode(
            charset ? nsDependentCString(charset) : EmptyCString(),
            NS_ConvertUTF8toUTF16(header), convertedStr))) {
      return PL_strdup(convertedStr.get());
    }
    return PL_strdup(header);
  }

  nsAutoCString encodedString;
  nsresult res;
  nsCOMPtr<nsIMimeConverter> converter =
      do_GetService("@mozilla.org/messenger/mimeconverter;1", &res);
  if (NS_SUCCEEDED(res) && nullptr != converter) {
    res = converter->EncodeMimePartIIStr_UTF8(
        nsDependentCString(header), structured, fieldnamelen,
        nsIMimeConverter::MIME_ENCODED_WORD_SIZE, encodedString);
  }

  return NS_SUCCEEDED(res) ? PL_strdup(encodedString.get()) : nullptr;
}

// Return True if a charset is stateful (e.g. JIS).
bool nsMsgI18Nstateful_charset(const char* charset) { // TODO: use charset manager's service return (PL_strcasecmp(charset, "ISO-2022-JP") == 0); } bool nsMsgI18Nmultibyte_charset(const char* charset) { nsresult res; nsCOMPtr ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &res); bool result = false; if (NS_SUCCEEDED(res)) { nsAutoString charsetData; res = ccm->GetCharsetData(charset, u".isMultibyte", charsetData); if (NS_SUCCEEDED(res)) { result = charsetData.LowerCaseEqualsLiteral("true"); } } return result; } bool nsMsgI18Ncheck_data_in_charset_range(const char* charset, const char16_t* inString) { if (!charset || !*charset || !inString || !*inString) return true; bool res = true; auto encoding = mozilla::Encoding::ForLabelNoReplacement(nsDependentCString(charset)); if (!encoding) return false; auto encoder = encoding->NewEncoder(); uint8_t buffer[512]; auto src = mozilla::MakeStringSpan(inString); auto dst = mozilla::Span(buffer); while (true) { uint32_t result; size_t read; size_t written; std::tie(result, read, written) = encoder->EncodeFromUTF16WithoutReplacement(src, dst, false); if (result == mozilla::kInputEmpty) { // All converted successfully. break; } else if (result != mozilla::kOutputFull) { // Didn't use all the input but the output isn't full, hence // there was an unencodable character. res = false; break; } src = src.From(read); // dst = dst.From(written); // Just overwrite output since we don't need it. } return res; } // Simple parser to parse META charset. // It only supports the case when the description is within one line. 
const char* nsMsgI18NParseMetaCharset(nsIFile* file) { static char charset[nsIMimeConverter::MAX_CHARSET_NAME_LENGTH + 1]; *charset = '\0'; bool isDirectory = false; file->IsDirectory(&isDirectory); if (isDirectory) { NS_ERROR("file is a directory"); return charset; } nsresult rv; nsCOMPtr fileStream = do_CreateInstance(NS_LOCALFILEINPUTSTREAM_CONTRACTID, &rv); NS_ENSURE_SUCCESS(rv, charset); rv = fileStream->Init(file, PR_RDONLY, 0664, false); nsCOMPtr lineStream = do_QueryInterface(fileStream, &rv); nsCString curLine; bool more = true; while (NS_SUCCEEDED(rv) && more) { rv = lineStream->ReadLine(curLine, &more); if (curLine.IsEmpty()) continue; ToUpperCase(curLine); if (curLine.Find("/HEAD") != -1) break; if (curLine.Find("META") != -1 && curLine.Find("HTTP-EQUIV") != -1 && curLine.Find("CONTENT-TYPE") != -1 && curLine.Find("CHARSET") != -1) { char* cp = (char*)PL_strchr(PL_strstr(curLine.get(), "CHARSET"), '='); char* token = nullptr; if (cp) { char* newStr = cp + 1; token = NS_strtok(" \"\'", &newStr); } if (token) { PL_strncpy(charset, token, sizeof(charset)); charset[sizeof(charset) - 1] = '\0'; // this function cannot parse a file if it is really // encoded by one of the following charsets // so we can say that the charset label must be incorrect for // the .html if we actually see those charsets parsed // and we should ignore them if (!PL_strncasecmp("UTF-16", charset, sizeof("UTF-16") - 1) || !PL_strncasecmp("UTF-32", charset, sizeof("UTF-32") - 1)) charset[0] = '\0'; break; } } } return charset; } nsresult nsMsgI18NShrinkUTF8Str(const nsCString& inString, uint32_t aMaxLength, nsACString& outString) { if (inString.IsEmpty()) { outString.Truncate(); return NS_OK; } if (inString.Length() < aMaxLength) { outString.Assign(inString); return NS_OK; } NS_ASSERTION(mozilla::IsUtf8(inString), "Invalid UTF-8 string is inputted"); const char* start = inString.get(); const char* end = start + inString.Length(); const char* last = start + aMaxLength; const char* cur = 
start; const char* prev = nullptr; bool err = false; while (cur < last) { prev = cur; if (!UTF8CharEnumerator::NextChar(&cur, end, &err) || err) break; } if (!prev || err) { outString.Truncate(); return NS_OK; } uint32_t len = prev - start; outString.Assign(Substring(inString, 0, len)); return NS_OK; } void nsMsgI18NConvertRawBytesToUTF16(const nsCString& inString, const nsACString& charset, nsAString& outString) { if (mozilla::IsUtf8(inString)) { CopyUTF8toUTF16(inString, outString); return; } nsresult rv = nsMsgI18NConvertToUnicode(charset, inString, outString); if (NS_SUCCEEDED(rv)) return; const char* cur = inString.BeginReading(); const char* end = inString.EndReading(); outString.Truncate(); while (cur < end) { char c = *cur++; if (c & char(0x80)) outString.Append(UCS2_REPLACEMENT_CHAR); else outString.Append(c); } } void nsMsgI18NConvertRawBytesToUTF8(const nsCString& inString, const nsACString& charset, nsACString& outString) { if (mozilla::IsUtf8(inString)) { outString.Assign(inString); return; } nsAutoString utf16Text; nsresult rv = nsMsgI18NConvertToUnicode(charset, inString, utf16Text); if (NS_SUCCEEDED(rv)) { CopyUTF16toUTF8(utf16Text, outString); return; } // EF BF BD (UTF-8 encoding of U+FFFD) constexpr auto utf8ReplacementChar = "\357\277\275"_ns; const char* cur = inString.BeginReading(); const char* end = inString.EndReading(); outString.Truncate(); while (cur < end) { char c = *cur++; if (c & char(0x80)) outString.Append(utf8ReplacementChar); else outString.Append(c); } }