diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /comm/mailnews/intl | |
parent | Initial commit. (diff) | |
download | thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip |
Adding upstream version 1:115.7.0. (refs: upstream/1%115.7.0, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
25 files changed, 1601 insertions, 0 deletions
diff --git a/comm/mailnews/intl/charsetData.properties b/comm/mailnews/intl/charsetData.properties new file mode 100644 index 0000000000..d4a4147cda --- /dev/null +++ b/comm/mailnews/intl/charsetData.properties @@ -0,0 +1,104 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +## Rule of this file: +## 1. key should always be in lower case ascii so we can do case insensitive +## comparison in the code faster. + +## Format of this file: +## +## charset_name.isInternal = anything - specifies that this charset should +## not be exposed to web content because of the vulnerability to XSS attacks +## or some other reasons +## +## charset_name.LangGroup = +## +## charset_name.isMultibyte = multi byte charsets + +replacement.isInternal = true + +# XXX : todo: move to something based on BCP 47 (RFC 5646); +# these should primarily specify script (and sometimes region), +# but NOT language. +# See also https://bugzilla.mozilla.org/show_bug.cgi?id=756022 +# e.g. 
x-western -> *-Latn-155 (Western Europe), +# *-Latn-151 (Eastern Europe), +# *-Latn-154 (Northern Europe), +# *-Latn-TR +# x-cyrillic -> *-Cyrl +# zh-TW -> *-Hant-TW +# zh-HK -> *-Hant-HK +# zh-CN -> *-Hans +# ja -> *-Jpan +# ko -> *-Hang +# he -> *-Hebr +# ar -> *-Arab +# etc + +big5.LangGroup = zh-TW +big5-hkscs.LangGroup = zh-HK +euc-jp.LangGroup = ja +euc-kr.LangGroup = ko +gb2312.LangGroup = zh-CN +gb18030.LangGroup = zh-CN +gb18030.2000-0.LangGroup = zh-CN +gb18030.2000-1.LangGroup = zh-CN +hkscs-1.LangGroup = zh-HK +ibm866.LangGroup = x-cyrillic +ibm1125.LangGroup = x-cyrillic +ibm1131.LangGroup = x-cyrillic +iso-2022-jp.LangGroup = ja +iso-8859-1.LangGroup = x-western +iso-8859-10.LangGroup = x-western +iso-8859-14.LangGroup = x-western +iso-8859-15.LangGroup = x-western +iso-8859-2.LangGroup = x-western +iso-8859-16.LangGroup = x-western +iso-8859-3.LangGroup = x-western +iso-8859-4.LangGroup = x-western +iso-8859-13.LangGroup = x-western +iso-8859-5.LangGroup = x-cyrillic +iso-8859-6.LangGroup = ar +iso-8859-7.LangGroup = el +iso-8859-8.LangGroup = he +iso-8859-8-i.LangGroup = he +jis_0208-1983.LangGroup = ja +koi8-r.LangGroup = x-cyrillic +koi8-u.LangGroup = x-cyrillic +shift_jis.LangGroup = ja +windows-874.LangGroup = th +utf-8.LangGroup = x-unicode +utf-16.LangGroup = x-unicode +utf-16be.LangGroup = x-unicode +utf-16le.LangGroup = x-unicode +utf-7.LangGroup = x-unicode +replacement.LangGroup = x-unicode +windows-1250.LangGroup = x-western +windows-1251.LangGroup = x-cyrillic +windows-1252.LangGroup = x-western +windows-1253.LangGroup = el +windows-1254.LangGroup = x-western +windows-1255.LangGroup = he +windows-1256.LangGroup = ar +windows-1257.LangGroup = x-western +windows-1258.LangGroup = x-western +gbk.LangGroup = zh-CN + +# The following two are in the Encoding Standard (https://encoding.spec.whatwg.org/), +# x-mac-ukrainian is a label. 
+x-mac-cyrillic.LangGroup = x-cyrillic +macintosh.LangGroup = x-western + +x-user-defined.LangGroup = x-unicode + +iso-2022-jp.isMultibyte = true +shift_jis.isMultibyte = true +euc-jp.isMultibyte = true +big5.isMultibyte = true +big5-hkscs.isMultibyte = true +gb2312.isMultibyte = true +euc-kr.isMultibyte = true +utf-7.isMultibyte = true +utf-8.isMultibyte = true +replacement.isMultibyte = true diff --git a/comm/mailnews/intl/charsetalias.properties b/comm/mailnews/intl/charsetalias.properties new file mode 100644 index 0000000000..1ef5e52ee2 --- /dev/null +++ b/comm/mailnews/intl/charsetalias.properties @@ -0,0 +1,151 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# Rule of this file: +# 1. key should always be in lower case ascii so we can do case insensitive +# comparison in the code faster. +# 2. value should be the _name_ used in the WHATWG Encoding Standard +# https://encoding.spec.whatwg.org/ (of "UTF-7" for UTF-7). +# +# This file contains email-specific labels. Web-relevant labels for +# encodings are in the Encoding Standard / encoding_rs. + +# Added for Solaris ns_langinfo. Unlikely relevant to email. +# https://bugzilla.mozilla.org/show_bug.cgi?id=77300#c9 +646=windows-1252 + +# Aliases for ISO-8859-8-I +# From the original IBM bidi patch. +iso-8859-8i=ISO-8859-8-I + +# ISO 8859 series with underscore for JavaMail +# compat. 
+# https://bugzilla.mozilla.org/show_bug.cgi?id=820767 +iso8859_1=windows-1252 +iso8859_2=ISO-8859-2 +iso8859_3=ISO-8859-3 +iso8859_4=ISO-8859-4 +iso8859_5=ISO-8859-5 +iso8859_6=ISO-8859-6 +iso8859_7=ISO-8859-7 +# Unclear if 8 with underscore was visual or not +iso8859_9=windows-1254 +# No evidence of 10 occurring with underscore +# 11 is tis620 +# 12 does not exist +iso8859_13=ISO-8859-13 +# No evidence of 14 occurring with underscore +iso8859_15=ISO-8859-15 +# No evidence of 16 occurring with underscore + +koi8r=KOI8-R + +# Code pages shared by DOS and Windows with ms prefix. +# Evidence of this pattern in the wild: +# https://bugzilla.mozilla.org/show_bug.cgi?id=1120813 +# Plausible cause: JavaMail +# The ms prefix as used by Sun is not relevant to windows-125x series +ms874=windows-874 +# ms932 was added to the Encoding Standard as one-off Thunderbird request +# MS936 shows up at https://www.iana.org/assignments/character-sets/character-sets.xhtml +ms936=GBK +ms949=EUC-KR +ms950=Big5 +ms950_hkscs=Big5 + +# Underscore versions of Unix CJK encodings. +# No evidence of these in the wild, but these could plausibly +# occur for the same reason as the above two groups. +euc_cn=GBK +euc_kr=EUC-KR +euc_jp=EUC-JP +big5_hkscs=Big5 + +# Code pages shared by DOS and Windows with cp prefix +# cp125x series are in the Encoding Standard +# Evidence of the pattern in the wild: +# https://bugzilla.mozilla.org/show_bug.cgi?id=1511950 +# https://bugzilla.mozilla.org/show_bug.cgi?id=542823 +# https://bugzilla.mozilla.org/show_bug.cgi?id=1217161 +cp874=windows-874 +cp932=Shift_JIS +# CP936 shows up at https://www.iana.org/assignments/character-sets/character-sets.xhtml +cp936=GBK +cp949=EUC-KR +cp950=Big5 + +# Aliases for ISO-2022-JP +# The following are really not aliases ISO-2022-JP, but sharing the same decoder +# Kept mainly for compat with old Apple Mail. 
+iso-2022-jp-2=ISO-2022-JP +csiso2022jp2=ISO-2022-JP +# A Google search suggests the variant without hyphens has been used with +# JavaMail. +iso2022jp=ISO-2022-JP + +# Aliases for Big5 +# Added in patch that generally meant to support emails sent by +# dtmail on Sun Solaris +# https://bugzilla.mozilla.org/show_bug.cgi?id=146287 +zh_tw-big5=Big5 + +# Aliases for EUC-KR +# Added for Solaris ns_langinfo. Unlikely relevant to email. +# https://bugzilla.mozilla.org/show_bug.cgi?id=82075 +5601=EUC-KR +# https://bugzilla.mozilla.org/show_bug.cgi?id=234958 +x-windows-949=EUC-KR + +# Aliases for windows-874 +# Added originally for nl_langinfo reasons but could plausibly be sent +# by JavaMail. +# https://bugzilla.mozilla.org/show_bug.cgi?id=101295 +tis620=windows-874 + +# Aliases for IBM866 +# This alias may have been made up by accident and may +# not be relevant to real-world email. +# https://bugzilla.mozilla.org/show_bug.cgi?id=77588 +cp-866=IBM866 + +# Aliases for UTF-7 +utf-7=UTF-7 +# The below 4 aliases were not in Thunderbird 60, and there were +# no complaints. +# This alias appears to have been generated by the email part +# of the Netscape 4.0 suite per http://jkorpela.fi/chars.html +x-unicode-2-0-utf-7=UTF-7 +# This appears to be just a made-up non-x version of the above +# (checked in without bug number). +unicode-2-0-utf-7=UTF-7 +# The two aliases below show up at +# https://www.iana.org/assignments/character-sets/character-sets.xhtml +unicode-1-1-utf-7=UTF-7 +csunicode11utf7=UTF-7 + +# The below aliases were not in Thunderbird 60, and there were +# no complaints. +# These aliases show up at +# https://www.iana.org/assignments/character-sets/character-sets.xhtml +csunicode=UTF-16BE +csunicode11=UTF-16BE +iso-10646-ucs-basic=UTF-16BE +csunicodeascii=UTF-16BE +iso-10646-unicode-latin1=UTF-16BE +csunicodelatin1=UTF-16BE +iso-10646=UTF-16BE +iso-10646-j-1=UTF-16BE +iso-10646-ucs-2=UTF-16BE +# Netscape aliases checked in without bug number. 
+# Possibly meant to be Netscape-private. +x-iso-10646-ucs-2-be=UTF-16BE +x-iso-10646-ucs-2-le=UTF-16LE + +# Shows up at https://www.iana.org/assignments/character-sets/character-sets.xhtml +# https://bugzilla.mozilla.org/show_bug.cgi?id=651113 +windows-936=GBK + +# Added for Solaris ns_langinfo(). Unlikely to be relevant to email. +# https://bugzilla.mozilla.org/show_bug.cgi?id=82075 +ansi-1251=windows-1251 diff --git a/comm/mailnews/intl/components.conf b/comm/mailnews/intl/components.conf new file mode 100644 index 0000000000..6c1f128442 --- /dev/null +++ b/comm/mailnews/intl/components.conf @@ -0,0 +1,12 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Classes = [ + { + "cid": "{3c1c0163-9bd0-11d3-9d09-0050040007b2}", + "contract_ids": ["@mozilla.org/charset-converter-manager;1"], + "type": "nsCharsetConverterManager", + "headers": ["/comm/mailnews/intl/nsCharsetConverterManager.h"], + }, +] diff --git a/comm/mailnews/intl/jar.mn b/comm/mailnews/intl/jar.mn new file mode 100644 index 0000000000..ab02275d97 --- /dev/null +++ b/comm/mailnews/intl/jar.mn @@ -0,0 +1,6 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +toolkit.jar: + res/charsetData.properties (charsetData.properties) diff --git a/comm/mailnews/intl/moz.build b/comm/mailnews/intl/moz.build new file mode 100644 index 0000000000..03b8a13a46 --- /dev/null +++ b/comm/mailnews/intl/moz.build @@ -0,0 +1,42 @@ +# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +XPIDL_SOURCES += [ + "nsICharsetConverterManager.idl", +] + +UNIFIED_SOURCES += [ + "nsCharsetAlias.cpp", + "nsCharsetConverterManager.cpp", + "nsMUTF7ToUnicode.cpp", + "nsUnicodeToMUTF7.cpp", + "nsUnicodeToUTF7.cpp", + "nsUTF7ToUnicode.cpp", +] + +XPIDL_MODULE = "commuconv" + +LOCAL_INCLUDES += [ + "/intl/locale", +] + +GENERATED_FILES += [ + "charsetalias.properties.h", +] +charsetalias = GENERATED_FILES["charsetalias.properties.h"] +charsetalias.script = "/intl/locale/props2arrays.py" +charsetalias.inputs = ["charsetalias.properties"] + +FINAL_LIBRARY = "mail" + +# Tests need more attention before they can be enabled. +TEST_DIRS += ["test"] + +JAR_MANIFESTS += ["jar.mn"] + +XPCOM_MANIFESTS += [ + "components.conf", +] diff --git a/comm/mailnews/intl/nsCharsetAlias.cpp b/comm/mailnews/intl/nsCharsetAlias.cpp new file mode 100644 index 0000000000..a92cf193c9 --- /dev/null +++ b/comm/mailnews/intl/nsCharsetAlias.cpp @@ -0,0 +1,86 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/ArrayUtils.h" +#include "mozilla/Encoding.h" + +#include "nsCharsetAlias.h" + +// for NS_ERROR_UCONV_NOCONV +#include "nsCharsetConverterManager.h" + +#include "nsUConvPropertySearch.h" + +using namespace mozilla; + +// +static const nsUConvProp kAliases[] = { +#include "charsetalias.properties.h" +}; + +//-------------------------------------------------------------- +// static +nsresult nsCharsetAlias::GetPreferredInternal(const nsACString& aAlias, + nsACString& oResult) { + // First check charsetalias.properties and if there is no match, continue to + // call Encoding::ForLabel. 
+ nsAutoCString key(aAlias); + ToLowerCase(key); + + nsresult rv = nsUConvPropertySearch::SearchPropertyValue( + kAliases, ArrayLength(kAliases), key, oResult); + if (NS_SUCCEEDED(rv)) { + return NS_OK; + } + + const Encoding* encoding = Encoding::ForLabel(key); + if (!encoding) return NS_ERROR_NOT_AVAILABLE; + encoding->Name(oResult); + return NS_OK; +} + +//-------------------------------------------------------------- +// static +nsresult nsCharsetAlias::GetPreferred(const nsACString& aAlias, + nsACString& oResult) { + if (aAlias.IsEmpty()) return NS_ERROR_NULL_POINTER; + + nsresult res = GetPreferredInternal(aAlias, oResult); + if (NS_FAILED(res)) return res; + + if (nsCharsetConverterManager::IsInternal(oResult)) + return NS_ERROR_UCONV_NOCONV; + + return res; +} + +//-------------------------------------------------------------- +// static +nsresult nsCharsetAlias::Equals(const nsACString& aCharset1, + const nsACString& aCharset2, bool* oResult) { + nsresult res = NS_OK; + + if (aCharset1.Equals(aCharset2, nsCaseInsensitiveCStringComparator)) { + *oResult = true; + return res; + } + + if (aCharset1.IsEmpty() || aCharset2.IsEmpty()) { + *oResult = false; + return res; + } + + *oResult = false; + nsAutoCString name1; + res = GetPreferredInternal(aCharset1, name1); + if (NS_FAILED(res)) return res; + + nsAutoCString name2; + res = GetPreferredInternal(aCharset2, name2); + if (NS_FAILED(res)) return res; + + *oResult = name1.Equals(name2); + return NS_OK; +} diff --git a/comm/mailnews/intl/nsCharsetAlias.h b/comm/mailnews/intl/nsCharsetAlias.h new file mode 100644 index 0000000000..6b24f33567 --- /dev/null +++ b/comm/mailnews/intl/nsCharsetAlias.h @@ -0,0 +1,27 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsCharsetAlias_h___ +#define nsCharsetAlias_h___ + +#include "nscore.h" +#include "nsString.h" + +class nsCharsetConverterManager; +class nsScriptableUnicodeConverter; + +class nsCharsetAlias { + friend class nsCharsetConverterManager; + friend class nsScriptableUnicodeConverter; + static nsresult GetPreferredInternal(const nsACString& aAlias, + nsACString& aResult); + + public: + static nsresult GetPreferred(const nsACString& aAlias, nsACString& aResult); + static nsresult Equals(const nsACString& aCharset1, + const nsACString& aCharset2, bool* aResult); +}; + +#endif /* nsCharsetAlias_h___ */ diff --git a/comm/mailnews/intl/nsCharsetConverterManager.cpp b/comm/mailnews/intl/nsCharsetConverterManager.cpp new file mode 100644 index 0000000000..a15917206e --- /dev/null +++ b/comm/mailnews/intl/nsCharsetConverterManager.cpp @@ -0,0 +1,184 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsCOMPtr.h" +#include "nsString.h" +#include "nsUnicharUtils.h" +#include "nsCharsetAlias.h" +#include "nsICharsetConverterManager.h" +#include "nsIStringBundle.h" +#include "nsTArray.h" +#include "mozilla/Components.h" + +#include "nsComponentManagerUtils.h" +#include "nsServiceManagerUtils.h" +#include "../base/src/nsMsgI18N.h" + +// just for CONTRACTIDs +#include "nsCharsetConverterManager.h" + +static nsCOMPtr<nsIStringBundle> sDataBundle; +static nsCOMPtr<nsIStringBundle> sTitleBundle; + +// Class nsCharsetConverterManager [implementation] + +NS_IMPL_ISUPPORTS(nsCharsetConverterManager, nsICharsetConverterManager) + +nsCharsetConverterManager::nsCharsetConverterManager() {} + +nsCharsetConverterManager::~nsCharsetConverterManager() { + sDataBundle = nullptr; + sTitleBundle = nullptr; +} + +static nsresult LoadBundle(const char* aBundleURLSpec, + nsIStringBundle** aResult) { + nsCOMPtr<nsIStringBundleService> sbServ = + mozilla::components::StringBundle::Service(); + if (!sbServ) return NS_ERROR_FAILURE; + + return sbServ->CreateBundle(aBundleURLSpec, aResult); +} + +static nsresult GetBundleValueInner(nsIStringBundle* aBundle, const char* aName, + const nsString& aProp, nsAString& aResult) { + nsAutoString key; + + CopyASCIItoUTF16(mozilla::MakeStringSpan(aName), key); + ToLowerCase(key); // we lowercase the main comparison key + key.Append(aProp); + + return aBundle->GetStringFromName(NS_ConvertUTF16toUTF8(key).get(), aResult); +} + +static nsresult GetBundleValue(nsIStringBundle* aBundle, const char* aName, + const nsString& aProp, nsAString& aResult) { + nsresult rv = NS_OK; + + nsAutoString value; + rv = GetBundleValueInner(aBundle, aName, aProp, value); + if (NS_FAILED(rv)) return rv; + + aResult = value; + + return NS_OK; +} + +static nsresult GetCharsetDataImpl(const char* aCharset, const char16_t* aProp, + nsAString& aResult) { + NS_ENSURE_ARG_POINTER(aCharset); + // aProp can be nullptr + + if (!sDataBundle) { + nsresult rv = 
+ LoadBundle("resource://gre-resources/charsetData.properties", + getter_AddRefs(sDataBundle)); + if (NS_FAILED(rv)) return rv; + } + + return GetBundleValue(sDataBundle, aCharset, nsDependentString(aProp), + aResult); +} + +// static +bool nsCharsetConverterManager::IsInternal(const nsACString& aCharset) { + nsAutoString str; + // fully qualify to possibly avoid vtable call + nsresult rv = GetCharsetDataImpl(PromiseFlatCString(aCharset).get(), + u".isInternal", str); + + return NS_SUCCEEDED(rv); +} + +//----------------------------------------------------------------------------//---------------------------------------------------------------------------- +// Interface nsICharsetConverterManager [implementation] + +// XXX Improve the implementation of this method. Right now, it is built on +// top of the nsCharsetAlias service. We can make the nsCharsetAlias +// better, with its own hash table (not the StringBundle anymore) and +// a nicer file format. +NS_IMETHODIMP +nsCharsetConverterManager::GetCharsetAlias(const char* aCharset, + nsACString& aResult) { + NS_ENSURE_ARG_POINTER(aCharset); + + // We try to obtain the preferred name for this charset from the charset + // aliases. 
+ nsresult rv; + + rv = nsCharsetAlias::GetPreferred(nsDependentCString(aCharset), aResult); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +NS_IMETHODIMP +nsCharsetConverterManager::GetCharsetTitle(const char* aCharset, + nsAString& aResult) { + NS_ENSURE_ARG_POINTER(aCharset); + + if (!sTitleBundle) { + nsresult rv = + LoadBundle("chrome://messenger/locale/charsetTitles.properties", + getter_AddRefs(sTitleBundle)); + NS_ENSURE_SUCCESS(rv, rv); + } + + return GetBundleValue(sTitleBundle, aCharset, u".title"_ns, aResult); +} + +NS_IMETHODIMP +nsCharsetConverterManager::GetCharsetData(const char* aCharset, + const char16_t* aProp, + nsAString& aResult) { + return GetCharsetDataImpl(aCharset, aProp, aResult); +} + +NS_IMETHODIMP +nsCharsetConverterManager::GetCharsetLangGroup(const char* aCharset, + nsACString& aResult) { + // resolve the charset first + nsAutoCString charset; + + nsresult rv = GetCharsetAlias(aCharset, charset); + NS_ENSURE_SUCCESS(rv, rv); + + // fully qualify to possibly avoid vtable call + return nsCharsetConverterManager::GetCharsetLangGroupRaw(charset.get(), + aResult); +} + +NS_IMETHODIMP +nsCharsetConverterManager::GetCharsetLangGroupRaw(const char* aCharset, + nsACString& aResult) { + nsAutoString langGroup; + // fully qualify to possibly avoid vtable call + nsresult rv = nsCharsetConverterManager::GetCharsetData( + aCharset, u".LangGroup", langGroup); + + if (NS_SUCCEEDED(rv)) { + ToLowerCase(langGroup); // use lowercase for all language groups + aResult = NS_ConvertUTF16toUTF8(langGroup); + } + + return rv; +} + +NS_IMETHODIMP +nsCharsetConverterManager::Utf7ToUnicode(const nsACString& aSrc, + nsAString& aDest) { + return CopyUTF7toUTF16(aSrc, aDest); +} + +NS_IMETHODIMP +nsCharsetConverterManager::Mutf7ToUnicode(const nsACString& aSrc, + nsAString& aDest) { + return CopyMUTF7toUTF16(aSrc, aDest); +} + +NS_IMETHODIMP +nsCharsetConverterManager::UnicodeToMutf7(const nsAString& aSrc, + nsACString& aDest) { + return 
CopyUTF16toMUTF7(aSrc, aDest); +} diff --git a/comm/mailnews/intl/nsCharsetConverterManager.h b/comm/mailnews/intl/nsCharsetConverterManager.h new file mode 100644 index 0000000000..9a217bf0e6 --- /dev/null +++ b/comm/mailnews/intl/nsCharsetConverterManager.h @@ -0,0 +1,27 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef nsCharsetConverterManager_h__ +#define nsCharsetConverterManager_h__ + +#include "nsICharsetConverterManager.h" + +class nsCharsetAlias; + +class nsCharsetConverterManager : public nsICharsetConverterManager { + friend class nsCharsetAlias; + + NS_DECL_THREADSAFE_ISUPPORTS + NS_DECL_NSICHARSETCONVERTERMANAGER + + public: + nsCharsetConverterManager(); + + private: + virtual ~nsCharsetConverterManager(); + + static bool IsInternal(const nsACString& aCharset); +}; + +#endif // nsCharsetConverterManager_h__ diff --git a/comm/mailnews/intl/nsICharsetConverterManager.idl b/comm/mailnews/intl/nsICharsetConverterManager.idl new file mode 100644 index 0000000000..fe77ed6843 --- /dev/null +++ b/comm/mailnews/intl/nsICharsetConverterManager.idl @@ -0,0 +1,71 @@ +/* -*- Mode: IDL; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsISupports.idl" + +%{C++ +#include "mozilla/Encoding.h" + +// XXX change to NS_CHARSETCONVERTERMANAGER_CID +#define NS_ICHARSETCONVERTERMANAGER_CID \ + {0x3c1c0163, 0x9bd0, 0x11d3, { 0x9d, 0x9, 0x0, 0x50, 0x4, 0x0, 0x7, 0xb2}} + +#define NS_CHARSETCONVERTERMANAGER_CONTRACTID "@mozilla.org/charset-converter-manager;1" +%} + +[scriptable, uuid(a0550d46-8d9c-47dd-acc7-c083620dff12)] +interface nsICharsetConverterManager : nsISupports +{ + /** + * A shortcut to calling nsICharsetAlias to do alias resolution + * @throws if aCharset is an unknown charset. + */ + ACString getCharsetAlias(in string aCharset); + + /** + * Get the human-readable name for the given charset. + * @throws if aCharset is an unknown charset. + */ + AString getCharsetTitle(in string aCharset); + + /** + * Get some data about the given charset. This includes whether the + * character encoding may be used for certain purposes, if it is + * multi-byte, and the language code for it. See charsetData.properties + * for the source of this data. Some known property names: + * LangGroup - language code for charset, e.g. 'he' and 'zh-CN'. + * isMultibyte - is this a multi-byte charset? + * isInternal - not to be used in untrusted web content. + * + * @param aCharset name of the character encoding, e.g. 'iso-8859-15'. + * @param aProp property desired for the character encoding. + * @throws if aCharset is an unknown charset. + * @return the value of the property, for the character encoding. + */ + AString getCharsetData(in string aCharset, + in wstring aProp); + + /** + * Get the language group for the given charset. This is similar to + * calling <tt>getCharsetData</tt> with the <tt>prop</tt> "LangGroup". + * + * @param aCharset name of the character encoding, e.g. 'iso-8859-15'. + * @throws if aCharset is an unknown charset. + * @return the language code for the character encoding. 
+ */ + AUTF8String getCharsetLangGroup(in string aCharset); + AUTF8String getCharsetLangGroupRaw(in string aCharset); + + /** + * Decoding of UTF-7 in message headers and bodies. + */ + AString utf7ToUnicode(in ACString aMutf7); + + /** + * Support for Modified UTF-7 (MUTF-7) used by IMAP. + */ + AString mutf7ToUnicode(in ACString aMutf7); + ACString unicodeToMutf7(in AString aUnicode); +}; diff --git a/comm/mailnews/intl/nsMUTF7ToUnicode.cpp b/comm/mailnews/intl/nsMUTF7ToUnicode.cpp new file mode 100644 index 0000000000..bd49d647fa --- /dev/null +++ b/comm/mailnews/intl/nsMUTF7ToUnicode.cpp @@ -0,0 +1,11 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsMUTF7ToUnicode.h" + +//---------------------------------------------------------------------- +// Class nsMUTF7ToUnicode [implementation] + +nsMUTF7ToUnicode::nsMUTF7ToUnicode() : nsBasicUTF7Decoder(',', '&') {} diff --git a/comm/mailnews/intl/nsMUTF7ToUnicode.h b/comm/mailnews/intl/nsMUTF7ToUnicode.h new file mode 100644 index 0000000000..ff26e8b6ab --- /dev/null +++ b/comm/mailnews/intl/nsMUTF7ToUnicode.h @@ -0,0 +1,28 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsMUTF7ToUnicode_h___ +#define nsMUTF7ToUnicode_h___ + +#include "nsUTF7ToUnicode.h" + +//---------------------------------------------------------------------- +// Class nsMUTF7ToUnicode [declaration] + +/** + * A character set converter from Modified UTF7 to Unicode. 
+ * + * @created 18/May/1999 + * @author Catalin Rotaru [CATA] + */ +class nsMUTF7ToUnicode : public nsBasicUTF7Decoder { + public: + /** + * Class constructor. + */ + nsMUTF7ToUnicode(); +}; + +#endif /* nsMUTF7ToUnicode_h___ */ diff --git a/comm/mailnews/intl/nsUTF7ToUnicode.cpp b/comm/mailnews/intl/nsUTF7ToUnicode.cpp new file mode 100644 index 0000000000..2257affa51 --- /dev/null +++ b/comm/mailnews/intl/nsUTF7ToUnicode.cpp @@ -0,0 +1,217 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsUTF7ToUnicode.h" + +#define ENC_DIRECT 0 +#define ENC_BASE64 1 + +//---------------------------------------------------------------------- +// Class nsBasicUTF7Decoder [implementation] + +nsBasicUTF7Decoder::nsBasicUTF7Decoder(char aLastChar, char aEscChar) { + mLastChar = aLastChar; + mEscChar = aEscChar; + mFreshBase64 = false; + Reset(); +} + +nsresult nsBasicUTF7Decoder::DecodeDirect(const char* aSrc, int32_t* aSrcLength, + char16_t* aDest, + int32_t* aDestLength) { + const char* srcEnd = aSrc + *aSrcLength; + const char* src = aSrc; + char16_t* destEnd = aDest + *aDestLength; + char16_t* dest = aDest; + nsresult res = NS_OK; + char ch; + + while (src < srcEnd) { + ch = *src; + + // stop when we meet other chars or end of direct encoded seq. 
+ // if (!(DirectEncodable(ch)) || (ch == mEscChar)) { + // but we are decoding; so we should be lax; pass everything until escchar + if (ch == mEscChar) { + res = NS_ERROR_UDEC_ILLEGALINPUT; + break; + } + + if (dest >= destEnd) { + res = NS_OK_UDEC_MOREOUTPUT; + break; + } else { + *dest++ = ch; + src++; + } + } + + *aSrcLength = src - aSrc; + *aDestLength = dest - aDest; + return res; +} + +nsresult nsBasicUTF7Decoder::DecodeBase64(const char* aSrc, int32_t* aSrcLength, + char16_t* aDest, + int32_t* aDestLength) { + const char* srcEnd = aSrc + *aSrcLength; + const char* src = aSrc; + char16_t* destEnd = aDest + *aDestLength; + char16_t* dest = aDest; + nsresult res = NS_OK; + char ch; + uint32_t value; + + while (src < srcEnd) { + ch = *src; + + // stop when we meet other chars or end of direct encoded seq. + value = CharToValue(ch); + if (value > 0xff) { + res = NS_ERROR_UDEC_ILLEGALINPUT; + break; + } + + switch (mEncStep) { + case 0: + mEncBits = value << 10; + break; + case 1: + mEncBits += value << 4; + break; + case 2: + if (dest >= destEnd) { + res = NS_OK_UDEC_MOREOUTPUT; + break; + } + mEncBits += value >> 2; + *(dest++) = (char16_t)mEncBits; + mEncBits = (value & 0x03) << 14; + break; + case 3: + mEncBits += value << 8; + break; + case 4: + mEncBits += value << 2; + break; + case 5: + if (dest >= destEnd) { + res = NS_OK_UDEC_MOREOUTPUT; + break; + } + mEncBits += value >> 4; + *(dest++) = (char16_t)mEncBits; + mEncBits = (value & 0x0f) << 12; + break; + case 6: + mEncBits += value << 6; + break; + case 7: + if (dest >= destEnd) { + res = NS_OK_UDEC_MOREOUTPUT; + break; + } + mEncBits += value; + *(dest++) = (char16_t)mEncBits; + mEncBits = 0; + break; + } + + if (res != NS_OK) break; + + src++; + (++mEncStep) %= 8; + } + + *aSrcLength = src - aSrc; + *aDestLength = dest - aDest; + return res; +} + +uint32_t nsBasicUTF7Decoder::CharToValue(char aChar) { + if ((aChar >= 'A') && (aChar <= 'Z')) + return (uint8_t)(aChar - 'A'); + else if ((aChar >= 'a') 
&& (aChar <= 'z')) + return (uint8_t)(26 + aChar - 'a'); + else if ((aChar >= '0') && (aChar <= '9')) + return (uint8_t)(26 + 26 + aChar - '0'); + else if (aChar == '+') + return (uint8_t)(26 + 26 + 10); + else if (aChar == mLastChar) + return (uint8_t)(26 + 26 + 10 + 1); + else + return 0xffff; +} + +//---------------------------------------------------------------------- +// Subclassing of nsBufferDecoderSupport class [implementation] + +NS_IMETHODIMP nsBasicUTF7Decoder::ConvertNoBuff(const char* aSrc, + int32_t* aSrcLength, + char16_t* aDest, + int32_t* aDestLength) { + const char* srcEnd = aSrc + *aSrcLength; + const char* src = aSrc; + char16_t* destEnd = aDest + *aDestLength; + char16_t* dest = aDest; + int32_t bcr, bcw; + nsresult res = NS_OK; + + while (src < srcEnd) { + // first, attempt to decode in the current mode + bcr = srcEnd - src; + bcw = destEnd - dest; + if (mEncoding == ENC_DIRECT) + res = DecodeDirect(src, &bcr, dest, &bcw); + else if ((mFreshBase64) && (*src == '-')) { + *dest = mEscChar; + bcr = 0; + bcw = 1; + res = NS_ERROR_UDEC_ILLEGALINPUT; + } else { + mFreshBase64 = false; + res = DecodeBase64(src, &bcr, dest, &bcw); + } + src += bcr; + dest += bcw; + + // if an illegal char was encountered, test if it is an escape seq. 
+ if (res == NS_ERROR_UDEC_ILLEGALINPUT) { + if (mEncoding == ENC_DIRECT) { + if (*src == mEscChar) { + mEncoding = ENC_BASE64; + mFreshBase64 = true; + mEncBits = 0; + mEncStep = 0; + src++; + res = NS_OK; + } else + break; + } else { + mEncoding = ENC_DIRECT; + res = NS_OK; + // absorb end of escape sequence + if (*src == '-') src++; + } + } else if (res != NS_OK) + break; + } + + *aSrcLength = src - aSrc; + *aDestLength = dest - aDest; + return res; +} + +NS_IMETHODIMP nsBasicUTF7Decoder::Reset() { + mEncoding = ENC_DIRECT; + mEncBits = 0; + mEncStep = 0; + return NS_OK; +} + +//---------------------------------------------------------------------- +// Class nsUTF7ToUnicode [implementation] + +nsUTF7ToUnicode::nsUTF7ToUnicode() : nsBasicUTF7Decoder('/', '+') {} diff --git a/comm/mailnews/intl/nsUTF7ToUnicode.h b/comm/mailnews/intl/nsUTF7ToUnicode.h new file mode 100644 index 0000000000..b7b5be4522 --- /dev/null +++ b/comm/mailnews/intl/nsUTF7ToUnicode.h @@ -0,0 +1,64 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsUTF7ToUnicode_h___ +#define nsUTF7ToUnicode_h___ + +//---------------------------------------------------------------------- +// Class nsBasicUTF7Decoder [declaration] + +/** + * Basic class for a character set converter from UTF-7 to Unicode. + * + * @created 03/Jun/1999 + * @author Catalin Rotaru [CATA] + */ +class nsBasicUTF7Decoder { + public: + /** + * Class constructor. 
+ */ + nsBasicUTF7Decoder(char aLastChar, char aEscChar); + NS_IMETHOD ConvertNoBuff(const char* aSrc, int32_t* aSrcLength, + char16_t* aDest, int32_t* aDestLength); + + protected: + int32_t mEncoding; // current encoding + uint32_t mEncBits; + int32_t mEncStep; + char mLastChar; + char mEscChar; + bool mFreshBase64; + + nsresult DecodeDirect(const char* aSrc, int32_t* aSrcLength, char16_t* aDest, + int32_t* aDestLength); + nsresult DecodeBase64(const char* aSrc, int32_t* aSrcLength, char16_t* aDest, + int32_t* aDestLength); + uint32_t CharToValue(char aChar); + + //-------------------------------------------------------------------- + // Subclassing of nsBufferDecoderSupport class [declaration] + + NS_IMETHOD Reset(); +}; + +//---------------------------------------------------------------------- +// Class nsUTF7ToUnicode [declaration] + +/** + * A character set converter from Modified UTF7 to Unicode. + * + * @created 18/May/1999 + * @author Catalin Rotaru [CATA] + */ +class nsUTF7ToUnicode : public nsBasicUTF7Decoder { + public: + /** + * Class constructor. + */ + nsUTF7ToUnicode(); +}; + +#endif /* nsUTF7ToUnicode_h___ */ diff --git a/comm/mailnews/intl/nsUnicodeToMUTF7.cpp b/comm/mailnews/intl/nsUnicodeToMUTF7.cpp new file mode 100644 index 0000000000..56433a5421 --- /dev/null +++ b/comm/mailnews/intl/nsUnicodeToMUTF7.cpp @@ -0,0 +1,11 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsUnicodeToMUTF7.h" + +//---------------------------------------------------------------------- +// Class nsUnicodeToMUTF7 [implementation] + +nsUnicodeToMUTF7::nsUnicodeToMUTF7() : nsBasicUTF7Encoder(',', '&') {} diff --git a/comm/mailnews/intl/nsUnicodeToMUTF7.h b/comm/mailnews/intl/nsUnicodeToMUTF7.h new file mode 100644 index 0000000000..fafb3b6a84 --- /dev/null +++ b/comm/mailnews/intl/nsUnicodeToMUTF7.h @@ -0,0 +1,28 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsUnicodeToMUTF7_h___ +#define nsUnicodeToMUTF7_h___ + +#include "nsUnicodeToUTF7.h" + +//---------------------------------------------------------------------- +// Class nsUnicodeToMUTF7 [declaration] + +/** + * A character set converter from Unicode to Modified UTF-7. + * + * @created 18/May/1999 + * @author Catalin Rotaru [CATA] + */ +class nsUnicodeToMUTF7 : public nsBasicUTF7Encoder { + public: + /** + * Class constructor. + */ + nsUnicodeToMUTF7(); +}; + +#endif /* nsUnicodeToMUTF7_h___ */ diff --git a/comm/mailnews/intl/nsUnicodeToUTF7.cpp b/comm/mailnews/intl/nsUnicodeToUTF7.cpp new file mode 100644 index 0000000000..ab9d1cf895 --- /dev/null +++ b/comm/mailnews/intl/nsUnicodeToUTF7.cpp @@ -0,0 +1,302 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "nsUnicodeToUTF7.h" +#include <string.h> + +//---------------------------------------------------------------------- +// Global functions and data [declaration] + +#define ENC_DIRECT 0 +#define ENC_BASE64 1 + +//---------------------------------------------------------------------- +// Class nsBasicUTF7Encoder [implementation] + +nsBasicUTF7Encoder::nsBasicUTF7Encoder(char aLastChar, char aEscChar) { + mLastChar = aLastChar; + mEscChar = aEscChar; + Reset(); +} + +nsresult nsBasicUTF7Encoder::ShiftEncoding(int32_t aEncoding, char* aDest, + int32_t* aDestLength) { + if (aEncoding == mEncoding) { + *aDestLength = 0; + return NS_OK; + } + + nsresult res = NS_OK; + char* dest = aDest; + char* destEnd = aDest + *aDestLength; + + if (mEncStep != 0) { + if (dest >= destEnd) return NS_OK_UENC_MOREOUTPUT; + *(dest++) = ValueToChar(mEncBits); + mEncStep = 0; + mEncBits = 0; + } + + if (dest >= destEnd) { + res = NS_OK_UENC_MOREOUTPUT; + } else { + switch (aEncoding) { + case 0: + *(dest++) = '-'; + mEncStep = 0; + mEncBits = 0; + break; + case 1: + *(dest++) = mEscChar; + break; + } + mEncoding = aEncoding; + } + + *aDestLength = dest - aDest; + return res; +} + +nsresult nsBasicUTF7Encoder::EncodeDirect(const char16_t* aSrc, + int32_t* aSrcLength, char* aDest, + int32_t* aDestLength) { + nsresult res = NS_OK; + const char16_t* src = aSrc; + const char16_t* srcEnd = aSrc + *aSrcLength; + char* dest = aDest; + char* destEnd = aDest + *aDestLength; + char16_t ch; + + while (src < srcEnd) { + ch = *src; + + // stop when we reach Unicode chars + if (!DirectEncodable(ch)) break; + + if (ch == mEscChar) { + // special case for the escape char + if (destEnd - dest < 1) { + res = NS_OK_UENC_MOREOUTPUT; + break; + } else { + *dest++ = (char)ch; + *dest++ = (char)'-'; + src++; + } + } else { + // classic direct encoding + if (dest >= destEnd) { + res = NS_OK_UENC_MOREOUTPUT; + break; + } else { + *dest++ = (char)ch; + src++; + } + } + } + + *aSrcLength = src - aSrc; + 
*aDestLength = dest - aDest; + return res; +} + +nsresult nsBasicUTF7Encoder::EncodeBase64(const char16_t* aSrc, + int32_t* aSrcLength, char* aDest, + int32_t* aDestLength) { + nsresult res = NS_OK; + const char16_t* src = aSrc; + const char16_t* srcEnd = aSrc + *aSrcLength; + char* dest = aDest; + char* destEnd = aDest + *aDestLength; + char16_t ch; + uint32_t value; + + while (src < srcEnd) { + ch = *src; + + // stop when we reach printable US-ASCII chars + if (DirectEncodable(ch)) break; + + switch (mEncStep) { + case 0: + if (destEnd - dest < 2) { + res = NS_OK_UENC_MOREOUTPUT; + break; + } + value = ch >> 10; + *(dest++) = ValueToChar(value); + value = (ch >> 4) & 0x3f; + *(dest++) = ValueToChar(value); + mEncBits = (ch & 0x0f) << 2; + break; + case 1: + if (destEnd - dest < 3) { + res = NS_OK_UENC_MOREOUTPUT; + break; + } + value = mEncBits + (ch >> 14); + *(dest++) = ValueToChar(value); + value = (ch >> 8) & 0x3f; + *(dest++) = ValueToChar(value); + value = (ch >> 2) & 0x3f; + *(dest++) = ValueToChar(value); + mEncBits = (ch & 0x03) << 4; + break; + case 2: + if (destEnd - dest < 3) { + res = NS_OK_UENC_MOREOUTPUT; + break; + } + value = mEncBits + (ch >> 12); + *(dest++) = ValueToChar(value); + value = (ch >> 6) & 0x3f; + *(dest++) = ValueToChar(value); + value = ch & 0x3f; + *(dest++) = ValueToChar(value); + mEncBits = 0; + break; + } + + if (res != NS_OK) break; + + src++; + (++mEncStep) %= 3; + } + + *aSrcLength = src - aSrc; + *aDestLength = dest - aDest; + return res; +} + +char nsBasicUTF7Encoder::ValueToChar(uint32_t aValue) { + if (aValue < 26) + return (char)('A' + aValue); + else if (aValue < 26 + 26) + return (char)('a' + aValue - 26); + else if (aValue < 26 + 26 + 10) + return (char)('0' + aValue - 26 - 26); + else if (aValue == 26 + 26 + 10) + return '+'; + else if (aValue == 26 + 26 + 10 + 1) + return mLastChar; + else + return -1; +} + +bool nsBasicUTF7Encoder::DirectEncodable(char16_t aChar) { + // spec says: printable US-ASCII chars + if 
((aChar >= 0x20) && (aChar <= 0x7e)) + return true; + else + return false; +} + +//---------------------------------------------------------------------- +// Subclassing of nsEncoderSupport class [implementation] + +NS_IMETHODIMP nsBasicUTF7Encoder::ConvertNoBuffNoErr(const char16_t* aSrc, + int32_t* aSrcLength, + char* aDest, + int32_t* aDestLength) { + nsresult res = NS_OK; + const char16_t* src = aSrc; + const char16_t* srcEnd = aSrc + *aSrcLength; + char* dest = aDest; + char* destEnd = aDest + *aDestLength; + int32_t bcr, bcw; + char16_t ch; + int32_t enc; + + while (src < srcEnd) { + // find the encoding for the next char + ch = *src; + if (DirectEncodable(ch)) + enc = ENC_DIRECT; + else + enc = ENC_BASE64; + + // if necessary, shift into the required encoding + bcw = destEnd - dest; + res = ShiftEncoding(enc, dest, &bcw); + dest += bcw; + if (res != NS_OK) break; + + // now encode (as much as you can) + bcr = srcEnd - src; + bcw = destEnd - dest; + if (enc == ENC_DIRECT) + res = EncodeDirect(src, &bcr, dest, &bcw); + else + res = EncodeBase64(src, &bcr, dest, &bcw); + src += bcr; + dest += bcw; + + if (res != NS_OK) break; + } + + *aSrcLength = src - aSrc; + *aDestLength = dest - aDest; + return res; +} + +NS_IMETHODIMP nsBasicUTF7Encoder::FinishNoBuff(char* aDest, + int32_t* aDestLength) { + return ShiftEncoding(ENC_DIRECT, aDest, aDestLength); +} + +NS_IMETHODIMP nsBasicUTF7Encoder::Reset() { + mEncoding = ENC_DIRECT; + mEncBits = 0; + mEncStep = 0; + return NS_OK; +} + +//---------------------------------------------------------------------- +// Class nsUnicodeToUTF7 [implementation] + +nsUnicodeToUTF7::nsUnicodeToUTF7() : nsBasicUTF7Encoder('/', '+') {} + +bool nsUnicodeToUTF7::DirectEncodable(char16_t aChar) { + if ((aChar >= 'A') && (aChar <= 'Z')) + return true; + else if ((aChar >= 'a') && (aChar <= 'z')) + return true; + else if ((aChar >= '0') && (aChar <= '9')) + return true; + else if ((aChar >= 39) && (aChar <= 41)) + return true; + else if 
((aChar >= 44) && (aChar <= 47)) + return true; + else if (aChar == 58) + return true; + else if (aChar == 63) + return true; + else if (aChar == ' ') + return true; + else if (aChar == 9) + return true; + else if (aChar == 13) + return true; + else if (aChar == 10) + return true; + else if (aChar == 60) + return true; // '<' + else if (aChar == 33) + return true; // '!' + else if (aChar == 34) + return true; // '"' + else if (aChar == 62) + return true; // '>' + else if (aChar == 61) + return true; // '=' + else if (aChar == 59) + return true; // ';' + else if (aChar == 91) + return true; // '[' + else if (aChar == 93) + return true; // ']' + else + return false; +} diff --git a/comm/mailnews/intl/nsUnicodeToUTF7.h b/comm/mailnews/intl/nsUnicodeToUTF7.h new file mode 100644 index 0000000000..423bfa8198 --- /dev/null +++ b/comm/mailnews/intl/nsUnicodeToUTF7.h @@ -0,0 +1,69 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsUnicodeToUTF7_h___ +#define nsUnicodeToUTF7_h___ + +//---------------------------------------------------------------------- +// Class nsBasicUTF7Encoder [declaration] + +/** + * Basic class for a character set converter from Unicode to UTF-7. + * + * @created 03/Jun/1999 + * @author Catalin Rotaru [CATA] + */ +class nsBasicUTF7Encoder { + public: + /** + * Class constructor. 
+ */ + nsBasicUTF7Encoder(char aLastChar, char aEscChar); + NS_IMETHOD ConvertNoBuffNoErr(const char16_t* aSrc, int32_t* aSrcLength, + char* aDest, int32_t* aDestLength); + NS_IMETHOD FinishNoBuff(char* aDest, int32_t* aDestLength); + + protected: + int32_t mEncoding; // current encoding + uint32_t mEncBits; + int32_t mEncStep; + char mLastChar; + char mEscChar; + + nsresult ShiftEncoding(int32_t aEncoding, char* aDest, int32_t* aDestLength); + nsresult EncodeDirect(const char16_t* aSrc, int32_t* aSrcLength, char* aDest, + int32_t* aDestLength); + nsresult EncodeBase64(const char16_t* aSrc, int32_t* aSrcLength, char* aDest, + int32_t* aDestLength); + char ValueToChar(uint32_t aValue); + virtual bool DirectEncodable(char16_t aChar); + + //-------------------------------------------------------------------- + // Subclassing of nsEncoderSupport class [declaration] + + NS_IMETHOD Reset(); +}; + +//---------------------------------------------------------------------- +// Class nsUnicodeToUTF7 [declaration] + +/** + * A character set converter from Unicode to UTF-7. + * + * @created 03/Jun/1999 + * @author Catalin Rotaru [CATA] + */ +class nsUnicodeToUTF7 : public nsBasicUTF7Encoder { + public: + /** + * Class constructor. + */ + nsUnicodeToUTF7(); + + protected: + virtual bool DirectEncodable(char16_t aChar); +}; + +#endif /* nsUnicodeToUTF7_h___ */ diff --git a/comm/mailnews/intl/test/moz.build b/comm/mailnews/intl/test/moz.build new file mode 100644 index 0000000000..6b37fdbe09 --- /dev/null +++ b/comm/mailnews/intl/test/moz.build @@ -0,0 +1,6 @@ +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
// ---- comm/mailnews/intl/test/moz.build (final line) ----
// XPCSHELL_TESTS_MANIFESTS += ["unit/xpcshell.ini"]

// ---- comm/mailnews/intl/test/unit/head_CharsetConversionTests.js ----

var CC = Components.Constructor;

/**
 * Creates a new scriptable Unicode converter instance.
 *
 * @returns {object} An instance constructed from the
 *   "@mozilla.org/intl/scriptableunicodeconverter" contract.
 */
function CreateScriptableConverter() {
  const ScriptableUnicodeConverter = CC(
    "@mozilla.org/intl/scriptableunicodeconverter",
    "nsIScriptableUnicodeConverter"
  );

  return new ScriptableUnicodeConverter();
}

/**
 * Points `converter` at the canonical name of `charset`. When the charset
 * converter manager does not recognise the name (or the converter rejects
 * it), the converter falls back to "iso-8859-1" so the test still runs and
 * fails on the comparison rather than on setup.
 *
 * @param {object} converter - Converter whose `charset` property is set.
 * @param {string} charset - Charset name or alias to resolve.
 */
function setConverterCharset(converter, charset) {
  const manager = Cc["@mozilla.org/charset-converter-manager;1"].getService(
    Ci.nsICharsetConverterManager
  );

  try {
    converter.charset = manager.getCharsetAlias(charset);
  } catch (e) {
    converter.charset = "iso-8859-1";
  }
}

/**
 * Decodes `inText` from `charset` and asserts that the result equals
 * `expectedText`. A converter that throws while decoding is treated as having
 * produced a single U+FFFD, so the comparison still reports the mismatch.
 *
 * @param {object} converter - Scriptable converter to use.
 * @param {string} charset - Charset name or alias to decode from.
 * @param {string} inText - Encoded input.
 * @param {string} expectedText - Expected decoded output.
 */
function checkDecode(converter, charset, inText, expectedText) {
  setConverterCharset(converter, charset);

  dump(`testing decoding from ${charset} to Unicode.\n`);
  let outText;
  try {
    outText = converter.ConvertToUnicode(inText) + converter.Finish();
  } catch (e) {
    outText = "\ufffd";
  }
  Assert.equal(outText, expectedText, `decoding from ${charset}`);
}

/**
 * Encodes `inText` into `charset` and asserts that the result equals
 * `expectedText`. Unlike checkDecode, an exception from the converter is not
 * caught and propagates to the caller.
 *
 * @param {object} converter - Scriptable converter to use.
 * @param {string} charset - Charset name or alias to encode to.
 * @param {string} inText - Unicode input.
 * @param {string} expectedText - Expected encoded output.
 */
function checkEncode(converter, charset, inText, expectedText) {
  setConverterCharset(converter, charset);

  dump(`testing encoding from Unicode to ${charset}\n`);
  const outText = converter.ConvertFromUnicode(inText) + converter.Finish();
  Assert.equal(outText, expectedText, `encoding to ${charset}`);
}

// ---- comm/mailnews/intl/test/unit/test_decode_utf-7.js ----
// Tests conversion from UTF-7 to Unicode. The conversion should fail!
+ +var inString = + "+LGI--+ITIipSIp- +AocCeQ-oddns +Ad0CjQ- s+ATECZQKH- p+AlAB3QJ5- u+AlACVA- no+Ao4- +Al8-I"; + +var expectedString = + "+LGI--+ITIipSIp- +AocCeQ-oddns +Ad0CjQ- s+ATECZQKH- p+AlAB3QJ5- u+AlACVA- no+Ao4- +Al8-I"; + +var aliases = [ + "UTF-7", + "utf-7", + "x-unicode-2-0-utf-7", + "unicode-2-0-utf-7", + "unicode-1-1-utf-7", + "csunicode11utf7", +]; + +function run_test() { + let converter = CreateScriptableConverter(); + for (let i = 0; i < aliases.length; ++i) { + checkDecode(converter, aliases[i], inString, expectedString); + } +} diff --git a/comm/mailnews/intl/test/unit/test_decode_utf-7_internal.js b/comm/mailnews/intl/test/unit/test_decode_utf-7_internal.js new file mode 100644 index 0000000000..e31f0f8840 --- /dev/null +++ b/comm/mailnews/intl/test/unit/test_decode_utf-7_internal.js @@ -0,0 +1,30 @@ +// Tests conversion from UTF-7 to Unicode. + +var inString = + "+LGI--+ITIipSIp- +AocCeQ-oddns +Ad0CjQ- s+ATECZQKH- p+AlAB3QJ5- u+AlACVA- no+Ao4- +Al8-I"; + +var expectedString = + "\u2C62-\u2132\u22A5\u2229 \u0287\u0279oddns \u01DD\u028D s\u0131\u0265\u0287 p\u0250\u01DD\u0279 u\u0250\u0254 no\u028E \u025FI"; + +var aliases = [ + "UTF-7", + "utf-7", + "x-unicode-2-0-utf-7", + "unicode-2-0-utf-7", + "unicode-1-1-utf-7", + "csunicode11utf7", +]; +function run_test() { + let manager = Cc["@mozilla.org/charset-converter-manager;1"].getService( + Ci.nsICharsetConverterManager + ); + let converter = CreateScriptableConverter(); + converter.isInternal = true; + for (let i = 0; i < aliases.length; ++i) { + if (manager.getCharsetAlias(aliases[i]).toLowerCase() == "utf-7") { + Assert.equal(manager.utf7ToUnicode(inString), expectedString); + } else { + checkDecode(converter, aliases[i], inString, expectedString); + } + } +} diff --git a/comm/mailnews/intl/test/unit/test_encode_utf-7.js b/comm/mailnews/intl/test/unit/test_encode_utf-7.js new file mode 100644 index 0000000000..1acc8957bd --- /dev/null +++ b/comm/mailnews/intl/test/unit/test_encode_utf-7.js 
@@ -0,0 +1,22 @@ +// Tests conversion from Unicode to UTF-7. The conversion should fail! + +var inString = + "\u2C62-\u2132\u22A5\u2229 \u0287\u0279oddns \u01DD\u028D s\u0131\u0265\u0287 p\u0250\u01DD\u0279 u\u0250\u0254 no\u028E \u025FI"; + +var expectedString = "?-??? ??oddns ?? s??? p??? u?? no? ?I"; + +var aliases = [ + "UTF-7", + "utf-7", + "x-unicode-2-0-utf-7", + "unicode-2-0-utf-7", + "unicode-1-1-utf-7", + "csunicode11utf7", +]; + +function run_test() { + let converter = CreateScriptableConverter(); + for (let i = 0; i < aliases.length; ++i) { + checkEncode(converter, aliases[i], inString, expectedString); + } +} diff --git a/comm/mailnews/intl/test/unit/test_encode_utf-7_internal.js b/comm/mailnews/intl/test/unit/test_encode_utf-7_internal.js new file mode 100644 index 0000000000..31af29c30b --- /dev/null +++ b/comm/mailnews/intl/test/unit/test_encode_utf-7_internal.js @@ -0,0 +1,24 @@ +// Tests conversion from Unicode to UTF-7. + +var inString = + "\u2C62-\u2132\u22A5\u2229 \u0287\u0279oddns \u01DD\u028D s\u0131\u0265\u0287 p\u0250\u01DD\u0279 u\u0250\u0254 no\u028E \u025FI"; + +var expectedString = + "+LGI--+ITIipSIp- +AocCeQ-oddns +Ad0CjQ- s+ATECZQKH- p+AlAB3QJ5- u+AlACVA- no+Ao4- +Al8-I"; + +var aliases = [ + "UTF-7", + "utf-7", + "x-unicode-2-0-utf-7", + "unicode-2-0-utf-7", + "unicode-1-1-utf-7", + "csunicode11utf7", +]; + +function run_test() { + let converter = CreateScriptableConverter(); + converter.isInternal = true; + for (let i = 0; i < aliases.length; ++i) { + checkEncode(converter, aliases[i], inString, expectedString); + } +} diff --git a/comm/mailnews/intl/test/unit/xpcshell.ini b/comm/mailnews/intl/test/unit/xpcshell.ini new file mode 100644 index 0000000000..cbc671669c --- /dev/null +++ b/comm/mailnews/intl/test/unit/xpcshell.ini @@ -0,0 +1,10 @@ +[DEFAULT] +head = head_CharsetConversionTests.js +tail = + +[test_decode_utf-7.js] +[test_decode_utf-7_internal.js] +[test_encode_utf-7.js] +[test_encode_utf-7_internal.js] +# Disabled per bug 
1363281: No scriptable converter for UTF-7 exists any more. +skip-if = true |