summaryrefslogtreecommitdiffstats
path: root/comm/mailnews/intl
diff options
context:
space:
mode:
Diffstat (limited to 'comm/mailnews/intl')
-rw-r--r--comm/mailnews/intl/charsetData.properties104
-rw-r--r--comm/mailnews/intl/charsetalias.properties151
-rw-r--r--comm/mailnews/intl/components.conf12
-rw-r--r--comm/mailnews/intl/jar.mn6
-rw-r--r--comm/mailnews/intl/moz.build42
-rw-r--r--comm/mailnews/intl/nsCharsetAlias.cpp86
-rw-r--r--comm/mailnews/intl/nsCharsetAlias.h27
-rw-r--r--comm/mailnews/intl/nsCharsetConverterManager.cpp184
-rw-r--r--comm/mailnews/intl/nsCharsetConverterManager.h27
-rw-r--r--comm/mailnews/intl/nsICharsetConverterManager.idl71
-rw-r--r--comm/mailnews/intl/nsMUTF7ToUnicode.cpp11
-rw-r--r--comm/mailnews/intl/nsMUTF7ToUnicode.h28
-rw-r--r--comm/mailnews/intl/nsUTF7ToUnicode.cpp217
-rw-r--r--comm/mailnews/intl/nsUTF7ToUnicode.h64
-rw-r--r--comm/mailnews/intl/nsUnicodeToMUTF7.cpp11
-rw-r--r--comm/mailnews/intl/nsUnicodeToMUTF7.h28
-rw-r--r--comm/mailnews/intl/nsUnicodeToUTF7.cpp302
-rw-r--r--comm/mailnews/intl/nsUnicodeToUTF7.h69
-rw-r--r--comm/mailnews/intl/test/moz.build6
-rw-r--r--comm/mailnews/intl/test/unit/head_CharsetConversionTests.js46
-rw-r--r--comm/mailnews/intl/test/unit/test_decode_utf-7.js23
-rw-r--r--comm/mailnews/intl/test/unit/test_decode_utf-7_internal.js30
-rw-r--r--comm/mailnews/intl/test/unit/test_encode_utf-7.js22
-rw-r--r--comm/mailnews/intl/test/unit/test_encode_utf-7_internal.js24
-rw-r--r--comm/mailnews/intl/test/unit/xpcshell.ini10
25 files changed, 1601 insertions, 0 deletions
diff --git a/comm/mailnews/intl/charsetData.properties b/comm/mailnews/intl/charsetData.properties
new file mode 100644
index 0000000000..d4a4147cda
--- /dev/null
+++ b/comm/mailnews/intl/charsetData.properties
@@ -0,0 +1,104 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+## Rule of this file:
+## 1. key should always be in lower case ascii so we can do case insensitive
+## comparison in the code faster.
+
+## Format of this file:
+##
+## charset_name.isInternal = anything - specifies that this charset should
+## not be exposed to web content because of the vulnerability to XSS attacks
+## or some other reasons
+##
+## charset_name.LangGroup =
+##
+## charset_name.isMultibyte = multi byte charsets
+
+replacement.isInternal = true
+
+# XXX : todo: move to something based on BCP 47 (RFC 5646);
+# these should primarily specify script (and sometimes region),
+# but NOT language.
+# See also https://bugzilla.mozilla.org/show_bug.cgi?id=756022
+# e.g. x-western -> *-Latn-155 (Western Europe),
+# *-Latn-151 (Eastern Europe),
+# *-Latn-154 (Northern Europe),
+# *-Latn-TR
+# x-cyrillic -> *-Cyrl
+# zh-TW -> *-Hant-TW
+# zh-HK -> *-Hant-HK
+# zh-CN -> *-Hans
+# ja -> *-Jpan
+# ko -> *-Hang
+# he -> *-Hebr
+# ar -> *-Arab
+# etc
+
+big5.LangGroup = zh-TW
+big5-hkscs.LangGroup = zh-HK
+euc-jp.LangGroup = ja
+euc-kr.LangGroup = ko
+gb2312.LangGroup = zh-CN
+gb18030.LangGroup = zh-CN
+gb18030.2000-0.LangGroup = zh-CN
+gb18030.2000-1.LangGroup = zh-CN
+hkscs-1.LangGroup = zh-HK
+ibm866.LangGroup = x-cyrillic
+ibm1125.LangGroup = x-cyrillic
+ibm1131.LangGroup = x-cyrillic
+iso-2022-jp.LangGroup = ja
+iso-8859-1.LangGroup = x-western
+iso-8859-10.LangGroup = x-western
+iso-8859-14.LangGroup = x-western
+iso-8859-15.LangGroup = x-western
+iso-8859-2.LangGroup = x-western
+iso-8859-16.LangGroup = x-western
+iso-8859-3.LangGroup = x-western
+iso-8859-4.LangGroup = x-western
+iso-8859-13.LangGroup = x-western
+iso-8859-5.LangGroup = x-cyrillic
+iso-8859-6.LangGroup = ar
+iso-8859-7.LangGroup = el
+iso-8859-8.LangGroup = he
+iso-8859-8-i.LangGroup = he
+jis_0208-1983.LangGroup = ja
+koi8-r.LangGroup = x-cyrillic
+koi8-u.LangGroup = x-cyrillic
+shift_jis.LangGroup = ja
+windows-874.LangGroup = th
+utf-8.LangGroup = x-unicode
+utf-16.LangGroup = x-unicode
+utf-16be.LangGroup = x-unicode
+utf-16le.LangGroup = x-unicode
+utf-7.LangGroup = x-unicode
+replacement.LangGroup = x-unicode
+windows-1250.LangGroup = x-western
+windows-1251.LangGroup = x-cyrillic
+windows-1252.LangGroup = x-western
+windows-1253.LangGroup = el
+windows-1254.LangGroup = x-western
+windows-1255.LangGroup = he
+windows-1256.LangGroup = ar
+windows-1257.LangGroup = x-western
+windows-1258.LangGroup = x-western
+gbk.LangGroup = zh-CN
+
+# The following two are in the Encoding Standard (https://encoding.spec.whatwg.org/),
+# x-mac-ukrainian is a label.
+x-mac-cyrillic.LangGroup = x-cyrillic
+macintosh.LangGroup = x-western
+
+x-user-defined.LangGroup = x-unicode
+
+iso-2022-jp.isMultibyte = true
+shift_jis.isMultibyte = true
+euc-jp.isMultibyte = true
+big5.isMultibyte = true
+big5-hkscs.isMultibyte = true
+gb2312.isMultibyte = true
+euc-kr.isMultibyte = true
+utf-7.isMultibyte = true
+utf-8.isMultibyte = true
+replacement.isMultibyte = true
diff --git a/comm/mailnews/intl/charsetalias.properties b/comm/mailnews/intl/charsetalias.properties
new file mode 100644
index 0000000000..1ef5e52ee2
--- /dev/null
+++ b/comm/mailnews/intl/charsetalias.properties
@@ -0,0 +1,151 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Rule of this file:
+# 1. key should always be in lower case ascii so we can do case insensitive
+# comparison in the code faster.
+# 2. value should be the _name_ used in the WHATWG Encoding Standard
+# https://encoding.spec.whatwg.org/ (or "UTF-7" for UTF-7).
+#
+# This file contains email-specific labels. Web-relevant labels for
+# encodings are in the Encoding Standard / encoding_rs.
+
+# Added for Solaris ns_langinfo. Unlikely relevant to email.
+# https://bugzilla.mozilla.org/show_bug.cgi?id=77300#c9
+646=windows-1252
+
+# Aliases for ISO-8859-8-I
+# From the original IBM bidi patch.
+iso-8859-8i=ISO-8859-8-I
+
+# ISO 8859 series with underscore for JavaMail
+# compat.
+# https://bugzilla.mozilla.org/show_bug.cgi?id=820767
+iso8859_1=windows-1252
+iso8859_2=ISO-8859-2
+iso8859_3=ISO-8859-3
+iso8859_4=ISO-8859-4
+iso8859_5=ISO-8859-5
+iso8859_6=ISO-8859-6
+iso8859_7=ISO-8859-7
+# Unclear if 8 with underscore was visual or not
+iso8859_9=windows-1254
+# No evidence of 10 occurring with underscore
+# 11 is tis620
+# 12 does not exist
+iso8859_13=ISO-8859-13
+# No evidence of 14 occurring with underscore
+iso8859_15=ISO-8859-15
+# No evidence of 16 occurring with underscore
+
+koi8r=KOI8-R
+
+# Code pages shared by DOS and Windows with ms prefix.
+# Evidence of this pattern in the wild:
+# https://bugzilla.mozilla.org/show_bug.cgi?id=1120813
+# Plausible cause: JavaMail
+# The ms prefix as used by Sun is not relevant to windows-125x series
+ms874=windows-874
+# ms932 was added to the Encoding Standard as one-off Thunderbird request
+# MS936 shows up at https://www.iana.org/assignments/character-sets/character-sets.xhtml
+ms936=GBK
+ms949=EUC-KR
+ms950=Big5
+ms950_hkscs=Big5
+
+# Underscore versions of Unix CJK encodings.
+# No evidence of these in the wild, but these could plausibly
+# occur for the same reason as the above two groups.
+euc_cn=GBK
+euc_kr=EUC-KR
+euc_jp=EUC-JP
+big5_hkscs=Big5
+
+# Code pages shared by DOS and Windows with cp prefix
+# cp125x series are in the Encoding Standard
+# Evidence of the pattern in the wild:
+# https://bugzilla.mozilla.org/show_bug.cgi?id=1511950
+# https://bugzilla.mozilla.org/show_bug.cgi?id=542823
+# https://bugzilla.mozilla.org/show_bug.cgi?id=1217161
+cp874=windows-874
+cp932=Shift_JIS
+# CP936 shows up at https://www.iana.org/assignments/character-sets/character-sets.xhtml
+cp936=GBK
+cp949=EUC-KR
+cp950=Big5
+
+# Aliases for ISO-2022-JP
+# The following are not really aliases of ISO-2022-JP, but share the same decoder.
+# Kept mainly for compat with old Apple Mail.
+iso-2022-jp-2=ISO-2022-JP
+csiso2022jp2=ISO-2022-JP
+# A Google search suggests the variant without hyphens has been used with
+# JavaMail.
+iso2022jp=ISO-2022-JP
+
+# Aliases for Big5
+# Added in patch that generally meant to support emails sent by
+# dtmail on Sun Solaris
+# https://bugzilla.mozilla.org/show_bug.cgi?id=146287
+zh_tw-big5=Big5
+
+# Aliases for EUC-KR
+# Added for Solaris ns_langinfo. Unlikely relevant to email.
+# https://bugzilla.mozilla.org/show_bug.cgi?id=82075
+5601=EUC-KR
+# https://bugzilla.mozilla.org/show_bug.cgi?id=234958
+x-windows-949=EUC-KR
+
+# Aliases for windows-874
+# Added originally for nl_langinfo reasons but could plausibly be sent
+# by JavaMail.
+# https://bugzilla.mozilla.org/show_bug.cgi?id=101295
+tis620=windows-874
+
+# Aliases for IBM866
+# This alias may have been made up by accident and may
+# not be relevant to real-world email.
+# https://bugzilla.mozilla.org/show_bug.cgi?id=77588
+cp-866=IBM866
+
+# Aliases for UTF-7
+utf-7=UTF-7
+# The below 4 aliases were not in Thunderbird 60, and there were
+# no complaints.
+# This alias appears to have been generated by the email part
+# of the Netscape 4.0 suite per http://jkorpela.fi/chars.html
+x-unicode-2-0-utf-7=UTF-7
+# This appears to be just a made-up non-x version of the above
+# (checked in without bug number).
+unicode-2-0-utf-7=UTF-7
+# The two aliases below show up at
+# https://www.iana.org/assignments/character-sets/character-sets.xhtml
+unicode-1-1-utf-7=UTF-7
+csunicode11utf7=UTF-7
+
+# The below aliases were not in Thunderbird 60, and there were
+# no complaints.
+# These aliases show up at
+# https://www.iana.org/assignments/character-sets/character-sets.xhtml
+csunicode=UTF-16BE
+csunicode11=UTF-16BE
+iso-10646-ucs-basic=UTF-16BE
+csunicodeascii=UTF-16BE
+iso-10646-unicode-latin1=UTF-16BE
+csunicodelatin1=UTF-16BE
+iso-10646=UTF-16BE
+iso-10646-j-1=UTF-16BE
+iso-10646-ucs-2=UTF-16BE
+# Netscape aliases checked in without bug number.
+# Possibly meant to be Netscape-private.
+x-iso-10646-ucs-2-be=UTF-16BE
+x-iso-10646-ucs-2-le=UTF-16LE
+
+# Shows up at https://www.iana.org/assignments/character-sets/character-sets.xhtml
+# https://bugzilla.mozilla.org/show_bug.cgi?id=651113
+windows-936=GBK
+
+# Added for Solaris ns_langinfo(). Unlikely to be relevant to email.
+# https://bugzilla.mozilla.org/show_bug.cgi?id=82075
+ansi-1251=windows-1251
diff --git a/comm/mailnews/intl/components.conf b/comm/mailnews/intl/components.conf
new file mode 100644
index 0000000000..6c1f128442
--- /dev/null
+++ b/comm/mailnews/intl/components.conf
@@ -0,0 +1,12 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+Classes = [
+ {
+ "cid": "{3c1c0163-9bd0-11d3-9d09-0050040007b2}",
+ "contract_ids": ["@mozilla.org/charset-converter-manager;1"],
+ "type": "nsCharsetConverterManager",
+ "headers": ["/comm/mailnews/intl/nsCharsetConverterManager.h"],
+ },
+]
diff --git a/comm/mailnews/intl/jar.mn b/comm/mailnews/intl/jar.mn
new file mode 100644
index 0000000000..ab02275d97
--- /dev/null
+++ b/comm/mailnews/intl/jar.mn
@@ -0,0 +1,6 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+toolkit.jar:
+ res/charsetData.properties (charsetData.properties)
diff --git a/comm/mailnews/intl/moz.build b/comm/mailnews/intl/moz.build
new file mode 100644
index 0000000000..03b8a13a46
--- /dev/null
+++ b/comm/mailnews/intl/moz.build
@@ -0,0 +1,42 @@
+# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+XPIDL_SOURCES += [
+ "nsICharsetConverterManager.idl",
+]
+
+UNIFIED_SOURCES += [
+ "nsCharsetAlias.cpp",
+ "nsCharsetConverterManager.cpp",
+ "nsMUTF7ToUnicode.cpp",
+ "nsUnicodeToMUTF7.cpp",
+ "nsUnicodeToUTF7.cpp",
+ "nsUTF7ToUnicode.cpp",
+]
+
+XPIDL_MODULE = "commuconv"
+
+LOCAL_INCLUDES += [
+ "/intl/locale",
+]
+
+GENERATED_FILES += [
+ "charsetalias.properties.h",
+]
+charsetalias = GENERATED_FILES["charsetalias.properties.h"]
+charsetalias.script = "/intl/locale/props2arrays.py"
+charsetalias.inputs = ["charsetalias.properties"]
+
+FINAL_LIBRARY = "mail"
+
+# Tests need more attention before they can be enabled.
+TEST_DIRS += ["test"]
+
+JAR_MANIFESTS += ["jar.mn"]
+
+XPCOM_MANIFESTS += [
+ "components.conf",
+]
diff --git a/comm/mailnews/intl/nsCharsetAlias.cpp b/comm/mailnews/intl/nsCharsetAlias.cpp
new file mode 100644
index 0000000000..a92cf193c9
--- /dev/null
+++ b/comm/mailnews/intl/nsCharsetAlias.cpp
@@ -0,0 +1,86 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Encoding.h"
+
+#include "nsCharsetAlias.h"
+
+// for NS_ERROR_UCONV_NOCONV
+#include "nsCharsetConverterManager.h"
+
+#include "nsUConvPropertySearch.h"
+
+using namespace mozilla;
+
+//
+static const nsUConvProp kAliases[] = {
+#include "charsetalias.properties.h"
+};
+
+//--------------------------------------------------------------
+// static
+nsresult nsCharsetAlias::GetPreferredInternal(const nsACString& aAlias,
+ nsACString& oResult) {
+ // First check charsetalias.properties and if there is no match, continue to
+ // call Encoding::ForLabel.
+ nsAutoCString key(aAlias);
+ ToLowerCase(key);
+
+ nsresult rv = nsUConvPropertySearch::SearchPropertyValue(
+ kAliases, ArrayLength(kAliases), key, oResult);
+ if (NS_SUCCEEDED(rv)) {
+ return NS_OK;
+ }
+
+ const Encoding* encoding = Encoding::ForLabel(key);
+ if (!encoding) return NS_ERROR_NOT_AVAILABLE;
+ encoding->Name(oResult);
+ return NS_OK;
+}
+
+//--------------------------------------------------------------
+// static
+nsresult nsCharsetAlias::GetPreferred(const nsACString& aAlias,
+ nsACString& oResult) {
+ if (aAlias.IsEmpty()) return NS_ERROR_NULL_POINTER;
+
+ nsresult res = GetPreferredInternal(aAlias, oResult);
+ if (NS_FAILED(res)) return res;
+
+ if (nsCharsetConverterManager::IsInternal(oResult))
+ return NS_ERROR_UCONV_NOCONV;
+
+ return res;
+}
+
+//--------------------------------------------------------------
+// static
+nsresult nsCharsetAlias::Equals(const nsACString& aCharset1,
+ const nsACString& aCharset2, bool* oResult) {
+ nsresult res = NS_OK;
+
+ if (aCharset1.Equals(aCharset2, nsCaseInsensitiveCStringComparator)) {
+ *oResult = true;
+ return res;
+ }
+
+ if (aCharset1.IsEmpty() || aCharset2.IsEmpty()) {
+ *oResult = false;
+ return res;
+ }
+
+ *oResult = false;
+ nsAutoCString name1;
+ res = GetPreferredInternal(aCharset1, name1);
+ if (NS_FAILED(res)) return res;
+
+ nsAutoCString name2;
+ res = GetPreferredInternal(aCharset2, name2);
+ if (NS_FAILED(res)) return res;
+
+ *oResult = name1.Equals(name2);
+ return NS_OK;
+}
diff --git a/comm/mailnews/intl/nsCharsetAlias.h b/comm/mailnews/intl/nsCharsetAlias.h
new file mode 100644
index 0000000000..6b24f33567
--- /dev/null
+++ b/comm/mailnews/intl/nsCharsetAlias.h
@@ -0,0 +1,27 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsCharsetAlias_h___
+#define nsCharsetAlias_h___
+
+#include "nscore.h"
+#include "nsString.h"
+
+class nsCharsetConverterManager;
+class nsScriptableUnicodeConverter;
+
+class nsCharsetAlias {
+ friend class nsCharsetConverterManager;
+ friend class nsScriptableUnicodeConverter;
+ static nsresult GetPreferredInternal(const nsACString& aAlias,
+ nsACString& aResult);
+
+ public:
+ static nsresult GetPreferred(const nsACString& aAlias, nsACString& aResult);
+ static nsresult Equals(const nsACString& aCharset1,
+ const nsACString& aCharset2, bool* aResult);
+};
+
+#endif /* nsCharsetAlias_h___ */
diff --git a/comm/mailnews/intl/nsCharsetConverterManager.cpp b/comm/mailnews/intl/nsCharsetConverterManager.cpp
new file mode 100644
index 0000000000..a15917206e
--- /dev/null
+++ b/comm/mailnews/intl/nsCharsetConverterManager.cpp
@@ -0,0 +1,184 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsCOMPtr.h"
+#include "nsString.h"
+#include "nsUnicharUtils.h"
+#include "nsCharsetAlias.h"
+#include "nsICharsetConverterManager.h"
+#include "nsIStringBundle.h"
+#include "nsTArray.h"
+#include "mozilla/Components.h"
+
+#include "nsComponentManagerUtils.h"
+#include "nsServiceManagerUtils.h"
+#include "../base/src/nsMsgI18N.h"
+
+// just for CONTRACTIDs
+#include "nsCharsetConverterManager.h"
+
+static nsCOMPtr<nsIStringBundle> sDataBundle;
+static nsCOMPtr<nsIStringBundle> sTitleBundle;
+
+// Class nsCharsetConverterManager [implementation]
+
+NS_IMPL_ISUPPORTS(nsCharsetConverterManager, nsICharsetConverterManager)
+
+nsCharsetConverterManager::nsCharsetConverterManager() {}
+
+nsCharsetConverterManager::~nsCharsetConverterManager() {
+ sDataBundle = nullptr;
+ sTitleBundle = nullptr;
+}
+
+static nsresult LoadBundle(const char* aBundleURLSpec,
+ nsIStringBundle** aResult) {
+ nsCOMPtr<nsIStringBundleService> sbServ =
+ mozilla::components::StringBundle::Service();
+ if (!sbServ) return NS_ERROR_FAILURE;
+
+ return sbServ->CreateBundle(aBundleURLSpec, aResult);
+}
+
+static nsresult GetBundleValueInner(nsIStringBundle* aBundle, const char* aName,
+ const nsString& aProp, nsAString& aResult) {
+ nsAutoString key;
+
+ CopyASCIItoUTF16(mozilla::MakeStringSpan(aName), key);
+ ToLowerCase(key); // we lowercase the main comparison key
+ key.Append(aProp);
+
+ return aBundle->GetStringFromName(NS_ConvertUTF16toUTF8(key).get(), aResult);
+}
+
+static nsresult GetBundleValue(nsIStringBundle* aBundle, const char* aName,
+ const nsString& aProp, nsAString& aResult) {
+ nsresult rv = NS_OK;
+
+ nsAutoString value;
+ rv = GetBundleValueInner(aBundle, aName, aProp, value);
+ if (NS_FAILED(rv)) return rv;
+
+ aResult = value;
+
+ return NS_OK;
+}
+
+// Looks up a per-charset property in charsetData.properties.
+//
+// The bundle key is the lower-cased aCharset followed by aProp (see
+// GetBundleValueInner). The bundle is loaded on first use and cached in
+// sDataBundle.
+//
+// @param aCharset charset name; must not be null.
+// @param aProp    property suffix including its leading dot, e.g.
+//                 u".LangGroup". May be nullptr, which is treated as an
+//                 empty suffix.
+// @param aResult  receives the property value on success.
+// @return an error if aCharset is null, if the bundle cannot be loaded, or
+//         if the bundle lookup fails.
+static nsresult GetCharsetDataImpl(const char* aCharset, const char16_t* aProp,
+                                   nsAString& aResult) {
+  NS_ENSURE_ARG_POINTER(aCharset);
+
+  if (!sDataBundle) {
+    nsresult rv = LoadBundle("resource://gre-resources/charsetData.properties",
+                             getter_AddRefs(sDataBundle));
+    if (NS_FAILED(rv)) return rv;
+  }
+
+  // aProp can be nullptr, but nsDependentString must not be constructed from
+  // a null pointer (it measures the string's length), so fall back to an
+  // empty string in that case.
+  return GetBundleValue(sDataBundle, aCharset,
+                        nsDependentString(aProp ? aProp : u""), aResult);
+}
+
+// static
+bool nsCharsetConverterManager::IsInternal(const nsACString& aCharset) {
+ nsAutoString str;
+ // fully qualify to possibly avoid vtable call
+ nsresult rv = GetCharsetDataImpl(PromiseFlatCString(aCharset).get(),
+ u".isInternal", str);
+
+ return NS_SUCCEEDED(rv);
+}
+
+//----------------------------------------------------------------------------
+// Interface nsICharsetConverterManager [implementation]
+
+// XXX Improve the implementation of this method. Right now, it is built on
+// top of the nsCharsetAlias service. We can make the nsCharsetAlias
+// better, with its own hash table (not the StringBundle anymore) and
+// a nicer file format.
+NS_IMETHODIMP
+nsCharsetConverterManager::GetCharsetAlias(const char* aCharset,
+ nsACString& aResult) {
+ NS_ENSURE_ARG_POINTER(aCharset);
+
+ // We try to obtain the preferred name for this charset from the charset
+ // aliases.
+ nsresult rv;
+
+ rv = nsCharsetAlias::GetPreferred(nsDependentCString(aCharset), aResult);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsCharsetConverterManager::GetCharsetTitle(const char* aCharset,
+ nsAString& aResult) {
+ NS_ENSURE_ARG_POINTER(aCharset);
+
+ if (!sTitleBundle) {
+ nsresult rv =
+ LoadBundle("chrome://messenger/locale/charsetTitles.properties",
+ getter_AddRefs(sTitleBundle));
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ return GetBundleValue(sTitleBundle, aCharset, u".title"_ns, aResult);
+}
+
+NS_IMETHODIMP
+nsCharsetConverterManager::GetCharsetData(const char* aCharset,
+ const char16_t* aProp,
+ nsAString& aResult) {
+ return GetCharsetDataImpl(aCharset, aProp, aResult);
+}
+
+NS_IMETHODIMP
+nsCharsetConverterManager::GetCharsetLangGroup(const char* aCharset,
+ nsACString& aResult) {
+ // resolve the charset first
+ nsAutoCString charset;
+
+ nsresult rv = GetCharsetAlias(aCharset, charset);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ // fully qualify to possibly avoid vtable call
+ return nsCharsetConverterManager::GetCharsetLangGroupRaw(charset.get(),
+ aResult);
+}
+
+NS_IMETHODIMP
+nsCharsetConverterManager::GetCharsetLangGroupRaw(const char* aCharset,
+ nsACString& aResult) {
+ nsAutoString langGroup;
+ // fully qualify to possibly avoid vtable call
+ nsresult rv = nsCharsetConverterManager::GetCharsetData(
+ aCharset, u".LangGroup", langGroup);
+
+ if (NS_SUCCEEDED(rv)) {
+ ToLowerCase(langGroup); // use lowercase for all language groups
+ aResult = NS_ConvertUTF16toUTF8(langGroup);
+ }
+
+ return rv;
+}
+
+NS_IMETHODIMP
+nsCharsetConverterManager::Utf7ToUnicode(const nsACString& aSrc,
+ nsAString& aDest) {
+ return CopyUTF7toUTF16(aSrc, aDest);
+}
+
+NS_IMETHODIMP
+nsCharsetConverterManager::Mutf7ToUnicode(const nsACString& aSrc,
+ nsAString& aDest) {
+ return CopyMUTF7toUTF16(aSrc, aDest);
+}
+
+NS_IMETHODIMP
+nsCharsetConverterManager::UnicodeToMutf7(const nsAString& aSrc,
+ nsACString& aDest) {
+ return CopyUTF16toMUTF7(aSrc, aDest);
+}
diff --git a/comm/mailnews/intl/nsCharsetConverterManager.h b/comm/mailnews/intl/nsCharsetConverterManager.h
new file mode 100644
index 0000000000..9a217bf0e6
--- /dev/null
+++ b/comm/mailnews/intl/nsCharsetConverterManager.h
@@ -0,0 +1,27 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef nsCharsetConverterManager_h__
+#define nsCharsetConverterManager_h__
+
+#include "nsICharsetConverterManager.h"
+
+class nsCharsetAlias;
+
+class nsCharsetConverterManager : public nsICharsetConverterManager {
+ friend class nsCharsetAlias;
+
+ NS_DECL_THREADSAFE_ISUPPORTS
+ NS_DECL_NSICHARSETCONVERTERMANAGER
+
+ public:
+ nsCharsetConverterManager();
+
+ private:
+ virtual ~nsCharsetConverterManager();
+
+ static bool IsInternal(const nsACString& aCharset);
+};
+
+#endif // nsCharsetConverterManager_h__
diff --git a/comm/mailnews/intl/nsICharsetConverterManager.idl b/comm/mailnews/intl/nsICharsetConverterManager.idl
new file mode 100644
index 0000000000..fe77ed6843
--- /dev/null
+++ b/comm/mailnews/intl/nsICharsetConverterManager.idl
@@ -0,0 +1,71 @@
+/* -*- Mode: IDL; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.idl"
+
+%{C++
+#include "mozilla/Encoding.h"
+
+// XXX change to NS_CHARSETCONVERTERMANAGER_CID
+#define NS_ICHARSETCONVERTERMANAGER_CID \
+ {0x3c1c0163, 0x9bd0, 0x11d3, { 0x9d, 0x9, 0x0, 0x50, 0x4, 0x0, 0x7, 0xb2}}
+
+#define NS_CHARSETCONVERTERMANAGER_CONTRACTID "@mozilla.org/charset-converter-manager;1"
+%}
+
+[scriptable, uuid(a0550d46-8d9c-47dd-acc7-c083620dff12)]
+interface nsICharsetConverterManager : nsISupports
+{
+ /**
+ * A shortcut to calling nsICharsetAlias to do alias resolution
+ * @throws if aCharset is an unknown charset.
+ */
+ ACString getCharsetAlias(in string aCharset);
+
+ /**
+ * Get the human-readable name for the given charset.
+ * @throws if aCharset is an unknown charset.
+ */
+ AString getCharsetTitle(in string aCharset);
+
+ /**
+ * Get some data about the given charset. This includes whether the
+ * character encoding may be used for certain purposes, if it is
+ * multi-byte, and the language code for it. See charsetData.properties
+ * for the source of this data. Some known property names:
+ * LangGroup - language code for charset, e.g. 'he' and 'zh-CN'.
+ * isMultibyte - is this a multi-byte charset?
+ * isInternal - not to be used in untrusted web content.
+ *
+ * @param aCharset name of the character encoding, e.g. 'iso-8859-15'.
+ * @param aProp property desired for the character encoding.
+ * @throws if aCharset is an unknown charset.
+ * @return the value of the property, for the character encoding.
+ */
+ AString getCharsetData(in string aCharset,
+ in wstring aProp);
+
+ /**
+ * Get the language group for the given charset. This is similar to
+ * calling <tt>getCharsetData</tt> with the <tt>prop</tt> "LangGroup".
+ *
+ * @param aCharset name of the character encoding, e.g. 'iso-8859-15'.
+ * @throws if aCharset is an unknown charset.
+ * @return the language code for the character encoding.
+ */
+ AUTF8String getCharsetLangGroup(in string aCharset);
+ AUTF8String getCharsetLangGroupRaw(in string aCharset);
+
+ /**
+ * Decoding of UTF-7 in message headers and bodies.
+ */
+ AString utf7ToUnicode(in ACString aMutf7);
+
+ /**
+ * Support for Modified UTF-7 (MUTF-7) used by IMAP.
+ */
+ AString mutf7ToUnicode(in ACString aMutf7);
+ ACString unicodeToMutf7(in AString aUnicode);
+};
diff --git a/comm/mailnews/intl/nsMUTF7ToUnicode.cpp b/comm/mailnews/intl/nsMUTF7ToUnicode.cpp
new file mode 100644
index 0000000000..bd49d647fa
--- /dev/null
+++ b/comm/mailnews/intl/nsMUTF7ToUnicode.cpp
@@ -0,0 +1,11 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsMUTF7ToUnicode.h"
+
+//----------------------------------------------------------------------
+// Class nsMUTF7ToUnicode [implementation]
+
+nsMUTF7ToUnicode::nsMUTF7ToUnicode() : nsBasicUTF7Decoder(',', '&') {}
diff --git a/comm/mailnews/intl/nsMUTF7ToUnicode.h b/comm/mailnews/intl/nsMUTF7ToUnicode.h
new file mode 100644
index 0000000000..ff26e8b6ab
--- /dev/null
+++ b/comm/mailnews/intl/nsMUTF7ToUnicode.h
@@ -0,0 +1,28 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsMUTF7ToUnicode_h___
+#define nsMUTF7ToUnicode_h___
+
+#include "nsUTF7ToUnicode.h"
+
+//----------------------------------------------------------------------
+// Class nsMUTF7ToUnicode [declaration]
+
+/**
+ * A character set converter from Modified UTF7 to Unicode.
+ *
+ * @created 18/May/1999
+ * @author Catalin Rotaru [CATA]
+ */
+class nsMUTF7ToUnicode : public nsBasicUTF7Decoder {
+ public:
+ /**
+ * Class constructor.
+ */
+ nsMUTF7ToUnicode();
+};
+
+#endif /* nsMUTF7ToUnicode_h___ */
diff --git a/comm/mailnews/intl/nsUTF7ToUnicode.cpp b/comm/mailnews/intl/nsUTF7ToUnicode.cpp
new file mode 100644
index 0000000000..2257affa51
--- /dev/null
+++ b/comm/mailnews/intl/nsUTF7ToUnicode.cpp
@@ -0,0 +1,217 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsUTF7ToUnicode.h"
+
+#define ENC_DIRECT 0
+#define ENC_BASE64 1
+
+//----------------------------------------------------------------------
+// Class nsBasicUTF7Decoder [implementation]
+
+nsBasicUTF7Decoder::nsBasicUTF7Decoder(char aLastChar, char aEscChar) {
+ mLastChar = aLastChar;
+ mEscChar = aEscChar;
+ mFreshBase64 = false;
+ Reset();
+}
+
+nsresult nsBasicUTF7Decoder::DecodeDirect(const char* aSrc, int32_t* aSrcLength,
+ char16_t* aDest,
+ int32_t* aDestLength) {
+ const char* srcEnd = aSrc + *aSrcLength;
+ const char* src = aSrc;
+ char16_t* destEnd = aDest + *aDestLength;
+ char16_t* dest = aDest;
+ nsresult res = NS_OK;
+ char ch;
+
+ while (src < srcEnd) {
+ ch = *src;
+
+ // stop when we meet other chars or end of direct encoded seq.
+ // if (!(DirectEncodable(ch)) || (ch == mEscChar)) {
+ // but we are decoding; so we should be lax; pass everything until escchar
+ if (ch == mEscChar) {
+ res = NS_ERROR_UDEC_ILLEGALINPUT;
+ break;
+ }
+
+ if (dest >= destEnd) {
+ res = NS_OK_UDEC_MOREOUTPUT;
+ break;
+ } else {
+ *dest++ = ch;
+ src++;
+ }
+ }
+
+ *aSrcLength = src - aSrc;
+ *aDestLength = dest - aDest;
+ return res;
+}
+
+nsresult nsBasicUTF7Decoder::DecodeBase64(const char* aSrc, int32_t* aSrcLength,
+ char16_t* aDest,
+ int32_t* aDestLength) {
+ const char* srcEnd = aSrc + *aSrcLength;
+ const char* src = aSrc;
+ char16_t* destEnd = aDest + *aDestLength;
+ char16_t* dest = aDest;
+ nsresult res = NS_OK;
+ char ch;
+ uint32_t value;
+
+ while (src < srcEnd) {
+ ch = *src;
+
+ // stop when we meet a non-base64 char, i.e. the end of the base64 encoded seq.
+ value = CharToValue(ch);
+ if (value > 0xff) {
+ res = NS_ERROR_UDEC_ILLEGALINPUT;
+ break;
+ }
+
+ switch (mEncStep) {
+ case 0:
+ mEncBits = value << 10;
+ break;
+ case 1:
+ mEncBits += value << 4;
+ break;
+ case 2:
+ if (dest >= destEnd) {
+ res = NS_OK_UDEC_MOREOUTPUT;
+ break;
+ }
+ mEncBits += value >> 2;
+ *(dest++) = (char16_t)mEncBits;
+ mEncBits = (value & 0x03) << 14;
+ break;
+ case 3:
+ mEncBits += value << 8;
+ break;
+ case 4:
+ mEncBits += value << 2;
+ break;
+ case 5:
+ if (dest >= destEnd) {
+ res = NS_OK_UDEC_MOREOUTPUT;
+ break;
+ }
+ mEncBits += value >> 4;
+ *(dest++) = (char16_t)mEncBits;
+ mEncBits = (value & 0x0f) << 12;
+ break;
+ case 6:
+ mEncBits += value << 6;
+ break;
+ case 7:
+ if (dest >= destEnd) {
+ res = NS_OK_UDEC_MOREOUTPUT;
+ break;
+ }
+ mEncBits += value;
+ *(dest++) = (char16_t)mEncBits;
+ mEncBits = 0;
+ break;
+ }
+
+ if (res != NS_OK) break;
+
+ src++;
+ (++mEncStep) %= 8;
+ }
+
+ *aSrcLength = src - aSrc;
+ *aDestLength = dest - aDest;
+ return res;
+}
+
+uint32_t nsBasicUTF7Decoder::CharToValue(char aChar) {
+ if ((aChar >= 'A') && (aChar <= 'Z'))
+ return (uint8_t)(aChar - 'A');
+ else if ((aChar >= 'a') && (aChar <= 'z'))
+ return (uint8_t)(26 + aChar - 'a');
+ else if ((aChar >= '0') && (aChar <= '9'))
+ return (uint8_t)(26 + 26 + aChar - '0');
+ else if (aChar == '+')
+ return (uint8_t)(26 + 26 + 10);
+ else if (aChar == mLastChar)
+ return (uint8_t)(26 + 26 + 10 + 1);
+ else
+ return 0xffff;
+}
+
+//----------------------------------------------------------------------
+// Subclassing of nsBufferDecoderSupport class [implementation]
+
+/**
+ * Decodes UTF-7 style input into UTF-16 code units.
+ *
+ * The input is processed in runs: direct characters are copied as-is until
+ * the escape character (mEscChar) is met, which switches to base64 decoding;
+ * the first non-base64 character switches back to direct mode, and a '-'
+ * terminating the base64 run is swallowed. The escape character immediately
+ * followed by '-' decodes to the escape character itself. The current mode
+ * and any partially decoded bits are kept in member variables.
+ *
+ * @param aSrc        input bytes.
+ * @param aSrcLength  in: number of bytes available in aSrc;
+ *                    out: number of bytes consumed.
+ * @param aDest       output buffer.
+ * @param aDestLength in: capacity of aDest in char16_t units;
+ *                    out: number of units written.
+ * @return NS_OK_UDEC_MOREOUTPUT if aDest filled up before all input was
+ *         consumed; otherwise the result of the last decoding step
+ *         (NS_OK once all input has been consumed).
+ */
+NS_IMETHODIMP nsBasicUTF7Decoder::ConvertNoBuff(const char* aSrc,
+                                                int32_t* aSrcLength,
+                                                char16_t* aDest,
+                                                int32_t* aDestLength) {
+  const char* srcEnd = aSrc + *aSrcLength;
+  const char* src = aSrc;
+  char16_t* destEnd = aDest + *aDestLength;
+  char16_t* dest = aDest;
+  int32_t bcr, bcw;
+  nsresult res = NS_OK;
+
+  while (src < srcEnd) {
+    // first, attempt to decode in the current mode
+    bcr = srcEnd - src;
+    bcw = destEnd - dest;
+    if (mEncoding == ENC_DIRECT)
+      res = DecodeDirect(src, &bcr, dest, &bcw);
+    else if ((mFreshBase64) && (*src == '-')) {
+      // Escape char directly followed by '-': emit the escape char itself.
+      bcr = 0;
+      if (dest >= destEnd) {
+        // No room left for the escape char; ask for more output space
+        // instead of writing past the end of the buffer.
+        bcw = 0;
+        res = NS_OK_UDEC_MOREOUTPUT;
+      } else {
+        *dest = mEscChar;
+        bcw = 1;
+        // Flagged as "illegal input" so that the handling below leaves
+        // base64 mode and skips over the '-'.
+        res = NS_ERROR_UDEC_ILLEGALINPUT;
+      }
+    } else {
+      mFreshBase64 = false;
+      res = DecodeBase64(src, &bcr, dest, &bcw);
+    }
+    src += bcr;
+    dest += bcw;
+
+    // if an illegal char was encountered, test if it is an escape seq.
+    if (res == NS_ERROR_UDEC_ILLEGALINPUT) {
+      if (mEncoding == ENC_DIRECT) {
+        if (*src == mEscChar) {
+          // start of a base64 run
+          mEncoding = ENC_BASE64;
+          mFreshBase64 = true;
+          mEncBits = 0;
+          mEncStep = 0;
+          src++;
+          res = NS_OK;
+        } else
+          break;
+      } else {
+        mEncoding = ENC_DIRECT;
+        res = NS_OK;
+        // absorb end of escape sequence
+        if (*src == '-') src++;
+      }
+    } else if (res != NS_OK)
+      break;
+  }
+
+  *aSrcLength = src - aSrc;
+  *aDestLength = dest - aDest;
+  return res;
+}
+
+NS_IMETHODIMP nsBasicUTF7Decoder::Reset() {
+ mEncoding = ENC_DIRECT;
+ mEncBits = 0;
+ mEncStep = 0;
+ return NS_OK;
+}
+
+//----------------------------------------------------------------------
+// Class nsUTF7ToUnicode [implementation]
+
+nsUTF7ToUnicode::nsUTF7ToUnicode() : nsBasicUTF7Decoder('/', '+') {}
diff --git a/comm/mailnews/intl/nsUTF7ToUnicode.h b/comm/mailnews/intl/nsUTF7ToUnicode.h
new file mode 100644
index 0000000000..b7b5be4522
--- /dev/null
+++ b/comm/mailnews/intl/nsUTF7ToUnicode.h
@@ -0,0 +1,64 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsUTF7ToUnicode_h___
+#define nsUTF7ToUnicode_h___
+
+//----------------------------------------------------------------------
+// Class nsBasicUTF7Decoder [declaration]
+
+/**
+ * Basic class for a character set converter from UTF-7 to Unicode.
+ *
+ * The decoder is a small state machine that alternates between a direct mode
+ * and a base64 mode; the two characters given to the constructor
+ * parameterize it.
+ *
+ * @created 03/Jun/1999
+ * @author Catalin Rotaru [CATA]
+ */
+class nsBasicUTF7Decoder {
+ public:
+ /**
+ * Class constructor.
+ *
+ * @param aLastChar stored in mLastChar
+ * @param aEscChar the char that switches from direct to base64 mode
+ */
+ nsBasicUTF7Decoder(char aLastChar, char aEscChar);
+ /**
+ * Converts aSrc into aDest. Both length arguments are in/out: available
+ * size on entry, amount consumed/written on exit.
+ */
+ NS_IMETHOD ConvertNoBuff(const char* aSrc, int32_t* aSrcLength,
+ char16_t* aDest, int32_t* aDestLength);
+
+ protected:
+ int32_t mEncoding; // current encoding (direct or base64)
+ // Base64 decoding state; both are cleared when a base64 section starts.
+ uint32_t mEncBits;
+ int32_t mEncStep;
+ // NOTE(review): presumably the base64 character for value 63, mirroring the
+ // encoder - confirm against CharToValue().
+ char mLastChar;
+ // Char that switches the decoder from direct to base64 mode.
+ char mEscChar;
+ // True right after the escape char was consumed; a '-' seen while this is
+ // still set yields one literal escape char.
+ bool mFreshBase64;
+
+ // Mode specific decoders, driven by ConvertNoBuff().
+ nsresult DecodeDirect(const char* aSrc, int32_t* aSrcLength, char16_t* aDest,
+ int32_t* aDestLength);
+ nsresult DecodeBase64(const char* aSrc, int32_t* aSrcLength, char16_t* aDest,
+ int32_t* aDestLength);
+ uint32_t CharToValue(char aChar);
+
+ //--------------------------------------------------------------------
+ // Subclassing of nsBufferDecoderSupport class [declaration]
+
+ // Returns the decoder to direct mode and clears the base64 state.
+ NS_IMETHOD Reset();
+};
+
+//----------------------------------------------------------------------
+// Class nsUTF7ToUnicode [declaration]
+
+/**
+ * A character set converter from UTF-7 to Unicode.
+ *
+ * @created 18/May/1999
+ * @author Catalin Rotaru [CATA]
+ */
+class nsUTF7ToUnicode : public nsBasicUTF7Decoder {
+ public:
+ /**
+ * Class constructor. Sets up the basic decoder with '/' and '+'.
+ */
+ nsUTF7ToUnicode();
+};
+
+#endif /* nsUTF7ToUnicode_h___ */
diff --git a/comm/mailnews/intl/nsUnicodeToMUTF7.cpp b/comm/mailnews/intl/nsUnicodeToMUTF7.cpp
new file mode 100644
index 0000000000..56433a5421
--- /dev/null
+++ b/comm/mailnews/intl/nsUnicodeToMUTF7.cpp
@@ -0,0 +1,11 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsUnicodeToMUTF7.h"
+
+//----------------------------------------------------------------------
+// Class nsUnicodeToMUTF7 [implementation]
+
+// Configures the basic encoder with ',' as its "last char" argument and '&'
+// as the escape char.
+nsUnicodeToMUTF7::nsUnicodeToMUTF7() : nsBasicUTF7Encoder(',', '&') {}
diff --git a/comm/mailnews/intl/nsUnicodeToMUTF7.h b/comm/mailnews/intl/nsUnicodeToMUTF7.h
new file mode 100644
index 0000000000..fafb3b6a84
--- /dev/null
+++ b/comm/mailnews/intl/nsUnicodeToMUTF7.h
@@ -0,0 +1,28 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsUnicodeToMUTF7_h___
+#define nsUnicodeToMUTF7_h___
+
+#include "nsUnicodeToUTF7.h"
+
+//----------------------------------------------------------------------
+// Class nsUnicodeToMUTF7 [declaration]
+
+/**
+ * A character set converter from Unicode to Modified UTF-7.
+ *
+ * All of the encoding logic is inherited from nsBasicUTF7Encoder; this class
+ * only supplies the constructor arguments.
+ *
+ * @created 18/May/1999
+ * @author Catalin Rotaru [CATA]
+ */
+class nsUnicodeToMUTF7 : public nsBasicUTF7Encoder {
+ public:
+ /**
+ * Class constructor.
+ */
+ nsUnicodeToMUTF7();
+};
+
+#endif /* nsUnicodeToMUTF7_h___ */
diff --git a/comm/mailnews/intl/nsUnicodeToUTF7.cpp b/comm/mailnews/intl/nsUnicodeToUTF7.cpp
new file mode 100644
index 0000000000..ab9d1cf895
--- /dev/null
+++ b/comm/mailnews/intl/nsUnicodeToUTF7.cpp
@@ -0,0 +1,302 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsUnicodeToUTF7.h"
+#include <string.h>
+
+//----------------------------------------------------------------------
+// Global functions and data [declaration]
+
+#define ENC_DIRECT 0
+#define ENC_BASE64 1
+
+//----------------------------------------------------------------------
+// Class nsBasicUTF7Encoder [implementation]
+
+/**
+ * Stores the two variant-specific characters and starts in the initial
+ * (direct) state.
+ *
+ * @param aLastChar the character emitted for base64 value 63
+ * @param aEscChar  the character that opens a base64 section
+ */
+nsBasicUTF7Encoder::nsBasicUTF7Encoder(char aLastChar, char aEscChar)
+    : mLastChar(aLastChar), mEscChar(aEscChar) {
+  Reset();
+}
+
+/**
+ * Emits whatever is needed to switch the output stream to aEncoding: the
+ * pending base64 bits (if any), followed by '-' when going to direct mode or
+ * by the escape char when going to base64 mode.
+ *
+ * @param aEncoding    the encoding to switch to (ENC_DIRECT or ENC_BASE64)
+ * @param aDest        [OUT] the destination buffer
+ * @param aDestLength  [IN/OUT] capacity on entry, bytes written on exit
+ * @return             NS_OK, or NS_OK_UENC_MOREOUTPUT when aDest is too small
+ */
+nsresult nsBasicUTF7Encoder::ShiftEncoding(int32_t aEncoding, char* aDest,
+                                           int32_t* aDestLength) {
+  // Already in the requested mode: nothing to emit.
+  if (mEncoding == aEncoding) {
+    *aDestLength = 0;
+    return NS_OK;
+  }
+
+  char* out = aDest;
+  char* const outEnd = aDest + *aDestLength;
+
+  // Flush the bits left over from an unfinished base64 group.
+  if (mEncStep != 0) {
+    if (out >= outEnd) return NS_OK_UENC_MOREOUTPUT;
+    *out++ = ValueToChar(mEncBits);
+    mEncStep = 0;
+    mEncBits = 0;
+  }
+
+  nsresult rv = NS_OK;
+  if (out < outEnd) {
+    if (aEncoding == ENC_DIRECT) {
+      // '-' terminates the base64 section.
+      *out++ = '-';
+      mEncStep = 0;
+      mEncBits = 0;
+    } else if (aEncoding == ENC_BASE64) {
+      *out++ = mEscChar;
+    }
+    mEncoding = aEncoding;
+  } else {
+    rv = NS_OK_UENC_MOREOUTPUT;
+  }
+
+  *aDestLength = out - aDest;
+  return rv;
+}
+
+/**
+ * Copies directly encodable characters from aSrc to aDest, stopping at the
+ * first character that DirectEncodable() rejects. The escape char is written
+ * as itself followed by '-'.
+ *
+ * @param aSrc         [IN] the source characters
+ * @param aSrcLength   [IN/OUT] characters available on entry, consumed on exit
+ * @param aDest        [OUT] the destination buffer
+ * @param aDestLength  [IN/OUT] capacity on entry, bytes written on exit
+ * @return             NS_OK, or NS_OK_UENC_MOREOUTPUT when aDest is too small
+ */
+nsresult nsBasicUTF7Encoder::EncodeDirect(const char16_t* aSrc,
+                                          int32_t* aSrcLength, char* aDest,
+                                          int32_t* aDestLength) {
+  nsresult res = NS_OK;
+  const char16_t* src = aSrc;
+  const char16_t* srcEnd = aSrc + *aSrcLength;
+  char* dest = aDest;
+  char* destEnd = aDest + *aDestLength;
+  char16_t ch;
+
+  while (src < srcEnd) {
+    ch = *src;
+
+    // stop when we reach Unicode chars
+    if (!DirectEncodable(ch)) break;
+
+    if (ch == mEscChar) {
+      // special case for the escape char: two bytes are written below, so two
+      // free bytes are required (checking for one overflowed the buffer)
+      if (destEnd - dest < 2) {
+        res = NS_OK_UENC_MOREOUTPUT;
+        break;
+      } else {
+        *dest++ = (char)ch;
+        *dest++ = (char)'-';
+        src++;
+      }
+    } else {
+      // classic direct encoding
+      if (dest >= destEnd) {
+        res = NS_OK_UENC_MOREOUTPUT;
+        break;
+      } else {
+        *dest++ = (char)ch;
+        src++;
+      }
+    }
+  }
+
+  *aSrcLength = src - aSrc;
+  *aDestLength = dest - aDest;
+  return res;
+}
+
+/**
+ * Base64-encodes characters from aSrc until a directly encodable character is
+ * met. Three 16-bit units map onto eight base64 characters; mEncStep tracks
+ * the position within that cycle and mEncBits carries the bits that do not
+ * yet fill a whole base64 character.
+ *
+ * @param aSrc         [IN] the source characters
+ * @param aSrcLength   [IN/OUT] characters available on entry, consumed on exit
+ * @param aDest        [OUT] the destination buffer
+ * @param aDestLength  [IN/OUT] capacity on entry, bytes written on exit
+ * @return             NS_OK, or NS_OK_UENC_MOREOUTPUT when aDest is too small
+ */
+nsresult nsBasicUTF7Encoder::EncodeBase64(const char16_t* aSrc,
+                                          int32_t* aSrcLength, char* aDest,
+                                          int32_t* aDestLength) {
+  const char16_t* in = aSrc;
+  const char16_t* const inEnd = aSrc + *aSrcLength;
+  char* out = aDest;
+  char* const outEnd = aDest + *aDestLength;
+  nsresult rv = NS_OK;
+
+  while (in < inEnd) {
+    const char16_t unit = *in;
+
+    // Directly encodable characters are not ours to handle.
+    if (DirectEncodable(unit)) break;
+
+    const int32_t room = outEnd - out;
+    switch (mEncStep) {
+      case 0:
+        // 6 + 6 bits are written, 4 bits are carried over.
+        if (room < 2) {
+          rv = NS_OK_UENC_MOREOUTPUT;
+        } else {
+          *out++ = ValueToChar(unit >> 10);
+          *out++ = ValueToChar((unit >> 4) & 0x3f);
+          mEncBits = (unit & 0x0f) << 2;
+        }
+        break;
+      case 1:
+        // 4 carried + 2 bits, then 6 + 6 bits; 2 bits are carried over.
+        if (room < 3) {
+          rv = NS_OK_UENC_MOREOUTPUT;
+        } else {
+          *out++ = ValueToChar(mEncBits + (unit >> 14));
+          *out++ = ValueToChar((unit >> 8) & 0x3f);
+          *out++ = ValueToChar((unit >> 2) & 0x3f);
+          mEncBits = (unit & 0x03) << 4;
+        }
+        break;
+      case 2:
+        // 2 carried + 4 bits, then 6 + 6 bits; nothing is carried over.
+        if (room < 3) {
+          rv = NS_OK_UENC_MOREOUTPUT;
+        } else {
+          *out++ = ValueToChar(mEncBits + (unit >> 12));
+          *out++ = ValueToChar((unit >> 6) & 0x3f);
+          *out++ = ValueToChar(unit & 0x3f);
+          mEncBits = 0;
+        }
+        break;
+    }
+
+    if (rv != NS_OK) break;
+
+    ++in;
+    mEncStep = (mEncStep + 1) % 3;
+  }
+
+  *aSrcLength = in - aSrc;
+  *aDestLength = out - aDest;
+  return rv;
+}
+
+/**
+ * Maps a 6-bit value onto its base64 character. Values 0..62 use the fixed
+ * alphabet below, value 63 uses the variant-specific mLastChar.
+ *
+ * @param aValue the value to map
+ * @return       the base64 character, or -1 if aValue is greater than 63
+ */
+char nsBasicUTF7Encoder::ValueToChar(uint32_t aValue) {
+  // The 63 characters shared by all variants: A-Z, a-z, 0-9 and '+'.
+  static const char kSharedAlphabet[] =
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+";
+
+  if (aValue < 63) return kSharedAlphabet[aValue];
+  if (aValue == 63) return mLastChar;
+  return (char)-1;
+}
+
+/**
+ * Tells whether aChar may be written without base64 encoding. For the basic
+ * encoder these are the printable US-ASCII characters (0x20 to 0x7e).
+ */
+bool nsBasicUTF7Encoder::DirectEncodable(char16_t aChar) {
+  return (aChar >= 0x20) && (aChar <= 0x7e);
+}
+
+//----------------------------------------------------------------------
+// Subclassing of nsEncoderSupport class [implementation]
+
+/**
+ * Encodes aSrc into aDest, alternating between direct and base64 runs. For
+ * each run the encoder first shifts into the mode required by the next
+ * character and then encodes as much as that mode accepts.
+ *
+ * @param aSrc         [IN] the source characters
+ * @param aSrcLength   [IN/OUT] characters available on entry, consumed on exit
+ * @param aDest        [OUT] the destination buffer
+ * @param aDestLength  [IN/OUT] capacity on entry, bytes written on exit
+ * @return             NS_OK, or the first non-NS_OK status of a helper
+ */
+NS_IMETHODIMP nsBasicUTF7Encoder::ConvertNoBuffNoErr(const char16_t* aSrc,
+                                                     int32_t* aSrcLength,
+                                                     char* aDest,
+                                                     int32_t* aDestLength) {
+  const char16_t* in = aSrc;
+  const char16_t* const inEnd = aSrc + *aSrcLength;
+  char* out = aDest;
+  char* const outEnd = aDest + *aDestLength;
+  nsresult rv = NS_OK;
+
+  while (in < inEnd) {
+    // The next character decides which mode is needed.
+    const int32_t wanted = DirectEncodable(*in) ? ENC_DIRECT : ENC_BASE64;
+
+    // Switch modes if we are not in the right one yet.
+    int32_t written = outEnd - out;
+    rv = ShiftEncoding(wanted, out, &written);
+    out += written;
+    if (rv != NS_OK) break;
+
+    // Encode the longest possible run in that mode.
+    int32_t consumed = inEnd - in;
+    written = outEnd - out;
+    rv = (wanted == ENC_DIRECT) ? EncodeDirect(in, &consumed, out, &written)
+                                : EncodeBase64(in, &consumed, out, &written);
+    in += consumed;
+    out += written;
+    if (rv != NS_OK) break;
+  }
+
+  *aSrcLength = in - aSrc;
+  *aDestLength = out - aDest;
+  return rv;
+}
+
+/**
+ * Terminates the output by shifting back to direct mode, which flushes any
+ * pending base64 bits.
+ *
+ * @param aDest        [OUT] the destination buffer
+ * @param aDestLength  [IN/OUT] capacity on entry, bytes written on exit
+ * @return             the status of ShiftEncoding()
+ */
+NS_IMETHODIMP nsBasicUTF7Encoder::FinishNoBuff(char* aDest,
+                                               int32_t* aDestLength) {
+  const nsresult rv = ShiftEncoding(ENC_DIRECT, aDest, aDestLength);
+  return rv;
+}
+
+/**
+ * Puts the encoder back into direct mode with no pending base64 bits.
+ *
+ * @return NS_OK, always
+ */
+NS_IMETHODIMP nsBasicUTF7Encoder::Reset() {
+  mEncStep = 0;
+  mEncBits = 0;
+  mEncoding = ENC_DIRECT;
+  return NS_OK;
+}
+
+//----------------------------------------------------------------------
+// Class nsUnicodeToUTF7 [implementation]
+
+// Configures the basic encoder with '/' as the character for base64 value 63
+// and '+' as the escape char.
+nsUnicodeToUTF7::nsUnicodeToUTF7() : nsBasicUTF7Encoder('/', '+') {}
+
+/**
+ * Tells whether aChar may be written without base64 encoding: letters,
+ * digits, tab/LF/CR/space and a fixed set of punctuation characters.
+ */
+bool nsUnicodeToUTF7::DirectEncodable(char16_t aChar) {
+  // Letters and digits.
+  if (((aChar >= 'A') && (aChar <= 'Z')) ||
+      ((aChar >= 'a') && (aChar <= 'z')) ||
+      ((aChar >= '0') && (aChar <= '9'))) {
+    return true;
+  }
+
+  switch (aChar) {
+    case 9:   // tab
+    case 10:  // line feed
+    case 13:  // carriage return
+    case ' ':
+    case 33:  // '!'
+    case 34:  // '"'
+    case 39:  // '\''
+    case 40:  // '('
+    case 41:  // ')'
+    case 44:  // ','
+    case 45:  // '-'
+    case 46:  // '.'
+    case 47:  // '/'
+    case 58:  // ':'
+    case 59:  // ';'
+    case 60:  // '<'
+    case 61:  // '='
+    case 62:  // '>'
+    case 63:  // '?'
+    case 91:  // '['
+    case 93:  // ']'
+      return true;
+    default:
+      return false;
+  }
+}
diff --git a/comm/mailnews/intl/nsUnicodeToUTF7.h b/comm/mailnews/intl/nsUnicodeToUTF7.h
new file mode 100644
index 0000000000..423bfa8198
--- /dev/null
+++ b/comm/mailnews/intl/nsUnicodeToUTF7.h
@@ -0,0 +1,69 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsUnicodeToUTF7_h___
+#define nsUnicodeToUTF7_h___
+
+//----------------------------------------------------------------------
+// Class nsBasicUTF7Encoder [declaration]
+
+/**
+ * Basic class for a character set converter from Unicode to UTF-7.
+ *
+ * The encoder alternates between a direct mode and a base64 mode; the two
+ * characters given to the constructor and the virtual DirectEncodable()
+ * parameterize it.
+ *
+ * @created 03/Jun/1999
+ * @author Catalin Rotaru [CATA]
+ */
+class nsBasicUTF7Encoder {
+ public:
+ /**
+ * Class constructor.
+ *
+ * @param aLastChar the character emitted for base64 value 63
+ * @param aEscChar the character that opens a base64 section
+ */
+ nsBasicUTF7Encoder(char aLastChar, char aEscChar);
+ /**
+ * Encodes aSrc into aDest. Both length arguments are in/out: available size
+ * on entry, amount consumed/written on exit.
+ */
+ NS_IMETHOD ConvertNoBuffNoErr(const char16_t* aSrc, int32_t* aSrcLength,
+ char* aDest, int32_t* aDestLength);
+ /**
+ * Shifts back to direct mode, writing whatever that requires into aDest.
+ */
+ NS_IMETHOD FinishNoBuff(char* aDest, int32_t* aDestLength);
+
+ protected:
+ int32_t mEncoding; // current encoding (direct or base64)
+ // Bits of the current base64 group that do not fill a whole character yet.
+ uint32_t mEncBits;
+ // Position (0..2) within the three-unit base64 cycle.
+ int32_t mEncStep;
+ // Character emitted for base64 value 63.
+ char mLastChar;
+ // Character that opens a base64 section.
+ char mEscChar;
+
+ // Emits what is needed to switch the output to aEncoding.
+ nsresult ShiftEncoding(int32_t aEncoding, char* aDest, int32_t* aDestLength);
+ // Mode specific encoders, driven by ConvertNoBuffNoErr().
+ nsresult EncodeDirect(const char16_t* aSrc, int32_t* aSrcLength, char* aDest,
+ int32_t* aDestLength);
+ nsresult EncodeBase64(const char16_t* aSrc, int32_t* aSrcLength, char* aDest,
+ int32_t* aDestLength);
+ // Maps a 6-bit value onto its base64 character.
+ char ValueToChar(uint32_t aValue);
+ // Tells whether aChar may be written without base64 encoding.
+ virtual bool DirectEncodable(char16_t aChar);
+
+ //--------------------------------------------------------------------
+ // Subclassing of nsEncoderSupport class [declaration]
+
+ // Returns the encoder to direct mode and clears the base64 state.
+ NS_IMETHOD Reset();
+};
+
+//----------------------------------------------------------------------
+// Class nsUnicodeToUTF7 [declaration]
+
+/**
+ * A character set converter from Unicode to UTF-7.
+ *
+ * Differs from the basic encoder only in its constructor arguments and in the
+ * set of characters that are written without base64 encoding.
+ *
+ * @created 03/Jun/1999
+ * @author Catalin Rotaru [CATA]
+ */
+class nsUnicodeToUTF7 : public nsBasicUTF7Encoder {
+ public:
+  /**
+   * Class constructor.
+   */
+  nsUnicodeToUTF7();
+
+ protected:
+  // Marked `override` so that the compiler rejects any drift from the
+  // signature of the base class method.
+  bool DirectEncodable(char16_t aChar) override;
+};
+
+#endif /* nsUnicodeToUTF7_h___ */
diff --git a/comm/mailnews/intl/test/moz.build b/comm/mailnews/intl/test/moz.build
new file mode 100644
index 0000000000..6b37fdbe09
--- /dev/null
+++ b/comm/mailnews/intl/test/moz.build
@@ -0,0 +1,6 @@
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Register the xpcshell manifest that lists the unit tests of this directory.
+XPCSHELL_TESTS_MANIFESTS += ["unit/xpcshell.ini"]
diff --git a/comm/mailnews/intl/test/unit/head_CharsetConversionTests.js b/comm/mailnews/intl/test/unit/head_CharsetConversionTests.js
new file mode 100644
index 0000000000..f1ae6c7155
--- /dev/null
+++ b/comm/mailnews/intl/test/unit/head_CharsetConversionTests.js
@@ -0,0 +1,46 @@
+// Shorthand for Components.Constructor; used by CreateScriptableConverter().
+var CC = Components.Constructor;
+
+function CreateScriptableConverter() {
+ var ScriptableUnicodeConverter = CC(
+ "@mozilla.org/intl/scriptableunicodeconverter",
+ "nsIScriptableUnicodeConverter"
+ );
+
+ return new ScriptableUnicodeConverter();
+}
+
+function checkDecode(converter, charset, inText, expectedText) {
+ let manager = Cc["@mozilla.org/charset-converter-manager;1"].getService(
+ Ci.nsICharsetConverterManager
+ );
+
+ try {
+ converter.charset = manager.getCharsetAlias(charset);
+ } catch (e) {
+ converter.charset = "iso-8859-1";
+ }
+
+ dump("testing decoding from " + charset + " to Unicode.\n");
+ try {
+ var outText = converter.ConvertToUnicode(inText) + converter.Finish();
+ } catch (e) {
+ outText = "\ufffd";
+ }
+ Assert.equal(outText, expectedText);
+}
+
+function checkEncode(converter, charset, inText, expectedText) {
+ let manager = Cc["@mozilla.org/charset-converter-manager;1"].getService(
+ Ci.nsICharsetConverterManager
+ );
+
+ try {
+ converter.charset = manager.getCharsetAlias(charset);
+ } catch (e) {
+ converter.charset = "iso-8859-1";
+ }
+
+ dump("testing encoding from Unicode to " + charset + "\n");
+ var outText = converter.ConvertFromUnicode(inText) + converter.Finish();
+ Assert.equal(outText, expectedText);
+}
diff --git a/comm/mailnews/intl/test/unit/test_decode_utf-7.js b/comm/mailnews/intl/test/unit/test_decode_utf-7.js
new file mode 100644
index 0000000000..e81dd137e6
--- /dev/null
+++ b/comm/mailnews/intl/test/unit/test_decode_utf-7.js
@@ -0,0 +1,23 @@
+// Tests conversion from UTF-7 to Unicode. The conversion should fail!
+
+// UTF-7 encoded input.
+var inString =
+ "+LGI--+ITIipSIp- +AocCeQ-oddns +Ad0CjQ- s+ATECZQKH- p+AlAB3QJ5- u+AlACVA- no+Ao4- +Al8-I";
+
+// Identical to the input: the test expects the text to come back undecoded.
+var expectedString =
+ "+LGI--+ITIipSIp- +AocCeQ-oddns +Ad0CjQ- s+ATECZQKH- p+AlAB3QJ5- u+AlACVA- no+Ao4- +Al8-I";
+
+// Charset names and aliases that are tried one after the other.
+var aliases = [
+ "UTF-7",
+ "utf-7",
+ "x-unicode-2-0-utf-7",
+ "unicode-2-0-utf-7",
+ "unicode-1-1-utf-7",
+ "csunicode11utf7",
+];
+
+function run_test() {
+ let converter = CreateScriptableConverter();
+ for (let i = 0; i < aliases.length; ++i) {
+ checkDecode(converter, aliases[i], inString, expectedString);
+ }
+}
diff --git a/comm/mailnews/intl/test/unit/test_decode_utf-7_internal.js b/comm/mailnews/intl/test/unit/test_decode_utf-7_internal.js
new file mode 100644
index 0000000000..e31f0f8840
--- /dev/null
+++ b/comm/mailnews/intl/test/unit/test_decode_utf-7_internal.js
@@ -0,0 +1,30 @@
+// Tests conversion from UTF-7 to Unicode.
+
+// UTF-7 encoded input.
+var inString =
+ "+LGI--+ITIipSIp- +AocCeQ-oddns +Ad0CjQ- s+ATECZQKH- p+AlAB3QJ5- u+AlACVA- no+Ao4- +Al8-I";
+
+// The text that decoding the input is expected to produce.
+var expectedString =
+ "\u2C62-\u2132\u22A5\u2229 \u0287\u0279oddns \u01DD\u028D s\u0131\u0265\u0287 p\u0250\u01DD\u0279 u\u0250\u0254 no\u028E \u025FI";
+
+// Charset names and aliases that are tried one after the other.
+var aliases = [
+ "UTF-7",
+ "utf-7",
+ "x-unicode-2-0-utf-7",
+ "unicode-2-0-utf-7",
+ "unicode-1-1-utf-7",
+ "csunicode11utf7",
+];
+function run_test() {
+ let manager = Cc["@mozilla.org/charset-converter-manager;1"].getService(
+ Ci.nsICharsetConverterManager
+ );
+ let converter = CreateScriptableConverter();
+ converter.isInternal = true;
+ for (let i = 0; i < aliases.length; ++i) {
+ if (manager.getCharsetAlias(aliases[i]).toLowerCase() == "utf-7") {
+ Assert.equal(manager.utf7ToUnicode(inString), expectedString);
+ } else {
+ checkDecode(converter, aliases[i], inString, expectedString);
+ }
+ }
+}
diff --git a/comm/mailnews/intl/test/unit/test_encode_utf-7.js b/comm/mailnews/intl/test/unit/test_encode_utf-7.js
new file mode 100644
index 0000000000..1acc8957bd
--- /dev/null
+++ b/comm/mailnews/intl/test/unit/test_encode_utf-7.js
@@ -0,0 +1,22 @@
+// Tests conversion from Unicode to UTF-7. The conversion should fail!
+
+// Unicode input, mostly outside of US-ASCII.
+var inString =
+ "\u2C62-\u2132\u22A5\u2229 \u0287\u0279oddns \u01DD\u028D s\u0131\u0265\u0287 p\u0250\u01DD\u0279 u\u0250\u0254 no\u028E \u025FI";
+
+// Expected output: every non-ASCII character of the input replaced by '?'.
+var expectedString = "?-??? ??oddns ?? s??? p??? u?? no? ?I";
+
+// Charset names and aliases that are tried one after the other.
+var aliases = [
+ "UTF-7",
+ "utf-7",
+ "x-unicode-2-0-utf-7",
+ "unicode-2-0-utf-7",
+ "unicode-1-1-utf-7",
+ "csunicode11utf7",
+];
+
+function run_test() {
+ let converter = CreateScriptableConverter();
+ for (let i = 0; i < aliases.length; ++i) {
+ checkEncode(converter, aliases[i], inString, expectedString);
+ }
+}
diff --git a/comm/mailnews/intl/test/unit/test_encode_utf-7_internal.js b/comm/mailnews/intl/test/unit/test_encode_utf-7_internal.js
new file mode 100644
index 0000000000..31af29c30b
--- /dev/null
+++ b/comm/mailnews/intl/test/unit/test_encode_utf-7_internal.js
@@ -0,0 +1,24 @@
+// Tests conversion from Unicode to UTF-7.
+
+// Unicode input, mostly outside of US-ASCII.
+var inString =
+ "\u2C62-\u2132\u22A5\u2229 \u0287\u0279oddns \u01DD\u028D s\u0131\u0265\u0287 p\u0250\u01DD\u0279 u\u0250\u0254 no\u028E \u025FI";
+
+// The UTF-7 text that encoding the input is expected to produce.
+var expectedString =
+ "+LGI--+ITIipSIp- +AocCeQ-oddns +Ad0CjQ- s+ATECZQKH- p+AlAB3QJ5- u+AlACVA- no+Ao4- +Al8-I";
+
+// Charset names and aliases that are tried one after the other.
+var aliases = [
+ "UTF-7",
+ "utf-7",
+ "x-unicode-2-0-utf-7",
+ "unicode-2-0-utf-7",
+ "unicode-1-1-utf-7",
+ "csunicode11utf7",
+];
+
+function run_test() {
+ let converter = CreateScriptableConverter();
+ converter.isInternal = true;
+ for (let i = 0; i < aliases.length; ++i) {
+ checkEncode(converter, aliases[i], inString, expectedString);
+ }
+}
diff --git a/comm/mailnews/intl/test/unit/xpcshell.ini b/comm/mailnews/intl/test/unit/xpcshell.ini
new file mode 100644
index 0000000000..cbc671669c
--- /dev/null
+++ b/comm/mailnews/intl/test/unit/xpcshell.ini
@@ -0,0 +1,10 @@
+[DEFAULT]
+head = head_CharsetConversionTests.js
+tail =
+
+[test_decode_utf-7.js]
+[test_decode_utf-7_internal.js]
+[test_encode_utf-7.js]
+[test_encode_utf-7_internal.js]
+# Disabled per bug 1363281: No scriptable converter for UTF-7 exists any more.
+skip-if = true