1 files changed, 257 insertions, 0 deletions
diff --git a/js/src/jsapi-tests/testCharacterEncoding.cpp b/js/src/jsapi-tests/testCharacterEncoding.cpp
new file mode 100644
index 0000000000..c15b1c978b
--- /dev/null
+++ b/js/src/jsapi-tests/testCharacterEncoding.cpp
@@ -0,0 +1,257 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/TextUtils.h"
+
+#include <clocale>
+#include <cstring>
+#include <cwchar>
+#include <initializer_list>
+#include <iterator>
+#include <string>
+#include <string_view>
+
+#include "js/CharacterEncoding.h"
+#include "jsapi-tests/tests.h"
+
+// Returns true when the NUL-terminated strings |xs| and |ys| are equal,
+// treating ASCII letters case-insensitively. All other characters must match
+// exactly.
+static bool EqualsIgnoreCase(const char* xs, const char* ys) {
+  for (; *xs && *ys; ++xs, ++ys) {
+    char left = *xs;
+    char right = *ys;
+
+    // Fold to lower-case only when both sides are ASCII letters. Setting bit
+    // 0x20 maps 'A'-'Z' onto 'a'-'z' and leaves lower-case letters unchanged.
+    if (mozilla::IsAsciiAlpha(left) && mozilla::IsAsciiAlpha(right)) {
+      left |= 0x20;
+      right |= 0x20;
+    }
+
+    // The first mismatch decides the result.
+    if (left != right) {
+      return false;
+    }
+  }
+
+  // The loop stops as soon as either string ends; the strings are equal only
+  // if both of them ended at this position.
+  return *xs == '\0' && *ys == '\0';
+}
+
+// Scoped helper which switches the C locale (LC_ALL) to "en_US.UTF-8" for its
+// lifetime and restores the previously active locale when destroyed.
+//
+// |supported()| reports whether both the system default locale and the newly
+// installed locale name a UTF-8 code-page. Tests use it to skip themselves on
+// systems without UTF-8 support.
+class ToUTF8Locale {
+  // Owned copy of the locale name that was active before construction. A copy
+  // is required because the string returned by std::setlocale may be
+  // overwritten by any subsequent std::setlocale call, and the constructor
+  // performs two more of them.
+  std::string previousLocale_;
+
+  // True when |previousLocale_| holds a locale name to restore.
+  bool hasPreviousLocale_ = false;
+
+  // True when both the default locale and the new locale use UTF-8.
+  bool supported_ = false;
+
+  // Returns true if |locale| contains a '.' and the text following the first
+  // '.' (the code-page) equals "UTF-8", ignoring ASCII case.
+  static bool HasUTF8Codepage(const char* locale) {
+    const char* dot = std::strchr(locale, '.');
+    if (!dot) {
+      // No code-page present.
+      return false;
+    }
+
+    // Skip past the '.'.
+    return EqualsIgnoreCase(dot + 1, "UTF-8");
+  }
+
+ public:
+  ToUTF8Locale() {
+    // Store a copy of the old locale name so it can be restored in the
+    // destructor.
+    if (const char* current = std::setlocale(LC_ALL, nullptr)) {
+      previousLocale_ = current;
+      hasPreviousLocale_ = true;
+    }
+
+    // Query the system default locale.
+    const char* defaultLocale = std::setlocale(LC_ALL, "");
+    if (!defaultLocale) {
+      // std::setlocale returns nullptr on failure.
+      return;
+    }
+
+    // Inspect the default locale right away: the next std::setlocale call may
+    // overwrite the string |defaultLocale| points to.
+    const bool defaultIsUTF8 = HasUTF8Codepage(defaultLocale);
+
+    // Switch the default locale to be UTF-8 aware.
+    const char* newLocale = std::setlocale(LC_ALL, "en_US.UTF-8");
+    if (!newLocale) {
+      // std::setlocale returns nullptr on failure.
+      return;
+    }
+
+    // UTF-8 is supported when the default locale and new locale support it:
+    //
+    // The default locale needs to support UTF-8, because this test is compiled
+    // using the default locale.
+    //
+    // The new locale needs to support UTF-8 to ensure UTF-8 encoding works at
+    // runtime.
+    supported_ = defaultIsUTF8 && HasUTF8Codepage(newLocale);
+  }
+
+  // The destructor restores process-wide state, so a copy would restore it a
+  // second time. Forbid copying.
+  ToUTF8Locale(const ToUTF8Locale&) = delete;
+  ToUTF8Locale& operator=(const ToUTF8Locale&) = delete;
+
+  // Returns true if UTF-8 encoded text can be used with the current locale.
+  bool supported() const { return supported_; }
+
+  ~ToUTF8Locale() {
+    // Restore the previous locale.
+    if (hasPreviousLocale_) {
+      std::setlocale(LC_ALL, previousLocale_.c_str());
+    }
+  }
+};
+
+BEGIN_TEST(testCharacterEncoding_narrow_to_utf8) {
+  // All inputs are plain ASCII and the narrow charset is assumed to be
+  // ASCII-compatible, so the UTF-8 result must equal the input.
+  //
+  // A std::string_view built from a C string stops at the first NUL, so the
+  // expected value for the last input is just "abc".
+  const char* const inputs[] = {"", "a", "abc", "abc\0def"};
+
+  for (const char* input : inputs) {
+    const std::string_view string(input);
+
+    auto utf8 = JS::EncodeNarrowToUtf8(cx, input);
+    CHECK(utf8 != nullptr);
+    CHECK_EQUAL(std::strlen(utf8.get()), string.length());
+    CHECK(utf8.get() == string);
+  }
+
+  return true;
+}
+END_TEST(testCharacterEncoding_narrow_to_utf8)
+
+BEGIN_TEST(testCharacterEncoding_wide_to_utf8) {
+  // All inputs are plain ASCII and the wide charset is assumed to be
+  // ASCII-compatible, so every wide character must map to the single UTF-8
+  // byte with the same value.
+  //
+  // A std::wstring_view built from a C wide string stops at the first NUL, so
+  // the expected value for the last input is just L"abc".
+  const wchar_t* const inputs[] = {L"", L"a", L"abc", L"abc\0def"};
+
+  for (const wchar_t* input : inputs) {
+    const std::wstring_view string(input);
+
+    auto utf8 = JS::EncodeWideToUtf8(cx, input);
+    CHECK(utf8 != nullptr);
+
+    // The length check comes first so that the element-wise comparison below
+    // never reads past the end of the encoded string.
+    CHECK_EQUAL(std::strlen(utf8.get()), string.length());
+    CHECK(std::equal(
+        string.begin(), string.end(), utf8.get(),
+        [](wchar_t x, char y) { return char32_t(x) == char32_t(y); }));
+  }
+
+  return true;
+}
+END_TEST(testCharacterEncoding_wide_to_utf8)
+
+BEGIN_TEST(testCharacterEncoding_wide_to_utf8_non_ascii) {
+  // The non-ASCII literals below need a UTF-8 aware locale. The previous
+  // locale is restored when |utf8locale| goes out of scope.
+  ToUTF8Locale utf8locale;
+  if (!utf8locale.supported()) {
+    // UTF-8 isn't supported on this system: skip the test.
+    return true;
+  }
+
+  // U+00E4 encodes to the two UTF-8 bytes C3 A4.
+  {
+    const wchar_t* input = L"ä";
+    auto utf8 = JS::EncodeWideToUtf8(cx, input);
+    CHECK(utf8 != nullptr);
+
+    CHECK_EQUAL(std::strlen(utf8.get()), 2U);
+    CHECK_EQUAL(utf8[0], char(0xC3));
+    CHECK_EQUAL(utf8[1], char(0xA4));
+  }
+
+  // U+1F4A9 encodes to the four UTF-8 bytes F0 9F 92 A9.
+  {
+    const wchar_t* input = L"💩";
+    auto utf8 = JS::EncodeWideToUtf8(cx, input);
+    CHECK(utf8 != nullptr);
+
+    CHECK_EQUAL(std::strlen(utf8.get()), 4U);
+    CHECK_EQUAL(utf8[0], char(0xF0));
+    CHECK_EQUAL(utf8[1], char(0x9F));
+    CHECK_EQUAL(utf8[2], char(0x92));
+    CHECK_EQUAL(utf8[3], char(0xA9));
+  }
+
+  return true;
+}
+END_TEST(testCharacterEncoding_wide_to_utf8_non_ascii)
+
+BEGIN_TEST(testCharacterEncoding_utf8_to_narrow) {
+  // All inputs are plain ASCII and the narrow charset is assumed to be
+  // ASCII-compatible, so converting from UTF-8 must return the input
+  // unchanged.
+  //
+  // A std::string_view built from a C string stops at the first NUL, so the
+  // expected value for the last input is just "abc".
+  const char* const inputs[] = {"", "a", "abc", "abc\0def"};
+
+  for (const char* input : inputs) {
+    const std::string_view string(input);
+
+    auto narrow = JS::EncodeUtf8ToNarrow(cx, input);
+    CHECK(narrow != nullptr);
+    CHECK_EQUAL(std::strlen(narrow.get()), string.length());
+    CHECK(narrow.get() == string);
+  }
+
+  return true;
+}
+END_TEST(testCharacterEncoding_utf8_to_narrow)
+
+BEGIN_TEST(testCharacterEncoding_utf8_to_wide) {
+  // All inputs are plain ASCII and the wide charset is assumed to be
+  // ASCII-compatible, so every UTF-8 byte must map to the single wide
+  // character with the same value.
+  //
+  // A std::string_view built from a C string stops at the first NUL, so the
+  // expected value for the last input is just "abc".
+  const char* const inputs[] = {"", "a", "abc", "abc\0def"};
+
+  for (const char* input : inputs) {
+    const std::string_view string(input);
+
+    auto wide = JS::EncodeUtf8ToWide(cx, input);
+    CHECK(wide != nullptr);
+
+    // The length check comes first so that the element-wise comparison below
+    // never reads past the end of the decoded string.
+    CHECK_EQUAL(std::wcslen(wide.get()), string.length());
+    CHECK(std::equal(
+        string.begin(), string.end(), wide.get(),
+        [](char x, wchar_t y) { return char32_t(x) == char32_t(y); }));
+  }
+
+  return true;
+}
+END_TEST(testCharacterEncoding_utf8_to_wide)
+
+BEGIN_TEST(testCharacterEncoding_narrow_roundtrip) {
+  // The non-ASCII literals below need a UTF-8 aware locale. The previous
+  // locale is restored when |utf8locale| goes out of scope.
+  ToUTF8Locale utf8locale;
+  if (!utf8locale.supported()) {
+    // UTF-8 isn't supported on this system: skip the test.
+    return true;
+  }
+
+  const char* const inputs[] = {"", "a", "abc", "ä", "💩"};
+
+  for (const char* input : inputs) {
+    const std::string_view string(input);
+
+    // Narrow -> UTF-8 ...
+    auto utf8 = JS::EncodeNarrowToUtf8(cx, input);
+    CHECK(utf8 != nullptr);
+
+    // ... and back again.
+    auto narrow = JS::EncodeUtf8ToNarrow(cx, utf8.get());
+    CHECK(narrow != nullptr);
+
+    // The round-trip must reproduce the original string.
+    CHECK(narrow.get() == string);
+  }
+
+  return true;
+}
+END_TEST(testCharacterEncoding_narrow_roundtrip)
+
+BEGIN_TEST(testCharacterEncoding_wide_roundtrip) {
+  // The non-ASCII literals below need a UTF-8 aware locale. The previous
+  // locale is restored when |utf8locale| goes out of scope.
+  ToUTF8Locale utf8locale;
+  if (!utf8locale.supported()) {
+    // UTF-8 isn't supported on this system: skip the test.
+    return true;
+  }
+
+  const wchar_t* const inputs[] = {L"", L"a", L"abc", L"ä", L"💩"};
+
+  for (const wchar_t* input : inputs) {
+    const std::wstring_view string(input);
+
+    // Wide -> UTF-8 ...
+    auto utf8 = JS::EncodeWideToUtf8(cx, input);
+    CHECK(utf8 != nullptr);
+
+    // ... and back again.
+    auto wide = JS::EncodeUtf8ToWide(cx, utf8.get());
+    CHECK(wide != nullptr);
+
+    // The round-trip must reproduce the original string.
+    CHECK(wide.get() == string);
+  }
+
+  return true;
+}
+END_TEST(testCharacterEncoding_wide_roundtrip)