diff options
Diffstat (limited to 'js/src/jsapi-tests/testCharacterEncoding.cpp')
-rw-r--r-- | js/src/jsapi-tests/testCharacterEncoding.cpp | 257 |
1 files changed, 257 insertions, 0 deletions
diff --git a/js/src/jsapi-tests/testCharacterEncoding.cpp b/js/src/jsapi-tests/testCharacterEncoding.cpp new file mode 100644 index 0000000000..c15b1c978b --- /dev/null +++ b/js/src/jsapi-tests/testCharacterEncoding.cpp @@ -0,0 +1,257 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/TextUtils.h" + +#include <clocale> +#include <cstring> +#include <cwchar> +#include <initializer_list> +#include <iterator> +#include <string_view> + +#include "js/CharacterEncoding.h" +#include "jsapi-tests/tests.h" + +static bool EqualsIgnoreCase(const char* xs, const char* ys) { + while (*xs && *ys) { + char x = *xs++; + char y = *ys++; + + // Convert both to lower-case. + if (mozilla::IsAsciiAlpha(x) && mozilla::IsAsciiAlpha(y)) { + x |= 0x20; + y |= 0x20; + } + + // Fail if the characters aren't the same. + if (x != y) { + return false; + } + } + + // Both strings must be read to the end. + return !*xs && !*ys; +} + +class ToUTF8Locale { + const char* previousLocale_ = nullptr; + bool supported_ = false; + + public: + ToUTF8Locale() { + // Store the old locale so we can reset it in the destructor. + previousLocale_ = std::setlocale(LC_ALL, nullptr); + + // Query the system default locale. + const char* defaultLocale = std::setlocale(LC_ALL, ""); + if (!defaultLocale) { + // std::setlocale returns nullptr on failure. + return; + } + + // Switch the default locale to be UTF-8 aware. + const char* newLocale = std::setlocale(LC_ALL, "en_US.UTF-8"); + if (!newLocale) { + // std::setlocale returns nullptr on failure. + return; + } + + const char* defaultCodepage = std::strchr(defaultLocale, '.'); + const char* newCodepage = std::strchr(newLocale, '.'); + + // Return if either the default or new locale don't contain a code-page. + if (!defaultCodepage || !newCodepage) { + return; + } + + // Skip past the '.'. + defaultCodepage++; + newCodepage++; + + // UTF-8 is supported when the default locale and new locale support it: + // + // The default locale needs to support UTF-8, because this test is compiled + // using the default locale. + // + // The new locale needs to support UTF-8 to ensure UTF-8 encoding works at + // runtime. + supported_ = EqualsIgnoreCase(defaultCodepage, "UTF-8") && + EqualsIgnoreCase(newCodepage, "UTF-8"); + } + + bool supported() const { return supported_; } + + ~ToUTF8Locale() { + // Restore the previous locale. + if (previousLocale_) { + std::setlocale(LC_ALL, previousLocale_); + } + } +}; + +BEGIN_TEST(testCharacterEncoding_narrow_to_utf8) { + // Assume the narrow charset is ASCII-compatible. ASCII to UTF-8 conversion is + // a no-op. + for (std::string_view string : { + "", + "a", + "abc", + "abc\0def", + }) { + auto utf8 = JS::EncodeNarrowToUtf8(cx, string.data()); + CHECK(utf8 != nullptr); + CHECK_EQUAL(std::strlen(utf8.get()), string.length()); + CHECK(utf8.get() == string); + } + return true; +} +END_TEST(testCharacterEncoding_narrow_to_utf8) + +BEGIN_TEST(testCharacterEncoding_wide_to_utf8) { + // Assume the wide charset is ASCII-compatible. ASCII to UTF-8 conversion is + // a no-op. + for (std::wstring_view string : { + L"", + L"a", + L"abc", + L"abc\0def", + }) { + auto utf8 = JS::EncodeWideToUtf8(cx, string.data()); + CHECK(utf8 != nullptr); + CHECK_EQUAL(std::strlen(utf8.get()), string.length()); + CHECK(std::equal( + string.begin(), string.end(), utf8.get(), + [](wchar_t x, char y) { return char32_t(x) == char32_t(y); })); + } + return true; +} +END_TEST(testCharacterEncoding_wide_to_utf8) + +BEGIN_TEST(testCharacterEncoding_wide_to_utf8_non_ascii) { + // Change the locale to be UTF-8 aware for the emoji string. + ToUTF8Locale utf8locale; + + // Skip this test if UTF-8 isn't supported on this system. + if (!utf8locale.supported()) { + return true; + } + + { + std::wstring_view string = L"ä"; + auto utf8 = JS::EncodeWideToUtf8(cx, string.data()); + CHECK(utf8 != nullptr); + + CHECK_EQUAL(std::strlen(utf8.get()), 2U); + CHECK_EQUAL(utf8[0], char(0xC3)); + CHECK_EQUAL(utf8[1], char(0xA4)); + } + { + std::wstring_view string = L"💩"; + auto utf8 = JS::EncodeWideToUtf8(cx, string.data()); + CHECK(utf8 != nullptr); + + CHECK_EQUAL(std::strlen(utf8.get()), 4U); + CHECK_EQUAL(utf8[0], char(0xF0)); + CHECK_EQUAL(utf8[1], char(0x9F)); + CHECK_EQUAL(utf8[2], char(0x92)); + CHECK_EQUAL(utf8[3], char(0xA9)); + } + return true; +} +END_TEST(testCharacterEncoding_wide_to_utf8_non_ascii) + +BEGIN_TEST(testCharacterEncoding_utf8_to_narrow) { + // Assume the narrow charset is ASCII-compatible. ASCII to UTF-8 conversion is + // a no-op. + for (std::string_view string : { + "", + "a", + "abc", + "abc\0def", + }) { + auto narrow = JS::EncodeUtf8ToNarrow(cx, string.data()); + CHECK(narrow != nullptr); + CHECK_EQUAL(std::strlen(narrow.get()), string.length()); + CHECK(narrow.get() == string); + } + return true; +} +END_TEST(testCharacterEncoding_utf8_to_narrow) + +BEGIN_TEST(testCharacterEncoding_utf8_to_wide) { + // Assume the wide charset is ASCII-compatible. ASCII to UTF-8 conversion is + // a no-op. + for (std::string_view string : { + "", + "a", + "abc", + "abc\0def", + }) { + auto wide = JS::EncodeUtf8ToWide(cx, string.data()); + CHECK(wide != nullptr); + CHECK_EQUAL(std::wcslen(wide.get()), string.length()); + CHECK(std::equal( + string.begin(), string.end(), wide.get(), + [](char x, wchar_t y) { return char32_t(x) == char32_t(y); })); + } + return true; +} +END_TEST(testCharacterEncoding_utf8_to_wide) + +BEGIN_TEST(testCharacterEncoding_narrow_roundtrip) { + // Change the locale to be UTF-8 aware for the emoji string. + ToUTF8Locale utf8locale; + + // Skip this test if UTF-8 isn't supported on this system. + if (!utf8locale.supported()) { + return true; + } + + for (std::string_view string : { + "", + "a", + "abc", + "ä", + "💩", + }) { + auto utf8 = JS::EncodeNarrowToUtf8(cx, string.data()); + CHECK(utf8 != nullptr); + + auto narrow = JS::EncodeUtf8ToNarrow(cx, utf8.get()); + CHECK(narrow != nullptr); + + CHECK(narrow.get() == string); + } + return true; +} +END_TEST(testCharacterEncoding_narrow_roundtrip) + +BEGIN_TEST(testCharacterEncoding_wide_roundtrip) { + // Change the locale to be UTF-8 aware for the emoji string. + ToUTF8Locale utf8locale; + + // Skip this test if UTF-8 isn't supported on this system. + if (!utf8locale.supported()) { + return true; + } + + for (std::wstring_view string : { + L"", + L"a", + L"abc", + L"ä", + L"💩", + }) { + auto utf8 = JS::EncodeWideToUtf8(cx, string.data()); + CHECK(utf8 != nullptr); + + auto wide = JS::EncodeUtf8ToWide(cx, utf8.get()); + CHECK(wide != nullptr); + + CHECK(wide.get() == string); + } + return true; +} +END_TEST(testCharacterEncoding_wide_roundtrip) |