/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "mozilla/TextUtils.h"

#include <algorithm>
#include <clocale>
#include <cstring>
#include <cwchar>
#include <initializer_list>
#include <iterator>
#include <string>
#include <string_view>

#include "js/CharacterEncoding.h"
#include "jsapi-tests/tests.h"

/**
 * Compare two NUL-terminated strings for equality, ignoring ASCII case.
 *
 * Case folding is only applied when both characters at a position are ASCII
 * letters; every other pair of characters has to match exactly.
 *
 * @param xs First NUL-terminated string.
 * @param ys Second NUL-terminated string.
 * @return true when both strings have the same length and all characters
 *         match under the rule above.
 */
static bool EqualsIgnoreCase(const char* xs, const char* ys) {
  while (*xs && *ys) {
    char x = *xs++;
    char y = *ys++;

    // Convert both to lower-case.
    if (mozilla::IsAsciiAlpha(x) && mozilla::IsAsciiAlpha(y)) {
      x |= 0x20;
      y |= 0x20;
    }

    // Fail if the characters aren't the same.
    if (x != y) {
      return false;
    }
  }

  // Both strings must be read to the end.
  return !*xs && !*ys;
}

/**
 * Scoped helper which switches the process locale to "en_US.UTF-8" for the
 * lifetime of the object and restores the previously active locale in its
 * destructor.
 *
 * Call supported() after construction to find out whether both the system
 * default locale and the newly selected locale use a UTF-8 code page. Tests
 * which depend on UTF-8 should skip themselves when it returns false.
 */
class ToUTF8Locale {
  // Copy of the locale name which was active at construction time.
  //
  // std::setlocale returns a pointer to storage which a later std::setlocale
  // call is allowed to overwrite, so the name has to be copied before the
  // locale is changed. Keeping only the raw pointer would leave the destructor
  // restoring from a potentially stale string.
  std::string previousLocale_;

  // Whether |previousLocale_| holds a locale name to restore.
  bool hasPreviousLocale_ = false;

  bool supported_ = false;

 public:
  ToUTF8Locale() {
    // Store the old locale so we can reset it in the destructor.
    if (const char* previousLocale = std::setlocale(LC_ALL, nullptr)) {
      previousLocale_ = previousLocale;
      hasPreviousLocale_ = true;
    }

    // Query the system default locale.
    const char* defaultLocalePtr = std::setlocale(LC_ALL, "");
    if (!defaultLocalePtr) {
      // std::setlocale returns nullptr on failure.
      return;
    }

    // Copy the name right away: the next std::setlocale call may overwrite
    // the returned string.
    const std::string defaultLocale = defaultLocalePtr;

    // Switch the default locale to be UTF-8 aware.
    const char* newLocale = std::setlocale(LC_ALL, "en_US.UTF-8");
    if (!newLocale) {
      // std::setlocale returns nullptr on failure.
      return;
    }

    const char* defaultCodepage = std::strchr(defaultLocale.c_str(), '.');
    const char* newCodepage = std::strchr(newLocale, '.');

    // Return if either the default or new locale don't contain a code-page.
    if (!defaultCodepage || !newCodepage) {
      return;
    }

    // Skip past the '.'.
    defaultCodepage++;
    newCodepage++;

    // UTF-8 is supported when the default locale and new locale support it:
    //
    // The default locale needs to support UTF-8, because this test is compiled
    // using the default locale.
    //
    // The new locale needs to support UTF-8 to ensure UTF-8 encoding works at
    // runtime.
    supported_ = EqualsIgnoreCase(defaultCodepage, "UTF-8") &&
                 EqualsIgnoreCase(newCodepage, "UTF-8");
  }

  // The destructor restores process-global state, so copying this guard would
  // restore the locale more than once.
  ToUTF8Locale(const ToUTF8Locale&) = delete;
  ToUTF8Locale& operator=(const ToUTF8Locale&) = delete;

  // Returns true when both the default locale and the new locale use UTF-8.
  bool supported() const { return supported_; }

  ~ToUTF8Locale() {
    // Restore the previous locale.
    if (hasPreviousLocale_) {
      std::setlocale(LC_ALL, previousLocale_.c_str());
    }
  }
};

BEGIN_TEST(testCharacterEncoding_narrow_to_utf8) {
  // Assume the narrow charset is ASCII-compatible. ASCII to UTF-8 conversion is
  // a no-op.
  //
  // Note: each std::string_view is built from a C string, so "abc\0def" ends
  // at the embedded NUL and has length 3.
  for (std::string_view string : {
           "",
           "a",
           "abc",
           "abc\0def",
       }) {
    auto utf8 = JS::EncodeNarrowToUtf8(cx, string.data());
    CHECK(utf8 != nullptr);
    CHECK_EQUAL(std::strlen(utf8.get()), string.length());
    CHECK(utf8.get() == string);
  }
  return true;
}
END_TEST(testCharacterEncoding_narrow_to_utf8)

BEGIN_TEST(testCharacterEncoding_wide_to_utf8) {
  // Assume the wide charset is ASCII-compatible. ASCII to UTF-8 conversion is
  // a no-op.
  for (std::wstring_view string : {
           L"",
           L"a",
           L"abc",
           L"abc\0def",
       }) {
    auto utf8 = JS::EncodeWideToUtf8(cx, string.data());
    CHECK(utf8 != nullptr);
    CHECK_EQUAL(std::strlen(utf8.get()), string.length());

    // The lengths were checked to be equal above, so comparing element-wise
    // over the wide string's range stays within the UTF-8 result.
    CHECK(std::equal(
        string.begin(), string.end(), utf8.get(),
        [](wchar_t x, char y) { return char32_t(x) == char32_t(y); }));
  }
  return true;
}
END_TEST(testCharacterEncoding_wide_to_utf8)

BEGIN_TEST(testCharacterEncoding_wide_to_utf8_non_ascii) {
  // Change the locale to be UTF-8 aware for the emoji string.
  ToUTF8Locale utf8locale;

  // Skip this test if UTF-8 isn't supported on this system.
  if (!utf8locale.supported()) {
    return true;
  }

  {
    // U+00E4 encodes to the two UTF-8 bytes C3 A4.
    std::wstring_view string = L"ä";
    auto utf8 = JS::EncodeWideToUtf8(cx, string.data());
    CHECK(utf8 != nullptr);
    CHECK_EQUAL(std::strlen(utf8.get()), 2U);
    CHECK_EQUAL(utf8[0], char(0xC3));
    CHECK_EQUAL(utf8[1], char(0xA4));
  }
  {
    // U+1F4A9 encodes to the four UTF-8 bytes F0 9F 92 A9.
    std::wstring_view string = L"💩";
    auto utf8 = JS::EncodeWideToUtf8(cx, string.data());
    CHECK(utf8 != nullptr);
    CHECK_EQUAL(std::strlen(utf8.get()), 4U);
    CHECK_EQUAL(utf8[0], char(0xF0));
    CHECK_EQUAL(utf8[1], char(0x9F));
    CHECK_EQUAL(utf8[2], char(0x92));
    CHECK_EQUAL(utf8[3], char(0xA9));
  }
  return true;
}
END_TEST(testCharacterEncoding_wide_to_utf8_non_ascii)

BEGIN_TEST(testCharacterEncoding_utf8_to_narrow) {
  // Assume the narrow charset is ASCII-compatible. ASCII to UTF-8 conversion is
  // a no-op.
  for (std::string_view string : {
           "",
           "a",
           "abc",
           "abc\0def",
       }) {
    auto narrow = JS::EncodeUtf8ToNarrow(cx, string.data());
    CHECK(narrow != nullptr);
    CHECK_EQUAL(std::strlen(narrow.get()), string.length());
    CHECK(narrow.get() == string);
  }
  return true;
}
END_TEST(testCharacterEncoding_utf8_to_narrow)

BEGIN_TEST(testCharacterEncoding_utf8_to_wide) {
  // Assume the wide charset is ASCII-compatible. ASCII to UTF-8 conversion is
  // a no-op.
  for (std::string_view string : {
           "",
           "a",
           "abc",
           "abc\0def",
       }) {
    auto wide = JS::EncodeUtf8ToWide(cx, string.data());
    CHECK(wide != nullptr);
    CHECK_EQUAL(std::wcslen(wide.get()), string.length());

    // The lengths were checked to be equal above, so comparing element-wise
    // over the UTF-8 string's range stays within the wide result.
    CHECK(std::equal(
        string.begin(), string.end(), wide.get(),
        [](char x, wchar_t y) { return char32_t(x) == char32_t(y); }));
  }
  return true;
}
END_TEST(testCharacterEncoding_utf8_to_wide)

BEGIN_TEST(testCharacterEncoding_narrow_roundtrip) {
  // Change the locale to be UTF-8 aware for the emoji string.
  ToUTF8Locale utf8locale;

  // Skip this test if UTF-8 isn't supported on this system.
  if (!utf8locale.supported()) {
    return true;
  }

  // Narrow -> UTF-8 -> narrow has to reproduce the input.
  for (std::string_view string : {
           "",
           "a",
           "abc",
           "ä",
           "💩",
       }) {
    auto utf8 = JS::EncodeNarrowToUtf8(cx, string.data());
    CHECK(utf8 != nullptr);

    auto narrow = JS::EncodeUtf8ToNarrow(cx, utf8.get());
    CHECK(narrow != nullptr);

    CHECK(narrow.get() == string);
  }
  return true;
}
END_TEST(testCharacterEncoding_narrow_roundtrip)

BEGIN_TEST(testCharacterEncoding_wide_roundtrip) {
  // Change the locale to be UTF-8 aware for the emoji string.
  ToUTF8Locale utf8locale;

  // Skip this test if UTF-8 isn't supported on this system.
  if (!utf8locale.supported()) {
    return true;
  }

  // Wide -> UTF-8 -> wide has to reproduce the input.
  for (std::wstring_view string : {
           L"",
           L"a",
           L"abc",
           L"ä",
           L"💩",
       }) {
    auto utf8 = JS::EncodeWideToUtf8(cx, string.data());
    CHECK(utf8 != nullptr);

    auto wide = JS::EncodeUtf8ToWide(cx, utf8.get());
    CHECK(wide != nullptr);

    CHECK(wide.get() == string);
  }
  return true;
}
END_TEST(testCharacterEncoding_wide_roundtrip)