/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "mozilla/Maybe.h" // mozilla::Maybe #include "mozilla/Utf8.h" // mozilla::IsTrailingUnit, mozilla::Utf8Unit, mozilla::DecodeOneUtf8CodePoint #include // UINT8_MAX #include // uint16_t #include "js/Exception.h" // JS_IsExceptionPending, JS_ClearPendingException #include "js/RootingAPI.h" // JS::Rooted, JS::MutableHandle #include "jsapi-tests/tests.h" // BEGIN_TEST, END_TEST, CHECK #include "vm/JSAtom.h" // js::AtomizeChars, js::AtomizeUTF8Chars #include "vm/StringType.h" // JSAtom using mozilla::DecodeOneUtf8CodePoint; using mozilla::IsAscii; using mozilla::IsTrailingUnit; using mozilla::Maybe; using mozilla::Utf8Unit; using JS::Latin1Char; using JS::MutableHandle; using JS::Rooted; BEGIN_TEST(testAtomizeTwoByteUTF8) { Rooted atom16(cx); Rooted atom8(cx); for (uint16_t i = 0; i <= UINT8_MAX; i++) { // Test cases where the first unit is ASCII. if (IsAscii(char16_t(i))) { for (uint16_t j = 0; j <= UINT8_MAX; j++) { if (IsAscii(char16_t(j))) { // If both units are ASCII, the sequence encodes a two-code point // string. if (!shouldBeTwoCodePoints(i, j, &atom16, &atom8)) { return false; } } else { // ASCII followed by non-ASCII should be invalid. if (!shouldBeInvalid(i, j)) { return false; } } } continue; } // Test remaining cases where the first unit isn't a two-byte lead. if ((i & 0b1110'0000) != 0b1100'0000) { for (uint16_t j = 0; j <= UINT8_MAX; j++) { // If the first unit isn't a two-byte lead, the sequence is invalid no // matter what the second unit is. if (!shouldBeInvalid(i, j)) { return false; } } continue; } // Test remaining cases where the first unit is the two-byte lead of a // non-Latin-1 code point. if (i >= 0b1100'0100) { for (uint16_t j = 0; j <= UINT8_MAX; j++) { if (IsTrailingUnit(Utf8Unit(static_cast(j)))) { if (!shouldBeSingleNonLatin1(i, j, &atom16, &atom8)) { return false; } } else { if (!shouldBeInvalid(i, j)) { return false; } } } continue; } // Test remaining cases where the first unit is the two-byte lead of an // overlong ASCII code point. if (i < 0b1100'0010) { for (uint16_t j = 0; j <= UINT8_MAX; j++) { if (!shouldBeInvalid(i, j)) { return false; } } continue; } // Finally, test remaining cases where the first unit is the two-byte lead // of a Latin-1 code point. for (uint16_t j = 0; j <= UINT8_MAX; j++) { if (IsTrailingUnit(Utf8Unit(static_cast(j)))) { if (!shouldBeSingleLatin1(i, j, &atom16, &atom8)) { return false; } } else { if (!shouldBeInvalid(i, j)) { return false; } } } } return true; } bool shouldBeTwoCodePoints(uint16_t first, uint16_t second, MutableHandle atom16, MutableHandle atom8) { CHECK(first <= UINT8_MAX); CHECK(second <= UINT8_MAX); CHECK(IsAscii(char16_t(first))); CHECK(IsAscii(char16_t(second))); const char16_t utf16[] = {static_cast(first), static_cast(second)}; atom16.set(js::AtomizeChars(cx, utf16, 2)); CHECK(atom16); CHECK(atom16->length() == 2); CHECK(atom16->latin1OrTwoByteChar(0) == first); CHECK(atom16->latin1OrTwoByteChar(1) == second); const char utf8[] = {static_cast(first), static_cast(second)}; atom8.set(js::AtomizeUTF8Chars(cx, utf8, 2)); CHECK(atom8); CHECK(atom8->length() == 2); CHECK(atom8->latin1OrTwoByteChar(0) == first); CHECK(atom8->latin1OrTwoByteChar(1) == second); CHECK(atom16 == atom8); return true; } bool shouldBeOneCodePoint(uint16_t first, uint16_t second, char32_t v, MutableHandle atom16, MutableHandle atom8) { CHECK(first <= UINT8_MAX); CHECK(second <= UINT8_MAX); CHECK(v <= UINT16_MAX); const char16_t utf16[] = {static_cast(v)}; atom16.set(js::AtomizeChars(cx, utf16, 1)); CHECK(atom16); CHECK(atom16->length() == 1); CHECK(atom16->latin1OrTwoByteChar(0) == v); const char utf8[] = {static_cast(first), static_cast(second)}; atom8.set(js::AtomizeUTF8Chars(cx, utf8, 2)); CHECK(atom8); CHECK(atom8->length() == 1); CHECK(atom8->latin1OrTwoByteChar(0) == v); CHECK(atom16 == atom8); return true; } bool shouldBeSingleNonLatin1(uint16_t first, uint16_t second, MutableHandle atom16, MutableHandle atom8) { CHECK(first <= UINT8_MAX); CHECK(second <= UINT8_MAX); const char bytes[] = {static_cast(first), static_cast(second)}; const char* iter = &bytes[1]; Maybe cp = DecodeOneUtf8CodePoint(Utf8Unit(bytes[0]), &iter, bytes + 2); CHECK(cp.isSome()); char32_t v = cp.value(); CHECK(v > UINT8_MAX); return shouldBeOneCodePoint(first, second, v, atom16, atom8); } bool shouldBeSingleLatin1(uint16_t first, uint16_t second, MutableHandle atom16, MutableHandle atom8) { CHECK(first <= UINT8_MAX); CHECK(second <= UINT8_MAX); const char bytes[] = {static_cast(first), static_cast(second)}; const char* iter = &bytes[1]; Maybe cp = DecodeOneUtf8CodePoint(Utf8Unit(bytes[0]), &iter, bytes + 2); CHECK(cp.isSome()); char32_t v = cp.value(); CHECK(v <= UINT8_MAX); return shouldBeOneCodePoint(first, second, v, atom16, atom8); } bool shouldBeInvalid(uint16_t first, uint16_t second) { CHECK(first <= UINT8_MAX); CHECK(second <= UINT8_MAX); const char invalid[] = {static_cast(first), static_cast(second)}; CHECK(!js::AtomizeUTF8Chars(cx, invalid, 2)); CHECK(JS_IsExceptionPending(cx)); JS_ClearPendingException(cx); return true; } END_TEST(testAtomizeTwoByteUTF8)