diff options
Diffstat (limited to '')
-rw-r--r-- | js/src/jsapi-tests/testAtomizeUtf8NonAsciiLatin1CodePoint.cpp | 212 |
1 files changed, 212 insertions, 0 deletions
diff --git a/js/src/jsapi-tests/testAtomizeUtf8NonAsciiLatin1CodePoint.cpp b/js/src/jsapi-tests/testAtomizeUtf8NonAsciiLatin1CodePoint.cpp new file mode 100644 index 0000000000..4b16d303a5 --- /dev/null +++ b/js/src/jsapi-tests/testAtomizeUtf8NonAsciiLatin1CodePoint.cpp @@ -0,0 +1,212 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/Maybe.h" // mozilla::Maybe +#include "mozilla/Utf8.h" // mozilla::IsTrailingUnit, mozilla::Utf8Unit, mozilla::DecodeOneUtf8CodePoint + +#include <inttypes.h> // UINT8_MAX +#include <stdint.h> // uint16_t + +#include "js/Exception.h" // JS_IsExceptionPending, JS_ClearPendingException +#include "js/RootingAPI.h" // JS::Rooted, JS::MutableHandle +#include "jsapi-tests/tests.h" // BEGIN_TEST, END_TEST, CHECK +#include "vm/JSAtom.h" // js::AtomizeChars, js::AtomizeUTF8Chars +#include "vm/StringType.h" // JSAtom + +using mozilla::DecodeOneUtf8CodePoint; +using mozilla::IsAscii; +using mozilla::IsTrailingUnit; +using mozilla::Maybe; +using mozilla::Utf8Unit; + +using JS::Latin1Char; +using JS::MutableHandle; +using JS::Rooted; + +BEGIN_TEST(testAtomizeTwoByteUTF8) { + Rooted<JSAtom*> atom16(cx); + Rooted<JSAtom*> atom8(cx); + + for (uint16_t i = 0; i <= UINT8_MAX; i++) { + // Test cases where the first unit is ASCII. + if (IsAscii(char16_t(i))) { + for (uint16_t j = 0; j <= UINT8_MAX; j++) { + if (IsAscii(char16_t(j))) { + // If both units are ASCII, the sequence encodes a two-code point + // string. + if (!shouldBeTwoCodePoints(i, j, &atom16, &atom8)) { + return false; + } + } else { + // ASCII followed by non-ASCII should be invalid. + if (!shouldBeInvalid(i, j)) { + return false; + } + } + } + + continue; + } + + // Test remaining cases where the first unit isn't a two-byte lead. + if ((i & 0b1110'0000) != 0b1100'0000) { + for (uint16_t j = 0; j <= UINT8_MAX; j++) { + // If the first unit isn't a two-byte lead, the sequence is invalid no + // matter what the second unit is. + if (!shouldBeInvalid(i, j)) { + return false; + } + } + + continue; + } + + // Test remaining cases where the first unit is the two-byte lead of a + // non-Latin-1 code point. + if (i >= 0b1100'0100) { + for (uint16_t j = 0; j <= UINT8_MAX; j++) { + if (IsTrailingUnit(Utf8Unit(static_cast<uint8_t>(j)))) { + if (!shouldBeSingleNonLatin1(i, j, &atom16, &atom8)) { + return false; + } + } else { + if (!shouldBeInvalid(i, j)) { + return false; + } + } + } + + continue; + } + + // Test remaining cases where the first unit is the two-byte lead of an + // overlong ASCII code point. + if (i < 0b1100'0010) { + for (uint16_t j = 0; j <= UINT8_MAX; j++) { + if (!shouldBeInvalid(i, j)) { + return false; + } + } + + continue; + } + + // Finally, test remaining cases where the first unit is the two-byte lead + // of a Latin-1 code point. + for (uint16_t j = 0; j <= UINT8_MAX; j++) { + if (IsTrailingUnit(Utf8Unit(static_cast<uint8_t>(j)))) { + if (!shouldBeSingleLatin1(i, j, &atom16, &atom8)) { + return false; + } + } else { + if (!shouldBeInvalid(i, j)) { + return false; + } + } + } + } + + return true; +} + +bool shouldBeTwoCodePoints(uint16_t first, uint16_t second, + MutableHandle<JSAtom*> atom16, + MutableHandle<JSAtom*> atom8) { + CHECK(first <= UINT8_MAX); + CHECK(second <= UINT8_MAX); + CHECK(IsAscii(char16_t(first))); + CHECK(IsAscii(char16_t(second))); + + const char16_t utf16[] = {static_cast<char16_t>(first), + static_cast<char16_t>(second)}; + atom16.set(js::AtomizeChars(cx, utf16, 2)); + CHECK(atom16); + CHECK(atom16->length() == 2); + CHECK(atom16->latin1OrTwoByteChar(0) == first); + CHECK(atom16->latin1OrTwoByteChar(1) == second); + + const char utf8[] = {static_cast<char>(first), static_cast<char>(second)}; + atom8.set(js::AtomizeUTF8Chars(cx, utf8, 2)); + CHECK(atom8); + CHECK(atom8->length() == 2); + CHECK(atom8->latin1OrTwoByteChar(0) == first); + CHECK(atom8->latin1OrTwoByteChar(1) == second); + + CHECK(atom16 == atom8); + + return true; +} + +bool shouldBeOneCodePoint(uint16_t first, uint16_t second, char32_t v, + MutableHandle<JSAtom*> atom16, + MutableHandle<JSAtom*> atom8) { + CHECK(first <= UINT8_MAX); + CHECK(second <= UINT8_MAX); + CHECK(v <= UINT16_MAX); + + const char16_t utf16[] = {static_cast<char16_t>(v)}; + atom16.set(js::AtomizeChars(cx, utf16, 1)); + CHECK(atom16); + CHECK(atom16->length() == 1); + CHECK(atom16->latin1OrTwoByteChar(0) == v); + + const char utf8[] = {static_cast<char>(first), static_cast<char>(second)}; + atom8.set(js::AtomizeUTF8Chars(cx, utf8, 2)); + CHECK(atom8); + CHECK(atom8->length() == 1); + CHECK(atom8->latin1OrTwoByteChar(0) == v); + + CHECK(atom16 == atom8); + + return true; +} + +bool shouldBeSingleNonLatin1(uint16_t first, uint16_t second, + MutableHandle<JSAtom*> atom16, + MutableHandle<JSAtom*> atom8) { + CHECK(first <= UINT8_MAX); + CHECK(second <= UINT8_MAX); + + const char bytes[] = {static_cast<char>(first), static_cast<char>(second)}; + const char* iter = &bytes[1]; + Maybe<char32_t> cp = + DecodeOneUtf8CodePoint(Utf8Unit(bytes[0]), &iter, bytes + 2); + CHECK(cp.isSome()); + + char32_t v = cp.value(); + CHECK(v > UINT8_MAX); + + return shouldBeOneCodePoint(first, second, v, atom16, atom8); +} + +bool shouldBeSingleLatin1(uint16_t first, uint16_t second, + MutableHandle<JSAtom*> atom16, + MutableHandle<JSAtom*> atom8) { + CHECK(first <= UINT8_MAX); + CHECK(second <= UINT8_MAX); + + const char bytes[] = {static_cast<char>(first), static_cast<char>(second)}; + const char* iter = &bytes[1]; + Maybe<char32_t> cp = + DecodeOneUtf8CodePoint(Utf8Unit(bytes[0]), &iter, bytes + 2); + CHECK(cp.isSome()); + + char32_t v = cp.value(); + CHECK(v <= UINT8_MAX); + + return shouldBeOneCodePoint(first, second, v, atom16, atom8); +} + +bool shouldBeInvalid(uint16_t first, uint16_t second) { + CHECK(first <= UINT8_MAX); + CHECK(second <= UINT8_MAX); + + const char invalid[] = {static_cast<char>(first), static_cast<char>(second)}; + CHECK(!js::AtomizeUTF8Chars(cx, invalid, 2)); + CHECK(JS_IsExceptionPending(cx)); + JS_ClearPendingException(cx); + + return true; +} +END_TEST(testAtomizeTwoByteUTF8) |