summaryrefslogtreecommitdiffstats
path: root/js/src/jsapi-tests/testAtomizeUtf8NonAsciiLatin1CodePoint.cpp
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--js/src/jsapi-tests/testAtomizeUtf8NonAsciiLatin1CodePoint.cpp212
1 files changed, 212 insertions, 0 deletions
diff --git a/js/src/jsapi-tests/testAtomizeUtf8NonAsciiLatin1CodePoint.cpp b/js/src/jsapi-tests/testAtomizeUtf8NonAsciiLatin1CodePoint.cpp
new file mode 100644
index 0000000000..4b16d303a5
--- /dev/null
+++ b/js/src/jsapi-tests/testAtomizeUtf8NonAsciiLatin1CodePoint.cpp
@@ -0,0 +1,212 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/Maybe.h" // mozilla::Maybe
+#include "mozilla/Utf8.h" // mozilla::IsTrailingUnit, mozilla::Utf8Unit, mozilla::DecodeOneUtf8CodePoint
+
+#include <inttypes.h> // UINT8_MAX
+#include <stdint.h> // uint16_t
+
+#include "js/Exception.h" // JS_IsExceptionPending, JS_ClearPendingException
+#include "js/RootingAPI.h" // JS::Rooted, JS::MutableHandle
+#include "jsapi-tests/tests.h" // BEGIN_TEST, END_TEST, CHECK
+#include "vm/JSAtom.h" // js::AtomizeChars, js::AtomizeUTF8Chars
+#include "vm/StringType.h" // JSAtom
+
+using mozilla::DecodeOneUtf8CodePoint;
+using mozilla::IsAscii;
+using mozilla::IsTrailingUnit;
+using mozilla::Maybe;
+using mozilla::Utf8Unit;
+
+using JS::Latin1Char;
+using JS::MutableHandle;
+using JS::Rooted;
+
+BEGIN_TEST(testAtomizeTwoByteUTF8) {
+ Rooted<JSAtom*> atom16(cx);
+ Rooted<JSAtom*> atom8(cx);
+
+ for (uint16_t i = 0; i <= UINT8_MAX; i++) {
+ // Test cases where the first unit is ASCII.
+ if (IsAscii(char16_t(i))) {
+ for (uint16_t j = 0; j <= UINT8_MAX; j++) {
+ if (IsAscii(char16_t(j))) {
+ // If both units are ASCII, the sequence encodes a two-code point
+ // string.
+ if (!shouldBeTwoCodePoints(i, j, &atom16, &atom8)) {
+ return false;
+ }
+ } else {
+ // ASCII followed by non-ASCII should be invalid.
+ if (!shouldBeInvalid(i, j)) {
+ return false;
+ }
+ }
+ }
+
+ continue;
+ }
+
+ // Test remaining cases where the first unit isn't a two-byte lead.
+ if ((i & 0b1110'0000) != 0b1100'0000) {
+ for (uint16_t j = 0; j <= UINT8_MAX; j++) {
+ // If the first unit isn't a two-byte lead, the sequence is invalid no
+ // matter what the second unit is.
+ if (!shouldBeInvalid(i, j)) {
+ return false;
+ }
+ }
+
+ continue;
+ }
+
+ // Test remaining cases where the first unit is the two-byte lead of a
+ // non-Latin-1 code point.
+ if (i >= 0b1100'0100) {
+ for (uint16_t j = 0; j <= UINT8_MAX; j++) {
+ if (IsTrailingUnit(Utf8Unit(static_cast<uint8_t>(j)))) {
+ if (!shouldBeSingleNonLatin1(i, j, &atom16, &atom8)) {
+ return false;
+ }
+ } else {
+ if (!shouldBeInvalid(i, j)) {
+ return false;
+ }
+ }
+ }
+
+ continue;
+ }
+
+ // Test remaining cases where the first unit is the two-byte lead of an
+ // overlong ASCII code point.
+ if (i < 0b1100'0010) {
+ for (uint16_t j = 0; j <= UINT8_MAX; j++) {
+ if (!shouldBeInvalid(i, j)) {
+ return false;
+ }
+ }
+
+ continue;
+ }
+
+ // Finally, test remaining cases where the first unit is the two-byte lead
+ // of a Latin-1 code point.
+ for (uint16_t j = 0; j <= UINT8_MAX; j++) {
+ if (IsTrailingUnit(Utf8Unit(static_cast<uint8_t>(j)))) {
+ if (!shouldBeSingleLatin1(i, j, &atom16, &atom8)) {
+ return false;
+ }
+ } else {
+ if (!shouldBeInvalid(i, j)) {
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+bool shouldBeTwoCodePoints(uint16_t first, uint16_t second,
+ MutableHandle<JSAtom*> atom16,
+ MutableHandle<JSAtom*> atom8) {
+ CHECK(first <= UINT8_MAX);
+ CHECK(second <= UINT8_MAX);
+ CHECK(IsAscii(char16_t(first)));
+ CHECK(IsAscii(char16_t(second)));
+
+ const char16_t utf16[] = {static_cast<char16_t>(first),
+ static_cast<char16_t>(second)};
+ atom16.set(js::AtomizeChars(cx, utf16, 2));
+ CHECK(atom16);
+ CHECK(atom16->length() == 2);
+ CHECK(atom16->latin1OrTwoByteChar(0) == first);
+ CHECK(atom16->latin1OrTwoByteChar(1) == second);
+
+ const char utf8[] = {static_cast<char>(first), static_cast<char>(second)};
+ atom8.set(js::AtomizeUTF8Chars(cx, utf8, 2));
+ CHECK(atom8);
+ CHECK(atom8->length() == 2);
+ CHECK(atom8->latin1OrTwoByteChar(0) == first);
+ CHECK(atom8->latin1OrTwoByteChar(1) == second);
+
+ CHECK(atom16 == atom8);
+
+ return true;
+}
+
+bool shouldBeOneCodePoint(uint16_t first, uint16_t second, char32_t v,
+ MutableHandle<JSAtom*> atom16,
+ MutableHandle<JSAtom*> atom8) {
+ CHECK(first <= UINT8_MAX);
+ CHECK(second <= UINT8_MAX);
+ CHECK(v <= UINT16_MAX);
+
+ const char16_t utf16[] = {static_cast<char16_t>(v)};
+ atom16.set(js::AtomizeChars(cx, utf16, 1));
+ CHECK(atom16);
+ CHECK(atom16->length() == 1);
+ CHECK(atom16->latin1OrTwoByteChar(0) == v);
+
+ const char utf8[] = {static_cast<char>(first), static_cast<char>(second)};
+ atom8.set(js::AtomizeUTF8Chars(cx, utf8, 2));
+ CHECK(atom8);
+ CHECK(atom8->length() == 1);
+ CHECK(atom8->latin1OrTwoByteChar(0) == v);
+
+ CHECK(atom16 == atom8);
+
+ return true;
+}
+
+bool shouldBeSingleNonLatin1(uint16_t first, uint16_t second,
+ MutableHandle<JSAtom*> atom16,
+ MutableHandle<JSAtom*> atom8) {
+ CHECK(first <= UINT8_MAX);
+ CHECK(second <= UINT8_MAX);
+
+ const char bytes[] = {static_cast<char>(first), static_cast<char>(second)};
+ const char* iter = &bytes[1];
+ Maybe<char32_t> cp =
+ DecodeOneUtf8CodePoint(Utf8Unit(bytes[0]), &iter, bytes + 2);
+ CHECK(cp.isSome());
+
+ char32_t v = cp.value();
+ CHECK(v > UINT8_MAX);
+
+ return shouldBeOneCodePoint(first, second, v, atom16, atom8);
+}
+
+bool shouldBeSingleLatin1(uint16_t first, uint16_t second,
+ MutableHandle<JSAtom*> atom16,
+ MutableHandle<JSAtom*> atom8) {
+ CHECK(first <= UINT8_MAX);
+ CHECK(second <= UINT8_MAX);
+
+ const char bytes[] = {static_cast<char>(first), static_cast<char>(second)};
+ const char* iter = &bytes[1];
+ Maybe<char32_t> cp =
+ DecodeOneUtf8CodePoint(Utf8Unit(bytes[0]), &iter, bytes + 2);
+ CHECK(cp.isSome());
+
+ char32_t v = cp.value();
+ CHECK(v <= UINT8_MAX);
+
+ return shouldBeOneCodePoint(first, second, v, atom16, atom8);
+}
+
+bool shouldBeInvalid(uint16_t first, uint16_t second) {
+ CHECK(first <= UINT8_MAX);
+ CHECK(second <= UINT8_MAX);
+
+ const char invalid[] = {static_cast<char>(first), static_cast<char>(second)};
+ CHECK(!js::AtomizeUTF8Chars(cx, invalid, 2));
+ CHECK(JS_IsExceptionPending(cx));
+ JS_ClearPendingException(cx);
+
+ return true;
+}
+END_TEST(testAtomizeTwoByteUTF8)