diff options
Diffstat (limited to 'js/src/jsapi-tests/testParserAtom.cpp')
-rw-r--r-- | js/src/jsapi-tests/testParserAtom.cpp | 445 |
1 files changed, 445 insertions, 0 deletions
diff --git a/js/src/jsapi-tests/testParserAtom.cpp b/js/src/jsapi-tests/testParserAtom.cpp new file mode 100644 index 0000000000..8a1fb6ff53 --- /dev/null +++ b/js/src/jsapi-tests/testParserAtom.cpp @@ -0,0 +1,445 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mozilla/Range.h" // mozilla::Range +#include "mozilla/Utf8.h" // mozilla::Utf8Unit + +#include <string> // std::char_traits +#include <utility> // std::initializer_list +#include <vector> // std::vector + +#include "frontend/FrontendContext.h" // AutoReportFrontendContext +#include "frontend/ParserAtom.h" // js::frontend::ParserAtomsTable, js::frontend::WellKnownParserAtoms +#include "js/TypeDecls.h" // JS::Latin1Char +#include "jsapi-tests/tests.h" + +// Test empty strings behave consistently. +BEGIN_TEST(testParserAtom_empty) { + using js::frontend::ParserAtom; + using js::frontend::ParserAtomsTable; + using js::frontend::ParserAtomVector; + using js::frontend::TaggedParserAtomIndex; + + js::AutoReportFrontendContext fc(cx); + js::LifoAlloc alloc(512); + ParserAtomsTable atomTable(alloc); + + const char ascii[] = {}; + const JS::Latin1Char latin1[] = {}; + const mozilla::Utf8Unit utf8[] = {}; + const char16_t char16[] = {}; + + // Check that the well-known empty atom matches for different entry points. + auto refIndex = TaggedParserAtomIndex::WellKnown::empty(); + CHECK(atomTable.internAscii(&fc, ascii, 0) == refIndex); + CHECK(atomTable.internLatin1(&fc, latin1, 0) == refIndex); + CHECK(atomTable.internUtf8(&fc, utf8, 0) == refIndex); + CHECK(atomTable.internChar16(&fc, char16, 0) == refIndex); + + return true; +} +END_TEST(testParserAtom_empty) + +// Test length-1 fast-path is consistent across entry points for ASCII. +BEGIN_TEST(testParserAtom_tiny1_ASCII) { + using js::frontend::ParserAtom; + using js::frontend::ParserAtomsTable; + using js::frontend::ParserAtomVector; + using js::frontend::WellKnownParserAtoms; + + js::AutoReportFrontendContext fc(cx); + js::LifoAlloc alloc(512); + ParserAtomsTable atomTable(alloc); + + char16_t a = 'a'; + const char ascii[] = {'a'}; + JS::Latin1Char latin1[] = {'a'}; + const mozilla::Utf8Unit utf8[] = {mozilla::Utf8Unit('a')}; + char16_t char16[] = {'a'}; + + auto refIndex = WellKnownParserAtoms::getSingleton().lookupTinyIndex(&a, 1); + CHECK(refIndex); + CHECK(atomTable.internAscii(&fc, ascii, 1) == refIndex); + CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); + CHECK(atomTable.internUtf8(&fc, utf8, 1) == refIndex); + CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex); + + return true; +} +END_TEST(testParserAtom_tiny1_ASCII) + +// Test length-1 fast-path is consistent across entry points for non-ASCII. +BEGIN_TEST(testParserAtom_tiny1_nonASCII) { + using js::frontend::ParserAtom; + using js::frontend::ParserAtomsTable; + using js::frontend::ParserAtomVector; + using js::frontend::WellKnownParserAtoms; + + js::AutoReportFrontendContext fc(cx); + js::LifoAlloc alloc(512); + ParserAtomsTable atomTable(alloc); + + { + char16_t euro = 0x0080; + JS::Latin1Char latin1[] = {0x80}; + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)), + mozilla::Utf8Unit(static_cast<unsigned char>(0x80))}; + char16_t char16[] = {0x0080}; + + auto refIndex = + WellKnownParserAtoms::getSingleton().lookupTinyIndex(&euro, 1); + CHECK(refIndex); + CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); + CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex); + CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex); + } + + { + char16_t frac12 = 0x00BD; + JS::Latin1Char latin1[] = {0xBD}; + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)), + mozilla::Utf8Unit(static_cast<unsigned char>(0xBD))}; + char16_t char16[] = {0x00BD}; + + auto refIndex = + WellKnownParserAtoms::getSingleton().lookupTinyIndex(½, 1); + CHECK(refIndex); + CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); + CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex); + CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex); + } + + { + char16_t iquest = 0x00BF; + JS::Latin1Char latin1[] = {0xBF}; + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)), + mozilla::Utf8Unit(static_cast<unsigned char>(0xBF))}; + char16_t char16[] = {0x00BF}; + + auto refIndex = + WellKnownParserAtoms::getSingleton().lookupTinyIndex(¿, 1); + CHECK(refIndex); + CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); + CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex); + CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex); + } + + { + char16_t agrave = 0x00C0; + JS::Latin1Char latin1[] = {0xC0}; + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)), + mozilla::Utf8Unit(static_cast<unsigned char>(0x80))}; + char16_t char16[] = {0x00C0}; + + auto refIndex = + WellKnownParserAtoms::getSingleton().lookupTinyIndex(à, 1); + CHECK(refIndex); + CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); + CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex); + CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex); + } + + { + char16_t ae = 0x00E6; + JS::Latin1Char latin1[] = {0xE6}; + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)), + mozilla::Utf8Unit(static_cast<unsigned char>(0xA6))}; + char16_t char16[] = {0x00E6}; + + auto refIndex = + WellKnownParserAtoms::getSingleton().lookupTinyIndex(&ae, 1); + CHECK(refIndex); + CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); + CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex); + CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex); + } + + { + char16_t yuml = 0x00FF; + JS::Latin1Char latin1[] = {0xFF}; + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)), + mozilla::Utf8Unit(static_cast<unsigned char>(0xBF))}; + char16_t char16[] = {0x00FF}; + + auto refIndex = + WellKnownParserAtoms::getSingleton().lookupTinyIndex(ÿ, 1); + CHECK(refIndex); + CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); + CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex); + CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex); + } + + return true; +} +END_TEST(testParserAtom_tiny1_nonASCII) + +// Test for tiny1 UTF-8 with valid/invalid code units. +// +// NOTE: Passing invalid UTF-8 to internUtf8 hits assertion failure, so +// test in the opposite way. +// lookupTinyIndexUTF8 is used inside internUtf8. +BEGIN_TEST(testParserAtom_tiny1_invalidUTF8) { + using js::frontend::ParserAtom; + using js::frontend::ParserAtomsTable; + using js::frontend::WellKnownParserAtoms; + + js::AutoReportFrontendContext fc(cx); + js::LifoAlloc alloc(512); + ParserAtomsTable atomTable(alloc); + + { + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC1)), + mozilla::Utf8Unit(static_cast<unsigned char>(0x80))}; + + CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); + } + + { + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)), + mozilla::Utf8Unit(static_cast<unsigned char>(0x7F))}; + + CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); + } + + { + JS::Latin1Char latin1[] = {0x80}; + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)), + mozilla::Utf8Unit(static_cast<unsigned char>(0x80))}; + + auto refIndex = + WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2); + CHECK(refIndex); + CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); + } + + { + JS::Latin1Char latin1[] = {0xBF}; + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)), + mozilla::Utf8Unit(static_cast<unsigned char>(0xBF))}; + + auto refIndex = + WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2); + CHECK(refIndex); + CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); + } + + { + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)), + mozilla::Utf8Unit(static_cast<unsigned char>(0xC0))}; + + CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); + } + + { + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)), + mozilla::Utf8Unit(static_cast<unsigned char>(0x7F))}; + + CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); + } + + { + JS::Latin1Char latin1[] = {0xC0}; + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)), + mozilla::Utf8Unit(static_cast<unsigned char>(0x80))}; + + auto refIndex = + WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2); + CHECK(refIndex); + CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); + } + + { + JS::Latin1Char latin1[] = {0xFF}; + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)), + mozilla::Utf8Unit(static_cast<unsigned char>(0xBF))}; + + auto refIndex = + WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2); + CHECK(refIndex); + CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex); + } + + { + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)), + mozilla::Utf8Unit(static_cast<unsigned char>(0xC0))}; + + CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); + } + + { + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC4)), + mozilla::Utf8Unit(static_cast<unsigned char>(0x7F))}; + + CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); + } + + { + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC4)), + mozilla::Utf8Unit(static_cast<unsigned char>(0x80))}; + + CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); + } + + { + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC4)), + mozilla::Utf8Unit(static_cast<unsigned char>(0xBF))}; + + CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); + } + + { + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit(static_cast<unsigned char>(0xC4)), + mozilla::Utf8Unit(static_cast<unsigned char>(0xC0))}; + + CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2)); + } + + return true; +} +END_TEST(testParserAtom_tiny1_invalidUTF8) + +// Test length-2 fast-path is consistent across entry points. +BEGIN_TEST(testParserAtom_tiny2) { + using js::frontend::ParserAtom; + using js::frontend::ParserAtomsTable; + using js::frontend::ParserAtomVector; + using js::frontend::WellKnownParserAtoms; + + js::AutoReportFrontendContext fc(cx); + js::LifoAlloc alloc(512); + ParserAtomsTable atomTable(alloc); + + const char ascii[] = {'a', '0'}; + JS::Latin1Char latin1[] = {'a', '0'}; + const mozilla::Utf8Unit utf8[] = {mozilla::Utf8Unit('a'), + mozilla::Utf8Unit('0')}; + char16_t char16[] = {'a', '0'}; + + auto refIndex = + WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 2); + CHECK(refIndex); + CHECK(atomTable.internAscii(&fc, ascii, 2) == refIndex); + CHECK(atomTable.internLatin1(&fc, latin1, 2) == refIndex); + CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex); + CHECK(atomTable.internChar16(&fc, char16, 2) == refIndex); + + // Note: If Latin1-Extended characters become supported, then UTF-8 behaviour + // should be tested. + char16_t ae0[] = {0x00E6, '0'}; + CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ae0, 2)); + + return true; +} +END_TEST(testParserAtom_tiny2) + +// Test length-3 fast-path is consistent across entry points. +BEGIN_TEST(testParserAtom_int) { + using js::frontend::ParserAtom; + using js::frontend::ParserAtomsTable; + using js::frontend::ParserAtomVector; + using js::frontend::WellKnownParserAtoms; + + js::AutoReportFrontendContext fc(cx); + js::LifoAlloc alloc(512); + ParserAtomsTable atomTable(alloc); + + { + const char ascii[] = {'1', '0', '0'}; + JS::Latin1Char latin1[] = {'1', '0', '0'}; + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit('1'), mozilla::Utf8Unit('0'), mozilla::Utf8Unit('0')}; + char16_t char16[] = {'1', '0', '0'}; + + auto refIndex = + WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3); + CHECK(refIndex); + CHECK(atomTable.internAscii(&fc, ascii, 3) == refIndex); + CHECK(atomTable.internLatin1(&fc, latin1, 3) == refIndex); + CHECK(atomTable.internUtf8(&fc, utf8, 3) == refIndex); + CHECK(atomTable.internChar16(&fc, char16, 3) == refIndex); + } + + { + const char ascii[] = {'2', '5', '5'}; + JS::Latin1Char latin1[] = {'2', '5', '5'}; + const mozilla::Utf8Unit utf8[] = { + mozilla::Utf8Unit('2'), mozilla::Utf8Unit('5'), mozilla::Utf8Unit('5')}; + char16_t char16[] = {'2', '5', '5'}; + + auto refIndex = + WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3); + CHECK(refIndex); + CHECK(atomTable.internAscii(&fc, ascii, 3) == refIndex); + CHECK(atomTable.internLatin1(&fc, latin1, 3) == refIndex); + CHECK(atomTable.internUtf8(&fc, utf8, 3) == refIndex); + CHECK(atomTable.internChar16(&fc, char16, 3) == refIndex); + } + + { + const char ascii[] = {'0', '9', '9'}; + + CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3)); + } + + { + const char ascii[] = {'0', 'F', 'F'}; + + CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3)); + } + + { + const char ascii[] = {'1', '0', 'A'}; + + CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3)); + } + + { + const char ascii[] = {'1', '0', 'a'}; + + CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3)); + } + + { + const char ascii[] = {'2', '5', '6'}; + + CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3)); + } + + { + const char ascii[] = {'3', '0', '0'}; + + CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 3)); + } + + return true; +} +END_TEST(testParserAtom_int) + +// "€" U+0080 +// "½" U+00BD +// "¿" U+00BF +// "À" U+00C0 +// "æ" U+00E6 +// "ÿ" U+00FF +// "π" U+03C0 +// "🍕" U+1F355 |