/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "mozilla/Range.h"  // mozilla::Range
#include "mozilla/Utf8.h"   // mozilla::Utf8Unit

#include <string>            // std::char_traits
#include <initializer_list>  // std::initializer_list
#include <vector>            // std::vector

#include "frontend/FrontendContext.h"  // AutoReportFrontendContext
#include "frontend/ParserAtom.h"  // js::frontend::ParserAtomsTable, js::frontend::WellKnownParserAtoms
#include "js/TypeDecls.h"         // JS::Latin1Char
#include "jsapi-tests/tests.h"

// Test empty strings behave consistently.
//
// A zero-length string is interned through each of the four entry points
// (ASCII, Latin-1, UTF-8, UTF-16); every call must produce the well-known
// empty atom index.
BEGIN_TEST(testParserAtom_empty) {
  using js::frontend::ParserAtomsTable;
  using js::frontend::TaggedParserAtomIndex;

  js::AutoReportFrontendContext fc(cx);
  js::LifoAlloc alloc(512);
  ParserAtomsTable atomTable(alloc);

  // Each buffer is only ever passed together with an explicit length of 0.
  // NOTE(review): `T x[] = {}` declares a zero-length array, which ISO C++
  // does not allow; this relies on a compiler extension.
  const char ascii[] = {};
  const JS::Latin1Char latin1[] = {};
  const mozilla::Utf8Unit utf8[] = {};
  const char16_t char16[] = {};

  // Check that the well-known empty atom matches for different entry points.
  auto refIndex = TaggedParserAtomIndex::WellKnown::empty();
  CHECK(atomTable.internAscii(&fc, ascii, 0) == refIndex);
  CHECK(atomTable.internLatin1(&fc, latin1, 0) == refIndex);
  CHECK(atomTable.internUtf8(&fc, utf8, 0) == refIndex);
  CHECK(atomTable.internChar16(&fc, char16, 0) == refIndex);

  return true;
}
END_TEST(testParserAtom_empty)

// Test length-1 fast-path is consistent across entry points for ASCII.
BEGIN_TEST(testParserAtom_tiny1_ASCII) {
  using js::frontend::ParserAtomsTable;
  using js::frontend::WellKnownParserAtoms;

  js::AutoReportFrontendContext fc(cx);
  js::LifoAlloc alloc(512);
  ParserAtomsTable atomTable(alloc);

  // The same one-character string, "a", in every supported encoding.
  char16_t a = 'a';
  const char ascii[] = {'a'};
  JS::Latin1Char latin1[] = {'a'};
  const mozilla::Utf8Unit utf8[] = {mozilla::Utf8Unit('a')};
  char16_t char16[] = {'a'};

  // The well-known lookup provides the reference index; each intern entry
  // point must return that same index.
  auto refIndex = WellKnownParserAtoms::getSingleton().lookupTinyIndex(&a, 1);
  CHECK(refIndex);
  CHECK(atomTable.internAscii(&fc, ascii, 1) == refIndex);
  CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
  CHECK(atomTable.internUtf8(&fc, utf8, 1) == refIndex);
  CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex);

  return true;
}
END_TEST(testParserAtom_tiny1_ASCII)

// Test length-1 fast-path is consistent across entry points for non-ASCII.
//
// Each case below takes one code point in U+0080..U+00FF and checks that its
// Latin-1 form (1 unit), UTF-8 form (2 units) and UTF-16 form (1 unit) all
// intern to the index returned by the well-known tiny lookup.
BEGIN_TEST(testParserAtom_tiny1_nonASCII) {
  using js::frontend::ParserAtomsTable;
  using js::frontend::WellKnownParserAtoms;

  js::AutoReportFrontendContext fc(cx);
  js::LifoAlloc alloc(512);
  ParserAtomsTable atomTable(alloc);

  // U+0080, UTF-8: C2 80.
  {
    char16_t euro = 0x0080;
    JS::Latin1Char latin1[] = {0x80};
    const mozilla::Utf8Unit utf8[] = {
        mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)),
        mozilla::Utf8Unit(static_cast<unsigned char>(0x80))};
    char16_t char16[] = {0x0080};

    auto refIndex =
        WellKnownParserAtoms::getSingleton().lookupTinyIndex(&euro, 1);
    CHECK(refIndex);
    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
    CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex);
    CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex);
  }

  // U+00BD, UTF-8: C2 BD.
  {
    char16_t frac12 = 0x00BD;
    JS::Latin1Char latin1[] = {0xBD};
    const mozilla::Utf8Unit utf8[] = {
        mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)),
        mozilla::Utf8Unit(static_cast<unsigned char>(0xBD))};
    char16_t char16[] = {0x00BD};

    auto refIndex =
        WellKnownParserAtoms::getSingleton().lookupTinyIndex(&frac12, 1);
    CHECK(refIndex);
    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
    CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex);
    CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex);
  }

  // U+00BF, UTF-8: C2 BF.
  {
    char16_t iquest = 0x00BF;
    JS::Latin1Char latin1[] = {0xBF};
    const mozilla::Utf8Unit utf8[] = {
        mozilla::Utf8Unit(static_cast<unsigned char>(0xC2)),
        mozilla::Utf8Unit(static_cast<unsigned char>(0xBF))};
    char16_t char16[] = {0x00BF};

    auto refIndex =
        WellKnownParserAtoms::getSingleton().lookupTinyIndex(&iquest, 1);
    CHECK(refIndex);
    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
    CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex);
    CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex);
  }

  // U+00C0, UTF-8: C3 80.
  {
    char16_t agrave = 0x00C0;
    JS::Latin1Char latin1[] = {0xC0};
    const mozilla::Utf8Unit utf8[] = {
        mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)),
        mozilla::Utf8Unit(static_cast<unsigned char>(0x80))};
    char16_t char16[] = {0x00C0};

    auto refIndex =
        WellKnownParserAtoms::getSingleton().lookupTinyIndex(&agrave, 1);
    CHECK(refIndex);
    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
    CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex);
    CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex);
  }

  // U+00E6, UTF-8: C3 A6.
  {
    char16_t ae = 0x00E6;
    JS::Latin1Char latin1[] = {0xE6};
    const mozilla::Utf8Unit utf8[] = {
        mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)),
        mozilla::Utf8Unit(static_cast<unsigned char>(0xA6))};
    char16_t char16[] = {0x00E6};

    auto refIndex =
        WellKnownParserAtoms::getSingleton().lookupTinyIndex(&ae, 1);
    CHECK(refIndex);
    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
    CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex);
    CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex);
  }

  // U+00FF, UTF-8: C3 BF.
  {
    char16_t yuml = 0x00FF;
    JS::Latin1Char latin1[] = {0xFF};
    const mozilla::Utf8Unit utf8[] = {
        mozilla::Utf8Unit(static_cast<unsigned char>(0xC3)),
        mozilla::Utf8Unit(static_cast<unsigned char>(0xBF))};
    char16_t char16[] = {0x00FF};

    auto refIndex =
        WellKnownParserAtoms::getSingleton().lookupTinyIndex(&yuml, 1);
    CHECK(refIndex);
    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
    CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex);
    CHECK(atomTable.internChar16(&fc, char16, 1) == refIndex);
  }

  return true;
}
END_TEST(testParserAtom_tiny1_nonASCII)

// Test for tiny1 UTF-8 with valid/invalid code units.
//
// NOTE: Passing invalid UTF-8 to internUtf8 hits assertion failure, so
//       test in the opposite way.
//       lookupTinyIndexUTF8 is used inside internUtf8.
BEGIN_TEST(testParserAtom_tiny1_invalidUTF8) {
  using js::frontend::ParserAtomsTable;
  using js::frontend::WellKnownParserAtoms;

  js::AutoReportFrontendContext fc(cx);
  js::LifoAlloc alloc(512);
  ParserAtomsTable atomTable(alloc);

  // Builds the two-unit sequence {lead, trail} and returns the result of
  // looking it up with lookupTinyIndexUTF8.
  auto lookupTwoUnits = [](unsigned char lead, unsigned char trail) {
    const mozilla::Utf8Unit utf8[] = {mozilla::Utf8Unit(lead),
                                      mozilla::Utf8Unit(trail)};
    return WellKnownParserAtoms::getSingleton().lookupTinyIndexUTF8(utf8, 2);
  };

  // Lead 0xC1: an overlong two-byte form, not valid UTF-8.
  CHECK(!lookupTwoUnits(0xC1, 0x80));

  // Lead 0xC2: trail must be a continuation unit (0x80..0xBF).
  CHECK(!lookupTwoUnits(0xC2, 0x7F));
  {
    // C2 80 is U+0080.
    JS::Latin1Char latin1[] = {0x80};
    auto refIndex = lookupTwoUnits(0xC2, 0x80);
    CHECK(refIndex);
    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
  }
  {
    // C2 BF is U+00BF.
    JS::Latin1Char latin1[] = {0xBF};
    auto refIndex = lookupTwoUnits(0xC2, 0xBF);
    CHECK(refIndex);
    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
  }
  CHECK(!lookupTwoUnits(0xC2, 0xC0));

  // Lead 0xC3: same trail-unit boundaries.
  CHECK(!lookupTwoUnits(0xC3, 0x7F));
  {
    // C3 80 is U+00C0.
    JS::Latin1Char latin1[] = {0xC0};
    auto refIndex = lookupTwoUnits(0xC3, 0x80);
    CHECK(refIndex);
    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
  }
  {
    // C3 BF is U+00FF.
    JS::Latin1Char latin1[] = {0xFF};
    auto refIndex = lookupTwoUnits(0xC3, 0xBF);
    CHECK(refIndex);
    CHECK(atomTable.internLatin1(&fc, latin1, 1) == refIndex);
  }
  CHECK(!lookupTwoUnits(0xC3, 0xC0));

  // Lead 0xC4: encodes U+0100 and above, so no trail unit may match.
  CHECK(!lookupTwoUnits(0xC4, 0x7F));
  CHECK(!lookupTwoUnits(0xC4, 0x80));
  CHECK(!lookupTwoUnits(0xC4, 0xBF));
  CHECK(!lookupTwoUnits(0xC4, 0xC0));

  return true;
}
END_TEST(testParserAtom_tiny1_invalidUTF8)

// Test length-2 fast-path is consistent across entry points.
BEGIN_TEST(testParserAtom_tiny2) {
  using js::frontend::ParserAtomsTable;
  using js::frontend::WellKnownParserAtoms;

  js::AutoReportFrontendContext fc(cx);
  js::LifoAlloc alloc(512);
  ParserAtomsTable atomTable(alloc);

  // The same two-character string, "a0", in every supported encoding.
  const char ascii[] = {'a', '0'};
  JS::Latin1Char latin1[] = {'a', '0'};
  const mozilla::Utf8Unit utf8[] = {mozilla::Utf8Unit('a'),
                                    mozilla::Utf8Unit('0')};
  char16_t char16[] = {'a', '0'};

  // The well-known lookup provides the reference index; each intern entry
  // point must return that same index.
  auto refIndex =
      WellKnownParserAtoms::getSingleton().lookupTinyIndex(ascii, 2);
  CHECK(refIndex);
  CHECK(atomTable.internAscii(&fc, ascii, 2) == refIndex);
  CHECK(atomTable.internLatin1(&fc, latin1, 2) == refIndex);
  CHECK(atomTable.internUtf8(&fc, utf8, 2) == refIndex);
  CHECK(atomTable.internChar16(&fc, char16, 2) == refIndex);

  // Note: If Latin1-Extended characters become supported, then UTF-8 behaviour
  //       should be tested.
  char16_t ae0[] = {0x00E6, '0'};
  CHECK(!WellKnownParserAtoms::getSingleton().lookupTinyIndex(ae0, 2));

  return true;
}
END_TEST(testParserAtom_tiny2)

// Test length-3 fast-path is consistent across entry points.
BEGIN_TEST(testParserAtom_int) {
  using js::frontend::ParserAtomsTable;
  using js::frontend::WellKnownParserAtoms;

  js::AutoReportFrontendContext fc(cx);
  js::LifoAlloc alloc(512);
  ParserAtomsTable atomTable(alloc);

  const auto& wellKnown = WellKnownParserAtoms::getSingleton();

  // "100": found by the well-known lookup, and every intern entry point
  // returns that same index.
  {
    const char asciiDigits[] = {'1', '0', '0'};
    JS::Latin1Char latin1Digits[] = {'1', '0', '0'};
    const mozilla::Utf8Unit utf8Digits[] = {mozilla::Utf8Unit('1'),
                                            mozilla::Utf8Unit('0'),
                                            mozilla::Utf8Unit('0')};
    char16_t utf16Digits[] = {'1', '0', '0'};

    auto expected = wellKnown.lookupTinyIndex(asciiDigits, 3);
    CHECK(expected);
    CHECK(atomTable.internAscii(&fc, asciiDigits, 3) == expected);
    CHECK(atomTable.internLatin1(&fc, latin1Digits, 3) == expected);
    CHECK(atomTable.internUtf8(&fc, utf8Digits, 3) == expected);
    CHECK(atomTable.internChar16(&fc, utf16Digits, 3) == expected);
  }

  // "255": same expectations as "100".
  {
    const char asciiDigits[] = {'2', '5', '5'};
    JS::Latin1Char latin1Digits[] = {'2', '5', '5'};
    const mozilla::Utf8Unit utf8Digits[] = {mozilla::Utf8Unit('2'),
                                            mozilla::Utf8Unit('5'),
                                            mozilla::Utf8Unit('5')};
    char16_t utf16Digits[] = {'2', '5', '5'};

    auto expected = wellKnown.lookupTinyIndex(asciiDigits, 3);
    CHECK(expected);
    CHECK(atomTable.internAscii(&fc, asciiDigits, 3) == expected);
    CHECK(atomTable.internLatin1(&fc, latin1Digits, 3) == expected);
    CHECK(atomTable.internUtf8(&fc, utf8Digits, 3) == expected);
    CHECK(atomTable.internChar16(&fc, utf16Digits, 3) == expected);
  }

  // Three-character inputs the well-known lookup must not find.
  const char leadingZeroDecimal[] = {'0', '9', '9'};
  CHECK(!wellKnown.lookupTinyIndex(leadingZeroDecimal, 3));

  const char leadingZeroHex[] = {'0', 'F', 'F'};
  CHECK(!wellKnown.lookupTinyIndex(leadingZeroHex, 3));

  const char upperLetterSuffix[] = {'1', '0', 'A'};
  CHECK(!wellKnown.lookupTinyIndex(upperLetterSuffix, 3));

  const char lowerLetterSuffix[] = {'1', '0', 'a'};
  CHECK(!wellKnown.lookupTinyIndex(lowerLetterSuffix, 3));

  const char oneAbove255[] = {'2', '5', '6'};
  CHECK(!wellKnown.lookupTinyIndex(oneAbove255, 3));

  const char threeHundred[] = {'3', '0', '0'};
  CHECK(!wellKnown.lookupTinyIndex(threeHundred, 3));

  return true;
}
END_TEST(testParserAtom_int)

// "€" U+0080
// "½" U+00BD
// "¿" U+00BF
// "À" U+00C0
// "æ" U+00E6
// "ÿ" U+00FF
// "π" U+03C0
// "🍕" U+1F355