/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- * vim: set ts=8 sts=2 et sw=2 tw=80: * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #ifndef vm_StaticStrings_h #define vm_StaticStrings_h #include "mozilla/Assertions.h" // MOZ_ASSERT #include "mozilla/Attributes.h" // MOZ_ALWAYS_INLINE #include "mozilla/TextUtils.h" // mozilla::{IsAsciiDigit, IsAsciiLowercaseAlpha, IsAsciiUppercaseAlpha} #include // size_t #include // int32_t, uint32_t #include // std::is_same_v #include "jstypes.h" // JS_PUBLIC_API, js::Bit, js::BitMask #include "js/TypeDecls.h" // JS::Latin1Char struct JS_PUBLIC_API JSContext; class JSAtom; class JSLinearString; class JSString; namespace js { namespace frontend { class ParserAtomsTable; class TaggedParserAtomIndex; class WellKnownParserAtoms; struct CompilationAtomCache; } // namespace frontend namespace jit { class MacroAssembler; } // namespace jit class StaticStrings { // NOTE: The WellKnownParserAtoms rely on these tables and may need to be // update if these tables are changed. friend class js::frontend::ParserAtomsTable; friend class js::frontend::TaggedParserAtomIndex; friend class js::frontend::WellKnownParserAtoms; friend struct js::frontend::CompilationAtomCache; friend class js::jit::MacroAssembler; private: // Strings matches `[A-Za-z0-9$_]{2}` pattern. // Store each character in 6 bits. // See fromSmallChar/toSmallChar for the mapping. static constexpr size_t SMALL_CHAR_BITS = 6; static constexpr size_t SMALL_CHAR_MASK = js::BitMask(SMALL_CHAR_BITS); // To optimize ASCII -> small char, allocate a table. static constexpr size_t SMALL_CHAR_TABLE_SIZE = 128U; static constexpr size_t NUM_SMALL_CHARS = js::Bit(SMALL_CHAR_BITS); static constexpr size_t NUM_LENGTH2_ENTRIES = NUM_SMALL_CHARS * NUM_SMALL_CHARS; JSAtom* length2StaticTable[NUM_LENGTH2_ENTRIES] = {}; // zeroes public: /* We keep these public for the JITs. */ static const size_t UNIT_STATIC_LIMIT = 256U; JSAtom* unitStaticTable[UNIT_STATIC_LIMIT] = {}; // zeroes static const size_t INT_STATIC_LIMIT = 256U; JSAtom* intStaticTable[INT_STATIC_LIMIT] = {}; // zeroes StaticStrings() = default; bool init(JSContext* cx); static bool hasUint(uint32_t u) { return u < INT_STATIC_LIMIT; } JSAtom* getUint(uint32_t u) { MOZ_ASSERT(hasUint(u)); return intStaticTable[u]; } static bool hasInt(int32_t i) { return uint32_t(i) < INT_STATIC_LIMIT; } JSAtom* getInt(int32_t i) { MOZ_ASSERT(hasInt(i)); return getUint(uint32_t(i)); } static bool hasUnit(char16_t c) { return c < UNIT_STATIC_LIMIT; } JSAtom* getUnit(char16_t c) { MOZ_ASSERT(hasUnit(c)); return unitStaticTable[c]; } /* May not return atom, returns null on (reported) failure. */ inline JSLinearString* getUnitStringForElement(JSContext* cx, JSString* str, size_t index); template static bool isStatic(const CharT* chars, size_t len); /* Return null if no static atom exists for the given (chars, length). */ template MOZ_ALWAYS_INLINE JSAtom* lookup(const CharT* chars, size_t length) { static_assert(std::is_same_v || std::is_same_v, "for understandability, |chars| must be one of a few " "identified types"); switch (length) { case 1: { char16_t c = chars[0]; if (c < UNIT_STATIC_LIMIT) { return getUnit(c); } return nullptr; } case 2: if (fitsInSmallChar(chars[0]) && fitsInSmallChar(chars[1])) { return getLength2(chars[0], chars[1]); } return nullptr; case 3: /* * Here we know that JSString::intStringTable covers only 256 (or at * least not 1000 or more) chars. We rely on order here to resolve the * unit vs. int string/length-2 string atom identity issue by giving * priority to unit strings for "0" through "9" and length-2 strings for * "10" through "99". */ int i; if (fitsInLength3Static(chars[0], chars[1], chars[2], &i)) { return getInt(i); } return nullptr; } return nullptr; } MOZ_ALWAYS_INLINE JSAtom* lookup(const char* chars, size_t length) { // Collapse calls for |const char*| into |const Latin1Char char*| to avoid // excess instantiations. return lookup(reinterpret_cast(chars), length); } private: using SmallChar = uint8_t; struct SmallCharTable { SmallChar storage[SMALL_CHAR_TABLE_SIZE]; constexpr SmallChar& operator[](size_t idx) { return storage[idx]; } constexpr const SmallChar& operator[](size_t idx) const { return storage[idx]; } }; static const SmallChar INVALID_SMALL_CHAR = -1; static bool fitsInSmallChar(char16_t c) { return c < SMALL_CHAR_TABLE_SIZE && toSmallCharTable[c] != INVALID_SMALL_CHAR; } template static bool fitsInLength3Static(CharT c1, CharT c2, CharT c3, int* i) { static_assert(INT_STATIC_LIMIT <= 299, "static int strings assumed below to be at most " "three digits where the first digit is either 1 or 2"); if ('1' <= c1 && c1 < '3' && '0' <= c2 && c2 <= '9' && '0' <= c3 && c3 <= '9') { *i = (c1 - '0') * 100 + (c2 - '0') * 10 + (c3 - '0'); if (unsigned(*i) < INT_STATIC_LIMIT) { return true; } } return false; } static constexpr JS::Latin1Char fromSmallChar(SmallChar c); static constexpr SmallChar toSmallChar(uint32_t c); static constexpr SmallCharTable createSmallCharTable(); static const SmallCharTable toSmallCharTable; static constexpr JS::Latin1Char firstCharOfLength2(size_t s) { return fromSmallChar(s >> SMALL_CHAR_BITS); } static constexpr JS::Latin1Char secondCharOfLength2(size_t s) { return fromSmallChar(s & SMALL_CHAR_MASK); } static constexpr JS::Latin1Char firstCharOfLength3(uint32_t i) { return '0' + (i / 100); } static constexpr JS::Latin1Char secondCharOfLength3(uint32_t i) { return '0' + ((i / 10) % 10); } static constexpr JS::Latin1Char thirdCharOfLength3(uint32_t i) { return '0' + (i % 10); } static MOZ_ALWAYS_INLINE size_t getLength2Index(char16_t c1, char16_t c2) { MOZ_ASSERT(fitsInSmallChar(c1)); MOZ_ASSERT(fitsInSmallChar(c2)); return (size_t(toSmallCharTable[c1]) << SMALL_CHAR_BITS) + toSmallCharTable[c2]; } // Same as getLength2Index, but withtout runtime assertion, // this should be used only for known static string. static constexpr size_t getLength2IndexStatic(char c1, char c2) { return (size_t(toSmallChar(c1)) << SMALL_CHAR_BITS) + toSmallChar(c2); } MOZ_ALWAYS_INLINE JSAtom* getLength2FromIndex(size_t index) { return length2StaticTable[index]; } MOZ_ALWAYS_INLINE JSAtom* getLength2(char16_t c1, char16_t c2) { return getLength2FromIndex(getLength2Index(c1, c2)); } }; /* * Declare length-2 strings. We only store strings where both characters are * alphanumeric. The lower 10 short chars are the numerals, the next 26 are * the lowercase letters, and the next 26 are the uppercase letters. */ constexpr JS::Latin1Char StaticStrings::fromSmallChar(SmallChar c) { if (c < 10) { return c + '0'; } if (c < 36) { return c + 'a' - 10; } if (c < 62) { return c + 'A' - 36; } if (c == 62) { return '$'; } return '_'; } constexpr StaticStrings::SmallChar StaticStrings::toSmallChar(uint32_t c) { if (mozilla::IsAsciiDigit(c)) { return c - '0'; } if (mozilla::IsAsciiLowercaseAlpha(c)) { return c - 'a' + 10; } if (mozilla::IsAsciiUppercaseAlpha(c)) { return c - 'A' + 36; } if (c == '$') { return 62; } if (c == '_') { return 63; } return StaticStrings::INVALID_SMALL_CHAR; } } // namespace js #endif /* vm_StaticStrings_h */