summaryrefslogtreecommitdiffstats
path: root/js/src/vm/StaticStrings.h
blob: 0a2beda98e267c58bbbcbcdf39948644120845f1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef vm_StaticStrings_h
#define vm_StaticStrings_h

#include "mozilla/Assertions.h"  // MOZ_ASSERT
#include "mozilla/Attributes.h"  // MOZ_ALWAYS_INLINE
#include "mozilla/TextUtils.h"  // mozilla::{IsAsciiDigit, IsAsciiLowercaseAlpha, IsAsciiUppercaseAlpha}

#include <stddef.h>     // size_t
#include <stdint.h>     // int32_t, uint32_t
#include <type_traits>  // std::is_same_v

#include "jstypes.h"  // JS_PUBLIC_API, js::Bit, js::BitMask

#include "js/TypeDecls.h"  // JS::Latin1Char

// Forward declarations: within this header these types appear only as pointer
// parameters or pointer return values of StaticStrings' members.
struct JS_PUBLIC_API JSContext;

class JSAtom;
class JSLinearString;
class JSString;

namespace js {

// Forward declarations of the frontend types that StaticStrings names in its
// friend declarations.
namespace frontend {
class ParserAtomsTable;
class TaggedParserAtomIndex;
class WellKnownParserAtoms;
struct CompilationAtomCache;
}  // namespace frontend

// Forward declaration of the JIT type that StaticStrings names as a friend.
namespace jit {
class MacroAssembler;
}  // namespace jit

// Holds tables of JSAtom pointers for very short strings, so that lookup()
// can map (chars, length) directly to a table entry:
//   - unitStaticTable:    indexed by a single code unit below
//                         UNIT_STATIC_LIMIT,
//   - length2StaticTable: indexed by a pair of characters that both have a
//                         6-bit "small char" code,
//   - intStaticTable:     indexed by an integer below INT_STATIC_LIMIT.
// All tables start out zeroed. init() is only declared in this header;
// presumably it fills the tables in -- confirm against its definition.
class StaticStrings {
  // NOTE: The WellKnownParserAtoms rely on these tables and may need to be
  //       updated if these tables are changed.
  friend class js::frontend::ParserAtomsTable;
  friend class js::frontend::TaggedParserAtomIndex;
  friend class js::frontend::WellKnownParserAtoms;
  friend struct js::frontend::CompilationAtomCache;

  friend class js::jit::MacroAssembler;

 private:
  // Strings matching the `[A-Za-z0-9$_]{2}` pattern.
  // Store each character in 6 bits.
  // See fromSmallChar/toSmallChar for the mapping.
  static constexpr size_t SMALL_CHAR_BITS = 6;
  static constexpr size_t SMALL_CHAR_MASK = js::BitMask(SMALL_CHAR_BITS);

  // To optimize ASCII -> small char, allocate a table.
  static constexpr size_t SMALL_CHAR_TABLE_SIZE = 128U;
  static constexpr size_t NUM_SMALL_CHARS = js::Bit(SMALL_CHAR_BITS);
  static constexpr size_t NUM_LENGTH2_ENTRIES =
      NUM_SMALL_CHARS * NUM_SMALL_CHARS;

  // Indexed by getLength2Index(): the small char of the first character
  // shifted left by SMALL_CHAR_BITS, plus the small char of the second one.
  JSAtom* length2StaticTable[NUM_LENGTH2_ENTRIES] = {};  // zeroes

 public:
  /* We keep these public for the JITs. */
  static const size_t UNIT_STATIC_LIMIT = 256U;
  JSAtom* unitStaticTable[UNIT_STATIC_LIMIT] = {};  // zeroes

  static const size_t INT_STATIC_LIMIT = 256U;
  JSAtom* intStaticTable[INT_STATIC_LIMIT] = {};  // zeroes

  StaticStrings() = default;

  // Declared here, defined elsewhere. Presumably populates the tables and
  // returns false on failure -- confirm against the definition.
  bool init(JSContext* cx);

  /* Whether |u| is a valid index into intStaticTable. */
  static bool hasUint(uint32_t u) { return u < INT_STATIC_LIMIT; }

  /* Return the intStaticTable entry for |u|; |u| must satisfy hasUint(). */
  JSAtom* getUint(uint32_t u) {
    MOZ_ASSERT(hasUint(u));
    return intStaticTable[u];
  }

  /*
   * Signed variant of hasUint(). The cast to uint32_t turns negative values
   * into large unsigned ones, so they fail the single range comparison.
   */
  static bool hasInt(int32_t i) { return uint32_t(i) < INT_STATIC_LIMIT; }

  /* Return the intStaticTable entry for |i|; |i| must satisfy hasInt(). */
  JSAtom* getInt(int32_t i) {
    MOZ_ASSERT(hasInt(i));
    return getUint(uint32_t(i));
  }

  /* Whether |c| is a valid index into unitStaticTable. */
  static bool hasUnit(char16_t c) { return c < UNIT_STATIC_LIMIT; }

  /* Return the unitStaticTable entry for |c|; |c| must satisfy hasUnit(). */
  JSAtom* getUnit(char16_t c) {
    MOZ_ASSERT(hasUnit(c));
    return unitStaticTable[c];
  }

  /* May not return atom, returns null on (reported) failure. */
  inline JSLinearString* getUnitStringForElement(JSContext* cx, JSString* str,
                                                 size_t index);

  // Declared here, defined elsewhere. Presumably reports whether
  // (chars, len) is covered by one of the static tables -- confirm against
  // the definition.
  template <typename CharT>
  static bool isStatic(const CharT* chars, size_t len);

  /* Return null if no static atom exists for the given (chars, length). */
  template <typename CharT>
  MOZ_ALWAYS_INLINE JSAtom* lookup(const CharT* chars, size_t length) {
    static_assert(std::is_same_v<CharT, JS::Latin1Char> ||
                      std::is_same_v<CharT, char16_t>,
                  "for understandability, |chars| must be one of a few "
                  "identified types");

    switch (length) {
      case 1: {
        // Single code unit: served from unitStaticTable when in range.
        char16_t c = chars[0];
        if (c < UNIT_STATIC_LIMIT) {
          return getUnit(c);
        }
        return nullptr;
      }
      case 2:
        // Two characters: served from length2StaticTable when both
        // characters have a small-char code.
        if (fitsInSmallChar(chars[0]) && fitsInSmallChar(chars[1])) {
          return getLength2(chars[0], chars[1]);
        }
        return nullptr;
      case 3:
        /*
         * Here we know that intStaticTable covers only 256 (or at least not
         * 1000 or more) integers. We rely on order here to resolve the
         * unit vs. int string/length-2 string atom identity issue by giving
         * priority to unit strings for "0" through "9" and length-2 strings for
         * "10" through "99".
         */
        int i;
        if (fitsInLength3Static(chars[0], chars[1], chars[2], &i)) {
          return getInt(i);
        }
        return nullptr;
    }

    // Any other length has no static table.
    return nullptr;
  }

  MOZ_ALWAYS_INLINE JSAtom* lookup(const char* chars, size_t length) {
    // Collapse calls for |const char*| into |const Latin1Char*| to avoid
    // excess instantiations.
    return lookup(reinterpret_cast<const JS::Latin1Char*>(chars), length);
  }

 private:
  // A 6-bit character code (see SMALL_CHAR_BITS), or INVALID_SMALL_CHAR.
  using SmallChar = uint8_t;

  // Fixed-size array of SmallChar with constexpr element access; indexed by
  // character code below SMALL_CHAR_TABLE_SIZE.
  struct SmallCharTable {
    SmallChar storage[SMALL_CHAR_TABLE_SIZE];

    constexpr SmallChar& operator[](size_t idx) { return storage[idx]; }
    constexpr const SmallChar& operator[](size_t idx) const {
      return storage[idx];
    }
  };

  // Sentinel for "no small-char code". SmallChar is uint8_t, so -1 is stored
  // as 255, which is outside the 6-bit range of valid codes.
  static const SmallChar INVALID_SMALL_CHAR = -1;

  // Whether |c| has a small-char code. The range check comes first so that
  // toSmallCharTable is never indexed out of bounds.
  static bool fitsInSmallChar(char16_t c) {
    return c < SMALL_CHAR_TABLE_SIZE &&
           toSmallCharTable[c] != INVALID_SMALL_CHAR;
  }

  // Whether the three characters spell a decimal integer that indexes
  // intStaticTable. The first digit must be '1' or '2' and the other two
  // must be '0'-'9'. Whenever that digit pattern matches, the parsed value is
  // written to |*i| -- even if the function then returns false because the
  // value is not below INT_STATIC_LIMIT. Otherwise |*i| is left untouched.
  template <typename CharT>
  static bool fitsInLength3Static(CharT c1, CharT c2, CharT c3, int* i) {
    static_assert(INT_STATIC_LIMIT <= 299,
                  "static int strings assumed below to be at most "
                  "three digits where the first digit is either 1 or 2");
    if ('1' <= c1 && c1 < '3' && '0' <= c2 && c2 <= '9' && '0' <= c3 &&
        c3 <= '9') {
      *i = (c1 - '0') * 100 + (c2 - '0') * 10 + (c3 - '0');

      if (unsigned(*i) < INT_STATIC_LIMIT) {
        return true;
      }
    }
    return false;
  }

  // Small-char code -> Latin-1 character. Defined after the class.
  static constexpr JS::Latin1Char fromSmallChar(SmallChar c);

  // Character -> small-char code, or INVALID_SMALL_CHAR. Defined after the
  // class.
  static constexpr SmallChar toSmallChar(uint32_t c);

  // Declared here, defined elsewhere. Presumably builds the contents of
  // toSmallCharTable -- confirm against the definition.
  static constexpr SmallCharTable createSmallCharTable();

  // Maps a character code below SMALL_CHAR_TABLE_SIZE to its small-char code;
  // entries equal to INVALID_SMALL_CHAR mark characters without one (see
  // fitsInSmallChar). The definition is not in this header.
  static const SmallCharTable toSmallCharTable;

  // Decode the first character of a length-2 index: the bits above the low
  // SMALL_CHAR_BITS.
  static constexpr JS::Latin1Char firstCharOfLength2(size_t s) {
    return fromSmallChar(s >> SMALL_CHAR_BITS);
  }
  // Decode the second character of a length-2 index: the low SMALL_CHAR_BITS.
  static constexpr JS::Latin1Char secondCharOfLength2(size_t s) {
    return fromSmallChar(s & SMALL_CHAR_MASK);
  }

  // The hundreds, tens and ones decimal digits of |i|, as characters.
  static constexpr JS::Latin1Char firstCharOfLength3(uint32_t i) {
    return '0' + (i / 100);
  }
  static constexpr JS::Latin1Char secondCharOfLength3(uint32_t i) {
    return '0' + ((i / 10) % 10);
  }
  static constexpr JS::Latin1Char thirdCharOfLength3(uint32_t i) {
    return '0' + (i % 10);
  }

  // Compute the length2StaticTable index for the pair (c1, c2) using
  // toSmallCharTable. Both characters must satisfy fitsInSmallChar().
  static MOZ_ALWAYS_INLINE size_t getLength2Index(char16_t c1, char16_t c2) {
    MOZ_ASSERT(fitsInSmallChar(c1));
    MOZ_ASSERT(fitsInSmallChar(c2));
    return (size_t(toSmallCharTable[c1]) << SMALL_CHAR_BITS) +
           toSmallCharTable[c2];
  }

  // Same as getLength2Index, but without runtime assertions and computed via
  // toSmallChar() instead of the table, so it is usable in constant
  // expressions. This should be used only for known static strings.
  static constexpr size_t getLength2IndexStatic(char c1, char c2) {
    return (size_t(toSmallChar(c1)) << SMALL_CHAR_BITS) + toSmallChar(c2);
  }

  // Return the length2StaticTable entry at |index|. No bounds check is done
  // here; callers are expected to pass an index from getLength2Index() or
  // getLength2IndexStatic().
  MOZ_ALWAYS_INLINE JSAtom* getLength2FromIndex(size_t index) {
    return length2StaticTable[index];
  }

  // Return the length2StaticTable entry for the pair (c1, c2). Both
  // characters must satisfy fitsInSmallChar().
  MOZ_ALWAYS_INLINE JSAtom* getLength2(char16_t c1, char16_t c2) {
    return getLength2FromIndex(getLength2Index(c1, c2));
  }
};

/*
 * Define the small-char mapping used for length-2 strings. We only store
 * strings where both characters match [A-Za-z0-9$_]. The lower 10 small chars
 * are the numerals, the next 26 are the lowercase letters, the next 26 are
 * the uppercase letters, and the last two are '$' (62) and '_' (63).
 */

// Decode a small-char code into its Latin-1 character (the inverse of
// toSmallChar for valid codes): 0-9 -> '0'-'9', 10-35 -> 'a'-'z',
// 36-61 -> 'A'-'Z', 62 -> '$'. Every remaining value yields '_'.
constexpr JS::Latin1Char StaticStrings::fromSmallChar(SmallChar c) {
  // Exclusive upper bounds of the digit, lowercase and uppercase code ranges.
  constexpr SmallChar DigitEnd = 10;
  constexpr SmallChar LowercaseEnd = 36;
  constexpr SmallChar UppercaseEnd = 62;

  return c < DigitEnd        ? '0' + c
         : c < LowercaseEnd  ? 'a' + (c - DigitEnd)
         : c < UppercaseEnd  ? 'A' + (c - LowercaseEnd)
         : c == UppercaseEnd ? '$'
                             : '_';
}

// Encode a character as its small-char code: '0'-'9' -> 0-9,
// 'a'-'z' -> 10-35, 'A'-'Z' -> 36-61, '$' -> 62, '_' -> 63. Any other input
// yields INVALID_SMALL_CHAR.
constexpr StaticStrings::SmallChar StaticStrings::toSmallChar(uint32_t c) {
  // Start from the "no code" sentinel and overwrite it for recognised input.
  SmallChar code = StaticStrings::INVALID_SMALL_CHAR;

  if (mozilla::IsAsciiDigit(c)) {
    code = c - '0';
  } else if (mozilla::IsAsciiLowercaseAlpha(c)) {
    code = c - 'a' + 10;
  } else if (mozilla::IsAsciiUppercaseAlpha(c)) {
    code = c - 'A' + 36;
  } else if (c == '$') {
    code = 62;
  } else if (c == '_') {
    code = 63;
  }

  return code;
}

}  // namespace js

#endif /* vm_StaticStrings_h */