summaryrefslogtreecommitdiffstats
path: root/js/src/util/StringBuffer.h
blob: 23c1ff61ae736c21e01ecdb6bd5833f1ce2eab32 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef util_StringBuffer_h
#define util_StringBuffer_h

#include "mozilla/CheckedInt.h"
#include "mozilla/MaybeOneOf.h"
#include "mozilla/Utf8.h"

#include "frontend/FrontendContext.h"
#include "js/Vector.h"
#include "vm/StringType.h"

namespace js {

class FrontendContext;

namespace frontend {
class ParserAtomsTable;
class TaggedParserAtomIndex;
}  // namespace frontend

namespace detail {

// GrowEltsAggressively will multiply the space by a factor of 8 on overflow, to
// avoid very expensive memcpys for large strings (eg giant toJSON output for
// sessionstore.js). Drop back to the normal expansion policy once the buffer
// hits 128MB.
static constexpr size_t AggressiveLimit = 128 << 20;

template <size_t EltSize>
inline size_t GrowEltsAggressively(size_t aOldElts, size_t aIncr) {
  mozilla::CheckedInt<size_t> required =
      mozilla::CheckedInt<size_t>(aOldElts) + aIncr;
  if (!(required * 2).isValid()) {
    return 0;
  }
  required = mozilla::RoundUpPow2(required.value());
  required *= 8;
  if (!(required * EltSize).isValid() || required.value() > AggressiveLimit) {
    // Fall back to doubling behavior if the aggressive growth fails or gets too
    // big.
    return mozilla::detail::GrowEltsByDoubling<EltSize>(aOldElts, aIncr);
  }
  return required.value();
};

}  // namespace detail

class StringBufferAllocPolicy {
  TempAllocPolicy impl_;
  const arena_id_t& arenaId_;

 public:
  StringBufferAllocPolicy(FrontendContext* fc, const arena_id_t& arenaId)
      : impl_(fc), arenaId_(arenaId) {}

  StringBufferAllocPolicy(JSContext* cx, const arena_id_t& arenaId)
      : impl_(cx), arenaId_(arenaId) {}

  template <typename T>
  T* maybe_pod_malloc(size_t numElems) {
    return impl_.maybe_pod_arena_malloc<T>(arenaId_, numElems);
  }
  template <typename T>
  T* maybe_pod_calloc(size_t numElems) {
    return impl_.maybe_pod_arena_calloc<T>(arenaId_, numElems);
  }
  template <typename T>
  T* maybe_pod_realloc(T* p, size_t oldSize, size_t newSize) {
    return impl_.maybe_pod_arena_realloc<T>(arenaId_, p, oldSize, newSize);
  }
  template <typename T>
  T* pod_malloc(size_t numElems) {
    return impl_.pod_arena_malloc<T>(arenaId_, numElems);
  }
  template <typename T>
  T* pod_calloc(size_t numElems) {
    return impl_.pod_arena_calloc<T>(arenaId_, numElems);
  }
  template <typename T>
  T* pod_realloc(T* p, size_t oldSize, size_t newSize) {
    return impl_.pod_arena_realloc<T>(arenaId_, p, oldSize, newSize);
  }
  template <typename T>
  void free_(T* p, size_t numElems = 0) {
    impl_.free_(p, numElems);
  }
  void reportAllocOverflow() const { impl_.reportAllocOverflow(); }
  bool checkSimulatedOOM() const { return impl_.checkSimulatedOOM(); }

  // See ComputeGrowth in mfbt/Vector.h.
  template <size_t EltSize>
  static size_t computeGrowth(size_t aOldElts, size_t aIncr) {
    return detail::GrowEltsAggressively<EltSize>(aOldElts, aIncr);
  }
};

/*
 * String builder that eagerly checks for over-allocation past the maximum
 * string length.
 *
 * Any operation which would exceed the maximum string length causes an
 * exception report on the context and results in a failed return value.
 *
 * Well-sized extractions (which waste no more than 1/4 of their char
 * buffer space) are guaranteed for strings built by this interface.
 * See |extractWellSized|.
 */
class StringBuffer {
 protected:
  template <typename CharT>
  using BufferType = Vector<CharT, 64 / sizeof(CharT), StringBufferAllocPolicy>;

  /*
   * The Vector's buffer may be either stolen or copied, so we need to use
   * TempAllocPolicy and account for the memory manually when stealing.
   */
  using Latin1CharBuffer = BufferType<Latin1Char>;
  using TwoByteCharBuffer = BufferType<char16_t>;

  JSContext* maybeCx_ = nullptr;

  /*
   * If Latin1 strings are enabled, cb starts out as a Latin1CharBuffer. When
   * a TwoByte char is appended, inflateChars() constructs a TwoByteCharBuffer
   * and copies the Latin1 chars.
   */
  mozilla::MaybeOneOf<Latin1CharBuffer, TwoByteCharBuffer> cb;

  /* Number of reserve()'d chars, see inflateChars. */
  size_t reserved_ = 0;

  StringBuffer(const StringBuffer& other) = delete;
  void operator=(const StringBuffer& other) = delete;

  template <typename CharT>
  MOZ_ALWAYS_INLINE bool isCharType() const {
    return cb.constructed<BufferType<CharT>>();
  }

  MOZ_ALWAYS_INLINE bool isLatin1() const { return isCharType<Latin1Char>(); }

  MOZ_ALWAYS_INLINE bool isTwoByte() const { return isCharType<char16_t>(); }

  template <typename CharT>
  MOZ_ALWAYS_INLINE BufferType<CharT>& chars() {
    MOZ_ASSERT(isCharType<CharT>());
    return cb.ref<BufferType<CharT>>();
  }

  template <typename CharT>
  MOZ_ALWAYS_INLINE const BufferType<CharT>& chars() const {
    MOZ_ASSERT(isCharType<CharT>());
    return cb.ref<BufferType<CharT>>();
  }

  MOZ_ALWAYS_INLINE TwoByteCharBuffer& twoByteChars() {
    return chars<char16_t>();
  }

  MOZ_ALWAYS_INLINE const TwoByteCharBuffer& twoByteChars() const {
    return chars<char16_t>();
  }

  MOZ_ALWAYS_INLINE Latin1CharBuffer& latin1Chars() {
    return chars<Latin1Char>();
  }

  MOZ_ALWAYS_INLINE const Latin1CharBuffer& latin1Chars() const {
    return chars<Latin1Char>();
  }

  [[nodiscard]] bool inflateChars();

  template <typename CharT>
  JSLinearString* finishStringInternal(JSContext* cx, gc::Heap heap);

 public:
  explicit StringBuffer(JSContext* cx,
                        const arena_id_t& arenaId = js::MallocArena)
      : maybeCx_(cx) {
    MOZ_ASSERT(cx);
    cb.construct<Latin1CharBuffer>(StringBufferAllocPolicy{cx, arenaId});
  }

  // This constructor should only be used if the methods related to the
  // following are not used, because they require a JSContext:
  //   * JSString
  //   * JSAtom
  //   * mozilla::Utf8Unit
  explicit StringBuffer(FrontendContext* fc,
                        const arena_id_t& arenaId = js::MallocArena) {
    MOZ_ASSERT(fc);
    cb.construct<Latin1CharBuffer>(StringBufferAllocPolicy{fc, arenaId});
  }

  void clear() {
    if (isLatin1()) {
      latin1Chars().clear();
    } else {
      twoByteChars().clear();
    }
  }
  [[nodiscard]] bool reserve(size_t len) {
    if (len > reserved_) {
      reserved_ = len;
    }
    return isLatin1() ? latin1Chars().reserve(len)
                      : twoByteChars().reserve(len);
  }
  [[nodiscard]] bool resize(size_t len) {
    return isLatin1() ? latin1Chars().resize(len) : twoByteChars().resize(len);
  }
  [[nodiscard]] bool growByUninitialized(size_t incr) {
    return isLatin1() ? latin1Chars().growByUninitialized(incr)
                      : twoByteChars().growByUninitialized(incr);
  }
  void shrinkTo(size_t newLength) {
    return isLatin1() ? latin1Chars().shrinkTo(newLength)
                      : twoByteChars().shrinkTo(newLength);
  }
  bool empty() const {
    return isLatin1() ? latin1Chars().empty() : twoByteChars().empty();
  }
  size_t length() const {
    return isLatin1() ? latin1Chars().length() : twoByteChars().length();
  }
  char16_t getChar(size_t idx) const {
    return isLatin1() ? latin1Chars()[idx] : twoByteChars()[idx];
  }

  [[nodiscard]] bool ensureTwoByteChars() {
    return isTwoByte() || inflateChars();
  }

  [[nodiscard]] bool append(const char16_t c) {
    if (isLatin1()) {
      if (c <= JSString::MAX_LATIN1_CHAR) {
        return latin1Chars().append(Latin1Char(c));
      }
      if (!inflateChars()) {
        return false;
      }
    }
    return twoByteChars().append(c);
  }
  [[nodiscard]] bool append(Latin1Char c) {
    return isLatin1() ? latin1Chars().append(c) : twoByteChars().append(c);
  }
  [[nodiscard]] bool append(char c) { return append(Latin1Char(c)); }

  [[nodiscard]] inline bool append(const char16_t* begin, const char16_t* end);

  [[nodiscard]] bool append(const char16_t* chars, size_t len) {
    return append(chars, chars + len);
  }

  [[nodiscard]] bool append(const Latin1Char* begin, const Latin1Char* end) {
    return isLatin1() ? latin1Chars().append(begin, end)
                      : twoByteChars().append(begin, end);
  }
  [[nodiscard]] bool append(const Latin1Char* chars, size_t len) {
    return append(chars, chars + len);
  }

  /**
   * Interpret the provided count of UTF-8 code units as UTF-8, and append
   * the represented code points to this.  If the code units contain invalid
   * UTF-8, leave the internal buffer in a consistent but unspecified state,
   * report an error, and return false.
   */
  [[nodiscard]] bool append(const mozilla::Utf8Unit* units, size_t len);

  [[nodiscard]] bool append(const JS::ConstCharPtr chars, size_t len) {
    return append(chars.get(), chars.get() + len);
  }
  [[nodiscard]] bool appendN(Latin1Char c, size_t n) {
    return isLatin1() ? latin1Chars().appendN(c, n)
                      : twoByteChars().appendN(c, n);
  }

  [[nodiscard]] inline bool append(JSString* str);
  [[nodiscard]] inline bool append(JSLinearString* str);
  [[nodiscard]] inline bool appendSubstring(JSString* base, size_t off,
                                            size_t len);
  [[nodiscard]] inline bool appendSubstring(JSLinearString* base, size_t off,
                                            size_t len);
  [[nodiscard]] bool append(const frontend::ParserAtomsTable& parserAtoms,
                            frontend::TaggedParserAtomIndex atom);

  [[nodiscard]] bool append(const char* chars, size_t len) {
    return append(reinterpret_cast<const Latin1Char*>(chars), len);
  }

  template <size_t ArrayLength>
  [[nodiscard]] bool append(const char (&array)[ArrayLength]) {
    return append(array, ArrayLength - 1); /* No trailing '\0'. */
  }

  /* Infallible variants usable when the corresponding space is reserved. */
  void infallibleAppend(Latin1Char c) {
    if (isLatin1()) {
      latin1Chars().infallibleAppend(c);
    } else {
      twoByteChars().infallibleAppend(c);
    }
  }
  void infallibleAppend(char c) { infallibleAppend(Latin1Char(c)); }
  void infallibleAppend(const Latin1Char* chars, size_t len) {
    if (isLatin1()) {
      latin1Chars().infallibleAppend(chars, len);
    } else {
      twoByteChars().infallibleAppend(chars, len);
    }
  }
  void infallibleAppend(const char* chars, size_t len) {
    infallibleAppend(reinterpret_cast<const Latin1Char*>(chars), len);
  }

  void infallibleAppendSubstring(JSLinearString* base, size_t off, size_t len);

  /*
   * Because inflation is fallible, these methods should only be used after
   * calling ensureTwoByteChars().
   */
  void infallibleAppend(const char16_t* chars, size_t len) {
    twoByteChars().infallibleAppend(chars, len);
  }
  void infallibleAppend(char16_t c) { twoByteChars().infallibleAppend(c); }

  bool isUnderlyingBufferLatin1() const { return isLatin1(); }

  template <typename CharT>
  CharT* begin() {
    return chars<CharT>().begin();
  }

  template <typename CharT>
  CharT* end() {
    return chars<CharT>().end();
  }

  template <typename CharT>
  const CharT* begin() const {
    return chars<CharT>().begin();
  }

  template <typename CharT>
  const CharT* end() const {
    return chars<CharT>().end();
  }

  char16_t* rawTwoByteBegin() { return begin<char16_t>(); }
  char16_t* rawTwoByteEnd() { return end<char16_t>(); }
  const char16_t* rawTwoByteBegin() const { return begin<char16_t>(); }
  const char16_t* rawTwoByteEnd() const { return end<char16_t>(); }

  Latin1Char* rawLatin1Begin() { return begin<Latin1Char>(); }
  Latin1Char* rawLatin1End() { return end<Latin1Char>(); }
  const Latin1Char* rawLatin1Begin() const { return begin<Latin1Char>(); }
  const Latin1Char* rawLatin1End() const { return end<Latin1Char>(); }

  /* Identical to finishString() except that an atom is created. */
  JSAtom* finishAtom();
  frontend::TaggedParserAtomIndex finishParserAtom(
      frontend::ParserAtomsTable& parserAtoms, FrontendContext* fc);

  /*
   * Creates a raw string from the characters in this buffer.  The string is
   * exactly the characters in this buffer (inflated to TwoByte), it is *not*
   * null-terminated unless the last appended character was '\0'.
   */
  char16_t* stealChars();
};

// Like StringBuffer, but uses StringBufferArena for the characters.
class JSStringBuilder : public StringBuffer {
 public:
  explicit JSStringBuilder(JSContext* cx)
      : StringBuffer(cx, js::StringBufferArena) {}

  /*
   * Creates a string from the characters in this buffer, then (regardless
   * whether string creation succeeded or failed) empties the buffer.
   *
   * Returns nullptr if string creation failed.
   */
  JSLinearString* finishString(gc::Heap heap = gc::Heap::Default);
};

inline bool StringBuffer::append(const char16_t* begin, const char16_t* end) {
  MOZ_ASSERT(begin <= end);
  if (isLatin1()) {
    while (true) {
      if (begin >= end) {
        return true;
      }
      if (*begin > JSString::MAX_LATIN1_CHAR) {
        break;
      }
      if (!latin1Chars().append(*begin)) {
        return false;
      }
      ++begin;
    }
    if (!inflateChars()) {
      return false;
    }
  }
  return twoByteChars().append(begin, end);
}

inline bool StringBuffer::append(JSLinearString* str) {
  JS::AutoCheckCannotGC nogc;
  if (isLatin1()) {
    if (str->hasLatin1Chars()) {
      return latin1Chars().append(str->latin1Chars(nogc), str->length());
    }
    if (!inflateChars()) {
      return false;
    }
  }
  return str->hasLatin1Chars()
             ? twoByteChars().append(str->latin1Chars(nogc), str->length())
             : twoByteChars().append(str->twoByteChars(nogc), str->length());
}

inline void StringBuffer::infallibleAppendSubstring(JSLinearString* base,
                                                    size_t off, size_t len) {
  MOZ_ASSERT(off + len <= base->length());
  MOZ_ASSERT_IF(base->hasTwoByteChars(), isTwoByte());

  JS::AutoCheckCannotGC nogc;
  if (base->hasLatin1Chars()) {
    infallibleAppend(base->latin1Chars(nogc) + off, len);
  } else {
    infallibleAppend(base->twoByteChars(nogc) + off, len);
  }
}

inline bool StringBuffer::appendSubstring(JSLinearString* base, size_t off,
                                          size_t len) {
  MOZ_ASSERT(off + len <= base->length());

  JS::AutoCheckCannotGC nogc;
  if (isLatin1()) {
    if (base->hasLatin1Chars()) {
      return latin1Chars().append(base->latin1Chars(nogc) + off, len);
    }
    if (!inflateChars()) {
      return false;
    }
  }
  return base->hasLatin1Chars()
             ? twoByteChars().append(base->latin1Chars(nogc) + off, len)
             : twoByteChars().append(base->twoByteChars(nogc) + off, len);
}

inline bool StringBuffer::appendSubstring(JSString* base, size_t off,
                                          size_t len) {
  MOZ_ASSERT(maybeCx_);

  JSLinearString* linear = base->ensureLinear(maybeCx_);
  if (!linear) {
    return false;
  }

  return appendSubstring(linear, off, len);
}

inline bool StringBuffer::append(JSString* str) {
  MOZ_ASSERT(maybeCx_);

  JSLinearString* linear = str->ensureLinear(maybeCx_);
  if (!linear) {
    return false;
  }

  return append(linear);
}

/* ES5 9.8 ToString, appending the result to the string buffer. */
extern bool ValueToStringBufferSlow(JSContext* cx, const Value& v,
                                    StringBuffer& sb);

inline bool ValueToStringBuffer(JSContext* cx, const Value& v,
                                StringBuffer& sb) {
  if (v.isString()) {
    return sb.append(v.toString());
  }

  return ValueToStringBufferSlow(cx, v, sb);
}

/* ES5 9.8 ToString for booleans, appending the result to the string buffer. */
inline bool BooleanToStringBuffer(bool b, StringBuffer& sb) {
  return b ? sb.append("true") : sb.append("false");
}

} /* namespace js */

#endif /* util_StringBuffer_h */