Adding upstream version 124.0.1. (upstream/124.0.1)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
commit: 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree: f435a8308119effd964b339f76abb83a57c29483 /js/src/frontend/TokenStream.cpp
parent: Initial commit. (diff)
download: firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
1 files changed, 3733 insertions, 0 deletions
diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp
new file mode 100644
index 0000000000..2134972bf4
--- /dev/null
+++ b/js/src/frontend/TokenStream.cpp
@@ -0,0 +1,3733 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// JS lexical scanner.
+
+#include "frontend/TokenStream.h"
+
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Attributes.h"
+#include "mozilla/Likely.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/MemoryChecking.h"
+#include "mozilla/ScopeExit.h"
+#include "mozilla/Span.h"
+#include "mozilla/TemplateLib.h"
+#include "mozilla/TextUtils.h"
+#include "mozilla/Utf8.h"
+
+#include <algorithm>
+#include <iterator>
+#include <limits>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <type_traits>
+#include <utility>
+
+#include "jsnum.h"
+
+#include "frontend/FrontendContext.h"
+#include "frontend/Parser.h"
+#include "frontend/ParserAtom.h"
+#include "frontend/ReservedWords.h"
+#include "js/CharacterEncoding.h"  // JS::ConstUTF8CharsZ
+#include "js/ColumnNumber.h"  // JS::LimitedColumnNumberOneOrigin, JS::ColumnNumberOneOrigin, JS::TaggedColumnNumberOneOrigin
+#include "js/ErrorReport.h"   // JSErrorBase
+#include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_*
+#include "js/Printf.h"                // JS_smprintf
+#include "js/RegExpFlags.h"           // JS::RegExpFlags
+#include "js/UniquePtr.h"
+#include "util/Text.h"
+#include "util/Unicode.h"
+#include "vm/FrameIter.h"  // js::{,NonBuiltin}FrameIter
+#include "vm/JSContext.h"
+#include "vm/Realm.h"
+
+using mozilla::AsciiAlphanumericToNumber;
+using mozilla::AssertedCast;
+using mozilla::DecodeOneUtf8CodePoint;
+using mozilla::IsAscii;
+using mozilla::IsAsciiAlpha;
+using mozilla::IsAsciiDigit;
+using mozilla::IsAsciiHexDigit;
+using mozilla::IsTrailingUnit;
+using mozilla::MakeScopeExit;
+using mozilla::Maybe;
+using mozilla::PointerRangeSize;
+using mozilla::Span;
+using mozilla::Utf8Unit;
+
+using JS::ReadOnlyCompileOptions;
+using JS::RegExpFlag;
+using JS::RegExpFlags;
+
+// Pairs the spelling of one JavaScript reserved word with the token kind
+// stored for it in the |reservedWords| table.
+struct ReservedWordInfo {
+  const char* chars;  // C string with reserved word text
+  js::frontend::TokenKind tokentype;  // token kind listed for this word
+};
+
+// Table with one entry per reserved word, produced by expanding
+// FOR_EACH_JAVASCRIPT_RESERVED_WORD: each entry holds the stringified word and
+// the token kind named by the macro list.
+static const ReservedWordInfo reservedWords[] = {
+#define RESERVED_WORD_INFO(word, name, type) {#word, js::frontend::type},
+    FOR_EACH_JAVASCRIPT_RESERVED_WORD(RESERVED_WORD_INFO)
+#undef RESERVED_WORD_INFO
+};
+
+// One enumerator per reserved word, expanded from the same macro list as
+// |reservedWords|, so each enumerator's value can be used as an index into
+// that table.
+enum class ReservedWordsIndex : size_t {
+#define ENTRY_(_1, NAME, _3) NAME,
+  FOR_EACH_JAVASCRIPT_RESERVED_WORD(ENTRY_)
+#undef ENTRY_
+};
+
+// Returns a ReservedWordInfo for the specified characters, or nullptr if the
+// string is not a reserved word.
+//
+// |s| points at |length| code units; |length| must be nonzero.  The matching
+// logic lives in the included frontend/ReservedWordsGenerated.h (not visible
+// here), which is written against the JSRW_* hook macros defined just before
+// the include and presumably ends every path with a jump to one of the three
+// labels at the bottom of this function.
+template <typename CharT>
+static const ReservedWordInfo* FindReservedWord(const CharT* s, size_t length) {
+  MOZ_ASSERT(length != 0);
+
+  size_t i;
+  const ReservedWordInfo* rw;
+  const char* chars;
+
+  // Hooks for the generated code: JSRW_LENGTH and JSRW_AT read the input, and
+  // the other three (after storing a table index in |i| where one is given)
+  // jump to the corresponding label below.
+#define JSRW_LENGTH() length
+#define JSRW_AT(column) s[column]
+#define JSRW_GOT_MATCH(index) \
+  i = (index);                \
+  goto got_match;
+#define JSRW_TEST_GUESS(index) \
+  i = (index);                 \
+  goto test_guess;
+#define JSRW_NO_MATCH() goto no_match;
+#include "frontend/ReservedWordsGenerated.h"
+#undef JSRW_NO_MATCH
+#undef JSRW_TEST_GUESS
+#undef JSRW_GOT_MATCH
+#undef JSRW_AT
+#undef JSRW_LENGTH
+
+  // Entry |i| is returned as-is, without any further comparison.
+got_match:
+  return &reservedWords[i];
+
+  // Entry |i| is only a candidate: compare all |length| units of |s| against
+  // its text.  Only |length| characters of the table entry are examined, so
+  // the generated code is presumably responsible for having matched lengths.
+test_guess:
+  rw = &reservedWords[i];
+  chars = rw->chars;
+  do {
+    if (*s++ != static_cast<unsigned char>(*chars++)) {
+      goto no_match;
+    }
+  } while (--length != 0);
+  return rw;
+
+no_match:
+  return nullptr;
+}
+
+// UTF-8 specialization: view the units as unsigned chars and defer to the
+// generic character-based lookup.
+template <>
+MOZ_ALWAYS_INLINE const ReservedWordInfo* FindReservedWord<Utf8Unit>(
+    const Utf8Unit* units, size_t length) {
+  const auto* bytes = Utf8AsUnsignedChars(units);
+  return FindReservedWord(bytes, length);
+}
+
+// Overload for parser atoms.  Switches on the atom's raw data: each generated
+// case matches the well-known raw data for one reserved word and returns the
+// |reservedWords| entry at the same-named ReservedWordsIndex.  Any other atom
+// yields nullptr.
+static const ReservedWordInfo* FindReservedWord(
+    const js::frontend::TaggedParserAtomIndex atom) {
+  switch (atom.rawData()) {
+#define CASE_(_1, NAME, _3)                                           \
+  case js::frontend::TaggedParserAtomIndex::WellKnownRawData::NAME(): \
+    return &reservedWords[size_t(ReservedWordsIndex::NAME)];
+    FOR_EACH_JAVASCRIPT_RESERVED_WORD(CASE_)
+#undef CASE_
+  }
+
+  return nullptr;
+}
+
+// True iff |c|, converted to the unsigned counterpart of its type, is the
+// ASCII digit '0' or '1'.
+template <typename CharT>
+static constexpr bool IsAsciiBinary(CharT c) {
+  const auto unit = static_cast<std::make_unsigned_t<CharT>>(c);
+  return unit == '0' || unit == '1';
+}
+
+// True iff |c|, converted to the unsigned counterpart of its type, lies in the
+// ASCII range '0' through '7'.
+template <typename CharT>
+static constexpr bool IsAsciiOctal(CharT c) {
+  const auto unit = static_cast<std::make_unsigned_t<CharT>>(c);
+  return unit >= '0' && unit <= '7';
+}
+
+// Converts an ASCII digit character to its numeric value by subtracting '0'.
+// No range check is performed here.
+template <typename CharT>
+static constexpr uint8_t AsciiOctalToNumber(CharT c) {
+  const auto unit = static_cast<std::make_unsigned_t<CharT>>(c);
+  return static_cast<uint8_t>(unit - '0');
+}
+
+namespace js {
+
+namespace frontend {
+
+// True iff |atom| is a reserved word whose token kind satisfies
+// TokenKindIsKeyword.
+bool IsKeyword(TaggedParserAtomIndex atom) {
+  const ReservedWordInfo* info = FindReservedWord(atom);
+  return info != nullptr && TokenKindIsKeyword(info->tokentype);
+}
+
+// Returns the token kind of the reserved word |name|, or TokenKind::Limit if
+// |name| is not a reserved word.
+TokenKind ReservedWordTokenKind(TaggedParserAtomIndex name) {
+  const ReservedWordInfo* info = FindReservedWord(name);
+  return info ? info->tokentype : TokenKind::Limit;
+}
+
+// Returns the spelling of the reserved word |name| as a C string, or nullptr
+// if |name| is not a reserved word.
+const char* ReservedWordToCharZ(TaggedParserAtomIndex name) {
+  const ReservedWordInfo* info = FindReservedWord(name);
+  if (!info) {
+    return nullptr;
+  }
+
+  return ReservedWordToCharZ(info->tokentype);
+}
+
+// Returns the spelling of the reserved word whose token kind is |tt|, as a
+// string literal.  |tt| must not be TokenKind::Name.  A token kind that is not
+// in the reserved-word list trips the unreachable assertion (where assertions
+// are enabled) and yields nullptr.
+const char* ReservedWordToCharZ(TokenKind tt) {
+  MOZ_ASSERT(tt != TokenKind::Name);
+  switch (tt) {
+#define EMIT_CASE(word, name, type) \
+  case type:                        \
+    return #word;
+    FOR_EACH_JAVASCRIPT_RESERVED_WORD(EMIT_CASE)
+#undef EMIT_CASE
+    default:
+      // The argument is a TokenKind, so say so; this matches the wording in
+      // TokenStreamAnyChars::reservedWordToPropertyName.
+      MOZ_ASSERT_UNREACHABLE("Not a reserved word TokenKind.");
+  }
+  return nullptr;
+}
+
+// Returns the well-known parser atom for the reserved word whose token kind
+// is |tt|.  |tt| must not be TokenKind::Name.  A token kind that is not in the
+// reserved-word list trips the unreachable assertion (where assertions are
+// enabled) and yields the null atom index.
+TaggedParserAtomIndex TokenStreamAnyChars::reservedWordToPropertyName(
+    TokenKind tt) const {
+  MOZ_ASSERT(tt != TokenKind::Name);
+  switch (tt) {
+#define EMIT_CASE(word, name, type) \
+  case type:                        \
+    return TaggedParserAtomIndex::WellKnown::name();
+    FOR_EACH_JAVASCRIPT_RESERVED_WORD(EMIT_CASE)
+#undef EMIT_CASE
+    default:
+      MOZ_ASSERT_UNREACHABLE("Not a reserved word TokenKind.");
+  }
+  return TaggedParserAtomIndex::null();
+}
+
+// Sets up the line-start table with exactly two entries: the offset at which
+// the first line begins, followed by the MAX_PTR sentinel.
+SourceCoords::SourceCoords(FrontendContext* fc, uint32_t initialLineNumber,
+                           uint32_t initialOffset)
+    : lineStartOffsets_(fc), initialLineNum_(initialLineNumber), lastIndex_(0) {
+  // Do not pass MAX_PTR to the append directly: going through a local copy is
+  // required.  Without it GCC and clang fail to compile, and the alternative
+  // of declaring
+  //
+  //   const uint32_t SourceCoords::MAX_PTR;
+  //
+  // satisfies GCC/clang but breaks the Windows build.
+  uint32_t sentinel = MAX_PTR;
+
+  // Neither append can fail, because |lineStartOffsets_| has
+  // statically-allocated room for at least these two elements.
+  MOZ_ASSERT(lineStartOffsets_.capacity() >= 2);
+  MOZ_ALWAYS_TRUE(lineStartOffsets_.reserve(2));
+  lineStartOffsets_.infallibleAppend(initialOffset);
+  lineStartOffsets_.infallibleAppend(sentinel);
+}
+
+// Records that line |lineNum| starts at |lineStartOffset|.  Returns false only
+// if growing the table fails; the table then still ends with its sentinel.
+MOZ_ALWAYS_INLINE bool SourceCoords::add(uint32_t lineNum,
+                                         uint32_t lineStartOffset) {
+  const uint32_t index = indexFromLineNumber(lineNum);
+  const uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
+
+  MOZ_ASSERT(lineStartOffsets_[0] <= lineStartOffset);
+  MOZ_ASSERT(lineStartOffsets_[sentinelIndex] == MAX_PTR);
+
+  if (index != sentinelIndex) {
+    // This newline was seen before (and then ungotten), so there is nothing to
+    // record; just verify the stored offset is unchanged.  This path can also
+    // run after an earlier OOM, hence the guard on |index|.
+    MOZ_ASSERT_IF(index < sentinelIndex,
+                  lineStartOffsets_[index] == lineStartOffset);
+    return true;
+  }
+
+  // First time this newline is seen.  Append a new sentinel first, and only
+  // overwrite the old sentinel slot once that has succeeded, so the table is
+  // always sentinel-terminated.  On failure, tell TokenStream about the OOM.
+  uint32_t maxPtr = MAX_PTR;
+  if (!lineStartOffsets_.append(maxPtr)) {
+    static_assert(std::is_same_v<decltype(lineStartOffsets_.allocPolicy()),
+                                 TempAllocPolicy&>,
+                  "this function's caller depends on it reporting an "
+                  "error on failure, as TempAllocPolicy ensures");
+    return false;
+  }
+
+  lineStartOffsets_[index] = lineStartOffset;
+  return true;
+}
+
+// Extends this table with every line start that |other| knows about and this
+// table does not.  Both tables must begin at the same offset.  Returns false
+// if an append fails.
+MOZ_ALWAYS_INLINE bool SourceCoords::fill(const SourceCoords& other) {
+  MOZ_ASSERT(lineStartOffsets_[0] == other.lineStartOffsets_[0]);
+  MOZ_ASSERT(lineStartOffsets_.back() == MAX_PTR);
+  MOZ_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);
+
+  const size_t otherLength = other.lineStartOffsets_.length();
+  if (lineStartOffsets_.length() >= otherLength) {
+    // Nothing in |other| that isn't already here.
+    return true;
+  }
+
+  // Overwrite our sentinel with |other|'s entry for that slot, then copy the
+  // remainder of |other| (which ends with its own sentinel).
+  const uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
+  lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex];
+
+  size_t next = sentinelIndex + 1;
+  while (next < otherLength) {
+    if (!lineStartOffsets_.append(other.lineStartOffsets_[next])) {
+      return false;
+    }
+    next++;
+  }
+  return true;
+}
+
+// Returns the index |i| into |lineStartOffsets_| of the line containing
+// |offset|, i.e. lineStartOffsets_[i] <= offset < lineStartOffsets_[i + 1]
+// (the final entry being the sentinel).  The result is remembered in
+// |lastIndex_| so that a later lookup at or after that line can first try the
+// cached line and its next two successors before falling back to a binary
+// search.
+//
+// NOTE(review): |lastIndex_| is written from this const method, so it is
+// presumably declared mutable -- confirm against the class declaration.
+MOZ_ALWAYS_INLINE uint32_t
+SourceCoords::indexFromOffset(uint32_t offset) const {
+  uint32_t iMin, iMax, iMid;
+
+  if (lineStartOffsets_[lastIndex_] <= offset) {
+    // If we reach here, offset is on a line the same as or higher than
+    // last time.  Check first for the +0, +1, +2 cases, because they
+    // typically cover 85--98% of cases.
+    if (offset < lineStartOffsets_[lastIndex_ + 1]) {
+      return lastIndex_;  // index is same as last time
+    }
+
+    // If we reach here, there must be at least one more entry (plus the
+    // sentinel).  Try it.
+    lastIndex_++;
+    if (offset < lineStartOffsets_[lastIndex_ + 1]) {
+      return lastIndex_;  // index is one higher than last time
+    }
+
+    // The same logic applies here.
+    lastIndex_++;
+    if (offset < lineStartOffsets_[lastIndex_ + 1]) {
+      return lastIndex_;  // index is two higher than last time
+    }
+
+    // No luck.  Oh well, we have a better-than-default starting point for
+    // the binary search.
+    iMin = lastIndex_ + 1;
+    MOZ_ASSERT(iMin <
+               lineStartOffsets_.length() - 1);  // -1 due to the sentinel
+
+  } else {
+    // |offset| precedes the cached line: search the whole table.
+    iMin = 0;
+  }
+
+  // This is a binary search with deferred detection of equality, which was
+  // marginally faster in this case than a standard binary search.
+  // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we
+  // want one before that.
+  iMax = lineStartOffsets_.length() - 2;
+  while (iMax > iMin) {
+    iMid = iMin + (iMax - iMin) / 2;
+    if (offset >= lineStartOffsets_[iMid + 1]) {
+      iMin = iMid + 1;  // offset is above lineStartOffsets_[iMid]
+    } else {
+      iMax = iMid;  // offset is below or within lineStartOffsets_[iMid]
+    }
+  }
+
+  MOZ_ASSERT(iMax == iMin);
+  MOZ_ASSERT(lineStartOffsets_[iMin] <= offset);
+  MOZ_ASSERT(offset < lineStartOffsets_[iMin + 1]);
+
+  // Cache the answer for the next lookup.
+  lastIndex_ = iMin;
+  return iMin;
+}
+
+// Builds a LineToken from the index of the line containing |offset| and the
+// offset itself.
+SourceCoords::LineToken SourceCoords::lineToken(uint32_t offset) const {
+  const uint32_t lineIndex = indexFromOffset(offset);
+  return LineToken(lineIndex, offset);
+}
+
+// Captures the compile options (filename, starting line and source offset,
+// muted-errors flag) and marks which token kinds end an expression.
+TokenStreamAnyChars::TokenStreamAnyChars(FrontendContext* fc,
+                                         const ReadOnlyCompileOptions& options,
+                                         StrictModeGetter* smg)
+    : fc(fc),
+      options_(options),
+      strictModeGetter_(smg),
+      filename_(options.filename()),
+      longLineColumnInfo_(fc),
+      srcCoords(fc, options.lineno, options.scriptSourceOffset),
+      lineno(options.lineno),
+      mutedErrors(options.mutedErrors()) {
+  // |isExprEnding| starts out all-false; flip on the entries for the token
+  // kinds that terminate an expression.
+  static constexpr TokenKind ExprEndingKinds[] = {
+      TokenKind::Comma,      TokenKind::Semi,         TokenKind::Colon,
+      TokenKind::RightParen, TokenKind::RightBracket, TokenKind::RightCurly,
+  };
+
+  for (TokenKind kind : ExprEndingKinds) {
+    isExprEnding[size_t(kind)] = true;
+  }
+}
+
+// Forwards |fc| and |parserAtoms| to the TokenStreamCharsShared base and
+// initializes |sourceUnits| from |units|, |length| and |startOffset|.  The
+// constructor body itself does nothing.
+template <typename Unit>
+TokenStreamCharsBase<Unit>::TokenStreamCharsBase(FrontendContext* fc,
+                                                 ParserAtomsTable* parserAtoms,
+                                                 const Unit* units,
+                                                 size_t length,
+                                                 size_t startOffset)
+    : TokenStreamCharsShared(fc, parserAtoms),
+      sourceUnits(units, length, startOffset) {}
+
+// Appends the UTF-16 units in [cur, end) to the (initially empty)
+// |charBuffer|, replacing every "\r\n" pair and every lone '\r' with a single
+// '\n'.  Returns false if an append fails.
+bool FillCharBufferFromSourceNormalizingAsciiLineBreaks(CharBuffer& charBuffer,
+                                                        const char16_t* cur,
+                                                        const char16_t* end) {
+  MOZ_ASSERT(charBuffer.length() == 0);
+
+  while (cur < end) {
+    char16_t unit = *cur;
+    cur++;
+
+    if (unit == '\r') {
+      // CR or CRLF: emit one LF, swallowing the LF of a CRLF pair.
+      unit = '\n';
+      if (cur < end && *cur == '\n') {
+        cur++;
+      }
+    }
+
+    if (!charBuffer.append(unit)) {
+      return false;
+    }
+  }
+
+  MOZ_ASSERT(cur == end);
+  return true;
+}
+
+// UTF-8 counterpart: decodes the units in [cur, end) into the (initially
+// empty) |charBuffer|, replacing every "\r\n" pair and every lone '\r' with a
+// single '\n'.  The input is expected to be valid UTF-8 already.  Returns
+// false if an append fails.
+bool FillCharBufferFromSourceNormalizingAsciiLineBreaks(CharBuffer& charBuffer,
+                                                        const Utf8Unit* cur,
+                                                        const Utf8Unit* end) {
+  MOZ_ASSERT(charBuffer.length() == 0);
+
+  while (cur < end) {
+    const Utf8Unit lead = *cur++;
+
+    if (MOZ_UNLIKELY(!IsAscii(lead))) {
+      // Multi-unit code point: decode it (advancing |cur| past its trailing
+      // units) and append it to the buffer.
+      Maybe<char32_t> codePoint = DecodeOneUtf8CodePoint(lead, &cur, end);
+      MOZ_ASSERT(codePoint.isSome(),
+                 "provided source text should already have been validated");
+
+      if (!AppendCodePointToCharBuffer(charBuffer, codePoint.value())) {
+        return false;
+      }
+
+      continue;
+    }
+
+    // ASCII: copy through, collapsing CR and CRLF into LF.
+    char16_t asciiUnit = lead.toUint8();
+    if (asciiUnit == '\r') {
+      asciiUnit = '\n';
+      if (cur < end && *cur == Utf8Unit('\n')) {
+        cur++;
+      }
+    }
+
+    if (!charBuffer.append(asciiUnit)) {
+      return false;
+    }
+  }
+
+  MOZ_ASSERT(cur == end);
+  return true;
+}
+
+// Forwards everything to the TokenStreamChars base, passing
+// |options.scriptSourceOffset| as the start offset of |units|.  The
+// constructor body itself does nothing.
+template <typename Unit, class AnyCharsAccess>
+TokenStreamSpecific<Unit, AnyCharsAccess>::TokenStreamSpecific(
+    FrontendContext* fc, ParserAtomsTable* parserAtoms,
+    const ReadOnlyCompileOptions& options, const Unit* units, size_t length)
+    : TokenStreamChars<Unit, AnyCharsAccess>(fc, parserAtoms, units, length,
+                                             options.scriptSourceOffset) {}
+
+// Validates the compile options.  A starting column beyond the limit at which
+// columns saturate is reported as JSMSG_BAD_COLUMN_NUMBER and rejected.
+bool TokenStreamAnyChars::checkOptions() {
+  const bool columnInRange = options().column.oneOriginValue() <=
+                             JS::LimitedColumnNumberOneOrigin::Limit;
+  if (columnInRange) {
+    return true;
+  }
+
+  reportErrorNoOffset(JSMSG_BAD_COLUMN_NUMBER);
+  return false;
+}
+
+// Variadic convenience wrapper: packages the arguments following
+// |errorNumber| into a va_list and hands them to reportErrorNoOffsetVA.
+void TokenStreamAnyChars::reportErrorNoOffset(unsigned errorNumber, ...) const {
+  va_list varArgs;
+  va_start(varArgs, errorNumber);
+  reportErrorNoOffsetVA(errorNumber, &varArgs);
+  va_end(varArgs);
+}
+
+// Reports compile error |errorNumber| using error metadata computed without
+// an offset, and with no notes attached.
+void TokenStreamAnyChars::reportErrorNoOffsetVA(unsigned errorNumber,
+                                                va_list* args) const {
+  ErrorMetadata err;
+  computeErrorMetadataNoOffset(&err);
+
+  ReportCompileErrorLatin1VA(fc, std::move(err), /* notes = */ nullptr,
+                             errorNumber, args);
+}
+
+// Advances the line bookkeeping past an end-of-line: the new line starts at
+// |lineStartOffset|.  Returns false if the line counter wraps around to zero
+// (reported as JSMSG_BAD_LINE_NUMBER) or if recording the line start fails.
+[[nodiscard]] MOZ_ALWAYS_INLINE bool
+TokenStreamAnyChars::internalUpdateLineInfoForEOL(uint32_t lineStartOffset) {
+  prevLinebase = linebase;
+  linebase = lineStartOffset;
+
+  // A line number that increments to zero has overflowed.
+  if (MOZ_UNLIKELY(++lineno == 0)) {
+    reportErrorNoOffset(JSMSG_BAD_LINE_NUMBER);
+    return false;
+  }
+
+  return srcCoords.add(lineno, linebase);
+}
+
+#ifdef DEBUG
+
+// Debug-only check that the units at |ptr| are exactly the UTF-16 encoding of
+// the code point recorded in |peeked|, with the recorded length.
+template <>
+inline void SourceUnits<char16_t>::assertNextCodePoint(
+    const PeekedCodePoint<char16_t>& peeked) {
+  const char32_t codePoint = peeked.codePoint();
+
+  if (codePoint >= unicode::NonBMPMin) {
+    // Non-BMP: expect a lead/trail surrogate pair.
+    MOZ_ASSERT(peeked.lengthInUnits() == 2);
+    char16_t lead, trail;
+    unicode::UTF16Encode(codePoint, &lead, &trail);
+    MOZ_ASSERT(ptr[0] == lead);
+    MOZ_ASSERT(ptr[1] == trail);
+    return;
+  }
+
+  // BMP: a single unit equal to the code point.
+  MOZ_ASSERT(peeked.lengthInUnits() == 1);
+  MOZ_ASSERT(ptr[0] == codePoint);
+}
+
+// Debug-only check that the units at |ptr| are exactly the UTF-8 encoding of
+// the code point recorded in |peeked|.
+template <>
+inline void SourceUnits<Utf8Unit>::assertNextCodePoint(
+    const PeekedCodePoint<Utf8Unit>& peeked) {
+  const char32_t codePoint = peeked.codePoint();
+
+  // Re-encode the code point by hand.  Duplicating the UTF-8 encoder here is
+  // deliberate: this exists purely to double-check the real one.
+  uint8_t expected[4] = {};
+  if (codePoint < 0x80) {
+    expected[0] = AssertedCast<uint8_t>(codePoint);
+  } else if (codePoint < 0x800) {
+    expected[0] = 0b1100'0000 | (codePoint >> 6);
+    expected[1] = 0b1000'0000 | (codePoint & 0b11'1111);
+  } else if (codePoint < 0x10000) {
+    expected[0] = 0b1110'0000 | (codePoint >> 12);
+    expected[1] = 0b1000'0000 | ((codePoint >> 6) & 0b11'1111);
+    expected[2] = 0b1000'0000 | (codePoint & 0b11'1111);
+  } else {
+    expected[0] = 0b1111'0000 | (codePoint >> 18);
+    expected[1] = 0b1000'0000 | ((codePoint >> 12) & 0b11'1111);
+    expected[2] = 0b1000'0000 | ((codePoint >> 6) & 0b11'1111);
+    expected[3] = 0b1000'0000 | (codePoint & 0b11'1111);
+  }
+
+  MOZ_ASSERT(peeked.lengthInUnits() <= 4);
+  for (uint8_t index = 0; index < peeked.lengthInUnits(); index++) {
+    MOZ_ASSERT(expected[index] == ptr[index].toUint8());
+  }
+}
+
+#endif  // DEBUG
+
+// Moves |*ptr| backward, if necessary, so that it points at the first unit of
+// a UTF-8 code point.  |limit| is itself a code point boundary, so a pointer
+// equal to |limit| is left alone (and never dereferenced).
+static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary(
+    const Utf8Unit** ptr, const Utf8Unit* limit) {
+  MOZ_ASSERT(*ptr <= limit);
+
+  if (MOZ_LIKELY(*ptr != limit)) {
+    // Step back over trailing units until a non-trailing unit is reached.
+#ifdef DEBUG
+    size_t stepsBack = 0;
+#endif
+    while (MOZ_UNLIKELY(IsTrailingUnit(**ptr))) {
+      --*ptr;
+#ifdef DEBUG
+      stepsBack++;
+#endif
+    }
+
+    MOZ_ASSERT(stepsBack < 4,
+               "the longest UTF-8 code point is four units, so this should never "
+               "retract more than three units");
+  }
+}
+
+// Moves |*ptr| back by one unit if it points at the trail half of a UTF-16
+// surrogate pair.  |limit| is itself a code point boundary, so a pointer equal
+// to |limit| is left alone (and never dereferenced).
+static MOZ_ALWAYS_INLINE void RetractPointerToCodePointBoundary(
+    const char16_t** ptr, const char16_t* limit) {
+  MOZ_ASSERT(*ptr <= limit);
+
+  if (MOZ_UNLIKELY(*ptr == limit)) {
+    return;
+  }
+
+  // Retraction is needed only when the pointer splits a two-unit code point.
+  // Apart from test suites feeding in garbage WTF-16, a trail surrogate here
+  // is all but guaranteed to be preceded by its lead surrogate.  The preceding
+  // unit is inspected only after the current unit proved to be a trail.
+  const char16_t* current = *ptr;
+  if (MOZ_UNLIKELY(unicode::IsTrailSurrogate(current[0])) &&
+      MOZ_LIKELY(unicode::IsLeadSurrogate(current[-1]))) {
+    *ptr = current - 1;
+  }
+}
+
+// Returns how far |offset| is into its line, counted in UTF-16 code units.
+// |lineToken| must be the line token for |offset|.
+template <typename Unit>
+JS::ColumnNumberUnsignedOffset TokenStreamAnyChars::computeColumnOffset(
+    const LineToken lineToken, const uint32_t offset,
+    const SourceUnits<Unit>& sourceUnits) const {
+  lineToken.assertConsistentOffset(offset);
+
+  const uint32_t lineStart = srcCoords.lineStart(lineToken);
+  const uint32_t unitsIntoLine = offset - lineStart;
+
+  // For UTF-16 source the distance in code units already is the answer.
+  if constexpr (std::is_same_v<Unit, char16_t>) {
+    return JS::ColumnNumberUnsignedOffset(unitsIntoLine);
+  }
+
+  // Otherwise the units must be counted as UTF-16.
+  return computeColumnOffsetForUTF8(lineToken, offset, lineStart, unitsIntoLine,
+                                    sourceUnits);
+}
+
+// Computes the column offset of |offset| within its line, counted in UTF-16
+// code units, where |start| is the offset of the line's first unit and
+// |offsetInLine| is |offset - start|.
+//
+// Two caches limit how many units must be re-counted:
+//
+//   * the offset/column of the previous computation on this same line
+//     (|lastOffsetOfComputedColumn_| / |lastComputedColumnOffset_|), and
+//   * for offsets at least ColumnChunkLength units into the line, a per-line
+//     vector of ChunkInfo kept in |longLineColumnInfo_|, recording the column
+//     at each chunk start and whether the chunk is known to hold only
+//     single-unit code points.
+//
+// Allocation failures while maintaining the chunk data are recovered from;
+// the column is then counted from the best partial position already known.
+template <typename Unit>
+JS::ColumnNumberUnsignedOffset TokenStreamAnyChars::computeColumnOffsetForUTF8(
+    const LineToken lineToken, const uint32_t offset, const uint32_t start,
+    const uint32_t offsetInLine, const SourceUnits<Unit>& sourceUnits) const {
+  const uint32_t line = lineNumber(lineToken);
+
+  // Reset the previous offset/column number offset cache for this line, if the
+  // previous lookup wasn't on this line.
+  if (line != lineOfLastColumnComputation_) {
+    lineOfLastColumnComputation_ = line;
+    lastChunkVectorForLine_ = nullptr;
+    lastOffsetOfComputedColumn_ = start;
+    lastComputedColumnOffset_ = JS::ColumnNumberUnsignedOffset::zero();
+  }
+
+  // Compute and return the final column number offset from a partially
+  // calculated offset/column number offset, using the last-cached
+  // offset/column number offset if they're more optimal.
+  auto OffsetFromPartial =
+      [this, offset, &sourceUnits](
+          uint32_t partialOffset,
+          JS::ColumnNumberUnsignedOffset partialColumnOffset,
+          UnitsType unitsType) {
+        MOZ_ASSERT(partialOffset <= offset);
+
+        // If the last lookup on this line was closer to |offset|, use it.
+        if (partialOffset < this->lastOffsetOfComputedColumn_ &&
+            this->lastOffsetOfComputedColumn_ <= offset) {
+          partialOffset = this->lastOffsetOfComputedColumn_;
+          partialColumnOffset = this->lastComputedColumnOffset_;
+        }
+
+        const Unit* begin = sourceUnits.codeUnitPtrAt(partialOffset);
+        const Unit* end = sourceUnits.codeUnitPtrAt(offset);
+
+        size_t offsetDelta =
+            AssertedCast<uint32_t>(PointerRangeSize(begin, end));
+        partialOffset += offsetDelta;
+
+        // When every code point in [begin, end) is a single unit, the unit
+        // count is the column delta and no counting pass is needed.
+        if (unitsType == UnitsType::GuaranteedSingleUnit) {
+          MOZ_ASSERT(unicode::CountUTF16CodeUnits(begin, end) == offsetDelta,
+                     "guaranteed-single-units also guarantee pointer distance "
+                     "equals UTF-16 code unit count");
+          partialColumnOffset += JS::ColumnNumberUnsignedOffset(offsetDelta);
+        } else {
+          partialColumnOffset += JS::ColumnNumberUnsignedOffset(
+              AssertedCast<uint32_t>(unicode::CountUTF16CodeUnits(begin, end)));
+        }
+
+        // Remember this result for the next lookup on the same line.
+        this->lastOffsetOfComputedColumn_ = partialOffset;
+        this->lastComputedColumnOffset_ = partialColumnOffset;
+        return partialColumnOffset;
+      };
+
+  // We won't add an entry to |longLineColumnInfo_| for lines where the maximum
+  // column has offset less than this value.  The most common (non-minified)
+  // long line length is likely 80ch, maybe 100ch, so we use that, rounded up to
+  // the next power of two for efficient division/multiplication below.
+  constexpr uint32_t ColumnChunkLength = mozilla::tl::RoundUpPow2<100>::value;
+
+  // The index within any associated |Vector<ChunkInfo>| of |offset|'s chunk.
+  const uint32_t chunkIndex = offsetInLine / ColumnChunkLength;
+  if (chunkIndex == 0) {
+    // We don't know from an |offset| in the zeroth chunk that this line is even
+    // long.  First-chunk info is mostly useless, anyway -- we have |start|
+    // already.  So if we have *easy* access to that zeroth chunk, use it --
+    // otherwise just count pessimally.  (This will still benefit from caching
+    // the last column/offset for computations for successive offsets, so it's
+    // not *always* worst-case.)
+    UnitsType unitsType;
+    if (lastChunkVectorForLine_ && lastChunkVectorForLine_->length() > 0) {
+      MOZ_ASSERT((*lastChunkVectorForLine_)[0].columnOffset() ==
+                 JS::ColumnNumberUnsignedOffset::zero());
+      unitsType = (*lastChunkVectorForLine_)[0].unitsType();
+    } else {
+      unitsType = UnitsType::PossiblyMultiUnit;
+    }
+
+    return OffsetFromPartial(start, JS::ColumnNumberUnsignedOffset::zero(),
+                             unitsType);
+  }
+
+  // If this line has no chunk vector yet, insert one in the hash map.  (The
+  // required index is allocated and filled further down.)
+  if (!lastChunkVectorForLine_) {
+    auto ptr = longLineColumnInfo_.lookupForAdd(line);
+    if (!ptr) {
+      // This could rehash and invalidate a cached vector pointer, but the outer
+      // condition means we don't have a cached pointer.
+      if (!longLineColumnInfo_.add(ptr, line, Vector<ChunkInfo>(fc))) {
+        // In case of OOM, just count columns from the start of the line.
+        fc->recoverFromOutOfMemory();
+        return OffsetFromPartial(start, JS::ColumnNumberUnsignedOffset::zero(),
+                                 UnitsType::PossiblyMultiUnit);
+      }
+    }
+
+    // Note that adding elements to this vector won't invalidate this pointer.
+    lastChunkVectorForLine_ = &ptr->value();
+  }
+
+  const Unit* const limit = sourceUnits.codeUnitPtrAt(offset);
+
+  // Returns the source offset at which chunk |index| really starts: the naive
+  // |start + index * ColumnChunkLength|, moved back to the nearest code point
+  // boundary if the naive position falls inside a multi-unit code point.
+  auto RetractedOffsetOfChunk = [
+#ifdef DEBUG
+                                    this,
+#endif
+                                    start, limit,
+                                    &sourceUnits](uint32_t index) {
+    MOZ_ASSERT(index < this->lastChunkVectorForLine_->length());
+
+    uint32_t naiveOffset = start + index * ColumnChunkLength;
+    const Unit* naivePtr = sourceUnits.codeUnitPtrAt(naiveOffset);
+
+    const Unit* actualPtr = naivePtr;
+    RetractPointerToCodePointBoundary(&actualPtr, limit);
+
+#ifdef DEBUG
+    if ((*this->lastChunkVectorForLine_)[index].unitsType() ==
+        UnitsType::GuaranteedSingleUnit) {
+      MOZ_ASSERT(naivePtr == actualPtr, "miscomputed unitsType value");
+    }
+#endif
+
+    return naiveOffset - PointerRangeSize(actualPtr, naivePtr);
+  };
+
+  uint32_t partialOffset;
+  JS::ColumnNumberUnsignedOffset partialColumnOffset;
+  UnitsType unitsType;
+
+  auto entriesLen = AssertedCast<uint32_t>(lastChunkVectorForLine_->length());
+  if (chunkIndex < entriesLen) {
+    // We've computed the chunk |offset| resides in.  Compute the column number
+    // from the chunk.
+    partialOffset = RetractedOffsetOfChunk(chunkIndex);
+    partialColumnOffset = (*lastChunkVectorForLine_)[chunkIndex].columnOffset();
+
+    // This is exact if |chunkIndex| isn't the last chunk.
+    unitsType = (*lastChunkVectorForLine_)[chunkIndex].unitsType();
+
+    // Otherwise the last chunk is pessimistically assumed to contain multi-unit
+    // code points because we haven't fully examined its contents yet -- they
+    // may not have been tokenized yet, they could contain encoding errors, or
+    // they might not even exist.
+    MOZ_ASSERT_IF(chunkIndex == entriesLen - 1,
+                  (*lastChunkVectorForLine_)[chunkIndex].unitsType() ==
+                      UnitsType::PossiblyMultiUnit);
+  } else {
+    // Extend the vector from its last entry or the start of the line.  (This is
+    // also a suitable partial start point if we must recover from OOM.)
+    if (entriesLen > 0) {
+      partialOffset = RetractedOffsetOfChunk(entriesLen - 1);
+      partialColumnOffset =
+          (*lastChunkVectorForLine_)[entriesLen - 1].columnOffset();
+    } else {
+      partialOffset = start;
+      partialColumnOffset = JS::ColumnNumberUnsignedOffset::zero();
+    }
+
+    if (!lastChunkVectorForLine_->reserve(chunkIndex + 1)) {
+      // As earlier, just start from the greatest offset/column in case of OOM.
+      fc->recoverFromOutOfMemory();
+      return OffsetFromPartial(partialOffset, partialColumnOffset,
+                               UnitsType::PossiblyMultiUnit);
+    }
+
+    // OOM is no longer possible now.  \o/
+
+    // The vector always begins with the column of the line start, i.e. zero,
+    // with chunk units pessimally assumed not single-unit.
+    if (entriesLen == 0) {
+      lastChunkVectorForLine_->infallibleAppend(
+          ChunkInfo(JS::ColumnNumberUnsignedOffset::zero(),
+                    UnitsType::PossiblyMultiUnit));
+      entriesLen++;
+    }
+
+    // Each iteration finishes the current final chunk (recording whether it
+    // turned out to be all single-unit) and appends an entry for the chunk
+    // after it, until the vector has an entry for |chunkIndex|.
+    do {
+      const Unit* const begin = sourceUnits.codeUnitPtrAt(partialOffset);
+      const Unit* chunkLimit = sourceUnits.codeUnitPtrAt(
+          start + std::min(entriesLen++ * ColumnChunkLength, offsetInLine));
+
+      MOZ_ASSERT(begin < chunkLimit);
+      MOZ_ASSERT(chunkLimit <= limit);
+
+      static_assert(
+          ColumnChunkLength > SourceUnitTraits<Unit>::maxUnitsLength - 1,
+          "any retraction below is assumed to never underflow to the "
+          "preceding chunk, even for the longest code point");
+
+      // Prior tokenizing ensured that [begin, limit) is validly encoded, and
+      // |begin < chunkLimit|, so any retraction here can't underflow.
+      RetractPointerToCodePointBoundary(&chunkLimit, limit);
+
+      MOZ_ASSERT(begin < chunkLimit);
+      MOZ_ASSERT(chunkLimit <= limit);
+
+      size_t numUnits = PointerRangeSize(begin, chunkLimit);
+      size_t numUTF16CodeUnits =
+          unicode::CountUTF16CodeUnits(begin, chunkLimit);
+
+      // If this chunk (which will become non-final at the end of the loop) is
+      // all single-unit code points, annotate the chunk accordingly.
+      if (numUnits == numUTF16CodeUnits) {
+        lastChunkVectorForLine_->back().guaranteeSingleUnits();
+      }
+
+      partialOffset += numUnits;
+      partialColumnOffset += JS::ColumnNumberUnsignedOffset(numUTF16CodeUnits);
+
+      lastChunkVectorForLine_->infallibleEmplaceBack(
+          partialColumnOffset, UnitsType::PossiblyMultiUnit);
+    } while (entriesLen < chunkIndex + 1);
+
+    // We're at a spot in the current final chunk, and final chunks never have
+    // complete units information, so be pessimistic.
+    unitsType = UnitsType::PossiblyMultiUnit;
+  }
+
+  return OffsetFromPartial(partialOffset, partialColumnOffset, unitsType);
+}
+
+// Returns the one-origin column of |offset|, which must lie on the line
+// identified by |lineToken|.  On the first line the column is relative to the
+// starting column given in the compile options.
+template <typename Unit, class AnyCharsAccess>
+JS::LimitedColumnNumberOneOrigin
+GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeColumn(
+    LineToken lineToken, uint32_t offset) const {
+  lineToken.assertConsistentOffset(offset);
+
+  const TokenStreamAnyChars& anyChars = anyCharsAccess();
+
+  JS::ColumnNumberUnsignedOffset offsetInColumns =
+      anyChars.computeColumnOffset(lineToken, offset, this->sourceUnits);
+
+  if (lineToken.isFirstLine()) {
+    // Saturate at the limit before adding the starting column.
+    if (1 + offsetInColumns.value() > JS::LimitedColumnNumberOneOrigin::Limit) {
+      return JS::LimitedColumnNumberOneOrigin::limit();
+    }
+
+    return JS::LimitedColumnNumberOneOrigin::fromUnlimited(
+        (anyChars.options_.column + offsetInColumns).oneOriginValue());
+  }
+
+  // Every later line starts at the default (first) column.
+  return JS::LimitedColumnNumberOneOrigin::fromUnlimited(
+      JS::ColumnNumberOneOrigin() + offsetInColumns);
+}
+
+// Stores the line number and one-origin column of |offset| into |*line| and
+// |*column|.
+template <typename Unit, class AnyCharsAccess>
+void GeneralTokenStreamChars<Unit, AnyCharsAccess>::computeLineAndColumn(
+    uint32_t offset, uint32_t* line,
+    JS::LimitedColumnNumberOneOrigin* column) const {
+  const TokenStreamAnyChars& anyChars = anyCharsAccess();
+  const auto token = anyChars.lineToken(offset);
+
+  *line = anyChars.lineNumber(token);
+  *column = computeColumn(token, offset);
+}
+
+// Reports compile error |errorNumber| (with its variadic arguments) at the
+// current source-units offset, attaching a JSMSG_BAD_CODE_UNITS note that
+// lists the next |relevantUnits| code units.  |relevantUnits| must be nonzero.
+//
+// The units are fetched with getCodeUnit() while the note text is built, so
+// they are presumably consumed from |sourceUnits| as a side effect.  If
+// computing the line of context, allocating the notes, or adding the note
+// fails, the function bails out without reporting |errorNumber| itself.
+template <class AnyCharsAccess>
+MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::internalEncodingError(
+    uint8_t relevantUnits, unsigned errorNumber, ...) {
+  va_list args;
+  va_start(args, errorNumber);
+
+  // Single-pass loop: |break| jumps straight to the va_end below.
+  do {
+    size_t offset = this->sourceUnits.offset();
+
+    ErrorMetadata err;
+
+    TokenStreamAnyChars& anyChars = anyCharsAccess();
+
+    bool canAddLineOfContext = fillExceptingContext(&err, offset);
+    if (canAddLineOfContext) {
+      if (!internalComputeLineOfContext(&err, offset)) {
+        break;
+      }
+
+      // As this is an encoding error, the computed window-end must be
+      // identical to the location of the error -- any further on and the
+      // window would contain invalid Unicode.
+      MOZ_ASSERT_IF(err.lineOfContext != nullptr,
+                    err.lineLength == err.tokenOffset);
+    }
+
+    auto notes = MakeUnique<JSErrorNotes>();
+    if (!notes) {
+      ReportOutOfMemory(anyChars.fc);
+      break;
+    }
+
+    // The largest encoding of a UTF-8 code point is 4 units.  (Encoding an
+    // obsolete 5- or 6-byte code point will complain only about a bad lead
+    // code unit.)
+    constexpr size_t MaxWidth = sizeof("0xHH 0xHH 0xHH 0xHH");
+
+    MOZ_ASSERT(relevantUnits > 0);
+
+    // Each unit occupies five chars of |badUnitsStr|: four written by
+    // byteToString plus a separating space.
+    char badUnitsStr[MaxWidth];
+    char* ptr = badUnitsStr;
+    while (relevantUnits > 0) {
+      byteToString(this->sourceUnits.getCodeUnit().toUint8(), ptr);
+      ptr[4] = ' ';
+
+      ptr += 5;
+      relevantUnits--;
+    }
+
+    // Replace the final separating space with the terminator.
+    ptr[-1] = '\0';
+
+    uint32_t line;
+    JS::LimitedColumnNumberOneOrigin column;
+    computeLineAndColumn(offset, &line, &column);
+
+    if (!notes->addNoteASCII(anyChars.fc, anyChars.getFilename().c_str(), 0,
+                             line, JS::ColumnNumberOneOrigin(column),
+                             GetErrorMessage, nullptr, JSMSG_BAD_CODE_UNITS,
+                             badUnitsStr)) {
+      break;
+    }
+
+    ReportCompileErrorLatin1VA(anyChars.fc, std::move(err), std::move(notes),
+                               errorNumber, &args);
+  } while (false);
+
+  va_end(args);
+}
+
+template <class AnyCharsAccess>
+MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::badLeadUnit(
+    Utf8Unit lead) {
+  // Render the offending lead unit as a terminated string and report it
+  // as a one-unit encoding error.
+  char leadText[5];
+  byteToTerminatedString(lead.toUint8(), leadText);
+
+  internalEncodingError(1, JSMSG_BAD_LEADING_UTF8_UNIT, leadText);
+}
+
+template <class AnyCharsAccess>
+MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::notEnoughUnits(
+    Utf8Unit lead, uint8_t remaining, uint8_t required) {
+  // Report a multi-unit sequence that is cut short: |required| units were
+  // needed but only |remaining| were available.  (Both counts appear to
+  // include the lead unit, hence the |- 1| when formatting below.)
+  MOZ_ASSERT(required == 2 || required == 3 || required == 4);
+  MOZ_ASSERT(remaining < 4);
+  MOZ_ASSERT(remaining < required);
+
+  char leadText[5];
+  byteToTerminatedString(lead.toUint8(), leadText);
+
+  // For values below 4, |toHexChar| yields exactly the decimal digit.
+  const char expectedCount[] = {toHexChar(required - 1), '\0'};
+  const char actualCount[] = {toHexChar(remaining - 1), '\0'};
+
+  // Grammatical suffixes for the two counts in the message.
+  const char* expectedSuffix = "s";
+  if (required == 2) {
+    expectedSuffix = "";
+  }
+
+  const char* actualSuffix = "s were";
+  if (remaining == 2) {
+    actualSuffix = " was";
+  }
+
+  internalEncodingError(remaining, JSMSG_NOT_ENOUGH_CODE_UNITS, leadText,
+                        expectedCount, expectedSuffix, actualCount,
+                        actualSuffix);
+}
+
+template <class AnyCharsAccess>
+MOZ_COLD void TokenStreamChars<Utf8Unit, AnyCharsAccess>::badTrailingUnit(
+    uint8_t unitsObserved) {
+  // The offending unit is the last of the |unitsObserved| units counted
+  // from the next code unit to be read.
+  const Utf8Unit* nextUnit = this->sourceUnits.addressOfNextCodeUnit();
+  Utf8Unit offender = nextUnit[unitsObserved - 1];
+
+  char offenderText[5];
+  byteToTerminatedString(offender.toUint8(), offenderText);
+
+  internalEncodingError(unitsObserved, JSMSG_BAD_TRAILING_UTF8_UNIT,
+                        offenderText);
+}
+
+template <class AnyCharsAccess>
+MOZ_COLD void
+TokenStreamChars<Utf8Unit, AnyCharsAccess>::badStructurallyValidCodePoint(
+    char32_t codePoint, uint8_t codePointLength, const char* reason) {
+  // Build a string such as "0x203D" for the error message.  The text is
+  // produced back to front: start at the end of a big-enough buffer, lay
+  // down the terminator, then one hex digit per nibble (least significant
+  // first), and finally the "0x" prefix.  |cursor| always points at the
+  // first character written so far.
+
+  // Four-byte UTF-8 sequences carry 3+6+6+6 payload bits, so the largest
+  // value ever formatted here is 0x1FFFFF.
+  constexpr size_t MaxHexSize = sizeof("0x1FFFFF");  // including '\0'
+  char hexBuffer[MaxHexSize];
+
+  char* cursor = std::end(hexBuffer);
+  *--cursor = '\0';
+
+  // A do-while (not a while) so that zero still prints a single '0'.
+  char32_t rest = codePoint;
+  do {
+    MOZ_ASSERT(hexBuffer < cursor);
+    cursor--;
+    *cursor = toHexChar(rest & 0xF);
+    rest >>= 4;
+  } while (rest != 0);
+
+  MOZ_ASSERT(hexBuffer + 2 <= cursor);
+  cursor -= 2;
+  cursor[0] = '0';
+  cursor[1] = 'x';
+
+  internalEncodingError(codePointLength, JSMSG_FORBIDDEN_UTF8_CODE_POINT,
+                        cursor, reason);
+}
+
+template <class AnyCharsAccess>
+[[nodiscard]] bool
+TokenStreamChars<Utf8Unit, AnyCharsAccess>::getNonAsciiCodePointDontNormalize(
+    Utf8Unit lead, char32_t* codePoint) {
+  // One reporting callback per way the decode can fail; each forwards to
+  // the matching error-reporting member function.
+  auto reportBadLead = [this, &lead]() { this->badLeadUnit(lead); };
+
+  auto reportTruncated = [this, &lead](uint8_t remaining, uint8_t required) {
+    this->notEnoughUnits(lead, remaining, required);
+  };
+
+  auto reportBadTrail = [this](uint8_t unitsObserved) {
+    this->badTrailingUnit(unitsObserved);
+  };
+
+  auto reportBadCodePoint = [this](char32_t badCodePoint,
+                                   uint8_t unitsObserved) {
+    this->badCodePoint(badCodePoint, unitsObserved);
+  };
+
+  auto reportOverlong = [this](char32_t badCodePoint, uint8_t unitsObserved) {
+    this->notShortestForm(badCodePoint, unitsObserved);
+  };
+
+  // A successful decode consumes the code point's units.  A failed one
+  // ungets the lead unit and has already invoked the right callback, so
+  // the only thing left to do on failure is to return false.
+  SourceUnitsIterator unitsIter(this->sourceUnits);
+  Maybe<char32_t> decoded = DecodeOneUtf8CodePointInline(
+      lead, &unitsIter, SourceUnitsEnd(), reportBadLead, reportTruncated,
+      reportBadTrail, reportBadCodePoint, reportOverlong);
+  if (decoded.isSome()) {
+    *codePoint = decoded.value();
+    return true;
+  }
+
+  return false;
+}
+
+template <class AnyCharsAccess>
+bool TokenStreamChars<char16_t, AnyCharsAccess>::getNonAsciiCodePoint(
+    int32_t lead, char32_t* codePoint) {
+  MOZ_ASSERT(lead != EOF);
+  MOZ_ASSERT(!isAsciiCodePoint(lead),
+             "ASCII code unit/point must be handled separately");
+  MOZ_ASSERT(lead == this->sourceUnits.previousCodeUnit(),
+             "getNonAsciiCodePoint called incorrectly");
+
+  // Start from the common answer (the code point is just |lead|) and only
+  // overwrite it in the special cases below.
+  *codePoint = AssertedCast<char32_t>(lead);
+
+  // ECMAScript requires unpaired UTF-16 surrogates to be treated as the
+  // corresponding code point rather than as an error; see
+  // <https://tc39.github.io/ecma262/#sec-ecmascript-language-types-string-type>.
+  // Consequently no sequence of 16-bit units is rejected here.
+
+  if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(lead))) {
+    // Combine with a following trail surrogate when there is one;
+    // otherwise the lone lead surrogate stands for itself.
+    const bool trailFollows =
+        !this->sourceUnits.atEnd() &&
+        unicode::IsTrailSurrogate(this->sourceUnits.peekCodeUnit());
+    if (MOZ_LIKELY(trailFollows)) {
+      *codePoint = unicode::UTF16Decode(lead, this->sourceUnits.getCodeUnit());
+    }
+
+    MOZ_ASSERT(!IsLineTerminator(*codePoint));
+    return true;
+  }
+
+  // Single-unit code points and lone trail surrogates end up here.
+  const bool isUnicodeSeparator =
+      lead == unicode::LINE_SEPARATOR || lead == unicode::PARA_SEPARATOR;
+  if (MOZ_LIKELY(!isUnicodeSeparator)) {
+    MOZ_ASSERT(!IsLineTerminator(*codePoint));
+    return true;
+  }
+
+  // U+2028/U+2029: record the new line and hand back '\n'.
+  if (updateLineInfoForEOL()) {
+    *codePoint = '\n';
+    return true;
+  }
+
+#ifdef DEBUG
+  // Store a sentinel so that accidental use is more likely to be noticed.
+  *codePoint = std::numeric_limits<char32_t>::max();
+#endif
+  MOZ_MAKE_MEM_UNDEFINED(codePoint, sizeof(*codePoint));
+  return false;
+}
+
+template <class AnyCharsAccess>
+bool TokenStreamChars<Utf8Unit, AnyCharsAccess>::getNonAsciiCodePoint(
+    int32_t unit, char32_t* codePoint) {
+  MOZ_ASSERT(unit != EOF);
+  MOZ_ASSERT(!isAsciiCodePoint(unit),
+             "ASCII code unit/point must be handled separately");
+
+  auto lead = Utf8Unit(static_cast<unsigned char>(unit));
+  MOZ_ASSERT(lead == this->sourceUnits.previousCodeUnit(),
+             "getNonAsciiCodePoint called incorrectly");
+
+  // One reporting callback per way the decode can fail.
+  auto reportBadLead = [this, &lead]() { this->badLeadUnit(lead); };
+
+  auto reportTruncated = [this, &lead](uint_fast8_t remaining,
+                                       uint_fast8_t required) {
+    this->notEnoughUnits(lead, remaining, required);
+  };
+
+  auto reportBadTrail = [this](uint_fast8_t unitsObserved) {
+    this->badTrailingUnit(unitsObserved);
+  };
+
+  auto reportBadCodePoint = [this](char32_t badCodePoint,
+                                   uint_fast8_t unitsObserved) {
+    this->badCodePoint(badCodePoint, unitsObserved);
+  };
+
+  auto reportOverlong = [this](char32_t badCodePoint,
+                               uint_fast8_t unitsObserved) {
+    this->notShortestForm(badCodePoint, unitsObserved);
+  };
+
+  // Either the whole valid code point is consumed, or |lead| is ungotten
+  // and the matching callback above has already reported the problem.
+  SourceUnitsIterator unitsIter(this->sourceUnits);
+  Maybe<char32_t> decoded = DecodeOneUtf8CodePoint(
+      lead, &unitsIter, SourceUnitsEnd(), reportBadLead, reportTruncated,
+      reportBadTrail, reportBadCodePoint, reportOverlong);
+  if (decoded.isNothing()) {
+    return false;
+  }
+
+  const char32_t cp = decoded.value();
+  const bool isUnicodeSeparator =
+      cp == unicode::LINE_SEPARATOR || cp == unicode::PARA_SEPARATOR;
+  if (MOZ_LIKELY(!isUnicodeSeparator)) {
+    MOZ_ASSERT(!IsLineTerminator(cp));
+    *codePoint = cp;
+    return true;
+  }
+
+  // U+2028/U+2029: record the new line and hand back '\n'.
+  if (updateLineInfoForEOL()) {
+    *codePoint = '\n';
+    return true;
+  }
+
+#ifdef DEBUG
+  // Store a sentinel so that accidental use is more likely to be noticed.
+  *codePoint = std::numeric_limits<char32_t>::max();
+#endif
+  MOZ_MAKE_MEM_UNDEFINED(codePoint, sizeof(*codePoint));
+  return false;
+}
+
+template <>
+size_t SourceUnits<char16_t>::findWindowStart(size_t offset) const {  // Returns the offset at which the context window preceding |offset| begins.
+  // This is JS's understanding of UTF-16 that allows lone surrogates, so
+  // we have to exclude lone surrogates from [windowStart, offset) ourselves.
+
+  const char16_t* const earliestPossibleStart = codeUnitPtrAt(startOffset_);
+
+  const char16_t* const initial = codeUnitPtrAt(offset);
+  const char16_t* p = initial;  // candidate window start, walked backward from |initial|
+  // Number of code units currently in [p, initial).
+  auto HalfWindowSize = [&p, &initial]() {
+    return PointerRangeSize(p, initial);
+  };
+
+  while (true) {
+    MOZ_ASSERT(earliestPossibleStart <= p);
+    MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
+    if (p <= earliestPossibleStart || HalfWindowSize() >= WindowRadius) {
+      break;  // reached the earliest permitted unit or a full WindowRadius
+    }
+
+    char16_t c = p[-1];  // the unit that would be added to the window next
+
+    // This stops at U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR in
+    // string and template literals.  These code points do affect line and
+    // column coordinates, even as they encode their literal values.
+    if (IsLineTerminator(c)) {
+      break;
+    }
+
+    // Don't allow invalid UTF-16 in pre-context.  (Current users don't
+    // require this, and this behavior isn't currently imposed on
+    // pre-context, but these facts might change someday.)
+    // Scanning backward, a lead surrogate met first has no trail after it.
+    if (MOZ_UNLIKELY(unicode::IsLeadSurrogate(c))) {
+      break;
+    }
+
+    // Optimistically include the code unit, reverting below if needed.
+    p--;
+
+    // If it's not a surrogate at all, keep going.
+    if (MOZ_LIKELY(!unicode::IsTrailSurrogate(c))) {
+      continue;
+    }
+
+    // Stop if we don't have a usable surrogate pair.
+    if (HalfWindowSize() >= WindowRadius ||
+        p <= earliestPossibleStart ||      // trail surrogate at low end
+        !unicode::IsLeadSurrogate(p[-1]))  // no paired lead surrogate
+    {
+      p++;  // drop the trail surrogate that was optimistically included
+      break;
+    }
+
+    p--;  // include the paired lead surrogate as well
+  }
+
+  MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
+  return offset - HalfWindowSize();
+}
+
+template <>
+size_t SourceUnits<Utf8Unit>::findWindowStart(size_t offset) const {  // Returns the offset at which the context window preceding |offset| begins.
+  // |offset| must be the location of the error or somewhere before it, so we
+  // know preceding data is valid UTF-8.
+
+  const Utf8Unit* const earliestPossibleStart = codeUnitPtrAt(startOffset_);
+
+  const Utf8Unit* const initial = codeUnitPtrAt(offset);
+  const Utf8Unit* p = initial;  // candidate window start, walked backward from |initial|
+  // Number of code units currently in [p, initial).
+  auto HalfWindowSize = [&p, &initial]() {
+    return PointerRangeSize(p, initial);
+  };
+
+  while (true) {
+    MOZ_ASSERT(earliestPossibleStart <= p);
+    MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
+    if (p <= earliestPossibleStart || HalfWindowSize() >= WindowRadius) {
+      break;  // reached the earliest permitted unit or a full WindowRadius
+    }
+
+    // Peek backward for a line break, and only decrement if there is none.
+    uint8_t prev = p[-1].toUint8();
+
+    // First check for the ASCII LineTerminators.
+    if (prev == '\r' || prev == '\n') {
+      break;
+    }
+
+    // Now check for the non-ASCII LineTerminators U+2028 LINE SEPARATOR
+    // (0xE2 0x80 0xA8) and U+2029 PARAGRAPH SEPARATOR (0xE2 0x80 0xA9).  If
+    // there aren't three code units available, some comparison here will
+    // fail before we'd underflow.
+    if (MOZ_UNLIKELY((prev == 0xA8 || prev == 0xA9) &&
+                     p[-2].toUint8() == 0x80 && p[-3].toUint8() == 0xE2)) {
+      break;
+    }
+
+    // Rewind over the non-LineTerminator.  This can't underflow
+    // |earliestPossibleStart| because it begins a code point.
+    while (IsTrailingUnit(*--p)) {
+      continue;
+    }
+
+    MOZ_ASSERT(earliestPossibleStart <= p);
+
+    // But if we underflowed |WindowRadius|, adjust forward and stop.
+    if (HalfWindowSize() > WindowRadius) {
+      static_assert(WindowRadius > 3,
+                    "skipping over non-lead code units below must not "
+                    "advance past |offset|");
+      // Step forward past the code point that did not fit in the window.
+      while (IsTrailingUnit(*++p)) {
+        continue;
+      }
+
+      MOZ_ASSERT(HalfWindowSize() < WindowRadius);
+      break;
+    }
+  }
+
+  MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
+  return offset - HalfWindowSize();
+}
+
+template <>
+size_t SourceUnits<char16_t>::findWindowEnd(size_t offset) const {  // Returns the offset at which the context window following |offset| ends.
+  const char16_t* const initial = codeUnitPtrAt(offset);
+  const char16_t* p = initial;  // candidate window end, walked forward from |initial|
+  // Number of code units currently in [initial, p).
+  auto HalfWindowSize = [&initial, &p]() {
+    return PointerRangeSize(initial, p);
+  };
+
+  while (true) {
+    MOZ_ASSERT(p <= limit_);
+    MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
+    if (p >= limit_ || HalfWindowSize() >= WindowRadius) {
+      break;  // reached |limit_| or a full WindowRadius
+    }
+
+    char16_t c = *p;  // the unit that would be added to the window next
+
+    // This stops at U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR in
+    // string and template literals.  These code points do affect line and
+    // column coordinates, even as they encode their literal values.
+    if (IsLineTerminator(c)) {
+      break;
+    }
+
+    // Don't allow invalid UTF-16 in post-context.  (Current users don't
+    // require this, and this behavior isn't currently imposed on
+    // pre-context, but these facts might change someday.)
+    // Scanning forward, a trail surrogate met first has no lead before it.
+    if (MOZ_UNLIKELY(unicode::IsTrailSurrogate(c))) {
+      break;
+    }
+
+    // Optimistically consume the code unit, ungetting it below if needed.
+    p++;
+
+    // If it's not a surrogate at all, keep going.
+    if (MOZ_LIKELY(!unicode::IsLeadSurrogate(c))) {
+      continue;
+    }
+
+    // Retract if the lead surrogate would stand alone at the end of the
+    // window.
+    if (HalfWindowSize() >= WindowRadius ||  // split pair
+        p >= limit_ ||                       // half-pair at end of source
+        !unicode::IsTrailSurrogate(*p))      // no paired trail surrogate
+    {
+      p--;  // drop the lead surrogate that was optimistically consumed
+      break;
+    }
+
+    p++;  // consume the paired trail surrogate as well
+  }
+
+  return offset + HalfWindowSize();
+}
+
+template <>
+size_t SourceUnits<Utf8Unit>::findWindowEnd(size_t offset) const {  // Returns the offset at which the context window following |offset| ends.
+  const Utf8Unit* const initial = codeUnitPtrAt(offset);
+  const Utf8Unit* p = initial;  // candidate window end, walked forward from |initial|
+  // Number of code units currently in [initial, p).
+  auto HalfWindowSize = [&initial, &p]() {
+    return PointerRangeSize(initial, p);
+  };
+
+  while (true) {
+    MOZ_ASSERT(p <= limit_);
+    MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
+    if (p >= limit_ || HalfWindowSize() >= WindowRadius) {
+      break;  // reached |limit_| or a full WindowRadius
+    }
+
+    // A non-encoding error might be followed by an encoding error within
+    // |maxEnd|, so we must validate as we go to not include invalid UTF-8
+    // in the computed window.  What joy!
+
+    Utf8Unit lead = *p;
+    if (mozilla::IsAscii(lead)) {
+      if (IsSingleUnitLineTerminator(lead)) {
+        break;
+      }
+
+      p++;  // ASCII: a single unit, always safe to include
+      continue;
+    }
+
+    PeekedCodePoint<Utf8Unit> peeked = PeekCodePoint(p, limit_);
+    if (peeked.isNone()) {
+      break;  // encoding error
+    }
+
+    char32_t c = peeked.codePoint();
+    if (MOZ_UNLIKELY(c == unicode::LINE_SEPARATOR ||
+                     c == unicode::PARA_SEPARATOR)) {
+      break;
+    }
+
+    MOZ_ASSERT(!IsLineTerminator(c));
+
+    uint8_t len = peeked.lengthInUnits();
+    if (HalfWindowSize() + len > WindowRadius) {
+      break;  // the whole code point would not fit, so leave it out entirely
+    }
+
+    p += len;
+  }
+
+  MOZ_ASSERT(HalfWindowSize() <= WindowRadius);
+  return offset + HalfWindowSize();
+}
+
+template <typename Unit, class AnyCharsAccess>
+bool TokenStreamSpecific<Unit, AnyCharsAccess>::advance(size_t position) {
+  // Consume whole code points until the read position reaches |position|,
+  // failing as soon as one of them cannot be read.
+  const Unit* const target = this->sourceUnits.codeUnitPtrAt(position);
+  for (;;) {
+    if (!(this->sourceUnits.addressOfNextCodeUnit() < target)) {
+      break;
+    }
+
+    if (!getCodePoint()) {
+      return false;
+    }
+  }
+
+  // Collapse the current token to an empty range at the new position and
+  // throw away any buffered lookahead.
+  TokenStreamAnyChars& chars = anyCharsAccess();
+  Token& current = const_cast<Token&>(chars.currentToken());
+  current.pos.begin = this->sourceUnits.offset();
+  current.pos.end = current.pos.begin;
+#ifdef DEBUG
+  current.type = TokenKind::Limit;
+#endif
+  MOZ_MAKE_MEM_UNDEFINED(&current.type, sizeof(current.type));
+  chars.lookahead = 0;
+  return true;
+}
+
+template <typename Unit, class AnyCharsAccess>
+void TokenStreamSpecific<Unit, AnyCharsAccess>::seekTo(const Position& pos) {
+  // Restore every piece of scanner state recorded in |pos|: the read
+  // pointer, the line bookkeeping, and the current and lookahead tokens.
+  this->sourceUnits.setAddressOfNextCodeUnit(pos.buf,
+                                             /* allowPoisoned = */ true);
+
+  TokenStreamAnyChars& chars = anyCharsAccess();
+  chars.flags = pos.flags;
+  chars.lineno = pos.lineno;
+  chars.linebase = pos.linebase;
+  chars.prevLinebase = pos.prevLinebase;
+  chars.lookahead = pos.lookahead;
+
+  chars.tokens[chars.cursor()] = pos.currentToken;
+
+  // |ahead| is the 1-based distance from the cursor.
+  for (unsigned ahead = 1; ahead <= chars.lookahead; ahead++) {
+    chars.tokens[chars.aheadCursor(ahead)] = pos.lookaheadTokens[ahead - 1];
+  }
+}
+
+template <typename Unit, class AnyCharsAccess>
+bool TokenStreamSpecific<Unit, AnyCharsAccess>::seekTo(
+    const Position& pos, const TokenStreamAnyChars& other) {
+  // Bring our source coordinates up to date from |other| first; only seek
+  // if that succeeded.
+  const bool coordsFilled = anyCharsAccess().srcCoords.fill(other.srcCoords);
+  if (coordsFilled) {
+    seekTo(pos);
+  }
+
+  return coordsFilled;
+}
+
+void TokenStreamAnyChars::computeErrorMetadataNoOffset(
+    ErrorMetadata* err) const {
+  // No line of context is ever attached by this function, so none may be
+  // present on entry.
+  MOZ_ASSERT(err->lineOfContext == nullptr);
+
+  // With no offset to go on, report line 0 and a default-constructed
+  // column alongside this stream's filename and muting flag.
+  err->lineNumber = 0;
+  err->columnNumber = JS::ColumnNumberOneOrigin();
+  err->filename = filename_;
+  err->isMuted = mutedErrors;
+}
+
+bool TokenStreamAnyChars::fillExceptingContext(ErrorMetadata* err,
+                                               uint32_t offset) const {
+  // Returns true when this stream's own location information is used (so
+  // the caller may go on to add a line of context), and false when the
+  // location was taken from the calling script frame instead.
+  err->isMuted = mutedErrors;
+
+  if (filename_) {
+    err->filename = filename_;
+    return true;
+  }
+
+  // This stream has no filename: try to borrow a location from the caller.
+  if (JSContext* cx = context()->maybeCurrentJSContext()) {
+    NonBuiltinFrameIter frames(cx, FrameIter::FOLLOW_DEBUGGER_EVAL_PREV_LINK,
+                               cx->realm()->principals());
+    if (!frames.done() && frames.filename()) {
+      err->filename = JS::ConstUTF8CharsZ(frames.filename());
+
+      JS::TaggedColumnNumberOneOrigin taggedColumn;
+      err->lineNumber = frames.computeLine(&taggedColumn);
+      // NOTE: Wasm frame cannot appear here.
+      err->columnNumber =
+          JS::ColumnNumberOneOrigin(taggedColumn.toLimitedColumnNumber());
+      return false;
+    }
+  }
+
+  // No usable caller frame either: fall back to our own (empty) filename.
+  err->filename = filename_;
+  return true;
+}
+
+template <>
+inline void SourceUnits<char16_t>::computeWindowOffsetAndLength(
+    const char16_t* encodedWindow, size_t encodedTokenOffset,
+    size_t* utf16TokenOffset, size_t encodedWindowLength,
+    size_t* utf16WindowLength) const {  // UTF-16 specialization: not expected to be called; asserts if it is.
+  MOZ_ASSERT_UNREACHABLE("shouldn't need to recompute for UTF-16");  // neither output parameter is written
+}
+
+template <>
+inline void SourceUnits<Utf8Unit>::computeWindowOffsetAndLength(
+    const Utf8Unit* encodedWindow, size_t encodedTokenOffset,
+    size_t* utf16TokenOffset, size_t encodedWindowLength,
+    size_t* utf16WindowLength) const {  // Converts a UTF-8 token offset and window length into UTF-16 code unit counts.
+  MOZ_ASSERT(encodedTokenOffset <= encodedWindowLength,
+             "token offset must be within the window, and the two lambda "
+             "calls below presume this ordering of values");
+
+  const Utf8Unit* const encodedWindowEnd = encodedWindow + encodedWindowLength;
+
+  size_t i = 0;  // running count of UTF-16 code units, shared by both lambda calls
+  auto ComputeUtf16Count = [&i, &encodedWindow](const Utf8Unit* limit) {
+    while (encodedWindow < limit) {
+      Utf8Unit lead = *encodedWindow++;
+      if (MOZ_LIKELY(IsAscii(lead))) {
+        // ASCII contributes a single UTF-16 code unit.
+        i++;
+        continue;
+      }
+
+      Maybe<char32_t> cp = DecodeOneUtf8CodePoint(lead, &encodedWindow, limit);
+      MOZ_ASSERT(cp.isSome(),
+                 "computed window should only contain valid UTF-8");
+      // Supplementary code points take a surrogate pair; all others one unit.
+      i += unicode::IsSupplementary(cp.value()) ? 2 : 1;
+    }
+
+    return i;
+  };
+
+  // Compute the token offset from |i == 0| and the initial |encodedWindow|.
+  const Utf8Unit* token = encodedWindow + encodedTokenOffset;
+  MOZ_ASSERT(token <= encodedWindowEnd);
+  *utf16TokenOffset = ComputeUtf16Count(token);
+
+  // Compute the window length, picking up from |i| and |encodedWindow| that,
+  // in general, were modified just above.
+  *utf16WindowLength = ComputeUtf16Count(encodedWindowEnd);
+}
+
+template <typename Unit>
+bool TokenStreamCharsBase<Unit>::addLineOfContext(ErrorMetadata* err,
+                                                  uint32_t offset) const {  // Fills |err|'s lineOfContext, lineLength and tokenOffset from the source around |offset|; returns false only when a buffer operation fails.
+  // Rename the variable to make meaning clearer: an offset into source units
+  // in Unit encoding.
+  size_t encodedOffset = offset;
+
+  // These are also offsets into source units in Unit encoding.
+  size_t encodedWindowStart = sourceUnits.findWindowStart(encodedOffset);
+  size_t encodedWindowEnd = sourceUnits.findWindowEnd(encodedOffset);
+
+  size_t encodedWindowLength = encodedWindowEnd - encodedWindowStart;
+  MOZ_ASSERT(encodedWindowLength <= SourceUnits::WindowRadius * 2);
+
+  // Don't add a useless "line" of context when the window ends up empty
+  // because of an invalid encoding at the start of a line.
+  if (encodedWindowLength == 0) {
+    MOZ_ASSERT(err->lineOfContext == nullptr,
+               "ErrorMetadata::lineOfContext must be null so we don't "
+               "have to set the lineLength/tokenOffset fields");
+    return true;  // success, but with no line of context attached
+  }
+
+  CharBuffer lineOfContext(fc);
+
+  const Unit* encodedWindow = sourceUnits.codeUnitPtrAt(encodedWindowStart);
+  if (!FillCharBufferFromSourceNormalizingAsciiLineBreaks(
+          lineOfContext, encodedWindow, encodedWindow + encodedWindowLength)) {
+    return false;
+  }
+  // Measured before the terminator is appended, so it excludes the '\0'.
+  size_t utf16WindowLength = lineOfContext.length();
+
+  // The windowed string is null-terminated.
+  if (!lineOfContext.append('\0')) {
+    return false;
+  }
+
+  err->lineOfContext.reset(lineOfContext.extractOrCopyRawBuffer());
+  if (!err->lineOfContext) {
+    return false;
+  }
+
+  size_t encodedTokenOffset = encodedOffset - encodedWindowStart;
+
+  MOZ_ASSERT(encodedTokenOffset <= encodedWindowLength,
+             "token offset must be inside the window");
+
+  // The length in UTF-8 code units of a code point is always greater than or
+  // equal to the same code point's length in UTF-16 code units.  ASCII code
+  // points are 1 unit in either encoding.  Code points in [U+0080, U+10000)
+  // are 2-3 UTF-8 code units to 1 UTF-16 code unit.  And code points in
+  // [U+10000, U+10FFFF] are 4 UTF-8 code units to 2 UTF-16 code units.
+  //
+  // Therefore, if encoded window length equals the length in UTF-16 (this is
+  // always the case for Unit=char16_t), the UTF-16 offsets are exactly the
+  // encoded offsets.  Otherwise we must convert offset/length from UTF-8 to
+  // UTF-16.
+  if constexpr (std::is_same_v<Unit, char16_t>) {
+    MOZ_ASSERT(utf16WindowLength == encodedWindowLength,
+               "UTF-16 to UTF-16 shouldn't change window length");
+    err->tokenOffset = encodedTokenOffset;
+    err->lineLength = encodedWindowLength;
+  } else {
+    static_assert(std::is_same_v<Unit, Utf8Unit>, "should only see UTF-8 here");
+
+    bool simple = utf16WindowLength == encodedWindowLength;  // true when the two encodings have equal lengths
+#ifdef DEBUG
+    auto isAscii = [](Unit u) { return IsAscii(u); };
+    MOZ_ASSERT(std::all_of(encodedWindow, encodedWindow + encodedWindowLength,
+                           isAscii) == simple,
+               "equal window lengths in UTF-8 should correspond only to "
+               "wholly-ASCII text");
+#endif
+    if (simple) {
+      err->tokenOffset = encodedTokenOffset;
+      err->lineLength = encodedWindowLength;
+    } else {
+      sourceUnits.computeWindowOffsetAndLength(
+          encodedWindow, encodedTokenOffset, &err->tokenOffset,
+          encodedWindowLength, &err->lineLength);
+    }
+  }
+
+  return true;
+}
+
+template <typename Unit, class AnyCharsAccess>
+bool TokenStreamSpecific<Unit, AnyCharsAccess>::computeErrorMetadata(
+    ErrorMetadata* err, const ErrorOffset& errorOffset) const {
+  // Errors without any offset only get the offset-independent fields.
+  if (errorOffset.is<NoOffset>()) {
+    anyCharsAccess().computeErrorMetadataNoOffset(err);
+    return true;
+  }
+
+  // Either an explicit offset was supplied, or the current read position
+  // is meant.
+  const uint32_t offset = errorOffset.is<uint32_t>()
+                              ? errorOffset.as<uint32_t>()
+                              : this->sourceUnits.offset();
+
+  // |fillExceptingContext| does not signal success or failure: its result
+  // says whether this TokenStream can supply a line of context.
+  if (!fillExceptingContext(err, offset)) {
+    // Nothing more can be filled in here.
+    return true;
+  }
+
+  // Attach a line of context from this TokenStream to help with debugging.
+  return internalComputeLineOfContext(err, offset);
+}
+
+template <typename Unit, class AnyCharsAccess>
+void TokenStreamSpecific<Unit, AnyCharsAccess>::reportIllegalCharacter(
+    int32_t cp) {
+  // Format the code point as "U+XXXX" for the message; if the formatting
+  // allocation fails, report out-of-memory instead.
+  if (UniqueChars display = JS_smprintf("U+%04X", cp)) {
+    error(JSMSG_ILLEGAL_CHARACTER, display.get());
+    return;
+  }
+
+  ReportOutOfMemory(anyCharsAccess().fc);
+}
+
+// We have encountered a '\': check for a Unicode escape sequence after it.
+// If one is found, store the code point it encodes in |*codePoint|, skip
+// all code units involved, and return the length of the escape sequence.
+// Otherwise, return 0 and don't advance along the buffer.
+template <typename Unit, class AnyCharsAccess>
+uint32_t GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchUnicodeEscape(
+    char32_t* codePoint) {
+  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
+
+  const int32_t first = getCodeUnit();
+  if (first != 'u') {
+    // NOTE: |first| may be EOF here.
+    ungetCodeUnit(first);
+    MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
+    return 0;
+  }
+
+  const int32_t second = getCodeUnit();
+
+  // Classic form: 'u' followed by exactly four hex digits, of which
+  // |second| is the most significant.
+  if (IsAsciiHexDigit(second)) {
+    char16_t lowDigits;
+    if (this->sourceUnits.matchHexDigits(3, &lowDigits)) {
+      *codePoint = (AsciiAlphanumericToNumber(second) << 12) | lowDigits;
+      return 5;
+    }
+  }
+
+  // Braced form: 'u{' hex digits '}'.
+  if (second == '{') {
+    return matchExtendedUnicodeEscape(codePoint);
+  }
+
+  // Not an escape after all.  NOTE: |second| may be EOF here, so this
+  // ungets either one or two units.
+  ungetCodeUnit(second);
+  ungetCodeUnit('u');
+  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
+  return 0;
+}
+
+template <typename Unit, class AnyCharsAccess>
+uint32_t
+GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchExtendedUnicodeEscape(
+    char32_t* codePoint) {
+  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('{'));
+
+  int32_t unit = getCodeUnit();
+
+  // Leading zeroes are counted but contribute nothing to the value.
+  uint32_t zeroCount = 0;
+  for (; unit == '0'; unit = getCodeUnit()) {
+    zeroCount++;
+  }
+
+  // Accumulate at most six significant hex digits.
+  uint32_t value = 0;
+  size_t digitCount = 0;
+  for (; IsAsciiHexDigit(unit) && digitCount < 6; digitCount++) {
+    value = (value << 4) | AsciiAlphanumericToNumber(unit);
+    unit = getCodeUnit();
+  }
+
+  uint32_t consumed =
+      2 +                        // 'u{'
+      zeroCount + digitCount +   // hex digits, leading zeroes included
+      (unit != EOF);             // the final get, unless it only hit EOF
+
+  const bool closed = unit == '}';
+  const bool sawDigits = zeroCount > 0 || digitCount > 0;
+  if (!closed || !sawDigits || value > unicode::NonBMPMax) {
+    // Malformed escape: rewind to just after the backslash.
+    this->sourceUnits.unskipCodeUnits(consumed);
+    MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
+    return 0;
+  }
+
+  *codePoint = value;
+  return consumed;
+}
+
+template <typename Unit, class AnyCharsAccess>
+uint32_t
+GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchUnicodeEscapeIdStart(
+    char32_t* codePoint) {
+  // Match a Unicode escape and keep it only if the escaped code point may
+  // start an identifier; otherwise leave the position right after the '\'.
+  const uint32_t escapeLength = matchUnicodeEscape(codePoint);
+  if (MOZ_UNLIKELY(escapeLength == 0)) {
+    MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
+    return 0;
+  }
+
+  if (MOZ_LIKELY(unicode::IsIdentifierStart(*codePoint))) {
+    return escapeLength;
+  }
+
+  // A well-formed escape, but not of an identifier start: undo it.
+  this->sourceUnits.unskipCodeUnits(escapeLength);
+  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
+  return 0;
+}
+
+template <typename Unit, class AnyCharsAccess>
+bool GeneralTokenStreamChars<Unit, AnyCharsAccess>::matchUnicodeEscapeIdent(
+    char32_t* codePoint) {
+  // Match a Unicode escape and keep it only if the escaped code point may
+  // continue an identifier; otherwise leave the position right after the
+  // '\'.
+  const uint32_t escapeLength = matchUnicodeEscape(codePoint);
+  if (MOZ_UNLIKELY(escapeLength == 0)) {
+    MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
+    return false;
+  }
+
+  if (MOZ_LIKELY(unicode::IsIdentifierPart(*codePoint))) {
+    return true;
+  }
+
+  // A well-formed escape, but not of an identifier part: undo it.
+  this->sourceUnits.unskipCodeUnits(escapeLength);
+  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('\\'));
+  return false;
+}
+
+template <typename Unit, class AnyCharsAccess>
+[[nodiscard]] bool
+TokenStreamSpecific<Unit, AnyCharsAccess>::matchIdentifierStart(
+    IdentifierEscapes* sawEscape) {
+  const int32_t unit = getCodeUnit();
+  if (unit == EOF) {
+    error(JSMSG_MISSING_PRIVATE_NAME);
+    return false;
+  }
+
+  if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
+    // Plain ASCII identifier start.
+    if (unicode::IsIdentifierStart(static_cast<char16_t>(unit))) {
+      *sawEscape = IdentifierEscapes::None;
+      return true;
+    }
+
+    // A backslash must introduce a Unicode escape of an identifier start.
+    if (unit == '\\') {
+      char32_t escaped;
+      if (matchUnicodeEscapeIdStart(&escaped) != 0) {
+        *sawEscape = IdentifierEscapes::SawUnicodeEscape;
+        return true;
+      }
+
+      // The error could be reported "inside" a mistyped escape (at the 'H'
+      // of "\u{41H}", say), but that is not done today.  The unit after
+      // the '\' is therefore not necessarily the bad one, so report at the
+      // start of the invalid escape instead.
+      ungetCodeUnit('\\');
+      error(JSMSG_BAD_ESCAPE);
+      return false;
+    }
+  }
+
+  // Put the lead unit back so that the full code point can be peeked.
+  ungetCodeUnit(unit);
+
+  PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();
+  if (peeked.isNone() || !unicode::IsIdentifierStart(peeked.codePoint())) {
+    error(JSMSG_MISSING_PRIVATE_NAME);
+    return false;
+  }
+
+  this->sourceUnits.consumeKnownCodePoint(peeked);
+
+  *sawEscape = IdentifierEscapes::None;
+  return true;
+}
+
+template <typename Unit, class AnyCharsAccess>
+bool TokenStreamSpecific<Unit, AnyCharsAccess>::getDirectives(
+    bool isMultiline, bool shouldWarnDeprecated) {
+  // Recognize the debugging directive comments "//# sourceURL" and
+  // "//# sourceMappingURL".  The older "//@" spelling is deprecated.
+  //
+  // Several JavaScript transpilers work around a crashing bug in IE by
+  // wrapping a single-line source-mapping-URL comment in a multiline
+  // comment.  To keep lookahead and backtracking cheap, that case is only
+  // checked for once a '#' code unit has been seen.
+
+  // The display URL is tried first; the source-mapping URL is only tried
+  // if that did not fail.
+  if (getDisplayURL(isMultiline, shouldWarnDeprecated) &&
+      getSourceMappingURL(isMultiline, shouldWarnDeprecated)) {
+    return true;
+  }
+
+  badToken();
+  return false;
+}
+
+[[nodiscard]] bool TokenStreamCharsShared::copyCharBufferTo(
+    UniquePtr<char16_t[], JS::FreePolicy>* destination) {
+  // Copies the contents of |charBuffer| into a freshly allocated,
+  // NUL-terminated array stored in |*destination|.  Returns false if the
+  // allocation fails, in which case |*destination| holds the null result.
+  const size_t unitCount = charBuffer.length();
+
+  // One extra slot is reserved for the terminator.
+  UniquePtr<char16_t[], JS::FreePolicy>& target = *destination;
+  target = fc->getAllocator()->make_pod_array<char16_t>(unitCount + 1);
+  if (!target) {
+    return false;
+  }
+
+  // |std::copy| hands back the position just past the last unit written,
+  // which is exactly where the terminator belongs.
+  char16_t* const terminator =
+      std::copy(charBuffer.begin(), charBuffer.end(), target.get());
+  *terminator = '\0';
+  return true;
+}
+
+template <typename Unit, class AnyCharsAccess>
+[[nodiscard]] bool TokenStreamSpecific<Unit, AnyCharsAccess>::getDirective(
+    bool isMultiline, bool shouldWarnDeprecated, const char* directive,
+    uint8_t directiveLength, const char* errorMsgPragma,
+    UniquePtr<char16_t[], JS::FreePolicy>* destination) {
+  // Nothing to do unless the upcoming source text is |directive|.  (As
+  // |directive| must be ASCII, matching works the same way for UTF-8 and
+  // UTF-16 source, with no tricky encoding issues.)
+  if (!this->sourceUnits.matchCodeUnits(directive, directiveLength)) {
+    return true;
+  }
+
+  if (shouldWarnDeprecated &&
+      !warning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma)) {
+    return false;
+  }
+
+  // Gather the directive's value: everything up to whitespace, end of input,
+  // an undecodable code point, or (inside a multi-line comment) a "*/".
+  this->charBuffer.clear();
+
+  for (;;) {
+    int32_t lead = peekCodeUnit();
+    if (lead == EOF) {
+      break;
+    }
+
+    if (MOZ_UNLIKELY(!isAsciiCodePoint(lead))) {
+      // Encoding errors are ignored here: caller-side code that handles the
+      // remaining source text in the comment will deal with them.
+      PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();
+      if (peeked.isNone() || unicode::IsSpace(peeked.codePoint())) {
+        break;
+      }
+
+      MOZ_ASSERT(!IsLineTerminator(peeked.codePoint()),
+                 "!IsSpace must imply !IsLineTerminator or else we'll fail to "
+                 "maintain line-info/flags for EOL");
+      this->sourceUnits.consumeKnownCodePoint(peeked);
+
+      if (!AppendCodePointToCharBuffer(this->charBuffer,
+                                       peeked.codePoint())) {
+        return false;
+      }
+    } else {
+      if (unicode::IsSpace(AssertedCast<Latin1Char>(lead))) {
+        break;
+      }
+
+      consumeKnownCodeUnit(lead);
+
+      // Debugging directives may appear in single- and multi-line comments
+      // alike.  Within a multi-line comment the terminator has to be
+      // recognized too, so give the '*' back and stop.
+      const bool atCommentEnd =
+          isMultiline && lead == '*' && peekCodeUnit() == '/';
+      if (atCommentEnd) {
+        ungetCodeUnit('*');
+        break;
+      }
+
+      if (!this->charBuffer.append(lead)) {
+        return false;
+      }
+    }
+  }
+
+  // A directive with no URL is not an error (comments can contain anything);
+  // only a non-empty value is copied out.
+  return this->charBuffer.empty() || copyCharBufferTo(destination);
+}
+
+template <typename Unit, class AnyCharsAccess>
+bool TokenStreamSpecific<Unit, AnyCharsAccess>::getDisplayURL(
+    bool isMultiline, bool shouldWarnDeprecated) {
+  // Recognizes comments shaped like "//# sourceURL=<url>" or
+  // "/\* //# sourceURL=<url> *\/"
+  //
+  // Source text calls this "sourceURL"; internally it is a "displayURL", to
+  // keep apart the name the developer wants the source referred to by and the
+  // source's actual URL.
+
+  static constexpr char directive[] = " sourceURL=";
+  constexpr uint8_t directiveLength = js_strlen(directive);
+
+  auto* displayURL = &anyCharsAccess().displayURL_;
+  return getDirective(isMultiline, shouldWarnDeprecated, directive,
+                      directiveLength, "sourceURL", displayURL);
+}
+
+template <typename Unit, class AnyCharsAccess>
+bool TokenStreamSpecific<Unit, AnyCharsAccess>::getSourceMappingURL(
+    bool isMultiline, bool shouldWarnDeprecated) {
+  // Recognizes comments shaped like "//# sourceMappingURL=<url>" or
+  // "/\* //# sourceMappingURL=<url> *\/"
+
+  static constexpr char directive[] = " sourceMappingURL=";
+  constexpr uint8_t directiveLength = js_strlen(directive);
+
+  auto* sourceMapURL = &anyCharsAccess().sourceMapURL_;
+  return getDirective(isMultiline, shouldWarnDeprecated, directive,
+                      directiveLength, "sourceMappingURL", sourceMapURL);
+}
+
+template <typename Unit, class AnyCharsAccess>
+MOZ_ALWAYS_INLINE Token*
+GeneralTokenStreamChars<Unit, AnyCharsAccess>::newTokenInternal(
+    TokenKind kind, TokenStart start, TokenKind* out) {
+  MOZ_ASSERT(kind < TokenKind::Limit);
+  MOZ_ASSERT(kind != TokenKind::Eol,
+             "TokenKind::Eol should never be used in an actual Token, only "
+             "returned by peekTokenSameLine()");
+
+  // A token is being produced, so flag the current line as dirty before
+  // allocating the token.
+  TokenStreamAnyChars& shared = anyCharsAccess();
+  shared.flags.isDirtyLine = true;
+
+  Token* const token = shared.allocateToken();
+
+  // Report the kind both in the token and through |out|.
+  token->type = kind;
+  *out = kind;
+
+  // The token spans from |start| to the current source position.
+  const auto beginOffset = start.offset();
+  const auto endOffset = this->sourceUnits.offset();
+  token->pos = TokenPos(beginOffset, endOffset);
+  MOZ_ASSERT(token->pos.begin <= token->pos.end);
+
+  // NOTE: |newToken()| is responsible for setting |token->modifier|.  That
+  // way optimized, non-debug code does no work passing along a
+  // modifier-argument that would never be used.
+
+  return token;
+}
+
+// Records that tokenizing has failed.  Always returns false, so failure paths
+// can be written as |return badToken();|.
+template <typename Unit, class AnyCharsAccess>
+MOZ_COLD bool GeneralTokenStreamChars<Unit, AnyCharsAccess>::badToken() {
+  // We didn't get a token, so don't set |flags.isDirtyLine|.
+  anyCharsAccess().flags.hadError = true;
+
+  // Poisoning sourceUnits on error establishes an invariant: once an
+  // erroneous token has been seen, sourceUnits will not be consulted again.
+  // This is true because the parser will deal with the illegal token by
+  // aborting parsing immediately.
+  this->sourceUnits.poisonInDebug();
+
+  return false;
+}
+
+// Appends |codePoint| to |charBuffer| as one or two UTF-16 code units.
+// Returns false as soon as an append to the buffer fails.
+bool AppendCodePointToCharBuffer(CharBuffer& charBuffer, char32_t codePoint) {
+  MOZ_ASSERT(codePoint <= unicode::NonBMPMax,
+             "should only be processing code points validly decoded from UTF-8 "
+             "or WTF-16 source text (surrogate code points permitted)");
+
+  unsigned numUnits = 0;
+  char16_t units[2];
+  unicode::UTF16Encode(codePoint, units, &numUnits);
+
+  MOZ_ASSERT(numUnits == 1 || numUnits == 2,
+             "UTF-16 code points are only encoded in one or two units");
+
+  // The first unit is always appended; the second only when the encoding
+  // needed two units.
+  return charBuffer.append(units[0]) &&
+         (numUnits == 1 || charBuffer.append(units[1]));
+}
+
+template <typename Unit, class AnyCharsAccess>
+bool TokenStreamSpecific<Unit, AnyCharsAccess>::putIdentInCharBuffer(
+    const Unit* identStart) {
+  // Re-read the identifier from |identStart|, decoding any Unicode escapes,
+  // and leave its code points in |charBuffer|.  The read position is moved
+  // back to where it was when this function exits, whatever the outcome.
+  const Unit* const resumeAddress = this->sourceUnits.addressOfNextCodeUnit();
+  this->sourceUnits.setAddressOfNextCodeUnit(identStart);
+
+  auto restoreReadPosition = MakeScopeExit([this, resumeAddress]() {
+    this->sourceUnits.setAddressOfNextCodeUnit(resumeAddress);
+  });
+
+  this->charBuffer.clear();
+  for (;;) {
+    int32_t lead = getCodeUnit();
+    if (lead == EOF) {
+      return true;
+    }
+
+    char32_t decoded;
+    if (MOZ_UNLIKELY(!isAsciiCodePoint(lead))) {
+      // The scope-exit above undoes every get, and this function does not
+      // update line/column info, so no normalization is needed.
+      if (!getNonAsciiCodePointDontNormalize(toUnit(lead), &decoded)) {
+        return false;
+      }
+
+      if (!unicode::IsIdentifierPart(decoded)) {
+        return true;
+      }
+    } else if (lead == '#' || unicode::IsIdentifierPart(char16_t(lead))) {
+      // Plain ASCII identifier unit (or '#'): copy it over unchanged.
+      if (!this->charBuffer.append(lead)) {
+        return false;
+      }
+
+      continue;
+    } else if (lead != '\\' || !matchUnicodeEscapeIdent(&decoded)) {
+      // Neither an identifier unit nor a valid identifier escape: the
+      // identifier is finished.
+      return true;
+    }
+
+    if (!AppendCodePointToCharBuffer(this->charBuffer, decoded)) {
+      return false;
+    }
+  }
+}
+
+// Finishes scanning an IdentifierName whose first code point has already been
+// consumed, then creates the resulting token, reporting its kind via |out|.
+//
+// |start| is the token start handed to the new token.  |identStart| is the
+// address of the identifier's first code unit; the identifier's text runs
+// from there to the read position once scanning stops.  |escaping| says
+// whether a Unicode escape has been seen so far.  |visibility| selects a
+// private-name token versus a name (or reserved word) token.
+//
+// Returns false, after running |badToken()|, on any failure.
+template <typename Unit, class AnyCharsAccess>
+[[nodiscard]] bool TokenStreamSpecific<Unit, AnyCharsAccess>::identifierName(
+    TokenStart start, const Unit* identStart, IdentifierEscapes escaping,
+    Modifier modifier, NameVisibility visibility, TokenKind* out) {
+  // Run the bad-token code for every path out of this function except the
+  // two success-cases.
+  auto noteBadToken = MakeScopeExit([this]() { this->badToken(); });
+
+  // We've already consumed an initial code point in the identifier, to *know*
+  // that this is an identifier.  So no need to worry about not consuming any
+  // code points in the loop below.
+  int32_t unit;
+  while (true) {
+    unit = peekCodeUnit();
+    if (unit == EOF) {
+      break;
+    }
+
+    if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
+      consumeKnownCodeUnit(unit);
+
+      if (MOZ_UNLIKELY(
+              !unicode::IsIdentifierPart(static_cast<char16_t>(unit)))) {
+        // Handle a Unicode escape -- otherwise it's not part of the
+        // identifier.
+        char32_t codePoint;
+        if (unit != '\\' || !matchUnicodeEscapeIdent(&codePoint)) {
+          // Give back the unit consumed above: it belongs to whatever
+          // follows the identifier.
+          ungetCodeUnit(unit);
+          break;
+        }
+
+        escaping = IdentifierEscapes::SawUnicodeEscape;
+      }
+    } else {
+      // This ignores encoding errors: subsequent caller-side code to
+      // handle source text after the IdentifierName will do so.
+      PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();
+      if (peeked.isNone() || !unicode::IsIdentifierPart(peeked.codePoint())) {
+        break;
+      }
+
+      MOZ_ASSERT(!IsLineTerminator(peeked.codePoint()),
+                 "IdentifierPart must guarantee !IsLineTerminator or "
+                 "else we'll fail to maintain line-info/flags for EOL");
+
+      this->sourceUnits.consumeKnownCodePoint(peeked);
+    }
+  }
+
+  TaggedParserAtomIndex atom;
+  if (MOZ_UNLIKELY(escaping == IdentifierEscapes::SawUnicodeEscape)) {
+    // Identifiers containing Unicode escapes have to be converted into
+    // tokenbuf before atomizing.
+    if (!putIdentInCharBuffer(identStart)) {
+      return false;
+    }
+
+    atom = drainCharBufferIntoAtom();
+  } else {
+    // Escape-free identifiers can be created directly from sourceUnits.
+    const Unit* chars = identStart;
+    size_t length = this->sourceUnits.addressOfNextCodeUnit() - identStart;
+
+    // Private identifiers start with a '#', and so cannot be reserved words.
+    if (visibility == NameVisibility::Public) {
+      // Represent reserved words lacking escapes as reserved word tokens.
+      if (const ReservedWordInfo* rw = FindReservedWord(chars, length)) {
+        // First success case: a reserved-word token.
+        noteBadToken.release();
+        newSimpleToken(rw->tokentype, start, modifier, out);
+        return true;
+      }
+    }
+
+    atom = atomizeSourceChars(Span(chars, length));
+  }
+  // Either atomizing path above may have failed.
+  if (!atom) {
+    return false;
+  }
+
+  // Second success case: a private-name or ordinary name token.
+  noteBadToken.release();
+  if (visibility == NameVisibility::Private) {
+    newPrivateNameToken(atom, start, modifier, out);
+    return true;
+  }
+  newNameToken(atom, start, modifier, out);
+  return true;
+}
+
+// Classification of an ASCII code unit according to the kind of token it can
+// begin.  These values are the element values of |firstCharKinds|.
+enum FirstCharKind {
+  // A char16_t has the 'OneChar' kind if it, by itself, constitutes a valid
+  // token that cannot also be a prefix of a longer token.  E.g. ';' has the
+  // OneChar kind, but '+' does not, because '++' and '+=' are valid longer
+  // tokens that begin with '+'.
+  //
+  // The few token kinds satisfying these properties cover roughly 35--45%
+  // of the tokens seen in practice.
+  //
+  // We represent the 'OneChar' kind with any positive value less than
+  // TokenKind::Limit.  This representation lets us associate
+  // each one-char token char16_t with a TokenKind and thus avoid
+  // a subsequent char16_t-to-TokenKind conversion.
+  OneChar_Min = 0,
+  OneChar_Max = size_t(TokenKind::Limit) - 1,
+
+  // All remaining kinds are numbered from TokenKind::Limit upwards, so they
+  // never collide with a OneChar value.
+  Space = size_t(TokenKind::Limit),
+  Ident,
+  Dec,
+  String,
+  EOL,
+  ZeroDigit,
+  Other,
+
+  // Largest enumerator; checked against the table's element width.
+  LastCharKind = Other
+};
+
+// Lookup table mapping each ASCII code unit (0..127) to its FirstCharKind.
+//
+// OneChar: 40,  41,  44,  58,  59,  91,  93,  123, 125, 126:
+//          '(', ')', ',', ':', ';', '[', ']', '{', '}', '~'
+// Ident:   36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
+// String:  34, 39, 96: '"', '\'', '`'
+// Dec:     49..57: '1'..'9'
+// ZeroDigit:  48: '0'
+// Space:   9, 11, 12, 32: '\t', '\v', '\f', ' '
+// EOL:     10, 13: '\n', '\r'
+// Other:   every remaining code unit, including 43 '+', 46 '.', and 61 '='
+//
+// Shorthands for the OneChar entries; each is the numeric value of the
+// corresponding TokenKind.
+#define T_COMMA size_t(TokenKind::Comma)
+#define T_COLON size_t(TokenKind::Colon)
+#define T_BITNOT size_t(TokenKind::BitNot)
+#define T_LP size_t(TokenKind::LeftParen)
+#define T_RP size_t(TokenKind::RightParen)
+#define T_SEMI size_t(TokenKind::Semi)
+#define T_LB size_t(TokenKind::LeftBracket)
+#define T_RB size_t(TokenKind::RightBracket)
+#define T_LC size_t(TokenKind::LeftCurly)
+#define T_RC size_t(TokenKind::RightCurly)
+// Filler spelling of |Other| that keeps the table columns aligned.
+#define _______ Other
+static const uint8_t firstCharKinds[] = {
+    // clang-format off
+/*         0        1        2        3        4        5        6        7        8        9    */
+/*   0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______,   Space,
+/*  10+ */     EOL,   Space,   Space,     EOL, _______, _______, _______, _______, _______, _______,
+/*  20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
+/*  30+ */ _______, _______,   Space, _______,  String, _______,   Ident, _______, _______,  String,
+/*  40+ */    T_LP,    T_RP, _______, _______, T_COMMA, _______, _______, _______,ZeroDigit,    Dec,
+/*  50+ */     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,     Dec, T_COLON,  T_SEMI,
+/*  60+ */ _______, _______, _______, _______, _______,   Ident,   Ident,   Ident,   Ident,   Ident,
+/*  70+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
+/*  80+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
+/*  90+ */   Ident,    T_LB, _______,    T_RB, _______,   Ident,  String,   Ident,   Ident,   Ident,
+/* 100+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
+/* 110+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
+/* 120+ */   Ident,   Ident,   Ident,    T_LC, _______,    T_RC,T_BITNOT, _______
+    // clang-format on
+};
+#undef T_COMMA
+#undef T_COLON
+#undef T_BITNOT
+#undef T_LP
+#undef T_RP
+#undef T_SEMI
+#undef T_LB
+#undef T_RB
+#undef T_LC
+#undef T_RC
+#undef _______
+
+// Every FirstCharKind value must fit in one table element.
+static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),
+              "Elements of firstCharKinds[] are too small");
+
+// UTF-16 flavor: consume code units until the end of input or until the next
+// unit is a line terminator, which is left unconsumed.
+template <>
+void SourceUnits<char16_t>::consumeRestOfSingleLineComment() {
+  for (;;) {
+    if (MOZ_UNLIKELY(atEnd())) {
+      return;
+    }
+
+    char16_t next = peekCodeUnit();
+    if (IsLineTerminator(next)) {
+      return;
+    }
+
+    consumeKnownCodeUnit(next);
+  }
+}
+
+// UTF-8 flavor: consume code points until the end of input, a line
+// terminator, or a code point that cannot be decoded.  Whatever stops the
+// scan is left unconsumed.
+template <>
+void SourceUnits<Utf8Unit>::consumeRestOfSingleLineComment() {
+  while (MOZ_LIKELY(!atEnd())) {
+    const Utf8Unit lead = peekCodeUnit();
+    if (IsSingleUnitLineTerminator(lead)) {
+      return;
+    }
+
+    if (MOZ_UNLIKELY(!IsAscii(lead))) {
+      // Multi-unit code point: decode it whole before deciding.
+      PeekedCodePoint<Utf8Unit> whole = peekCodePoint();
+      if (whole.isNone()) {
+        return;
+      }
+
+      const char32_t cp = whole.codePoint();
+      const bool endsLine =
+          cp == unicode::LINE_SEPARATOR || cp == unicode::PARA_SEPARATOR;
+      if (MOZ_UNLIKELY(endsLine)) {
+        return;
+      }
+
+      consumeKnownCodePoint(whole);
+    } else {
+      consumeKnownCodeUnit(lead);
+    }
+  }
+}
+
+// Reads one code unit.  If |isIntegerUnit| rejects it, the unit is handed
+// back through |*nextUnit| and the call succeeds; otherwise the rest of the
+// digit run is scanned by |matchIntegerAfterFirstDigit|.
+template <typename Unit, class AnyCharsAccess>
+[[nodiscard]] MOZ_ALWAYS_INLINE bool
+TokenStreamSpecific<Unit, AnyCharsAccess>::matchInteger(
+    IsIntegerUnit isIntegerUnit, int32_t* nextUnit) {
+  int32_t first = getCodeUnit();
+  if (isIntegerUnit(first)) {
+    return matchIntegerAfterFirstDigit(isIntegerUnit, nextUnit);
+  }
+
+  *nextUnit = first;
+  return true;
+}
+
+// Consumes the remainder of a run of digits accepted by |isIntegerUnit|, in
+// which single '_' separators may sit between digits.  On success the first
+// code unit after the run (already consumed) is stored in |*nextUnit|.  A
+// '_' that is not followed by a digit is reported as an error and makes the
+// call return false.
+template <typename Unit, class AnyCharsAccess>
+[[nodiscard]] MOZ_ALWAYS_INLINE bool
+TokenStreamSpecific<Unit, AnyCharsAccess>::matchIntegerAfterFirstDigit(
+    IsIntegerUnit isIntegerUnit, int32_t* nextUnit) {
+  for (;;) {
+    int32_t current = getCodeUnit();
+    if (isIntegerUnit(current)) {
+      continue;
+    }
+
+    if (current != '_') {
+      // End of the digit run.
+      *nextUnit = current;
+      return true;
+    }
+
+    // A separator must be followed directly by another digit.
+    int32_t afterSeparator = getCodeUnit();
+    if (isIntegerUnit(afterSeparator)) {
+      continue;
+    }
+
+    ungetCodeUnit(afterSeparator);
+    if (afterSeparator == '_') {
+      error(JSMSG_NUMBER_MULTIPLE_ADJACENT_UNDERSCORES);
+    } else {
+      // Also give back the separator before reporting the error.
+      ungetCodeUnit('_');
+      error(JSMSG_NUMBER_END_WITH_UNDERSCORE);
+    }
+    return false;
+  }
+}
+
+// Scans the rest of a decimal numeric literal and creates a Number token, or
+// hands off to |bigIntLiteral| when the literal carries an 'n' suffix.
+//
+// |unit| is a code unit of the literal that the caller has already read.
+// |numStart| points at the first code unit of the literal's text, from which
+// the numeric value is computed.  |start| is the token start for the new
+// token and |out| receives the token kind.
+//
+// Returns false on failure.  |badToken()| is run on every return that happens
+// before |noteBadToken.release()|.
+template <typename Unit, class AnyCharsAccess>
+[[nodiscard]] bool TokenStreamSpecific<Unit, AnyCharsAccess>::decimalNumber(
+    int32_t unit, TokenStart start, const Unit* numStart, Modifier modifier,
+    TokenKind* out) {
+  // Run the bad-token code for every path out of this function except the
+  // one success-case.
+  auto noteBadToken = MakeScopeExit([this]() { this->badToken(); });
+
+  // Consume integral component digits.
+  if (IsAsciiDigit(unit)) {
+    if (!matchIntegerAfterFirstDigit(IsAsciiDigit, &unit)) {
+      return false;
+    }
+  }
+
+  // Numbers contain no escapes, so we can read directly from |sourceUnits|.
+  double dval;
+  bool isBigInt = false;
+  DecimalPoint decimalPoint = NoDecimal;
+  if (unit != '.' && unit != 'e' && unit != 'E' && unit != 'n') {
+    // NOTE: |unit| may be EOF here.
+    ungetCodeUnit(unit);
+
+    // Most numbers are pure decimal integers without fractional component
+    // or exponential notation.  Handle that with optimized code.
+    if (!GetDecimalInteger(numStart, this->sourceUnits.addressOfNextCodeUnit(),
+                           &dval)) {
+      ReportOutOfMemory(this->fc);
+      return false;
+    }
+  } else if (unit == 'n') {
+    // BigInt suffix: the 'n' stays consumed.  Only peek at what follows, for
+    // the IdentifierStart check below.
+    isBigInt = true;
+    unit = peekCodeUnit();
+  } else {
+    // Consume any decimal dot and fractional component.
+    if (unit == '.') {
+      decimalPoint = HasDecimal;
+      if (!matchInteger(IsAsciiDigit, &unit)) {
+        return false;
+      }
+    }
+
+    // Consume any exponential notation.
+    if (unit == 'e' || unit == 'E') {
+      unit = getCodeUnit();
+      if (unit == '+' || unit == '-') {
+        unit = getCodeUnit();
+      }
+
+      // Exponential notation must contain at least one digit.
+      if (!IsAsciiDigit(unit)) {
+        ungetCodeUnit(unit);
+        error(JSMSG_MISSING_EXPONENT);
+        return false;
+      }
+
+      // Consume exponential digits.
+      if (!matchIntegerAfterFirstDigit(IsAsciiDigit, &unit)) {
+        return false;
+      }
+    }
+
+    // |unit| is the first code unit past the literal: give it back so the
+    // read position marks the literal's end.
+    ungetCodeUnit(unit);
+
+    if (!GetDecimal(numStart, this->sourceUnits.addressOfNextCodeUnit(),
+                    &dval)) {
+      ReportOutOfMemory(this->fc);
+      return false;
+    }
+  }
+
+  // Number followed by IdentifierStart is an error.  (This is the only place
+  // in ECMAScript where token boundary is inadequate to properly separate
+  // two tokens, necessitating this unaesthetic lookahead.)
+  //
+  // In every branch above, |unit| now holds the not-yet-consumed code unit
+  // that follows the literal (or EOF).
+  if (unit != EOF) {
+    if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
+      if (unicode::IsIdentifierStart(char16_t(unit))) {
+        error(JSMSG_IDSTART_AFTER_NUMBER);
+        return false;
+      }
+    } else {
+      // This ignores encoding errors: subsequent caller-side code to
+      // handle source text after the number will do so.
+      PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();
+      if (!peeked.isNone() && unicode::IsIdentifierStart(peeked.codePoint())) {
+        error(JSMSG_IDSTART_AFTER_NUMBER);
+        return false;
+      }
+    }
+  }
+
+  // From here on the scope-exit no longer runs |badToken()|.
+  noteBadToken.release();
+
+  if (isBigInt) {
+    return bigIntLiteral(start, modifier, out);
+  }
+
+  newNumberToken(dval, decimalPoint, start, modifier, out);
+  return true;
+}
+
+// Scans a regular expression literal whose opening '/' has just been consumed
+// (asserted below): first the body up to the closing '/', which is collected
+// in |charBuffer|, then the trailing flags.  On success a RegExp token is
+// created and its kind reported through |out|.
+//
+// Returns false, via |badToken()|, on an unterminated literal, an append
+// failure, or a bad flag.
+template <typename Unit, class AnyCharsAccess>
+[[nodiscard]] bool TokenStreamSpecific<Unit, AnyCharsAccess>::regexpLiteral(
+    TokenStart start, TokenKind* out) {
+  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == Unit('/'));
+  this->charBuffer.clear();
+
+  // Decodes the non-ASCII code point that begins with |lead| and appends it
+  // to |charBuffer|.  LINE SEPARATOR and PARAGRAPH SEPARATOR terminate the
+  // line, so they are ungotten and reported as an unterminated regexp.
+  auto ProcessNonAsciiCodePoint = [this](int32_t lead) {
+    MOZ_ASSERT(lead != EOF);
+    MOZ_ASSERT(!this->isAsciiCodePoint(lead));
+
+    char32_t codePoint;
+    if (!this->getNonAsciiCodePointDontNormalize(this->toUnit(lead),
+                                                 &codePoint)) {
+      return false;
+    }
+
+    if (MOZ_UNLIKELY(codePoint == unicode::LINE_SEPARATOR ||
+                     codePoint == unicode::PARA_SEPARATOR)) {
+      this->sourceUnits.ungetLineOrParagraphSeparator();
+      this->error(JSMSG_UNTERMINATED_REGEXP);
+      return false;
+    }
+
+    return AppendCodePointToCharBuffer(this->charBuffer, codePoint);
+  };
+
+  // Gives back |unit| (which may be EOF) and reports the unterminated-regexp
+  // error.
+  auto ReportUnterminatedRegExp = [this](int32_t unit) {
+    this->ungetCodeUnit(unit);
+    this->error(JSMSG_UNTERMINATED_REGEXP);
+  };
+
+  // Scan the body.  |inCharClass| tracks whether we are between an unescaped
+  // '[' and the next unescaped ']'.
+  bool inCharClass = false;
+  do {
+    int32_t unit = getCodeUnit();
+    if (unit == EOF) {
+      ReportUnterminatedRegExp(unit);
+      return badToken();
+    }
+
+    if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
+      if (!ProcessNonAsciiCodePoint(unit)) {
+        return badToken();
+      }
+
+      continue;
+    }
+
+    if (unit == '\\') {
+      // Keep the backslash, then read the escaped unit; the escaped unit
+      // skips the '[' / ']' / '/' handling of the branches below.
+      if (!this->charBuffer.append(unit)) {
+        return badToken();
+      }
+
+      unit = getCodeUnit();
+      if (unit == EOF) {
+        ReportUnterminatedRegExp(unit);
+        return badToken();
+      }
+
+      // Fallthrough only handles ASCII code points, so
+      // deal with non-ASCII and skip everything else.
+      if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
+        if (!ProcessNonAsciiCodePoint(unit)) {
+          return badToken();
+        }
+
+        continue;
+      }
+    } else if (unit == '[') {
+      inCharClass = true;
+    } else if (unit == ']') {
+      inCharClass = false;
+    } else if (unit == '/' && !inCharClass) {
+      // For IE compat, allow unescaped / in char classes.
+      break;
+    }
+
+    // NOTE: Non-ASCII LineTerminators were handled by
+    //       ProcessNonAsciiCodePoint calls above.
+    if (unit == '\r' || unit == '\n') {
+      ReportUnterminatedRegExp(unit);
+      return badToken();
+    }
+
+    MOZ_ASSERT(!IsLineTerminator(AssertedCast<char32_t>(unit)));
+    if (!this->charBuffer.append(unit)) {
+      return badToken();
+    }
+  } while (true);
+
+  // Scan the flags that follow the closing '/'.
+  int32_t unit;
+  RegExpFlags reflags = RegExpFlag::NoFlags;
+  while (true) {
+    uint8_t flag;
+    unit = getCodeUnit();
+    if (unit == 'd') {
+      flag = RegExpFlag::HasIndices;
+    } else if (unit == 'g') {
+      flag = RegExpFlag::Global;
+    } else if (unit == 'i') {
+      flag = RegExpFlag::IgnoreCase;
+    } else if (unit == 'm') {
+      flag = RegExpFlag::Multiline;
+    } else if (unit == 's') {
+      flag = RegExpFlag::DotAll;
+    } else if (unit == 'u') {
+      flag = RegExpFlag::Unicode;
+    } else if (unit == 'v') {
+      flag = RegExpFlag::UnicodeSets;
+    } else if (unit == 'y') {
+      flag = RegExpFlag::Sticky;
+    } else if (IsAsciiAlpha(unit)) {
+      // Any other ASCII letter is an unrecognized flag; NoFlags marks it for
+      // the error check just below.
+      flag = RegExpFlag::NoFlags;
+    } else {
+      // Not a letter: the flags have ended.
+      break;
+    }
+
+    // Reject a repeated flag or an unrecognized one.
+    if ((reflags & flag) || flag == RegExpFlag::NoFlags) {
+      ungetCodeUnit(unit);
+      char buf[2] = {char(unit), '\0'};
+      error(JSMSG_BAD_REGEXP_FLAG, buf);
+      return badToken();
+    }
+
+    // /u and /v flags are mutually exclusive.
+    if (((reflags & RegExpFlag::Unicode) && (flag & RegExpFlag::UnicodeSets)) ||
+        ((reflags & RegExpFlag::UnicodeSets) && (flag & RegExpFlag::Unicode))) {
+      ungetCodeUnit(unit);
+      char buf[2] = {char(unit), '\0'};
+      error(JSMSG_BAD_REGEXP_FLAG, buf);
+      return badToken();
+    }
+
+    reflags |= flag;
+  }
+  // Give back the unit that ended the flags; it is not part of the literal.
+  ungetCodeUnit(unit);
+
+  newRegExpToken(reflags, start, out);
+  return true;
+}
+
+// Creates a BigInt token for the literal that runs from |start| to the
+// current read position and whose last code unit is the 'n' suffix (asserted
+// below).
+//
+// The literal's text, without the trailing 'n' and without any '_'
+// separators, is copied into |charBuffer| before the token is created; the
+// token kind is reported through |out|.
+//
+// Returns false if appending to |charBuffer| fails.  That path goes through
+// |badToken()|: |decimalNumber| releases its own bad-token guard before
+// calling this function, so without the call here the failure would bypass
+// the bad-token bookkeeping.
+template <typename Unit, class AnyCharsAccess>
+[[nodiscard]] bool TokenStreamSpecific<Unit, AnyCharsAccess>::bigIntLiteral(
+    TokenStart start, Modifier modifier, TokenKind* out) {
+  MOZ_ASSERT(this->sourceUnits.previousCodeUnit() == toUnit('n'));
+  MOZ_ASSERT(this->sourceUnits.offset() > start.offset());
+  uint32_t length = this->sourceUnits.offset() - start.offset();
+  MOZ_ASSERT(length >= 2);
+  this->charBuffer.clear();
+  mozilla::Range<const Unit> chars(
+      this->sourceUnits.codeUnitPtrAt(start.offset()), length);
+  // Stop one short of |length| so the trailing 'n' is left out.
+  for (uint32_t idx = 0; idx < length - 1; idx++) {
+    int32_t unit = CodeUnitValue(chars[idx]);
+    // Char buffer may start with a 0[bBoOxX] prefix, then follows with
+    // binary, octal, decimal, or hex digits.  Already checked by caller, as
+    // the "n" indicating bigint comes at the end.
+    MOZ_ASSERT(isAsciiCodePoint(unit));
+    // Skip over any separators.
+    if (unit == '_') {
+      continue;
+    }
+    if (!AppendCodePointToCharBuffer(this->charBuffer, unit)) {
+      return badToken();
+    }
+  }
+  newBigIntToken(start, modifier, out);
+  return true;
+}
+
+template <typename Unit, class AnyCharsAccess>
+void GeneralTokenStreamChars<Unit,
+                             AnyCharsAccess>::consumeOptionalHashbangComment() {
+  MOZ_ASSERT(this->sourceUnits.atStart(),
+             "HashBangComment can only appear immediately at the start of a "
+             "Script or Module");
+
+  // Grammar:
+  //
+  //   HashbangComment ::
+  //     #!  SingleLineCommentChars_opt
+  //
+  // A HashbangComment is optional at the start of a Script or Module, so when
+  // there is no leading '#' nothing is consumed.
+  if (matchCodeUnit('#')) {
+    if (matchCodeUnit('!')) {
+      // Consume up to, but not including, a concluding LineTerminator, and
+      // stop just before any encoding error.  The subsequent |getToken| call
+      // reaches |getTokenInternal|, which handles both of those.
+      this->sourceUnits.consumeRestOfSingleLineComment();
+    } else {
+      // '#' without '!' at the start of a Script or Module is an error, but
+      // normal parsing code reports it fine once the '#' is given back.
+      ungetCodeUnit('#');
+    }
+  }
+}
+
+template <typename Unit, class AnyCharsAccess>
+[[nodiscard]] bool TokenStreamSpecific<Unit, AnyCharsAccess>::getTokenInternal(
+    TokenKind* const ttp, const Modifier modifier) {
+  // Assume we'll fail: success cases will overwrite this.
+#ifdef DEBUG
+  *ttp = TokenKind::Limit;
+#endif
+  MOZ_MAKE_MEM_UNDEFINED(ttp, sizeof(*ttp));
+
+  // This loop runs more than once only when whitespace or comments are
+  // encountered.
+  do {
+    int32_t unit = peekCodeUnit();
+    if (MOZ_UNLIKELY(unit == EOF)) {
+      MOZ_ASSERT(this->sourceUnits.atEnd());
+      anyCharsAccess().flags.isEOF = true;
+      TokenStart start(this->sourceUnits, 0);
+      newSimpleToken(TokenKind::Eof, start, modifier, ttp);
+      return true;
+    }
+
+    if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
+      // Non-ASCII code points can only be identifiers or whitespace.  It would
+      // be nice to compute these *after* discarding whitespace, but IN A WORLD
+      // where |unicode::IsSpace| requires consuming a variable number of code
+      // units, it's easier to assume it's an identifier and maybe do a little
+      // wasted work, than to unget and compute and reget if whitespace.
+      TokenStart start(this->sourceUnits, 0);
+      const Unit* identStart = this->sourceUnits.addressOfNextCodeUnit();
+
+      PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();
+      if (peeked.isNone()) {
+        MOZ_ALWAYS_FALSE(getCodePoint());
+        return badToken();
+      }
+
+      char32_t cp = peeked.codePoint();
+      if (unicode::IsSpace(cp)) {
+        this->sourceUnits.consumeKnownCodePoint(peeked);
+        if (IsLineTerminator(cp)) {
+          if (!updateLineInfoForEOL()) {
+            return badToken();
+          }
+
+          anyCharsAccess().updateFlagsForEOL();
+        }
+
+        continue;
+      }
+
+      static_assert(isAsciiCodePoint('$'),
+                    "IdentifierStart contains '$', but as "
+                    "!IsUnicodeIDStart('$'), ensure that '$' is never "
+                    "handled here");
+      static_assert(isAsciiCodePoint('_'),
+                    "IdentifierStart contains '_', but as "
+                    "!IsUnicodeIDStart('_'), ensure that '_' is never "
+                    "handled here");
+
+      if (MOZ_LIKELY(unicode::IsUnicodeIDStart(cp))) {
+        this->sourceUnits.consumeKnownCodePoint(peeked);
+        MOZ_ASSERT(!IsLineTerminator(cp),
+                   "IdentifierStart must guarantee !IsLineTerminator "
+                   "or else we'll fail to maintain line-info/flags "
+                   "for EOL here");
+
+        return identifierName(start, identStart, IdentifierEscapes::None,
+                              modifier, NameVisibility::Public, ttp);
+      }
+
+      reportIllegalCharacter(cp);
+      return badToken();
+    }  // !isAsciiCodePoint(unit)
+
+    consumeKnownCodeUnit(unit);
+
+    // Get the token kind, based on the first char.  The ordering of c1kind
+    // comparison is based on the frequency of tokens in real code:
+    // Parsemark (which represents typical JS code on the web) and the
+    // Unreal demo (which represents asm.js code).
+    //
+    //                  Parsemark   Unreal
+    //  OneChar         32.9%       39.7%
+    //  Space           25.0%        0.6%
+    //  Ident           19.2%       36.4%
+    //  Dec              7.2%        5.1%
+    //  String           7.9%        0.0%
+    //  EOL              1.7%        0.0%
+    //  ZeroDigit        0.4%        4.9%
+    //  Other            5.7%       13.3%
+    //
+    // The ordering is based mostly only Parsemark frequencies, with Unreal
+    // frequencies used to break close categories (e.g. |Dec| and
+    // |String|).  |Other| is biggish, but no other token kind is common
+    // enough for it to be worth adding extra values to FirstCharKind.
+    FirstCharKind c1kind = FirstCharKind(firstCharKinds[unit]);
+
+    // Look for an unambiguous single-char token.
+    //
+    if (c1kind <= OneChar_Max) {
+      TokenStart start(this->sourceUnits, -1);
+      newSimpleToken(TokenKind(c1kind), start, modifier, ttp);
+      return true;
+    }
+
+    // Skip over non-EOL whitespace chars.
+    //
+    if (c1kind == Space) {
+      continue;
+    }
+
+    // Look for an identifier.
+    //
+    if (c1kind == Ident) {
+      TokenStart start(this->sourceUnits, -1);
+      return identifierName(
+          start, this->sourceUnits.addressOfNextCodeUnit() - 1,
+          IdentifierEscapes::None, modifier, NameVisibility::Public, ttp);
+    }
+
+    // Look for a decimal number.
+    //
+    if (c1kind == Dec) {
+      TokenStart start(this->sourceUnits, -1);
+      const Unit* numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;
+      return decimalNumber(unit, start, numStart, modifier, ttp);
+    }
+
+    // Look for a string or a template string.
+    //
+    if (c1kind == String) {
+      return getStringOrTemplateToken(static_cast<char>(unit), modifier, ttp);
+    }
+
+    // Skip over EOL chars, updating line state along the way.
+    //
+    if (c1kind == EOL) {
+      if (unit == '\r') {
+        matchLineTerminator('\n');
+      }
+
+      if (!updateLineInfoForEOL()) {
+        return badToken();
+      }
+
+      anyCharsAccess().updateFlagsForEOL();
+      continue;
+    }
+
+    // From a '0', look for a hexadecimal, binary, octal, or "noctal" (a
+    // number starting with '0' that contains '8' or '9' and is treated as
+    // decimal) number.
+    //
+    if (c1kind == ZeroDigit) {
+      TokenStart start(this->sourceUnits, -1);
+      int radix;
+      bool isBigInt = false;
+      const Unit* numStart;
+      unit = getCodeUnit();
+      if (unit == 'x' || unit == 'X') {
+        radix = 16;
+        unit = getCodeUnit();
+        if (!IsAsciiHexDigit(unit)) {
+          // NOTE: |unit| may be EOF here.
+          ungetCodeUnit(unit);
+          error(JSMSG_MISSING_HEXDIGITS);
+          return badToken();
+        }
+
+        // one past the '0x'
+        numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;
+
+        if (!matchIntegerAfterFirstDigit(IsAsciiHexDigit, &unit)) {
+          return badToken();
+        }
+      } else if (unit == 'b' || unit == 'B') {
+        radix = 2;
+        unit = getCodeUnit();
+        if (!IsAsciiBinary(unit)) {
+          // NOTE: |unit| may be EOF here.
+          ungetCodeUnit(unit);
+          error(JSMSG_MISSING_BINARY_DIGITS);
+          return badToken();
+        }
+
+        // one past the '0b'
+        numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;
+
+        if (!matchIntegerAfterFirstDigit(IsAsciiBinary, &unit)) {
+          return badToken();
+        }
+      } else if (unit == 'o' || unit == 'O') {
+        radix = 8;
+        unit = getCodeUnit();
+        if (!IsAsciiOctal(unit)) {
+          // NOTE: |unit| may be EOF here.
+          ungetCodeUnit(unit);
+          error(JSMSG_MISSING_OCTAL_DIGITS);
+          return badToken();
+        }
+
+        // one past the '0o'
+        numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;
+
+        if (!matchIntegerAfterFirstDigit(IsAsciiOctal, &unit)) {
+          return badToken();
+        }
+      } else if (IsAsciiDigit(unit)) {
+        // Reject octal literals that appear in strict mode code.
+        if (!strictModeError(JSMSG_DEPRECATED_OCTAL_LITERAL)) {
+          return badToken();
+        }
+
+        // The above test doesn't catch a few edge cases; see
+        // |GeneralParser::maybeParseDirective|.  Record the violation so that
+        // that function can handle them.
+        anyCharsAccess().setSawDeprecatedOctalLiteral();
+
+        radix = 8;
+        // one past the '0'
+        numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;
+
+        bool nonOctalDecimalIntegerLiteral = false;
+        do {
+          if (unit >= '8') {
+            nonOctalDecimalIntegerLiteral = true;
+          }
+          unit = getCodeUnit();
+        } while (IsAsciiDigit(unit));
+
+        if (unit == '_') {
+          ungetCodeUnit(unit);
+          error(JSMSG_SEPARATOR_IN_ZERO_PREFIXED_NUMBER);
+          return badToken();
+        }
+
+        if (unit == 'n') {
+          ungetCodeUnit(unit);
+          error(JSMSG_BIGINT_INVALID_SYNTAX);
+          return badToken();
+        }
+
+        if (nonOctalDecimalIntegerLiteral) {
+          // Use the decimal scanner for the rest of the number.
+          return decimalNumber(unit, start, numStart, modifier, ttp);
+        }
+      } else if (unit == '_') {
+        // Give a more explicit error message when '_' is used after '0'.
+        ungetCodeUnit(unit);
+        error(JSMSG_SEPARATOR_IN_ZERO_PREFIXED_NUMBER);
+        return badToken();
+      } else {
+        // '0' not followed by [XxBbOo0-9_];  scan as a decimal number.
+        ungetCodeUnit(unit);
+        numStart = this->sourceUnits.addressOfNextCodeUnit() - 1;  // The '0'.
+        return decimalNumber('0', start, numStart, modifier, ttp);
+      }
+
+      if (unit == 'n') {
+        isBigInt = true;
+        unit = peekCodeUnit();
+      } else {
+        ungetCodeUnit(unit);
+      }
+
+      // Error if an identifier-start code point appears immediately
+      // after the number.  Somewhat surprisingly, if we don't check
+      // here, we'll never check at all.
+      if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
+        if (unicode::IsIdentifierStart(char16_t(unit))) {
+          error(JSMSG_IDSTART_AFTER_NUMBER);
+          return badToken();
+        }
+      } else if (MOZ_LIKELY(unit != EOF)) {
+        // This ignores encoding errors: subsequent caller-side code to
+        // handle source text after the number will do so.
+        PeekedCodePoint<Unit> peeked = this->sourceUnits.peekCodePoint();
+        if (!peeked.isNone() &&
+            unicode::IsIdentifierStart(peeked.codePoint())) {
+          error(JSMSG_IDSTART_AFTER_NUMBER);
+          return badToken();
+        }
+      }
+
+      if (isBigInt) {
+        return bigIntLiteral(start, modifier, ttp);
+      }
+
+      double dval;
+      if (!GetFullInteger(numStart, this->sourceUnits.addressOfNextCodeUnit(),
+                          radix, IntegerSeparatorHandling::SkipUnderscore,
+                          &dval)) {
+        ReportOutOfMemory(this->fc);
+        return badToken();
+      }
+      newNumberToken(dval, NoDecimal, start, modifier, ttp);
+      return true;
+    }
+
+    MOZ_ASSERT(c1kind == Other);
+
+    // This handles everything else.  Simple tokens distinguished solely by
+    // TokenKind should set |simpleKind| and break, to share simple-token
+    // creation code for all such tokens.  All other tokens must be handled
+    // by returning (or by continuing from the loop enclosing this).
+    //
+    TokenStart start(this->sourceUnits, -1);
+    TokenKind simpleKind;
+#ifdef DEBUG
+    simpleKind = TokenKind::Limit;  // sentinel value for code after switch
+#endif
+
+    // The block a ways above eliminated all non-ASCII, so cast to the
+    // smallest type possible to assist the C++ compiler.
+    switch (AssertedCast<uint8_t>(CodeUnitValue(toUnit(unit)))) {
+      case '.':
+        if (IsAsciiDigit(peekCodeUnit())) {
+          return decimalNumber('.', start,
+                               this->sourceUnits.addressOfNextCodeUnit() - 1,
+                               modifier, ttp);
+        }
+
+        unit = getCodeUnit();
+        if (unit == '.') {
+          if (matchCodeUnit('.')) {
+            simpleKind = TokenKind::TripleDot;
+            break;
+          }
+        }
+
+        // NOTE: |unit| may be EOF here.  A stray '.' at EOF would be an
+        //       error, but subsequent code will handle it.
+        ungetCodeUnit(unit);
+
+        simpleKind = TokenKind::Dot;
+        break;
+
+      case '#': {
+#ifdef ENABLE_RECORD_TUPLE
+        if (matchCodeUnit('{')) {
+          simpleKind = TokenKind::HashCurly;
+          break;
+        }
+        if (matchCodeUnit('[')) {
+          simpleKind = TokenKind::HashBracket;
+          break;
+        }
+#endif
+
+        TokenStart start(this->sourceUnits, -1);
+        const Unit* identStart = this->sourceUnits.addressOfNextCodeUnit() - 1;
+        IdentifierEscapes sawEscape;
+        if (!matchIdentifierStart(&sawEscape)) {
+          return badToken();
+        }
+        return identifierName(start, identStart, sawEscape, modifier,
+                              NameVisibility::Private, ttp);
+      }
+
+      case '=':
+        if (matchCodeUnit('=')) {
+          simpleKind = matchCodeUnit('=') ? TokenKind::StrictEq : TokenKind::Eq;
+        } else if (matchCodeUnit('>')) {
+          simpleKind = TokenKind::Arrow;
+        } else {
+          simpleKind = TokenKind::Assign;
+        }
+        break;
+
+      case '+':
+        if (matchCodeUnit('+')) {
+          simpleKind = TokenKind::Inc;
+        } else {
+          simpleKind =
+              matchCodeUnit('=') ? TokenKind::AddAssign : TokenKind::Add;
+        }
+        break;
+
+      case '\\': {
+        char32_t codePoint;
+        if (uint32_t escapeLength = matchUnicodeEscapeIdStart(&codePoint)) {
+          return identifierName(
+              start,
+              this->sourceUnits.addressOfNextCodeUnit() - escapeLength - 1,
+              IdentifierEscapes::SawUnicodeEscape, modifier,
+              NameVisibility::Public, ttp);
+        }
+
+        // We could point "into" a mistyped escape, e.g. for "\u{41H}" we
+        // could point at the 'H'.  But we don't do that now, so the code
+        // unit after the '\' isn't necessarily bad, so just point at the
+        // start of the actually-invalid escape.
+        ungetCodeUnit('\\');
+        error(JSMSG_BAD_ESCAPE);
+        return badToken();
+      }
+
+      case '|':
+        if (matchCodeUnit('|')) {
+          simpleKind = matchCodeUnit('=') ? TokenKind::OrAssign : TokenKind::Or;
+        } else {
+          simpleKind =
+              matchCodeUnit('=') ? TokenKind::BitOrAssign : TokenKind::BitOr;
+        }
+        break;
+
+      case '^':
+        simpleKind =
+            matchCodeUnit('=') ? TokenKind::BitXorAssign : TokenKind::BitXor;
+        break;
+
+      case '&':
+        if (matchCodeUnit('&')) {
+          simpleKind =
+              matchCodeUnit('=') ? TokenKind::AndAssign : TokenKind::And;
+        } else {
+          simpleKind =
+              matchCodeUnit('=') ? TokenKind::BitAndAssign : TokenKind::BitAnd;
+        }
+        break;
+
+      case '?':
+        if (matchCodeUnit('.')) {
+          unit = getCodeUnit();
+          if (IsAsciiDigit(unit)) {
+            // if the code unit is followed by a number, for example it has the
+            // following form `<...> ?.5 <..> then it should be treated as a
+            // ternary rather than as an optional chain
+            simpleKind = TokenKind::Hook;
+            ungetCodeUnit(unit);
+            ungetCodeUnit('.');
+          } else {
+            ungetCodeUnit(unit);
+            simpleKind = TokenKind::OptionalChain;
+          }
+        } else if (matchCodeUnit('?')) {
+          simpleKind = matchCodeUnit('=') ? TokenKind::CoalesceAssign
+                                          : TokenKind::Coalesce;
+        } else {
+          simpleKind = TokenKind::Hook;
+        }
+        break;
+
+      case '!':
+        if (matchCodeUnit('=')) {
+          simpleKind = matchCodeUnit('=') ? TokenKind::StrictNe : TokenKind::Ne;
+        } else {
+          simpleKind = TokenKind::Not;
+        }
+        break;
+
+      case '<':
+        if (anyCharsAccess().options().allowHTMLComments) {
+          // Treat HTML begin-comment as comment-till-end-of-line.
+          if (matchCodeUnit('!')) {
+            if (matchCodeUnit('-')) {
+              if (matchCodeUnit('-')) {
+                this->sourceUnits.consumeRestOfSingleLineComment();
+                continue;
+              }
+              ungetCodeUnit('-');
+            }
+            ungetCodeUnit('!');
+          }
+        }
+        if (matchCodeUnit('<')) {
+          simpleKind =
+              matchCodeUnit('=') ? TokenKind::LshAssign : TokenKind::Lsh;
+        } else {
+          simpleKind = matchCodeUnit('=') ? TokenKind::Le : TokenKind::Lt;
+        }
+        break;
+
+      case '>':
+        if (matchCodeUnit('>')) {
+          if (matchCodeUnit('>')) {
+            simpleKind =
+                matchCodeUnit('=') ? TokenKind::UrshAssign : TokenKind::Ursh;
+          } else {
+            simpleKind =
+                matchCodeUnit('=') ? TokenKind::RshAssign : TokenKind::Rsh;
+          }
+        } else {
+          simpleKind = matchCodeUnit('=') ? TokenKind::Ge : TokenKind::Gt;
+        }
+        break;
+
+      case '*':
+        if (matchCodeUnit('*')) {
+          simpleKind =
+              matchCodeUnit('=') ? TokenKind::PowAssign : TokenKind::Pow;
+        } else {
+          simpleKind =
+              matchCodeUnit('=') ? TokenKind::MulAssign : TokenKind::Mul;
+        }
+        break;
+
+      case '/':
+        // Look for a single-line comment.
+        if (matchCodeUnit('/')) {
+          unit = getCodeUnit();
+          if (unit == '@' || unit == '#') {
+            bool shouldWarn = unit == '@';
+            if (!getDirectives(false, shouldWarn)) {
+              return false;
+            }
+          } else {
+            // NOTE: |unit| may be EOF here.
+            ungetCodeUnit(unit);
+          }
+
+          this->sourceUnits.consumeRestOfSingleLineComment();
+          continue;
+        }
+
+        // Look for a multi-line comment.
+        if (matchCodeUnit('*')) {
+          TokenStreamAnyChars& anyChars = anyCharsAccess();
+          unsigned linenoBefore = anyChars.lineno;
+
+          do {
+            int32_t unit = getCodeUnit();
+            if (unit == EOF) {
+              error(JSMSG_UNTERMINATED_COMMENT);
+              return badToken();
+            }
+
+            if (unit == '*' && matchCodeUnit('/')) {
+              break;
+            }
+
+            if (unit == '@' || unit == '#') {
+              bool shouldWarn = unit == '@';
+              if (!getDirectives(true, shouldWarn)) {
+                return badToken();
+              }
+            } else if (MOZ_LIKELY(isAsciiCodePoint(unit))) {
+              if (!getFullAsciiCodePoint(unit)) {
+                return badToken();
+              }
+            } else {
+              char32_t codePoint;
+              if (!getNonAsciiCodePoint(unit, &codePoint)) {
+                return badToken();
+              }
+            }
+          } while (true);
+
+          if (linenoBefore != anyChars.lineno) {
+            anyChars.updateFlagsForEOL();
+          }
+
+          continue;
+        }
+
+        // Look for a regexp.
+        if (modifier == SlashIsRegExp) {
+          return regexpLiteral(start, ttp);
+        }
+
+        simpleKind = matchCodeUnit('=') ? TokenKind::DivAssign : TokenKind::Div;
+        break;
+
+      case '%':
+        simpleKind = matchCodeUnit('=') ? TokenKind::ModAssign : TokenKind::Mod;
+        break;
+
+      case '-':
+        if (matchCodeUnit('-')) {
+          if (anyCharsAccess().options().allowHTMLComments &&
+              !anyCharsAccess().flags.isDirtyLine) {
+            if (matchCodeUnit('>')) {
+              this->sourceUnits.consumeRestOfSingleLineComment();
+              continue;
+            }
+          }
+
+          simpleKind = TokenKind::Dec;
+        } else {
+          simpleKind =
+              matchCodeUnit('=') ? TokenKind::SubAssign : TokenKind::Sub;
+        }
+        break;
+
+#ifdef ENABLE_DECORATORS
+      case '@':
+        simpleKind = TokenKind::At;
+        break;
+#endif
+
+      default:
+        // We consumed a bad ASCII code point/unit.  Put it back so the
+        // error location is the bad code point.
+        ungetCodeUnit(unit);
+        reportIllegalCharacter(unit);
+        return badToken();
+    }  // switch (AssertedCast<uint8_t>(CodeUnitValue(toUnit(unit))))
+
+    MOZ_ASSERT(simpleKind != TokenKind::Limit,
+               "switch-statement should have set |simpleKind| before "
+               "breaking");
+
+    newSimpleToken(simpleKind, start, modifier, ttp);
+    return true;
+  } while (true);
+}
+/**
+ * Scan the rest of a string literal or template-literal chunk delimited by
+ * |untilChar| (asserted to be ', ", or `) and create the resulting token.
+ *
+ * The token start is recorded one code unit behind the current position, so
+ * the opening delimiter has presumably been consumed by the caller already.
+ * Cooked characters are accumulated in |charBuffer| (cleared on entry) and
+ * drained into an atom once scanning stops.  Scanning stops at the closing
+ * |untilChar| or, in a template literal, right after "${".
+ *
+ * Malformed escapes are reported as errors in string literals.  In template
+ * literals they are only recorded via |setInvalidTemplateEscape| and
+ * scanning continues, leaving the decision to later code.
+ *
+ * @param untilChar  The delimiter that opened (and will close) the literal.
+ * @param modifier   Passed through unchanged to |newAtomToken|.
+ * @param out        Passed to |newAtomToken| together with the new token's
+ *                   kind: String, TemplateHead, or NoSubsTemplate.
+ * @return true on success.  On every false return the scope-exit guard
+ *         below runs |badToken()|.
+ */
+template <typename Unit, class AnyCharsAccess>
+bool TokenStreamSpecific<Unit, AnyCharsAccess>::getStringOrTemplateToken(
+    char untilChar, Modifier modifier, TokenKind* out) {
+  MOZ_ASSERT(untilChar == '\'' || untilChar == '"' || untilChar == '`',
+             "unexpected string/template literal delimiter");
+
+  bool parsingTemplate = (untilChar == '`');
+  bool templateHead = false;  // becomes true if "${" ends the scan
+
+  TokenStart start(this->sourceUnits, -1);
+  this->charBuffer.clear();
+
+  // Run the bad-token code for every path out of this function except the
+  // one success-case.
+  auto noteBadToken = MakeScopeExit([this]() { this->badToken(); });
+
+  auto ReportPrematureEndOfLiteral = [this, untilChar](unsigned errnum) {
+    // Unicode separators aren't end-of-line in template or (as of
+    // recently) string literals, so this assertion doesn't allow them.
+    MOZ_ASSERT(this->sourceUnits.atEnd() ||
+                   this->sourceUnits.peekCodeUnit() == Unit('\r') ||
+                   this->sourceUnits.peekCodeUnit() == Unit('\n'),
+               "must be parked at EOF or EOL to call this function");
+
+    // The various errors reported here include language like "in a ''
+    // literal" or similar, with '' being '', "", or `` as appropriate.
+    const char delimiters[] = {untilChar, untilChar, '\0'};
+
+    this->error(errnum, delimiters);
+    return;
+  };
+
+  // We need to detect any of these chars:  " or ', \n (or its
+  // equivalents), \\, EOF.  Because we detect EOL sequences here and
+  // put them back immediately, we can use getCodeUnit().
+  int32_t unit;
+  while ((unit = getCodeUnit()) != untilChar) {
+    if (unit == EOF) {
+      ReportPrematureEndOfLiteral(JSMSG_EOF_BEFORE_END_OF_LITERAL);
+      return false;
+    }
+
+    // Non-ASCII code points are always directly appended -- even
+    // U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR that are
+    // ordinarily LineTerminatorSequences.  (They contribute their literal
+    // values to template and [as of recently] string literals, but they're
+    // line terminators when computing line/column coordinates.)  Handle
+    // the non-ASCII case early for readability.
+    if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
+      char32_t cp;
+      if (!getNonAsciiCodePointDontNormalize(toUnit(unit), &cp)) {
+        return false;
+      }
+
+      if (MOZ_UNLIKELY(cp == unicode::LINE_SEPARATOR ||
+                       cp == unicode::PARA_SEPARATOR)) {
+        if (!updateLineInfoForEOL()) {
+          return false;
+        }
+
+        anyCharsAccess().updateFlagsForEOL();
+      } else {
+        MOZ_ASSERT(!IsLineTerminator(cp));
+      }
+
+      if (!AppendCodePointToCharBuffer(this->charBuffer, cp)) {
+        return false;
+      }
+
+      continue;
+    }
+
+    if (unit == '\\') {
+      // When parsing templates, we don't immediately report errors for
+      // invalid escapes; these are handled by the parser.  We don't
+      // append to charBuffer in those cases because it won't be read.
+      unit = getCodeUnit();
+      if (unit == EOF) {
+        ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
+        return false;
+      }
+
+      // Non-ASCII |unit| isn't handled by code after this, so dedicate
+      // an unlikely special-case to it and then continue.
+      if (MOZ_UNLIKELY(!isAsciiCodePoint(unit))) {
+        char32_t codePoint;
+        if (!getNonAsciiCodePoint(unit, &codePoint)) {
+          return false;
+        }
+
+        // If we consumed U+2028 LINE SEPARATOR or U+2029 PARAGRAPH
+        // SEPARATOR, they'll be normalized to '\n'.  '\' followed by
+        // LineContinuation represents no code points, so don't append
+        // in this case.
+        if (codePoint != '\n') {
+          if (!AppendCodePointToCharBuffer(this->charBuffer, codePoint)) {
+            return false;
+          }
+        }
+
+        continue;
+      }
+
+      // The block above eliminated all non-ASCII, so cast to the
+      // smallest type possible to assist the C++ compiler.
+      switch (AssertedCast<uint8_t>(CodeUnitValue(toUnit(unit)))) {
+        case 'b':
+          unit = '\b';
+          break;
+        case 'f':
+          unit = '\f';
+          break;
+        case 'n':
+          unit = '\n';
+          break;
+        case 'r':
+          unit = '\r';
+          break;
+        case 't':
+          unit = '\t';
+          break;
+        case 'v':
+          unit = '\v';
+          break;
+
+        case '\r':
+          matchLineTerminator('\n');
+          [[fallthrough]];
+        case '\n': {
+          // LineContinuation represents no code points.  We're manually
+          // consuming a LineTerminatorSequence, so we must manually
+          // update line/column info.
+          if (!updateLineInfoForEOL()) {
+            return false;
+          }
+
+          continue;
+        }
+
+        // Unicode character specification.
+        case 'u': {
+          int32_t c2 = getCodeUnit();
+          if (c2 == EOF) {
+            ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
+            return false;
+          }
+
+          // First handle a delimited Unicode escape, e.g. \u{1F4A9}.
+          if (c2 == '{') {
+            uint32_t start = this->sourceUnits.offset() - 3;  // "\u{" start
+            uint32_t code = 0;
+            bool first = true;
+            bool valid = true;
+            do {
+              int32_t u3 = getCodeUnit();
+              if (u3 == EOF) {
+                if (parsingTemplate) {
+                  TokenStreamAnyChars& anyChars = anyCharsAccess();
+                  anyChars.setInvalidTemplateEscape(start,
+                                                    InvalidEscapeType::Unicode);
+                  valid = false;
+                  break;
+                }
+                reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
+                return false;
+              }
+              if (u3 == '}') {
+                if (first) {
+                  if (parsingTemplate) {
+                    TokenStreamAnyChars& anyChars = anyCharsAccess();
+                    anyChars.setInvalidTemplateEscape(
+                        start, InvalidEscapeType::Unicode);
+                    valid = false;
+                    break;
+                  }
+                  reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
+                  return false;
+                }
+                break;
+              }
+
+              // Beware: |u3| may be a non-ASCII code point here; if
+              // so it'll pass into this |if|-block.
+              if (!IsAsciiHexDigit(u3)) {
+                if (parsingTemplate) {
+                  // We put the code unit back so that we read it
+                  // on the next pass, which matters if it was
+                  // '`' or '\'.
+                  ungetCodeUnit(u3);
+
+                  TokenStreamAnyChars& anyChars = anyCharsAccess();
+                  anyChars.setInvalidTemplateEscape(start,
+                                                    InvalidEscapeType::Unicode);
+                  valid = false;
+                  break;
+                }
+                reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
+                return false;
+              }
+
+              code = (code << 4) | AsciiAlphanumericToNumber(u3);
+              if (code > unicode::NonBMPMax) {
+                if (parsingTemplate) {
+                  TokenStreamAnyChars& anyChars = anyCharsAccess();
+                  anyChars.setInvalidTemplateEscape(
+                      start + 3, InvalidEscapeType::UnicodeOverflow);
+                  valid = false;
+                  break;
+                }
+                reportInvalidEscapeError(start + 3,
+                                         InvalidEscapeType::UnicodeOverflow);
+                return false;
+              }
+
+              first = false;
+            } while (true);
+
+            if (!valid) {
+              continue;  // invalid template escape was recorded above
+            }
+
+            MOZ_ASSERT(code <= unicode::NonBMPMax);
+            if (!AppendCodePointToCharBuffer(this->charBuffer, code)) {
+              return false;
+            }
+
+            continue;
+          }  // end of delimited Unicode escape handling
+
+          // Otherwise it must be a fixed-length \uXXXX Unicode escape.
+          // If it isn't, this is usually an error -- but if this is a
+          // template literal, we must defer error reporting because
+          // malformed escapes are okay in *tagged* template literals.
+          char16_t v;
+          if (IsAsciiHexDigit(c2) && this->sourceUnits.matchHexDigits(3, &v)) {
+            unit = (AsciiAlphanumericToNumber(c2) << 12) | v;
+          } else {
+            // Beware: |c2| may not be an ASCII code point here!
+            ungetCodeUnit(c2);
+            uint32_t start = this->sourceUnits.offset() - 2;  // "\u" start
+            if (parsingTemplate) {
+              TokenStreamAnyChars& anyChars = anyCharsAccess();
+              anyChars.setInvalidTemplateEscape(start,
+                                                InvalidEscapeType::Unicode);
+              continue;
+            }
+            reportInvalidEscapeError(start, InvalidEscapeType::Unicode);
+            return false;
+          }
+          break;
+        }  // case 'u'
+
+        // Hexadecimal character specification.
+        case 'x': {
+          char16_t v;
+          if (this->sourceUnits.matchHexDigits(2, &v)) {
+            unit = v;
+          } else {
+            uint32_t start = this->sourceUnits.offset() - 2;
+            if (parsingTemplate) {
+              TokenStreamAnyChars& anyChars = anyCharsAccess();
+              anyChars.setInvalidTemplateEscape(start,
+                                                InvalidEscapeType::Hexadecimal);
+              continue;
+            }
+            reportInvalidEscapeError(start, InvalidEscapeType::Hexadecimal);
+            return false;
+          }
+          break;
+        }
+
+        default: {
+          if (!IsAsciiOctal(unit)) {
+            // \8 or \9 in an untagged template literal is a syntax error,
+            // reported in GeneralParser::noSubstitutionUntaggedTemplate.
+            //
+            // Tagged template literals, however, may contain \8 and \9.  The
+            // "cooked" representation of such a part will be |undefined|, and
+            // the "raw" representation will contain the literal characters.
+            //
+            //   function f(parts) {
+            //     assertEq(parts[0], undefined);
+            //     assertEq(parts.raw[0], "\\8");
+            //     return "composed";
+            //   }
+            //   assertEq(f`\8`, "composed");
+            if (unit == '8' || unit == '9') {
+              TokenStreamAnyChars& anyChars = anyCharsAccess();
+              if (parsingTemplate) {
+                anyChars.setInvalidTemplateEscape(
+                    this->sourceUnits.offset() - 2,
+                    InvalidEscapeType::EightOrNine);
+                continue;
+              }
+
+              // \8 and \9 are forbidden in string literals in strict mode code.
+              if (!strictModeError(JSMSG_DEPRECATED_EIGHT_OR_NINE_ESCAPE)) {
+                return false;
+              }
+
+              // The above test doesn't catch a few edge cases; see
+              // |GeneralParser::maybeParseDirective|.  Record the violation so
+              // that that function can handle them.
+              anyChars.setSawDeprecatedEightOrNineEscape();
+            }
+            break;
+          }
+
+          // Octal character specification.
+          int32_t val = AsciiOctalToNumber(unit);
+
+          unit = peekCodeUnit();
+          if (MOZ_UNLIKELY(unit == EOF)) {
+            ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
+            return false;
+          }
+
+          // Strict mode code allows only \0 followed by a non-digit.
+          if (val != 0 || IsAsciiDigit(unit)) {
+            TokenStreamAnyChars& anyChars = anyCharsAccess();
+            if (parsingTemplate) {
+              anyChars.setInvalidTemplateEscape(this->sourceUnits.offset() - 2,
+                                                InvalidEscapeType::Octal);
+              continue;
+            }
+
+            if (!strictModeError(JSMSG_DEPRECATED_OCTAL_ESCAPE)) {
+              return false;
+            }
+
+            // The above test doesn't catch a few edge cases; see
+            // |GeneralParser::maybeParseDirective|.  Record the violation so
+            // that that function can handle them.
+            anyChars.setSawDeprecatedOctalEscape();
+          }
+
+          if (IsAsciiOctal(unit)) {
+            val = 8 * val + AsciiOctalToNumber(unit);
+            consumeKnownCodeUnit(unit);
+
+            unit = peekCodeUnit();
+            if (MOZ_UNLIKELY(unit == EOF)) {
+              ReportPrematureEndOfLiteral(JSMSG_EOF_IN_ESCAPE_IN_LITERAL);
+              return false;
+            }
+
+            if (IsAsciiOctal(unit)) {
+              int32_t save = val;
+              val = 8 * val + AsciiOctalToNumber(unit);
+              if (val <= 0xFF) {
+                consumeKnownCodeUnit(unit);
+              } else {
+                val = save;  // > 0xFF: third digit stays unconsumed
+              }
+            }
+          }
+
+          unit = char16_t(val);
+          break;
+        }  // default
+      }    // switch (AssertedCast<uint8_t>(CodeUnitValue(toUnit(unit))))
+
+      if (!this->charBuffer.append(unit)) {
+        return false;
+      }
+
+      continue;
+    }  // (unit == '\\')
+
+    if (unit == '\r' || unit == '\n') {
+      if (!parsingTemplate) {
+        // String literals don't allow ASCII line breaks.
+        ungetCodeUnit(unit);
+        ReportPrematureEndOfLiteral(JSMSG_EOL_BEFORE_END_OF_STRING);
+        return false;
+      }
+
+      if (unit == '\r') {
+        unit = '\n';
+        matchLineTerminator('\n');
+      }
+
+      if (!updateLineInfoForEOL()) {
+        return false;
+      }
+
+      anyCharsAccess().updateFlagsForEOL();
+    } else if (parsingTemplate && unit == '$' && matchCodeUnit('{')) {
+      templateHead = true;
+      break;
+    }
+
+    if (!this->charBuffer.append(unit)) {
+      return false;
+    }
+  }
+
+  TaggedParserAtomIndex atom = drainCharBufferIntoAtom();
+  if (!atom) {
+    return false;
+  }
+
+  noteBadToken.release();  // success: don't run badToken()
+
+  MOZ_ASSERT_IF(!parsingTemplate, !templateHead);
+
+  TokenKind kind = !parsingTemplate ? TokenKind::String
+                   : templateHead   ? TokenKind::TemplateHead
+                                    : TokenKind::NoSubsTemplate;
+  newAtomToken(kind, atom, start, modifier, out);
+  return true;
+}
+
+// Returns the description string that FOR_EACH_TOKEN_KIND pairs with |tt|.
+// TokenKind::Limit trips MOZ_ASSERT_UNREACHABLE and, like any value the
+// switch does not list, yields "<bad TokenKind>".
+const char* TokenKindToDesc(TokenKind tt) {
+  const char* description = "<bad TokenKind>";
+
+  switch (tt) {
+#define SELECT_DESC(kindName, kindDesc) \
+  case TokenKind::kindName:             \
+    description = kindDesc;             \
+    break;
+    FOR_EACH_TOKEN_KIND(SELECT_DESC)
+#undef SELECT_DESC
+    case TokenKind::Limit:
+      MOZ_ASSERT_UNREACHABLE("TokenKind::Limit should not be passed.");
+      break;
+  }
+
+  return description;
+}
+
+#ifdef DEBUG
+// Returns the spelling "TokenKind::<name>" for |tt|, or "<bad TokenKind>" for
+// TokenKind::Limit and any value not listed by FOR_EACH_TOKEN_KIND.
+const char* TokenKindToString(TokenKind tt) {
+  const char* spelling = "<bad TokenKind>";
+
+  switch (tt) {
+#  define SELECT_NAME(kindName, unusedDesc) \
+    case TokenKind::kindName:               \
+      spelling = "TokenKind::" #kindName;   \
+      break;
+    FOR_EACH_TOKEN_KIND(SELECT_NAME)
+#  undef SELECT_NAME
+    case TokenKind::Limit:
+      break;
+  }
+
+  return spelling;
+}
+#endif
+/*
+ * Explicit instantiation definitions for the token-stream class templates.
+ *
+ * TokenStreamCharsBase is instantiated once per code-unit type (Utf8Unit and
+ * char16_t).  GeneralTokenStreamChars, TokenStreamChars and
+ * TokenStreamSpecific are instantiated for char16_t with
+ * TokenStreamAnyCharsAccess, and for both code-unit types with
+ * ParserAnyCharsAccess over GeneralParser using FullParseHandler and
+ * SyntaxParseHandler.
+ */
+template class TokenStreamCharsBase<Utf8Unit>;
+template class TokenStreamCharsBase<char16_t>;
+
+template class GeneralTokenStreamChars<char16_t, TokenStreamAnyCharsAccess>;
+template class TokenStreamChars<char16_t, TokenStreamAnyCharsAccess>;
+template class TokenStreamSpecific<char16_t, TokenStreamAnyCharsAccess>;
+
+template class GeneralTokenStreamChars<
+    Utf8Unit, ParserAnyCharsAccess<GeneralParser<FullParseHandler, Utf8Unit>>>;
+template class GeneralTokenStreamChars<
+    Utf8Unit,
+    ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, Utf8Unit>>>;
+template class GeneralTokenStreamChars<
+    char16_t, ParserAnyCharsAccess<GeneralParser<FullParseHandler, char16_t>>>;
+template class GeneralTokenStreamChars<
+    char16_t,
+    ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, char16_t>>>;
+
+template class TokenStreamChars<
+    Utf8Unit, ParserAnyCharsAccess<GeneralParser<FullParseHandler, Utf8Unit>>>;
+template class TokenStreamChars<
+    Utf8Unit,
+    ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, Utf8Unit>>>;
+template class TokenStreamChars<
+    char16_t, ParserAnyCharsAccess<GeneralParser<FullParseHandler, char16_t>>>;
+template class TokenStreamChars<
+    char16_t,
+    ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, char16_t>>>;
+
+template class TokenStreamSpecific<
+    Utf8Unit, ParserAnyCharsAccess<GeneralParser<FullParseHandler, Utf8Unit>>>;
+template class TokenStreamSpecific<
+    Utf8Unit,
+    ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, Utf8Unit>>>;
+template class TokenStreamSpecific<
+    char16_t, ParserAnyCharsAccess<GeneralParser<FullParseHandler, char16_t>>>;
+template class TokenStreamSpecific<
+    char16_t,
+    ParserAnyCharsAccess<GeneralParser<SyntaxParseHandler, char16_t>>>;
+
+}  // namespace frontend
+
+}  // namespace js
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
commit	26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree	f435a8308119effd964b339f76abb83a57c29483 /js/src/frontend/TokenStream.cpp
parent	Initial commit. (diff)
download	firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip