summaryrefslogtreecommitdiffstats
path: root/xpcom/ds/Tokenizer.cpp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /xpcom/ds/Tokenizer.cpp
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'xpcom/ds/Tokenizer.cpp')
-rw-r--r--xpcom/ds/Tokenizer.cpp805
1 files changed, 805 insertions, 0 deletions
diff --git a/xpcom/ds/Tokenizer.cpp b/xpcom/ds/Tokenizer.cpp
new file mode 100644
index 0000000000..3b0f6b02dd
--- /dev/null
+++ b/xpcom/ds/Tokenizer.cpp
@@ -0,0 +1,805 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "Tokenizer.h"
+
+#include "nsUnicharUtils.h"
+#include <algorithm>
+
+namespace mozilla {
+
+template <>
+char const TokenizerBase<char>::sWhitespaces[] = {' ', '\t', 0};
+template <>
+char16_t const TokenizerBase<char16_t>::sWhitespaces[3] = {' ', '\t', 0};
+
+template <typename TChar>
+static bool contains(TChar const* const list, TChar const needle) {
+ for (TChar const* c = list; *c; ++c) {
+ if (needle == *c) {
+ return true;
+ }
+ }
+ return false;
+}
+
+template <typename TChar>
+TTokenizer<TChar>::TTokenizer(const typename base::TAString& aSource,
+ const TChar* aWhitespaces,
+ const TChar* aAdditionalWordChars)
+ : TokenizerBase<TChar>(aWhitespaces, aAdditionalWordChars) {
+ base::mInputFinished = true;
+ aSource.BeginReading(base::mCursor);
+ mRecord = mRollback = base::mCursor;
+ aSource.EndReading(base::mEnd);
+}
+
+template <typename TChar>
+TTokenizer<TChar>::TTokenizer(const TChar* aSource, const TChar* aWhitespaces,
+ const TChar* aAdditionalWordChars)
+ : TTokenizer(typename base::TDependentString(aSource), aWhitespaces,
+ aAdditionalWordChars) {}
+
+template <typename TChar>
+bool TTokenizer<TChar>::Next(typename base::Token& aToken) {
+ if (!base::HasInput()) {
+ base::mHasFailed = true;
+ return false;
+ }
+
+ mRollback = base::mCursor;
+ base::mCursor = base::Parse(aToken);
+
+ base::AssignFragment(aToken, mRollback, base::mCursor);
+
+ base::mPastEof = aToken.Type() == base::TOKEN_EOF;
+ base::mHasFailed = false;
+ return true;
+}
+
+template <typename TChar>
+bool TTokenizer<TChar>::Check(const typename base::TokenType aTokenType,
+ typename base::Token& aResult) {
+ if (!base::HasInput()) {
+ base::mHasFailed = true;
+ return false;
+ }
+
+ typename base::TAString::const_char_iterator next = base::Parse(aResult);
+ if (aTokenType != aResult.Type()) {
+ base::mHasFailed = true;
+ return false;
+ }
+
+ mRollback = base::mCursor;
+ base::mCursor = next;
+
+ base::AssignFragment(aResult, mRollback, base::mCursor);
+
+ base::mPastEof = aResult.Type() == base::TOKEN_EOF;
+ base::mHasFailed = false;
+ return true;
+}
+
+template <typename TChar>
+bool TTokenizer<TChar>::Check(const typename base::Token& aToken) {
+#ifdef DEBUG
+ base::Validate(aToken);
+#endif
+
+ if (!base::HasInput()) {
+ base::mHasFailed = true;
+ return false;
+ }
+
+ typename base::Token parsed;
+ typename base::TAString::const_char_iterator next = base::Parse(parsed);
+ if (!aToken.Equals(parsed)) {
+ base::mHasFailed = true;
+ return false;
+ }
+
+ mRollback = base::mCursor;
+ base::mCursor = next;
+ base::mPastEof = parsed.Type() == base::TOKEN_EOF;
+ base::mHasFailed = false;
+ return true;
+}
+
+template <typename TChar>
+void TTokenizer<TChar>::SkipWhites(WhiteSkipping aIncludeNewLines) {
+ if (!CheckWhite() &&
+ (aIncludeNewLines == DONT_INCLUDE_NEW_LINE || !CheckEOL())) {
+ return;
+ }
+
+ typename base::TAString::const_char_iterator rollback = mRollback;
+ while (CheckWhite() || (aIncludeNewLines == INCLUDE_NEW_LINE && CheckEOL())) {
+ }
+
+ base::mHasFailed = false;
+ mRollback = rollback;
+}
+
+template <typename TChar>
+void TTokenizer<TChar>::SkipUntil(typename base::Token const& aToken) {
+ typename base::TAString::const_char_iterator rollback = base::mCursor;
+ const typename base::Token eof = base::Token::EndOfFile();
+
+ typename base::Token t;
+ while (Next(t)) {
+ if (aToken.Equals(t) || eof.Equals(t)) {
+ Rollback();
+ break;
+ }
+ }
+
+ mRollback = rollback;
+}
+
+template <typename TChar>
+bool TTokenizer<TChar>::CheckChar(bool (*aClassifier)(const TChar aChar)) {
+ if (!aClassifier) {
+ MOZ_ASSERT(false);
+ return false;
+ }
+
+ if (!base::HasInput() || base::mCursor == base::mEnd) {
+ base::mHasFailed = true;
+ return false;
+ }
+
+ if (!aClassifier(*base::mCursor)) {
+ base::mHasFailed = true;
+ return false;
+ }
+
+ mRollback = base::mCursor;
+ ++base::mCursor;
+ base::mHasFailed = false;
+ return true;
+}
+
+template <typename TChar>
+bool TTokenizer<TChar>::CheckPhrase(const typename base::TAString& aPhrase) {
+ if (!base::HasInput()) {
+ return false;
+ }
+
+ typedef typename base::TAString::const_char_iterator Cursor;
+
+ TTokenizer<TChar> pattern(aPhrase);
+ MOZ_ASSERT(!pattern.CheckEOF(),
+ "This will return true but won't shift the Tokenizer's cursor");
+
+ return [&](Cursor cursor, Cursor rollback) mutable {
+ while (true) {
+ if (pattern.CheckEOF()) {
+ base::mHasFailed = false;
+ mRollback = cursor;
+ return true;
+ }
+
+ typename base::Token t1, t2;
+ Unused << Next(t1);
+ Unused << pattern.Next(t2);
+ if (t1.Type() == t2.Type() && t1.Fragment().Equals(t2.Fragment())) {
+ continue;
+ }
+
+ break;
+ }
+
+ base::mHasFailed = true;
+ base::mPastEof = false;
+ base::mCursor = cursor;
+ mRollback = rollback;
+ return false;
+ }(base::mCursor, mRollback);
+}
+
+template <typename TChar>
+bool TTokenizer<TChar>::ReadChar(TChar* aValue) {
+ MOZ_RELEASE_ASSERT(aValue);
+
+ typename base::Token t;
+ if (!Check(base::TOKEN_CHAR, t)) {
+ return false;
+ }
+
+ *aValue = t.AsChar();
+ return true;
+}
+
+template <typename TChar>
+bool TTokenizer<TChar>::ReadChar(bool (*aClassifier)(const TChar aChar),
+ TChar* aValue) {
+ MOZ_RELEASE_ASSERT(aValue);
+
+ if (!CheckChar(aClassifier)) {
+ return false;
+ }
+
+ *aValue = *mRollback;
+ return true;
+}
+
+template <typename TChar>
+bool TTokenizer<TChar>::ReadWord(typename base::TAString& aValue) {
+ typename base::Token t;
+ if (!Check(base::TOKEN_WORD, t)) {
+ return false;
+ }
+
+ aValue.Assign(t.AsString());
+ return true;
+}
+
+template <typename TChar>
+bool TTokenizer<TChar>::ReadWord(typename base::TDependentSubstring& aValue) {
+ typename base::Token t;
+ if (!Check(base::TOKEN_WORD, t)) {
+ return false;
+ }
+
+ aValue.Rebind(t.AsString().BeginReading(), t.AsString().Length());
+ return true;
+}
+
+template <typename TChar>
+bool TTokenizer<TChar>::ReadUntil(typename base::Token const& aToken,
+ typename base::TAString& aResult,
+ ClaimInclusion aInclude) {
+ typename base::TDependentSubstring substring;
+ bool rv = ReadUntil(aToken, substring, aInclude);
+ aResult.Assign(substring);
+ return rv;
+}
+
+template <typename TChar>
+bool TTokenizer<TChar>::ReadUntil(typename base::Token const& aToken,
+ typename base::TDependentSubstring& aResult,
+ ClaimInclusion aInclude) {
+ typename base::TAString::const_char_iterator record = mRecord;
+ Record();
+ typename base::TAString::const_char_iterator rollback = mRollback =
+ base::mCursor;
+
+ bool found = false;
+ typename base::Token t;
+ while (Next(t)) {
+ if (aToken.Equals(t)) {
+ found = true;
+ break;
+ }
+ if (t.Equals(base::Token::EndOfFile())) {
+ // We don't want to eat it.
+ Rollback();
+ break;
+ }
+ }
+
+ Claim(aResult, aInclude);
+ mRollback = rollback;
+ mRecord = record;
+ return found;
+}
+
+template <typename TChar>
+void TTokenizer<TChar>::Rollback() {
+ MOZ_ASSERT(base::mCursor > mRollback || base::mPastEof, "TODO!!!");
+
+ base::mPastEof = false;
+ base::mHasFailed = false;
+ base::mCursor = mRollback;
+}
+
+template <typename TChar>
+void TTokenizer<TChar>::Record(ClaimInclusion aInclude) {
+ mRecord = aInclude == INCLUDE_LAST ? mRollback : base::mCursor;
+}
+
+template <typename TChar>
+void TTokenizer<TChar>::Claim(typename base::TAString& aResult,
+ ClaimInclusion aInclusion) {
+ typename base::TAString::const_char_iterator close =
+ aInclusion == EXCLUDE_LAST ? mRollback : base::mCursor;
+ aResult.Assign(Substring(mRecord, close));
+}
+
+template <typename TChar>
+void TTokenizer<TChar>::Claim(typename base::TDependentSubstring& aResult,
+ ClaimInclusion aInclusion) {
+ typename base::TAString::const_char_iterator close =
+ aInclusion == EXCLUDE_LAST ? mRollback : base::mCursor;
+
+ MOZ_RELEASE_ASSERT(close >= mRecord, "Overflow!");
+ aResult.Rebind(mRecord, close - mRecord);
+}
+
+// TokenizerBase
+
+template <typename TChar>
+TokenizerBase<TChar>::TokenizerBase(const TChar* aWhitespaces,
+ const TChar* aAdditionalWordChars)
+ : mPastEof(false),
+ mHasFailed(false),
+ mInputFinished(true),
+ mMode(Mode::FULL),
+ mMinRawDelivery(1024),
+ mWhitespaces(aWhitespaces ? aWhitespaces : sWhitespaces),
+ mAdditionalWordChars(aAdditionalWordChars),
+ mCursor(nullptr),
+ mEnd(nullptr),
+ mNextCustomTokenID(TOKEN_CUSTOM0) {}
+
+template <typename TChar>
+auto TokenizerBase<TChar>::AddCustomToken(const TAString& aValue,
+ ECaseSensitivity aCaseInsensitivity,
+ bool aEnabled) -> Token {
+ MOZ_ASSERT(!aValue.IsEmpty());
+
+ UniquePtr<Token>& t = *mCustomTokens.AppendElement();
+ t = MakeUnique<Token>();
+
+ t->mType = static_cast<TokenType>(++mNextCustomTokenID);
+ t->mCustomCaseInsensitivity = aCaseInsensitivity;
+ t->mCustomEnabled = aEnabled;
+ t->mCustom.Assign(aValue);
+ return *t;
+}
+
+template <typename TChar>
+void TokenizerBase<TChar>::RemoveCustomToken(Token& aToken) {
+ if (aToken.mType == TOKEN_UNKNOWN) {
+ // Already removed
+ return;
+ }
+
+ for (UniquePtr<Token> const& custom : mCustomTokens) {
+ if (custom->mType == aToken.mType) {
+ mCustomTokens.RemoveElement(custom);
+ aToken.mType = TOKEN_UNKNOWN;
+ return;
+ }
+ }
+
+ MOZ_ASSERT(false, "Token to remove not found");
+}
+
+template <typename TChar>
+void TokenizerBase<TChar>::EnableCustomToken(Token const& aToken,
+ bool aEnabled) {
+ if (aToken.mType == TOKEN_UNKNOWN) {
+ // Already removed
+ return;
+ }
+
+ for (UniquePtr<Token> const& custom : mCustomTokens) {
+ if (custom->Type() == aToken.Type()) {
+ // This effectively destroys the token instance.
+ custom->mCustomEnabled = aEnabled;
+ return;
+ }
+ }
+
+ MOZ_ASSERT(false, "Token to change not found");
+}
+
+template <typename TChar>
+void TokenizerBase<TChar>::SetTokenizingMode(Mode aMode) {
+ mMode = aMode;
+}
+
+template <typename TChar>
+bool TokenizerBase<TChar>::HasFailed() const {
+ return mHasFailed;
+}
+
+template <typename TChar>
+bool TokenizerBase<TChar>::HasInput() const {
+ return !mPastEof;
+}
+
+template <typename TChar>
+auto TokenizerBase<TChar>::Parse(Token& aToken) const ->
+ typename TAString::const_char_iterator {
+ if (mCursor == mEnd) {
+ if (!mInputFinished) {
+ return mCursor;
+ }
+
+ aToken = Token::EndOfFile();
+ return mEnd;
+ }
+
+ MOZ_RELEASE_ASSERT(mEnd >= mCursor, "Overflow!");
+ typename TAString::size_type available = mEnd - mCursor;
+
+ uint32_t longestCustom = 0;
+ for (UniquePtr<Token> const& custom : mCustomTokens) {
+ if (IsCustom(mCursor, *custom, &longestCustom)) {
+ aToken = *custom;
+ return mCursor + custom->mCustom.Length();
+ }
+ }
+
+ if (!mInputFinished && available < longestCustom) {
+ // Not enough data to deterministically decide.
+ return mCursor;
+ }
+
+ typename TAString::const_char_iterator next = mCursor;
+
+ if (mMode == Mode::CUSTOM_ONLY) {
+ // We have to do a brute-force search for all of the enabled custom
+ // tokens.
+ while (next < mEnd) {
+ ++next;
+ for (UniquePtr<Token> const& custom : mCustomTokens) {
+ if (IsCustom(next, *custom)) {
+ aToken = Token::Raw();
+ return next;
+ }
+ }
+ }
+
+ if (mInputFinished) {
+ // End of the data reached.
+ aToken = Token::Raw();
+ return next;
+ }
+
+ if (longestCustom < available && available > mMinRawDelivery) {
+ // We can return some data w/o waiting for either a custom token
+ // or call to FinishData() when we leave the tail where all the
+ // custom tokens potentially fit, so we can't lose only partially
+ // delivered tokens. This preserves reasonable granularity.
+ aToken = Token::Raw();
+ return mEnd - longestCustom + 1;
+ }
+
+ // Not enough data to deterministically decide.
+ return mCursor;
+ }
+
+ enum State {
+ PARSE_INTEGER,
+ PARSE_WORD,
+ PARSE_CRLF,
+ PARSE_LF,
+ PARSE_WS,
+ PARSE_CHAR,
+ } state;
+
+ if (IsWordFirst(*next)) {
+ state = PARSE_WORD;
+ } else if (IsNumber(*next)) {
+ state = PARSE_INTEGER;
+ } else if (contains(mWhitespaces, *next)) { // not UTF-8 friendly?
+ state = PARSE_WS;
+ } else if (*next == '\r') {
+ state = PARSE_CRLF;
+ } else if (*next == '\n') {
+ state = PARSE_LF;
+ } else {
+ state = PARSE_CHAR;
+ }
+
+ mozilla::CheckedUint64 resultingNumber = 0;
+
+ while (next < mEnd) {
+ switch (state) {
+ case PARSE_INTEGER:
+ // Keep it simple for now
+ resultingNumber *= 10;
+ resultingNumber += static_cast<uint64_t>(*next - '0');
+
+ ++next;
+ if (IsPending(next)) {
+ break;
+ }
+ if (IsEnd(next) || !IsNumber(*next)) {
+ if (!resultingNumber.isValid()) {
+ aToken = Token::Error();
+ } else {
+ aToken = Token::Number(resultingNumber.value());
+ }
+ return next;
+ }
+ break;
+
+ case PARSE_WORD:
+ ++next;
+ if (IsPending(next)) {
+ break;
+ }
+ if (IsEnd(next) || !IsWord(*next)) {
+ aToken = Token::Word(Substring(mCursor, next));
+ return next;
+ }
+ break;
+
+ case PARSE_CRLF:
+ ++next;
+ if (IsPending(next)) {
+ break;
+ }
+ if (!IsEnd(next) && *next == '\n') { // LF is optional
+ ++next;
+ }
+ aToken = Token::NewLine();
+ return next;
+
+ case PARSE_LF:
+ ++next;
+ aToken = Token::NewLine();
+ return next;
+
+ case PARSE_WS:
+ ++next;
+ aToken = Token::Whitespace();
+ return next;
+
+ case PARSE_CHAR:
+ ++next;
+ aToken = Token::Char(*mCursor);
+ return next;
+ } // switch (state)
+ } // while (next < end)
+
+ MOZ_ASSERT(!mInputFinished);
+ return mCursor;
+}
+
+template <typename TChar>
+bool TokenizerBase<TChar>::IsEnd(
+ const typename TAString::const_char_iterator& caret) const {
+ return caret == mEnd;
+}
+
+template <typename TChar>
+bool TokenizerBase<TChar>::IsPending(
+ const typename TAString::const_char_iterator& caret) const {
+ return IsEnd(caret) && !mInputFinished;
+}
+
+template <typename TChar>
+bool TokenizerBase<TChar>::IsWordFirst(const TChar aInput) const {
+ // TODO: make this fully work with unicode
+ return (ToLowerCase(static_cast<uint32_t>(aInput)) !=
+ ToUpperCase(static_cast<uint32_t>(aInput))) ||
+ '_' == aInput ||
+ (mAdditionalWordChars ? contains(mAdditionalWordChars, aInput)
+ : false);
+}
+
+template <typename TChar>
+bool TokenizerBase<TChar>::IsWord(const TChar aInput) const {
+ return IsWordFirst(aInput) || IsNumber(aInput);
+}
+
+template <typename TChar>
+bool TokenizerBase<TChar>::IsNumber(const TChar aInput) const {
+ // TODO: are there unicode numbers?
+ return aInput >= '0' && aInput <= '9';
+}
+
+template <typename TChar>
+bool TokenizerBase<TChar>::IsCustom(
+ const typename TAString::const_char_iterator& caret,
+ const Token& aCustomToken, uint32_t* aLongest) const {
+ MOZ_ASSERT(aCustomToken.mType > TOKEN_CUSTOM0);
+ if (!aCustomToken.mCustomEnabled) {
+ return false;
+ }
+
+ if (aLongest) {
+ *aLongest = std::max<uint32_t>(*aLongest, aCustomToken.mCustom.Length());
+ }
+
+ // This is not very likely to happen according to how we call this method
+ // and since it's on a hot path, it's just a diagnostic assert,
+ // not a release assert.
+ MOZ_DIAGNOSTIC_ASSERT(mEnd >= caret, "Overflow?");
+ uint32_t inputLength = mEnd - caret;
+ if (aCustomToken.mCustom.Length() > inputLength) {
+ return false;
+ }
+
+ TDependentSubstring inputFragment(caret, aCustomToken.mCustom.Length());
+ if (aCustomToken.mCustomCaseInsensitivity == CASE_INSENSITIVE) {
+ if constexpr (std::is_same_v<TChar, char>) {
+ return inputFragment.Equals(aCustomToken.mCustom,
+ nsCaseInsensitiveUTF8StringComparator);
+ } else {
+ return inputFragment.Equals(aCustomToken.mCustom,
+ nsCaseInsensitiveStringComparator);
+ }
+ }
+ return inputFragment.Equals(aCustomToken.mCustom);
+}
+
+template <typename TChar>
+void TokenizerBase<TChar>::AssignFragment(
+ Token& aToken, typename TAString::const_char_iterator begin,
+ typename TAString::const_char_iterator end) {
+ aToken.AssignFragment(begin, end);
+}
+
+#ifdef DEBUG
+
+template <typename TChar>
+void TokenizerBase<TChar>::Validate(Token const& aToken) {
+ if (aToken.Type() == TOKEN_WORD) {
+ typename TAString::const_char_iterator c = aToken.AsString().BeginReading();
+ typename TAString::const_char_iterator e = aToken.AsString().EndReading();
+
+ if (c < e) {
+ MOZ_ASSERT(IsWordFirst(*c));
+ while (++c < e) {
+ MOZ_ASSERT(IsWord(*c));
+ }
+ }
+ }
+}
+
+#endif
+
+// TokenizerBase::Token
+
+template <typename TChar>
+TokenizerBase<TChar>::Token::Token()
+ : mType(TOKEN_UNKNOWN),
+ mChar(0),
+ mInteger(0),
+ mCustomCaseInsensitivity(CASE_SENSITIVE),
+ mCustomEnabled(false) {}
+
+template <typename TChar>
+TokenizerBase<TChar>::Token::Token(const Token& aOther)
+ : mType(aOther.mType),
+ mCustom(aOther.mCustom),
+ mChar(aOther.mChar),
+ mInteger(aOther.mInteger),
+ mCustomCaseInsensitivity(aOther.mCustomCaseInsensitivity),
+ mCustomEnabled(aOther.mCustomEnabled) {
+ if (mType == TOKEN_WORD || mType > TOKEN_CUSTOM0) {
+ mWord.Rebind(aOther.mWord.BeginReading(), aOther.mWord.Length());
+ }
+}
+
+template <typename TChar>
+auto TokenizerBase<TChar>::Token::operator=(const Token& aOther) -> Token& {
+ mType = aOther.mType;
+ mCustom = aOther.mCustom;
+ mChar = aOther.mChar;
+ mWord.Rebind(aOther.mWord.BeginReading(), aOther.mWord.Length());
+ mInteger = aOther.mInteger;
+ mCustomCaseInsensitivity = aOther.mCustomCaseInsensitivity;
+ mCustomEnabled = aOther.mCustomEnabled;
+ return *this;
+}
+
+template <typename TChar>
+void TokenizerBase<TChar>::Token::AssignFragment(
+ typename TAString::const_char_iterator begin,
+ typename TAString::const_char_iterator end) {
+ MOZ_RELEASE_ASSERT(end >= begin, "Overflow!");
+ mFragment.Rebind(begin, end - begin);
+}
+
+// static
+template <typename TChar>
+auto TokenizerBase<TChar>::Token::Raw() -> Token {
+ Token t;
+ t.mType = TOKEN_RAW;
+ return t;
+}
+
+// static
+template <typename TChar>
+auto TokenizerBase<TChar>::Token::Word(TAString const& aValue) -> Token {
+ Token t;
+ t.mType = TOKEN_WORD;
+ t.mWord.Rebind(aValue.BeginReading(), aValue.Length());
+ return t;
+}
+
+// static
+template <typename TChar>
+auto TokenizerBase<TChar>::Token::Char(TChar const aValue) -> Token {
+ Token t;
+ t.mType = TOKEN_CHAR;
+ t.mChar = aValue;
+ return t;
+}
+
+// static
+template <typename TChar>
+auto TokenizerBase<TChar>::Token::Number(uint64_t const aValue) -> Token {
+ Token t;
+ t.mType = TOKEN_INTEGER;
+ t.mInteger = aValue;
+ return t;
+}
+
+// static
+template <typename TChar>
+auto TokenizerBase<TChar>::Token::Whitespace() -> Token {
+ Token t;
+ t.mType = TOKEN_WS;
+ t.mChar = '\0';
+ return t;
+}
+
+// static
+template <typename TChar>
+auto TokenizerBase<TChar>::Token::NewLine() -> Token {
+ Token t;
+ t.mType = TOKEN_EOL;
+ return t;
+}
+
+// static
+template <typename TChar>
+auto TokenizerBase<TChar>::Token::EndOfFile() -> Token {
+ Token t;
+ t.mType = TOKEN_EOF;
+ return t;
+}
+
+// static
+template <typename TChar>
+auto TokenizerBase<TChar>::Token::Error() -> Token {
+ Token t;
+ t.mType = TOKEN_ERROR;
+ return t;
+}
+
+template <typename TChar>
+bool TokenizerBase<TChar>::Token::Equals(const Token& aOther) const {
+ if (mType != aOther.mType) {
+ return false;
+ }
+
+ switch (mType) {
+ case TOKEN_INTEGER:
+ return AsInteger() == aOther.AsInteger();
+ case TOKEN_WORD:
+ return AsString() == aOther.AsString();
+ case TOKEN_CHAR:
+ return AsChar() == aOther.AsChar();
+ default:
+ return true;
+ }
+}
+
+template <typename TChar>
+TChar TokenizerBase<TChar>::Token::AsChar() const {
+ MOZ_ASSERT(mType == TOKEN_CHAR || mType == TOKEN_WS);
+ return mChar;
+}
+
+template <typename TChar>
+auto TokenizerBase<TChar>::Token::AsString() const -> TDependentSubstring {
+ MOZ_ASSERT(mType == TOKEN_WORD);
+ return mWord;
+}
+
+template <typename TChar>
+uint64_t TokenizerBase<TChar>::Token::AsInteger() const {
+ MOZ_ASSERT(mType == TOKEN_INTEGER);
+ return mInteger;
+}
+
+template class TokenizerBase<char>;
+template class TokenizerBase<char16_t>;
+
+template class TTokenizer<char>;
+template class TTokenizer<char16_t>;
+
+} // namespace mozilla