From 0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 03:47:29 +0200 Subject: Adding upstream version 115.8.0esr. Signed-off-by: Daniel Baumann --- xpcom/ds/nsCharSeparatedTokenizer.h | 274 ++++++++++++++++++++++++++++++++++++ 1 file changed, 274 insertions(+) create mode 100644 xpcom/ds/nsCharSeparatedTokenizer.h (limited to 'xpcom/ds/nsCharSeparatedTokenizer.h') diff --git a/xpcom/ds/nsCharSeparatedTokenizer.h b/xpcom/ds/nsCharSeparatedTokenizer.h new file mode 100644 index 0000000000..5cf6992e3e --- /dev/null +++ b/xpcom/ds/nsCharSeparatedTokenizer.h @@ -0,0 +1,274 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __nsCharSeparatedTokenizer_h +#define __nsCharSeparatedTokenizer_h + +#include "mozilla/Maybe.h" +#include "mozilla/RangedPtr.h" +#include "mozilla/TypedEnumBits.h" + +#include "nsCRTGlue.h" +#include "nsTDependentSubstring.h" + +// Flags -- only one for now. If we need more, they should be defined to +// be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.) +enum class nsTokenizerFlags { + Default = 0, + SeparatorOptional = 1 << 0, + IncludeEmptyTokenAtEnd = 1 << 1 +}; + +MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(nsTokenizerFlags) + +/** + * This parses a SeparatorChar-separated string into tokens. + * Whitespace surrounding tokens is not treated as part of tokens, however + * whitespace inside a token is. If the final token is the empty string, it is + * not returned by default. + * + * Some examples, with SeparatorChar = ',': + * + * "foo, bar, baz" -> "foo" "bar" "baz" + * "foo,bar,baz" -> "foo" "bar" "baz" + * "foo , bar hi , baz" -> "foo" "bar hi" "baz" + * "foo, ,bar,baz" -> "foo" "" "bar" "baz" + * "foo,,bar,baz" -> "foo" "" "bar" "baz" + * "foo,bar,baz," -> "foo" "bar" "baz" + * + * The function used for whitespace detection is a template argument. + * By default, it is NS_IsAsciiWhitespace. + */ +template +class nsTCharSeparatedTokenizer { + using CharType = typename TDependentSubstringType::char_type; + using SubstringType = typename TDependentSubstringType::substring_type; + + public: + using DependentSubstringType = TDependentSubstringType; + + nsTCharSeparatedTokenizer(const SubstringType& aSource, + CharType aSeparatorChar) + : mIter(aSource.Data(), aSource.Length()), + mEnd(aSource.Data() + aSource.Length(), aSource.Data(), + aSource.Length()), + mSeparatorChar(aSeparatorChar), + mWhitespaceBeforeFirstToken(false), + mWhitespaceAfterCurrentToken(false), + mSeparatorAfterCurrentToken(false) { + // Skip initial whitespace + while (mIter < mEnd && IsWhitespace(*mIter)) { + mWhitespaceBeforeFirstToken = true; + ++mIter; + } + } + + /** + * Checks if any more tokens are available. + */ + bool hasMoreTokens() const { + MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), + "Should be at beginning of token if there is one"); + + if constexpr (Flags & nsTokenizerFlags::IncludeEmptyTokenAtEnd) { + return mIter < mEnd || (mIter == mEnd && mSeparatorAfterCurrentToken); + } else { + return mIter < mEnd; + } + } + + /* + * Returns true if there is whitespace prior to the first token. + */ + bool whitespaceBeforeFirstToken() const { + return mWhitespaceBeforeFirstToken; + } + + /* + * Returns true if there is a separator after the current token. + * Useful if you want to check whether the last token has a separator + * after it which may not be valid. + */ + bool separatorAfterCurrentToken() const { + return mSeparatorAfterCurrentToken; + } + + /* + * Returns true if there is any whitespace after the current token. + */ + bool whitespaceAfterCurrentToken() const { + return mWhitespaceAfterCurrentToken; + } + + /** + * Returns the next token. + */ + const DependentSubstringType nextToken() { + mozilla::RangedPtr tokenStart = mIter; + mozilla::RangedPtr tokenEnd = mIter; + + MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter), + "Should be at beginning of token if there is one"); + + // Search until we hit separator or end (or whitespace, if a separator + // isn't required -- see clause with 'break' below). + while (mIter < mEnd && *mIter != mSeparatorChar) { + // Skip to end of the current word. + while (mIter < mEnd && !IsWhitespace(*mIter) && + *mIter != mSeparatorChar) { + ++mIter; + } + tokenEnd = mIter; + + // Skip whitespace after the current word. + mWhitespaceAfterCurrentToken = false; + while (mIter < mEnd && IsWhitespace(*mIter)) { + mWhitespaceAfterCurrentToken = true; + ++mIter; + } + if constexpr (Flags & nsTokenizerFlags::SeparatorOptional) { + // We've hit (and skipped) whitespace, and that's sufficient to end + // our token, regardless of whether we've reached a SeparatorChar. + break; + } // (else, we'll keep looping until we hit mEnd or SeparatorChar) + } + + mSeparatorAfterCurrentToken = (mIter != mEnd && *mIter == mSeparatorChar); + MOZ_ASSERT((Flags & nsTokenizerFlags::SeparatorOptional) || + (mSeparatorAfterCurrentToken == (mIter < mEnd)), + "If we require a separator and haven't hit the end of " + "our string, then we shouldn't have left the loop " + "unless we hit a separator"); + + // Skip separator (and any whitespace after it), if we're at one. + if (mSeparatorAfterCurrentToken) { + ++mIter; + + while (mIter < mEnd && IsWhitespace(*mIter)) { + mWhitespaceAfterCurrentToken = true; + ++mIter; + } + } + + return Substring(tokenStart.get(), tokenEnd.get()); + } + + auto ToRange() const; + + private: + mozilla::RangedPtr mIter; + const mozilla::RangedPtr mEnd; + const CharType mSeparatorChar; + bool mWhitespaceBeforeFirstToken; + bool mWhitespaceAfterCurrentToken; + bool mSeparatorAfterCurrentToken; +}; + +constexpr bool NS_TokenizerIgnoreNothing(char16_t) { return false; } + +template +using nsTCharSeparatedTokenizerTemplate = + nsTCharSeparatedTokenizer, IsWhitespace, + Flags>; + +template +using nsCharSeparatedTokenizerTemplate = + nsTCharSeparatedTokenizerTemplate; + +using nsCharSeparatedTokenizer = + nsCharSeparatedTokenizerTemplate; + +template +using nsCCharSeparatedTokenizerTemplate = + nsTCharSeparatedTokenizerTemplate; + +using nsCCharSeparatedTokenizer = + nsCCharSeparatedTokenizerTemplate; + +/** + * Adapts a char separated tokenizer for use in a range-based for loop. + * + * Use this typically only indirectly, e.g. like + * + * for (const auto& token : nsCharSeparatedTokenizer(aText, ' ').ToRange()) { + * // ... + * } + */ +template +class nsTokenizedRange { + public: + using DependentSubstringType = typename Tokenizer::DependentSubstringType; + + explicit nsTokenizedRange(Tokenizer&& aTokenizer) + : mTokenizer(std::move(aTokenizer)) {} + + struct EndSentinel {}; + struct Iterator { + explicit Iterator(const Tokenizer& aTokenizer) : mTokenizer(aTokenizer) { + Next(); + } + + const DependentSubstringType& operator*() const { return *mCurrentToken; } + + Iterator& operator++() { + Next(); + return *this; + } + + bool operator==(const EndSentinel&) const { + return mCurrentToken.isNothing(); + } + + bool operator!=(const EndSentinel&) const { return mCurrentToken.isSome(); } + + private: + void Next() { + mCurrentToken.reset(); + + if (mTokenizer.hasMoreTokens()) { + mCurrentToken.emplace(mTokenizer.nextToken()); + } + } + + Tokenizer mTokenizer; + mozilla::Maybe mCurrentToken; + }; + + auto begin() const { return Iterator{mTokenizer}; } + auto end() const { return EndSentinel{}; } + + private: + const Tokenizer mTokenizer; +}; + +template +auto nsTCharSeparatedTokenizer::ToRange() const { + return nsTokenizedRange{nsTCharSeparatedTokenizer{*this}}; +} + +// You should not need to instantiate this class directly. +// Use nsTSubstring::Split instead. +template +class nsTSubstringSplitter + : public nsTokenizedRange> { + public: + using nsTokenizedRange>::nsTokenizedRange; +}; + +extern template class nsTSubstringSplitter; +extern template class nsTSubstringSplitter; + +#endif /* __nsCharSeparatedTokenizer_h */ -- cgit v1.2.3