/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim:set ts=2 sw=2 sts=2 et cindent: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #ifndef nsScannerString_h___ #define nsScannerString_h___ #include "nsString.h" #include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator #include "mozilla/LinkedList.h" #include /** * NOTE: nsScannerString (and the other classes defined in this file) are * not related to nsAString or any of the other xpcom/string classes. * * nsScannerString is based on the nsSlidingString implementation that used * to live in xpcom/string. Now that nsAString is limited to representing * only single fragment strings, nsSlidingString can no longer be used. * * An advantage to this design is that it does not employ any virtual * functions. * * This file uses SCC-style indenting in deference to the nsSlidingString * code from which this code is derived ;-) */ class nsScannerIterator; class nsScannerSubstring; class nsScannerString; /** * nsScannerBufferList * * This class maintains a list of heap-allocated Buffer objects. The buffers * are maintained in a circular linked list. Each buffer has a usage count * that is decremented by the owning nsScannerSubstring. * * The buffer list itself is reference counted. This allows the buffer list * to be shared by multiple nsScannerSubstring objects. The reference * counting is not threadsafe, which is not at all a requirement. * * When a nsScannerSubstring releases its reference to a buffer list, it * decrements the usage count of the first buffer in the buffer list that it * was referencing. It informs the buffer list that it can discard buffers * starting at that prefix. The buffer list will do so if the usage count of * that buffer is 0 and if it is the first buffer in the list. It will * continue to prune buffers starting from the front of the buffer list until * it finds a buffer that has a usage count that is non-zero. */ class nsScannerBufferList { public: /** * Buffer objects are directly followed by a data segment. The start * of the data segment is determined by increment the |this| pointer * by 1 unit. */ class Buffer : public mozilla::LinkedListElement { public: void IncrementUsageCount() { ++mUsageCount; } void DecrementUsageCount() { --mUsageCount; } bool IsInUse() const { return mUsageCount != 0; } const char16_t* DataStart() const { return (const char16_t*)(this + 1); } char16_t* DataStart() { return (char16_t*)(this + 1); } const char16_t* DataEnd() const { return mDataEnd; } char16_t* DataEnd() { return mDataEnd; } const Buffer* Next() const { return getNext(); } Buffer* Next() { return getNext(); } const Buffer* Prev() const { return getPrevious(); } Buffer* Prev() { return getPrevious(); } uint32_t DataLength() const { return mDataEnd - DataStart(); } void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; } private: friend class nsScannerBufferList; int32_t mUsageCount; char16_t* mDataEnd; }; /** * Position objects serve as lightweight pointers into a buffer list. * The mPosition member must be contained with mBuffer->DataStart() * and mBuffer->DataEnd(). */ class Position { public: Position() : mBuffer(nullptr), mPosition(nullptr) {} Position(Buffer* buffer, char16_t* position) : mBuffer(buffer), mPosition(position) {} inline explicit Position(const nsScannerIterator& aIter); inline Position& operator=(const nsScannerIterator& aIter); static size_t Distance(const Position& p1, const Position& p2); Buffer* mBuffer; char16_t* mPosition; }; static Buffer* AllocBufferFromString(const nsAString&); static Buffer* AllocBuffer(uint32_t capacity); // capacity = number of chars explicit nsScannerBufferList(Buffer* buf) : mRefCnt(0) { mBuffers.insertBack(buf); } void AddRef() { ++mRefCnt; } void Release() { if (--mRefCnt == 0) delete this; } void Append(Buffer* buf) { mBuffers.insertBack(buf); } void InsertAfter(Buffer* buf, Buffer* prev) { prev->setNext(buf); } void SplitBuffer(const Position&); void DiscardUnreferencedPrefix(Buffer*); Buffer* Head() { return mBuffers.getFirst(); } const Buffer* Head() const { return mBuffers.getFirst(); } Buffer* Tail() { return mBuffers.getLast(); } const Buffer* Tail() const { return mBuffers.getLast(); } private: friend class nsScannerSubstring; ~nsScannerBufferList() { ReleaseAll(); } void ReleaseAll(); int32_t mRefCnt; mozilla::LinkedList mBuffers; }; /** * nsScannerFragment represents a "slice" of a Buffer object. */ struct nsScannerFragment { typedef nsScannerBufferList::Buffer Buffer; const Buffer* mBuffer; const char16_t* mFragmentStart; const char16_t* mFragmentEnd; }; /** * nsScannerSubstring is the base class for nsScannerString. It provides * access to iterators and methods to bind the substring to another * substring or nsAString instance. * * This class owns the buffer list. */ class nsScannerSubstring { public: typedef nsScannerBufferList::Buffer Buffer; typedef nsScannerBufferList::Position Position; typedef uint32_t size_type; nsScannerSubstring(); explicit nsScannerSubstring(const nsAString& s); ~nsScannerSubstring(); nsScannerIterator& BeginReading(nsScannerIterator& iter) const; nsScannerIterator& EndReading(nsScannerIterator& iter) const; size_type Length() const { return mLength; } int32_t CountChar(char16_t) const; void Rebind(const nsScannerSubstring&, const nsScannerIterator&, const nsScannerIterator&); void Rebind(const nsAString&); const nsAString& AsString() const; bool GetNextFragment(nsScannerFragment&) const; bool GetPrevFragment(nsScannerFragment&) const; static inline Buffer* AllocBufferFromString(const nsAString& aStr) { return nsScannerBufferList::AllocBufferFromString(aStr); } static inline Buffer* AllocBuffer(size_type aCapacity) { return nsScannerBufferList::AllocBuffer(aCapacity); } protected: void acquire_ownership_of_buffer_list() const { mBufferList->AddRef(); mStart.mBuffer->IncrementUsageCount(); } void release_ownership_of_buffer_list() { if (mBufferList) { mStart.mBuffer->DecrementUsageCount(); mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer); mBufferList->Release(); } } void init_range_from_buffer_list() { mStart.mBuffer = mBufferList->Head(); mStart.mPosition = mStart.mBuffer->DataStart(); mEnd.mBuffer = mBufferList->Tail(); mEnd.mPosition = mEnd.mBuffer->DataEnd(); mLength = Position::Distance(mStart, mEnd); } Position mStart; Position mEnd; nsScannerBufferList* mBufferList; size_type mLength; // these fields are used to implement AsString nsDependentSubstring mFlattenedRep; bool mIsDirty; friend class nsScannerSharedSubstring; }; /** * nsScannerString provides methods to grow and modify a buffer list. */ class nsScannerString : public nsScannerSubstring { public: explicit nsScannerString(Buffer*); // you are giving ownership to the string, it takes and keeps your // buffer, deleting it when done. // Use AllocBuffer or AllocBufferFromString to create a Buffer object // for use with this function. void AppendBuffer(Buffer*); void DiscardPrefix(const nsScannerIterator&); // any other way you want to do this? void UngetReadable(const nsAString& aReadable, const nsScannerIterator& aCurrentPosition); }; /** * nsScannerSharedSubstring implements copy-on-write semantics for * nsScannerSubstring. When you call .writable(), it will copy the data * and return a mutable string object. This class also manages releasing * the reference to the scanner buffer when it is no longer needed. */ class nsScannerSharedSubstring { public: nsScannerSharedSubstring() : mBuffer(nullptr), mBufferList(nullptr) {} ~nsScannerSharedSubstring() { if (mBufferList) ReleaseBuffer(); } // Acquire a copy-on-write reference to the given substring. void Rebind(const nsScannerIterator& aStart, const nsScannerIterator& aEnd); // Get a mutable reference to this string nsAString& writable() { if (mBufferList) MakeMutable(); return mString; } // Get a const reference to this string const nsAString& str() const { return mString; } private: typedef nsScannerBufferList::Buffer Buffer; void ReleaseBuffer(); void MakeMutable(); nsDependentSubstring mString; Buffer* mBuffer; nsScannerBufferList* mBufferList; }; /** * nsScannerIterator works just like nsReadingIterator except that * it knows how to iterate over a list of scanner buffers. */ class nsScannerIterator { public: typedef nsScannerIterator self_type; typedef ptrdiff_t difference_type; typedef char16_t value_type; typedef const char16_t* pointer; typedef const char16_t& reference; typedef nsScannerSubstring::Buffer Buffer; protected: nsScannerFragment mFragment; const char16_t* mPosition; const nsScannerSubstring* mOwner; friend class nsScannerSubstring; friend class nsScannerSharedSubstring; public: // nsScannerIterator(); // auto-generate // default constructor is OK nsScannerIterator( const nsScannerIterator& ); // // auto-generated copy-constructor OK nsScannerIterator& operator=( const // nsScannerIterator& ); // auto-generated copy-assignment operator OK inline void normalize_forward(); inline void normalize_backward(); pointer get() const { return mPosition; } char16_t operator*() const { return *get(); } const nsScannerFragment& fragment() const { return mFragment; } const Buffer* buffer() const { return mFragment.mBuffer; } self_type& operator++() { ++mPosition; normalize_forward(); return *this; } self_type operator++(int) { self_type result(*this); ++mPosition; normalize_forward(); return result; } self_type& operator--() { normalize_backward(); --mPosition; return *this; } self_type operator--(int) { self_type result(*this); normalize_backward(); --mPosition; return result; } difference_type size_forward() const { return mFragment.mFragmentEnd - mPosition; } difference_type size_backward() const { return mPosition - mFragment.mFragmentStart; } self_type& advance(difference_type n) { while (n > 0) { difference_type one_hop = std::min(n, size_forward()); NS_ASSERTION(one_hop > 0, "Infinite loop: can't advance a reading iterator beyond the " "end of a string"); // perhaps I should |break| if |!one_hop|? mPosition += one_hop; normalize_forward(); n -= one_hop; } while (n < 0) { normalize_backward(); difference_type one_hop = std::max(n, -size_backward()); NS_ASSERTION(one_hop < 0, "Infinite loop: can't advance (backward) a reading iterator " "beyond the end of a string"); // perhaps I should |break| if |!one_hop|? mPosition += one_hop; n -= one_hop; } return *this; } }; inline bool SameFragment(const nsScannerIterator& a, const nsScannerIterator& b) { return a.fragment().mFragmentStart == b.fragment().mFragmentStart; } /** * this class is needed in order to make use of the methods in nsAlgorithm.h */ template <> struct nsCharSourceTraits { typedef nsScannerIterator::difference_type difference_type; static uint32_t readable_distance(const nsScannerIterator& first, const nsScannerIterator& last) { return uint32_t(SameFragment(first, last) ? last.get() - first.get() : first.size_forward()); } static const nsScannerIterator::value_type* read( const nsScannerIterator& iter) { return iter.get(); } static void advance(nsScannerIterator& s, difference_type n) { s.advance(n); } }; /** * inline methods follow */ inline void nsScannerIterator::normalize_forward() { while (mPosition == mFragment.mFragmentEnd && mOwner->GetNextFragment(mFragment)) mPosition = mFragment.mFragmentStart; } inline void nsScannerIterator::normalize_backward() { while (mPosition == mFragment.mFragmentStart && mOwner->GetPrevFragment(mFragment)) mPosition = mFragment.mFragmentEnd; } inline bool operator==(const nsScannerIterator& lhs, const nsScannerIterator& rhs) { return lhs.get() == rhs.get(); } inline bool operator!=(const nsScannerIterator& lhs, const nsScannerIterator& rhs) { return lhs.get() != rhs.get(); } inline nsScannerBufferList::Position::Position(const nsScannerIterator& aIter) : mBuffer(const_cast(aIter.buffer())), mPosition(const_cast(aIter.get())) {} inline nsScannerBufferList::Position& nsScannerBufferList::Position::operator=( const nsScannerIterator& aIter) { mBuffer = const_cast(aIter.buffer()); mPosition = const_cast(aIter.get()); return *this; } /** * scanner string utils * * These methods mimic the API provided by nsReadableUtils in xpcom/string. * Here we provide only the methods that the htmlparser module needs. */ inline size_t Distance(const nsScannerIterator& aStart, const nsScannerIterator& aEnd) { typedef nsScannerBufferList::Position Position; return Position::Distance(Position(aStart), Position(aEnd)); } bool CopyUnicodeTo(const nsScannerIterator& aSrcStart, const nsScannerIterator& aSrcEnd, nsAString& aDest); inline bool CopyUnicodeTo(const nsScannerSubstring& aSrc, nsAString& aDest) { nsScannerIterator begin, end; return CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest); } bool AppendUnicodeTo(const nsScannerIterator& aSrcStart, const nsScannerIterator& aSrcEnd, nsAString& aDest); inline bool AppendUnicodeTo(const nsScannerSubstring& aSrc, nsAString& aDest) { nsScannerIterator begin, end; return AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest); } bool AppendUnicodeTo(const nsScannerIterator& aSrcStart, const nsScannerIterator& aSrcEnd, nsScannerSharedSubstring& aDest); bool FindCharInReadable(char16_t aChar, nsScannerIterator& aStart, const nsScannerIterator& aEnd); bool FindInReadable(const nsAString& aPattern, nsScannerIterator& aStart, nsScannerIterator& aEnd, nsStringComparator = nsTDefaultStringComparator); bool RFindInReadable(const nsAString& aPattern, nsScannerIterator& aStart, nsScannerIterator& aEnd, nsStringComparator = nsTDefaultStringComparator); inline bool CaseInsensitiveFindInReadable(const nsAString& aPattern, nsScannerIterator& aStart, nsScannerIterator& aEnd) { return FindInReadable(aPattern, aStart, aEnd, nsCaseInsensitiveStringComparator); } #endif // !defined(nsScannerString_h___)