diff options
Diffstat (limited to 'dom/base/nsTextFragment.h')
-rw-r--r-- | dom/base/nsTextFragment.h | 316 |
1 files changed, 316 insertions, 0 deletions
diff --git a/dom/base/nsTextFragment.h b/dom/base/nsTextFragment.h new file mode 100644 index 0000000000..5330815683 --- /dev/null +++ b/dom/base/nsTextFragment.h @@ -0,0 +1,316 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * A class which represents a fragment of text (eg inside a text + * node); if only codepoints below 256 are used, the text is stored as + * a char*; otherwise the text is stored as a char16_t* + */ + +#ifndef nsTextFragment_h___ +#define nsTextFragment_h___ + +#include "mozilla/Attributes.h" +#include "mozilla/MemoryReporting.h" + +#include "nsCharTraits.h" +#include "nsString.h" +#include "nsStringBuffer.h" +#include "nsReadableUtils.h" +#include "nsISupportsImpl.h" + +// XXX should this normalize the code to keep a \u0000 at the end? + +// XXX nsTextFragmentPool? + +/** + * A fragment of text. If mIs2b is 1 then the m2b pointer is valid + * otherwise the m1b pointer is valid. If m1b is used then each byte + * of data represents a single ucs2 character with the high byte being + * zero. + * + * This class does not have a virtual destructor therefore it is not + * meant to be subclassed. + */ +class nsTextFragment final { + public: + static nsresult Init(); + static void Shutdown(); + + /** + * Default constructor. Initialize the fragment to be empty. + */ + nsTextFragment() : m1b(nullptr), mAllBits(0) { + MOZ_COUNT_CTOR(nsTextFragment); + NS_ASSERTION(sizeof(FragmentBits) == 4, "Bad field packing!"); + } + + ~nsTextFragment(); + + /** + * Change the contents of this fragment to be a copy of the + * the argument fragment, or to "" if unable to allocate enough memory. + */ + nsTextFragment& operator=(const nsTextFragment& aOther); + + /** + * Return true if this fragment is represented by char16_t data + */ + bool Is2b() const { return mState.mIs2b; } + + /** + * Return true if this fragment contains Bidi text + * For performance reasons this flag is only set if explicitely requested (by + * setting the aUpdateBidi argument on SetTo or Append to true). + */ + bool IsBidi() const { return mState.mIsBidi; } + + /** + * Get a pointer to constant char16_t data. + */ + const char16_t* Get2b() const { + MOZ_ASSERT(Is2b(), "not 2b text"); + return static_cast<char16_t*>(m2b->Data()); + } + + /** + * Get a pointer to constant char data. + */ + const char* Get1b() const { + NS_ASSERTION(!Is2b(), "not 1b text"); + return (const char*)m1b; + } + + /** + * Get the length of the fragment. The length is the number of logical + * characters, not the number of bytes to store the characters. + */ + uint32_t GetLength() const { return mState.mLength; } + +#define NS_MAX_TEXT_FRAGMENT_LENGTH (static_cast<uint32_t>(0x1FFFFFFF)) + + bool CanGrowBy(size_t n) const { + return n < (1 << 29) && mState.mLength + n < (1 << 29); + } + + /** + * Change the contents of this fragment to be a copy of the given + * buffer. If aUpdateBidi is true, contents of the fragment will be scanned, + * and mState.mIsBidi will be turned on if it includes any Bidi characters. + * If aForce2b is true, aBuffer will be stored as char16_t as is. Then, + * you can access the value faster but may waste memory if all characters + * are less than U+0100. + */ + bool SetTo(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi, + bool aForce2b); + + bool SetTo(const nsString& aString, bool aUpdateBidi, bool aForce2b) { + if (MOZ_UNLIKELY(aString.Length() > NS_MAX_TEXT_FRAGMENT_LENGTH)) { + return false; + } + ReleaseText(); + if (aForce2b && !aUpdateBidi) { + nsStringBuffer* buffer = nsStringBuffer::FromString(aString); + if (buffer) { + NS_ADDREF(m2b = buffer); + mState.mInHeap = true; + mState.mIs2b = true; + mState.mLength = aString.Length(); + return true; + } + } + + return SetTo(aString.get(), aString.Length(), aUpdateBidi, aForce2b); + } + + /** + * Append aData to the end of this fragment. If aUpdateBidi is true, contents + * of the fragment will be scanned, and mState.mIsBidi will be turned on if + * it includes any Bidi characters. + * If aForce2b is true, the string will be stored as char16_t as is. Then, + * you can access the value faster but may waste memory if all characters + * are less than U+0100. + */ + bool Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi, + bool aForce2b); + + /** + * Append the contents of this string fragment to aString + */ + void AppendTo(nsAString& aString) const { + if (!AppendTo(aString, mozilla::fallible)) { + aString.AllocFailed(aString.Length() + GetLength()); + } + } + + /** + * Append the contents of this string fragment to aString + * @return false if an out of memory condition is detected, true otherwise + */ + [[nodiscard]] bool AppendTo(nsAString& aString, + const mozilla::fallible_t& aFallible) const { + if (mState.mIs2b) { + if (aString.IsEmpty()) { + m2b->ToString(mState.mLength, aString); + return true; + } + bool ok = aString.Append(Get2b(), mState.mLength, aFallible); + if (!ok) { + return false; + } + + return true; + } else { + return AppendASCIItoUTF16(Substring(m1b, mState.mLength), aString, + aFallible); + } + } + + /** + * Append a substring of the contents of this string fragment to aString. + * @param aOffset where to start the substring in this text fragment + * @param aLength the length of the substring + */ + void AppendTo(nsAString& aString, uint32_t aOffset, uint32_t aLength) const { + if (!AppendTo(aString, aOffset, aLength, mozilla::fallible)) { + aString.AllocFailed(aString.Length() + aLength); + } + } + + /** + * Append a substring of the contents of this string fragment to aString. + * @param aString the string in which to append + * @param aOffset where to start the substring in this text fragment + * @param aLength the length of the substring + * @return false if an out of memory condition is detected, true otherwise + */ + [[nodiscard]] bool AppendTo(nsAString& aString, uint32_t aOffset, + uint32_t aLength, + const mozilla::fallible_t& aFallible) const { + if (mState.mIs2b) { + bool ok = aString.Append(Get2b() + aOffset, aLength, aFallible); + if (!ok) { + return false; + } + + return true; + } else { + return AppendASCIItoUTF16(Substring(m1b + aOffset, aLength), aString, + aFallible); + } + } + + /** + * Make a copy of the fragments contents starting at offset for + * count characters. The offset and count will be adjusted to + * lie within the fragments data. The fragments data is converted if + * necessary. + */ + void CopyTo(char16_t* aDest, uint32_t aOffset, uint32_t aCount); + + /** + * Return the character in the text-fragment at the given + * index. This always returns a char16_t. + */ + char16_t CharAt(uint32_t aIndex) const { + MOZ_ASSERT(aIndex < mState.mLength, "bad index"); + return mState.mIs2b ? Get2b()[aIndex] + : static_cast<unsigned char>(m1b[aIndex]); + } + + /** + * IsHighSurrogateFollowedByLowSurrogateAt() returns true if character at + * aIndex is high surrogate and it's followed by low surrogate. + */ + inline bool IsHighSurrogateFollowedByLowSurrogateAt(uint32_t aIndex) const { + MOZ_ASSERT(aIndex < mState.mLength); + if (!mState.mIs2b || aIndex + 1 >= mState.mLength) { + return false; + } + return NS_IS_SURROGATE_PAIR(Get2b()[aIndex], Get2b()[aIndex + 1]); + } + + /** + * IsLowSurrogateFollowingHighSurrogateAt() returns true if character at + * aIndex is low surrogate and it follows high surrogate. + */ + inline bool IsLowSurrogateFollowingHighSurrogateAt(uint32_t aIndex) const { + MOZ_ASSERT(aIndex < mState.mLength); + if (!mState.mIs2b || !aIndex) { + return false; + } + return NS_IS_SURROGATE_PAIR(Get2b()[aIndex - 1], Get2b()[aIndex]); + } + + /** + * ScalarValueAt() returns a Unicode scalar value at aIndex. If the character + * at aIndex is a high surrogate followed by low surrogate, returns character + * code for the pair. If the index is low surrogate, or a high surrogate but + * not in a pair, returns 0. + */ + inline char32_t ScalarValueAt(uint32_t aIndex) const { + MOZ_ASSERT(aIndex < mState.mLength); + if (!mState.mIs2b) { + return static_cast<unsigned char>(m1b[aIndex]); + } + char16_t ch = Get2b()[aIndex]; + if (!IS_SURROGATE(ch)) { + return ch; + } + if (aIndex + 1 < mState.mLength && NS_IS_HIGH_SURROGATE(ch)) { + char16_t nextCh = Get2b()[aIndex + 1]; + if (NS_IS_LOW_SURROGATE(nextCh)) { + return SURROGATE_TO_UCS4(ch, nextCh); + } + } + return 0; + } + + void SetBidi(bool aBidi) { mState.mIsBidi = aBidi; } + + struct FragmentBits { + // uint32_t to ensure that the values are unsigned, because we + // want 0/1, not 0/-1! + // Making these bool causes Windows to not actually pack them, + // which causes crashes because we assume this structure is no more than + // 32 bits! + uint32_t mInHeap : 1; + uint32_t mIs2b : 1; + uint32_t mIsBidi : 1; + // Note that when you change the bits of mLength, you also need to change + // NS_MAX_TEXT_FRAGMENT_LENGTH. + uint32_t mLength : 29; + }; + + size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const; + + /** + * Check whether the text in this fragment is the same as the text in the + * other fragment. + */ + [[nodiscard]] bool TextEquals(const nsTextFragment& aOther) const; + + private: + void ReleaseText(); + + /** + * Scan the contents of the fragment and turn on mState.mIsBidi if it + * includes any Bidi characters. + */ + void UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength); + + union { + nsStringBuffer* m2b; + const char* m1b; // This is const since it can point to shared data + }; + + union { + uint32_t mAllBits; + FragmentBits mState; + }; +}; + +#endif /* nsTextFragment_h___ */ |