summaryrefslogtreecommitdiffstats
path: root/dom/base/nsTextFragment.h
diff options
context:
space:
mode:
Diffstat (limited to 'dom/base/nsTextFragment.h')
-rw-r--r--dom/base/nsTextFragment.h316
1 files changed, 316 insertions, 0 deletions
diff --git a/dom/base/nsTextFragment.h b/dom/base/nsTextFragment.h
new file mode 100644
index 0000000000..5330815683
--- /dev/null
+++ b/dom/base/nsTextFragment.h
@@ -0,0 +1,316 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * A class which represents a fragment of text (e.g. inside a text
+ * node); if only codepoints below 256 are used, the text is stored as
+ * a char*; otherwise the text is stored as a char16_t*
+ */
+
+#ifndef nsTextFragment_h___
+#define nsTextFragment_h___
+
+#include "mozilla/Attributes.h"
+#include "mozilla/MemoryReporting.h"
+
+#include "nsCharTraits.h"
+#include "nsString.h"
+#include "nsStringBuffer.h"
+#include "nsReadableUtils.h"
+#include "nsISupportsImpl.h"
+
+// XXX should this normalize the code to keep a \u0000 at the end?
+
+// XXX nsTextFragmentPool?
+
+/**
+ * A fragment of text. If mState.mIs2b is 1 then the m2b pointer is valid,
+ * otherwise the m1b pointer is valid. If m1b is used then each byte
+ * of data represents a single UCS-2 character with the high byte being
+ * zero.
+ *
+ * This class does not have a virtual destructor and is declared final, so
+ * it cannot be subclassed.
+ */
+class nsTextFragment final {
+ public:
+ static nsresult Init();
+ static void Shutdown();
+
+ /**
+ * Default constructor. Initialize the fragment to be empty (null, length 0).
+ */
+ nsTextFragment() : m1b(nullptr), mAllBits(0) {  // mAllBits aliases mState
+ MOZ_COUNT_CTOR(nsTextFragment);
+ NS_ASSERTION(sizeof(FragmentBits) == 4, "Bad field packing!");
+ }
+
+ ~nsTextFragment();
+
+ /**
+ * Change the contents of this fragment to be a copy of the
+ * argument fragment, or to "" if unable to allocate enough memory.
+ */
+ nsTextFragment& operator=(const nsTextFragment& aOther);
+
+ /**
+ * Return true if this fragment is represented by char16_t data (m2b is valid)
+ */
+ bool Is2b() const { return mState.mIs2b; }
+
+ /**
+ * Return true if this fragment contains Bidi text.
+ * For performance reasons this flag is only set if explicitly requested (via
+ * the aUpdateBidi argument of SetTo or Append, or by calling SetBidi).
+ */
+ bool IsBidi() const { return mState.mIsBidi; }
+
+ /**
+ * Get a pointer to constant char16_t data. Asserts that Is2b() is true.
+ */
+ const char16_t* Get2b() const {
+ MOZ_ASSERT(Is2b(), "not 2b text");
+ return static_cast<char16_t*>(m2b->Data());
+ }
+
+ /**
+ * Get a pointer to constant char data. Asserts that Is2b() is false.
+ */
+ const char* Get1b() const {
+ NS_ASSERTION(!Is2b(), "not 1b text");
+ return (const char*)m1b;
+ }
+
+ /**
+ * Get the length of the fragment. The length is the number of logical
+ * characters, not the number of bytes to store the characters.
+ */
+ uint32_t GetLength() const { return mState.mLength; }
+
+#define NS_MAX_TEXT_FRAGMENT_LENGTH (static_cast<uint32_t>(0x1FFFFFFF))  // 2^29 - 1
+
+ bool CanGrowBy(size_t n) const {  // true if n and mLength + n are both < 2^29
+ return n < (1 << 29) && mState.mLength + n < (1 << 29);  // 1 << 29 == max + 1
+ }
+
+ /**
+ * Change the contents of this fragment to be a copy of the given
+ * buffer. If aUpdateBidi is true, contents of the fragment will be scanned,
+ * and mState.mIsBidi will be turned on if it includes any Bidi characters.
+ * If aForce2b is true, aBuffer will be stored as char16_t as is. Then,
+ * you can access the value faster but may waste memory if all characters
+ * are less than U+0100.
+ */
+ bool SetTo(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi,
+ bool aForce2b);
+
+ bool SetTo(const nsString& aString, bool aUpdateBidi, bool aForce2b) {
+ if (MOZ_UNLIKELY(aString.Length() > NS_MAX_TEXT_FRAGMENT_LENGTH)) {
+ return false;  // length would not fit in the 29-bit mState.mLength
+ }
+ ReleaseText();
+ if (aForce2b && !aUpdateBidi) {  // stored as is, no Bidi scan requested
+ nsStringBuffer* buffer = nsStringBuffer::FromString(aString);
+ if (buffer) {  // NOTE(review): null presumably means no shareable buffer — confirm
+ NS_ADDREF(m2b = buffer);  // hold a reference to aString's buffer
+ mState.mInHeap = true;
+ mState.mIs2b = true;
+ mState.mLength = aString.Length();
+ return true;
+ }
+ }
+ // Otherwise copy via the pointer/length overload.
+ return SetTo(aString.get(), aString.Length(), aUpdateBidi, aForce2b);
+ }
+
+ /**
+ * Append aData to the end of this fragment. If aUpdateBidi is true, contents
+ * of the fragment will be scanned, and mState.mIsBidi will be turned on if
+ * it includes any Bidi characters.
+ * If aForce2b is true, the string will be stored as char16_t as is. Then,
+ * you can access the value faster but may waste memory if all characters
+ * are less than U+0100.
+ */
+ bool Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi,
+ bool aForce2b);
+
+ /**
+ * Append this fragment's contents to aString; calls AllocFailed() on failure.
+ */
+ void AppendTo(nsAString& aString) const {
+ if (!AppendTo(aString, mozilla::fallible)) {
+ aString.AllocFailed(aString.Length() + GetLength());
+ }
+ }
+
+ /**
+ * Append the contents of this string fragment to aString
+ * @return false if an out of memory condition is detected, true otherwise
+ */
+ [[nodiscard]] bool AppendTo(nsAString& aString,
+ const mozilla::fallible_t& aFallible) const {
+ if (mState.mIs2b) {
+ if (aString.IsEmpty()) {  // NOTE(review): presumably shares m2b, no copy — confirm
+ m2b->ToString(mState.mLength, aString);
+ return true;
+ }
+ bool ok = aString.Append(Get2b(), mState.mLength, aFallible);
+ if (!ok) {
+ return false;
+ }
+
+ return true;
+ } else {  // 1-byte storage: converted through AppendASCIItoUTF16
+ return AppendASCIItoUTF16(Substring(m1b, mState.mLength), aString,
+ aFallible);
+ }
+ }
+
+ /**
+ * Append a substring of this fragment to aString (AllocFailed() on failure).
+ * @param aOffset where to start the substring in this text fragment
+ * @param aLength the length of the substring
+ */
+ void AppendTo(nsAString& aString, uint32_t aOffset, uint32_t aLength) const {
+ if (!AppendTo(aString, aOffset, aLength, mozilla::fallible)) {
+ aString.AllocFailed(aString.Length() + aLength);
+ }
+ }
+
+ /**
+ * Append a substring of the contents of this string fragment to aString.
+ * @param aString the string to append to
+ * @param aOffset where to start the substring in this text fragment
+ * @param aLength the length of the substring; the range is not checked here
+ * @return false if an out of memory condition is detected, true otherwise
+ */
+ [[nodiscard]] bool AppendTo(nsAString& aString, uint32_t aOffset,
+ uint32_t aLength,
+ const mozilla::fallible_t& aFallible) const {
+ if (mState.mIs2b) {
+ bool ok = aString.Append(Get2b() + aOffset, aLength, aFallible);
+ if (!ok) {
+ return false;
+ }
+
+ return true;
+ } else {  // 1-byte storage: converted through AppendASCIItoUTF16
+ return AppendASCIItoUTF16(Substring(m1b + aOffset, aLength), aString,
+ aFallible);
+ }
+ }
+
+ /**
+ * Make a copy of the fragment's contents starting at offset for
+ * count characters. The offset and count will be adjusted to
+ * lie within the fragment's data. The fragment's data is converted if
+ * necessary.
+ */
+ void CopyTo(char16_t* aDest, uint32_t aOffset, uint32_t aCount);
+
+ /**
+ * Return the character in the text-fragment at the given
+ * index. This always returns a char16_t.
+ */
+ char16_t CharAt(uint32_t aIndex) const {
+ MOZ_ASSERT(aIndex < mState.mLength, "bad index");
+ return mState.mIs2b ? Get2b()[aIndex]
+ : static_cast<unsigned char>(m1b[aIndex]);  // never negative
+ }
+
+ /**
+ * IsHighSurrogateFollowedByLowSurrogateAt() returns true if the character at
+ * aIndex is a high surrogate followed by a low surrogate (never for 1b text).
+ */
+ inline bool IsHighSurrogateFollowedByLowSurrogateAt(uint32_t aIndex) const {
+ MOZ_ASSERT(aIndex < mState.mLength);
+ if (!mState.mIs2b || aIndex + 1 >= mState.mLength) {
+ return false;
+ }
+ return NS_IS_SURROGATE_PAIR(Get2b()[aIndex], Get2b()[aIndex + 1]);
+ }
+
+ /**
+ * IsLowSurrogateFollowingHighSurrogateAt() returns true if the character at
+ * aIndex is a low surrogate following a high surrogate (never for 1b text).
+ */
+ inline bool IsLowSurrogateFollowingHighSurrogateAt(uint32_t aIndex) const {
+ MOZ_ASSERT(aIndex < mState.mLength);
+ if (!mState.mIs2b || !aIndex) {
+ return false;
+ }
+ return NS_IS_SURROGATE_PAIR(Get2b()[aIndex - 1], Get2b()[aIndex]);
+ }
+
+ /**
+ * ScalarValueAt() returns a Unicode scalar value at aIndex. If the character
+ * at aIndex is a high surrogate followed by a low surrogate, returns the code
+ * point for the pair. If it is a low surrogate, or a high surrogate that is
+ * not followed by a low surrogate, returns 0.
+ */
+ inline char32_t ScalarValueAt(uint32_t aIndex) const {
+ MOZ_ASSERT(aIndex < mState.mLength);
+ if (!mState.mIs2b) {
+ return static_cast<unsigned char>(m1b[aIndex]);
+ }
+ char16_t ch = Get2b()[aIndex];
+ if (!IS_SURROGATE(ch)) {
+ return ch;
+ }
+ if (aIndex + 1 < mState.mLength && NS_IS_HIGH_SURROGATE(ch)) {
+ char16_t nextCh = Get2b()[aIndex + 1];
+ if (NS_IS_LOW_SURROGATE(nextCh)) {
+ return SURROGATE_TO_UCS4(ch, nextCh);
+ }
+ }
+ return 0;  // lone or unpaired surrogate
+ }
+
+ void SetBidi(bool aBidi) { mState.mIsBidi = aBidi; }  // sets the flag directly, no scan
+
+ struct FragmentBits {
+ // uint32_t to ensure that the values are unsigned, because we
+ // want 0/1, not 0/-1!
+ // Making these bool causes Windows to not actually pack them,
+ // which causes crashes because we assume this structure is no more than
+ // 32 bits!
+ uint32_t mInHeap : 1;
+ uint32_t mIs2b : 1;  // m2b (rather than m1b) is the valid pointer
+ uint32_t mIsBidi : 1;  // reported by IsBidi()
+ // Note that when you change the bits of mLength, you also need to change
+ // NS_MAX_TEXT_FRAGMENT_LENGTH.
+ uint32_t mLength : 29;
+ };
+
+ size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
+
+ /**
+ * Check whether the text in this fragment is the same as the text in the
+ * other fragment.
+ */
+ [[nodiscard]] bool TextEquals(const nsTextFragment& aOther) const;
+
+ private:
+ void ReleaseText();
+
+ /**
+ * Scan the contents of the fragment and turn on mState.mIsBidi if it
+ * includes any Bidi characters.
+ */
+ void UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength);
+
+ union {
+ nsStringBuffer* m2b;  // valid when mState.mIs2b is set
+ const char* m1b; // This is const since it can point to shared data
+ };
+
+ union {
+ uint32_t mAllBits;  // all of mState as one word; zeroed by the constructor
+ FragmentBits mState;
+ };
+};
+
+#endif /* nsTextFragment_h___ */