1 files changed, 538 insertions, 0 deletions
diff --git a/dom/base/nsTextFragment.cpp b/dom/base/nsTextFragment.cpp
new file mode 100644
index 0000000000..5cba2577b8
--- /dev/null
+++ b/dom/base/nsTextFragment.cpp
@@ -0,0 +1,538 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * A class which represents a fragment of text (eg inside a text
+ * node); if only codepoints below 256 are used, the text is stored as
+ * a char*; otherwise the text is stored as a char16_t*
+ */
+
+#include "nsTextFragment.h"
+#include "nsCRT.h"
+#include "nsReadableUtils.h"
+#include "nsBidiUtils.h"
+#include "nsUnicharUtils.h"
+#include "mozilla/CheckedInt.h"
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/SSE.h"
+#include "mozilla/ppc.h"
+#include "nsTextFragmentImpl.h"
+#include <algorithm>
+
+#define TEXTFRAG_WHITE_AFTER_NEWLINE 50  // spaces/tabs that follow the newlines in each shared string
+#define TEXTFRAG_MAX_NEWLINES 7  // longest newline run a shared string contains
+
+// Static buffers used for common fragments; filled in by nsTextFragment::Init()
+static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1];  // [i]: ' ', i newlines, then spaces
+static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1];  // [i]: ' ', i newlines, then tabs
+static char sSingleCharSharedString[256];  // [c] holds the single character with code c
+
+using namespace mozilla;
+
+// static
+nsresult nsTextFragment::Init() {
+  // Build the shared whitespace strings. Entry n of each table holds a single
+  // ' ', then n '\n' characters, then TEXTFRAG_WHITE_AFTER_NEWLINE spaces
+  // (space table) or tabs (tab table).
+  for (uint32_t n = 0; n <= TEXTFRAG_MAX_NEWLINES; ++n) {
+    const uint32_t total = 1 + n + TEXTFRAG_WHITE_AFTER_NEWLINE;
+    sSpaceSharedString[n] = new char[total];
+    sTabSharedString[n] = new char[total];
+    char* const spaces = sSpaceSharedString[n];
+    char* const tabs = sTabSharedString[n];
+
+    // Note that the tab table also starts with a space, not a tab.
+    spaces[0] = ' ';
+    tabs[0] = ' ';
+    std::fill_n(spaces + 1, n, '\n');
+    std::fill_n(tabs + 1, n, '\n');
+    std::fill_n(spaces + 1 + n, TEXTFRAG_WHITE_AFTER_NEWLINE, ' ');
+    std::fill_n(tabs + 1 + n, TEXTFRAG_WHITE_AFTER_NEWLINE, '\t');
+  }
+
+  // Single-character strings: slot c holds the character with code c.
+  for (uint32_t code = 0; code < 256; ++code) {
+    sSingleCharSharedString[code] = code;
+  }
+  return NS_OK;
+}
+
+// static
+void nsTextFragment::Shutdown() {
+  // Free the shared whitespace strings made by Init() and null out each slot.
+  uint32_t remaining = TEXTFRAG_MAX_NEWLINES + 1;
+  while (remaining--) {
+    delete[] sSpaceSharedString[remaining];
+    delete[] sTabSharedString[remaining];
+    sSpaceSharedString[remaining] = sTabSharedString[remaining] = nullptr;
+  }
+}
+
+nsTextFragment::~nsTextFragment() {
+  ReleaseText();  // release or free whatever text storage we still own
+  MOZ_COUNT_DTOR(nsTextFragment);  // presumably leak-tracking bookkeeping; macro is defined elsewhere
+}
+
+void nsTextFragment::ReleaseText() {
+  // Drop owned storage: a 2-byte string buffer, or non-empty heap 1-byte text.
+  if (!mState.mIs2b) {
+    if (mState.mInHeap && mState.mLength && m1b) {
+      free(const_cast<char*>(m1b));
+    }
+  } else {
+    NS_RELEASE(m2b);
+  }
+  m1b = nullptr;
+  mState.mIsBidi = false;
+  mAllBits = 0;  // resets mState.mIs2b, mState.mInHeap and mState.mLength
+}
+
+nsTextFragment& nsTextFragment::operator=(const nsTextFragment& aOther) {
+  // Copy aOther's text. Self-assignment must be a no-op: ReleaseText() below
+  // would otherwise drop the very storage we are about to copy from.
+  if (this == &aOther) return *this;
+  ReleaseText();
+  if (aOther.mState.mLength) {
+    if (!aOther.mState.mInHeap) {
+      MOZ_ASSERT(!aOther.mState.mIs2b);
+      m1b = aOther.m1b;  // not heap-allocated, so not owned: just alias it
+    } else if (aOther.mState.mIs2b) {
+      m2b = aOther.m2b;  // refcounted buffer: share it
+      NS_ADDREF(m2b);
+    } else {
+      m1b = static_cast<char*>(malloc(aOther.mState.mLength));
+      if (m1b) {
+        memcpy(const_cast<char*>(m1b), aOther.m1b, aOther.mState.mLength);
+      } else {
+        // allocate a buffer for a single REPLACEMENT CHARACTER
+        m2b = nsStringBuffer::Alloc(sizeof(char16_t) * 2).take();
+        if (!m2b) {
+          MOZ_CRASH("OOM!");
+        }
+        char16_t* data = static_cast<char16_t*>(m2b->Data());
+        data[0] = 0xFFFD;  // REPLACEMENT CHARACTER
+        data[1] = char16_t(0);
+        mState.mIs2b = true;
+        mState.mInHeap = true;
+        mState.mLength = 1;
+        return *this;
+      }
+    }
+    mAllBits = aOther.mAllBits;  // adopt length and all state flags at once
+  }
+  return *this;
+}
+
+static inline int32_t FirstNon8BitUnvectorized(const char16_t* str,
+                                               const char16_t* end) {  // Scalar fallback used by FirstNon8Bit.
+  using p = Non8BitParameters<sizeof(size_t)>;  // word-size constants, defined elsewhere
+  const size_t mask = p::mask();
+  const uint32_t alignMask = p::alignMask();
+  const uint32_t numUnicharsPerWord = p::numUnicharsPerWord();
+  const int32_t len = end - str;  // number of char16_t units to scan
+  int32_t i = 0;
+
+  // Align ourselves to a word boundary, checking the leading units singly.
+  int32_t alignLen = std::min(
+      len, int32_t(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(char16_t)));
+  for (; i < alignLen; i++) {
+    if (str[i] > 255) return i;
+  }
+
+  // Check one word at a time; a hit returns the word's first index.
+  const int32_t wordWalkEnd =
+      ((len - i) / numUnicharsPerWord) * numUnicharsPerWord;  // NOTE(review): a count from i used as an end index; the tail loop covers the rest
+  for (; i < wordWalkEnd; i += numUnicharsPerWord) {
+    const size_t word = *reinterpret_cast<const size_t*>(str + i);
+    if (word & mask) return i;  // presumably mask selects the bits above 0xFF of each unit — confirm
+  }
+
+  // Take care of the remainder one character at a time.
+  for (; i < len; i++) {
+    if (str[i] > 255) return i;
+  }
+
+  return -1;  // nothing in [str, end) was flagged
+}
+
+#if defined(MOZILLA_MAY_SUPPORT_SSE2)
+#  include "nsTextFragmentGenericFwd.h"
+#endif
+
+#ifdef __powerpc__
+namespace mozilla {
+namespace VMX {
+int32_t FirstNon8Bit(const char16_t* str, const char16_t* end);  // declaration only; defined outside this file
+}  // namespace VMX
+}  // namespace mozilla
+#endif
+
+/*
+ * Returns -1 if every character in [str, end) is an 8-bit character.
+ * Otherwise returns an index that is less than or equal to the index of the
+ * first non-8-bit character. For example, if the first non-8-bit character is
+ * at position 25, the result may be 25, 24, or 16; the only guarantee is that
+ * no non-8-bit character precedes the returned index.
+ */
+static inline int32_t FirstNon8Bit(const char16_t* str, const char16_t* end) {
+#ifdef MOZILLA_MAY_SUPPORT_SSE2
+  if (mozilla::supports_sse2()) {
+    return mozilla::FirstNon8Bit<xsimd::sse2>(str, end);  // SSE2 path, chosen at run time
+  }
+#elif defined(__powerpc__)
+  if (mozilla::supports_vmx()) {
+    return mozilla::VMX::FirstNon8Bit(str, end);  // VMX path, chosen at run time
+  }
+#endif
+  // No usable vectorized path: fall back to the portable scalar scan.
+  return FirstNon8BitUnvectorized(str, end);
+}
+
+bool nsTextFragment::SetTo(const char16_t* aBuffer, uint32_t aLength,
+                           bool aUpdateBidi, bool aForce2b) {  // Replaces our text; returns false on failure.
+  if (MOZ_UNLIKELY(aLength > NS_MAX_TEXT_FRAGMENT_LENGTH)) {
+    return false;  // longer than the maximum fragment length
+  }
+  // Fast path: with forced 2b storage and a writable 2b buffer, try reusing it.
+  if (aForce2b && mState.mIs2b && !m2b->IsReadonly()) {
+    uint32_t storageSize = m2b->StorageSize();
+    uint32_t neededSize = aLength * sizeof(char16_t);
+    if (!neededSize) {
+      if (storageSize < AutoStringDefaultStorageSize) {
+        // If we're storing small enough nsStringBuffer, let's preserve it.
+
+        static_cast<char16_t*>(m2b->Data())[0] = char16_t(0);
+        mState.mLength = 0;
+        mState.mIsBidi = false;
+        return true;
+      }
+    } else if ((neededSize < storageSize) &&
+               ((storageSize / 2) <
+                (neededSize + AutoStringDefaultStorageSize))) {
+      // Reuse the existing nsStringBuffer, but only when doing so would not
+      // leave lots of unused space in it.
+
+      memcpy(m2b->Data(), aBuffer, neededSize);
+      static_cast<char16_t*>(m2b->Data())[aLength] = char16_t(0);
+      mState.mLength = aLength;
+      mState.mIsBidi = false;
+      if (aUpdateBidi) {
+        UpdateBidiFlag(aBuffer, aLength);
+      }
+      return true;
+    }
+  }
+  // No reuse possible: drop the old storage and build the new text from scratch.
+  ReleaseText();
+
+  if (aLength == 0) {
+    return true;  // empty text needs no storage
+  }
+  // A single 8-bit character points into the static single-char table.
+  char16_t firstChar = *aBuffer;
+  if (!aForce2b && aLength == 1 && firstChar < 256) {
+    m1b = sSingleCharSharedString + firstChar;
+    mState.mInHeap = false;
+    mState.mIs2b = false;
+    mState.mLength = 1;
+
+    return true;
+  }
+
+  const char16_t* ucp = aBuffer;
+  const char16_t* uend = aBuffer + aLength;
+
+  // Check if we can use a shared string ([' '] + newlines + spaces-or-tabs)
+  if (!aForce2b &&
+      aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES &&
+      (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) {
+    if (firstChar == ' ') {
+      ++ucp;  // the shared strings begin with one space
+    }
+
+    const char16_t* start = ucp;
+    while (ucp < uend && *ucp == '\n') {
+      ++ucp;
+    }
+    const char16_t* endNewLine = ucp;  // [start, endNewLine) is the newline run
+
+    char16_t space = ucp < uend && *ucp == '\t' ? '\t' : ' ';
+    while (ucp < uend && *ucp == space) {
+      ++ucp;
+    }
+    // Usable only if the whole input matched and both runs fit the tables.
+    if (ucp == uend && endNewLine - start <= TEXTFRAG_MAX_NEWLINES &&
+        ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) {
+      char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString;
+      m1b = strings[endNewLine - start];
+
+      // If the text has no leading space, skip the shared string's one now.
+      if (firstChar != ' ') {
+        ++m1b;
+      }
+
+      mState.mInHeap = false;
+      mState.mIs2b = false;
+      mState.mLength = aLength;
+
+      return true;
+    }
+  }
+
+  // See if we need to store the data in ucs2 or not
+  int32_t first16bit = aForce2b ? 0 : FirstNon8Bit(ucp, uend);
+  // first16bit is relative to ucp (>= aBuffer): a safe lower bound from aBuffer.
+  if (first16bit != -1) {  // aBuffer has a non-8bit character, or aForce2b is set
+    // Use ucs2 storage because we have to
+    CheckedUint32 m2bSize = CheckedUint32(aLength) + 1;  // +1 for the null terminator
+    if (!m2bSize.isValid()) {
+      return false;
+    }
+    m2bSize *= sizeof(char16_t);
+    if (!m2bSize.isValid()) {
+      return false;
+    }
+
+    m2b = nsStringBuffer::Alloc(m2bSize.value()).take();
+    if (!m2b) {
+      return false;
+    }
+    memcpy(m2b->Data(), aBuffer, aLength * sizeof(char16_t));
+    static_cast<char16_t*>(m2b->Data())[aLength] = char16_t(0);
+
+    mState.mIs2b = true;  // must be set first: UpdateBidiFlag() ignores 1b text
+    if (aUpdateBidi) {
+      UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
+    }
+
+  } else {
+    // Use 1 byte storage because we can
+    char* buff = static_cast<char*>(malloc(aLength));
+    if (!buff) {
+      return false;
+    }
+
+    // Copy data, narrowing each UTF-16 unit to one Latin-1 byte
+    LossyConvertUtf16toLatin1(Span(aBuffer, aLength), Span(buff, aLength));
+    m1b = buff;
+    mState.mIs2b = false;
+  }
+
+  // Setup our fields
+  mState.mInHeap = true;
+  mState.mLength = aLength;
+
+  return true;
+}
+
+void nsTextFragment::CopyTo(char16_t* aDest, uint32_t aOffset,
+                            uint32_t aCount) {
+  // Copy at most aCount code units starting at aOffset, clamped to our length.
+  // An offset at or past the end copies nothing rather than wrapping the count.
+  const uint32_t length = GetLength();
+  aCount = aOffset < length ? std::min(aCount, length - aOffset) : 0;
+
+  if (aCount) {
+    if (mState.mIs2b) {
+      memcpy(aDest, Get2b() + aOffset, sizeof(char16_t) * aCount);
+    } else {
+      const char* cp = m1b + aOffset;
+      ConvertLatin1toUtf16(Span(cp, aCount), Span(aDest, aCount));
+    }
+  }
+}
+
+bool nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength,
+                            bool aUpdateBidi, bool aForce2b) {  // Appends aBuffer to our text; returns false on failure.
+  if (!aLength) {
+    return true;
+  }
+
+  // This is a common case because some callsites create a textnode
+  // with a value by creating the node and then calling AppendData.
+  if (mState.mLength == 0) {
+    return SetTo(aBuffer, aLength, aUpdateBidi, aForce2b);
+  }
+
+  // (An empty append was already handled by the early return above.)
+
+  // FYI: Don't use CheckedInt in this method since here is very hot path
+  //      in some performance tests.
+  if (NS_MAX_TEXT_FRAGMENT_LENGTH - mState.mLength < aLength) {
+    return false;  // Would be overflown if we'd keep handling.
+  }
+
+  if (mState.mIs2b) {
+    size_t size = mState.mLength + aLength + 1;  // +1 for the null terminator
+    if (SIZE_MAX / sizeof(char16_t) < size) {
+      return false;  // Would be overflown if we'd keep handling.
+    }
+    size *= sizeof(char16_t);
+
+    // Already a 2-byte string so the result will be too
+    nsStringBuffer* buff = nullptr;
+    nsStringBuffer* bufferToRelease = nullptr;
+    if (m2b->IsReadonly()) {  // cannot grow in place: copy into a new buffer
+      buff = nsStringBuffer::Alloc(size).take();
+      if (!buff) {
+        return false;
+      }
+      bufferToRelease = m2b;  // released once the new buffer is installed
+      memcpy(static_cast<char16_t*>(buff->Data()), m2b->Data(),
+             mState.mLength * sizeof(char16_t));
+    } else {
+      buff = nsStringBuffer::Realloc(m2b, size);
+      if (!buff) {
+        return false;
+      }
+    }
+
+    char16_t* data = static_cast<char16_t*>(buff->Data());
+    memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t));
+    mState.mLength += aLength;
+    m2b = buff;
+    data[mState.mLength] = char16_t(0);
+
+    NS_IF_RELEASE(bufferToRelease);
+
+    if (aUpdateBidi) {
+      UpdateBidiFlag(aBuffer, aLength);  // only the appended part is scanned
+    }
+
+    return true;
+  }
+
+  // Current string is a 1-byte string, check if the new data fits in one byte
+  // too.
+  int32_t first16bit = aForce2b ? 0 : FirstNon8Bit(aBuffer, aBuffer + aLength);
+
+  if (first16bit != -1) {  // aBuffer has a non-8bit character, or aForce2b is set
+    size_t size = mState.mLength + aLength + 1;  // +1 for the null terminator
+    if (SIZE_MAX / sizeof(char16_t) < size) {
+      return false;  // Would be overflown if we'd keep handling.
+    }
+    size *= sizeof(char16_t);
+
+    // The old data was 1-byte, but the new is not so we have to expand it
+    // all to 2-byte
+    nsStringBuffer* buff = nsStringBuffer::Alloc(size).take();
+    if (!buff) {
+      return false;
+    }
+
+    // Copy data into buff, widening the existing 1-byte text first
+    char16_t* data = static_cast<char16_t*>(buff->Data());
+    ConvertLatin1toUtf16(Span(m1b, mState.mLength), Span(data, mState.mLength));
+
+    memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t));
+    mState.mLength += aLength;
+    mState.mIs2b = true;
+
+    if (mState.mInHeap) {  // text that is not in the heap is not ours to free
+      free(const_cast<char*>(m1b));
+    }
+    data[mState.mLength] = char16_t(0);
+    m2b = buff;
+
+    mState.mInHeap = true;
+
+    if (aUpdateBidi) {
+      UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
+    }
+
+    return true;
+  }
+
+  // The new and the old data are all 1-byte
+  size_t size = mState.mLength + aLength;
+  MOZ_ASSERT(sizeof(char) == 1);
+  char* buff;
+  if (mState.mInHeap) {
+    buff = static_cast<char*>(realloc(const_cast<char*>(m1b), size));
+    if (!buff) {
+      return false;
+    }
+  } else {
+    buff = static_cast<char*>(malloc(size));  // old text is not in the heap: copy it into a buffer we own
+    if (!buff) {
+      return false;
+    }
+
+    memcpy(buff, m1b, mState.mLength);
+    mState.mInHeap = true;
+  }
+
+  // Copy aBuffer into buff, narrowing each unit to one byte.
+  LossyConvertUtf16toLatin1(Span(aBuffer, aLength),
+                            Span(buff + mState.mLength, aLength));
+
+  m1b = buff;
+  mState.mLength += aLength;
+
+  return true;
+}
+
+/* virtual */
+size_t nsTextFragment::SizeOfExcludingThis(
+    mozilla::MallocSizeOf aMallocSizeOf) const {
+  // Measure only storage reached through this fragment: the string buffer for
+  // 2-byte text, or the malloc'ed block for heap-allocated 1-byte text. Text
+  // that is not in the heap (mInHeap unset) contributes nothing.
+  if (!Is2b()) {
+    return mState.mInHeap ? aMallocSizeOf(m1b) : 0;
+  }
+
+  // Named "IfUnshared": presumably a shared buffer is not counted here.
+  return m2b->SizeOfIncludingThisIfUnshared(aMallocSizeOf);
+}
+
+// Sets the bidi flag if aBuffer contains RTL characters. To save time this is
+// done only when a caller really wants to know, not during every allocation.
+void nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength) {
+  // Nothing to do for 1-byte text, or when the flag is already set.
+  if (!mState.mIs2b || mState.mIsBidi) {
+    return;
+  }
+  mState.mIsBidi = HasRTLChars(Span(aBuffer, aLength));
+}
+
+bool nsTextFragment::TextEquals(const nsTextFragment& aOther) const {
+  // True when both fragments hold the same text, whatever their storage width.
+  if (!Is2b()) {
+    // We're 1-byte.
+    if (!aOther.Is2b()) {
+      nsDependentCSubstring ourStr(Get1b(), GetLength());
+      return ourStr.Equals(
+          nsDependentCSubstring(aOther.Get1b(), aOther.GetLength()));
+    }
+    // We're 1-byte, the other thing is 2-byte.  Instead of implementing a
+    // separate codepath for this, just use our code below.
+    return aOther.TextEquals(*this);
+  }
+
+  nsDependentSubstring ourStr(Get2b(), GetLength());
+  if (aOther.Is2b()) {
+    return ourStr.Equals(
+        nsDependentSubstring(aOther.Get2b(), aOther.GetLength()));
+  }
+
+  // We can't use EqualsASCII here, because the other string might not
+  // actually be ASCII.  Just roll our own compare; do it in the simple way.
+  // Bug 1532356 tracks not having to roll our own.
+  if (GetLength() != aOther.GetLength()) {
+    return false;
+  }
+  const char16_t* ourChars = Get2b();
+  const char* otherChars = aOther.Get1b();
+  for (uint32_t i = 0; i < GetLength(); ++i) {
+    // Widen through unsigned char: where plain char is signed, a Latin-1 byte
+    // >= 0x80 would sign-extend (0xE9 -> 0xFFE9) and never match 0x00E9.
+    if (ourChars[i] != static_cast<unsigned char>(otherChars[i])) {
+      return false;
+    }
+  }
+  return true;
+}