diff options
Diffstat (limited to 'dom/base/nsTextFragment.cpp')
-rw-r--r-- | dom/base/nsTextFragment.cpp | 538 |
1 files changed, 538 insertions, 0 deletions
diff --git a/dom/base/nsTextFragment.cpp b/dom/base/nsTextFragment.cpp new file mode 100644 index 0000000000..5cba2577b8 --- /dev/null +++ b/dom/base/nsTextFragment.cpp @@ -0,0 +1,538 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * A class which represents a fragment of text (eg inside a text + * node); if only codepoints below 256 are used, the text is stored as + * a char*; otherwise the text is stored as a char16_t* + */ + +#include "nsTextFragment.h" +#include "nsCRT.h" +#include "nsReadableUtils.h" +#include "nsBidiUtils.h" +#include "nsUnicharUtils.h" +#include "mozilla/CheckedInt.h" +#include "mozilla/MemoryReporting.h" +#include "mozilla/SSE.h" +#include "mozilla/ppc.h" +#include "nsTextFragmentImpl.h" +#include <algorithm> + +#define TEXTFRAG_WHITE_AFTER_NEWLINE 50 +#define TEXTFRAG_MAX_NEWLINES 7 + +// Static buffer used for common fragments +static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1]; +static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1]; +static char sSingleCharSharedString[256]; + +using namespace mozilla; + +// static +nsresult nsTextFragment::Init() { + // Create whitespace strings + uint32_t i; + for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) { + sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE]; + sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE]; + sSpaceSharedString[i][0] = ' '; + sTabSharedString[i][0] = ' '; + uint32_t j; + for (j = 1; j < 1 + i; ++j) { + sSpaceSharedString[i][j] = '\n'; + sTabSharedString[i][j] = '\n'; + } + for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) { + sSpaceSharedString[i][j] = ' '; + sTabSharedString[i][j] = '\t'; + } + } + + // Create single-char strings + for (i = 0; i < 256; ++i) { + sSingleCharSharedString[i] = i; + } + + return NS_OK; +} + +// static +void nsTextFragment::Shutdown() { + uint32_t i; + for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) { + delete[] sSpaceSharedString[i]; + delete[] sTabSharedString[i]; + sSpaceSharedString[i] = nullptr; + sTabSharedString[i] = nullptr; + } +} + +nsTextFragment::~nsTextFragment() { + ReleaseText(); + MOZ_COUNT_DTOR(nsTextFragment); +} + +void nsTextFragment::ReleaseText() { + if (mState.mIs2b) { + NS_RELEASE(m2b); + } else if (mState.mLength && m1b && mState.mInHeap) { + free(const_cast<char*>(m1b)); + } + + m1b = nullptr; + mState.mIsBidi = false; + + // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits; + mAllBits = 0; +} + +nsTextFragment& nsTextFragment::operator=(const nsTextFragment& aOther) { + ReleaseText(); + + if (aOther.mState.mLength) { + if (!aOther.mState.mInHeap) { + MOZ_ASSERT(!aOther.mState.mIs2b); + m1b = aOther.m1b; + } else if (aOther.mState.mIs2b) { + m2b = aOther.m2b; + NS_ADDREF(m2b); + } else { + m1b = static_cast<char*>(malloc(aOther.mState.mLength)); + if (m1b) { + memcpy(const_cast<char*>(m1b), aOther.m1b, aOther.mState.mLength); + } else { + // allocate a buffer for a single REPLACEMENT CHARACTER + m2b = nsStringBuffer::Alloc(sizeof(char16_t) * 2).take(); + if (!m2b) { + MOZ_CRASH("OOM!"); + } + char16_t* data = static_cast<char16_t*>(m2b->Data()); + data[0] = 0xFFFD; // REPLACEMENT CHARACTER + data[1] = char16_t(0); + mState.mIs2b = true; + mState.mInHeap = true; + mState.mLength = 1; + return *this; + } + } + + mAllBits = aOther.mAllBits; + } + + return *this; +} + +static inline int32_t FirstNon8BitUnvectorized(const char16_t* str, + const char16_t* end) { + using p = Non8BitParameters<sizeof(size_t)>; + const size_t mask = p::mask(); + const uint32_t alignMask = p::alignMask(); + const uint32_t numUnicharsPerWord = p::numUnicharsPerWord(); + const int32_t len = end - str; + int32_t i = 0; + + // Align ourselves to a word boundary. + int32_t alignLen = std::min( + len, int32_t(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(char16_t))); + for (; i < alignLen; i++) { + if (str[i] > 255) return i; + } + + // Check one word at a time. + const int32_t wordWalkEnd = + ((len - i) / numUnicharsPerWord) * numUnicharsPerWord; + for (; i < wordWalkEnd; i += numUnicharsPerWord) { + const size_t word = *reinterpret_cast<const size_t*>(str + i); + if (word & mask) return i; + } + + // Take care of the remainder one character at a time. + for (; i < len; i++) { + if (str[i] > 255) return i; + } + + return -1; +} + +#if defined(MOZILLA_MAY_SUPPORT_SSE2) +# include "nsTextFragmentGenericFwd.h" +#endif + +#ifdef __powerpc__ +namespace mozilla { +namespace VMX { +int32_t FirstNon8Bit(const char16_t* str, const char16_t* end); +} // namespace VMX +} // namespace mozilla +#endif + +/* + * This function returns -1 if all characters in str are 8 bit characters. + * Otherwise, it returns a value less than or equal to the index of the first + * non-8bit character in str. For example, if first non-8bit character is at + * position 25, it may return 25, or for example 24, or 16. But it guarantees + * there is no non-8bit character before returned value. + */ +static inline int32_t FirstNon8Bit(const char16_t* str, const char16_t* end) { +#ifdef MOZILLA_MAY_SUPPORT_SSE2 + if (mozilla::supports_sse2()) { + return mozilla::FirstNon8Bit<xsimd::sse2>(str, end); + } +#elif defined(__powerpc__) + if (mozilla::supports_vmx()) { + return mozilla::VMX::FirstNon8Bit(str, end); + } +#endif + + return FirstNon8BitUnvectorized(str, end); +} + +bool nsTextFragment::SetTo(const char16_t* aBuffer, uint32_t aLength, + bool aUpdateBidi, bool aForce2b) { + if (MOZ_UNLIKELY(aLength > NS_MAX_TEXT_FRAGMENT_LENGTH)) { + return false; + } + + if (aForce2b && mState.mIs2b && !m2b->IsReadonly()) { + uint32_t storageSize = m2b->StorageSize(); + uint32_t neededSize = aLength * sizeof(char16_t); + if (!neededSize) { + if (storageSize < AutoStringDefaultStorageSize) { + // If we're storing small enough nsStringBuffer, let's preserve it. + + static_cast<char16_t*>(m2b->Data())[0] = char16_t(0); + mState.mLength = 0; + mState.mIsBidi = false; + return true; + } + } else if ((neededSize < storageSize) && + ((storageSize / 2) < + (neededSize + AutoStringDefaultStorageSize))) { + // Don't try to reuse the existing nsStringBuffer, if it would have + // lots of unused space. + + memcpy(m2b->Data(), aBuffer, neededSize); + static_cast<char16_t*>(m2b->Data())[aLength] = char16_t(0); + mState.mLength = aLength; + mState.mIsBidi = false; + if (aUpdateBidi) { + UpdateBidiFlag(aBuffer, aLength); + } + return true; + } + } + + ReleaseText(); + + if (aLength == 0) { + return true; + } + + char16_t firstChar = *aBuffer; + if (!aForce2b && aLength == 1 && firstChar < 256) { + m1b = sSingleCharSharedString + firstChar; + mState.mInHeap = false; + mState.mIs2b = false; + mState.mLength = 1; + + return true; + } + + const char16_t* ucp = aBuffer; + const char16_t* uend = aBuffer + aLength; + + // Check if we can use a shared string + if (!aForce2b && + aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES && + (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) { + if (firstChar == ' ') { + ++ucp; + } + + const char16_t* start = ucp; + while (ucp < uend && *ucp == '\n') { + ++ucp; + } + const char16_t* endNewLine = ucp; + + char16_t space = ucp < uend && *ucp == '\t' ? '\t' : ' '; + while (ucp < uend && *ucp == space) { + ++ucp; + } + + if (ucp == uend && endNewLine - start <= TEXTFRAG_MAX_NEWLINES && + ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) { + char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString; + m1b = strings[endNewLine - start]; + + // If we didn't find a space in the beginning, skip it now. + if (firstChar != ' ') { + ++m1b; + } + + mState.mInHeap = false; + mState.mIs2b = false; + mState.mLength = aLength; + + return true; + } + } + + // See if we need to store the data in ucs2 or not + int32_t first16bit = aForce2b ? 0 : FirstNon8Bit(ucp, uend); + + if (first16bit != -1) { // aBuffer contains no non-8bit character + // Use ucs2 storage because we have to + CheckedUint32 m2bSize = CheckedUint32(aLength) + 1; + if (!m2bSize.isValid()) { + return false; + } + m2bSize *= sizeof(char16_t); + if (!m2bSize.isValid()) { + return false; + } + + m2b = nsStringBuffer::Alloc(m2bSize.value()).take(); + if (!m2b) { + return false; + } + memcpy(m2b->Data(), aBuffer, aLength * sizeof(char16_t)); + static_cast<char16_t*>(m2b->Data())[aLength] = char16_t(0); + + mState.mIs2b = true; + if (aUpdateBidi) { + UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit); + } + + } else { + // Use 1 byte storage because we can + char* buff = static_cast<char*>(malloc(aLength)); + if (!buff) { + return false; + } + + // Copy data + LossyConvertUtf16toLatin1(Span(aBuffer, aLength), Span(buff, aLength)); + m1b = buff; + mState.mIs2b = false; + } + + // Setup our fields + mState.mInHeap = true; + mState.mLength = aLength; + + return true; +} + +void nsTextFragment::CopyTo(char16_t* aDest, uint32_t aOffset, + uint32_t aCount) { + const CheckedUint32 endOffset = CheckedUint32(aOffset) + aCount; + if (!endOffset.isValid() || endOffset.value() > GetLength()) { + aCount = mState.mLength - aOffset; + } + + if (aCount) { + if (mState.mIs2b) { + memcpy(aDest, Get2b() + aOffset, sizeof(char16_t) * aCount); + } else { + const char* cp = m1b + aOffset; + ConvertLatin1toUtf16(Span(cp, aCount), Span(aDest, aCount)); + } + } +} + +bool nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength, + bool aUpdateBidi, bool aForce2b) { + if (!aLength) { + return true; + } + + // This is a common case because some callsites create a textnode + // with a value by creating the node and then calling AppendData. + if (mState.mLength == 0) { + return SetTo(aBuffer, aLength, aUpdateBidi, aForce2b); + } + + // Should we optimize for aData.Length() == 0? + + // FYI: Don't use CheckedInt in this method since here is very hot path + // in some performance tests. + if (NS_MAX_TEXT_FRAGMENT_LENGTH - mState.mLength < aLength) { + return false; // Would be overflown if we'd keep handling. + } + + if (mState.mIs2b) { + size_t size = mState.mLength + aLength + 1; + if (SIZE_MAX / sizeof(char16_t) < size) { + return false; // Would be overflown if we'd keep handling. + } + size *= sizeof(char16_t); + + // Already a 2-byte string so the result will be too + nsStringBuffer* buff = nullptr; + nsStringBuffer* bufferToRelease = nullptr; + if (m2b->IsReadonly()) { + buff = nsStringBuffer::Alloc(size).take(); + if (!buff) { + return false; + } + bufferToRelease = m2b; + memcpy(static_cast<char16_t*>(buff->Data()), m2b->Data(), + mState.mLength * sizeof(char16_t)); + } else { + buff = nsStringBuffer::Realloc(m2b, size); + if (!buff) { + return false; + } + } + + char16_t* data = static_cast<char16_t*>(buff->Data()); + memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t)); + mState.mLength += aLength; + m2b = buff; + data[mState.mLength] = char16_t(0); + + NS_IF_RELEASE(bufferToRelease); + + if (aUpdateBidi) { + UpdateBidiFlag(aBuffer, aLength); + } + + return true; + } + + // Current string is a 1-byte string, check if the new data fits in one byte + // too. + int32_t first16bit = aForce2b ? 0 : FirstNon8Bit(aBuffer, aBuffer + aLength); + + if (first16bit != -1) { // aBuffer contains no non-8bit character + size_t size = mState.mLength + aLength + 1; + if (SIZE_MAX / sizeof(char16_t) < size) { + return false; // Would be overflown if we'd keep handling. + } + size *= sizeof(char16_t); + + // The old data was 1-byte, but the new is not so we have to expand it + // all to 2-byte + nsStringBuffer* buff = nsStringBuffer::Alloc(size).take(); + if (!buff) { + return false; + } + + // Copy data into buff + char16_t* data = static_cast<char16_t*>(buff->Data()); + ConvertLatin1toUtf16(Span(m1b, mState.mLength), Span(data, mState.mLength)); + + memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t)); + mState.mLength += aLength; + mState.mIs2b = true; + + if (mState.mInHeap) { + free(const_cast<char*>(m1b)); + } + data[mState.mLength] = char16_t(0); + m2b = buff; + + mState.mInHeap = true; + + if (aUpdateBidi) { + UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit); + } + + return true; + } + + // The new and the old data is all 1-byte + size_t size = mState.mLength + aLength; + MOZ_ASSERT(sizeof(char) == 1); + char* buff; + if (mState.mInHeap) { + buff = static_cast<char*>(realloc(const_cast<char*>(m1b), size)); + if (!buff) { + return false; + } + } else { + buff = static_cast<char*>(malloc(size)); + if (!buff) { + return false; + } + + memcpy(buff, m1b, mState.mLength); + mState.mInHeap = true; + } + + // Copy aBuffer into buff. + LossyConvertUtf16toLatin1(Span(aBuffer, aLength), + Span(buff + mState.mLength, aLength)); + + m1b = buff; + mState.mLength += aLength; + + return true; +} + +/* virtual */ +size_t nsTextFragment::SizeOfExcludingThis( + mozilla::MallocSizeOf aMallocSizeOf) const { + if (Is2b()) { + return m2b->SizeOfIncludingThisIfUnshared(aMallocSizeOf); + } + + if (mState.mInHeap) { + return aMallocSizeOf(m1b); + } + + return 0; +} + +// To save time we only do this when we really want to know, not during +// every allocation +void nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength) { + if (mState.mIs2b && !mState.mIsBidi) { + if (HasRTLChars(Span(aBuffer, aLength))) { + mState.mIsBidi = true; + } + } +} + +bool nsTextFragment::TextEquals(const nsTextFragment& aOther) const { + if (!Is2b()) { + // We're 1-byte. + if (!aOther.Is2b()) { + nsDependentCSubstring ourStr(Get1b(), GetLength()); + return ourStr.Equals( + nsDependentCSubstring(aOther.Get1b(), aOther.GetLength())); + } + + // We're 1-byte, the other thing is 2-byte. Instead of implementing a + // separate codepath for this, just use our code below. + return aOther.TextEquals(*this); + } + + nsDependentSubstring ourStr(Get2b(), GetLength()); + if (aOther.Is2b()) { + return ourStr.Equals( + nsDependentSubstring(aOther.Get2b(), aOther.GetLength())); + } + + // We can't use EqualsASCII here, because the other string might not + // actually be ASCII. Just roll our own compare; do it in the simple way. + // Bug 1532356 tracks not having to roll our own. + if (GetLength() != aOther.GetLength()) { + return false; + } + + const char16_t* ourChars = Get2b(); + const char* otherChars = aOther.Get1b(); + for (uint32_t i = 0; i < GetLength(); ++i) { + if (ourChars[i] != static_cast<char16_t>(otherChars[i])) { + return false; + } + } + + return true; +} |