diff options
Diffstat (limited to 'dom/base/nsLineBreaker.cpp')
-rw-r--r-- | dom/base/nsLineBreaker.cpp | 508 |
1 files changed, 508 insertions, 0 deletions
diff --git a/dom/base/nsLineBreaker.cpp b/dom/base/nsLineBreaker.cpp new file mode 100644 index 0000000000..c582a5b203 --- /dev/null +++ b/dom/base/nsLineBreaker.cpp @@ -0,0 +1,508 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsLineBreaker.h" +#include "nsContentUtils.h" +#include "gfxTextRun.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values +#include "nsHyphenationManager.h" +#include "nsHyphenator.h" +#include "mozilla/gfx/2D.h" +#include "mozilla/intl/LineBreaker.h" +#include "mozilla/intl/MozLocale.h" + +using mozilla::intl::LineBreaker; +using mozilla::intl::Locale; + +nsLineBreaker::nsLineBreaker() + : mCurrentWordLanguage(nullptr), + mCurrentWordContainsMixedLang(false), + mCurrentWordContainsComplexChar(false), + mScriptIsChineseOrJapanese(false), + mAfterBreakableSpace(false), + mBreakHere(false), + mWordBreak(LineBreaker::WordBreak::Normal), + mStrictness(LineBreaker::Strictness::Auto), + mWordContinuation(false) {} + +nsLineBreaker::~nsLineBreaker() { + NS_ASSERTION(mCurrentWord.Length() == 0, + "Should have Reset() before destruction!"); +} + +static void SetupCapitalization(const char16_t* aWord, uint32_t aLength, + bool* aCapitalization) { + // Capitalize the first alphanumeric character after a space or start + // of the word. + // The only space character a word can contain is NBSP. + bool capitalizeNextChar = true; + for (uint32_t i = 0; i < aLength; ++i) { + uint32_t ch = aWord[i]; + if (capitalizeNextChar) { + if (i + 1 < aLength && NS_IS_SURROGATE_PAIR(ch, aWord[i + 1])) { + ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]); + } + if (nsContentUtils::IsAlphanumeric(ch)) { + aCapitalization[i] = true; + capitalizeNextChar = false; + } + if (!IS_IN_BMP(ch)) { + ++i; + } + } + if (ch == 0xA0 /*NBSP*/) { + capitalizeNextChar = true; + } + } +} + +nsresult nsLineBreaker::FlushCurrentWord() { + uint32_t length = mCurrentWord.Length(); + AutoTArray<uint8_t, 4000> breakState; + // XXX(Bug 1631371) Check if this should use a fallible operation as it + // pretended earlier. + breakState.AppendElements(length); + + nsTArray<bool> capitalizationState; + + if (mStrictness == LineBreaker::Strictness::Anywhere) { + memset(breakState.Elements(), + gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL, + length * sizeof(uint8_t)); + } else if (!mCurrentWordContainsComplexChar) { + // For break-strict set everything internal to "break", otherwise + // to "no break"! + memset(breakState.Elements(), + mWordBreak == LineBreaker::WordBreak::BreakAll + ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL + : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE, + length * sizeof(uint8_t)); + } else { + nsContentUtils::LineBreaker()->GetJISx4051Breaks( + mCurrentWord.Elements(), length, mWordBreak, mStrictness, + mScriptIsChineseOrJapanese, breakState.Elements()); + } + + bool autoHyphenate = mCurrentWordLanguage && !mCurrentWordContainsMixedLang; + uint32_t i; + for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) { + TextItem* ti = &mTextItems[i]; + if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) { + autoHyphenate = false; + } + } + if (autoHyphenate) { + RefPtr<nsHyphenator> hyphenator = + nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage); + if (hyphenator) { + FindHyphenationPoints(hyphenator, mCurrentWord.Elements(), + mCurrentWord.Elements() + length, + breakState.Elements()); + } + } + + uint32_t offset = 0; + for (i = 0; i < mTextItems.Length(); ++i) { + TextItem* ti = &mTextItems[i]; + NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?"); + + if ((ti->mFlags & BREAK_SUPPRESS_INITIAL) && ti->mSinkOffset == 0) { + breakState[offset] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE; + } + if (ti->mFlags & BREAK_SUPPRESS_INSIDE) { + uint32_t exclude = ti->mSinkOffset == 0 ? 1 : 0; + memset(breakState.Elements() + offset + exclude, + gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE, + (ti->mLength - exclude) * sizeof(uint8_t)); + } + + // Don't set the break state for the first character of the word, because + // it was already set correctly earlier and we don't know what the true + // value should be. + uint32_t skipSet = i == 0 ? 1 : 0; + if (ti->mSink) { + ti->mSink->SetBreaks(ti->mSinkOffset + skipSet, ti->mLength - skipSet, + breakState.Elements() + offset + skipSet); + + if (!mWordContinuation && (ti->mFlags & BREAK_NEED_CAPITALIZATION)) { + if (capitalizationState.Length() == 0) { + // XXX(Bug 1631371) Check if this should use a fallible operation as + // it pretended earlier. + capitalizationState.AppendElements(length); + memset(capitalizationState.Elements(), false, length * sizeof(bool)); + SetupCapitalization(mCurrentWord.Elements(), length, + capitalizationState.Elements()); + } + ti->mSink->SetCapitalization(ti->mSinkOffset, ti->mLength, + capitalizationState.Elements() + offset); + } + } + + offset += ti->mLength; + } + + mCurrentWord.Clear(); + mTextItems.Clear(); + mCurrentWordContainsComplexChar = false; + mCurrentWordContainsMixedLang = false; + mCurrentWordLanguage = nullptr; + mWordContinuation = false; + return NS_OK; +} + +// If the aFlags parameter to AppendText has all these bits set, +// then we don't need to worry about finding break opportunities +// in the appended text. +#define NO_BREAKS_NEEDED_FLAGS \ + (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | \ + BREAK_SKIP_SETTING_NO_BREAKS) + +nsresult nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage, + const char16_t* aText, uint32_t aLength, + uint32_t aFlags, nsILineBreakSink* aSink) { + NS_ASSERTION(aLength > 0, "Appending empty text..."); + + uint32_t offset = 0; + + // Continue the current word + if (mCurrentWord.Length() > 0) { + NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, + "These should not be set"); + + while (offset < aLength && !IsSpace(aText[offset])) { + mCurrentWord.AppendElement(aText[offset]); + if (!mCurrentWordContainsComplexChar && IsComplexChar(aText[offset])) { + mCurrentWordContainsComplexChar = true; + } + UpdateCurrentWordLanguage(aHyphenationLanguage); + ++offset; + } + + if (offset > 0) { + mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags)); + } + + if (offset == aLength) return NS_OK; + + // We encountered whitespace, so we're done with this word + nsresult rv = FlushCurrentWord(); + if (NS_FAILED(rv)) return rv; + } + + AutoTArray<uint8_t, 4000> breakState; + if (aSink) { + // XXX(Bug 1631371) Check if this should use a fallible operation as it + // pretended earlier. + breakState.AppendElements(aLength); + } + + bool noCapitalizationNeeded = true; + nsTArray<bool> capitalizationState; + if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) { + // XXX(Bug 1631371) Check if this should use a fallible operation as it + // pretended earlier. + capitalizationState.AppendElements(aLength); + memset(capitalizationState.Elements(), false, aLength * sizeof(bool)); + noCapitalizationNeeded = false; + } + + uint32_t start = offset; + bool noBreaksNeeded = + !aSink || ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS && + !mBreakHere && !mAfterBreakableSpace); + if (noBreaksNeeded && noCapitalizationNeeded) { + // Skip to the space before the last word, since either the break data + // here is not needed, or no breaks are set in the sink and there cannot + // be any breaks in this chunk; and we don't need to do word-initial + // capitalization. All we need is the context for the next chunk (if any). + offset = aLength; + while (offset > start) { + --offset; + if (IsSpace(aText[offset])) break; + } + } + uint32_t wordStart = offset; + bool wordHasComplexChar = false; + + RefPtr<nsHyphenator> hyphenator; + if ((aFlags & BREAK_USE_AUTO_HYPHENATION) && + !(aFlags & BREAK_SUPPRESS_INSIDE) && aHyphenationLanguage) { + hyphenator = + nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage); + } + + for (;;) { + char16_t ch = aText[offset]; + bool isSpace = IsSpace(ch); + bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE); + + if (aSink && !noBreaksNeeded) { + breakState[offset] = + mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) || + mWordBreak == LineBreaker::WordBreak::BreakAll || + mStrictness == LineBreaker::Strictness::Anywhere + ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL + : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE; + } + mBreakHere = false; + mAfterBreakableSpace = isBreakableSpace; + + if (isSpace || ch == '\n') { + if (offset > wordStart && aSink) { + if (!(aFlags & BREAK_SUPPRESS_INSIDE)) { + if (mStrictness == LineBreaker::Strictness::Anywhere) { + memset(breakState.Elements() + wordStart, + gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL, + offset - wordStart); + } else if (wordHasComplexChar) { + // Save current start-of-word state because GetJISx4051Breaks will + // set it to false + uint8_t currentStart = breakState[wordStart]; + nsContentUtils::LineBreaker()->GetJISx4051Breaks( + aText + wordStart, offset - wordStart, mWordBreak, mStrictness, + mScriptIsChineseOrJapanese, breakState.Elements() + wordStart); + breakState[wordStart] = currentStart; + } + if (hyphenator) { + FindHyphenationPoints(hyphenator, aText + wordStart, aText + offset, + breakState.Elements() + wordStart); + } + } + if (!mWordContinuation && !noCapitalizationNeeded) { + SetupCapitalization(aText + wordStart, offset - wordStart, + capitalizationState.Elements() + wordStart); + } + } + wordHasComplexChar = false; + mWordContinuation = false; + ++offset; + if (offset >= aLength) break; + wordStart = offset; + } else { + if (!wordHasComplexChar && IsComplexChar(ch)) { + wordHasComplexChar = true; + } + ++offset; + if (offset >= aLength) { + // Save this word + mCurrentWordContainsComplexChar = wordHasComplexChar; + uint32_t len = offset - wordStart; + char16_t* elems = mCurrentWord.AppendElements(len); + if (!elems) return NS_ERROR_OUT_OF_MEMORY; + memcpy(elems, aText + wordStart, sizeof(char16_t) * len); + mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags)); + // Ensure that the break-before for this word is written out + offset = wordStart + 1; + UpdateCurrentWordLanguage(aHyphenationLanguage); + break; + } + } + } + + if (aSink) { + if (!noBreaksNeeded) { + aSink->SetBreaks(start, offset - start, breakState.Elements() + start); + } + if (!noCapitalizationNeeded) { + aSink->SetCapitalization(start, offset - start, + capitalizationState.Elements() + start); + } + } + return NS_OK; +} + +void nsLineBreaker::FindHyphenationPoints(nsHyphenator* aHyphenator, + const char16_t* aTextStart, + const char16_t* aTextLimit, + uint8_t* aBreakState) { + nsDependentSubstring string(aTextStart, aTextLimit); + AutoTArray<bool, 200> hyphens; + if (NS_SUCCEEDED(aHyphenator->Hyphenate(string, hyphens))) { + for (uint32_t i = 0; i + 1 < string.Length(); ++i) { + if (hyphens[i]) { + aBreakState[i + 1] = + gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN; + } + } + } +} + +nsresult nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage, + const uint8_t* aText, uint32_t aLength, + uint32_t aFlags, nsILineBreakSink* aSink) { + NS_ASSERTION(aLength > 0, "Appending empty text..."); + + if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) { + // Defer to the Unicode path if capitalization or hyphenation is required + nsAutoString str; + const char* cp = reinterpret_cast<const char*>(aText); + CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str); + return AppendText(aHyphenationLanguage, str.get(), aLength, aFlags, aSink); + } + + uint32_t offset = 0; + + // Continue the current word + if (mCurrentWord.Length() > 0) { + NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, + "These should not be set"); + + while (offset < aLength && !IsSpace(aText[offset])) { + mCurrentWord.AppendElement(aText[offset]); + if (!mCurrentWordContainsComplexChar && + IsComplexASCIIChar(aText[offset])) { + mCurrentWordContainsComplexChar = true; + } + ++offset; + } + + if (offset > 0) { + mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags)); + } + + if (offset == aLength) { + // We did not encounter whitespace so the word hasn't finished yet. + return NS_OK; + } + + // We encountered whitespace, so we're done with this word + nsresult rv = FlushCurrentWord(); + if (NS_FAILED(rv)) return rv; + } + + AutoTArray<uint8_t, 4000> breakState; + if (aSink) { + // XXX(Bug 1631371) Check if this should use a fallible operation as it + // pretended earlier. + breakState.AppendElements(aLength); + } + + uint32_t start = offset; + bool noBreaksNeeded = + !aSink || ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS && + !mBreakHere && !mAfterBreakableSpace); + if (noBreaksNeeded) { + // Skip to the space before the last word, since either the break data + // here is not needed, or no breaks are set in the sink and there cannot + // be any breaks in this chunk; all we need is the context for the next + // chunk (if any) + offset = aLength; + while (offset > start) { + --offset; + if (IsSpace(aText[offset])) break; + } + } + uint32_t wordStart = offset; + bool wordHasComplexChar = false; + + for (;;) { + uint8_t ch = aText[offset]; + bool isSpace = IsSpace(ch); + bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE); + + if (aSink) { + // Consider word-break style. Since the break position of CJK scripts + // will be set by nsILineBreaker, we don't consider CJK at this point. + breakState[offset] = + mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) || + mWordBreak == LineBreaker::WordBreak::BreakAll || + mStrictness == LineBreaker::Strictness::Anywhere + ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL + : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE; + } + mBreakHere = false; + mAfterBreakableSpace = isBreakableSpace; + + if (isSpace) { + if (offset > wordStart && aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) { + if (mStrictness == LineBreaker::Strictness::Anywhere) { + memset(breakState.Elements() + wordStart, + gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL, + offset - wordStart); + } else if (wordHasComplexChar) { + // Save current start-of-word state because GetJISx4051Breaks will + // set it to false + uint8_t currentStart = breakState[wordStart]; + nsContentUtils::LineBreaker()->GetJISx4051Breaks( + aText + wordStart, offset - wordStart, mWordBreak, mStrictness, + mScriptIsChineseOrJapanese, breakState.Elements() + wordStart); + breakState[wordStart] = currentStart; + } + } + + wordHasComplexChar = false; + ++offset; + if (offset >= aLength) break; + wordStart = offset; + } else { + if (!wordHasComplexChar && IsComplexASCIIChar(ch)) { + wordHasComplexChar = true; + } + ++offset; + if (offset >= aLength) { + // Save this word + mCurrentWordContainsComplexChar = wordHasComplexChar; + uint32_t len = offset - wordStart; + char16_t* elems = mCurrentWord.AppendElements(len); + if (!elems) return NS_ERROR_OUT_OF_MEMORY; + uint32_t i; + for (i = wordStart; i < offset; ++i) { + elems[i - wordStart] = aText[i]; + } + mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags)); + // Ensure that the break-before for this word is written out + offset = wordStart + 1; + break; + } + } + } + + if (!noBreaksNeeded) { + aSink->SetBreaks(start, offset - start, breakState.Elements() + start); + } + return NS_OK; +} + +void nsLineBreaker::UpdateCurrentWordLanguage(nsAtom* aHyphenationLanguage) { + if (mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage) { + mCurrentWordContainsMixedLang = true; + mScriptIsChineseOrJapanese = false; + } else { + if (aHyphenationLanguage && !mCurrentWordLanguage) { + Locale loc = Locale(nsAtomCString(aHyphenationLanguage)); + if (loc.GetScript().IsEmpty()) { + loc.Maximize(); + } + const nsDependentCSubstring& script = loc.GetScript(); + mScriptIsChineseOrJapanese = + script.EqualsLiteral("Hans") || script.EqualsLiteral("Hant") || + script.EqualsLiteral("Jpan") || script.EqualsLiteral("Hrkt"); + } + mCurrentWordLanguage = aHyphenationLanguage; + } +} + +nsresult nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags) { + nsresult rv = FlushCurrentWord(); + if (NS_FAILED(rv)) return rv; + + bool isBreakableSpace = !(aFlags & BREAK_SUPPRESS_INSIDE); + if (mAfterBreakableSpace && !isBreakableSpace) { + mBreakHere = true; + } + mAfterBreakableSpace = isBreakableSpace; + mWordContinuation = false; + return NS_OK; +} + +nsresult nsLineBreaker::Reset(bool* aTrailingBreak) { + nsresult rv = FlushCurrentWord(); + if (NS_FAILED(rv)) return rv; + + *aTrailingBreak = mBreakHere || mAfterBreakableSpace; + mBreakHere = false; + mAfterBreakableSpace = false; + return NS_OK; +} |