diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /parser/html/nsHtml5Highlighter.cpp | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 110.0.1. upstream/110.0.1 upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'parser/html/nsHtml5Highlighter.cpp')
-rw-r--r-- | parser/html/nsHtml5Highlighter.cpp | 790 |
1 files changed, 790 insertions, 0 deletions
diff --git a/parser/html/nsHtml5Highlighter.cpp b/parser/html/nsHtml5Highlighter.cpp new file mode 100644 index 0000000000..45c84b743a --- /dev/null +++ b/parser/html/nsHtml5Highlighter.cpp @@ -0,0 +1,790 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsHtml5Highlighter.h" +#include "ErrorList.h" +#include "nsDebug.h" +#include "nsHtml5AttributeName.h" +#include "nsHtml5Tokenizer.h" +#include "nsHtml5ViewSourceUtils.h" +#include "nsString.h" +#include "nsThreadUtils.h" + +#include "mozilla/Attributes.h" +#include "mozilla/Preferences.h" + +using namespace mozilla; + +// The old code had a limit of 16 tokens. 1300 is a number picked my measuring +// the size of 16 tokens on cnn.com. +#define NS_HTML5_HIGHLIGHTER_PRE_BREAK_THRESHOLD 1300 + +char16_t nsHtml5Highlighter::sComment[] = {'c', 'o', 'm', 'm', + 'e', 'n', 't', 0}; + +char16_t nsHtml5Highlighter::sCdata[] = {'c', 'd', 'a', 't', 'a', 0}; + +char16_t nsHtml5Highlighter::sEntity[] = {'e', 'n', 't', 'i', 't', 'y', 0}; + +char16_t nsHtml5Highlighter::sEndTag[] = {'e', 'n', 'd', '-', 't', 'a', 'g', 0}; + +char16_t nsHtml5Highlighter::sStartTag[] = {'s', 't', 'a', 'r', 't', + '-', 't', 'a', 'g', 0}; + +char16_t nsHtml5Highlighter::sAttributeName[] = { + 'a', 't', 't', 'r', 'i', 'b', 'u', 't', 'e', '-', 'n', 'a', 'm', 'e', 0}; + +char16_t nsHtml5Highlighter::sAttributeValue[] = {'a', 't', 't', 'r', 'i', 'b', + 'u', 't', 'e', '-', 'v', 'a', + 'l', 'u', 'e', 0}; + +char16_t nsHtml5Highlighter::sDoctype[] = {'d', 'o', 'c', 't', + 'y', 'p', 'e', 0}; + +char16_t nsHtml5Highlighter::sPi[] = {'p', 'i', 0}; + +nsHtml5Highlighter::nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink) + : mState(nsHtml5Tokenizer::DATA), + mCStart(INT32_MAX), + mPos(0), + mLineNumber(1), + mInlinesOpen(0), + mInCharacters(false), + mBuffer(nullptr), + mOpSink(aOpSink), + 
mCurrentRun(nullptr), + mAmpersand(nullptr), + mSlash(nullptr), + mHandles( + MakeUnique<nsIContent*[]>(NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH)), + mHandlesUsed(0), + mSeenBase(false) { + NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); +} + +nsHtml5Highlighter::~nsHtml5Highlighter() { + NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); +} + +void nsHtml5Highlighter::SetOpSink(nsAHtml5TreeOpSink* aOpSink) { + mOpSink = aOpSink; +} + +void nsHtml5Highlighter::Rewind() { + mState = 0; + mCStart = INT32_MAX; + mPos = 0; + mLineNumber = 1; + mInlinesOpen = 0; + mInCharacters = false; + mBuffer = nullptr; + mOpQueue.Clear(); + mCurrentRun = nullptr; + mAmpersand = nullptr; + mSlash = nullptr; + // Pop until we have three elements on the stack: + // html, body, and pre. + while (mStack.Length() > 3) { + Pop(); + } + mSeenBase = false; +} + +void nsHtml5Highlighter::Start(const nsAutoString& aTitle) { + // Doctype + opAppendDoctypeToDocument operation(nsGkAtoms::html, u""_ns, u""_ns); + mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation)); + + mOpQueue.AppendElement()->Init(mozilla::AsVariant(STANDARDS_MODE)); + + // <html> uses NS_NewHTMLSharedElement creator + nsIContent** root = + CreateElement(nsGkAtoms::html, nullptr, nullptr, NS_NewHTMLSharedElement); + opAppendToDocument appendOp(root); + mOpQueue.AppendElement()->Init(mozilla::AsVariant(appendOp)); + mStack.AppendElement(root); + + // <head> uses NS_NewHTMLSharedElement creator + Push(nsGkAtoms::head, nullptr, NS_NewHTMLSharedElement); + + Push(nsGkAtoms::meta, nsHtml5ViewSourceUtils::NewMetaViewportAttributes(), + NS_NewHTMLMetaElement); + Pop(); // meta + + Push(nsGkAtoms::title, nullptr, NS_NewHTMLTitleElement); + // XUL will add the "Source of: " prefix. 
+ uint32_t length = aTitle.Length(); + if (length > INT32_MAX) { + length = INT32_MAX; + } + AppendCharacters(aTitle.BeginReading(), 0, (int32_t)length); + Pop(); // title + + Push(nsGkAtoms::link, nsHtml5ViewSourceUtils::NewLinkAttributes(), + NS_NewHTMLLinkElement); + + opUpdateStyleSheet updateOp(CurrentNode()); + mOpQueue.AppendElement()->Init(mozilla::AsVariant(updateOp)); + + Pop(); // link + + Pop(); // head + + Push(nsGkAtoms::body, nsHtml5ViewSourceUtils::NewBodyAttributes(), + NS_NewHTMLBodyElement); + + nsHtml5HtmlAttributes* preAttrs = new nsHtml5HtmlAttributes(0); + nsHtml5String preId = nsHtml5Portability::newStringFromLiteral("line1"); + preAttrs->addAttribute(nsHtml5AttributeName::ATTR_ID, preId, -1); + Push(nsGkAtoms::pre, preAttrs, NS_NewHTMLPreElement); + + // Don't call StartCharacters here in order to be able to put it in + // a speculation. + + mOpQueue.AppendElement()->Init(mozilla::AsVariant(opStartLayout())); +} + +void nsHtml5Highlighter::UpdateCharsetSource(nsCharsetSource aCharsetSource) { + opUpdateCharsetSource operation(aCharsetSource); + mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation)); +} + +int32_t nsHtml5Highlighter::Transition(int32_t aState, bool aReconsume, + int32_t aPos) { + mPos = aPos; + switch (mState) { + case nsHtml5Tokenizer::SCRIPT_DATA: + case nsHtml5Tokenizer::RAWTEXT: + case nsHtml5Tokenizer::RCDATA: + case nsHtml5Tokenizer::DATA: + // We can transition on < and on &. Either way, we don't yet know the + // role of the token, so open a span without class. 
+ if (aState == nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE) { + StartSpan(); + // Start another span for highlighting the ampersand + StartSpan(); + mAmpersand = CurrentNode(); + } else { + EndCharactersAndStartMarkupRun(); + } + break; + case nsHtml5Tokenizer::TAG_OPEN: + switch (aState) { + case nsHtml5Tokenizer::TAG_NAME: + StartSpan(sStartTag); + break; + case nsHtml5Tokenizer::DATA: + FinishTag(); // DATA + break; + case nsHtml5Tokenizer::PROCESSING_INSTRUCTION: + AddClass(sPi); + break; + } + break; + case nsHtml5Tokenizer::TAG_NAME: + switch (aState) { + case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME: + EndSpanOrA(); // nsHtml5Tokenizer::TAG_NAME + break; + case nsHtml5Tokenizer::SELF_CLOSING_START_TAG: + EndSpanOrA(); // nsHtml5Tokenizer::TAG_NAME + StartSpan(); // for highlighting the slash + mSlash = CurrentNode(); + break; + default: + FinishTag(); + break; + } + break; + case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME: + switch (aState) { + case nsHtml5Tokenizer::ATTRIBUTE_NAME: + StartSpan(sAttributeName); + break; + case nsHtml5Tokenizer::SELF_CLOSING_START_TAG: + StartSpan(); // for highlighting the slash + mSlash = CurrentNode(); + break; + default: + FinishTag(); + break; + } + break; + case nsHtml5Tokenizer::ATTRIBUTE_NAME: + switch (aState) { + case nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME: + case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE: + EndSpanOrA(); // nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME + break; + case nsHtml5Tokenizer::SELF_CLOSING_START_TAG: + EndSpanOrA(); // nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME + StartSpan(); // for highlighting the slash + mSlash = CurrentNode(); + break; + default: + FinishTag(); + break; + } + break; + case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE: + switch (aState) { + case nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED: + case nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED: + FlushCurrent(); + StartA(); + break; + case nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED: + StartA(); + break; + default: + FinishTag(); + 
break; + } + break; + case nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED: + case nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED: + switch (aState) { + case nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED: + EndSpanOrA(); + break; + case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE: + StartSpan(); + StartSpan(); // for ampersand itself + mAmpersand = CurrentNode(); + break; + default: + MOZ_ASSERT_UNREACHABLE("Impossible transition."); + break; + } + break; + case nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED: + switch (aState) { + case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME: + break; + case nsHtml5Tokenizer::SELF_CLOSING_START_TAG: + StartSpan(); // for highlighting the slash + mSlash = CurrentNode(); + break; + default: + FinishTag(); + break; + } + break; + case nsHtml5Tokenizer::SELF_CLOSING_START_TAG: + EndSpanOrA(); // end the slash highlight + switch (aState) { + case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME: + break; + default: + FinishTag(); + break; + } + break; + case nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED: + switch (aState) { + case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME: + EndSpanOrA(); + break; + case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE: + StartSpan(); + StartSpan(); // for ampersand itself + mAmpersand = CurrentNode(); + break; + default: + FinishTag(); + break; + } + break; + case nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME: + switch (aState) { + case nsHtml5Tokenizer::SELF_CLOSING_START_TAG: + StartSpan(); // for highlighting the slash + mSlash = CurrentNode(); + break; + case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE: + break; + case nsHtml5Tokenizer::ATTRIBUTE_NAME: + StartSpan(sAttributeName); + break; + default: + FinishTag(); + break; + } + break; + // most comment states are omitted, because they don't matter to + // highlighting + case nsHtml5Tokenizer::COMMENT_START: + case nsHtml5Tokenizer::COMMENT_END: + case nsHtml5Tokenizer::COMMENT_END_BANG: + case nsHtml5Tokenizer::COMMENT_START_DASH: + case 
nsHtml5Tokenizer::BOGUS_COMMENT: + case nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN: + case nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH_DASH: + if (aState == nsHtml5Tokenizer::DATA) { + AddClass(sComment); + FinishTag(); + } + break; + // most cdata states are omitted, because they don't matter to + // highlighting + case nsHtml5Tokenizer::CDATA_RSQB_RSQB: + if (aState == nsHtml5Tokenizer::DATA) { + AddClass(sCdata); + FinishTag(); + } + break; + case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE: + EndSpanOrA(); // the span for the ampersand + switch (aState) { + case nsHtml5Tokenizer::CONSUME_NCR: + case nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP: + break; + default: + // not actually a character reference + EndSpanOrA(); + break; + } + break; + case nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP: + if (aState == nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL) { + break; + } + // not actually a character reference + EndSpanOrA(); + break; + case nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL: + if (!aReconsume) { + FlushCurrent(); + } + EndSpanOrA(); + break; + case nsHtml5Tokenizer::DECIMAL_NRC_LOOP: + case nsHtml5Tokenizer::HEX_NCR_LOOP: + switch (aState) { + case nsHtml5Tokenizer::HANDLE_NCR_VALUE: + AddClass(sEntity); + FlushCurrent(); + break; + case nsHtml5Tokenizer::HANDLE_NCR_VALUE_RECONSUME: + AddClass(sEntity); + break; + } + EndSpanOrA(); + break; + case nsHtml5Tokenizer::CLOSE_TAG_OPEN: + switch (aState) { + case nsHtml5Tokenizer::DATA: + FinishTag(); + break; + case nsHtml5Tokenizer::TAG_NAME: + StartSpan(sEndTag); + break; + } + break; + case nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN: + if (aState == nsHtml5Tokenizer::NON_DATA_END_TAG_NAME) { + FlushCurrent(); + StartSpan(); // don't know if it is "end-tag" yet :-( + break; + } + EndSpanOrA(); + StartCharacters(); + break; + case nsHtml5Tokenizer::NON_DATA_END_TAG_NAME: + switch (aState) { + case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME: + AddClass(sEndTag); + EndSpanOrA(); + break; + case 
nsHtml5Tokenizer::SELF_CLOSING_START_TAG: + AddClass(sEndTag); + EndSpanOrA(); + StartSpan(); // for highlighting the slash + mSlash = CurrentNode(); + break; + case nsHtml5Tokenizer::DATA: // yes, as a result of emitting the token + AddClass(sEndTag); + FinishTag(); + break; + default: + FinishTag(); + break; + } + break; + case nsHtml5Tokenizer::SCRIPT_DATA_LESS_THAN_SIGN: + case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: + if (aState == nsHtml5Tokenizer::NON_DATA_END_TAG_NAME) { + FlushCurrent(); + StartSpan(); // don't know if it is "end-tag" yet :-( + break; + } + FinishTag(); + break; + case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH: + case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED: + case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH: + if (aState == nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN) { + EndCharactersAndStartMarkupRun(); + } + break; + // Lots of double escape states omitted, because they don't highlight. + // Likewise, only doctype states that can emit the doctype are of + // interest. Otherwise, the transition out of bogus comment deals. 
+ case nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME: + case nsHtml5Tokenizer::DOCTYPE_NAME: + case nsHtml5Tokenizer::AFTER_DOCTYPE_NAME: + case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD: + case nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: + case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: + case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER: + case nsHtml5Tokenizer::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: + case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: + case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER: + case nsHtml5Tokenizer::BOGUS_DOCTYPE: + case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD: + case nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: + case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: + case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: + if (aState == nsHtml5Tokenizer::DATA) { + AddClass(sDoctype); + FinishTag(); + } + break; + case nsHtml5Tokenizer::PROCESSING_INSTRUCTION_QUESTION_MARK: + if (aState == nsHtml5Tokenizer::DATA) { + FinishTag(); + } + break; + default: + break; + } + mState = aState; + return aState; +} + +[[nodiscard]] bool nsHtml5Highlighter::End() { + switch (mState) { + case nsHtml5Tokenizer::COMMENT_END: + case nsHtml5Tokenizer::COMMENT_END_BANG: + case nsHtml5Tokenizer::COMMENT_START_DASH: + case nsHtml5Tokenizer::BOGUS_COMMENT: + case nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN: + AddClass(sComment); + break; + case nsHtml5Tokenizer::CDATA_RSQB_RSQB: + AddClass(sCdata); + break; + case nsHtml5Tokenizer::DECIMAL_NRC_LOOP: + case nsHtml5Tokenizer::HEX_NCR_LOOP: + // XXX need tokenizer help here + break; + case nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME: + case nsHtml5Tokenizer::DOCTYPE_NAME: + case nsHtml5Tokenizer::AFTER_DOCTYPE_NAME: + case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD: + case nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: + case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: + case 
nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER: + case nsHtml5Tokenizer::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: + case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: + case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER: + case nsHtml5Tokenizer::BOGUS_DOCTYPE: + case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD: + case nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: + case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: + case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: + AddClass(sDoctype); + break; + default: + break; + } + nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement(); + NS_ASSERTION(treeOp, "Tree op allocation failed."); + treeOp->Init(mozilla::AsVariant(opStreamEnded())); + return FlushOps().isOk(); +} + +void nsHtml5Highlighter::SetBuffer(nsHtml5UTF16Buffer* aBuffer) { + MOZ_ASSERT(!mBuffer, "Old buffer still here!"); + mBuffer = aBuffer; + mCStart = aBuffer->getStart(); +} + +void nsHtml5Highlighter::DropBuffer(int32_t aPos) { + MOZ_ASSERT(mBuffer, "No buffer to drop!"); + mPos = aPos; + FlushChars(); + mBuffer = nullptr; +} + +void nsHtml5Highlighter::StartSpan() { + FlushChars(); + Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement); + ++mInlinesOpen; +} + +void nsHtml5Highlighter::StartSpan(const char16_t* aClass) { + StartSpan(); + AddClass(aClass); +} + +void nsHtml5Highlighter::EndSpanOrA() { + FlushChars(); + Pop(); + --mInlinesOpen; +} + +void nsHtml5Highlighter::StartCharacters() { + MOZ_ASSERT(!mInCharacters, "Already in characters!"); + FlushChars(); + Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement); + mCurrentRun = CurrentNode(); + mInCharacters = true; +} + +void nsHtml5Highlighter::EndCharactersAndStartMarkupRun() { + MOZ_ASSERT(mInCharacters, "Not in characters!"); + FlushChars(); + Pop(); + mInCharacters = false; + // Now start markup run + StartSpan(); + mCurrentRun = CurrentNode(); +} + +void nsHtml5Highlighter::StartA() { + FlushChars(); + Push(nsGkAtoms::a, 
nullptr, NS_NewHTMLAnchorElement); + AddClass(sAttributeValue); + ++mInlinesOpen; +} + +void nsHtml5Highlighter::FinishTag() { + while (mInlinesOpen > 1) { + EndSpanOrA(); + } + FlushCurrent(); // > + EndSpanOrA(); // DATA + NS_ASSERTION(!mInlinesOpen, "mInlinesOpen got out of sync!"); + StartCharacters(); +} + +void nsHtml5Highlighter::FlushChars() { + if (mCStart < mPos) { + char16_t* buf = mBuffer->getBuffer(); + int32_t i = mCStart; + while (i < mPos) { + char16_t c = buf[i]; + switch (c) { + case '\r': + // The input this code sees has been normalized so that there are + // CR breaks and LF breaks but no CRLF breaks. Overwrite CR with LF + // to show consistent LF line breaks to layout. It is OK to mutate + // the input data, because there are no reparses in the View Source + // case, so we won't need the original data in the buffer anymore. + buf[i] = '\n'; + [[fallthrough]]; + case '\n': { + ++i; + if (mCStart < i) { + int32_t len = i - mCStart; + AppendCharacters(buf, mCStart, len); + mCStart = i; + } + ++mLineNumber; + Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement); + nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement(); + NS_ASSERTION(treeOp, "Tree op allocation failed."); + opAddLineNumberId operation(CurrentNode(), mLineNumber); + treeOp->Init(mozilla::AsVariant(operation)); + Pop(); + break; + } + default: + ++i; + break; + } + } + if (mCStart < mPos) { + int32_t len = mPos - mCStart; + AppendCharacters(buf, mCStart, len); + mCStart = mPos; + } + } +} + +void nsHtml5Highlighter::FlushCurrent() { + mPos++; + FlushChars(); +} + +bool nsHtml5Highlighter::ShouldFlushOps() { + // Arbitrary threshold that doesn't have an exact justification. + // The general idea is to flush much, much sooner than reaching + // the maximum size of `nsTArray`. 
+ return mOpQueue.Length() > 100000; +} + +mozilla::Result<bool, nsresult> nsHtml5Highlighter::FlushOps() { + bool hasOps = !mOpQueue.IsEmpty(); + if (hasOps) { + if (!mOpSink->MoveOpsFrom(mOpQueue)) { + return Err(NS_ERROR_OUT_OF_MEMORY); + } + } + return hasOps; +} + +void nsHtml5Highlighter::MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName, + nsHtml5String aValue) { + if (!(nsHtml5AttributeName::ATTR_HREF == aName || + nsHtml5AttributeName::ATTR_SRC == aName || + nsHtml5AttributeName::ATTR_ACTION == aName || + nsHtml5AttributeName::ATTR_CITE == aName || + nsHtml5AttributeName::ATTR_BACKGROUND == aName || + nsHtml5AttributeName::ATTR_LONGDESC == aName || + nsHtml5AttributeName::ATTR_XLINK_HREF == aName || + nsHtml5AttributeName::ATTR_DEFINITIONURL == aName)) { + return; + } + AddViewSourceHref(aValue); +} + +void nsHtml5Highlighter::CompletedNamedCharacterReference() { + AddClass(sEntity); +} + +nsIContent** nsHtml5Highlighter::AllocateContentHandle() { + if (mHandlesUsed == NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH) { + mOldHandles.AppendElement(std::move(mHandles)); + mHandles = + MakeUnique<nsIContent*[]>(NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH); + mHandlesUsed = 0; + } +#ifdef DEBUG + mHandles[mHandlesUsed] = reinterpret_cast<nsIContent*>(uintptr_t(0xC0DEDBAD)); +#endif + return &mHandles[mHandlesUsed++]; +} + +nsIContent** nsHtml5Highlighter::CreateElement( + nsAtom* aName, nsHtml5HtmlAttributes* aAttributes, + nsIContent** aIntendedParent, + mozilla::dom::HTMLContentCreatorFunction aCreator) { + MOZ_ASSERT(aName, "Got null name."); + nsIContent** content = AllocateContentHandle(); + opCreateHTMLElement opeation(content, aName, aAttributes, aCreator, + aIntendedParent, + mozilla::dom::FROM_PARSER_NETWORK); + mOpQueue.AppendElement()->Init(mozilla::AsVariant(opeation)); + return content; +} + +nsIContent** nsHtml5Highlighter::CurrentNode() { + MOZ_ASSERT(mStack.Length() >= 1, "Must have something on stack."); + return mStack[mStack.Length() - 1]; +} + 
+void nsHtml5Highlighter::Push( + nsAtom* aName, nsHtml5HtmlAttributes* aAttributes, + mozilla::dom::HTMLContentCreatorFunction aCreator) { + MOZ_ASSERT(mStack.Length() >= 1, "Pushing without root."); + nsIContent** elt = CreateElement(aName, aAttributes, CurrentNode(), + aCreator); // Don't inline below! + opAppend operation(elt, CurrentNode(), mozilla::dom::FROM_PARSER_NETWORK); + mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation)); + mStack.AppendElement(elt); +} + +void nsHtml5Highlighter::Pop() { + MOZ_ASSERT(mStack.Length() >= 2, "Popping when stack too short."); + mStack.RemoveLastElement(); +} + +void nsHtml5Highlighter::AppendCharacters(const char16_t* aBuffer, + int32_t aStart, int32_t aLength) { + MOZ_ASSERT(aBuffer, "Null buffer"); + + char16_t* bufferCopy = new char16_t[aLength]; + memcpy(bufferCopy, aBuffer + aStart, aLength * sizeof(char16_t)); + + opAppendText operation(CurrentNode(), bufferCopy, aLength); + mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation)); +} + +void nsHtml5Highlighter::AddClass(const char16_t* aClass) { + opAddClass operation(CurrentNode(), (char16_t*)aClass); + mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation)); +} + +void nsHtml5Highlighter::AddViewSourceHref(nsHtml5String aValue) { + char16_t* bufferCopy = new char16_t[aValue.Length() + 1]; + aValue.CopyToBuffer(bufferCopy); + bufferCopy[aValue.Length()] = 0; + + opAddViewSourceHref operation(CurrentNode(), bufferCopy, aValue.Length()); + mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation)); +} + +void nsHtml5Highlighter::AddBase(nsHtml5String aValue) { + if (mSeenBase) { + return; + } + mSeenBase = true; + char16_t* bufferCopy = new char16_t[aValue.Length() + 1]; + aValue.CopyToBuffer(bufferCopy); + bufferCopy[aValue.Length()] = 0; + + opAddViewSourceBase operation(bufferCopy, aValue.Length()); + mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation)); +} + +void nsHtml5Highlighter::AddErrorToCurrentNode(const char* aMsgId) { + 
nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement(); + NS_ASSERTION(treeOp, "Tree op allocation failed."); + opAddErrorType operation(CurrentNode(), (char*)aMsgId); + treeOp->Init(mozilla::AsVariant(operation)); +} + +void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId) { + MOZ_ASSERT(mCurrentRun, "Adding error to run without one!"); + nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement(); + NS_ASSERTION(treeOp, "Tree op allocation failed."); + opAddErrorType operation(mCurrentRun, (char*)aMsgId); + treeOp->Init(mozilla::AsVariant(operation)); +} + +void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId, + nsAtom* aName) { + MOZ_ASSERT(mCurrentRun, "Adding error to run without one!"); + nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement(); + NS_ASSERTION(treeOp, "Tree op allocation failed."); + opAddErrorType operation(mCurrentRun, (char*)aMsgId, aName); + treeOp->Init(mozilla::AsVariant(operation)); +} + +void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName, + nsAtom* aOther) { + MOZ_ASSERT(mCurrentRun, "Adding error to run without one!"); + nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement(); + NS_ASSERTION(treeOp, "Tree op allocation failed."); + opAddErrorType operation(mCurrentRun, (char*)aMsgId, aName, aOther); + treeOp->Init(mozilla::AsVariant(operation)); +} + +void nsHtml5Highlighter::AddErrorToCurrentAmpersand(const char* aMsgId) { + MOZ_ASSERT(mAmpersand, "Adding error to ampersand without one!"); + nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement(); + NS_ASSERTION(treeOp, "Tree op allocation failed."); + opAddErrorType operation(mAmpersand, (char*)aMsgId); + treeOp->Init(mozilla::AsVariant(operation)); +} + +void nsHtml5Highlighter::AddErrorToCurrentSlash(const char* aMsgId) { + MOZ_ASSERT(mSlash, "Adding error to slash without one!"); + nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement(); + NS_ASSERTION(treeOp, "Tree op allocation failed."); + opAddErrorType operation(mSlash, 
(char*)aMsgId); + treeOp->Init(mozilla::AsVariant(operation)); +} |