summary | refs | log | tree | commit | diff | stats
path: root/parser/html/nsHtml5Highlighter.cpp
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- parser/html/nsHtml5Highlighter.cpp 790
1 files changed, 790 insertions, 0 deletions
diff --git a/parser/html/nsHtml5Highlighter.cpp b/parser/html/nsHtml5Highlighter.cpp
new file mode 100644
index 0000000000..45c84b743a
--- /dev/null
+++ b/parser/html/nsHtml5Highlighter.cpp
@@ -0,0 +1,790 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsHtml5Highlighter.h"
+#include "ErrorList.h"
+#include "nsDebug.h"
+#include "nsHtml5AttributeName.h"
+#include "nsHtml5Tokenizer.h"
+#include "nsHtml5ViewSourceUtils.h"
+#include "nsString.h"
+#include "nsThreadUtils.h"
+
+#include "mozilla/Attributes.h"
+#include "mozilla/Preferences.h"
+
+using namespace mozilla;
+
+// The old code had a limit of 16 tokens. 1300 is a number picked by measuring
+// the size of 16 tokens on cnn.com.
+#define NS_HTML5_HIGHLIGHTER_PRE_BREAK_THRESHOLD 1300
+
+// Zero-terminated UTF-16 class names that AddClass() attaches to the
+// highlighting elements. Each literal yields the same array contents
+// (including the trailing 0) as a per-character initializer list.
+char16_t nsHtml5Highlighter::sComment[] = u"comment";
+
+char16_t nsHtml5Highlighter::sCdata[] = u"cdata";
+
+char16_t nsHtml5Highlighter::sEntity[] = u"entity";
+
+char16_t nsHtml5Highlighter::sEndTag[] = u"end-tag";
+
+char16_t nsHtml5Highlighter::sStartTag[] = u"start-tag";
+
+char16_t nsHtml5Highlighter::sAttributeName[] = u"attribute-name";
+
+char16_t nsHtml5Highlighter::sAttributeValue[] = u"attribute-value";
+
+char16_t nsHtml5Highlighter::sDoctype[] = u"doctype";
+
+char16_t nsHtml5Highlighter::sPi[] = u"pi";
+/*
+ * Constructs a highlighter whose queued tree operations are later handed to
+ * aOpSink (see FlushOps()). The tokenizer-tracking state starts in DATA with
+ * no buffer attached, no inline elements open and an empty first handle
+ * array. Asserts that construction happens on the main thread.
+ */
+nsHtml5Highlighter::nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink)
+ : mState(nsHtml5Tokenizer::DATA),
+ mCStart(INT32_MAX),  // FlushChars() only acts when mCStart < mPos
+ mPos(0),
+ mLineNumber(1),  // Start() gives the first <pre> the id "line1"
+ mInlinesOpen(0),
+ mInCharacters(false),
+ mBuffer(nullptr),  // supplied later through SetBuffer()
+ mOpSink(aOpSink),
+ mCurrentRun(nullptr),
+ mAmpersand(nullptr),
+ mSlash(nullptr),
+ mHandles(
+ MakeUnique<nsIContent*[]>(NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH)),
+ mHandlesUsed(0),
+ mSeenBase(false) {
+ NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
+}
+
+// The highlighter is expected to be destroyed on the main thread; the
+// assertion flags destruction from any other thread.
+nsHtml5Highlighter::~nsHtml5Highlighter() {
+  NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
+}
+
+// Replaces the sink that FlushOps() moves queued tree operations into.
+void nsHtml5Highlighter::SetOpSink(nsAHtml5TreeOpSink* aOpSink) {
+  this->mOpSink = aOpSink;
+}
+
+// Returns the highlighter to the state it had right after Start(): the
+// tokenizer-tracking fields go back to their constructor values, any queued
+// tree operations are discarded, and the element stack is unwound to the
+// three elements Start() leaves open.
+void nsHtml5Highlighter::Rewind() {
+  // Use the named initial state, exactly as the constructor does, instead of
+  // a bare 0 so the two reset paths cannot drift apart.
+  mState = nsHtml5Tokenizer::DATA;
+  mCStart = INT32_MAX;
+  mPos = 0;
+  mLineNumber = 1;
+  mInlinesOpen = 0;
+  mInCharacters = false;
+  mBuffer = nullptr;
+  mOpQueue.Clear();
+  mCurrentRun = nullptr;
+  mAmpersand = nullptr;
+  mSlash = nullptr;
+  // Pop until we have three elements on the stack:
+  // html, body, and pre.
+  while (mStack.Length() > 3) {
+    Pop();
+  }
+  mSeenBase = false;
+}
+
+// Queues the tree operations that build the fixed skeleton of the view-source
+// document: doctype, standards mode, <html>, a <head> holding <meta>, <title>
+// (with aTitle as its text) and <link>, then <body> and the first <pre>.
+// On return the stack holds html, body and pre.
+void nsHtml5Highlighter::Start(const nsAutoString& aTitle) {
+  // Doctype, followed by the document mode.
+  opAppendDoctypeToDocument doctypeOp(nsGkAtoms::html, u""_ns, u""_ns);
+  mOpQueue.AppendElement()->Init(mozilla::AsVariant(doctypeOp));
+
+  mOpQueue.AppendElement()->Init(mozilla::AsVariant(STANDARDS_MODE));
+
+  // <html> uses NS_NewHTMLSharedElement creator. It has no parent, so it is
+  // created and appended to the document by hand rather than through Push().
+  nsIContent** htmlRoot =
+      CreateElement(nsGkAtoms::html, nullptr, nullptr, NS_NewHTMLSharedElement);
+  opAppendToDocument rootAppendOp(htmlRoot);
+  mOpQueue.AppendElement()->Init(mozilla::AsVariant(rootAppendOp));
+  mStack.AppendElement(htmlRoot);
+
+  // <head> uses NS_NewHTMLSharedElement creator
+  Push(nsGkAtoms::head, nullptr, NS_NewHTMLSharedElement);
+
+  // <meta>
+  Push(nsGkAtoms::meta, nsHtml5ViewSourceUtils::NewMetaViewportAttributes(),
+       NS_NewHTMLMetaElement);
+  Pop();
+
+  // <title>: XUL will add the "Source of: " prefix.
+  Push(nsGkAtoms::title, nullptr, NS_NewHTMLTitleElement);
+  // AppendCharacters() takes a signed length, so clamp before converting.
+  const uint32_t rawLength = aTitle.Length();
+  const int32_t titleLength =
+      rawLength > INT32_MAX ? INT32_MAX : static_cast<int32_t>(rawLength);
+  AppendCharacters(aTitle.BeginReading(), 0, titleLength);
+  Pop();
+
+  // <link>, plus a style sheet update for it while it is the current node.
+  Push(nsGkAtoms::link, nsHtml5ViewSourceUtils::NewLinkAttributes(),
+       NS_NewHTMLLinkElement);
+
+  opUpdateStyleSheet sheetUpdateOp(CurrentNode());
+  mOpQueue.AppendElement()->Init(mozilla::AsVariant(sheetUpdateOp));
+
+  Pop();  // link
+
+  Pop();  // head
+
+  Push(nsGkAtoms::body, nsHtml5ViewSourceUtils::NewBodyAttributes(),
+       NS_NewHTMLBodyElement);
+
+  // First <pre>, carrying id="line1".
+  nsHtml5HtmlAttributes* firstLineAttrs = new nsHtml5HtmlAttributes(0);
+  nsHtml5String firstLineId = nsHtml5Portability::newStringFromLiteral("line1");
+  firstLineAttrs->addAttribute(nsHtml5AttributeName::ATTR_ID, firstLineId, -1);
+  Push(nsGkAtoms::pre, firstLineAttrs, NS_NewHTMLPreElement);
+
+  // Don't call StartCharacters here in order to be able to put it in
+  // a speculation.
+
+  mOpQueue.AppendElement()->Init(mozilla::AsVariant(opStartLayout()));
+}
+
+// Queues a tree operation carrying the new charset source.
+void nsHtml5Highlighter::UpdateCharsetSource(nsCharsetSource aCharsetSource) {
+  opUpdateCharsetSource charsetOp(aCharsetSource);
+  nsHtml5TreeOperation* slot = mOpQueue.AppendElement();
+  slot->Init(mozilla::AsVariant(charsetOp));
+}
+/*
+ * Reacts to a tokenizer state change by opening, classifying or closing the
+ * highlighting elements that wrap the source text.
+ *
+ * The action is selected by the pair (state being left, held in mState;
+ * state being entered, aState). aPos is stored in mPos first so that any
+ * flush triggered here covers the text up to that position. aReconsume is
+ * consulted only when leaving CHARACTER_REFERENCE_TAIL. Finally mState is
+ * set to aState, and aState is returned unchanged.
+ */
+int32_t nsHtml5Highlighter::Transition(int32_t aState, bool aReconsume,
+ int32_t aPos) {
+ mPos = aPos;
+ switch (mState) {
+ case nsHtml5Tokenizer::SCRIPT_DATA:
+ case nsHtml5Tokenizer::RAWTEXT:
+ case nsHtml5Tokenizer::RCDATA:
+ case nsHtml5Tokenizer::DATA:
+ // We can transition on < and on &. Either way, we don't yet know the
+ // role of the token, so open a span without class.
+ if (aState == nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE) {
+ StartSpan();
+ // Start another span for highlighting the ampersand
+ StartSpan();
+ mAmpersand = CurrentNode();
+ } else {
+ EndCharactersAndStartMarkupRun();
+ }
+ break;
+ case nsHtml5Tokenizer::TAG_OPEN:
+ switch (aState) {
+ case nsHtml5Tokenizer::TAG_NAME:
+ StartSpan(sStartTag);
+ break;
+ case nsHtml5Tokenizer::DATA:
+ FinishTag(); // DATA
+ break;
+ case nsHtml5Tokenizer::PROCESSING_INSTRUCTION:
+ AddClass(sPi);
+ break;
+ }
+ break;
+ case nsHtml5Tokenizer::TAG_NAME:
+ switch (aState) {
+ case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
+ EndSpanOrA(); // closes the tag-name span opened on entering TAG_NAME
+ break;
+ case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
+ EndSpanOrA(); // closes the tag-name span opened on entering TAG_NAME
+ StartSpan(); // for highlighting the slash
+ mSlash = CurrentNode();
+ break;
+ default:
+ FinishTag();
+ break;
+ }
+ break;
+ case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
+ switch (aState) {
+ case nsHtml5Tokenizer::ATTRIBUTE_NAME:
+ StartSpan(sAttributeName);
+ break;
+ case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
+ StartSpan(); // for highlighting the slash
+ mSlash = CurrentNode();
+ break;
+ default:
+ FinishTag();
+ break;
+ }
+ break;
+ case nsHtml5Tokenizer::ATTRIBUTE_NAME:
+ switch (aState) {
+ case nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME:
+ case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE:
+ EndSpanOrA(); // closes the attribute-name span
+ break;
+ case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
+ EndSpanOrA(); // closes the attribute-name span
+ StartSpan(); // for highlighting the slash
+ mSlash = CurrentNode();
+ break;
+ default:
+ FinishTag();
+ break;
+ }
+ break;
+ case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE:
+ switch (aState) {
+ case nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED:
+ case nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED:
+ FlushCurrent(); // flush one character past aPos before opening the <a>
+ StartA();
+ break;
+ case nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED:
+ StartA();
+ break;
+ default:
+ FinishTag();
+ break;
+ }
+ break;
+ case nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED:
+ case nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED:
+ switch (aState) {
+ case nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED:
+ EndSpanOrA(); // closes the attribute-value <a> opened by StartA()
+ break;
+ case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE:
+ StartSpan();
+ StartSpan(); // for ampersand itself
+ mAmpersand = CurrentNode();
+ break;
+ default:
+ MOZ_ASSERT_UNREACHABLE("Impossible transition.");
+ break;
+ }
+ break;
+ case nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED:
+ switch (aState) {
+ case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
+ break;
+ case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
+ StartSpan(); // for highlighting the slash
+ mSlash = CurrentNode();
+ break;
+ default:
+ FinishTag();
+ break;
+ }
+ break;
+ case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
+ EndSpanOrA(); // end the slash highlight
+ switch (aState) {
+ case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
+ break;
+ default:
+ FinishTag();
+ break;
+ }
+ break;
+ case nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED:
+ switch (aState) {
+ case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
+ EndSpanOrA(); // closes the attribute-value <a> opened by StartA()
+ break;
+ case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE:
+ StartSpan();
+ StartSpan(); // for ampersand itself
+ mAmpersand = CurrentNode();
+ break;
+ default:
+ FinishTag();
+ break;
+ }
+ break;
+ case nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME:
+ switch (aState) {
+ case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
+ StartSpan(); // for highlighting the slash
+ mSlash = CurrentNode();
+ break;
+ case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE:
+ break;
+ case nsHtml5Tokenizer::ATTRIBUTE_NAME:
+ StartSpan(sAttributeName);
+ break;
+ default:
+ FinishTag();
+ break;
+ }
+ break;
+ // most comment states are omitted, because they don't matter to
+ // highlighting
+ case nsHtml5Tokenizer::COMMENT_START:
+ case nsHtml5Tokenizer::COMMENT_END:
+ case nsHtml5Tokenizer::COMMENT_END_BANG:
+ case nsHtml5Tokenizer::COMMENT_START_DASH:
+ case nsHtml5Tokenizer::BOGUS_COMMENT:
+ case nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN:
+ case nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH_DASH:
+ if (aState == nsHtml5Tokenizer::DATA) {
+ AddClass(sComment);
+ FinishTag();
+ }
+ break;
+ // most cdata states are omitted, because they don't matter to
+ // highlighting
+ case nsHtml5Tokenizer::CDATA_RSQB_RSQB:
+ if (aState == nsHtml5Tokenizer::DATA) {
+ AddClass(sCdata);
+ FinishTag();
+ }
+ break;
+ case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE:
+ EndSpanOrA(); // the span for the ampersand
+ switch (aState) {
+ case nsHtml5Tokenizer::CONSUME_NCR:
+ case nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP:
+ break;
+ default:
+ // not actually a character reference
+ EndSpanOrA();
+ break;
+ }
+ break;
+ case nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP:
+ if (aState == nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL) {
+ break;
+ }
+ // not actually a character reference
+ EndSpanOrA();
+ break;
+ case nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL:
+ if (!aReconsume) {
+ FlushCurrent(); // the character at aPos is not reconsumed, so flush it too
+ }
+ EndSpanOrA();
+ break;
+ case nsHtml5Tokenizer::DECIMAL_NRC_LOOP:
+ case nsHtml5Tokenizer::HEX_NCR_LOOP:
+ switch (aState) {
+ case nsHtml5Tokenizer::HANDLE_NCR_VALUE:
+ AddClass(sEntity);
+ FlushCurrent();
+ break;
+ case nsHtml5Tokenizer::HANDLE_NCR_VALUE_RECONSUME:
+ AddClass(sEntity);
+ break;
+ }
+ EndSpanOrA(); // runs for every target state, classified or not
+ break;
+ case nsHtml5Tokenizer::CLOSE_TAG_OPEN:
+ switch (aState) {
+ case nsHtml5Tokenizer::DATA:
+ FinishTag();
+ break;
+ case nsHtml5Tokenizer::TAG_NAME:
+ StartSpan(sEndTag);
+ break;
+ }
+ break;
+ case nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN:
+ if (aState == nsHtml5Tokenizer::NON_DATA_END_TAG_NAME) {
+ FlushCurrent();
+ StartSpan(); // don't know if it is "end-tag" yet :-(
+ break;
+ }
+ EndSpanOrA();
+ StartCharacters();
+ break;
+ case nsHtml5Tokenizer::NON_DATA_END_TAG_NAME:
+ switch (aState) {
+ case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME:
+ AddClass(sEndTag);
+ EndSpanOrA();
+ break;
+ case nsHtml5Tokenizer::SELF_CLOSING_START_TAG:
+ AddClass(sEndTag);
+ EndSpanOrA();
+ StartSpan(); // for highlighting the slash
+ mSlash = CurrentNode();
+ break;
+ case nsHtml5Tokenizer::DATA: // yes, as a result of emitting the token
+ AddClass(sEndTag);
+ FinishTag();
+ break;
+ default:
+ FinishTag();
+ break;
+ }
+ break;
+ case nsHtml5Tokenizer::SCRIPT_DATA_LESS_THAN_SIGN:
+ case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN:
+ if (aState == nsHtml5Tokenizer::NON_DATA_END_TAG_NAME) {
+ FlushCurrent();
+ StartSpan(); // don't know if it is "end-tag" yet :-(
+ break;
+ }
+ FinishTag();
+ break;
+ case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH:
+ case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED:
+ case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH:
+ if (aState == nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN) {
+ EndCharactersAndStartMarkupRun();
+ }
+ break;
+ // Lots of double escape states omitted, because they don't highlight.
+ // Likewise, only doctype states that can emit the doctype are of
+ // interest. Otherwise, the transition out of bogus comment deals with it.
+ case nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME:
+ case nsHtml5Tokenizer::DOCTYPE_NAME:
+ case nsHtml5Tokenizer::AFTER_DOCTYPE_NAME:
+ case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD:
+ case nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
+ case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
+ case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
+ case nsHtml5Tokenizer::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
+ case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
+ case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
+ case nsHtml5Tokenizer::BOGUS_DOCTYPE:
+ case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD:
+ case nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
+ case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
+ case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
+ if (aState == nsHtml5Tokenizer::DATA) {
+ AddClass(sDoctype);
+ FinishTag();
+ }
+ break;
+ case nsHtml5Tokenizer::PROCESSING_INSTRUCTION_QUESTION_MARK:
+ if (aState == nsHtml5Tokenizer::DATA) {
+ FinishTag();
+ }
+ break;
+ default:
+ break;
+ }
+ mState = aState; // the state just entered becomes the one left next time
+ return aState;
+}
+
+// Handles end of input. If the stream stopped inside a comment, CDATA or
+// doctype state, the current node still receives the matching class. A
+// stream-ended operation is then queued and all queued operations are
+// flushed. Returns whether that flush succeeded.
+[[nodiscard]] bool nsHtml5Highlighter::End() {
+  switch (mState) {
+    // Comment-like states.
+    case nsHtml5Tokenizer::COMMENT_END:
+    case nsHtml5Tokenizer::COMMENT_END_BANG:
+    case nsHtml5Tokenizer::COMMENT_START_DASH:
+    case nsHtml5Tokenizer::BOGUS_COMMENT:
+    case nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN:
+      AddClass(sComment);
+      break;
+
+    // CDATA.
+    case nsHtml5Tokenizer::CDATA_RSQB_RSQB:
+      AddClass(sCdata);
+      break;
+
+    // Numeric character references.
+    case nsHtml5Tokenizer::DECIMAL_NRC_LOOP:
+    case nsHtml5Tokenizer::HEX_NCR_LOOP:
+      // XXX need tokenizer help here
+      break;
+
+    // Doctype states.
+    case nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME:
+    case nsHtml5Tokenizer::DOCTYPE_NAME:
+    case nsHtml5Tokenizer::AFTER_DOCTYPE_NAME:
+    case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD:
+    case nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER:
+    case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
+    case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
+    case nsHtml5Tokenizer::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
+    case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
+    case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
+    case nsHtml5Tokenizer::BOGUS_DOCTYPE:
+    case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD:
+    case nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
+    case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED:
+    case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED:
+      AddClass(sDoctype);
+      break;
+
+    default:
+      break;
+  }
+
+  nsHtml5TreeOperation* streamEndedOp = mOpQueue.AppendElement();
+  NS_ASSERTION(streamEndedOp, "Tree op allocation failed.");
+  streamEndedOp->Init(mozilla::AsVariant(opStreamEnded()));
+
+  return FlushOps().isOk();
+}
+
+// Attaches the buffer that FlushChars() will read from and marks its start
+// as the beginning of the not-yet-flushed text. No buffer may already be
+// attached.
+void nsHtml5Highlighter::SetBuffer(nsHtml5UTF16Buffer* aBuffer) {
+  MOZ_ASSERT(!mBuffer, "Old buffer still here!");
+  mCStart = aBuffer->getStart();
+  mBuffer = aBuffer;
+}
+
+// Flushes the pending text up to aPos and then detaches the current buffer.
+// A buffer must be attached.
+void nsHtml5Highlighter::DropBuffer(int32_t aPos) {
+  MOZ_ASSERT(mBuffer, "No buffer to drop!");
+  // The flush reads mPos and mBuffer, so it must run between the two writes.
+  mPos = aPos;
+  FlushChars();
+  mBuffer = nullptr;
+}
+
+// Flushes pending text, opens a class-less <span> under the current node and
+// counts it as one more open inline element.
+void nsHtml5Highlighter::StartSpan() {
+  FlushChars();
+  Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement);
+  mInlinesOpen += 1;
+}
+
+// Opens a <span> exactly like the no-argument overload, then queues aClass as
+// a class for the newly opened element.
+void nsHtml5Highlighter::StartSpan(const char16_t* aClass) {
+  StartSpan();  // the new span is now the current node
+  AddClass(aClass);
+}
+
+// Flushes pending text into the current inline element, pops that element
+// off the stack and decrements the open-inline count.
+void nsHtml5Highlighter::EndSpanOrA() {
+  FlushChars();
+  Pop();
+  mInlinesOpen -= 1;
+}
+
+// Begins a run of plain characters: flushes pending text, opens a <span>,
+// records it as the current run and sets the in-characters flag. Unlike
+// StartSpan(), this does not touch mInlinesOpen.
+void nsHtml5Highlighter::StartCharacters() {
+  MOZ_ASSERT(!mInCharacters, "Already in characters!");
+  FlushChars();
+  Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement);
+  mInCharacters = true;
+  mCurrentRun = CurrentNode();
+}
+
+// Closes the open character run and immediately opens a markup run: pending
+// text is flushed into the character span, that span is popped, and a fresh
+// class-less <span> becomes the current run.
+void nsHtml5Highlighter::EndCharactersAndStartMarkupRun() {
+  MOZ_ASSERT(mInCharacters, "Not in characters!");
+
+  // Finish the character run.
+  FlushChars();
+  Pop();
+  mInCharacters = false;
+
+  // Begin the markup run.
+  StartSpan();
+  mCurrentRun = CurrentNode();
+}
+
+// Flushes pending text, opens an <a> element carrying the attribute-value
+// class and counts it as one more open inline element.
+void nsHtml5Highlighter::StartA() {
+  FlushChars();
+  Push(nsGkAtoms::a, nullptr, NS_NewHTMLAnchorElement);
+  AddClass(sAttributeValue);
+  mInlinesOpen += 1;
+}
+
+// Closes the current markup run and starts a new character run. Every inline
+// element except the outermost one is closed first; the character at the
+// current position is then flushed into the outermost one before it is
+// closed as well.
+void nsHtml5Highlighter::FinishTag() {
+  for (; mInlinesOpen > 1;) {
+    EndSpanOrA();
+  }
+  FlushCurrent();  // >
+  EndSpanOrA();    // DATA
+  NS_ASSERTION(!mInlinesOpen, "mInlinesOpen got out of sync!");
+  StartCharacters();
+}
+
+// Emits the buffered text in [mCStart, mPos) as text under the current node.
+// The text is split after every line break; after each break an empty <span>
+// is appended and tagged with the new line number. Does nothing when there
+// is no pending text.
+void nsHtml5Highlighter::FlushChars() {
+  if (mCStart >= mPos) {
+    return;
+  }
+
+  char16_t* chars = mBuffer->getBuffer();
+  for (int32_t next = mCStart; next < mPos;) {
+    char16_t& ch = chars[next];
+    ++next;  // `next` is now one past the character being examined
+
+    if (ch == '\r') {
+      // The input this code sees has been normalized so that there are
+      // CR breaks and LF breaks but no CRLF breaks. Overwrite CR with LF
+      // to show consistent LF line breaks to layout. It is OK to mutate
+      // the input data, because there are no reparses in the View Source
+      // case, so we won't need the original data in the buffer anymore.
+      ch = '\n';
+    } else if (ch != '\n') {
+      continue;  // ordinary character: keep accumulating
+    }
+
+    // Line break: emit everything up to and including it.
+    if (mCStart < next) {
+      const int32_t lineLength = next - mCStart;
+      AppendCharacters(chars, mCStart, lineLength);
+      mCStart = next;
+    }
+
+    // Insert the marker span for the line that starts here.
+    ++mLineNumber;
+    Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement);
+    nsHtml5TreeOperation* lineIdOp = mOpQueue.AppendElement();
+    NS_ASSERTION(lineIdOp, "Tree op allocation failed.");
+    opAddLineNumberId lineIdPayload(CurrentNode(), mLineNumber);
+    lineIdOp->Init(mozilla::AsVariant(lineIdPayload));
+    Pop();
+  }
+
+  // Emit whatever follows the last line break.
+  if (mCStart < mPos) {
+    const int32_t tailLength = mPos - mCStart;
+    AppendCharacters(chars, mCStart, tailLength);
+    mCStart = mPos;
+  }
+}
+
+// Advances the position by one so that the character at the current position
+// is included, then flushes the pending text.
+void nsHtml5Highlighter::FlushCurrent() {
+  ++mPos;
+  FlushChars();
+}
+
+// Reports whether the operation queue has grown past the flush threshold.
+bool nsHtml5Highlighter::ShouldFlushOps() {
+  // Arbitrary threshold that doesn't have an exact justification.
+  // The general idea is to flush much, much sooner than reaching
+  // the maximum size of `nsTArray`.
+  const auto queuedOps = mOpQueue.Length();
+  return queuedOps > 100000;
+}
+
+// Moves all queued tree operations into the op sink.
+// Returns true if there was anything to move, false if the queue was empty,
+// and NS_ERROR_OUT_OF_MEMORY if the sink reports that the move failed.
+mozilla::Result<bool, nsresult> nsHtml5Highlighter::FlushOps() {
+  const bool hadOps = !mOpQueue.IsEmpty();
+  if (hadOps && !mOpSink->MoveOpsFrom(mOpQueue)) {
+    return Err(NS_ERROR_OUT_OF_MEMORY);
+  }
+  return hadOps;
+}
+
+// Queues a view-source href for aValue when aName is one of the attribute
+// names listed below; does nothing for any other attribute.
+void nsHtml5Highlighter::MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName,
+                                                    nsHtml5String aValue) {
+  const bool isLinkAttribute =
+      nsHtml5AttributeName::ATTR_HREF == aName ||
+      nsHtml5AttributeName::ATTR_SRC == aName ||
+      nsHtml5AttributeName::ATTR_ACTION == aName ||
+      nsHtml5AttributeName::ATTR_CITE == aName ||
+      nsHtml5AttributeName::ATTR_BACKGROUND == aName ||
+      nsHtml5AttributeName::ATTR_LONGDESC == aName ||
+      nsHtml5AttributeName::ATTR_XLINK_HREF == aName ||
+      nsHtml5AttributeName::ATTR_DEFINITIONURL == aName;
+  if (isLinkAttribute) {
+    AddViewSourceHref(aValue);
+  }
+}
+
+// Marks the current node with the entity class.
+void nsHtml5Highlighter::CompletedNamedCharacterReference() {
+  AddClass(sEntity);
+}
+
+// Hands out the next free slot of the current handle array. When the array
+// is full it is moved into mOldHandles and a fresh array is started. In
+// DEBUG builds the slot is pre-filled with a recognizable poison value.
+nsIContent** nsHtml5Highlighter::AllocateContentHandle() {
+  if (mHandlesUsed == NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH) {
+    // Keep the exhausted array alive; slots already handed out point into it.
+    mOldHandles.AppendElement(std::move(mHandles));
+    mHandles =
+        MakeUnique<nsIContent*[]>(NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH);
+    mHandlesUsed = 0;
+  }
+  nsIContent** slot = &mHandles[mHandlesUsed];
+  ++mHandlesUsed;
+#ifdef DEBUG
+  *slot = reinterpret_cast<nsIContent*>(uintptr_t(0xC0DEDBAD));
+#endif
+  return slot;
+}
+
+// Allocates a content handle and queues an operation that creates an HTML
+// element named aName with the given attributes, creator function and
+// intended parent. Returns the handle; the element is not appended anywhere
+// by this function.
+nsIContent** nsHtml5Highlighter::CreateElement(
+    nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
+    nsIContent** aIntendedParent,
+    mozilla::dom::HTMLContentCreatorFunction aCreator) {
+  MOZ_ASSERT(aName, "Got null name.");
+  nsIContent** handle = AllocateContentHandle();
+  opCreateHTMLElement createOp(handle, aName, aAttributes, aCreator,
+                               aIntendedParent,
+                               mozilla::dom::FROM_PARSER_NETWORK);
+  nsHtml5TreeOperation* slot = mOpQueue.AppendElement();
+  slot->Init(mozilla::AsVariant(createOp));
+  return handle;
+}
+
+// Returns the handle on top of the element stack. The stack must not be
+// empty.
+nsIContent** nsHtml5Highlighter::CurrentNode() {
+  MOZ_ASSERT(mStack.Length() >= 1, "Must have something on stack.");
+  const auto topIndex = mStack.Length() - 1;
+  return mStack[topIndex];
+}
+
+// Creates an element as a child of the current node, queues its append to
+// that node and makes it the new current node. The stack must already hold
+// the root.
+void nsHtml5Highlighter::Push(
+    nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
+    mozilla::dom::HTMLContentCreatorFunction aCreator) {
+  MOZ_ASSERT(mStack.Length() >= 1, "Pushing without root.");
+  nsIContent** parent = CurrentNode();
+  // CreateElement() must complete as its own statement so that the creation
+  // op is queued before the append op below.
+  nsIContent** child = CreateElement(aName, aAttributes, parent, aCreator);
+  opAppend appendOp(child, parent, mozilla::dom::FROM_PARSER_NETWORK);
+  mOpQueue.AppendElement()->Init(mozilla::AsVariant(appendOp));
+  mStack.AppendElement(child);
+}
+
+// Removes the top element from the stack. At least two elements must be
+// present, so the bottom one is never popped.
+void nsHtml5Highlighter::Pop() {
+  MOZ_ASSERT(mStack.Length() >= 2, "Popping when stack too short.");
+  mStack.RemoveLastElement();
+}
+
+// Copies aLength UTF-16 code units starting at aBuffer[aStart] into a newly
+// allocated array and queues an operation that appends them as text to the
+// current node.
+// NOTE(review): the copy is not freed here; presumably the queued operation
+// takes ownership of it - confirm against opAppendText.
+void nsHtml5Highlighter::AppendCharacters(const char16_t* aBuffer,
+                                          int32_t aStart, int32_t aLength) {
+  MOZ_ASSERT(aBuffer, "Null buffer");
+
+  const char16_t* source = aBuffer + aStart;
+  char16_t* ownedCopy = new char16_t[aLength];
+  memcpy(ownedCopy, source, aLength * sizeof(char16_t));
+
+  opAppendText textOp(CurrentNode(), ownedCopy, aLength);
+  mOpQueue.AppendElement()->Init(mozilla::AsVariant(textOp));
+}
+
+// Queues an operation that adds aClass to the current node. The pointer is
+// stored as-is (constness removed), not copied.
+void nsHtml5Highlighter::AddClass(const char16_t* aClass) {
+  opAddClass classOp(CurrentNode(), const_cast<char16_t*>(aClass));
+  nsHtml5TreeOperation* slot = mOpQueue.AppendElement();
+  slot->Init(mozilla::AsVariant(classOp));
+}
+
+// Queues an operation that attaches a view-source href built from aValue to
+// the current node. The value is copied into a newly allocated,
+// zero-terminated array.
+void nsHtml5Highlighter::AddViewSourceHref(nsHtml5String aValue) {
+  const auto valueLength = aValue.Length();
+  char16_t* ownedCopy = new char16_t[valueLength + 1];
+  aValue.CopyToBuffer(ownedCopy);
+  ownedCopy[valueLength] = 0;
+
+  opAddViewSourceHref hrefOp(CurrentNode(), ownedCopy, valueLength);
+  mOpQueue.AppendElement()->Init(mozilla::AsVariant(hrefOp));
+}
+
+// Queues an operation carrying aValue as the view-source base. Only the
+// first call has an effect; later calls return immediately until Rewind()
+// clears the flag. The value is copied into a newly allocated,
+// zero-terminated array.
+void nsHtml5Highlighter::AddBase(nsHtml5String aValue) {
+  if (!mSeenBase) {
+    mSeenBase = true;
+
+    const auto valueLength = aValue.Length();
+    char16_t* ownedCopy = new char16_t[valueLength + 1];
+    aValue.CopyToBuffer(ownedCopy);
+    ownedCopy[valueLength] = 0;
+
+    opAddViewSourceBase baseOp(ownedCopy, valueLength);
+    mOpQueue.AppendElement()->Init(mozilla::AsVariant(baseOp));
+  }
+}
+
+void nsHtml5Highlighter::AddErrorToCurrentNode(const char* aMsgId) {
+ nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
+ NS_ASSERTION(treeOp, "Tree op allocation failed.");
+ opAddErrorType operation(CurrentNode(), (char*)aMsgId);
+ treeOp->Init(mozilla::AsVariant(operation));
+}
+
+void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId) {
+ MOZ_ASSERT(mCurrentRun, "Adding error to run without one!");
+ nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
+ NS_ASSERTION(treeOp, "Tree op allocation failed.");
+ opAddErrorType operation(mCurrentRun, (char*)aMsgId);
+ treeOp->Init(mozilla::AsVariant(operation));
+}
+
+// Same as the single-argument overload, but also passes aName along with
+// the error. A current run must exist.
+void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId,
+                                              nsAtom* aName) {
+  MOZ_ASSERT(mCurrentRun, "Adding error to run without one!");
+  nsHtml5TreeOperation* errorSlot = mOpQueue.AppendElement();
+  NS_ASSERTION(errorSlot, "Tree op allocation failed.");
+  opAddErrorType errorOp(mCurrentRun, const_cast<char*>(aMsgId), aName);
+  errorSlot->Init(mozilla::AsVariant(errorOp));
+}
+
+// Same as the single-argument overload, but also passes aName and aOther
+// along with the error. A current run must exist.
+void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName,
+                                              nsAtom* aOther) {
+  MOZ_ASSERT(mCurrentRun, "Adding error to run without one!");
+  nsHtml5TreeOperation* errorSlot = mOpQueue.AppendElement();
+  NS_ASSERTION(errorSlot, "Tree op allocation failed.");
+  opAddErrorType errorOp(mCurrentRun, const_cast<char*>(aMsgId), aName,
+                         aOther);
+  errorSlot->Init(mozilla::AsVariant(errorOp));
+}
+
+void nsHtml5Highlighter::AddErrorToCurrentAmpersand(const char* aMsgId) {
+ MOZ_ASSERT(mAmpersand, "Adding error to ampersand without one!");
+ nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
+ NS_ASSERTION(treeOp, "Tree op allocation failed.");
+ opAddErrorType operation(mAmpersand, (char*)aMsgId);
+ treeOp->Init(mozilla::AsVariant(operation));
+}
+
+void nsHtml5Highlighter::AddErrorToCurrentSlash(const char* aMsgId) {
+ MOZ_ASSERT(mSlash, "Adding error to slash without one!");
+ nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement();
+ NS_ASSERTION(treeOp, "Tree op allocation failed.");
+ opAddErrorType operation(mSlash, (char*)aMsgId);
+ treeOp->Init(mozilla::AsVariant(operation));
+}