summaryrefslogtreecommitdiffstats
path: root/parser/html/nsHtml5Highlighter.h
diff options
context:
space:
mode:
Diffstat (limited to 'parser/html/nsHtml5Highlighter.h')
-rw-r--r--parser/html/nsHtml5Highlighter.h444
1 files changed, 444 insertions, 0 deletions
diff --git a/parser/html/nsHtml5Highlighter.h b/parser/html/nsHtml5Highlighter.h
new file mode 100644
index 0000000000..4966b21608
--- /dev/null
+++ b/parser/html/nsHtml5Highlighter.h
@@ -0,0 +1,444 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef nsHtml5Highlighter_h
+#define nsHtml5Highlighter_h
+
+#include "nsCOMPtr.h"
+#include "nsHtml5TreeOperation.h"
+#include "nsHtml5UTF16Buffer.h"
+#include "nsHtml5TreeOperation.h"
+#include "nsAHtml5TreeOpSink.h"
+
+#define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512
+
+/**
+ * A state machine for generating HTML for display in View Source based on
+ * the transitions the tokenizer makes on the source being viewed.
+ */
+class nsHtml5Highlighter {
+ public:
+ /**
+ * The constructor.
+ *
+ * @param aOpSink the sink for the tree ops generated by this highlighter
+ */
+ explicit nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink);
+
+ /**
+ * The destructor.
+ */
+ ~nsHtml5Highlighter();
+
+ /**
+ * Set the op sink (for speculation).
+ *
+ * @param aOpSink the op sink to use from now on
+ */
+ void SetOpSink(nsAHtml5TreeOpSink* aOpSink);
+
+ /**
+ * Reset state to after generated head but before processing any of the input
+ * stream.
+ */
+ void Rewind();
+
+ /**
+ * Starts the generated document.
+ *
+ * @param aTitle NOTE(review): presumably the title of the generated
+ * document; confirm against the implementation
+ */
+ void Start(const nsAutoString& aTitle);
+
+ /**
+ * Updates the charset source via the op queue.
+ *
+ * @param aCharsetSource the charset source to set
+ */
+ void UpdateCharsetSource(nsCharsetSource aCharsetSource);
+
+ /**
+ * Report a tokenizer state transition.
+ *
+ * @param aState the state being transitioned to
+ * @param aReconsume whether this is a reconsuming transition
+ * @param aPos the tokenizer's current position into the buffer
+ * @return NOTE(review): the meaning of the returned value is not stated in
+ * this header; presumably a tokenizer state; confirm against the
+ * implementation
+ */
+ int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos);
+
+ /**
+ * Report end of file.
+ *
+ * Returns `true` normally and `false` on OOM.
+ */
+ [[nodiscard]] bool End();
+
+ /**
+ * Set the current buffer being tokenized.
+ *
+ * @param aBuffer the buffer being tokenized
+ */
+ void SetBuffer(nsHtml5UTF16Buffer* aBuffer);
+
+ /**
+ * Let go of the buffer being tokenized but first, flush text from it.
+ *
+ * @param aPos the first UTF-16 code unit not to flush
+ */
+ void DropBuffer(int32_t aPos);
+
+ /**
+ * Query whether there are so many ops in the queue
+ * that they should be flushed now.
+ *
+ * @return true if FlushOps() should be called now
+ */
+ bool ShouldFlushOps();
+
+ /**
+ * Flush the tree ops into the sink.
+ *
+ * @return Ok(true) if there were ops to flush, Ok(false)
+ * if there were no ops to flush and Err() on OOM.
+ */
+ mozilla::Result<bool, nsresult> FlushOps();
+
+ /**
+ * Linkify the current attribute value if the attribute name is one of
+ * known URL attributes. (When executing tree ops, javascript: URLs will
+ * not be linkified, though.)
+ *
+ * @param aName the name of the attribute
+ * @param aValue the value of the attribute
+ */
+ void MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName,
+ nsHtml5String aValue);
+
+ /**
+ * Inform the highlighter that the tokenizer successfully completed a
+ * named character reference.
+ */
+ void CompletedNamedCharacterReference();
+
+ /**
+ * Adds an error annotation to the node that's currently on top of
+ * mStack.
+ *
+ * @param aMsgId the id of the message in the property file
+ */
+ void AddErrorToCurrentNode(const char* aMsgId);
+
+ /**
+ * Adds an error annotation to the node that corresponds to the most
+ * recently opened markup declaration/tag span, character reference or
+ * run of text.
+ *
+ * @param aMsgId the id of the message in the property file
+ */
+ void AddErrorToCurrentRun(const char* aMsgId);
+
+ /**
+ * Adds an error annotation to the node that corresponds to the most
+ * recently opened markup declaration/tag span, character reference or
+ * run of text with one atom to use when formatting the message.
+ *
+ * @param aMsgId the id of the message in the property file
+ * @param aName the atom
+ */
+ void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName);
+
+ /**
+ * Adds an error annotation to the node that corresponds to the most
+ * recently opened markup declaration/tag span, character reference or
+ * run of text with two atoms to use when formatting the message.
+ *
+ * @param aMsgId the id of the message in the property file
+ * @param aName the first atom
+ * @param aOther the second atom
+ */
+ void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName, nsAtom* aOther);
+
+ /**
+ * Adds an error annotation to the node that corresponds to the most
+ * recent potentially character reference-starting ampersand.
+ *
+ * @param aMsgId the id of the message in the property file
+ */
+ void AddErrorToCurrentAmpersand(const char* aMsgId);
+
+ /**
+ * Adds an error annotation to the node that corresponds to the most
+ * recent potentially self-closing slash.
+ *
+ * @param aMsgId the id of the message in the property file
+ */
+ void AddErrorToCurrentSlash(const char* aMsgId);
+
+ /**
+ * Enqueues a tree op for adding base to the URLs with the view-source:
+ * URL scheme.
+ *
+ * @param aValue the base URL to add
+ */
+ void AddBase(nsHtml5String aValue);
+
+ /**
+ * Starts a wrapper around a run of characters.
+ */
+ void StartCharacters();
+
+ private:
+ /**
+ * Starts a span with no class.
+ */
+ void StartSpan();
+
+ /**
+ * Starts a <span> and sets the class attribute on it.
+ *
+ * @param aClass the class to set (MUST be a static string that does not
+ * need to be released!)
+ */
+ void StartSpan(const char16_t* aClass);
+
+ /**
+ * End the current <span> or <a> in the highlighter output.
+ */
+ void EndSpanOrA();
+
+ /**
+ * Ends a wrapper around a run of characters.
+ *
+ * NOTE(review): the method name suggests that this also starts a markup
+ * run; confirm against the implementation.
+ */
+ void EndCharactersAndStartMarkupRun();
+
+ /**
+ * Starts an <a>.
+ */
+ void StartA();
+
+ /**
+ * Flushes characters up to but not including the current one.
+ */
+ void FlushChars();
+
+ /**
+ * Flushes characters up to and including the current one.
+ */
+ void FlushCurrent();
+
+ /**
+ * Finishes highlighting a tag in the input data by closing the open
+ * <span> and <a> elements in the highlighter output and then starts
+ * another <span> for potentially highlighting characters potentially
+ * appearing next.
+ */
+ void FinishTag();
+
+ /**
+ * Adds a class attribute to the current node.
+ *
+ * @param aClass the class to set (MUST be a static string that does not
+ * need to be released!)
+ */
+ void AddClass(const char16_t* aClass);
+
+ /**
+ * Allocates a handle for an element.
+ *
+ * See the documentation for nsHtml5TreeBuilder::AllocateContentHandle()
+ * in nsHtml5TreeBuilderHSupplement.h.
+ *
+ * @return the handle
+ */
+ nsIContent** AllocateContentHandle();
+
+ /**
+ * Enqueues an element creation tree operation.
+ *
+ * @param aName the name of the element
+ * @param aAttributes the attribute holder (ownership will be taken) or
+ * nullptr for no attributes
+ * @param aIntendedParent the intended parent node for the created element
+ * @param aCreator the content creator function
+ * @return the handle for the element that will be created
+ */
+ nsIContent** CreateElement(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
+ nsIContent** aIntendedParent,
+ mozilla::dom::HTMLContentCreatorFunction aCreator);
+
+ /**
+ * Gets the handle for the current node. May be called only after the
+ * root element has been set.
+ *
+ * @return the handle for the current node
+ */
+ nsIContent** CurrentNode();
+
+ /**
+ * Create an element and push it (its handle) on the stack.
+ *
+ * @param aName the name of the element
+ * @param aAttributes the attribute holder (ownership will be taken) or
+ * nullptr for no attributes
+ * @param aCreator the content creator function
+ */
+ void Push(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
+ mozilla::dom::HTMLContentCreatorFunction aCreator);
+
+ /**
+ * Pops the current node off the stack.
+ */
+ void Pop();
+
+ /**
+ * Appends text content to the current node.
+ *
+ * @param aBuffer the buffer to copy from
+ * @param aStart the index of the first code unit to copy
+ * @param aLength the number of code units to copy
+ */
+ void AppendCharacters(const char16_t* aBuffer, int32_t aStart,
+ int32_t aLength);
+
+ /**
+ * Enqueues a tree op for adding an href attribute with the view-source:
+ * URL scheme to the current node.
+ *
+ * @param aValue the (potentially relative) URL to link to
+ */
+ void AddViewSourceHref(nsHtml5String aValue);
+
+ /**
+ * The state we are transitioning away from.
+ */
+ int32_t mState;
+
+ /**
+ * The index of the first UTF-16 code unit in mBuffer that hasn't been
+ * flushed yet.
+ */
+ int32_t mCStart;
+
+ /**
+ * The position of the code unit in mBuffer that caused the current
+ * transition.
+ */
+ int32_t mPos;
+
+ /**
+ * The current line number.
+ */
+ int32_t mLineNumber;
+
+ /**
+ * The number of inline elements open inside the <pre> excluding the
+ * span potentially wrapping a run of characters.
+ */
+ int32_t mInlinesOpen;
+
+ /**
+ * Whether there's a span wrapping a run of characters (excluding CDATA
+ * section) open.
+ */
+ bool mInCharacters;
+
+ /**
+ * The current buffer being tokenized.
+ */
+ nsHtml5UTF16Buffer* mBuffer;
+
+ /**
+ * The outgoing tree op queue.
+ */
+ nsTArray<nsHtml5TreeOperation> mOpQueue;
+
+ /**
+ * The tree op stage for the tree op executor or a speculation when looking
+ * for meta charset.
+ *
+ * The op sink is owned by the nsHtml5TreeOpExecutor, which outlives this
+ * object, because this object is owned by the nsHtml5Tokenizer instance that
+ * is owned by the nsHtml5StreamParser, which keeps the executor alive via
+ * nsHtml5StreamParser::mExecutorFlusher.
+ */
+ nsAHtml5TreeOpSink* mOpSink;
+
+ /**
+ * The most recently opened markup declaration/tag or run of characters.
+ */
+ nsIContent** mCurrentRun;
+
+ /**
+ * The most recent ampersand in a place where character references were
+ * allowed.
+ */
+ nsIContent** mAmpersand;
+
+ /**
+ * The most recent slash that might become a self-closing slash.
+ */
+ nsIContent** mSlash;
+
+ /**
+ * Memory for element handles.
+ */
+ mozilla::UniquePtr<nsIContent*[]> mHandles;
+
+ /**
+ * Number of handles used in mHandles.
+ */
+ int32_t mHandlesUsed;
+
+ /**
+ * A holder for old contents of mHandles
+ */
+ nsTArray<mozilla::UniquePtr<nsIContent*[]>> mOldHandles;
+
+ /**
+ * The element stack.
+ */
+ nsTArray<nsIContent**> mStack;
+
+ /**
+ * The string "comment"
+ */
+ static char16_t sComment[];
+
+ /**
+ * The string "cdata"
+ */
+ static char16_t sCdata[];
+
+ /**
+ * The string "start-tag"
+ */
+ static char16_t sStartTag[];
+
+ /**
+ * The string "attribute-name"
+ */
+ static char16_t sAttributeName[];
+
+ /**
+ * The string "attribute-value"
+ */
+ static char16_t sAttributeValue[];
+
+ /**
+ * The string "end-tag"
+ */
+ static char16_t sEndTag[];
+
+ /**
+ * The string "doctype"
+ */
+ static char16_t sDoctype[];
+
+ /**
+ * The string "entity"
+ */
+ static char16_t sEntity[];
+
+ /**
+ * The string "pi"
+ */
+ static char16_t sPi[];
+
+ /**
+ * Whether base is already visited once.
+ */
+ bool mSeenBase;
+};
+
+#endif // nsHtml5Highlighter_h