diff options
Diffstat (limited to 'parser/html/nsHtml5Highlighter.h')
-rw-r--r-- | parser/html/nsHtml5Highlighter.h | 444 |
1 files changed, 444 insertions, 0 deletions
diff --git a/parser/html/nsHtml5Highlighter.h b/parser/html/nsHtml5Highlighter.h new file mode 100644 index 0000000000..4966b21608 --- /dev/null +++ b/parser/html/nsHtml5Highlighter.h @@ -0,0 +1,444 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef nsHtml5Highlighter_h +#define nsHtml5Highlighter_h + +#include "nsCOMPtr.h" +#include "nsHtml5TreeOperation.h" +#include "nsHtml5UTF16Buffer.h" +#include "nsHtml5TreeOperation.h" +#include "nsAHtml5TreeOpSink.h" + +#define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512 + +/** + * A state machine for generating HTML for display in View Source based on + * the transitions the tokenizer makes on the source being viewed. + */ +class nsHtml5Highlighter { + public: + /** + * The constructor. + * + * @param aOpSink the sink for the tree ops generated by this highlighter + */ + explicit nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink); + + /** + * The destructor. + */ + ~nsHtml5Highlighter(); + + /** + * Set the op sink (for speculation). + */ + void SetOpSink(nsAHtml5TreeOpSink* aOpSink); + + /** + * Reset state to after generated head but before processing any of the input + * stream. + */ + void Rewind(); + + /** + * Starts the generated document. + */ + void Start(const nsAutoString& aTitle); + + /** + * Updates the charset source via the op queue. + */ + void UpdateCharsetSource(nsCharsetSource aCharsetSource); + + /** + * Report a tokenizer state transition. + * + * @param aState the state being transitioned to + * @param aReconsume whether this is a reconsuming transition + * @param aPos the tokenizer's current position into the buffer + * @return NOTE(review): the meaning of the returned int32_t is not stated in + * this header; presumably the state the tokenizer should continue in -- + * confirm against the implementation + */ + int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos); + + /** + * Report end of file. + * + * Returns `true` normally and `false` on OOM. 
+ */ + [[nodiscard]] bool End(); + + /** + * Set the current buffer being tokenized + */ + void SetBuffer(nsHtml5UTF16Buffer* aBuffer); + + /** + * Let go of the buffer being tokenized but first, flush text from it. + * + * @param aPos the first UTF-16 code unit not to flush + */ + void DropBuffer(int32_t aPos); + + /** + * Query whether there are so many ops in the queue + * that they should be flushed now. + * + * @return true if FlushOps() should be called now + */ + bool ShouldFlushOps(); + + /** + * Flush the tree ops into the sink. + * + * @return Ok(true) if there were ops to flush, Ok(false) + * if there were no ops to flush and Err() on OOM. + */ + mozilla::Result<bool, nsresult> FlushOps(); + + /** + * Linkify the current attribute value if the attribute name is one of + * known URL attributes. (When executing tree ops, javascript: URLs will + * not be linkified, though.) + * + * @param aName the name of the attribute + * @param aValue the value of the attribute + */ + void MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName, + nsHtml5String aValue); + + /** + * Inform the highlighter that the tokenizer successfully completed a + * named character reference. + */ + void CompletedNamedCharacterReference(); + + /** + * Adds an error annotation to the node that's currently on top of + * mStack. + * + * @param aMsgId the id of the message in the property file + */ + void AddErrorToCurrentNode(const char* aMsgId); + + /** + * Adds an error annotation to the node that corresponds to the most + * recently opened markup declaration/tag span, character reference or + * run of text. + * + * @param aMsgId the id of the message in the property file + */ + void AddErrorToCurrentRun(const char* aMsgId); + + /** + * Adds an error annotation to the node that corresponds to the most + * recently opened markup declaration/tag span, character reference or + * run of text with one atom to use when formatting the message. 
+ * + * @param aMsgId the id of the message in the property file + * @param aName the atom + */ + void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName); + + /** + * Adds an error annotation to the node that corresponds to the most + * recently opened markup declaration/tag span, character reference or + * run of text with two atoms to use when formatting the message. + * + * @param aMsgId the id of the message in the property file + * @param aName the first atom + * @param aOther the second atom + */ + void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName, nsAtom* aOther); + + /** + * Adds an error annotation to the node that corresponds to the most + * recent potentially character reference-starting ampersand. + * + * @param aMsgId the id of the message in the property file + */ + void AddErrorToCurrentAmpersand(const char* aMsgId); + + /** + * Adds an error annotation to the node that corresponds to the most + * recent potentially self-closing slash. + * + * @param aMsgId the id of the message in the property file + */ + void AddErrorToCurrentSlash(const char* aMsgId); + + /** + * Enqueues a tree op for adding base to the URLs with the view-source: + * URL scheme. + * + * @param aValue the base URL to add + */ + void AddBase(nsHtml5String aValue); + + /** + * Starts a wrapper around a run of characters. + */ + void StartCharacters(); + + private: + /** + * Starts a span with no class. + */ + void StartSpan(); + + /** + * Starts a <span> and sets the class attribute on it. + * + * @param aClass the class to set (MUST be a static string that does not + * need to be released!) + */ + void StartSpan(const char16_t* aClass); + + /** + * End the current <span> or <a> in the highlighter output. + */ + void EndSpanOrA(); + + /** + * Ends a wrapper around a run of characters. + */ + void EndCharactersAndStartMarkupRun(); + + /** + * Starts an <a>. + */ + void StartA(); + + /** + * Flushes characters up to but not including the current one. 
+ */ + void FlushChars(); + + /** + * Flushes characters up to and including the current one. + */ + void FlushCurrent(); + + /** + * Finishes highlighting a tag in the input data by closing the open + * <span> and <a> elements in the highlighter output and then starts + * another <span> for potentially highlighting characters potentially + * appearing next. + */ + void FinishTag(); + + /** + * Adds a class attribute to the current node. + * + * @param aClass the class to set (MUST be a static string that does not + * need to be released!) + */ + void AddClass(const char16_t* aClass); + + /** + * Allocates a handle for an element. + * + * See the documentation for nsHtml5TreeBuilder::AllocateContentHandle() + * in nsHtml5TreeBuilderHSupplement.h. + * + * @return the handle + */ + nsIContent** AllocateContentHandle(); + + /** + * Enqueues an element creation tree operation. + * + * @param aName the name of the element + * @param aAttributes the attribute holder (ownership will be taken) or + * nullptr for no attributes + * @param aIntendedParent the intended parent node for the created element + * @param aCreator the content creator function + * @return the handle for the element that will be created + */ + nsIContent** CreateElement(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes, + nsIContent** aIntendedParent, + mozilla::dom::HTMLContentCreatorFunction aCreator); + + /** + * Gets the handle for the current node. May be called only after the + * root element has been set. + * + * @return the handle for the current node + */ + nsIContent** CurrentNode(); + + /** + * Create an element and push it (its handle) on the stack. 
+ * + * @param aName the name of the element + * @param aAttributes the attribute holder (ownership will be taken) or + * nullptr for no attributes + * @param aCreator the content creator function + */ + void Push(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes, + mozilla::dom::HTMLContentCreatorFunction aCreator); + + /** + * Pops the current node off the stack. + */ + void Pop(); + + /** + * Appends text content to the current node. + * + * @param aBuffer the buffer to copy from + * @param aStart the index of the first code unit to copy + * @param aLength the number of code units to copy + */ + void AppendCharacters(const char16_t* aBuffer, int32_t aStart, + int32_t aLength); + + /** + * Enqueues a tree op for adding an href attribute with the view-source: + * URL scheme to the current node. + * + * @param aValue the (potentially relative) URL to link to + */ + void AddViewSourceHref(nsHtml5String aValue); + + /** + * The state we are transitioning away from. + */ + int32_t mState; + + /** + * The index of the first UTF-16 code unit in mBuffer that hasn't been + * flushed yet. + */ + int32_t mCStart; + + /** + * The position of the code unit in mBuffer that caused the current + * transition. + */ + int32_t mPos; + + /** + * The current line number. + */ + int32_t mLineNumber; + + /** + * The number of inline elements open inside the <pre> excluding the + * span potentially wrapping a run of characters. + */ + int32_t mInlinesOpen; + + /** + * Whether there's a span wrapping a run of characters (excluding CDATA + * section) open. + */ + bool mInCharacters; + + /** + * The current buffer being tokenized. + */ + nsHtml5UTF16Buffer* mBuffer; + + /** + * The outgoing tree op queue. + */ + nsTArray<nsHtml5TreeOperation> mOpQueue; + + /** + * The tree op stage for the tree op executor or a speculation when looking + * for meta charset. 
+ * + * The op sink is owned by the nsHtml5TreeOpExecutor, which outlives this + * object, because this object is owned by the nsHtml5Tokenizer instance that + * is owned by the nsHtml5StreamParser, which keeps the executor alive via + * nsHtml5StreamParser::mExecutorFlusher. + */ + nsAHtml5TreeOpSink* mOpSink; + + /** + * The most recently opened markup declaration/tag or run of characters. + */ + nsIContent** mCurrentRun; + + /** + * The most recent ampersand in a place where character references were + * allowed. + */ + nsIContent** mAmpersand; + + /** + * The most recent slash that might become a self-closing slash. + */ + nsIContent** mSlash; + + /** + * Memory for element handles. + */ + mozilla::UniquePtr<nsIContent*[]> mHandles; + + /** + * Number of handles used in mHandles + */ + int32_t mHandlesUsed; + + /** + * A holder for old contents of mHandles + */ + nsTArray<mozilla::UniquePtr<nsIContent*[]>> mOldHandles; + + /** + * The element stack. + */ + nsTArray<nsIContent**> mStack; + + /** + * The string "comment" + */ + static char16_t sComment[]; + + /** + * The string "cdata" + */ + static char16_t sCdata[]; + + /** + * The string "start-tag" + */ + static char16_t sStartTag[]; + + /** + * The string "attribute-name" + */ + static char16_t sAttributeName[]; + + /** + * The string "attribute-value" + */ + static char16_t sAttributeValue[]; + + /** + * The string "end-tag" + */ + static char16_t sEndTag[]; + + /** + * The string "doctype" + */ + static char16_t sDoctype[]; + + /** + * The string "entity" + */ + static char16_t sEntity[]; + + /** + * The string "pi" + */ + static char16_t sPi[]; + + /** + * Whether base is already visited once. + */ + bool mSeenBase; +}; + +#endif // nsHtml5Highlighter_h |