From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- parser/html/nsHtml5Highlighter.h | 444 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 444 insertions(+) create mode 100644 parser/html/nsHtml5Highlighter.h (limited to 'parser/html/nsHtml5Highlighter.h') diff --git a/parser/html/nsHtml5Highlighter.h b/parser/html/nsHtml5Highlighter.h new file mode 100644 index 0000000000..4966b21608 --- /dev/null +++ b/parser/html/nsHtml5Highlighter.h @@ -0,0 +1,444 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef nsHtml5Highlighter_h +#define nsHtml5Highlighter_h + +#include "nsCOMPtr.h" +#include "nsHtml5TreeOperation.h" +#include "nsHtml5UTF16Buffer.h" +#include "nsHtml5TreeOperation.h" +#include "nsAHtml5TreeOpSink.h" + +#define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512 + +/** + * A state machine for generating HTML for display in View Source based on + * the transitions the tokenizer makes on the source being viewed. + */ +class nsHtml5Highlighter { + public: + /** + * The constructor. + * + * @param aOpSink the sink for the tree ops generated by this highlighter + */ + explicit nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink); + + /** + * The destructor. + */ + ~nsHtml5Highlighter(); + + /** + * Set the op sink (for speculation). + */ + void SetOpSink(nsAHtml5TreeOpSink* aOpSink); + + /** + * Reset state to after generated head but before processing any of the input + * stream. + */ + void Rewind(); + + /** + * Starts the generated document. + */ + void Start(const nsAutoString& aTitle); + + /** + * Updates the charset source via the op queue. + */ + void UpdateCharsetSource(nsCharsetSource aCharsetSource); + + /** + * Report a tokenizer state transition. + * + * @param aState the state being transitioned to + * @param aReconsume whether this is a reconsuming transition + * @param aPos the tokenizer's current position into the buffer + */ + int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos); + + /** + * Report end of file. + * + * Returns `true` normally and `false` on OOM. + */ + [[nodiscard]] bool End(); + + /** + * Set the current buffer being tokenized + */ + void SetBuffer(nsHtml5UTF16Buffer* aBuffer); + + /** + * Let go of the buffer being tokenized but first, flush text from it. + * + * @param aPos the first UTF-16 code unit not to flush + */ + void DropBuffer(int32_t aPos); + + /** + * Query whether there are some many ops in the queue + * that they should be flushed now. + * + * @return true if FlushOps() should be called now + */ + bool ShouldFlushOps(); + + /** + * Flush the tree ops into the sink. + * + * @return Ok(true) if there were ops to flush, Ok(false) + * if there were no ops to flush and Err() on OOM. + */ + mozilla::Result FlushOps(); + + /** + * Linkify the current attribute value if the attribute name is one of + * known URL attributes. (When executing tree ops, javascript: URLs will + * not be linkified, though.) + * + * @param aName the name of the attribute + * @param aValue the value of the attribute + */ + void MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName, + nsHtml5String aValue); + + /** + * Inform the highlighter that the tokenizer successfully completed a + * named character reference. + */ + void CompletedNamedCharacterReference(); + + /** + * Adds an error annotation to the node that's currently on top of + * mStack. + * + * @param aMsgId the id of the message in the property file + */ + void AddErrorToCurrentNode(const char* aMsgId); + + /** + * Adds an error annotation to the node that corresponds to the most + * recently opened markup declaration/tag span, character reference or + * run of text. + * + * @param aMsgId the id of the message in the property file + */ + void AddErrorToCurrentRun(const char* aMsgId); + + /** + * Adds an error annotation to the node that corresponds to the most + * recently opened markup declaration/tag span, character reference or + * run of text with one atom to use when formatting the message. + * + * @param aMsgId the id of the message in the property file + * @param aName the atom + */ + void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName); + + /** + * Adds an error annotation to the node that corresponds to the most + * recently opened markup declaration/tag span, character reference or + * run of text with two atoms to use when formatting the message. + * + * @param aMsgId the id of the message in the property file + * @param aName the first atom + * @param aOther the second atom + */ + void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName, nsAtom* aOther); + + /** + * Adds an error annotation to the node that corresponds to the most + * recent potentially character reference-starting ampersand. + * + * @param aMsgId the id of the message in the property file + */ + void AddErrorToCurrentAmpersand(const char* aMsgId); + + /** + * Adds an error annotation to the node that corresponds to the most + * recent potentially self-closing slash. + * + * @param aMsgId the id of the message in the property file + */ + void AddErrorToCurrentSlash(const char* aMsgId); + + /** + * Enqueues a tree op for adding base to the urls with the view-source: + * + * @param aValue the base URL to add + */ + void AddBase(nsHtml5String aValue); + + /** + * Starts a wrapper around a run of characters. + */ + void StartCharacters(); + + private: + /** + * Starts a span with no class. + */ + void StartSpan(); + + /** + * Starts a and sets the class attribute on it. + * + * @param aClass the class to set (MUST be a static string that does not + * need to be released!) + */ + void StartSpan(const char16_t* aClass); + + /** + * End the current or in the highlighter output. + */ + void EndSpanOrA(); + + /** + * Ends a wrapper around a run of characters. + */ + void EndCharactersAndStartMarkupRun(); + + /** + * Starts an . + */ + void StartA(); + + /** + * Flushes characters up to but not including the current one. + */ + void FlushChars(); + + /** + * Flushes characters up to and including the current one. + */ + void FlushCurrent(); + + /** + * Finishes highlighting a tag in the input data by closing the open + * and elements in the highlighter output and then starts + * another for potentially highlighting characters potentially + * appearing next. + */ + void FinishTag(); + + /** + * Adds a class attribute to the current node. + * + * @param aClass the class to set (MUST be a static string that does not + * need to be released!) + */ + void AddClass(const char16_t* aClass); + + /** + * Allocates a handle for an element. + * + * See the documentation for nsHtml5TreeBuilder::AllocateContentHandle() + * in nsHtml5TreeBuilderHSupplement.h. + * + * @return the handle + */ + nsIContent** AllocateContentHandle(); + + /** + * Enqueues an element creation tree operation. + * + * @param aName the name of the element + * @param aAttributes the attribute holder (ownership will be taken) or + * nullptr for no attributes + * @param aIntendedParent the intended parent node for the created element + * @param aCreator the content creator function + * @return the handle for the element that will be created + */ + nsIContent** CreateElement(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes, + nsIContent** aIntendedParent, + mozilla::dom::HTMLContentCreatorFunction aCreator); + + /** + * Gets the handle for the current node. May be called only after the + * root element has been set. + * + * @return the handle for the current node + */ + nsIContent** CurrentNode(); + + /** + * Create an element and push it (its handle) on the stack. + * + * @param aName the name of the element + * @param aAttributes the attribute holder (ownership will be taken) or + * nullptr for no attributes + * @param aCreator the content creator function + */ + void Push(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes, + mozilla::dom::HTMLContentCreatorFunction aCreator); + + /** + * Pops the current node off the stack. + */ + void Pop(); + + /** + * Appends text content to the current node. + * + * @param aBuffer the buffer to copy from + * @param aStart the index of the first code unit to copy + * @param aLength the number of code units to copy + */ + void AppendCharacters(const char16_t* aBuffer, int32_t aStart, + int32_t aLength); + + /** + * Enqueues a tree op for adding an href attribute with the view-source: + * URL scheme to the current node. + * + * @param aValue the (potentially relative) URL to link to + */ + void AddViewSourceHref(nsHtml5String aValue); + + /** + * The state we are transitioning away from. + */ + int32_t mState; + + /** + * The index of the first UTF-16 code unit in mBuffer that hasn't been + * flushed yet. + */ + int32_t mCStart; + + /** + * The position of the code unit in mBuffer that caused the current + * transition. + */ + int32_t mPos; + + /** + * The current line number. + */ + int32_t mLineNumber; + + /** + * The number of inline elements open inside the
 excluding the
+   * span potentially wrapping a run of characters.
+   */
+  int32_t mInlinesOpen;
+
+  /**
+   * Whether there's a span wrapping a run of characters (excluding CDATA
+   * section) open.
+   */
+  bool mInCharacters;
+
+  /**
+   * The current buffer being tokenized.
+   */
+  nsHtml5UTF16Buffer* mBuffer;
+
+  /**
+   * The outgoing tree op queue.
+   */
+  nsTArray mOpQueue;
+
+  /**
+   * The tree op stage for the tree op executor or a speculation when looking
+   * for meta charset.
+   *
+   * The op sink is owned by the nsHtml5TreeOpExecutor, which outlives this
+   * object, because this object is owned by the nsHtml5Tokenizer instance that
+   * is owned by the nsHtml5StreamParser, which keeps the executor alive via
+   * nsHtml5Streamparser::mExecutorFlusher.
+   */
+  nsAHtml5TreeOpSink* mOpSink;
+
+  /**
+   * The most recently opened markup declaration/tag or run of characters.
+   */
+  nsIContent** mCurrentRun;
+
+  /**
+   * The most recent ampersand in a place where character references were
+   * allowed.
+   */
+  nsIContent** mAmpersand;
+
+  /**
+   * The most recent slash that might become a self-closing slash.
+   */
+  nsIContent** mSlash;
+
+  /**
+   * Memory for element handles.
+   */
+  mozilla::UniquePtr mHandles;
+
+  /**
+   * Number of handles used in mHandles
+   */
+  int32_t mHandlesUsed;
+
+  /**
+   * A holder for old contents of mHandles
+   */
+  nsTArray> mOldHandles;
+
+  /**
+   * The element stack.
+   */
+  nsTArray mStack;
+
+  /**
+   * The string "comment"
+   */
+  static char16_t sComment[];
+
+  /**
+   * The string "cdata"
+   */
+  static char16_t sCdata[];
+
+  /**
+   * The string "start-tag"
+   */
+  static char16_t sStartTag[];
+
+  /**
+   * The string "attribute-name"
+   */
+  static char16_t sAttributeName[];
+
+  /**
+   * The string "attribute-value"
+   */
+  static char16_t sAttributeValue[];
+
+  /**
+   * The string "end-tag"
+   */
+  static char16_t sEndTag[];
+
+  /**
+   * The string "doctype"
+   */
+  static char16_t sDoctype[];
+
+  /**
+   * The string "entity"
+   */
+  static char16_t sEntity[];
+
+  /**
+   * The string "pi"
+   */
+  static char16_t sPi[];
+
+  /**
+   * Whether base is already visited once.
+   */
+  bool mSeenBase;
+};
+
+#endif  // nsHtml5Highlighter_h
-- 
cgit v1.2.3