/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #ifndef nsHtml5Highlighter_h #define nsHtml5Highlighter_h #include "nsCOMPtr.h" #include "nsHtml5TreeOperation.h" #include "nsHtml5UTF16Buffer.h" #include "nsHtml5TreeOperation.h" #include "nsAHtml5TreeOpSink.h" #define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512 /** * A state machine for generating HTML for display in View Source based on * the transitions the tokenizer makes on the source being viewed. */ class nsHtml5Highlighter { public: /** * The constructor. * * @param aOpSink the sink for the tree ops generated by this highlighter */ explicit nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink); /** * The destructor. */ ~nsHtml5Highlighter(); /** * Set the op sink (for speculation). */ void SetOpSink(nsAHtml5TreeOpSink* aOpSink); /** * Reset state to after generated head but before processing any of the input * stream. */ void Rewind(); /** * Starts the generated document. */ void Start(const nsAutoString& aTitle); /** * Updates the charset source via the op queue. */ void UpdateCharsetSource(nsCharsetSource aCharsetSource); /** * Report a tokenizer state transition. * * @param aState the state being transitioned to * @param aReconsume whether this is a reconsuming transition * @param aPos the tokenizer's current position into the buffer */ int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos); /** * Report end of file. * * Returns `true` normally and `false` on OOM. */ [[nodiscard]] bool End(); /** * Set the current buffer being tokenized */ void SetBuffer(nsHtml5UTF16Buffer* aBuffer); /** * Let go of the buffer being tokenized but first, flush text from it. * * @param aPos the first UTF-16 code unit not to flush */ void DropBuffer(int32_t aPos); /** * Query whether there are some many ops in the queue * that they should be flushed now. * * @return true if FlushOps() should be called now */ bool ShouldFlushOps(); /** * Flush the tree ops into the sink. * * @return Ok(true) if there were ops to flush, Ok(false) * if there were no ops to flush and Err() on OOM. */ mozilla::Result FlushOps(); /** * Linkify the current attribute value if the attribute name is one of * known URL attributes. (When executing tree ops, javascript: URLs will * not be linkified, though.) * * @param aName the name of the attribute * @param aValue the value of the attribute */ void MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName, nsHtml5String aValue); /** * Inform the highlighter that the tokenizer successfully completed a * named character reference. */ void CompletedNamedCharacterReference(); /** * Adds an error annotation to the node that's currently on top of * mStack. * * @param aMsgId the id of the message in the property file */ void AddErrorToCurrentNode(const char* aMsgId); /** * Adds an error annotation to the node that corresponds to the most * recently opened markup declaration/tag span, character reference or * run of text. * * @param aMsgId the id of the message in the property file */ void AddErrorToCurrentRun(const char* aMsgId); /** * Adds an error annotation to the node that corresponds to the most * recently opened markup declaration/tag span, character reference or * run of text with one atom to use when formatting the message. * * @param aMsgId the id of the message in the property file * @param aName the atom */ void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName); /** * Adds an error annotation to the node that corresponds to the most * recently opened markup declaration/tag span, character reference or * run of text with two atoms to use when formatting the message. * * @param aMsgId the id of the message in the property file * @param aName the first atom * @param aOther the second atom */ void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName, nsAtom* aOther); /** * Adds an error annotation to the node that corresponds to the most * recent potentially character reference-starting ampersand. * * @param aMsgId the id of the message in the property file */ void AddErrorToCurrentAmpersand(const char* aMsgId); /** * Adds an error annotation to the node that corresponds to the most * recent potentially self-closing slash. * * @param aMsgId the id of the message in the property file */ void AddErrorToCurrentSlash(const char* aMsgId); /** * Enqueues a tree op for adding base to the urls with the view-source: * * @param aValue the base URL to add */ void AddBase(nsHtml5String aValue); /** * Starts a wrapper around a run of characters. */ void StartCharacters(); private: /** * Starts a span with no class. */ void StartSpan(); /** * Starts a and sets the class attribute on it. * * @param aClass the class to set (MUST be a static string that does not * need to be released!) */ void StartSpan(const char16_t* aClass); /** * End the current or in the highlighter output. */ void EndSpanOrA(); /** * Ends a wrapper around a run of characters. */ void EndCharactersAndStartMarkupRun(); /** * Starts an . */ void StartA(); /** * Flushes characters up to but not including the current one. */ void FlushChars(); /** * Flushes characters up to and including the current one. */ void FlushCurrent(); /** * Finishes highlighting a tag in the input data by closing the open * and elements in the highlighter output and then starts * another for potentially highlighting characters potentially * appearing next. */ void FinishTag(); /** * Adds a class attribute to the current node. * * @param aClass the class to set (MUST be a static string that does not * need to be released!) */ void AddClass(const char16_t* aClass); /** * Allocates a handle for an element. * * See the documentation for nsHtml5TreeBuilder::AllocateContentHandle() * in nsHtml5TreeBuilderHSupplement.h. * * @return the handle */ nsIContent** AllocateContentHandle(); /** * Enqueues an element creation tree operation. * * @param aName the name of the element * @param aAttributes the attribute holder (ownership will be taken) or * nullptr for no attributes * @param aIntendedParent the intended parent node for the created element * @param aCreator the content creator function * @return the handle for the element that will be created */ nsIContent** CreateElement(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes, nsIContent** aIntendedParent, mozilla::dom::HTMLContentCreatorFunction aCreator); /** * Gets the handle for the current node. May be called only after the * root element has been set. * * @return the handle for the current node */ nsIContent** CurrentNode(); /** * Create an element and push it (its handle) on the stack. * * @param aName the name of the element * @param aAttributes the attribute holder (ownership will be taken) or * nullptr for no attributes * @param aCreator the content creator function */ void Push(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes, mozilla::dom::HTMLContentCreatorFunction aCreator); /** * Pops the current node off the stack. */ void Pop(); /** * Appends text content to the current node. * * @param aBuffer the buffer to copy from * @param aStart the index of the first code unit to copy * @param aLength the number of code units to copy */ void AppendCharacters(const char16_t* aBuffer, int32_t aStart, int32_t aLength); /** * Enqueues a tree op for adding an href attribute with the view-source: * URL scheme to the current node. * * @param aValue the (potentially relative) URL to link to */ void AddViewSourceHref(nsHtml5String aValue); /** * The state we are transitioning away from. */ int32_t mState; /** * The index of the first UTF-16 code unit in mBuffer that hasn't been * flushed yet. */ int32_t mCStart; /** * The position of the code unit in mBuffer that caused the current * transition. */ int32_t mPos; /** * The current line number. */ int32_t mLineNumber; /** * The number of inline elements open inside the
 excluding the
   * span potentially wrapping a run of characters.
   */
  int32_t mInlinesOpen;

  /**
   * Whether there's a span wrapping a run of characters (excluding CDATA
   * section) open.
   */
  bool mInCharacters;

  /**
   * The current buffer being tokenized.
   */
  nsHtml5UTF16Buffer* mBuffer;

  /**
   * The outgoing tree op queue.
   */
  nsTArray mOpQueue;

  /**
   * The tree op stage for the tree op executor or a speculation when looking
   * for meta charset.
   *
   * The op sink is owned by the nsHtml5TreeOpExecutor, which outlives this
   * object, because this object is owned by the nsHtml5Tokenizer instance that
   * is owned by the nsHtml5StreamParser, which keeps the executor alive via
   * nsHtml5Streamparser::mExecutorFlusher.
   */
  nsAHtml5TreeOpSink* mOpSink;

  /**
   * The most recently opened markup declaration/tag or run of characters.
   */
  nsIContent** mCurrentRun;

  /**
   * The most recent ampersand in a place where character references were
   * allowed.
   */
  nsIContent** mAmpersand;

  /**
   * The most recent slash that might become a self-closing slash.
   */
  nsIContent** mSlash;

  /**
   * Memory for element handles.
   */
  mozilla::UniquePtr mHandles;

  /**
   * Number of handles used in mHandles
   */
  int32_t mHandlesUsed;

  /**
   * A holder for old contents of mHandles
   */
  nsTArray> mOldHandles;

  /**
   * The element stack.
   */
  nsTArray mStack;

  /**
   * The string "comment"
   */
  static char16_t sComment[];

  /**
   * The string "cdata"
   */
  static char16_t sCdata[];

  /**
   * The string "start-tag"
   */
  static char16_t sStartTag[];

  /**
   * The string "attribute-name"
   */
  static char16_t sAttributeName[];

  /**
   * The string "attribute-value"
   */
  static char16_t sAttributeValue[];

  /**
   * The string "end-tag"
   */
  static char16_t sEndTag[];

  /**
   * The string "doctype"
   */
  static char16_t sDoctype[];

  /**
   * The string "entity"
   */
  static char16_t sEntity[];

  /**
   * The string "pi"
   */
  static char16_t sPi[];

  /**
   * Whether base is already visited once.
   */
  bool mSeenBase;
};

#endif  // nsHtml5Highlighter_h