diff options
Diffstat (limited to 'parser/html/nsHtml5Parser.h')
-rw-r--r-- | parser/html/nsHtml5Parser.h | 337 |
1 files changed, 337 insertions, 0 deletions
diff --git a/parser/html/nsHtml5Parser.h b/parser/html/nsHtml5Parser.h new file mode 100644 index 0000000000..593d52bc42 --- /dev/null +++ b/parser/html/nsHtml5Parser.h @@ -0,0 +1,337 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef NS_HTML5_PARSER +#define NS_HTML5_PARSER + +#include "mozilla/UniquePtr.h" +#include "nsIParser.h" +#include "nsDeque.h" +#include "nsIContentSink.h" +#include "nsIRequest.h" +#include "nsIChannel.h" +#include "nsCOMArray.h" +#include "nsContentSink.h" +#include "nsCycleCollectionParticipant.h" +#include "nsHtml5OwningUTF16Buffer.h" +#include "nsHtml5TreeOpExecutor.h" +#include "nsHtml5StreamParser.h" +#include "nsHtml5AtomTable.h" +#include "nsWeakReference.h" +#include "nsHtml5StreamListener.h" +#include "nsCharsetSource.h" + +class nsHtml5Parser final : public nsIParser, public nsSupportsWeakReference { + public: + NS_DECL_CYCLE_COLLECTING_ISUPPORTS + + NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(nsHtml5Parser, nsIParser) + + nsHtml5Parser(); + + /* Start nsIParser */ + /** + * No-op for backwards compat. + */ + NS_IMETHOD_(void) SetContentSink(nsIContentSink* aSink) override; + + /** + * Returns the tree op executor for backwards compat. + */ + NS_IMETHOD_(nsIContentSink*) GetContentSink() override; + + /** + * Always returns "view" for backwards compat. + */ + NS_IMETHOD_(void) GetCommand(nsCString& aCommand) override; + + /** + * No-op for backwards compat. + */ + NS_IMETHOD_(void) SetCommand(const char* aCommand) override; + + /** + * No-op for backwards compat. + */ + NS_IMETHOD_(void) SetCommand(eParserCommands aParserCommand) override; + + /** + * Call this method once you've created a parser, and want to instruct it + * about what charset to load + * + * @param aEncoding the charset of a document + * @param aCharsetSource the source of the charset + */ + virtual void SetDocumentCharset(NotNull<const Encoding*> aEncoding, + int32_t aSource, + bool aForceAutoDetection) override; + + /** + * Get the channel associated with this parser + * @param aChannel out param that will contain the result + * @return NS_OK if successful or NS_NOT_AVAILABLE if not + */ + nsresult GetChannel(nsIChannel** aChannel); + + /** + * Get the stream parser for this parser + */ + virtual nsIStreamListener* GetStreamListener() override; + + /** + * Don't call. For interface compat only. + */ + NS_IMETHOD ContinueInterruptedParsing() override; + + /** + * Blocks the parser. + */ + NS_IMETHOD_(void) BlockParser() override; + + /** + * Unblocks the parser. + */ + NS_IMETHOD_(void) UnblockParser() override; + + /** + * Asynchronously continues parsing. + */ + NS_IMETHOD_(void) ContinueInterruptedParsingAsync() override; + + /** + * Query whether the parser is enabled (i.e. not blocked) or not. + */ + NS_IMETHOD_(bool) IsParserEnabled() override; + + /** + * Query whether the parser thinks it's done with parsing. + */ + NS_IMETHOD_(bool) IsComplete() override; + + /** + * Set up request observer. + * + * @param aURL used for View Source title + */ + NS_IMETHOD Parse(nsIURI* aURL) override; + + /** + * document.write and document.close + * + * @param aSourceBuffer the argument of document.write (empty for .close()) + * @param aKey a key unique to the script element that caused this call + * @param aLastCall true if .close() false if .write() + */ + nsresult Parse(const nsAString& aSourceBuffer, void* aKey, bool aLastCall); + + /** + * Stops the parser prematurely + */ + NS_IMETHOD Terminate() override; + + /** + * True if the insertion point (per HTML5) is defined. + */ + virtual bool IsInsertionPointDefined() override; + + /** + * Call immediately before starting to evaluate a parser-inserted script or + * in general when the spec says to increment the script nesting level. + */ + void IncrementScriptNestingLevel() final; + + /** + * Call immediately after having evaluated a parser-inserted script or + * generally want to restore to the state before the last + * IncrementScriptNestingLevel call. + */ + void DecrementScriptNestingLevel() final; + + /** + * True if this is an HTML5 parser whose script nesting level (in + * the sense of + * <https://html.spec.whatwg.org/multipage/parsing.html#script-nesting-level>) + * is nonzero. + */ + bool HasNonzeroScriptNestingLevel() const final; + + /** + * Marks the HTML5 parser as not a script-created parser: Prepares the + * parser to be able to read a stream. + * + * @param aCommand the parser command (Yeah, this is bad API design. Let's + * make this better when retiring nsIParser) + */ + void MarkAsNotScriptCreated(const char* aCommand); + + /** + * True if this is a script-created HTML5 parser. + */ + virtual bool IsScriptCreated() override; + + /* End nsIParser */ + + // Not from an external interface + // Non-inherited methods + + public: + /** + * Initializes the parser to load from a channel. + */ + virtual nsresult Initialize(mozilla::dom::Document* aDoc, nsIURI* aURI, + nsISupports* aContainer, nsIChannel* aChannel); + + inline nsHtml5Tokenizer* GetTokenizer() { return mTokenizer.get(); } + + void InitializeDocWriteParserState(nsAHtml5TreeBuilderState* aState, + int32_t aLine); + + void DropStreamParser() { + if (GetStreamParser()) { + GetStreamParser()->DropTimer(); + mStreamListener->DropDelegate(); + mStreamListener = nullptr; + } + } + + void StartTokenizer(bool aScriptingEnabled); + + void ContinueAfterFailedCharsetSwitch(); + + nsHtml5StreamParser* GetStreamParser() { + if (!mStreamListener) { + return nullptr; + } + return mStreamListener->GetDelegate(); + } + + void PermanentlyUndefineInsertionPoint() { + mInsertionPointPermanentlyUndefined = true; + } + + /** + * Parse until pending data is exhausted or a script blocks the parser + */ + nsresult ParseUntilBlocked(); + + /** + * Start our executor. This is meant to be used from document.open() _only_ + * and does some work similar to what nsHtml5StreamParser::OnStartRequest does + * for normal parses. + */ + nsresult StartExecutor(); + + private: + virtual ~nsHtml5Parser(); + + // State variables + + /** + * Whether the last character tokenized was a carriage return (for CRLF) + */ + bool mLastWasCR; + + /** + * Whether the last character tokenized was a carriage return (for CRLF) + * when preparsing document.write. + */ + bool mDocWriteSpeculativeLastWasCR; + + /** + * The parser is blocking on the load of an external script from a web + * page, or any number of extension content scripts. + */ + uint32_t mBlocked; + + /** + * Whether the document.write() speculator is already active. + */ + bool mDocWriteSpeculatorActive; + + /** + * The number of IncrementScriptNestingLevel calls we've seen without a + * matching DecrementScriptNestingLevel. + */ + int32_t mScriptNestingLevel; + + /** + * True if document.close() has been called. + */ + bool mDocumentClosed; + + bool mInDocumentWrite; + + /** + * This is set when the tokenizer has seen EOF. The purpose is to + * keep the insertion point undefined between the time the + * parser has reached the point where it can't accept more input + * and the time the document's mParser is set to nullptr. + * Scripts can run during this time period due to an update + * batch ending and due to various end-of-parse events firing. + * (Setting mParser on the document to nullptr at the point + * where this flag gets set to true would break things that for + * legacy reasons assume that mParser on the document stays + * non-null though the end-of-parse events.) + */ + bool mInsertionPointPermanentlyUndefined; + + // Portable parser objects + /** + * The first buffer in the pending UTF-16 buffer queue + */ + RefPtr<nsHtml5OwningUTF16Buffer> mFirstBuffer; + + /** + * The last buffer in the pending UTF-16 buffer queue. Always points + * to a sentinel object with nullptr as its parser key. + */ + nsHtml5OwningUTF16Buffer* mLastBuffer; // weak ref; + + /** + * The tree operation executor + */ + RefPtr<nsHtml5TreeOpExecutor> mExecutor; + + /** + * The HTML5 tree builder + */ + const mozilla::UniquePtr<nsHtml5TreeBuilder> mTreeBuilder; + + /** + * The HTML5 tokenizer + */ + const mozilla::UniquePtr<nsHtml5Tokenizer> mTokenizer; + + /** + * Another HTML5 tree builder for preloading document.written content. + */ + mozilla::UniquePtr<nsHtml5TreeBuilder> mDocWriteSpeculativeTreeBuilder; + + /** + * Another HTML5 tokenizer for preloading document.written content. + */ + mozilla::UniquePtr<nsHtml5Tokenizer> mDocWriteSpeculativeTokenizer; + + /** + * The stream listener holding the stream parser. + */ + RefPtr<nsHtml5StreamListener> mStreamListener; + + /** + * + */ + int32_t mRootContextLineNumber; + + /** + * Whether it's OK to transfer parsing back to the stream parser + */ + bool mReturnToStreamParserPermitted; + + /** + * The scoped atom table + */ + nsHtml5AtomTable mAtomTable; +}; +#endif |