diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /parser/htmlparser/nsParser.cpp | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | parser/htmlparser/nsParser.cpp | 1075 |
1 files changed, 1075 insertions, 0 deletions
diff --git a/parser/htmlparser/nsParser.cpp b/parser/htmlparser/nsParser.cpp new file mode 100644 index 0000000000..04d02e2084 --- /dev/null +++ b/parser/htmlparser/nsParser.cpp @@ -0,0 +1,1075 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set sw=2 ts=2 et tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsAtom.h" +#include "nsParser.h" +#include "nsString.h" +#include "nsCRT.h" +#include "nsScanner.h" +#include "plstr.h" +#include "nsIChannel.h" +#include "nsIInputStream.h" +#include "CNavDTD.h" +#include "prenv.h" +#include "prlock.h" +#include "prcvar.h" +#include "nsReadableUtils.h" +#include "nsCOMPtr.h" +#include "nsExpatDriver.h" +#include "nsIFragmentContentSink.h" +#include "nsStreamUtils.h" +#include "nsXPCOMCIDInternal.h" +#include "nsMimeTypes.h" +#include "nsCharsetSource.h" +#include "nsThreadUtils.h" +#include "nsIHTMLContentSink.h" + +#include "mozilla/BinarySearch.h" +#include "mozilla/CondVar.h" +#include "mozilla/dom/ScriptLoader.h" +#include "mozilla/Encoding.h" +#include "mozilla/Mutex.h" + +using namespace mozilla; + +#define NS_PARSER_FLAG_PENDING_CONTINUE_EVENT 0x00000001 +#define NS_PARSER_FLAG_CAN_TOKENIZE 0x00000002 + +//-------------- Begin ParseContinue Event Definition ------------------------ +/* +The parser can be explicitly interrupted by passing a return value of +NS_ERROR_HTMLPARSER_INTERRUPTED from BuildModel on the DTD. This will cause +the parser to stop processing and allow the application to return to the event +loop. The data which was left at the time of interruption will be processed +the next time OnDataAvailable is called. If the parser has received its final +chunk of data then OnDataAvailable will no longer be called by the networking +module, so the parser will schedule a nsParserContinueEvent which will call +the parser to process the remaining data after returning to the event loop. +If the parser is interrupted while processing the remaining data it will +schedule another ParseContinueEvent. The processing of data followed by +scheduling of the continue events will proceed until either: + + 1) All of the remaining data can be processed without interrupting + 2) The parser has been cancelled. + + +This capability is currently used in CNavDTD and nsHTMLContentSink. The +nsHTMLContentSink is notified by CNavDTD when a chunk of tokens is going to be +processed and when each token is processed. The nsHTML content sink records +the time when the chunk has started processing and will return +NS_ERROR_HTMLPARSER_INTERRUPTED if the token processing time has exceeded a +threshold called max tokenizing processing time. This allows the content sink +to limit how much data is processed in a single chunk which in turn gates how +much time is spent away from the event loop. Processing smaller chunks of data +also reduces the time spent in subsequent reflows. + +This capability is most apparent when loading large documents. If the maximum +token processing time is set small enough the application will remain +responsive during document load. + +A side-effect of this capability is that document load is not complete when +the last chunk of data is passed to OnDataAvailable since the parser may have +been interrupted when the last chunk of data arrived. The document is complete +when all of the document has been tokenized and there aren't any pending +nsParserContinueEvents. This can cause problems if the application assumes +that it can monitor the load requests to determine when the document load has +been completed. This is what happens in Mozilla. The document is considered +completely loaded when all of the load requests have been satisfied. To delay +the document load until all of the parsing has been completed the +nsHTMLContentSink adds a dummy parser load request which is not removed until +the nsHTMLContentSink's DidBuildModel is called. The CNavDTD will not call +DidBuildModel until the final chunk of data has been passed to the parser +through the OnDataAvailable and there aren't any pending +nsParserContineEvents. + +Currently the parser is ignores requests to be interrupted during the +processing of script. This is because a document.write followed by JavaScript +calls to manipulate the DOM may fail if the parser was interrupted during the +document.write. + +For more details @see bugzilla bug 76722 +*/ + +class nsParserContinueEvent : public Runnable { + public: + RefPtr<nsParser> mParser; + + explicit nsParserContinueEvent(nsParser* aParser) + : mozilla::Runnable("nsParserContinueEvent"), mParser(aParser) {} + + NS_IMETHOD Run() override { + mParser->HandleParserContinueEvent(this); + return NS_OK; + } +}; + +//-------------- End ParseContinue Event Definition ------------------------ + +/** + * default constructor + */ +nsParser::nsParser() : mCharset(WINDOWS_1252_ENCODING) { Initialize(); } + +nsParser::~nsParser() { Cleanup(); } + +void nsParser::Initialize() { + mContinueEvent = nullptr; + mCharsetSource = kCharsetUninitialized; + mCharset = WINDOWS_1252_ENCODING; + mInternalState = NS_OK; + mStreamStatus = NS_OK; + mCommand = eViewNormal; + mBlocked = 0; + mFlags = NS_PARSER_FLAG_CAN_TOKENIZE; + + mProcessingNetworkData = false; + mIsAboutBlank = false; +} + +void nsParser::Cleanup() { + // It should not be possible for this flag to be set when we are getting + // destroyed since this flag implies a pending nsParserContinueEvent, which + // has an owning reference to |this|. + NS_ASSERTION(!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT), "bad"); +} + +NS_IMPL_CYCLE_COLLECTION_CLASS(nsParser) + +NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsParser) + NS_IMPL_CYCLE_COLLECTION_UNLINK(mDTD) + NS_IMPL_CYCLE_COLLECTION_UNLINK(mSink) + NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE +NS_IMPL_CYCLE_COLLECTION_UNLINK_END + +NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsParser) + NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mDTD) + NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSink) +NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END + +NS_IMPL_CYCLE_COLLECTING_ADDREF(nsParser) +NS_IMPL_CYCLE_COLLECTING_RELEASE(nsParser) +NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsParser) + NS_INTERFACE_MAP_ENTRY(nsIStreamListener) + NS_INTERFACE_MAP_ENTRY(nsIParser) + NS_INTERFACE_MAP_ENTRY(nsIRequestObserver) + NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference) + NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIParser) +NS_INTERFACE_MAP_END + +// The parser continue event is posted only if +// all of the data to parse has been passed to ::OnDataAvailable +// and the parser has been interrupted by the content sink +// because the processing of tokens took too long. + +nsresult nsParser::PostContinueEvent() { + if (!(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT)) { + // If this flag isn't set, then there shouldn't be a live continue event! + NS_ASSERTION(!mContinueEvent, "bad"); + + // This creates a reference cycle between this and the event that is + // broken when the event fires. + nsCOMPtr<nsIRunnable> event = new nsParserContinueEvent(this); + if (NS_FAILED(NS_DispatchToCurrentThread(event))) { + NS_WARNING("failed to dispatch parser continuation event"); + } else { + mFlags |= NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; + mContinueEvent = event; + } + } + return NS_OK; +} + +NS_IMETHODIMP_(void) +nsParser::GetCommand(nsCString& aCommand) { aCommand = mCommandStr; } + +/** + * Call this method once you've created a parser, and want to instruct it + * about the command which caused the parser to be constructed. For example, + * this allows us to select a DTD which can do, say, view-source. + * + * @param aCommand the command string to set + */ +NS_IMETHODIMP_(void) +nsParser::SetCommand(const char* aCommand) { + mCommandStr.Assign(aCommand); + if (mCommandStr.EqualsLiteral("view-source")) { + mCommand = eViewSource; + } else if (mCommandStr.EqualsLiteral("view-fragment")) { + mCommand = eViewFragment; + } else { + mCommand = eViewNormal; + } +} + +/** + * Call this method once you've created a parser, and want to instruct it + * about the command which caused the parser to be constructed. For example, + * this allows us to select a DTD which can do, say, view-source. + * + * @param aParserCommand the command to set + */ +NS_IMETHODIMP_(void) +nsParser::SetCommand(eParserCommands aParserCommand) { + mCommand = aParserCommand; +} + +/** + * Call this method once you've created a parser, and want to instruct it + * about what charset to load + * + * @param aCharset- the charset of a document + * @param aCharsetSource- the source of the charset + */ +void nsParser::SetDocumentCharset(NotNull<const Encoding*> aCharset, + int32_t aCharsetSource, + bool aForceAutoDetection) { + mCharset = aCharset; + mCharsetSource = aCharsetSource; + if (mParserContext) { + mParserContext->mScanner.SetDocumentCharset(aCharset, aCharsetSource); + } +} + +void nsParser::SetSinkCharset(NotNull<const Encoding*> aCharset) { + if (mSink) { + mSink->SetDocumentCharset(aCharset); + } +} + +/** + * This method gets called in order to set the content + * sink for this parser to dump nodes to. + * + * @param nsIContentSink interface for node receiver + */ +NS_IMETHODIMP_(void) +nsParser::SetContentSink(nsIContentSink* aSink) { + MOZ_ASSERT(aSink, "sink cannot be null!"); + mSink = aSink; + + if (mSink) { + mSink->SetParser(this); + nsCOMPtr<nsIHTMLContentSink> htmlSink = do_QueryInterface(mSink); + if (htmlSink) { + mIsAboutBlank = true; + } + } +} + +/** + * retrieve the sink set into the parser + * @return current sink + */ +NS_IMETHODIMP_(nsIContentSink*) +nsParser::GetContentSink() { return mSink; } + +//////////////////////////////////////////////////////////////////////// + +/** + * This gets called just prior to the model actually + * being constructed. It's important to make this the + * last thing that happens right before parsing, so we + * can delay until the last moment the resolution of + * which DTD to use (unless of course we're assigned one). + */ +nsresult nsParser::WillBuildModel() { + if (!mParserContext) return NS_ERROR_HTMLPARSER_INVALIDPARSERCONTEXT; + + if (mInternalState == NS_ERROR_OUT_OF_MEMORY) { + // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED + // to avoid introducing unintentional changes to behavior. + return mInternalState; + } + + if (eUnknownDetect != mParserContext->mAutoDetectStatus) return NS_OK; + + if (eDTDMode_autodetect == mParserContext->mDTDMode) { + if (mIsAboutBlank) { + mParserContext->mDTDMode = eDTDMode_quirks; + mParserContext->mDocType = eHTML_Quirks; + } else { + mParserContext->mDTDMode = eDTDMode_full_standards; + mParserContext->mDocType = eXML; + } + } // else XML fragment with nested parser context + + // We always find a DTD. + mParserContext->mAutoDetectStatus = ePrimaryDetect; + + // Quick check for view source. + MOZ_ASSERT(mParserContext->mParserCommand != eViewSource, + "The old parser is not supposed to be used for View Source " + "anymore."); + + // Now see if we're parsing XML or HTML (which, as far as we're concerned, + // simply means "not XML"). + if (mParserContext->mDocType == eXML) { + RefPtr<nsExpatDriver> expat = new nsExpatDriver(); + nsresult rv = expat->Initialize(mParserContext->mScanner.GetURI(), mSink); + NS_ENSURE_SUCCESS(rv, rv); + + mDTD = expat.forget(); + } else { + mDTD = new CNavDTD(); + } + + return mSink->WillBuildModel(mParserContext->mDTDMode); +} + +/** + * This gets called when the parser is done with its input. + */ +void nsParser::DidBuildModel() { + if (IsComplete() && mParserContext) { + // Let sink know if we're about to end load because we've been terminated. + // In that case we don't want it to run deferred scripts. + bool terminated = mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING; + if (mDTD && mSink) { + mDTD->DidBuildModel(); + mSink->DidBuildModel(terminated); + } + + // Ref. to bug 61462. + mParserContext->mRequest = nullptr; + } +} + +/** + * Call this when you want to *force* the parser to terminate the + * parsing process altogether. This is binary -- so once you terminate + * you can't resume without restarting altogether. + */ +NS_IMETHODIMP +nsParser::Terminate(void) { + // We should only call DidBuildModel once, so don't do anything if this is + // the second time that Terminate has been called. + if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) { + return NS_OK; + } + + nsresult result = NS_OK; + // XXX - [ until we figure out a way to break parser-sink circularity ] + // Hack - Hold a reference until we are completely done... + nsCOMPtr<nsIParser> kungFuDeathGrip(this); + mInternalState = result = NS_ERROR_HTMLPARSER_STOPPARSING; + + // @see bug 108049 + // If NS_PARSER_FLAG_PENDING_CONTINUE_EVENT is set then reset it so + // DidBuildModel will call DidBuildModel on the DTD. Note: The IsComplete() + // call inside of DidBuildModel looks at the pendingContinueEvents flag. + if (mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT) { + NS_ASSERTION(mContinueEvent, "mContinueEvent is null"); + // Revoke the pending continue parsing event + mContinueEvent = nullptr; + mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; + } + + if (mDTD) { + mDTD->Terminate(); + DidBuildModel(); + } else if (mSink) { + // We have no parser context or no DTD yet (so we got terminated before we + // got any data). Manually break the reference cycle with the sink. + result = mSink->DidBuildModel(true); + NS_ENSURE_SUCCESS(result, result); + } + + return NS_OK; +} + +NS_IMETHODIMP +nsParser::ContinueInterruptedParsing() { + if (mInternalState == NS_ERROR_OUT_OF_MEMORY) { + // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED + // to avoid introducing unintentional changes to behavior. + return mInternalState; + } + + // If there are scripts executing, then the content sink is jumping the gun + // (probably due to a synchronous XMLHttpRequest) and will re-enable us + // later, see bug 460706. + if (!IsOkToProcessNetworkData()) { + return NS_OK; + } + + // If the stream has already finished, there's a good chance + // that we might start closing things down when the parser + // is reenabled. To make sure that we're not deleted across + // the reenabling process, hold a reference to ourselves. + nsresult result = NS_OK; + nsCOMPtr<nsIParser> kungFuDeathGrip(this); + nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink); + +#ifdef DEBUG + if (mBlocked) { + NS_WARNING("Don't call ContinueInterruptedParsing on a blocked parser."); + } +#endif + + bool isFinalChunk = + mParserContext && mParserContext->mStreamListenerState == eOnStop; + + mProcessingNetworkData = true; + if (sinkDeathGrip) { + sinkDeathGrip->WillParse(); + } + result = ResumeParse(true, isFinalChunk); // Ref. bug 57999 + mProcessingNetworkData = false; + + if (result != NS_OK) { + result = mInternalState; + } + + return result; +} + +/** + * Stops parsing temporarily. That is, it will prevent the + * parser from building up content model while scripts + * are being loaded (either an external script from a web + * page, or any number of extension content scripts). + */ +NS_IMETHODIMP_(void) +nsParser::BlockParser() { mBlocked++; } + +/** + * Open up the parser for tokenization, building up content + * model..etc. However, this method does not resume parsing + * automatically. It's the callers' responsibility to restart + * the parsing engine. + */ +NS_IMETHODIMP_(void) +nsParser::UnblockParser() { + MOZ_DIAGNOSTIC_ASSERT(mBlocked > 0); + if (MOZ_LIKELY(mBlocked > 0)) { + mBlocked--; + } +} + +NS_IMETHODIMP_(void) +nsParser::ContinueInterruptedParsingAsync() { + MOZ_ASSERT(mSink); + if (MOZ_LIKELY(mSink)) { + mSink->ContinueInterruptedParsingAsync(); + } +} + +/** + * Call this to query whether the parser is enabled or not. + */ +NS_IMETHODIMP_(bool) +nsParser::IsParserEnabled() { return !mBlocked; } + +/** + * Call this to query whether the parser thinks it's done with parsing. + */ +NS_IMETHODIMP_(bool) +nsParser::IsComplete() { + return !(mFlags & NS_PARSER_FLAG_PENDING_CONTINUE_EVENT); +} + +void nsParser::HandleParserContinueEvent(nsParserContinueEvent* ev) { + // Ignore any revoked continue events... + if (mContinueEvent != ev) return; + + mFlags &= ~NS_PARSER_FLAG_PENDING_CONTINUE_EVENT; + mContinueEvent = nullptr; + + NS_ASSERTION(IsOkToProcessNetworkData(), + "Interrupted in the middle of a script?"); + ContinueInterruptedParsing(); +} + +bool nsParser::IsInsertionPointDefined() { return false; } + +void nsParser::IncrementScriptNestingLevel() {} + +void nsParser::DecrementScriptNestingLevel() {} + +bool nsParser::HasNonzeroScriptNestingLevel() const { return false; } + +bool nsParser::IsScriptCreated() { return false; } + +/** + * This is the main controlling routine in the parsing process. + * Note that it may get called multiple times for the same scanner, + * since this is a pushed based system, and all the tokens may + * not have been consumed by the scanner during a given invocation + * of this method. + */ +NS_IMETHODIMP +nsParser::Parse(nsIURI* aURL) { + MOZ_ASSERT(aURL, "Error: Null URL given"); + + if (mInternalState == NS_ERROR_OUT_OF_MEMORY) { + // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED + // to avoid introducing unintentional changes to behavior. + return mInternalState; + } + + if (!aURL) { + return NS_ERROR_HTMLPARSER_BADURL; + } + + MOZ_ASSERT(!mParserContext, "We expect mParserContext to be null."); + + mParserContext = MakeUnique<CParserContext>(aURL, mCommand); + + return NS_OK; +} + +/** + * Used by XML fragment parsing below. + * + * @param aSourceBuffer contains a string-full of real content + */ +nsresult nsParser::Parse(const nsAString& aSourceBuffer, bool aLastCall) { + if (mInternalState == NS_ERROR_OUT_OF_MEMORY) { + // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED + // to avoid introducing unintentional changes to behavior. + return mInternalState; + } + + // Don't bother if we're never going to parse this. + if (mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING) { + return NS_OK; + } + + if (!aLastCall && aSourceBuffer.IsEmpty()) { + // Nothing is being passed to the parser so return + // immediately. mUnusedInput will get processed when + // some data is actually passed in. + // But if this is the last call, make sure to finish up + // stuff correctly. + return NS_OK; + } + + // Maintain a reference to ourselves so we don't go away + // till we're completely done. + nsCOMPtr<nsIParser> kungFuDeathGrip(this); + + if (!mParserContext) { + // Only make a new context if we don't have one. + mParserContext = + MakeUnique<CParserContext>(mUnusedInput, mCommand, aLastCall); + + mUnusedInput.Truncate(); + } else if (aLastCall) { + // Set stream listener state to eOnStop, on the final context - Fix + // 68160, to guarantee DidBuildModel() call - Fix 36148 + mParserContext->mStreamListenerState = eOnStop; + mParserContext->mScanner.SetIncremental(false); + } + + mParserContext->mScanner.Append(aSourceBuffer); + return ResumeParse(false, false, false); +} + +nsresult nsParser::ParseFragment(const nsAString& aSourceBuffer, + nsTArray<nsString>& aTagStack) { + if (mInternalState == NS_ERROR_OUT_OF_MEMORY) { + // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED + // to avoid introducing unintentional changes to behavior. + return mInternalState; + } + + nsresult result = NS_OK; + nsAutoString theContext; + uint32_t theCount = aTagStack.Length(); + uint32_t theIndex = 0; + + for (theIndex = 0; theIndex < theCount; theIndex++) { + theContext.Append('<'); + theContext.Append(aTagStack[theCount - theIndex - 1]); + theContext.Append('>'); + } + + if (theCount == 0) { + // Ensure that the buffer is not empty. Because none of the DTDs care + // about leading whitespace, this doesn't change the result. + theContext.Assign(' '); + } + + // First, parse the context to build up the DTD's tag stack. Note that we + // pass false for the aLastCall parameter. + result = Parse(theContext, false); + if (NS_FAILED(result)) { + return result; + } + + if (!mSink) { + // Parse must have failed in the XML case and so the sink was killed. + return NS_ERROR_HTMLPARSER_STOPPARSING; + } + + nsCOMPtr<nsIFragmentContentSink> fragSink = do_QueryInterface(mSink); + NS_ASSERTION(fragSink, "ParseFragment requires a fragment content sink"); + + fragSink->WillBuildContent(); + // Now, parse the actual content. Note that this is the last call + // for HTML content, but for XML, we will want to build and parse + // the end tags. However, if tagStack is empty, it's the last call + // for XML as well. + if (theCount == 0) { + result = Parse(aSourceBuffer, true); + fragSink->DidBuildContent(); + } else { + // Add an end tag chunk, so expat will read the whole source buffer, + // and not worry about ']]' etc. + result = Parse(aSourceBuffer + u"</"_ns, false); + fragSink->DidBuildContent(); + + if (NS_SUCCEEDED(result)) { + nsAutoString endContext; + for (theIndex = 0; theIndex < theCount; theIndex++) { + // we already added an end tag chunk above + if (theIndex > 0) { + endContext.AppendLiteral("</"); + } + + nsString& thisTag = aTagStack[theIndex]; + // was there an xmlns=? + int32_t endOfTag = thisTag.FindChar(char16_t(' ')); + if (endOfTag == -1) { + endContext.Append(thisTag); + } else { + endContext.Append(Substring(thisTag, 0, endOfTag)); + } + + endContext.Append('>'); + } + + result = Parse(endContext, true); + } + } + + mParserContext.reset(); + + return result; +} + +/** + * This routine is called to cause the parser to continue parsing its + * underlying stream. This call allows the parse process to happen in + * chunks, such as when the content is push based, and we need to parse in + * pieces. + * + * An interesting change in how the parser gets used has led us to add extra + * processing to this method. The case occurs when the parser is blocked in + * one context, and gets a parse(string) call in another context. In this + * case, the parserContexts are linked. No problem. + * + * The problem is that Parse(string) assumes that it can proceed unabated, + * but if the parser is already blocked that assumption is false. So we + * needed to add a mechanism here to allow the parser to continue to process + * (the pop and free) contexts until 1) it get's blocked again; 2) it runs + * out of contexts. + * + * + * @param allowItertion : set to true if non-script resumption is requested + * @param aIsFinalChunk : tells us when the last chunk of data is provided. + * @return error code -- 0 if ok, non-zero if error. + */ +nsresult nsParser::ResumeParse(bool allowIteration, bool aIsFinalChunk, + bool aCanInterrupt) { + if (mInternalState == NS_ERROR_OUT_OF_MEMORY) { + // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED + // to avoid introducing unintentional changes to behavior. + return mInternalState; + } + + nsresult result = NS_OK; + + if (!mBlocked && mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) { + result = WillBuildModel(); + if (NS_FAILED(result)) { + mFlags &= ~NS_PARSER_FLAG_CAN_TOKENIZE; + return result; + } + + if (mDTD) { + mSink->WillResume(); + bool theIterationIsOk = true; + + while (result == NS_OK && theIterationIsOk) { + if (!mUnusedInput.IsEmpty()) { + // -- Ref: Bug# 22485 -- + // Insert the unused input into the source buffer + // as if it was read from the input stream. + // Adding UngetReadable() per vidur!! + mParserContext->mScanner.UngetReadable(mUnusedInput); + mUnusedInput.Truncate(0); + } + + // Only allow parsing to be interrupted in the subsequent call to + // build model. + nsresult theTokenizerResult; + if (mFlags & NS_PARSER_FLAG_CAN_TOKENIZE) { + mParserContext->mScanner.Mark(); + if (mParserContext->mDocType == eXML && + mParserContext->mParserCommand != eViewSource) { + nsExpatDriver* expat = static_cast<nsExpatDriver*>(mDTD.get()); + theTokenizerResult = + expat->ResumeParse(mParserContext->mScanner, aIsFinalChunk); + if (NS_FAILED(theTokenizerResult)) { + mParserContext->mScanner.RewindToMark(); + if (NS_ERROR_HTMLPARSER_STOPPARSING == theTokenizerResult) { + theTokenizerResult = Terminate(); + mSink = nullptr; + } + } + } else { + // Nothing to do for non-XML. Note that this should only be + // about:blank at this point, we're also checking for view-source + // above, but that shouldn't end up here anymore. + theTokenizerResult = NS_ERROR_HTMLPARSER_EOF; + } + } else { + theTokenizerResult = NS_OK; + } + + result = mDTD->BuildModel(mSink); + if (result == NS_ERROR_HTMLPARSER_INTERRUPTED && aIsFinalChunk) { + PostContinueEvent(); + } + + theIterationIsOk = theTokenizerResult != NS_ERROR_HTMLPARSER_EOF && + result != NS_ERROR_HTMLPARSER_INTERRUPTED; + + // Make sure not to stop parsing too early. Therefore, before shutting + // down the parser, it's important to check whether the input buffer + // has been scanned to completion (theTokenizerResult should be kEOF). + // kEOF -> End of buffer. + + // If we're told the parser has been blocked, we disable all further + // parsing (and cache any data coming in) until the parser is + // re-enabled. + if (NS_ERROR_HTMLPARSER_BLOCK == result) { + mSink->WillInterrupt(); + return NS_OK; + } + if (NS_ERROR_HTMLPARSER_STOPPARSING == result) { + // Note: Parser Terminate() calls DidBuildModel. + if (mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) { + DidBuildModel(); + mInternalState = result; + } + + return NS_OK; + } + if (((NS_OK == result && + theTokenizerResult == NS_ERROR_HTMLPARSER_EOF) || + result == NS_ERROR_HTMLPARSER_INTERRUPTED) && + mParserContext->mStreamListenerState == eOnStop) { + DidBuildModel(); + return NS_OK; + } + + if (theTokenizerResult == NS_ERROR_HTMLPARSER_EOF || + result == NS_ERROR_HTMLPARSER_INTERRUPTED) { + result = (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result; + mSink->WillInterrupt(); + } + } + } else { + mInternalState = result = NS_ERROR_HTMLPARSER_UNRESOLVEDDTD; + } + } + + return (result == NS_ERROR_HTMLPARSER_INTERRUPTED) ? NS_OK : result; +} + +/******************************************************************* + These methods are used to talk to the netlib system... + *******************************************************************/ + +nsresult nsParser::OnStartRequest(nsIRequest* request) { + if (mInternalState == NS_ERROR_OUT_OF_MEMORY) { + // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED + // to avoid introducing unintentional changes to behavior. + return mInternalState; + } + + MOZ_ASSERT(eNone == mParserContext->mStreamListenerState, + "Parser's nsIStreamListener API was not setup " + "correctly in constructor."); + + mParserContext->mStreamListenerState = eOnStart; + mParserContext->mAutoDetectStatus = eUnknownDetect; + mParserContext->mRequest = request; + + mDTD = nullptr; + + nsresult rv; + nsAutoCString contentType; + nsCOMPtr<nsIChannel> channel = do_QueryInterface(request); + if (channel) { + rv = channel->GetContentType(contentType); + if (NS_SUCCEEDED(rv)) { + mParserContext->SetMimeType(contentType); + } + } + + rv = NS_OK; + + return rv; +} + +static bool ExtractCharsetFromXmlDeclaration(const unsigned char* aBytes, + int32_t aLen, + nsCString& oCharset) { + // This code is rather pointless to have. Might as well reuse expat as + // seen in nsHtml5StreamParser. -- hsivonen + oCharset.Truncate(); + if ((aLen >= 5) && ('<' == aBytes[0]) && ('?' == aBytes[1]) && + ('x' == aBytes[2]) && ('m' == aBytes[3]) && ('l' == aBytes[4])) { + int32_t i; + bool versionFound = false, encodingFound = false; + for (i = 6; i < aLen && !encodingFound; ++i) { + // end of XML declaration? + if ((((char*)aBytes)[i] == '?') && ((i + 1) < aLen) && + (((char*)aBytes)[i + 1] == '>')) { + break; + } + // Version is required. + if (!versionFound) { + // Want to avoid string comparisons, hence looking for 'n' + // and only if found check the string leading to it. Not + // foolproof, but fast. + // The shortest string allowed before this is (strlen==13): + // <?xml version + if ((((char*)aBytes)[i] == 'n') && (i >= 12) && + (0 == strncmp("versio", (char*)(aBytes + i - 6), 6))) { + // Fast forward through version + char q = 0; + for (++i; i < aLen; ++i) { + char qi = ((char*)aBytes)[i]; + if (qi == '\'' || qi == '"') { + if (q && q == qi) { + // ending quote + versionFound = true; + break; + } else { + // Starting quote + q = qi; + } + } + } + } + } else { + // encoding must follow version + // Want to avoid string comparisons, hence looking for 'g' + // and only if found check the string leading to it. Not + // foolproof, but fast. + // The shortest allowed string before this (strlen==26): + // <?xml version="1" encoding + if ((((char*)aBytes)[i] == 'g') && (i >= 25) && + (0 == strncmp("encodin", (char*)(aBytes + i - 7), 7))) { + int32_t encStart = 0; + char q = 0; + for (++i; i < aLen; ++i) { + char qi = ((char*)aBytes)[i]; + if (qi == '\'' || qi == '"') { + if (q && q == qi) { + int32_t count = i - encStart; + // encoding value is invalid if it is UTF-16 + if (count > 0 && + PL_strncasecmp("UTF-16", (char*)(aBytes + encStart), + count)) { + oCharset.Assign((char*)(aBytes + encStart), count); + } + encodingFound = true; + break; + } else { + encStart = i + 1; + q = qi; + } + } + } + } + } // if (!versionFound) + } // for + } + return !oCharset.IsEmpty(); +} + +inline char GetNextChar(nsACString::const_iterator& aStart, + nsACString::const_iterator& aEnd) { + NS_ASSERTION(aStart != aEnd, "end of buffer"); + return (++aStart != aEnd) ? *aStart : '\0'; +} + +static nsresult NoOpParserWriteFunc(nsIInputStream* in, void* closure, + const char* fromRawSegment, + uint32_t toOffset, uint32_t count, + uint32_t* writeCount) { + *writeCount = count; + return NS_OK; +} + +typedef struct { + bool mNeedCharsetCheck; + nsParser* mParser; + nsScanner* mScanner; + nsIRequest* mRequest; +} ParserWriteStruct; + +/* + * This function is invoked as a result of a call to a stream's + * ReadSegments() method. It is called for each contiguous buffer + * of data in the underlying stream or pipe. Using ReadSegments + * allows us to avoid copying data to read out of the stream. + */ +static nsresult ParserWriteFunc(nsIInputStream* in, void* closure, + const char* fromRawSegment, uint32_t toOffset, + uint32_t count, uint32_t* writeCount) { + nsresult result; + ParserWriteStruct* pws = static_cast<ParserWriteStruct*>(closure); + const unsigned char* buf = + reinterpret_cast<const unsigned char*>(fromRawSegment); + uint32_t theNumRead = count; + + if (!pws) { + return NS_ERROR_FAILURE; + } + + if (pws->mNeedCharsetCheck) { + pws->mNeedCharsetCheck = false; + int32_t source; + auto preferred = pws->mParser->GetDocumentCharset(source); + + // This code was bogus when I found it. It expects the BOM or the XML + // declaration to be entirely in the first network buffer. -- hsivonen + const Encoding* encoding; + std::tie(encoding, std::ignore) = Encoding::ForBOM(Span(buf, count)); + if (encoding) { + // The decoder will swallow the BOM. The UTF-16 will re-sniff for + // endianness. The value of preferred is now "UTF-8", "UTF-16LE" + // or "UTF-16BE". + preferred = WrapNotNull(encoding); + source = kCharsetFromByteOrderMark; + } else if (source < kCharsetFromChannel) { + nsAutoCString declCharset; + + if (ExtractCharsetFromXmlDeclaration(buf, count, declCharset)) { + encoding = Encoding::ForLabel(declCharset); + if (encoding) { + preferred = WrapNotNull(encoding); + source = kCharsetFromMetaTag; + } + } + } + + pws->mParser->SetDocumentCharset(preferred, source, false); + pws->mParser->SetSinkCharset(preferred); + } + + result = pws->mScanner->Append(fromRawSegment, theNumRead); + if (NS_SUCCEEDED(result)) { + *writeCount = count; + } + + return result; +} + +nsresult nsParser::OnDataAvailable(nsIRequest* request, + nsIInputStream* pIStream, + uint64_t sourceOffset, uint32_t aLength) { + if (mInternalState == NS_ERROR_OUT_OF_MEMORY) { + // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED + // to avoid introducing unintentional changes to behavior. + return mInternalState; + } + + MOZ_ASSERT((eOnStart == mParserContext->mStreamListenerState || + eOnDataAvail == mParserContext->mStreamListenerState), + "Error: OnStartRequest() must be called before OnDataAvailable()"); + MOZ_ASSERT(NS_InputStreamIsBuffered(pIStream), + "Must have a buffered input stream"); + + nsresult rv = NS_OK; + + if (mIsAboutBlank) { + MOZ_ASSERT(false, "Must not get OnDataAvailable for about:blank"); + // ... but if an extension tries to feed us data for about:blank in a + // release build, silently ignore the data. + uint32_t totalRead; + rv = pIStream->ReadSegments(NoOpParserWriteFunc, nullptr, aLength, + &totalRead); + return rv; + } + + if (mParserContext->mRequest == request) { + mParserContext->mStreamListenerState = eOnDataAvail; + + uint32_t totalRead; + ParserWriteStruct pws; + pws.mNeedCharsetCheck = true; + pws.mParser = this; + pws.mScanner = &mParserContext->mScanner; + pws.mRequest = request; + + rv = pIStream->ReadSegments(ParserWriteFunc, &pws, aLength, &totalRead); + if (NS_FAILED(rv)) { + return rv; + } + + if (IsOkToProcessNetworkData()) { + nsCOMPtr<nsIParser> kungFuDeathGrip(this); + nsCOMPtr<nsIContentSink> sinkDeathGrip(mSink); + mProcessingNetworkData = true; + if (sinkDeathGrip) { + sinkDeathGrip->WillParse(); + } + rv = ResumeParse(); + mProcessingNetworkData = false; + } + } else { + rv = NS_ERROR_UNEXPECTED; + } + + return rv; +} + +/** + * This is called by the networking library once the last block of data + * has been collected from the net. + */ +nsresult nsParser::OnStopRequest(nsIRequest* request, nsresult status) { + if (mInternalState == NS_ERROR_OUT_OF_MEMORY) { + // Checking NS_ERROR_OUT_OF_MEMORY instead of NS_FAILED + // to avoid introducing unintentional changes to behavior. + return mInternalState; + } + + nsresult rv = NS_OK; + + if (mParserContext->mRequest == request) { + mParserContext->mStreamListenerState = eOnStop; + mParserContext->mScanner.SetIncremental(false); + } + + mStreamStatus = status; + + if (IsOkToProcessNetworkData() && NS_SUCCEEDED(rv)) { + mProcessingNetworkData = true; + if (mSink) { + mSink->WillParse(); + } + rv = ResumeParse(true, true); + mProcessingNetworkData = false; + } + + // If the parser isn't enabled, we don't finish parsing till + // it is reenabled. + + return rv; +} + +/** + * Get this as nsIStreamListener + */ +nsIStreamListener* nsParser::GetStreamListener() { return this; } |