summary | refs | log | tree | commit | diff | stats
path: root/parser/htmlparser/nsExpatDriver.h
diff options
context:
space:
mode:
Diffstat (limited to 'parser/htmlparser/nsExpatDriver.h')
-rw-r--r-- parser/htmlparser/nsExpatDriver.h | 255
1 files changed, 255 insertions, 0 deletions
diff --git a/parser/htmlparser/nsExpatDriver.h b/parser/htmlparser/nsExpatDriver.h
new file mode 100644
index 0000000000..78f9caf343
--- /dev/null
+++ b/parser/htmlparser/nsExpatDriver.h
@@ -0,0 +1,255 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef NS_EXPAT_DRIVER__
+#define NS_EXPAT_DRIVER__
+
+#include "expat_config.h"
+#include "expat.h"
+#include "nsCOMPtr.h"
+#include "nsString.h"
+#include "nsIDTD.h"
+#include "nsIInputStream.h"
+#include "nsIParser.h"
+#include "nsCycleCollectionParticipant.h"
+
+#include "rlbox_expat.h"
+#include "nsRLBoxExpatDriver.h"
+#include "mozilla/UniquePtr.h"
+
+class nsIExpatSink;           // only held through nsCOMPtr (mSink) below
+struct nsCatalogData;         // only referenced by pointer (mCatalogData) below
+class RLBoxExpatSandboxData;  // defined at the end of this header
+namespace mozilla {
+template <typename, size_t>
+class Array;  // only named by the ExpatBaseURI alias in nsExpatDriver
+}
+
+/**
+ * Drives an RLBox-sandboxed Expat parser: buffers are passed to Expat through
+ * ParseChunk / ChunkAndParseBuffer, and Expat reports back through the Handle*
+ * methods declared below.
+ *
+ * NOTE(review): only declarations are visible in this header; how the handlers
+ * talk to mSink / mOriginalSink should be confirmed against the implementation
+ * file.
+ */
+class nsExpatDriver : public nsIDTD {
+  // Private destructor. NOTE(review): presumably instances are destroyed only
+  // through the reference counting declared by the ISUPPORTS macro below --
+  // confirm.
+  virtual ~nsExpatDriver();
+
+ public:
+  NS_DECL_CYCLE_COLLECTING_ISUPPORTS_FINAL
+  NS_DECL_NSIDTD
+  NS_DECL_CYCLE_COLLECTION_CLASS(nsExpatDriver)
+
+  nsExpatDriver();
+
+  // NOTE(review): presumably aSink is the sink kept in mOriginalSink / mSink
+  // and aURI is the document URI -- confirm against the implementation.
+  nsresult Initialize(nsIURI* aURI, nsIContentSink* aSink);
+
+  // NOTE(review): presumably aIsFinalChunk is true when aScanner holds the
+  // last data for the document -- confirm against the implementation.
+  nsresult ResumeParse(nsScanner& aScanner, bool aIsFinalChunk);
+
+  // Handlers for Expat events. The start/end element handlers are static and
+  // take the sandbox plus tainted_expat-wrapped arguments; the remaining
+  // handlers are instance methods that take plain char16_t data.
+  // NOTE(review): presumably the *ForSystemPrincipal variants are the ones
+  // registered when RLBoxExpatSandboxData::AttachDriver is told the driver
+  // runs with the system principal -- confirm.
+  int HandleExternalEntityRef(const char16_t* aOpenEntityNames,
+                              const char16_t* aBase, const char16_t* aSystemId,
+                              const char16_t* aPublicId);
+  static void HandleStartElement(rlbox_sandbox_expat& aSandbox,
+                                 tainted_expat<void*> aUserData,
+                                 tainted_expat<const char16_t*> aName,
+                                 tainted_expat<const char16_t**> aAtts);
+  static void HandleStartElementForSystemPrincipal(
+      rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
+      tainted_expat<const char16_t*> aName,
+      tainted_expat<const char16_t**> aAtts);
+  static void HandleEndElement(rlbox_sandbox_expat& aSandbox,
+                               tainted_expat<void*> aUserData,
+                               tainted_expat<const char16_t*> aName);
+  static void HandleEndElementForSystemPrincipal(
+      rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
+      tainted_expat<const char16_t*> aName);
+  nsresult HandleCharacterData(const char16_t* aCData, const uint32_t aLength);
+  nsresult HandleComment(const char16_t* aName);
+  nsresult HandleProcessingInstruction(const char16_t* aTarget,
+                                       const char16_t* aData);
+  nsresult HandleXMLDeclaration(const char16_t* aVersion,
+                                const char16_t* aEncoding, int32_t aStandalone);
+  nsresult HandleDefault(const char16_t* aData, const uint32_t aLength);
+  nsresult HandleStartCdataSection();
+  nsresult HandleEndCdataSection();
+  nsresult HandleStartDoctypeDecl(const char16_t* aDoctypeName,
+                                  const char16_t* aSysid,
+                                  const char16_t* aPubid,
+                                  bool aHasInternalSubset);
+  nsresult HandleEndDoctypeDecl();
+
+ private:
+  // Load up an external stream to get external entity information.
+  // aStream and aAbsURI are out-parameters (pointer-to-pointer).
+  nsresult OpenInputStreamFromExternalDTD(const char16_t* aFPIStr,
+                                          const char16_t* aURLStr,
+                                          nsIURI* aBaseURI,
+                                          nsIInputStream** aStream,
+                                          nsIURI** aAbsURI);
+
+  // Finality of a chunk handed to ParseChunk (see its aIsFinal parameter).
+  enum class ChunkOrBufferIsFinal {
+    None,                 // not the last chunk in a row
+    FinalChunk,           // last chunk in a row passed to ParseChunk
+    FinalChunkAndBuffer,  // last chunk and last buffer: no more ParseChunk
+                          // calls for the document being parsed
+  };
+
+  /**
+   * Pass a buffer to Expat. If Expat is blocked aBuffer should be null and
+   * aLength should be 0. The result of the call will be stored in
+   * mInternalState. Expat will parse as much of the buffer as it can and store
+   * the rest in its internal buffer.
+   *
+   * @param aBuffer the buffer to pass to Expat. May be null.
+   * @param aLength the length of the buffer to pass to Expat (in number of
+   *                char16_t's). Must be 0 if aBuffer is null and > 0 if
+   *                aBuffer is not null.
+   * @param aIsFinal whether this is the last chunk in a row passed to
+   *                 ParseChunk, and if so whether it's the last chunk and
+   *                 buffer passed to ParseChunk (meaning there will be no more
+   *                 calls to ParseChunk for the document being parsed).
+   * @param aConsumed [out] the number of char16_t's that Expat consumed. This
+   *                        doesn't include the char16_t's that Expat stored in
+   *                        its buffer but didn't parse yet.
+   * @param aLastLineLength [out] the length of the last line that Expat has
+   *                              consumed. This will only be computed if
+   *                              aIsFinal is not None or mInternalState is set
+   *                              to a failure.
+   */
+  void ParseChunk(const char16_t* aBuffer, uint32_t aLength,
+                  ChunkOrBufferIsFinal aIsFinal, uint32_t* aConsumed,
+                  XML_Size* aLastLineLength);
+  /**
+   * Wrapper for ParseChunk. If the buffer is too large to be copied into the
+   * sandbox all at once, splits it into chunks and invokes ParseChunk in a
+   * loop.
+   *
+   * @param aBuffer the buffer to pass to Expat. May be null.
+   * @param aLength the length of the buffer to pass to Expat (in number of
+   *                char16_t's). Must be 0 if aBuffer is null and > 0 if
+   *                aBuffer is not null.
+   * @param aIsFinal whether there will definitely not be any more new buffers
+   *                 passed in to ChunkAndParseBuffer
+   * @param aPassedToExpat [out] NOTE(review): presumably the number of
+   *                             char16_t's of aBuffer that were handed to
+   *                             Expat -- confirm against the implementation.
+   * @param aConsumed [out] the number of char16_t's that Expat consumed. This
+   *                        doesn't include the char16_t's that Expat stored in
+   *                        its buffer but didn't parse yet.
+   * @param aLastLineLength [out] the length of the last line that Expat has
+   *                              consumed.
+   */
+  void ChunkAndParseBuffer(const char16_t* aBuffer, uint32_t aLength,
+                           bool aIsFinal, uint32_t* aPassedToExpat,
+                           uint32_t* aConsumed, XML_Size* aLastLineLength);
+
+  nsresult HandleError();
+
+  void MaybeStopParser(nsresult aState);
+
+  // True when mInternalState is NS_ERROR_HTMLPARSER_BLOCK or
+  // NS_ERROR_HTMLPARSER_INTERRUPTED. Const because it only reads
+  // mInternalState, so it can also be used from const members.
+  bool BlockedOrInterrupted() const {
+    return mInternalState == NS_ERROR_HTMLPARSER_BLOCK ||
+           mInternalState == NS_ERROR_HTMLPARSER_INTERRUPTED;
+  }
+
+  // Expat allows us to set the base URI for entities. It doesn't use the base
+  // URI itself, but just passes it along to all the entity handlers (just the
+  // external entity reference handler for us). It does expect the base URI as a
+  // null-terminated string, with the same character type as the parsed buffers
+  // (char16_t in our case). Because nsIURI stores a UTF-8 string we have to do
+  // a conversion to UTF-16 for Expat. We also RLBox the Expat parser, so we
+  // also do 2 copies (into RLBox sandbox, and Expat does a copy into its pool).
+  // Most of the time this base URI is unused (the external entity handler is
+  // rarely called), but when it is we also convert it back to a nsIURI, so we
+  // convert the string back to UTF-8.
+  //
+  // We'd rather not do any of these conversions and copies, so we use a (hacky)
+  // workaround. We store all base URIs in an array of nsIURIs. Instead of
+  // passing the real URI to Expat as a string, we pass it a null-terminated
+  // 2-character buffer. The first character of that buffer stores the index of
+  // the corresponding nsIURI in the array (incremented by 1 because 0 is used
+  // to terminate a string). The entity handler can then use the index from the
+  // base URI that Expat passes it to look up the right nsIURI from the array.
+  //
+  // GetExpatBaseURI pushes the nsIURI to the array, and creates the
+  // two-character buffer for it.
+  //
+  // GetBaseURI looks up the right nsIURI in the array, based on the index from
+  // the two-character buffer.
+  using ExpatBaseURI = mozilla::Array<XML_Char, 2>;
+  ExpatBaseURI GetExpatBaseURI(nsIURI* aURI);
+  nsIURI* GetBaseURI(const XML_Char* aBase) const;
+
+  RLBoxExpatSandboxData* SandboxData() const;
+  rlbox_sandbox_expat* Sandbox() const;
+
+  // Destroy expat parser and return sandbox to pool
+  void Destroy();
+
+  mozilla::UniquePtr<mozilla::RLBoxSandboxPoolData> mSandboxPoolData;
+  tainted_expat<XML_Parser> mExpatParser;
+
+  nsString mLastLine;
+  nsString mCDataText;
+  // Various parts of a doctype
+  nsString mDoctypeName;
+  nsString mSystemID;
+  nsString mPublicID;
+  nsString mInternalSubset;
+  bool mInCData;
+  bool mInInternalSubset;
+  bool mInExternalDTD;
+  bool mMadeFinalCallToExpat;
+
+  // Used to track if we're in the parser.
+  bool mInParser;
+
+  // Result of the most recent call into Expat (see ParseChunk); also what
+  // BlockedOrInterrupted() inspects.
+  nsresult mInternalState;
+
+  // The length of the data in Expat's buffer (in number of char16_t's).
+  uint32_t mExpatBuffered;
+
+  uint16_t mTagDepth;
+
+  // These sinks all refer to the same conceptual object. mOriginalSink is
+  // identical with the nsIContentSink* passed to WillBuildModel, and exists
+  // only to avoid QI-ing back to nsIContentSink*.
+  // NOTE(review): in this header the nsIContentSink* is a parameter of
+  // Initialize(); WillBuildModel is not declared here directly -- confirm
+  // that the sentence above is still current.
+  nsCOMPtr<nsIContentSink> mOriginalSink;
+  nsCOMPtr<nsIExpatSink> mSink;
+
+  const nsCatalogData* mCatalogData;  // weak
+  // Base URIs handed out by GetExpatBaseURI and looked up by GetBaseURI.
+  nsTArray<nsCOMPtr<nsIURI>> mURIs;
+
+  // Used for error reporting.
+  uint64_t mInnerWindowID;
+};
+
+/**
+ * Data kept for one Expat sandbox: owns the rlbox_sandbox_expat (mSandbox) and
+ * holds the sandbox_callback_expat handles for the Expat callbacks.
+ *
+ * RLBoxExpatSandboxPool and nsExpatDriver are friends and therefore have
+ * access to the private members.
+ */
+class RLBoxExpatSandboxData : public mozilla::RLBoxSandboxDataBase {
+  friend class RLBoxExpatSandboxPool;
+  friend class nsExpatDriver;
+
+ public:
+  // Forwards aSize to RLBoxSandboxDataBase. NOTE(review): presumably the size
+  // of the sandbox -- confirm against the base class.
+  explicit RLBoxExpatSandboxData(uint64_t aSize)
+      : mozilla::RLBoxSandboxDataBase(aSize) {
+    MOZ_COUNT_CTOR(RLBoxExpatSandboxData);
+  }
+  ~RLBoxExpatSandboxData();
+  // Non-owning pointer to the sandbox held by mSandbox.
+  rlbox_sandbox_expat* Sandbox() const { return mSandbox.get(); }
+  // After getting a sandbox from the pool we need to register the
+  // Handle{Start,End}Element callbacks and associate the driver with the
+  // sandbox.
+  // NOTE(review): presumably aIsSystemPrincipal selects nsExpatDriver's
+  // *ForSystemPrincipal element handlers and aDriver is the nsExpatDriver to
+  // associate -- confirm against the implementation.
+  void AttachDriver(bool aIsSystemPrincipal, void* aDriver);
+  void DetachDriver();
+
+ private:
+  mozilla::UniquePtr<rlbox_sandbox_expat> mSandbox;
+  // Common expat callbacks that persist across calls to {Attach,Detach}Driver,
+  // and consequently across sandbox reuses.
+  sandbox_callback_expat<XML_XmlDeclHandler> mHandleXMLDeclaration;
+  sandbox_callback_expat<XML_CharacterDataHandler> mHandleCharacterData;
+  sandbox_callback_expat<XML_ProcessingInstructionHandler>
+      mHandleProcessingInstruction;
+  sandbox_callback_expat<XML_DefaultHandler> mHandleDefault;
+  sandbox_callback_expat<XML_ExternalEntityRefHandler> mHandleExternalEntityRef;
+  sandbox_callback_expat<XML_CommentHandler> mHandleComment;
+  sandbox_callback_expat<XML_StartCdataSectionHandler> mHandleStartCdataSection;
+  sandbox_callback_expat<XML_EndCdataSectionHandler> mHandleEndCdataSection;
+  sandbox_callback_expat<XML_StartDoctypeDeclHandler> mHandleStartDoctypeDecl;
+  sandbox_callback_expat<XML_EndDoctypeDeclHandler> mHandleEndDoctypeDecl;
+  // Expat callbacks specific to each driver, and thus (re)set across sandbox
+  // reuses.
+  sandbox_callback_expat<XML_StartElementHandler> mHandleStartElement;
+  sandbox_callback_expat<XML_EndElementHandler> mHandleEndElement;
+};
+
+#endif