/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef NS_EXPAT_DRIVER__
|
|
#define NS_EXPAT_DRIVER__
|
|
|
|
#include "expat_config.h"
|
|
#include "moz_expat.h"
|
|
#include "nsCOMPtr.h"
|
|
#include "nsString.h"
|
|
#include "nsIDTD.h"
|
|
#include "nsIInputStream.h"
|
|
#include "nsIParser.h"
|
|
#include "nsCycleCollectionParticipant.h"
|
|
#include "nsScanner.h"
|
|
|
|
#include "rlbox_expat.h"
|
|
#include "nsRLBoxExpatDriver.h"
|
|
#include "mozilla/UniquePtr.h"
|
|
|
|
// Forward declarations: these types are only named (by pointer, reference or
// as a declared-but-not-defined return type) in the declarations below.
class nsIExpatSink;
struct nsCatalogData;
class RLBoxExpatSandboxData;

namespace mozilla {
// Fixed-size array template; only its name is needed in this header.
template <typename, size_t>
class Array;
}  // namespace mozilla
|
|
|
|
class nsExpatDriver : public nsIDTD {
|
|
virtual ~nsExpatDriver();
|
|
|
|
public:
|
|
NS_DECL_CYCLE_COLLECTING_ISUPPORTS_FINAL
|
|
NS_DECL_NSIDTD
|
|
NS_DECL_CYCLE_COLLECTION_CLASS(nsExpatDriver)
|
|
|
|
nsExpatDriver();
|
|
|
|
nsresult Initialize(nsIURI* aURI, nsIContentSink* aSink);
|
|
|
|
nsresult ResumeParse(nsScanner& aScanner, bool aIsFinalChunk);
|
|
|
|
int HandleExternalEntityRef(const char16_t* aOpenEntityNames,
|
|
const char16_t* aBase, const char16_t* aSystemId,
|
|
const char16_t* aPublicId);
|
|
static void HandleStartElement(rlbox_sandbox_expat& aSandbox,
|
|
tainted_expat<void*> aUserData,
|
|
tainted_expat<const char16_t*> aName,
|
|
tainted_expat<const char16_t**> aAtts);
|
|
static void HandleStartElementForSystemPrincipal(
|
|
rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
|
|
tainted_expat<const char16_t*> aName,
|
|
tainted_expat<const char16_t**> aAtts);
|
|
static void HandleEndElement(rlbox_sandbox_expat& aSandbox,
|
|
tainted_expat<void*> aUserData,
|
|
tainted_expat<const char16_t*> aName);
|
|
static void HandleEndElementForSystemPrincipal(
|
|
rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
|
|
tainted_expat<const char16_t*> aName);
|
|
nsresult HandleCharacterData(const char16_t* aCData, const uint32_t aLength);
|
|
nsresult HandleComment(const char16_t* aName);
|
|
nsresult HandleProcessingInstruction(const char16_t* aTarget,
|
|
const char16_t* aData);
|
|
nsresult HandleXMLDeclaration(const char16_t* aVersion,
|
|
const char16_t* aEncoding, int32_t aStandalone);
|
|
nsresult HandleDefault(const char16_t* aData, const uint32_t aLength);
|
|
nsresult HandleStartCdataSection();
|
|
nsresult HandleEndCdataSection();
|
|
nsresult HandleStartDoctypeDecl(const char16_t* aDoctypeName,
|
|
const char16_t* aSysid,
|
|
const char16_t* aPubid,
|
|
bool aHasInternalSubset);
|
|
nsresult HandleEndDoctypeDecl();
|
|
|
|
private:
|
|
// Load up an external stream to get external entity information
|
|
nsresult OpenInputStreamFromExternalDTD(const char16_t* aFPIStr,
|
|
const char16_t* aURLStr,
|
|
nsIURI* aBaseURI,
|
|
nsIInputStream** aStream,
|
|
nsIURI** aAbsURI);
|
|
|
|
enum class ChunkOrBufferIsFinal {
|
|
None,
|
|
FinalChunk,
|
|
FinalChunkAndBuffer,
|
|
};
|
|
|
|
/**
|
|
* Pass a buffer to Expat. If Expat is blocked aBuffer should be null and
|
|
* aLength should be 0. The result of the call will be stored in
|
|
* mInternalState. Expat will parse as much of the buffer as it can and store
|
|
* the rest in its internal buffer.
|
|
*
|
|
* @param aBuffer the buffer to pass to Expat. May be null.
|
|
* @param aLength the length of the buffer to pass to Expat (in number of
|
|
* char16_t's). Must be 0 if aBuffer is null and > 0 if
|
|
* aBuffer is not null.
|
|
* @param aIsFinal whether this is the last chunk in a row passed to
|
|
* ParseChunk, and if so whether it's the last chunk and
|
|
* buffer passed to ParseChunk (meaning there will be no more
|
|
* calls to ParseChunk for the document being parsed).
|
|
* @param aConsumed [out] the number of PRUnichars that Expat consumed. This
|
|
* doesn't include the PRUnichars that Expat stored in
|
|
* its buffer but didn't parse yet.
|
|
* @param aLastLineLength [out] the length of the last line that Expat has
|
|
* consumed. This will only be computed if
|
|
* aIsFinal is not None or mInternalState is set
|
|
* to a failure.
|
|
*/
|
|
void ParseChunk(const char16_t* aBuffer, uint32_t aLength,
|
|
ChunkOrBufferIsFinal aIsFinal, uint32_t* aConsumed,
|
|
XML_Size* aLastLineLength);
|
|
/**
|
|
* Wrapper for ParseBuffer. If the buffer is too large to be copied into the
|
|
* sandbox all at once, splits it into chunks and invokes ParseBuffer in a
|
|
* loop.
|
|
*
|
|
* @param aBuffer the buffer to pass to Expat. May be null.
|
|
* @param aLength the length of the buffer to pass to Expat (in number of
|
|
* char16_t's). Must be 0 if aBuffer is null and > 0 if
|
|
* aBuffer is not null.
|
|
* @param aIsFinal whether there will definitely not be any more new buffers
|
|
* passed in to ParseBuffer
|
|
* @param aConsumed [out] the number of PRUnichars that Expat consumed. This
|
|
* doesn't include the PRUnichars that Expat stored in
|
|
* its buffer but didn't parse yet.
|
|
* @param aLastLineLength [out] the length of the last line that Expat has
|
|
* consumed.
|
|
*/
|
|
void ChunkAndParseBuffer(const char16_t* aBuffer, uint32_t aLength,
|
|
bool aIsFinal, uint32_t* aPassedToExpat,
|
|
uint32_t* aConsumed, XML_Size* aLastLineLength);
|
|
|
|
nsresult HandleError();
|
|
|
|
void MaybeStopParser(nsresult aState);
|
|
|
|
bool BlockedOrInterrupted() {
|
|
return mInternalState == NS_ERROR_HTMLPARSER_BLOCK ||
|
|
mInternalState == NS_ERROR_HTMLPARSER_INTERRUPTED;
|
|
}
|
|
|
|
// Expat allows us to set the base URI for entities. It doesn't use the base
|
|
// URI itself, but just passes it along to all the entity handlers (just the
|
|
// external entity reference handler for us). It does expect the base URI as a
|
|
// null-terminated string, with the same character type as the parsed buffers
|
|
// (char16_t in our case). Because nsIURI stores a UTF-8 string we have to do
|
|
// a conversion to UTF-16 for Expat. We also RLBox the Expat parser, so we
|
|
// also do 2 copies (into RLBox sandbox, and Expat does a copy into its pool).
|
|
// Most of the time this base URI is unused (the external entity handler is
|
|
// rarely called), but when it is we also convert it back to a nsIURI, so we
|
|
// convert the string back to UTF-8.
|
|
//
|
|
// We'd rather not do any of these conversions and copies, so we use a (hacky)
|
|
// workaround. We store all base URIs in an array of nsIURIs. Instead of
|
|
// passing the real URI to Expat as a string, we pass it a null-terminated
|
|
// 2-character buffer. The first character of that buffer stores the index of
|
|
// the corresponding nsIURI in the array (incremented with 1 because 0 is used
|
|
// to terminate a string). The entity handler can then use the index from the
|
|
// base URI that Expat passes it to look up the right nsIURI from the array.
|
|
//
|
|
// GetExpatBaseURI pushes the nsIURI to the array, and creates the
|
|
// two-character buffer for it.
|
|
//
|
|
// GetBaseURI looks up the right nsIURI in the array, based on the index from
|
|
// the two-character buffer.
|
|
using ExpatBaseURI = mozilla::Array<XML_Char, 2>;
|
|
ExpatBaseURI GetExpatBaseURI(nsIURI* aURI);
|
|
nsIURI* GetBaseURI(const XML_Char* aBase) const;
|
|
|
|
RLBoxExpatSandboxData* SandboxData() const;
|
|
rlbox_sandbox_expat* Sandbox() const;
|
|
|
|
// Destroy expat parser and return sandbox to pool
|
|
void Destroy();
|
|
|
|
mozilla::UniquePtr<mozilla::RLBoxSandboxPoolData> mSandboxPoolData;
|
|
tainted_expat<XML_Parser> mExpatParser;
|
|
|
|
nsString mLastLine;
|
|
nsString mCDataText;
|
|
// Various parts of a doctype
|
|
nsString mDoctypeName;
|
|
nsString mSystemID;
|
|
nsString mPublicID;
|
|
nsString mInternalSubset;
|
|
bool mInCData;
|
|
bool mInInternalSubset;
|
|
bool mInExternalDTD;
|
|
bool mMadeFinalCallToExpat;
|
|
|
|
// Used to track if we're in the parser.
|
|
bool mInParser;
|
|
|
|
nsresult mInternalState;
|
|
|
|
// The length of the data in Expat's buffer (in number of PRUnichars).
|
|
uint32_t mExpatBuffered;
|
|
|
|
uint16_t mTagDepth;
|
|
|
|
// These sinks all refer the same conceptual object. mOriginalSink is
|
|
// identical with the nsIContentSink* passed to WillBuildModel, and exists
|
|
// only to avoid QI-ing back to nsIContentSink*.
|
|
nsCOMPtr<nsIContentSink> mOriginalSink;
|
|
nsCOMPtr<nsIExpatSink> mSink;
|
|
|
|
const nsCatalogData* mCatalogData; // weak
|
|
nsTArray<nsCOMPtr<nsIURI>> mURIs;
|
|
|
|
// Used for error reporting.
|
|
uint64_t mInnerWindowID;
|
|
};
|
|
|
|
class RLBoxExpatSandboxData : public mozilla::RLBoxSandboxDataBase {
|
|
friend class RLBoxExpatSandboxPool;
|
|
friend class nsExpatDriver;
|
|
|
|
public:
|
|
explicit RLBoxExpatSandboxData(uint64_t aSize)
|
|
: mozilla::RLBoxSandboxDataBase(aSize) {
|
|
MOZ_COUNT_CTOR(RLBoxExpatSandboxData);
|
|
}
|
|
~RLBoxExpatSandboxData();
|
|
rlbox_sandbox_expat* Sandbox() const { return mSandbox.get(); }
|
|
// After getting a sandbox from the pool we need to register the
|
|
// Handle{Start,End}Element callbacks and associate the driver with the
|
|
// sandbox.
|
|
void AttachDriver(bool IsSystemPrincipal, void* aDriver);
|
|
void DetachDriver();
|
|
|
|
private:
|
|
mozilla::UniquePtr<rlbox_sandbox_expat> mSandbox;
|
|
// Common expat callbacks that persist across calls to {Attach,Detach}Driver,
|
|
// and consequently across sandbox reuses.
|
|
sandbox_callback_expat<XML_XmlDeclHandler> mHandleXMLDeclaration;
|
|
sandbox_callback_expat<XML_CharacterDataHandler> mHandleCharacterData;
|
|
sandbox_callback_expat<XML_ProcessingInstructionHandler>
|
|
mHandleProcessingInstruction;
|
|
sandbox_callback_expat<XML_DefaultHandler> mHandleDefault;
|
|
sandbox_callback_expat<XML_ExternalEntityRefHandler> mHandleExternalEntityRef;
|
|
sandbox_callback_expat<XML_CommentHandler> mHandleComment;
|
|
sandbox_callback_expat<XML_StartCdataSectionHandler> mHandleStartCdataSection;
|
|
sandbox_callback_expat<XML_EndCdataSectionHandler> mHandleEndCdataSection;
|
|
sandbox_callback_expat<XML_StartDoctypeDeclHandler> mHandleStartDoctypeDecl;
|
|
sandbox_callback_expat<XML_EndDoctypeDeclHandler> mHandleEndDoctypeDecl;
|
|
// Expat callbacks specific to each driver, and thus (re)set across sandbox
|
|
// reuses.
|
|
sandbox_callback_expat<XML_StartElementHandler> mHandleStartElement;
|
|
sandbox_callback_expat<XML_EndElementHandler> mHandleEndElement;
|
|
};
|
|
|
|
#endif
|