diff options
Diffstat (limited to 'dom/serializers/nsXMLContentSerializer.h')
-rw-r--r-- | dom/serializers/nsXMLContentSerializer.h | 440 |
1 files changed, 440 insertions, 0 deletions
diff --git a/dom/serializers/nsXMLContentSerializer.h b/dom/serializers/nsXMLContentSerializer.h
new file mode 100644
index 0000000000..167255fe09
--- /dev/null
+++ b/dom/serializers/nsXMLContentSerializer.h
@@ -0,0 +1,440 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * nsIContentSerializer implementation that can be used with an
+ * nsIDocumentEncoder to convert an XML DOM to an XML string that
+ * could be parsed into more or less the original DOM.
+ */
+
+#ifndef nsXMLContentSerializer_h__
+#define nsXMLContentSerializer_h__
+
+#include "mozilla/Attributes.h"
+#include "nsIContentSerializer.h"
+#include "nsISupportsUtils.h"
+#include "nsCOMPtr.h"
+#include "nsTArray.h"
+#include "nsString.h"
+
+#define kIndentStr u" "_ns
+#define kEndTag u"</"_ns
+
+class nsAtom;
+class nsINode;
+
+namespace mozilla {
+class Encoding;
+}
+
+class nsXMLContentSerializer : public nsIContentSerializer {
+ public:
+  nsXMLContentSerializer();
+
+  NS_DECL_ISUPPORTS
+
+  NS_IMETHOD Init(uint32_t flags, uint32_t aWrapColumn,
+                  const mozilla::Encoding* aEncoding, bool aIsCopying,
+                  bool aRewriteEncodingDeclaration,
+                  bool* aNeedsPreformatScanning, nsAString& aOutput) override;
+
+  NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset,
+                        int32_t aEndOffset) override;
+
+  NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection, int32_t aStartOffset,
+                                int32_t aEndOffset) override;
+
+  NS_IMETHOD AppendProcessingInstruction(
+      mozilla::dom::ProcessingInstruction* aPI, int32_t aStartOffset,
+      int32_t aEndOffset) override;
+
+  NS_IMETHOD AppendComment(mozilla::dom::Comment* aComment,
+                           int32_t aStartOffset, int32_t aEndOffset) override;
+
+  NS_IMETHOD AppendDoctype(mozilla::dom::DocumentType* aDoctype) override;
+
+  NS_IMETHOD AppendElementStart(
+      mozilla::dom::Element* aElement,
+      mozilla::dom::Element* aOriginalElement) override;
+
+  NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement,
+                              mozilla::dom::Element* aOriginalElement) override;
+
+  NS_IMETHOD FlushAndFinish() override { return NS_OK; }
+
+  NS_IMETHOD Finish() override;
+
+  NS_IMETHOD GetOutputLength(uint32_t& aLength) const override;
+
+  NS_IMETHOD AppendDocumentStart(mozilla::dom::Document* aDocument) override;
+
+  NS_IMETHOD ScanElementForPreformat(mozilla::dom::Element* aElement) override {
+    return NS_OK;
+  }
+  NS_IMETHOD ForgetElementForPreformat(
+      mozilla::dom::Element* aElement) override {
+    return NS_OK;
+  }
+
+ protected:
+  virtual ~nsXMLContentSerializer();
+
+  /**
+   * Appends a char16_t character and increments the column position
+   */
+  [[nodiscard]] bool AppendToString(const char16_t aChar,
+                                    nsAString& aOutputStr);
+
+  /**
+   * Appends a nsAString string and increments the column position
+   */
+  [[nodiscard]] bool AppendToString(const nsAString& aStr,
+                                    nsAString& aOutputStr);
+
+  /**
+   * Appends a string by replacing all line-endings
+   * by mLineBreak, except in the case of raw output.
+   * It increments the column position.
+   */
+  [[nodiscard]] bool AppendToStringConvertLF(const nsAString& aStr,
+                                             nsAString& aOutputStr);
+
+  /**
+   * Appends a string by wrapping it when necessary.
+   * It updates the column position.
+   */
+  [[nodiscard]] bool AppendToStringWrapped(const nsAString& aStr,
+                                           nsAString& aOutputStr);
+
+  /**
+   * Appends a string by formatting and wrapping it when necessary.
+   * It updates the column position.
+   */
+  [[nodiscard]] bool AppendToStringFormatedWrapped(const nsAString& aStr,
+                                                   nsAString& aOutputStr);
+
+  // used by AppendToStringWrapped
+  [[nodiscard]] bool AppendWrapped_WhitespaceSequence(
+      nsAString::const_char_iterator& aPos,
+      const nsAString::const_char_iterator aEnd,
+      const nsAString::const_char_iterator aSequenceStart,
+      nsAString& aOutputStr);
+
+  // used by AppendToStringFormatedWrapped
+  [[nodiscard]] bool AppendFormatedWrapped_WhitespaceSequence(
+      nsAString::const_char_iterator& aPos,
+      const nsAString::const_char_iterator aEnd,
+      const nsAString::const_char_iterator aSequenceStart,
+      bool& aMayIgnoreStartOfLineWhitespaceSequence, nsAString& aOutputStr);
+
+  // used by AppendToStringWrapped and AppendToStringFormatedWrapped
+  [[nodiscard]] bool AppendWrapped_NonWhitespaceSequence(
+      nsAString::const_char_iterator& aPos,
+      const nsAString::const_char_iterator aEnd,
+      const nsAString::const_char_iterator aSequenceStart,
+      bool& aMayIgnoreStartOfLineWhitespaceSequence,
+      bool& aSequenceStartAfterAWhiteSpace, nsAString& aOutputStr);
+
+  /**
+   * Adds mLineBreak to the string.
+   * It updates the column position and other flags.
+   */
+  [[nodiscard]] bool AppendNewLineToString(nsAString& aOutputStr);
+
+  /**
+   * Appends a string by translating entities
+   * It doesn't increment the column position
+   */
+  [[nodiscard]] virtual bool AppendAndTranslateEntities(const nsAString& aStr,
+                                                        nsAString& aOutputStr);
+
+  /**
+   * Helper for virtual AppendAndTranslateEntities that does the actual work.
+   *
+   * Do not call this directly. Call it via the template helper below.
+   */
+ private:
+  [[nodiscard]] static bool AppendAndTranslateEntities(
+      const nsAString& aStr, nsAString& aOutputStr,
+      const uint8_t aEntityTable[], uint16_t aMaxTableIndex,
+      const char* const aStringTable[]);
+
+ protected:
+  /**
+   * Helper for calling AppendAndTranslateEntities in a way that guarantees we
+   * don't mess up our aEntityTable sizing. This is a bit more complicated than
+   * it could be, because sometimes we don't want to use all of aEntityTable, so
+   * we have to allow passing the amount to use independently. But we can
+   * statically ensure it's not too big.
+   *
+   * The first integer template argument, which callers need to specify
+   * explicitly, is the index of the last entry in aEntityTable that should be
+   * considered for encoding as an entity reference. The second integer
+   * argument will be deduced from the actual table passed in.
+   *
+   * aEntityTable contains as values indices into aStringTable. Those represent
+   * the strings that should be used to replace the characters that are used to
+   * index into aEntityTable. aStringTable[0] should be nullptr, and characters
+   * that do not need replacement should map to 0 in aEntityTable.
+   */
+  template <uint16_t LargestIndex, uint16_t TableLength>
+  [[nodiscard]] bool AppendAndTranslateEntities(
+      const nsAString& aStr, nsAString& aOutputStr,
+      const uint8_t (&aEntityTable)[TableLength],
+      const char* const aStringTable[]) {
+    static_assert(LargestIndex < TableLength,
+                  "Largest allowed index must be smaller than table length");
+    return AppendAndTranslateEntities(aStr, aOutputStr, aEntityTable,
+                                      LargestIndex, aStringTable);
+  }
+
+  /**
+   * Max index that can be used with some of our entity tables.
+   */
+  static const uint16_t kGTVal = 62;
+
+  /**
+   * Retrieves the text content of the node and appends it to the given string.
+   * It doesn't increment the column position
+   */
+  nsresult AppendTextData(nsIContent* aNode, int32_t aStartOffset,
+                          int32_t aEndOffset, nsAString& aStr,
+                          bool aTranslateEntities);
+
+  virtual nsresult PushNameSpaceDecl(const nsAString& aPrefix,
+                                     const nsAString& aURI, nsIContent* aOwner);
+  void PopNameSpaceDeclsFor(nsIContent* aOwner);
+
+  /**
+   * The problem that ConfirmPrefix fixes is that anyone can insert nodes
+   * through the DOM that have a namespace URI and a random or empty or
+   * previously existing prefix that's totally unrelated to the prefixes
+   * declared at that point through xmlns attributes. So what ConfirmPrefix
+   * does is ensure that we can map aPrefix to the namespace URI aURI (for
+   * example, that the prefix is not already mapped to some other namespace).
+   * aPrefix will be adjusted, if necessary, so the value of the prefix
+   * _after_ this call is what should be serialized.
+   * @param aPrefix the prefix that may need adjusting
+   * @param aURI the namespace URI we want aPrefix to point to
+   * @param aElement the element we're working with (needed for proper default
+   *                 namespace handling)
+   * @param aIsAttribute true if we're confirming a prefix for an attribute.
+   * @return true if we need to push the (prefix, uri) pair on the namespace
+   *         stack (note that this can happen even if the prefix is
+   *         empty).
+   */
+  bool ConfirmPrefix(nsAString& aPrefix, const nsAString& aURI,
+                     nsIContent* aElement, bool aIsAttribute);
+  /**
+   * GenerateNewPrefix generates a new prefix and writes it to aPrefix
+   */
+  void GenerateNewPrefix(nsAString& aPrefix);
+
+  uint32_t ScanNamespaceDeclarations(mozilla::dom::Element* aContent,
+                                     mozilla::dom::Element* aOriginalElement,
+                                     const nsAString& aTagNamespaceURI);
+
+  [[nodiscard]] virtual bool SerializeAttributes(
+      mozilla::dom::Element* aContent, mozilla::dom::Element* aOriginalElement,
+      nsAString& aTagPrefix, const nsAString& aTagNamespaceURI,
+      nsAtom* aTagName, nsAString& aStr, uint32_t aSkipAttr, bool aAddNSAttr);
+
+  [[nodiscard]] bool SerializeAttr(const nsAString& aPrefix,
+                                   const nsAString& aName,
+                                   const nsAString& aValue, nsAString& aStr,
+                                   bool aDoEscapeEntities);
+
+  bool IsJavaScript(nsIContent* aContent, nsAtom* aAttrNameAtom,
+                    int32_t aAttrNamespaceID, const nsAString& aValueString);
+
+  /**
+   * This method can be redefined to check if the element can be serialized.
+   * It is called when the serialization of the start tag is asked
+   * (AppendElementStart)
+   * In this method you can also force the formatting
+   * by setting aForceFormat to true.
+   * @return boolean true if the element can be output
+   */
+  virtual bool CheckElementStart(mozilla::dom::Element* aElement,
+                                 bool& aForceFormat, nsAString& aStr,
+                                 nsresult& aResult);
+
+  /**
+   * This method is responsible for appending the '>' at the end of the start
+   * tag, possibly preceded by '/' and maybe a ' ' before that too.
+   *
+   * aElement and aOriginalElement are the same as the corresponding arguments
+   * to AppendElementStart.
+   */
+  [[nodiscard]] bool AppendEndOfElementStart(
+      mozilla::dom::Element* aElement, mozilla::dom::Element* aOriginalElement,
+      nsAString& aStr);
+
+  /**
+   * This method can be redefined to serialize additional things just after
+   * the serialization of the start tag.
+   * (called at the end of AppendElementStart)
+   */
+  [[nodiscard]] virtual bool AfterElementStart(nsIContent* aContent,
+                                               nsIContent* aOriginalElement,
+                                               nsAString& aStr) {
+    return true;
+  }
+
+  /**
+   * This method can be redefined to check if the element can be serialized.
+   * It is called when the serialization of the end tag is asked
+   * (AppendElementEnd)
+   * In this method you can also force the formatting
+   * by setting aForceFormat to true.
+   * @return boolean true if the element can be output
+   */
+  virtual bool CheckElementEnd(mozilla::dom::Element* aElement,
+                               mozilla::dom::Element* aOriginalElement,
+                               bool& aForceFormat, nsAString& aStr);
+
+  /**
+   * This method can be redefined to serialize additional things just after
+   * the serialization of the end tag.
+   * (presumably called at the end of AppendElementEnd -- TODO confirm)
+   */
+  virtual void AfterElementEnd(nsIContent* aContent, nsAString& aStr) {}
+
+  /**
+   * Returns true if a line break should be inserted before an element open tag
+   */
+  virtual bool LineBreakBeforeOpen(int32_t aNamespaceID, nsAtom* aName);
+
+  /**
+   * Returns true if a line break should be inserted after an element open tag
+   */
+  virtual bool LineBreakAfterOpen(int32_t aNamespaceID, nsAtom* aName);
+
+  /**
+   * Returns true if a line break should be inserted before an element close tag
+   */
+  virtual bool LineBreakBeforeClose(int32_t aNamespaceID, nsAtom* aName);
+
+  /**
+   * Returns true if a line break should be inserted after an element close tag
+   */
+  virtual bool LineBreakAfterClose(int32_t aNamespaceID, nsAtom* aName);
+
+  /**
+   * Adds indentation. Call only in the case of formatting and if the current
+   * position is at 0. It updates the column position.
+   */
+  [[nodiscard]] bool AppendIndentation(nsAString& aStr);
+
+  [[nodiscard]] bool IncrIndentation(nsAtom* aName);
+  void DecrIndentation(nsAtom* aName);
+
+  // Functions to check for newlines that need to be added between nodes in
+  // the root of a document. See mAddNewlineForRootNode
+  [[nodiscard]] bool MaybeAddNewlineForRootNode(nsAString& aStr);
+  void MaybeFlagNewlineForRootNode(nsINode* aNode);
+
+  // Functions to check if we enter in or leave from a preformatted content
+  virtual void MaybeEnterInPreContent(nsIContent* aNode);
+  virtual void MaybeLeaveFromPreContent(nsIContent* aNode);
+
+  bool ShouldMaintainPreLevel() const;
+  int32_t PreLevel() const {
+    MOZ_ASSERT(ShouldMaintainPreLevel());
+    return mPreLevel;
+  }
+  int32_t& PreLevel() {
+    MOZ_ASSERT(ShouldMaintainPreLevel());
+    return mPreLevel;
+  }
+
+  bool MaybeSerializeIsValue(mozilla::dom::Element* aElement, nsAString& aStr);
+
+  int32_t mPrefixIndex;
+
+  struct NameSpaceDecl {
+    nsString mPrefix;
+    nsString mURI;
+    nsIContent* mOwner;
+  };
+
+  nsTArray<NameSpaceDecl> mNameSpaceStack;
+
+  // nsIDocumentEncoder flags
+  MOZ_INIT_OUTSIDE_CTOR uint32_t mFlags;
+
+  // characters to use for line break
+  nsString mLineBreak;
+
+  // The charset that was passed to Init()
+  nsCString mCharset;
+
+  // current column position on the current line
+  uint32_t mColPos;
+
+  // true = pretty formatting should be done (OutputFormated flag)
+  MOZ_INIT_OUTSIDE_CTOR bool mDoFormat;
+
+  // true = no formatting (OutputRaw flag)
+  // no newline conversion and no rewrap long lines even if OutputWrap is set.
+  MOZ_INIT_OUTSIDE_CTOR bool mDoRaw;
+
+  // true = wrapping should be done (OutputWrap flag)
+  MOZ_INIT_OUTSIDE_CTOR bool mDoWrap;
+
+  // true = we can break lines (OutputDisallowLineBreaking flag)
+  MOZ_INIT_OUTSIDE_CTOR bool mAllowLineBreaking;
+
+  // maximum number of columns in a line, in the wrap mode
+  MOZ_INIT_OUTSIDE_CTOR uint32_t mMaxColumn;
+
+  // current indent value
+  nsString mIndent;
+
+  // this is the indentation level after the indentation reached
+  // the maximum length of indentation
+  int32_t mIndentOverflow;
+
+  // says if the indentation has been already added on the current line
+  bool mIsIndentationAddedOnCurrentLine;
+
+  // the string which is currently added is in an attribute
+  bool mInAttribute;
+
+  // true = a newline character should be added. It's only
+  // useful when serializing root nodes. see MaybeAddNewlineForRootNode and
+  // MaybeFlagNewlineForRootNode
+  bool mAddNewlineForRootNode;
+
+  // Indicates that a space will be added if and only if content is
+  // continued on the same line while serializing source. Otherwise,
+  // the newline character acts as the whitespace and no space is needed.
+  // used when mDoFormat = true
+  bool mAddSpace;
+
+  // says that if the next string to add contains a newline character at the
+  // beginning, then this newline character should be ignored, because
+  // such a character has already been added into the output string
+  bool mMayIgnoreLineBreakSequence;
+
+  bool mBodyOnly;
+  int32_t mInBody;
+
+  // Non-owning.
+  nsAString* mOutput;
+
+ private:
+  // number of nested elements which have preformatted content
+  MOZ_INIT_OUTSIDE_CTOR int32_t mPreLevel;
+
+  static const uint8_t kEntities[];
+  static const uint8_t kAttrEntities[];
+  static const char* const kEntityStrings[];
+};
+
+nsresult NS_NewXMLContentSerializer(nsIContentSerializer** aSerializer);
+
+#endif
|