summaryrefslogtreecommitdiffstats
path: root/dom/serializers/nsXMLContentSerializer.h
diff options
context:
space:
mode:
Diffstat (limited to 'dom/serializers/nsXMLContentSerializer.h')
-rw-r--r--dom/serializers/nsXMLContentSerializer.h440
1 files changed, 440 insertions, 0 deletions
diff --git a/dom/serializers/nsXMLContentSerializer.h b/dom/serializers/nsXMLContentSerializer.h
new file mode 100644
index 0000000000..167255fe09
--- /dev/null
+++ b/dom/serializers/nsXMLContentSerializer.h
@@ -0,0 +1,440 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * nsIContentSerializer implementation that can be used with an
+ * nsIDocumentEncoder to convert an XML DOM to an XML string that
+ * could be parsed into more or less the original DOM.
+ */
+
+#ifndef nsXMLContentSerializer_h__
+#define nsXMLContentSerializer_h__
+
+#include "mozilla/Attributes.h"
+#include "nsIContentSerializer.h"
+#include "nsISupportsUtils.h"
+#include "nsCOMPtr.h"
+#include "nsTArray.h"
+#include "nsString.h"
+
+#define kIndentStr u" "_ns
+#define kEndTag u"</"_ns
+
+class nsAtom;
+class nsINode;
+
+namespace mozilla {
+class Encoding;
+}
+
+class nsXMLContentSerializer : public nsIContentSerializer {
+ public:
+ nsXMLContentSerializer();
+
+ NS_DECL_ISUPPORTS
+
+ NS_IMETHOD Init(uint32_t flags, uint32_t aWrapColumn,
+ const mozilla::Encoding* aEncoding, bool aIsCopying,
+ bool aRewriteEncodingDeclaration,
+ bool* aNeedsPreformatScanning, nsAString& aOutput) override;
+
+ NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset,
+ int32_t aEndOffset) override;
+
+ NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection, int32_t aStartOffset,
+ int32_t aEndOffset) override;
+
+ NS_IMETHOD AppendProcessingInstruction(
+ mozilla::dom::ProcessingInstruction* aPI, int32_t aStartOffset,
+ int32_t aEndOffset) override;
+
+ NS_IMETHOD AppendComment(mozilla::dom::Comment* aComment,
+ int32_t aStartOffset, int32_t aEndOffset) override;
+
+ NS_IMETHOD AppendDoctype(mozilla::dom::DocumentType* aDoctype) override;
+
+ NS_IMETHOD AppendElementStart(
+ mozilla::dom::Element* aElement,
+ mozilla::dom::Element* aOriginalElement) override;
+
+ NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement,
+ mozilla::dom::Element* aOriginalElement) override;
+
+ NS_IMETHOD FlushAndFinish() override { return NS_OK; }  // no-op: just succeeds
+
+ NS_IMETHOD Finish() override;
+
+ NS_IMETHOD GetOutputLength(uint32_t& aLength) const override;
+
+ NS_IMETHOD AppendDocumentStart(mozilla::dom::Document* aDocument) override;
+
+ NS_IMETHOD ScanElementForPreformat(mozilla::dom::Element* aElement) override {
+ return NS_OK;  // no-op in this class: aElement is not inspected
+ }
+ NS_IMETHOD ForgetElementForPreformat(
+ mozilla::dom::Element* aElement) override {
+ return NS_OK;  // no-op in this class: aElement is not inspected
+ }
+
+ protected:
+ virtual ~nsXMLContentSerializer();
+
+ /**
+ * Appends a char16_t character and increments the column position
+ */
+ [[nodiscard]] bool AppendToString(const char16_t aChar,
+ nsAString& aOutputStr);
+
+ /**
+ * Appends a nsAString string and increments the column position
+ */
+ [[nodiscard]] bool AppendToString(const nsAString& aStr,
+ nsAString& aOutputStr);
+
+ /**
+ * Appends a string by replacing all line-endings
+ * by mLineBreak, except in the case of raw output.
+ * It increments the column position.
+ */
+ [[nodiscard]] bool AppendToStringConvertLF(const nsAString& aStr,
+ nsAString& aOutputStr);
+
+ /**
+ * Appends a string by wrapping it when necessary.
+ * It updates the column position.
+ */
+ [[nodiscard]] bool AppendToStringWrapped(const nsAString& aStr,
+ nsAString& aOutputStr);
+
+ /**
+ * Appends a string by formatting and wrapping it when necessary
+ * It updates the column position.
+ */
+ [[nodiscard]] bool AppendToStringFormatedWrapped(const nsAString& aStr,
+ nsAString& aOutputStr);
+
+ // used by AppendToStringWrapped
+ [[nodiscard]] bool AppendWrapped_WhitespaceSequence(
+ nsAString::const_char_iterator& aPos,
+ const nsAString::const_char_iterator aEnd,
+ const nsAString::const_char_iterator aSequenceStart,
+ nsAString& aOutputStr);
+
+ // used by AppendToStringFormatedWrapped
+ [[nodiscard]] bool AppendFormatedWrapped_WhitespaceSequence(
+ nsAString::const_char_iterator& aPos,
+ const nsAString::const_char_iterator aEnd,
+ const nsAString::const_char_iterator aSequenceStart,
+ bool& aMayIgnoreStartOfLineWhitespaceSequence, nsAString& aOutputStr);
+
+ // used by AppendToStringWrapped and AppendToStringFormatedWrapped
+ [[nodiscard]] bool AppendWrapped_NonWhitespaceSequence(
+ nsAString::const_char_iterator& aPos,
+ const nsAString::const_char_iterator aEnd,
+ const nsAString::const_char_iterator aSequenceStart,
+ bool& aMayIgnoreStartOfLineWhitespaceSequence,
+ bool& aSequenceStartAfterAWhiteSpace, nsAString& aOutputStr);
+
+ /**
+ * add mLineBreak to the string
+ * It updates the column position and other flags.
+ */
+ [[nodiscard]] bool AppendNewLineToString(nsAString& aOutputStr);
+
+ /**
+ * Appends a string by translating entities
+ * It doesn't increment the column position
+ */
+ [[nodiscard]] virtual bool AppendAndTranslateEntities(const nsAString& aStr,
+ nsAString& aOutputStr);
+
+ /**
+ * Helper for virtual AppendAndTranslateEntities that does the actual work.
+ *
+ * Do not call this directly. Call it via the template helper below.
+ */
+ private:
+ [[nodiscard]] static bool AppendAndTranslateEntities(
+ const nsAString& aStr, nsAString& aOutputStr,
+ const uint8_t aEntityTable[], uint16_t aMaxTableIndex,
+ const char* const aStringTable[]);
+
+ protected:
+ /**
+ * Type-safe front end for the private AppendAndTranslateEntities overload.
+ * The table is taken by reference to an array, so its length is known to the
+ * compiler and a limit that does not fit the table is rejected at compile
+ * time.
+ *
+ * LargestIndex must be spelled out by the caller: it is the index of the
+ * last aEntityTable entry that should be considered for encoding as an
+ * entity reference, which allows using only a leading part of the table.
+ * TableLength is deduced from the aEntityTable argument.
+ *
+ * Each aEntityTable value is an index into aStringTable, selecting the
+ * replacement string for the character used to index aEntityTable.
+ * aStringTable[0] should be nullptr, and characters that need no
+ * replacement should map to 0 in aEntityTable.
+ */
+ template <uint16_t LargestIndex, uint16_t TableLength>
+ [[nodiscard]] bool AppendAndTranslateEntities(
+ const nsAString& aStr, nsAString& aOutputStr,
+ const uint8_t (&aEntityTable)[TableLength],
+ const char* const aStringTable[]) {
+ static_assert(TableLength > LargestIndex,
+ "Largest allowed index must be smaller than table length");
+ constexpr uint16_t kMaxTableIndex = LargestIndex;
+ return AppendAndTranslateEntities(aStr, aOutputStr, aEntityTable,
+ kMaxTableIndex, aStringTable);
+ }
+
+ /**
+ * Max index that can be used with some of our entity tables.
+ */
+ static const uint16_t kGTVal = 62;
+
+ /**
+ * retrieve the text content of the node and append it to the given string
+ * It doesn't increment the column position
+ */
+ nsresult AppendTextData(nsIContent* aNode, int32_t aStartOffset,
+ int32_t aEndOffset, nsAString& aStr,
+ bool aTranslateEntities);
+
+ virtual nsresult PushNameSpaceDecl(const nsAString& aPrefix,
+ const nsAString& aURI, nsIContent* aOwner);
+ void PopNameSpaceDeclsFor(nsIContent* aOwner);
+
+ /**
+ * The problem that ConfirmPrefix fixes is that anyone can insert nodes
+ * through the DOM that have a namespace URI and a random or empty or
+ * previously existing prefix that's totally unrelated to the prefixes
+ * declared at that point through xmlns attributes. So what ConfirmPrefix
+ * does is ensure that we can map aPrefix to the namespace URI aURI (for
+ * example, that the prefix is not already mapped to some other namespace).
+ * aPrefix will be adjusted, if necessary, so the value of the prefix
+ * _after_ this call is what should be serialized.
+ * @param aPrefix the prefix that may need adjusting
+ * @param aURI the namespace URI we want aPrefix to point to
+ * @param aElement the element we're working with (needed for proper default
+ * namespace handling)
+ * @param aIsAttribute true if we're confirming a prefix for an attribute.
+ * @return true if we need to push the (prefix, uri) pair on the namespace
+ * stack (note that this can happen even if the prefix is
+ * empty).
+ */
+ bool ConfirmPrefix(nsAString& aPrefix, const nsAString& aURI,
+ nsIContent* aElement, bool aIsAttribute);
+ /**
+ * GenerateNewPrefix generates a new prefix and writes it to aPrefix
+ */
+ void GenerateNewPrefix(nsAString& aPrefix);
+
+ uint32_t ScanNamespaceDeclarations(mozilla::dom::Element* aContent,
+ mozilla::dom::Element* aOriginalElement,
+ const nsAString& aTagNamespaceURI);
+
+ [[nodiscard]] virtual bool SerializeAttributes(
+ mozilla::dom::Element* aContent, mozilla::dom::Element* aOriginalElement,
+ nsAString& aTagPrefix, const nsAString& aTagNamespaceURI,
+ nsAtom* aTagName, nsAString& aStr, uint32_t aSkipAttr, bool aAddNSAttr);
+
+ [[nodiscard]] bool SerializeAttr(const nsAString& aPrefix,
+ const nsAString& aName,
+ const nsAString& aValue, nsAString& aStr,
+ bool aDoEscapeEntities);
+
+ bool IsJavaScript(nsIContent* aContent, nsAtom* aAttrNameAtom,
+ int32_t aAttrNamespaceID, const nsAString& aValueString);
+
+ /**
+ * This method can be redefined to check if the element can be serialized.
+ * It is called when the serialization of the start tag is asked
+ * (AppendElementStart)
+ * In this method you can also force the formatting
+ * by setting aForceFormat to true.
+ * @return boolean true if the element can be output
+ */
+ virtual bool CheckElementStart(mozilla::dom::Element* aElement,
+ bool& aForceFormat, nsAString& aStr,
+ nsresult& aResult);
+
+ /**
+ * This method is responsible for appending the '>' at the end of the start
+ * tag, possibly preceded by '/' and maybe a ' ' before that too.
+ *
+ * aElement and aOriginalElement are the same as the corresponding arguments
+ * to AppendElementStart.
+ */
+ [[nodiscard]] bool AppendEndOfElementStart(
+ mozilla::dom::Element* aElement, mozilla::dom::Element* aOriginalElement,
+ nsAString& aStr);
+
+ /**
+ * This method can be redefined to serialize additional things just after
+ * the serialization of the start tag (called at the end of
+ * AppendElementStart). This default implementation just returns true.
+ */
+ [[nodiscard]] virtual bool AfterElementStart(nsIContent* aContent,
+ nsIContent* aOriginalElement,
+ nsAString& aStr) {
+ return true;
+ }
+
+ /**
+ * This method can be redefined to check if the element can be serialized.
+ * It is called when the serialization of the end tag is asked
+ * (AppendElementEnd)
+ * In this method you can also force the formatting
+ * by setting aForceFormat to true.
+ * @return boolean true if the element can be output
+ */
+ virtual bool CheckElementEnd(mozilla::dom::Element* aElement,
+ mozilla::dom::Element* aOriginalElement,
+ bool& aForceFormat, nsAString& aStr);
+
+ /**
+ * This method can be redefined to serialize additional things just after
+ * the serialization of the end tag (called at the end of AppendElementEnd).
+ * This default implementation does nothing.
+ */
+ virtual void AfterElementEnd(nsIContent* aContent, nsAString& aStr) {}
+
+ /**
+ * Returns true if a line break should be inserted before an element open tag
+ */
+ virtual bool LineBreakBeforeOpen(int32_t aNamespaceID, nsAtom* aName);
+
+ /**
+ * Returns true if a line break should be inserted after an element open tag
+ */
+ virtual bool LineBreakAfterOpen(int32_t aNamespaceID, nsAtom* aName);
+
+ /**
+ * Returns true if a line break should be inserted before an element close tag
+ */
+ virtual bool LineBreakBeforeClose(int32_t aNamespaceID, nsAtom* aName);
+
+ /**
+ * Returns true if a line break should be inserted after an element close tag
+ */
+ virtual bool LineBreakAfterClose(int32_t aNamespaceID, nsAtom* aName);
+
+ /**
+ * Add indentation. Call only in the case of formatting and if the current
+ * position is at 0. It updates the column position.
+ */
+ [[nodiscard]] bool AppendIndentation(nsAString& aStr);
+
+ [[nodiscard]] bool IncrIndentation(nsAtom* aName);
+ void DecrIndentation(nsAtom* aName);
+
+ // Functions to check for newlines that need to be added between nodes in
+ // the root of a document. See mAddNewlineForRootNode
+ [[nodiscard]] bool MaybeAddNewlineForRootNode(nsAString& aStr);
+ void MaybeFlagNewlineForRootNode(nsINode* aNode);
+
+ // Functions to check if we enter or leave preformatted content
+ virtual void MaybeEnterInPreContent(nsIContent* aNode);
+ virtual void MaybeLeaveFromPreContent(nsIContent* aNode);
+
+ bool ShouldMaintainPreLevel() const;
+ int32_t PreLevel() const {  // read-only accessor for mPreLevel
+ MOZ_ASSERT(ShouldMaintainPreLevel());
+ return mPreLevel;
+ }
+ int32_t& PreLevel() {  // mutable accessor: returns a reference to mPreLevel
+ MOZ_ASSERT(ShouldMaintainPreLevel());
+ return mPreLevel;
+ }
+
+ bool MaybeSerializeIsValue(mozilla::dom::Element* aElement, nsAString& aStr);
+
+ int32_t mPrefixIndex;
+
+ struct NameSpaceDecl {
+ nsString mPrefix;
+ nsString mURI;
+ nsIContent* mOwner;
+ };
+
+ nsTArray<NameSpaceDecl> mNameSpaceStack;
+
+ // nsIDocumentEncoder flags
+ MOZ_INIT_OUTSIDE_CTOR uint32_t mFlags;
+
+ // characters to use for line break
+ nsString mLineBreak;
+
+ // The charset that was passed to Init()
+ nsCString mCharset;
+
+ // current column position on the current line
+ uint32_t mColPos;
+
+ // true = pretty formatting should be done (OutputFormatted flag)
+ MOZ_INIT_OUTSIDE_CTOR bool mDoFormat;
+
+ // true = no formatting (OutputRaw flag): no newline conversion and
+ // no rewrapping of long lines, even if OutputWrap is set.
+ MOZ_INIT_OUTSIDE_CTOR bool mDoRaw;
+
+ // true = wrapping should be done (OutputWrap flag)
+ MOZ_INIT_OUTSIDE_CTOR bool mDoWrap;
+
+ // true = we can break lines (OutputDisallowLineBreaking flag is not set)
+ MOZ_INIT_OUTSIDE_CTOR bool mAllowLineBreaking;
+
+ // maximum number of columns in a line, in the wrap mode
+ MOZ_INIT_OUTSIDE_CTOR uint32_t mMaxColumn;
+
+ // current indent value
+ nsString mIndent;
+
+ // this is the indentation level after the indentation reached
+ // the maximum length of indentation
+ int32_t mIndentOverflow;
+
+ // says if the indentation has been already added on the current line
+ bool mIsIndentationAddedOnCurrentLine;
+
+ // the string which is currently added is in an attribute
+ bool mInAttribute;
+
+ // true = a newline character should be added. It's only
+ // useful when serializing root nodes. see MaybeAddNewlineForRootNode and
+ // MaybeFlagNewlineForRootNode
+ bool mAddNewlineForRootNode;
+
+ // Indicates that a space will be added if and only if content is
+ // continued on the same line while serializing source. Otherwise,
+ // the newline character acts as the whitespace and no space is needed.
+ // used when mDoFormat = true
+ bool mAddSpace;
+
+ // says that if the next string to add contains a newline character at the
+ // beginning, then this newline character should be ignored, because
+ // such a character has already been added into the output string
+ bool mMayIgnoreLineBreakSequence;
+
+ bool mBodyOnly;
+ int32_t mInBody;
+
+ // Non-owning.
+ nsAString* mOutput;
+
+ private:
+ // number of nested elements which have preformatted content
+ MOZ_INIT_OUTSIDE_CTOR int32_t mPreLevel;
+
+ static const uint8_t kEntities[];
+ static const uint8_t kAttrEntities[];
+ static const char* const kEntityStrings[];
+};
+
+nsresult NS_NewXMLContentSerializer(nsIContentSerializer** aSerializer);
+
+#endif