summaryrefslogtreecommitdiffstats
path: root/dom/serializers
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 01:47:29 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 01:47:29 +0000
commit0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d (patch)
treea31f07c9bcca9d56ce61e9a1ffd30ef350d513aa /dom/serializers
parentInitial commit. (diff)
downloadfirefox-esr-upstream/115.8.0esr.tar.xz
firefox-esr-upstream/115.8.0esr.zip
Adding upstream version 115.8.0esr.upstream/115.8.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dom/serializers')
-rw-r--r--dom/serializers/crashtests/bug1747114.html30
-rw-r--r--dom/serializers/crashtests/crashtests.list1
-rw-r--r--dom/serializers/moz.build36
-rw-r--r--dom/serializers/nsDOMSerializer.cpp112
-rw-r--r--dom/serializers/nsDOMSerializer.h43
-rw-r--r--dom/serializers/nsDocumentEncoder.cpp2109
-rw-r--r--dom/serializers/nsHTMLContentSerializer.cpp445
-rw-r--r--dom/serializers/nsHTMLContentSerializer.h53
-rw-r--r--dom/serializers/nsIContentSerializer.h97
-rw-r--r--dom/serializers/nsIDocumentEncoder.idl361
-rw-r--r--dom/serializers/nsPlainTextSerializer.cpp1826
-rw-r--r--dom/serializers/nsPlainTextSerializer.h384
-rw-r--r--dom/serializers/nsXHTMLContentSerializer.cpp731
-rw-r--r--dom/serializers/nsXHTMLContentSerializer.h143
-rw-r--r--dom/serializers/nsXMLContentSerializer.cpp1814
-rw-r--r--dom/serializers/nsXMLContentSerializer.h440
16 files changed, 8625 insertions, 0 deletions
diff --git a/dom/serializers/crashtests/bug1747114.html b/dom/serializers/crashtests/bug1747114.html
new file mode 100644
index 0000000000..09b429a65e
--- /dev/null
+++ b/dom/serializers/crashtests/bug1747114.html
@@ -0,0 +1,30 @@
+<script>
+function go() {
+ a.appendChild(b)
+ b.setSelectionRange(1, 37, "1")
+ c.onselectionchange = () => {
+ b.wrap = "hard"
+ b.setRangeText(String.fromCodePoint(
+ 171825,
+ 2568,
+ 23726,
+ 391291,
+ 509063,
+ 163770,
+ 896774,
+ 556839,
+ 880943,
+ 606650,
+ 55697,
+ 95835,
+ 28852,
+ 507694,
+ 849936
+ ))
+ }
+}
+</script>
+<body onload=go()>
+<textarea id="b" style="white-space: nowrap">&amp;sJ&lt;*\muT+hj</textarea>
+<time id="c">a</tt>
+<marquee id="a">a</marquee>
diff --git a/dom/serializers/crashtests/crashtests.list b/dom/serializers/crashtests/crashtests.list
new file mode 100644
index 0000000000..3324d1f473
--- /dev/null
+++ b/dom/serializers/crashtests/crashtests.list
@@ -0,0 +1 @@
+load bug1747114.html
diff --git a/dom/serializers/moz.build b/dom/serializers/moz.build
new file mode 100644
index 0000000000..b0a0a35b6f
--- /dev/null
+++ b/dom/serializers/moz.build
@@ -0,0 +1,36 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+with Files("*"):
+ BUG_COMPONENT = ("Core", "DOM: Serializers")
+
+XPIDL_SOURCES += [
+ "nsIDocumentEncoder.idl",
+]
+
+XPIDL_MODULE = "dom_serializers"
+
+EXPORTS += [
+ "nsDOMSerializer.h",
+ "nsHTMLContentSerializer.h",
+ "nsIContentSerializer.h",
+ "nsPlainTextSerializer.h",
+ "nsXHTMLContentSerializer.h",
+ "nsXMLContentSerializer.h",
+]
+
+UNIFIED_SOURCES += [
+ "nsDocumentEncoder.cpp",
+ "nsDOMSerializer.cpp",
+ "nsHTMLContentSerializer.cpp",
+ "nsPlainTextSerializer.cpp",
+ "nsXHTMLContentSerializer.cpp",
+ "nsXMLContentSerializer.cpp",
+]
+
+FINAL_LIBRARY = "xul"
+
+CRASHTEST_MANIFESTS += ["crashtests/crashtests.list"]
diff --git a/dom/serializers/nsDOMSerializer.cpp b/dom/serializers/nsDOMSerializer.cpp
new file mode 100644
index 0000000000..98d92e64ce
--- /dev/null
+++ b/dom/serializers/nsDOMSerializer.cpp
@@ -0,0 +1,112 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsDOMSerializer.h"
+
+#include "mozilla/Encoding.h"
+#include "mozilla/dom/Document.h"
+#include "nsIDocumentEncoder.h"
+#include "nsComponentManagerUtils.h"
+#include "nsContentCID.h"
+#include "nsContentUtils.h"
+#include "nsError.h"
+#include "nsINode.h"
+
+using namespace mozilla;
+
+nsDOMSerializer::nsDOMSerializer() = default;
+
+static already_AddRefed<nsIDocumentEncoder> SetUpEncoder(
+ nsINode& aRoot, const nsAString& aCharset, ErrorResult& aRv) {
+ nsCOMPtr<nsIDocumentEncoder> encoder =
+ do_createDocumentEncoder("application/xhtml+xml");
+ if (!encoder) {
+ aRv.Throw(NS_ERROR_FAILURE);
+ return nullptr;
+ }
+
+ dom::Document* doc = aRoot.OwnerDoc();
+ bool entireDocument = (doc == &aRoot);
+
+ // This method will fail if no document
+ nsresult rv = encoder->NativeInit(
+ doc, u"application/xhtml+xml"_ns,
+ nsIDocumentEncoder::OutputRaw |
+ nsIDocumentEncoder::OutputDontRewriteEncodingDeclaration);
+
+ if (NS_FAILED(rv)) {
+ aRv.Throw(rv);
+ return nullptr;
+ }
+
+ NS_ConvertUTF16toUTF8 charset(aCharset);
+ if (charset.IsEmpty()) {
+ doc->GetDocumentCharacterSet()->Name(charset);
+ }
+ rv = encoder->SetCharset(charset);
+ if (NS_FAILED(rv)) {
+ aRv.Throw(rv);
+ return nullptr;
+ }
+
+ // If we are working on the entire document we do not need to
+ // specify which part to serialize
+ if (!entireDocument) {
+ rv = encoder->SetNode(&aRoot);
+ }
+
+ if (NS_FAILED(rv)) {
+ aRv.Throw(rv);
+ return nullptr;
+ }
+
+ return encoder.forget();
+}
+
+void nsDOMSerializer::SerializeToString(nsINode& aRoot, nsAString& aStr,
+ ErrorResult& aRv) {
+ aStr.Truncate();
+
+ if (!nsContentUtils::CanCallerAccess(&aRoot)) {
+ aRv.Throw(NS_ERROR_DOM_SECURITY_ERR);
+ return;
+ }
+
+ nsCOMPtr<nsIDocumentEncoder> encoder = SetUpEncoder(aRoot, u""_ns, aRv);
+ if (aRv.Failed()) {
+ return;
+ }
+
+ nsresult rv = encoder->EncodeToString(aStr);
+ if (NS_FAILED(rv)) {
+ aRv.Throw(rv);
+ }
+}
+
+void nsDOMSerializer::SerializeToStream(nsINode& aRoot,
+ nsIOutputStream* aStream,
+ const nsAString& aCharset,
+ ErrorResult& aRv) {
+ if (NS_WARN_IF(!aStream)) {
+ aRv.Throw(NS_ERROR_INVALID_ARG);
+ return;
+ }
+
+ // The charset arg can be empty, in which case we get the document's
+ // charset and use that when serializing.
+
+ // No point doing a CanCallerAccess check, because we can only be
+ // called by system JS or C++.
+ nsCOMPtr<nsIDocumentEncoder> encoder = SetUpEncoder(aRoot, aCharset, aRv);
+ if (aRv.Failed()) {
+ return;
+ }
+
+ nsresult rv = encoder->EncodeToStream(aStream);
+ if (NS_FAILED(rv)) {
+ aRv.Throw(rv);
+ }
+}
diff --git a/dom/serializers/nsDOMSerializer.h b/dom/serializers/nsDOMSerializer.h
new file mode 100644
index 0000000000..cebf3d9aaa
--- /dev/null
+++ b/dom/serializers/nsDOMSerializer.h
@@ -0,0 +1,43 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsDOMSerializer_h_
+#define nsDOMSerializer_h_
+
+#include "mozilla/dom/NonRefcountedDOMObject.h"
+#include "mozilla/dom/XMLSerializerBinding.h"
+
+class nsINode;
+class nsIOutputStream;
+
+namespace mozilla {
+class ErrorResult;
+}
+
+class nsDOMSerializer final : public mozilla::dom::NonRefcountedDOMObject {
+ public:
+ nsDOMSerializer();
+
+ // WebIDL API
+ static nsDOMSerializer* Constructor(
+ const mozilla::dom::GlobalObject& aOwner) {
+ return new nsDOMSerializer();
+ }
+
+ void SerializeToString(nsINode& aRoot, nsAString& aStr,
+ mozilla::ErrorResult& rv);
+
+ void SerializeToStream(nsINode& aRoot, nsIOutputStream* aStream,
+ const nsAString& aCharset, mozilla::ErrorResult& aRv);
+
+ bool WrapObject(JSContext* aCx, JS::Handle<JSObject*> aGivenProto,
+ JS::MutableHandle<JSObject*> aReflector) {
+ return mozilla::dom::XMLSerializer_Binding::Wrap(aCx, this, aGivenProto,
+ aReflector);
+ }
+};
+
+#endif
diff --git a/dom/serializers/nsDocumentEncoder.cpp b/dom/serializers/nsDocumentEncoder.cpp
new file mode 100644
index 0000000000..14120bae64
--- /dev/null
+++ b/dom/serializers/nsDocumentEncoder.cpp
@@ -0,0 +1,2109 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Object that can be used to serialize selections, ranges, or nodes
+ * to strings in a gazillion different ways.
+ */
+
+#include <utility>
+
+#include "nscore.h"
+#include "nsISupports.h"
+#include "nsCOMPtr.h"
+#include "nsCRT.h"
+#include "nsIContentSerializer.h"
+#include "nsIDocumentEncoder.h"
+#include "nsINode.h"
+#include "nsIContentInlines.h"
+#include "nsComponentManagerUtils.h"
+#include "nsIOutputStream.h"
+#include "nsRange.h"
+#include "nsGkAtoms.h"
+#include "nsHTMLDocument.h"
+#include "nsIContent.h"
+#include "nsIScriptContext.h"
+#include "nsIScriptGlobalObject.h"
+#include "nsITransferable.h"
+#include "mozilla/dom/Selection.h"
+#include "nsContentUtils.h"
+#include "nsElementTable.h"
+#include "nsUnicharUtils.h"
+#include "nsReadableUtils.h"
+#include "nsTArray.h"
+#include "nsIFrame.h"
+#include "nsLayoutUtils.h"
+#include "nsStringBuffer.h"
+#include "mozilla/dom/Comment.h"
+#include "mozilla/dom/Document.h"
+#include "mozilla/dom/DocumentType.h"
+#include "mozilla/dom/Element.h"
+#include "mozilla/dom/HTMLBRElement.h"
+#include "mozilla/dom/ProcessingInstruction.h"
+#include "mozilla/dom/ShadowRoot.h"
+#include "mozilla/dom/Text.h"
+#include "mozilla/Encoding.h"
+#include "mozilla/IntegerRange.h"
+#include "mozilla/Maybe.h"
+#include "mozilla/ScopeExit.h"
+#include "mozilla/UniquePtr.h"
+
+using namespace mozilla;
+using namespace mozilla::dom;
+
+enum nsRangeIterationDirection { kDirectionOut = -1, kDirectionIn = 1 };
+
+class TextStreamer {
+ public:
+ /**
+ * @param aStream Will be kept alive by the TextStreamer.
+ * @param aUnicodeEncoder Needs to be non-nullptr.
+ * @param aIsPlainText If true, the buffer is encoded with
+ * `EncodeFromUTF16WithoutReplacement` and each unmappable character is
+ * written as '?'; otherwise `EncodeFromUTF16` is used.
+ * @param aOutputBuffer Stored by reference, not copied. Its contents are
+ * what gets encoded and written to aStream when flushing.
+ */
+ TextStreamer(nsIOutputStream& aStream, UniquePtr<Encoder> aUnicodeEncoder,
+ bool aIsPlainText, nsAString& aOutputBuffer);
+
+ /**
+ * Encodes the output buffer and writes it to the stream, but only if the
+ * buffer is longer than `kMaxLengthBeforeFlush`. When that happens the
+ * buffer is truncated afterwards, even if writing failed.
+ */
+ nsresult FlushIfStringLongEnough();
+
+ /**
+ * Encodes the output buffer and writes it to the stream regardless of its
+ * length. The buffer is truncated afterwards, even if writing failed.
+ */
+ nsresult ForceFlush();
+
+ private:
+ const static uint32_t kMaxLengthBeforeFlush = 1024;
+
+ const static uint32_t kEncoderBufferSizeInBytes = 4096;
+
+ nsresult EncodeAndWrite();
+
+ nsresult EncodeAndWriteAndTruncate();
+
+ const nsCOMPtr<nsIOutputStream> mStream;
+ const UniquePtr<Encoder> mUnicodeEncoder;
+ const bool mIsPlainText;
+ nsAString& mOutputBuffer;
+};
+
+TextStreamer::TextStreamer(nsIOutputStream& aStream,
+ UniquePtr<Encoder> aUnicodeEncoder,
+ bool aIsPlainText, nsAString& aOutputBuffer)
+ : mStream{&aStream},
+ mUnicodeEncoder(std::move(aUnicodeEncoder)),
+ mIsPlainText(aIsPlainText),
+ mOutputBuffer(aOutputBuffer) {
+ MOZ_ASSERT(mUnicodeEncoder);
+}
+
+nsresult TextStreamer::FlushIfStringLongEnough() {
+ nsresult rv = NS_OK;
+
+ if (mOutputBuffer.Length() > kMaxLengthBeforeFlush) {
+ rv = EncodeAndWriteAndTruncate();
+ }
+
+ return rv;
+}
+
+nsresult TextStreamer::ForceFlush() { return EncodeAndWriteAndTruncate(); }
+
+nsresult TextStreamer::EncodeAndWrite() {
+ if (mOutputBuffer.IsEmpty()) {
+ return NS_OK;
+ }
+
+ uint8_t buffer[kEncoderBufferSizeInBytes];
+ auto src = Span(mOutputBuffer);
+ auto bufferSpan = Span(buffer);
+ // Reserve space for terminator
+ auto dst = bufferSpan.To(bufferSpan.Length() - 1);
+ for (;;) {
+ uint32_t result;
+ size_t read;
+ size_t written;
+ if (mIsPlainText) {
+ std::tie(result, read, written) =
+ mUnicodeEncoder->EncodeFromUTF16WithoutReplacement(src, dst, false);
+ if (result != kInputEmpty && result != kOutputFull) {
+ // There's always room for one byte in the case of
+ // an unmappable character, because otherwise
+ // we'd have gotten `kOutputFull`.
+ dst[written++] = '?';
+ }
+ } else {
+ std::tie(result, read, written, std::ignore) =
+ mUnicodeEncoder->EncodeFromUTF16(src, dst, false);
+ }
+ src = src.From(read);
+ // Sadly, we still have test cases that implement nsIOutputStream in JS, so
+ // the buffer needs to be zero-terminated for XPConnect to do its thing.
+ // See bug 170416.
+ bufferSpan[written] = 0;
+ uint32_t streamWritten;
+ nsresult rv = mStream->Write(reinterpret_cast<char*>(dst.Elements()),
+ written, &streamWritten);
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+ if (result == kInputEmpty) {
+ return NS_OK;
+ }
+ }
+}
+
+nsresult TextStreamer::EncodeAndWriteAndTruncate() {
+ const nsresult rv = EncodeAndWrite();
+ mOutputBuffer.Truncate();
+ return rv;
+}
+
+/**
+ * The scope may be limited to either a selection, range, or node.
+ * `nsDocumentEncoder::SerializeDependingOnScope` checks them in that order
+ * and serializes the whole document when none of them is set.
+ */
+class EncodingScope {
+ public:
+ /**
+ * @return true, iff the scope is limited to a selection, range or node.
+ */
+ bool IsLimited() const;
+
+ RefPtr<Selection> mSelection;
+ RefPtr<nsRange> mRange;
+ nsCOMPtr<nsINode> mNode;
+ bool mNodeIsContainer = false;
+};
+
+bool EncodingScope::IsLimited() const { return mSelection || mRange || mNode; }
+
+struct RangeBoundariesInclusiveAncestorsAndOffsets {
+ /**
+ * Inclusive ancestors of a range boundary node, see
+ * https://dom.spec.whatwg.org/#concept-tree-inclusive-ancestor.
+ */
+ using InclusiveAncestors = AutoTArray<nsIContent*, 8>;
+
+ /**
+ * Offsets belonging to the inclusive ancestors of a range boundary node, see
+ * https://dom.spec.whatwg.org/#concept-tree-inclusive-ancestor.
+ */
+ using InclusiveAncestorsOffsets = AutoTArray<Maybe<uint32_t>, 8>;
+
+ // The first node is the range's boundary node, the following ones the
+ // ancestors.
+ InclusiveAncestors mInclusiveAncestorsOfStart;
+ // The first offset represents where at the boundary node the range starts.
+ // Each other offset is the index of the child relative to its parent.
+ InclusiveAncestorsOffsets mInclusiveAncestorsOffsetsOfStart;
+
+ // The first node is the range's boundary node, the following ones the
+ // ancestors.
+ InclusiveAncestors mInclusiveAncestorsOfEnd;
+ // The first offset represents where at the boundary node the range ends.
+ // Each other offset is the index of the child relative to its parent.
+ InclusiveAncestorsOffsets mInclusiveAncestorsOffsetsOfEnd;
+};
+
+struct ContextInfoDepth {
+ uint32_t mStart = 0;
+ uint32_t mEnd = 0;
+};
+
+class nsDocumentEncoder : public nsIDocumentEncoder {
+ protected:
+ class RangeNodeContext {
+ public:
+ virtual ~RangeNodeContext() = default;
+
+ virtual bool IncludeInContext(nsINode& aNode) const { return false; }
+
+ virtual int32_t GetImmediateContextCount(
+ const nsTArray<nsINode*>& aAncestorArray) const {
+ return -1;
+ }
+ };
+
+ public:
+ nsDocumentEncoder();
+
+ protected:
+ /**
+ * @param aRangeNodeContext has to be non-null.
+ */
+ explicit nsDocumentEncoder(UniquePtr<RangeNodeContext> aRangeNodeContext);
+
+ public:
+ NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+ NS_DECL_CYCLE_COLLECTION_CLASS(nsDocumentEncoder)
+ NS_DECL_NSIDOCUMENTENCODER
+
+ protected:
+ virtual ~nsDocumentEncoder();
+
+ void Initialize(bool aClearCachedSerializer = true);
+
+ /**
+ * @param aMaxLength As described at
+ * `nsIDocumentEncoder.encodeToStringWithMaxLength`.
+ */
+ nsresult SerializeDependingOnScope(uint32_t aMaxLength);
+
+ nsresult SerializeSelection();
+
+ nsresult SerializeNode();
+
+ /**
+ * @param aMaxLength As described at
+ * `nsIDocumentEncoder.encodeToStringWithMaxLength`.
+ */
+ nsresult SerializeWholeDocument(uint32_t aMaxLength);
+
+ /**
+ * @param aFlags multiple of the flags defined in nsIDocumentEncoder.idl.
+ */
+ static bool IsInvisibleNodeAndShouldBeSkipped(const nsINode& aNode,
+ const uint32_t aFlags) {
+ if (aFlags & SkipInvisibleContent) {
+ // Treat the visibility of the ShadowRoot as if it were
+ // the host content.
+ //
+ // FIXME(emilio): I suspect instead of this a bunch of the GetParent()
+ // calls here should be doing GetFlattenedTreeParent, then this condition
+ // should be unreachable...
+ const nsINode* node{&aNode};
+ if (const ShadowRoot* shadowRoot = ShadowRoot::FromNode(node)) {
+ node = shadowRoot->GetHost();
+ }
+
+ if (node->IsContent()) {
+ nsIFrame* frame = node->AsContent()->GetPrimaryFrame();
+ if (!frame) {
+ if (node->IsElement() && node->AsElement()->IsDisplayContents()) {
+ return false;
+ }
+ if (node->IsText()) {
+ // We have already checked that our parent is visible.
+ //
+ // FIXME(emilio): Text not assigned to a <slot> in Shadow DOM should
+ // probably return false...
+ return false;
+ }
+ if (node->IsHTMLElement(nsGkAtoms::rp)) {
+ // Ruby parentheses are part of ruby structure, hence
+ // shouldn't be stripped out even if it is not displayed.
+ return false;
+ }
+ return true;
+ }
+ bool isVisible = frame->StyleVisibility()->IsVisible();
+ if (!isVisible && node->IsText()) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ void ReleaseDocumentReferenceAndInitialize(bool aClearCachedSerializer);
+
+ class MOZ_STACK_CLASS AutoReleaseDocumentIfNeeded final {
+ public:
+ explicit AutoReleaseDocumentIfNeeded(nsDocumentEncoder* aEncoder)
+ : mEncoder(aEncoder) {}
+
+ ~AutoReleaseDocumentIfNeeded() {
+ if (mEncoder->mFlags & RequiresReinitAfterOutput) {
+ const bool clearCachedSerializer = false;
+ mEncoder->ReleaseDocumentReferenceAndInitialize(clearCachedSerializer);
+ }
+ }
+
+ private:
+ nsDocumentEncoder* mEncoder;
+ };
+
+ nsCOMPtr<Document> mDocument;
+ EncodingScope mEncodingScope;
+ nsCOMPtr<nsIContentSerializer> mSerializer;
+
+ Maybe<TextStreamer> mTextStreamer;
+ nsCOMPtr<nsIDocumentEncoderNodeFixup> mNodeFixup;
+
+ nsString mMimeType;
+ const Encoding* mEncoding;
+ // Multiple of the flags defined in nsIDocumentEncoder.idl.
+ uint32_t mFlags;
+ uint32_t mWrapColumn;
+ // Whether the serializer cares about being notified to scan elements to
+ // keep track of whether they are preformatted. This stores the out
+ // argument of nsIContentSerializer::Init().
+ bool mNeedsPreformatScanning;
+ bool mIsCopying; // Set to true only while copying
+ nsStringBuffer* mCachedBuffer;
+
+ class NodeSerializer {
+ public:
+ /**
+ * @param aFlags multiple of the flags defined in nsIDocumentEncoder.idl.
+ */
+ NodeSerializer(const bool& aNeedsPreformatScanning,
+ const nsCOMPtr<nsIContentSerializer>& aSerializer,
+ const uint32_t& aFlags,
+ const nsCOMPtr<nsIDocumentEncoderNodeFixup>& aNodeFixup,
+ Maybe<TextStreamer>& aTextStreamer)
+ : mNeedsPreformatScanning{aNeedsPreformatScanning},
+ mSerializer{aSerializer},
+ mFlags{aFlags},
+ mNodeFixup{aNodeFixup},
+ mTextStreamer{aTextStreamer} {}
+
+ nsresult SerializeNodeStart(nsINode& aOriginalNode, int32_t aStartOffset,
+ int32_t aEndOffset,
+ nsINode* aFixupNode = nullptr) const;
+
+ enum class SerializeRoot { eYes, eNo };
+
+ nsresult SerializeToStringRecursive(nsINode* aNode,
+ SerializeRoot aSerializeRoot,
+ uint32_t aMaxLength = 0) const;
+
+ nsresult SerializeNodeEnd(nsINode& aOriginalNode,
+ nsINode* aFixupNode = nullptr) const;
+
+ [[nodiscard]] nsresult SerializeTextNode(nsINode& aNode,
+ int32_t aStartOffset,
+ int32_t aEndOffset) const;
+
+ nsresult SerializeToStringIterative(nsINode* aNode) const;
+
+ private:
+ const bool& mNeedsPreformatScanning;
+ const nsCOMPtr<nsIContentSerializer>& mSerializer;
+ // Multiple of the flags defined in nsIDocumentEncoder.idl.
+ const uint32_t& mFlags;
+ const nsCOMPtr<nsIDocumentEncoderNodeFixup>& mNodeFixup;
+ Maybe<TextStreamer>& mTextStreamer;
+ };
+
+ NodeSerializer mNodeSerializer;
+
+ const UniquePtr<RangeNodeContext> mRangeNodeContext;
+
+ struct RangeContextSerializer final {
+ RangeContextSerializer(const RangeNodeContext& aRangeNodeContext,
+ const NodeSerializer& aNodeSerializer)
+ : mDisableContextSerialize{false},
+ mRangeNodeContext{aRangeNodeContext},
+ mNodeSerializer{aNodeSerializer} {}
+
+ nsresult SerializeRangeContextStart(
+ const nsTArray<nsINode*>& aAncestorArray);
+ nsresult SerializeRangeContextEnd();
+
+ // Used when context has already been serialized for
+ // table cell selections (where parent is <tr>)
+ bool mDisableContextSerialize;
+ AutoTArray<AutoTArray<nsINode*, 8>, 8> mRangeContexts;
+
+ const RangeNodeContext& mRangeNodeContext;
+
+ private:
+ const NodeSerializer& mNodeSerializer;
+ };
+
+ RangeContextSerializer mRangeContextSerializer;
+
+ struct RangeSerializer {
+ // @param aFlags multiple of the flags defined in nsIDocumentEncoder.idl.
+ RangeSerializer(const uint32_t& aFlags,
+ const NodeSerializer& aNodeSerializer,
+ RangeContextSerializer& aRangeContextSerializer)
+ : mStartRootIndex{0},
+ mEndRootIndex{0},
+ mHaltRangeHint{false},
+ mFlags{aFlags},
+ mNodeSerializer{aNodeSerializer},
+ mRangeContextSerializer{aRangeContextSerializer} {}
+
+ void Initialize();
+
+ /**
+ * @param aDepth the distance (number of `GetParent` calls) from aNode to
+ * aRange's closest common inclusive ancestor.
+ */
+ nsresult SerializeRangeNodes(const nsRange* aRange, nsINode* aNode,
+ int32_t aDepth);
+
+ /**
+ * Serialize aContent's children from aStartOffset to aEndOffset.
+ *
+ * @param aDepth the distance (number of `GetParent` calls) from aContent to
+ * aRange's closest common inclusive ancestor.
+ */
+ [[nodiscard]] nsresult SerializeChildrenOfContent(nsIContent& aContent,
+ uint32_t aStartOffset,
+ uint32_t aEndOffset,
+ const nsRange* aRange,
+ int32_t aDepth);
+
+ nsresult SerializeRangeToString(const nsRange* aRange);
+
+ /**
+ * https://dom.spec.whatwg.org/#concept-tree-inclusive-ancestor.
+ */
+ nsCOMPtr<nsINode> mClosestCommonInclusiveAncestorOfRange;
+
+ /**
+ * https://dom.spec.whatwg.org/#concept-tree-inclusive-ancestor.
+ */
+ AutoTArray<nsINode*, 8> mCommonInclusiveAncestors;
+
+ ContextInfoDepth mContextInfoDepth;
+
+ private:
+ struct StartAndEndContent {
+ nsCOMPtr<nsIContent> mStart;
+ nsCOMPtr<nsIContent> mEnd;
+ };
+
+ StartAndEndContent GetStartAndEndContentForRecursionLevel(
+ int32_t aDepth) const;
+
+ bool HasInvisibleParentAndShouldBeSkipped(nsINode& aNode) const;
+
+ nsresult SerializeNodePartiallyContainedInRange(
+ nsINode& aNode, nsIContent& aContent,
+ const StartAndEndContent& aStartAndEndContent, const nsRange& aRange,
+ int32_t aDepth);
+
+ nsresult SerializeTextNode(nsINode& aNode, const nsIContent& aContent,
+ const StartAndEndContent& aStartAndEndContent,
+ const nsRange& aRange) const;
+
+ RangeBoundariesInclusiveAncestorsAndOffsets
+ mRangeBoundariesInclusiveAncestorsAndOffsets;
+ int32_t mStartRootIndex;
+ int32_t mEndRootIndex;
+ bool mHaltRangeHint;
+
+ // Multiple of the flags defined in nsIDocumentEncoder.idl.
+ const uint32_t& mFlags;
+
+ const NodeSerializer& mNodeSerializer;
+ RangeContextSerializer& mRangeContextSerializer;
+ };
+
+ RangeSerializer mRangeSerializer;
+};
+
+void nsDocumentEncoder::RangeSerializer::Initialize() {
+ mContextInfoDepth = {};
+ mStartRootIndex = 0;
+ mEndRootIndex = 0;
+ mHaltRangeHint = false;
+ mClosestCommonInclusiveAncestorOfRange = nullptr;
+ mRangeBoundariesInclusiveAncestorsAndOffsets = {};
+}
+
+NS_IMPL_CYCLE_COLLECTING_ADDREF(nsDocumentEncoder)
+NS_IMPL_CYCLE_COLLECTING_RELEASE_WITH_LAST_RELEASE(
+ nsDocumentEncoder, ReleaseDocumentReferenceAndInitialize(true))
+
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsDocumentEncoder)
+ NS_INTERFACE_MAP_ENTRY(nsIDocumentEncoder)
+ NS_INTERFACE_MAP_ENTRY(nsISupports)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_CYCLE_COLLECTION(
+ nsDocumentEncoder, mDocument, mEncodingScope.mSelection,
+ mEncodingScope.mRange, mEncodingScope.mNode, mSerializer,
+ mRangeSerializer.mClosestCommonInclusiveAncestorOfRange)
+
+nsDocumentEncoder::nsDocumentEncoder(
+ UniquePtr<RangeNodeContext> aRangeNodeContext)
+ : mEncoding(nullptr),
+ mIsCopying(false),
+ mCachedBuffer(nullptr),
+ mNodeSerializer(mNeedsPreformatScanning, mSerializer, mFlags, mNodeFixup,
+ mTextStreamer),
+ mRangeNodeContext(std::move(aRangeNodeContext)),
+ mRangeContextSerializer(*mRangeNodeContext, mNodeSerializer),
+ mRangeSerializer(mFlags, mNodeSerializer, mRangeContextSerializer) {
+ MOZ_ASSERT(mRangeNodeContext);
+
+ Initialize();
+ mMimeType.AssignLiteral("text/plain");
+}
+
+nsDocumentEncoder::nsDocumentEncoder()
+ : nsDocumentEncoder(MakeUnique<RangeNodeContext>()) {}
+
+void nsDocumentEncoder::Initialize(bool aClearCachedSerializer) {
+ mFlags = 0;
+ mWrapColumn = 72;
+ mRangeSerializer.Initialize();
+ mNeedsPreformatScanning = false;
+ mRangeContextSerializer.mDisableContextSerialize = false;
+ mEncodingScope = {};
+ mNodeFixup = nullptr;
+ if (aClearCachedSerializer) {
+ mSerializer = nullptr;
+ }
+}
+
+static bool ParentIsTR(nsIContent* aContent) {
+ mozilla::dom::Element* parent = aContent->GetParentElement();
+ if (!parent) {
+ return false;
+ }
+ return parent->IsHTMLElement(nsGkAtoms::tr);
+}
+
+nsresult nsDocumentEncoder::SerializeDependingOnScope(uint32_t aMaxLength) {
+ nsresult rv = NS_OK;
+ if (mEncodingScope.mSelection) {
+ rv = SerializeSelection();
+ } else if (nsRange* range = mEncodingScope.mRange) {
+ rv = mRangeSerializer.SerializeRangeToString(range);
+ } else if (mEncodingScope.mNode) {
+ rv = SerializeNode();
+ } else {
+ rv = SerializeWholeDocument(aMaxLength);
+ }
+
+ mEncodingScope = {};
+
+ return rv;
+}
+
+nsresult nsDocumentEncoder::SerializeSelection() {
+ NS_ENSURE_TRUE(mEncodingScope.mSelection, NS_ERROR_FAILURE);
+
+ nsresult rv = NS_OK;
+ const Selection* selection = mEncodingScope.mSelection;
+ nsCOMPtr<nsINode> node;
+ nsCOMPtr<nsINode> prevNode;
+ uint32_t firstRangeStartDepth = 0;
+ const uint32_t rangeCount = selection->RangeCount();
+ for (const uint32_t i : IntegerRange(rangeCount)) {
+ MOZ_ASSERT(selection->RangeCount() == rangeCount);
+ RefPtr<const nsRange> range = selection->GetRangeAt(i);
+
+ // Bug 236546: newlines not added when copying table cells into clipboard.
+ // Each selected cell shows up as a range containing a row with a single
+ // cell. Get the row, compare it to the previous row and emit </tr><tr> as
+ // needed. Bug 137450: Problem copying/pasting a table from a web page to
+ // Excel. Each separate block of <tr></tr> produced above will be wrapped
+ // by the immediate context. This assumes that you can't select cells that
+ // are multiple selections from two tables simultaneously.
+ node = range->GetStartContainer();
+ NS_ENSURE_TRUE(node, NS_ERROR_FAILURE);
+ if (node != prevNode) {
+ if (prevNode) {
+ rv = mNodeSerializer.SerializeNodeEnd(*prevNode);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+ nsCOMPtr<nsIContent> content = nsIContent::FromNodeOrNull(node);
+ if (content && content->IsHTMLElement(nsGkAtoms::tr) &&
+ !ParentIsTR(content)) {
+ if (!prevNode) {
+ // Went from a non-<tr> to a <tr>
+ mRangeSerializer.mCommonInclusiveAncestors.Clear();
+ nsContentUtils::GetInclusiveAncestors(
+ node->GetParentNode(),
+ mRangeSerializer.mCommonInclusiveAncestors);
+ rv = mRangeContextSerializer.SerializeRangeContextStart(
+ mRangeSerializer.mCommonInclusiveAncestors);
+ NS_ENSURE_SUCCESS(rv, rv);
+ // Don't let SerializeRangeToString serialize the context again
+ mRangeContextSerializer.mDisableContextSerialize = true;
+ }
+
+ rv = mNodeSerializer.SerializeNodeStart(*node, 0, -1);
+ NS_ENSURE_SUCCESS(rv, rv);
+ prevNode = node;
+ } else if (prevNode) {
+ // Went from a <tr> to a non-<tr>
+ mRangeContextSerializer.mDisableContextSerialize = false;
+
+ // `mCommonInclusiveAncestors` is used in `EncodeToStringWithContext`
+ // too. Update it here to mimic the old behavior.
+ mRangeSerializer.mCommonInclusiveAncestors.Clear();
+ nsContentUtils::GetInclusiveAncestors(
+ prevNode->GetParentNode(),
+ mRangeSerializer.mCommonInclusiveAncestors);
+
+ rv = mRangeContextSerializer.SerializeRangeContextEnd();
+ NS_ENSURE_SUCCESS(rv, rv);
+ prevNode = nullptr;
+ }
+ }
+
+ rv = mRangeSerializer.SerializeRangeToString(range);
+ NS_ENSURE_SUCCESS(rv, rv);
+ if (i == 0) {
+ firstRangeStartDepth = mRangeSerializer.mContextInfoDepth.mStart;
+ }
+ }
+ mRangeSerializer.mContextInfoDepth.mStart = firstRangeStartDepth;
+
+ if (prevNode) {
+ rv = mNodeSerializer.SerializeNodeEnd(*prevNode);
+ NS_ENSURE_SUCCESS(rv, rv);
+ mRangeContextSerializer.mDisableContextSerialize = false;
+
+ // `mCommonInclusiveAncestors` is used in `EncodeToStringWithContext`
+ // too. Update it here to mimic the old behavior.
+ mRangeSerializer.mCommonInclusiveAncestors.Clear();
+ nsContentUtils::GetInclusiveAncestors(
+ prevNode->GetParentNode(), mRangeSerializer.mCommonInclusiveAncestors);
+
+ rv = mRangeContextSerializer.SerializeRangeContextEnd();
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ // Just to be safe
+ mRangeContextSerializer.mDisableContextSerialize = false;
+
+ return rv;
+}
+
+nsresult nsDocumentEncoder::SerializeNode() {
+ NS_ENSURE_TRUE(mEncodingScope.mNode, NS_ERROR_FAILURE);
+
+ nsresult rv = NS_OK;
+ nsINode* node = mEncodingScope.mNode;
+ const bool nodeIsContainer = mEncodingScope.mNodeIsContainer;
+ if (!mNodeFixup && !(mFlags & SkipInvisibleContent) && !mTextStreamer &&
+ nodeIsContainer) {
+ rv = mNodeSerializer.SerializeToStringIterative(node);
+ } else {
+ rv = mNodeSerializer.SerializeToStringRecursive(
+ node, nodeIsContainer ? NodeSerializer::SerializeRoot::eNo
+ : NodeSerializer::SerializeRoot::eYes);
+ }
+
+ return rv;
+}
+
+nsresult nsDocumentEncoder::SerializeWholeDocument(uint32_t aMaxLength) {
+ NS_ENSURE_FALSE(mEncodingScope.mSelection, NS_ERROR_FAILURE);
+ NS_ENSURE_FALSE(mEncodingScope.mRange, NS_ERROR_FAILURE);
+ NS_ENSURE_FALSE(mEncodingScope.mNode, NS_ERROR_FAILURE);
+
+ nsresult rv = mSerializer->AppendDocumentStart(mDocument);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ rv = mNodeSerializer.SerializeToStringRecursive(
+ mDocument, NodeSerializer::SerializeRoot::eYes, aMaxLength);
+ return rv;
+}
+
+nsDocumentEncoder::~nsDocumentEncoder() {
+ if (mCachedBuffer) {
+ mCachedBuffer->Release();
+ }
+}
+
+NS_IMETHODIMP
+nsDocumentEncoder::Init(Document* aDocument, const nsAString& aMimeType,
+ uint32_t aFlags) {
+ return NativeInit(aDocument, aMimeType, aFlags);
+}
+
+NS_IMETHODIMP
+nsDocumentEncoder::NativeInit(Document* aDocument, const nsAString& aMimeType,
+ uint32_t aFlags) {
+ if (!aDocument) return NS_ERROR_INVALID_ARG;
+
+ Initialize(!mMimeType.Equals(aMimeType));
+
+ mDocument = aDocument;
+
+ mMimeType = aMimeType;
+
+ mFlags = aFlags;
+ mIsCopying = false;
+
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsDocumentEncoder::SetWrapColumn(uint32_t aWC) {
+ mWrapColumn = aWC;
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsDocumentEncoder::SetSelection(Selection* aSelection) {
+ mEncodingScope.mSelection = aSelection;
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsDocumentEncoder::SetRange(nsRange* aRange) {
+ mEncodingScope.mRange = aRange;
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsDocumentEncoder::SetNode(nsINode* aNode) {
+ mEncodingScope.mNodeIsContainer = false;
+ mEncodingScope.mNode = aNode;
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsDocumentEncoder::SetContainerNode(nsINode* aContainer) {
+ mEncodingScope.mNodeIsContainer = true;
+ mEncodingScope.mNode = aContainer;
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsDocumentEncoder::SetCharset(const nsACString& aCharset) {
+ const Encoding* encoding = Encoding::ForLabel(aCharset);
+ if (!encoding) {
+ return NS_ERROR_UCONV_NOCONV;
+ }
+ mEncoding = encoding->OutputEncoding();
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsDocumentEncoder::GetMimeType(nsAString& aMimeType) {
+ aMimeType = mMimeType;
+ return NS_OK;
+}
+
+/**
+ * Decides which node should actually be serialized in place of an original
+ * node when a node fixup callback is installed.
+ *
+ * Without a fixup callback the original node is always used. With a callback,
+ * an explicitly supplied fixup node wins; otherwise the callback is asked to
+ * produce one (and to say whether the fixup node's children should be
+ * serialized instead of the original's).
+ */
+class FixupNodeDeterminer {
+ public:
+  /**
+   * @param aNodeFixup    Optional fixup callback; may be null.
+   * @param aFixupNode    Optional already-determined fixup node. Only
+   *                      honoured when aNodeFixup is non-null.
+   * @param aOriginalNode The node being serialized; held by reference, so it
+   *                      must outlive this object.
+   */
+  FixupNodeDeterminer(nsIDocumentEncoderNodeFixup* aNodeFixup,
+                      nsINode* aFixupNode, nsINode& aOriginalNode)
+      : mIsSerializationOfFixupChildrenNeeded{false},
+        mNodeFixup(aNodeFixup),
+        mOriginalNode(aOriginalNode) {
+    if (!mNodeFixup) {
+      return;
+    }
+
+    if (aFixupNode) {
+      // The caller already knows the replacement; do not ask the callback.
+      mFixupNode = aFixupNode;
+      return;
+    }
+
+    mNodeFixup->FixupNode(&mOriginalNode,
+                          &mIsSerializationOfFixupChildrenNeeded,
+                          getter_AddRefs(mFixupNode));
+  }
+
+  /**
+   * @return The flag reported by the fixup callback; false when the callback
+   *         was not invoked.
+   */
+  bool IsSerializationOfFixupChildrenNeeded() const {
+    return mIsSerializationOfFixupChildrenNeeded;
+  }
+
+  /**
+   * @return The fixup node, if available, otherwise the original node. The
+   *         former is kept alive by this object.
+   */
+  nsINode& GetFixupNodeFallBackToOriginalNode() const {
+    if (mFixupNode) {
+      return *mFixupNode;
+    }
+    return mOriginalNode;
+  }
+
+ private:
+  bool mIsSerializationOfFixupChildrenNeeded;
+  nsIDocumentEncoderNodeFixup* mNodeFixup;
+  nsCOMPtr<nsINode> mFixupNode;
+  nsINode& mOriginalNode;
+};
+
+// Appends the "start" of a node to the content serializer: the start tag for
+// elements, or the (possibly partial) content for text, CDATA, processing
+// instruction, comment and doctype nodes.
+//
+// aOriginalNode  The node from the document being serialized.
+// aStartOffset,
+// aEndOffset     Offsets forwarded unchanged to the serializer for text,
+//                CDATA, processing-instruction and comment nodes; they are
+//                not used for elements and doctypes.
+// aFixupNode     Optional replacement node; see FixupNodeDeterminer for how it
+//                is combined with mNodeFixup.
+//
+// Returns NS_OK without appending anything when the node is skipped as
+// invisible; otherwise returns the serializer's result. Node types not listed
+// in the switch below fall through and yield NS_OK.
+nsresult nsDocumentEncoder::NodeSerializer::SerializeNodeStart(
+ nsINode& aOriginalNode, int32_t aStartOffset, int32_t aEndOffset,
+ nsINode* aFixupNode) const {
+ // Preformat scanning is done on the original node (or, for text, on its
+ // parent element), and happens before the invisibility check below.
+ if (mNeedsPreformatScanning) {
+ if (aOriginalNode.IsElement()) {
+ mSerializer->ScanElementForPreformat(aOriginalNode.AsElement());
+ } else if (aOriginalNode.IsText()) {
+ const nsCOMPtr<nsINode> parent = aOriginalNode.GetParent();
+ if (parent && parent->IsElement()) {
+ mSerializer->ScanElementForPreformat(parent->AsElement());
+ }
+ }
+ }
+
+ if (IsInvisibleNodeAndShouldBeSkipped(aOriginalNode, mFlags)) {
+ return NS_OK;
+ }
+
+ // From here on `node` is the fixup node when there is one, else the
+ // original node. fixupNodeDeterminer keeps a fixup node alive.
+ FixupNodeDeterminer fixupNodeDeterminer{mNodeFixup, aFixupNode,
+ aOriginalNode};
+ nsINode* node = &fixupNodeDeterminer.GetFixupNodeFallBackToOriginalNode();
+
+ nsresult rv = NS_OK;
+
+ if (node->IsElement()) {
+ // Invisible breaks are dropped only when one of these two flags is set.
+ if ((mFlags & (nsIDocumentEncoder::OutputPreformatted |
+ nsIDocumentEncoder::OutputDropInvisibleBreak)) &&
+ nsLayoutUtils::IsInvisibleBreak(node)) {
+ return rv;
+ }
+ // NOTE(review): aOriginalNode.AsElement() assumes the original node is an
+ // element whenever the fixup node is one -- confirm fixups never change
+ // the node kind.
+ rv = mSerializer->AppendElementStart(node->AsElement(),
+ aOriginalNode.AsElement());
+ return rv;
+ }
+
+ // Non-element nodes: dispatch on node type; unlisted types append nothing.
+ switch (node->NodeType()) {
+ case nsINode::TEXT_NODE: {
+ rv = mSerializer->AppendText(static_cast<nsIContent*>(node), aStartOffset,
+ aEndOffset);
+ break;
+ }
+ case nsINode::CDATA_SECTION_NODE: {
+ rv = mSerializer->AppendCDATASection(static_cast<nsIContent*>(node),
+ aStartOffset, aEndOffset);
+ break;
+ }
+ case nsINode::PROCESSING_INSTRUCTION_NODE: {
+ rv = mSerializer->AppendProcessingInstruction(
+ static_cast<ProcessingInstruction*>(node), aStartOffset, aEndOffset);
+ break;
+ }
+ case nsINode::COMMENT_NODE: {
+ rv = mSerializer->AppendComment(static_cast<Comment*>(node), aStartOffset,
+ aEndOffset);
+ break;
+ }
+ case nsINode::DOCUMENT_TYPE_NODE: {
+ rv = mSerializer->AppendDoctype(static_cast<DocumentType*>(node));
+ break;
+ }
+ }
+
+ return rv;
+}
+
+// Appends the "end" of a node to the content serializer. Only elements
+// produce output (their end tag); every other node kind is a no-op.
+//
+// aOriginalNode  The node from the document being serialized.
+// aFixupNode     Optional replacement node; see FixupNodeDeterminer.
+//
+// Returns NS_OK when nothing is appended (invisible or non-element node),
+// otherwise the result of the serializer's AppendElementEnd().
+nsresult nsDocumentEncoder::NodeSerializer::SerializeNodeEnd(
+    nsINode& aOriginalNode, nsINode* aFixupNode) const {
+  // Undo the preformat bookkeeping done in SerializeNodeStart(). This uses
+  // the original node and runs even for nodes that are skipped below.
+  if (mNeedsPreformatScanning) {
+    if (aOriginalNode.IsElement()) {
+      mSerializer->ForgetElementForPreformat(aOriginalNode.AsElement());
+    } else if (aOriginalNode.IsText()) {
+      const nsCOMPtr<nsINode> parent = aOriginalNode.GetParent();
+      if (parent && parent->IsElement()) {
+        mSerializer->ForgetElementForPreformat(parent->AsElement());
+      }
+    }
+  }
+
+  if (IsInvisibleNodeAndShouldBeSkipped(aOriginalNode, mFlags)) {
+    return NS_OK;
+  }
+
+  // The determiner must stay alive while the node it hands out is in use.
+  FixupNodeDeterminer fixupNodeDeterminer{mNodeFixup, aFixupNode,
+                                          aOriginalNode};
+  nsINode& nodeToClose =
+      fixupNodeDeterminer.GetFixupNodeFallBackToOriginalNode();
+
+  if (!nodeToClose.IsElement()) {
+    return NS_OK;
+  }
+
+  return mSerializer->AppendElementEnd(nodeToClose.AsElement(),
+                                       aOriginalNode.AsElement());
+}
+
+// Serializes aNode and its subtree by recursion.
+//
+// aNode           Root of the subtree; NS_ERROR_NULL_POINTER if null.
+// aSerializeRoot  Whether aNode's own start/end are emitted (eYes) or only
+//                 its children (eNo).
+// aMaxLength      When non-zero, serialization stops descending once the
+//                 serializer's output length has reached this value; zero
+//                 means no limit.
+//
+// Returns the first failure encountered, otherwise NS_OK (or the result of
+// the text streamer's flush when streaming is active).
+nsresult nsDocumentEncoder::NodeSerializer::SerializeToStringRecursive(
+ nsINode* aNode, SerializeRoot aSerializeRoot, uint32_t aMaxLength) const {
+ uint32_t outputLength{0};
+ nsresult rv = mSerializer->GetOutputLength(outputLength);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ // The length limit is checked before the null check, so a null aNode is
+ // reported as success once the limit has been reached.
+ if (aMaxLength > 0 && outputLength >= aMaxLength) {
+ return NS_OK;
+ }
+
+ NS_ENSURE_TRUE(aNode, NS_ERROR_NULL_POINTER);
+
+ if (IsInvisibleNodeAndShouldBeSkipped(*aNode, mFlags)) {
+ return NS_OK;
+ }
+
+ // Ask the fixup callback (if any) once here; the result is passed down to
+ // SerializeNodeStart/End so they do not ask again.
+ FixupNodeDeterminer fixupNodeDeterminer{mNodeFixup, nullptr, *aNode};
+ nsINode* maybeFixedNode =
+ &fixupNodeDeterminer.GetFixupNodeFallBackToOriginalNode();
+
+ // A node whose frame is not selectable does not get its own start/end
+ // serialized, but its children are still visited.
+ if (mFlags & SkipInvisibleContent) {
+ if (aNode->IsContent()) {
+ if (nsIFrame* frame = aNode->AsContent()->GetPrimaryFrame()) {
+ if (!frame->IsSelectable(nullptr)) {
+ aSerializeRoot = SerializeRoot::eNo;
+ }
+ }
+ }
+ }
+
+ if (aSerializeRoot == SerializeRoot::eYes) {
+ // -1 means "no end offset"; with a length limit the remaining budget is
+ // passed as the end offset instead.
+ int32_t endOffset = -1;
+ if (aMaxLength > 0) {
+ MOZ_ASSERT(aMaxLength >= outputLength);
+ endOffset = aMaxLength - outputLength;
+ }
+ rv = SerializeNodeStart(*aNode, 0, endOffset, maybeFixedNode);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ // Children come from the fixup node only when the fixup callback asked for
+ // that; otherwise from the original node.
+ nsINode* node = fixupNodeDeterminer.IsSerializationOfFixupChildrenNeeded()
+ ? maybeFixedNode
+ : aNode;
+
+ for (nsINode* child = node->GetFirstChildOfTemplateOrNode(); child;
+ child = child->GetNextSibling()) {
+ rv = SerializeToStringRecursive(child, SerializeRoot::eYes, aMaxLength);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ if (aSerializeRoot == SerializeRoot::eYes) {
+ rv = SerializeNodeEnd(*aNode, maybeFixedNode);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ // When encoding to a stream, give the streamer a chance to write out what
+ // has accumulated so far.
+ if (mTextStreamer) {
+ rv = mTextStreamer->FlushIfStringLongEnough();
+ }
+
+ return rv;
+}
+
+// Serializes the descendants of aNode (not aNode itself) in document order
+// without recursion, walking first-child / next-sibling / parent links.
+//
+// aNode is dereferenced unconditionally and so must be non-null.
+// Returns the first failure from SerializeNodeStart/End, otherwise NS_OK.
+nsresult nsDocumentEncoder::NodeSerializer::SerializeToStringIterative(
+ nsINode* aNode) const {
+ nsresult rv;
+
+ nsINode* node = aNode->GetFirstChildOfTemplateOrNode();
+ while (node) {
+ nsINode* current = node;
+ // Passing `current` as its own fixup node means the start of the node is
+ // serialized without consulting the fixup callback.
+ rv = SerializeNodeStart(*current, 0, -1, current);
+ NS_ENSURE_SUCCESS(rv, rv);
+ node = current->GetFirstChildOfTemplateOrNode();
+ // No child to descend into: close `current`, then move to its next
+ // sibling, or keep climbing (closing each ancestor) until a sibling is
+ // found or aNode is reached.
+ while (!node && current && current != aNode) {
+ rv = SerializeNodeEnd(*current);
+ NS_ENSURE_SUCCESS(rv, rv);
+ // Check if we have siblings.
+ node = current->GetNextSibling();
+ if (!node) {
+ // Perhaps parent node has siblings.
+ current = current->GetParentNode();
+
+ // Handle template element. If the parent is a template's content,
+ // then adjust the parent to be the template element.
+ if (current && current != aNode && current->IsDocumentFragment()) {
+ nsIContent* host = current->AsDocumentFragment()->GetHost();
+ if (host && host->IsHTMLElement(nsGkAtoms::_template)) {
+ current = host;
+ }
+ }
+ }
+ }
+ }
+
+ return NS_OK;
+}
+
+// Null-safe check for a text node: false for a null pointer, otherwise
+// whatever aNode->IsText() reports.
+static bool IsTextNode(nsINode* aNode) {
+  if (!aNode) {
+    return false;
+  }
+  return aNode->IsText();
+}
+
+// Serializes a single text node, restricted to [aStartOffset, aEndOffset), by
+// emitting its start and then its end. aNode must be a text node (asserted).
+// An aEndOffset of -1 is what callers pass when there is no end limit.
+// Returns the first failure, otherwise NS_OK.
+nsresult nsDocumentEncoder::NodeSerializer::SerializeTextNode(
+ nsINode& aNode, int32_t aStartOffset, int32_t aEndOffset) const {
+ MOZ_ASSERT(IsTextNode(&aNode));
+
+ nsresult rv = SerializeNodeStart(aNode, aStartOffset, aEndOffset);
+ NS_ENSURE_SUCCESS(rv, rv);
+ rv = SerializeNodeEnd(aNode);
+ NS_ENSURE_SUCCESS(rv, rv);
+ return rv;
+}
+
+// Looks up which inclusive ancestors of the range's start and end boundaries
+// sit at recursion level aDepth below the common ancestor.
+//
+// The ancestor arrays are indexed by (root index - aDepth). A member of the
+// result is left default-initialized when the computed index falls outside
+// the corresponding array.
+//
+// The upper bound is strict: an index equal to Length() is one past the last
+// element and must not be used to index the array.
+nsDocumentEncoder::RangeSerializer::StartAndEndContent
+nsDocumentEncoder::RangeSerializer::GetStartAndEndContentForRecursionLevel(
+    const int32_t aDepth) const {
+  StartAndEndContent result;
+
+  const auto& inclusiveAncestorsOfStart =
+      mRangeBoundariesInclusiveAncestorsAndOffsets.mInclusiveAncestorsOfStart;
+  const auto& inclusiveAncestorsOfEnd =
+      mRangeBoundariesInclusiveAncestorsAndOffsets.mInclusiveAncestorsOfEnd;
+
+  const int32_t start = mStartRootIndex - aDepth;
+  if (start >= 0 &&
+      static_cast<uint32_t>(start) < inclusiveAncestorsOfStart.Length()) {
+    result.mStart = inclusiveAncestorsOfStart[start];
+  }
+
+  const int32_t end = mEndRootIndex - aDepth;
+  if (end >= 0 &&
+      static_cast<uint32_t>(end) < inclusiveAncestorsOfEnd.Length()) {
+    result.mEnd = inclusiveAncestorsOfEnd[end];
+  }
+
+  return result;
+}
+
+// Serializes a text node that lies on a range boundary. The text is clipped
+// to the range's start offset when the node is the start content of this
+// recursion level, and to the range's end offset when it is the end content;
+// otherwise the respective side is unclipped (0 / -1).
+nsresult nsDocumentEncoder::RangeSerializer::SerializeTextNode(
+    nsINode& aNode, const nsIContent& aContent,
+    const StartAndEndContent& aStartAndEndContent,
+    const nsRange& aRange) const {
+  int32_t firstCharOffset = 0;
+  if (aStartAndEndContent.mStart == &aContent) {
+    firstCharOffset = aRange.StartOffset();
+  }
+
+  int32_t pastLastCharOffset = -1;
+  if (aStartAndEndContent.mEnd == &aContent) {
+    pastLastCharOffset = aRange.EndOffset();
+  }
+
+  return mNodeSerializer.SerializeTextNode(aNode, firstCharOffset,
+                                           pastLastCharOffset);
+}
+
+// Serializes the part of aNode's subtree that lies inside aRange.
+//
+// aRange  The range being serialized; must be non-null (asserted).
+// aNode   Node at recursion level aDepth; must be content, otherwise
+//         NS_ERROR_FAILURE is returned.
+// aDepth  Distance below the range's common ancestor; must be >= 0.
+//
+// A node that is neither the start nor the end content of this level is
+// serialized whole; a boundary node is handed to
+// SerializeNodePartiallyContainedInRange().
+nsresult nsDocumentEncoder::RangeSerializer::SerializeRangeNodes(
+    const nsRange* const aRange, nsINode* const aNode, const int32_t aDepth) {
+  MOZ_ASSERT(aDepth >= 0);
+  MOZ_ASSERT(aRange);
+
+  nsCOMPtr<nsIContent> content = nsIContent::FromNodeOrNull(aNode);
+  NS_ENSURE_TRUE(content, NS_ERROR_FAILURE);
+
+  if (nsDocumentEncoder::IsInvisibleNodeAndShouldBeSkipped(*aNode, mFlags)) {
+    return NS_OK;
+  }
+
+  const StartAndEndContent startAndEndContent =
+      GetStartAndEndContentForRecursionLevel(aDepth);
+
+  const bool isBoundaryNode = startAndEndContent.mStart == content ||
+                              startAndEndContent.mEnd == content;
+
+  if (isBoundaryNode) {
+    nsresult rv = SerializeNodePartiallyContainedInRange(
+        *aNode, *content, startAndEndContent, *aRange, aDepth);
+    if (NS_WARN_IF(NS_FAILED(rv))) {
+      return rv;
+    }
+    return NS_OK;
+  }
+
+  // The node is completely contained in the range: serialize the whole
+  // subtree rooted at it.
+  nsresult rv = mNodeSerializer.SerializeToStringRecursive(
+      aNode, NodeSerializer::SerializeRoot::eYes);
+  NS_ENSURE_SUCCESS(rv, rv);
+  return NS_OK;
+}
+
+// Serializes a node that is the start and/or end content of the range at
+// recursion level aDepth, i.e. a node only partly covered by the range.
+//
+// Text nodes are clipped to the range offsets. For any other node, its start
+// and end are emitted (unless it is the range's closest common inclusive
+// ancestor) and only the children that fall inside the range are serialized.
+// While doing so, mContextInfoDepth is advanced for each boundary ancestor
+// until mHaltRangeHint is set.
+//
+// Returns the first failure encountered, otherwise NS_OK.
+nsresult
+nsDocumentEncoder::RangeSerializer::SerializeNodePartiallyContainedInRange(
+ nsINode& aNode, nsIContent& aContent,
+ const StartAndEndContent& aStartAndEndContent, const nsRange& aRange,
+ const int32_t aDepth) {
+ // due to implementation it is impossible for text node to be both start and
+ // end of range. We would have handled that case without getting here.
+ // XXXsmaug What does this all mean?
+ if (IsTextNode(&aNode)) {
+ nsresult rv =
+ SerializeTextNode(aNode, aContent, aStartAndEndContent, aRange);
+ NS_ENSURE_SUCCESS(rv, rv);
+ } else {
+ // The common ancestor itself is never emitted here; only nodes below it.
+ if (&aNode != mClosestCommonInclusiveAncestorOfRange) {
+ if (mRangeContextSerializer.mRangeNodeContext.IncludeInContext(aNode)) {
+ // halt the incrementing of mContextInfoDepth. This
+ // is so paste client will include this node in paste.
+ mHaltRangeHint = true;
+ }
+ if ((aStartAndEndContent.mStart == &aContent) && !mHaltRangeHint) {
+ ++mContextInfoDepth.mStart;
+ }
+ if ((aStartAndEndContent.mEnd == &aContent) && !mHaltRangeHint) {
+ ++mContextInfoDepth.mEnd;
+ }
+
+ // serialize the start of this node
+ nsresult rv = mNodeSerializer.SerializeNodeStart(aNode, 0, -1);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ const auto& inclusiveAncestorsOffsetsOfStart =
+ mRangeBoundariesInclusiveAncestorsAndOffsets
+ .mInclusiveAncestorsOffsetsOfStart;
+ const auto& inclusiveAncestorsOffsetsOfEnd =
+ mRangeBoundariesInclusiveAncestorsAndOffsets
+ .mInclusiveAncestorsOffsetsOfEnd;
+ // do some calculations that will tell us which children of this
+ // node are in the range.
+ // Defaults: start at the first child; the end stays unset (Nothing) until
+ // resolved below.
+ Maybe<uint32_t> startOffset = Some(0);
+ Maybe<uint32_t> endOffset;
+ if (aStartAndEndContent.mStart == &aContent && mStartRootIndex >= aDepth) {
+ startOffset = inclusiveAncestorsOffsetsOfStart[mStartRootIndex - aDepth];
+ }
+ if (aStartAndEndContent.mEnd == &aContent && mEndRootIndex >= aDepth) {
+ endOffset = inclusiveAncestorsOffsetsOfEnd[mEndRootIndex - aDepth];
+ }
+ // generated aContent will cause offset values of Nothing to be returned.
+ if (startOffset.isNothing()) {
+ startOffset = Some(0);
+ }
+ if (endOffset.isNothing()) {
+ endOffset = Some(aContent.GetChildCount());
+ } else {
+ // if we are at the "tip" of the selection, endOffset is fine.
+ // otherwise, we need to add one. This is because of the semantics
+ // of the offset list created by GetInclusiveAncestorsAndOffsets(). The
+ // intermediate points on the list use the endOffset of the
+ // location of the ancestor, rather than just past it. So we need
+ // to add one here in order to include it in the children we serialize.
+ if (&aNode != aRange.GetEndContainer()) {
+ MOZ_ASSERT(*endOffset != UINT32_MAX);
+ endOffset.ref()++;
+ }
+ }
+
+ // An end offset of zero means no child is inside the range.
+ if (*endOffset) {
+ nsresult rv = SerializeChildrenOfContent(aContent, *startOffset,
+ *endOffset, &aRange, aDepth);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+ // serialize the end of this node
+ if (&aNode != mClosestCommonInclusiveAncestorOfRange) {
+ nsresult rv = mNodeSerializer.SerializeNodeEnd(aNode);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+ }
+
+ return NS_OK;
+}
+
+// Serializes the children of aContent whose index lies in
+// [aStartOffset, aEndOffset).
+//
+// The first and last child of that interval may themselves be only partly
+// inside the range, so they go through SerializeRangeNodes() one level
+// deeper; the children in between are serialized as whole subtrees.
+//
+// Returns the first failure encountered, otherwise NS_OK.
+nsresult nsDocumentEncoder::RangeSerializer::SerializeChildrenOfContent(
+    nsIContent& aContent, uint32_t aStartOffset, uint32_t aEndOffset,
+    const nsRange* aRange, int32_t aDepth) {
+  nsIContent* child = aContent.GetFirstChild();
+  uint32_t j = 0;
+
+  // Skip the children that precede the range.
+  while (j < aStartOffset && child) {
+    child = child->GetNextSibling();
+    ++j;
+  }
+
+  MOZ_ASSERT(j == aStartOffset);
+
+  while (child && j < aEndOffset) {
+    const bool isFirstOrLastInRange =
+        (j == aStartOffset) || (j == aEndOffset - 1);
+
+    const nsresult rv =
+        isFirstOrLastInRange
+            ? SerializeRangeNodes(aRange, child, aDepth + 1)
+            : mNodeSerializer.SerializeToStringRecursive(
+                  child, NodeSerializer::SerializeRoot::eYes);
+    if (NS_FAILED(rv)) {
+      return rv;
+    }
+
+    child = child->GetNextSibling();
+    ++j;
+  }
+
+  return NS_OK;
+}
+
+// Emits the start of every ancestor in aAncestorArray that should surround
+// the serialized range as context, and remembers those ancestors in a new
+// entry of mRangeContexts so SerializeRangeContextEnd() can close them.
+//
+// The array is walked from its last element to its first.
+// NOTE(review): presumably the array is ordered innermost-first, so this
+// emits outermost ancestors first -- confirm against GetInclusiveAncestors().
+//
+// Does nothing when mDisableContextSerialize is set. Returns the first
+// failure from SerializeNodeStart(), otherwise NS_OK.
+nsresult nsDocumentEncoder::RangeContextSerializer::SerializeRangeContextStart(
+ const nsTArray<nsINode*>& aAncestorArray) {
+ if (mDisableContextSerialize) {
+ return NS_OK;
+ }
+
+ AutoTArray<nsINode*, 8>* serializedContext = mRangeContexts.AppendElement();
+
+ int32_t i = aAncestorArray.Length(), j;
+ nsresult rv = NS_OK;
+
+ // currently only for table-related elements; see Bug 137450
+ j = mRangeNodeContext.GetImmediateContextCount(aAncestorArray);
+
+ while (i > 0) {
+ nsINode* node = aAncestorArray.ElementAt(--i);
+ if (!node) break;
+
+ // Either a general inclusion or as immediate context
+ if (mRangeNodeContext.IncludeInContext(*node) || i < j) {
+ rv = mNodeSerializer.SerializeNodeStart(*node, 0, -1);
+ // The node is recorded before the failure check, so it is remembered
+ // even when emitting its start failed.
+ serializedContext->AppendElement(node);
+ if (NS_FAILED(rv)) break;
+ }
+ }
+
+ return rv;
+}
+
+// Closes the context opened by the most recent SerializeRangeContextStart():
+// emits the end of each remembered ancestor in reverse order of opening and
+// then pops that entry from mRangeContexts.
+//
+// Does nothing when mDisableContextSerialize is set. Release-asserts that a
+// context was started. Stops at the first failing SerializeNodeEnd() and
+// returns its result; the entry is popped in either case.
+nsresult nsDocumentEncoder::RangeContextSerializer::SerializeRangeContextEnd() {
+ if (mDisableContextSerialize) {
+ return NS_OK;
+ }
+
+ MOZ_RELEASE_ASSERT(!mRangeContexts.IsEmpty(),
+ "Tried to end context without starting one.");
+ AutoTArray<nsINode*, 8>& serializedContext = mRangeContexts.LastElement();
+
+ nsresult rv = NS_OK;
+ for (nsINode* node : Reversed(serializedContext)) {
+ rv = mNodeSerializer.SerializeNodeEnd(*node);
+
+ if (NS_FAILED(rv)) break;
+ }
+
+ mRangeContexts.RemoveLastElement();
+ return rv;
+}
+
+// Tells whether a frameless node should be skipped because its parent is
+// missing or invisible. Only relevant when SkipInvisibleContent is set.
+//
+// Nodes that do have a primary frame are not handled here;
+// IsInvisibleNodeAndShouldBeSkipped() covers them.
+bool nsDocumentEncoder::RangeSerializer::HasInvisibleParentAndShouldBeSkipped(
+    nsINode& aNode) const {
+  if (!(mFlags & SkipInvisibleContent)) {
+    return false;
+  }
+
+  nsCOMPtr<nsIContent> content = nsIContent::FromNode(aNode);
+  if (!content || content->GetPrimaryFrame()) {
+    // Not content, or it has a frame of its own: nothing to decide here.
+    return false;
+  }
+
+  // No frame: fall back to checking the visibility of the parent.
+  nsIContent* parent = content->GetParent();
+  if (!parent) {
+    return true;
+  }
+  return IsInvisibleNodeAndShouldBeSkipped(*parent, mFlags);
+}
+
+// Serializes the content covered by aRange, surrounded by the context
+// ancestors chosen by the range context serializer.
+//
+// A null or collapsed range, or one without a common ancestor, produces no
+// output and returns NS_OK. Returns NS_ERROR_FAILURE when the range has no
+// start or end container, otherwise the first failure encountered or NS_OK.
+//
+// Side effects: resets and refills mContextInfoDepth,
+// mCommonInclusiveAncestors, mRangeBoundariesInclusiveAncestorsAndOffsets,
+// mStartRootIndex and mEndRootIndex for this range.
+nsresult nsDocumentEncoder::RangeSerializer::SerializeRangeToString(
+ const nsRange* aRange) {
+ if (!aRange || aRange->Collapsed()) return NS_OK;
+
+ mClosestCommonInclusiveAncestorOfRange =
+ aRange->GetClosestCommonInclusiveAncestor();
+
+ if (!mClosestCommonInclusiveAncestorOfRange) {
+ return NS_OK;
+ }
+
+ nsINode* startContainer = aRange->GetStartContainer();
+ NS_ENSURE_TRUE(startContainer, NS_ERROR_FAILURE);
+ int32_t startOffset = aRange->StartOffset();
+
+ nsINode* endContainer = aRange->GetEndContainer();
+ NS_ENSURE_TRUE(endContainer, NS_ERROR_FAILURE);
+ int32_t endOffset = aRange->EndOffset();
+
+ // Start from a clean slate for this range.
+ mContextInfoDepth = {};
+ mCommonInclusiveAncestors.Clear();
+
+ mRangeBoundariesInclusiveAncestorsAndOffsets = {};
+ auto& inclusiveAncestorsOfStart =
+ mRangeBoundariesInclusiveAncestorsAndOffsets.mInclusiveAncestorsOfStart;
+ auto& inclusiveAncestorsOffsetsOfStart =
+ mRangeBoundariesInclusiveAncestorsAndOffsets
+ .mInclusiveAncestorsOffsetsOfStart;
+ auto& inclusiveAncestorsOfEnd =
+ mRangeBoundariesInclusiveAncestorsAndOffsets.mInclusiveAncestorsOfEnd;
+ auto& inclusiveAncestorsOffsetsOfEnd =
+ mRangeBoundariesInclusiveAncestorsAndOffsets
+ .mInclusiveAncestorsOffsetsOfEnd;
+
+ nsContentUtils::GetInclusiveAncestors(mClosestCommonInclusiveAncestorOfRange,
+ mCommonInclusiveAncestors);
+ nsContentUtils::GetInclusiveAncestorsAndOffsets(
+ startContainer, startOffset, &inclusiveAncestorsOfStart,
+ &inclusiveAncestorsOffsetsOfStart);
+ nsContentUtils::GetInclusiveAncestorsAndOffsets(
+ endContainer, endOffset, &inclusiveAncestorsOfEnd,
+ &inclusiveAncestorsOffsetsOfEnd);
+
+ // Position of the common ancestor within each boundary's ancestor chain;
+ // recursion levels are later measured relative to these indices.
+ nsCOMPtr<nsIContent> commonContent =
+ nsIContent::FromNodeOrNull(mClosestCommonInclusiveAncestorOfRange);
+ mStartRootIndex = inclusiveAncestorsOfStart.IndexOf(commonContent);
+ mEndRootIndex = inclusiveAncestorsOfEnd.IndexOf(commonContent);
+
+ nsresult rv = NS_OK;
+
+ rv = mRangeContextSerializer.SerializeRangeContextStart(
+ mCommonInclusiveAncestors);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ // A range inside a single text node is serialized directly.
+ if (startContainer == endContainer && IsTextNode(startContainer)) {
+ if (HasInvisibleParentAndShouldBeSkipped(*startContainer)) {
+ // NOTE(review): this returns after SerializeRangeContextStart() without
+ // the matching SerializeRangeContextEnd(), so any context start tags
+ // already emitted are not closed here -- confirm this is intended.
+ return NS_OK;
+ }
+ rv = mNodeSerializer.SerializeTextNode(*startContainer, startOffset,
+ endOffset);
+ NS_ENSURE_SUCCESS(rv, rv);
+ } else {
+ rv = SerializeRangeNodes(aRange, mClosestCommonInclusiveAncestorOfRange, 0);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+ rv = mRangeContextSerializer.SerializeRangeContextEnd();
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ return rv;
+}
+
+// Drops the reference to the document and re-initializes the encoder state.
+// aClearCachedSerializer is forwarded unchanged to Initialize().
+void nsDocumentEncoder::ReleaseDocumentReferenceAndInitialize(
+ bool aClearCachedSerializer) {
+ mDocument = nullptr;
+
+ Initialize(aClearCachedSerializer);
+}
+
+// Encodes the configured scope into aOutputString without a length limit
+// (a max length of 0 means "unlimited" to EncodeToStringWithMaxLength()).
+NS_IMETHODIMP
+nsDocumentEncoder::EncodeToString(nsAString& aOutputString) {
+ return EncodeToStringWithMaxLength(0, aOutputString);
+}
+
+// Encodes the configured scope into aOutputString.
+//
+// aMaxLength     Forwarded to SerializeDependingOnScope(); 0 means no limit.
+// aOutputString  Truncated first; receives the result only on success.
+//
+// Returns NS_ERROR_NOT_INITIALIZED without a document,
+// NS_ERROR_OUT_OF_MEMORY if the work buffer cannot be allocated,
+// NS_ERROR_NOT_IMPLEMENTED if no serializer exists for the MIME type,
+// otherwise the serialization / flush result.
+//
+// The function recycles a small string buffer (mCachedBuffer) between calls:
+// it is lent to the local `output` string for the duration of the encoding
+// and taken back afterwards if it is still the same small, writable buffer.
+NS_IMETHODIMP
+nsDocumentEncoder::EncodeToStringWithMaxLength(uint32_t aMaxLength,
+ nsAString& aOutputString) {
+ MOZ_ASSERT(mRangeContextSerializer.mRangeContexts.IsEmpty(),
+ "Re-entrant call to nsDocumentEncoder.");
+ // Whatever happens below, leave no range contexts behind on exit.
+ auto rangeContextGuard =
+ MakeScopeExit([&] { mRangeContextSerializer.mRangeContexts.Clear(); });
+
+ if (!mDocument) return NS_ERROR_NOT_INITIALIZED;
+
+ AutoReleaseDocumentIfNeeded autoReleaseDocument(this);
+
+ aOutputString.Truncate();
+
+ nsString output;
+ static const size_t kStringBufferSizeInBytes = 2048;
+ if (!mCachedBuffer) {
+ mCachedBuffer = nsStringBuffer::Alloc(kStringBufferSizeInBytes).take();
+ if (NS_WARN_IF(!mCachedBuffer)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ }
+ // NOTE(review): the condition asserts the buffer is NOT read-only, while
+ // the message text says "non-readonly" -- the wording looks inverted.
+ NS_ASSERTION(
+ !mCachedBuffer->IsReadonly(),
+ "nsIDocumentEncoder shouldn't keep reference to non-readonly buffer!");
+ // Make the buffer an empty, null-terminated string and hand it to `output`.
+ static_cast<char16_t*>(mCachedBuffer->Data())[0] = char16_t(0);
+ mCachedBuffer->ToString(0, output, true);
+ // output owns the buffer now!
+ mCachedBuffer = nullptr;
+
+ // Lazily create the content serializer registered for the MIME type.
+ if (!mSerializer) {
+ nsAutoCString progId(NS_CONTENTSERIALIZER_CONTRACTID_PREFIX);
+ AppendUTF16toUTF8(mMimeType, progId);
+
+ mSerializer = do_CreateInstance(progId.get());
+ NS_ENSURE_TRUE(mSerializer, NS_ERROR_NOT_IMPLEMENTED);
+ }
+
+ nsresult rv = NS_OK;
+
+ // The encoding declaration is rewritten only when the whole document is
+ // being encoded and the caller did not opt out.
+ bool rewriteEncodingDeclaration =
+ !mEncodingScope.IsLimited() &&
+ !(mFlags & OutputDontRewriteEncodingDeclaration);
+ mSerializer->Init(mFlags, mWrapColumn, mEncoding, mIsCopying,
+ rewriteEncodingDeclaration, &mNeedsPreformatScanning,
+ output);
+
+ // On failure this returns before FlushAndFinish() and before the buffer is
+ // re-cached; mCachedBuffer stays null and is re-allocated on the next call.
+ rv = SerializeDependingOnScope(aMaxLength);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ rv = mSerializer->FlushAndFinish();
+
+ mCachedBuffer = nsStringBuffer::FromString(output);
+ // We have to be careful how we set aOutputString, because we don't
+ // want it to end up sharing mCachedBuffer if we plan to reuse it.
+ bool setOutput = false;
+ // Try to cache the buffer.
+ if (mCachedBuffer) {
+ if ((mCachedBuffer->StorageSize() == kStringBufferSizeInBytes) &&
+ !mCachedBuffer->IsReadonly()) {
+ // Still the small writable buffer: keep it for the next call. The
+ // result is copied into aOutputString below instead of being shared.
+ mCachedBuffer->AddRef();
+ } else {
+ // The buffer grew or became shared: give it to the caller rather than
+ // caching it.
+ if (NS_SUCCEEDED(rv)) {
+ mCachedBuffer->ToString(output.Length(), aOutputString);
+ setOutput = true;
+ }
+ mCachedBuffer = nullptr;
+ }
+ }
+
+ if (!setOutput && NS_SUCCEEDED(rv)) {
+ aOutputString.Append(output.get(), output.Length());
+ }
+
+ return rv;
+}
+
+// Encodes the configured scope and writes it to aStream using mEncoding.
+//
+// aStream  Destination stream; must be non-null.
+//
+// Returns NS_ERROR_NOT_INITIALIZED without a document and
+// NS_ERROR_UCONV_NOCONV without an encoding. Otherwise returns the failure of
+// the encoding step if it failed, else the result of the final flush.
+//
+// Two defects of the previous version are addressed here:
+//  - the result of EncodeToString() was overwritten by the flush result, so
+//    an encoding failure could be reported as success;
+//  - a failing flush returned early without resetting mTextStreamer, leaving
+//    it engaged after `buf` (which it was constructed from) went out of
+//    scope.
+NS_IMETHODIMP
+nsDocumentEncoder::EncodeToStream(nsIOutputStream* aStream) {
+  MOZ_ASSERT(mRangeContextSerializer.mRangeContexts.IsEmpty(),
+             "Re-entrant call to nsDocumentEncoder.");
+  auto rangeContextGuard =
+      MakeScopeExit([&] { mRangeContextSerializer.mRangeContexts.Clear(); });
+  NS_ENSURE_ARG_POINTER(aStream);
+
+  if (!mDocument) return NS_ERROR_NOT_INITIALIZED;
+
+  if (!mEncoding) {
+    return NS_ERROR_UCONV_NOCONV;
+  }
+
+  nsAutoString buf;
+  const bool isPlainText = mMimeType.LowerCaseEqualsLiteral(kTextMime);
+  mTextStreamer.emplace(*aStream, mEncoding->NewEncoder(), isPlainText, buf);
+  // Declared after `buf` so it runs before `buf` is destroyed: the streamer
+  // must never outlive the buffer and stream it was created with, on any
+  // exit path.
+  auto textStreamerGuard = MakeScopeExit([&] { mTextStreamer.reset(); });
+
+  const nsresult encodeRv = EncodeToString(buf);
+
+  // Force a flush of the last chunk of data. As before, this is attempted
+  // even when encoding failed.
+  const nsresult flushRv = mTextStreamer->ForceFlush();
+
+  // Report the earliest failure.
+  NS_ENSURE_SUCCESS(encodeRv, encodeRv);
+  NS_ENSURE_SUCCESS(flushRv, flushRv);
+
+  return NS_OK;
+}
+
+// Not supported by the base encoder: always returns NS_ERROR_NOT_IMPLEMENTED
+// and leaves all three output strings untouched. nsHTMLCopyEncoder overrides
+// this.
+NS_IMETHODIMP
+nsDocumentEncoder::EncodeToStringWithContext(nsAString& aContextString,
+ nsAString& aInfoString,
+ nsAString& aEncodedString) {
+ return NS_ERROR_NOT_IMPLEMENTED;
+}
+
+// Installs (or, with null, removes) the node fixup callback stored in
+// mNodeFixup. Always returns NS_OK.
+NS_IMETHODIMP
+nsDocumentEncoder::SetNodeFixup(nsIDocumentEncoderNodeFixup* aFixup) {
+ mNodeFixup = aFixup;
+ return NS_OK;
+}
+
+// Returns true when aContentType is exactly (case-sensitively) one of the
+// content types a document encoder can be created for.
+bool do_getDocumentTypeSupportedForEncoding(const char* aContentType) {
+  static const char* const kSupportedContentTypes[] = {
+      "text/xml",      "application/xml", "application/xhtml+xml",
+      "image/svg+xml", "text/html",       "text/plain",
+  };
+
+  for (const char* supportedType : kSupportedContentTypes) {
+    if (nsCRT::strcmp(aContentType, supportedType) == 0) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Creates a new nsDocumentEncoder for a supported content type.
+// Returns null when do_getDocumentTypeSupportedForEncoding() rejects
+// aContentType.
+already_AddRefed<nsIDocumentEncoder> do_createDocumentEncoder(
+    const char* aContentType) {
+  if (!do_getDocumentTypeSupportedForEncoding(aContentType)) {
+    return nullptr;
+  }
+  return do_AddRef(new nsDocumentEncoder);
+}
+
+// Document encoder specialization used for copying a selection. Compared to
+// nsDocumentEncoder it marks the encoding as a copy operation, promotes the
+// selected ranges to enclosing ancestors where possible, falls back to plain
+// text inside text widgets, and can additionally produce context and range
+// info strings (EncodeToStringWithContext).
+class nsHTMLCopyEncoder : public nsDocumentEncoder {
+ private:
+ // Decides which ancestors of a range are serialized as surrounding context.
+ class RangeNodeContext final : public nsDocumentEncoder::RangeNodeContext {
+ bool IncludeInContext(nsINode& aNode) const final;
+
+ int32_t GetImmediateContextCount(
+ const nsTArray<nsINode*>& aAncestorArray) const final;
+ };
+
+ public:
+ nsHTMLCopyEncoder();
+ ~nsHTMLCopyEncoder();
+
+ NS_IMETHOD Init(Document* aDocument, const nsAString& aMimeType,
+ uint32_t aFlags) override;
+
+ // overridden methods from nsDocumentEncoder
+ MOZ_CAN_RUN_SCRIPT_BOUNDARY
+ NS_IMETHOD SetSelection(Selection* aSelection) override;
+ NS_IMETHOD EncodeToStringWithContext(nsAString& aContextString,
+ nsAString& aInfoString,
+ nsAString& aEncodedString) override;
+ NS_IMETHOD EncodeToString(nsAString& aOutputString) override;
+
+ protected:
+ // Tells GetPromotedPoint() which end of a range is being promoted.
+ enum Endpoint { kStart, kEnd };
+
+ // Range promotion helpers; used by SetSelection() to widen each range.
+ nsresult PromoteRange(nsRange* inRange);
+ nsresult PromoteAncestorChain(nsCOMPtr<nsINode>* ioNode,
+ int32_t* ioStartOffset, int32_t* ioEndOffset);
+ nsresult GetPromotedPoint(Endpoint aWhere, nsINode* aNode, int32_t aOffset,
+ nsCOMPtr<nsINode>* outNode, int32_t* outOffset,
+ nsINode* aCommon);
+ static nsCOMPtr<nsINode> GetChildAt(nsINode* aParent, int32_t aOffset);
+ static bool IsMozBR(Element* aNode);
+ static nsresult GetNodeLocation(nsINode* inChild,
+ nsCOMPtr<nsINode>* outParent,
+ int32_t* outOffset);
+ bool IsRoot(nsINode* aNode);
+ static bool IsFirstNode(nsINode* aNode);
+ static bool IsLastNode(nsINode* aNode);
+
+ // True when the selection is treated as living in a text widget: inside an
+ // <input>/<textarea>, or in a non-HTML document (see SetSelection()). In
+ // that state output is forced to text/plain and no context info is encoded.
+ bool mIsTextWidget;
+};
+
+nsHTMLCopyEncoder::nsHTMLCopyEncoder()
+ : nsDocumentEncoder{MakeUnique<nsHTMLCopyEncoder::RangeNodeContext>()} {
+ mIsTextWidget = false;
+}
+
+// Nothing to clean up beyond what the members and the base class do.
+nsHTMLCopyEncoder::~nsHTMLCopyEncoder() = default;
+
+// Prepares the encoder for a copy operation on aDocument.
+//
+// aMimeType  "text/plain" forces plain-text output; any other value selects
+//            "text/html".
+// aFlags     Output flags; OutputAbsoluteLinks is always added, and
+//            OutputNoScriptContent is added when scripting is disabled in
+//            the document.
+//
+// Returns NS_ERROR_INVALID_ARG when aDocument is null, NS_OK otherwise.
+NS_IMETHODIMP
+nsHTMLCopyEncoder::Init(Document* aDocument, const nsAString& aMimeType,
+                        uint32_t aFlags) {
+  if (!aDocument) {
+    return NS_ERROR_INVALID_ARG;
+  }
+
+  mIsTextWidget = false;
+  Initialize();
+
+  mIsCopying = true;
+  mDocument = aDocument;
+
+  // Historically callers passed text/plain and it was treated as "guess
+  // between text/html and text/plain" in this context (it means something
+  // else in other contexts). Now "text/plain" means: force text/plain,
+  // do not guess. Everything else is encoded as text/html.
+  if (!aMimeType.EqualsLiteral("text/plain")) {
+    mMimeType.AssignLiteral("text/html");
+  } else {
+    mMimeType.AssignLiteral("text/plain");
+  }
+
+  // Make all links absolute when copying
+  // (see related bugs #57296, #41924, #58646, #32768)
+  mFlags = aFlags | OutputAbsoluteLinks;
+
+  if (!mDocument->IsScriptEnabled()) {
+    mFlags |= OutputNoScriptContent;
+  }
+
+  return NS_OK;
+}
+
+// Sets the selection to copy.
+//
+// Inside a text widget (<input>/<textarea>) or a non-HTML document the
+// selection is stored as-is and mIsTextWidget is set. Otherwise a fresh
+// Selection is built from clones of aSelection's ranges, each promoted via
+// PromoteRange(), so aSelection itself is not modified.
+//
+// Returns NS_ERROR_NULL_POINTER for a null selection, NS_ERROR_FAILURE for a
+// selection without ranges (or a missing range while iterating), any failure
+// from promoting or adding a range, otherwise NS_OK.
+NS_IMETHODIMP
+nsHTMLCopyEncoder::SetSelection(Selection* aSelection) {
+ // check for text widgets: we need to recognize these so that
+ // we don't tweak the selection to be outside of the magic
+ // div that ender-lite text widgets are embedded in.
+
+ if (!aSelection) return NS_ERROR_NULL_POINTER;
+
+ const uint32_t rangeCount = aSelection->RangeCount();
+
+ // if selection is uninitialized return
+ if (!rangeCount) {
+ return NS_ERROR_FAILURE;
+ }
+
+ // we'll just use the common parent of the first range. Implicit assumption
+ // here that multi-range selections are table cell selections, in which case
+ // the common parent is somewhere in the table and we don't really care where.
+ //
+ // FIXME(emilio, bug 1455894): This assumption is already wrong, and will
+ // probably be more wrong in a Shadow DOM world...
+ //
+ // We should be able to write this as "Find the common ancestor of the
+ // selection, then go through the flattened tree and serialize the selected
+ // nodes", effectively serializing the composed tree.
+ RefPtr<nsRange> range = aSelection->GetRangeAt(0);
+ nsINode* commonParent = range->GetClosestCommonInclusiveAncestor();
+
+ // Walk up from the common parent looking for an enclosing text widget.
+ for (nsCOMPtr<nsIContent> selContent(
+ nsIContent::FromNodeOrNull(commonParent));
+ selContent; selContent = selContent->GetParent()) {
+ // checking for selection inside a plaintext form widget
+ if (selContent->IsAnyOfHTMLElements(nsGkAtoms::input,
+ nsGkAtoms::textarea)) {
+ mIsTextWidget = true;
+ break;
+ }
+ }
+
+ // In a text widget the selection is used as-is (no normalization) and the
+ // output is forced to plain text.
+ if (mIsTextWidget) {
+ mEncodingScope.mSelection = aSelection;
+ mMimeType.AssignLiteral("text/plain");
+ return NS_OK;
+ }
+
+ // XXX We should try to get rid of the Selection object here.
+ // XXX bug 1245883
+
+ // also consider ourselves in a text widget if we can't find an html document
+ if (!(mDocument && mDocument->IsHTMLDocument())) {
+ mIsTextWidget = true;
+ mEncodingScope.mSelection = aSelection;
+ // mMimeType is set to text/plain when encoding starts.
+ return NS_OK;
+ }
+
+ // There is no Clone() for Selection, so build a new one and copy the
+ // (promoted) ranges into it. The old, never-enabled approach was:
+ // nsresult rv = aSelection->Clone(getter_AddRefs(mSelection));
+ // NS_ENSURE_SUCCESS(rv, rv);
+ mEncodingScope.mSelection = new Selection(SelectionType::eNormal, nullptr);
+
+ // loop through the ranges in the selection
+ for (const uint32_t rangeIdx : IntegerRange(rangeCount)) {
+ MOZ_ASSERT(aSelection->RangeCount() == rangeCount);
+ range = aSelection->GetRangeAt(rangeIdx);
+ NS_ENSURE_TRUE(range, NS_ERROR_FAILURE);
+ RefPtr<nsRange> myRange = range->CloneRange();
+ MOZ_ASSERT(myRange);
+
+ // adjust range to include any ancestors whose children are entirely
+ // selected
+ nsresult rv = PromoteRange(myRange);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ // Strong local references are taken for the duration of the call below.
+ ErrorResult result;
+ RefPtr<Selection> selection(mEncodingScope.mSelection);
+ RefPtr<Document> document(mDocument);
+ selection->AddRangeAndSelectFramesAndNotifyListenersInternal(
+ *myRange, document, result);
+ rv = result.StealNSResult();
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ return NS_OK;
+}
+
+// Same as nsDocumentEncoder::EncodeToString(), except that the MIME type is
+// forced to text/plain first when the selection is in a text widget.
+NS_IMETHODIMP
+nsHTMLCopyEncoder::EncodeToString(nsAString& aOutputString) {
+ if (mIsTextWidget) {
+ mMimeType.AssignLiteral("text/plain");
+ }
+ return nsDocumentEncoder::EncodeToString(aOutputString);
+}
+
+// Encodes the selection and, unless in a text widget, also the surrounding
+// context and range hints.
+//
+// aEncodedString  Receives the encoded selection (via EncodeToString()).
+// aContextString  Receives the start and end of every common inclusive
+//                 ancestor of the last serialized range, outermost first.
+// aInfoString     Receives "<start depth>,<end depth>" from
+//                 mContextInfoDepth.
+//
+// In a text widget only aEncodedString is written. Returns the first failure
+// encountered, otherwise NS_OK.
+NS_IMETHODIMP
+nsHTMLCopyEncoder::EncodeToStringWithContext(nsAString& aContextString,
+ nsAString& aInfoString,
+ nsAString& aEncodedString) {
+ nsresult rv = EncodeToString(aEncodedString);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ // do not encode any context info or range hints if we are in a text widget.
+ if (mIsTextWidget) return NS_OK;
+
+ // now encode common ancestors into aContextString. Note that the common
+ // ancestors will be for the last range in the selection in the case of
+ // multirange selections. encoding ancestors every range in a multirange
+ // selection in a way that could be understood by the paste code would be a
+ // lot more work to do. As a practical matter, selections are single range,
+ // and the ones that aren't are table cell selections where all the cells are
+ // in the same table.
+
+ // Re-initialize the serializer so that it now writes into aContextString.
+ mSerializer->Init(mFlags, mWrapColumn, mEncoding, mIsCopying, false,
+ &mNeedsPreformatScanning, aContextString);
+
+ // leaf of ancestors might be text node. If so discard it.
+ int32_t count = mRangeSerializer.mCommonInclusiveAncestors.Length();
+ int32_t i;
+ nsCOMPtr<nsINode> node;
+ if (count > 0) {
+ node = mRangeSerializer.mCommonInclusiveAncestors.ElementAt(0);
+ }
+
+ if (node && IsTextNode(node)) {
+ mRangeSerializer.mCommonInclusiveAncestors.RemoveElementAt(0);
+ // The depths are only decremented when non-zero.
+ if (mRangeSerializer.mContextInfoDepth.mStart) {
+ --mRangeSerializer.mContextInfoDepth.mStart;
+ }
+ if (mRangeSerializer.mContextInfoDepth.mEnd) {
+ --mRangeSerializer.mContextInfoDepth.mEnd;
+ }
+ count--;
+ }
+
+ // Open the ancestors from the last array element down to the first ...
+ i = count;
+ while (i > 0) {
+ node = mRangeSerializer.mCommonInclusiveAncestors.ElementAt(--i);
+ rv = mNodeSerializer.SerializeNodeStart(*node, 0, -1);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+ // i = 0; guaranteed by above
+ // ... and close them in the opposite order.
+ while (i < count) {
+ node = mRangeSerializer.mCommonInclusiveAncestors.ElementAt(i++);
+ rv = mNodeSerializer.SerializeNodeEnd(*node);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ // NOTE(review): the result of Finish() is ignored, and the early returns
+ // above skip this call -- confirm that is acceptable.
+ mSerializer->Finish();
+
+ // encode range info : the start and end depth of the selection, where the
+ // depth is distance down in the parent hierarchy. Later we will need to add
+ // leading/trailing whitespace info to this.
+ nsAutoString infoString;
+ infoString.AppendInt(mRangeSerializer.mContextInfoDepth.mStart);
+ infoString.Append(char16_t(','));
+ infoString.AppendInt(mRangeSerializer.mContextInfoDepth.mEnd);
+ aInfoString = infoString;
+
+ return rv;
+}
+
+// Returns true when aNode is one of the HTML elements that are always
+// serialized as context around a copied range (inline formatting elements,
+// links, script, span, pre and the h1-h6 headings). Non-content nodes are
+// never included.
+bool nsHTMLCopyEncoder::RangeNodeContext::IncludeInContext(
+    nsINode& aNode) const {
+  nsCOMPtr<nsIContent> content(nsIContent::FromNodeOrNull(&aNode));
+
+  if (content) {
+    return content->IsAnyOfHTMLElements(
+        nsGkAtoms::b, nsGkAtoms::i, nsGkAtoms::u, nsGkAtoms::a, nsGkAtoms::tt,
+        nsGkAtoms::s, nsGkAtoms::big, nsGkAtoms::small, nsGkAtoms::strike,
+        nsGkAtoms::em, nsGkAtoms::strong, nsGkAtoms::dfn, nsGkAtoms::code,
+        nsGkAtoms::cite, nsGkAtoms::var, nsGkAtoms::abbr, nsGkAtoms::font,
+        nsGkAtoms::script, nsGkAtoms::span, nsGkAtoms::pre, nsGkAtoms::h1,
+        nsGkAtoms::h2, nsGkAtoms::h3, nsGkAtoms::h4, nsGkAtoms::h5,
+        nsGkAtoms::h6);
+  }
+
+  return false;
+}
+
+// Widens inRange in place: each endpoint is replaced by its promoted point
+// (see GetPromotedPoint()), and if both end up at the common ancestor, the
+// pair is promoted further up the ancestor chain.
+//
+// inRange is dereferenced unconditionally and so must be non-null.
+// Returns NS_ERROR_UNEXPECTED for an unpositioned range, any failure from the
+// promotion helpers or from setting the new boundaries, otherwise NS_OK.
+nsresult nsHTMLCopyEncoder::PromoteRange(nsRange* inRange) {
+ if (!inRange->IsPositioned()) {
+ return NS_ERROR_UNEXPECTED;
+ }
+ nsCOMPtr<nsINode> startNode = inRange->GetStartContainer();
+ uint32_t startOffset = inRange->StartOffset();
+ nsCOMPtr<nsINode> endNode = inRange->GetEndContainer();
+ uint32_t endOffset = inRange->EndOffset();
+ nsCOMPtr<nsINode> common = inRange->GetClosestCommonInclusiveAncestor();
+
+ nsCOMPtr<nsINode> opStartNode;
+ nsCOMPtr<nsINode> opEndNode;
+ int32_t opStartOffset, opEndOffset;
+
+ // examine range endpoints.
+ nsresult rv =
+ GetPromotedPoint(kStart, startNode, static_cast<int32_t>(startOffset),
+ address_of(opStartNode), &opStartOffset, common);
+ NS_ENSURE_SUCCESS(rv, rv);
+ rv = GetPromotedPoint(kEnd, endNode, static_cast<int32_t>(endOffset),
+ address_of(opEndNode), &opEndOffset, common);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ // if both range endpoints are at the common ancestor, check for possible
+ // inclusion of ancestors
+ if (opStartNode == common && opEndNode == common) {
+ rv = PromoteAncestorChain(address_of(opStartNode), &opStartOffset,
+ &opEndOffset);
+ NS_ENSURE_SUCCESS(rv, rv);
+ // PromoteAncestorChain() works on a single node, so both endpoints share
+ // the promoted node.
+ opEndNode = opStartNode;
+ }
+
+ // set the range to the new values
+ ErrorResult err;
+ inRange->SetStart(*opStartNode, static_cast<uint32_t>(opStartOffset), err);
+ if (NS_WARN_IF(err.Failed())) {
+ return err.StealNSResult();
+ }
+ inRange->SetEnd(*opEndNode, static_cast<uint32_t>(opEndOffset), err);
+ if (NS_WARN_IF(err.Failed())) {
+ return err.StealNSResult();
+ }
+ return NS_OK;
+}
+
+// PromoteAncestorChain promotes the range
+// [{*ioNode,*ioStartOffset} , {*ioNode,*ioEndOffset}] up the ancestor chain.
+// Unlike GetPromotedPoint, an endpoint is only promoted when the other one
+// can be promoted as well, which is why a single ioNode is used instead of
+// separate start and end nodes.
+//
+// All three arguments are in/out and must be non-null
+// (NS_ERROR_NULL_POINTER otherwise); *ioNode must be non-null too. Promotion
+// stops at the root, when either endpoint cannot move up one level, or when
+// the parent's editable state differs from that of the node initially passed
+// in. Returns the first failure from GetPromotedPoint, otherwise NS_OK.
+nsresult nsHTMLCopyEncoder::PromoteAncestorChain(nsCOMPtr<nsINode>* ioNode,
+                                                 int32_t* ioStartOffset,
+                                                 int32_t* ioEndOffset) {
+  if (!ioNode || !ioStartOffset || !ioEndOffset) return NS_ERROR_NULL_POINTER;
+
+  nsresult rv = NS_OK;
+
+  nsCOMPtr<nsINode> frontNode, endNode, parent;
+  int32_t frontOffset, endOffset;
+
+  // Remember the editable state of the starting node so that we never promote
+  // into an ancestor whose editable state differs.
+  nsCOMPtr<nsINode> node = *ioNode;
+  const bool isEditable = node->IsEditable();
+
+  // Keep going for as long as both endpoints can be promoted together.
+  for (;;) {
+    node = *ioNode;
+    parent = node->GetParentNode();
+    if (!parent) {
+      break;
+    }
+
+    // Passing parent as the last argument limits GetPromotedPoint() to a
+    // single level up the hierarchy.
+    rv = GetPromotedPoint(kStart, *ioNode, *ioStartOffset,
+                          address_of(frontNode), &frontOffset, parent);
+    NS_ENSURE_SUCCESS(rv, rv);
+    // Same attempt for the end point.
+    rv = GetPromotedPoint(kEnd, *ioNode, *ioEndOffset, address_of(endNode),
+                          &endOffset, parent);
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    // Continue only if both endpoints moved up to the parent and the parent
+    // has the same editable state as the original node.
+    const bool promotedBothEndpoints =
+        (frontNode == parent) && (endNode == parent) &&
+        (frontNode->IsEditable() == isEditable);
+    if (!promotedBothEndpoints) {
+      break;
+    }
+
+    *ioNode = frontNode;
+    *ioStartOffset = frontOffset;
+    *ioEndOffset = endOffset;
+  }
+  return rv;
+}
+/**
+ * Computes the "promoted" position for one range endpoint, i.e. the point
+ * reached by walking up the tree for as long as the endpoint sits at the
+ * very beginning (aWhere == kStart) or very end (aWhere == kEnd) of its
+ * container.
+ *
+ * @param aWhere    kStart or kEnd; selects which of the two mirrored
+ *                  branches below runs.
+ * @param aNode     Container of the endpoint being promoted.
+ * @param aOffset   Offset of the endpoint inside aNode.
+ * @param outNode   Receives the promoted container. Initialised to aNode, so
+ *                  every early return reports the unpromoted point.
+ * @param outOffset Receives the promoted offset. Initialised to aOffset.
+ * @param common    Stop node: the walk never climbs above it, and nothing is
+ *                  done at all when aNode is already this node.
+ * @return NS_OK, or the failure code of a GetNodeLocation() call.
+ *
+ * bResetPromotion is set when the endpoint lies inside a text node and only
+ * whitespace separates it from the text edge. It is cleared again once the
+ * walk passes a parent that is a block-level HTML element; if it is still set
+ * when the walk ends, the original (aNode, aOffset) is reported instead of
+ * the promoted point.
+ */
+nsresult nsHTMLCopyEncoder::GetPromotedPoint(Endpoint aWhere, nsINode* aNode,
+ int32_t aOffset,
+ nsCOMPtr<nsINode>* outNode,
+ int32_t* outOffset,
+ nsINode* common) {
+ nsresult rv = NS_OK;
+ nsCOMPtr<nsINode> node = aNode;
+ nsCOMPtr<nsINode> parent = aNode;
+ int32_t offset = aOffset;
+ bool bResetPromotion = false;
+
+ // default values: report the input point unless promotion succeeds
+ *outNode = node;
+ *outOffset = offset;
+
+ if (common == node) return NS_OK;
+
+ if (aWhere == kStart) {
+ // some special casing for text nodes
+ if (auto nodeAsText = aNode->GetAsText()) {
+ // if not at beginning of text node, we are done
+ if (offset > 0) {
+ // unless everything before us is just whitespace. NOTE: we need a more
+ // general solution that truly detects all cases of non-significant
+ // whitespace with no false alarms.
+ nsAutoString text;
+ nodeAsText->SubstringData(0, offset, text, IgnoreErrors());
+ text.CompressWhitespace();
+ if (!text.IsEmpty()) return NS_OK;
+ bResetPromotion = true;
+ }
+ // otherwise continue from the text node's position inside its parent
+ rv = GetNodeLocation(aNode, address_of(parent), &offset);
+ NS_ENSURE_SUCCESS(rv, rv);
+ } else {
+ node = GetChildAt(parent, offset);
+ }
+ if (!node) node = parent;
+
+ // finding the real start for this point. look up the tree for as long as
+ // we are the first node in the container, and as long as we haven't hit the
+ // body node.
+ if (!IsRoot(node) && (parent != common)) {
+ rv = GetNodeLocation(node, address_of(parent), &offset);
+ NS_ENSURE_SUCCESS(rv, rv);
+ if (offset == -1) return NS_OK; // we hit generated content; STOP
+ while ((IsFirstNode(node)) && (!IsRoot(parent)) && (parent != common)) {
+ if (bResetPromotion) {
+ nsCOMPtr<nsIContent> content = nsIContent::FromNodeOrNull(parent);
+ if (content && content->IsHTMLElement()) {
+ if (nsHTMLElement::IsBlock(
+ nsHTMLTags::AtomTagToId(content->NodeInfo()->NameAtom()))) {
+ bResetPromotion = false;
+ }
+ }
+ }
+
+ node = parent;
+ rv = GetNodeLocation(node, address_of(parent), &offset);
+ NS_ENSURE_SUCCESS(rv, rv);
+ if (offset == -1) // we hit generated content; STOP
+ {
+ // back up a bit
+ parent = node;
+ offset = 0;
+ break;
+ }
+ }
+ if (bResetPromotion) {
+ *outNode = aNode;
+ *outOffset = aOffset;
+ } else {
+ *outNode = parent;
+ *outOffset = offset;
+ }
+ return rv;
+ }
+ }
+
+ if (aWhere == kEnd) {
+ // some special casing for text nodes
+ if (auto nodeAsText = aNode->GetAsText()) {
+ // if not at end of text node, we are done
+ uint32_t len = aNode->Length();
+ if (offset < (int32_t)len) {
+ // unless everything after us is just whitespace. NOTE: we need a more
+ // general solution that truly detects all cases of non-significant
+ // whitespace with no false alarms.
+ nsAutoString text;
+ nodeAsText->SubstringData(offset, len - offset, text, IgnoreErrors());
+ text.CompressWhitespace();
+ if (!text.IsEmpty()) return NS_OK;
+ bResetPromotion = true;
+ }
+ rv = GetNodeLocation(aNode, address_of(parent), &offset);
+ NS_ENSURE_SUCCESS(rv, rv);
+ } else {
+ if (offset) offset--; // we want node _before_ offset
+ node = GetChildAt(parent, offset);
+ }
+ if (!node) node = parent;
+
+ // finding the real end for this point. look up the tree for as long as we
+ // are the last node in the container, and as long as we haven't hit the
+ // body node.
+ if (!IsRoot(node) && (parent != common)) {
+ rv = GetNodeLocation(node, address_of(parent), &offset);
+ NS_ENSURE_SUCCESS(rv, rv);
+ if (offset == -1) return NS_OK; // we hit generated content; STOP
+ while ((IsLastNode(node)) && (!IsRoot(parent)) && (parent != common)) {
+ if (bResetPromotion) {
+ nsCOMPtr<nsIContent> content = nsIContent::FromNodeOrNull(parent);
+ if (content && content->IsHTMLElement()) {
+ if (nsHTMLElement::IsBlock(
+ nsHTMLTags::AtomTagToId(content->NodeInfo()->NameAtom()))) {
+ bResetPromotion = false;
+ }
+ }
+ }
+
+ node = parent;
+ rv = GetNodeLocation(node, address_of(parent), &offset);
+ NS_ENSURE_SUCCESS(rv, rv);
+ if (offset == -1) // we hit generated content; STOP
+ {
+ // back up a bit
+ parent = node;
+ offset = 0;
+ break;
+ }
+ }
+ if (bResetPromotion) {
+ *outNode = aNode;
+ *outOffset = aOffset;
+ } else {
+ *outNode = parent;
+ offset++; // add one since this is an endpoint - want to be AFTER node.
+ *outOffset = offset;
+ }
+ return rv;
+ }
+ }
+
+ return rv;
+}
+
+// Returns whatever GetChildAt_Deprecated(aOffset) yields for aParent, or null
+// when aParent is null or is not an nsIContent.
+//
+// A non-content parent is a caller bug and still asserts in debug builds.
+// MOZ_ASSERT is compiled out of release builds, though, so the explicit
+// check keeps such a parent from becoming a null dereference there.
+nsCOMPtr<nsINode> nsHTMLCopyEncoder::GetChildAt(nsINode* aParent,
+                                                int32_t aOffset) {
+  nsCOMPtr<nsINode> resultNode;
+
+  if (!aParent) {
+    return resultNode;
+  }
+
+  nsCOMPtr<nsIContent> content = nsIContent::FromNodeOrNull(aParent);
+  MOZ_ASSERT(content, "null content in nsHTMLCopyEncoder::GetChildAt");
+  if (!content) {
+    return resultNode;
+  }
+
+  resultNode = content->GetChildAt_Deprecated(aOffset);
+
+  return resultNode;
+}
+
+// True when aElement is an HTML <br> element that reports itself as padding
+// for an empty last line; false for null and for every other element.
+bool nsHTMLCopyEncoder::IsMozBR(Element* aElement) {
+  if (HTMLBRElement* br = HTMLBRElement::FromNodeOrNull(aElement)) {
+    return br->IsPaddingForEmptyLastLine();
+  }
+  return false;
+}
+
+// Reports where inChild sits in the tree: *outParent receives its parent
+// content node and *outOffset the index that ComputeIndexOf_Deprecated()
+// returns for it.  Fails with NS_ERROR_NULL_POINTER when an argument is
+// null, when inChild is not an nsIContent, or when it has no content parent.
+nsresult nsHTMLCopyEncoder::GetNodeLocation(nsINode* inChild,
+                                            nsCOMPtr<nsINode>* outParent,
+                                            int32_t* outOffset) {
+  NS_ASSERTION((inChild && outParent && outOffset), "bad args");
+  if (!inChild || !outParent || !outOffset) {
+    return NS_ERROR_NULL_POINTER;
+  }
+
+  nsCOMPtr<nsIContent> childContent = nsIContent::FromNodeOrNull(inChild);
+  if (!childContent) {
+    return NS_ERROR_NULL_POINTER;
+  }
+
+  nsIContent* container = childContent->GetParent();
+  if (!container) {
+    return NS_ERROR_NULL_POINTER;
+  }
+
+  *outParent = container;
+  *outOffset = container->ComputeIndexOf_Deprecated(childContent);
+  return NS_OK;
+}
+
+// Decides whether aNode is a boundary that promotion must not climb past.
+// With mIsTextWidget set that is an HTML <div>; otherwise it is an HTML
+// <body>, <td> or <th>.  Non-content nodes (and null) are never roots.
+bool nsHTMLCopyEncoder::IsRoot(nsINode* aNode) {
+  nsCOMPtr<nsIContent> asContent = nsIContent::FromNodeOrNull(aNode);
+  if (!asContent) {
+    return false;
+  }
+
+  return mIsTextWidget
+             ? asContent->IsHTMLElement(nsGkAtoms::div)
+             : asContent->IsAnyOfHTMLElements(nsGkAtoms::body, nsGkAtoms::td,
+                                              nsGkAtoms::th);
+}
+
+// Returns true when every sibling preceding aNode reports
+// TextIsOnlyWhitespace(), i.e. aNode is effectively the first child.
+//
+// HACK (kept from the original author): whitespace-only siblings are simply
+// assumed to be invisible formatting; no real visibility check is done yet.
+bool nsHTMLCopyEncoder::IsFirstNode(nsINode* aNode) {
+  nsIContent* earlier = aNode->GetPreviousSibling();
+  while (earlier) {
+    if (!earlier->TextIsOnlyWhitespace()) {
+      return false;
+    }
+    earlier = earlier->GetPreviousSibling();
+  }
+  return true;
+}
+
+// Returns true when every sibling following aNode is either a trailing
+// "moz BR" (see IsMozBR()) or reports TextIsOnlyWhitespace(), i.e. aNode is
+// effectively the last child.
+//
+// HACK (kept from the original author): whitespace-only siblings are simply
+// assumed to be invisible formatting; no real visibility check is done yet.
+bool nsHTMLCopyEncoder::IsLastNode(nsINode* aNode) {
+  nsIContent* later = aNode->GetNextSibling();
+  while (later) {
+    const bool isTrailingMozBR =
+        later->IsElement() && IsMozBR(later->AsElement());
+    if (!isTrailingMozBR && !later->TextIsOnlyWhitespace()) {
+      return false;
+    }
+    later = later->GetNextSibling();
+  }
+  return true;
+}
+
+// Factory: allocates an nsHTMLCopyEncoder and hands the caller an owning
+// reference through the nsIDocumentEncoder interface.
+already_AddRefed<nsIDocumentEncoder> do_createHTMLCopyEncoder() {
+  RefPtr<nsIDocumentEncoder> encoder = new nsHTMLCopyEncoder;
+  return encoder.forget();
+}
+
+// Counts the leading entries of aAncestorArray that are HTML table-structure
+// elements (<tr>, <thead>, <tbody>, <tfoot>, <table>).  Counting stops at the
+// first entry that is null, is not content, or is some other element.
+int32_t nsHTMLCopyEncoder::RangeNodeContext::GetImmediateContextCount(
+    const nsTArray<nsINode*>& aAncestorArray) const {
+  const int32_t total = aAncestorArray.Length();
+  int32_t count = 0;
+
+  for (; count < total; ++count) {
+    nsINode* ancestor = aAncestorArray.ElementAt(count);
+    if (!ancestor) {
+      break;
+    }
+
+    nsCOMPtr<nsIContent> asContent(nsIContent::FromNodeOrNull(ancestor));
+    const bool isTablePart =
+        asContent && asContent->IsAnyOfHTMLElements(
+                         nsGkAtoms::tr, nsGkAtoms::thead, nsGkAtoms::tbody,
+                         nsGkAtoms::tfoot, nsGkAtoms::table);
+    if (!isTablePart) {
+      break;
+    }
+  }
+
+  return count;
+}
diff --git a/dom/serializers/nsHTMLContentSerializer.cpp b/dom/serializers/nsHTMLContentSerializer.cpp
new file mode 100644
index 0000000000..84b8f5748f
--- /dev/null
+++ b/dom/serializers/nsHTMLContentSerializer.cpp
@@ -0,0 +1,445 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * nsIContentSerializer implementation that can be used with an
+ * nsIDocumentEncoder to convert an HTML (not XHTML!) DOM to an HTML
+ * string that could be parsed into more or less the original DOM.
+ */
+
+#include "nsHTMLContentSerializer.h"
+
+#include "nsIContent.h"
+#include "mozilla/dom/Document.h"
+#include "nsElementTable.h"
+#include "nsNameSpaceManager.h"
+#include "nsString.h"
+#include "nsUnicharUtils.h"
+#include "nsIDocumentEncoder.h"
+#include "nsGkAtoms.h"
+#include "nsIURI.h"
+#include "nsNetUtil.h"
+#include "nsEscape.h"
+#include "nsCRT.h"
+#include "nsContentUtils.h"
+#include "nsIScriptElement.h"
+#include "nsAttrName.h"
+#include "mozilla/dom/Element.h"
+#include "nsParserConstants.h"
+
+using namespace mozilla::dom;
+
+// Factory: creates an nsHTMLContentSerializer and transfers the owning
+// reference to *aSerializer.  Always returns NS_OK.
+nsresult NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer) {
+  RefPtr<nsHTMLContentSerializer> serializer = new nsHTMLContentSerializer();
+  serializer.forget(aSerializer);
+  return NS_OK;
+}
+
+// Marks this instance as the HTML flavour by setting mIsHTMLSerializer.
+nsHTMLContentSerializer::nsHTMLContentSerializer() {
+  mIsHTMLSerializer = true;
+}
+
+// Compiler-generated teardown is sufficient.
+nsHTMLContentSerializer::~nsHTMLContentSerializer() = default;
+
+// The HTML serializer emits nothing at the start of a document, so this
+// override just reports success.
+NS_IMETHODIMP
+nsHTMLContentSerializer::AppendDocumentStart(Document* aDocument) {
+  // Intentionally empty: aDocument is not inspected.
+  return NS_OK;
+}
+
+// Appends all attributes of aElement to aStr in HTML syntax.
+//
+// Per attribute, in this order:
+//  - names starting with "_moz" or "-moz" are dropped;
+//  - the `value` of an <li> that is the first child of an <ol> being copied
+//    is skipped (it is written by SerializeLIValueAttribute() instead);
+//  - href/src values are made absolute when OutputAbsoluteLinks is set and
+//    the element has a base URI;
+//  - the `content` of <meta http-equiv="content-type"> is replaced with the
+//    charset actually used when mRewriteEncodingDeclaration is set;
+//  - empty shorthand attributes are expanded to name="name".
+//
+// aOriginalElement, aTagPrefix and aTagNamespaceURI are not used here.
+// Returns false as soon as SerializeAttr() fails, true otherwise.
+bool nsHTMLContentSerializer::SerializeHTMLAttributes(
+    Element* aElement, Element* aOriginalElement, nsAString& aTagPrefix,
+    const nsAString& aTagNamespaceURI, nsAtom* aTagName, int32_t aNamespace,
+    nsAString& aStr) {
+  MaybeSerializeIsValue(aElement, aStr);
+
+  const int32_t attrCount = aElement->GetAttrCount();
+  if (attrCount == 0) {
+    return true;
+  }
+
+  const bool tagIsXHTML = aNamespace == kNameSpaceID_XHTML;
+  nsAutoString valueStr;
+
+  for (int32_t i = 0; i < attrCount; ++i) {
+    const nsAttrName* attr = aElement->GetAttrNameAt(i);
+    const int32_t attrNS = attr->NamespaceID();
+    nsAtom* localName = attr->LocalName();
+    const bool attrHasNoNS = attrNS == kNameSpaceID_None;
+
+    // Filter out any attribute starting with [-|_]moz
+    nsDependentAtomString nameStr(localName);
+    if (StringBeginsWith(nameStr, u"_moz"_ns) ||
+        StringBeginsWith(nameStr, u"-moz"_ns)) {
+      continue;
+    }
+
+    aElement->GetAttr(attrNS, localName, valueStr);
+
+    // <li value> of the first <ol> child is handled separately in
+    // SerializeLIValueAttribute().
+    const bool isCopiedFirstLIValue =
+        mIsCopying && mIsFirstChildOfOL && aTagName == nsGkAtoms::li &&
+        tagIsXHTML && localName == nsGkAtoms::value && attrHasNoNS;
+    if (isCopiedFirstLIValue) {
+      continue;
+    }
+
+    const bool isJS = IsJavaScript(aElement, localName, attrNS, valueStr);
+
+    // Make all links absolute when converting only the selection.  OBJECT
+    // tags would additionally need CODEBASE handling, so they stay relative.
+    const bool isLinkAttr =
+        (localName == nsGkAtoms::href &&
+         (attrHasNoNS || attrNS == kNameSpaceID_XLink)) ||
+        (localName == nsGkAtoms::src && attrHasNoNS);
+    if (isLinkAttr && (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks)) {
+      if (nsIURI* baseURI = aElement->GetBaseURI()) {
+        nsAutoString absURI;
+        if (NS_SUCCEEDED(NS_MakeAbsoluteURI(absURI, valueStr, baseURI))) {
+          valueStr = absURI;
+        }
+      }
+    }
+
+    // For <meta http-equiv="content-type"> write the charset we are really
+    // producing rather than whatever the document says.
+    const bool isMetaContent = mRewriteEncodingDeclaration &&
+                               aTagName == nsGkAtoms::meta && tagIsXHTML &&
+                               localName == nsGkAtoms::content && attrHasNoNS;
+    if (isMetaContent) {
+      nsAutoString header;
+      aElement->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
+      if (header.LowerCaseEqualsLiteral("content-type")) {
+        valueStr = u"text/html; charset="_ns + NS_ConvertASCIItoUTF16(mCharset);
+      }
+    }
+
+    nsAutoString prefix;
+    if (attrNS == kNameSpaceID_XML) {
+      prefix.AssignLiteral(u"xml");
+    } else if (attrNS == kNameSpaceID_XLink) {
+      prefix.AssignLiteral(u"xlink");
+    }
+
+    // Expand shorthand attribute.
+    if (tagIsXHTML && attrHasNoNS && IsShorthandAttr(localName, aTagName) &&
+        valueStr.IsEmpty()) {
+      valueStr = nameStr;
+    }
+
+    NS_ENSURE_TRUE(SerializeAttr(prefix, nameStr, valueStr, aStr, !isJS),
+                   false);
+  }
+
+  return true;
+}
+
+// Serializes the start tag of aElement ("<name attr=...>") into *mOutput.
+//
+// Steps, in order:
+//  1. Let CheckElementStart() veto the element; on veto only the <pre>
+//     bookkeeping is updated and its rv is returned.
+//  2. Emit the line break / indentation / pending space that precede the tag.
+//  3. Emit "<" and the tag name, then update <pre> tracking and indentation.
+//  4. While copying, maintain the <ol>/<li> numbering state.
+//  5. Emit the attributes and the closing ">".
+//  6. Disable entity encoding for the contents of script/style/noframes (and
+//     noscript when scripting is enabled).
+//  7. Emit the line break that follows the tag, then run AfterElementStart().
+//
+// aOriginalElement is consulted for the <li> position check and is passed
+// through to the attribute and AfterElementStart() helpers.
+// Returns NS_ERROR_OUT_OF_MEMORY when any append helper reports failure.
+NS_IMETHODIMP
+nsHTMLContentSerializer::AppendElementStart(Element* aElement,
+                                            Element* aOriginalElement) {
+  NS_ENSURE_ARG(aElement);
+  NS_ENSURE_STATE(mOutput);
+
+  bool forceFormat = false;
+  nsresult rv = NS_OK;
+  if (!CheckElementStart(aElement, forceFormat, *mOutput, rv)) {
+    // When we go to AppendElementEnd for this element, we're going to
+    // MaybeLeaveFromPreContent().  So make sure to MaybeEnterInPreContent()
+    // now, so our PreLevel() doesn't get confused.
+    MaybeEnterInPreContent(aElement);
+    return rv;
+  }
+
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  nsAtom* name = aElement->NodeInfo()->NameAtom();
+  int32_t ns = aElement->GetNameSpaceID();
+
+  bool lineBreakBeforeOpen = LineBreakBeforeOpen(ns, name);
+
+  if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) {
+    if (mColPos && lineBreakBeforeOpen) {
+      NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY);
+    } else {
+      NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(*mOutput),
+                     NS_ERROR_OUT_OF_MEMORY);
+    }
+    if (!mColPos) {
+      NS_ENSURE_TRUE(AppendIndentation(*mOutput), NS_ERROR_OUT_OF_MEMORY);
+    } else if (mAddSpace) {
+      // Clear the flag before checking the result so it is reset even when
+      // the append fails.
+      bool result = AppendToString(char16_t(' '), *mOutput);
+      mAddSpace = false;
+      NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY);
+    }
+  } else if (mAddSpace) {
+    bool result = AppendToString(char16_t(' '), *mOutput);
+    mAddSpace = false;
+    NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY);
+  } else {
+    NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(*mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+  }
+  // Always reset to avoid false newlines in case MaybeAddNewlineForRootNode
+  // wasn't called
+  mAddNewlineForRootNode = false;
+
+  NS_ENSURE_TRUE(AppendToString(kLessThan, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+
+  NS_ENSURE_TRUE(AppendToString(nsDependentAtomString(name), *mOutput),
+                 NS_ERROR_OUT_OF_MEMORY);
+
+  MaybeEnterInPreContent(aElement);
+
+  // for block elements, we increase the indentation
+  if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) {
+    NS_ENSURE_TRUE(IncrIndentation(name), NS_ERROR_OUT_OF_MEMORY);
+  }
+
+  // Need to keep track of OL and LI elements in order to get ordinal number
+  // for the LI.
+  if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML) {
+    // We are copying and current node is an OL;
+    // Store its start attribute value in olState->startVal.
+    nsAutoString start;
+    int32_t startAttrVal = 0;
+
+    aElement->GetAttr(kNameSpaceID_None, nsGkAtoms::start, start);
+    if (!start.IsEmpty()) {
+      // Separate status variable: the function-level `rv` must not be
+      // shadowed by the integer-parse result.
+      nsresult parseRv = NS_OK;
+      startAttrVal = start.ToInteger(&parseRv);
+      // If OL has "start" attribute, first LI element has to start with that
+      // value.  Therefore subtracting 1 as all the LI elements are
+      // incrementing it before using it; in failure of ToInteger(), default
+      // startAttrVal to 0.
+      if (NS_SUCCEEDED(parseRv)) {
+        startAttrVal--;
+      } else {
+        startAttrVal = 0;
+      }
+    }
+    mOLStateStack.AppendElement(olState(startAttrVal, true));
+  }
+
+  if (mIsCopying && name == nsGkAtoms::li && ns == kNameSpaceID_XHTML) {
+    mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement);
+    if (mIsFirstChildOfOL) {
+      // If OL is parent of this LI, serialize attributes in different manner.
+      NS_ENSURE_TRUE(SerializeLIValueAttribute(aElement, *mOutput),
+                     NS_ERROR_OUT_OF_MEMORY);
+    }
+  }
+
+  // Even LI passed above have to go through this
+  // for serializing attributes other than "value".
+  nsAutoString dummyPrefix;
+  NS_ENSURE_TRUE(
+      SerializeHTMLAttributes(aElement, aOriginalElement, dummyPrefix, u""_ns,
+                              name, ns, *mOutput),
+      NS_ERROR_OUT_OF_MEMORY);
+
+  NS_ENSURE_TRUE(AppendToString(kGreaterThan, *mOutput),
+                 NS_ERROR_OUT_OF_MEMORY);
+
+  // The contents of these elements are written without entity encoding;
+  // AppendElementEnd() performs the matching decrement.
+  if (ns == kNameSpaceID_XHTML &&
+      (name == nsGkAtoms::script || name == nsGkAtoms::style ||
+       (name == nsGkAtoms::noscript &&
+        aElement->OwnerDoc()->IsScriptEnabled()) ||
+       name == nsGkAtoms::noframes)) {
+    ++mDisableEntityEncoding;
+  }
+
+  if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel() &&
+      LineBreakAfterOpen(ns, name)) {
+    NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY);
+  }
+
+  NS_ENSURE_TRUE(AfterElementStart(aElement, aOriginalElement, *mOutput),
+                 NS_ERROR_OUT_OF_MEMORY);
+
+  return NS_OK;
+}
+
+// Serializes the end tag of aElement ("</name>") into *mOutput and undoes the
+// per-element state that AppendElementStart() set up: entity-encoding
+// suppression, indentation, the <ol> state stack, <pre> tracking and the
+// in-body counter.  No end tag is written for a malformed <script> (when pre
+// levels are being maintained) or for HTML elements that are not containers.
+// aOriginalElement is not used here.
+NS_IMETHODIMP
+nsHTMLContentSerializer::AppendElementEnd(Element* aElement,
+                                          Element* aOriginalElement) {
+  NS_ENSURE_ARG(aElement);
+  NS_ENSURE_STATE(mOutput);
+
+  nsAtom* name = aElement->NodeInfo()->NameAtom();
+  const int32_t nameSpaceID = aElement->GetNameSpaceID();
+  const bool isXHTML = nameSpaceID == kNameSpaceID_XHTML;
+
+  // Mirror of the increment performed in AppendElementStart().
+  const bool suppressedEntityEncoding =
+      isXHTML && (name == nsGkAtoms::script || name == nsGkAtoms::style ||
+                  (name == nsGkAtoms::noscript &&
+                   aElement->OwnerDoc()->IsScriptEnabled()) ||
+                  name == nsGkAtoms::noframes);
+  if (suppressedEntityEncoding) {
+    --mDisableEntityEncoding;
+  }
+
+  const bool forceFormat =
+      !(mFlags & nsIDocumentEncoder::OutputIgnoreMozDirty) &&
+      aElement->HasAttr(kNameSpaceID_None, nsGkAtoms::mozdirty);
+
+  if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) {
+    DecrIndentation(name);
+  }
+
+  if (name == nsGkAtoms::script) {
+    nsCOMPtr<nsIScriptElement> script = do_QueryInterface(aElement);
+
+    // A malformed script tag means the end tag was missing in the source.
+    // Imitate that by not serializing one either.
+    if (ShouldMaintainPreLevel() && script && script->IsMalformed()) {
+      --PreLevel();
+      return NS_OK;
+    }
+  } else if (mIsCopying && name == nsGkAtoms::ol && isXHTML) {
+    NS_ASSERTION((!mOLStateStack.IsEmpty()), "Cannot have an empty OL Stack");
+    // Every OL start tag is supposed to have pushed an olState, so there
+    // should always be one to pop; guard anyway.
+    if (!mOLStateStack.IsEmpty()) {
+      mOLStateStack.RemoveLastElement();
+    }
+  }
+
+  if (isXHTML) {
+    const bool isContainer =
+        nsHTMLElement::IsContainer(nsHTMLTags::CaseSensitiveAtomTagToId(name));
+    if (!isContainer) {
+      // Keep this in sync with the cleanup at the end of this method.
+      MOZ_ASSERT(name != nsGkAtoms::body);
+      MaybeLeaveFromPreContent(aElement);
+      return NS_OK;
+    }
+  }
+
+  if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) {
+    const bool lineBreakBeforeClose = LineBreakBeforeClose(nameSpaceID, name);
+
+    if (mColPos && lineBreakBeforeClose) {
+      NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY);
+    }
+    if (!mColPos) {
+      NS_ENSURE_TRUE(AppendIndentation(*mOutput), NS_ERROR_OUT_OF_MEMORY);
+    } else if (mAddSpace) {
+      // Reset the flag before checking the append result.
+      bool result = AppendToString(char16_t(' '), *mOutput);
+      mAddSpace = false;
+      NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY);
+    }
+  } else if (mAddSpace) {
+    bool result = AppendToString(char16_t(' '), *mOutput);
+    mAddSpace = false;
+    NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY);
+  }
+
+  // The end tag itself.
+  NS_ENSURE_TRUE(AppendToString(kEndTag, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+  NS_ENSURE_TRUE(AppendToString(nsDependentAtomString(name), *mOutput),
+                 NS_ERROR_OUT_OF_MEMORY);
+  NS_ENSURE_TRUE(AppendToString(kGreaterThan, *mOutput),
+                 NS_ERROR_OUT_OF_MEMORY);
+
+  // Keep this cleanup in sync with the IsContainer() early return above.
+  MaybeLeaveFromPreContent(aElement);
+
+  if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel() &&
+      LineBreakAfterClose(nameSpaceID, name)) {
+    NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY);
+  } else {
+    MaybeFlagNewlineForRootNode(aElement);
+  }
+
+  if (name == nsGkAtoms::body && isXHTML) {
+    --mInBody;
+  }
+
+  return NS_OK;
+}
+
+static const uint16_t kValNBSP = 160;  // last index of the tables below (0xA0)
+
+#define _ 0  // table filler: slot 0 of kEntityStrings[] is nullptr
+
+/* Entity table used outside attribute values. Each entry is an index into
+ * kEntityStrings[]. The table has 161 entries laid out ten per row, so the
+ * non-zero slots sit at positions 38, 60, 62 and 160.
+ * NOTE(review): positions presumably are character codes ('&', '<', '>',
+ * no-break space); the lookup itself lives in nsXMLContentSerializer, so
+ * confirm there.
+ */
+const uint8_t nsHTMLContentSerializer::kEntities[] = {
+ // clang-format off
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, 2, _,  // 38 -> "&amp;"
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ 3, _, 4, _, _, _, _, _, _, _,  // 60 -> "&lt;", 62 -> "&gt;"
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ 5  // 160 (kValNBSP) -> "&nbsp;"
+ // clang-format on
+};
+
+/* Entity table used inside attribute values. Same layout as kEntities[],
+ * with one extra non-zero slot at position 34 that selects "&quot;".
+ */
+const uint8_t nsHTMLContentSerializer::kAttrEntities[] = {
+ // clang-format off
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, 1, _, _, _, 2, _,  // 34 -> "&quot;", 38 -> "&amp;"
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ 3, _, 4, _, _, _, _, _, _, _,  // 60 -> "&lt;", 62 -> "&gt;"
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ 5  // 160 (kValNBSP) -> "&nbsp;"
+ // clang-format on
+};
+
+#undef _
+
+const char* const nsHTMLContentSerializer::kEntityStrings[] = {
+ /* 0: filler, no replacement */ nullptr,
+ /* 1 */ "&quot;",
+ /* 2 */ "&amp;",
+ /* 3 */ "&lt;",
+ /* 4 */ "&gt;",
+ /* 5 */ "&nbsp;"};
+
+// Appends aStr to aOutputStr, replacing characters with entity references
+// according to the current context:
+//  - body-only output before the body has started: nothing is written;
+//  - entity encoding disabled: aStr is copied verbatim;
+//  - otherwise kAttrEntities (inside an attribute) or kEntities is used, with
+//    the table limited to kValNBSP when OutputEncodeBasicEntities is set and
+//    to kGTVal when it is not, so that &nbsp; is left alone in that case.
+// Returns false when an append fails.
+bool nsHTMLContentSerializer::AppendAndTranslateEntities(
+    const nsAString& aStr, nsAString& aOutputStr) {
+  if (mBodyOnly && !mInBody) {
+    return true;
+  }
+
+  if (mDisableEntityEncoding) {
+    return aOutputStr.Append(aStr, mozilla::fallible);
+  }
+
+  // Per the API documentation, OutputEncodeBasicEntities encodes &nbsp;,
+  // &amp;, &lt;, &gt;, and &quot;.  Without it we still use our own tables
+  // rather than the superclass 2-arg version (which wants to encode more
+  // characters than we do), but pass a smaller max index so &nbsp; is
+  // skipped.
+  const bool encodeNBSP =
+      (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities)) != 0;
+
+  if (mInAttribute) {
+    if (encodeNBSP) {
+      return nsXMLContentSerializer::AppendAndTranslateEntities<kValNBSP>(
+          aStr, aOutputStr, kAttrEntities, kEntityStrings);
+    }
+    return nsXMLContentSerializer::AppendAndTranslateEntities<kGTVal>(
+        aStr, aOutputStr, kAttrEntities, kEntityStrings);
+  }
+
+  if (encodeNBSP) {
+    return nsXMLContentSerializer::AppendAndTranslateEntities<kValNBSP>(
+        aStr, aOutputStr, kEntities, kEntityStrings);
+  }
+  return nsXMLContentSerializer::AppendAndTranslateEntities<kGTVal>(
+      aStr, aOutputStr, kEntities, kEntityStrings);
+}
diff --git a/dom/serializers/nsHTMLContentSerializer.h b/dom/serializers/nsHTMLContentSerializer.h
new file mode 100644
index 0000000000..7307eb6e3c
--- /dev/null
+++ b/dom/serializers/nsHTMLContentSerializer.h
@@ -0,0 +1,53 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * nsIContentSerializer implementation that can be used with an
+ * nsIDocumentEncoder to convert an HTML (not XHTML!) DOM to an HTML
+ * string that could be parsed into more or less the original DOM.
+ */
+
+#ifndef nsHTMLContentSerializer_h__
+#define nsHTMLContentSerializer_h__
+
+#include "mozilla/Attributes.h"
+#include "nsXHTMLContentSerializer.h"
+#include "nsString.h"
+
+class nsAtom;
+
+/**
+ * Content serializer for HTML (not XHTML) DOM trees.  It builds on
+ * nsXHTMLContentSerializer and replaces element start/end handling,
+ * attribute output and entity translation with HTML-specific versions.
+ */
+class nsHTMLContentSerializer final : public nsXHTMLContentSerializer {
+ public:
+  nsHTMLContentSerializer();
+  virtual ~nsHTMLContentSerializer();
+
+  /** Writes the start tag (name and attributes) of aElement. */
+  NS_IMETHOD AppendElementStart(
+      mozilla::dom::Element* aElement,
+      mozilla::dom::Element* aOriginalElement) override;
+
+  /** Writes the end tag of aElement, where HTML has one. */
+  NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement,
+                              mozilla::dom::Element* aOriginalElement) override;
+
+  /** See nsIContentSerializer::AppendDocumentStart. */
+  NS_IMETHOD AppendDocumentStart(mozilla::dom::Document* aDocument) override;
+
+ protected:
+  /**
+   * Appends the attributes of aContent to aStr.
+   * @return false when appending fails.
+   */
+  [[nodiscard]] virtual bool SerializeHTMLAttributes(
+      mozilla::dom::Element* aContent, mozilla::dom::Element* aOriginalElement,
+      nsAString& aTagPrefix, const nsAString& aTagNamespaceURI,
+      nsAtom* aTagName, int32_t aNamespace, nsAString& aStr);
+
+  /**
+   * Appends aStr to aOutputStr, substituting entity references.
+   * @return false when appending fails.
+   */
+  [[nodiscard]] bool AppendAndTranslateEntities(const nsAString& aStr,
+                                                nsAString& aOutputStr) override;
+
+ private:
+  // Lookup tables for AppendAndTranslateEntities(): the two uint8_t tables
+  // hold indices into kEntityStrings, for text and for attribute values
+  // respectively.
+  static const uint8_t kEntities[];
+  static const uint8_t kAttrEntities[];
+  static const char* const kEntityStrings[];
+};
+/** Creates an nsHTMLContentSerializer and hands it out via aSerializer. */
+nsresult NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer);
+
+#endif
diff --git a/dom/serializers/nsIContentSerializer.h b/dom/serializers/nsIContentSerializer.h
new file mode 100644
index 0000000000..18e9e5b4cd
--- /dev/null
+++ b/dom/serializers/nsIContentSerializer.h
@@ -0,0 +1,97 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsIContentSerializer_h
+#define nsIContentSerializer_h
+
+#include "nsISupports.h"
+#include "nsStringFwd.h"
+
+class nsIContent;
+
+namespace mozilla {
+class Encoding;
+namespace dom {
+class Comment;
+class Document;
+class DocumentType;
+class Element;
+class ProcessingInstruction;
+} // namespace dom
+} // namespace mozilla
+
+#define NS_ICONTENTSERIALIZER_IID \
+ { \
+ 0xb1ee32f2, 0xb8c4, 0x49b9, { \
+ 0x93, 0xdf, 0xb6, 0xfa, 0xb5, 0xd5, 0x46, 0x88 \
+ } \
+ }
+/**
+ * Abstract interface for content serializers: every method is pure virtual.
+ * Output produced by the Append* methods goes to the string handed to
+ * Init().
+ */
+class nsIContentSerializer : public nsISupports {
+ public:
+ NS_DECLARE_STATIC_IID_ACCESSOR(NS_ICONTENTSERIALIZER_IID)
+
+ /**
+ * Prepares the serializer for a run.
+ *
+ * @param aNeedsPerformatScanning Out-parameter. When set to true,
+ * ScanElementForPreformat / ForgetElementForPreformat are called
+ * around each element (see the note at the end of this class).
+ * @param aOutput The `Append*` methods will append to this string. The
+ * reference to it will be dropped with `Finish`.
+ */
+ NS_IMETHOD Init(uint32_t flags, uint32_t aWrapColumn,
+ const mozilla::Encoding* aEncoding, bool aIsCopying,
+ bool aIsWholeDocument, bool* aNeedsPerformatScanning,
+ nsAString& aOutput) = 0;
+/**
+ * The Append* methods below each add one node (or part of one) to the
+ * output string.
+ * NOTE(review): aStartOffset / aEndOffset presumably delimit the part of
+ * the node's data to serialize - confirm against the implementations.
+ */
+ NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset,
+ int32_t aEndOffset) = 0;
+
+ NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection, int32_t aStartOffset,
+ int32_t aEndOffset) = 0;
+
+ NS_IMETHOD AppendProcessingInstruction(
+ mozilla::dom::ProcessingInstruction* aPI, int32_t aStartOffset,
+ int32_t aEndOffset) = 0;
+
+ NS_IMETHOD AppendComment(mozilla::dom::Comment* aComment,
+ int32_t aStartOffset, int32_t aEndOffset) = 0;
+
+ NS_IMETHOD AppendDoctype(mozilla::dom::DocumentType* aDoctype) = 0;
+
+ NS_IMETHOD AppendElementStart(mozilla::dom::Element* aElement,
+ mozilla::dom::Element* aOriginalElement) = 0;
+
+ NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement,
+ mozilla::dom::Element* aOriginalElement) = 0;
+/** NOTE(review): presumably flushes pending output, then does Finish(). */
+ NS_IMETHOD FlushAndFinish() = 0;
+
+ /**
+ * Drops the reference to the output buffer.
+ */
+ NS_IMETHOD Finish() = 0;
+/** NOTE(review): presumably reports the current output length in aLength. */
+ NS_IMETHOD GetOutputLength(uint32_t& aLength) const = 0;
+
+ /**
+ * Append any items in the beginning of the document that won't be
+ * serialized by other methods. XML declaration is the most likely
+ * thing this method can produce.
+ */
+ NS_IMETHOD AppendDocumentStart(mozilla::dom::Document* aDocument) = 0;
+
+ // If Init() sets *aNeedsPerformatScanning to true, then these methods are
+ // called when elements are started and ended, before AppendElementStart
+ // and AppendElementEnd, respectively. They are supposed to be used to
+ // allow the implementer to keep track of whether the element is
+ // preformatted.
+ NS_IMETHOD ScanElementForPreformat(mozilla::dom::Element* aElement) = 0;
+ NS_IMETHOD ForgetElementForPreformat(mozilla::dom::Element* aElement) = 0;
+};
+
+NS_DEFINE_STATIC_IID_ACCESSOR(nsIContentSerializer, NS_ICONTENTSERIALIZER_IID)
+
+#define NS_CONTENTSERIALIZER_CONTRACTID_PREFIX \
+ "@mozilla.org/layout/contentserializer;1?mimetype="
+
+#endif /* nsIContentSerializer_h */
diff --git a/dom/serializers/nsIDocumentEncoder.idl b/dom/serializers/nsIDocumentEncoder.idl
new file mode 100644
index 0000000000..d909c3989a
--- /dev/null
+++ b/dom/serializers/nsIDocumentEncoder.idl
@@ -0,0 +1,361 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.idl"
+
+interface nsIOutputStream;
+
+webidl Document;
+webidl Node;
+webidl Range;
+webidl Selection;
+
+%{ C++
+class nsINode;
+
+%}
+[ptr] native nsINodePtr(nsINode);
+
+[scriptable, uuid(3d9371d8-a2ad-403e-8b0e-8885ad3562e3)]
+interface nsIDocumentEncoderNodeFixup : nsISupports
+{
+ /**
+ * Create a fixed up version of a node. This method is called before
+ * each node in a document is about to be persisted. The implementor
+ * may return a new node with fixed up attributes or null. If null is
+ * returned the node should be used as-is.
+ * @param aNode Node to fixup.
+ * @param [OUT] aSerializeCloneKids True if the document encoder should
+ * apply recursive serialization to the children of the fixed up node
+ * instead of the children of the original node.
+ * @return The resulting fixed up node.
+ */
+ Node fixupNode(in Node aNode, out boolean aSerializeCloneKids);
+};
+
+[scriptable, uuid(21f112df-d96f-47da-bfcb-5331273003d1)]
+interface nsIDocumentEncoder : nsISupports
+{
+ // Output methods flag bits. There are a frightening number of these,
+ // because everyone wants something a little bit different
+
+
+ /**
+ * Output only the selection (as opposed to the whole document).
+ */
+ const unsigned long OutputSelectionOnly = (1 << 0);
+
+ /**
+ * Plaintext output:
+ * - Convert html to plaintext that looks like the html.
+ * - Can't be used in conjunction with `OutputPreformatted`.
+ * - Implies wrap (except inside <pre>), since html wraps.
+ * HTML and XHTML output:
+ * - Do prettyprinting, ignoring existing formatting.
+ * - Implies wrap (except in attribute values and inside <pre>).
+ * XML output:
+ * - Do prettyprinting, ignoring existing formatting.
+ * - Doesn't implicitly wrap
+ */
+ const unsigned long OutputFormatted = (1 << 1);
+
+ /** Don't do prettyprinting. Don't do any wrapping that's not in the existing
+ * HTML/XML source. This option overrides OutputFormatted if both are set.
+ * HTML/XHTML output: If neither is set, there won't be prettyprinting either,
+ * but long lines will be wrapped.
+ * Supported also in XML and Plaintext output.
+ * @note This option does not affect entity conversion.
+ */
+ const unsigned long OutputRaw = (1 << 2);
+
+ /**
+ * Do not print html head tags.
+ * XHTML/HTML output only.
+ */
+ const unsigned long OutputBodyOnly = (1 << 3);
+
+ /**
+ * Output as though the content is preformatted
+ * (e.g. maybe it's wrapped in a PRE or PRE_WRAP style tag)
+ * Plaintext output only.
+ * Can't be used together with `OutputFormatted`/`OutputFormatFlowed`.
+ * XXXbz How does this interact with OutputRaw?
+ */
+ const unsigned long OutputPreformatted = (1 << 4);
+
+ /**
+ * Wrap even if we're not doing formatted output (e.g. for text fields).
+ * Supported in XML, XHTML, HTML and Plaintext output.
+ * Set implicitly in HTML/XHTML output when no OutputRaw.
+ * Ignored when OutputRaw.
+ * For XML, XHTML and HTML: does not wrap values in attributes.
+ * XXXLJ: set implicitly in HTML/XHTML output, to keep compatible behaviors
+ * for old callers of this interface
+ * XXXbz How does this interact with OutputFormatFlowed?
+ */
+ const unsigned long OutputWrap = (1 << 5);
+
+ /**
+ * Output for format flowed (RFC 2646). This is used when converting
+ * to text for mail sending. This differs just slightly
+ * but in an important way from normal formatted, and that is that
+ * lines are space stuffed. This can't (correctly) be done later.
+ * PlainText output only.
+ * If this flag is set, `OutputFormatted` has to be set too.
+ * XXXbz How does this interact with OutputRaw/OutputWrap?
+ */
+ const unsigned long OutputFormatFlowed = (1 << 6);
+
+ /**
+ * Convert links, image src, and script src to absolute URLs when possible.
+ * XHTML/HTML output only.
+ */
+ const unsigned long OutputAbsoluteLinks = (1 << 7);
+
+ /**
+ * LineBreak processing: if this flag is set then CR line breaks will
+ * be written. If neither this nor OutputLFLineBreak is set, then we
+ * will use platform line breaks. The combination of the two flags will
+ * cause CRLF line breaks to be written.
+ */
+ const unsigned long OutputCRLineBreak = (1 << 9);
+
+ /**
+ * LineBreak processing: if this flag is set then LF line breaks will
+ * be written. If neither this nor OutputCRLineBreak is set, then we
+ * will use platform line breaks. The combination of the two flags will
+ * cause CRLF line breaks to be written.
+ */
+ const unsigned long OutputLFLineBreak = (1 << 10);
+
+ /**
+ * Output the content of noscript elements (only for serializing
+ * to plaintext).
+ */
+ const unsigned long OutputNoScriptContent = (1 << 11);
+
+ /**
+ * Output the content of noframes elements (only for serializing
+ * to plaintext). (Used only internally in the plain text serializer;
+ * ignored if passed by the caller.)
+ */
+ const unsigned long OutputNoFramesContent = (1 << 12);
+
+ /**
+ * Don't allow any formatting nodes (e.g. <br>, <b>) inside a <pre>.
+ * This is used primarily by mail. XHTML/HTML output only.
+ */
+ const unsigned long OutputNoFormattingInPre = (1 << 13);
+
+ /**
+ * Encode entities when outputting to a string.
+ * E.g. If set, we'll output &nbsp; if clear, we'll output 0xa0.
+ * The basic set is just &nbsp; &amp; &lt; &gt; &quot; for interoperability
+ * with older products that don't support &alpha; and friends.
+ * HTML output only.
+ */
+ const unsigned long OutputEncodeBasicEntities = (1 << 14);
+
+ /**
+ * Normally &nbsp; is replaced with a space character when
+ * encoding data as plain text, set this flag if that's
+ * not desired.
+ * Plaintext output only.
+ */
+ const unsigned long OutputPersistNBSP = (1 << 17);
+
+ /**
+ * Normally when serializing the whole document using the HTML or
+ * XHTML serializer, the encoding declaration is rewritten to match.
+ * This flag suppresses that behavior.
+ */
+ const unsigned long OutputDontRewriteEncodingDeclaration = (1 << 18);
+
+ /**
+ * When using the HTML or XHTML serializer, skip elements that are not
+ * visible when this flag is set. Elements are not visible when they
+ * have CSS style display:none or visibility:collapse, for example.
+ */
+ const unsigned long SkipInvisibleContent = (1 << 19);
+
+ /**
+ * Output for delsp=yes (RFC 3676). This is used with OutputFormatFlowed
+ * when converting to text for mail sending.
+ * PlainText output only.
+ */
+ const unsigned long OutputFormatDelSp = (1 << 20);
+
+ /**
+ * Drop <br> elements considered "invisible" by the editor. OutputPreformatted
+ * implies this flag.
+ */
+ const unsigned long OutputDropInvisibleBreak = (1 << 21);
+
+ /**
+ * Don't check for _moz_dirty attributes when deciding whether to
+ * pretty-print if this flag is set (bug 599983).
+ */
+ const unsigned long OutputIgnoreMozDirty = (1 << 22);
+
+ /**
+ * Serialize in a way that is suitable for copying a plaintext version of the
+ * document to the clipboard. This can for example cause line endings to be
+ * injected at preformatted block element boundaries.
+ */
+ const unsigned long OutputForPlainTextClipboardCopy = (1 << 25);
+
+ /**
+ * Include ruby annotations and ruby parentheses in the output.
+ * PlainText output only.
+ */
+ const unsigned long OutputRubyAnnotation = (1 << 26);
+
+ /**
+ * Disallow breaking of long character strings. This is important
+ * for serializing e-mail which contains CJK strings. These must
+ * not be broken just as "normal" long strings aren't broken.
+ */
+ const unsigned long OutputDisallowLineBreaking = (1 << 27);
+
+ /**
+ * Release reference of Document after using encodeTo* method to recycle
+ * this encoder without holding Document. To use this encoder again,
+ * we have to call init again.
+ */
+ const unsigned long RequiresReinitAfterOutput = (1 << 28);
+
+ /**
+ * Initialize with a pointer to the document and the mime type.
+ * Resets wrap column to 72 and resets node fixup.
+ * @param aDocument Document to encode.
+ * @param aMimeType MimeType to use. May also be set by SetMimeType.
+ * @param aFlags Flags to use while encoding. May also be set by SetFlags.
+ */
+ void init(in Document aDocument,
+ in AString aMimeType,
+ in unsigned long aFlags);
+ [noscript] void nativeInit(in Document aDocument,
+ in AString aMimeType,
+ in unsigned long aFlags);
+
+ /**
+ * If the selection is set to a non-null value, then the
+ * selection is used for encoding, otherwise the entire
+ * document is encoded.
+ * @param aSelection The selection to encode.
+ */
+ void setSelection(in Selection aSelection);
+
+ /**
+ * If the range is set to a non-null value, then the
+ * range is used for encoding, otherwise the entire
+ * document or selection is encoded.
+ * @param aRange The range to encode.
+ */
+ void setRange(in Range aRange);
+
+ /**
+ * If the node is set to a non-null value, then the
+ * node is used for encoding, otherwise the entire
+ * document or range or selection is encoded.
+ * @param aNode The node to encode.
+ */
+ void setNode(in Node aNode);
+
+ /**
+ * If the container is set to a non-null value, then its
+ * child nodes are used for encoding, otherwise the entire
+ * document or range or selection or node is encoded.
+ * @param aContainer The node whose child nodes will be encoded.
+ */
+ void setContainerNode(in Node aContainer);
+
+ /**
+ * Documents typically have an intrinsic character set,
+ * but if no intrinsic value is found, the platform character set
+ * is used. This function overrides both the intrinsic and platform
+ * charset.
+ * @param aCharset Overrides both the intrinsic and platform
+ * character set when encoding the document.
+ *
+ * Possible result codes: NS_ERROR_NO_CHARSET_CONVERTER
+ */
+ void setCharset(in ACString aCharset);
+
+ /**
+ * Set a wrap column. This may have no effect in some types of encoders.
+ * @param aWrapColumn Column to which to wrap. If 0, wrapping is disabled.
+ */
+ void setWrapColumn(in unsigned long aWrapColumn);
+
+ /**
+ * The mime type preferred by the encoder. This piece of api was
+ * added because the copy encoder may need to switch mime types on you
+ * if you ask it to copy html that really represents plaintext content.
+ * Call this AFTER Init() and SetSelection() have both been called.
+ */
+ readonly attribute AString mimeType;
+
+ /**
+ * Encode the document and send the result to the nsIOutputStream.
+ *
+ * Possible result codes are the stream errors which might have
+ * been encountered.
+ * @param aStream Stream into which to encode.
+ */
+ void encodeToStream(in nsIOutputStream aStream);
+
+ /**
+ * Encode the document into a string.
+ *
+ * @return The document encoded into a string.
+ */
+ AString encodeToString();
+
+ /**
+ * Encode the document into a string. Stores the extra context information
+ * into the two arguments.
+ * @param [OUT] aContextString The string where the parent hierarchy
+ * information will be stored.
+ * @param [OUT] aInfoString The string where extra context info will
+ * be stored.
+ * @return The document encoded as a string.
+ *
+ */
+ AString encodeToStringWithContext( out AString aContextString,
+ out AString aInfoString);
+
+ /**
+ * Encode the document into a string of limited size.
+ * @param aMaxLength After aMaxLength characters, the encoder will stop
+ * encoding new data.
+ * Only values > 0 will be considered.
+ * The returned string may be slightly larger than
+ * aMaxLength because some serializers (eg. HTML)
+ * may need to close some tags after they stop
+ * encoding new data, or finish a line (72 columns
+ * by default for the plain text serializer).
+ *
+ * @return The document encoded into a string.
+ */
+ AString encodeToStringWithMaxLength(in unsigned long aMaxLength);
+
+ /**
+ * Set the fixup object associated with node persistence.
+ * @param aFixup The fixup object.
+ */
+ void setNodeFixup(in nsIDocumentEncoderNodeFixup aFixup);
+};
+
+%{ C++
+template<class T> struct already_AddRefed;
+
+bool
+do_getDocumentTypeSupportedForEncoding(const char* aContentType);
+already_AddRefed<nsIDocumentEncoder>
+do_createDocumentEncoder(const char* aContentType);
+already_AddRefed<nsIDocumentEncoder>
+do_createHTMLCopyEncoder();
+%}
diff --git a/dom/serializers/nsPlainTextSerializer.cpp b/dom/serializers/nsPlainTextSerializer.cpp
new file mode 100644
index 0000000000..952ed39942
--- /dev/null
+++ b/dom/serializers/nsPlainTextSerializer.cpp
@@ -0,0 +1,1826 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * nsIContentSerializer implementation that can be used with an
+ * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
+ * (eg for copy/paste as plaintext).
+ */
+
+#include "nsPlainTextSerializer.h"
+
+#include <limits>
+
+#include "nsPrintfCString.h"
+#include "nsDebug.h"
+#include "nsGkAtoms.h"
+#include "nsNameSpaceManager.h"
+#include "nsTextFragment.h"
+#include "nsContentUtils.h"
+#include "nsReadableUtils.h"
+#include "nsUnicharUtils.h"
+#include "nsCRT.h"
+#include "mozilla/Casting.h"
+#include "mozilla/TextEditor.h"
+#include "mozilla/dom/CharacterData.h"
+#include "mozilla/dom/Element.h"
+#include "mozilla/dom/HTMLBRElement.h"
+#include "mozilla/dom/Text.h"
+#include "mozilla/intl/Segmenter.h"
+#include "mozilla/intl/UnicodeProperties.h"
+#include "nsUnicodeProperties.h"
+#include "mozilla/Span.h"
+#include "mozilla/Preferences.h"
+#include "mozilla/StaticPrefs_converter.h"
+#include "nsComputedDOMStyle.h"
+
+namespace mozilla {
+class Encoding;
+}
+
+using namespace mozilla;
+using namespace mozilla::dom;
+
+#define PREF_STRUCTS "converter.html2txt.structs"
+#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
+
+// Base indentation unit; kIndentSizeList and kIndentSizeDD are defined in
+// terms of it.
+static const int32_t kTabSize = 4;
+// Indentation of h1 if mHeaderStrategy is kIndentIncreasedWithHeaderLevel or
+// kNumberHeadingsAndIndentSlightly. Indentation of other headers is derived
+// from that.
+static const int32_t kIndentSizeHeaders =
+ 2;
+// If mHeaderStrategy is kIndentIncreasedWithHeaderLevel, indent h(x+1) this
+// many columns more than h(x).
+static const int32_t kIndentIncrementHeaders =
+ 2;
+// Indentation of non-first lines of ul and ol.
+static const int32_t kIndentSizeList = kTabSize;
+// Indentation of <dd>.
+static const int32_t kIndentSizeDD = kTabSize;
+// No-break space (U+00A0) and the plain space that may replace it.
+static const char16_t kNBSP = 160;
+static const char16_t kSPACE = ' ';
+
+static int32_t HeaderLevel(const nsAtom* aTag);
+static int32_t GetUnicharWidth(char32_t ucs);
+static int32_t GetUnicharStringWidth(Span<const char16_t> aString);
+
+// Someday may want to make this non-const:
+static const uint32_t TagStackSize = 500;
+
+NS_IMPL_CYCLE_COLLECTING_ADDREF(nsPlainTextSerializer)
+NS_IMPL_CYCLE_COLLECTING_RELEASE(nsPlainTextSerializer)
+
+NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsPlainTextSerializer)
+ NS_INTERFACE_MAP_ENTRY(nsIContentSerializer)
+ NS_INTERFACE_MAP_ENTRY(nsISupports)
+NS_INTERFACE_MAP_END
+
+NS_IMPL_CYCLE_COLLECTION(nsPlainTextSerializer, mElement)
+
+// Factory: creates a new nsPlainTextSerializer and transfers the owning
+// reference to the caller through aSerializer. Always returns NS_OK.
+nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) {
+  RefPtr<nsPlainTextSerializer> serializer{new nsPlainTextSerializer()};
+  serializer.forget(aSerializer);
+  return NS_OK;
+}
+
+// Chooses the line break sequence that is written to the output.
+// @param aFlags As defined in nsIDocumentEncoder.idl.
+// @param aLineBreak Receives the chosen line break string.
+static void DetermineLineBreak(const int32_t aFlags, nsAString& aLineBreak) {
+  const bool wantsCR = (aFlags & nsIDocumentEncoder::OutputCRLineBreak) != 0;
+  const bool wantsLF = (aFlags & nsIDocumentEncoder::OutputLFLineBreak) != 0;
+
+  if (wantsCR && wantsLF) {
+    // Windows
+    aLineBreak.AssignLiteral(u"\r\n");
+    return;
+  }
+
+  if (wantsCR) {
+    // Mac
+    aLineBreak.AssignLiteral(u"\r");
+    return;
+  }
+
+  if (wantsLF) {
+    // Unix/DOM
+    aLineBreak.AssignLiteral(u"\n");
+    return;
+  }
+
+  // Neither flag is set: platform/default.
+  aLineBreak.AssignLiteral(NS_ULINEBREAK);
+}
+
+// Replaces every nbsp in mContent with a plain space, unless aFlags contains
+// nsIDocumentEncoder::OutputPersistNBSP.
+void nsPlainTextSerializer::CurrentLine::MaybeReplaceNbspsInContent(
+    const int32_t aFlags) {
+  if (aFlags & nsIDocumentEncoder::OutputPersistNBSP) {
+    // The caller asked for nbsp characters to be kept as they are.
+    return;
+  }
+
+  // Replace all nbsp characters with spaces, which the unicode encoder won't
+  // do for us.
+  mContent.ReplaceChar(kNBSP, kSPACE);
+}
+
+// Empties the line's content and its indentation header. Other members of the
+// line are not touched here.
+void nsPlainTextSerializer::CurrentLine::ResetContentAndIndentationHeader() {
+ mContent.Truncate();
+ mIndentation.mHeader.Truncate();
+}
+
+// Finds the index into mContent at which the line should be wrapped, taking
+// the width of the line prefix (DeterminePrefixWidth()) into account.
+// mContent must not be empty.
+//
+// @param aWrapColumn The column to wrap at.
+// @param aUseLineBreaker If true, candidate positions come from
+//        intl::LineBreakIteratorUtf16; otherwise only ASCII spaces are
+//        considered.
+// @return With the line breaker: the last break offset for which prefix plus
+//         content width still fits into aWrapColumn, or 0 if even the first
+//         break offset exceeds it. Without the line breaker: the index of an
+//         ASCII space if one is found; otherwise the position at which the
+//         forward search stopped, which is at or beyond the end of mContent.
+int32_t nsPlainTextSerializer::CurrentLine::FindWrapIndexForContent(
+ const uint32_t aWrapColumn, bool aUseLineBreaker) const {
+ MOZ_ASSERT(!mContent.IsEmpty());
+
+ const uint32_t prefixwidth = DeterminePrefixWidth();
+ int32_t goodSpace = 0;
+
+ if (aUseLineBreaker) {
+ // We advance one line break point at a time from the beginning of
+ // mContent, accumulating the width of each segment, for as long as the
+ // total width stays less than or equal to the wrap column.
+ uint32_t width = 0;
+ intl::LineBreakIteratorUtf16 lineBreakIter(mContent);
+ while (const Maybe<uint32_t> nextGoodSpace = lineBreakIter.Next()) {
+ // Only the segment between the previous and the next break point is
+ // measured; earlier segments are already part of width.
+ width += GetUnicharStringWidth(Span<const char16_t>(
+ mContent.get() + goodSpace, *nextGoodSpace - goodSpace));
+ if (prefixwidth + width > aWrapColumn) {
+ // The next break point would make the width exceed the wrap column, so
+ // goodSpace is what we want.
+ break;
+ }
+ goodSpace = AssertedCast<int32_t>(*nextGoodSpace);
+ }
+
+ return goodSpace;
+ }
+
+ // In this case we don't want strings, especially CJK-ones, to be split. See
+ // bug 333064 for more information. We break only at ASCII spaces.
+ if (aWrapColumn >= prefixwidth) {
+ // Search backward from the adjusted wrap column or from the text end.
+ goodSpace =
+ std::min<int32_t>(aWrapColumn - prefixwidth, mContent.Length() - 1);
+ while (goodSpace >= 0) {
+ if (nsCRT::IsAsciiSpace(mContent.CharAt(goodSpace))) {
+ return goodSpace;
+ }
+ goodSpace--;
+ }
+ }
+
+ // No space was found at or before the wrap column (or the prefix alone is
+ // wider than the wrap column): search forward from the adjusted wrap column.
+ goodSpace = (prefixwidth > aWrapColumn) ? 1 : aWrapColumn - prefixwidth;
+ const int32_t contentLength = mContent.Length();
+ while (goodSpace < contentLength &&
+ !nsCRT::IsAsciiSpace(mContent.CharAt(goodSpace))) {
+ goodSpace++;
+ }
+
+ return goodSpace;
+}
+
+// Binds the manager to the string that receives the serialized text.
+// @param aFlags As defined in nsIDocumentEncoder.idl; also used to choose the
+//        line break sequence.
+// @param aOutput The output string; it is kept by reference and has to be
+//        empty when passed in.
+nsPlainTextSerializer::OutputManager::OutputManager(const int32_t aFlags,
+ nsAString& aOutput)
+ : mFlags{aFlags}, mOutput{aOutput}, mAtFirstColumn{true} {
+ MOZ_ASSERT(aOutput.IsEmpty());
+
+ DetermineLineBreak(mFlags, mLineBreak);
+}
+
+// Appends aCurrentLine to the output. When the output is at the first column,
+// the line's quotes and indentation are written first.
+// @param aStripTrailingWhitespaces With kMaybe, trailing spaces of the
+//        quotes-and-indentation prefix are removed if the line has no content.
+void nsPlainTextSerializer::OutputManager::Append(
+    const CurrentLine& aCurrentLine,
+    const StripTrailingWhitespaces aStripTrailingWhitespaces) {
+  if (IsAtFirstColumn()) {
+    nsAutoString prefix;
+    aCurrentLine.CreateQuotesAndIndent(prefix);
+
+    const bool mayStrip =
+        aStripTrailingWhitespaces == StripTrailingWhitespaces::kMaybe;
+    if (mayStrip && aCurrentLine.mContent.IsEmpty()) {
+      prefix.Trim(" ", false, true, false);
+    }
+
+    Append(prefix);
+  }
+
+  Append(aCurrentLine.mContent);
+}
+
+// Appends aString to the output. Appending an empty string is a no-op and, in
+// particular, leaves the "at first column" state unchanged.
+void nsPlainTextSerializer::OutputManager::Append(const nsAString& aString) {
+  if (aString.IsEmpty()) {
+    return;
+  }
+
+  mOutput.Append(aString);
+  mAtFirstColumn = false;
+}
+
+// Writes the line break sequence chosen at construction time and records that
+// the output is at the first column again.
+void nsPlainTextSerializer::OutputManager::AppendLineBreak() {
+ mOutput.Append(mLineBreak);
+ mAtFirstColumn = true;
+}
+
+// Returns the current length of the output string.
+uint32_t nsPlainTextSerializer::OutputManager::GetOutputLength() const {
+ return mOutput.Length();
+}
+
+// Puts the serializer into its initial state and allocates the tag stack.
+nsPlainTextSerializer::nsPlainTextSerializer()
+    : mFloatingLines(-1),
+      mLineBreakDue(false),
+      kSpace(u" "_ns)  // Init of "constant"
+{
+  // Tag stack: starts out empty. The stack only ever contains pointers to
+  // static atoms, so they don't need refcounting.
+  mTagStack = new const nsAtom*[TagStackSize];
+  mTagStackIndex = 0;
+  mIgnoreAboveIndex = static_cast<uint32_t>(kNotFound);
+  mIgnoredChildNodeLevel = 0;
+
+  // Nesting levels and counters.
+  mHeadLevel = 0;
+  mSpanLevel = 0;
+  mULCount = 0;
+  for (int32_t level = 0; level < 7; ++level) {
+    mHeaderCounter[level] = 0;
+  }
+
+  // Flow. The start of the document is an "empty line" in itself.
+  mEmptyLines = 1;
+  mInWhitespace = false;
+  mPreFormattedMail = false;
+  mPreformattedBlockBoundary = false;
+  mHasWrittenCiteBlockquote = false;
+}
+
+// Frees the tag stack allocated by the constructor and warns if the <head>
+// nesting level did not return to zero.
+nsPlainTextSerializer::~nsPlainTextSerializer() {
+ delete[] mTagStack;
+ NS_WARNING_ASSERTION(mHeadLevel == 0, "Wrong head level!");
+}
+
+// Maps the integer value of the header strategy pref to a HeaderStrategy.
+// Values other than 0, 1 and 2 produce a warning and fall back to
+// kIndentIncreasedWithHeaderLevel.
+nsPlainTextSerializer::Settings::HeaderStrategy
+nsPlainTextSerializer::Settings::Convert(const int32_t aPrefHeaderStrategy) {
+  switch (aPrefHeaderStrategy) {
+    case 0:
+      return HeaderStrategy::kNoIndentation;
+    case 1:
+      return HeaderStrategy::kIndentIncreasedWithHeaderLevel;
+    case 2:
+      return HeaderStrategy::kNumberHeadingsAndIndentSlightly;
+    default:
+      break;
+  }
+
+  NS_WARNING(
+      nsPrintfCString("Header strategy pref contains undefined value: %i",
+                      aPrefHeaderStrategy)
+          .get());
+  return HeaderStrategy::kIndentIncreasedWithHeaderLevel;
+}
+
+const int32_t kDefaultHeaderStrategy = 1;
+
+// Stores the flags and the wrap column and derives the remaining settings.
+// @param aFlags As defined in nsIDocumentEncoder.idl. OutputNoFramesContent is
+//        always cleared from the stored flags.
+// @param aWrapColumn The column to wrap at.
+void nsPlainTextSerializer::Settings::Init(const int32_t aFlags,
+                                           const uint32_t aWrapColumn) {
+  mFlags = aFlags;
+  mWrapColumn = aWrapColumn;
+
+  if (mFlags & nsIDocumentEncoder::OutputFormatted) {
+    // Get some prefs that control how we do formatted output.
+    mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);
+
+    const int32_t prefHeaderStrategy =
+        Preferences::GetInt(PREF_HEADER_STRATEGY, kDefaultHeaderStrategy);
+    mHeaderStrategy = Convert(prefHeaderStrategy);
+  }
+
+  // Ruby annotations are included if either the pref or the flag asks for it.
+  const bool rubyRequestedByFlag =
+      (mFlags & nsIDocumentEncoder::OutputRubyAnnotation) != 0;
+  mWithRubyAnnotation =
+      StaticPrefs::converter_html2txt_always_include_ruby() ||
+      rubyRequestedByFlag;
+
+  // XXX We should let the caller decide whether to do this or not
+  mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;
+}
+
+// Prepares the serializer for writing into aOutput.
+// @param aFlags As defined in nsIDocumentEncoder.idl. OutputFormatFlowed
+//        requires OutputFormatted, OutputFormatted excludes
+//        OutputPreformatted, and OutputFormatDelSp requires
+//        OutputFormatFlowed.
+// @param aWrapColumn The column to wrap at; forwarded to the settings.
+// @param aEncoding, aIsCopying, aIsWholeDocument Not used by this
+//        implementation.
+// @param aNeedsPreformatScanning Always set to true.
+// @param aOutput The string that receives the serialized text.
+// @return NS_OK.
+NS_IMETHODIMP
+nsPlainTextSerializer::Init(const uint32_t aFlags, uint32_t aWrapColumn,
+ const Encoding* aEncoding, bool aIsCopying,
+ bool aIsWholeDocument,
+ bool* aNeedsPreformatScanning, nsAString& aOutput) {
+#ifdef DEBUG
+ // Check if the major control flags are set correctly.
+ if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
+ NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted,
+ "If you want format=flowed, you must combine it with "
+ "nsIDocumentEncoder::OutputFormatted");
+ }
+
+ if (aFlags & nsIDocumentEncoder::OutputFormatted) {
+ NS_ASSERTION(
+ !(aFlags & nsIDocumentEncoder::OutputPreformatted),
+ "Can't do formatted and preformatted output at the same time!");
+ }
+#endif
+ MOZ_ASSERT(!(aFlags & nsIDocumentEncoder::OutputFormatDelSp) ||
+ (aFlags & nsIDocumentEncoder::OutputFormatFlowed));
+
+ *aNeedsPreformatScanning = true;
+ mSettings.Init(aFlags, aWrapColumn);
+ // The output manager gets the flags as adjusted by the settings, not the
+ // raw aFlags.
+ mOutputManager.emplace(mSettings.GetFlags(), aOutput);
+
+ mUseLineBreaker = mSettings.MayWrap() && mSettings.MayBreakLines();
+
+ // Reset the per-serialization line state.
+ mLineBreakDue = false;
+ mFloatingLines = -1;
+
+ mPreformattedBlockBoundary = false;
+
+ MOZ_ASSERT(mOLStack.IsEmpty());
+
+ return NS_OK;
+}
+
+// Returns the last element of aStack, or false if aStack is empty.
+bool nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack) {
+  return aStack.IsEmpty() ? false : aStack.LastElement();
+}
+
+// Overwrites the last element of aStack with aValue. Calling this with an
+// empty stack is reported through NS_ERROR and changes nothing.
+void nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue) {
+  if (aStack.IsEmpty()) {
+    NS_ERROR("There is no \"Last\" value");
+    return;
+  }
+
+  aStack.LastElement() = aValue;
+}
+
+// Pushes aValue onto the end of aStack.
+void nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue) {
+  aStack.AppendElement(aValue);
+}
+
+// Removes and returns the last element of aStack; returns false, without
+// modifying the stack, if it is empty.
+bool nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack) {
+  if (aStack.IsEmpty()) {
+    return false;
+  }
+
+  return aStack.PopLastElement();
+}
+
+// Returns true if aTag is one of rp, rt or rtc and the settings say that ruby
+// annotations are not to be included in the output.
+bool nsPlainTextSerializer::IsIgnorableRubyAnnotation(
+    const nsAtom* aTag) const {
+  const bool isRubyAnnotationTag = aTag == nsGkAtoms::rp ||
+                                   aTag == nsGkAtoms::rt ||
+                                   aTag == nsGkAtoms::rtc;
+  return isRubyAnnotationTag && !mSettings.GetWithRubyAnnotation();
+}
+
+// Returns true if aElement has 'display:none' or if we just don't know, i.e.
+// if no computed style could be obtained for it.
+static bool IsDisplayNone(Element* aElement) {
+  RefPtr<const ComputedStyle> style =
+      nsComputedDOMStyle::GetComputedStyleNoFlush(aElement);
+  if (!style) {
+    // We don't know, which counts as 'display:none'.
+    return true;
+  }
+
+  return style->StyleDisplay()->mDisplay == StyleDisplay::None;
+}
+
+// Returns true if aElement is an HTML <script> or <style> element for which
+// IsDisplayNone() returns true.
+static bool IsIgnorableScriptOrStyle(Element* aElement) {
+  if (!aElement->IsAnyOfHTMLElements(nsGkAtoms::script, nsGkAtoms::style)) {
+    return false;
+  }
+
+  return IsDisplayNone(aElement);
+}
+
+// Serializes the part [aStartOffset, aEndOffset) of the text of aText.
+// @param aText The content node whose text fragment is serialized.
+// @param aStartOffset Offset of the first character; must not be negative.
+// @param aEndOffset Offset after the last character; -1 means "up to the end
+//        of the fragment". Larger values are clamped to the fragment length.
+// @return NS_OK on success, including when nothing is written because
+//         content is currently being ignored or the range is empty;
+//         NS_ERROR_INVALID_ARG for a negative start offset; the
+//         NS_ENSURE_ARG failure code for a null aText; NS_ERROR_FAILURE if
+//         aText has no text fragment.
+NS_IMETHODIMP
+nsPlainTextSerializer::AppendText(nsIContent* aText, int32_t aStartOffset,
+ int32_t aEndOffset) {
+ // While an ignored container is open, nothing is output.
+ if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
+ return NS_OK;
+ }
+
+ NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
+ if (aStartOffset < 0) return NS_ERROR_INVALID_ARG;
+
+ NS_ENSURE_ARG(aText);
+
+ nsresult rv = NS_OK;
+
+ nsIContent* content = aText;
+ const nsTextFragment* frag;
+ if (!content || !(frag = content->GetText())) {
+ return NS_ERROR_FAILURE;
+ }
+
+ int32_t fragLength = frag->GetLength();
+ int32_t endoffset =
+ (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength);
+ NS_ASSERTION(aStartOffset <= endoffset,
+ "A start offset is beyond the end of the text fragment!");
+
+ int32_t length = endoffset - aStartOffset;
+ if (length <= 0) {
+ return NS_OK;
+ }
+
+ // Copy the requested range into a UTF-16 string, whichever way the
+ // fragment stores its characters.
+ nsAutoString textstr;
+ if (frag->Is2b()) {
+ textstr.Assign(frag->Get2b() + aStartOffset, length);
+ } else {
+ // AssignASCII is for 7-bit character only, so don't use it
+ const char* data = frag->Get1b();
+ CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
+ }
+
+ // Mask the text if the text node is in a password field.
+ if (content->HasFlag(NS_MAYBE_MASKED)) {
+ TextEditor::MaskString(textstr, *content->AsText(), 0, aStartOffset);
+ }
+
+ // We have to split the string across newlines
+ // to match parser behavior
+ // Every '\n' or '\r' character leads to one DoAddText() call of its own.
+ int32_t start = 0;
+ int32_t offset = textstr.FindCharInSet(u"\n\r");
+ while (offset != kNotFound) {
+ if (offset > start) {
+ // Pass in the line
+ DoAddText(false, Substring(textstr, start, offset - start));
+ }
+
+ // Pass in a newline
+ DoAddText();
+
+ start = offset + 1;
+ offset = textstr.FindCharInSet(u"\n\r", start);
+ }
+
+ // Consume the last bit of the string if there's any left
+ if (start < length) {
+ if (start) {
+ DoAddText(false, Substring(textstr, start, length - start));
+ } else {
+ // No line break was found at all: pass the whole string in one piece.
+ DoAddText(false, textstr);
+ }
+ }
+
+ return rv;
+}
+
+// CDATA sections are serialized exactly like text nodes: this forwards to
+// AppendText() and returns its result.
+NS_IMETHODIMP
+nsPlainTextSerializer::AppendCDATASection(nsIContent* aCDATASection,
+ int32_t aStartOffset,
+ int32_t aEndOffset) {
+ return AppendText(aCDATASection, aStartOffset, aEndOffset);
+}
+
+// Records on mPreformatStack whether aElement is preformatted. The entry is
+// removed again by ForgetElementForPreformat(). Always returns NS_OK.
+NS_IMETHODIMP
+nsPlainTextSerializer::ScanElementForPreformat(Element* aElement) {
+  const bool isPreformatted = IsElementPreformatted(aElement);
+  mPreformatStack.push(isPreformatted);
+  return NS_OK;
+}
+
+// Removes the top entry of mPreformatStack, i.e. the one pushed by the
+// matching ScanElementForPreformat() call. Calling this with an empty stack
+// is a fatal error in all build types. Always returns NS_OK.
+NS_IMETHODIMP
+nsPlainTextSerializer::ForgetElementForPreformat(Element* aElement) {
+ MOZ_RELEASE_ASSERT(!mPreformatStack.empty(),
+ "Tried to pop without previous push.");
+ mPreformatStack.pop();
+ return NS_OK;
+}
+
+// Handles the start of aElement: void elements are added as leaves, all
+// other elements open a container. mElement points to aElement only for the
+// duration of that call.
+// @return The result of DoAddLeaf() or DoOpenContainer(), or the
+//         NS_ENSURE_ARG failure code if aElement is null.
+NS_IMETHODIMP
+nsPlainTextSerializer::AppendElementStart(Element* aElement,
+                                          Element* aOriginalElement) {
+  NS_ENSURE_ARG(aElement);
+
+  mElement = aElement;
+  nsAtom* const tag = GetIdForContent(mElement);
+
+  const nsresult rv = FragmentOrElement::IsHTMLVoid(tag)
+                          ? DoAddLeaf(tag)
+                          : DoOpenContainer(tag);
+
+  mElement = nullptr;
+
+  // Keep track of how deeply we are nested inside <head>.
+  if (tag == nsGkAtoms::head) {
+    ++mHeadLevel;
+  }
+
+  return rv;
+}
+
+// Handles the end of aElement: non-void elements close their container, void
+// elements need no further work. mElement points to aElement only for the
+// duration of that call.
+// @return The result of DoCloseContainer(), NS_OK for void elements, or the
+//         NS_ENSURE_ARG failure code if aElement is null.
+NS_IMETHODIMP
+nsPlainTextSerializer::AppendElementEnd(Element* aElement,
+                                        Element* aOriginalElement) {
+  NS_ENSURE_ARG(aElement);
+
+  mElement = aElement;
+  nsAtom* const tag = GetIdForContent(mElement);
+
+  const nsresult rv =
+      FragmentOrElement::IsHTMLVoid(tag) ? NS_OK : DoCloseContainer(tag);
+
+  mElement = nullptr;
+
+  // Undo the <head> nesting bookkeeping done in AppendElementStart().
+  if (tag == nsGkAtoms::head) {
+    NS_ASSERTION(mHeadLevel != 0, "mHeadLevel being decremented below 0");
+    --mHeadLevel;
+  }
+
+  return rv;
+}
+
+// Flushes the current line through the output manager and then calls
+// Finish(). Init() must have been called before, so that the output manager
+// exists.
+NS_IMETHODIMP
+nsPlainTextSerializer::FlushAndFinish() {
+ MOZ_ASSERT(mOutputManager);
+
+ mOutputManager->Flush(mCurrentLine);
+ return Finish();
+}
+
+// Destroys the output manager and with it the serializer's reference to the
+// output string. Nothing is flushed here. Always returns NS_OK.
+NS_IMETHODIMP
+nsPlainTextSerializer::Finish() {
+ mOutputManager.reset();
+
+ return NS_OK;
+}
+
+// Stores the current length of the output string in aLength. The output
+// manager must exist, i.e. this has to be called between Init() and Finish().
+// Always returns NS_OK.
+NS_IMETHODIMP
+nsPlainTextSerializer::GetOutputLength(uint32_t& aLength) const {
+ MOZ_ASSERT(mOutputManager);
+
+ aLength = mOutputManager->GetOutputLength();
+
+ return NS_OK;
+}
+
+// The plain text serializer writes nothing at the start of a document; this
+// does nothing and returns NS_OK.
+NS_IMETHODIMP
+nsPlainTextSerializer::AppendDocumentStart(Document* aDocument) {
+ return NS_OK;
+}
+
+constexpr int32_t kOlStackDummyValue = 0;
+
+nsresult nsPlainTextSerializer::DoOpenContainer(const nsAtom* aTag) {
+ if (IsIgnorableRubyAnnotation(aTag)) {
+ // Ignorable ruby annotation shouldn't be replaced by a placeholder
+ // character, neither any of its descendants.
+ mIgnoredChildNodeLevel++;
+ return NS_OK;
+ }
+ if (IsIgnorableScriptOrStyle(mElement)) {
+ mIgnoredChildNodeLevel++;
+ return NS_OK;
+ }
+
+ if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) {
+ if (mPreformattedBlockBoundary && DoOutput()) {
+ // Should always end a line, but get no more whitespace
+ if (mFloatingLines < 0) mFloatingLines = 0;
+ mLineBreakDue = true;
+ }
+ mPreformattedBlockBoundary = false;
+ }
+
+ if (mSettings.HasFlag(nsIDocumentEncoder::OutputRaw)) {
+ // Raw means raw. Don't even think about doing anything fancy
+ // here like indenting, adding line breaks or any other
+ // characters such as list item bullets, quote characters
+ // around <q>, etc.
+
+ return NS_OK;
+ }
+
+ if (mTagStackIndex < TagStackSize) {
+ mTagStack[mTagStackIndex++] = aTag;
+ }
+
+ if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
+ return NS_OK;
+ }
+
+ // Reset this so that <blockquote type=cite> doesn't affect the whitespace
+ // above random <pre>s below it.
+ mHasWrittenCiteBlockquote =
+ mHasWrittenCiteBlockquote && aTag == nsGkAtoms::pre;
+
+ bool isInCiteBlockquote = false;
+
+ // XXX special-case <blockquote type=cite> so that we don't add additional
+ // newlines before the text.
+ if (aTag == nsGkAtoms::blockquote) {
+ nsAutoString value;
+ nsresult rv = GetAttributeValue(nsGkAtoms::type, value);
+ isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
+ }
+
+ if (mLineBreakDue && !isInCiteBlockquote) EnsureVerticalSpace(mFloatingLines);
+
+ // Check if this tag's content that should not be output
+ if ((aTag == nsGkAtoms::noscript &&
+ !mSettings.HasFlag(nsIDocumentEncoder::OutputNoScriptContent)) ||
+ ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
+ !mSettings.HasFlag(nsIDocumentEncoder::OutputNoFramesContent))) {
+ // Ignore everything that follows the current tag in
+ // question until a matching end tag is encountered.
+ mIgnoreAboveIndex = mTagStackIndex - 1;
+ return NS_OK;
+ }
+
+ if (aTag == nsGkAtoms::body) {
+ // Try to figure out here whether we have a
+ // preformatted style attribute set by Thunderbird.
+ //
+ // Trigger on the presence of a "pre-wrap" in the
+ // style attribute. That's a very simplistic way to do
+ // it, but better than nothing.
+ nsAutoString style;
+ int32_t whitespace;
+ if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::style, style)) &&
+ (kNotFound != (whitespace = style.Find(u"white-space:")))) {
+ if (kNotFound != style.LowerCaseFindASCII("pre-wrap", whitespace)) {
+#ifdef DEBUG_preformatted
+ printf("Set mPreFormattedMail based on style pre-wrap\n");
+#endif
+ mPreFormattedMail = true;
+ } else if (kNotFound != style.LowerCaseFindASCII("pre", whitespace)) {
+#ifdef DEBUG_preformatted
+ printf("Set mPreFormattedMail based on style pre\n");
+#endif
+ mPreFormattedMail = true;
+ }
+ } else {
+ /* See comment at end of function. */
+ mInWhitespace = true;
+ mPreFormattedMail = false;
+ }
+
+ return NS_OK;
+ }
+
+ // Keep this in sync with DoCloseContainer!
+ if (!DoOutput()) {
+ return NS_OK;
+ }
+
+ if (aTag == nsGkAtoms::p)
+ EnsureVerticalSpace(1);
+ else if (aTag == nsGkAtoms::pre) {
+ if (GetLastBool(mIsInCiteBlockquote))
+ EnsureVerticalSpace(0);
+ else if (mHasWrittenCiteBlockquote) {
+ EnsureVerticalSpace(0);
+ mHasWrittenCiteBlockquote = false;
+ } else
+ EnsureVerticalSpace(1);
+ } else if (aTag == nsGkAtoms::tr) {
+ PushBool(mHasWrittenCellsForRow, false);
+ } else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
+ // We must make sure that the content of two table cells get a
+ // space between them.
+
+ // To make the separation between cells most obvious and
+ // importable, we use a TAB.
+ if (mHasWrittenCellsForRow.IsEmpty()) {
+ // We don't always see a <tr> (nor a <table>) before the <td> if we're
+ // copying part of a table
+ PushBool(mHasWrittenCellsForRow, true); // will never be popped
+ } else if (GetLastBool(mHasWrittenCellsForRow)) {
+ // Bypass |Write| so that the TAB isn't compressed away.
+ AddToLine(u"\t", 1);
+ mInWhitespace = true;
+ } else {
+ SetLastBool(mHasWrittenCellsForRow, true);
+ }
+ } else if (aTag == nsGkAtoms::ul) {
+ // Indent here to support nested lists, which aren't included in li :-(
+ EnsureVerticalSpace(IsInOlOrUl() ? 0 : 1);
+ // Must end the current line before we change indentation
+ mCurrentLine.mIndentation.mLength += kIndentSizeList;
+ mULCount++;
+ } else if (aTag == nsGkAtoms::ol) {
+ EnsureVerticalSpace(IsInOlOrUl() ? 0 : 1);
+ if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
+ // Must end the current line before we change indentation
+ nsAutoString startAttr;
+ int32_t startVal = 1;
+ if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::start, startAttr))) {
+ nsresult rv = NS_OK;
+ startVal = startAttr.ToInteger(&rv);
+ if (NS_FAILED(rv)) {
+ startVal = 1;
+ }
+ }
+ mOLStack.AppendElement(startVal);
+ } else {
+ mOLStack.AppendElement(kOlStackDummyValue);
+ }
+ mCurrentLine.mIndentation.mLength += kIndentSizeList; // see ul
+ } else if (aTag == nsGkAtoms::li &&
+ mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
+ if (mTagStackIndex > 1 && IsInOL()) {
+ if (!mOLStack.IsEmpty()) {
+ nsAutoString valueAttr;
+ if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::value, valueAttr))) {
+ nsresult rv = NS_OK;
+ int32_t valueAttrVal = valueAttr.ToInteger(&rv);
+ if (NS_SUCCEEDED(rv)) {
+ mOLStack.LastElement() = valueAttrVal;
+ }
+ }
+ // This is what nsBulletFrame does for OLs:
+ mCurrentLine.mIndentation.mHeader.AppendInt(mOLStack.LastElement(), 10);
+ mOLStack.LastElement()++;
+ } else {
+ mCurrentLine.mIndentation.mHeader.Append(char16_t('#'));
+ }
+
+ mCurrentLine.mIndentation.mHeader.Append(char16_t('.'));
+
+ } else {
+ static const char bulletCharArray[] = "*o+#";
+ uint32_t index = mULCount > 0 ? (mULCount - 1) : 3;
+ char bulletChar = bulletCharArray[index % 4];
+ mCurrentLine.mIndentation.mHeader.Append(char16_t(bulletChar));
+ }
+
+ mCurrentLine.mIndentation.mHeader.Append(char16_t(' '));
+ } else if (aTag == nsGkAtoms::dl) {
+ EnsureVerticalSpace(1);
+ } else if (aTag == nsGkAtoms::dt) {
+ EnsureVerticalSpace(0);
+ } else if (aTag == nsGkAtoms::dd) {
+ EnsureVerticalSpace(0);
+ mCurrentLine.mIndentation.mLength += kIndentSizeDD;
+ } else if (aTag == nsGkAtoms::span) {
+ ++mSpanLevel;
+ } else if (aTag == nsGkAtoms::blockquote) {
+ // Push
+ PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
+ if (isInCiteBlockquote) {
+ EnsureVerticalSpace(0);
+ mCurrentLine.mCiteQuoteLevel++;
+ } else {
+ EnsureVerticalSpace(1);
+ mCurrentLine.mIndentation.mLength +=
+ kTabSize; // Check for some maximum value?
+ }
+ } else if (aTag == nsGkAtoms::q) {
+ Write(u"\""_ns);
+ }
+
+ // Else make sure we'll separate block level tags,
+ // even if we're about to leave, before doing any other formatting.
+ else if (IsCssBlockLevelElement(mElement)) {
+ EnsureVerticalSpace(0);
+ }
+
+ if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
+ OpenContainerForOutputFormatted(aTag);
+ }
+ return NS_OK;
+}
+
+/**
+ * Emits the decorations that are produced when a container is opened in
+ * OutputFormatted mode: vertical space, indentation and optional numbering
+ * for headings, and the opening marker of inline structure elements.
+ *
+ * @param aTag the tag of the container being opened.
+ */
+void nsPlainTextSerializer::OpenContainerForOutputFormatted(
+    const nsAtom* aTag) {
+  const bool currentNodeIsConverted = IsCurrentNodeConverted();
+
+  const bool isHeading = aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
+                         aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
+                         aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6;
+
+  if (isHeading) {
+    EnsureVerticalSpace(2);
+
+    const auto strategy = mSettings.GetHeaderStrategy();
+    if (strategy ==
+        Settings::HeaderStrategy::kNumberHeadingsAndIndentSlightly) {
+      mCurrentLine.mIndentation.mLength += kIndentSizeHeaders;
+
+      const int32_t level = HeaderLevel(aTag);
+      // Count this heading and restart the numbering of all deeper levels.
+      mHeaderCounter[level]++;
+      for (int32_t deeper = level + 1; deeper <= 6; ++deeper) {
+        mHeaderCounter[deeper] = 0;
+      }
+
+      // Build the "1.2.3. " style lead-up from the counters of every level
+      // up to and including the current one.
+      nsAutoString numbering;
+      for (int32_t current = 1; current <= level; ++current) {
+        numbering.AppendInt(mHeaderCounter[current]);
+        numbering.Append(char16_t('.'));
+      }
+      numbering.Append(char16_t(' '));
+      Write(numbering);
+    } else if (strategy ==
+               Settings::HeaderStrategy::kIndentIncreasedWithHeaderLevel) {
+      mCurrentLine.mIndentation.mLength += kIndentSizeHeaders;
+      // For h(x), add the increment x-1 times.
+      const int32_t level = HeaderLevel(aTag);
+      for (int32_t step = 1; step < level; ++step) {
+        mCurrentLine.mIndentation.mLength += kIndentIncrementHeaders;
+      }
+    }
+  } else if (mSettings.GetStructs() && !currentNodeIsConverted) {
+    // Inline structure markers; skipped for nodes that the TXT->HTML
+    // converter inserted.
+    if (aTag == nsGkAtoms::sup) {
+      Write(u"^"_ns);
+    } else if (aTag == nsGkAtoms::sub || aTag == nsGkAtoms::u) {
+      Write(u"_"_ns);
+    } else if (aTag == nsGkAtoms::code) {
+      Write(u"|"_ns);
+    } else if (aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) {
+      Write(u"*"_ns);
+    } else if (aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) {
+      Write(u"/"_ns);
+    }
+  }
+
+  /* Container elements are always block elements, so no whitespace should be
+     output immediately after the container tag, even if the HTML source has
+     some there (e.g. because it is pretty-printed). Pretending that we are
+     already in whitespace makes the serializer drop it. */
+  mInWhitespace = true;
+}
+
+/**
+ * Handles the end of a container element: unwinds the tag stack and the
+ * "ignore" bookkeeping, restores indentation/quote levels and schedules the
+ * line breaks that have to follow the element.
+ *
+ * @param aTag the tag of the container being closed.
+ * @return always NS_OK.
+ */
+nsresult nsPlainTextSerializer::DoCloseContainer(const nsAtom* aTag) {
+  if (IsIgnorableRubyAnnotation(aTag) || IsIgnorableScriptOrStyle(mElement)) {
+    mIgnoredChildNodeLevel--;
+    return NS_OK;
+  }
+
+  // If we're closing a preformatted block element, output a line break
+  // when we find a new container.
+  if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy) &&
+      DoOutput() && IsElementPreformatted() &&
+      IsCssBlockLevelElement(mElement)) {
+    mPreformattedBlockBoundary = true;
+  }
+
+  if (mSettings.HasFlag(nsIDocumentEncoder::OutputRaw)) {
+    // Raw means raw: no indenting, no added line breaks and no extra
+    // characters such as list item bullets or quote characters around <q>.
+    return NS_OK;
+  }
+
+  if (mTagStackIndex > 0) {
+    --mTagStackIndex;
+  }
+
+  if (mTagStackIndex >= mIgnoreAboveIndex) {
+    if (mTagStackIndex == mIgnoreAboveIndex) {
+      // This is the close tag whose matching open tag had set
+      // mIgnoreAboveIndex; reset it before discarding this tag.
+      mIgnoreAboveIndex = static_cast<uint32_t>(kNotFound);
+    }
+    return NS_OK;
+  }
+
+  MOZ_ASSERT(mOutputManager);
+
+  if (aTag == nsGkAtoms::body || aTag == nsGkAtoms::html) {
+    // We want the output to end with a new line, but in preformatted areas
+    // like text fields we can't emit newlines that weren't there, so the
+    // newline is only added for formatted output.
+    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
+      EnsureVerticalSpace(0);
+    } else {
+      mOutputManager->Flush(mCurrentLine);
+    }
+    // Nothing else is done for these tags, not even in formatted mode.
+    return NS_OK;
+  }
+
+  // Keep this in sync with DoOpenContainer!
+  if (!DoOutput()) {
+    return NS_OK;
+  }
+
+  // Requests a line break without any additional empty lines; an already
+  // pending, non-negative mFloatingLines value is left alone.
+  const auto requestLineBreak = [this]() {
+    if (mFloatingLines < 0) {
+      mFloatingLines = 0;
+    }
+    mLineBreakDue = true;
+  };
+  // Requests a line break with exactly |aFloatingLines| floating lines.
+  const auto requestLineBreakWith = [this](int32_t aFloatingLines) {
+    mFloatingLines = aFloatingLines;
+    mLineBreakDue = true;
+  };
+
+  const bool formatted = mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted);
+
+  if (aTag == nsGkAtoms::tr) {
+    PopBool(mHasWrittenCellsForRow);
+    // Should always end a line, but get no more whitespace
+    requestLineBreak();
+  } else if ((aTag == nsGkAtoms::li || aTag == nsGkAtoms::dt) && formatted) {
+    // Items that should always end a line, but get no more whitespace
+    requestLineBreak();
+  } else if (aTag == nsGkAtoms::pre) {
+    requestLineBreakWith(GetLastBool(mIsInCiteBlockquote) ? 0 : 1);
+  } else if (aTag == nsGkAtoms::ul) {
+    mOutputManager->Flush(mCurrentLine);
+    mCurrentLine.mIndentation.mLength -= kIndentSizeList;
+    --mULCount;
+    if (!IsInOlOrUl()) {
+      requestLineBreakWith(1);
+    }
+  } else if (aTag == nsGkAtoms::ol) {
+    // Flush before the OL stack is popped; doing it afterwards would be
+    // wrong.
+    mOutputManager->Flush(mCurrentLine);
+    mCurrentLine.mIndentation.mLength -= kIndentSizeList;
+    MOZ_ASSERT(!mOLStack.IsEmpty(), "Wrong OLStack level!");
+    mOLStack.RemoveLastElement();
+    if (!IsInOlOrUl()) {
+      requestLineBreakWith(1);
+    }
+  } else if (aTag == nsGkAtoms::dl) {
+    requestLineBreakWith(1);
+  } else if (aTag == nsGkAtoms::dd) {
+    mOutputManager->Flush(mCurrentLine);
+    mCurrentLine.mIndentation.mLength -= kIndentSizeDD;
+  } else if (aTag == nsGkAtoms::span) {
+    NS_ASSERTION(mSpanLevel, "Span level will be negative!");
+    --mSpanLevel;
+  } else if (aTag == nsGkAtoms::div) {
+    requestLineBreak();
+  } else if (aTag == nsGkAtoms::blockquote) {
+    mOutputManager->Flush(mCurrentLine);  // Is this needed?
+
+    const bool wasCiteBlockquote = PopBool(mIsInCiteBlockquote);
+    if (wasCiteBlockquote) {
+      NS_ASSERTION(mCurrentLine.mCiteQuoteLevel,
+                   "CiteQuote level will be negative!");
+      mCurrentLine.mCiteQuoteLevel--;
+      mFloatingLines = 0;
+      mHasWrittenCiteBlockquote = true;
+    } else {
+      mCurrentLine.mIndentation.mLength -= kTabSize;
+      mFloatingLines = 1;
+    }
+    mLineBreakDue = true;
+  } else if (aTag == nsGkAtoms::q) {
+    Write(u"\""_ns);
+  } else if (IsCssBlockLevelElement(mElement)) {
+    // All other blocks get 1 vertical space after them in formatted mode,
+    // otherwise 0. This is hard. Sometimes 0 is a better number, but how to
+    // know?
+    if (formatted) {
+      EnsureVerticalSpace(1);
+    } else {
+      requestLineBreak();
+    }
+  }
+
+  if (formatted) {
+    CloseContainerForOutputFormatted(aTag);
+  }
+
+  return NS_OK;
+}
+
+/**
+ * Counterpart of OpenContainerForOutputFormatted: undoes the heading
+ * indentation, appends the target of links and writes the closing marker of
+ * inline structure elements.
+ *
+ * @param aTag the tag of the container being closed.
+ */
+void nsPlainTextSerializer::CloseContainerForOutputFormatted(
+    const nsAtom* aTag) {
+  const bool currentNodeIsConverted = IsCurrentNodeConverted();
+
+  const bool isHeading = aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 ||
+                         aTag == nsGkAtoms::h3 || aTag == nsGkAtoms::h4 ||
+                         aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6;
+
+  if (isHeading) {
+    using HeaderStrategy = Settings::HeaderStrategy;
+    const auto strategy = mSettings.GetHeaderStrategy();
+    const bool indentsPerLevel =
+        strategy == HeaderStrategy::kIndentIncreasedWithHeaderLevel;
+
+    if (indentsPerLevel ||
+        strategy == HeaderStrategy::kNumberHeadingsAndIndentSlightly) {
+      mCurrentLine.mIndentation.mLength -= kIndentSizeHeaders;
+    }
+    if (indentsPerLevel) {
+      // For h(x), remove the increment x-1 times.
+      const int32_t level = HeaderLevel(aTag);
+      for (int32_t step = 1; step < level; ++step) {
+        mCurrentLine.mIndentation.mLength -= kIndentIncrementHeaders;
+      }
+    }
+    EnsureVerticalSpace(1);
+    return;
+  }
+
+  // Everything below is skipped for nodes that the TXT->HTML converter
+  // inserted.
+  if (currentNodeIsConverted) {
+    return;
+  }
+
+  if (aTag == nsGkAtoms::a) {
+    // Append the link target as " <url>" when there is a non-empty href.
+    nsAutoString url;
+    if (NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::href, url)) &&
+        !url.IsEmpty()) {
+      nsAutoString link;
+      link.AssignLiteral(" <");
+      link.Append(url);
+      link.Append(char16_t('>'));
+      Write(link);
+    }
+    return;
+  }
+
+  if (!mSettings.GetStructs()) {
+    return;
+  }
+
+  if (aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub) {
+    Write(kSpace);
+  } else if (aTag == nsGkAtoms::code) {
+    Write(u"|"_ns);
+  } else if (aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) {
+    Write(u"*"_ns);
+  } else if (aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) {
+    Write(u"/"_ns);
+  } else if (aTag == nsGkAtoms::u) {
+    Write(u"_"_ns);
+  }
+}
+
+/**
+ * Tells whether leaf content must be dropped at the current position.
+ *
+ * @return true while inside ignored child nodes (mIgnoredChildNodeLevel > 0)
+ *         or when one of the two topmost tag stack entries is <select>.
+ */
+bool nsPlainTextSerializer::MustSuppressLeaf() const {
+  if (mIgnoredChildNodeLevel > 0) {
+    return true;
+  }
+
+  // Don't output the contents of SELECT elements;
+  // Might be nice, eventually, to output just the selected element.
+  // Read more in bug 31994.
+  const bool belowTopIsSelect =
+      mTagStackIndex > 1 &&
+      mTagStack[mTagStackIndex - 2] == nsGkAtoms::select;
+  const bool topIsSelect =
+      mTagStackIndex > 0 &&
+      mTagStack[mTagStackIndex - 1] == nsGkAtoms::select;
+
+  return belowTopIsSelect || topIsSelect;
+}
+
+/**
+ * Convenience overload: adds a line break, i.e. forwards to
+ * DoAddText(true, <empty string>).
+ */
+void nsPlainTextSerializer::DoAddText() {
+  DoAddText(true, u""_ns);
+}
+
+/**
+ * Adds either a piece of text or a line break coming from the source.
+ *
+ * @param aIsLineBreak true when a line break is added; |aText| is not used
+ *                     in that case.
+ * @param aText        the text to write when |aIsLineBreak| is false.
+ */
+void nsPlainTextSerializer::DoAddText(bool aIsLineBreak,
+                                      const nsAString& aText) {
+  // If we don't want any output, just return
+  if (!DoOutput()) {
+    return;
+  }
+
+  if (!aIsLineBreak) {
+    // Real text follows, so this is no longer true.
+    mHasWrittenCiteBlockquote = false;
+  }
+
+  if (mLineBreakDue) {
+    EnsureVerticalSpace(mFloatingLines);
+  }
+
+  if (MustSuppressLeaf()) {
+    return;
+  }
+
+  if (!aIsLineBreak) {
+    Write(aText);
+    return;
+  }
+
+  // Whitespace from the original html source is only passed along when we're
+  // forced into preformatted mode via flags, or when we're inside preformatted
+  // content. Otherwise we're either collapsing to minimal text or
+  // prettyprinting to mimic the html format, and in neither case does the
+  // formatting of the html source help us.
+  const bool keepSourceLineBreak =
+      mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted) ||
+      (mPreFormattedMail && !mSettings.GetWrapColumn()) ||
+      IsElementPreformatted();
+
+  if (keepSourceLineBreak) {
+    EnsureVerticalSpace(mEmptyLines + 1);
+  } else if (!mInWhitespace) {
+    Write(kSpace);
+    mInWhitespace = true;
+  }
+}
+
+/**
+ * Pads |aResult| with '-' characters until it is |aWrapColumn| characters
+ * long, or 25 characters long when |aWrapColumn| is 0.
+ * |aResult| is expected to be empty on entry.
+ */
+void CreateLineOfDashes(nsAString& aResult, const uint32_t aWrapColumn) {
+  MOZ_ASSERT(aResult.IsEmpty());
+
+  uint32_t width = 25;
+  if (aWrapColumn > 0) {
+    width = aWrapColumn;
+  }
+
+  for (uint32_t length = aResult.Length(); length < width; ++length) {
+    aResult.Append(char16_t('-'));
+  }
+}
+
+/**
+ * Handles leaf elements: <br> becomes a line break, <hr> (formatted output
+ * only) a line of dashes, and <img> is replaced by its alt or title text.
+ *
+ * @param aTag the tag of the leaf element.
+ * @return always NS_OK.
+ */
+nsresult nsPlainTextSerializer::DoAddLeaf(const nsAtom* aTag) {
+  mPreformattedBlockBoundary = false;
+
+  if (!DoOutput()) {
+    return NS_OK;
+  }
+
+  if (mLineBreakDue) {
+    EnsureVerticalSpace(mFloatingLines);
+  }
+
+  if (MustSuppressLeaf()) {
+    return NS_OK;
+  }
+
+  if (aTag == nsGkAtoms::br) {
+    // Another egregious editor workaround, see bug 38194:
+    // ignore the bogus br tags that the editor sticks here and there.
+    // FYI: `brElement` may be `nullptr` if the element is a <br> element
+    //      of a non-HTML element.
+    // XXX Do we need to call `EnsureVerticalSpace()` when the <br> element
+    //     is not an HTML element?
+    HTMLBRElement* brElement = HTMLBRElement::FromNodeOrNull(mElement);
+    const bool isPaddingBr =
+        brElement && brElement->IsPaddingForEmptyLastLine();
+    if (!isPaddingBr) {
+      EnsureVerticalSpace(mEmptyLines + 1);
+    }
+    return NS_OK;
+  }
+
+  if (aTag == nsGkAtoms::hr &&
+      mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
+    EnsureVerticalSpace(0);
+
+    // Make a line of dashes as wide as the wrap width
+    // XXX honoring percentage would be nice
+    nsAutoString dashes;
+    CreateLineOfDashes(dashes, mSettings.GetWrapColumn());
+    Write(dashes);
+
+    EnsureVerticalSpace(0);
+    return NS_OK;
+  }
+
+  if (aTag == nsGkAtoms::img) {
+    /* Output (in decreasing order of preference)
+       alt, title or nothing */
+    // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
+    nsAutoString imageDescription;
+    // When the alt attribute is present its value is written as is, so an
+    // empty value (|alt=""|) outputs nothing. The title is only consulted
+    // when there is no alt attribute.
+    if (NS_FAILED(GetAttributeValue(nsGkAtoms::alt, imageDescription)) &&
+        NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::title, imageDescription)) &&
+        !imageDescription.IsEmpty()) {
+      imageDescription = u" ["_ns + imageDescription + u"] "_ns;
+    }
+
+    Write(imageDescription);
+  }
+
+  return NS_OK;
+}
+
+/**
+ * Adds as many newlines as necessary to get |aNumberOfRows| empty lines.
+ *
+ * aNumberOfRows = -1  : Being in the middle of some line of text
+ * aNumberOfRows = 0   : Being at the start of a line
+ * aNumberOfRows = n>0 : Having n empty lines before the current line.
+ *
+ * Also clears the pending line break state (mLineBreakDue, mFloatingLines).
+ */
+void nsPlainTextSerializer::EnsureVerticalSpace(const int32_t aNumberOfRows) {
+  const auto endLineHard = [this]() {
+    EndLine(false);
+    mInWhitespace = true;
+  };
+
+  // A pending indentation header is something we probably want to output,
+  // but it is not included in the count for empty lines, so the loop below
+  // would not realize that a new line has to be started.
+  if (aNumberOfRows >= 0 && !mCurrentLine.mIndentation.mHeader.IsEmpty()) {
+    endLineHard();
+  }
+
+  while (mEmptyLines < aNumberOfRows) {
+    endLineHard();
+  }
+
+  mLineBreakDue = false;
+  mFloatingLines = -1;
+}
+
+/**
+ * Outputs the content of |aCurrentLine| without a line break and then resets
+ * its content and indentation header. Does nothing when the content is empty.
+ */
+void nsPlainTextSerializer::OutputManager::Flush(CurrentLine& aCurrentLine) {
+  if (aCurrentLine.mContent.IsEmpty()) {
+    return;
+  }
+
+  aCurrentLine.MaybeReplaceNbspsInContent(mFlags);
+  Append(aCurrentLine, StripTrailingWhitespaces::kNo);
+  aCurrentLine.ResetContentAndIndentationHeader();
+}
+
+/**
+ * Returns true when the null-terminated string |s| starts with '>', ' ',
+ * a non-breaking space or "From ", i.e. with something that callers
+ * space-stuff for format=flowed output.
+ */
+static bool IsSpaceStuffable(const char16_t* s) {
+  const char16_t first = s[0];
+  if (first == '>' || first == ' ' || first == kNBSP) {
+    return true;
+  }
+  return NS_strncmp(s, u"From ", 5) == 0;
+}
+
+/**
+ * If wrapping is enabled, repeatedly splits the current line at a suitable
+ * wrap position and outputs the completed part with a soft line break, until
+ * the remainder fits or no wrap position can be found.
+ */
+void nsPlainTextSerializer::MaybeWrapAndOutputCompleteLines() {
+  if (!mSettings.MayWrap()) {
+    return;
+  }
+
+  const uint32_t prefixwidth = mCurrentLine.DeterminePrefixWidth();
+
+  // To avoid wrapping lines that would only be a couple of letters too long,
+  // a bonus of 4 is granted, but only if the wrap column is more than 20.
+  const uint32_t wrapColumn = mSettings.GetWrapColumn();
+  const uint32_t maxWidth = wrapColumn + ((wrapColumn > 20) ? 4 : 0);
+
+  auto& content = mCurrentLine.mContent;
+
+  while (!content.IsEmpty()) {
+    // The width of the line as it will appear on the screen (approx.).
+    const uint32_t contentWidth = GetUnicharStringWidth(content);
+    if (contentWidth + prefixwidth <= maxWidth) {
+      break;
+    }
+
+    const int32_t breakIndex =
+        mCurrentLine.FindWrapIndexForContent(wrapColumn, mUseLineBreaker);
+    const int32_t contentLength = content.Length();
+    if (breakIndex <= 0 || breakIndex >= contentLength) {
+      // No place to break. Hopefully we get more data later to use for a
+      // place to break the line.
+      break;
+    }
+
+    // The character at the break position is dropped only if it is a space.
+    const char16_t breakChar = content.CharAt(breakIndex);
+    int32_t restLength = contentLength - breakIndex;
+    if (nsCRT::IsAsciiSpace(breakChar)) {
+      --restLength;
+    }
+    nsAutoString restOfContent;
+    content.Right(restOfContent, restLength);
+
+    // A break at U+0020 has to be reported for delsp=yes support.
+    const bool breakBySpace = breakChar == ' ';
+    content.Truncate(breakIndex);
+    EndLine(true, breakBySpace);
+    content.Truncate();
+
+    // Space stuffing a la RFC 2646 (format=flowed); quoted lines are
+    // space-stuffed anyway.
+    // XXX doesn't seem to work correctly for ' '
+    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed) &&
+        !restOfContent.IsEmpty() && IsSpaceStuffable(restOfContent.get()) &&
+        mCurrentLine.mCiteQuoteLevel == 0) {
+      content.Append(char16_t(' '));
+    }
+
+    content.Append(restOfContent);
+    mEmptyLines = -1;
+  }
+}
+
+/**
+ * Appends a piece of text to the currently stored line. If we are wrapping
+ * text and the stored line becomes too long, a suitable location to wrap is
+ * looked for and the lines that are complete are output.
+ *
+ * @param aLineFragment       the characters to append.
+ * @param aLineFragmentLength the number of characters to append.
+ */
+void nsPlainTextSerializer::AddToLine(const char16_t* aLineFragment,
+                                      int32_t aLineFragmentLength) {
+  if (mLineBreakDue) {
+    EnsureVerticalSpace(mFloatingLines);
+  }
+
+  const bool startsNewLine = mCurrentLine.mContent.IsEmpty();
+  if (startsNewLine) {
+    if (aLineFragmentLength == 0) {
+      return;
+    }
+
+    // Space stuffing a la RFC 2646 (format=flowed); quoted lines are
+    // space-stuffed anyway.
+    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed) &&
+        IsSpaceStuffable(aLineFragment) && mCurrentLine.mCiteQuoteLevel == 0) {
+      mCurrentLine.mContent.Append(char16_t(' '));
+    }
+    mEmptyLines = -1;
+  }
+
+  mCurrentLine.mContent.Append(aLineFragment, aLineFragmentLength);
+
+  MaybeWrapAndOutputCompleteLines();
+}
+
+// The signature separator (RFC 2646).
+const char kSignatureSeparator[] = "-- ";
+
+// The OpenPGP dash-escaped signature separator in inline
+// signed messages according to the OpenPGP standard (RFC 2440).
+const char kDashEscapedSignatureSeparator[] = "- -- ";
+
+/**
+ * Returns true when |aString| is exactly one of the two signature separators
+ * defined above.
+ */
+static bool IsSignatureSeparator(const nsAString& aString) {
+  if (aString.EqualsLiteral(kSignatureSeparator)) {
+    return true;
+  }
+  return aString.EqualsLiteral(kDashEscapedSignatureSeparator);
+}
+
+/**
+ * Outputs the contents of mCurrentLine.mContent, and resets line
+ * specific variables. Also adds an indentation and prefix if there is one
+ * specified. Strips ending spaces from the line if it isn't preformatted.
+ *
+ * @param aSoftLineBreak true for a line break introduced by wrapping, false
+ *                       for a hard line break.
+ * @param aBreakBySpace  true when the wrap position was an ASCII space
+ *                       (U+0020); only relevant for soft line breaks with
+ *                       delsp=yes.
+ */
+void nsPlainTextSerializer::EndLine(bool aSoftLineBreak, bool aBreakBySpace) {
+  if (aSoftLineBreak && mCurrentLine.mContent.IsEmpty()) {
+    // No meaning
+    return;
+  }
+
+  /* In non-preformatted mode, remove spaces from the end of the line for
+   * format=flowed compatibility. Don't do this for these special cases:
+   * "-- ", the signature separator (RFC 2646) shouldn't be touched and
+   * "- -- ", the OpenPGP dash-escaped signature separator in inline
+   * signed messages according to the OpenPGP standard (RFC 2440).
+   */
+  if (!mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted) &&
+      (aSoftLineBreak || !IsSignatureSeparator(mCurrentLine.mContent))) {
+    mCurrentLine.mContent.Trim(" ", false, true, false);
+  }
+
+  if (aSoftLineBreak &&
+      mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed) &&
+      (mCurrentLine.mIndentation.mLength == 0)) {
+    // Add the soft part of the soft linebreak (RFC 2646 4.1)
+    // We only do this when there is no indentation since format=flowed
+    // lines and indentation don't work well together.
+
+    // With RFC 3676 support (delsp=yes) the receiver deletes the space that
+    // marks the soft break. If the line was broken at an ASCII space, that
+    // space has been trimmed above, so two spaces are appended: one that
+    // survives as the word separator and one that marks the soft break.
+    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatDelSp) &&
+        aBreakBySpace) {
+      mCurrentLine.mContent.AppendLiteral("  ");
+    } else {
+      mCurrentLine.mContent.Append(char16_t(' '));
+    }
+  }
+
+  // A soft break, or a hard break of a line that has something to output,
+  // restarts the empty line count; a hard break of an empty line adds one.
+  if (aSoftLineBreak || mCurrentLine.HasContentOrIndentationHeader()) {
+    mEmptyLines = 0;
+  } else {
+    mEmptyLines++;
+  }
+
+  MOZ_ASSERT(mOutputManager);
+
+  mCurrentLine.MaybeReplaceNbspsInContent(mSettings.GetFlags());
+
+  // If we don't have anything "real" to output we have to
+  // make sure the indent doesn't end in a space since that
+  // would trick a format=flowed-aware receiver.
+  mOutputManager->Append(mCurrentLine,
+                         OutputManager::StripTrailingWhitespaces::kMaybe);
+  mOutputManager->AppendLineBreak();
+  mCurrentLine.ResetContentAndIndentationHeader();
+  mInWhitespace = true;
+  mLineBreakDue = false;
+  mFloatingLines = -1;
+}
+
+/**
+ * Builds the prefix of the line: the mail quote characters, the indentation
+ * spaces and the indentation header (e.g. numbers for numbered lists).
+ *
+ * @param aResult receives the prefix. It is overwritten when there is a cite
+ *                quote level and appended to otherwise.
+ */
+void nsPlainTextSerializer::CurrentLine::CreateQuotesAndIndent(
+    nsAString& aResult) const {
+  // Put the mail quote "> " chars in, if appropriate:
+  if (mCiteQuoteLevel > 0) {
+    nsAutoString quotes;
+    for (int level = 0; level < mCiteQuoteLevel; ++level) {
+      quotes.Append(char16_t('>'));
+    }
+    /* Better not to output a space here if the line is empty, in case a
+       receiving format=flowed-aware UA thinks this is a flowed line, which
+       it isn't - it's just empty. (Flowed lines may be joined with the
+       following one, so the empty line may be lost completely.) */
+    if (!mContent.IsEmpty()) {
+      quotes.Append(char16_t(' '));
+    }
+    aResult = quotes;
+  }
+
+  // Indent if necessary, but don't make empty lines look flowed.
+  const int32_t indentwidth =
+      mIndentation.mLength - mIndentation.mHeader.Length();
+  if (indentwidth > 0 && HasContentOrIndentationHeader()) {
+    for (int32_t pad = indentwidth; pad > 0; --pad) {
+      aResult.Append(char16_t(' '));
+    }
+  }
+
+  if (!mIndentation.mHeader.IsEmpty()) {
+    aResult += mIndentation.mHeader;
+  }
+}
+
+/**
+ * Returns true when |c| is a line feed, a carriage return, a blank or a tab.
+ */
+static bool IsLineFeedCarriageReturnBlankOrTab(char16_t c) {
+  switch (c) {
+    case '\n':
+    case '\r':
+    case ' ':
+    case '\t':
+      return true;
+    default:
+      return false;
+  }
+}
+
+/**
+ * Walks |aString| backwards and replaces every non-breaking space with a
+ * plain space until the first character is reached that is neither an nbsp
+ * nor a line feed, carriage return, blank or tab.
+ */
+static void ReplaceVisiblyTrailingNbsps(nsAString& aString) {
+  int32_t pos = aString.Length();
+  while (pos-- > 0) {
+    const char16_t ch = aString[pos];
+    if (IsLineFeedCarriageReturnBlankOrTab(ch)) {
+      continue;
+    }
+    if (ch != kNBSP) {
+      break;
+    }
+    aString.Replace(pos, 1, ' ');
+  }
+}
+
+/**
+ * Outputs |aString| without any wrapping: the string is split at '\n', '\r'
+ * and "\r\n", and every piece is output through the output manager, followed
+ * by a line break when the piece was terminated by one.
+ */
+void nsPlainTextSerializer::ConvertToLinesAndOutput(const nsAString& aString) {
+  const int32_t totLen = aString.Length();
+  int32_t newline{0};
+
+  // The mail quote "> " chars have to be put in before every line, so the
+  // string is handled line by line.
+  int32_t lineStart = 0;
+  while (lineStart < totLen) {
+    bool spacesOnly = true;
+
+    // Find one of '\n' or '\r' using iterators since nsAString
+    // doesn't have the old FindCharInSet function.
+    nsAString::const_iterator cursor;
+    aString.BeginReading(cursor);
+    nsAString::const_iterator end;
+    aString.EndReading(end);
+    cursor.advance(lineStart);
+    newline = kNotFound;
+    for (int32_t pos = lineStart; cursor != end; ++pos, ++cursor) {
+      if ('\n' == *cursor || '\r' == *cursor) {
+        newline = pos;
+        break;
+      }
+      if (' ' != *cursor) {
+        spacesOnly = false;
+      }
+    }
+
+    const bool outputLineBreak = newline != kNotFound;
+    nsAutoString stringpart;
+    if (!outputLineBreak) {
+      // No more line breaks: take the rest of the string.
+      stringpart.Assign(Substring(aString, lineStart, totLen - lineStart));
+      if (!stringpart.IsEmpty()) {
+        mInWhitespace = IsLineFeedCarriageReturnBlankOrTab(stringpart.Last());
+      }
+      mEmptyLines = -1;
+      lineStart = totLen;
+    } else {
+      // Take everything up to the line break.
+      stringpart.Assign(Substring(aString, lineStart, newline - lineStart));
+      mInWhitespace = true;
+      mEmptyLines = 0;
+      lineStart = newline + 1;
+      if ('\r' == *cursor && lineStart < totLen && '\n' == *++cursor) {
+        // There was a CRLF in the input. This used to be illegal and
+        // stripped by the parser. Apparently not anymore. Let's skip
+        // over the LF.
+        ++lineStart;
+      }
+    }
+
+    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
+      if ((outputLineBreak || !spacesOnly) &&  // bugs 261467,125928
+          !IsQuotedLine(stringpart) && !IsSignatureSeparator(stringpart)) {
+        stringpart.Trim(" ", false, true, true);
+      }
+      if (IsSpaceStuffable(stringpart.get()) && !IsQuotedLine(stringpart)) {
+        mCurrentLine.mContent.Append(char16_t(' '));
+      }
+    }
+    mCurrentLine.mContent.Append(stringpart);
+
+    mCurrentLine.MaybeReplaceNbspsInContent(mSettings.GetFlags());
+
+    mOutputManager->Append(mCurrentLine,
+                           OutputManager::StripTrailingWhitespaces::kNo);
+    if (outputLineBreak) {
+      mOutputManager->AppendLineBreak();
+    }
+
+    mCurrentLine.ResetContentAndIndentationHeader();
+  }
+
+#ifdef DEBUG_wrapping
+  printf("No wrapping: newline is %d, totLen is %d\n", newline, totLen);
+#endif
+}
+
+/**
+ * Write a string. This is the highlevel function to use to get text output.
+ * Text that must not be wrapped is handed to ConvertToLinesAndOutput; all
+ * other text goes through AddToLine, which takes care of whitespace
+ * compression and line wrapping.
+ *
+ * @param aStr the text to write; an empty string is ignored.
+ */
+void nsPlainTextSerializer::Write(const nsAString& aStr) {
+  // XXX Copy necessary to use nsString methods and gain
+  // access to underlying buffer
+  nsAutoString str(aStr);
+
+#ifdef DEBUG_wrapping
+  printf("Write(%s): wrap col = %d\n", NS_ConvertUTF16toUTF8(str).get(),
+         mSettings.GetWrapColumn());
+#endif
+
+  const int32_t totLen = str.Length();
+
+  // If the string is empty, do nothing:
+  if (totLen <= 0) return;
+
+  // For Flowed text change nbsp-ses to spaces at end of lines to allow them
+  // to be cut off along with usual spaces if required. (bug #125928)
+  if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
+    ReplaceVisiblyTrailingNbsps(str);
+  }
+
+  // We have two major codepaths here. One that does preformatted text and one
+  // that does normal formatted text. The one for preformatted text outputs
+  // directly while the other code path goes through AddToLine.
+  if ((mPreFormattedMail && !mSettings.GetWrapColumn()) ||
+      (IsElementPreformatted() && !mPreFormattedMail) ||
+      (mSpanLevel > 0 && mEmptyLines >= 0 && IsQuotedLine(str))) {
+    // No intelligent wrapping.
+
+    // This mustn't be mixed with intelligent wrapping without clearing
+    // the mCurrentLine.mContent buffer before!!!
+    NS_ASSERTION(mCurrentLine.mContent.IsEmpty() ||
+                     (IsElementPreformatted() && !mPreFormattedMail),
+                 "Mixed wrapping data and nonwrapping data on the same line");
+    MOZ_ASSERT(mOutputManager);
+
+    if (!mCurrentLine.mContent.IsEmpty()) {
+      mOutputManager->Flush(mCurrentLine);
+    }
+
+    ConvertToLinesAndOutput(str);
+    return;
+  }
+
+  // Intelligent handling of text
+  // If needed, strip out all "end of lines"
+  // and multiple whitespace between words
+  int32_t nextpos;
+  const char16_t* offsetIntoBuffer = nullptr;
+
+  int32_t bol = 0;
+  while (bol < totLen) {  // Loop over lines
+    // Find a place where we may have to do whitespace compression
+    nextpos = str.FindCharInSet(u" \t\n\r", bol);
+#ifdef DEBUG_wrapping
+    nsAutoString remaining;
+    str.Right(remaining, totLen - bol);
+    // Declared here: there is no |foo| in any enclosing scope.
+    char* foo = ToNewCString(remaining);
+    //    printf("Next line: bol = %d, newlinepos = %d, totLen = %d, "
+    //           "string = '%s'\n", bol, nextpos, totLen, foo);
+    free(foo);
+#endif
+
+    if (nextpos == kNotFound) {
+      // No whitespace left: add the rest of the string.
+      offsetIntoBuffer = str.get() + bol;
+      AddToLine(offsetIntoBuffer, totLen - bol);
+      bol = totLen;
+      mInWhitespace = false;
+    } else {
+      // There's still whitespace left in the string
+      if (nextpos != 0 && (nextpos + 1) < totLen) {
+        offsetIntoBuffer = str.get() + nextpos;
+        // skip '\n' if it is between CJ chars
+        if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) &&
+            IS_CJ_CHAR(offsetIntoBuffer[1])) {
+          offsetIntoBuffer = str.get() + bol;
+          AddToLine(offsetIntoBuffer, nextpos - bol);
+          bol = nextpos + 1;
+          continue;
+        }
+      }
+      // If we're already in whitespace and not preformatted, just skip it:
+      if (mInWhitespace && (nextpos == bol) && !mPreFormattedMail &&
+          !mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) {
+        // Skip whitespace
+        bol++;
+        continue;
+      }
+
+      if (nextpos == bol) {
+        // Note that we are in whitespace.
+        mInWhitespace = true;
+        offsetIntoBuffer = str.get() + nextpos;
+        AddToLine(offsetIntoBuffer, 1);
+        bol++;
+        continue;
+      }
+
+      mInWhitespace = true;
+
+      offsetIntoBuffer = str.get() + bol;
+      if (mPreFormattedMail ||
+          mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) {
+        // Preserve the real whitespace character
+        nextpos++;
+        AddToLine(offsetIntoBuffer, nextpos - bol);
+        bol = nextpos;
+      } else {
+        // Replace the whitespace with a space
+        AddToLine(offsetIntoBuffer, nextpos - bol);
+        AddToLine(kSpace.get(), 1);
+        bol = nextpos + 1;  // Let's eat the whitespace
+      }
+    }
+  }  // Continue looping over the string
+}
+
+/**
+ * Gets the value of an attribute of the current element (mElement) as a
+ * string.
+ *
+ * @param aName     the name of the attribute.
+ * @param aValueRet receives the attribute value.
+ * @return NS_OK if the attribute is set, NS_ERROR_NOT_AVAILABLE if there is
+ *         no current element or no such attribute.
+ */
+nsresult nsPlainTextSerializer::GetAttributeValue(const nsAtom* aName,
+                                                  nsString& aValueRet) const {
+  const bool found =
+      mElement && mElement->GetAttr(kNameSpaceID_None, aName, aValueRet);
+  return found ? NS_OK : NS_ERROR_NOT_AVAILABLE;
+}
+
+/**
+ * Returns true if the current element was inserted by Moz' TXT->HTML
+ * converter, in which case we should ignore it. Such elements are recognized
+ * by a class attribute starting with "moz-txt" (optionally preceded by a
+ * double quote), compared ASCII case-insensitively.
+ */
+bool nsPlainTextSerializer::IsCurrentNodeConverted() const {
+  nsAutoString classValue;
+  if (!NS_SUCCEEDED(GetAttributeValue(nsGkAtoms::_class, classValue))) {
+    return false;
+  }
+
+  if (StringBeginsWith(classValue, u"moz-txt"_ns,
+                       nsASCIICaseInsensitiveStringComparator)) {
+    return true;
+  }
+  return StringBeginsWith(classValue, u"\"moz-txt"_ns,
+                          nsASCIICaseInsensitiveStringComparator);
+}
+
+// static
+// Yields the local-name atom of aContent, but only when aContent is an HTML
+// element and that atom is static; in every other case nullptr is returned.
+nsAtom* nsPlainTextSerializer::GetIdForContent(nsIContent* aContent) {
+  if (aContent->IsHTMLElement()) {
+    nsAtom* const tag = aContent->NodeInfo()->NameAtom();
+    if (tag->IsStatic()) {
+      return tag;
+    }
+  }
+  return nullptr;
+}
+
+// Reports the flag on top of mPreformatStack; an empty stack counts as
+// "not preformatted".
+bool nsPlainTextSerializer::IsElementPreformatted() const {
+  if (mPreformatStack.empty()) {
+    return false;
+  }
+  return mPreformatStack.top();
+}
+
+// Decides whether aElement's whitespace/newlines are significant. The
+// computed style (fetched without flushing) is used when there is one; the
+// element's tag is only consulted as a fallback.
+bool nsPlainTextSerializer::IsElementPreformatted(Element* aElement) {
+  RefPtr<const ComputedStyle> style =
+      nsComputedDOMStyle::GetComputedStyleNoFlush(aElement);
+  if (!style) {
+    // No style information: treat only <pre> as preformatted.
+    return GetIdForContent(aElement) == nsGkAtoms::pre;
+  }
+
+  const nsStyleText* text = style->StyleText();
+  return text->WhiteSpaceOrNewlineIsSignificant();
+}
+
+// Decides whether aElement is block-level. The computed style (fetched
+// without flushing) is used when there is one; otherwise the decision falls
+// back to the element's tag.
+bool nsPlainTextSerializer::IsCssBlockLevelElement(Element* aElement) {
+  RefPtr<const ComputedStyle> style =
+      nsComputedDOMStyle::GetComputedStyleNoFlush(aElement);
+  if (!style) {
+    // No style information: rely on the HTML tag classification.
+    return nsContentUtils::IsHTMLBlockLevelElement(aElement);
+  }
+
+  const nsStyleDisplay* display = style->StyleDisplay();
+  return display->IsBlockOutsideStyle();
+}
+
+/**
+ * Needed only to recognise LI's that sit inside an OL.
+ *
+ * Walks the tag stack from the innermost entry outwards and stops at the
+ * first list container it meets.
+ *
+ * @return true if that container is an OL; false if it is a UL (the LI then
+ *         belongs to a UL nested in an OL) or if no list container is found
+ *         at all (orphan LI).
+ */
+bool nsPlainTextSerializer::IsInOL() const {
+  for (int32_t depth = static_cast<int32_t>(mTagStackIndex) - 1; depth >= 0;
+       --depth) {
+    const nsAtom* const tag = mTagStack[depth];
+    if (tag == nsGkAtoms::ol) {
+      return true;
+    }
+    if (tag == nsGkAtoms::ul) {
+      return false;
+    }
+  }
+  return false;
+}
+
+// True while at least one UL is counted in mULCount or mOLStack holds an
+// entry.
+bool nsPlainTextSerializer::IsInOlOrUl() const {
+  if (mULCount > 0) {
+    return true;
+  }
+  return !mOLStack.IsEmpty();
+}
+
+/*
+  Maps a heading tag atom to its level.
+
+  @return 0 = no header, 1 = h1, ..., 6 = h6
+*/
+int32_t HeaderLevel(const nsAtom* aTag) {
+  // Ordered so that the entry at position N-1 is the atom for <hN>.
+  const nsAtom* const headings[] = {nsGkAtoms::h1, nsGkAtoms::h2,
+                                    nsGkAtoms::h3, nsGkAtoms::h4,
+                                    nsGkAtoms::h5, nsGkAtoms::h6};
+
+  int32_t level = 0;
+  for (const nsAtom* heading : headings) {
+    ++level;
+    if (aTag == heading) {
+      return level;
+    }
+  }
+  return 0;
+}
+
+/* Column width of an ISO 10646 character, as used by the functions below:
+ *
+ *    - U+0000 (the null character) occupies 0 columns.
+ *
+ *    - Any other C0/C1 control character, and DEL, yields -1.
+ *
+ *    - Non-spacing and enclosing combining characters (general category
+ *      Mn or Me in the Unicode database) occupy 0 columns.
+ *
+ *    - Spacing characters in the East Asian Wide (W) or East Asian
+ *      FullWidth (F) category as defined in Unicode Technical Report #11
+ *      occupy 2 columns.
+ *
+ *    - Everything else (including all printable ISO 8859-1 and WGL4
+ *      characters, Unicode control characters, etc.) occupies 1 column.
+ */
+
+int32_t GetUnicharWidth(char32_t aCh) {
+  if (aCh == 0) {
+    return 0;
+  }
+
+  /* 8-bit control characters: C0, then DEL together with C1 */
+  const bool isC0Control = aCh < 32;
+  const bool isDelOrC1Control = aCh >= 0x7f && aCh < 0xa0;
+  if (isC0Control || isDelOrC1Control) {
+    return -1;
+  }
+
+  /* Nothing below U+0300 is a combining character */
+  if (aCh < 0x0300) {
+    return 1;
+  }
+
+  const auto category = unicode::GetGeneralCategory(aCh);
+  const bool isZeroWidthMark =
+      category == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK ||
+      category == HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK;
+  if (isZeroWidthMark) {
+    return 0;
+  }
+
+  /* From here on aCh is neither a combining mark nor a C0/C1 control. */
+
+  /* Cheap range test that settles most non-wide scripts */
+  if (aCh < 0x1100) {
+    return 1;
+  }
+
+  if (intl::UnicodeProperties::IsEastAsianWidthFW(aCh)) {
+    return 2;
+  }
+  return 1;
+}
+
+// Sums the column widths of all code points in a UTF-16 string. A high
+// surrogate directly followed by a low surrogate is combined into one code
+// point; any other code unit is measured on its own.
+int32_t GetUnicharStringWidth(Span<const char16_t> aString) {
+  int32_t total = 0;
+  auto cursor = aString.begin();
+  const auto end = aString.end();
+
+  while (cursor != end) {
+    char32_t codePoint = *cursor;
+    ++cursor;
+    if (NS_IS_HIGH_SURROGATE(codePoint) && cursor != end &&
+        NS_IS_LOW_SURROGATE(*cursor)) {
+      const char16_t low = *cursor;
+      ++cursor;
+      codePoint = SURROGATE_TO_UCS4(codePoint, low);
+    }
+
+    const int32_t columns = GetUnicharWidth(codePoint);
+    // Non-printable characters (negative width) count as 1, for bug 94475.
+    if (columns < 0) {
+      total += 1;
+    } else {
+      total += columns;
+    }
+  }
+  return total;
+}
diff --git a/dom/serializers/nsPlainTextSerializer.h b/dom/serializers/nsPlainTextSerializer.h
new file mode 100644
index 0000000000..7c5e8db64c
--- /dev/null
+++ b/dom/serializers/nsPlainTextSerializer.h
@@ -0,0 +1,384 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * nsIContentSerializer implementation that can be used with an
+ * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
+ * (eg for copy/paste as plaintext).
+ */
+
+#ifndef nsPlainTextSerializer_h__
+#define nsPlainTextSerializer_h__
+
+#include "mozilla/Attributes.h"
+#include "mozilla/Maybe.h"
+#include "nsCOMPtr.h"
+#include "nsAtom.h"
+#include "nsCycleCollectionParticipant.h"
+#include "nsIContentSerializer.h"
+#include "nsIDocumentEncoder.h"
+#include "nsString.h"
+#include "nsTArray.h"
+
+#include <stack>
+
+class nsIContent;
+
+namespace mozilla::dom {
+class DocumentType;
+class Element;
+} // namespace mozilla::dom
+
+// nsIContentSerializer implementation that turns DOM content into plain text.
+// State is grouped into three helpers declared below: Settings (flags, prefs
+// and wrap column), CurrentLine (the line being assembled) and OutputManager
+// (the output string).
+class nsPlainTextSerializer final : public nsIContentSerializer {
+ public:
+ nsPlainTextSerializer();
+
+ NS_DECL_CYCLE_COLLECTING_ISUPPORTS
+ NS_DECL_CYCLE_COLLECTION_CLASS(nsPlainTextSerializer)
+
+ // nsIContentSerializer
+ NS_IMETHOD Init(uint32_t flags, uint32_t aWrapColumn,
+ const mozilla::Encoding* aEncoding, bool aIsCopying,
+ bool aIsWholeDocument, bool* aNeedsPreformatScanning,
+ nsAString& aOutput) override;
+
+ NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset,
+ int32_t aEndOffset) override;
+ NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection, int32_t aStartOffset,
+ int32_t aEndOffset) override;
+ // Processing instructions, comments and doctypes produce no output: the
+ // three overrides below simply return NS_OK.
+ NS_IMETHOD AppendProcessingInstruction(
+ mozilla::dom::ProcessingInstruction* aPI, int32_t aStartOffset,
+ int32_t aEndOffset) override {
+ return NS_OK;
+ }
+ NS_IMETHOD AppendComment(mozilla::dom::Comment* aComment,
+ int32_t aStartOffset, int32_t aEndOffset) override {
+ return NS_OK;
+ }
+ NS_IMETHOD AppendDoctype(mozilla::dom::DocumentType* aDoctype) override {
+ return NS_OK;
+ }
+ NS_IMETHOD AppendElementStart(
+ mozilla::dom::Element* aElement,
+ mozilla::dom::Element* aOriginalElement) override;
+ NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement,
+ mozilla::dom::Element* aOriginalElement) override;
+
+ NS_IMETHOD FlushAndFinish() override;
+
+ NS_IMETHOD Finish() override;
+
+ NS_IMETHOD GetOutputLength(uint32_t& aLength) const override;
+
+ NS_IMETHOD AppendDocumentStart(mozilla::dom::Document* aDocument) override;
+
+ NS_IMETHOD ScanElementForPreformat(mozilla::dom::Element* aElement) override;
+ NS_IMETHOD ForgetElementForPreformat(
+ mozilla::dom::Element* aElement) override;
+
+ private:
+ ~nsPlainTextSerializer();
+
+ // @return NS_ERROR_NOT_AVAILABLE if the attribute was not specified.
+ nsresult GetAttributeValue(const nsAtom* aName, nsString& aValueRet) const;
+ void AddToLine(const char16_t* aStringToAdd, int32_t aLength);
+
+ void MaybeWrapAndOutputCompleteLines();
+
+ // @param aSoftLineBreak A soft line break is a space followed by a linebreak
+ // (cf. https://www.ietf.org/rfc/rfc3676.txt, section 4.2).
+ void EndLine(bool aSoftLineBreak, bool aBreakBySpace = false);
+
+ void EnsureVerticalSpace(int32_t noOfRows);
+
+ void ConvertToLinesAndOutput(const nsAString& aString);
+
+ void Write(const nsAString& aString);
+
+ // @return true, iff the element's whitespace and newline characters have to
+ // be preserved according to its style or because it's a `<pre>`
+ // element.
+ bool IsElementPreformatted() const;
+ bool IsInOL() const;
+ bool IsInOlOrUl() const;
+ bool IsCurrentNodeConverted() const;
+ bool MustSuppressLeaf() const;
+
+ /**
+ * Returns the local name of the element as an atom if the element is an
+ * HTML element and the atom is a static atom. Otherwise, nullptr is returned.
+ */
+ static nsAtom* GetIdForContent(nsIContent* aContent);
+ nsresult DoOpenContainer(const nsAtom* aTag);
+ void OpenContainerForOutputFormatted(const nsAtom* aTag);
+ nsresult DoCloseContainer(const nsAtom* aTag);
+ void CloseContainerForOutputFormatted(const nsAtom* aTag);
+ nsresult DoAddLeaf(const nsAtom* aTag);
+
+ void DoAddText();
+ // @param aText Ignored if aIsLineBreak is true.
+ void DoAddText(bool aIsLineBreak, const nsAString& aText);
+
+ // Output is produced only while mHeadLevel is 0.
+ inline bool DoOutput() const { return mHeadLevel == 0; }
+
+ // A line counts as quoted when it is non-empty and starts with '>'.
+ static inline bool IsQuotedLine(const nsAString& aLine) {
+ return !aLine.IsEmpty() && aLine.First() == char16_t('>');
+ }
+
+ // Stack handling functions
+ bool GetLastBool(const nsTArray<bool>& aStack);
+ void SetLastBool(nsTArray<bool>& aStack, bool aValue);
+ void PushBool(nsTArray<bool>& aStack, bool aValue);
+ bool PopBool(nsTArray<bool>& aStack);
+
+ bool IsIgnorableRubyAnnotation(const nsAtom* aTag) const;
+
+ // @return true, iff the element's whitespace and newline characters have to
+ // be preserved according to its style or because it's a `<pre>`
+ // element.
+ static bool IsElementPreformatted(mozilla::dom::Element* aElement);
+
+ // https://drafts.csswg.org/css-display/#block-level
+ static bool IsCssBlockLevelElement(mozilla::dom::Element* aElement);
+
+ private:
+ // See DoOutput(): a non-zero value suppresses output.
+ uint32_t mHeadLevel;
+
+ class Settings {
+ public:
+ enum class HeaderStrategy {
+ kNoIndentation,
+ kIndentIncreasedWithHeaderLevel,
+ kNumberHeadingsAndIndentSlightly
+ };
+
+ // May adapt the flags.
+ //
+ // @param aFlags As defined in nsIDocumentEncoder.idl.
+ void Init(int32_t aFlags, uint32_t aWrapColumn);
+
+ // Pref: converter.html2txt.structs.
+ bool GetStructs() const { return mStructs; }
+
+ // Pref: converter.html2txt.header_strategy.
+ HeaderStrategy GetHeaderStrategy() const { return mHeaderStrategy; }
+
+ // @return As defined in nsIDocumentEncoder.idl.
+ int32_t GetFlags() const { return mFlags; }
+
+ // @param aFlag As defined in nsIDocumentEncoder.idl. May consist of
+ // multiple bitwise or'd flags.
+ // @return true if at least one of the bits in aFlag is set in mFlags.
+ bool HasFlag(int32_t aFlag) const { return mFlags & aFlag; }
+
+ // Whether the output should include ruby annotations.
+ bool GetWithRubyAnnotation() const { return mWithRubyAnnotation; }
+
+ uint32_t GetWrapColumn() const { return mWrapColumn; }
+
+ // Wrapping needs a non-zero wrap column and OutputFormatted or OutputWrap.
+ bool MayWrap() const {
+ return GetWrapColumn() && HasFlag(nsIDocumentEncoder::OutputFormatted |
+ nsIDocumentEncoder::OutputWrap);
+ }
+
+ bool MayBreakLines() const {
+ return !HasFlag(nsIDocumentEncoder::OutputDisallowLineBreaking);
+ }
+
+ private:
+ // @param aPrefHeaderStrategy Pref: converter.html2txt.header_strategy.
+ static HeaderStrategy Convert(int32_t aPrefHeaderStrategy);
+
+ // Pref: converter.html2txt.structs.
+ bool mStructs = true;
+
+ // Pref: converter.html2txt.header_strategy.
+ HeaderStrategy mHeaderStrategy =
+ HeaderStrategy::kIndentIncreasedWithHeaderLevel;
+
+ // Flags defined in nsIDocumentEncoder.idl.
+ int32_t mFlags = 0;
+
+ // Whether the output should include ruby annotations.
+ bool mWithRubyAnnotation = false;
+
+ // The wrap column is how many fixed-pitch narrow
+ // (https://unicode.org/reports/tr11/) (e.g. Latin) characters
+ // should be allowed on a line. There could be fewer chars if the chars
+ // are wider than Latin chars, or more if the chars are narrower.
+ uint32_t mWrapColumn = 0;
+ };
+
+ Settings mSettings;
+
+ struct Indentation {
+ // The number of space characters to be inserted including the length of
+ // mHeader.
+ int32_t mLength = 0;
+
+ // The header that has to be written in the indent.
+ // That could be, for instance, the bullet in a bulleted list.
+ nsString mHeader;
+ };
+
+ class CurrentLine {
+ public:
+ void ResetContentAndIndentationHeader();
+
+ // @param aFlags As defined in nsIDocumentEncoder.idl.
+ void MaybeReplaceNbspsInContent(int32_t aFlags);
+
+ void CreateQuotesAndIndent(nsAString& aResult) const;
+
+ bool HasContentOrIndentationHeader() const {
+ return !mContent.IsEmpty() || !mIndentation.mHeader.IsEmpty();
+ }
+
+ // @param aUseLineBreaker NOTE(review): presumably selects whether the line
+ // breaker is consulted when looking for the wrap index (cf.
+ // mUseLineBreaker) -- confirm against the implementation.
+ int32_t FindWrapIndexForContent(uint32_t aWrapColumn,
+ bool aUseLineBreaker) const;
+
+ // @return Combined width of cite quote level and indentation.
+ uint32_t DeterminePrefixWidth() const {
+ // XXX: Should calculate prefixwidth with GetUnicharStringWidth
+ return (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1 : 0) +
+ mIndentation.mLength;
+ }
+
+ Indentation mIndentation;
+
+ // The number of '>' characters.
+ int32_t mCiteQuoteLevel = 0;
+
+ // Excludes indentation and quotes.
+ nsString mContent;
+ };
+
+ CurrentLine mCurrentLine;
+
+ class OutputManager {
+ public:
+ /**
+ * @param aFlags As defined in nsIDocumentEncoder.idl.
+ * @param aOutput An empty string.
+ */
+ OutputManager(int32_t aFlags, nsAString& aOutput);
+
+ enum class StripTrailingWhitespaces { kMaybe, kNo };
+
+ void Append(const CurrentLine& aCurrentLine,
+ StripTrailingWhitespaces aStripTrailingWhitespaces);
+
+ void AppendLineBreak();
+
+ /**
+ * This empties the current line cache without adding a NEWLINE.
+ * Should not be used if line wrapping is of importance since
+ * this function destroys the cache information.
+ *
+ * It will also write indentation and quotes if we believe we are
+ * at the start of the line.
+ */
+ void Flush(CurrentLine& aCurrentLine);
+
+ bool IsAtFirstColumn() const { return mAtFirstColumn; }
+
+ uint32_t GetOutputLength() const;
+
+ private:
+ /**
+ * @param aString Last character is expected to not be a line break.
+ */
+ void Append(const nsAString& aString);
+
+ // As defined in nsIDocumentEncoder.idl.
+ const int32_t mFlags;
+
+ nsAString& mOutput;
+
+ bool mAtFirstColumn;
+
+ nsString mLineBreak;
+ };
+
+ mozilla::Maybe<OutputManager> mOutputManager;
+
+ // If we've just written out a cite blockquote, we need to remember it
+ // so we don't duplicate spaces before a <pre wrap> (which mail uses to quote
+ // old messages).
+ bool mHasWrittenCiteBlockquote;
+
+ int32_t mFloatingLines; // To store the number of lazy line breaks
+
+ // Treat quoted text as though it's preformatted -- don't wrap it.
+ // Having it on a pref is a temporary measure, See bug 69638.
+ // NOTE(review): the two lines above do not appear to describe mSpanLevel;
+ // they look like a leftover from a removed member -- confirm and clean up.
+ int32_t mSpanLevel;
+
+ int32_t mEmptyLines; // Will be the number of empty lines before
+ // the current. 0 if we are starting a new
+ // line and -1 if we are in a line.
+
+ bool mInWhitespace;
+ bool mPreFormattedMail; // we're dealing with special DOM
+ // used by Thunderbird code.
+
+ // While handling a new tag, this variable should remind if any line break
+ // is due because of a closing tag. Setting it to "TRUE" while closing the
+ // tags. Hence opening tags are guaranteed to start with appropriate line
+ // breaks.
+ bool mLineBreakDue;
+
+ bool mPreformattedBlockBoundary;
+
+ int32_t mHeaderCounter[7]; /* For header-numbering:
+ Number of previous headers of
+ the same depth and in the same
+ section.
+ mHeaderCounter[1] for <h1> etc. */
+
+ RefPtr<mozilla::dom::Element> mElement;
+
+ // For handling table rows
+ AutoTArray<bool, 8> mHasWrittenCellsForRow;
+
+ // Values gotten in OpenContainer that is (also) needed in CloseContainer
+ AutoTArray<bool, 8> mIsInCiteBlockquote;
+
+ // The tag stack: the stack of tags we're operating on, so we can nest.
+ // The stack only ever points to static atoms, so they don't need to be
+ // refcounted.
+ const nsAtom** mTagStack;
+ uint32_t mTagStackIndex;
+
+ // The stack indicating whether the elements we've been operating on are
+ // CSS preformatted elements, so that we can tell if the text inside them
+ // should be formatted.
+ std::stack<bool> mPreformatStack;
+
+ // Content in the stack above this index should be ignored:
+ uint32_t mIgnoreAboveIndex;
+
+ // The stack for ordered lists
+ AutoTArray<int32_t, 100> mOLStack;
+
+ uint32_t mULCount;
+
+ bool mUseLineBreaker = false;
+
+ // Convenience constant. It would be nice to have it as a const static
+ // variable, but that causes issues with OpenBSD and module unloading.
+ const nsString kSpace;
+
+ // mIgnoredChildNodeLevel is used to tell if current node is an ignorable
+ // child node. The initial value of mIgnoredChildNodeLevel is 0. When
+ // serializer enters those specific nodes, mIgnoredChildNodeLevel increases
+ // and is greater than 0. Otherwise when serializer leaves those nodes,
+ // mIgnoredChildNodeLevel decreases.
+ uint32_t mIgnoredChildNodeLevel;
+};
+
+nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer);
+
+#endif
diff --git a/dom/serializers/nsXHTMLContentSerializer.cpp b/dom/serializers/nsXHTMLContentSerializer.cpp
new file mode 100644
index 0000000000..76c8dc9e20
--- /dev/null
+++ b/dom/serializers/nsXHTMLContentSerializer.cpp
@@ -0,0 +1,731 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * nsIContentSerializer implementation that can be used with an
+ * nsIDocumentEncoder to convert an XHTML (not HTML!) DOM to an XHTML
+ * string that could be parsed into more or less the original DOM.
+ */
+
+#include "nsXHTMLContentSerializer.h"
+
+#include "mozilla/dom/Element.h"
+#include "nsIContent.h"
+#include "mozilla/dom/Document.h"
+#include "nsElementTable.h"
+#include "nsNameSpaceManager.h"
+#include "nsString.h"
+#include "nsUnicharUtils.h"
+#include "nsIDocumentEncoder.h"
+#include "nsGkAtoms.h"
+#include "nsIURI.h"
+#include "nsNetUtil.h"
+#include "nsEscape.h"
+#include "nsCRT.h"
+#include "nsContentUtils.h"
+#include "nsIScriptElement.h"
+#include "nsStubMutationObserver.h"
+#include "nsAttrName.h"
+#include "nsComputedDOMStyle.h"
+
+using namespace mozilla;
+using namespace mozilla::dom;
+
+// Threshold used by HasLongLines(): a line longer than this many characters
+// makes the text count as having long lines.
+static const int32_t kLongLineLen = 128;
+
+// Literal used by SerializeAttributes() when emitting namespace declarations.
+#define kXMLNS "xmlns"
+
+// Factory: creates an nsXHTMLContentSerializer and transfers the reference
+// to the caller through aSerializer. Always returns NS_OK.
+nsresult NS_NewXHTMLContentSerializer(nsIContentSerializer** aSerializer) {
+  RefPtr<nsXHTMLContentSerializer> serializer(new nsXHTMLContentSerializer());
+  serializer.forget(aSerializer);
+  return NS_OK;
+}
+
+// Starts with every XHTML-specific flag cleared and the entity-encoding
+// counter at zero; Init() assigns the per-run values.
+nsXHTMLContentSerializer::nsXHTMLContentSerializer()
+ : mIsHTMLSerializer(false),
+ mIsCopying(false),
+ mDisableEntityEncoding(0),
+ mRewriteEncodingDeclaration(false),
+ mIsFirstChildOfOL(false) {}
+
+// Every olState pushed while copying an <ol> is expected to have been popped
+// again (see CheckElementEnd) by the time the serializer is destroyed.
+nsXHTMLContentSerializer::~nsXHTMLContentSerializer() {
+ NS_ASSERTION(mOLStateStack.IsEmpty(), "Expected OL State stack to be empty");
+}
+
+// Prepares the serializer for a new run: normalises the flags, initialises
+// the XML base serializer and resets the XHTML-specific state.
+NS_IMETHODIMP
+nsXHTMLContentSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn,
+                               const Encoding* aEncoding, bool aIsCopying,
+                               bool aRewriteEncodingDeclaration,
+                               bool* aNeedsPreformatScanning,
+                               nsAString& aOutput) {
+  // The previous HTML serializer wrapped implicitly, so formatted output
+  // keeps implying wrapping for compatibility with existing callers.
+  // XXXLJ perhaps should we remove this default settings later ?
+  const bool formatted = (aFlags & nsIDocumentEncoder::OutputFormatted) != 0;
+  if (formatted) {
+    aFlags |= nsIDocumentEncoder::OutputWrap;
+  }
+
+  nsresult rv = nsXMLContentSerializer::Init(
+      aFlags, aWrapColumn, aEncoding, aIsCopying, aRewriteEncodingDeclaration,
+      aNeedsPreformatScanning, aOutput);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Per-run state.
+  mIsCopying = aIsCopying;
+  mRewriteEncodingDeclaration = aRewriteEncodingDeclaration;
+  mBodyOnly = (mFlags & nsIDocumentEncoder::OutputBodyOnly);
+  mIsFirstChildOfOL = false;
+  mDisableEntityEncoding = 0;
+  mInBody = 0;
+
+  return NS_OK;
+}
+
+// Checks whether any line of `text` is longer than kLongLineLen. If so, the
+// formatting is presumed to be wonky (e.g. the node has been edited) and the
+// caller had better rewrap the whole text node.
+//
+// aLastNewlineOffset receives the offset of the last '\n' found, or kNotFound
+// when the text contains none.
+bool nsXHTMLContentSerializer::HasLongLines(const nsString& text,
+                                            int32_t& aLastNewlineOffset) {
+  bool foundLongLine = false;
+  aLastNewlineOffset = kNotFound;
+
+  const uint32_t length = text.Length();
+  uint32_t lineStart = 0;
+  while (lineStart < length) {
+    int32_t lineEnd = text.FindChar('\n', lineStart);
+    if (lineEnd >= 0) {
+      aLastNewlineOffset = lineEnd;
+    } else {
+      // Last line without a trailing newline: it runs to the end of the text.
+      lineEnd = length;
+    }
+
+    if (int32_t(lineEnd - lineStart) > kLongLineLen) {
+      foundLongLine = true;
+    }
+    lineStart = lineEnd + 1;
+  }
+  return foundLongLine;
+}
+
+// Serializes the [aStartOffset, aEndOffset) part of a text node into mOutput,
+// choosing the append routine from the current mode: raw/preformatted,
+// formatted, wrapped, or unwrapped with a rewrap fallback for long lines.
+NS_IMETHODIMP
+nsXHTMLContentSerializer::AppendText(nsIContent* aText, int32_t aStartOffset,
+                                     int32_t aEndOffset) {
+  NS_ENSURE_ARG(aText);
+  NS_ENSURE_STATE(mOutput);
+
+  nsAutoString data;
+  nsresult rv = AppendTextData(aText, aStartOffset, aEndOffset, data, true);
+  if (NS_FAILED(rv)) return NS_ERROR_FAILURE;
+
+  if (mDoRaw || PreLevel() > 0) {
+    NS_ENSURE_TRUE(AppendToStringConvertLF(data, *mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+    return NS_OK;
+  }
+
+  if (mDoFormat) {
+    NS_ENSURE_TRUE(AppendToStringFormatedWrapped(data, *mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+    return NS_OK;
+  }
+
+  if (mDoWrap) {
+    NS_ENSURE_TRUE(AppendToStringWrapped(data, *mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+    return NS_OK;
+  }
+
+  int32_t lastNewlineOffset = kNotFound;
+  if (!HasLongLines(data, lastNewlineOffset)) {
+    NS_ENSURE_TRUE(AppendToStringConvertLF(data, *mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+    return NS_OK;
+  }
+
+  // We have long lines, rewrap: wrapping is switched on just for this call.
+  mDoWrap = true;
+  bool result = AppendToStringWrapped(data, *mOutput);
+  mDoWrap = false;
+  NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY);
+
+  return NS_OK;
+}
+
+// Serializes the attributes of aElement into aStr.
+//
+// Besides the plain attribute loop this also:
+//  - tracks OL/LI state while copying, so that LI ordinals can be emitted;
+//  - emits (and pushes) a namespace declaration for the tag when aAddNSAttr
+//    is set, and for any attribute whose prefix needs one;
+//  - skips the attribute at index aSkipAttr and any attribute whose name
+//    starts with "_moz" or "-moz";
+//  - for XHTML elements: makes href/src absolute when OutputAbsoluteLinks is
+//    set, rewrites the content-type <meta> charset when requested, and
+//    expands shorthand (boolean) attributes.
+//
+// @return false as soon as one of the append helpers reports failure, true
+//         otherwise.
+bool nsXHTMLContentSerializer::SerializeAttributes(
+    Element* aElement, Element* aOriginalElement, nsAString& aTagPrefix,
+    const nsAString& aTagNamespaceURI, nsAtom* aTagName, nsAString& aStr,
+    uint32_t aSkipAttr, bool aAddNSAttr) {
+  nsAutoString prefixStr, uriStr, valueStr;
+  nsAutoString xmlnsStr;
+  xmlnsStr.AssignLiteral(kXMLNS);
+
+  int32_t contentNamespaceID = aElement->GetNameSpaceID();
+
+  MaybeSerializeIsValue(aElement, aStr);
+
+  // this method is not called by nsHTMLContentSerializer
+  // so we don't have to check HTML element, just XHTML
+
+  if (mIsCopying && kNameSpaceID_XHTML == contentNamespaceID) {
+    // Need to keep track of OL and LI elements in order to get ordinal number
+    // for the LI.
+    if (aTagName == nsGkAtoms::ol) {
+      // We are copying and current node is an OL;
+      // Store its start attribute value in olState->startVal.
+      nsAutoString start;
+      int32_t startAttrVal = 0;
+      aElement->GetAttr(kNameSpaceID_None, nsGkAtoms::start, start);
+      if (!start.IsEmpty()) {
+        nsresult parseRv = NS_OK;
+        startAttrVal = start.ToInteger(&parseRv);
+        // If OL has "start" attribute, first LI element has to start with that
+        // value. Therefore subtracting 1 as all the LI elements are
+        // incrementing it before using it; in failure of ToInteger(), default
+        // startAttrVal to 0.
+        if (NS_SUCCEEDED(parseRv)) {
+          --startAttrVal;
+        } else {
+          startAttrVal = 0;
+        }
+      }
+      olState state(startAttrVal, true);
+      mOLStateStack.AppendElement(state);
+    } else if (aTagName == nsGkAtoms::li) {
+      mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement);
+      if (mIsFirstChildOfOL) {
+        // If OL is parent of this LI, serialize attributes in different manner.
+        NS_ENSURE_TRUE(SerializeLIValueAttribute(aElement, aStr), false);
+      }
+    }
+  }
+
+  // If we had to add a new namespace declaration, serialize
+  // and push it on the namespace stack
+  if (aAddNSAttr) {
+    if (aTagPrefix.IsEmpty()) {
+      // Serialize default namespace decl
+      NS_ENSURE_TRUE(
+          SerializeAttr(u""_ns, xmlnsStr, aTagNamespaceURI, aStr, true), false);
+    } else {
+      // Serialize namespace decl
+      NS_ENSURE_TRUE(
+          SerializeAttr(xmlnsStr, aTagPrefix, aTagNamespaceURI, aStr, true),
+          false);
+    }
+    PushNameSpaceDecl(aTagPrefix, aTagNamespaceURI, aOriginalElement);
+  }
+
+  const uint32_t count = aElement->GetAttrCount();
+
+  // Now serialize each of the attributes
+  // XXX Unfortunately we need a namespace manager to get
+  // attribute URIs.
+  for (uint32_t index = 0; index < count; index++) {
+    if (aSkipAttr == index) {
+      continue;
+    }
+
+    dom::BorrowedAttrInfo info = aElement->GetAttrInfoAt(index);
+    const nsAttrName* name = info.mName;
+
+    int32_t namespaceID = name->NamespaceID();
+    nsAtom* attrName = name->LocalName();
+    nsAtom* attrPrefix = name->GetPrefix();
+
+    // Filter out any attribute starting with [-|_]moz
+    // (nameStr is also what gets serialized as the attribute name below.)
+    nsDependentAtomString nameStr(attrName);
+    if (StringBeginsWith(nameStr, u"_moz"_ns) ||
+        StringBeginsWith(nameStr, u"-moz"_ns)) {
+      continue;
+    }
+
+    if (attrPrefix) {
+      attrPrefix->ToString(prefixStr);
+    } else {
+      prefixStr.Truncate();
+    }
+
+    bool addNSAttr = false;
+    if (kNameSpaceID_XMLNS != namespaceID) {
+      nsNameSpaceManager::GetInstance()->GetNameSpaceURI(namespaceID, uriStr);
+      addNSAttr = ConfirmPrefix(prefixStr, uriStr, aOriginalElement, true);
+    }
+
+    info.mValue->ToString(valueStr);
+
+    bool isJS = false;
+
+    if (kNameSpaceID_XHTML == contentNamespaceID) {
+      if (mIsCopying && mIsFirstChildOfOL && (aTagName == nsGkAtoms::li) &&
+          (attrName == nsGkAtoms::value)) {
+        // This is handled separately in SerializeLIValueAttribute()
+        continue;
+      }
+
+      isJS = IsJavaScript(aElement, attrName, namespaceID, valueStr);
+
+      if (namespaceID == kNameSpaceID_None &&
+          ((attrName == nsGkAtoms::href) || (attrName == nsGkAtoms::src))) {
+        // Make all links absolute when converting only the selection:
+        if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) {
+          // Would be nice to handle OBJECT tags,
+          // but that gets more complicated since we have to
+          // search the tag list for CODEBASE as well.
+          // For now, just leave them relative.
+          nsIURI* uri = aElement->GetBaseURI();
+          if (uri) {
+            nsAutoString absURI;
+            const nsresult rv = NS_MakeAbsoluteURI(absURI, valueStr, uri);
+            // On failure the value is left as found in the document.
+            if (NS_SUCCEEDED(rv)) {
+              valueStr = absURI;
+            }
+          }
+        }
+      }
+
+      if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
+          attrName == nsGkAtoms::content) {
+        // If we're serializing a <meta http-equiv="content-type">,
+        // use the proper value, rather than what's in the document.
+        nsAutoString header;
+        aElement->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
+        if (header.LowerCaseEqualsLiteral("content-type")) {
+          valueStr =
+              u"text/html; charset="_ns + NS_ConvertASCIItoUTF16(mCharset);
+        }
+      }
+
+      // Expand shorthand attribute.
+      if (namespaceID == kNameSpaceID_None &&
+          IsShorthandAttr(attrName, aTagName) && valueStr.IsEmpty()) {
+        valueStr = nameStr;
+      }
+    } else {
+      isJS = IsJavaScript(aElement, attrName, namespaceID, valueStr);
+    }
+
+    // JavaScript values are passed with the last argument set to false.
+    NS_ENSURE_TRUE(SerializeAttr(prefixStr, nameStr, valueStr, aStr, !isJS),
+                   false);
+
+    if (addNSAttr) {
+      NS_ASSERTION(!prefixStr.IsEmpty(),
+                   "Namespaced attributes must have a prefix");
+      NS_ENSURE_TRUE(SerializeAttr(xmlnsStr, prefixStr, uriStr, aStr, true),
+                     false);
+      PushNameSpaceDecl(prefixStr, uriStr, aOriginalElement);
+    }
+  }
+
+  return true;
+}
+
+// Hook run after an element's start tag has been written. When the encoding
+// declaration is being rewritten and the element is an HTML <head> without a
+// content-type <meta> child, one is inserted here with the serializer's
+// charset. (Existing ones are rewritten in SerializeAttributes.)
+//
+// @return false if appending to aStr failed, true otherwise.
+bool nsXHTMLContentSerializer::AfterElementStart(nsIContent* aContent,
+                                                 nsIContent* aOriginalElement,
+                                                 nsAString& aStr) {
+  if (!mRewriteEncodingDeclaration ||
+      !aContent->IsHTMLElement(nsGkAtoms::head)) {
+    return true;
+  }
+
+  // Look for an existing <meta http-equiv="content-type" content=...> child;
+  // if there is one, nothing has to be inserted.
+  for (nsIContent* child = aContent->GetFirstChild(); child;
+       child = child->GetNextSibling()) {
+    if (!child->IsHTMLElement(nsGkAtoms::meta) ||
+        !child->AsElement()->HasAttr(kNameSpaceID_None, nsGkAtoms::content)) {
+      continue;
+    }
+
+    nsAutoString header;
+    child->AsElement()->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv,
+                                header);
+    if (header.LowerCaseEqualsLiteral("content-type")) {
+      return true;
+    }
+  }
+
+  // None found: emit our own declaration on a new line.
+  NS_ENSURE_TRUE(AppendNewLineToString(aStr), false);
+  if (mDoFormat) {
+    NS_ENSURE_TRUE(AppendIndentation(aStr), false);
+  }
+  NS_ENSURE_TRUE(AppendToString(u"<meta http-equiv=\"content-type\""_ns, aStr),
+                 false);
+  NS_ENSURE_TRUE(AppendToString(u" content=\"text/html; charset="_ns, aStr),
+                 false);
+  NS_ENSURE_TRUE(AppendToString(NS_ConvertASCIItoUTF16(mCharset), aStr),
+                 false);
+
+  // HTML output closes the tag with ">", XHTML output with " />".
+  if (mIsHTMLSerializer) {
+    NS_ENSURE_TRUE(AppendToString(u"\">"_ns, aStr), false);
+  } else {
+    NS_ENSURE_TRUE(AppendToString(u"\" />"_ns, aStr), false);
+  }
+
+  return true;
+}
+
+// Hook run after an element's end tag: leaving a <body> decrements the
+// body nesting counter that CheckElementStart incremented.
+void nsXHTMLContentSerializer::AfterElementEnd(nsIContent* aContent,
+                                               nsAString& aStr) {
+  NS_ASSERTION(!mIsHTMLSerializer,
+               "nsHTMLContentSerializer shouldn't call this method !");
+
+  // nsHTMLContentSerializer never gets here, so only the XHTML case has to
+  // be handled.
+  if (!aContent->IsHTMLElement(nsGkAtoms::body)) {
+    return;
+  }
+  --mInBody;
+}
+
+// In body-only mode nothing is written for the document start; otherwise the
+// call is forwarded to the XML serializer.
+NS_IMETHODIMP
+nsXHTMLContentSerializer::AppendDocumentStart(Document* aDocument) {
+  if (mBodyOnly) {
+    return NS_OK;
+  }
+  return nsXMLContentSerializer::AppendDocumentStart(aDocument);
+}
+
+// Pre-check before an element's start tag is serialized.
+//
+// @param aForceFormat  Out: true when the element carries _moz_dirty and
+//                      OutputIgnoreMozDirty is not set.
+// @param aResult       Out: NS_OK, or NS_ERROR_OUT_OF_MEMORY if the newline
+//                      written for a <br> could not be appended.
+// @return false when the element must not be serialized any further (a <br>
+//         inside <pre> with OutputNoFormattingInPre, already emitted as a
+//         newline), true otherwise.
+bool nsXHTMLContentSerializer::CheckElementStart(Element* aElement,
+                                                 bool& aForceFormat,
+                                                 nsAString& aStr,
+                                                 nsresult& aResult) {
+  aResult = NS_OK;
+
+  // The editor emits _moz_dirty to ask for this element to be pretty
+  // printed even when pretty printing is otherwise off.
+  aForceFormat = false;
+  if (!(mFlags & nsIDocumentEncoder::OutputIgnoreMozDirty)) {
+    aForceFormat = aElement->HasAttr(kNameSpaceID_None, nsGkAtoms::mozdirty);
+  }
+
+  const bool isBr = aElement->IsHTMLElement(nsGkAtoms::br);
+  if (isBr && (mFlags & nsIDocumentEncoder::OutputNoFormattingInPre) &&
+      PreLevel() > 0) {
+    if (!AppendNewLineToString(aStr)) {
+      aResult = NS_ERROR_OUT_OF_MEMORY;
+    }
+    return false;
+  }
+
+  if (aElement->IsHTMLElement(nsGkAtoms::body)) {
+    mInBody += 1;
+  }
+
+  return true;
+}
+
+// Pre-check before an element's end tag is serialized.
+//
+// Sets aForceFormat from the element's _moz_dirty attribute (unless
+// OutputIgnoreMozDirty is set), pops the OL state pushed for an <ol> while
+// copying, and then defers to the XML serializer for the return value. The
+// base class's own force-format answer is deliberately discarded.
+bool nsXHTMLContentSerializer::CheckElementEnd(Element* aElement,
+                                               Element* aOriginalElement,
+                                               bool& aForceFormat,
+                                               nsAString& aStr) {
+  NS_ASSERTION(!mIsHTMLSerializer,
+               "nsHTMLContentSerializer shouldn't call this method !");
+
+  aForceFormat = false;
+  if (!(mFlags & nsIDocumentEncoder::OutputIgnoreMozDirty)) {
+    aForceFormat = aElement->HasAttr(kNameSpaceID_None, nsGkAtoms::mozdirty);
+  }
+
+  if (mIsCopying && aElement->IsHTMLElement(nsGkAtoms::ol)) {
+    NS_ASSERTION((!mOLStateStack.IsEmpty()), "Cannot have an empty OL Stack");
+    /* Every OL start tag is supposed to have pushed an olState, so there
+       should always be one to remove here; the check is only a safeguard. */
+    const bool haveState = !mOLStateStack.IsEmpty();
+    if (haveState) {
+      mOLStateStack.RemoveLastElement();
+    }
+  }
+
+  bool ignoredFormat;
+  return nsXMLContentSerializer::CheckElementEnd(aElement, aOriginalElement,
+                                                 ignoredFormat, aStr);
+}
+
+// Appends aStr to aOutputStr.
+//  - In body-only mode, text outside <body> is dropped (reported as success).
+//  - While entity encoding is disabled, the text is appended verbatim.
+//  - Otherwise the XML serializer's entity translation is used.
+//
+// @return false if the append failed.
+bool nsXHTMLContentSerializer::AppendAndTranslateEntities(
+    const nsAString& aStr, nsAString& aOutputStr) {
+  const bool outsideBody = mBodyOnly && !mInBody;
+  if (outsideBody) {
+    return true;
+  }
+
+  if (!mDisableEntityEncoding) {
+    return nsXMLContentSerializer::AppendAndTranslateEntities(aStr,
+                                                              aOutputStr);
+  }
+
+  return aOutputStr.Append(aStr, fallible);
+}
+
+// Tells whether aAttrName is a shorthand (boolean) attribute on the element
+// named aElementName, i.e. one that SerializeAttributes expands to
+// name="name" when its value is empty.
+//
+// Each attribute is only shorthand on specific elements, so once the
+// attribute has been identified the answer depends on the element alone.
+bool nsXHTMLContentSerializer::IsShorthandAttr(const nsAtom* aAttrName,
+                                               const nsAtom* aElementName) {
+  if (aAttrName == nsGkAtoms::checked) {
+    return aElementName == nsGkAtoms::input;
+  }
+
+  if (aAttrName == nsGkAtoms::compact) {
+    return aElementName == nsGkAtoms::dir || aElementName == nsGkAtoms::dl ||
+           aElementName == nsGkAtoms::menu || aElementName == nsGkAtoms::ol ||
+           aElementName == nsGkAtoms::ul;
+  }
+
+  if (aAttrName == nsGkAtoms::declare) {
+    return aElementName == nsGkAtoms::object;
+  }
+
+  if (aAttrName == nsGkAtoms::defer) {
+    return aElementName == nsGkAtoms::script;
+  }
+
+  if (aAttrName == nsGkAtoms::disabled) {
+    return aElementName == nsGkAtoms::button ||
+           aElementName == nsGkAtoms::input ||
+           aElementName == nsGkAtoms::optgroup ||
+           aElementName == nsGkAtoms::option ||
+           aElementName == nsGkAtoms::select ||
+           aElementName == nsGkAtoms::textarea;
+  }
+
+  if (aAttrName == nsGkAtoms::ismap) {
+    return aElementName == nsGkAtoms::img || aElementName == nsGkAtoms::input;
+  }
+
+  if (aAttrName == nsGkAtoms::multiple) {
+    return aElementName == nsGkAtoms::select;
+  }
+
+  if (aAttrName == nsGkAtoms::noresize) {
+    return aElementName == nsGkAtoms::frame;
+  }
+
+  if (aAttrName == nsGkAtoms::noshade) {
+    return aElementName == nsGkAtoms::hr;
+  }
+
+  if (aAttrName == nsGkAtoms::nowrap) {
+    return aElementName == nsGkAtoms::td || aElementName == nsGkAtoms::th;
+  }
+
+  if (aAttrName == nsGkAtoms::readonly) {
+    return aElementName == nsGkAtoms::input ||
+           aElementName == nsGkAtoms::textarea;
+  }
+
+  if (aAttrName == nsGkAtoms::selected) {
+    return aElementName == nsGkAtoms::option;
+  }
+
+  // autoplay, muted and controls on media elements
+  const bool isMediaElement =
+      aElementName == nsGkAtoms::video || aElementName == nsGkAtoms::audio;
+  const bool isMediaAttr = aAttrName == nsGkAtoms::autoplay ||
+                           aAttrName == nsGkAtoms::muted ||
+                           aAttrName == nsGkAtoms::controls;
+  return isMediaElement && isMediaAttr;
+}
+
+// Tells the formatter whether a line break goes before the start tag of
+// aName. Elements outside the XHTML namespace return the current mAddSpace
+// flag; a fixed set of XHTML elements always breaks; any other XHTML element
+// breaks only when nsHTMLElement::IsBlock() says it is a block.
+bool nsXHTMLContentSerializer::LineBreakBeforeOpen(int32_t aNamespaceID,
+                                                   nsAtom* aName) {
+  if (aNamespaceID != kNameSpaceID_XHTML) {
+    return mAddSpace;
+  }
+
+  const nsAtom* const alwaysBreak[] = {
+      nsGkAtoms::title,  nsGkAtoms::meta,   nsGkAtoms::link,
+      nsGkAtoms::style,  nsGkAtoms::select, nsGkAtoms::option,
+      nsGkAtoms::script, nsGkAtoms::html};
+  for (const nsAtom* tag : alwaysBreak) {
+    if (aName == tag) {
+      return true;
+    }
+  }
+
+  return nsHTMLElement::IsBlock(nsHTMLTags::CaseSensitiveAtomTagToId(aName));
+}
+
+// Tells the formatter whether a line break goes right after the start tag of
+// aName. Only a fixed set of XHTML elements answers true; elements from any
+// other namespace never do.
+bool nsXHTMLContentSerializer::LineBreakAfterOpen(int32_t aNamespaceID,
+                                                  nsAtom* aName) {
+  if (aNamespaceID != kNameSpaceID_XHTML) {
+    return false;
+  }
+
+  const nsAtom* const breakAfterOpen[] = {
+      nsGkAtoms::html,   nsGkAtoms::head,   nsGkAtoms::body,
+      nsGkAtoms::ul,     nsGkAtoms::ol,     nsGkAtoms::dl,
+      nsGkAtoms::table,  nsGkAtoms::tbody,  nsGkAtoms::tr,
+      nsGkAtoms::br,     nsGkAtoms::meta,   nsGkAtoms::link,
+      nsGkAtoms::script, nsGkAtoms::select, nsGkAtoms::map,
+      nsGkAtoms::area,   nsGkAtoms::style};
+  for (const nsAtom* tag : breakAfterOpen) {
+    if (aName == tag) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Tells the formatter whether a line break goes before the end tag of aName.
+// Only a fixed set of XHTML container elements answers true; elements from
+// any other namespace never do.
+bool nsXHTMLContentSerializer::LineBreakBeforeClose(int32_t aNamespaceID,
+                                                    nsAtom* aName) {
+  if (aNamespaceID != kNameSpaceID_XHTML) {
+    return false;
+  }
+
+  const nsAtom* const breakBeforeClose[] = {
+      nsGkAtoms::html,   nsGkAtoms::head,  nsGkAtoms::body,
+      nsGkAtoms::ul,     nsGkAtoms::ol,    nsGkAtoms::dl,
+      nsGkAtoms::select, nsGkAtoms::table, nsGkAtoms::tbody};
+  for (const nsAtom* tag : breakBeforeClose) {
+    if (aName == tag) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Tells the formatter whether a line break goes after the end tag of aName.
+// Elements outside the XHTML namespace never break; a fixed set of XHTML
+// elements always does; any other XHTML element breaks only when
+// nsHTMLElement::IsBlock() says it is a block.
+bool nsXHTMLContentSerializer::LineBreakAfterClose(int32_t aNamespaceID,
+                                                   nsAtom* aName) {
+  if (aNamespaceID != kNameSpaceID_XHTML) {
+    return false;
+  }
+
+  const nsAtom* const alwaysBreak[] = {
+      nsGkAtoms::html,   nsGkAtoms::head,   nsGkAtoms::body,
+      nsGkAtoms::tr,     nsGkAtoms::th,     nsGkAtoms::td,
+      nsGkAtoms::title,  nsGkAtoms::dt,     nsGkAtoms::dd,
+      nsGkAtoms::select, nsGkAtoms::option, nsGkAtoms::map};
+  for (const nsAtom* tag : alwaysBreak) {
+    if (aName == tag) {
+      return true;
+    }
+  }
+
+  return nsHTMLElement::IsBlock(nsHTMLTags::CaseSensitiveAtomTagToId(aName));
+}
+
+// Increments the pre-level counter when aNode is an HTML element that is
+// either preformatted according to IsElementPreformatted() or one of
+// <script>, <style>, <noscript>, <noframes>. Does nothing when the pre level
+// is not being maintained.
+void nsXHTMLContentSerializer::MaybeEnterInPreContent(nsIContent* aNode) {
+  if (ShouldMaintainPreLevel() && aNode->IsHTMLElement()) {
+    const bool entersPreContent =
+        IsElementPreformatted(aNode) ||
+        aNode->IsAnyOfHTMLElements(nsGkAtoms::script, nsGkAtoms::style,
+                                   nsGkAtoms::noscript, nsGkAtoms::noframes);
+    if (entersPreContent) {
+      ++PreLevel();
+    }
+  }
+}
+
+// Counterpart of MaybeEnterInPreContent(): decrements the pre-level counter
+// for the same set of elements. Does nothing when the pre level is not being
+// maintained or aNode is not an HTML element.
+void nsXHTMLContentSerializer::MaybeLeaveFromPreContent(nsIContent* aNode) {
+  if (ShouldMaintainPreLevel() && aNode->IsHTMLElement()) {
+    const bool leavesPreContent =
+        IsElementPreformatted(aNode) ||
+        aNode->IsAnyOfHTMLElements(nsGkAtoms::script, nsGkAtoms::style,
+                                   nsGkAtoms::noscript, nsGkAtoms::noframes);
+    if (leavesPreContent) {
+      PreLevel()--;
+    }
+  }
+}
+
+// Returns true when aNode is an element whose computed style (obtained
+// without flushing) treats white space or newlines as significant. Nodes
+// that are not elements, or that have no computed style, yield false.
+bool nsXHTMLContentSerializer::IsElementPreformatted(nsIContent* aNode) {
+  MOZ_ASSERT(ShouldMaintainPreLevel(),
+             "We should not be calling this needlessly");
+
+  if (!aNode->IsElement()) {
+    return false;
+  }
+
+  RefPtr<const ComputedStyle> style =
+      nsComputedDOMStyle::GetComputedStyleNoFlush(aNode->AsElement());
+  if (!style) {
+    return false;
+  }
+
+  return style->StyleText()->WhiteSpaceOrNewlineIsSignificant();
+}
+
+// Serializes a "value" attribute for aElement, the first <li> of an <ol>
+// inside the copied range (which need not be the first <li> child of that
+// <ol>). The value is derived from the nearest preceding sibling <li> with a
+// non-empty "value" attribute, or from the start value stored for the
+// enclosing <ol>. Returns false only if serializing the attribute fails.
+bool nsXHTMLContentSerializer::SerializeLIValueAttribute(nsIContent* aElement,
+                                                         nsAString& aStr) {
+  int32_t startVal = 0;
+  if (!mOLStateStack.IsEmpty()) {
+    // isFirstListItem must be true only until the first item of the list has
+    // been serialized, so clear it on the innermost <ol> state.
+    olState& innermost = mOLStateStack[mOLStateStack.Length() - 1];
+    innermost.isFirstListItem = false;
+    startVal = innermost.startVal;
+  }
+
+  // Walk backwards from aElement over its previous siblings until an <li>
+  // with a "value" attribute is found; offset counts the <li> elements
+  // visited that have none (aElement included).
+  bool found = false;
+  int32_t offset = 0;
+  nsAutoString valueStr;
+  for (nsIContent* node = aElement; node; node = node->GetPreviousSibling()) {
+    if (!node->IsHTMLElement(nsGkAtoms::li)) {
+      continue;
+    }
+
+    node->AsElement()->GetAttr(kNameSpaceID_None, nsGkAtoms::value, valueStr);
+    if (valueStr.IsEmpty()) {
+      ++offset;
+      continue;
+    }
+
+    found = true;
+    // NOTE(review): the conversion status is not inspected, so a non-numeric
+    // value is used as whatever ToInteger() returns.
+    nsresult rv = NS_OK;
+    startVal = valueStr.ToInteger(&rv);
+    break;
+  }
+
+  if (offset == 0) {
+    if (found) {
+      // aElement carries its own "value": serialize it like any other
+      // attribute.
+      NS_ENSURE_TRUE(SerializeAttr(u""_ns, u"value"_ns, valueStr, aStr, false),
+                     false);
+    }
+    return true;
+  }
+
+  if (offset == 1 && !found) {
+    // aElement is the first <li> and neither it nor any earlier sibling has a
+    // "value": emit nothing, to keep the changes to the markup minimal.
+    return true;
+  }
+
+  // aElement has no "value" of its own: synthesize one from the start value
+  // plus the number of value-less <li> elements walked over.
+  valueStr.Truncate();
+  valueStr.AppendInt(startVal + offset);
+  NS_ENSURE_TRUE(SerializeAttr(u""_ns, u"value"_ns, valueStr, aStr, false),
+                 false);
+
+  return true;
+}
+
+// Returns true when aElement's parent node is named "ol" (compared
+// case-insensitively) and the innermost entry of the <ol> state stack still
+// has isFirstListItem set.
+bool nsXHTMLContentSerializer::IsFirstChildOfOL(nsIContent* aElement) {
+  nsIContent* parent = aElement->GetParent();
+  if (!parent || !parent->NodeName().LowerCaseEqualsLiteral("ol")) {
+    return false;
+  }
+
+  if (mOLStateStack.IsEmpty()) {
+    return false;
+  }
+
+  return mOLStateStack[mOLStateStack.Length() - 1].isFirstListItem;
+}
+
+// Returns true when aContent has no children other than empty text nodes.
+bool nsXHTMLContentSerializer::HasNoChildren(nsIContent* aContent) {
+  nsIContent* kid = aContent->GetFirstChild();
+  while (kid) {
+    // Any non-text child, or a text child with content, counts as a child.
+    if (!kid->IsText() || kid->TextLength() != 0) {
+      return false;
+    }
+    kid = kid->GetNextSibling();
+  }
+
+  return true;
+}
diff --git a/dom/serializers/nsXHTMLContentSerializer.h b/dom/serializers/nsXHTMLContentSerializer.h
new file mode 100644
index 0000000000..ea4c83840b
--- /dev/null
+++ b/dom/serializers/nsXHTMLContentSerializer.h
@@ -0,0 +1,143 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * nsIContentSerializer implementation that can be used with an
+ * nsIDocumentEncoder to convert an XHTML (not HTML!) DOM to an XHTML
+ * string that could be parsed into more or less the original DOM.
+ */
+
+#ifndef nsXHTMLContentSerializer_h__
+#define nsXHTMLContentSerializer_h__
+
+#include "mozilla/Attributes.h"
+#include "nsXMLContentSerializer.h"
+#include "nsString.h"
+#include "nsTArray.h"
+
+class nsIContent;
+class nsAtom;
+
+namespace mozilla {
+class Encoding;
+}
+
+/**
+ * Content serializer for XHTML, built on top of nsXMLContentSerializer. It
+ * overrides the formatting hooks (line breaks around tags, preformatted
+ * content tracking, entity translation) and keeps per-<ol> state so that list
+ * item numbering can be preserved while copying.
+ */
+class nsXHTMLContentSerializer : public nsXMLContentSerializer {
+ public:
+  nsXHTMLContentSerializer();
+  virtual ~nsXHTMLContentSerializer();
+
+  NS_IMETHOD Init(uint32_t flags, uint32_t aWrapColumn,
+                  const mozilla::Encoding* aEncoding, bool aIsCopying,
+                  bool aRewriteEncodingDeclaration,
+                  bool* aNeedsPreformatScanning, nsAString& aOutput) override;
+
+  NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset,
+                        int32_t aEndOffset) override;
+
+  NS_IMETHOD AppendDocumentStart(mozilla::dom::Document* aDocument) override;
+
+ protected:
+  bool CheckElementStart(mozilla::dom::Element* aElement, bool& aForceFormat,
+                         nsAString& aStr, nsresult& aResult) override;
+
+  [[nodiscard]] bool AfterElementStart(nsIContent* aContent,
+                                       nsIContent* aOriginalElement,
+                                       nsAString& aStr) override;
+
+  bool CheckElementEnd(mozilla::dom::Element* aContent,
+                       mozilla::dom::Element* aOriginalElement,
+                       bool& aForceFormat, nsAString& aStr) override;
+
+  void AfterElementEnd(nsIContent* aContent, nsAString& aStr) override;
+
+  // Formatting hooks: whether a line break is emitted before/after the start
+  // tag and before/after the end tag of the element named aName.
+  bool LineBreakBeforeOpen(int32_t aNamespaceID, nsAtom* aName) override;
+  bool LineBreakAfterOpen(int32_t aNamespaceID, nsAtom* aName) override;
+  bool LineBreakBeforeClose(int32_t aNamespaceID, nsAtom* aName) override;
+  bool LineBreakAfterClose(int32_t aNamespaceID, nsAtom* aName) override;
+
+  bool HasLongLines(const nsString& text, int32_t& aLastNewlineOffset);
+
+  // Functions to check whether we enter or leave preformatted content.
+  void MaybeEnterInPreContent(nsIContent* aNode) override;
+  void MaybeLeaveFromPreContent(nsIContent* aNode) override;
+
+  [[nodiscard]] bool SerializeAttributes(
+      mozilla::dom::Element* aContent, mozilla::dom::Element* aOriginalElement,
+      nsAString& aTagPrefix, const nsAString& aTagNamespaceURI,
+      nsAtom* aTagName, nsAString& aStr, uint32_t aSkipAttr,
+      bool aAddNSAttr) override;
+
+  // True when aElement's parent is an <ol> whose first list item has not been
+  // serialized yet.
+  bool IsFirstChildOfOL(nsIContent* aElement);
+
+  // Writes the "value" attribute for the first <li> of an <ol> in the copied
+  // range.
+  [[nodiscard]] bool SerializeLIValueAttribute(nsIContent* aElement,
+                                               nsAString& aStr);
+
+  // True when aAttrName is a shorthand attribute for element aElementName.
+  bool IsShorthandAttr(const nsAtom* aAttrName, const nsAtom* aElementName);
+
+  [[nodiscard]] bool AppendAndTranslateEntities(const nsAString& aStr,
+                                                nsAString& aOutputStr) override;
+
+ private:
+  // True when aNode's computed style makes white space or newlines
+  // significant.
+  bool IsElementPreformatted(nsIContent* aNode);
+
+ protected:
+  /*
+   * mIsHTMLSerializer should be set to true by the HTML serializer, which
+   * inherits from this class. It avoids redefining methods for just a few
+   * changes.
+   */
+  bool mIsHTMLSerializer;
+
+  bool mIsCopying;  // Set to true only while copying
+
+  /*
+   * mDisableEntityEncoding is higher than 0 while the serializer is
+   * serializing the content of an element whose content is considered CDATA
+   * by the serializer (such elements are 'script', 'style', 'noscript' and
+   * possibly others in XHTML). This doesn't have anything to do with whether
+   * the element is defined as CDATA in the DTD; it simply means we'll output
+   * the content of the element without doing any entity encoding whatsoever.
+   */
+  int32_t mDisableEntityEncoding;
+
+  // This is to ensure that we only do meta tag fixups when dealing with
+  // whole documents.
+  bool mRewriteEncodingDeclaration;
+
+  // To keep track of the first LI child of an OL in the selected range
+  bool mIsFirstChildOfOL;
+
+  // To keep track of the start value of an OL and of its first list item, for
+  // nested lists. The type is a plain value aggregate: the compiler-generated
+  // copy constructor and copy assignment are used, so no special member
+  // functions are declared.
+  struct olState {
+    olState(int32_t aStart, bool aIsFirst)
+        : startVal(aStart), isFirstListItem(aIsFirst) {}
+
+    // the value of the start attribute in the OL
+    int32_t startVal;
+
+    // is true only before the serialization of the first li of an ol
+    // should be false for other li in the list
+    bool isFirstListItem;
+  };
+
+  // Stack to store one olState struct per <OL>.
+  AutoTArray<olState, 8> mOLStateStack;
+
+  // True when aContent has no children other than empty text nodes.
+  bool HasNoChildren(nsIContent* aContent);
+};
+
+nsresult NS_NewXHTMLContentSerializer(nsIContentSerializer** aSerializer);
+
+#endif
diff --git a/dom/serializers/nsXMLContentSerializer.cpp b/dom/serializers/nsXMLContentSerializer.cpp
new file mode 100644
index 0000000000..dc83745fe3
--- /dev/null
+++ b/dom/serializers/nsXMLContentSerializer.cpp
@@ -0,0 +1,1814 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * nsIContentSerializer implementation that can be used with an
+ * nsIDocumentEncoder to convert an XML DOM to an XML string that
+ * could be parsed into more or less the original DOM.
+ */
+
+#include "nsXMLContentSerializer.h"
+
+#include "nsGkAtoms.h"
+#include "nsIContent.h"
+#include "nsIContentInlines.h"
+#include "mozilla/dom/Document.h"
+#include "nsIDocumentEncoder.h"
+#include "nsElementTable.h"
+#include "nsNameSpaceManager.h"
+#include "nsTextFragment.h"
+#include "nsString.h"
+#include "mozilla/Sprintf.h"
+#include "nsUnicharUtils.h"
+#include "nsCRT.h"
+#include "nsContentUtils.h"
+#include "nsAttrName.h"
+#include "mozilla/dom/Comment.h"
+#include "mozilla/dom/CustomElementRegistry.h"
+#include "mozilla/dom/DocumentType.h"
+#include "mozilla/dom/Element.h"
+#include "mozilla/dom/ProcessingInstruction.h"
+#include "mozilla/intl/Segmenter.h"
+#include "nsParserConstants.h"
+#include "mozilla/Encoding.h"
+
+using namespace mozilla;
+using namespace mozilla::dom;
+
+#define kXMLNS "xmlns"
+
+// to be readable, we assume that an indented line contains
+// at least this number of characters (arbitrary value here).
+// This is a limit for the indentation.
+#define MIN_INDENTED_LINE_LENGTH 15
+
+// The string used for one level of indentation. INDENT_STRING_LENGTH must
+// always equal the number of characters in INDENT_STRING.
+#define INDENT_STRING "  "
+#define INDENT_STRING_LENGTH 2
+
+// Creates a new nsXMLContentSerializer and hands the reference over to
+// *aSerializer. Always returns NS_OK.
+nsresult NS_NewXMLContentSerializer(nsIContentSerializer** aSerializer) {
+  RefPtr<nsXMLContentSerializer> serializer(new nsXMLContentSerializer());
+  serializer.forget(aSerializer);
+  return NS_OK;
+}
+
+nsXMLContentSerializer::nsXMLContentSerializer()
+ : mPrefixIndex(0),
+ mColPos(0),
+ mIndentOverflow(0),
+ mIsIndentationAddedOnCurrentLine(false),
+ mInAttribute(false),
+ mAddNewlineForRootNode(false),
+ mAddSpace(false),
+ mMayIgnoreLineBreakSequence(false),
+ mBodyOnly(false),
+ mInBody(0) {}
+
+nsXMLContentSerializer::~nsXMLContentSerializer() = default;
+
+NS_IMPL_ISUPPORTS(nsXMLContentSerializer, nsIContentSerializer)
+
+// Resets the serializer state and configures it from aFlags: line-break
+// sequence, raw/formatted/wrapped output modes, wrap column (72 when
+// aWrapColumn is 0) and the output string. aIsCopying and
+// aRewriteEncodingDeclaration are not used by this implementation.
+// *aNeedsPreformatScanning is always set to false. Always returns NS_OK.
+NS_IMETHODIMP
+nsXMLContentSerializer::Init(uint32_t aFlags, uint32_t aWrapColumn,
+                             const Encoding* aEncoding, bool aIsCopying,
+                             bool aRewriteEncodingDeclaration,
+                             bool* aNeedsPreformatScanning,
+                             nsAString& aOutput) {
+  *aNeedsPreformatScanning = false;
+
+  // Per-run state.
+  mPrefixIndex = 0;
+  mColPos = 0;
+  mIndentOverflow = 0;
+  mIsIndentationAddedOnCurrentLine = false;
+  mInAttribute = false;
+  mAddNewlineForRootNode = false;
+  mAddSpace = false;
+  mMayIgnoreLineBreakSequence = false;
+  mBodyOnly = false;
+  mInBody = 0;
+  mPreLevel = 0;
+
+  if (aEncoding) {
+    aEncoding->Name(mCharset);
+  }
+  mFlags = aFlags;
+  mOutput = &aOutput;
+
+  // Line break sequence: CR+LF (Windows), CR (Mac), LF (Unix/DOM), or the
+  // platform default when neither flag is given.
+  const bool wantCR = !!(mFlags & nsIDocumentEncoder::OutputCRLineBreak);
+  const bool wantLF = !!(mFlags & nsIDocumentEncoder::OutputLFLineBreak);
+  if (wantCR && wantLF) {
+    mLineBreak.AssignLiteral("\r\n");
+  } else if (wantCR) {
+    mLineBreak.Assign('\r');
+  } else if (wantLF) {
+    mLineBreak.Assign('\n');
+  } else {
+    mLineBreak.AssignLiteral(NS_LINEBREAK);
+  }
+
+  // Raw output overrides both formatting and wrapping.
+  mDoRaw = !!(mFlags & nsIDocumentEncoder::OutputRaw);
+  mDoFormat = !mDoRaw && (mFlags & nsIDocumentEncoder::OutputFormatted);
+  mDoWrap = !mDoRaw && (mFlags & nsIDocumentEncoder::OutputWrap);
+
+  mAllowLineBreaking =
+      !(mFlags & nsIDocumentEncoder::OutputDisallowLineBreaking);
+
+  mMaxColumn = aWrapColumn ? aWrapColumn : 72;
+
+  return NS_OK;
+}
+
+/**
+ * Appends the [aStartOffset, aEndOffset) slice of aNode's text to aStr.
+ *
+ * @param aNode              content node whose text fragment is read.
+ * @param aStartOffset       index of the first character to copy; must not be
+ *                           negative.
+ * @param aEndOffset         index one past the last character to copy, clamped
+ *                           to the fragment length; -1 means "to the end".
+ * @param aStr               string that receives the text.
+ * @param aTranslateEntities when true the text goes through
+ *                           AppendAndTranslateEntities(), otherwise it is
+ *                           appended verbatim.
+ * @return NS_ERROR_FAILURE when aNode is null or has no text fragment,
+ *         NS_ERROR_INVALID_ARG when aStartOffset is negative,
+ *         NS_ERROR_OUT_OF_MEMORY when an append fails, and NS_OK otherwise
+ *         (including when the requested slice is empty).
+ */
+nsresult nsXMLContentSerializer::AppendTextData(nsIContent* aNode,
+                                                int32_t aStartOffset,
+                                                int32_t aEndOffset,
+                                                nsAString& aStr,
+                                                bool aTranslateEntities) {
+  const nsTextFragment* frag = aNode ? aNode->GetText() : nullptr;
+  if (!frag) {
+    return NS_ERROR_FAILURE;
+  }
+
+  NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
+  if (aStartOffset < 0) {
+    // The offset is added to the fragment's buffer pointer below, so a
+    // negative value must be rejected here rather than merely asserted on.
+    return NS_ERROR_INVALID_ARG;
+  }
+
+  int32_t fragLength = frag->GetLength();
+  int32_t endoffset =
+      (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength);
+  int32_t length = endoffset - aStartOffset;
+
+  NS_ASSERTION(aStartOffset <= endoffset,
+               "A start offset is beyond the end of the text fragment!");
+
+  if (length <= 0) {
+    // XXX Zero is a legal value, maybe non-zero values should be an
+    // error.
+    return NS_OK;
+  }
+
+  if (frag->Is2b()) {
+    // UTF-16 storage: the slice can be appended directly.
+    const char16_t* strStart = frag->Get2b() + aStartOffset;
+    if (aTranslateEntities) {
+      NS_ENSURE_TRUE(AppendAndTranslateEntities(
+                         Substring(strStart, strStart + length), aStr),
+                     NS_ERROR_OUT_OF_MEMORY);
+    } else {
+      NS_ENSURE_TRUE(aStr.Append(Substring(strStart, strStart + length),
+                                 mozilla::fallible),
+                     NS_ERROR_OUT_OF_MEMORY);
+    }
+  } else {
+    // One-byte storage: widen the slice to UTF-16 first.
+    nsAutoString utf16;
+    if (!CopyASCIItoUTF16(Span(frag->Get1b() + aStartOffset, length), utf16,
+                          mozilla::fallible_t())) {
+      return NS_ERROR_OUT_OF_MEMORY;
+    }
+    if (aTranslateEntities) {
+      NS_ENSURE_TRUE(AppendAndTranslateEntities(utf16, aStr),
+                     NS_ERROR_OUT_OF_MEMORY);
+    } else {
+      NS_ENSURE_TRUE(aStr.Append(utf16, mozilla::fallible),
+                     NS_ERROR_OUT_OF_MEMORY);
+    }
+  }
+
+  return NS_OK;
+}
+
+// Serializes the [aStartOffset, aEndOffset) slice of the text node aText to
+// the output, with entities translated. Raw output and preformatted content
+// only get line-feed conversion; otherwise the text is formatted or wrapped
+// according to the flags given to Init().
+NS_IMETHODIMP
+nsXMLContentSerializer::AppendText(nsIContent* aText, int32_t aStartOffset,
+                                   int32_t aEndOffset) {
+  NS_ENSURE_ARG(aText);
+  NS_ENSURE_STATE(mOutput);
+
+  nsAutoString data;
+  nsresult rv = AppendTextData(aText, aStartOffset, aEndOffset, data, true);
+  if (NS_FAILED(rv)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  const bool verbatim = mDoRaw || PreLevel() > 0;
+
+  if (!verbatim && mDoFormat) {
+    NS_ENSURE_TRUE(AppendToStringFormatedWrapped(data, *mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+    return NS_OK;
+  }
+
+  if (!verbatim && mDoWrap) {
+    NS_ENSURE_TRUE(AppendToStringWrapped(data, *mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+    return NS_OK;
+  }
+
+  // Raw output, preformatted content, or no formatting requested at all.
+  NS_ENSURE_TRUE(AppendToStringConvertLF(data, *mOutput),
+                 NS_ERROR_OUT_OF_MEMORY);
+  return NS_OK;
+}
+
+// Serializes a CDATA section: the "<![CDATA[" opener (subject to the current
+// formatting/wrapping mode), the requested slice of the section's text with
+// line feeds converted but no entity translation, and the closing "]]>".
+NS_IMETHODIMP
+nsXMLContentSerializer::AppendCDATASection(nsIContent* aCDATASection,
+                                           int32_t aStartOffset,
+                                           int32_t aEndOffset) {
+  NS_ENSURE_ARG(aCDATASection);
+  NS_ENSURE_STATE(mOutput);
+
+  constexpr auto cdata = u"<![CDATA["_ns;
+
+  const bool verbatim = mDoRaw || PreLevel() > 0;
+  if (!verbatim && mDoFormat) {
+    NS_ENSURE_TRUE(AppendToStringFormatedWrapped(cdata, *mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+  } else if (!verbatim && mDoWrap) {
+    NS_ENSURE_TRUE(AppendToStringWrapped(cdata, *mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+  } else {
+    NS_ENSURE_TRUE(AppendToString(cdata, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+  }
+
+  // The section's content is copied without entity translation.
+  nsAutoString data;
+  nsresult rv =
+      AppendTextData(aCDATASection, aStartOffset, aEndOffset, data, false);
+  if (NS_FAILED(rv)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  NS_ENSURE_TRUE(AppendToStringConvertLF(data, *mOutput),
+                 NS_ERROR_OUT_OF_MEMORY);
+
+  NS_ENSURE_TRUE(AppendToString(u"]]>"_ns, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+
+  return NS_OK;
+}
+
+// Serializes a processing instruction as "<?target data?>". The "<?target"
+// part follows the current formatting/wrapping mode; the data, when present,
+// is separated by one space and only gets line-feed conversion. The offset
+// parameters are not used.
+NS_IMETHODIMP
+nsXMLContentSerializer::AppendProcessingInstruction(ProcessingInstruction* aPI,
+                                                    int32_t aStartOffset,
+                                                    int32_t aEndOffset) {
+  NS_ENSURE_STATE(mOutput);
+
+  NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(*mOutput), NS_ERROR_OUT_OF_MEMORY);
+
+  nsAutoString target;
+  aPI->GetTarget(target);
+
+  nsAutoString data;
+  aPI->GetData(data);
+
+  // The opening delimiter and the target are emitted as one unit.
+  nsAutoString start;
+  NS_ENSURE_TRUE(start.AppendLiteral("<?", mozilla::fallible),
+                 NS_ERROR_OUT_OF_MEMORY);
+  NS_ENSURE_TRUE(start.Append(target, mozilla::fallible),
+                 NS_ERROR_OUT_OF_MEMORY);
+
+  const bool verbatim = mDoRaw || PreLevel() > 0;
+  if (!verbatim && mDoFormat) {
+    if (mAddSpace) {
+      NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY);
+    }
+    NS_ENSURE_TRUE(AppendToStringFormatedWrapped(start, *mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+  } else if (!verbatim && mDoWrap) {
+    NS_ENSURE_TRUE(AppendToStringWrapped(start, *mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+  } else {
+    NS_ENSURE_TRUE(AppendToString(start, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+  }
+
+  if (!data.IsEmpty()) {
+    NS_ENSURE_TRUE(AppendToString(char16_t(' '), *mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+    NS_ENSURE_TRUE(AppendToStringConvertLF(data, *mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+  }
+  NS_ENSURE_TRUE(AppendToString(u"?>"_ns, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+
+  MaybeFlagNewlineForRootNode(aPI);
+
+  return NS_OK;
+}
+
+// Serializes a comment as "<!--data-->". When the offsets select only part of
+// the comment, just that slice of the data is written (an out-of-range slice
+// yields an empty comment). The "<!--" opener follows the current
+// formatting/wrapping mode; the data itself only gets line-feed conversion.
+NS_IMETHODIMP
+nsXMLContentSerializer::AppendComment(Comment* aComment, int32_t aStartOffset,
+                                      int32_t aEndOffset) {
+  NS_ENSURE_STATE(mOutput);
+
+  nsAutoString data;
+  aComment->GetData(data);
+
+  const int32_t dataLength = data.Length();
+  const bool clipsStart = aStartOffset != 0;
+  const bool clipsEnd = aEndOffset != -1 && aEndOffset < dataLength;
+  if (clipsStart || clipsEnd) {
+    const int32_t end =
+        (aEndOffset == -1) ? dataLength : std::min(aEndOffset, dataLength);
+    const int32_t length = end - aStartOffset;
+
+    nsAutoString frag;
+    if (length > 0) {
+      data.Mid(frag, aStartOffset, length);
+    }
+    data.Assign(frag);
+  }
+
+  NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(*mOutput), NS_ERROR_OUT_OF_MEMORY);
+
+  constexpr auto startComment = u"<!--"_ns;
+
+  const bool verbatim = mDoRaw || PreLevel() > 0;
+  if (!verbatim && mDoFormat) {
+    if (mAddSpace) {
+      NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY);
+    }
+    NS_ENSURE_TRUE(AppendToStringFormatedWrapped(startComment, *mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+  } else if (!verbatim && mDoWrap) {
+    NS_ENSURE_TRUE(AppendToStringWrapped(startComment, *mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+  } else {
+    NS_ENSURE_TRUE(AppendToString(startComment, *mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+  }
+
+  // The comment body is never reformatted, even when formatting is on: the
+  // author may have laid it out deliberately.
+  NS_ENSURE_TRUE(AppendToStringConvertLF(data, *mOutput),
+                 NS_ERROR_OUT_OF_MEMORY);
+  NS_ENSURE_TRUE(AppendToString(u"-->"_ns, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+
+  MaybeFlagNewlineForRootNode(aComment);
+
+  return NS_OK;
+}
+
+// Serializes a document type declaration:
+//   <!DOCTYPE name [PUBLIC "publicId" ["systemId"] | SYSTEM "systemId"]>
+// Each identifier is wrapped in double quotes unless it contains a double
+// quote itself, in which case single quotes are used.
+NS_IMETHODIMP
+nsXMLContentSerializer::AppendDoctype(DocumentType* aDocType) {
+  NS_ENSURE_STATE(mOutput);
+
+  nsAutoString name, publicId, systemId;
+  aDocType->GetName(name);
+  aDocType->GetPublicId(publicId);
+  aDocType->GetSystemId(systemId);
+
+  // Picks the delimiter for an identifier.
+  auto quoteFor = [](const nsAString& aId) -> char16_t {
+    return aId.FindChar(char16_t('"')) == -1 ? char16_t('"') : char16_t('\'');
+  };
+
+  NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(*mOutput), NS_ERROR_OUT_OF_MEMORY);
+
+  NS_ENSURE_TRUE(AppendToString(u"<!DOCTYPE "_ns, *mOutput),
+                 NS_ERROR_OUT_OF_MEMORY);
+  NS_ENSURE_TRUE(AppendToString(name, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+
+  if (!publicId.IsEmpty()) {
+    NS_ENSURE_TRUE(AppendToString(u" PUBLIC "_ns, *mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+    char16_t quote = quoteFor(publicId);
+    NS_ENSURE_TRUE(AppendToString(quote, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+    NS_ENSURE_TRUE(AppendToString(publicId, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+    NS_ENSURE_TRUE(AppendToString(quote, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+
+    if (!systemId.IsEmpty()) {
+      // The system identifier follows the public one after a single space.
+      NS_ENSURE_TRUE(AppendToString(char16_t(' '), *mOutput),
+                     NS_ERROR_OUT_OF_MEMORY);
+      quote = quoteFor(systemId);
+      NS_ENSURE_TRUE(AppendToString(quote, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+      NS_ENSURE_TRUE(AppendToString(systemId, *mOutput),
+                     NS_ERROR_OUT_OF_MEMORY);
+      NS_ENSURE_TRUE(AppendToString(quote, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+    }
+  } else if (!systemId.IsEmpty()) {
+    const char16_t quote = quoteFor(systemId);
+    NS_ENSURE_TRUE(AppendToString(u" SYSTEM "_ns, *mOutput),
+                   NS_ERROR_OUT_OF_MEMORY);
+    NS_ENSURE_TRUE(AppendToString(quote, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+    NS_ENSURE_TRUE(AppendToString(systemId, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+    NS_ENSURE_TRUE(AppendToString(quote, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+  }
+
+  NS_ENSURE_TRUE(AppendToString(kGreaterThan, *mOutput),
+                 NS_ERROR_OUT_OF_MEMORY);
+  MaybeFlagNewlineForRootNode(aDocType);
+
+  return NS_OK;
+}
+
+// Pushes a (prefix, URI) namespace declaration owned by aOwner onto the
+// namespace stack. Returns NS_ERROR_OUT_OF_MEMORY if no entry could be
+// appended, NS_OK otherwise.
+nsresult nsXMLContentSerializer::PushNameSpaceDecl(const nsAString& aPrefix,
+                                                   const nsAString& aURI,
+                                                   nsIContent* aOwner) {
+  NameSpaceDecl* entry = mNameSpaceStack.AppendElement();
+  if (!entry) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }
+
+  entry->mPrefix.Assign(aPrefix);
+  entry->mURI.Assign(aURI);
+  // The owner is a weak reference (no addref); the entry is removed when the
+  // stack is popped for that owner.
+  entry->mOwner = aOwner;
+
+  return NS_OK;
+}
+
+// Removes, from the top of the namespace stack, every consecutive declaration
+// owned by aOwner. Stops at the first entry that has a different owner.
+void nsXMLContentSerializer::PopNameSpaceDeclsFor(nsIContent* aOwner) {
+  while (!mNameSpaceStack.IsEmpty() &&
+         mNameSpaceStack[mNameSpaceStack.Length() - 1].mOwner == aOwner) {
+    mNameSpaceStack.RemoveLastElement();
+  }
+}
+
+/**
+ * Chooses the prefix to serialize for a name in namespace aURI, checking the
+ * requested prefix against the declarations on the namespace stack.
+ *
+ * @param aPrefix      in/out: the prefix the caller would like to use; on
+ *                     return, the prefix that must actually be used (it may
+ *                     have been replaced by an existing or generated prefix,
+ *                     or truncated).
+ * @param aURI         namespace URI the prefix has to be bound to.
+ * @param aElement     element being serialized; compared against the owner of
+ *                     existing declarations.
+ * @param aIsAttribute true when the name belongs to an attribute; attributes
+ *                     outside the null namespace always need a prefix.
+ * @return true when the caller has to emit a namespace declaration binding the
+ *         final aPrefix to aURI, false when no new declaration is needed.
+ */
+bool nsXMLContentSerializer::ConfirmPrefix(nsAString& aPrefix,
+ const nsAString& aURI,
+ nsIContent* aElement,
+ bool aIsAttribute) {
+ // The "xmlns" prefix is never declared or rewritten.
+ if (aPrefix.EqualsLiteral(kXMLNS)) {
+ return false;
+ }
+
+ if (aURI.EqualsLiteral("http://www.w3.org/XML/1998/namespace")) {
+ // The prefix must be xml for this namespace. We don't need to declare it,
+ // so always just set the prefix to xml.
+ aPrefix.AssignLiteral("xml");
+
+ return false;
+ }
+
+ bool mustHavePrefix;
+ if (aIsAttribute) {
+ if (aURI.IsEmpty()) {
+ // Attribute in the null namespace. This just shouldn't have a prefix.
+ // And there's no need to push any namespace decls
+ aPrefix.Truncate();
+ return false;
+ }
+
+ // Attribute not in the null namespace -- must have a prefix
+ mustHavePrefix = true;
+ } else {
+ // Not an attribute, so doesn't _have_ to have a prefix
+ mustHavePrefix = false;
+ }
+
+ // Keep track of the closest prefix that's bound to aURI and whether we've
+ // found such a thing. closestURIMatch holds the prefix, and uriMatch
+ // indicates whether we actually have one.
+ nsAutoString closestURIMatch;
+ bool uriMatch = false;
+
+ // Also keep track of whether we've seen aPrefix already. If we have, that
+ // means that it's already bound to a URI different from aURI, so even if we
+ // later (so in a more outer scope) see it bound to aURI we can't reuse it.
+ bool haveSeenOurPrefix = false;
+
+ // Walk the namespace stack from the innermost declaration outwards.
+ int32_t count = mNameSpaceStack.Length();
+ int32_t index = count - 1;
+ while (index >= 0) {
+ NameSpaceDecl& decl = mNameSpaceStack.ElementAt(index);
+ // Check if we've found a prefix match
+ if (aPrefix.Equals(decl.mPrefix)) {
+ // If the URIs match and aPrefix is not bound to any other URI, we can
+ // use aPrefix
+ if (!haveSeenOurPrefix && aURI.Equals(decl.mURI)) {
+ // Just use our uriMatch stuff. That will deal with an empty aPrefix
+ // the right way. We can break out of the loop now, though.
+ uriMatch = true;
+ closestURIMatch = aPrefix;
+ break;
+ }
+
+ haveSeenOurPrefix = true;
+
+ // If they don't, and either:
+ // 1) We have a prefix (so we'd be redeclaring this prefix to point to a
+ // different namespace) or
+ // 2) We're looking at an existing default namespace decl on aElement (so
+ // we can't create a new default namespace decl for this URI)
+ // then generate a new prefix. Note that we do NOT generate new prefixes
+ // if we happen to have aPrefix == decl.mPrefix == "" and mismatching
+ // URIs when |decl| doesn't have aElement as its owner. In that case we
+ // can simply push the new namespace URI as the default namespace for
+ // aElement.
+ if (!aPrefix.IsEmpty() || decl.mOwner == aElement) {
+ NS_ASSERTION(!aURI.IsEmpty(),
+ "Not allowed to add a xmlns attribute with an empty "
+ "namespace name unless it declares the default "
+ "namespace.");
+
+ GenerateNewPrefix(aPrefix);
+ // Now we need to validate our new prefix/uri combination; check it
+ // against the full namespace stack again. Note that just restarting
+ // the while loop is ok, since we haven't changed aURI, so the
+ // closestURIMatch and uriMatch state is not affected.
+ index = count - 1;
+ haveSeenOurPrefix = false;
+ continue;
+ }
+ }
+
+ // If we've found a URI match, then record the first one
+ if (!uriMatch && aURI.Equals(decl.mURI)) {
+ // Need to check that decl.mPrefix is not declared anywhere closer to
+ // us. If it is, we can't use it.
+ bool prefixOK = true;
+ int32_t index2;
+ for (index2 = count - 1; index2 > index && prefixOK; --index2) {
+ prefixOK = (mNameSpaceStack[index2].mPrefix != decl.mPrefix);
+ }
+
+ if (prefixOK) {
+ uriMatch = true;
+ closestURIMatch.Assign(decl.mPrefix);
+ }
+ }
+
+ --index;
+ }
+
+ // At this point the following invariants hold:
+ // 1) The prefix in closestURIMatch is mapped to aURI in our scope if
+ // uriMatch is set.
+ // 2) There is nothing on the namespace stack that has aPrefix as the prefix
+ // and a _different_ URI, except for the case aPrefix.IsEmpty (and
+ // possible default namespaces on ancestors)
+
+ // So if uriMatch is set it's OK to use the closestURIMatch prefix. The one
+ // exception is when closestURIMatch is actually empty (default namespace
+ // decl) and we must have a prefix.
+ if (uriMatch && (!mustHavePrefix || !closestURIMatch.IsEmpty())) {
+ aPrefix.Assign(closestURIMatch);
+ return false;
+ }
+
+ if (aPrefix.IsEmpty()) {
+ // At this point, aPrefix is empty (which means we never had a prefix to
+ // start with). If we must have a prefix, just generate a new prefix and
+ // then send it back through the namespace stack checks to make sure it's
+ // OK.
+ if (mustHavePrefix) {
+ GenerateNewPrefix(aPrefix);
+ return ConfirmPrefix(aPrefix, aURI, aElement, aIsAttribute);
+ }
+
+ // One final special case. If aPrefix is empty and we never saw an empty
+ // prefix (default namespace decl) on the namespace stack and we're in the
+ // null namespace there is no reason to output an |xmlns=""| here. It just
+ // makes the output less readable.
+ if (!haveSeenOurPrefix && aURI.IsEmpty()) {
+ return false;
+ }
+ }
+
+ // Now just set aURI as the new default namespace URI. Indicate that we need
+ // to create a namespace decl for the final prefix
+ return true;
+}
+
+// Replaces aPrefix with a generated prefix of the form "a<N>", where N is the
+// serializer's running prefix index; the index is advanced on every call.
+void nsXMLContentSerializer::GenerateNewPrefix(nsAString& aPrefix) {
+  const auto suffix = mPrefixIndex++;
+  aPrefix.Assign('a');
+  aPrefix.AppendInt(suffix);
+}
+
+// Serializes a single attribute as ` [prefix:]name=<delim>value<delim>` and
+// appends it to aStr.
+//
+// aDoEscapeEntities selects how the value is written:
+//  - true:  the value goes through AppendAndTranslateEntities (with
+//           mInAttribute set) and is always delimited by double quotes.
+//  - false: only '&' (and, when unavoidable, '"') are escaped, and the
+//           delimiter is picked from the quote characters present in the
+//           value.
+//
+// Returns false if any fallible append fails, true otherwise (including the
+// case where output is suppressed because we are outside the body).
+bool nsXMLContentSerializer::SerializeAttr(const nsAString& aPrefix,
+                                           const nsAString& aName,
+                                           const nsAString& aValue,
+                                           nsAString& aStr,
+                                           bool aDoEscapeEntities) {
+  // This method can write to aStr without going through AppendToString, so
+  // the body-only filter has to be applied here explicitly.
+  if (mBodyOnly && !mInBody) {
+    return true;
+  }
+
+  // For raw output with entity escaping (the innerHTML case) the attribute is
+  // written straight into aStr, avoiding a temporary. Otherwise it is
+  // assembled in a scratch buffer so its length is known before wrapping.
+  const bool writeDirectly = mDoRaw && aDoEscapeEntities;
+  nsAutoString scratch;
+  nsAString& target = writeDirectly ? aStr : scratch;
+
+  NS_ENSURE_TRUE(target.Append(char16_t(' '), mozilla::fallible), false);
+  if (!aPrefix.IsEmpty()) {
+    NS_ENSURE_TRUE(target.Append(aPrefix, mozilla::fallible), false);
+    NS_ENSURE_TRUE(target.Append(char16_t(':'), mozilla::fallible), false);
+  }
+  NS_ENSURE_TRUE(target.Append(aName, mozilla::fallible), false);
+
+  if (aDoEscapeEntities) {
+    // Problem characters become character entity references, so a double
+    // quote is always a safe delimiter here.
+    NS_ENSURE_TRUE(target.AppendLiteral("=\"", mozilla::fallible), false);
+
+    mInAttribute = true;
+    const bool translated = AppendAndTranslateEntities(aValue, target);
+    mInAttribute = false;
+    NS_ENSURE_TRUE(translated, false);
+
+    NS_ENSURE_TRUE(target.Append(char16_t('"'), mozilla::fallible), false);
+    if (writeDirectly) {
+      // Everything is already in aStr; no wrapping pass in raw mode.
+      return true;
+    }
+  } else {
+    // Pick the delimiter from the quote characters that occur in the value.
+    // See http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.3.2.2 for
+    // the standard on character entity references in values.
+    //
+    //   contains "   contains '   delimiter   escape "
+    //   no           no           "           no
+    //   no           yes          "           no
+    //   yes          no           '           no
+    //   yes          yes          "           yes
+    const bool hasApostrophe = aValue.FindChar(char16_t('\'')) != kNotFound;
+    const bool hasQuote = aValue.FindChar(char16_t('"')) != kNotFound;
+    const char16_t delimiter =
+        (hasQuote && !hasApostrophe) ? char16_t('\'') : char16_t('"');
+
+    NS_ENSURE_TRUE(target.Append(char16_t('='), mozilla::fallible), false);
+    NS_ENSURE_TRUE(target.Append(delimiter, mozilla::fallible), false);
+
+    // '&' always has to be escaped; '"' only when it would clash with the
+    // delimiter.
+    nsAutoString escaped(aValue);
+    NS_ENSURE_TRUE(
+        escaped.ReplaceSubstring(u"&"_ns, u"&amp;"_ns, mozilla::fallible),
+        false);
+    if (hasQuote && hasApostrophe) {
+      NS_ENSURE_TRUE(
+          escaped.ReplaceSubstring(u"\""_ns, u"&quot;"_ns, mozilla::fallible),
+          false);
+    }
+    NS_ENSURE_TRUE(target.Append(escaped, mozilla::fallible), false);
+    NS_ENSURE_TRUE(target.Append(delimiter, mozilla::fallible), false);
+  }
+
+  const bool overflowsLine = mDoWrap && mColPos + target.Length() > mMaxColumn;
+  if (!overflowsLine) {
+    NS_ENSURE_TRUE(AppendToStringConvertLF(target, aStr), false);
+    return true;
+  }
+
+  // The attribute would overrun the maximum width: start a new line and drop
+  // the leading space that was only needed as a separator.
+  NS_ENSURE_TRUE(AppendNewLineToString(aStr), false);
+
+  nsDependentSubstring withoutLeadingSpace(target, 1);
+  if (mDoFormat &&
+      mIndent.Length() + withoutLeadingSpace.Length() <= mMaxColumn) {
+    NS_ENSURE_TRUE(AppendIndentation(aStr), false);
+  }
+  NS_ENSURE_TRUE(AppendToStringConvertLF(withoutLeadingSpace, aStr), false);
+
+  return true;
+}
+
+// Walks the attributes of aElement and pushes every namespace declaration it
+// finds onto the namespace stack, associated with aOriginalElement.
+//
+// Returns the index of an attribute that must be skipped when the attributes
+// are serialized afterwards, or the attribute count if nothing is skipped.
+// An attribute is skipped when it is an unprefixed xmlns declaration with a
+// non-empty value on an element that is itself in no namespace.
+uint32_t nsXMLContentSerializer::ScanNamespaceDeclarations(
+    Element* aElement, Element* aOriginalElement,
+    const nsAString& aTagNamespaceURI) {
+  const uint32_t count = aElement->GetAttrCount();
+  nsAutoString uriStr;
+
+  // First scan for namespace declarations, pushing each on the stack
+  uint32_t skipAttr = count;
+  for (uint32_t index = 0; index < count; index++) {
+    const BorrowedAttrInfo info = aElement->GetAttrInfoAt(index);
+    const nsAttrName* name = info.mName;
+
+    const int32_t namespaceID = name->NamespaceID();
+    nsAtom* attrName = name->LocalName();
+
+    if (namespaceID == kNameSpaceID_XMLNS ||
+        // Also push on the stack attrs named "xmlns" in the null
+        // namespace... because once we serialize those out they'll look like
+        // namespace decls.  :(
+        // XXXbz what if we have both "xmlns" in the null namespace and "xmlns"
+        // in the xmlns namespace?
+        (namespaceID == kNameSpaceID_None && attrName == nsGkAtoms::xmlns)) {
+      info.mValue->ToString(uriStr);
+
+      if (!name->GetPrefix()) {
+        if (aTagNamespaceURI.IsEmpty() && !uriStr.IsEmpty()) {
+          // If the element is in no namespace we need to add a xmlns
+          // attribute to declare that. That xmlns attribute must not have a
+          // prefix (see http://www.w3.org/TR/REC-xml-names/#dt-prefix), ie it
+          // must declare the default namespace. We just found an xmlns
+          // attribute that declares the default namespace to something
+          // non-empty. We're going to ignore this attribute, for children we
+          // will detect that we need to add it again and attributes aren't
+          // affected by the default namespace.
+          skipAttr = index;
+        } else {
+          // Default NS attribute does not have prefix (and the name is "xmlns")
+          PushNameSpaceDecl(u""_ns, uriStr, aOriginalElement);
+        }
+      } else {
+        PushNameSpaceDecl(nsDependentAtomString(attrName), uriStr,
+                          aOriginalElement);
+      }
+    }
+  }
+  return skipAttr;
+}
+
+// Reports whether an attribute value should be treated as script.
+//
+// For un-namespaced href/src attributes on HTML, XUL or SVG elements the
+// answer is whether the text before the first ':' (with whitespace stripped)
+// is "javascript", compared case-insensitively. For every other attribute the
+// answer is whether the name is an event attribute name for aContent.
+bool nsXMLContentSerializer::IsJavaScript(nsIContent* aContent,
+                                          nsAtom* aAttrNameAtom,
+                                          int32_t aAttrNamespaceID,
+                                          const nsAString& aValueString) {
+  const bool isHtml = aContent->IsHTMLElement();
+  const bool isXul = aContent->IsXULElement();
+  const bool isSvg = aContent->IsSVGElement();
+
+  const bool isUrlAttribute =
+      aAttrNamespaceID == kNameSpaceID_None && (isHtml || isXul || isSvg) &&
+      (aAttrNameAtom == nsGkAtoms::href || aAttrNameAtom == nsGkAtoms::src);
+  if (!isUrlAttribute) {
+    return aContent->IsEventAttributeName(aAttrNameAtom);
+  }
+
+  static const char kJavaScript[] = "javascript";
+
+  // A colon that is missing (negative index) or sits too early cannot
+  // terminate a "javascript" scheme.
+  const int32_t colon = aValueString.FindChar(':');
+  if (colon < static_cast<int32_t>(sizeof kJavaScript - 1)) {
+    return false;
+  }
+
+  nsAutoString scheme(Substring(aValueString, 0, colon));
+  scheme.StripWhitespace();
+  return (scheme.Length() == (sizeof kJavaScript - 1)) &&
+         scheme.EqualsIgnoreCase(kJavaScript);
+}
+
+// Serializes the attribute list of aElement onto aStr.
+//
+// If aAddNSAttr is set, a namespace declaration for the element's own
+// prefix/namespace is written first and pushed on the namespace stack. Then
+// every attribute is written, except the one at index aSkipAttr and any whose
+// local name starts with "_moz" or "-moz". When ConfirmPrefix reports that an
+// attribute's prefix needs a declaration, that declaration is written right
+// after the attribute and pushed as well.
+//
+// Returns false as soon as a SerializeAttr call fails.
+bool nsXMLContentSerializer::SerializeAttributes(
+    Element* aElement, Element* aOriginalElement, nsAString& aTagPrefix,
+    const nsAString& aTagNamespaceURI, nsAtom* aTagName, nsAString& aStr,
+    uint32_t aSkipAttr, bool aAddNSAttr) {
+  nsAutoString xmlnsStr;
+  xmlnsStr.AssignLiteral(kXMLNS);
+
+  MaybeSerializeIsValue(aElement, aStr);
+
+  // A namespace declaration the caller decided the tag needs.
+  if (aAddNSAttr) {
+    const bool isDefaultNamespaceDecl = aTagPrefix.IsEmpty();
+    if (isDefaultNamespaceDecl) {
+      NS_ENSURE_TRUE(
+          SerializeAttr(u""_ns, xmlnsStr, aTagNamespaceURI, aStr, true), false);
+    } else {
+      NS_ENSURE_TRUE(
+          SerializeAttr(xmlnsStr, aTagPrefix, aTagNamespaceURI, aStr, true),
+          false);
+    }
+    PushNameSpaceDecl(aTagPrefix, aTagNamespaceURI, aOriginalElement);
+  }
+
+  // Scratch strings reused across iterations.
+  nsAutoString prefixStr, uriStr, valueStr;
+
+  // XXX Unfortunately we need a namespace manager to get
+  // attribute URIs.
+  const uint32_t attrCount = aElement->GetAttrCount();
+  for (uint32_t i = 0; i < attrCount; ++i) {
+    if (i == aSkipAttr) {
+      continue;
+    }
+
+    const nsAttrName* name = aElement->GetAttrNameAt(i);
+    const int32_t namespaceID = name->NamespaceID();
+    nsAtom* localName = name->LocalName();
+    nsAtom* prefixAtom = name->GetPrefix();
+
+    // Filter out any attribute starting with [-|_]moz
+    nsDependentAtomString localNameStr(localName);
+    if (StringBeginsWith(localNameStr, u"_moz"_ns) ||
+        StringBeginsWith(localNameStr, u"-moz"_ns)) {
+      continue;
+    }
+
+    if (prefixAtom) {
+      prefixAtom->ToString(prefixStr);
+    } else {
+      prefixStr.Truncate();
+    }
+
+    // ConfirmPrefix may rewrite prefixStr, so it has to run before the
+    // attribute itself is written.
+    bool needsNamespaceDecl = false;
+    if (namespaceID != kNameSpaceID_XMLNS) {
+      nsNameSpaceManager::GetInstance()->GetNameSpaceURI(namespaceID, uriStr);
+      needsNamespaceDecl =
+          ConfirmPrefix(prefixStr, uriStr, aOriginalElement, true);
+    }
+
+    aElement->GetAttr(namespaceID, localName, valueStr);
+
+    const bool isJS = IsJavaScript(aElement, localName, namespaceID, valueStr);
+    NS_ENSURE_TRUE(
+        SerializeAttr(prefixStr, localNameStr, valueStr, aStr, !isJS), false);
+
+    if (needsNamespaceDecl) {
+      NS_ASSERTION(!prefixStr.IsEmpty(),
+                   "Namespaced attributes must have a prefix");
+      NS_ENSURE_TRUE(SerializeAttr(xmlnsStr, prefixStr, uriStr, aStr, true),
+                     false);
+      PushNameSpaceDecl(prefixStr, uriStr, aOriginalElement);
+    }
+  }
+
+  return true;
+}
+
+// Writes the start tag of aElement (qualified name, namespace declarations
+// and attributes) to mOutput.
+//
+// aElement is the element being output; aOriginalElement is passed on to the
+// namespace-stack helpers and to the end-of-start-tag logic.
+//
+// Returns NS_ERROR_OUT_OF_MEMORY if any append helper reports failure, the
+// result set by CheckElementStart if that hook declines the element, and
+// NS_OK otherwise.
+NS_IMETHODIMP
+nsXMLContentSerializer::AppendElementStart(Element* aElement,
+ Element* aOriginalElement) {
+ NS_ENSURE_ARG(aElement);
+ NS_ENSURE_STATE(mOutput);
+
+ bool forceFormat = false;
+ nsresult rv = NS_OK;
+ if (!CheckElementStart(aElement, forceFormat, *mOutput, rv)) {
+ // When we go to AppendElementEnd for this element, we're going to
+ // MaybeLeaveFromPreContent(). So make sure to MaybeEnterInPreContent()
+ // now, so our PreLevel() doesn't get confused.
+ MaybeEnterInPreContent(aElement);
+ return rv;
+ }
+
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ nsAutoString tagPrefix, tagLocalName, tagNamespaceURI;
+ aElement->NodeInfo()->GetPrefix(tagPrefix);
+ aElement->NodeInfo()->GetName(tagLocalName);
+ aElement->NodeInfo()->GetNamespaceURI(tagNamespaceURI);
+
+ // Push the element's own xmlns declarations before the prefix is confirmed
+ // below; skipAttr is the attribute index SerializeAttributes must omit.
+ uint32_t skipAttr =
+ ScanNamespaceDeclarations(aElement, aOriginalElement, tagNamespaceURI);
+
+ nsAtom* name = aElement->NodeInfo()->NameAtom();
+ bool lineBreakBeforeOpen =
+ LineBreakBeforeOpen(aElement->GetNameSpaceID(), name);
+
+ // Emit whatever whitespace has to precede the tag: a line break plus
+ // indentation when formatting, otherwise a pending space or the newline
+ // that separates root-level nodes.
+ if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) {
+ if (mColPos && lineBreakBeforeOpen) {
+ NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY);
+ } else {
+ NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(*mOutput),
+ NS_ERROR_OUT_OF_MEMORY);
+ }
+ if (!mColPos) {
+ NS_ENSURE_TRUE(AppendIndentation(*mOutput), NS_ERROR_OUT_OF_MEMORY);
+ } else if (mAddSpace) {
+ NS_ENSURE_TRUE(AppendToString(char16_t(' '), *mOutput),
+ NS_ERROR_OUT_OF_MEMORY);
+ mAddSpace = false;
+ }
+ } else if (mAddSpace) {
+ NS_ENSURE_TRUE(AppendToString(char16_t(' '), *mOutput),
+ NS_ERROR_OUT_OF_MEMORY);
+ mAddSpace = false;
+ } else {
+ NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(*mOutput),
+ NS_ERROR_OUT_OF_MEMORY);
+ }
+
+ // Always reset to avoid false newlines in case MaybeAddNewlineForRootNode
+ // wasn't called
+ mAddNewlineForRootNode = false;
+
+ // ConfirmPrefix may rewrite tagPrefix; its result says whether a namespace
+ // declaration has to be emitted together with the attributes.
+ bool addNSAttr;
+ addNSAttr =
+ ConfirmPrefix(tagPrefix, tagNamespaceURI, aOriginalElement, false);
+
+ // Serialize the qualified name of the element
+ NS_ENSURE_TRUE(AppendToString(kLessThan, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+ if (!tagPrefix.IsEmpty()) {
+ NS_ENSURE_TRUE(AppendToString(tagPrefix, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+ NS_ENSURE_TRUE(AppendToString(u":"_ns, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+ }
+ NS_ENSURE_TRUE(AppendToString(tagLocalName, *mOutput),
+ NS_ERROR_OUT_OF_MEMORY);
+
+ // From here on PreLevel() reflects this element's own xml:space setting.
+ MaybeEnterInPreContent(aElement);
+
+ if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) {
+ NS_ENSURE_TRUE(IncrIndentation(name), NS_ERROR_OUT_OF_MEMORY);
+ }
+
+ NS_ENSURE_TRUE(
+ SerializeAttributes(aElement, aOriginalElement, tagPrefix,
+ tagNamespaceURI, name, *mOutput, skipAttr, addNSAttr),
+ NS_ERROR_OUT_OF_MEMORY);
+
+ // Closes the start tag with either ">" or a self-closing "/>".
+ NS_ENSURE_TRUE(AppendEndOfElementStart(aElement, aOriginalElement, *mOutput),
+ NS_ERROR_OUT_OF_MEMORY);
+
+ if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel() &&
+ LineBreakAfterOpen(aElement->GetNameSpaceID(), name)) {
+ NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY);
+ }
+
+ NS_ENSURE_TRUE(AfterElementStart(aElement, aOriginalElement, *mOutput),
+ NS_ERROR_OUT_OF_MEMORY);
+
+ return NS_OK;
+}
+
+// Decides whether an element is written with an explicit end tag
+// (<x></x>, returns true) or as a self-closing tag (<x/>, returns false).
+//
+// aElement is the element actually being output; aOriginalElement is its
+// counterpart in the original DOM, which is the one that may have children
+// and is therefore the one checked for them.
+static bool ElementNeedsSeparateEndTag(Element* aElement,
+                                       Element* aOriginalElement) {
+  // Anything with children obviously needs an end tag.
+  const bool hasChildren = aOriginalElement->GetChildCount() != 0;
+  if (hasChildren) {
+    return true;
+  }
+
+  if (aElement->IsHTMLElement()) {
+    // Empty HTML container tags still get a separate end tag, per spec. See
+    // https://w3c.github.io/DOM-Parsing/#dfn-concept-xml-serialization-algorithm
+    nsAtom* tagAtom = aElement->NodeInfo()->NameAtom();
+    const auto tagId = nsHTMLTags::CaseSensitiveAtomTagToId(tagAtom);
+    return nsHTMLElement::IsContainer(tagId);
+  }
+
+  // Empty non-HTML elements can be self-closing.
+  return false;
+}
+
+// Terminates a start tag: appends ">" when the element will get a separate
+// end tag, otherwise "/>" (preceded by a space when aOriginalElement is an
+// HTML element). Returns false if an append fails.
+bool nsXMLContentSerializer::AppendEndOfElementStart(Element* aElement,
+                                                     Element* aOriginalElement,
+                                                     nsAString& aStr) {
+  if (ElementNeedsSeparateEndTag(aElement, aOriginalElement)) {
+    return AppendToString(kGreaterThan, aStr);
+  }
+
+  // Self-closing form. HTML elements (non-containers at this point) get a
+  // space before the '/', per spec. See
+  // https://w3c.github.io/DOM-Parsing/#dfn-concept-xml-serialization-algorithm
+  const bool separatorOk =
+      !aOriginalElement->IsHTMLElement() || AppendToString(kSpace, aStr);
+  return separatorOk && AppendToString(u"/>"_ns, aStr);
+}
+
+// Writes the end tag of aElement to mOutput (when CheckElementEnd says one is
+// needed) and undoes the per-element state set up by AppendElementStart:
+// indentation, namespace declarations and the xml:space pre level.
+//
+// Returns NS_ERROR_OUT_OF_MEMORY if an append helper reports failure, NS_OK
+// otherwise.
+NS_IMETHODIMP
+nsXMLContentSerializer::AppendElementEnd(Element* aElement,
+ Element* aOriginalElement) {
+ NS_ENSURE_ARG(aElement);
+ NS_ENSURE_STATE(mOutput);
+
+ nsIContent* content = aElement;
+
+ bool forceFormat = false, outputElementEnd;
+ outputElementEnd =
+ CheckElementEnd(aElement, aOriginalElement, forceFormat, *mOutput);
+
+ nsAtom* name = content->NodeInfo()->NameAtom();
+
+ // Mirrors the IncrIndentation call guarded by the same condition in
+ // AppendElementStart.
+ if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) {
+ DecrIndentation(name);
+ }
+
+ if (!outputElementEnd) {
+ // Keep this in sync with the cleanup at the end of this method.
+ PopNameSpaceDeclsFor(aElement);
+ MaybeLeaveFromPreContent(content);
+ MaybeFlagNewlineForRootNode(aElement);
+ AfterElementEnd(content, *mOutput);
+ return NS_OK;
+ }
+
+ nsAutoString tagPrefix, tagLocalName, tagNamespaceURI;
+
+ aElement->NodeInfo()->GetPrefix(tagPrefix);
+ aElement->NodeInfo()->GetName(tagLocalName);
+ aElement->NodeInfo()->GetNamespaceURI(tagNamespaceURI);
+
+ // ConfirmPrefix is called in every build because it may rewrite tagPrefix;
+ // only DEBUG builds keep its result, for the assertion below.
+#ifdef DEBUG
+ bool debugNeedToPushNamespace =
+#endif
+ ConfirmPrefix(tagPrefix, tagNamespaceURI, aElement, false);
+ NS_ASSERTION(!debugNeedToPushNamespace,
+ "Can't push namespaces in closing tag!");
+
+ // Whitespace that has to precede the end tag: line break and indentation
+ // when formatting, otherwise just a pending space.
+ if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) {
+ bool lineBreakBeforeClose =
+ LineBreakBeforeClose(content->GetNameSpaceID(), name);
+
+ if (mColPos && lineBreakBeforeClose) {
+ NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY);
+ }
+ if (!mColPos) {
+ NS_ENSURE_TRUE(AppendIndentation(*mOutput), NS_ERROR_OUT_OF_MEMORY);
+ } else if (mAddSpace) {
+ NS_ENSURE_TRUE(AppendToString(char16_t(' '), *mOutput),
+ NS_ERROR_OUT_OF_MEMORY);
+ mAddSpace = false;
+ }
+ } else if (mAddSpace) {
+ NS_ENSURE_TRUE(AppendToString(char16_t(' '), *mOutput),
+ NS_ERROR_OUT_OF_MEMORY);
+ mAddSpace = false;
+ }
+
+ // kEndTag, then the (optionally prefixed) qualified name, then ">".
+ NS_ENSURE_TRUE(AppendToString(kEndTag, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+ if (!tagPrefix.IsEmpty()) {
+ NS_ENSURE_TRUE(AppendToString(tagPrefix, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+ NS_ENSURE_TRUE(AppendToString(u":"_ns, *mOutput), NS_ERROR_OUT_OF_MEMORY);
+ }
+ NS_ENSURE_TRUE(AppendToString(tagLocalName, *mOutput),
+ NS_ERROR_OUT_OF_MEMORY);
+ NS_ENSURE_TRUE(AppendToString(kGreaterThan, *mOutput),
+ NS_ERROR_OUT_OF_MEMORY);
+
+ // Keep what follows in sync with the cleanup in the !outputElementEnd case.
+ PopNameSpaceDeclsFor(aElement);
+
+ MaybeLeaveFromPreContent(content);
+
+ if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel() &&
+ LineBreakAfterClose(content->GetNameSpaceID(), name)) {
+ NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY);
+ } else {
+ MaybeFlagNewlineForRootNode(aElement);
+ }
+
+ AfterElementEnd(content, *mOutput);
+
+ return NS_OK;
+}
+
+// Ends the current serialization by clearing mOutput. The NS_ENSURE_STATE
+// guard rejects the call when no output string is set.
+NS_IMETHODIMP
+nsXMLContentSerializer::Finish() {
+ NS_ENSURE_STATE(mOutput);
+
+ mOutput = nullptr;
+
+ return NS_OK;
+}
+
+// Stores the current length of the output string in aLength. The
+// NS_ENSURE_STATE guard rejects the call when no output string is set, in
+// which case aLength is left untouched.
+NS_IMETHODIMP
+nsXMLContentSerializer::GetOutputLength(uint32_t& aLength) const {
+ NS_ENSURE_STATE(mOutput);
+
+ aLength = mOutput->Length();
+
+ return NS_OK;
+}
+
+// Writes the XML declaration (<?xml version="..." encoding="..."
+// standalone="..."?>) for aDocument to mOutput.
+//
+// Nothing is written when the document reports no version. The encoding
+// pseudo-attribute is taken from mCharset, not from the document, and is
+// omitted when mCharset is empty. After a declaration has been written,
+// mAddNewlineForRootNode is set so the next root-level node starts on a new
+// line.
+//
+// All appends are fallible: an allocation failure is reported as
+// NS_ERROR_OUT_OF_MEMORY, matching the rest of this class. The text is
+// written to mOutput directly, without going through AppendToString, so
+// mColPos and the body-only filter are not involved.
+NS_IMETHODIMP
+nsXMLContentSerializer::AppendDocumentStart(Document* aDocument) {
+  NS_ENSURE_ARG_POINTER(aDocument);
+  NS_ENSURE_STATE(mOutput);
+
+  nsAutoString version, encoding, standalone;
+  aDocument->GetXMLDeclaration(version, encoding, standalone);
+
+  if (version.IsEmpty()) {
+    return NS_OK;  // A declaration must have version, or there is no decl
+  }
+
+  NS_ENSURE_TRUE(mOutput->AppendLiteral("<?xml version=\"", mozilla::fallible),
+                 NS_ERROR_OUT_OF_MEMORY);
+  NS_ENSURE_TRUE(mOutput->Append(version, mozilla::fallible),
+                 NS_ERROR_OUT_OF_MEMORY);
+  NS_ENSURE_TRUE(mOutput->Append(char16_t('"'), mozilla::fallible),
+                 NS_ERROR_OUT_OF_MEMORY);
+
+  if (!mCharset.IsEmpty()) {
+    NS_ENSURE_TRUE(mOutput->AppendLiteral(" encoding=\"", mozilla::fallible),
+                   NS_ERROR_OUT_OF_MEMORY);
+    NS_ENSURE_TRUE(
+        mOutput->Append(NS_ConvertASCIItoUTF16(mCharset), mozilla::fallible),
+        NS_ERROR_OUT_OF_MEMORY);
+    NS_ENSURE_TRUE(mOutput->Append(char16_t('"'), mozilla::fallible),
+                   NS_ERROR_OUT_OF_MEMORY);
+  }
+  // Otherwise just don't output an encoding attr.  Not that we expect
+  // mCharset to ever be empty.
+#ifdef DEBUG
+  else {
+    NS_WARNING("Empty mCharset?  How come?");
+  }
+#endif
+
+  if (!standalone.IsEmpty()) {
+    NS_ENSURE_TRUE(mOutput->AppendLiteral(" standalone=\"", mozilla::fallible),
+                   NS_ERROR_OUT_OF_MEMORY);
+    NS_ENSURE_TRUE(mOutput->Append(standalone, mozilla::fallible),
+                   NS_ERROR_OUT_OF_MEMORY);
+    NS_ENSURE_TRUE(mOutput->Append(char16_t('"'), mozilla::fallible),
+                   NS_ERROR_OUT_OF_MEMORY);
+  }
+
+  NS_ENSURE_TRUE(mOutput->AppendLiteral("?>", mozilla::fallible),
+                 NS_ERROR_OUT_OF_MEMORY);
+  mAddNewlineForRootNode = true;
+
+  return NS_OK;
+}
+
+// Hook consulted before a start tag is written. This implementation accepts
+// every element: it clears aForceFormat, reports NS_OK through aResult and
+// returns true. The element and aStr are not used.
+bool nsXMLContentSerializer::CheckElementStart(Element*, bool& aForceFormat,
+                                               nsAString& aStr,
+                                               nsresult& aResult) {
+  aForceFormat = false;
+  aResult = NS_OK;
+  return true;
+}
+
+// Hook consulted before an end tag is written. Clears aForceFormat and
+// returns whether the element needs a separate end tag; empty elements that
+// were written self-closing get none. aStr is not used.
+bool nsXMLContentSerializer::CheckElementEnd(Element* aElement,
+                                             Element* aOriginalElement,
+                                             bool& aForceFormat,
+                                             nsAString& aStr) {
+  aForceFormat = false;
+  const bool needsEndTag =
+      ElementNeedsSeparateEndTag(aElement, aOriginalElement);
+  return needsEndTag;
+}
+
+// Appends one character to aOutputStr and advances mColPos by one. While
+// only the body is being output and we are outside of it, nothing is written
+// and true is returned. Otherwise returns the result of the fallible append.
+bool nsXMLContentSerializer::AppendToString(const char16_t aChar,
+                                            nsAString& aOutputStr) {
+  const bool suppressed = mBodyOnly && !mInBody;
+  if (!suppressed) {
+    mColPos += 1;
+    return aOutputStr.Append(aChar, mozilla::fallible);
+  }
+  return true;
+}
+
+// Appends aStr to aOutputStr and advances mColPos by its length. While only
+// the body is being output and we are outside of it, nothing is written and
+// true is returned. Otherwise returns the result of the fallible append.
+bool nsXMLContentSerializer::AppendToString(const nsAString& aStr,
+                                            nsAString& aOutputStr) {
+  const bool suppressed = mBodyOnly && !mInBody;
+  if (!suppressed) {
+    mColPos += aStr.Length();
+    return aOutputStr.Append(aStr, mozilla::fallible);
+  }
+  return true;
+}
+
+// Within the two tables below, `_` stands for 0, i.e. "this character has no
+// entity replacement". The macro is undefined again right after the tables.
+#define _ 0
+
+// This table indexes into kEntityStrings[].
+// It is indexed by character code (0..62) and used outside of attribute
+// values. Non-zero entries: 38 '&' -> 2, 60 '<' -> 3, 62 '>' -> 4.
+const uint8_t nsXMLContentSerializer::kEntities[] = {
+ // clang-format off
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, 2, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ 3, _, 4
+ // clang-format on
+};
+
+// This table indexes into kEntityStrings[].
+// It is indexed by character code (0..62) and used inside attribute values.
+// In addition to the entries of kEntities it maps 9 (tab) -> 5,
+// 10 (line feed) -> 6, 13 (carriage return) -> 7 and 34 '"' -> 1.
+const uint8_t nsXMLContentSerializer::kAttrEntities[] = {
+ // clang-format off
+ _, _, _, _, _, _, _, _, _, 5,
+ 6, _, _, 7, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, 1, _, _, _, 2, _,
+ _, _, _, _, _, _, _, _, _, _,
+ _, _, _, _, _, _, _, _, _, _,
+ 3, _, 4
+ // clang-format on
+};
+
+#undef _
+
+// Replacement text for each table value. Slot 0 is never looked up because a
+// zero table entry means "no replacement".
+const char* const nsXMLContentSerializer::kEntityStrings[] = {
+ /* 0 */ nullptr,
+ /* 1 */ "&quot;",
+ /* 2 */ "&amp;",
+ /* 3 */ "&lt;",
+ /* 4 */ "&gt;",
+ /* 5 */ "&#9;",
+ /* 6 */ "&#xA;",
+ /* 7 */ "&#xD;",
+};
+
+// Appends aStr to aOutputStr with entity escaping, using the attribute-value
+// table while mInAttribute is set and the plain-content table otherwise.
+bool nsXMLContentSerializer::AppendAndTranslateEntities(const nsAString& aStr,
+                                                        nsAString& aOutputStr) {
+  if (!mInAttribute) {
+    return AppendAndTranslateEntities<kGTVal>(aStr, aOutputStr, kEntities,
+                                              kEntityStrings);
+  }
+
+  return AppendAndTranslateEntities<kGTVal>(aStr, aOutputStr, kAttrEntities,
+                                            kEntityStrings);
+}
+
+// Appends aStr to aOutputStr, replacing every character that has a non-zero
+// entry in aEntityTable by the corresponding string from aStringTable.
+//
+// aEntityTable is indexed by character code and only consulted for codes up
+// to and including aMaxTableIndex; its values are indices into aStringTable.
+// Returns false as soon as a fallible append fails.
+/* static */
+bool nsXMLContentSerializer::AppendAndTranslateEntities(
+    const nsAString& aStr, nsAString& aOutputStr, const uint8_t aEntityTable[],
+    uint16_t aMaxTableIndex, const char* const aStringTable[]) {
+  nsReadingIterator<char16_t> first, last;
+  aStr.BeginReading(first);
+  aStr.EndReading(last);
+
+  const char16_t* cursor = first.get();
+  const char16_t* const end = last.get();
+
+  // Alternate between copying a run of ordinary characters and emitting the
+  // entity for the special character that ended the run.
+  while (cursor != end) {
+    const char16_t* const runStart = cursor;
+    const char* replacement = nullptr;
+    uint32_t runLength = 0;
+
+    while (cursor < end) {
+      const char16_t ch = *cursor;
+      if ((ch <= aMaxTableIndex) && aEntityTable[ch]) {
+        replacement = aStringTable[aEntityTable[ch]];
+        break;
+      }
+      ++cursor;
+      ++runLength;
+    }
+
+    NS_ENSURE_TRUE(aOutputStr.Append(runStart, runLength, mozilla::fallible),
+                   false);
+    if (replacement) {
+      NS_ENSURE_TRUE(AppendASCIItoUTF16(mozilla::MakeStringSpan(replacement),
+                                        aOutputStr, mozilla::fallible),
+                     false);
+      // Step over the character that was replaced.
+      ++cursor;
+    }
+  }
+
+  return true;
+}
+
+// Appends a line break to aStr if mAddNewlineForRootNode is set. Returns
+// false only when that append fails.
+bool nsXMLContentSerializer::MaybeAddNewlineForRootNode(nsAString& aStr) {
+  return !mAddNewlineForRootNode || AppendNewLineToString(aStr);
+}
+
+// Records in mAddNewlineForRootNode whether aNode's parent is a document.
+// The flag is left untouched when aNode has no parent.
+void nsXMLContentSerializer::MaybeFlagNewlineForRootNode(nsINode* aNode) {
+  if (nsINode* parent = aNode->GetParentNode()) {
+    mAddNewlineForRootNode = parent->IsDocument();
+  }
+}
+
+// xml:space support: increments the pre level when aNode is an element
+// carrying xml:space="preserve" and pre-level tracking is enabled.
+void nsXMLContentSerializer::MaybeEnterInPreContent(nsIContent* aNode) {
+  if (!ShouldMaintainPreLevel() || !aNode->IsElement()) {
+    return;
+  }
+
+  nsAutoString spaceValue;
+  const bool hasXmlSpace = aNode->AsElement()->GetAttr(
+      kNameSpaceID_XML, nsGkAtoms::space, spaceValue);
+  if (hasXmlSpace && spaceValue.EqualsLiteral("preserve")) {
+    ++PreLevel();
+  }
+}
+
+// xml:space support: decrements the pre level when aNode is an element
+// carrying xml:space="preserve" and pre-level tracking is enabled. This is
+// the counterpart of MaybeEnterInPreContent.
+void nsXMLContentSerializer::MaybeLeaveFromPreContent(nsIContent* aNode) {
+  if (!ShouldMaintainPreLevel() || !aNode->IsElement()) {
+    return;
+  }
+
+  nsAutoString spaceValue;
+  const bool hasXmlSpace = aNode->AsElement()->GetAttr(
+      kNameSpaceID_XML, nsGkAtoms::space, spaceValue);
+  if (hasXmlSpace && spaceValue.EqualsLiteral("preserve")) {
+    --PreLevel();
+  }
+}
+
+// Appends mLineBreak to aStr and resets the per-line state: column 0, no
+// pending space, no indentation written yet, and a following line-break
+// sequence may be ignored. The state is reset even if the append fails; the
+// append result is returned.
+bool nsXMLContentSerializer::AppendNewLineToString(nsAString& aStr) {
+  const bool appended = AppendToString(mLineBreak, aStr);
+
+  mColPos = 0;
+  mIsIndentationAddedOnCurrentLine = false;
+  mAddSpace = false;
+  mMayIgnoreLineBreakSequence = true;
+
+  return appended;
+}
+
+// Appends the current indentation string (mIndent) to aStr and records that
+// the current line has been indented. Clears the pending-space and
+// ignorable-line-break flags; returns the append result.
+bool nsXMLContentSerializer::AppendIndentation(nsAString& aStr) {
+  mIsIndentationAddedOnCurrentLine = true;
+  const bool appended = AppendToString(mIndent, aStr);
+
+  mMayIgnoreLineBreakSequence = false;
+  mAddSpace = false;
+
+  return appended;
+}
+
+// Goes one indentation level deeper. Normally this appends INDENT_STRING to
+// mIndent. When wrapping is on and the indentation would leave fewer than
+// MIN_INDENTED_LINE_LENGTH columns, only mIndentOverflow is incremented so
+// the source stays readable. Returns false if growing mIndent fails.
+bool nsXMLContentSerializer::IncrIndentation(nsAtom* aName) {
+  const bool indentTooDeep =
+      mDoWrap &&
+      mIndent.Length() >= uint32_t(mMaxColumn) - MIN_INDENTED_LINE_LENGTH;
+  if (!indentTooDeep) {
+    return mIndent.AppendLiteral(INDENT_STRING, mozilla::fallible);
+  }
+
+  ++mIndentOverflow;
+  return true;
+}
+
+// Undoes one IncrIndentation: levels that were only counted in
+// mIndentOverflow are released first, otherwise INDENT_STRING_LENGTH
+// characters are cut from the front of mIndent.
+void nsXMLContentSerializer::DecrIndentation(nsAtom* aName) {
+  if (!mIndentOverflow) {
+    mIndent.Cut(0, INDENT_STRING_LENGTH);
+    return;
+  }
+
+  --mIndentOverflow;
+}
+
+// Whether a line break should precede the start tag of the given element.
+// This implementation ignores both arguments and answers with mAddSpace,
+// i.e. it breaks only where whitespace is pending.
+bool nsXMLContentSerializer::LineBreakBeforeOpen(int32_t aNamespaceID,
+ nsAtom* aName) {
+ return mAddSpace;
+}
+
+// Whether a line break should follow the start tag of the given element.
+// This implementation ignores both arguments and always answers false.
+bool nsXMLContentSerializer::LineBreakAfterOpen(int32_t aNamespaceID,
+ nsAtom* aName) {
+ return false;
+}
+
+// Whether a line break should precede the end tag of the given element.
+// This implementation ignores both arguments and answers with mAddSpace,
+// i.e. it breaks only where whitespace is pending.
+bool nsXMLContentSerializer::LineBreakBeforeClose(int32_t aNamespaceID,
+ nsAtom* aName) {
+ return mAddSpace;
+}
+
+// Whether a line break should follow the end tag of the given element.
+// This implementation ignores both arguments and always answers false.
+bool nsXMLContentSerializer::LineBreakAfterClose(int32_t aNamespaceID,
+ nsAtom* aName) {
+ return false;
+}
+
+// Appends aStr to aOutputStr, replacing each '\n' with the configured line
+// break (via AppendNewLineToString). In raw mode the text is appended
+// unchanged. Nothing is written while only the body is being output and we
+// are outside of it. Returns false if an append fails.
+bool nsXMLContentSerializer::AppendToStringConvertLF(const nsAString& aStr,
+                                                     nsAString& aOutputStr) {
+  if (mBodyOnly && !mInBody) {
+    return true;
+  }
+
+  if (mDoRaw) {
+    NS_ENSURE_TRUE(AppendToString(aStr, aOutputStr), false);
+    return true;
+  }
+
+  // Convert line-endings to mLineBreak, one line at a time.
+  const uint32_t total = aStr.Length();
+  uint32_t lineStart = 0;
+  while (lineStart < total) {
+    const int32_t newlineAt = aStr.FindChar('\n', lineStart);
+    if (newlineAt != kNotFound) {
+      nsDependentSubstring line(aStr, lineStart, newlineAt - lineStart);
+      NS_ENSURE_TRUE(AppendToString(line, aOutputStr), false);
+      NS_ENSURE_TRUE(AppendNewLineToString(aOutputStr), false);
+      lineStart = newlineAt + 1;
+      continue;
+    }
+
+    // Last piece, with no trailing line feed.
+    nsDependentSubstring tail(aStr, lineStart, total - lineStart);
+    NS_ENSURE_TRUE(AppendToString(tail, aOutputStr), false);
+    // If a line break preceded this piece, AppendNewLineToString set this
+    // flag; text now follows the break, so clear it again.
+    mMayIgnoreLineBreakSequence = false;
+    break;
+  }
+
+  return true;
+}
+
+// Consumes one complete run of whitespace (' ', '\t', '\n') starting at aPos
+// and decides how to represent it in formatted, wrapped output.
+//
+// On return aPos points at the first character that was not consumed.
+// aMayIgnoreStartOfLineWhitespaceSequence and the member state flags
+// (mAddSpace, mColPos, mMayIgnoreLineBreakSequence, ...) are updated.
+// aSequenceStart is not used here. The caller must ensure aPos < aEnd on
+// entry. Returns false if an append fails.
+bool nsXMLContentSerializer::AppendFormatedWrapped_WhitespaceSequence(
+    nsAString::const_char_iterator& aPos,
+    const nsAString::const_char_iterator aEnd,
+    const nsAString::const_char_iterator aSequenceStart,
+    bool& aMayIgnoreStartOfLineWhitespaceSequence, nsAString& aOutputStr) {
+  // Skip over the run. mColPos is deliberately not advanced here, because
+  // the whole run collapses to at most one character.
+  bool sawBlankOrTab = false;
+  do {
+    const char16_t ch = *aPos;
+    if (ch == ' ' || ch == '\t') {
+      sawBlankOrTab = true;
+    } else if (ch != '\n') {
+      break;
+    }
+    ++aPos;
+  } while (aPos < aEnd);
+
+  if (mAddSpace) {
+    // A space is already pending; this run changes nothing.
+    return true;
+  }
+
+  if (!sawBlankOrTab && mMayIgnoreLineBreakSequence) {
+    // The run consisted of line breaks only, and a line break was already
+    // written before AppendToStringWrapped was called.
+    mMayIgnoreLineBreakSequence = false;
+    return true;
+  }
+
+  if (aMayIgnoreStartOfLineWhitespaceSequence) {
+    // Whitespace at the start of a line is dropped, once.
+    aMayIgnoreStartOfLineWhitespaceSequence = false;
+    return true;
+  }
+
+  if (!sawBlankOrTab) {
+    // Asian text usually does not contain spaces, therefore a linebreak
+    // should not be turned into a space. We only saw linebreaks, so write a
+    // linebreak now.
+    NS_ENSURE_TRUE(AppendNewLineToString(aOutputStr), false);
+    return true;
+  }
+
+  if (mDoWrap && mColPos + 1 >= mMaxColumn) {
+    // Only one slot is left on the line; there is no point in delaying, so
+    // break right away.
+    const bool appended = aOutputStr.Append(mLineBreak, mozilla::fallible);
+    mColPos = 0;
+    mIsIndentationAddedOnCurrentLine = false;
+    mMayIgnoreLineBreakSequence = true;
+    NS_ENSURE_TRUE(appended, false);
+  } else {
+    // Don't write anything yet: depending on what follows this becomes
+    // either a space or a linebreak. Reserve the column for it.
+    mAddSpace = true;
+    ++mColPos;
+  }
+
+  return true;
+}
+
+// Consumes one run of non-whitespace characters starting at aPos and appends
+// it to aOutputStr, wrapping it when it would exceed mMaxColumn.
+//
+// Outcomes when wrapping is on and the run does not fit on the current line:
+//  - the run is moved as a whole to a fresh line and measured again;
+//  - otherwise, if line breaking is allowed and the line-break iterator finds
+//    a break opportunity, the run is split there;
+//  - otherwise everything up to the next whitespace is written unbroken,
+//    overrunning the column limit.
+//
+// On return aPos points at the first unconsumed character. The caller must
+// ensure aPos < aEnd on entry. Returns false if an append fails.
+bool nsXMLContentSerializer::AppendWrapped_NonWhitespaceSequence(
+ nsAString::const_char_iterator& aPos,
+ const nsAString::const_char_iterator aEnd,
+ const nsAString::const_char_iterator aSequenceStart,
+ bool& aMayIgnoreStartOfLineWhitespaceSequence,
+ bool& aSequenceStartAfterAWhiteSpace, nsAString& aOutputStr) {
+ mMayIgnoreLineBreakSequence = false;
+ aMayIgnoreStartOfLineWhitespaceSequence = false;
+
+ // Handle the complete sequence of non-whitespace in this block
+ // Iterate until we find the first whitespace char or an aEnd condition
+ // Updates "aPos" to point to the first unhandled char.
+ // Also updates the aMayIgnoreStartOfLineWhitespaceSequence flag,
+ // as well as the other "global" state flags.
+
+ bool thisSequenceStartsAtBeginningOfLine = !mColPos;
+ bool onceAgainBecauseWeAddedBreakInFront = false;
+ bool foundWhitespaceInLoop;
+ uint32_t length, colPos;
+
+ // The body runs a second time only after the run has been pushed onto a
+ // fresh line (onceAgainBecauseWeAddedBreakInFront).
+ do {
+ // colPos is the column the run would start at: the real column, or the
+ // indentation width when indentation is still to be written on this line.
+ if (mColPos) {
+ colPos = mColPos;
+ } else {
+ if (mDoFormat && !mDoRaw && !PreLevel() &&
+ !onceAgainBecauseWeAddedBreakInFront) {
+ colPos = mIndent.Length();
+ } else
+ colPos = 0;
+ }
+ foundWhitespaceInLoop = false;
+ length = 0;
+ // we iterate until the next whitespace character
+ // or until we reach the maximum of character per line
+ // or until the end of the string to add.
+ do {
+ if (*aPos == ' ' || *aPos == '\t' || *aPos == '\n') {
+ foundWhitespaceInLoop = true;
+ break;
+ }
+
+ ++aPos;
+ ++length;
+ } while ((!mDoWrap || colPos + length < mMaxColumn) && aPos < aEnd);
+
+ // in the case we don't reached the end of the string, but we reached the
+ // maxcolumn, we see if there is a whitespace after the maxcolumn if yes,
+ // then we can append directly the string instead of appending a new line
+ // etc.
+ // NOTE(review): this reads *aPos even when the loop above stopped because
+ // aPos reached aEnd - confirm the buffer is always readable at aEnd.
+ if (*aPos == ' ' || *aPos == '\t' || *aPos == '\n') {
+ foundWhitespaceInLoop = true;
+ }
+
+ if (aPos == aEnd || foundWhitespaceInLoop) {
+ // there is enough room for the complete block we found
+ if (mDoFormat && !mColPos) {
+ NS_ENSURE_TRUE(AppendIndentation(aOutputStr), false);
+ } else if (mAddSpace) {
+ bool result = aOutputStr.Append(char16_t(' '), mozilla::fallible);
+ mAddSpace = false;
+ NS_ENSURE_TRUE(result, false);
+ }
+
+ mColPos += length;
+ NS_ENSURE_TRUE(aOutputStr.Append(aSequenceStart, aPos - aSequenceStart,
+ mozilla::fallible),
+ false);
+
+ // We have not yet reached the max column, we will continue to
+ // fill the current line in the next outer loop iteration
+ // (this one in AppendToStringWrapped)
+ // make sure we return in this outer loop
+ onceAgainBecauseWeAddedBreakInFront = false;
+ } else { // we reach the max column
+ if (!thisSequenceStartsAtBeginningOfLine &&
+ (mAddSpace || (!mDoFormat && aSequenceStartAfterAWhiteSpace))) {
+ // when !mDoFormat, mAddSpace is not used, mAddSpace is always false
+ // so, in the case where mDoWrap && !mDoFormat, if we want to enter in
+ // this condition...
+
+ // We can avoid to wrap. We try to add the whole block
+ // in an empty new line
+
+ NS_ENSURE_TRUE(AppendNewLineToString(aOutputStr), false);
+ // Rewind to the start of the run and measure it again on the new line.
+ aPos = aSequenceStart;
+ thisSequenceStartsAtBeginningOfLine = true;
+ onceAgainBecauseWeAddedBreakInFront = true;
+ } else {
+ // we must wrap
+ onceAgainBecauseWeAddedBreakInFront = false;
+ Maybe<uint32_t> wrapPosition;
+
+ if (mAllowLineBreaking) {
+ MOZ_ASSERT(aPos < aEnd,
+ "We shouldn't be here if aPos reaches the end of text!");
+
+ // Search forward from aSequenceStart until we find the largest
+ // wrap position less than or equal to aPos.
+ Maybe<uint32_t> nextWrapPosition;
+ Span<const char16_t> subSeq(aSequenceStart, aEnd);
+ intl::LineBreakIteratorUtf16 lineBreakIter(subSeq);
+ while (true) {
+ nextWrapPosition = lineBreakIter.Next();
+ MOZ_ASSERT(nextWrapPosition.isSome(),
+ "We should've exited the loop when reaching the end of "
+ "text in the previous iteration!");
+ if (aSequenceStart + *nextWrapPosition > aPos) {
+ break;
+ }
+ wrapPosition = nextWrapPosition;
+ }
+
+ if (!wrapPosition) {
+ // The wrap position found in the first iteration of the above loop
+ // already exceeds aPos. We accept it as valid a wrap position only
+ // if it is not end-of-text. If the line-breaker returned
+ // end-of-text, we don't know that it is actually a good wrap
+ // position, so ignore it and continue to use the fallback code
+ // below.
+ if (*nextWrapPosition < subSeq.Length()) {
+ wrapPosition = nextWrapPosition;
+ }
+ }
+ }
+
+ if (wrapPosition) {
+ // Write the run up to the break opportunity, then start a new line.
+ if (!mColPos && mDoFormat) {
+ NS_ENSURE_TRUE(AppendIndentation(aOutputStr), false);
+ } else if (mAddSpace) {
+ bool result = aOutputStr.Append(char16_t(' '), mozilla::fallible);
+ mAddSpace = false;
+ NS_ENSURE_TRUE(result, false);
+ }
+ NS_ENSURE_TRUE(aOutputStr.Append(aSequenceStart, *wrapPosition,
+ mozilla::fallible),
+ false);
+
+ NS_ENSURE_TRUE(AppendNewLineToString(aOutputStr), false);
+ aPos = aSequenceStart + *wrapPosition;
+ aMayIgnoreStartOfLineWhitespaceSequence = true;
+ } else {
+ // try some simple fallback logic
+ // go forward up to the next whitespace position,
+ // in the worst case this will be all the rest of the data
+
+ // XXX(jfkthame) Should we (conditionally) output indentation here?
+ // It makes for tidier-looking formatted output, at the cost of
+ // exceeding the target width by a greater amount on such lines.
+ // if (!mColPos && mDoFormat) {
+ // NS_ENSURE_TRUE(AppendIndentation(aOutputStr), false);
+ // mAddSpace = false;
+ // }
+
+ // we update the mColPos variable with the length of
+ // the part already parsed.
+ mColPos += length;
+
+ // now try to find the next whitespace
+ do {
+ if (*aPos == ' ' || *aPos == '\t' || *aPos == '\n') {
+ break;
+ }
+
+ ++aPos;
+ ++mColPos;
+ } while (aPos < aEnd);
+
+ if (mAddSpace) {
+ bool result = aOutputStr.Append(char16_t(' '), mozilla::fallible);
+ mAddSpace = false;
+ NS_ENSURE_TRUE(result, false);
+ }
+ NS_ENSURE_TRUE(
+ aOutputStr.Append(aSequenceStart, aPos - aSequenceStart,
+ mozilla::fallible),
+ false);
+ }
+ }
+ aSequenceStartAfterAWhiteSpace = false;
+ }
+ } while (onceAgainBecauseWeAddedBreakInFront);
+
+ return true;
+}
+
+/**
+ * Appends aStr to aOutputStr in "formatted + wrapped" mode.
+ *
+ * aStr is consumed as alternating runs of whitespace (' ', '\n', '\t') and
+ * non-whitespace characters. Whitespace runs are handed to
+ * AppendFormatedWrapped_WhitespaceSequence, all other runs to
+ * AppendWrapped_NonWhitespaceSequence; both advance the read position.
+ *
+ * @param aStr       the text to append
+ * @param aOutputStr the string being built
+ * @return true on success, or immediately when mBodyOnly is set and mInBody
+ *         is zero (nothing is appended in that case); false as soon as one
+ *         of the helpers returns false.
+ */
+bool nsXMLContentSerializer::AppendToStringFormatedWrapped(
+    const nsAString& aStr, nsAString& aOutputStr) {
+  if (mBodyOnly && !mInBody) {
+    return true;
+  }
+
+  nsAString::const_char_iterator pos, end, sequenceStart;
+
+  aStr.BeginReading(pos);
+  aStr.EndReading(end);
+
+  // Check whether the output currently ends with a whitespace character.
+  // This is only meaningful (and only safe) when the output is non-empty:
+  // stepping back from the end of an empty string would read before the
+  // start of its buffer. AppendToStringWrapped applies the same guard.
+  bool sequenceStartAfterAWhitespace = false;
+  if (pos < end && !aOutputStr.IsEmpty()) {
+    nsAString::const_char_iterator end2;
+    aOutputStr.EndReading(end2);
+    --end2;
+    if (*end2 == ' ' || *end2 == '\n' || *end2 == '\t') {
+      sequenceStartAfterAWhitespace = true;
+    }
+  }
+
+  // if the current line already has text on it, such as a tag,
+  // leading whitespace is significant
+  bool mayIgnoreStartOfLineWhitespaceSequence =
+      (!mColPos ||
+       (mIsIndentationAddedOnCurrentLine && sequenceStartAfterAWhitespace &&
+        uint32_t(mColPos) == mIndent.Length()));
+
+  while (pos < end) {
+    sequenceStart = pos;
+
+    // if beginning of a whitespace sequence
+    if (*pos == ' ' || *pos == '\n' || *pos == '\t') {
+      NS_ENSURE_TRUE(AppendFormatedWrapped_WhitespaceSequence(
+                         pos, end, sequenceStart,
+                         mayIgnoreStartOfLineWhitespaceSequence, aOutputStr),
+                     false);
+    } else {  // any other non-whitespace char
+      NS_ENSURE_TRUE(
+          AppendWrapped_NonWhitespaceSequence(
+              pos, end, sequenceStart, mayIgnoreStartOfLineWhitespaceSequence,
+              sequenceStartAfterAWhitespace, aOutputStr),
+          false);
+    }
+  }
+
+  return true;
+}
+
+/**
+ * Copies one run of whitespace (' ', '\t', '\n') to aOutputStr, starting at
+ * aPos, and leaves aPos on the first character that was not handled.
+ *
+ * Every '\n' is replaced by mLineBreak. A space or tab that would be written
+ * at or beyond column mMaxColumn is preceded by an mLineBreak. mColPos is
+ * kept in sync with what was written.
+ *
+ * The character at aPos is examined before aPos is compared to aEnd, so the
+ * caller must pass a non-empty range.
+ *
+ * @return false if any append fails, true otherwise.
+ */
+bool nsXMLContentSerializer::AppendWrapped_WhitespaceSequence(
+    nsAString::const_char_iterator& aPos,
+    const nsAString::const_char_iterator aEnd,
+    const nsAString::const_char_iterator aSequenceStart,
+    nsAString& aOutputStr) {
+  mAddSpace = false;
+  mIsIndentationAddedOnCurrentLine = false;
+
+  // First character of the blanks that were scanned but not yet copied.
+  nsAString::const_char_iterator pendingStart = aPos;
+
+  // Copies [pendingStart, aPos) to the output; a no-op for an empty range.
+  const auto flushPending = [&]() -> bool {
+    return pendingStart == aPos ||
+           aOutputStr.Append(pendingStart, aPos - pendingStart,
+                             mozilla::fallible);
+  };
+
+  for (;;) {
+    const char16_t current = *aPos;
+
+    if (current == '\n') {
+      // Emit what precedes the newline, then our own line break instead of
+      // the source character.
+      NS_ENSURE_TRUE(flushPending(), false);
+      NS_ENSURE_TRUE(AppendToString(mLineBreak, aOutputStr), false);
+      mColPos = 0;
+      ++aPos;
+      pendingStart = aPos;
+    } else if (current == ' ' || current == '\t') {
+      // Too many blanks on this line: wrap before this one.
+      if (mColPos >= mMaxColumn) {
+        NS_ENSURE_TRUE(flushPending(), false);
+        NS_ENSURE_TRUE(AppendToString(mLineBreak, aOutputStr), false);
+        mColPos = 0;
+        pendingStart = aPos;
+      }
+      ++mColPos;
+      ++aPos;
+    } else {
+      // First non-whitespace character: the sequence is over.
+      break;
+    }
+
+    if (!(aPos < aEnd)) {
+      break;
+    }
+  }
+
+  // Whatever blanks remain after the last line break.
+  NS_ENSURE_TRUE(flushPending(), false);
+
+  return true;
+}
+
+/**
+ * Appends aStr to aOutputStr in "wrapped" (non-formatted) mode.
+ *
+ * aStr is split into runs of whitespace (' ', '\n', '\t') and runs of other
+ * characters; the former go through AppendWrapped_WhitespaceSequence, the
+ * latter through AppendWrapped_NonWhitespaceSequence.
+ *
+ * @return true on success, or immediately (without output) when mBodyOnly
+ *         is set and mInBody is zero; false as soon as a helper fails.
+ */
+bool nsXMLContentSerializer::AppendToStringWrapped(const nsAString& aStr,
+                                                   nsAString& aOutputStr) {
+  if (mBodyOnly && !mInBody) {
+    return true;
+  }
+
+  const auto isWrapWhitespace = [](char16_t aChar) {
+    return aChar == ' ' || aChar == '\n' || aChar == '\t';
+  };
+
+  nsAString::const_char_iterator cursor;
+  nsAString::const_char_iterator inputEnd;
+  aStr.BeginReading(cursor);
+  aStr.EndReading(inputEnd);
+
+  // AppendWrapped_NonWhitespaceSequence requires this flag by reference,
+  // but it plays no role in this mode.
+  bool unusedMayIgnoreStartOfLine = false;
+  mMayIgnoreLineBreakSequence = false;
+
+  // Does the text we are about to add follow a whitespace character that is
+  // already in the output?
+  bool afterWhitespace = false;
+  if (cursor < inputEnd && !aOutputStr.IsEmpty()) {
+    nsAString::const_char_iterator lastWritten;
+    aOutputStr.EndReading(lastWritten);
+    --lastWritten;
+    if (isWrapWhitespace(*lastWritten)) {
+      afterWhitespace = true;
+    }
+  }
+
+  while (cursor < inputEnd) {
+    const nsAString::const_char_iterator runStart = cursor;
+
+    if (!isWrapWhitespace(*cursor)) {
+      // A run of non-whitespace characters.
+      NS_ENSURE_TRUE(AppendWrapped_NonWhitespaceSequence(
+                         cursor, inputEnd, runStart,
+                         unusedMayIgnoreStartOfLine, afterWhitespace,
+                         aOutputStr),
+                     false);
+      continue;
+    }
+
+    // A run of whitespace characters.
+    afterWhitespace = true;
+    NS_ENSURE_TRUE(AppendWrapped_WhitespaceSequence(cursor, inputEnd, runStart,
+                                                    aOutputStr),
+                   false);
+  }
+
+  return true;
+}
+
+/**
+ * Tells whether the pre level must be tracked: always when the output is not
+ * raw, and in raw mode only if OutputNoFormattingInPre is set in mFlags.
+ */
+bool nsXMLContentSerializer::ShouldMaintainPreLevel() const {
+  if (!mDoRaw) {
+    return true;
+  }
+  // Raw output: only consumers that asked for it care about the pre level.
+  return (mFlags & nsIDocumentEncoder::OutputNoFormattingInPre) != 0;
+}
+
+/**
+ * Appends ` is="..."` to aStr when aElement's custom element data provides
+ * an "is" value and the element has no `is` attribute of its own (in the
+ * null namespace).
+ *
+ * @return false if appending to aStr fails, true in every other case
+ *         (including when nothing had to be written).
+ */
+bool nsXMLContentSerializer::MaybeSerializeIsValue(Element* aElement,
+                                                   nsAString& aStr) {
+  CustomElementData* const customData = aElement->GetCustomElementData();
+  if (!customData) {
+    return true;
+  }
+
+  nsAtom* const isValue = customData->GetIs(aElement);
+  if (!isValue || aElement->HasAttr(kNameSpaceID_None, nsGkAtoms::is)) {
+    // Nothing to add, or the attribute is already present on the element.
+    return true;
+  }
+
+  NS_ENSURE_TRUE(aStr.AppendLiteral(" is=\"", mozilla::fallible), false);
+  NS_ENSURE_TRUE(
+      aStr.Append(nsDependentAtomString(isValue), mozilla::fallible), false);
+  NS_ENSURE_TRUE(aStr.AppendLiteral("\"", mozilla::fallible), false);
+
+  return true;
+}
diff --git a/dom/serializers/nsXMLContentSerializer.h b/dom/serializers/nsXMLContentSerializer.h
new file mode 100644
index 0000000000..167255fe09
--- /dev/null
+++ b/dom/serializers/nsXMLContentSerializer.h
@@ -0,0 +1,440 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * nsIContentSerializer implementation that can be used with an
+ * nsIDocumentEncoder to convert an XML DOM to an XML string that
+ * could be parsed into more or less the original DOM.
+ */
+
+#ifndef nsXMLContentSerializer_h__
+#define nsXMLContentSerializer_h__
+
+#include "mozilla/Attributes.h"
+#include "nsIContentSerializer.h"
+#include "nsISupportsUtils.h"
+#include "nsCOMPtr.h"
+#include "nsTArray.h"
+#include "nsString.h"
+
+#define kIndentStr u" "_ns
+#define kEndTag u"</"_ns
+
+class nsAtom;
+class nsINode;
+
+namespace mozilla {
+class Encoding;
+}
+
+class nsXMLContentSerializer : public nsIContentSerializer {
+ public:
+ nsXMLContentSerializer();
+
+ NS_DECL_ISUPPORTS
+
+ NS_IMETHOD Init(uint32_t flags, uint32_t aWrapColumn,
+ const mozilla::Encoding* aEncoding, bool aIsCopying,
+ bool aRewriteEncodingDeclaration,
+ bool* aNeedsPreformatScanning, nsAString& aOutput) override;
+
+ NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset,
+ int32_t aEndOffset) override;
+
+ NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection, int32_t aStartOffset,
+ int32_t aEndOffset) override;
+
+ NS_IMETHOD AppendProcessingInstruction(
+ mozilla::dom::ProcessingInstruction* aPI, int32_t aStartOffset,
+ int32_t aEndOffset) override;
+
+ NS_IMETHOD AppendComment(mozilla::dom::Comment* aComment,
+ int32_t aStartOffset, int32_t aEndOffset) override;
+
+ NS_IMETHOD AppendDoctype(mozilla::dom::DocumentType* aDoctype) override;
+
+ NS_IMETHOD AppendElementStart(
+ mozilla::dom::Element* aElement,
+ mozilla::dom::Element* aOriginalElement) override;
+
+ NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement,
+ mozilla::dom::Element* aOriginalElement) override;
+
+ // Nothing to flush in this implementation: always returns NS_OK.
+ NS_IMETHOD FlushAndFinish() override { return NS_OK; }
+
+ NS_IMETHOD Finish() override;
+
+ NS_IMETHOD GetOutputLength(uint32_t& aLength) const override;
+
+ NS_IMETHOD AppendDocumentStart(mozilla::dom::Document* aDocument) override;
+
+ // The two preformat hooks below are no-ops here: they always return NS_OK.
+ NS_IMETHOD ScanElementForPreformat(mozilla::dom::Element* aElement) override {
+ return NS_OK;
+ }
+ NS_IMETHOD ForgetElementForPreformat(
+ mozilla::dom::Element* aElement) override {
+ return NS_OK;
+ }
+
+ protected:
+ virtual ~nsXMLContentSerializer();
+
+ /**
+ * Appends a char16_t character and increments the column position
+ */
+ [[nodiscard]] bool AppendToString(const char16_t aChar,
+ nsAString& aOutputStr);
+
+ /**
+ * Appends a nsAString string and increments the column position
+ */
+ [[nodiscard]] bool AppendToString(const nsAString& aStr,
+ nsAString& aOutputStr);
+
+ /**
+ * Appends a string by replacing all line-endings
+ * by mLineBreak, except in the case of raw output.
+ * It increments the column position.
+ */
+ [[nodiscard]] bool AppendToStringConvertLF(const nsAString& aStr,
+ nsAString& aOutputStr);
+
+ /**
+ * Appends a string by wrapping it when necessary.
+ * It updates the column position.
+ */
+ [[nodiscard]] bool AppendToStringWrapped(const nsAString& aStr,
+ nsAString& aOutputStr);
+
+ /**
+ * Appends a string by formatting and wrapping it when necessary.
+ * It updates the column position.
+ */
+ [[nodiscard]] bool AppendToStringFormatedWrapped(const nsAString& aStr,
+ nsAString& aOutputStr);
+
+ // used by AppendToStringWrapped
+ [[nodiscard]] bool AppendWrapped_WhitespaceSequence(
+ nsAString::const_char_iterator& aPos,
+ const nsAString::const_char_iterator aEnd,
+ const nsAString::const_char_iterator aSequenceStart,
+ nsAString& aOutputStr);
+
+ // used by AppendToStringFormatedWrapped
+ [[nodiscard]] bool AppendFormatedWrapped_WhitespaceSequence(
+ nsAString::const_char_iterator& aPos,
+ const nsAString::const_char_iterator aEnd,
+ const nsAString::const_char_iterator aSequenceStart,
+ bool& aMayIgnoreStartOfLineWhitespaceSequence, nsAString& aOutputStr);
+
+ // used by AppendToStringWrapped and AppendToStringFormatedWrapped
+ [[nodiscard]] bool AppendWrapped_NonWhitespaceSequence(
+ nsAString::const_char_iterator& aPos,
+ const nsAString::const_char_iterator aEnd,
+ const nsAString::const_char_iterator aSequenceStart,
+ bool& aMayIgnoreStartOfLineWhitespaceSequence,
+ bool& aSequenceStartAfterAWhiteSpace, nsAString& aOutputStr);
+
+ /**
+ * Adds mLineBreak to the string.
+ * It updates the column position and other flags.
+ */
+ [[nodiscard]] bool AppendNewLineToString(nsAString& aOutputStr);
+
+ /**
+ * Appends a string by translating entities
+ * It doesn't increment the column position
+ */
+ [[nodiscard]] virtual bool AppendAndTranslateEntities(const nsAString& aStr,
+ nsAString& aOutputStr);
+
+ /**
+ * Helper for virtual AppendAndTranslateEntities that does the actual work.
+ *
+ * Do not call this directly. Call it via the template helper below.
+ */
+ private:
+ [[nodiscard]] static bool AppendAndTranslateEntities(
+ const nsAString& aStr, nsAString& aOutputStr,
+ const uint8_t aEntityTable[], uint16_t aMaxTableIndex,
+ const char* const aStringTable[]);
+
+ protected:
+ /**
+ * Helper for calling AppendAndTranslateEntities in a way that guarantees we
+ * don't mess up our aEntityTable sizing. This is a bit more complicated than
+ * it could be, because sometimes we don't want to use all of aEntityTable, so
+ * we have to allow passing the amount to use independently. But we can
+ * statically ensure it's not too big.
+ *
+ * The first integer template argument, which callers need to specify
+ * explicitly, is the index of the last entry in aEntityTable that should be
+ * considered for encoding as an entity reference. The second integer
+ * argument will be deduced from the actual table passed in.
+ *
+ * aEntityTable contains as values indices into aStringTable. Those represent
+ * the strings that should be used to replace the characters that are used to
+ * index into aEntityTable. aStringTable[0] should be nullptr, and characters
+ * that do not need replacement should map to 0 in aEntityTable.
+ */
+ template <uint16_t LargestIndex, uint16_t TableLength>
+ [[nodiscard]] bool AppendAndTranslateEntities(
+ const nsAString& aStr, nsAString& aOutputStr,
+ const uint8_t (&aEntityTable)[TableLength],
+ const char* const aStringTable[]) {
+ static_assert(LargestIndex < TableLength,
+ "Largest allowed index must be smaller than table length");
+ return AppendAndTranslateEntities(aStr, aOutputStr, aEntityTable,
+ LargestIndex, aStringTable);
+ }
+
+ /**
+ * Max index that can be used with some of our entity tables.
+ */
+ static const uint16_t kGTVal = 62;
+
+ /**
+ * Retrieves the text content of the node and appends it to the given string.
+ * It doesn't increment the column position
+ */
+ nsresult AppendTextData(nsIContent* aNode, int32_t aStartOffset,
+ int32_t aEndOffset, nsAString& aStr,
+ bool aTranslateEntities);
+
+ virtual nsresult PushNameSpaceDecl(const nsAString& aPrefix,
+ const nsAString& aURI, nsIContent* aOwner);
+ void PopNameSpaceDeclsFor(nsIContent* aOwner);
+
+ /**
+ * The problem that ConfirmPrefix fixes is that anyone can insert nodes
+ * through the DOM that have a namespace URI and a random or empty or
+ * previously existing prefix that's totally unrelated to the prefixes
+ * declared at that point through xmlns attributes. So what ConfirmPrefix
+ * does is ensure that we can map aPrefix to the namespace URI aURI (for
+ * example, that the prefix is not already mapped to some other namespace).
+ * aPrefix will be adjusted, if necessary, so the value of the prefix
+ * _after_ this call is what should be serialized.
+ * @param aPrefix the prefix that may need adjusting
+ * @param aURI the namespace URI we want aPrefix to point to
+ * @param aElement the element we're working with (needed for proper default
+ * namespace handling)
+ * @param aIsAttribute true if we're confirming a prefix for an attribute.
+ * @return true if we need to push the (prefix, uri) pair on the namespace
+ * stack (note that this can happen even if the prefix is
+ * empty).
+ */
+ bool ConfirmPrefix(nsAString& aPrefix, const nsAString& aURI,
+ nsIContent* aElement, bool aIsAttribute);
+ /**
+ * GenerateNewPrefix generates a new prefix and writes it to aPrefix
+ */
+ void GenerateNewPrefix(nsAString& aPrefix);
+
+ uint32_t ScanNamespaceDeclarations(mozilla::dom::Element* aContent,
+ mozilla::dom::Element* aOriginalElement,
+ const nsAString& aTagNamespaceURI);
+
+ [[nodiscard]] virtual bool SerializeAttributes(
+ mozilla::dom::Element* aContent, mozilla::dom::Element* aOriginalElement,
+ nsAString& aTagPrefix, const nsAString& aTagNamespaceURI,
+ nsAtom* aTagName, nsAString& aStr, uint32_t aSkipAttr, bool aAddNSAttr);
+
+ [[nodiscard]] bool SerializeAttr(const nsAString& aPrefix,
+ const nsAString& aName,
+ const nsAString& aValue, nsAString& aStr,
+ bool aDoEscapeEntities);
+
+ bool IsJavaScript(nsIContent* aContent, nsAtom* aAttrNameAtom,
+ int32_t aAttrNamespaceID, const nsAString& aValueString);
+
+ /**
+ * This method can be redefined to check if the element can be serialized.
+ * It is called when the serialization of the start tag is asked
+ * (AppendElementStart)
+ * In this method you can also force the formatting
+ * by setting aForceFormat to true.
+ * @return boolean true if the element can be output
+ */
+ virtual bool CheckElementStart(mozilla::dom::Element* aElement,
+ bool& aForceFormat, nsAString& aStr,
+ nsresult& aResult);
+
+ /**
+ * This method is responsible for appending the '>' at the end of the start
+ * tag, possibly preceded by '/' and maybe a ' ' before that too.
+ *
+ * aElement and aOriginalElement are the same as the corresponding arguments
+ * to AppendElementStart.
+ */
+ [[nodiscard]] bool AppendEndOfElementStart(
+ mozilla::dom::Element* aEleemnt, mozilla::dom::Element* aOriginalElement,
+ nsAString& aStr);
+
+ /**
+ * This method can be redefined to serialize additional things just after
+ * the serialization of the start tag.
+ * (called at the end of AppendElementStart)
+ * The default implementation does nothing and returns true.
+ */
+ [[nodiscard]] virtual bool AfterElementStart(nsIContent* aContent,
+ nsIContent* aOriginalElement,
+ nsAString& aStr) {
+ return true;
+ };
+
+ /**
+ * This method can be redefined to check if the element can be serialized.
+ * It is called when the serialization of the end tag is asked
+ * (AppendElementEnd)
+ * In this method you can also force the formatting
+ * by setting aForceFormat to true.
+ * @return boolean true if the element can be output
+ */
+ virtual bool CheckElementEnd(mozilla::dom::Element* aElement,
+ mozilla::dom::Element* aOriginalElement,
+ bool& aForceFormat, nsAString& aStr);
+
+ /**
+ * This method can be redefined to serialize additional things just after
+ * the serialization of the end tag.
+ * (called at the end of AppendElementEnd)
+ * The default implementation does nothing.
+ */
+ virtual void AfterElementEnd(nsIContent* aContent, nsAString& aStr){};
+
+ /**
+ * Returns true if a line break should be inserted before an element open tag
+ */
+ virtual bool LineBreakBeforeOpen(int32_t aNamespaceID, nsAtom* aName);
+
+ /**
+ * Returns true if a line break should be inserted after an element open tag
+ */
+ virtual bool LineBreakAfterOpen(int32_t aNamespaceID, nsAtom* aName);
+
+ /**
+ * Returns true if a line break should be inserted before an element close tag
+ */
+ virtual bool LineBreakBeforeClose(int32_t aNamespaceID, nsAtom* aName);
+
+ /**
+ * Returns true if a line break should be inserted after an element close tag
+ */
+ virtual bool LineBreakAfterClose(int32_t aNamespaceID, nsAtom* aName);
+
+ /**
+ * Adds indentation. Call only in the case of formatting and if the current
+ * position is at 0. It updates the column position.
+ */
+ [[nodiscard]] bool AppendIndentation(nsAString& aStr);
+
+ [[nodiscard]] bool IncrIndentation(nsAtom* aName);
+ void DecrIndentation(nsAtom* aName);
+
+ // Functions to check for newlines that needs to be added between nodes in
+ // the root of a document. See mAddNewlineForRootNode
+ [[nodiscard]] bool MaybeAddNewlineForRootNode(nsAString& aStr);
+ void MaybeFlagNewlineForRootNode(nsINode* aNode);
+
+ // Functions to check if we enter in or leave from a preformatted content
+ virtual void MaybeEnterInPreContent(nsIContent* aNode);
+ virtual void MaybeLeaveFromPreContent(nsIContent* aNode);
+
+ // Accessors for mPreLevel. Both assert that ShouldMaintainPreLevel() holds.
+ bool ShouldMaintainPreLevel() const;
+ int32_t PreLevel() const {
+ MOZ_ASSERT(ShouldMaintainPreLevel());
+ return mPreLevel;
+ }
+ int32_t& PreLevel() {
+ MOZ_ASSERT(ShouldMaintainPreLevel());
+ return mPreLevel;
+ }
+
+ bool MaybeSerializeIsValue(mozilla::dom::Element* aElement, nsAString& aStr);
+
+ int32_t mPrefixIndex;
+
+ struct NameSpaceDecl {
+ nsString mPrefix;
+ nsString mURI;
+ nsIContent* mOwner;
+ };
+
+ nsTArray<NameSpaceDecl> mNameSpaceStack;
+
+ // nsIDocumentEncoder flags
+ MOZ_INIT_OUTSIDE_CTOR uint32_t mFlags;
+
+ // characters to use for line break
+ nsString mLineBreak;
+
+ // The charset that was passed to Init()
+ nsCString mCharset;
+
+ // current column position on the current line
+ uint32_t mColPos;
+
+ // true = pretty formatting should be done (OutputFormatted flag)
+ MOZ_INIT_OUTSIDE_CTOR bool mDoFormat;
+
+ // true = no formatting,(OutputRaw flag)
+ // no newline conversion and no rewrap long lines even if OutputWrap is set.
+ MOZ_INIT_OUTSIDE_CTOR bool mDoRaw;
+
+ // true = wrapping should be done (OutputWrap flag)
+ MOZ_INIT_OUTSIDE_CTOR bool mDoWrap;
+
+ // true = we can break lines (OutputDisallowLineBreaking flag)
+ MOZ_INIT_OUTSIDE_CTOR bool mAllowLineBreaking;
+
+ // maximum number of columns in a line, in the wrap mode
+ MOZ_INIT_OUTSIDE_CTOR uint32_t mMaxColumn;
+
+ // current indent value
+ nsString mIndent;
+
+ // this is the indentation level after the indentation reached
+ // the maximum length of indentation
+ int32_t mIndentOverflow;
+
+ // says if the indentation has been already added on the current line
+ bool mIsIndentationAddedOnCurrentLine;
+
+ // the string which is currently added is in an attribute
+ bool mInAttribute;
+
+ // true = a newline character should be added. It's only
+ // useful when serializing root nodes. see MaybeAddNewlineForRootNode and
+ // MaybeFlagNewlineForRootNode
+ bool mAddNewlineForRootNode;
+
+ // Indicates that a space will be added if and only if content is
+ // continued on the same line while serializing source. Otherwise,
+ // the newline character acts as the whitespace and no space is needed.
+ // used when mDoFormat = true
+ bool mAddSpace;
+
+ // says that if the next string to add contains a newline character at the
+ // beginning, then this newline character should be ignored, because
+ // such a character has already been added into the output string
+ bool mMayIgnoreLineBreakSequence;
+
+ // While mBodyOnly is true and mInBody is 0, AppendToStringWrapped and
+ // AppendToStringFormatedWrapped return without appending anything.
+ bool mBodyOnly;
+ int32_t mInBody;
+
+ // Non-owning.
+ nsAString* mOutput;
+
+ private:
+ // number of nested elements which have preformatted content
+ MOZ_INIT_OUTSIDE_CTOR int32_t mPreLevel;
+
+ static const uint8_t kEntities[];
+ static const uint8_t kAttrEntities[];
+ static const char* const kEntityStrings[];
+};
+
+nsresult NS_NewXMLContentSerializer(nsIContentSerializer** aSerializer);
+
+#endif