diff options
Diffstat (limited to '')
-rw-r--r-- | dom/serializers/nsHTMLContentSerializer.cpp | 441 |
1 files changed, 441 insertions, 0 deletions
diff --git a/dom/serializers/nsHTMLContentSerializer.cpp b/dom/serializers/nsHTMLContentSerializer.cpp new file mode 100644 index 0000000000..968792c613 --- /dev/null +++ b/dom/serializers/nsHTMLContentSerializer.cpp @@ -0,0 +1,441 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * nsIContentSerializer implementation that can be used with an + * nsIDocumentEncoder to convert an HTML (not XHTML!) DOM to an HTML + * string that could be parsed into more or less the original DOM. + */ + +#include "nsHTMLContentSerializer.h" + +#include "nsIContent.h" +#include "mozilla/dom/Document.h" +#include "nsElementTable.h" +#include "nsNameSpaceManager.h" +#include "nsString.h" +#include "nsUnicharUtils.h" +#include "nsIDocumentEncoder.h" +#include "nsGkAtoms.h" +#include "nsIURI.h" +#include "nsNetUtil.h" +#include "nsEscape.h" +#include "nsCRT.h" +#include "nsContentUtils.h" +#include "nsIScriptElement.h" +#include "nsAttrName.h" +#include "mozilla/dom/Element.h" +#include "nsParserConstants.h" + +using namespace mozilla::dom; + +nsresult NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer) { + RefPtr<nsHTMLContentSerializer> it = new nsHTMLContentSerializer(); + it.forget(aSerializer); + return NS_OK; +} + +nsHTMLContentSerializer::nsHTMLContentSerializer() { mIsHTMLSerializer = true; } + +nsHTMLContentSerializer::~nsHTMLContentSerializer() = default; + +NS_IMETHODIMP +nsHTMLContentSerializer::AppendDocumentStart(Document* aDocument) { + return NS_OK; +} + +bool nsHTMLContentSerializer::SerializeHTMLAttributes( + Element* aElement, Element* aOriginalElement, nsAString& aTagPrefix, + const nsAString& aTagNamespaceURI, nsAtom* aTagName, int32_t aNamespace, + nsAString& aStr) { + MaybeSerializeIsValue(aElement, aStr); + + int32_t count = aElement->GetAttrCount(); + if (!count) return true; + + nsresult rv; + nsAutoString valueStr; + + for (int32_t index = 0; index < count; index++) { + const nsAttrName* name = aElement->GetAttrNameAt(index); + int32_t namespaceID = name->NamespaceID(); + nsAtom* attrName = name->LocalName(); + + // Filter out any attribute starting with [-|_]moz + nsDependentAtomString attrNameStr(attrName); + if (StringBeginsWith(attrNameStr, u"_moz"_ns) || + StringBeginsWith(attrNameStr, u"-moz"_ns)) { + continue; + } + aElement->GetAttr(namespaceID, attrName, valueStr); + + if (mIsCopying && mIsFirstChildOfOL && aTagName == nsGkAtoms::li && + aNamespace == kNameSpaceID_XHTML && attrName == nsGkAtoms::value && + namespaceID == kNameSpaceID_None) { + // This is handled separately in SerializeLIValueAttribute() + continue; + } + bool isJS = IsJavaScript(aElement, attrName, namespaceID, valueStr); + + if (((attrName == nsGkAtoms::href && (namespaceID == kNameSpaceID_None || + namespaceID == kNameSpaceID_XLink)) || + (attrName == nsGkAtoms::src && namespaceID == kNameSpaceID_None))) { + // Make all links absolute when converting only the selection: + if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) { + // Would be nice to handle OBJECT tags, but that gets more complicated + // since we have to search the tag list for CODEBASE as well. For now, + // just leave them relative. + nsIURI* uri = aElement->GetBaseURI(); + if (uri) { + nsAutoString absURI; + rv = NS_MakeAbsoluteURI(absURI, valueStr, uri); + if (NS_SUCCEEDED(rv)) { + valueStr = absURI; + } + } + } + } + + if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta && + aNamespace == kNameSpaceID_XHTML && attrName == nsGkAtoms::content && + namespaceID == kNameSpaceID_None) { + // If we're serializing a <meta http-equiv="content-type">, + // use the proper value, rather than what's in the document. + nsAutoString header; + aElement->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header); + if (header.LowerCaseEqualsLiteral("content-type")) { + valueStr = u"text/html; charset="_ns + NS_ConvertASCIItoUTF16(mCharset); + } + } + + nsDependentAtomString nameStr(attrName); + nsAutoString prefix; + if (namespaceID == kNameSpaceID_XML) { + prefix.AssignLiteral(u"xml"); + } else if (namespaceID == kNameSpaceID_XLink) { + prefix.AssignLiteral(u"xlink"); + } + + // Expand shorthand attribute. + if (aNamespace == kNameSpaceID_XHTML && namespaceID == kNameSpaceID_None && + IsShorthandAttr(attrName, aTagName) && valueStr.IsEmpty()) { + valueStr = nameStr; + } + NS_ENSURE_TRUE(SerializeAttr(prefix, nameStr, valueStr, aStr, !isJS), + false); + } + + return true; +} + +NS_IMETHODIMP +nsHTMLContentSerializer::AppendElementStart(Element* aElement, + Element* aOriginalElement) { + NS_ENSURE_ARG(aElement); + NS_ENSURE_STATE(mOutput); + + bool forceFormat = false; + nsresult rv = NS_OK; + if (!CheckElementStart(aElement, forceFormat, *mOutput, rv)) { + // When we go to AppendElementEnd for this element, we're going to + // MaybeLeaveFromPreContent(). So make sure to MaybeEnterInPreContent() + // now, so our PreLevel() doesn't get confused. + MaybeEnterInPreContent(aElement); + return rv; + } + + NS_ENSURE_SUCCESS(rv, rv); + + nsAtom* name = aElement->NodeInfo()->NameAtom(); + int32_t ns = aElement->GetNameSpaceID(); + + bool lineBreakBeforeOpen = LineBreakBeforeOpen(ns, name); + + if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) { + if (mColPos && lineBreakBeforeOpen) { + NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY); + } else { + NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(*mOutput), + NS_ERROR_OUT_OF_MEMORY); + } + if (!mColPos) { + NS_ENSURE_TRUE(AppendIndentation(*mOutput), NS_ERROR_OUT_OF_MEMORY); + } else if (mAddSpace) { + bool result = AppendToString(char16_t(' '), *mOutput); + mAddSpace = false; + NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY); + } + } else if (mAddSpace) { + bool result = AppendToString(char16_t(' '), *mOutput); + mAddSpace = false; + NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY); + } else { + NS_ENSURE_TRUE(MaybeAddNewlineForRootNode(*mOutput), + NS_ERROR_OUT_OF_MEMORY); + } + // Always reset to avoid false newlines in case MaybeAddNewlineForRootNode + // wasn't called + mAddNewlineForRootNode = false; + + NS_ENSURE_TRUE(AppendToString(kLessThan, *mOutput), NS_ERROR_OUT_OF_MEMORY); + + NS_ENSURE_TRUE(AppendToString(nsDependentAtomString(name), *mOutput), + NS_ERROR_OUT_OF_MEMORY); + + MaybeEnterInPreContent(aElement); + + // for block elements, we increase the indentation + if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) + NS_ENSURE_TRUE(IncrIndentation(name), NS_ERROR_OUT_OF_MEMORY); + + // Need to keep track of OL and LI elements in order to get ordinal number + // for the LI. + if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML) { + // We are copying and current node is an OL; + // Store its start attribute value in olState->startVal. + nsAutoString start; + int32_t startAttrVal = 0; + + aElement->GetAttr(kNameSpaceID_None, nsGkAtoms::start, start); + if (!start.IsEmpty()) { + nsresult rv = NS_OK; + startAttrVal = start.ToInteger(&rv); + // If OL has "start" attribute, first LI element has to start with that + // value Therefore subtracting 1 as all the LI elements are incrementing + // it before using it; In failure of ToInteger(), default StartAttrValue + // to 0. + if (NS_SUCCEEDED(rv)) + startAttrVal--; + else + startAttrVal = 0; + } + mOLStateStack.AppendElement(olState(startAttrVal, true)); + } + + if (mIsCopying && name == nsGkAtoms::li && ns == kNameSpaceID_XHTML) { + mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement); + if (mIsFirstChildOfOL) { + // If OL is parent of this LI, serialize attributes in different manner. + NS_ENSURE_TRUE(SerializeLIValueAttribute(aElement, *mOutput), + NS_ERROR_OUT_OF_MEMORY); + } + } + + // Even LI passed above have to go through this + // for serializing attributes other than "value". + nsAutoString dummyPrefix; + NS_ENSURE_TRUE( + SerializeHTMLAttributes(aElement, aOriginalElement, dummyPrefix, u""_ns, + name, ns, *mOutput), + NS_ERROR_OUT_OF_MEMORY); + + NS_ENSURE_TRUE(AppendToString(kGreaterThan, *mOutput), + NS_ERROR_OUT_OF_MEMORY); + + if (ns == kNameSpaceID_XHTML && + (name == nsGkAtoms::script || name == nsGkAtoms::style || + name == nsGkAtoms::noscript || name == nsGkAtoms::noframes)) { + ++mDisableEntityEncoding; + } + + if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel() && + LineBreakAfterOpen(ns, name)) { + NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY); + } + + NS_ENSURE_TRUE(AfterElementStart(aElement, aOriginalElement, *mOutput), + NS_ERROR_OUT_OF_MEMORY); + + return NS_OK; +} + +NS_IMETHODIMP +nsHTMLContentSerializer::AppendElementEnd(Element* aElement, + Element* aOriginalElement) { + NS_ENSURE_ARG(aElement); + NS_ENSURE_STATE(mOutput); + + nsAtom* name = aElement->NodeInfo()->NameAtom(); + int32_t ns = aElement->GetNameSpaceID(); + + if (ns == kNameSpaceID_XHTML && + (name == nsGkAtoms::script || name == nsGkAtoms::style || + name == nsGkAtoms::noscript || name == nsGkAtoms::noframes)) { + --mDisableEntityEncoding; + } + + bool forceFormat = !(mFlags & nsIDocumentEncoder::OutputIgnoreMozDirty) && + aElement->HasAttr(kNameSpaceID_None, nsGkAtoms::mozdirty); + + if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) { + DecrIndentation(name); + } + + if (name == nsGkAtoms::script) { + nsCOMPtr<nsIScriptElement> script = do_QueryInterface(aElement); + + if (ShouldMaintainPreLevel() && script && script->IsMalformed()) { + // We're looking at a malformed script tag. This means that the end tag + // was missing in the source. Imitate that here by not serializing the end + // tag. + --PreLevel(); + return NS_OK; + } + } else if (mIsCopying && name == nsGkAtoms::ol && ns == kNameSpaceID_XHTML) { + NS_ASSERTION((!mOLStateStack.IsEmpty()), "Cannot have an empty OL Stack"); + /* Though at this point we must always have an state to be deleted as all + the OL opening tags are supposed to push an olState object to the stack*/ + if (!mOLStateStack.IsEmpty()) { + mOLStateStack.RemoveLastElement(); + } + } + + if (ns == kNameSpaceID_XHTML) { + bool isContainer = + nsHTMLElement::IsContainer(nsHTMLTags::CaseSensitiveAtomTagToId(name)); + if (!isContainer) { + // Keep this in sync with the cleanup at the end of this method. + MOZ_ASSERT(name != nsGkAtoms::body); + MaybeLeaveFromPreContent(aElement); + return NS_OK; + } + } + + if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel()) { + bool lineBreakBeforeClose = LineBreakBeforeClose(ns, name); + + if (mColPos && lineBreakBeforeClose) { + NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY); + } + if (!mColPos) { + NS_ENSURE_TRUE(AppendIndentation(*mOutput), NS_ERROR_OUT_OF_MEMORY); + } else if (mAddSpace) { + bool result = AppendToString(char16_t(' '), *mOutput); + mAddSpace = false; + NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY); + } + } else if (mAddSpace) { + bool result = AppendToString(char16_t(' '), *mOutput); + mAddSpace = false; + NS_ENSURE_TRUE(result, NS_ERROR_OUT_OF_MEMORY); + } + + NS_ENSURE_TRUE(AppendToString(kEndTag, *mOutput), NS_ERROR_OUT_OF_MEMORY); + NS_ENSURE_TRUE(AppendToString(nsDependentAtomString(name), *mOutput), + NS_ERROR_OUT_OF_MEMORY); + NS_ENSURE_TRUE(AppendToString(kGreaterThan, *mOutput), + NS_ERROR_OUT_OF_MEMORY); + + // Keep this cleanup in sync with the IsContainer() early return above. + MaybeLeaveFromPreContent(aElement); + + if ((mDoFormat || forceFormat) && !mDoRaw && !PreLevel() && + LineBreakAfterClose(ns, name)) { + NS_ENSURE_TRUE(AppendNewLineToString(*mOutput), NS_ERROR_OUT_OF_MEMORY); + } else { + MaybeFlagNewlineForRootNode(aElement); + } + + if (name == nsGkAtoms::body && ns == kNameSpaceID_XHTML) { + --mInBody; + } + + return NS_OK; +} + +static const uint16_t kValNBSP = 160; + +#define _ 0 + +// This table indexes into kEntityStrings[]. +const uint8_t nsHTMLContentSerializer::kEntities[] = { + // clang-format off + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, 2, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + 3, _, 4, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + 5 + // clang-format on +}; + +// This table indexes into kEntityStrings[]. +const uint8_t nsHTMLContentSerializer::kAttrEntities[] = { + // clang-format off + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, 1, _, _, _, 2, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + 3, _, 4, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + _, _, _, _, _, _, _, _, _, _, + 5 + // clang-format on +}; + +#undef _ + +const char* const nsHTMLContentSerializer::kEntityStrings[] = { + /* 0 */ nullptr, + /* 1 */ """, + /* 2 */ "&", + /* 3 */ "<", + /* 4 */ ">", + /* 5 */ " "}; + +bool nsHTMLContentSerializer::AppendAndTranslateEntities( + const nsAString& aStr, nsAString& aOutputStr) { + if (mBodyOnly && !mInBody) { + return true; + } + + if (mDisableEntityEncoding) { + return aOutputStr.Append(aStr, mozilla::fallible); + } + + if (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities)) { + // Per the API documentation, encode , &, <, >, and " + if (mInAttribute) { + return nsXMLContentSerializer::AppendAndTranslateEntities<kValNBSP>( + aStr, aOutputStr, kAttrEntities, kEntityStrings); + } + + return nsXMLContentSerializer::AppendAndTranslateEntities<kValNBSP>( + aStr, aOutputStr, kEntities, kEntityStrings); + } + + // We don't want to call into our superclass 2-arg version of + // AppendAndTranslateEntities, because it wants to encode more characters + // than we do. Use our tables, but avoid encoding by passing in a + // smaller max index. This will only encode &, <, >, and ". + if (mInAttribute) { + return nsXMLContentSerializer::AppendAndTranslateEntities<kGTVal>( + aStr, aOutputStr, kAttrEntities, kEntityStrings); + } + + return nsXMLContentSerializer::AppendAndTranslateEntities<kGTVal>( + aStr, aOutputStr, kEntities, kEntityStrings); +} |