summaryrefslogtreecommitdiffstats
path: root/comm/mailnews/extensions/newsblog/FeedParser.jsm
diff options
context:
space:
mode:
Diffstat (limited to 'comm/mailnews/extensions/newsblog/FeedParser.jsm')
-rw-r--r--comm/mailnews/extensions/newsblog/FeedParser.jsm1496
1 files changed, 1496 insertions, 0 deletions
diff --git a/comm/mailnews/extensions/newsblog/FeedParser.jsm b/comm/mailnews/extensions/newsblog/FeedParser.jsm
new file mode 100644
index 0000000000..863d5789fe
--- /dev/null
+++ b/comm/mailnews/extensions/newsblog/FeedParser.jsm
@@ -0,0 +1,1496 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+const EXPORTED_SYMBOLS = ["FeedParser"];
+
+const lazy = {};
+
+ChromeUtils.defineModuleGetter(
+ lazy,
+ "FeedItem",
+ "resource:///modules/FeedItem.jsm"
+);
+ChromeUtils.defineModuleGetter(
+ lazy,
+ "FeedEnclosure",
+ "resource:///modules/FeedItem.jsm"
+);
+ChromeUtils.defineModuleGetter(
+ lazy,
+ "FeedUtils",
+ "resource:///modules/FeedUtils.jsm"
+);
+
+/**
+ * The feed parser. Depends on FeedItem.js, Feed.js.
+ *
+ * @class
+ */
+function FeedParser() {
+ this.parsedItems = [];
+ this.mSerializer = new XMLSerializer();
+}
+
+FeedParser.prototype = {
+ /**
+ * parseFeed() returns an array of parsed items ready for processing. It is
+ * currently a synchronous operation. If there is an error parsing the feed,
+ * parseFeed returns an empty feed in addition to calling aFeed.onParseError.
+ *
+ * @param {Feed} aFeed - The Feed object.
+ * @param {XMLDocument} aDOM - The document to parse.
+ * @returns {Array} - array of items, or empty array for error returns or
+ * nothing to do condition.
+ */
+ parseFeed(aFeed, aDOM) {
+ if (!XMLDocument.isInstance(aDOM)) {
+ // No xml doc.
+ aFeed.onParseError(aFeed);
+ return [];
+ }
+
+ let doc = aDOM.documentElement;
+ if (doc.namespaceURI == lazy.FeedUtils.MOZ_PARSERERROR_NS) {
+ // Gecko caught a basic parsing error.
+ let errStr =
+ doc.firstChild.textContent + "\n" + doc.firstElementChild.textContent;
+ lazy.FeedUtils.log.info("FeedParser.parseFeed: - " + errStr);
+ aFeed.onParseError(aFeed);
+ return [];
+ } else if (aDOM.querySelector("redirect")) {
+ // Check for RSS2.0 redirect document.
+ let channel = aDOM.querySelector("redirect");
+ if (this.isPermanentRedirect(aFeed, channel, null)) {
+ return [];
+ }
+
+ aFeed.onParseError(aFeed);
+ return [];
+ } else if (
+ doc.namespaceURI == lazy.FeedUtils.RDF_SYNTAX_NS &&
+ doc.getElementsByTagNameNS(lazy.FeedUtils.RSS_NS, "channel")[0]
+ ) {
+ aFeed.mFeedType = "RSS_1.xRDF";
+ lazy.FeedUtils.log.debug(
+ "FeedParser.parseFeed: type:url - " +
+ aFeed.mFeedType +
+ " : " +
+ aFeed.url
+ );
+
+ return this.parseAsRSS1(aFeed, aDOM);
+ } else if (doc.namespaceURI == lazy.FeedUtils.ATOM_03_NS) {
+ aFeed.mFeedType = "ATOM_0.3";
+ lazy.FeedUtils.log.debug(
+ "FeedParser.parseFeed: type:url - " +
+ aFeed.mFeedType +
+ " : " +
+ aFeed.url
+ );
+ return this.parseAsAtom(aFeed, aDOM);
+ } else if (doc.namespaceURI == lazy.FeedUtils.ATOM_IETF_NS) {
+ aFeed.mFeedType = "ATOM_IETF";
+ lazy.FeedUtils.log.debug(
+ "FeedParser.parseFeed: type:url - " +
+ aFeed.mFeedType +
+ " : " +
+ aFeed.url
+ );
+ return this.parseAsAtomIETF(aFeed, aDOM);
+ } else if (
+ doc.getElementsByTagNameNS(lazy.FeedUtils.RSS_090_NS, "channel")[0]
+ ) {
+ aFeed.mFeedType = "RSS_0.90";
+ lazy.FeedUtils.log.debug(
+ "FeedParser.parseFeed: type:url - " +
+ aFeed.mFeedType +
+ " : " +
+ aFeed.url
+ );
+ return this.parseAsRSS2(aFeed, aDOM);
+ }
+
+ // Parse as RSS 0.9x. In theory even RSS 1.0 feeds could be parsed by
+ // the 0.9x parser if the RSS namespace were the default.
+ let rssVer = doc.localName == "rss" ? doc.getAttribute("version") : null;
+ if (rssVer) {
+ aFeed.mFeedType = "RSS_" + rssVer;
+ } else {
+ aFeed.mFeedType = "RSS_0.9x?";
+ }
+ lazy.FeedUtils.log.debug(
+ "FeedParser.parseFeed: type:url - " + aFeed.mFeedType + " : " + aFeed.url
+ );
+ return this.parseAsRSS2(aFeed, aDOM);
+ },
+
+ parseAsRSS2(aFeed, aDOM) {
+ // Get the first channel (assuming there is only one per RSS File).
+ let channel = aDOM.querySelector("channel");
+ if (!channel) {
+ aFeed.onParseError(aFeed);
+ return [];
+ }
+
+ // Usually the empty string, unless this is RSS .90.
+ let nsURI = channel.namespaceURI || "";
+
+ if (this.isPermanentRedirect(aFeed, null, channel)) {
+ return [];
+ }
+
+ let tags = this.childrenByTagNameNS(channel, nsURI, "title");
+ aFeed.title = aFeed.title || this.getNodeValue(tags ? tags[0] : null);
+ tags = this.childrenByTagNameNS(channel, nsURI, "description");
+ aFeed.description = this.getNodeValueFormatted(tags ? tags[0] : null);
+ tags = this.childrenByTagNameNS(channel, nsURI, "link");
+ aFeed.link = this.validLink(this.getNodeValue(tags ? tags[0] : null));
+
+ if (!(aFeed.title || aFeed.description)) {
+ lazy.FeedUtils.log.error(
+ "FeedParser.parseAsRSS2: missing mandatory element " +
+ "<title> and <description>"
+ );
+ // The RSS2 spec requires a <link> as well, but we can do without it
+ // so ignore the case of (valid) link missing.
+ aFeed.onParseError(aFeed);
+ return [];
+ }
+
+ if (!aFeed.parseItems) {
+ return [];
+ }
+
+ this.findSyUpdateTags(aFeed, channel);
+
+ aFeed.invalidateItems();
+ // XXX use getElementsByTagNameNS for now; childrenByTagNameNS would be
+ // better, but RSS .90 is still with us.
+ let itemNodes = aDOM.getElementsByTagNameNS(nsURI, "item");
+ itemNodes = itemNodes ? itemNodes : [];
+ lazy.FeedUtils.log.debug(
+ "FeedParser.parseAsRSS2: items to parse - " + itemNodes.length
+ );
+
+ for (let itemNode of itemNodes) {
+ if (!itemNode.childElementCount) {
+ continue;
+ }
+
+ let item = new lazy.FeedItem();
+ item.feed = aFeed;
+ item.enclosures = [];
+ item.keywords = [];
+
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.FEEDBURNER_NS,
+ "origLink"
+ );
+ let link = this.validLink(this.getNodeValue(tags ? tags[0] : null));
+ if (!link) {
+ tags = this.childrenByTagNameNS(itemNode, nsURI, "link");
+ link = this.validLink(this.getNodeValue(tags ? tags[0] : null));
+ }
+ tags = this.childrenByTagNameNS(itemNode, nsURI, "guid");
+ let guidNode = tags ? tags[0] : null;
+
+ let guid;
+ let isPermaLink = false;
+ if (guidNode) {
+ guid = this.getNodeValue(guidNode);
+ // isPermaLink is true if the value is "true" or if the attribute is
+ // not present; all other values, including "false" and "False" and
+ // for that matter "TRuE" and "meatcake" are false.
+ if (
+ !guidNode.hasAttribute("isPermaLink") ||
+ guidNode.getAttribute("isPermaLink") == "true"
+ ) {
+ isPermaLink = true;
+ }
+ // If attribute isPermaLink is missing, it is good to check the validity
+ // of <guid> value as an URL to avoid linking to non-URL strings.
+ if (!guidNode.hasAttribute("isPermaLink")) {
+ try {
+ Services.io.newURI(guid);
+ if (Services.io.extractScheme(guid) == "tag") {
+ isPermaLink = false;
+ }
+ } catch (ex) {
+ isPermaLink = false;
+ }
+ }
+
+ item.id = guid;
+ }
+
+ let guidLink = this.validLink(guid);
+ if (isPermaLink && guidLink) {
+ item.url = guidLink;
+ } else if (link) {
+ item.url = link;
+ } else {
+ item.url = null;
+ }
+
+ tags = this.childrenByTagNameNS(itemNode, nsURI, "description");
+ item.description = this.getNodeValueFormatted(tags ? tags[0] : null);
+ tags = this.childrenByTagNameNS(itemNode, nsURI, "title");
+ item.title = this.getNodeValue(tags ? tags[0] : null);
+ if (!(item.title || item.description)) {
+ lazy.FeedUtils.log.info(
+ "FeedParser.parseAsRSS2: <item> missing mandatory " +
+ "element, either <title> or <description>; skipping"
+ );
+ continue;
+ }
+
+ if (!item.id) {
+ // At this point, if there is no guid, uniqueness cannot be guaranteed
+ // by any of link or date (optional) or title (optional unless there
+ // is no description). Use a big chunk of description; minimize dupes
+ // with url and title if present.
+ item.id =
+ (item.url || item.feed.url) +
+ "#" +
+ item.title +
+ "#" +
+ (this.stripTags(
+ item.description ? item.description.substr(0, 150) : null
+ ) || item.title);
+ item.id = item.id.replace(/[\n\r\t\s]+/g, " ");
+ }
+
+ // Escape html entities in <title>, which are unescaped as textContent
+ // values. If the title is used as content, it will remain escaped; if
+ // it is used as the title, it will be unescaped upon store. Bug 1240603.
+ // The <description> tag must follow escaping examples found in
+ // http://www.rssboard.org/rss-encoding-examples, i.e. single escape angle
+ // brackets for tags, which are removed if used as title, and double
+ // escape entities for presentation in title.
+ // Better: always use <title>. Best: use Atom.
+ if (!item.title) {
+ item.title = this.stripTags(item.description).substr(0, 150);
+ } else {
+ item.title = item.htmlEscape(item.title);
+ }
+
+ tags = this.childrenByTagNameNS(itemNode, nsURI, "author");
+ if (!tags) {
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.DC_NS,
+ "creator"
+ );
+ }
+ let author = this.getNodeValue(tags ? tags[0] : null) || aFeed.title;
+ author = this.cleanAuthorName(author);
+ item.author = author ? ["<" + author + ">"] : item.author;
+
+ tags = this.childrenByTagNameNS(itemNode, nsURI, "pubDate");
+ if (!tags || !this.getNodeValue(tags[0])) {
+ tags = this.childrenByTagNameNS(itemNode, lazy.FeedUtils.DC_NS, "date");
+ }
+ item.date = this.getNodeValue(tags ? tags[0] : null) || item.date;
+
+ // If the date is invalid, users will see the beginning of the epoch
+ // unless we reset it here, so they'll see the current time instead.
+ // This is typical aggregator behavior.
+ if (item.date) {
+ item.date = item.date.trim();
+ if (!lazy.FeedUtils.isValidRFC822Date(item.date)) {
+ // XXX Use this on the other formats as well.
+ item.date = this.dateRescue(item.date);
+ }
+ }
+
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.RSS_CONTENT_NS,
+ "encoded"
+ );
+ item.content = this.getNodeValueFormatted(tags ? tags[0] : null);
+
+ // Handle <enclosures> and <media:content>, which may be in a
+ // <media:group> (if present).
+ tags = this.childrenByTagNameNS(itemNode, nsURI, "enclosure");
+ let encUrls = [];
+ if (tags) {
+ for (let tag of tags) {
+ let url = this.validLink(tag.getAttribute("url"));
+ if (url && !encUrls.includes(url)) {
+ let type = this.removeUnprintableASCII(tag.getAttribute("type"));
+ let length = this.removeUnprintableASCII(
+ tag.getAttribute("length")
+ );
+ item.enclosures.push(new lazy.FeedEnclosure(url, type, length));
+ encUrls.push(url);
+ }
+ }
+ }
+
+ tags = itemNode.getElementsByTagNameNS(lazy.FeedUtils.MRSS_NS, "content");
+ if (tags) {
+ for (let tag of tags) {
+ let url = this.validLink(tag.getAttribute("url"));
+ if (url && !encUrls.includes(url)) {
+ let type = this.removeUnprintableASCII(tag.getAttribute("type"));
+ let fileSize = this.removeUnprintableASCII(
+ tag.getAttribute("fileSize")
+ );
+ item.enclosures.push(new lazy.FeedEnclosure(url, type, fileSize));
+ }
+ }
+ }
+
+ // The <origEnclosureLink> tag has no specification, especially regarding
+ // whether more than one tag is allowed and, if so, how tags would
+ // relate to previously declared (and well specified) enclosure urls.
+ // The common usage is to include 1 origEnclosureLink, in addition to
+ // the specified enclosure tags for 1 enclosure. Thus, we will replace the
+ // first enclosure's, if found, url with the first <origEnclosureLink>
+ // url only or else add the <origEnclosureLink> url.
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.FEEDBURNER_NS,
+ "origEnclosureLink"
+ );
+ let origEncUrl = this.validLink(this.getNodeValue(tags ? tags[0] : null));
+ if (origEncUrl) {
+ if (item.enclosures.length) {
+ item.enclosures[0].mURL = origEncUrl;
+ } else {
+ item.enclosures.push(new lazy.FeedEnclosure(origEncUrl));
+ }
+ }
+
+ // Support <category> and autotagging.
+ tags = this.childrenByTagNameNS(itemNode, nsURI, "category");
+ if (tags) {
+ for (let tag of tags) {
+ let term = this.getNodeValue(tag);
+ term = term ? this.xmlUnescape(term.replace(/,/g, ";")) : null;
+ if (term && !item.keywords.includes(term)) {
+ item.keywords.push(term);
+ }
+ }
+ }
+
+ this.parsedItems.push(item);
+ }
+
+ return this.parsedItems;
+ },
+
+ /**
+ * Extracts feed details and (optionally) items from an RSS1
+ * feed which has already been XML-parsed as an XMLDocument.
+ * The feed items are extracted only if feed.parseItems is set.
+ *
+ * Technically RSS1 is supposed to be treated as RDFXML, but in practice
+ * no feed parser anywhere ever does this, and feeds in the wild are
+ * pretty shakey on their RDF encoding too. So we just treat it as raw
+ * XML and pick out the bits we want.
+ *
+ * @param {Feed} feed - The Feed object.
+ * @param {XMLDocument} doc - The document to parse.
+ * @returns {Array} - array of FeedItems or empty array for error returns or
+ * nothing to do condition (ie unset feed.parseItems).
+ */
+ parseAsRSS1(feed, doc) {
+ let channel = doc.querySelector("channel");
+ if (!channel) {
+ feed.onParseError(feed);
+ return [];
+ }
+
+ if (this.isPermanentRedirect(feed, null, channel)) {
+ return [];
+ }
+
+ let titleNode = this.childByTagNameNS(
+ channel,
+ lazy.FeedUtils.RSS_NS,
+ "title"
+ );
+ // If user entered a title manually, retain it.
+ feed.title = feed.title || this.getNodeValue(titleNode) || feed.url;
+
+ let descNode = this.childByTagNameNS(
+ channel,
+ lazy.FeedUtils.RSS_NS,
+ "description"
+ );
+ feed.description = this.getNodeValueFormatted(descNode) || "";
+
+ let linkNode = this.childByTagNameNS(
+ channel,
+ lazy.FeedUtils.RSS_NS,
+ "link"
+ );
+ feed.link = this.validLink(this.getNodeValue(linkNode)) || feed.url;
+
+ if (!(feed.title || feed.description) || !feed.link) {
+ lazy.FeedUtils.log.error(
+ "FeedParser.parseAsRSS1: missing mandatory element " +
+ "<title> and <description>, or <link>"
+ );
+ feed.onParseError(feed);
+ return [];
+ }
+
+ // If we're only interested in the overall feed description, we're done.
+ if (!feed.parseItems) {
+ return [];
+ }
+
+ this.findSyUpdateTags(feed, channel);
+
+ feed.invalidateItems();
+
+ // Now process all the individual items in the feed.
+ let itemNodes = doc.getElementsByTagNameNS(lazy.FeedUtils.RSS_NS, "item");
+ itemNodes = itemNodes ? itemNodes : [];
+
+ for (let itemNode of itemNodes) {
+ let item = new lazy.FeedItem();
+ item.feed = feed;
+
+ // Prefer the value of the link tag to the item URI since the URI could be
+ // a relative URN.
+ let itemURI = itemNode.getAttribute("about") || "";
+ itemURI = this.removeUnprintableASCII(itemURI.trim());
+ let linkNode = this.childByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.RSS_NS,
+ "link"
+ );
+ item.id = this.getNodeValue(linkNode) || itemURI;
+ item.url = this.validLink(item.id);
+
+ let descNode = this.childByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.RSS_NS,
+ "description"
+ );
+ item.description = this.getNodeValueFormatted(descNode);
+
+ let titleNode = this.childByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.RSS_NS,
+ "title"
+ );
+ let subjectNode = this.childByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.DC_NS,
+ "subject"
+ );
+
+ item.title =
+ this.getNodeValue(titleNode) || this.getNodeValue(subjectNode);
+ if (!item.title && item.description) {
+ item.title = this.stripTags(item.description).substr(0, 150);
+ }
+ if (!item.url || !item.title) {
+ lazy.FeedUtils.log.info(
+ "FeedParser.parseAsRSS1: <item> missing mandatory " +
+ "element <item rdf:about> and <link>, or <title> and " +
+ "no <description>; skipping"
+ );
+ continue;
+ }
+
+ // TODO XXX: ignores multiple authors.
+ let authorNode = this.childByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.DC_NS,
+ "creator"
+ );
+ let channelCreatorNode = this.childByTagNameNS(
+ channel,
+ lazy.FeedUtils.DC_NS,
+ "creator"
+ );
+ let author =
+ this.getNodeValue(authorNode) ||
+ this.getNodeValue(channelCreatorNode) ||
+ feed.title;
+ author = this.cleanAuthorName(author);
+ item.author = author ? ["<" + author + ">"] : item.author;
+
+ let dateNode = this.childByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.DC_NS,
+ "date"
+ );
+ item.date = this.getNodeValue(dateNode) || item.date;
+
+ let contentNode = this.childByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.RSS_CONTENT_NS,
+ "encoded"
+ );
+ item.content = this.getNodeValueFormatted(contentNode);
+
+ this.parsedItems.push(item);
+ }
+ lazy.FeedUtils.log.debug(
+ "FeedParser.parseAsRSS1: items parsed - " + this.parsedItems.length
+ );
+
+ return this.parsedItems;
+ },
+
+ // TODO: deprecate ATOM_03_NS.
+ parseAsAtom(aFeed, aDOM) {
+ // Get the first channel (assuming there is only one per Atom File).
+ let channel = aDOM.querySelector("feed");
+ if (!channel) {
+ aFeed.onParseError(aFeed);
+ return [];
+ }
+
+ if (this.isPermanentRedirect(aFeed, null, channel)) {
+ return [];
+ }
+
+ let tags = this.childrenByTagNameNS(
+ channel,
+ lazy.FeedUtils.ATOM_03_NS,
+ "title"
+ );
+ aFeed.title =
+ aFeed.title || this.stripTags(this.getNodeValue(tags ? tags[0] : null));
+ tags = this.childrenByTagNameNS(
+ channel,
+ lazy.FeedUtils.ATOM_03_NS,
+ "tagline"
+ );
+ aFeed.description = this.getNodeValueFormatted(tags ? tags[0] : null);
+ tags = this.childrenByTagNameNS(channel, lazy.FeedUtils.ATOM_03_NS, "link");
+ aFeed.link = this.validLink(this.findAtomLink("alternate", tags));
+
+ if (!aFeed.title) {
+ lazy.FeedUtils.log.error(
+ "FeedParser.parseAsAtom: missing mandatory element <title>"
+ );
+ aFeed.onParseError(aFeed);
+ return [];
+ }
+
+ if (!aFeed.parseItems) {
+ return [];
+ }
+
+ this.findSyUpdateTags(aFeed, channel);
+
+ aFeed.invalidateItems();
+ let items = this.childrenByTagNameNS(
+ channel,
+ lazy.FeedUtils.ATOM_03_NS,
+ "entry"
+ );
+ items = items ? items : [];
+ lazy.FeedUtils.log.debug(
+ "FeedParser.parseAsAtom: items to parse - " + items.length
+ );
+
+ for (let itemNode of items) {
+ if (!itemNode.childElementCount) {
+ continue;
+ }
+
+ let item = new lazy.FeedItem();
+ item.feed = aFeed;
+
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_03_NS,
+ "link"
+ );
+ item.url = this.validLink(this.findAtomLink("alternate", tags));
+
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_03_NS,
+ "id"
+ );
+ item.id = this.getNodeValue(tags ? tags[0] : null);
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_03_NS,
+ "summary"
+ );
+ item.description = this.getNodeValueFormatted(tags ? tags[0] : null);
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_03_NS,
+ "title"
+ );
+ item.title =
+ this.getNodeValue(tags ? tags[0] : null) ||
+ (item.description ? item.description.substr(0, 150) : null);
+ if (!item.title || !item.id) {
+ // We're lenient about other mandatory tags, but insist on these.
+ lazy.FeedUtils.log.info(
+ "FeedParser.parseAsAtom: <entry> missing mandatory " +
+ "element <id>, or <title> and no <summary>; skipping"
+ );
+ continue;
+ }
+
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_03_NS,
+ "author"
+ );
+ if (!tags) {
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_03_NS,
+ "contributor"
+ );
+ }
+ if (!tags) {
+ tags = this.childrenByTagNameNS(
+ channel,
+ lazy.FeedUtils.ATOM_03_NS,
+ "author"
+ );
+ }
+
+ let authorEl = tags ? tags[0] : null;
+
+ let author = "";
+ if (authorEl) {
+ tags = this.childrenByTagNameNS(
+ authorEl,
+ lazy.FeedUtils.ATOM_03_NS,
+ "name"
+ );
+ let name = this.getNodeValue(tags ? tags[0] : null);
+ tags = this.childrenByTagNameNS(
+ authorEl,
+ lazy.FeedUtils.ATOM_03_NS,
+ "email"
+ );
+ let email = this.getNodeValue(tags ? tags[0] : null);
+ if (name) {
+ author = name + (email ? " <" + email + ">" : "");
+ } else if (email) {
+ author = email;
+ }
+ }
+
+ item.author = author || item.author || aFeed.title;
+
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_03_NS,
+ "modified"
+ );
+ if (!tags || !this.getNodeValue(tags[0])) {
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_03_NS,
+ "issued"
+ );
+ }
+ if (!tags || !this.getNodeValue(tags[0])) {
+ tags = this.childrenByTagNameNS(
+ channel,
+ lazy.FeedUtils.ATOM_03_NS,
+ "created"
+ );
+ }
+
+ item.date = this.getNodeValue(tags ? tags[0] : null) || item.date;
+
+ // XXX We should get the xml:base attribute from the content tag as well
+ // and use it as the base HREF of the message.
+ // XXX Atom feeds can have multiple content elements; we should differentiate
+ // between them and pick the best one.
+ // Some Atom feeds wrap the content in a CTYPE declaration; others use
+ // a namespace to identify the tags as HTML; and a few are buggy and put
+ // HTML tags in without declaring their namespace so they look like Atom.
+ // We deal with the first two but not the third.
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_03_NS,
+ "content"
+ );
+ let contentNode = tags ? tags[0] : null;
+
+ let content;
+ if (contentNode) {
+ content = "";
+ for (let node of contentNode.childNodes) {
+ if (node.nodeType == node.CDATA_SECTION_NODE) {
+ content += node.data;
+ } else {
+ content += this.mSerializer.serializeToString(node);
+ }
+ }
+
+ if (contentNode.getAttribute("mode") == "escaped") {
+ content = content.replace(/&lt;/g, "<");
+ content = content.replace(/&gt;/g, ">");
+ content = content.replace(/&amp;/g, "&");
+ }
+
+ if (content == "") {
+ content = null;
+ }
+ }
+
+ item.content = content;
+ this.parsedItems.push(item);
+ }
+
+ return this.parsedItems;
+ },
+
+ parseAsAtomIETF(aFeed, aDOM) {
+ // Get the first channel (assuming there is only one per Atom File).
+ let channel = this.childrenByTagNameNS(
+ aDOM,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "feed"
+ )[0];
+ if (!channel) {
+ aFeed.onParseError(aFeed);
+ return [];
+ }
+
+ if (this.isPermanentRedirect(aFeed, null, channel)) {
+ return [];
+ }
+
+ let contentBase = channel.getAttribute("xml:base");
+
+ let tags = this.childrenByTagNameNS(
+ channel,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "title"
+ );
+ aFeed.title =
+ aFeed.title ||
+ this.stripTags(this.serializeTextConstruct(tags ? tags[0] : null));
+
+ tags = this.childrenByTagNameNS(
+ channel,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "subtitle"
+ );
+ aFeed.description = this.serializeTextConstruct(tags ? tags[0] : null);
+
+ // Per spec, aFeed.link and contentBase may both end up null here.
+ tags = this.childrenByTagNameNS(
+ channel,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "link"
+ );
+ aFeed.link =
+ this.findAtomLink("self", tags, contentBase) ||
+ this.findAtomLink("alternate", tags, contentBase);
+ aFeed.link = this.validLink(aFeed.link);
+ if (!contentBase) {
+ contentBase = aFeed.link;
+ }
+
+ if (!aFeed.title) {
+ lazy.FeedUtils.log.error(
+ "FeedParser.parseAsAtomIETF: missing mandatory element <title>"
+ );
+ aFeed.onParseError(aFeed);
+ return [];
+ }
+
+ if (!aFeed.parseItems) {
+ return [];
+ }
+
+ this.findSyUpdateTags(aFeed, channel);
+
+ aFeed.invalidateItems();
+ let items = this.childrenByTagNameNS(
+ channel,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "entry"
+ );
+ items = items ? items : [];
+ lazy.FeedUtils.log.debug(
+ "FeedParser.parseAsAtomIETF: items to parse - " + items.length
+ );
+
+ for (let itemNode of items) {
+ if (!itemNode.childElementCount) {
+ continue;
+ }
+
+ let item = new lazy.FeedItem();
+ item.feed = aFeed;
+ item.enclosures = [];
+ item.keywords = [];
+
+ contentBase = itemNode.getAttribute("xml:base") || contentBase;
+
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "source"
+ );
+ let source = tags ? tags[0] : null;
+
+ // Per spec, item.link and contentBase may both end up null here.
+ // If <content> is also not present, then <link rel="alternate"> is MUST
+ // but we're lenient.
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.FEEDBURNER_NS,
+ "origLink"
+ );
+ item.url = this.validLink(this.getNodeValue(tags ? tags[0] : null));
+ if (!item.url) {
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "link"
+ );
+ item.url =
+ this.validLink(this.findAtomLink("alternate", tags, contentBase)) ||
+ aFeed.link;
+ }
+ if (!contentBase) {
+ contentBase = item.url;
+ }
+
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "id"
+ );
+ item.id = this.getNodeValue(tags ? tags[0] : null);
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "summary"
+ );
+ item.description = this.serializeTextConstruct(tags ? tags[0] : null);
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "title"
+ );
+ if (!tags || !this.getNodeValue(tags[0])) {
+ tags = this.childrenByTagNameNS(
+ source,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "title"
+ );
+ }
+ item.title = this.stripTags(
+ this.serializeTextConstruct(tags ? tags[0] : null) ||
+ (item.description ? item.description.substr(0, 150) : null)
+ );
+ if (!item.title || !item.id) {
+ // We're lenient about other mandatory tags, but insist on these.
+ lazy.FeedUtils.log.info(
+ "FeedParser.parseAsAtomIETF: <entry> missing mandatory " +
+ "element <id>, or <title> and no <summary>; skipping"
+ );
+ continue;
+ }
+
+ // Support multiple authors.
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "author"
+ );
+ if (!tags) {
+ tags = this.childrenByTagNameNS(
+ source,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "author"
+ );
+ }
+ if (!tags) {
+ tags = this.childrenByTagNameNS(
+ channel,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "author"
+ );
+ }
+
+ let authorTags = tags || [];
+ let authors = [];
+ for (let authorTag of authorTags) {
+ let author = "";
+ tags = this.childrenByTagNameNS(
+ authorTag,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "name"
+ );
+ let name = this.getNodeValue(tags ? tags[0] : null);
+ tags = this.childrenByTagNameNS(
+ authorTag,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "email"
+ );
+ let email = this.getNodeValue(tags ? tags[0] : null);
+ if (name) {
+ name = this.cleanAuthorName(name);
+ if (email) {
+ if (!email.match(/^<.*>$/)) {
+ email = " <" + email + ">";
+ }
+ author = name + email;
+ } else {
+ author = "<" + name + ">";
+ }
+ } else if (email) {
+ author = email;
+ }
+
+ if (author) {
+ authors.push(author);
+ }
+ }
+
+ if (authors.length == 0) {
+ tags = this.childrenByTagNameNS(
+ channel,
+ lazy.FeedUtils.DC_NS,
+ "publisher"
+ );
+ let author = this.getNodeValue(tags ? tags[0] : null) || aFeed.title;
+ author = this.cleanAuthorName(author);
+ item.author = author ? ["<" + author + ">"] : item.author;
+ } else {
+ item.author = authors;
+ }
+ lazy.FeedUtils.log.trace(
+ "FeedParser.parseAsAtomIETF: author(s) - " + item.author
+ );
+
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "updated"
+ );
+ if (!tags || !this.getNodeValue(tags[0])) {
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "published"
+ );
+ }
+ if (!tags || !this.getNodeValue(tags[0])) {
+ tags = this.childrenByTagNameNS(
+ source,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "published"
+ );
+ }
+ item.date = this.getNodeValue(tags ? tags[0] : null) || item.date;
+
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "content"
+ );
+ item.content = this.serializeTextConstruct(tags ? tags[0] : null);
+
+ // Ensure relative links can be resolved and Content-Base set to an
+ // absolute url for the entry. But it's not mandatory that a url is found
+ // for Content-Base, per spec.
+ if (item.content) {
+ item.xmlContentBase =
+ (tags && tags[0].getAttribute("xml:base")) || contentBase;
+ } else if (item.description) {
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "summary"
+ );
+ item.xmlContentBase =
+ (tags && tags[0].getAttribute("xml:base")) || contentBase;
+ } else {
+ item.xmlContentBase = contentBase;
+ }
+
+ item.xmlContentBase = this.validLink(item.xmlContentBase);
+
+ // Handle <link rel="enclosure"> (if present).
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "link"
+ );
+ let encUrls = [];
+ if (tags) {
+ for (let tag of tags) {
+ let url =
+ tag.getAttribute("rel") == "enclosure"
+ ? (tag.getAttribute("href") || "").trim()
+ : null;
+ url = this.validLink(url);
+ if (url && !encUrls.includes(url)) {
+ let type = this.removeUnprintableASCII(tag.getAttribute("type"));
+ let length = this.removeUnprintableASCII(
+ tag.getAttribute("length")
+ );
+ let title = this.removeUnprintableASCII(tag.getAttribute("title"));
+ item.enclosures.push(
+ new lazy.FeedEnclosure(url, type, length, title)
+ );
+ encUrls.push(url);
+ }
+ }
+ }
+
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.FEEDBURNER_NS,
+ "origEnclosureLink"
+ );
+ let origEncUrl = this.validLink(this.getNodeValue(tags ? tags[0] : null));
+ if (origEncUrl) {
+ if (item.enclosures.length) {
+ item.enclosures[0].mURL = origEncUrl;
+ } else {
+ item.enclosures.push(new lazy.FeedEnclosure(origEncUrl));
+ }
+ }
+
+ // Handle atom threading extension, RFC4685. There may be 1 or more tags,
+ // and each must contain a ref attribute with 1 Message-Id equivalent
+ // value. This is the only attr of interest in the spec for presentation.
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_THREAD_NS,
+ "in-reply-to"
+ );
+ if (tags) {
+ for (let tag of tags) {
+ let ref = this.removeUnprintableASCII(tag.getAttribute("ref"));
+ if (ref) {
+ item.inReplyTo += item.normalizeMessageID(ref) + " ";
+ }
+ }
+ item.inReplyTo = item.inReplyTo.trimRight();
+ }
+
+ // Support <category> and autotagging.
+ tags = this.childrenByTagNameNS(
+ itemNode,
+ lazy.FeedUtils.ATOM_IETF_NS,
+ "category"
+ );
+ if (tags) {
+ for (let tag of tags) {
+ let term = this.removeUnprintableASCII(tag.getAttribute("term"));
+ term = term ? this.xmlUnescape(term.replace(/,/g, ";")).trim() : null;
+ if (term && !item.keywords.includes(term)) {
+ item.keywords.push(term);
+ }
+ }
+ }
+
+ this.parsedItems.push(item);
+ }
+
+ return this.parsedItems;
+ },
+
+ isPermanentRedirect(aFeed, aRedirDocChannel, aFeedChannel) {
+ // If subscribing to a new feed, do not check redirect tags.
+ if (!aFeed.downloadCallback || aFeed.downloadCallback.mSubscribeMode) {
+ return false;
+ }
+
+ let tags, tagName, newUrl;
+ let oldUrl = aFeed.url;
+
+ // Check for RSS2.0 redirect document <newLocation> tag.
+ if (aRedirDocChannel) {
+ tagName = "newLocation";
+ tags = this.childrenByTagNameNS(aRedirDocChannel, "", tagName);
+ newUrl = this.getNodeValue(tags ? tags[0] : null);
+ }
+
+ // Check for <itunes:new-feed-url> tag.
+ if (aFeedChannel) {
+ tagName = "new-feed-url";
+ tags = this.childrenByTagNameNS(
+ aFeedChannel,
+ lazy.FeedUtils.ITUNES_NS,
+ tagName
+ );
+ newUrl = this.getNodeValue(tags ? tags[0] : null);
+ tagName = "itunes:" + tagName;
+ }
+
+ if (
+ newUrl &&
+ newUrl != oldUrl &&
+ lazy.FeedUtils.isValidScheme(newUrl) &&
+ lazy.FeedUtils.changeUrlForFeed(aFeed, newUrl)
+ ) {
+ lazy.FeedUtils.log.info(
+ "FeedParser.isPermanentRedirect: found <" +
+ tagName +
+ "> tag; updated feed url from: " +
+ oldUrl +
+ " to: " +
+ newUrl +
+ " in folder: " +
+ lazy.FeedUtils.getFolderPrettyPath(aFeed.folder)
+ );
+ aFeed.onUrlChange(aFeed, oldUrl);
+ return true;
+ }
+
+ return false;
+ },
+
+ serializeTextConstruct(textElement) {
+ let content = "";
+ if (textElement) {
+ let textType = textElement.getAttribute("type");
+
+ // Atom spec says consider it "text" if not present.
+ if (!textType) {
+ textType = "text";
+ }
+
+ // There could be some strange content type we don't handle.
+ if (textType != "text" && textType != "html" && textType != "xhtml") {
+ return null;
+ }
+
+ for (let node of textElement.childNodes) {
+ if (node.nodeType == node.CDATA_SECTION_NODE) {
+ content += this.xmlEscape(node.data);
+ } else {
+ content += this.mSerializer.serializeToString(node);
+ }
+ }
+
+ if (textType == "html") {
+ content = this.xmlUnescape(content);
+ }
+
+ content = content.trim();
+ }
+
+ // Other parts of the code depend on this being null if there's no content.
+ return content ? content : null;
+ },
+
+ /**
+ * Return a cleaned up author name value.
+ *
+ * @param {string} authorString - A string.
+ * @returns {String} - A clean string value.
+ */
+ cleanAuthorName(authorString) {
+ if (!authorString) {
+ return "";
+ }
+ lazy.FeedUtils.log.trace(
+ "FeedParser.cleanAuthor: author1 - " + authorString
+ );
+ let author = authorString
+ .replace(/[\n\r\t]+/g, " ")
+ .replace(/"/g, '\\"')
+ .trim();
+ // If the name contains special chars, quote it.
+ if (author.match(/[<>@,"]/)) {
+ author = '"' + author + '"';
+ }
+ lazy.FeedUtils.log.trace("FeedParser.cleanAuthor: author2 - " + author);
+
+ return author;
+ },
+
+ /**
+ * Return a cleaned up node value. This is intended for values that are not
+ * multiline and not formatted. A sequence of tab or newline is converted to
+ * a space and unprintable ascii is removed.
+ *
+ * @param {Node} node - A DOM node.
+ * @returns {String} - A clean string value or null.
+ */
+ getNodeValue(node) {
+ let nodeValue = this.getNodeValueRaw(node);
+ if (!nodeValue) {
+ return null;
+ }
+
+ nodeValue = nodeValue.replace(/[\n\r\t]+/g, " ");
+ return this.removeUnprintableASCII(nodeValue);
+ },
+
+ /**
+ * Return a cleaned up formatted node value, meaning CR/LF/TAB are retained
+ * while all other unprintable ascii is removed. This is intended for values
+ * that are multiline and formatted, such as content or description tags.
+ *
+ * @param {Node} node - A DOM node.
+ * @returns {String} - A clean string value or null.
+ */
+ getNodeValueFormatted(node) {
+ let nodeValue = this.getNodeValueRaw(node);
+ if (!nodeValue) {
+ return null;
+ }
+
+ return this.removeUnprintableASCIIexCRLFTAB(nodeValue);
+ },
+
+ /**
+ * Return a raw node value, as received. This should be sanitized as
+ * appropriate.
+ *
+ * @param {Node} node - A DOM node.
+ * @returns {String} - A string value or null.
+ */
+ getNodeValueRaw(node) {
+ if (node && node.textContent) {
+ return node.textContent.trim();
+ }
+
+ if (node && node.firstChild) {
+ let ret = "";
+ for (let child = node.firstChild; child; child = child.nextSibling) {
+ let value = this.getNodeValueRaw(child);
+ if (value) {
+ ret += value;
+ }
+ }
+
+ if (ret) {
+ return ret.trim();
+ }
+ }
+
+ return null;
+ },
+
+ // Finds elements that are direct children of the first arg.
+ childrenByTagNameNS(aElement, aNamespace, aTagName) {
+ if (!aElement) {
+ return null;
+ }
+
+ let matches = aElement.getElementsByTagNameNS(aNamespace, aTagName);
+ let matchingChildren = [];
+ for (let match of matches) {
+ if (match.parentNode == aElement) {
+ matchingChildren.push(match);
+ }
+ }
+
+ return matchingChildren.length ? matchingChildren : null;
+ },
+
+ /**
+ * Returns first matching descendent of element, or null.
+ *
+ * @param {Element} element - DOM element to search.
+ * @param {string} namespace - Namespace of the search tag.
+ * @param {String} tagName - Tag to search for.
+ * @returns {Element|null} - Matching element, or null.
+ */
+ childByTagNameNS(element, namespace, tagName) {
+ if (!element) {
+ return null;
+ }
+ // Handily, item() returns null for out-of-bounds access.
+ return element.getElementsByTagNameNS(namespace, tagName).item(0);
+ },
+
+ /**
+ * Ensure <link> type tags start with http[s]://, ftp:// or magnet:
+ * for values stored in mail headers (content-base and remote enclosures),
+ * particularly to prevent data: uris, javascript, and other spoofing.
+ *
+ * @param {string} link - An intended http url string.
+ * @returns {String} - A clean string starting with http, ftp or magnet,
+ * else null.
+ */
+ validLink(link) {
+ if (/^((https?|ftp):\/\/|magnet:)/.test(link)) {
+ return this.removeUnprintableASCII(link.trim());
+ }
+
+ return null;
+ },
+
+ /**
+ * Return an absolute link for <entry> relative links. If xml:base is
+ * present in a <feed> attribute or child <link> element attribute, use it;
+ * otherwise the Feed.link will be the relevant <feed> child <link> value
+ * and will be the |baseURI| for <entry> child <link>s if there is no further
+ * xml:base, which may be an attribute of any element.
+ *
+ * @param {string} linkRel - the <link> rel attribute value to find.
+ * @param {NodeList} linkElements - the nodelist of <links> to search in.
+ * @param {string} baseURI - the url to use when resolving relative
+ * links to absolute values.
+ * @returns {String} or null - absolute url for a <link>, or null if the
+ * rel type is not found.
+ */
+ findAtomLink(linkRel, linkElements, baseURI) {
+ if (!linkElements) {
+ return null;
+ }
+
+ // XXX Need to check for MIME type and hreflang.
+ for (let alink of linkElements) {
+ if (
+ alink &&
+ // If there's a link rel.
+ ((alink.getAttribute("rel") && alink.getAttribute("rel") == linkRel) ||
+ // If there isn't, assume 'alternate'.
+ (!alink.getAttribute("rel") && linkRel == "alternate")) &&
+ alink.getAttribute("href")
+ ) {
+ // Atom links are interpreted relative to xml:base.
+ let href = alink.getAttribute("href");
+ baseURI = alink.getAttribute("xml:base") || baseURI || href;
+ try {
+ return Services.io.newURI(baseURI).resolve(href);
+ } catch (ex) {}
+ }
+ }
+
+ return null;
+ },
+
+ /**
+ * Find RSS Syndication extension tags.
+ * http://web.resource.org/rss/1.0/modules/syndication/
+ *
+ * @param {Feed} aFeed - the feed object.
+ * @param {Node | String} aChannel - dom node for the <channel>.
+ * @returns {void}
+ */
+ findSyUpdateTags(aFeed, aChannel) {
+ let tag, updatePeriod, updateFrequency, updateBase;
+ tag = this.childrenByTagNameNS(
+ aChannel,
+ lazy.FeedUtils.RSS_SY_NS,
+ "updatePeriod"
+ );
+ updatePeriod = this.getNodeValue(tag ? tag[0] : null) || "";
+ tag = this.childrenByTagNameNS(
+ aChannel,
+ lazy.FeedUtils.RSS_SY_NS,
+ "updateFrequency"
+ );
+ updateFrequency = this.getNodeValue(tag ? tag[0] : null) || "";
+ tag = this.childrenByTagNameNS(
+ aChannel,
+ lazy.FeedUtils.RSS_SY_NS,
+ "updateBase"
+ );
+ updateBase = this.getNodeValue(tag ? tag[0] : null) || "";
+ lazy.FeedUtils.log.debug(
+ "FeedParser.findSyUpdateTags: updatePeriod:updateFrequency - " +
+ updatePeriod +
+ ":" +
+ updateFrequency
+ );
+
+ if (updatePeriod) {
+ if (lazy.FeedUtils.RSS_SY_UNITS.includes(updatePeriod.toLowerCase())) {
+ updatePeriod = updatePeriod.toLowerCase();
+ } else {
+ updatePeriod = "daily";
+ }
+ }
+
+ updateFrequency = isNaN(updateFrequency) ? 1 : updateFrequency;
+
+ let options = aFeed.options;
+ if (
+ options.updates.updatePeriod == updatePeriod &&
+ options.updates.updateFrequency == updateFrequency &&
+ options.updates.updateBase == updateBase
+ ) {
+ return;
+ }
+
+ options.updates.updatePeriod = updatePeriod;
+ options.updates.updateFrequency = updateFrequency;
+ options.updates.updateBase = updateBase;
+ aFeed.options = options;
+ },
+
+ /**
+ * Remove unprintable ascii, particularly CR/LF, for non formatted tag values.
+ *
+ * @param {string} s - String to clean.
+ * @returns {String} - Cleaned string.
+ */
+ removeUnprintableASCII(s) {
+ /* eslint-disable-next-line no-control-regex */
+ return s ? s.replace(/[\x00-\x1F\x7F]+/g, "") : "";
+ },
+
+ /**
+ * Remove unprintable ascii, except CR/LF/TAB, for formatted tag values.
+ *
+ * @param {string} s - String to clean.
+ * @returns {String} - Cleaned string.
+ */
+ removeUnprintableASCIIexCRLFTAB(s) {
+ /* eslint-disable-next-line no-control-regex */
+ return s ? s.replace(/[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]+/g, "") : "";
+ },
+
+ stripTags(someHTML) {
+ return someHTML ? someHTML.replace(/<[^>]+>/g, "") : someHTML;
+ },
+
+ xmlUnescape(s) {
+ s = s.replace(/&lt;/g, "<");
+ s = s.replace(/&gt;/g, ">");
+ s = s.replace(/&amp;/g, "&");
+ return s;
+ },
+
+ xmlEscape(s) {
+ s = s.replace(/&/g, "&amp;");
+ s = s.replace(/>/g, "&gt;");
+ s = s.replace(/</g, "&lt;");
+ return s;
+ },
+
+ dateRescue(dateString) {
+ // Deal with various kinds of invalid dates.
+ if (!isNaN(parseInt(dateString))) {
+ // It's an integer, so maybe it's a timestamp.
+ let d = new Date(parseInt(dateString) * 1000);
+ let now = new Date();
+ let yeardiff = now.getFullYear() - d.getFullYear();
+ lazy.FeedUtils.log.trace(
+ "FeedParser.dateRescue: Rescue Timestamp date - " +
+ d.toString() +
+ " ,year diff - " +
+ yeardiff
+ );
+ if (yeardiff >= 0 && yeardiff < 3) {
+ // It's quite likely the correct date.
+ return d.toString();
+ }
+ }
+
+ // Could be an ISO8601/W3C date. If not, get the current time.
+ return lazy.FeedUtils.getValidRFC5322Date(dateString);
+ },
+};