/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * Provenance: this module was introduced as
 * comm/mailnews/db/gloda/modules/GlodaContent.jsm by commit
 * 6bf0a5cb5034a7e684dcc3500e841785237ce2dd
 * ("Adding upstream version 1:115.7.0.", Daniel Baumann,
 * Sun, 7 Apr 2024 19:32:43 +0200, Signed-off-by: Daniel Baumann).
 */

const EXPORTED_SYMBOLS = [
  "GlodaContent",
  "whittlerRegistry",
  "mimeMsgToContentAndMeta",
  "mimeMsgToContentSnippetAndMeta",
];

/**
 * Given a MimeMsg and the corresponding folder, return the GlodaContent object.
 *
 * The message body is coerced to plaintext, split into lines, and handed to
 * every registered whittler in the order returned by
 * whittlerRegistry.getWhittlers().
 *
 * @param aMimeMsg: the MimeMessage instance
 * @param folder: the nsIMsgDBFolder
 * @returns an array containing the GlodaContent instance, and the meta dictionary
 *     that the Gloda content providers may have filled with useful data.
 */
function mimeMsgToContentAndMeta(aMimeMsg, folder) {
  const content = new GlodaContent();
  const meta = { subject: aMimeMsg.get("subject") };
  const bodyLines = aMimeMsg.coerceBodyToPlaintext(folder).split(/\r?\n/);

  for (const whittler of whittlerRegistry.getWhittlers()) {
    whittler.contentWhittle(meta, bodyLines, content);
  }

  return [content, meta];
}

/**
 * Given a MimeMsg, return the whittled content string, suitable for summarizing
 * a message.
 *
 * @param aMimeMsg: the MimeMessage instance
 * @param folder: the nsIMsgDBFolder
 * @param length: optional number of characters to trim the whittled content.
 *     If the actual length of the message is greater than |length|, then the return
 *     value is the first (length-1) characters with an ellipsis appended.
 * @returns an array containing the text of the snippet, and the meta dictionary
 *     that the Gloda content providers may have filled with useful data.
 */
function mimeMsgToContentSnippetAndMeta(aMimeMsg, folder, length) {
  const [content, meta] = mimeMsgToContentAndMeta(aMimeMsg, folder);

  // Ask for one character more than requested so that "exactly |length| long"
  // can be told apart from "longer than |length|". When no length was given,
  // explicitly request the untruncated content instead of passing NaN along.
  const snippetLimit = length == null ? undefined : length + 1;
  let text = content.getContentSnippet(snippetLimit);
  if (length && text.length > length) {
    text = text.substring(0, length - 1) + "\u2026"; // ellipsis
  }
  return [text, meta];
}

/**
 * A registry of gloda providers that have contentWhittle() functions.
 * Used by mimeMsgToContentAndMeta (and therefore by
 * mimeMsgToContentSnippetAndMeta); providers are added through
 * registerWhittler().
 */
function WhittlerRegistry() {
  this._whittlers = [];
}

WhittlerRegistry.prototype = {
  /**
   * Add a provider as a content whittler.
   *
   * @param provider An object exposing contentWhittle(meta, bodyLines, content).
   */
  registerWhittler(provider) {
    this._whittlers.push(provider);
  },

  /**
   * Get the list of content whittlers in reverse registration order (the most
   * recently registered provider first). The intent is that this yields the
   * most specific whittlers before the most generic ones.
   *
   * @returns a fresh array; mutating it does not affect the registry.
   */
  getWhittlers() {
    // Copy before reversing: reverse() works in place and we must neither
    // mutate nor leak the internal list.
    return [...this._whittlers].reverse();
  },
};

const whittlerRegistry = new WhittlerRegistry();

/**
 * Normalize a "line or list of lines" producer argument to a single string.
 *
 * @param aLineOrLines A string, or an array of lines to be joined with "\n".
 * @returns the string itself, or the lines joined by newlines.
 */
function linesToText(aLineOrLines) {
  return typeof aLineOrLines == "string"
    ? aLineOrLines
    : aLineOrLines.join("\n");
}

/**
 * Accumulates the interpreted content of a message as a list of hunks (meta,
 * quoted, or actual content) plus key/value style attributes. Producers
 * volunteer with a priority; only calls made while the most recent
 * volunteerContent() call was accepted are recorded.
 */
function GlodaContent() {
  this._contentPriority = null;
  this._producing = false;
  this._hunks = [];
  // Initialize the remaining state here too, so that consumer calls such as
  // getContentString(true) are safe even if no producer ever volunteered.
  this._keysAndValues = [];
  this._keysAndDeltaValues = [];
  this._curHunk = null;
}

GlodaContent.prototype = {
  kPriorityBase: 0,
  kPriorityPerfect: 100,

  kHunkMeta: 1,
  kHunkQuoted: 2,
  kHunkContent: 3,

  /**
   * Throw away everything recorded so far; called when a higher-priority
   * producer takes over.
   */
  _resetContent() {
    this._keysAndValues = [];
    this._keysAndDeltaValues = [];
    this._hunks = [];
    this._curHunk = null;
  },

  /* ===== Consumer API ===== */
  /**
   * @returns true if some producer's volunteerContent() call has been accepted.
   */
  hasContent() {
    return this._contentPriority != null;
  },

  /**
   * Return content suitable for snippet display. This means that no quoting
   * or meta-data should be returned.
   *
   * @param aMaxLength The maximum snippet length desired. If falsy, the
   *     content is returned untruncated.
   */
  getContentSnippet(aMaxLength) {
    const content = this.getContentString();
    return aMaxLength ? content.substring(0, aMaxLength) : content;
  },

  /**
   * Concatenate all content hunks (not meta, not quoted), separated by
   * newlines.
   *
   * @param aIndexingPurposes If truthy, additionally append the values
   *     recorded via keyValue() and the old/new values recorded via
   *     keyValueDelta(), one per line.
   * @returns the resulting string (empty if nothing was recorded).
   */
  getContentString(aIndexingPurposes) {
    let data = "";
    for (const hunk of this._hunks) {
      if (hunk.hunkType != this.kHunkContent) {
        continue;
      }
      data = data ? data + "\n" + hunk.data : hunk.data;
    }

    if (aIndexingPurposes) {
      // append the values for indexing. we assume the keywords are cruft.
      // this may be crazy, but things that aren't a science aren't an exact
      // science.
      for (const [, value] of this._keysAndValues) {
        data += "\n" + value;
      }
      // Delta entries are [key, oldValue, newValue]; index both values.
      for (const [, oldValue, newValue] of this._keysAndDeltaValues) {
        data += "\n" + oldValue + "\n" + newValue;
      }
    }

    return data;
  },

  /* ===== Producer API ===== */
  /**
   * Called by a producer with the priority they believe their interpretation
   * of the content comes in at.
   *
   * @param aPriority The producer's priority; it wins only if no content has
   *     been volunteered yet or it is strictly greater than the current one.
   * @returns true if we believe the producer's interpretation will be
   *     interesting and they should go ahead and generate events. We return
   *     false if we don't think they are interesting, in which case they should
   *     probably not issue calls to us, although we don't care. (We will
   *     ignore their calls if we return false, this allows the simplification
   *     of code that needs to run anyways.)
   */
  volunteerContent(aPriority) {
    if (this._contentPriority === null || this._contentPriority < aPriority) {
      this._contentPriority = aPriority;
      this._resetContent();
      this._producing = true;
      return true;
    }
    this._producing = false;
    return false;
  },

  /**
   * Record a key/value attribute. Ignored unless the last volunteerContent()
   * call was accepted.
   */
  keyValue(aKey, aValue) {
    if (!this._producing) {
      return;
    }

    this._keysAndValues.push([aKey, aValue]);
  },

  /**
   * Record a change of a key from an old value to a new value. Ignored unless
   * the last volunteerContent() call was accepted.
   */
  keyValueDelta(aKey, aOldValue, aNewValue) {
    if (!this._producing) {
      return;
    }

    this._keysAndDeltaValues.push([aKey, aOldValue, aNewValue]);
  },

  /**
   * Meta lines are lines that have to do with the content but are not the
   * content and can generally be related to an attribute that has been derived
   * and stored on the item.
   * For example, a bugzilla bug may note that an attachment was created; this
   * is not content and wouldn't be desired in a snippet, but is still
   * potentially interesting meta-data.
   *
   * Every call starts a new meta hunk.
   *
   * @param aLineOrLines The line or list of lines that are meta-data.
   * @param aAttr The attribute this meta-data is associated with.
   * @param aIndex If the attribute is non-singular, indicate the specific
   *     index of the item in the attribute's bound list that the meta-data
   *     is associated with.
   */
  meta(aLineOrLines, aAttr, aIndex) {
    if (!this._producing) {
      return;
    }

    this._curHunk = {
      hunkType: this.kHunkMeta,
      attr: aAttr,
      index: aIndex,
      data: linesToText(aLineOrLines),
    };
    this._hunks.push(this._curHunk);
  },

  /**
   * Quoted lines reference previous messages or what not.
   *
   * Consecutive calls with the same depth, origin and target are merged into
   * a single hunk; anything else starts a new quoted hunk.
   *
   * @param aLineOrLines The line or list of lines that are quoted.
   * @param aDepth The depth of the quoting.
   * @param aOrigin The item that originated the original content, if known.
   *     For example, perhaps a GlodaMessage?
   * @param aTarget A reference to the location in the original content, if
   *     known. For example, the index of a line in a message or something?
   */
  quoted(aLineOrLines, aDepth, aOrigin, aTarget) {
    if (!this._producing) {
      return;
    }

    const data = linesToText(aLineOrLines);
    const cur = this._curHunk;
    const continuesCurrentHunk =
      cur &&
      cur.hunkType == this.kHunkQuoted &&
      cur.depth == aDepth &&
      cur.origin == aOrigin &&
      cur.target == aTarget;

    if (continuesCurrentHunk) {
      cur.data += "\n" + data;
      return;
    }

    this._curHunk = {
      hunkType: this.kHunkQuoted,
      data,
      depth: aDepth,
      origin: aOrigin,
      target: aTarget,
    };
    this._hunks.push(this._curHunk);
  },

  /**
   * Record actual message content. Consecutive content() calls are merged
   * into one hunk; a call following a meta or quoted hunk starts a new one.
   *
   * @param aLineOrLines The line or list of lines that are content.
   */
  content(aLineOrLines) {
    if (!this._producing) {
      return;
    }

    const data = linesToText(aLineOrLines);
    const cur = this._curHunk;

    if (cur && cur.hunkType == this.kHunkContent) {
      cur.data += "\n" + data;
      return;
    }

    this._curHunk = { hunkType: this.kHunkContent, data };
    this._hunks.push(this._curHunk);
  },
};