diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /comm/mailnews/db/gloda/modules | |
parent | Initial commit. (diff) | |
download | thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip |
Adding upstream version 1:115.7.0. (tag: upstream/1%115.7.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'comm/mailnews/db/gloda/modules')
25 files changed, 19608 insertions, 0 deletions
diff --git a/comm/mailnews/db/gloda/modules/Collection.jsm b/comm/mailnews/db/gloda/modules/Collection.jsm new file mode 100644 index 0000000000..e229161fc9 --- /dev/null +++ b/comm/mailnews/db/gloda/modules/Collection.jsm @@ -0,0 +1,834 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +const EXPORTED_SYMBOLS = ["GlodaCollection", "GlodaCollectionManager"]; + +var LOG = console.createInstance({ + prefix: "gloda.collection", + maxLogLevel: "Warn", + maxLogLevelPref: "gloda.loglevel", +}); + +/** + * @namespace Central registry and logic for all collections. + * + * The collection manager is a singleton that has the following tasks: + * - Let views of objects (nouns) know when their objects have changed. For + * example, an attribute has changed due to user action. + * - Let views of objects based on queries know when new objects match their + * query, or when their existing objects no longer match due to changes. + * - Caching/object-identity maintenance. It is ideal if we only ever have + * one instance of an object at a time. (More specifically, only one instance + * per database row 'id'.) The collection mechanism lets us find existing + * instances to this end. Caching can be directly integrated by being treated + * as a special collection. + */ +var GlodaCollectionManager = { + _collectionsByNoun: {}, + _cachesByNoun: {}, + + /** + * Registers the existence of a collection with the collection manager. This + * is done using a weak reference so that the collection can go away if it + * wants to. 
+ */ + registerCollection(aCollection) { + let collections; + let nounID = aCollection.query._nounDef.id; + if (!(nounID in this._collectionsByNoun)) { + collections = this._collectionsByNoun[nounID] = []; + } else { + // purge dead weak references while we're at it + collections = this._collectionsByNoun[nounID].filter(aRef => aRef.get()); + this._collectionsByNoun[nounID] = collections; + } + collections.push(Cu.getWeakReference(aCollection)); + }, + + getCollectionsForNounID(aNounID) { + if (!(aNounID in this._collectionsByNoun)) { + return []; + } + + // generator would be nice, but I suspect get() is too expensive to use + // twice (guard/predicate and value) + let weakCollections = this._collectionsByNoun[aNounID]; + let collections = []; + for (let iColl = 0; iColl < weakCollections.length; iColl++) { + let collection = weakCollections[iColl].get(); + if (collection) { + collections.push(collection); + } + } + return collections; + }, + + defineCache(aNounDef, aCacheSize) { + this._cachesByNoun[aNounDef.id] = new GlodaLRUCacheCollection( + aNounDef, + aCacheSize + ); + }, + + /** + * Attempt to locate an instance of the object of the given noun type with the + * given id. Counts as a cache hit if found. (And if it wasn't in a cache, + * but rather a collection, it is added to the cache.) + */ + cacheLookupOne(aNounID, aID, aDoCache) { + let cache = this._cachesByNoun[aNounID]; + + if (cache) { + if (aID in cache._idMap) { + let item = cache._idMap[aID]; + return cache.hit(item); + } + } + + if (aDoCache === false) { + cache = null; + } + + for (let collection of this.getCollectionsForNounID(aNounID)) { + if (aID in collection._idMap) { + let item = collection._idMap[aID]; + if (cache) { + cache.add([item]); + } + return item; + } + } + + LOG.debug("cacheLookupOne:\nhit null"); + return null; + }, + + /** + * Lookup multiple nouns by ID from the cache/existing collections. + * + * @param aNounID The kind of noun identified by its ID. 
+ * @param aIDMap A dictionary/map whose keys must be gloda noun ids for the + * given noun type and whose values are ignored. + * @param aTargetMap An object to hold the noun id's (key) and noun instances + * (value) for the noun instances that were found available in memory + * because they were cached or in existing query collections. + * @param [aDoCache=true] Should we add any items to the cache that we found + * in collections that were in memory but not in the cache? You would + * likely want to pass false if you are only updating in-memory + * representations rather than performing a new query. + * + * @returns [The number that were found, the number that were not found, + * a dictionary whose keys are the ids of noun instances that + * were not found.] + */ + cacheLookupMany(aNounID, aIDMap, aTargetMap, aDoCache) { + let foundCount = 0, + notFoundCount = 0, + notFound = {}; + + let cache = this._cachesByNoun[aNounID]; + + if (cache) { + for (let key in aIDMap) { + let cacheValue = cache._idMap[key]; + if (cacheValue === undefined) { + notFoundCount++; + notFound[key] = null; + } else { + foundCount++; + aTargetMap[key] = cacheValue; + cache.hit(cacheValue); + } + } + } + + if (aDoCache === false) { + cache = null; + } + + for (let collection of this.getCollectionsForNounID(aNounID)) { + for (let key in notFound) { + let collValue = collection._idMap[key]; + if (collValue !== undefined) { + aTargetMap[key] = collValue; + delete notFound[key]; + foundCount++; + notFoundCount--; + if (cache) { + cache.add([collValue]); + } + } + } + } + + return [foundCount, notFoundCount, notFound]; + }, + + /** + * Friendlier version of |cacheLookupMany|; takes a list of ids and returns + * an object whose keys and values are the gloda id's and instances of the + * instances that were found. We don't tell you who we didn't find. The + * assumption is this is being used for in-memory updates where we only need + * to tweak what is in memory. 
+ */ + cacheLookupManyList(aNounID, aIds) { + let checkMap = {}, + targetMap = {}; + for (let id of aIds) { + checkMap[id] = null; + } + // do not promote found items into the cache + this.cacheLookupMany(aNounID, checkMap, targetMap, false); + return targetMap; + }, + + /** + * Attempt to locate an instance of the object of the given noun type with the + * given id. Counts as a cache hit if found. (And if it wasn't in a cache, + * but rather a collection, it is added to the cache.) + */ + cacheLookupOneByUniqueValue(aNounID, aUniqueValue, aDoCache) { + let cache = this._cachesByNoun[aNounID]; + + if (cache) { + if (aUniqueValue in cache._uniqueValueMap) { + let item = cache._uniqueValueMap[aUniqueValue]; + return cache.hit(item); + } + } + + if (aDoCache === false) { + cache = null; + } + + for (let collection of this.getCollectionsForNounID(aNounID)) { + if (aUniqueValue in collection._uniqueValueMap) { + let item = collection._uniqueValueMap[aUniqueValue]; + if (cache) { + cache.add([item]); + } + return item; + } + } + + return null; + }, + + /** + * Checks whether the provided item with the given id is actually a duplicate + * of an instance that already exists in the cache/a collection. If it is, + * the pre-existing instance is returned and counts as a cache hit. If it + * is not, the passed-in instance is added to the cache and returned. + */ + cacheLoadUnifyOne(aItem) { + let items = [aItem]; + this.cacheLoadUnify(aItem.NOUN_ID, items); + return items[0]; + }, + + /** + * Given a list of items, check if any of them already have duplicate, + * canonical, instances in the cache or collections. Items with pre-existing + * instances are replaced by those instances in the provided list, and each + * counts as a cache hit. Items without pre-existing instances are added + * to the cache and left intact. 
+ */ + cacheLoadUnify(aNounID, aItems, aCacheIfMissing) { + let cache = this._cachesByNoun[aNounID]; + if (aCacheIfMissing === undefined) { + aCacheIfMissing = true; + } + + // track the items we haven't yet found in a cache/collection (value) and + // their index in aItems (key). We're somewhat abusing the dictionary + // metaphor with the intent of storing tuples here. We also do it because + // it allows random-access deletion theoretically without cost. (Since + // we delete during iteration, that may be wrong, but it sounds like the + // semantics still work?) + let unresolvedIndexToItem = {}; + let numUnresolved = 0; + + if (cache) { + for (let iItem = 0; iItem < aItems.length; iItem++) { + let item = aItems[iItem]; + + if (item.id in cache._idMap) { + let realItem = cache._idMap[item.id]; + // update the caller's array with the reference to the 'real' item + aItems[iItem] = realItem; + cache.hit(realItem); + } else { + unresolvedIndexToItem[iItem] = item; + numUnresolved++; + } + } + + // we're done if everyone was a hit. + if (numUnresolved == 0) { + return; + } + } else { + for (let iItem = 0; iItem < aItems.length; iItem++) { + unresolvedIndexToItem[iItem] = aItems[iItem]; + } + numUnresolved = aItems.length; + } + + let needToCache = []; + // next, let's fall back to our collections + for (let collection of this.getCollectionsForNounID(aNounID)) { + for (let [iItem, item] of Object.entries(unresolvedIndexToItem)) { + if (item.id in collection._idMap) { + let realItem = collection._idMap[item.id]; + // update the caller's array to now have the 'real' object + aItems[iItem] = realItem; + // flag that we need to cache this guy (we use an inclusive cache) + needToCache.push(realItem); + // we no longer need to resolve this item... 
+ delete unresolvedIndexToItem[iItem]; + // stop checking collections if we got everybody + if (--numUnresolved == 0) { + break; + } + } + } + } + + // anything left in unresolvedIndexToItem should be added to the cache + // unless !aCacheIfMissing. plus, we already have 'needToCache' + if (cache && aCacheIfMissing) { + cache.add( + needToCache.concat( + Object.keys(unresolvedIndexToItem).map( + key => unresolvedIndexToItem[key] + ) + ) + ); + } + }, + + cacheCommitDirty() { + for (let id in this._cachesByNoun) { + let cache = this._cachesByNoun[id]; + cache.commitDirty(); + } + }, + + /** + * Notifies the collection manager that an item has been loaded and should + * be cached, assuming caching is active. + */ + itemLoaded(aItem) { + let cache = this._cachesByNoun[aItem.NOUN_ID]; + if (cache) { + cache.add([aItem]); + } + }, + + /** + * Notifies the collection manager that multiple items has been loaded and + * should be cached, assuming caching is active. + */ + itemsLoaded(aNounID, aItems) { + let cache = this._cachesByNoun[aNounID]; + if (cache) { + cache.add(aItems); + } + }, + + /** + * This should be called when items are added to the global database. This + * should generally mean during indexing by indexers or an attribute + * provider. + * We walk all existing collections for the given noun type and add the items + * to the collection if the item meets the query that defines the collection. + */ + itemsAdded(aNounID, aItems) { + let cache = this._cachesByNoun[aNounID]; + if (cache) { + cache.add(aItems); + } + + for (let collection of this.getCollectionsForNounID(aNounID)) { + let addItems = aItems.filter(item => collection.query.test(item)); + if (addItems.length) { + collection._onItemsAdded(addItems); + } + } + }, + /** + * This should be called when items in the global database are modified. For + * example, as a result of indexing. This should generally only be called + * by indexers or by attribute providers. 
+ * We walk all existing collections for the given noun type. For items + * currently included in each collection but should no longer be (per the + * collection's defining query) we generate onItemsRemoved events. For items + * not currently included in the collection but should now be, we generate + * onItemsAdded events. For items included that still match the query, we + * generate onItemsModified events. + */ + itemsModified(aNounID, aItems) { + for (let collection of this.getCollectionsForNounID(aNounID)) { + let added = [], + modified = [], + removed = []; + for (let item of aItems) { + if (item.id in collection._idMap) { + // currently in... but should it still be there? + if (collection.query.test(item)) { + modified.push(item); // yes, keep it + } else if (!collection.query.frozen) { + // oy, so null queries really don't want any notifications, and they + // sorta fit into our existing model, except for the removal bit. + // so we need a specialized check for them, and we're using the + // frozen attribute to this end. + removed.push(item); // no, bin it + } + } else if (collection.query.test(item)) { + // not in, should it be? + added.push(item); // yep, add it + } + } + if (added.length) { + collection._onItemsAdded(added); + } + if (modified.length) { + collection._onItemsModified(modified); + } + if (removed.length) { + collection._onItemsRemoved(removed); + } + } + }, + /** + * This should be called when items in the global database are permanently-ish + * deleted. (This is distinct from concepts like message deletion which may + * involved trash folders or other modified forms of existence. Deleted + * means the data is gone and if it were to come back, it would come back + * via an itemsAdded event.) + * We walk all existing collections for the given noun type. For items + * currently in the collection, we generate onItemsRemoved events. + * + * @param aItemIds A list of item ids that are being deleted. 
+ */ + itemsDeleted(aNounID, aItemIds) { + // cache + let cache = this._cachesByNoun[aNounID]; + if (cache) { + for (let itemId of aItemIds) { + if (itemId in cache._idMap) { + cache.deleted(cache._idMap[itemId]); + } + } + } + + // collections + for (let collection of this.getCollectionsForNounID(aNounID)) { + let removeItems = aItemIds + .filter(itemId => itemId in collection._idMap) + .map(itemId => collection._idMap[itemId]); + if (removeItems.length) { + collection._onItemsRemoved(removeItems); + } + } + }, + /** + * Like |itemsDeleted| but for the case where the deletion is based on an + * attribute that SQLite can more efficiently check than we can and where the + * cost of scanning the in-memory items is presumably much cheaper than + * trying to figure out what actually got deleted. + * + * Since we are doing an in-memory walk, this is obviously O(n) where n is the + * number of noun instances of a given type in-memory. We are assuming this + * is a reasonable number of things and that this type of deletion call is + * not going to happen all that frequently. If these assumptions are wrong, + * callers are advised to re-think the whole situation. + * + * @param aNounID Type of noun we are talking about here. + * @param aFilter A filter function that returns true when the item should be + * thought of as deleted, or false if the item is still good. Screw this + * up and you will get some seriously wacky bugs, yo. 
+ */ + itemsDeletedByAttribute(aNounID, aFilter) { + // cache + let cache = this._cachesByNoun[aNounID]; + if (cache) { + for (let id in cache._idMap) { + let item = cache._idMap[id]; + if (aFilter(item)) { + cache.deleted(item); + } + } + } + + // collections + for (let collection of this.getCollectionsForNounID(aNounID)) { + let removeItems = collection.items.filter(aFilter); + if (removeItems.length) { + collection._onItemsRemoved(removeItems); + } + } + }, +}; + +/** + * @class A current view of the set of first-class nouns meeting a given query. + * Assuming a listener is present, events are + * generated when new objects meet the query, existing objects no longer meet + * the query, or existing objects have experienced a change in attributes that + * does not affect their ability to be present (but the listener may care about + * because it is exposing those attributes). + * @class + */ +function GlodaCollection( + aNounDef, + aItems, + aQuery, + aListener, + aMasterCollection +) { + // if aNounDef is null, we are just being invoked for subclassing + if (aNounDef === undefined) { + return; + } + + this._nounDef = aNounDef; + // should we also maintain a unique value mapping... + if (this._nounDef.usesUniqueValue) { + this._uniqueValueMap = {}; + } + + this.pendingItems = []; + this._pendingIdMap = {}; + this.items = []; + this._idMap = {}; + + // force the listener to null for our call to _onItemsAdded; no events for + // the initial load-out. + this._listener = null; + if (aItems && aItems.length) { + this._onItemsAdded(aItems); + } + + this.query = aQuery || null; + if (this.query) { + this.query.collection = this; + if (this.query.options.stashColumns) { + this.stashedColumns = {}; + } + } + this._listener = aListener || null; + + this.deferredCount = 0; + this.resolvedCount = 0; + + if (aMasterCollection) { + this.masterCollection = aMasterCollection.masterCollection; + } else { + this.masterCollection = this; + /** a dictionary of dictionaries. 
at the top level, the keys are noun IDs. + * each of these sub-dictionaries maps the IDs of desired noun instances to + * the actual instance, or null if it has not yet been loaded. + */ + this.referencesByNounID = {}; + /** + * a dictionary of dictionaries. at the top level, the keys are noun IDs. + * each of the sub-dictionaries maps the IDs of the _recognized parent + * noun_ to the list of children, or null if the list has not yet been + * populated. + * + * So if we have a noun definition A with ID 1 who is the recognized parent + * noun of noun definition B with ID 2, AND we have an instance A(1) with + * two children B(10), B(11), then an example might be: {2: {1: [10, 11]}}. + */ + this.inverseReferencesByNounID = {}; + this.subCollections = {}; + } +} + +GlodaCollection.prototype = { + get listener() { + return this._listener; + }, + set listener(aListener) { + this._listener = aListener; + }, + + /** + * If this collection still has a query associated with it, drop the query + * and replace it with an 'explicit query'. This means that the Collection + * Manager will not attempt to match new items indexed to the system against + * our query criteria. + * Once you call this method, your collection's listener will no longer + * receive onItemsAdded notifications that are not the result of your + * initial database query. It will, however, receive onItemsModified + * notifications if items in the collection are re-indexed. + */ + becomeExplicit() { + if (!(this.query instanceof this._nounDef.explicitQueryClass)) { + this.query = new this._nounDef.explicitQueryClass(this); + } + }, + + /** + * Clear the contents of this collection. This only makes sense for explicit + * collections or wildcard collections. (Actual query-based collections + * should represent the state of the query, so unless we're going to delete + * all the items, clearing the collection would violate that constraint.) 
+ */ + clear() { + this._idMap = {}; + if (this._uniqueValueMap) { + this._uniqueValueMap = {}; + } + this.items = []; + }, + + _onItemsAdded(aItems) { + this.items.push.apply(this.items, aItems); + if (this._uniqueValueMap) { + for (let item of this.items) { + this._idMap[item.id] = item; + this._uniqueValueMap[item.uniqueValue] = item; + } + } else { + for (let item of this.items) { + this._idMap[item.id] = item; + } + } + if (this._listener) { + try { + this._listener.onItemsAdded(aItems, this); + } catch (ex) { + LOG.error( + "caught exception from listener in onItemsAdded: " + + ex.fileName + + ":" + + ex.lineNumber + + ": " + + ex + ); + } + } + }, + + _onItemsModified(aItems) { + if (this._listener) { + try { + this._listener.onItemsModified(aItems, this); + } catch (ex) { + LOG.error( + "caught exception from listener in onItemsModified: " + + ex.fileName + + ":" + + ex.lineNumber + + ": " + + ex + ); + } + } + }, + + /** + * Given a list of items that definitely no longer belong in this collection, + * remove them from the collection and notify the listener. The 'tricky' + * part is that we need to remove the deleted items from our list of items. + */ + _onItemsRemoved(aItems) { + // we want to avoid the O(n^2) deletion performance case, and deletion + // should be rare enough that the extra cost of building the deletion map + // should never be a real problem. + let deleteMap = {}; + // build the delete map while also nuking from our id map/unique value map + for (let item of aItems) { + deleteMap[item.id] = true; + delete this._idMap[item.id]; + if (this._uniqueValueMap) { + delete this._uniqueValueMap[item.uniqueValue]; + } + } + let items = this.items; + // in-place filter. probably needless optimization. 
+ let iWrite = 0; + for (let iRead = 0; iRead < items.length; iRead++) { + let item = items[iRead]; + if (!(item.id in deleteMap)) { + items[iWrite++] = item; + } + } + items.splice(iWrite); + + if (this._listener) { + try { + this._listener.onItemsRemoved(aItems, this); + } catch (ex) { + LOG.error( + "caught exception from listener in onItemsRemoved: " + + ex.fileName + + ":" + + ex.lineNumber + + ": " + + ex + ); + } + } + }, + + _onQueryCompleted() { + this.query.completed = true; + if (this._listener && this._listener.onQueryCompleted) { + this._listener.onQueryCompleted(this); + } + }, +}; + +/** + * Create an LRU cache collection for the given noun with the given size. + * + * @class + */ +function GlodaLRUCacheCollection(aNounDef, aCacheSize) { + GlodaCollection.call(this, aNounDef, null, null, null); + + this._head = null; // aka oldest! + this._tail = null; // aka newest! + this._size = 0; + // let's keep things sane, and simplify our logic a little... + if (aCacheSize < 32) { + aCacheSize = 32; + } + this._maxCacheSize = aCacheSize; +} +/** + * @class A LRU-discard cache. We use a doubly linked-list for the eviction + * tracking. Since we require that there is at most one LRU-discard cache per + * noun class, we simplify our lives by adding our own attributes to the + * cached objects. + * @augments GlodaCollection + */ +GlodaLRUCacheCollection.prototype = new GlodaCollection(); +GlodaLRUCacheCollection.prototype.add = function (aItems) { + for (let item of aItems) { + if (item.id in this._idMap) { + // DEBUGME so, we're dealing with this, but it shouldn't happen. need + // trace-debuggage. 
+ continue; + } + this._idMap[item.id] = item; + if (this._uniqueValueMap) { + this._uniqueValueMap[item.uniqueValue] = item; + } + + item._lruPrev = this._tail; + // we do have to make sure that we will set _head the first time we insert + // something + if (this._tail !== null) { + this._tail._lruNext = item; + } else { + this._head = item; + } + item._lruNext = null; + this._tail = item; + + this._size++; + } + + while (this._size > this._maxCacheSize) { + let item = this._head; + + // we never have to deal with the possibility of needing to make _head/_tail + // null. + this._head = item._lruNext; + this._head._lruPrev = null; + // (because we are nice, we will delete the properties...) + delete item._lruNext; + delete item._lruPrev; + + // nuke from our id map + delete this._idMap[item.id]; + if (this._uniqueValueMap) { + delete this._uniqueValueMap[item.uniqueValue]; + } + + // flush dirty items to disk (they may not have this attribute, in which + // case, this returns false, which is fine.) 
    // Evicted items that are dirty must be flushed to disk before they leave
    // memory (they may not have the attribute, in which case this test is
    // simply false, which is fine).
    if (item.dirty) {
      this._nounDef.objUpdate.call(this._nounDef.datastore, item);
      delete item.dirty;
    }

    this._size--;
  }
};

/**
 * Mark a cache hit: move aItem to the most-recently-used (tail) end of the
 * doubly linked LRU list and return it.
 */
GlodaLRUCacheCollection.prototype.hit = function (aItem) {
  // don't do anything in the 0 or 1 items case, or if we're already
  // the last item
  if (this._head === this._tail || this._tail === aItem) {
    return aItem;
  }

  // - unlink the item
  if (aItem._lruPrev !== null) {
    aItem._lruPrev._lruNext = aItem._lruNext;
  } else {
    // aItem was the head; its successor becomes the new head.
    this._head = aItem._lruNext;
  }
  // (_lruNext cannot be null: aItem is not the tail, per the guard above)
  aItem._lruNext._lruPrev = aItem._lruPrev;
  // - link it in to the end
  this._tail._lruNext = aItem;
  aItem._lruPrev = this._tail;
  aItem._lruNext = null;
  // update tail tracking
  this._tail = aItem;

  return aItem;
};

/**
 * Remove aItem from the cache entirely: unlink it from the LRU list (patching
 * head/tail as needed), strip the bookkeeping properties we added to it, and
 * drop it from the id/unique-value maps.
 */
GlodaLRUCacheCollection.prototype.deleted = function (aItem) {
  // unlink the item
  if (aItem._lruPrev !== null) {
    aItem._lruPrev._lruNext = aItem._lruNext;
  } else {
    this._head = aItem._lruNext;
  }
  if (aItem._lruNext !== null) {
    aItem._lruNext._lruPrev = aItem._lruPrev;
  } else {
    this._tail = aItem._lruPrev;
  }

  // (because we are nice, we will delete the properties...)
  delete aItem._lruNext;
  delete aItem._lruPrev;

  // nuke from our id map
  delete this._idMap[aItem.id];
  if (this._uniqueValueMap) {
    delete this._uniqueValueMap[aItem.uniqueValue];
  }

  this._size--;
};

/**
 * If any of the cached items are dirty, commit them, and make them no longer
 * dirty.
 */
GlodaLRUCacheCollection.prototype.commitDirty = function () {
  // we can only do this if there is an update method available...
+ if (!this._nounDef.objUpdate) { + return; + } + + for (let iItem in this._idMap) { + let item = this._idMap[iItem]; + if (item.dirty) { + LOG.debug("flushing dirty: " + item); + this._nounDef.objUpdate.call(this._nounDef.datastore, item); + delete item.dirty; + } + } +}; diff --git a/comm/mailnews/db/gloda/modules/Everybody.jsm b/comm/mailnews/db/gloda/modules/Everybody.jsm new file mode 100644 index 0000000000..4f33134ef9 --- /dev/null +++ b/comm/mailnews/db/gloda/modules/Everybody.jsm @@ -0,0 +1,23 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +const EXPORTED_SYMBOLS = []; + +const { GlodaFundAttr } = ChromeUtils.import( + "resource:///modules/gloda/GlodaFundAttr.jsm" +); +GlodaFundAttr.init(); +const { GlodaExplicitAttr } = ChromeUtils.import( + "resource:///modules/gloda/GlodaExplicitAttr.jsm" +); +GlodaExplicitAttr.init(); + +ChromeUtils.import("resource:///modules/gloda/NounTag.jsm"); +ChromeUtils.import("resource:///modules/gloda/NounFreetag.jsm"); +ChromeUtils.import("resource:///modules/gloda/NounMimetype.jsm"); +ChromeUtils.import("resource:///modules/gloda/IndexMsg.jsm"); +const { GlodaABAttrs } = ChromeUtils.import( + "resource:///modules/gloda/GlodaMsgIndexer.jsm" +); +GlodaABAttrs.init(); diff --git a/comm/mailnews/db/gloda/modules/Facet.jsm b/comm/mailnews/db/gloda/modules/Facet.jsm new file mode 100644 index 0000000000..96425b8838 --- /dev/null +++ b/comm/mailnews/db/gloda/modules/Facet.jsm @@ -0,0 +1,599 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This file provides faceting logic. 
+ */ + +var EXPORTED_SYMBOLS = ["FacetDriver", "FacetUtils"]; + +const { GlodaConstants } = ChromeUtils.import( + "resource:///modules/gloda/GlodaConstants.jsm" +); + +const lazy = {}; +ChromeUtils.defineModuleGetter( + lazy, + "Gloda", + "resource:///modules/gloda/GlodaPublic.jsm" +); + +/** + * Decides the appropriate faceters for the noun type and drives the faceting + * process. This class and the faceters are intended to be reusable so that + * you only need one instance per faceting session. (Although each faceting + * pass is accordingly destructive to previous results.) + * + * Our strategy for faceting is to process one attribute at a time across all + * the items in the provided set. The alternative would be to iterate over + * the items and then iterate over the attributes on each item. While both + * approaches have caching downsides + */ +function FacetDriver(aNounDef, aWindow) { + this.nounDef = aNounDef; + this._window = aWindow; + + this._makeFaceters(); +} +FacetDriver.prototype = { + /** + * Populate |this.faceters| with a set of faceters appropriate to the noun + * definition associated with this instance. 
+ */ + _makeFaceters() { + let faceters = (this.faceters = []); + + function makeFaceter(aAttrDef, aFacetDef) { + let facetType = aFacetDef.type; + + if (aAttrDef.singular) { + if (facetType == "date") { + faceters.push(new DateFaceter(aAttrDef, aFacetDef)); + } else { + faceters.push(new DiscreteFaceter(aAttrDef, aFacetDef)); + } + } else if (facetType == "nonempty?") { + faceters.push(new NonEmptySetFaceter(aAttrDef, aFacetDef)); + } else { + faceters.push(new DiscreteSetFaceter(aAttrDef, aFacetDef)); + } + } + + for (let key in this.nounDef.attribsByBoundName) { + let attrDef = this.nounDef.attribsByBoundName[key]; + // ignore attributes that do not want to be faceted + if (!attrDef.facet) { + continue; + } + + makeFaceter(attrDef, attrDef.facet); + + if ("extraFacets" in attrDef) { + for (let facetDef of attrDef.extraFacets) { + makeFaceter(attrDef, facetDef); + } + } + } + }, + /** + * Asynchronously facet the provided items, calling the provided callback when + * completed. + */ + go(aItems, aCallback, aCallbackThis) { + this.items = aItems; + this.callback = aCallback; + this.callbackThis = aCallbackThis; + + this._nextFaceter = 0; + this._drive(); + }, + + _MAX_FACETING_TIMESLICE_MS: 100, + _FACETING_YIELD_DURATION_MS: 0, + _driveWrapper(aThis) { + aThis._drive(); + }, + _drive() { + let start = Date.now(); + + while (this._nextFaceter < this.faceters.length) { + let faceter = this.faceters[this._nextFaceter++]; + // for now we facet in one go, but the long-term plan allows for them to + // be generators. 
+ faceter.facetItems(this.items); + + let delta = Date.now() - start; + if (delta > this._MAX_FACETING_TIMESLICE_MS) { + this._window.setTimeout( + this._driveWrapper, + this._FACETING_YIELD_DURATION_MS, + this + ); + return; + } + } + + // we only get here once we are done with the faceters + this.callback.call(this.callbackThis); + }, +}; + +var FacetUtils = { + _groupSizeComparator(a, b) { + return b[1].length - a[1].length; + }, + + /** + * Given a list where each entry is a tuple of [group object, list of items + * belonging to that group], produce a new list of the top grouped items. We + * used to also produce an "other" aggregation, but that turned out to be + * conceptually difficult to deal with, so that's gone, leaving this method + * with much less to do. + * + * @param aAttrDef The attribute for the facet we are working with. + * @param aGroups The list of groups built for the facet. + * @param aMaxCount The number of result rows you want back. + */ + makeTopGroups(aAttrDef, aGroups, aMaxCount) { + let nounDef = aAttrDef.objectNounDef; + let realGroupsToUse = aMaxCount; + + let orderedBySize = aGroups.concat(); + orderedBySize.sort(this._groupSizeComparator); + + // - get the real groups to use and order them by the attribute comparator + let outGroups = orderedBySize.slice(0, realGroupsToUse); + let comparator = nounDef.comparator; + function comparatorHelper(a, b) { + return comparator(a[0], b[0]); + } + outGroups.sort(comparatorHelper); + + return outGroups; + }, +}; + +/** + * Facet discrete things like message authors, boolean values, etc. Only + * appropriate for use on singular values. Use |DiscreteSetFaceter| for + * non-singular values. 
/**
 * Facet discrete things like message authors, boolean values, etc.  Only
 * appropriate for use on singular values.  Use |DiscreteSetFaceter| for
 * non-singular values.
 *
 * @param aAttrDef The attribute definition whose values we bucket.
 * @param aFacetDef The facet definition (filter, group comparator, etc.).
 */
function DiscreteFaceter(aAttrDef, aFacetDef) {
  this.attrDef = aAttrDef;
  this.facetDef = aFacetDef;
}
DiscreteFaceter.prototype = {
  type: "discrete",
  /**
   * Facet the given set of items, deferring to the appropriate helper method
   * based on whether the attribute's values are primitives or noun objects.
   */
  facetItems(aItems) {
    return this.attrDef.objectNounDef.isPrimitive
      ? this.facetPrimitiveItems(aItems)
      : this.facetComplexItems(aItems);
  },
  /**
   * Facet an attribute whose value is primitive, meaning that it is a raw
   * numeric value or string, rather than a complex object.
   */
  facetPrimitiveItems(aItems) {
    const key = this.attrDef.boundName;
    const acceptValue = this.facetDef.filter;

    // Maps the stringified group key back to the original (typed) value.
    const keyToValue = {};
    const groups = (this.groups = {});
    this.groupCount = 0;

    for (const item of aItems) {
      const value = key in item ? item[key] : null;
      if (value === GlodaConstants.IGNORE_FACET) {
        continue;
      }
      // skip items the filter tells us to ignore
      if (acceptValue && !acceptValue(value)) {
        continue;
      }

      // hasOwnProperty-style checking is required because we cannot
      // guarantee the stringified value won't collide with attributes on
      // Object.prototype.
      if (Object.prototype.hasOwnProperty.call(groups, value)) {
        groups[value].push(item);
      } else {
        groups[value] = [item];
        keyToValue[value] = value;
        this.groupCount++;
      }
    }

    const sortedGroups = Object.keys(groups).map(groupKey => [
      keyToValue[groupKey],
      groups[groupKey],
    ]);
    const valueComparator = this.facetDef.groupComparator;
    sortedGroups.sort((a, b) => valueComparator(a[0], b[0]));
    this.orderedGroups = sortedGroups;
  },
  /**
   * Facet an attribute whose value is a complex object that can be identified
   * by the attribute named by facetDef.groupIdAttr.  This is the case where
   * the value is itself a noun instance.
   */
  facetComplexItems(aItems) {
    const key = this.attrDef.boundName;
    const acceptValue = this.facetDef.filter;
    const idAttr = this.facetDef.groupIdAttr;

    const groups = (this.groups = {});
    const groupMap = (this.groupMap = {});
    this.groupCount = 0;

    for (const item of aItems) {
      const value = key in item ? item[key] : null;
      if (value === GlodaConstants.IGNORE_FACET) {
        continue;
      }
      // skip items the filter tells us to ignore
      if (acceptValue && !acceptValue(value)) {
        continue;
      }

      const valueId = value == null ? null : value[idAttr];
      // hasOwnProperty-style checking is required because tag nouns are
      // complex objects with non-numeric ids that can collide with the
      // contents of Object.prototype.  (Note: the "tags" attribute is
      // actually handled by the DiscreteSetFaceter.)
      if (Object.prototype.hasOwnProperty.call(groupMap, valueId)) {
        groups[valueId].push(item);
      } else {
        groupMap[valueId] = value;
        groups[valueId] = [item];
        this.groupCount++;
      }
    }

    const sortedGroups = Object.keys(groups).map(groupKey => [
      groupMap[groupKey],
      groups[groupKey],
    ]);
    const valueComparator = this.facetDef.groupComparator;
    sortedGroups.sort((a, b) => valueComparator(a[0], b[0]));
    this.orderedGroups = sortedGroups;
  },
};
/**
 * Facet sets of discrete items.  For example, tags applied to messages.
 *
 * The main differences between us and |DiscreteFaceter| are:
 * - The empty set is notable (it becomes the null group).
 * - Specific set configurations could be interesting, but are not low-hanging
 *   fruit.
 *
 * @param aAttrDef The attribute definition whose value-sets we bucket.
 * @param aFacetDef The facet definition (filter, group comparator, etc.).
 */
function DiscreteSetFaceter(aAttrDef, aFacetDef) {
  this.attrDef = aAttrDef;
  this.facetDef = aFacetDef;
}
DiscreteSetFaceter.prototype = {
  type: "discrete",
  /**
   * Facet the given set of items, deferring to the appropriate helper method
   * based on whether the attribute's values are primitives or noun objects.
   */
  facetItems(aItems) {
    return this.attrDef.objectNounDef.isPrimitive
      ? this.facetPrimitiveItems(aItems)
      : this.facetComplexItems(aItems);
  },
  /**
   * Facet an attribute whose values are primitive, meaning raw numeric
   * values or strings rather than complex objects.
   */
  facetPrimitiveItems(aItems) {
    const key = this.attrDef.boundName;
    const acceptValue = this.facetDef.filter;

    const groups = (this.groups = {});
    // Maps the stringified group key back to the original (typed) value.
    const keyToValue = {};
    this.groupCount = 0;

    for (const item of aItems) {
      let values = key in item ? item[key] : null;
      if (values === GlodaConstants.IGNORE_FACET) {
        continue;
      }

      // A missing or empty set is faceted as the null group.
      if (values == null || values.length == 0) {
        values = [null];
      }
      for (const value of values) {
        // skip items the filter tells us to ignore
        if (acceptValue && !acceptValue(value)) {
          continue;
        }

        // hasOwnProperty-style checking is required because we cannot
        // guarantee the stringified value won't collide with attributes on
        // Object.prototype.
        if (Object.prototype.hasOwnProperty.call(groups, value)) {
          groups[value].push(item);
        } else {
          groups[value] = [item];
          keyToValue[value] = value;
          this.groupCount++;
        }
      }
    }

    const sortedGroups = Object.keys(groups).map(groupKey => [
      keyToValue[groupKey],
      groups[groupKey],
    ]);
    const valueComparator = this.facetDef.groupComparator;
    sortedGroups.sort((a, b) => valueComparator(a[0], b[0]));
    this.orderedGroups = sortedGroups;
  },
  /**
   * Facet an attribute whose values are complex objects that can be
   * identified by the attribute named by facetDef.groupIdAttr (the
   * noun-instance case).
   */
  facetComplexItems(aItems) {
    const key = this.attrDef.boundName;
    const acceptValue = this.facetDef.filter;
    const idAttr = this.facetDef.groupIdAttr;

    const groups = (this.groups = {});
    const groupMap = (this.groupMap = {});
    this.groupCount = 0;

    for (const item of aItems) {
      let values = key in item ? item[key] : null;
      if (values === GlodaConstants.IGNORE_FACET) {
        continue;
      }

      // A missing or empty set is faceted as the null group.
      if (values == null || values.length == 0) {
        values = [null];
      }
      for (const value of values) {
        // skip items the filter tells us to ignore
        if (acceptValue && !acceptValue(value)) {
          continue;
        }

        const valueId = value == null ? null : value[idAttr];
        // hasOwnProperty-style checking is required because tag nouns are
        // complex objects with non-numeric ids that can collide with the
        // contents of Object.prototype.
        if (Object.prototype.hasOwnProperty.call(groupMap, valueId)) {
          groups[valueId].push(item);
        } else {
          groupMap[valueId] = value;
          groups[valueId] = [item];
          this.groupCount++;
        }
      }
    }

    const sortedGroups = Object.keys(groups).map(groupKey => [
      groupMap[groupKey],
      groups[groupKey],
    ]);
    const valueComparator = this.facetDef.groupComparator;
    sortedGroups.sort((a, b) => valueComparator(a[0], b[0]));
    this.orderedGroups = sortedGroups;
  },
};
/**
 * Given a non-singular attribute, facet it as if it were a boolean based on
 * whether there is anything in the list (set).
 */
function NonEmptySetFaceter(aAttrDef, aFacetDef) {
  this.attrDef = aAttrDef;
  this.facetDef = aFacetDef;
}
NonEmptySetFaceter.prototype = {
  type: "boolean",
  /**
   * Partition the items into a "has values" group (true) and an "empty or
   * missing" group (false).
   */
  facetItems(aItems) {
    const key = this.attrDef.boundName;

    const haveSome = [];
    const haveNone = [];

    this.groupCount = 0;

    for (const item of aItems) {
      const values = key in item ? item[key] : null;
      if (values == null || values.length == 0) {
        haveNone.push(item);
      } else {
        haveSome.push(item);
      }
    }

    this.orderedGroups = [];
    if (haveSome.length) {
      this.orderedGroups.push([true, haveSome]);
    }
    if (haveNone.length) {
      this.orderedGroups.push([false, haveNone]);
    }
    this.groupCount = this.orderedGroups.length;
  },
  makeQuery(aGroupValues, aInclusive) {
    const query = (this.query = lazy.Gloda.newQuery(
      GlodaConstants.NOUN_MESSAGE
    ));

    // Constrain on the attribute being present at all.
    const constraintFunc = query[this.attrDef.boundName];
    constraintFunc.call(query);

    // Our query is always for non-empty lists (at this time), so we want to
    // invert if they're excluding 'true' or including 'false', which means !=.
    const invert = aGroupValues[0] != aInclusive;

    return [query, invert];
  },
};

/**
 * Facet dates.  We build a hierarchical nested structure of year, month, and
 * day nesting levels.  This decision was made speculatively in the hopes that
 * it would allow us to do clustered analysis and that there might be a benefit
 * for that.  For example, if you search for "Christmas", we might notice
 * clusters of messages around December of each year.  We could then present
 * these in a list as likely candidates, rather than a graphical timeline.
 * Alternately, it could be used to inform a non-linear visualization.  As it
 * stands (as of this writing), it's just a complicating factor.
 */
function DateFaceter(aAttrDef, aFacetDef) {
  this.attrDef = aAttrDef;
  this.facetDef = aFacetDef;
}
DateFaceter.prototype = {
  type: "date",
  /**
   * Bucket the items by year/month/day, also tracking the overall time range
   * and counts of missing / out-of-range dates.
   */
  facetItems(aItems) {
    const key = this.attrDef.boundName;

    const years = (this.years = { _subCount: 0 });
    // generally track the time range
    let oldest = null;
    let newest = null;

    this.validItems = [];

    // just cheat and put us at the front...
    this.groupCount = aItems.length ? 1000 : 0;
    this.orderedGroups = null;

    /** The number of items with a null/missing attribute. */
    this.missing = 0;

    /**
     * The number of items with a date that is unreasonably far in the past or
     * in the future.  Old-wise, we are concerned about incorrectly formatted
     * messages (spam) that end up placed around the UNIX epoch.  New-wise,
     * we are concerned about messages that can't be explained by users who
     * don't know how to set their clocks (both the current user and people
     * sending them mail), mainly meaning spam.
     * We want to avoid having our clever time-scale logic being made useless
     * by these unreasonable messages.
     */
    this.unreasonable = 0;
    // feb 1, 1970
    const tooOld = new Date(1970, 1, 1);
    // 3 days from now
    const tooNew = new Date(Date.now() + 3 * 24 * 60 * 60 * 1000);

    // Fetch (incrementing its count) or create a {_dateCount, _subCount}
    // bucket stored under aKey on aParent, bumping aParent's child count on
    // creation.
    const enterBucket = (aParent, aKey) => {
      if (aKey in aParent) {
        const bucket = aParent[aKey];
        bucket._dateCount++;
        return bucket;
      }
      const bucket = { _dateCount: 1, _subCount: 0 };
      aParent[aKey] = bucket;
      aParent._subCount++;
      return bucket;
    };

    for (const item of aItems) {
      const val = key in item ? item[key] : null;
      // -- missing
      if (val == null) {
        this.missing++;
        continue;
      }

      // -- unreasonable
      if (val < tooOld || val > tooNew) {
        this.unreasonable++;
        continue;
      }

      this.validItems.push(item);

      // -- time range
      if (oldest == null) {
        oldest = newest = val;
      } else if (val < oldest) {
        oldest = val;
      } else if (val > newest) {
        newest = val;
      }

      // -- bucket by year, then month, then day.
      // NOTE: getYear() (not getFullYear()) is deliberate; the keys on
      // |years| are offsets from 1900 and consumers of this structure may
      // depend on that.
      const year = enterBucket(years, val.getYear());
      const month = enterBucket(year, val.getMonth());

      // Day buckets are plain item lists rather than counting buckets.
      const day = val.getDate();
      if (day in month) {
        month[day].push(item);
      } else {
        month[day] = [item];
      }
    }

    this.oldest = oldest;
    this.newest = newest;
  },

  /**
   * Collect the per-day item lists of a month bucket, skipping the
   * "_"-prefixed metadata keys (_dateCount/_subCount).
   */
  _unionMonth(aMonthObj) {
    const dayItemLists = [];
    for (const dayKey in aMonthObj) {
      if (typeof dayKey == "string" && dayKey.startsWith("_")) {
        continue;
      }
      dayItemLists.push(aMonthObj[dayKey]);
    }
    return dayItemLists;
  },

  /**
   * Collect the per-month unions of a year bucket, skipping the
   * "_"-prefixed metadata keys.
   */
  _unionYear(aYearObj) {
    const monthItemLists = [];
    for (const monthKey in aYearObj) {
      if (typeof monthKey == "string" && monthKey.startsWith("_")) {
        continue;
      }
      monthItemLists.push(this._unionMonth(aYearObj[monthKey]));
    }
    return monthItemLists;
  },
};
&& key.startsWith("_")) { + continue; + } + dayItemLists.push(dayItemList); + } + return dayItemLists; + }, + + _unionYear(aYearObj) { + let monthItemLists = []; + for (let key in aYearObj) { + let monthObj = aYearObj[key]; + if (typeof key == "string" && key.startsWith("_")) { + continue; + } + monthItemLists.push(this._unionMonth(monthObj)); + } + return monthItemLists; + }, +}; diff --git a/comm/mailnews/db/gloda/modules/Gloda.jsm b/comm/mailnews/db/gloda/modules/Gloda.jsm new file mode 100644 index 0000000000..77b2288e53 --- /dev/null +++ b/comm/mailnews/db/gloda/modules/Gloda.jsm @@ -0,0 +1,2275 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +const EXPORTED_SYMBOLS = ["Gloda"]; + +const { GlodaDatastore } = ChromeUtils.import( + "resource:///modules/gloda/GlodaDatastore.jsm" +); +const { + GlodaAttributeDBDef, + GlodaAccount, + GlodaConversation, + GlodaFolder, + GlodaMessage, + GlodaContact, + GlodaIdentity, + GlodaAttachment, +} = ChromeUtils.import("resource:///modules/gloda/GlodaDataModel.jsm"); +const { GlodaCollection, GlodaCollectionManager } = ChromeUtils.import( + "resource:///modules/gloda/Collection.jsm" +); +const { GlodaConstants } = ChromeUtils.import( + "resource:///modules/gloda/GlodaConstants.jsm" +); +const { whittlerRegistry, mimeMsgToContentAndMeta } = ChromeUtils.import( + "resource:///modules/gloda/GlodaContent.jsm" +); +const { GlodaQueryClassFactory } = ChromeUtils.import( + "resource:///modules/gloda/GlodaQueryClassFactory.jsm" +); +const { GlodaUtils } = ChromeUtils.import( + "resource:///modules/gloda/GlodaUtils.jsm" +); +const { MailServices } = ChromeUtils.import( + "resource:///modules/MailServices.jsm" +); + +/** + * @see |Gloda.BadItemContentsError| + */ +function BadItemContentsError(aMessage) { + this.message = aMessage; +} +BadItemContentsError.prototype = { + 
toString() { + return this.message; + }, +}; + +/** + * Provides the user-visible (and extension visible) global database + * functionality. There is currently a dependency/ordering + * problem in that the concept of 'gloda' also includes some logic that is + * contributed by built-in extensions, if you will. Those built-in extensions + * (fundattr.js, GlodaExplicitAttr.jsm) also import this file. To avoid a circular + * dependency, those built-in extensions are loaded by Everybody.jsm. The + * simplest/best solution is probably to move Everybody.jsm to be Gloda.jsm and + * have it re-export only 'Gloda'. Gloda.jsm (this file) can then move to be + * gloda_int.js (or whatever our eventual naming scheme is), which built-in + * extensions can explicitly rely upon. + * + * === Concepts + * + * == Nouns + * + * Inspired by reasonable uses of triple-stores, I have tried to leverage + * existing model and terminology rather than rolling out own for everything. + * The idea with triple-stores is that you have a subject, a predicate, and an + * object. For example, if we are talking about a message, that is the + * subject, the predicate could roughly be sent-by, and the object a person. + * We can generalize this idea to say that the subject and objects are nouns. + * Since we want to be more flexible than only dealing with messages, we + * therefore introduce the concept of nouns as an organizing principle. + * + * == Attributes + * + * Our attributes definitions are basically our predicates. When we define + * an attribute, it's a label with a bunch of meta-data. Our attribute + * instances are basically a 'triple' in a triple-store. 
The attributes + * are stored in database rows that imply a specific noun-type (ex: the + * messageAttributes table), with an ID identifying the message which is our + * subject, an attribute ID which identifies the attribute definition in use + * (and therefore the predicate), plus an object ID (given context aka the + * noun type by the attribute's meta-data) which identifies the 'object'. + * + * == But... + * + * Things aren't entirely as clear as they could be right now, terminology/ + * concept/implementation-wise. Some work is probably still in order. + * + * === Implementation + * + * == Nouns + * + * So, we go and define the nouns that are roughly the classes in our data + * model. Every 'class' we define in GlodaDataModel.jsm is a noun that gets defined + * here in the Gloda core. We provide sufficient meta-data about the noun to + * serialize/deserialize its representation from our database representation. + * Nouns do not have to be defined in this class, but can also be contributed + * by external code. + * We have a concept of 'first class' nouns versus non-first class nouns. The + * distinction is meant to be whether we can store meta-information about those + * nouns using attributes. Right now, only message are real first-class nouns, + * but we want to expand that to include contacts and eventually events and + * tasks as lightning-integration occurs. In practice, we are stretching the + * definition of first-class nouns slightly to include things we can't store + * meta-data about, but want to be able to query about. We do want to resolve + * this. + * + * == Attributes + * + * Attributes are defined by "attribute providers" who are responsible for + * taking an instance of a first-class noun (for which they are registered) + * plus perhaps some other meta-data, and returning a list of attributes + * extracted from that noun. For now, this means messages. 
 * providers may create new data records as a side-effect of the indexing
 * process, although we have not yet fully dealt with the problem of deleting
 * these records should they become orphaned in the database due to the
 * purging of a message and its attributes.
 * All of the 'core' gloda attributes are provided by the GlodaFundAttr.jsm and
 * GlodaExplicitAttr.jsm providers.
 *
 * === (Notable) Future Work
 *
 * == Attributes
 *
 * Attribute mechanisms currently lack any support for 'overriding' attributes
 * provided by other attribute providers.  For example, the fundattr provider
 * tells us who a message is 'from' based on the e-mail address present.
 * However, other plugins may actually know better.  For example, the bugzilla
 * daemon e-mails based on bug activity although the daemon gets the credit
 * as the official sender.  A bugzilla plugin can easily extract the actual
 * person/e-mail addressed who did something on the bug to cause the
 * notification to be sent.  In practice, we would like that person to be
 * the 'sender' of the bugmail.  But we can't really do that right, yet.
 *
 * @namespace
 */
var Gloda = {
  /**
   * Initialize logging, the datastore (SQLite database), the core nouns and
   * attributes, and the contact and identities that belong to the presumed
   * current user (based on accounts).
   *
   * Additional nouns and the core attribute providers are initialized by the
   * Everybody.jsm module which ensures all of those dependencies are loaded
   * (and initialized).
   */
  _init() {
    // Order matters: logging first so the later phases can log; the
    // datastore needs the noun registry map so nouns resolve during load.
    this._initLogging();
    GlodaDatastore._init(this._nounIDToDef);
    this._initAttributes();
    this._initMyIdentities();
  },

  // Console logger instance; created lazily by _initLogging().
  _log: null,
  /**
   * Initialize logging; the error console window gets Warning/Error, and stdout
   * (via dump) gets everything.
   */
  _initLogging() {
    this._log = console.createInstance({
      prefix: "gloda",
      maxLogLevel: "Warn",
      // Users can raise verbosity via this pref without a rebuild.
      maxLogLevelPref: "gloda.loglevel",
    });
    this._log.info("Logging Initialized");
  },

  /**
   * Callers should access the unique ID for the GlodaDatastore
   * with this getter. If the GlodaDatastore has not been
   * initialized, this value is null.
   *
   * @returns a UUID as a string, ex: "c4dd0159-9287-480f-a648-a4613e147fdb"
   */
  get datastoreID() {
    return GlodaDatastore._datastoreID;
  },

  /**
   * Lookup a gloda message from an nsIMsgDBHdr, with the result returned as a
   * collection.  Keep in mind that the message may not be indexed, so you
   * may end up with an empty collection.  (Also keep in mind that this query
   * is asynchronous, so you will want your action-taking logic to be found
   * in your listener's onQueryCompleted method; the result will not be in
   * the collection when this method returns.)
   *
   * @param aMsgHdr The header of the message you want the gloda message for.
   * @param aListener The listener that should be registered with the collection
   * @param aData The (optional) value to set as the data attribute on the
   *     collection.
   *
   * @returns The collection that will receive the results.
   *
   * @testpoint gloda.ns.getMessageCollectionForHeader()
   */
  getMessageCollectionForHeader(aMsgHdr, aListener, aData) {
    // A message is uniquely identified by its folder plus message key.
    let query = Gloda.newQuery(GlodaConstants.NOUN_MESSAGE);
    query.folder(aMsgHdr.folder).messageKey(aMsgHdr.messageKey);
    return query.getCollection(aListener, aData);
  },

  /**
   * Given a list of message headers, return a collection containing the gloda
   * messages that correspond to those headers.  Keep in mind that gloda may
   * not have indexed all the messages, so the returned collection may not have
   * a message for each header you provide.
   * (Also keep in mind that this query
   * is asynchronous, so you will want your action-taking logic to be found
   * in your listener's onQueryCompleted method; no results will be present in
   * the collection when this method returns.)
   *
   * @param aHeaders An array of headers
   * @param aListener The listener that should be registered with the collection
   * @param aData The (optional) value to set as the data attribute on the
   *     collection.
   *
   * @returns The collection that will receive the results.
   *
   * @testpoint gloda.ns.getMessageCollectionForHeaders()
   */
  getMessageCollectionForHeaders(aHeaders, aListener, aData) {
    // group the headers by the folder they are found in
    let headersByFolder = {};
    for (let header of aHeaders) {
      let folderURI = header.folder.URI;
      let headersForFolder = headersByFolder[folderURI];
      if (headersForFolder === undefined) {
        headersByFolder[folderURI] = [header];
      } else {
        headersForFolder.push(header);
      }
    }

    let query = Gloda.newQuery(GlodaConstants.NOUN_MESSAGE);
    let clause;
    // build a query, using a separate union clause for each folder.
    // Each clause constrains on one folder plus all message keys seen in it.
    for (let folderURI in headersByFolder) {
      let headersForFolder = headersByFolder[folderURI];
      let folder = this.getFolderForFolder(headersForFolder[0].folder);
      // if this is the first or clause, just use the query itself
      if (!clause) {
        clause = query;
      } else {
        // Create a new query clause via the 'or' command.
        clause = query.or();
      }

      clause.folder(folder);
      let messageKeys = headersForFolder.map(hdr => hdr.messageKey);
      // apply() spreads the keys as individual constraint arguments.
      clause.messageKey.apply(clause, messageKeys);
    }

    return query.getCollection(aListener, aData);
  },

  /**
   * Given a gloda message and its (already streamed) MIME representation,
   * return just the content half of the [content, meta] pair produced by the
   * registered whittlers.
   *
   * @testpoint gloda.ns.getMessageContent
   */
  getMessageContent(aGlodaMessage, aMimeMsg) {
    return mimeMsgToContentAndMeta(
      aMimeMsg,
      aGlodaMessage.folderMessage.folder
    )[0];
  },

  /**
   * Map an nsIMsgFolder to its gloda folder abstraction.  Delegates to the
   * datastore's folder mapping (which presumably caches/creates the mapping
   * as needed -- see GlodaDatastore._mapFolder).
   */
  getFolderForFolder(aMsgFolder) {
    return GlodaDatastore._mapFolder(aMsgFolder);
  },

  /**
   * Takes one or more strings containing lists of comma-delimited e-mail
   * addresses with optional display names, and returns a list of sub-lists of
   * identities, where each sub-list corresponds to each of the strings passed
   * as arguments.  These identities are loaded from the database if they
   * already exist, or created if they do not yet exist.
   * If the identities need to be created, they will also result in the
   * creation of a gloda contact.  If a display name was provided with the
   * e-mail address, it will become the name of the gloda contact.  If a
   * display name was not provided, the e-mail address will also serve as the
   * contact name.
   * This method uses the indexer's callback handle mechanism, and does not
   * obey traditional return semantics.
   *
   * We normalize all e-mail addresses to be lowercase as a normative measure.
   *
   * @param aCallbackHandle The GlodaIndexer callback handle (or equivalent)
   *   that you are operating under.
   * @param aAddrGroups... One or more strings.  Each string can contain zero
   *   or more e-mail addresses with display name.  If more than one address
   *   is given, they should be comma-delimited.  For example
   *   '"Bob Smith" <bob@example.com>' is an address with display name.  Mime
   *   header decoding is performed, but is ignorant of any folder-level
   *   character set overrides.
   * @returns via the callback handle mechanism, a list containing one sub-list
   *   for each string argument passed.
   *   Each sub-list contains zero or more
   *   GlodaIdentity instances corresponding to the addresses provided.
   */
  *getOrCreateMailIdentities(aCallbackHandle, ...aAddrGroups) {
    // Maps lowercased address -> [displayName, resultList, resultList, ...];
    // an address seen in several argument strings accumulates each group's
    // result list so the identity can be pushed into all of them later.
    let addresses = {};
    let resultLists = [];

    // parse the strings
    for (let aMailAddresses of aAddrGroups) {
      let parsed = GlodaUtils.parseMailAddresses(aMailAddresses);

      let resultList = [];
      resultLists.push(resultList);

      for (let iAddress = 0; iAddress < parsed.count; iAddress++) {
        // Normalize to lowercase so lookups and dedupe are case-insensitive.
        let address = parsed.addresses[iAddress].toLowerCase();
        if (address in addresses) {
          addresses[address].push(resultList);
        } else {
          addresses[address] = [parsed.names[iAddress], resultList];
        }
      }
    }

    let addressList = Object.keys(addresses);
    if (addressList.length == 0) {
      yield aCallbackHandle.doneWithResult(resultLists);
      // we should be stopped before we reach this point, but safety first.
      return;
    }

    // Async lookup of all addresses in one identity query.
    let query = this.newQuery(GlodaConstants.NOUN_IDENTITY);
    query.kind("email");
    query.value.apply(query, addressList);
    let collection = query.getCollection(aCallbackHandle);
    yield GlodaConstants.kWorkAsync;

    // put the identities in the appropriate result lists
    for (let identity of collection.items) {
      let nameAndResultLists = addresses[identity.value];
      this._log.debug(
        " found identity for '" +
          nameAndResultLists[0] +
          "' (" +
          identity.value +
          ")"
      );
      // index 0 is the name, skip it
      for (let iResList = 1; iResList < nameAndResultLists.length; iResList++) {
        nameAndResultLists[iResList].push(identity);
      }
      // Remove so the creation loop below only sees the leftovers.
      delete addresses[identity.value];
    }

    // create the identities that did not exist yet
    for (let address in addresses) {
      let nameAndResultLists = addresses[address];
      let name = nameAndResultLists[0];

      this._log.debug(" creating contact for '" + name + "' (" + address + ")");

      // try and find an existing address book contact.
      let card = MailServices.ab.cardForEmailAddress(address);
      // XXX when we have the address book GUID stuff, we need to use that to
      //  find existing contacts... (this will introduce a new query phase
      //  where we batch all the GUIDs for an async query)
      // XXX when the address book supports multiple e-mail addresses, we
      //  should also just create identities for any that don't yet exist

      // if there is no name, just use the e-mail (the ab indexer actually
      // processes the card's displayName for synchronization, so we don't
      // need to do that.)
      if (!name) {
        name = address;
      }

      let contact = GlodaDatastore.createContact(null, null, name, 0, 0);

      // we must create the identity.  use a blank description because there's
      // nothing to differentiate it from other identities, as this contact
      // only has one initially (us).
      // XXX when we have multiple e-mails and there is a meaning associated
      // with each e-mail, try and use that to populate the description.
      // XXX we are creating the identity here before we insert the contact.
      // conceptually it is good for us to be creating the identity before
      // exposing it to the address-book indexer, but we could get our id's
      // in a bad way from not deferring the identity insertion until after
      // the contact insertion.
      let identity = GlodaDatastore.createIdentity(
        contact.id,
        contact,
        "email",
        address,
        /* description */ "",
        /* relay? */ false
      );
      contact._identities = [identity];

      // give the address book indexer a chance if we have a card.
      // (it will fix-up the name based on the card as appropriate)
      if (card) {
        yield aCallbackHandle.pushAndGo(
          Gloda.grokNounItem(contact, { card }, true, true, aCallbackHandle)
        );
      } else {
        // grokNounItem will issue the insert for us...
        GlodaDatastore.insertContact(contact);
      }

      for (let iResList = 1; iResList < nameAndResultLists.length; iResList++) {
        nameAndResultLists[iResList].push(identity);
      }
    }

    yield aCallbackHandle.doneWithResult(resultLists);
  },

  /**
   * Dictionary of the user's known identities; key is the identity id, value
   * is the actual identity.  This is populated by _initMyIdentities based on
   * the accounts defined.
   */
  myIdentities: {},
  /**
   * The contact corresponding to the current user.  We are assuming that only
   * a single user/human being uses the current profile.  This is known to be
   * a flawed assumption, but is the best first approximation available.
   * The contact is based on the default account's default identity. The user
   * can change both, if desired, in Account Settings.
   *
   * @TODO attempt to deal with multiple people using the same profile
   */
  myContact: null,
  /**
   * Populate myIdentities with all of our identities.  Currently we do this
   * by assuming that there is one human/user per profile, and that all of the
   * accounts defined in the profile belong to them.  The single contact is
   * stored on myContact.
   *
   * @TODO deal with account addition/modification/removal
   * @TODO attempt to deal with multiple people using the same profile
   */
  _initMyIdentities() {
    let myContact = null;
    let myIdentities = {};
    // Process each email at most once; stored here.
    let myEmailAddresses = new Set();

    let fullName, fallbackName;
    let existingIdentities = [];
    let identitiesToCreate = [];

    let allIdentities = MailServices.accounts.allIdentities;
    let defaultMsgIdentity = MailServices.accounts.defaultAccount
      ? MailServices.accounts.defaultAccount.defaultIdentity
      : null;
    let defaultMsgIdentityKey = defaultMsgIdentity
      ? defaultMsgIdentity.key
      : null;
    let defaultIdentity;

    // Nothing to do if there are no accounts/identities.
    if (allIdentities.length == 0) {
      return;
    }

    // Pass 1: walk every account identity (primary email plus replyTo),
    // sync stored descriptions with the database, and split into identities
    // that already exist in gloda versus ones we must create.
    for (let msgIdentity of allIdentities) {
      let emailAddress = msgIdentity.email;
      let replyTo = msgIdentity.replyTo;
      let msgIdentityDescription = msgIdentity.fullName || msgIdentity.email;
      let isDefaultMsgIdentity = msgIdentity.key == defaultMsgIdentityKey;

      // The default identity's naming wins; otherwise first-seen wins.
      if (!fullName || isDefaultMsgIdentity) {
        fullName = msgIdentity.fullName;
      }
      if (!fallbackName || isDefaultMsgIdentity) {
        fallbackName = msgIdentity.email;
      }

      // Find the identities if they exist, flag to create them if they don't.
      for (let address of [emailAddress, replyTo]) {
        if (!address) {
          continue;
        }
        let parsed = GlodaUtils.parseMailAddresses(address);
        if (myEmailAddresses.has(parsed.addresses[0])) {
          continue;
        }
        let identity = GlodaDatastore.getIdentity("email", parsed.addresses[0]);
        if (identity) {
          if (identity.description != msgIdentityDescription) {
            // If the user changed the identity name, update the db.
            identity._description = msgIdentityDescription;
            GlodaDatastore.updateIdentity(identity);
          }
          existingIdentities.push(identity);
          if (isDefaultMsgIdentity) {
            defaultIdentity = identity;
          }
        } else {
          identitiesToCreate.push([
            parsed.addresses[0],
            msgIdentityDescription,
          ]);
        }
        myEmailAddresses.add(parsed.addresses[0]);
      }
    }
    // We need to establish the identity.contact portions of the relationship.
    for (let identity of existingIdentities) {
      identity._contact = GlodaDatastore.getContactByID(identity.contactID);
      if (defaultIdentity && defaultIdentity.id == identity.id) {
        if (identity.contact.name != (fullName || fallbackName)) {
          // If the user changed the default identity, update the db.
          identity.contact.name = fullName || fallbackName;
          GlodaDatastore.updateContact(identity.contact);
        }
        defaultIdentity._contact = identity.contact;
      }
    }

    if (defaultIdentity) {
      // The contact is based on the default account's default identity.
      myContact = defaultIdentity.contact;
    } else if (existingIdentities.length) {
      // Just use the first guy's contact.
      myContact = existingIdentities[0].contact;
    } else {
      // Create a new contact.
      myContact = GlodaDatastore.createContact(
        null,
        null,
        fullName || fallbackName,
        0,
        0
      );
      GlodaDatastore.insertContact(myContact);
    }

    for (let emailAndDescription of identitiesToCreate) {
      // XXX This won't always be of type "email" as we add new account types.
      let identity = GlodaDatastore.createIdentity(
        myContact.id,
        myContact,
        "email",
        emailAndDescription[0],
        emailAndDescription[1],
        false
      );
      existingIdentities.push(identity);
    }

    for (let identity of existingIdentities) {
      myIdentities[identity.id] = identity;
    }

    this.myContact = myContact;
    this.myIdentities = myIdentities;
    myContact._identities = Object.keys(myIdentities).map(
      id => myIdentities[id]
    );

    // We need contacts to make these objects reachable via the collection
    // manager.
    this._myContactCollection = this.explicitCollection(
      GlodaConstants.NOUN_CONTACT,
      [this.myContact]
    );
    this._myIdentitiesCollection = this.explicitCollection(
      GlodaConstants.NOUN_IDENTITY,
      this.myContact._identities
    );
  },

  /** Next Noun ID to hand out, these don't need to be persisted (for now). */
  _nextNounID: 1000,

  /**
   * Maps noun names to noun IDs.
   */
  _nounNameToNounID: {},
  /**
   * Maps noun IDs to noun definition dictionaries.  (Noun definition
   * dictionaries provided to us at the time a noun was defined, plus some
   * additional stuff we put in there.)
   */
  _nounIDToDef: {},

  // Shared toJSON for table-backed nouns: serialize as the row id.
  _managedToJSON(aItem) {
    return aItem.id;
  },

  /**
   * Define a noun.  Takes a dictionary with the following keys/values:
   *
   * @param aNounDef.name The name of the noun.  This is not a display name
   *   (anything being displayed needs to be localized, after all), but simply
   *   the canonical name for debugging purposes and for people to pass to
   *   lookupNoun.  The suggested convention is lower-case-dash-delimited,
   *   with names being singular (since it's a single noun we are referring
   *   to.)
   * @param aNounDef.class The 'class' to which an instance of the noun will
   *   belong (aka will pass an instanceof test).  You may also provide this
   *   as 'clazz' if the keyword makes your IDE angry.
   * @param aNounDef.allowsArbitraryAttrs Is this a 'first class noun'/can it
   *   be a subject, AKA can this noun have attributes stored on it that
   *   relate it to other things?  For example, a message is first-class; we
   *   store attributes of messages.  A date is not first-class now, nor is it
   *   likely to be; we will not store attributes about a date, although dates
   *   will be the objects of other subjects.  (For example: we might
   *   associate a date with a calendar event, but the date is an attribute of
   *   the calendar event and not vice versa.)
   * @param aNounDef.usesParameter A boolean indicating whether this noun
   *   requires use of the 'parameter' BLOB storage field on the attribute
   *   bindings in the database to persist itself.  Use of parameters should
   *   be limited to a reasonable number of values (16-32 is okay, more than
   *   that is pushing it and 256 should be considered an absolute upper
   *   bound) because of the database organization.  When false, your
   *   toParamAndValue function is expected to return null for the parameter
   *   and likewise your fromParamAndValue should expect ignore and generally
   *   ignore the argument.
   * @param aNounDef.toParamAndValue A function that takes an instantiated
   *   noun instance and returns a 2-element list of [parameter, value] where
   *   parameter may only be non-null if you passed a usesParameter of true.
This is not a display name
   *   (anything being displayed needs to be localized, after all), but simply
   *   the canonical name for debugging purposes and for people to pass to
   *   lookupNoun. The suggested convention is lower-case-dash-delimited,
   *   with names being singular (since it's a single noun we are referring
   *   to.)
   * @param aNounDef.class The 'class' to which an instance of the noun will
   *   belong (aka will pass an instanceof test). You may also provide this
   *   as 'clazz' if the keyword makes your IDE angry.
   * @param aNounDef.allowsArbitraryAttrs Is this a 'first class noun'/can it be
   *   a subject, AKA can this noun have attributes stored on it that relate
   *   it to other things? For example, a message is first-class; we store
   *   attributes of messages. A date is not first-class now, nor is it
   *   likely to be; we will not store attributes about a date, although dates
   *   will be the objects of other subjects. (For example: we might
   *   associate a date with a calendar event, but the date is an attribute of
   *   the calendar event and not vice versa.)
   * @param aNounDef.usesParameter A boolean indicating whether this noun
   *   requires use of the 'parameter' BLOB storage field on the attribute
   *   bindings in the database to persist itself. Use of parameters should
   *   be limited to a reasonable number of values (16-32 is okay, more than
   *   that is pushing it and 256 should be considered an absolute upper
   *   bound) because of the database organization. When false, your
   *   toParamAndValue function is expected to return null for the parameter
   *   and likewise your fromParamAndValue should generally ignore the
   *   parameter argument.
   * @param aNounDef.toParamAndValue A function that takes an instantiated noun
   *   instance and returns a 2-element list of [parameter, value] where
   *   parameter may only be non-null if you passed a usesParameter of true.
   *   Parameter may be of any type (BLOB), and value must be numeric (pass
   *   0 if you don't need the value).
   *
   * @param aNounDef.isPrimitive True when the noun instance is a raw numeric
   *   value/string/boolean. False when the instance is an object. When
   *   false, it is assumed the attribute that serves as a unique identifier
   *   for the value is "id" unless 'idAttr' is provided.
   * @param [aNounDef.idAttr="id"] For non-primitive nouns, this is the
   *   attribute on the object that uniquely identifies it.
   *
   * @param aNounDef.schema Unsupported mechanism by which you can define a
   *   table that corresponds to this noun. The table will be created if it
   *   does not exist.
   *   - name The table name; don't conflict with other things!
   *   - columns A list of [column name, sqlite type] tuples. You should
   *     always include a definition like ["id", "INTEGER PRIMARY KEY"] for
   *     now (and it should be the first column name too.) If you care about
   *     how the attributes are poked into your object (for example, you want
   *     underscores used for some of them because the attributes should be
   *     immutable), then you can include a third string that is the name of
   *     the attribute to use.
   *   - indices A dictionary of lists of column names, where the key name
   *     becomes the index name. Ex: {foo: ["bar"]} results in an index on
   *     the column "bar" where the index is named "foo".
   *
   * @param aNounID Optional explicit noun ID; core nouns pass one of the
   *   GlodaConstants.NOUN_* constants. When omitted, the next dynamically
   *   allocated ID (starting at _nextNounID) is used.
   * @returns The same (mutated) noun definition dictionary, now registered.
   */
  defineNoun(aNounDef, aNounID) {
    this._log.info("Defining noun: " + aNounDef.name);
    // Extension-defined nouns get a dynamically allocated ID.
    if (aNounID === undefined) {
      aNounID = this._nextNounID++;
    }
    aNounDef.id = aNounID;

    // Let people whose editors get angry about illegal attribute names use
    // clazz instead of class.
    if (aNounDef.clazz) {
      aNounDef.class = aNounDef.clazz;
    }

    // Default the unique-identifier attribute for non-primitive nouns.
    if (!("idAttr" in aNounDef)) {
      aNounDef.idAttr = "id";
    }
    // A noun without a comparator gets a loud failure rather than a silently
    // wrong sort order if anything ever tries to compare its instances.
    if (!("comparator" in aNounDef)) {
      aNounDef.comparator = function () {
        throw new Error(
          "Noun type '" + aNounDef.name + "' lacks a real comparator."
        );
      };
    }

    // We allow nouns to have data tables associated with them where we do all
    // the legwork. The schema attribute is the gateway to this magical world
    // of functionality. Said door is officially unsupported.
    if (aNounDef.schema) {
      // Derive an "ext_"-prefixed table name when one was not provided.
      if (!aNounDef.tableName) {
        if (aNounDef.schema.name) {
          aNounDef.tableName = "ext_" + aNounDef.schema.name;
        } else {
          aNounDef.tableName = "ext_" + aNounDef.name;
        }
      }
      // This creates the data table and binder and hooks everything up.
      GlodaDatastore.createNounTable(aNounDef);

      // Provide a default persistence mapping: [null parameter, numeric id].
      if (!aNounDef.toParamAndValue) {
        aNounDef.toParamAndValue = function (aThing) {
          if (aThing instanceof aNounDef.class) {
            return [null, aThing.id];
          }
          // assume they're just passing the id directly
          return [null, aThing];
        };
      }
    }

    // If it has a table, you can query on it. Seems straight-forward.
    if (aNounDef.tableName) {
      // Generate the four query class variants for this noun.
      [
        aNounDef.queryClass,
        aNounDef.nullQueryClass,
        aNounDef.explicitQueryClass,
        aNounDef.wildcardQueryClass,
      ] = GlodaQueryClassFactory(aNounDef);
      aNounDef._dbMeta = {};
      // Stamp the noun identity onto instances via the class prototype.
      aNounDef.class.prototype.NOUN_ID = aNounDef.id;
      aNounDef.class.prototype.NOUN_DEF = aNounDef;
      aNounDef.toJSON = this._managedToJSON;

      aNounDef.specialLoadAttribs = [];

      // - define the 'id' constrainer
      let idConstrainer = function (...aArgs) {
        let constraint = [GlodaConstants.kConstraintIdIn, null, ...aArgs];
        this._constraints.push(constraint);
        return this;
      };
      aNounDef.queryClass.prototype.id = idConstrainer;
    }
    // Size an in-memory cache from the byte budget divided by the estimated
    // per-instance cost; a zero-sized cache is simply not created.
    if (aNounDef.cache) {
      let cacheCost = aNounDef.cacheCost || 1024;
      let cacheBudget = aNounDef.cacheBudget || 128 * 1024;
      let cacheSize = Math.floor(cacheBudget / cacheCost);
      if (cacheSize) {
        GlodaCollectionManager.defineCache(aNounDef, cacheSize);
      }
    }
    aNounDef.attribsByBoundName = {};
    aNounDef.domExposeAttribsByBoundName = {};

    aNounDef.objectNounOfAttributes = [];

    // Register the noun in the name -> ID and ID -> definition lookup maps.
    this._nounNameToNounID[aNounDef.name] = aNounID;
    this._nounIDToDef[aNounID] = aNounDef;

    aNounDef.actions = [];

    // Per-noun provider bookkeeping, populated later by defineAttribute.
    this._attrProviderOrderByNoun[aNounDef.id] = [];
    this._attrOptimizerOrderByNoun[aNounDef.id] = [];
    this._attrProvidersByNoun[aNounDef.id] = {};

    return aNounDef;
  },

  /**
   * Lookup a noun (ID) suitable for passing to defineAttribute's various
   * noun arguments. Throws an exception if the noun with the given name
   * cannot be found; the assumption is that you can't live without the noun.
   *
   * @param aNounName The canonical name the noun was registered under.
   * @returns The numeric noun ID.
   */
  lookupNoun(aNounName) {
    if (aNounName in this._nounNameToNounID) {
      return this._nounNameToNounID[aNounName];
    }

    // Include every known noun name in the message so typos are easy to spot.
    throw Error(
      "Unable to locate noun with name '" +
        aNounName +
        "', but I " +
        "do know about: " +
        Object.keys(this._nounNameToNounID).join(", ")
    );
  },

  /**
   * Lookup a noun def given a name. Throws (via lookupNoun) if the name is
   * unknown.
   */
  lookupNounDef(aNounName) {
    return this._nounIDToDef[this.lookupNoun(aNounName)];
  },

  /**
   * Define an action on a noun. During the prototype stage, this was conceived
   * of as a way to expose all the constraints possible given a noun. For
   * example, if you have an identity or a contact, you could use this to
   * see all the messages sent from/to a given contact. It was likewise
   * thought potentially usable for future expansion. For example, you could
   * also decide to send an e-mail to a contact when you have the contact
   * instance available.
   * Outside of the 'expmess' checkbox-happy prototype, this functionality is
   * not used. As such, this functionality should be considered in flux and
   * subject to changes. Also, very open to specific suggestions motivated
   * by use cases.
   * One conceptual issue raised by this mechanism is the interaction of actions
   * with facts like "this message is read". We currently implement the 'fact'
   * by defining an attribute with a 'boolean' noun type. To deal with this,
   * in various places we pass-in the attribute as well as the noun value.
   * Since the relationships for booleans and integers in these cases is
   * standard and well-defined, this works out pretty well, but suggests we
   * need to think things through.
   *
   * @param aNounID The ID of the noun you want to define an action on.
   * @param aActionMeta The dictionary describing the noun. The dictionary
   *   should have the following fields:
   * - actionType: a string indicating the type of action. Currently, only
   *   "filter" is a legal value.
   * - actionTarget: the noun ID of the noun type on which this action is
   *   applicable. For example,
   *
   * The following should be present for actionType=="filter";
   * - shortName: The name that should be used to display this constraint. For
   *   example, a checkbox-heavy UI might display a checkbox for each constraint
   *   using shortName as the label.
   * - makeConstraint: A function that takes the attribute that is the source
   *   of the noun and the noun instance as arguments, and returns APV-style
   *   constraints. Since the APV-style query mechanism is now deprecated,
   *   this signature is deprecated. Probably the way to update this would be
   *   to pass in the query instance that constraints should be contributed to.
   */
  defineNounAction(aNounID, aActionMeta) {
    let nounDef = this._nounIDToDef[aNounID];
    nounDef.actions.push(aActionMeta);
  },

  /**
   * Retrieve all of the actions (as defined using defineNounAction) for the
   * given noun type (via noun ID) with the given action type (ex: filter).
   *
   * @param aNounID The noun ID whose actions are requested.
   * @param aActionType Optional action-type filter; when omitted/falsy, all
   *   actions for the noun are returned.
   * @returns A (possibly empty) array of action meta dictionaries.
   */
  getNounActions(aNounID, aActionType) {
    let nounDef = this._nounIDToDef[aNounID];
    if (!nounDef) {
      return [];
    }
    return nounDef.actions.filter(
      action => !aActionType || action.actionType == aActionType
    );
  },

  /** Attribute providers in the sequence to process them. */
  _attrProviderOrderByNoun: {},
  /** Attribute providers that provide optimizers, in the sequence to proc.
*/ + _attrOptimizerOrderByNoun: {}, + /** Maps attribute providers to the list of attributes they provide */ + _attrProviders: {}, + /** + * Maps nouns to their attribute providers to a list of the attributes they + * provide for the noun. + */ + _attrProvidersByNoun: {}, + + /** + * Define the core nouns (that are not defined elsewhere) and a few noun + * actions. Core nouns could be defined in other files, assuming dependency + * issues are resolved via the Everybody.jsm mechanism or something else. + * Right now, noun_tag defines the tag noun. If we broke more of these out, + * we would probably want to move the 'class' code from GlodaDataModel.jsm, the + * SQL table def and helper code from GlodaDatastore.jsm (and this code) to their + * own noun_*.js files. There are some trade-offs to be made, and I think + * we can deal with those once we start to integrate lightning/calendar and + * our noun space gets large and more heterogeneous. + */ + _initAttributes() { + this.defineNoun( + { + name: "bool", + clazz: Boolean, + allowsArbitraryAttrs: false, + isPrimitive: true, + // favor true before false + comparator(a, b) { + if (a == null) { + if (b == null) { + return 0; + } + return 1; + } else if (b == null) { + return -1; + } + return b - a; + }, + toParamAndValue(aBool) { + return [null, aBool ? 
1 : 0]; + }, + }, + GlodaConstants.NOUN_BOOLEAN + ); + this.defineNoun( + { + name: "number", + clazz: Number, + allowsArbitraryAttrs: false, + continuous: true, + isPrimitive: true, + comparator(a, b) { + if (a == null) { + if (b == null) { + return 0; + } + return 1; + } else if (b == null) { + return -1; + } + return a - b; + }, + toParamAndValue(aNum) { + return [null, aNum]; + }, + }, + GlodaConstants.NOUN_NUMBER + ); + this.defineNoun( + { + name: "string", + clazz: String, + allowsArbitraryAttrs: false, + isPrimitive: true, + comparator(a, b) { + if (a == null) { + if (b == null) { + return 0; + } + return 1; + } else if (b == null) { + return -1; + } + return a.localeCompare(b); + }, + toParamAndValue(aString) { + return [null, aString]; + }, + }, + GlodaConstants.NOUN_STRING + ); + this.defineNoun( + { + name: "date", + clazz: Date, + allowsArbitraryAttrs: false, + continuous: true, + isPrimitive: true, + comparator(a, b) { + if (a == null) { + if (b == null) { + return 0; + } + return 1; + } else if (b == null) { + return -1; + } + return a - b; + }, + toParamAndValue(aDate) { + return [null, aDate.valueOf() * 1000]; + }, + }, + GlodaConstants.NOUN_DATE + ); + this.defineNoun( + { + name: "fulltext", + clazz: String, + allowsArbitraryAttrs: false, + continuous: false, + isPrimitive: true, + comparator(a, b) { + throw new Error("Fulltext nouns are not comparable!"); + }, + // as noted on NOUN_FULLTEXT, we just pass the string around. it never + // hits the database, so it's okay. + toParamAndValue(aString) { + return [null, aString]; + }, + }, + GlodaConstants.NOUN_FULLTEXT + ); + + this.defineNoun( + { + name: "folder", + clazz: GlodaFolder, + allowsArbitraryAttrs: false, + isPrimitive: false, + queryHelpers: { + /** + * Query for accounts based on the account associated with folders. We + * walk all of the folders associated with an account and put them in + * the list of folders that match if gloda would index them. 
This is + * unsuitable for producing a persistable constraint since it does not + * adapt for added/deleted folders. However, it is sufficient for + * faceting. Also, we don't persist constraints yet. + * + * @TODO The long-term solution is to move towards using arithmetic + * encoding on folder-id's like we use for MIME types and friends. + */ + Account(aAttrDef, aArguments) { + let folderValues = []; + let seenRootFolders = {}; + for (let iArg = 0; iArg < aArguments.length; iArg++) { + let givenFolder = aArguments[iArg]; + let givenMsgFolder = givenFolder.getXPCOMFolder( + givenFolder.kActivityFolderOnlyNoData + ); + let rootFolder = givenMsgFolder.rootFolder; + + // skip processing this folder if we have already processed its + // root folder. + if (rootFolder.URI in seenRootFolders) { + continue; + } + seenRootFolders[rootFolder.URI] = true; + + for (let folder of rootFolder.descendants) { + let folderFlags = folder.flags; + + // Ignore virtual folders, non-mail folders. + // XXX this is derived from GlodaIndexer's shouldIndexFolder. + // This should probably just use centralized code or the like. 
+ if ( + !(folderFlags & Ci.nsMsgFolderFlags.Mail) || + folderFlags & Ci.nsMsgFolderFlags.Virtual + ) { + continue; + } + // we only index local or IMAP folders + if ( + !(folder instanceof Ci.nsIMsgLocalMailFolder) && + !(folder instanceof Ci.nsIMsgImapMailFolder) + ) { + continue; + } + + let glodaFolder = Gloda.getFolderForFolder(folder); + folderValues.push(glodaFolder); + } + } + return this._inConstraintHelper(aAttrDef, folderValues); + }, + }, + comparator(a, b) { + if (a == null) { + if (b == null) { + return 0; + } + return 1; + } else if (b == null) { + return -1; + } + return a.name.localeCompare(b.name); + }, + toParamAndValue(aFolderOrGlodaFolder) { + if (aFolderOrGlodaFolder instanceof GlodaFolder) { + return [null, aFolderOrGlodaFolder.id]; + } + return [null, GlodaDatastore._mapFolder(aFolderOrGlodaFolder).id]; + }, + }, + GlodaConstants.NOUN_FOLDER + ); + this.defineNoun( + { + name: "account", + clazz: GlodaAccount, + allowsArbitraryAttrs: false, + isPrimitive: false, + equals(a, b) { + if ((a && !b) || (!a && b)) { + return false; + } + if (!a && !b) { + return true; + } + return a.id == b.id; + }, + comparator(a, b) { + if (a == null) { + if (b == null) { + return 0; + } + return 1; + } else if (b == null) { + return -1; + } + return a.name.localeCompare(b.name); + }, + }, + GlodaConstants.NOUN_ACCOUNT + ); + this.defineNoun( + { + name: "conversation", + clazz: GlodaConversation, + allowsArbitraryAttrs: false, + isPrimitive: false, + cache: true, + cacheCost: 512, + tableName: "conversations", + attrTableName: "messageAttributes", + attrIDColumnName: "conversationID", + datastore: GlodaDatastore, + objFromRow: GlodaDatastore._conversationFromRow, + comparator(a, b) { + if (a == null) { + if (b == null) { + return 0; + } + return 1; + } else if (b == null) { + return -1; + } + return a.subject.localeCompare(b.subject); + }, + toParamAndValue(aConversation) { + if (aConversation instanceof GlodaConversation) { + return [null, aConversation.id]; + 
} + // assume they're just passing the id directly + return [null, aConversation]; + }, + }, + GlodaConstants.NOUN_CONVERSATION + ); + this.defineNoun( + { + name: "message", + clazz: GlodaMessage, + allowsArbitraryAttrs: true, + isPrimitive: false, + cache: true, + cacheCost: 2048, + tableName: "messages", + // we will always have a fulltext row, even for messages where we don't + // have the body available. this is because we want the subject indexed. + dbQueryJoinMagic: + " INNER JOIN messagesText ON messages.id = messagesText.rowid", + attrTableName: "messageAttributes", + attrIDColumnName: "messageID", + datastore: GlodaDatastore, + objFromRow: GlodaDatastore._messageFromRow, + dbAttribAdjuster: GlodaDatastore.adjustMessageAttributes, + dbQueryValidityConstraintSuffix: + " AND +deleted = 0 AND +folderID IS NOT NULL AND +messageKey IS NOT NULL", + // This is what's used when we have no validity constraints, i.e. we allow + // for ghost messages, which do not have a row in the messagesText table. 
+ dbQueryJoinMagicWithNoValidityConstraints: + " LEFT JOIN messagesText ON messages.id = messagesText.rowid", + objInsert: GlodaDatastore.insertMessage, + objUpdate: GlodaDatastore.updateMessage, + toParamAndValue(aMessage) { + if (aMessage instanceof GlodaMessage) { + return [null, aMessage.id]; + } + // assume they're just passing the id directly + return [null, aMessage]; + }, + }, + GlodaConstants.NOUN_MESSAGE + ); + this.defineNoun( + { + name: "contact", + clazz: GlodaContact, + allowsArbitraryAttrs: true, + isPrimitive: false, + cache: true, + cacheCost: 128, + tableName: "contacts", + attrTableName: "contactAttributes", + attrIDColumnName: "contactID", + datastore: GlodaDatastore, + objFromRow: GlodaDatastore._contactFromRow, + dbAttribAdjuster: GlodaDatastore.adjustAttributes, + objInsert: GlodaDatastore.insertContact, + objUpdate: GlodaDatastore.updateContact, + comparator(a, b) { + if (a == null) { + if (b == null) { + return 0; + } + return 1; + } else if (b == null) { + return -1; + } + return a.name.localeCompare(b.name); + }, + toParamAndValue(aContact) { + if (aContact instanceof GlodaContact) { + return [null, aContact.id]; + } + // assume they're just passing the id directly + return [null, aContact]; + }, + }, + GlodaConstants.NOUN_CONTACT + ); + this.defineNoun( + { + name: "identity", + clazz: GlodaIdentity, + allowsArbitraryAttrs: false, + isPrimitive: false, + cache: true, + cacheCost: 128, + usesUniqueValue: true, + tableName: "identities", + datastore: GlodaDatastore, + objFromRow: GlodaDatastore._identityFromRow, + /** + * Short string is the contact name, long string includes the identity + * value too, delimited by a colon. Not tremendously localizable. 
+ */ + userVisibleString(aIdentity, aLong) { + if (!aLong) { + return aIdentity.contact.name; + } + if (aIdentity.contact.name == aIdentity.value) { + return aIdentity.value; + } + return aIdentity.contact.name + " (" + aIdentity.value + ")"; + }, + comparator(a, b) { + if (a == null) { + if (b == null) { + return 0; + } + return 1; + } else if (b == null) { + return -1; + } + return a.contact.name.localeCompare(b.contact.name); + }, + toParamAndValue(aIdentity) { + if (aIdentity instanceof GlodaIdentity) { + return [null, aIdentity.id]; + } + // assume they're just passing the id directly + return [null, aIdentity]; + }, + }, + GlodaConstants.NOUN_IDENTITY + ); + this.defineNoun( + { + name: "attachment-infos", + clazz: GlodaAttachment, + allowsArbitraryAttrs: false, + isPrimitive: false, + toJSON(x) { + return [ + x._name, + x._contentType, + x._size, + x._part, + x._externalUrl, + x._isExternal, + ]; + }, + fromJSON(x, aGlodaMessage) { + let [name, contentType, size, _part, _externalUrl, isExternal] = x; + return new GlodaAttachment( + aGlodaMessage, + name, + contentType, + size, + _part, + _externalUrl, + isExternal + ); + }, + }, + GlodaConstants.NOUN_ATTACHMENT + ); + + // parameterized identity is just two identities; we store the first one + // (whose value set must be very constrainted, like the 'me' identities) + // as the parameter, the second (which does not need to be constrained) + // as the value. + this.defineNoun( + { + name: "parameterized-identity", + clazz: null, + allowsArbitraryAttrs: false, + comparator(a, b) { + if (a == null) { + if (b == null) { + return 0; + } + return 1; + } else if (b == null) { + return -1; + } + // First sort by the first identity in the tuple + // Since our general use-case is for the first guy to be "me", we only + // compare the identity value, not the name. 
+ let fic = a[0].value.localeCompare(b[0].value); + if (fic) { + return fic; + } + // Next compare the second identity in the tuple, but use the contact + // this time to be consistent with our identity comparator. + return a[1].contact.name.localeCompare(b[1].contact.name); + }, + computeDelta(aCurValues, aOldValues) { + let oldMap = {}; + for (let tupe of aOldValues) { + let [originIdentity, targetIdentity] = tupe; + let targets = oldMap[originIdentity]; + if (targets === undefined) { + targets = oldMap[originIdentity] = {}; + } + targets[targetIdentity] = true; + } + + let added = [], + removed = []; + for (let tupe of aCurValues) { + let [originIdentity, targetIdentity] = tupe; + let targets = oldMap[originIdentity]; + if (targets === undefined || !(targetIdentity in targets)) { + added.push(tupe); + } else { + delete targets[targetIdentity]; + } + } + + for (let originIdentity in oldMap) { + let targets = oldMap[originIdentity]; + for (let targetIdentity in targets) { + removed.push([originIdentity, targetIdentity]); + } + } + + return [added, removed]; + }, + contributeObjDependencies( + aJsonValues, + aReferencesByNounID, + aInverseReferencesByNounID + ) { + // nothing to do with a zero-length list + if (aJsonValues.length == 0) { + return false; + } + + let nounIdentityDef = + Gloda._nounIDToDef[GlodaConstants.NOUN_IDENTITY]; + let references = aReferencesByNounID[nounIdentityDef.id]; + if (references === undefined) { + references = aReferencesByNounID[nounIdentityDef.id] = {}; + } + + for (let tupe of aJsonValues) { + let [originIdentityID, targetIdentityID] = tupe; + if (!(originIdentityID in references)) { + references[originIdentityID] = null; + } + if (!(targetIdentityID in references)) { + references[targetIdentityID] = null; + } + } + + return true; + }, + resolveObjDependencies( + aJsonValues, + aReferencesByNounID, + aInverseReferencesByNounID + ) { + let references = aReferencesByNounID[GlodaConstants.NOUN_IDENTITY]; + + let results = []; + for 
(let tupe of aJsonValues) {
          let [originIdentityID, targetIdentityID] = tupe;
          results.push([
            references[originIdentityID],
            references[targetIdentityID],
          ]);
        }

        return results;
      },
      // Persist a tuple as a pair of identity IDs.
      toJSON(aIdentityTuple) {
        return [aIdentityTuple[0].id, aIdentityTuple[1].id];
      },
      // The first identity is the parameter, the second the value.
      toParamAndValue(aIdentityTuple) {
        return [aIdentityTuple[0].id, aIdentityTuple[1].id];
      },
    },
    GlodaConstants.NOUN_PARAM_IDENTITY
  );

  // Ask the datastore for all attribute definitions.
  // NOTE(review): presumably this re-binds attributes persisted by prior
  // runs — confirm against GlodaDatastore.getAllAttributes.
  GlodaDatastore.getAllAttributes();
},

/**
 * Create accessor functions to 'bind' an attribute to underlying normalized
 * attribute storage, as well as creating the appropriate query object
 * constraint helper functions. This name is somewhat of a misnomer because
 * special attributes are not 'bound' (because specific/non-generic per-class
 * code provides the properties) but still depend on this method to
 * establish their constraint helper methods.
 *
 * @param aAttrDef The attribute definition being bound.
 * @param aSubjectNounDef The noun definition the attribute applies to; its
 *   query class prototype gets the constraint helper methods.
 *
 * @XXX potentially rename to not suggest binding is required.
 */
_bindAttribute(aAttrDef, aSubjectNounDef) {
  let objectNounDef = aAttrDef.objectNounDef;

  // -- the query constraint helpers
  if (aSubjectNounDef.queryClass !== undefined) {
    let constrainer;
    let canQuery = true;
    if (
      "special" in aAttrDef &&
      aAttrDef.special == GlodaConstants.kSpecialFulltext
    ) {
      // Fulltext attributes get a fulltext constraint helper.
      constrainer = function (...aArgs) {
        let constraint = [
          GlodaConstants.kConstraintFulltext,
          aAttrDef,
          ...aArgs,
        ];
        this._constraints.push(constraint);
        return this;
      };
    } else if (aAttrDef.canQuery || aAttrDef.attributeName.startsWith("_")) {
      // Queryable attributes (and internal "_"-prefixed ones, which are
      // always allowed) get the standard "in" constraint helper.
      constrainer = function (...aArgs) {
        let constraint = [GlodaConstants.kConstraintIn, aAttrDef, ...aArgs];
        this._constraints.push(constraint);
        return this;
      };
    } else {
      // Not queryable: install a helper that fails loudly so misuse is
      // diagnosed at the call site rather than producing a broken query.
      constrainer = function () {
        throw new Error(
          "Cannot query on attribute " +
            aAttrDef.attributeName +
            " because its canQuery parameter hasn't been set to true." +
            " Reading the comments about Gloda.defineAttribute may be a" +
            " sensible thing to do now."
        );
      };
      canQuery = false;
    }

    aSubjectNounDef.queryClass.prototype[aAttrDef.boundName] = constrainer;

    // Don't bind extra query-able attributes if we're unable to perform a
    // search on the attribute.
    if (!canQuery) {
      return;
    }

    // - ranged value helper: fooRange
    if (objectNounDef.continuous) {
      // takes one or more tuples of [lower bound, upper bound]
      let rangedConstrainer = function (...aArgs) {
        let constraint = [
          GlodaConstants.kConstraintRanges,
          aAttrDef,
          ...aArgs,
        ];
        this._constraints.push(constraint);
        return this;
      };

      aSubjectNounDef.queryClass.prototype[aAttrDef.boundName + "Range"] =
        rangedConstrainer;
    }

    // - string LIKE helper for special on-row attributes: fooLike
    // (it is impossible to store a string as an indexed attribute, which is
    // why we do this for on-row only.)
    if (
      "special" in aAttrDef &&
      aAttrDef.special == GlodaConstants.kSpecialString
    ) {
      let likeConstrainer = function (...aArgs) {
        let constraint = [
          GlodaConstants.kConstraintStringLike,
          aAttrDef,
          ...aArgs,
        ];
        this._constraints.push(constraint);
        return this;
      };

      aSubjectNounDef.queryClass.prototype[aAttrDef.boundName + "Like"] =
        likeConstrainer;
    }

    // - Custom helpers provided by the noun type...
    // Each helper is exposed as boundName + helperName and receives the
    // attribute definition as its first argument.
    if ("queryHelpers" in objectNounDef) {
      for (let name in objectNounDef.queryHelpers) {
        let helper = objectNounDef.queryHelpers[name];
        // we need a new closure...
        let helperFunc = helper;
        aSubjectNounDef.queryClass.prototype[aAttrDef.boundName + name] =
          function (...aArgs) {
            return helperFunc.call(this, aAttrDef, ...aArgs);
          };
      }
    }
  }
},

/**
 * Names of attribute-specific localized strings and the JS attribute they are
 * exposed as in the attribute's "strings" attribute (if the provider has a
 * string bundle exposed on its "strings" attribute). They are rooted at
 * "gloda.SUBJECT-NOUN-NAME.attr.ATTR-NAME.*".
   *
   * Please consult the localization notes in gloda.properties to understand
   * what these are used for.
   */
  _ATTR_LOCALIZED_STRINGS: {
    /* - Faceting */
    facetNameLabel: "facetNameLabel",
    noneLabel: "noneLabel",
    includeLabel: "includeLabel",
    excludeLabel: "excludeLabel",
    remainderLabel: "remainderLabel",
    mustMatchLabel: "mustMatchLabel",
    cantMatchLabel: "cantMatchLabel",
    mayMatchLabel: "mayMatchLabel",
    mustMatchNoneLabel: "mustMatchNoneLabel",
    mustMatchSomeLabel: "mustMatchSomeLabel",
    mayMatchAnyLabel: "mayMatchAnyLabel",
  },
  /**
   * Define an attribute and all its meta-data. Takes a single dictionary as
   * its argument, with the following required properties:
   *
   * @param aAttrDef.provider The object instance providing a 'process' method.
   * @param aAttrDef.extensionName The name of the extension providing these
   *   attributes.
   * @param aAttrDef.attributeType The type of attribute, one of the values from
   *   the kAttr* enumeration.
   * @param aAttrDef.attributeName The name of the attribute, which also doubles
   *   as the bound property name if you pass 'bind' a value of true. You are
   *   responsible for avoiding collisions, which presumably will mean
   *   checking/updating a wiki page in the future, or just prefixing your
   *   attribute name with your extension name or something like that.
   * @param aAttrDef.bind Should this attribute be 'bound' as a convenience
   *   attribute on the subject's object (true/false)? For example, with an
   *   attributeName of "foo" and passing true for 'bind' with a subject noun
   *   of NOUN_MESSAGE, GlodaMessage instances will expose a "foo" getter that
   *   returns the value of the attribute. If 'singular' is true, this means
   *   an instance of the object class corresponding to the noun type or null
   *   if the attribute does not exist. If 'singular' is false, this means a
   *   list of instances of the object class corresponding to the noun type,
   *   where the list may be empty if no instances of the attribute are
   *   present.
   * @param aAttrDef.bindName Optional override of attributeName for purposes of
   *   the binding property's name.
   * @param aAttrDef.singular Is the attribute going to happen at most once
   *   (true), or potentially multiple times (false). This affects whether
   *   the binding returns a list or just a single item (which is null when
   *   the attribute is not present).
   * @param [aAttrDef.emptySetIsSignificant=false] Should we
   *   (doc truncated upstream; the code below only shows that this defaults
   *   to false when omitted).
   * @param aAttrDef.subjectNouns A list of object types (NOUNs) that this
   *   attribute can be set on. Each element in the list should be one of the
   *   NOUN_* constants or a dynamically registered noun type.
   * @param aAttrDef.objectNoun The object type (one of the NOUN_* constants or
   *   a dynamically registered noun types) that is the 'object' in the
   *   traditional RDF triple. More pragmatically, in the database row used
   *   to represent an attribute, we store the subject (ex: message ID),
   *   attribute ID, and an integer which is the integer representation of the
   *   'object' whose type you are defining right here.
   */
  defineAttribute(aAttrDef) {
    // ensure required properties exist on aAttrDef
    if (
      !("provider" in aAttrDef) ||
      !("extensionName" in aAttrDef) ||
      !("attributeType" in aAttrDef) ||
      !("attributeName" in aAttrDef) ||
      !("singular" in aAttrDef) ||
      !("subjectNouns" in aAttrDef) ||
      !("objectNoun" in aAttrDef)
    ) {
      // perhaps we should have a list of required attributes, perchance with
      // and explanation of what it holds, and use that to be friendlier?
      throw Error(
        "You omitted a required attribute defining property, please" +
          " consult the documentation as penance."
      );
    }

    // -- Fill in defaults
    if (!("emptySetIsSignificant" in aAttrDef)) {
      aAttrDef.emptySetIsSignificant = false;
    }

    // Attributes are queryable by default only when they declare a facet.
    if (!("canQuery" in aAttrDef)) {
      aAttrDef.canQuery = !!aAttrDef.facet;
    }

    // return if the attribute has already been defined
    if (aAttrDef.dbDef) {
      return aAttrDef;
    }

    // - first time we've seen a provider init logic
    if (!(aAttrDef.provider.providerName in this._attrProviders)) {
      this._attrProviders[aAttrDef.provider.providerName] = [];
      if (aAttrDef.provider.contentWhittle) {
        whittlerRegistry.registerWhittler(aAttrDef.provider);
      }
    }

    // Attributes are keyed by "extensionName:attributeName".
    let compoundName = aAttrDef.extensionName + ":" + aAttrDef.attributeName;
    // -- Database Definition
    let attrDBDef;
    if (compoundName in GlodaDatastore._attributeDBDefs) {
      // the existence of the GlodaAttributeDBDef means that either it has
      // already been fully defined, or has been loaded from the database but
      // not yet 'bound' to a provider (and had important meta-info that
      // doesn't go in the db copied over)
      attrDBDef = GlodaDatastore._attributeDBDefs[compoundName];
    } else {
      // we need to create the attribute definition in the database
      let attrID = null;
      attrID = GlodaDatastore._createAttributeDef(
        aAttrDef.attributeType,
        aAttrDef.extensionName,
        aAttrDef.attributeName,
        null
      );

      attrDBDef = new GlodaAttributeDBDef(
        GlodaDatastore,
        attrID,
        compoundName,
        aAttrDef.attributeType,
        aAttrDef.extensionName,
        aAttrDef.attributeName
      );
      GlodaDatastore._attributeDBDefs[compoundName] = attrDBDef;
      GlodaDatastore._attributeIDToDBDefAndParam[attrID] = [attrDBDef, null];
    }

    // Cross-link the in-memory definition and its database definition.
    aAttrDef.dbDef = attrDBDef;
    attrDBDef.attrDef = aAttrDef;

    aAttrDef.id = aAttrDef.dbDef.id;

    // The bound property name defaults to the attribute name.
    if ("bindName" in aAttrDef) {
      aAttrDef.boundName = aAttrDef.bindName;
    } else {
      aAttrDef.boundName = aAttrDef.attributeName;
    }

    aAttrDef.objectNounDef = this._nounIDToDef[aAttrDef.objectNoun];
    aAttrDef.objectNounDef.objectNounOfAttributes.push(aAttrDef);

    // -- Facets
    // Fill in any facet-definition fields the caller omitted, using the
    // object noun's id attribute and comparator as the defaults.
    function normalizeFacetDef(aFacetDef) {
      if (!("groupIdAttr" in aFacetDef)) {
        aFacetDef.groupIdAttr = aAttrDef.objectNounDef.idAttr;
      }
      if (!("groupComparator" in aFacetDef)) {
        aFacetDef.groupComparator = aAttrDef.objectNounDef.comparator;
      }
      if (!("filter" in aFacetDef)) {
        aFacetDef.filter = null;
      }
    }
    // No facet attribute means no facet desired; set an explicit null so that
    // code can check without doing an "in" check.
    if (!("facet" in aAttrDef)) {
      aAttrDef.facet = null;
    } else if (aAttrDef.facet === true) {
      // Promote "true" facet values to the defaults. Where attributes have
      // specified values, make sure we fill in any missing defaults.
      aAttrDef.facet = {
        type: "default",
        groupIdAttr: aAttrDef.objectNounDef.idAttr,
        groupComparator: aAttrDef.objectNounDef.comparator,
        filter: null,
      };
    } else {
      normalizeFacetDef(aAttrDef.facet);
    }
    if ("extraFacets" in aAttrDef) {
      for (let facetDef of aAttrDef.extraFacets) {
        normalizeFacetDef(facetDef);
      }
    }

    // Copy every known localized string for aPropRoot into aStickIn, keyed by
    // the names in _ATTR_LOCALIZED_STRINGS; missing strings are skipped.
    function gatherLocalizedStrings(aBundle, aPropRoot, aStickIn) {
      for (let propName in Gloda._ATTR_LOCALIZED_STRINGS) {
        let attrName = Gloda._ATTR_LOCALIZED_STRINGS[propName];
        try {
          aStickIn[attrName] = aBundle.GetStringFromName(aPropRoot + propName);
        } catch (ex) {
          // do nothing. nsIStringBundle throws exceptions when not found
        }
      }
    }

    // -- L10n.
    // If the provider has a string bundle, populate a "strings" attribute with
    // our standard attribute strings that can be UI exposed.
    if ("strings" in aAttrDef.provider && aAttrDef.facet) {
      let bundle = aAttrDef.provider.strings;

      // -- attribute strings
      let attrStrings = (aAttrDef.facet.strings = {});
      // we use the first subject the attribute applies to as the basis of
      // where to get the string from. Mainly because we currently don't have
      // any attributes with multiple subjects nor a use-case where we expose
      // multiple noun types via the UI. (Just messages right now.)
      let canonicalSubject = this._nounIDToDef[aAttrDef.subjectNouns[0]];
      let propRoot =
        "gloda." +
        canonicalSubject.name +
        ".attr." +
        aAttrDef.attributeName +
        ".";
      gatherLocalizedStrings(bundle, propRoot, attrStrings);

      // -- alias strings for synthetic facets
      if ("extraFacets" in aAttrDef) {
        for (let facetDef of aAttrDef.extraFacets) {
          facetDef.strings = {};
          let aliasPropRoot =
            "gloda." + canonicalSubject.name + ".attr." + facetDef.alias + ".";
          gatherLocalizedStrings(bundle, aliasPropRoot, facetDef.strings);
        }
      }
    }

    // -- Subject Noun Binding
    for (
      let iSubject = 0;
      iSubject < aAttrDef.subjectNouns.length;
      iSubject++
    ) {
      let subjectType = aAttrDef.subjectNouns[iSubject];
      let subjectNounDef = this._nounIDToDef[subjectType];
      this._bindAttribute(aAttrDef, subjectNounDef);

      // update the provider maps...
      if (
        !this._attrProviderOrderByNoun[subjectType].includes(aAttrDef.provider)
      ) {
        this._attrProviderOrderByNoun[subjectType].push(aAttrDef.provider);
        if (aAttrDef.provider.optimize) {
          this._attrOptimizerOrderByNoun[subjectType].push(aAttrDef.provider);
        }
        this._attrProvidersByNoun[subjectType][aAttrDef.provider.providerName] =
          [];
      }
      this._attrProvidersByNoun[subjectType][
        aAttrDef.provider.providerName
      ].push(aAttrDef);

      subjectNounDef.attribsByBoundName[aAttrDef.boundName] = aAttrDef;
      if (aAttrDef.domExpose) {
        subjectNounDef.domExposeAttribsByBoundName[aAttrDef.boundName] =
          aAttrDef;
      }

      // Special-column attributes are loaded alongside the row itself.
      if (
        "special" in aAttrDef &&
        aAttrDef.special & GlodaConstants.kSpecialColumn
      ) {
        subjectNounDef.specialLoadAttribs.push(aAttrDef);
      }

      // if this is a parent column attribute, make note of it so that if we
      // need to do an inverse references lookup, we know what column we are
      // issuing against.
      if (
        "special" in aAttrDef &&
        aAttrDef.special === GlodaConstants.kSpecialColumnParent
      ) {
        subjectNounDef.parentColumnAttr = aAttrDef;
      }

      // Object nouns with their own table (or explicit dependency logic)
      // mean the subject has object dependencies to resolve at load time.
      if (
        aAttrDef.objectNounDef.tableName ||
        aAttrDef.objectNounDef.contributeObjDependencies
      ) {
        subjectNounDef.hasObjDependencies = true;
      }
    }

    this._attrProviders[aAttrDef.provider.providerName].push(aAttrDef);
    return aAttrDef;
  },

  /**
   * Retrieve the attribute provided by the given extension with the given
   * attribute name. The original idea was that plugins would effectively
   * name-space attributes, helping avoid collisions. Since we are leaning
   * towards using binding heavily, this doesn't really help, as the collisions
   * will just occur on the attribute name instead. Also, this can turn
   * extensions into liars as name changes/moves to core/etc. happen.
   *
   * @param aPluginName The extension name the attribute was defined under.
   * @param aAttrName The attribute name.
   * @returns The GlodaAttributeDBDef registered under
   *   "aPluginName:aAttrName", or undefined if none exists.
   *
   * @TODO consider removing the extension name argument parameter requirement
   */
  getAttrDef(aPluginName, aAttrName) {
    let compoundName = aPluginName + ":" + aAttrName;
    return GlodaDatastore._attributeDBDefs[compoundName];
  },

  /**
   * Create a new query instance for the given noun-type. This provides
   * a generic way to provide constraint-based queries of any first-class
   * nouns supported by the system.
   *
   * The idea is that every attribute on an object can be used to express
   * a constraint on the query object. Constraints implicitly 'AND' together,
   * but providing multiple arguments to a constraint function results in an
   * 'OR'ing of those values. Additionally, you can call or() on the returned
   * query to create an alternate query that is effectively a giant OR against
   * all the constraints you create on the main query object (or any other
   * alternate queries returned by or()). (Note: there is no nesting of these
   * alternate queries. query.or().or() is equivalent to query.or())
   * For each attribute, there is a constraint with the same name that takes
   * one or more arguments.
The arguments represent a set of OR values that
+   * objects matching the query can have.  (If you want the constraint
+   * effectively ANDed together, just invoke the constraint function
+   * multiple times.)  For example, newQuery(NOUN_PERSON).age(25) would
+   * constrain to all the people aged 25, while age(25, 26) would constrain
+   * to all the people aged 25 or 26.
+   * For each attribute with a 'continuous' noun, there is a constraint with the
+   * attribute name with "Range" appended.  It takes two arguments which are an
+   * inclusive lower bound and an inclusive upper bound for values in the
+   * range.  If you would like an open-ended range on either side, pass null
+   * for that argument.  If you would like to specify multiple ranges that
+   * should be ORed together, simply pass additional (pairs of) arguments.
+   * For example, newQuery(NOUN_PERSON).age(25,100) would constrain to all
+   * the people who are >= 25 and <= 100.  Likewise age(25, null) would just
+   * return all the people who are 25 or older.  And age(25,30,35,40) would
+   * return people who are either 25-30 or 35-40.
+   * There are also full-text constraint columns.  In a nutshell, their
+   * arguments are the strings that should be passed to the SQLite FTS3
+   * MATCH clause.
+   *
+   * @param aNounID The (integer) noun-id of the noun you want to query on.
+   * @param aOptions an optional dictionary of query options, see the GlodaQuery
+   *     class documentation.
+   */
+  newQuery(aNounID, aOptions) {
+    let nounDef = this._nounIDToDef[aNounID];
+    return new nounDef.queryClass(aOptions);
+  },
+
+  /**
+   * Create a collection/query for the given noun-type that only matches the
+   * provided items.  This is to be used when you have an explicit set of items
+   * that you would still like to receive updates for. 
+ */ + explicitCollection(aNounID, aItems) { + let nounDef = this._nounIDToDef[aNounID]; + let collection = new GlodaCollection(nounDef, aItems, null, null); + let query = new nounDef.explicitQueryClass(collection); + collection.query = query; + GlodaCollectionManager.registerCollection(collection); + return collection; + }, + + /** + * Debugging 'wildcard' collection creation support. A wildcard collection + * will 'accept' any new item instances presented to the collection manager + * as new. The result is that it allows you to be notified as new items + * as they are indexed, existing items as they are loaded from the database, + * etc. + * Because the items are added to the collection without limit, this will + * result in a leak if you don't do something to clean up after the + * collection. (Forgetting about the collection will suffice, as it is still + * weakly held.) + */ + _wildcardCollection(aNounID, aItems) { + let nounDef = this._nounIDToDef[aNounID]; + let collection = new GlodaCollection(nounDef, aItems, null, null); + let query = new nounDef.wildcardQueryClass(collection); + collection.query = query; + GlodaCollectionManager.registerCollection(collection); + return collection; + }, + + /** + * Attribute providers attempting to index something that experience a fatal + * problem should throw one of these. For example: + * "throw new Gloda.BadItemContentsError('Message lacks an author.');". + * + * We're not really taking advantage of this yet, but it's a good idea. + */ + BadItemContentsError, + + /* eslint-disable complexity */ + /** + * Populate a gloda representation of an item given the thus-far built + * representation, the previous representation, and one or more raw + * representations. The attribute providers/optimizers for the given noun + * type are invoked, allowing them to contribute/alter things. Following + * that, we build and persist our attribute representations. 
+ * + * The result of the processing ends up with attributes in 3 different forms: + * - Database attribute rows (to be added and removed). + * - In-memory representation. + * - JSON-able representation. + * + * @param aItem The noun instance you want processed. + * @param aRawReps A dictionary that we pass to the attribute providers. + * There is a(n implied) contract between the caller of grokNounItem for a + * given noun type and the attribute providers for that noun type, and we + * have nothing to do with it OTHER THAN inserting a 'trueGlodaRep' + * value into it. In the event of reindexing an existing object, the + * gloda representation we pass to the indexers is actually a clone that + * allows the asynchronous indexers to mutate the object without + * causing visible changes in the existing representation of the gloda + * object. We patch the changes back onto the original item atomically + * once indexing completes. The 'trueGlodaRep' is then useful for + * objects that hang off of the gloda instance that need a reference + * back to their containing object for API convenience purposes. + * @param aIsConceptuallyNew Is the item "new" in the sense that it would + * never have been visible from within user code? This translates into + * whether this should trigger an itemAdded notification or an + * itemModified notification. + * @param aIsRecordNew Is the item "new" in the sense that we should INSERT + * a record rather than UPDATE-ing a record. For example, when dealing + * with messages where we may have a ghost, the ghost message is not a + * new record, but is conceptually new. + * @param aCallbackHandle The GlodaIndexer-style callback handle that is being + * used to drive this processing in an async fashion. (See + * GlodaIndexer._callbackHandle). + * @param aDoCache Should we allow this item to be contributed to its noun + * cache? 
+ */ + *grokNounItem( + aItem, + aRawReps, + aIsConceptuallyNew, + aIsRecordNew, + aCallbackHandle, + aDoCache + ) { + let itemNounDef = aItem.NOUN_DEF; + let attribsByBoundName = itemNounDef.attribsByBoundName; + + this._log.info(" ** grokNounItem: " + itemNounDef.name); + + let addDBAttribs = []; + let removeDBAttribs = []; + + let jsonDict = {}; + + let aOldItem; + aRawReps.trueGlodaRep = aItem; + if (aIsConceptuallyNew) { + // there is no old item if we are new. + aOldItem = {}; + } else { + aOldItem = aItem; + // we want to create a clone of the existing item so that we can know the + // deltas that happened for indexing purposes + aItem = aItem._clone(); + } + + // Have the attribute providers directly set properties on the aItem + let attrProviders = this._attrProviderOrderByNoun[itemNounDef.id]; + for (let iProvider = 0; iProvider < attrProviders.length; iProvider++) { + this._log.info(" * provider: " + attrProviders[iProvider].providerName); + yield aCallbackHandle.pushAndGo( + attrProviders[iProvider].process( + aItem, + aRawReps, + aIsConceptuallyNew, + aCallbackHandle + ) + ); + } + + let attrOptimizers = this._attrOptimizerOrderByNoun[itemNounDef.id]; + for (let iProvider = 0; iProvider < attrOptimizers.length; iProvider++) { + this._log.info( + " * optimizer: " + attrOptimizers[iProvider].providerName + ); + yield aCallbackHandle.pushAndGo( + attrOptimizers[iProvider].optimize( + aItem, + aRawReps, + aIsConceptuallyNew, + aCallbackHandle + ) + ); + } + this._log.info(" ** done with providers."); + + // Iterate over the attributes on the item + for (let key of Object.keys(aItem)) { + let value = aItem[key]; + // ignore keys that start with underscores, they are private and not + // persisted by our attribute mechanism. (they are directly handled by + // the object implementation.) 
+ if (key.startsWith("_")) { + continue; + } + // find the attribute definition that corresponds to this key + let attrib = attribsByBoundName[key]; + // if there's no attribute, that's not good, but not horrible. + if (attrib === undefined) { + this._log.warn("new proc ignoring attrib: " + key); + continue; + } + + let attribDB = attrib.dbDef; + let objectNounDef = attrib.objectNounDef; + + // - translate for our JSON rep + if (attrib.singular) { + if (objectNounDef.toJSON) { + jsonDict[attrib.id] = objectNounDef.toJSON(value); + } else { + jsonDict[attrib.id] = value; + } + } else if (objectNounDef.toJSON) { + let toJSON = objectNounDef.toJSON; + jsonDict[attrib.id] = []; + for (let subValue of value) { + jsonDict[attrib.id].push(toJSON(subValue)); + } + } else { + jsonDict[attrib.id] = value; + } + + let oldValue = aOldItem[key]; + + // the 'old' item is still the canonical one; update it + // do the update now, because we may skip operations on addDBAttribs and + // removeDBattribs, if the attribute is not to generate entries in + // messageAttributes + if (oldValue !== undefined || !aIsConceptuallyNew) { + aOldItem[key] = value; + } + + // the new canQuery property has to be set to true to generate entries + // in the messageAttributes table. Any other truthy value (like a non + // empty string), will still make the message query-able but without + // using the database. + if (attrib.canQuery !== true) { + continue; + } + + // - database index attributes + + // perform a delta analysis against the old value, if we have one + if (oldValue !== undefined) { + // in the singular case if they don't match, it's one add and one remove + if (attrib.singular) { + // test for identicality, failing that, see if they have explicit + // equals support. 
+ if ( + value !== oldValue && + (!value.equals || !value.equals(oldValue)) + ) { + addDBAttribs.push(attribDB.convertValuesToDBAttributes([value])[0]); + removeDBAttribs.push( + attribDB.convertValuesToDBAttributes([oldValue])[0] + ); + } + } else if (objectNounDef.computeDelta) { + // in the plural case, we have to figure the deltas accounting for + // possible changes in ordering (which is insignificant from an + // indexing perspective) + // some nouns may not meet === equivalence needs, so must provide a + // custom computeDelta method to help us out + let [valuesAdded, valuesRemoved] = objectNounDef.computeDelta( + value, + oldValue + ); + // convert the values to database-style attribute rows + addDBAttribs.push.apply( + addDBAttribs, + attribDB.convertValuesToDBAttributes(valuesAdded) + ); + removeDBAttribs.push.apply( + removeDBAttribs, + attribDB.convertValuesToDBAttributes(valuesRemoved) + ); + } else { + // build a map of the previous values; we will delete the values as + // we see them so that we will know what old values are no longer + // present in the current set of values. + let oldValueMap = {}; + for (let anOldValue of oldValue) { + // remember, the key is just the toString'ed value, so we need to + // store and use the actual value as the value! + oldValueMap[anOldValue] = anOldValue; + } + // traverse the current values... + let valuesAdded = []; + for (let curValue of value) { + if (curValue in oldValueMap) { + delete oldValueMap[curValue]; + } else { + valuesAdded.push(curValue); + } + } + // anything still on oldValueMap was removed. + let valuesRemoved = Object.keys(oldValueMap).map( + key => oldValueMap[key] + ); + // convert the values to database-style attribute rows + addDBAttribs.push.apply( + addDBAttribs, + attribDB.convertValuesToDBAttributes(valuesAdded) + ); + removeDBAttribs.push.apply( + removeDBAttribs, + attribDB.convertValuesToDBAttributes(valuesRemoved) + ); + } + + // Add/remove the empty set indicator as appropriate. 
+ if (attrib.emptySetIsSignificant) { + // if we are now non-zero but previously were zero, remove. + if (value.length && !oldValue.length) { + removeDBAttribs.push([GlodaDatastore.kEmptySetAttrId, attribDB.id]); + } else if (!value.length && oldValue.length) { + // We are now zero length but previously were not, add. + addDBAttribs.push([GlodaDatastore.kEmptySetAttrId, attribDB.id]); + } + } + } else { + // no old value, all values are new + // add the db reps on the new values + if (attrib.singular) { + value = [value]; + } + addDBAttribs.push.apply( + addDBAttribs, + attribDB.convertValuesToDBAttributes(value) + ); + // Add the empty set indicator for the attribute id if appropriate. + if (!value.length && attrib.emptySetIsSignificant) { + addDBAttribs.push([GlodaDatastore.kEmptySetAttrId, attribDB.id]); + } + } + } + + // Iterate over any remaining values in old items for purge purposes. + for (let key of Object.keys(aOldItem)) { + let value = aOldItem[key]; + // ignore keys that start with underscores, they are private and not + // persisted by our attribute mechanism. (they are directly handled by + // the object implementation.) + if (key.startsWith("_")) { + continue; + } + // ignore things we saw in the new guy + if (key in aItem) { + continue; + } + + // find the attribute definition that corresponds to this key + let attrib = attribsByBoundName[key]; + // if there's no attribute, that's not good, but not horrible. 
+ if (attrib === undefined) { + continue; + } + + // delete these from the old item, as the old item is canonical, and + // should no longer have these values + delete aOldItem[key]; + + if (attrib.canQuery !== true) { + this._log.debug( + "Not inserting attribute " + + attrib.attributeName + + " into the db, since we don't plan on querying on it" + ); + continue; + } + + if (attrib.singular) { + value = [value]; + } + let attribDB = attrib.dbDef; + removeDBAttribs.push.apply( + removeDBAttribs, + attribDB.convertValuesToDBAttributes(value) + ); + // remove the empty set marker if there should have been one + if (!value.length && attrib.emptySetIsSignificant) { + removeDBAttribs.push([GlodaDatastore.kEmptySetAttrId, attribDB.id]); + } + } + + aItem._jsonText = JSON.stringify(jsonDict); + this._log.debug(" json text: " + aItem._jsonText); + + if (aIsRecordNew) { + this._log.debug(" inserting item"); + itemNounDef.objInsert.call(itemNounDef.datastore, aItem); + } else { + this._log.debug(" updating item"); + itemNounDef.objUpdate.call(itemNounDef.datastore, aItem); + } + + this._log.debug( + " adjusting attributes, add: " + addDBAttribs + " rem: " + removeDBAttribs + ); + itemNounDef.dbAttribAdjuster.call( + itemNounDef.datastore, + aItem, + addDBAttribs, + removeDBAttribs + ); + + if (!aIsConceptuallyNew && "_declone" in aOldItem) { + aOldItem._declone(aItem); + } + + // Cache ramifications... + if (aDoCache === undefined || aDoCache) { + if (aIsConceptuallyNew) { + GlodaCollectionManager.itemsAdded(aItem.NOUN_ID, [aItem]); + } else { + GlodaCollectionManager.itemsModified(aOldItem.NOUN_ID, [aOldItem]); + } + } + + this._log.debug(" done grokking."); + + yield GlodaConstants.kWorkDone; + }, + /* eslint-enable complexity */ + + /** + * Processes a list of noun instances for their score within a given context. + * This is primarily intended for use by search ranking mechanisms, but could + * be used elsewhere too. 
(It does, however, depend on the complicity of the + * score method implementations to not get confused.) + * + * @param aItems The non-empty list of items to score. + * @param aContext A noun-specific dictionary that we just pass to the funcs. + * @param aExtraScoreFuncs A list of extra scoring functions to apply. + * @returns A list of integer scores equal in length to aItems. + */ + scoreNounItems(aItems, aContext, aExtraScoreFuncs) { + let scores = []; + // bail if there is nothing to score + if (!aItems.length) { + return scores; + } + + let itemNounDef = aItems[0].NOUN_DEF; + if (aExtraScoreFuncs == null) { + aExtraScoreFuncs = []; + } + + for (let item of aItems) { + let score = 0; + let attrProviders = this._attrProviderOrderByNoun[itemNounDef.id]; + for (let iProvider = 0; iProvider < attrProviders.length; iProvider++) { + let provider = attrProviders[iProvider]; + if (provider.score) { + score += provider.score(item); + } + } + for (let extraScoreFunc of aExtraScoreFuncs) { + score += extraScoreFunc(item, aContext); + } + scores.push(score); + } + + return scores; + }, +}; + +/* and initialize the Gloda object/NS before we return... */ +try { + Gloda._init(); +} catch (ex) { + Gloda._log.debug( + "Exception during Gloda init (" + + ex.fileName + + ":" + + ex.lineNumber + + "): " + + ex + ); +} +/* but don't forget that we effectively depend on Everybody.jsm too, and + currently on our importer to be importing that if they need us fully armed + and operational. */ diff --git a/comm/mailnews/db/gloda/modules/GlodaConstants.jsm b/comm/mailnews/db/gloda/modules/GlodaConstants.jsm new file mode 100644 index 0000000000..1e6d253f09 --- /dev/null +++ b/comm/mailnews/db/gloda/modules/GlodaConstants.jsm @@ -0,0 +1,250 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/** + * The constants used by Gloda files. Avoid importing anything into this file. + */ + +const EXPORTED_SYMBOLS = ["GlodaConstants"]; + +var GlodaConstants = { + /** + * The indexer is idle. + */ + kIndexerIdle: 0, + /** + * The indexer is doing something. We used to have other specific states, but + * they have been rendered irrelevant and wiped from existence. + */ + kIndexerIndexing: 1, + + /** + * Synchronous activities performed that can be thought of as one processing + * token. Potentially yield the event-loop and re-schedule for later based + * on how long we've actually taken/etc. The goal here is that code that + * is doing stuff synchronously yields with kWorkSync periodically to make + * sure that it doesn't dominate the event-loop. Unless the processing + * in question is particularly intensive, it should be reasonable to apply + * some decimation factor (ex: 32 or 64) with the general goal of yielding + * every 3-10 milliseconds. + */ + kWorkSync: 0, + /** + * Asynchronous activity performed, you need to relinquish flow control and + * trust us to call callbackDriver later. + */ + kWorkAsync: 1, + /** + * We are all done with our task, close us and figure out something else to do. + */ + kWorkDone: 2, + /** + * We are not done with our task, but we think it's a good idea to take a + * breather because we believe we have tied up the event loop for a + * non-trivial amount of time. So please re-schedule us in the future. + * + * This is currently only used internally by the indexer's batching logic; + * minor changes may be required if used by actual indexers. + */ + kWorkPause: 3, + /** + * We are done with our task, and have a result that we are returning. This + * should only be used by your callback handler's doneWithResult method. + * Ex: you are passed aCallbackHandle, and you do + * "yield aCallbackHandle.doneWithResult(myResult);". + */ + kWorkDoneWithResult: 4, + + /** + * An attribute that is a defining characteristic of the subject. 
+ */ + kAttrFundamental: 0, + /** + * An attribute that is an optimization derived from two or more fundamental + * attributes and exists solely to improve database query performance. + */ + kAttrOptimization: 1, + /** + * An attribute that is derived from the content of the subject. For example, + * a message that references a bugzilla bug could have a "derived" attribute + * that captures the bugzilla reference. This is not + */ + kAttrDerived: 2, + /** + * An attribute that is the result of an explicit and intentional user action + * upon the subject. For example, a tag placed on a message by a user (or + * at the user's request by a filter) is explicit. + */ + kAttrExplicit: 3, + /** + * An attribute that is indirectly the result of a user's behaviour. For + * example, if a user consults a message multiple times, we may conclude that + * the user finds the message interesting. It is "implied", if you will, + * that the message is interesting. + */ + kAttrImplicit: 4, + + /** + * This attribute is not 'special'; it is stored as a (thing id, attribute id, + * attribute id) tuple in the database rather than on thing's row or on + * thing's fulltext row. (Where "thing" could be a message or any other + * first class noun.) + */ + kSpecialNotAtAll: 0, + /** + * This attribute is stored as a numeric column on the row for the noun. The + * attribute definition should include this value as 'special' and the + * column name that stores the attribute as 'specialColumnName'. + */ + kSpecialColumn: 16, + kSpecialColumnChildren: 16 | 1, + kSpecialColumnParent: 16 | 2, + /** + * This attribute is stored as a string column on the row for the noun. It + * differs from kSpecialColumn in that it is a string, which once had + * query ramifications and one day may have them again. + */ + kSpecialString: 32, + /** + * This attribute is stored as a fulltext column on the fulltext table for + * the noun. 
The attribute definition should include this value as 'special' + * and the column name that stores the table as 'specialColumnName'. + */ + kSpecialFulltext: 64, + + /** + * The extensionName used for the attributes defined by core gloda plugins + * such as GlodaFundAttr.jsm and GlodaExplicitAttr.jsm. + */ + BUILT_IN: "built-in", + + /** + * Special sentinel value that will cause facets to skip a noun instance + * when an attribute has this value. + */ + IGNORE_FACET: "ignore-facet", + + /* + * The following are explicit noun IDs. While most extension-provided nouns + * will have dynamically allocated id's that are looked up by name, these + * id's can be relied upon to exist and be accessible via these + * pseudo-constants. It's not really clear that we need these, although it + * does potentially simplify code to not have to look up all of their nouns + * at initialization time. + */ + /** + * Boolean values, expressed as 0/1 in the database and non-continuous for + * constraint purposes. Like numbers, such nouns require their attributes + * to provide them with context, lacking any of their own. + * Having this as a noun type may be a bad idea; a change of nomenclature + * (so that we are not claiming a boolean value is a noun, but still using + * it in the same way) or implementation to require each boolean noun + * actually be its own noun may be in order. + */ + NOUN_BOOLEAN: 1, + /** + * A number, which could mean an integer or floating point values. We treat + * these as continuous, meaning that queries on them can have ranged + * constraints expressed on them. Lacking any inherent context, numbers + * depend on their attributes to parameterize them as required. + * Same deal as with NOUN_BOOLEAN, we may need to change this up conceptually. + */ + NOUN_NUMBER: 2, + /** + * A (non-fulltext) string. + * Same deal as with NOUN_BOOLEAN, we may need to change this up conceptually. 
+ */ + NOUN_STRING: 3, + /** A date, encoded as a PRTime, represented as a js Date object. */ + NOUN_DATE: 10, + /** + * Fulltext search support, somewhat magical. This is only intended to be + * used for kSpecialFulltext attributes, and exclusively as a constraint + * mechanism. The values are always represented as strings. It is presumed + * that the user of this functionality knows how to generate SQLite FTS3 + * style MATCH queries, or is okay with us just gluing them together with + * " OR " when used in an or-constraint case. Gloda's query mechanism + * currently lacks the ability to to compile Gloda-style and-constraints + * into a single MATCH query, but it will turn out okay, just less + * efficiently than it could. + */ + NOUN_FULLTEXT: 20, + /** + * Represents a MIME Type. We currently lack any human-intelligible + * descriptions of mime types. + */ + NOUN_MIME_TYPE: 40, + /** + * Captures a message tag as well as when the tag's presence was observed, + * hoping to approximate when the tag was applied. It's a somewhat dubious + * attempt to not waste our opporunity to store a value along with the tag. + * (The tag is actually stored as an attribute parameter on the attribute + * definition, rather than a value in the attribute 'instance' for the + * message.) + */ + NOUN_TAG: 50, + /** + * Doesn't actually work owing to a lack of an object to represent a folder. + * We do expose the folderURI and folderID of a message, but need to map that + * to a good abstraction. Probably something thin around a SteelFolder or + * the like; we would contribute the functionality to easily move from a + * folder to the list of gloda messages in that folder, as well as the + * indexing preferences for that folder. + * + * @TODO folder noun and related abstraction + */ + NOUN_FOLDER: 100, + /** + * All messages belong to a conversation. See GlodaDataModel.jsm for the + * definition of the GlodaConversation class. 
+ */ + NOUN_CONVERSATION: 101, + /** + * A one-to-one correspondence with underlying (indexed) nsIMsgDBHdr + * instances. See GlodaDataModel.jsm for the definition of the GlodaMessage class. + */ + NOUN_MESSAGE: 102, + /** + * Corresponds to a human being, who may have multiple electronic identities + * (a la NOUN_IDENTITY). There is no requirement for association with an + * address book contact, although when the address book contact exists, + * we want to be associated with it. See GlodaDataModel.jsm for the definition + * of the GlodaContact class. + */ + NOUN_CONTACT: 103, + /** + * A single identity of a contact, who may have one or more. E-mail accounts, + * instant messaging accounts, social network site accounts, etc. are each + * identities. See GlodaDataModel.jsm for the definition of the GlodaIdentity + * class. + */ + NOUN_IDENTITY: 104, + /** + * An attachment to a message. A message may have many different attachments. + */ + NOUN_ATTACHMENT: 105, + /** + * An account related to a message. A message can have only one account. + */ + NOUN_ACCOUNT: 106, + + /** + * Parameterized identities, for use in the from-me, to-me, cc-me optimization + * cases. Not for reuse without some thought. These nouns use the parameter + * to store the 'me' identity that we are talking about, and the value to + * store the identity of the other party. So in both the from-me and to-me + * cases involving 'me' and 'foo@bar', the 'me' identity is always stored via + * the attribute parameter, and the 'foo@bar' identity is always stored as + * the attribute value. See GlodaFundAttr.jsm for more information on this, but + * you probably shouldn't be touching this unless you are fundattr. 
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

const EXPORTED_SYMBOLS = [
  "GlodaContent",
  "whittlerRegistry",
  "mimeMsgToContentAndMeta",
  "mimeMsgToContentSnippetAndMeta",
];

/**
 * Given a MimeMsg and the corresponding folder, return the GlodaContent
 * object.
 *
 * @param aMimeMsg The MimeMessage instance.
 * @param folder The nsIMsgDBFolder the message lives in.
 * @returns An array containing the GlodaContent instance, and the meta
 *   dictionary that the Gloda content providers may have filled with useful
 *   data.
 */
function mimeMsgToContentAndMeta(aMimeMsg, folder) {
  let content = new GlodaContent();
  let meta = { subject: aMimeMsg.get("subject") };
  let bodyLines = aMimeMsg.coerceBodyToPlaintext(folder).split(/\r?\n/);

  for (let whittler of whittlerRegistry.getWhittlers()) {
    whittler.contentWhittle(meta, bodyLines, content);
  }

  return [content, meta];
}

/**
 * Given a MimeMsg, return the whittled content string, suitable for
 * summarizing a message.
 *
 * @param aMimeMsg The MimeMessage instance.
 * @param folder The nsIMsgDBFolder the message lives in.
 * @param length Optional number of characters to trim the whittled content to.
 *   If the actual length of the message is greater than |length|, then the
 *   return value is the first (length-1) characters with an ellipsis appended.
 * @returns An array containing the text of the snippet, and the meta
 *   dictionary that the Gloda content providers may have filled with useful
 *   data.
 */
function mimeMsgToContentSnippetAndMeta(aMimeMsg, folder, length) {
  let [content, meta] = mimeMsgToContentAndMeta(aMimeMsg, folder);

  // Ask for one character more than requested so we can tell whether the
  // content actually had to be truncated.
  let text = content.getContentSnippet(length + 1);
  if (length && text.length > length) {
    text = text.substring(0, length - 1) + "\u2026"; // ellipsis
  }
  return [text, meta];
}

/**
 * A registry of gloda providers that have contentWhittle() functions.
 * Used by mimeMsgToContentSnippet, but populated by the Gloda object as it's
 * processing providers.
 */
function WhittlerRegistry() {
  this._whittlers = [];
}

WhittlerRegistry.prototype = {
  /**
   * Add a provider as a content whittler.
   */
  registerWhittler(provider) {
    this._whittlers.push(provider);
  },
  /**
   * Get the list of content whittlers, sorted from the most specific to
   * the most generic.
   */
  getWhittlers() {
    // Use the concat() trick to avoid mutating the internal array and
    // leaking an internal representation.
    return this._whittlers.concat().reverse();
  },
};

const whittlerRegistry = new WhittlerRegistry();

/**
 * Accumulator for whittled message content: an ordered list of "hunks"
 * (meta / quoted / actual content) plus key-value metadata, produced by
 * whichever content whittler volunteers at the highest priority.
 */
function GlodaContent() {
  this._contentPriority = null;
  this._producing = false;
  // Initialize all accumulators up front so the consumer API
  // (hasContent/getContentString/getContentSnippet) is safe to call even
  // before any producer has volunteered content. The original code left
  // _keysAndValues/_keysAndDeltaValues/_curHunk undefined until the first
  // volunteerContent() call, making getContentString(true) throw.
  this._resetContent();
}

GlodaContent.prototype = {
  kPriorityBase: 0,
  kPriorityPerfect: 100,

  kHunkMeta: 1,
  kHunkQuoted: 2,
  kHunkContent: 3,

  /** Clear all accumulated state; called when a new producer takes over. */
  _resetContent() {
    this._keysAndValues = [];
    this._keysAndDeltaValues = [];
    this._hunks = [];
    this._curHunk = null;
  },

  /* ===== Consumer API ===== */
  hasContent() {
    return this._contentPriority != null;
  },

  /**
   * Return content suitable for snippet display. This means that no quoting
   * or meta-data should be returned.
   *
   * @param aMaxLength The maximum snippet length desired.
   */
  getContentSnippet(aMaxLength) {
    let content = this.getContentString();
    if (aMaxLength) {
      content = content.substring(0, aMaxLength);
    }
    return content;
  },

  /**
   * Join all content hunks with newlines; when aIndexingPurposes is true,
   * additionally append the metadata values (we assume the keys are cruft).
   */
  getContentString(aIndexingPurposes) {
    let data = "";
    for (let hunk of this._hunks) {
      if (hunk.hunkType == this.kHunkContent) {
        if (data) {
          data += "\n" + hunk.data;
        } else {
          data = hunk.data;
        }
      }
    }

    if (aIndexingPurposes) {
      // Append the values for indexing. We assume the keywords are cruft.
      for (let kv of this._keysAndValues) {
        data += "\n" + kv[1];
      }
      // BUGFIX: iterate the delta triples here. The original iterated
      // _keysAndValues again, whose [key, value] pairs have no [2] element,
      // so "undefined" leaked into the indexed text and delta values were
      // never indexed at all.
      for (let kon of this._keysAndDeltaValues) {
        data += "\n" + kon[1] + "\n" + kon[2];
      }
    }

    return data;
  },

  /* ===== Producer API ===== */
  /**
   * Called by a producer with the priority they believe their interpretation
   * of the content comes in at.
   *
   * @returns true if we believe the producer's interpretation will be
   *   interesting and they should go ahead and generate events. We return
   *   false if we don't think they are interesting, in which case they should
   *   probably not issue calls to us, although we don't care. (We will
   *   ignore their calls if we return false, this allows the simplification
   *   of code that needs to run anyways.)
   */
  volunteerContent(aPriority) {
    if (this._contentPriority === null || this._contentPriority < aPriority) {
      this._contentPriority = aPriority;
      this._resetContent();
      this._producing = true;
      return true;
    }
    this._producing = false;
    return false;
  },

  /** Record a simple key/value metadata pair (ignored unless producing). */
  keyValue(aKey, aValue) {
    if (!this._producing) {
      return;
    }

    this._keysAndValues.push([aKey, aValue]);
  },

  /** Record a key with an old-value/new-value delta (ignored unless producing). */
  keyValueDelta(aKey, aOldValue, aNewValue) {
    if (!this._producing) {
      return;
    }

    this._keysAndDeltaValues.push([aKey, aOldValue, aNewValue]);
  },

  /**
   * Meta lines are lines that have to do with the content but are not the
   * content and can generally be related to an attribute that has been derived
   * and stored on the item.
   * For example, a bugzilla bug may note that an attachment was created; this
   * is not content and wouldn't be desired in a snippet, but is still
   * potentially interesting meta-data.
   *
   * @param aLineOrLines The line or list of lines that are meta-data.
   * @param aAttr The attribute this meta-data is associated with.
   * @param aIndex If the attribute is non-singular, indicate the specific
   *   index of the item in the attribute's bound list that the meta-data
   *   is associated with.
   */
  meta(aLineOrLines, aAttr, aIndex) {
    if (!this._producing) {
      return;
    }

    let data;
    if (typeof aLineOrLines == "string") {
      data = aLineOrLines;
    } else {
      data = aLineOrLines.join("\n");
    }

    this._curHunk = {
      hunkType: this.kHunkMeta,
      attr: aAttr,
      index: aIndex,
      data,
    };
    this._hunks.push(this._curHunk);
  },

  /**
   * Quoted lines reference previous messages or what not.
   *
   * @param aLineOrLines The line or list of lines that are quoted.
   * @param aDepth The depth of the quoting.
   * @param aOrigin The item that originated the original content, if known.
   *   For example, perhaps a GlodaMessage?
   * @param aTarget A reference to the location in the original content, if
   *   known. For example, the index of a line in a message or something?
   */
  quoted(aLineOrLines, aDepth, aOrigin, aTarget) {
    if (!this._producing) {
      return;
    }

    let data;
    if (typeof aLineOrLines == "string") {
      data = aLineOrLines;
    } else {
      data = aLineOrLines.join("\n");
    }

    // Coalesce consecutive quoted lines into one hunk as long as the quote
    // depth/origin/target are unchanged.
    if (
      !this._curHunk ||
      this._curHunk.hunkType != this.kHunkQuoted ||
      this._curHunk.depth != aDepth ||
      this._curHunk.origin != aOrigin ||
      this._curHunk.target != aTarget
    ) {
      this._curHunk = {
        hunkType: this.kHunkQuoted,
        data,
        depth: aDepth,
        origin: aOrigin,
        target: aTarget,
      };
      this._hunks.push(this._curHunk);
    } else {
      this._curHunk.data += "\n" + data;
    }
  },

  /** Record actual (non-quoted, non-meta) content lines. */
  content(aLineOrLines) {
    if (!this._producing) {
      return;
    }

    let data;
    if (typeof aLineOrLines == "string") {
      data = aLineOrLines;
    } else {
      data = aLineOrLines.join("\n");
    }

    // Coalesce consecutive content lines into a single hunk.
    if (!this._curHunk || this._curHunk.hunkType != this.kHunkContent) {
      this._curHunk = { hunkType: this.kHunkContent, data };
      this._hunks.push(this._curHunk);
    } else {
      this._curHunk.data += "\n" + data;
    }
  },
};
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

const EXPORTED_SYMBOLS = [
  "GlodaAttributeDBDef",
  "GlodaAccount",
  "GlodaConversation",
  "GlodaFolder",
  "GlodaMessage",
  "GlodaContact",
  "GlodaIdentity",
  "GlodaAttachment",
];

const { GlodaConstants } = ChromeUtils.import(
  "resource:///modules/gloda/GlodaConstants.jsm"
);
const { MailServices } = ChromeUtils.import(
  "resource:///modules/MailServices.jsm"
);
var LOG = console.createInstance({
  prefix: "gloda.datamodel",
  maxLogLevel: "Warn",
  maxLogLevelPref: "gloda.loglevel",
});

/**
 * @class Represents a gloda attribute definition's DB form. This class
 * stores the information in the database relating to this attribute
 * definition. Access its attrDef attribute to get at the really juicy data.
 * The main interesting thing this class does is serve as the keeper of the
 * mapping from parameters to attribute ids in the database if this is a
 * parameterized attribute.
 */
function GlodaAttributeDBDef(
  aDatastore,
  aID,
  aCompoundName,
  aAttrType,
  aPluginName,
  aAttrName
) {
  // _datastore is now set on the prototype by GlodaDatastore
  this._id = aID;
  this._compoundName = aCompoundName;
  this._attrType = aAttrType;
  this._pluginName = aPluginName;
  this._attrName = aAttrName;

  this.attrDef = null;

  /** Map parameter values to the underlying database id. */
  this._parameterBindings = {};
}

GlodaAttributeDBDef.prototype = {
  // set by GlodaDatastore
  _datastore: null,
  get id() {
    return this._id;
  },
  get attributeName() {
    return this._attrName;
  },

  get parameterBindings() {
    return this._parameterBindings;
  },

  /**
   * Bind a parameter value to the attribute definition, allowing use of the
   * attribute-parameter as an attribute.
   *
   * @param aValue The parameter value to bind.
   * @returns The database id for the (attribute, parameter) pair, creating
   *   the database entry on first use.
   */
  bindParameter(aValue) {
    // People probably shouldn't call us with null, but handle it.
    if (aValue == null) {
      return this._id;
    }
    // BUGFIX: use an own-property check rather than the `in` operator. With
    // `in`, inherited keys such as "toString" or "constructor" would appear
    // to already be bound and we would hand back a function instead of a
    // database id.
    if (Object.prototype.hasOwnProperty.call(this._parameterBindings, aValue)) {
      return this._parameterBindings[aValue];
    }
    // No database entry exists if we are here, so we must create it...
    let id = this._datastore._createAttributeDef(
      this._attrType,
      this._pluginName,
      this._attrName,
      aValue
    );
    this._parameterBindings[aValue] = id;
    this._datastore.reportBinding(id, this, aValue);
    return id;
  },

  /**
   * Given a list of values, return a list (regardless of plurality) of
   * database-ready [attribute id, value] tuples. This is intended to be used
   * to directly convert the value of a property on an object that corresponds
   * to a bound attribute.
   *
   * @param {Array} aInstanceValues An array of instance values regardless of
   *   whether or not the attribute is singular.
   */
  convertValuesToDBAttributes(aInstanceValues) {
    let nounDef = this.attrDef.objectNounDef;
    let dbAttributes = [];
    if (nounDef.usesParameter) {
      for (let instanceValue of aInstanceValues) {
        let [param, dbValue] = nounDef.toParamAndValue(instanceValue);
        dbAttributes.push([this.bindParameter(param), dbValue]);
      }
    } else if ("toParamAndValue" in nounDef) {
      // Not generating any attributes is ok. This basically means the noun is
      // just an informative property on the Gloda Message and has no real
      // indexing purposes.
      for (let instanceValue of aInstanceValues) {
        dbAttributes.push([
          this._id,
          nounDef.toParamAndValue(instanceValue)[1],
        ]);
      }
    }
    return dbAttributes;
  },

  toString() {
    return this._compoundName;
  },
};

/**
 * Mix-in providing attribute enumeration and DOM-exposure helpers to noun
 * classes (applied via MixIn below).
 */
var GlodaHasAttributesMixIn = {
  *enumerateAttributes() {
    let nounDef = this.NOUN_DEF;
    for (let key in this) {
      let value = this[key];
      let attrDef = nounDef.attribsByBoundName[key];
      // We expect to not have attributes for underscore-prefixed values (those
      // are managed by the instance's logic). We also want to not explode
      // should someone crap other values in there; we get both birds with this
      // one stone.
      if (attrDef === undefined) {
        continue;
      }
      if (attrDef.singular) {
        // Ignore attributes with null values.
        if (value != null) {
          yield [attrDef, [value]];
        }
      } else if (value.length) {
        // Ignore attributes with no values.
        yield [attrDef, value];
      }
    }
  },

  domContribute(aDomNode) {
    let nounDef = this.NOUN_DEF;
    for (let attrName in nounDef.domExposeAttribsByBoundName) {
      let attr = nounDef.domExposeAttribsByBoundName[attrName];
      if (this[attrName]) {
        aDomNode.setAttribute(attr.domExpose, this[attrName]);
      }
    }
  },
};

/**
 * Copy the properties of aMixIn onto aConstructor's prototype. Keys of the
 * form "get_name" are installed as a getter named "name"; everything else is
 * copied as a plain method/value.
 */
function MixIn(aConstructor, aMixIn) {
  let proto = aConstructor.prototype;
  for (let [name, func] of Object.entries(aMixIn)) {
    if (name.startsWith("get_")) {
      // Object.defineProperty replaces the deprecated, non-standard
      // __defineGetter__; enumerable/configurable:true match its semantics.
      Object.defineProperty(proto, name.substring(4), {
        get: func,
        enumerable: true,
        configurable: true,
      });
    } else {
      proto[name] = func;
    }
  }
}
/**
 * @class A gloda wrapper around nsIMsgIncomingServer.
 */
function GlodaAccount(aIncomingServer) {
  this._incomingServer = aIncomingServer;
}

GlodaAccount.prototype = {
  // NOTE(review): literal noun id; other nouns here use GlodaConstants.* —
  // presumably this is the account noun id, confirm against GlodaConstants.
  NOUN_ID: 106,
  get id() {
    return this._incomingServer.key;
  },
  get name() {
    return this._incomingServer.prettyName;
  },
  get incomingServer() {
    return this._incomingServer;
  },
  toString() {
    return "Account: " + this.id;
  },

  toLocaleString() {
    return this.name;
  },
};

/**
 * @class A gloda conversation (thread) exists so that messages can belong.
 */
function GlodaConversation(
  aDatastore,
  aID,
  aSubject,
  aOldestMessageDate,
  aNewestMessageDate
) {
  // _datastore is now set on the prototype by GlodaDatastore
  this._id = aID;
  this._subject = aSubject;
  this._oldestMessageDate = aOldestMessageDate;
  this._newestMessageDate = aNewestMessageDate;
}

GlodaConversation.prototype = {
  NOUN_ID: GlodaConstants.NOUN_CONVERSATION,
  // set by GlodaDatastore
  _datastore: null,
  get id() {
    return this._id;
  },
  get subject() {
    return this._subject;
  },
  get oldestMessageDate() {
    return this._oldestMessageDate;
  },
  get newestMessageDate() {
    return this._newestMessageDate;
  },

  /**
   * Issue an async query for all messages in this conversation, ordered by
   * date; results are delivered to aListener via the returned collection.
   */
  getMessagesCollection(aListener, aData) {
    let query = new GlodaMessage.prototype.NOUN_DEF.queryClass();
    query.conversation(this._id).orderBy("date");
    return query.getCollection(aListener, aData);
  },

  toString() {
    return "Conversation:" + this._id;
  },

  toLocaleString() {
    return this._subject;
  },
};

/**
 * @class Gloda's view of a mail folder: database id, URI, indexing dirty
 * state (a bitfield combining a 3-bit dirty status and a "compacted" flag),
 * and indexing priority.
 */
function GlodaFolder(
  aDatastore,
  aID,
  aURI,
  aDirtyStatus,
  aPrettyName,
  aIndexingPriority
) {
  // _datastore is now set by GlodaDatastore
  this._id = aID;
  this._uri = aURI;
  this._dirtyStatus = aDirtyStatus;
  this._prettyName = aPrettyName;
  this._account = null;
  this._activeIndexing = false;
  this._indexingPriority = aIndexingPriority;
  this._deleted = false;
  this._compacting = false;
}

GlodaFolder.prototype = {
  NOUN_ID: GlodaConstants.NOUN_FOLDER,
  // set by GlodaDatastore
  _datastore: null,

  /** The folder is believed to be up-to-date */
  kFolderClean: 0,
  /** The folder has some un-indexed or dirty messages */
  kFolderDirty: 1,
  /** The folder needs to be entirely re-indexed, regardless of the flags on
   * the messages in the folder. This state will be downgraded to dirty */
  kFolderFilthy: 2,

  // Low 3 bits of _dirtyStatus hold the clean/dirty/filthy status.
  _kFolderDirtyStatusMask: 0x7,
  /**
   * The (local) folder has been compacted and all of its message keys are
   * potentially incorrect. This is not a possible state for IMAP folders
   * because their message keys are based on UIDs rather than offsets into
   * the mbox file.
   */
  _kFolderCompactedFlag: 0x8,

  /** The folder should never be indexed. */
  kIndexingNeverPriority: -1,
  /** The lowest priority assigned to a folder. */
  kIndexingLowestPriority: 0,
  /** The highest priority assigned to a folder. */
  kIndexingHighestPriority: 100,

  /** The indexing priority for a folder if no other priority is assigned. */
  kIndexingDefaultPriority: 20,
  /** Folders marked check new are slightly more important I guess. */
  kIndexingCheckNewPriority: 30,
  /** Favorite folders are more interesting to the user, presumably. */
  kIndexingFavoritePriority: 40,
  /** The indexing priority for inboxes. */
  kIndexingInboxPriority: 50,
  /** The indexing priority for sent mail folders. */
  kIndexingSentMailPriority: 60,

  get id() {
    return this._id;
  },
  get uri() {
    return this._uri;
  },
  /** The clean/dirty/filthy status, with the compacted flag masked off. */
  get dirtyStatus() {
    return this._dirtyStatus & this._kFolderDirtyStatusMask;
  },
  /**
   * Mark a folder as dirty if it was clean. Do nothing if it was already dirty
   * or filthy. For use by GlodaMsgIndexer only. And maybe rkent and his
   * marvelous extensions.
   */
  _ensureFolderDirty() {
    if (this.dirtyStatus == this.kFolderClean) {
      // Preserve the flag bits (e.g. compacted) while setting the status.
      this._dirtyStatus =
        (this.kFolderDirty & this._kFolderDirtyStatusMask) |
        (this._dirtyStatus & ~this._kFolderDirtyStatusMask);
      this._datastore.updateFolderDirtyStatus(this);
    }
  },
  /**
   * Definitely for use only by GlodaMsgIndexer to downgrade the dirty status of
   * a folder.
   */
  _downgradeDirtyStatus(aNewStatus) {
    if (this.dirtyStatus != aNewStatus) {
      // Preserve the flag bits (e.g. compacted) while setting the status.
      this._dirtyStatus =
        (aNewStatus & this._kFolderDirtyStatusMask) |
        (this._dirtyStatus & ~this._kFolderDirtyStatusMask);
      this._datastore.updateFolderDirtyStatus(this);
    }
  },
  /**
   * Indicate whether this folder is currently being compacted. The
   * |GlodaMsgIndexer| keeps this in-memory-only value up-to-date.
   */
  get compacting() {
    return this._compacting;
  },
  /**
   * Set whether this folder is currently being compacted. This is really only
   * for the |GlodaMsgIndexer| to set.
   */
  set compacting(aCompacting) {
    this._compacting = aCompacting;
  },
  /**
   * Indicate whether this folder was compacted and has not yet been
   * compaction processed.
   */
  get compacted() {
    return Boolean(this._dirtyStatus & this._kFolderCompactedFlag);
  },
  /**
   * For use only by GlodaMsgIndexer to set/clear the compaction state of this
   * folder.
   */
  _setCompactedState(aCompacted) {
    if (this.compacted != aCompacted) {
      if (aCompacted) {
        this._dirtyStatus |= this._kFolderCompactedFlag;
      } else {
        this._dirtyStatus &= ~this._kFolderCompactedFlag;
      }
      this._datastore.updateFolderDirtyStatus(this);
    }
  },

  get name() {
    return this._prettyName;
  },
  toString() {
    return "Folder:" + this._id;
  },

  toLocaleString() {
    let xpcomFolder = this.getXPCOMFolder(this.kActivityFolderOnlyNoData);
    if (!xpcomFolder) {
      return this._prettyName;
    }
    return (
      xpcomFolder.prettyName + " (" + xpcomFolder.rootFolder.prettyName + ")"
    );
  },

  get indexingPriority() {
    return this._indexingPriority;
  },

  /** We are going to index this folder. */
  kActivityIndexing: 0,
  /** Asking for the folder to perform header retrievals. */
  kActivityHeaderRetrieval: 1,
  /** We only want the folder for its metadata but are not going to open it. */
  kActivityFolderOnlyNoData: 2,

  /** Is this folder known to be actively used for indexing?
   * NOTE(review): also initialized in the constructor, so this prototype
   * default is always shadowed on instances. */
  _activeIndexing: false,
  /** Get our indexing status. */
  get indexing() {
    return this._activeIndexing;
  },
  /**
   * Set our indexing status. Normally, this will be enabled through passing
   * an activity type of kActivityIndexing (which will set us), but we will
   * still need to be explicitly disabled by the indexing code.
   * When disabling indexing, we will call forgetFolderIfUnused to take care of
   * shutting things down.
   * We are not responsible for committing changes to the message database!
   * That is on you!
   */
  set indexing(aIndexing) {
    this._activeIndexing = aIndexing;
  },

  /**
   * Retrieve the nsIMsgFolder instance corresponding to this folder, providing
   * an explanation of why you are requesting it for tracking/cleanup purposes.
   *
   * @param aActivity One of the kActivity* constants. If you pass
   *   kActivityIndexing, we will set indexing for you, but you will need to
   *   clear it when you are done.
   * @returns The nsIMsgFolder if available, null on failure.
   */
  getXPCOMFolder(aActivity) {
    switch (aActivity) {
      case this.kActivityIndexing:
        // Mark us as indexing, but don't bother with live tracking. We do
        // that independently and only for header retrieval.
        this.indexing = true;
        break;
      case this.kActivityHeaderRetrieval:
      case this.kActivityFolderOnlyNoData:
        // We don't have to do anything here.
        break;
    }

    return MailServices.folderLookup.getFolderForURL(this.uri);
  },

  /**
   * Retrieve a GlodaAccount instance corresponding to this folder.
   *
   * @returns The GlodaAccount instance (cached after the first call).
   */
  getAccount() {
    if (!this._account) {
      let msgFolder = this.getXPCOMFolder(this.kActivityFolderOnlyNoData);
      this._account = new GlodaAccount(msgFolder.server);
    }
    return this._account;
  },
};

/**
 * @class A message representation.
 */
function GlodaMessage(
  aDatastore,
  aID,
  aFolderID,
  aMessageKey,
  aConversationID,
  aConversation,
  aDate,
  aHeaderMessageID,
  aDeleted,
  aJsonText,
  aNotability,
  aSubject,
  aIndexedBodyText,
  aAttachmentNames
) {
  // _datastore is now set on the prototype by GlodaDatastore
  this._id = aID;
  this._folderID = aFolderID;
  this._messageKey = aMessageKey;
  this._conversationID = aConversationID;
  this._conversation = aConversation;
  this._date = aDate;
  this._headerMessageID = aHeaderMessageID;
  this._jsonText = aJsonText;
  this._notability = aNotability;
  this._subject = aSubject;
  this._indexedBodyText = aIndexedBodyText;
  this._attachmentNames = aAttachmentNames;

  // Only set _deleted if we're deleted; otherwise the undefined does our
  // speaking for us ("_deleted" in this tests below rely on that).
  if (aDeleted) {
    this._deleted = aDeleted;
  }
}

GlodaMessage.prototype = {
  NOUN_ID: GlodaConstants.NOUN_MESSAGE,
  // set by GlodaDatastore
  _datastore: null,
  get id() {
    return this._id;
  },
  get folderID() {
    return this._folderID;
  },
  get messageKey() {
    return this._messageKey;
  },
  get conversationID() {
    return this._conversationID;
  },
  // conversation is special
  get headerMessageID() {
    return this._headerMessageID;
  },
  get notability() {
    return this._notability;
  },
  set notability(aNotability) {
    this._notability = aNotability;
  },

  get subject() {
    return this._subject;
  },
  get indexedBodyText() {
    return this._indexedBodyText;
  },
  get attachmentNames() {
    return this._attachmentNames;
  },

  get date() {
    return this._date;
  },
  set date(aNewDate) {
    this._date = aNewDate;
  },

  get folder() {
    // XXX due to a deletion bug it is currently possible to get in a state
    // where we have an illegal folderID value. This will result in an
    // exception. As a workaround, let's just return null in that case.
    try {
      if (this._folderID != null) {
        return this._datastore._mapFolderID(this._folderID);
      }
    } catch (ex) {}
    return null;
  },
  get folderURI() {
    // XXX just like for folder, handle mapping failures and return null
    try {
      if (this._folderID != null) {
        return this._datastore._mapFolderID(this._folderID).uri;
      }
    } catch (ex) {}
    return null;
  },
  get account() {
    // XXX due to a deletion bug it is currently possible to get in a state
    // where we have an illegal folderID value. This will result in an
    // exception. As a workaround, let's just return null in that case.
    try {
      if (this._folderID == null) {
        return null;
      }
      let folder = this._datastore._mapFolderID(this._folderID);
      return folder.getAccount();
    } catch (ex) {}
    return null;
  },
  get conversation() {
    return this._conversation;
  },

  toString() {
    // uh, this is a tough one...
    return "Message:" + this._id;
  },

  /**
   * Create a copy of this message with no datastore reference; used by the
   * indexer so mutations happen on the clone, not the canonical instance.
   */
  _clone() {
    return new GlodaMessage(
      /* datastore */ null,
      this._id,
      this._folderID,
      this._messageKey,
      this._conversationID,
      this._conversation,
      this._date,
      this._headerMessageID,
      "_deleted" in this ? this._deleted : undefined,
      "_jsonText" in this ? this._jsonText : undefined,
      this._notability,
      this._subject,
      this._indexedBodyText,
      this._attachmentNames
    );
  },

  /**
   * Provide a means of propagating changed values on our clone back to
   * ourselves. This is required because of an object identity trick gloda
   * does; when indexing an already existing object, all mutations happen on
   * a clone of the existing object so that
   */
  _declone(aOther) {
    if ("_content" in aOther) {
      this._content = aOther._content;
    }

    // The _indexedAuthor/_indexedRecipients fields don't get updated on
    // fulltext update so we don't need to propagate.
    this._indexedBodyText = aOther._indexedBodyText;
    this._attachmentNames = aOther._attachmentNames;
  },

  /**
   * Mark this message as a ghost. Ghosts are characterized by having no folder
   * id and no message key. They also are not deleted or they would be of
   * absolutely no use to us.
   *
   * These changes are suitable for persistence.
   */
  _ghost() {
    this._folderID = null;
    this._messageKey = null;
    if ("_deleted" in this) {
      delete this._deleted;
    }
  },

  /**
   * Are we a ghost (which implies not deleted)? We are not a ghost if we have
   * a definite folder location (we may not know our message key in the case
   * of IMAP moves not fully completed) and are not deleted.
   */
  get _isGhost() {
    return this._folderID == null && !this._isDeleted;
  },

  /**
   * If we were dead, un-dead us.
   */
  _ensureNotDeleted() {
    if ("_deleted" in this) {
      delete this._deleted;
    }
  },

  /**
   * Are we deleted? This is private because deleted gloda messages are not
   * visible to non-core-gloda code.
   */
  get _isDeleted() {
    return "_deleted" in this && this._deleted;
  },

  /**
   * Trash this message's in-memory representation because it should no longer
   * be reachable by any code. The database record is gone, it's not coming
   * back.
   */
  _objectPurgedMakeYourselfUnpleasant() {
    this._id = null;
    this._folderID = null;
    this._messageKey = null;
    this._conversationID = null;
    this._conversation = null;
    this.date = null;
    this._headerMessageID = null;
  },

  /**
   * Return the underlying nsIMsgDBHdr from the folder storage for this, or
   * null if the message does not exist for one reason or another. We may log
   * to our logger in the failure cases.
   *
   * This method no longer caches the result, so if you need to hold onto it,
   * hold onto it.
   *
   * In the process of retrieving the underlying message header, we may have to
   * open the message header database associated with the folder. This may
   * result in blocking while the load happens, so you may want to try and find
   * an alternate way to initiate the load before calling us.
   * We provide hinting to the GlodaDatastore via the GlodaFolder so that it
   * knows when it's a good time for it to go and detach from the database.
   *
   * @returns The nsIMsgDBHdr associated with this message if available, null on
   *   failure.
   */
  get folderMessage() {
    if (this._folderID === null || this._messageKey === null) {
      return null;
    }

    // XXX like for folder and folderURI, return null if we can't map the folder
    let glodaFolder;
    try {
      glodaFolder = this._datastore._mapFolderID(this._folderID);
    } catch (ex) {
      return null;
    }
    let folder = glodaFolder.getXPCOMFolder(
      glodaFolder.kActivityHeaderRetrieval
    );
    if (folder) {
      let folderMessage;
      try {
        folderMessage = folder.GetMessageHeader(this._messageKey);
      } catch (ex) {
        folderMessage = null;
      }
      if (folderMessage !== null) {
        // Verify the message-id header matches what we expect; a mismatch
        // means our message key is stale (e.g. after compaction).
        if (folderMessage.messageId != this._headerMessageID) {
          LOG.info(
            "Message with message key " +
              this._messageKey +
              " in folder '" +
              folder.URI +
              "' does not match expected " +
              "header! (" +
              this._headerMessageID +
              " expected, got " +
              folderMessage.messageId +
              ")"
          );
          folderMessage = null;
        }
      }
      return folderMessage;
    }

    // This only gets logged if things have gone very wrong. We used to throw
    // here, but it's unlikely our caller can do anything more meaningful than
    // treating this as a disappeared message.
    LOG.info(
      "Unable to locate folder message for: " +
        this._folderID +
        ":" +
        this._messageKey
    );
    return null;
  },
  get folderMessageURI() {
    let folderMessage = this.folderMessage;
    if (folderMessage) {
      return folderMessage.folder.getUriForMsg(folderMessage);
    }
    return null;
  },
};
MixIn(GlodaMessage, GlodaHasAttributesMixIn);
/**
 * @class Contacts correspond to people (one per person), and may own multiple
 * identities (e-mail address, IM account, etc.)
 */
function GlodaContact(
  aDatastore,
  aID,
  aDirectoryUUID,
  aContactUUID,
  aName,
  aPopularity,
  aFrecency,
  aJsonText
) {
  // _datastore set on the prototype by GlodaDatastore
  this._id = aID;
  this._directoryUUID = aDirectoryUUID;
  this._contactUUID = aContactUUID;
  this._name = aName;
  this._popularity = aPopularity;
  this._frecency = aFrecency;
  if (aJsonText) {
    this._jsonText = aJsonText;
  }

  this._identities = null;
}

GlodaContact.prototype = {
  NOUN_ID: GlodaConstants.NOUN_CONTACT,
  // set by GlodaDatastore
  _datastore: null,

  get id() {
    return this._id;
  },
  get directoryUUID() {
    return this._directoryUUID;
  },
  get contactUUID() {
    return this._contactUUID;
  },
  get name() {
    return this._name;
  },
  set name(aName) {
    this._name = aName;
  },

  get popularity() {
    return this._popularity;
  },
  // Setters flag the contact dirty so the indexer knows to persist it.
  set popularity(aPopularity) {
    this._popularity = aPopularity;
    this.dirty = true;
  },

  get frecency() {
    return this._frecency;
  },
  set frecency(aFrecency) {
    this._frecency = aFrecency;
    this.dirty = true;
  },

  get identities() {
    return this._identities;
  },

  toString() {
    return "Contact:" + this._id;
  },

  get accessibleLabel() {
    return "Contact: " + this._name;
  },

  /**
   * Create a datastore-less copy of this contact for mutation during
   * indexing.
   * NOTE(review): _jsonText is deliberately not passed (the constructor's
   * aJsonText arg is omitted), so clones never carry the JSON blob — confirm
   * this is intended before relying on it.
   */
  _clone() {
    return new GlodaContact(
      /* datastore */ null,
      this._id,
      this._directoryUUID,
      this._contactUUID,
      this._name,
      this._popularity,
      this._frecency
    );
  },
};
MixIn(GlodaContact, GlodaHasAttributesMixIn);

/**
 * @class A specific means of communication for a contact.
 */
function GlodaIdentity(
  aDatastore,
  aID,
  aContactID,
  aContact,
  aKind,
  aValue,
  aDescription,
  aIsRelay
) {
  // _datastore set on the prototype by GlodaDatastore
  this._id = aID;
  this._contactID = aContactID;
  this._contact = aContact;
  this._kind = aKind;
  this._value = aValue;
  this._description = aDescription;
  this._isRelay = aIsRelay;
  // Cached indication of whether there is an address book card for this
  // identity. We keep this up-to-date via address book listener
  // notifications in |GlodaABIndexer|.
  this._hasAddressBookCard = undefined;
}

GlodaIdentity.prototype = {
  NOUN_ID: GlodaConstants.NOUN_IDENTITY,
  // set by GlodaDatastore
  _datastore: null,
  get id() {
    return this._id;
  },
  get contactID() {
    return this._contactID;
  },
  get contact() {
    return this._contact;
  },
  get kind() {
    return this._kind;
  },
  get value() {
    return this._value;
  },
  get description() {
    return this._description;
  },
  get isRelay() {
    return this._isRelay;
  },

  // e.g. "email@user@example.com"; unique across kinds.
  get uniqueValue() {
    return this._kind + "@" + this._value;
  },

  toString() {
    return "Identity:" + this._kind + ":" + this._value;
  },

  toLocaleString() {
    if (this.contact.name == this.value) {
      return this.value;
    }
    return this.contact.name + " : " + this.value;
  },

  /**
   * The address book card for this identity, refreshing the cached
   * _hasAddressBookCard flag as a side effect.
   * NOTE(review): returns the boolean false (not null) for non-email kinds.
   */
  get abCard() {
    // For our purposes, the address book only speaks email.
    if (this._kind != "email") {
      return false;
    }
    let card = MailServices.ab.cardForEmailAddress(this._value);
    this._hasAddressBookCard = card != null;
    return card;
  },

  /**
   * Indicates whether we have an address book card for this identity. This
   * value is cached once looked-up and kept up-to-date by |GlodaABIndexer|
   * and its notifications.
   */
  get inAddressBook() {
    if (this._hasAddressBookCard !== undefined) {
      return this._hasAddressBookCard;
    }
    // Fall back to a live lookup (which also primes the cache).
    return (this.abCard && true) || false;
  },
};

/**
 * An attachment, with as much information as we can gather on it
 */
function GlodaAttachment(
  aGlodaMessage,
  aName,
  aContentType,
  aSize,
  aPart,
  aExternalUrl,
  aIsExternal
) {
  // _datastore set on the prototype by GlodaDatastore
  this._glodaMessage = aGlodaMessage;
  this._name = aName;
  this._contentType = aContentType;
  this._size = aSize;
  this._part = aPart;
  this._externalUrl = aExternalUrl;
  this._isExternal = aIsExternal;
}

GlodaAttachment.prototype = {
  NOUN_ID: GlodaConstants.NOUN_ATTACHMENT,
  // set by GlodaDatastore
  get name() {
    return this._name;
  },
  get contentType() {
    return this._contentType;
  },
  get size() {
    return this._size;
  },
  /**
   * The URL to fetch this attachment: the external URL for detached
   * attachments, otherwise the owning message's necko URL with part/filename
   * query parameters appended.
   *
   * @throws Error if the owning message no longer exists.
   */
  get url() {
    if (this.isExternal) {
      return this._externalUrl;
    }

    let uri = this._glodaMessage.folderMessageURI;
    if (!uri) {
      throw new Error(
        "The message doesn't exist anymore, unable to rebuild attachment URL"
      );
    }
    let msgService = MailServices.messageServiceFromURI(uri);
    let neckoURL = msgService.getUrlForUri(uri);
    let url = neckoURL.spec;
    // Pick "&" vs "?" depending on whether the URL already carries a query
    // parameter.
    let hasParamAlready = url.match(/\?[a-z]+=[^\/]+$/);
    let sep = hasParamAlready ? "&" : "?";
    return (
      url +
      sep +
      "part=" +
      this._part +
      "&filename=" +
      encodeURIComponent(this._name)
    );
  },
  get isExternal() {
    return this._isExternal;
  },

  toString() {
    return "attachment: " + this._name + ":" + this._contentType;
  },
};
/* (license header continued) If a copy of the MPL was not distributed with
 * this file, You can obtain one at http://mozilla.org/MPL/2.0/. */

const EXPORTED_SYMBOLS = ["GlodaDatabind"];

/**
 * Generic SQL binding helper for extension-defined noun tables.
 *
 * Pre-computes the INSERT/UPDATE statements (and their fulltext variants when
 * the schema declares fulltextColumns) for the noun's table, and augments each
 * column definition in-place:
 *   coldef[0] = column name, coldef[1] = column type,
 *   coldef[2] = attribute name on the noun object (defaults to coldef[0]),
 *   coldef[3] = 0-based binding index for SQL parameters (1-based offset for
 *               fulltext columns because the implied docid occupies slot 0).
 *
 * @param {object} aNounDef - Noun definition carrying tableName and schema.
 * @param {object} aDatastore - The GlodaDatastore instance to bind against.
 */
function GlodaDatabind(aNounDef, aDatastore) {
  this._nounDef = aNounDef;
  this._tableName = aNounDef.tableName;
  this._tableDef = aNounDef.schema;
  this._datastore = aDatastore;
  this._log = console.createInstance({
    prefix: `gloda.databind.${this._tableName}`,
    maxLogLevel: "Warn",
    maxLogLevelPref: "gloda.loglevel",
  });

  // process the column definitions and make sure they have an attribute mapping
  for (let [iColDef, coldef] of this._tableDef.columns.entries()) {
    // default to the other dude's thing.
    if (coldef.length < 3) {
      coldef[2] = coldef[0];
    }
    if (coldef[0] == "id") {
      this._idAttr = coldef[2];
    }
    // colDef[3] is the index of us in our SQL bindings, storage-numbering
    coldef[3] = iColDef;
  }

  // XXX This is obviously synchronous and not perfectly async. Since we are
  // doing this, we don't actually need to move to ordinal binding below
  // since we could just as well compel creation of the name map and thereby
  // avoid ever acquiring the mutex after bootstrap.
  // However, this specific check can be cleverly avoided with future work.
  // Namely, at startup we can scan for extension-defined tables and get their
  // maximum id so that we don't need to do it here. The table will either
  // be brand new and thus have a maximum id of 1 or we will already know it
  // because of that scan.
  this._nextId = 1;
  let stmt = this._datastore._createSyncStatement(
    "SELECT MAX(id) FROM " + this._tableName,
    true
  );
  if (stmt.executeStep()) {
    // no chance of this SQLITE_BUSY on this call
    this._nextId = stmt.getInt64(0) + 1;
  }
  stmt.finalize();

  let insertColumns = [];
  let insertValues = [];
  let updateItems = [];
  for (let [iColDef, coldef] of this._tableDef.columns.entries()) {
    let column = coldef[0];
    let placeholder = "?" + (iColDef + 1);
    insertColumns.push(column);
    insertValues.push(placeholder);
    if (column != "id") {
      updateItems.push(column + " = " + placeholder);
    }
  }

  let insertSql =
    "INSERT INTO " +
    this._tableName +
    " (" +
    insertColumns.join(", ") +
    ") VALUES (" +
    insertValues.join(", ") +
    ")";

  // For the update, we want the 'id' to be a constraint and not a value
  // that gets set...
  let updateSql =
    "UPDATE " +
    this._tableName +
    " SET " +
    updateItems.join(", ") +
    " WHERE id = ?1";
  this._insertStmt = aDatastore._createAsyncStatement(insertSql);
  this._updateStmt = aDatastore._createAsyncStatement(updateSql);

  if (this._tableDef.fulltextColumns) {
    for (let [iColDef, coldef] of this._tableDef.fulltextColumns.entries()) {
      if (coldef.length < 3) {
        coldef[2] = coldef[0];
      }
      // colDef[3] is the index of us in our SQL bindings, storage-numbering
      coldef[3] = iColDef + 1;
    }

    let insertColumns = [];
    let insertValues = [];
    let updateItems = [];
    // FIX: this loop previously declared its destructured bindings with
    // `var`, giving them function scope and silently overlapping the `let`
    // bindings used by the three sibling loops above; `let` keeps each
    // iteration's bindings block-scoped and consistent with the rest of the
    // constructor.
    for (let [iColDef, coldef] of this._tableDef.fulltextColumns.entries()) {
      let column = coldef[0];
      // +2 instead of +1 because docid is implied
      let placeholder = "?" + (iColDef + 2);
      insertColumns.push(column);
      insertValues.push(placeholder);
      if (column != "id") {
        updateItems.push(column + " = " + placeholder);
      }
    }

    let insertFulltextSql =
      "INSERT INTO " +
      this._tableName +
      "Text (docid," +
      insertColumns.join(", ") +
      ") VALUES (?1," +
      insertValues.join(", ") +
      ")";

    // For the update, we want the 'id' to be a constraint and not a value
    // that gets set...
    let updateFulltextSql =
      "UPDATE " +
      this._tableName +
      "Text SET " +
      updateItems.join(", ") +
      " WHERE docid = ?1";

    this._insertFulltextStmt =
      aDatastore._createAsyncStatement(insertFulltextSql);
    this._updateFulltextStmt =
      aDatastore._createAsyncStatement(updateFulltextSql);
  }
}

GlodaDatabind.prototype = {
  /**
   * Perform appropriate binding coercion based on the schema provided to us.
   * Although we end up effectively coercing JS Date objects to numeric values,
   * we should not be provided with JS Date objects! There is no way for us
   * to know to turn them back into JS Date objects on the way out.
   * Additionally, there is the small matter of storage's bias towards
   * PRTime representations which may not always be desirable.
   *
   * @param {object} aStmt - A (mozIStorage) statement supporting bindByIndex.
   * @param {Array} aColDef - Column definition; aColDef[3] is the 0-based
   *   binding index computed by the constructor.
   * @param {*} aValue - The value to bind.
   */
  bindByType(aStmt, aColDef, aValue) {
    aStmt.bindByIndex(aColDef[3], aValue);
  },

  /**
   * Instantiate a new noun object from a database row, copying each column
   * (via the datastore's variant getter) onto the attribute named by the
   * column definition's attribute mapping.
   *
   * @param {object} aRow - Result row; columns are read positionally.
   * @returns {object} A fresh instance of the noun's class.
   */
  objFromRow(aRow) {
    let getVariant = this._datastore._getVariant;
    let obj = new this._nounDef.class();
    for (let [iCol, colDef] of this._tableDef.columns.entries()) {
      obj[colDef[2]] = getVariant(aRow, iCol);
    }
    return obj;
  },

  /**
   * Asynchronously insert the given noun object, assigning it the next
   * available id if it does not already have one. Also populates the
   * fulltext shadow table when one exists.
   */
  objInsert(aThing) {
    let bindByType = this.bindByType;
    if (!aThing[this._idAttr]) {
      aThing[this._idAttr] = this._nextId++;
    }

    let stmt = this._insertStmt;
    for (let colDef of this._tableDef.columns) {
      bindByType(stmt, colDef, aThing[colDef[2]]);
    }

    stmt.executeAsync(this._datastore.trackAsync());

    if (this._insertFulltextStmt) {
      stmt = this._insertFulltextStmt;
      // binding slot 0 is the implied docid (see constructor).
      stmt.bindByIndex(0, aThing[this._idAttr]);
      for (let colDef of this._tableDef.fulltextColumns) {
        bindByType(stmt, colDef, aThing[colDef[2]]);
      }
      stmt.executeAsync(this._datastore.trackAsync());
    }
  },

  /**
   * Asynchronously update the row (and fulltext row, if any) backing the
   * given noun object; the object's id selects the row via the WHERE clause.
   */
  objUpdate(aThing) {
    let bindByType = this.bindByType;
    let stmt = this._updateStmt;
    // note, we specially bound the location of 'id' for the insert, but since
    // the update reuses the same ?N placeholder numbering (with id as the ?1
    // WHERE constraint), there is nothing special about setting it
    for (let colDef of this._tableDef.columns) {
      bindByType(stmt, colDef, aThing[colDef[2]]);
    }
    stmt.executeAsync(this._datastore.trackAsync());

    if (this._updateFulltextStmt) {
      stmt = this._updateFulltextStmt;
      // fulltextColumns doesn't include id/docid, need to explicitly set it
      stmt.bindByIndex(0, aThing[this._idAttr]);
      for (let colDef of this._tableDef.fulltextColumns) {
        bindByType(stmt, colDef, aThing[colDef[2]]);
      }
      stmt.executeAsync(this._datastore.trackAsync());
    }
  },

  adjustAttributes(...aArgs) {
    // just proxy the call over to the datastore... we have to do this for
    // 'this' reasons. we don't refactor things to avoid this because it does
    // make some sense to have all the methods exposed from a single object,
    // even if the implementation does live elsewhere.
    return this._datastore.adjustAttributes(...aArgs);
  },

  // also proxied...
  queryFromQuery(...aArgs) {
    return this._datastore.queryFromQuery(...aArgs);
  },
};

/*
 * ===== file boundary (from the imported diff) =====
 * diff --git a/comm/mailnews/db/gloda/modules/GlodaDatastore.jsm b/comm/mailnews/db/gloda/modules/GlodaDatastore.jsm
 * new file mode 100644
 * index 0000000000..1391ceaaf2
 * --- /dev/null
 * +++ b/comm/mailnews/db/gloda/modules/GlodaDatastore.jsm
 * @@ -0,0 +1,4402 @@
 */

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* This file looks to Myk Melez <myk@mozilla.org>'s Mozilla Labs snowl
 * project's (https://hg.mozilla.org/labs/snowl/) modules/GlodaDatastore.jsm
 * for inspiration and idioms (and also a name :).
 */
/* (closes the comment opened in the preceding section) */

const EXPORTED_SYMBOLS = ["GlodaDatastore"];

const {
  GlodaAttributeDBDef,
  GlodaConversation,
  GlodaFolder,
  GlodaMessage,
  GlodaContact,
  GlodaIdentity,
} = ChromeUtils.import("resource:///modules/gloda/GlodaDataModel.jsm");
const { GlodaDatabind } = ChromeUtils.import(
  "resource:///modules/gloda/GlodaDatabind.jsm"
);
const { GlodaCollection, GlodaCollectionManager } = ChromeUtils.import(
  "resource:///modules/gloda/Collection.jsm"
);
const { GlodaConstants } = ChromeUtils.import(
  "resource:///modules/gloda/GlodaConstants.jsm"
);

// Cache sizing bounds in bytes (N * 1 MiB). Presumably used to clamp the
// SQLite page-cache size computed from system memory — TODO confirm at the
// point of use (outside this excerpt).
var MIN_CACHE_SIZE = 8 * 1048576;
var MAX_CACHE_SIZE = 64 * 1048576;
var MEMSIZE_FALLBACK_BYTES = 256 * 1048576;

var PCH_LOG = console.createInstance({
  prefix: "gloda.ds.pch",
  maxLogLevel: "Warn",
  maxLogLevelPref: "gloda.loglevel",
});

/**
 * Commit async handler; hands off the notification to
 * |GlodaDatastore._asyncCompleted|.
 *
 * Increments the datastore's pending-statement counter at construction and
 * guarantees a matching _asyncCompleted() call from handleCompletion.
 */
function PostCommitHandler(aCallbacks) {
  // callbacks to invoke once the commit statement finishes successfully.
  this.callbacks = aCallbacks;
  GlodaDatastore._pendingAsyncStatements++;
}

PostCommitHandler.prototype = {
  // A commit produces no rows; nothing to do.
  handleResult(aResultSet) {},

  handleError(aError) {
    PCH_LOG.error("database error:" + aError);
  },

  handleCompletion(aReason) {
    // just outright bail if we are shutdown
    if (GlodaDatastore.datastoreIsShutdown) {
      return;
    }

    // Only run the callbacks when the statement actually finished (as opposed
    // to being canceled or erroring out).
    if (aReason == Ci.mozIStorageStatementCallback.REASON_FINISHED) {
      for (let callback of this.callbacks) {
        try {
          callback();
        } catch (ex) {
          // One failing callback must not prevent the rest from running.
          PCH_LOG.error(
            "PostCommitHandler callback (" +
              ex.fileName +
              ":" +
              ex.lineNumber +
              ") threw: " +
              ex
          );
        }
      }
    }
    try {
      GlodaDatastore._asyncCompleted();
    } catch (e) {
      PCH_LOG.error("Exception in handleCompletion:", e);
    }
  },
};

var QFQ_LOG = console.createInstance({
  prefix: "gloda.ds.qfq",
  maxLogLevel: "Warn",
  maxLogLevelPref: "gloda.loglevel",
});

/**
 * Singleton collection listener used by |QueryFromQueryCallback| to assist in
 * the loading of referenced noun instances. (Doc comment continues in the
 * following section.)
 */
/*
 * (doc comment continued from above) Which is to say, messages have
 * identities (specific e-mail addresses) associated with them via attributes.
 * And these identities in turn reference / are referenced by contacts (the
 * notion of a person).
 *
 * This listener is primarily concerned with fixing up the references in each
 * noun instance to its referenced instances once they have been loaded. It
 * also deals with caching so that our identity invariant is maintained: user
 * code should only ever see one distinct instance of a thing at a time.
 */
var QueryFromQueryResolver = {
  /**
   * Invoked when a (sub)query's items have been added. Pops/reads the origin
   * collection off the carrying collection, maintains its deferred/resolved
   * bookkeeping, and — once no loads remain outstanding — resolves deferred
   * dependencies, unifies with the cache and notifies the origin collection.
   *
   * @param aIgnoredItems - unused; the real items live on the origin
   *   collection's pendingItems.
   * @param aCollection - wrapper whose data/dataStack carries the origin
   *   collection.
   * @param aFake - true when called synthetically from
   *   QueryFromQueryCallback.handleCompletion; skips the deferred/resolved
   *   accounting in that case.
   */
  onItemsAdded(aIgnoredItems, aCollection, aFake) {
    let originColl = aCollection.dataStack
      ? aCollection.dataStack.pop()
      : aCollection.data;
    // QFQ_LOG.debug("QFQR: originColl: " + originColl);
    // Remember the origin collection so onQueryCompleted (which fires later,
    // in the same order) can find it again.
    if (aCollection.completionShifter) {
      aCollection.completionShifter.push(originColl);
    } else {
      aCollection.completionShifter = [originColl];
    }

    if (!aFake) {
      originColl.deferredCount--;
      originColl.resolvedCount++;
    }

    // bail if we are still pending on some other load completion
    if (originColl.deferredCount > 0) {
      // QFQ_LOG.debug("QFQR: bailing " + originColl._nounDef.name);
      return;
    }

    let referencesByNounID = originColl.masterCollection.referencesByNounID;
    let inverseReferencesByNounID =
      originColl.masterCollection.inverseReferencesByNounID;

    if (originColl.pendingItems) {
      for (let item of originColl.pendingItems) {
        // QFQ_LOG.debug("QFQR: loading deferred " + item.NOUN_ID + ":" + item.id);
        GlodaDatastore.loadNounDeferredDeps(
          item,
          referencesByNounID,
          inverseReferencesByNounID
        );
      }

      // we need to consider the possibility that we are racing a collection very
      // much like our own. as such, this means we need to perform cache
      // unification as our last step.
      GlodaCollectionManager.cacheLoadUnify(
        originColl._nounDef.id,
        originColl.pendingItems,
        false
      );

      // just directly tell the collection about the items. we know the query
      // matches (at least until we introduce predicates that we cannot express
      // in SQL.)
      // QFQ_LOG.debug(" QFQR: about to trigger listener: " + originColl._listener +
      //   "with collection: " + originColl._nounDef.name);
      originColl._onItemsAdded(originColl.pendingItems);
      delete originColl.pendingItems;
      delete originColl._pendingIdMap;
    }
  },
  onItemsModified() {},
  onItemsRemoved() {},
  /**
   * Signal completion to the origin collection, but only when all of its
   * deferred dependency loads have resolved.
   */
  onQueryCompleted(aCollection) {
    let originColl = aCollection.completionShifter
      ? aCollection.completionShifter.shift()
      : aCollection.data;
    // QFQ_LOG.debug(" QFQR about to trigger completion with collection: " +
    //   originColl._nounDef.name);
    if (originColl.deferredCount <= 0) {
      originColl._onQueryCompleted();
    }
  },
};

/**
 * Handles the results from a GlodaDatastore.queryFromQuery call in cooperation
 * with the |QueryFromQueryResolver| collection listener. We do a lot of
 * legwork related to satisfying references to other noun instances on the
 * noun instances the user directly queried. Messages reference identities
 * reference contacts which in turn (implicitly) reference identities again.
 * We have to spin up those other queries and stitch things together.
 *
 * While the code is generally up to the existing set of tasks it is called to
 * handle, I would not be surprised for it to fall down if things get more
 * complex. Some of the logic here 'evolved' a bit and could benefit from
 * additional documentation and a fresh go-through.
 */
/**
 * Storage callback for an asynchronous queryFromQuery statement. Accumulates
 * result rows into the target collection's pendingItems, tracking which
 * referenced noun instances still need to be loaded; on completion it spins
 * up sub-queries for those references (via QueryFromQueryResolver) or, when
 * nothing is outstanding, resolves immediately.
 */
function QueryFromQueryCallback(aStatement, aNounDef, aCollection) {
  this.statement = aStatement;
  this.nounDef = aNounDef;
  this.collection = aCollection;

  // QFQ_LOG.debug("Creating QFQCallback for noun: " + aNounDef.name);

  // the master collection holds the referencesByNounID
  this.referencesByNounID = {};
  this.masterReferencesByNounID =
    this.collection.masterCollection.referencesByNounID;
  this.inverseReferencesByNounID = {};
  this.masterInverseReferencesByNounID =
    this.collection.masterCollection.inverseReferencesByNounID;
  // we need to contribute our references as we load things; we need this
  // because of the potential for circular dependencies and our inability to
  // put things into the caching layer (or collection's _idMap) until we have
  // fully resolved things.
  if (this.nounDef.id in this.masterReferencesByNounID) {
    this.selfReferences = this.masterReferencesByNounID[this.nounDef.id];
  } else {
    this.selfReferences = this.masterReferencesByNounID[this.nounDef.id] = {};
  }
  if (this.nounDef.parentColumnAttr) {
    if (this.nounDef.id in this.masterInverseReferencesByNounID) {
      this.selfInverseReferences =
        this.masterInverseReferencesByNounID[this.nounDef.id];
    } else {
      this.selfInverseReferences = this.masterInverseReferencesByNounID[
        this.nounDef.id
      ] = {};
    }
  }

  // set true by handleResult when loadNounItem reports outstanding deps.
  this.needsLoads = false;

  GlodaDatastore._pendingAsyncStatements++;
}

QueryFromQueryCallback.prototype = {
  handleResult(aResultSet) {
    try {
      // just outright bail if we are shutdown
      if (GlodaDatastore.datastoreIsShutdown) {
        return;
      }

      let pendingItems = this.collection.pendingItems;
      let pendingIdMap = this.collection._pendingIdMap;
      let row;
      let nounDef = this.nounDef;
      let nounID = nounDef.id;
      while ((row = aResultSet.getNextRow())) {
        let item = nounDef.objFromRow.call(nounDef.datastore, row);
        if (this.collection.stashedColumns) {
          let stashed = (this.collection.stashedColumns[item.id] = []);
          for (let iCol of this.collection.query.options.stashColumns) {
            stashed.push(GlodaDatastore._getVariant(row, iCol));
          }
        }
        // try and replace the item with one from the cache, if we can
        // NOTE(review): this lookup happens before the pendingIdMap skip
        // below, so it is wasted work for duplicate rows — presumably
        // harmless; confirm before reordering.
        let cachedItem = GlodaCollectionManager.cacheLookupOne(
          nounID,
          item.id,
          false
        );

        // if we already have a copy in the pending id map, skip it
        if (item.id in pendingIdMap) {
          continue;
        }

        // QFQ_LOG.debug("loading item " + nounDef.id + ":" + item.id + " existing: " +
        //   this.selfReferences[item.id] + " cached: " + cachedItem);
        if (cachedItem) {
          item = cachedItem;
        } else if (this.selfReferences[item.id] != null) {
          // We may already have been loaded by this process.
          item = this.selfReferences[item.id];
        } else {
          // Perform loading logic which may produce reference dependencies.
          this.needsLoads =
            GlodaDatastore.loadNounItem(
              item,
              this.referencesByNounID,
              this.inverseReferencesByNounID
            ) || this.needsLoads;
        }

        // add ourself to the references by our id
        // QFQ_LOG.debug("saving item " + nounDef.id + ":" + item.id + " to self-refs");
        this.selfReferences[item.id] = item;

        // if we're tracking it, add ourselves to our parent's list of children
        // too
        if (this.selfInverseReferences) {
          let parentID = item[nounDef.parentColumnAttr.idStorageAttributeName];
          let childrenList = this.selfInverseReferences[parentID];
          if (childrenList === undefined) {
            childrenList = this.selfInverseReferences[parentID] = [];
          }
          childrenList.push(item);
        }

        pendingItems.push(item);
        pendingIdMap[item.id] = item;
      }
    } catch (e) {
      GlodaDatastore._log.error("Exception in handleResult:", e);
    }
  },

  handleError(aError) {
    GlodaDatastore._log.error(
      "Async queryFromQuery error: " + aError.result + ": " + aError.message
    );
  },

  handleCompletion(aReason) {
    try {
      try {
        this.statement.finalize();
        this.statement = null;

        // just outright bail if we are shutdown
        if (GlodaDatastore.datastoreIsShutdown) {
          return;
        }

        // QFQ_LOG.debug("handleCompletion: " + this.collection._nounDef.name);

        if (this.needsLoads) {
          // Issue sub-queries for every referenced noun type that could not
          // be satisfied from the cache.
          for (let nounID in this.referencesByNounID) {
            let references = this.referencesByNounID[nounID];
            if (nounID == this.nounDef.id) {
              continue;
            }
            let nounDef = GlodaDatastore._nounIDToDef[nounID];
            // QFQ_LOG.debug(" have references for noun: " + nounDef.name);
            // try and load them out of the cache/existing collections. items in the
            // cache will be fully formed, which is nice for us.
            // XXX this mechanism will get dubious when we have multiple paths to a
            // single noun-type. For example, a -> b -> c, a-> c; two paths to c
            // and we're looking at issuing two requests to c, the latter of which
            // will be a superset of the first one. This does not currently pose
            // a problem because we only have a -> b -> c -> b, and sequential
            // processing means no alarms and no surprises.
            let masterReferences = this.masterReferencesByNounID[nounID];
            if (masterReferences === undefined) {
              masterReferences = this.masterReferencesByNounID[nounID] = {};
            }
            let outReferences;
            if (nounDef.parentColumnAttr) {
              outReferences = {};
            } else {
              outReferences = masterReferences;
            }
            let [, notFoundCount, notFound] =
              GlodaCollectionManager.cacheLookupMany(
                nounDef.id,
                references,
                outReferences
              );

            if (nounDef.parentColumnAttr) {
              let inverseReferences;
              if (nounDef.id in this.masterInverseReferencesByNounID) {
                inverseReferences =
                  this.masterInverseReferencesByNounID[nounDef.id];
              } else {
                inverseReferences = this.masterInverseReferencesByNounID[
                  nounDef.id
                ] = {};
              }

              for (let key in outReferences) {
                let item = outReferences[key];
                masterReferences[item.id] = item;
                let parentID =
                  item[nounDef.parentColumnAttr.idStorageAttributeName];
                let childrenList = inverseReferences[parentID];
                if (childrenList === undefined) {
                  childrenList = inverseReferences[parentID] = [];
                }
                childrenList.push(item);
              }
            }

            // QFQ_LOG.debug(" found: " + foundCount + " not found: " + notFoundCount);
            if (notFoundCount === 0) {
              this.collection.resolvedCount++;
            } else {
              this.collection.deferredCount++;
              let query = new nounDef.queryClass();
              query.id.apply(query, Object.keys(notFound));

              // we fully expect/allow for there being no such subcollection yet.
              let subCollection =
                nounDef.id in this.collection.masterCollection.subCollections
                  ? this.collection.masterCollection.subCollections[nounDef.id]
                  : undefined;
              this.collection.masterCollection.subCollections[nounDef.id] =
                GlodaDatastore.queryFromQuery(
                  query,
                  QueryFromQueryResolver,
                  this.collection,
                  subCollection,
                  this.collection.masterCollection,
                  { becomeExplicit: true }
                );
            }
          }

          // Also issue sub-queries to populate inverse (parent -> children)
          // references, constrained by the parent column.
          for (let nounID in this.inverseReferencesByNounID) {
            let inverseReferences = this.inverseReferencesByNounID[nounID];
            this.collection.deferredCount++;
            let nounDef = GlodaDatastore._nounIDToDef[nounID];

            // QFQ_LOG.debug("Want to load inverse via " + nounDef.parentColumnAttr.boundName);

            let query = new nounDef.queryClass();
            // we want to constrain using the parent column
            let queryConstrainer = query[nounDef.parentColumnAttr.boundName];
            queryConstrainer.apply(query, Object.keys(inverseReferences));
            // we fully expect/allow for there being no such subcollection yet.
            let subCollection =
              nounDef.id in this.collection.masterCollection.subCollections
                ? this.collection.masterCollection.subCollections[nounDef.id]
                : undefined;
            this.collection.masterCollection.subCollections[nounDef.id] =
              GlodaDatastore.queryFromQuery(
                query,
                QueryFromQueryResolver,
                this.collection,
                subCollection,
                this.collection.masterCollection,
                { becomeExplicit: true }
              );
          }
        } else {
          this.collection.deferredCount--;
          this.collection.resolvedCount++;
        }

        // QFQ_LOG.debug(" defer: " + this.collection.deferredCount +
        //   " resolved: " + this.collection.resolvedCount);

        // process immediately and kick-up to the master collection...
        if (this.collection.deferredCount <= 0) {
          // this guy will resolve everyone using referencesByNounID and issue the
          // call to this.collection._onItemsAdded to propagate things to the
          // next concerned subCollection or the actual listener if this is the
          // master collection. (Also, call _onQueryCompleted).
          QueryFromQueryResolver.onItemsAdded(
            null,
            { data: this.collection },
            true
          );
          QueryFromQueryResolver.onQueryCompleted({ data: this.collection });
        }
      } catch (e) {
        console.error(e);
        QFQ_LOG.error("Exception:", e);
      }
    } finally {
      GlodaDatastore._asyncCompleted();
    }
  },
};

/**
 * Used by |GlodaDatastore.folderCompactionPassBlockFetch| to accumulate the
 * results and pass them back in to the compaction process in
 * |GlodaMsgIndexer._worker_folderCompactionPass|.
 */
/**
 * Accumulates [id, messageKey, headerMessageID] triples from an async
 * statement and hands the full batch to the supplied callback once the
 * statement completes. Registers itself with the datastore's pending-async
 * accounting on construction and releases it on completion.
 *
 * @param {Function} aCallback - Receives the accumulated triples array.
 */
function CompactionBlockFetcherHandler(aCallback) {
  this.callback = aCallback;
  this.idsAndMessageKeys = [];
  GlodaDatastore._pendingAsyncStatements++;
}
CompactionBlockFetcherHandler.prototype = {
  handleResult(aResultSet) {
    for (
      let dbRow = aResultSet.getNextRow();
      dbRow;
      dbRow = aResultSet.getNextRow()
    ) {
      const triple = [
        dbRow.getInt64(0), // id
        dbRow.getInt64(1), // messageKey
        dbRow.getString(2), // headerMessageID
      ];
      this.idsAndMessageKeys.push(triple);
    }
  },
  handleError(aError) {
    GlodaDatastore._log.error(
      "CompactionBlockFetcherHandler error: " +
        aError.result +
        ": " +
        aError.message
    );
  },
  handleCompletion(aReason) {
    // Release the pending-statement slot before delivering the batch.
    GlodaDatastore._asyncCompleted();
    this.callback(this.idsAndMessageKeys);
  },
};

/**
 * Use this as the callback handler when you have a SQL query that returns a
 * single row with a single integer column value, like a COUNT() query.
 */
function SingletonResultValueHandler(aCallback) {
  this.callback = aCallback;
  this.result = null;
  GlodaDatastore._pendingAsyncStatements++;
}
SingletonResultValueHandler.prototype = {
  handleResult(aResultSet) {
    // The query is expected to yield one row; keep the last value seen.
    for (
      let dbRow = aResultSet.getNextRow();
      dbRow;
      dbRow = aResultSet.getNextRow()
    ) {
      this.result = dbRow.getInt64(0);
    }
  },
  handleError(aError) {
    GlodaDatastore._log.error(
      "SingletonResultValueHandler error: " +
        aError.result +
        ": " +
        aError.message
    );
  },
  handleCompletion(aReason) {
    GlodaDatastore._asyncCompleted();
    this.callback(this.result);
  },
};

/**
 * Wrapper that duplicates actions taken on a real statement to an explain
 * statement. Currently only fires an explain statement once.
 */
/**
 * Mirrors every action performed on a real async statement onto a companion
 * EXPLAIN statement so the query plan can be captured. The EXPLAIN statement
 * is executed (and finalized) exactly once — on the first executeAsync — with
 * all bindings made up to that point duplicated onto it.
 *
 * @param {object} aRealStatement - The statement doing the actual work.
 * @param {object} aExplainStatement - The EXPLAIN twin of the same SQL.
 * @param {string} aSQLString - The SQL text, reported to the explain handler.
 * @param {object} aExplainHandler - Receives sqlEnRoute() plus the EXPLAIN
 *   statement's async callbacks.
 */
function ExplainedStatementWrapper(
  aRealStatement,
  aExplainStatement,
  aSQLString,
  aExplainHandler
) {
  this.real = aRealStatement;
  this.explain = aExplainStatement;
  this.sqlString = aSQLString;
  this.explainHandler = aExplainHandler;
  // Becomes true once the one-shot EXPLAIN has been fired and finalized.
  this.done = false;
}
ExplainedStatementWrapper.prototype = {
  bindByIndex(aColIndex, aValue) {
    this.real.bindByIndex(aColIndex, aValue);
    if (this.done) {
      return;
    }
    this.explain.bindByIndex(aColIndex, aValue);
  },
  executeAsync(aCallback) {
    if (!this.done) {
      // First execution: announce the SQL, fire the EXPLAIN once, and retire it.
      this.explainHandler.sqlEnRoute(this.sqlString);
      this.explain.executeAsync(this.explainHandler);
      this.explain.finalize();
      this.done = true;
    }
    return this.real.executeAsync(aCallback);
  },
  finalize() {
    // If the EXPLAIN never ran, it still owns resources; release them too.
    if (!this.done) {
      this.explain.finalize();
    }
    this.real.finalize();
  },
};

/**
 * Writes a single JSON document to the provided file path in a streaming
 * fashion. At startup we open an array to place the queries in and at
 * shutdown we close it.
 */
/**
 * Receives the EXPLAIN output routed through ExplainedStatementWrapper and
 * streams it, as one JSON document, to the dump file at aDumpPath. Observes
 * "quit-application" so the JSON array can be closed at shutdown.
 */
function ExplainedStatementProcessor(aDumpPath) {
  Services.obs.addObserver(this, "quit-application");

  // SQL strings queue up in FIFO order; each handleCompletion consumes one.
  this._sqlStack = [];
  this._curOps = [];
  this._objsWritten = 0;

  let filePath = Cc["@mozilla.org/file/local;1"].createInstance(Ci.nsIFile);
  filePath.initWithPath(aDumpPath);

  this._ostream = Cc[
    "@mozilla.org/network/file-output-stream;1"
  ].createInstance(Ci.nsIFileOutputStream);
  this._ostream.init(filePath, -1, -1, 0);

  // Open the JSON document; shutdown() writes the matching "]}" terminator.
  let s = '{"queries": [';
  this._ostream.write(s, s.length);
}
ExplainedStatementProcessor.prototype = {
  // Called by ExplainedStatementWrapper just before its EXPLAIN executes.
  sqlEnRoute(aSQLString) {
    this._sqlStack.push(aSQLString);
  },
  handleResult(aResultSet) {
    let row;
    // addr opcode (s) p1 p2 p3 p4 (s) p5 comment (s)
    while ((row = aResultSet.getNextRow())) {
      this._curOps.push([
        row.getInt64(0), // addr
        row.getString(1), // opcode
        row.getInt64(2), // p1
        row.getInt64(3), // p2
        row.getInt64(4), // p3
        row.getString(5), // p4
        row.getString(6), // p5
        row.getString(7), // comment
      ]);
    }
  },
  handleError(aError) {
    console.error("Unexpected error in EXPLAIN handler: " + aError);
  },
  handleCompletion(aReason) {
    let obj = {
      sql: this._sqlStack.shift(),
      operations: this._curOps,
    };
    // Comma-separate every object after the first to keep the array valid.
    let s = (this._objsWritten++ ? ", " : "") + JSON.stringify(obj, null, 2);
    // NOTE(review): write() takes a byte count but s.length counts UTF-16
    // code units; this would truncate if the SQL/plan ever contained
    // non-ASCII characters — presumably it never does here, but confirm.
    this._ostream.write(s, s.length);

    this._curOps = [];
  },

  observe(aSubject, aTopic, aData) {
    if (aTopic == "quit-application") {
      this.shutdown();
    }
  },

  // Close the JSON array/document and release the stream and observer.
  shutdown() {
    let s = "]}";
    this._ostream.write(s, s.length);
    this._ostream.close();

    Services.obs.removeObserver(this, "quit-application");
  },
};

// See the documentation on GlodaDatastore._schemaVersion to understand these:
var DB_SCHEMA_ACCEPT_LEAVE_LOW = 31,
  DB_SCHEMA_ACCEPT_LEAVE_HIGH = 34,
  DB_SCHEMA_ACCEPT_DOWNGRADE_LOW = 35,
  DB_SCHEMA_ACCEPT_DOWNGRADE_HIGH = 39,
  DB_SCHEMA_DOWNGRADE_DELTA = 5;

/**
 * Database abstraction layer. (Doc comment continues in the following
 * section.)
 */
Contains explicit SQL schemas for our + * fundamental representations (core 'nouns', if you will) as well as + * specialized functions for then dealing with each type of object. At the + * same time, we are beginning to support extension-provided tables, which + * call into question whether we really need our hand-rolled code, or could + * simply improve the extension-provided table case to work for most of our + * hand-rolled cases. + * For now, the argument can probably be made that our explicit schemas and code + * is readable/intuitive (not magic) and efficient (although generic stuff + * could also be made efficient, if slightly evil through use of eval or some + * other code generation mechanism.) + * + * === Data Model Interaction / Dependencies + * + * Dependent on and assumes limited knowledge of the GlodaDataModel.jsm + * implementations. GlodaDataModel.jsm actually has an implicit dependency on + * our implementation, reaching back into the datastore via the _datastore + * attribute which we pass into every instance we create. + * We pass a reference to ourself as we create the GlodaDataModel.jsm instances (and + * they store it as _datastore) because of a half-implemented attempt to make + * it possible to live in a world where we have multiple datastores. This + * would be desirable in the cases where we are dealing with multiple SQLite + * databases. This could be because of per-account global databases or + * some other segmentation. This was abandoned when the importance of + * per-account databases was diminished following public discussion, at least + * for the short-term, but no attempted was made to excise the feature or + * preclude it. (Merely a recognition that it's too much to try and implement + * correct right now, especially because our solution might just be another + * (aggregating) layer on top of things, rather than complicating the lower + * levels.) 
+ * + * === Object Identity / Caching + * + * The issue of object identity is handled by integration with the Collection.jsm + * provided GlodaCollectionManager. By "Object Identity", I mean that we only + * should ever have one object instance alive at a time that corresponds to + * an underlying database row in the database. Where possible we avoid + * performing database look-ups when we can check if the object is already + * present in memory; in practice, this means when we are asking for an object + * by ID. When we cannot avoid a database query, we attempt to make sure that + * we do not return a duplicate object instance, instead replacing it with the + * 'live' copy of the object. (Ideally, we would avoid any redundant + * construction costs, but that is not currently the case.) + * Although you should consult the GlodaCollectionManager for details, the + * general idea is that we have 'collections' which represent views of the + * database (based on a query) which use a single mechanism for double duty. + * The collections are registered with the collection manager via weak + * reference. The first 'duty' is that since the collections may be desired + * to be 'live views' of the data, we want them to update as changes occur. + * The weak reference allows the collection manager to track the 'live' + * collections and update them. The second 'duty' is the caching/object + * identity duty. In theory, every live item should be referenced by at least + * one collection, making it reachable for object identity/caching purposes. + * There is also an explicit (inclusive) caching layer present to both try and + * avoid poor performance from some of the costs of this strategy, as well as + * to try and keep track of objects that are being worked with that are not + * (yet) tracked by a collection. Using a size-bounded cache is clearly not + * a guarantee of correctness for this, but is suspected will work quite well. 
+ * (Well enough to be dangerous because the inevitable failure case will not be + * expected.) + * + * The current strategy may not be the optimal one, feel free to propose and/or + * implement better ones, especially if you have numbers. + * The current strategy is not fully implemented in this file, but the common + * cases are believed to be covered. (Namely, we fail to purge items from the + * cache as they are purged from the database.) + * + * === Things That May Not Be Obvious (Gotchas) + * + * Although the schema includes "triggers", they are currently not used + * and were added when thinking about implementing the feature. We will + * probably implement this feature at some point, which is why they are still + * in there. + * + * We, and the layers above us, are not sufficiently thorough at cleaning out + * data from the database, and may potentially orphan it _as new functionality + * is added in the future at layers above us_. That is, currently we should + * not be leaking database rows, but we may in the future. This is because + * we/the layers above us lack a mechanism to track dependencies based on + * attributes. Say a plugin exists that extracts recipes from messages and + * relates them via an attribute. To do so, it must create new recipe rows + * in its own table as new recipes are discovered. No automatic mechanism + * will purge recipes as their source messages are purged, nor does any + * event-driven mechanism explicitly inform the plugin. (It could infer + * such an event from the indexing/attribute-providing process, or poll the + * states of attributes to accomplish this, but that is not desirable.) This + * needs to be addressed, and may be best addressed at layers above + * GlodaDatastore.jsm. + * + * @namespace + */ +var GlodaDatastore = { + _log: null, + + /* ******************* SCHEMA ******************* */ + + /** + * Schema version policy. IMPORTANT! 
We expect the following potential things + * to happen in the life of gloda that can impact our schema and the ability + * to move between different versions of Thunderbird: + * + * - Fundamental changes to the schema so that two versions of Thunderbird + * cannot use the same global database. To wit, Thunderbird N+1 needs to + * blow away the database of Thunderbird N and reindex from scratch. + * Likewise, Thunderbird N will need to blow away Thunderbird N+1's + * database because it can't understand it. And we can't simply use a + * different file because there would be fatal bookkeeping losses. + * + * - Bidirectional minor schema changes (rare). + * Thunderbird N+1 does something that does not affect Thunderbird N's use + * of the database, and a user switching back to Thunderbird N will not be + * negatively impacted. It will also be fine when they go back to N+1 and + * N+1 will not be missing any vital data. The historic example of this is + * when we added a missing index that was important for performance. In + * that case, Thunderbird N could have potentially left the schema revision + * intact (if there was a safe revision), rather than swapping it on the + * downgrade, compelling N+1 to redo the transform on upgrade. + * + * - Backwards compatible, upgrade-transition minor schema changes. + * Thunderbird N+1 does something that does not require nuking the + * database / a full re-index, but does require processing on upgrade from + * a version of the database previously used by Thunderbird. These changes + * do not impact N's ability to use the database. For example, adding a + * new indexed attribute that affects a small number of messages could be + * handled by issuing a query on upgrade to dirty/index those messages. + * However, if the user goes back to N from N+1, when they upgrade to N+1 + * again, we need to re-index. In this case N would need to have downgrade + * the schema revision. + * + * - Backwards incompatible, minor schema changes. 
+ * Thunderbird N+1 does something that does not require nuking the database + * but will break Thunderbird N's ability to use the database. + * + * - Regression fixes. Sometimes we may land something that screws up + * databases, or the platform changes in a way that breaks our code and we + * had insufficient unit test coverage and so don't detect it until some + * databases have gotten messed up. + * + * Accordingly, every version of Thunderbird has a concept of potential schema + * versions with associated semantics to prepare for the minor schema upgrade + * cases were inter-op is possible. These ranges and their semantics are: + * - accepts and leaves intact. Covers: + * - regression fixes that no longer exist with the landing of the upgrade + * code as long as users never go back a build in the given channel. + * - bidirectional minor schema changes. + * - accepts but downgrades version to self. Covers: + * - backwards compatible, upgrade-transition minor schema changes. + * - nuke range (anything beyond a specific revision needs to be nuked): + * - backwards incompatible, minor scheme changes + * - fundamental changes + * + * + * SO, YOU WANT TO CHANGE THE SCHEMA? + * + * Use the ranges below for Thunderbird 11 as a guide, bumping things as little + * as possible. If we start to use up the "accepts and leaves intact" range + * without majorly changing things up, re-do the numbering acceptance range + * to give us additional runway. + * + * Also, if we keep needing non-nuking upgrades, consider adding an additional + * table to the database that can tell older versions of Thunderbird what to + * do when confronted with a newer database and where it can set flags to tell + * the newer Thunderbird what the older Thunderbird got up to. For example, + * it would be much easier if we just tell Thunderbird N what to do when it's + * confronted with the database. 
+ * + * + * CURRENT STATE OF THE MIGRATION LOGIC: + * + * Thunderbird 11: uses 30 (regression fix from 26) + * - accepts and leaves intact: 31-34 + * - accepts and downgrades by 5: 35-39 + * - nukes: 40+ + */ + _schemaVersion: 30, + // what is the schema in the database right now? + _actualSchemaVersion: 0, + _schema: { + tables: { + // ----- Messages + folderLocations: { + columns: [ + ["id", "INTEGER PRIMARY KEY"], + ["folderURI", "TEXT NOT NULL"], + ["dirtyStatus", "INTEGER NOT NULL"], + ["name", "TEXT NOT NULL"], + ["indexingPriority", "INTEGER NOT NULL"], + ], + + triggers: { + delete: "DELETE from messages WHERE folderID = OLD.id", + }, + }, + + conversations: { + columns: [ + ["id", "INTEGER PRIMARY KEY"], + ["subject", "TEXT NOT NULL"], + ["oldestMessageDate", "INTEGER"], + ["newestMessageDate", "INTEGER"], + ], + + indices: { + subject: ["subject"], + oldestMessageDate: ["oldestMessageDate"], + newestMessageDate: ["newestMessageDate"], + }, + + fulltextColumns: [["subject", "TEXT"]], + + triggers: { + delete: "DELETE from messages WHERE conversationID = OLD.id", + }, + }, + + /** + * A message record correspond to an actual message stored in a folder + * somewhere, or is a ghost record indicating a message that we know + * should exist, but which we have not seen (and which we may never see). + * We represent these ghost messages by storing NULL values in the + * folderID and messageKey fields; this may need to change to other + * sentinel values if this somehow impacts performance. + */ + messages: { + columns: [ + ["id", "INTEGER PRIMARY KEY"], + ["folderID", "INTEGER"], + ["messageKey", "INTEGER"], + // conversationID used to have a REFERENCES but I'm losing it for + // presumed performance reasons and it doesn't do anything for us. 
+ ["conversationID", "INTEGER NOT NULL"], + ["date", "INTEGER"], + // we used to have the parentID, but because of the very real + // possibility of multiple copies of a message with a given + // message-id, the parentID concept is unreliable. + ["headerMessageID", "TEXT"], + ["deleted", "INTEGER NOT NULL default 0"], + ["jsonAttributes", "TEXT"], + // Notability attempts to capture the static 'interestingness' of a + // message as a result of being starred/flagged, labeled, read + // multiple times, authored by someone in your address book or that + // you converse with a lot, etc. + ["notability", "INTEGER NOT NULL default 0"], + ], + + indices: { + messageLocation: ["folderID", "messageKey"], + headerMessageID: ["headerMessageID"], + conversationID: ["conversationID"], + date: ["date"], + deleted: ["deleted"], + }, + + // note: if reordering the columns, you need to change this file's + // row-loading logic, GlodaMsgSearcher.jsm's ranking usages and also the + // column saturations in nsGlodaRankerFunction + fulltextColumns: [ + ["body", "TEXT"], + ["subject", "TEXT"], + ["attachmentNames", "TEXT"], + ["author", "TEXT"], + ["recipients", "TEXT"], + ], + + triggers: { + delete: "DELETE FROM messageAttributes WHERE messageID = OLD.id", + }, + }, + + // ----- Attributes + attributeDefinitions: { + columns: [ + ["id", "INTEGER PRIMARY KEY"], + ["attributeType", "INTEGER NOT NULL"], + ["extensionName", "TEXT NOT NULL"], + ["name", "TEXT NOT NULL"], + ["parameter", "BLOB"], + ], + + triggers: { + delete: "DELETE FROM messageAttributes WHERE attributeID = OLD.id", + }, + }, + + messageAttributes: { + columns: [ + // conversationID and messageID used to have REFERENCES back to their + // appropriate types. I removed it when removing attributeID for + // better reasons and because the code is not capable of violating + // this constraint, so the check is just added cost. (And we have + // unit tests that sanity check my assertions.) 
+ ["conversationID", "INTEGER NOT NULL"], + ["messageID", "INTEGER NOT NULL"], + // This used to be REFERENCES attributeDefinitions(id) but then we + // introduced sentinel values and it's hard to justify the effort + // to compel injection of the record or the overhead to do the + // references checking. + ["attributeID", "INTEGER NOT NULL"], + ["value", "NUMERIC"], + ], + + indices: { + attribQuery: [ + "attributeID", + "value", + /* covering: */ "conversationID", + "messageID", + ], + // This is required for deletion of a message's attributes to be + // performant. We could optimize this index away if we changed our + // deletion logic to issue specific attribute deletions based on the + // information it already has available in the message's JSON blob. + // The rub there is that if we screwed up we could end up leaking + // attributes and there is a non-trivial performance overhead to + // the many requests it would cause (which can also be reduced in + // the future by changing our SQL dispatch code.) + messageAttribFastDeletion: ["messageID"], + }, + }, + + // ----- Contacts / Identities + + /** + * Corresponds to a human being and roughly to an address book entry. + * Contrast with an identity, which is a specific e-mail address, IRC + * nick, etc. Identities belong to contacts, and this relationship is + * expressed on the identityAttributes table. 
+ */ + contacts: { + columns: [ + ["id", "INTEGER PRIMARY KEY"], + ["directoryUUID", "TEXT"], + ["contactUUID", "TEXT"], + ["popularity", "INTEGER"], + ["frecency", "INTEGER"], + ["name", "TEXT"], + ["jsonAttributes", "TEXT"], + ], + indices: { + popularity: ["popularity"], + frecency: ["frecency"], + }, + }, + + contactAttributes: { + columns: [ + ["contactID", "INTEGER NOT NULL"], + ["attributeID", "INTEGER NOT NULL"], + ["value", "NUMERIC"], + ], + indices: { + contactAttribQuery: [ + "attributeID", + "value", + /* covering: */ "contactID", + ], + }, + }, + + /** + * Identities correspond to specific e-mail addresses, IRC nicks, etc. + */ + identities: { + columns: [ + ["id", "INTEGER PRIMARY KEY"], + ["contactID", "INTEGER NOT NULL"], + ["kind", "TEXT NOT NULL"], // ex: email, irc, etc. + ["value", "TEXT NOT NULL"], // ex: e-mail address, irc nick/handle... + ["description", "NOT NULL"], // what makes this identity different + // from the others? (ex: home, work, etc.) + ["relay", "INTEGER NOT NULL"], // is the identity just a relay + // mechanism? (ex: mailing list, twitter 'bouncer', IRC gateway, etc.) + ], + + indices: { + contactQuery: ["contactID"], + valueQuery: ["kind", "value"], + }, + }, + }, + }, + + /* ******************* LOGIC ******************* */ + /** + * We only have one connection; this name exists for legacy reasons but helps + * track when we are intentionally doing synchronous things during startup. + * We do nothing synchronous once our setup has completed. + */ + syncConnection: null, + /** + * We only have one connection and we only do asynchronous things after setup; + * this name still exists mainly for legacy reasons. + */ + asyncConnection: null, + + /** + * Our "mailnews.database.global.datastore." preferences branch for debug + * notification handling. We register as an observer against this. + */ + _prefBranch: null, + + /** + * The unique ID assigned to an index when it has been built. 
This value + * changes once the index has been rebuilt. + */ + _datastoreID: null, + + /** + * Initialize logging, create the database if it doesn't exist, "upgrade" it + * if it does and it's not up-to-date, fill our authoritative folder uri/id + * mapping. + */ + _init(aNounIDToDef) { + this._log = console.createInstance({ + prefix: "gloda.datastore", + maxLogLevel: "Warn", + maxLogLevelPref: "gloda.loglevel", + }); + this._log.debug("Beginning datastore initialization."); + + this._nounIDToDef = aNounIDToDef; + + let branch = Services.prefs.getBranch( + "mailnews.database.global.datastore." + ); + this._prefBranch = branch; + + // Not sure the weak reference really makes a difference given that we are a + // GC root. + branch.addObserver("", this); + // claim the pref changed so we can centralize our logic there. + this.observe(null, "nsPref:changed", "explainToPath"); + + // Get the path to our global database + var dbFile = Services.dirsvc.get("ProfD", Ci.nsIFile); + dbFile.append("global-messages-db.sqlite"); + + var dbConnection; + + // Report about the size of the database through telemetry (if there's a + // database, naturally). + if (dbFile.exists()) { + try { + let h = Services.telemetry.getHistogramById( + "THUNDERBIRD_GLODA_SIZE_MB" + ); + h.add(dbFile.fileSize / 1048576); + } catch (e) { + this._log.warn("Couldn't report telemetry", e); + } + } + + // Create the file if it does not exist + if (!dbFile.exists()) { + this._log.debug("Creating database because it doesn't exist."); + dbConnection = this._createDB(dbFile); + } else { + // It does exist, but we (someday) might need to upgrade the schema + // (Exceptions may be thrown if the database is corrupt) + try { + dbConnection = Services.storage.openUnsharedDatabase(dbFile); + let cacheSize = this._determineCachePages(dbConnection); + // see _createDB... 
+ dbConnection.executeSimpleSQL("PRAGMA cache_size = " + cacheSize); + dbConnection.executeSimpleSQL("PRAGMA synchronous = FULL"); + + // Register custom tokenizer to index all language text + var tokenizer = Cc["@mozilla.org/messenger/fts3tokenizer;1"].getService( + Ci.nsIFts3Tokenizer + ); + tokenizer.registerTokenizer(dbConnection); + + // -- database schema changes + let dbSchemaVersion = (this._actualSchemaVersion = + dbConnection.schemaVersion); + // - database from the future! + if (dbSchemaVersion > this._schemaVersion) { + if ( + dbSchemaVersion >= DB_SCHEMA_ACCEPT_LEAVE_LOW && + dbSchemaVersion <= DB_SCHEMA_ACCEPT_LEAVE_HIGH + ) { + this._log.debug( + "db from the future in acceptable range; leaving " + + "version at: " + + dbSchemaVersion + ); + } else if ( + dbSchemaVersion >= DB_SCHEMA_ACCEPT_DOWNGRADE_LOW && + dbSchemaVersion <= DB_SCHEMA_ACCEPT_DOWNGRADE_HIGH + ) { + let newVersion = dbSchemaVersion - DB_SCHEMA_DOWNGRADE_DELTA; + this._log.debug( + "db from the future in downgrade range; setting " + + "version to " + + newVersion + + " down from " + + dbSchemaVersion + ); + dbConnection.schemaVersion = this._actualSchemaVersion = newVersion; + } else { + // too far from the future, nuke it. + dbConnection = this._nukeMigration(dbFile, dbConnection); + } + } else if (dbSchemaVersion < this._schemaVersion) { + // - database from the past! migrate it, possibly. + this._log.debug( + "Need to migrate database. (DB version: " + + this._actualSchemaVersion + + " desired version: " + + this._schemaVersion + ); + dbConnection = this._migrate( + dbFile, + dbConnection, + this._actualSchemaVersion, + this._schemaVersion + ); + this._log.debug("Migration call completed."); + } + // else: this database is juuust right. + + // If we never had a datastore ID, make sure to create one now. 
+ if (!this._prefBranch.prefHasUserValue("id")) { + this._datastoreID = this._generateDatastoreID(); + this._prefBranch.setCharPref("id", this._datastoreID); + } else { + this._datastoreID = this._prefBranch.getCharPref("id"); + } + } catch (ex) { + // Handle corrupt databases, other oddities + if (ex.result == Cr.NS_ERROR_FILE_CORRUPTED) { + this._log.warn("Database was corrupt, removing the old one."); + dbFile.remove(false); + this._log.warn("Removed old database, creating a new one."); + dbConnection = this._createDB(dbFile); + } else { + this._log.error( + "Unexpected error when trying to open the database:", + ex + ); + throw ex; + } + } + } + + this.syncConnection = dbConnection; + this.asyncConnection = dbConnection; + + this._log.debug("Initializing folder mappings."); + this._getAllFolderMappings(); + // we need to figure out the next id's for all of the tables where we + // manage that. + this._log.debug("Populating managed id counters."); + this._populateAttributeDefManagedId(); + this._populateConversationManagedId(); + this._populateMessageManagedId(); + this._populateContactManagedId(); + this._populateIdentityManagedId(); + + this._log.debug("Completed datastore initialization."); + }, + + observe(aSubject, aTopic, aData) { + if (aTopic != "nsPref:changed") { + return; + } + + if (aData == "explainToPath") { + let explainToPath = null; + try { + explainToPath = this._prefBranch.getCharPref("explainToPath"); + if (explainToPath.trim() == "") { + explainToPath = null; + } + } catch (ex) { + // don't care if the pref is not there. + } + + // It is conceivable that the name is changing and this isn't a boolean + // toggle, so always clean out the explain processor. 
+ if (this._explainProcessor) { + this._explainProcessor.shutdown(); + this._explainProcessor = null; + } + + if (explainToPath) { + this._createAsyncStatement = this._createExplainedAsyncStatement; + this._explainProcessor = new ExplainedStatementProcessor(explainToPath); + } else { + this._createAsyncStatement = this._realCreateAsyncStatement; + } + } + }, + + datastoreIsShutdown: false, + + /** + * Perform datastore shutdown. + */ + shutdown() { + // Clear out any pending transaction by committing it. + // The indexer has been shutdown by this point; it no longer has any active + // indexing logic and it no longer has active event listeners capable of + // generating new activity. + // Semantic consistency of the database is guaranteed by the indexer's + // strategy of only yielding control at coherent times. Although it takes + // multiple calls and multiple SQL operations to update the state of our + // database representations, the generator does not yield until it has + // issued all the database statements required for said update. As such, + // this commit will leave us in a good way (and the commit will happen + // because closing the connection will drain the async execution queue.) + while (this._transactionDepth) { + this._log.info("Closing pending transaction out for shutdown."); + // just schedule this function to be run again once the transaction has + // been closed out. + this._commitTransaction(); + } + + this.datastoreIsShutdown = true; + + this._log.info("Closing db connection"); + + // we do not expect exceptions, but it's a good idea to avoid having our + // shutdown process explode. + try { + this._cleanupAsyncStatements(); + this._cleanupSyncStatements(); + } catch (ex) { + this._log.debug("Unexpected exception during statement cleanup: " + ex); + } + + // it's conceivable we might get a spurious exception here, but we really + // shouldn't get one. again, we want to ensure shutdown runs to completion + // and doesn't break our caller. 
+ try { + // This currently causes all pending asynchronous operations to be run to + // completion. this simplifies things from a correctness perspective, + // and, honestly, is a lot easier than us tracking all of the async + // event tasks so that we can explicitly cancel them. + // This is a reasonable thing to do because we don't actually ever have + // a huge number of statements outstanding. The indexing process needs + // to issue async requests periodically, so the most we have in-flight + // from a write perspective is strictly less than the work required to + // update the database state for a single message. + // However, the potential for multiple pending expensive queries does + // exist, and it may be advisable to attempt to track and cancel those. + // For simplicity we don't currently do this, and I expect this should + // not pose a major problem, but those are famous last words. + // Note: asyncClose does not spin a nested event loop, but the thread + // manager shutdown code will spin the async thread's event loop, so it + // nets out to be the same. + this.asyncConnection.asyncClose(); + } catch (ex) { + this._log.debug( + "Potentially expected exception during connection closure: " + ex + ); + } + + this.asyncConnection = null; + this.syncConnection = null; + }, + + /** + * Generates and returns a UUID. + * + * @returns a UUID as a string, ex: "c4dd0159-9287-480f-a648-a4613e147fdb" + */ + _generateDatastoreID() { + let uuid = Services.uuid.generateUUID().toString(); + // We snip off the { and } from each end of the UUID. + return uuid.substring(1, uuid.length - 2); + }, + + _determineCachePages(aDBConn) { + try { + // For the details of the computations, one should read + // nsNavHistory::InitDB. We're slightly diverging from them in the sense + // that we won't allow gloda to use insane amounts of memory cache, and + // we start with 1% instead of 6% like them. 
+ let pageStmt = aDBConn.createStatement("PRAGMA page_size"); + pageStmt.executeStep(); + let pageSize = pageStmt.row.page_size; + pageStmt.finalize(); + let cachePermillage = this._prefBranch.getIntPref( + "cache_to_memory_permillage" + ); + cachePermillage = Math.min(cachePermillage, 50); + cachePermillage = Math.max(cachePermillage, 0); + let physMem = Services.sysinfo.getPropertyAsInt64("memsize"); + if (physMem == 0) { + physMem = MEMSIZE_FALLBACK_BYTES; + } + let cacheSize = Math.round((physMem * cachePermillage) / 1000); + cacheSize = Math.max(cacheSize, MIN_CACHE_SIZE); + cacheSize = Math.min(cacheSize, MAX_CACHE_SIZE); + let cachePages = Math.round(cacheSize / pageSize); + return cachePages; + } catch (ex) { + this._log.warn("Error determining cache size: " + ex); + // A little bit lower than on my personal machine, will result in ~40M. + return 1000; + } + }, + + /** + * Create our database; basically a wrapper around _createSchema. + */ + _createDB(aDBFile) { + var dbConnection = Services.storage.openUnsharedDatabase(aDBFile); + // We now follow the Firefox strategy for places, which mainly consists in + // picking a default 32k page size, and then figuring out the amount of + // cache accordingly. The default 32k come from mozilla/toolkit/storage, + // but let's get it directly from sqlite in case they change it. + let cachePages = this._determineCachePages(dbConnection); + // This is a maximum number of pages to be used. If the database does not + // get this large, then the memory does not get used. + // Do not forget to update the code in _init if you change this value. + dbConnection.executeSimpleSQL("PRAGMA cache_size = " + cachePages); + // The mozStorage default is NORMAL which shaves off some fsyncs in the + // interest of performance. Since everything we do after bootstrap is + // async, we do not care about the performance, but we really want the + // correctness. 
Bug reports and support avenues indicate a non-zero number + // of corrupt databases. Note that this may not fix everything; OS X + // also supports an F_FULLSYNC flag enabled by PRAGMA fullfsync that we are + // not enabling that is much more comprehensive. We can think about + // turning that on after we've seen how this reduces our corruption count. + dbConnection.executeSimpleSQL("PRAGMA synchronous = FULL"); + // Register custom tokenizer to index all language text + var tokenizer = Cc["@mozilla.org/messenger/fts3tokenizer;1"].getService( + Ci.nsIFts3Tokenizer + ); + tokenizer.registerTokenizer(dbConnection); + + // We're creating a new database, so let's generate a new ID for this + // version of the datastore. This way, indexers can know when the index + // has been rebuilt in the event that they need to rebuild dependent data. + this._datastoreID = this._generateDatastoreID(); + this._prefBranch.setCharPref("id", this._datastoreID); + + dbConnection.beginTransaction(); + try { + this._createSchema(dbConnection); + dbConnection.commitTransaction(); + } catch (ex) { + dbConnection.rollbackTransaction(); + throw ex; + } + + return dbConnection; + }, + + _createTableSchema(aDBConnection, aTableName, aTableDef) { + // - Create the table + this._log.info("Creating table: " + aTableName); + let columnDefs = []; + for (let [column, type] of aTableDef.columns) { + columnDefs.push(column + " " + type); + } + aDBConnection.createTable(aTableName, columnDefs.join(", ")); + + // - Create the fulltext table if applicable + if (aTableDef.fulltextColumns) { + let columnDefs = []; + for (let [column, type] of aTableDef.fulltextColumns) { + columnDefs.push(column + " " + type); + } + let createFulltextSQL = + "CREATE VIRTUAL TABLE " + + aTableName + + "Text" + + " USING fts3(tokenize mozporter, " + + columnDefs.join(", ") + + ")"; + this._log.info("Creating fulltext table: " + createFulltextSQL); + aDBConnection.executeSimpleSQL(createFulltextSQL); + } + + // - Create its 
indices + if (aTableDef.indices) { + for (let indexName in aTableDef.indices) { + let indexColumns = aTableDef.indices[indexName]; + aDBConnection.executeSimpleSQL( + "CREATE INDEX " + + indexName + + " ON " + + aTableName + + "(" + + indexColumns.join(", ") + + ")" + ); + } + } + + // - Create the attributes table if applicable + if (aTableDef.genericAttributes) { + aTableDef.genericAttributes = { + columns: [ + ["nounID", "INTEGER NOT NULL"], + ["attributeID", "INTEGER NOT NULL"], + ["value", "NUMERIC"], + ], + indices: {}, + }; + aTableDef.genericAttributes.indices[aTableName + "AttribQuery"] = [ + "attributeID", + "value", + /* covering: */ "nounID", + ]; + // let's use this very function! (since we created genericAttributes, + // explodey recursion is avoided.) + this._createTableSchema( + aDBConnection, + aTableName + "Attributes", + aTableDef.genericAttributes + ); + } + }, + + /** + * Create our database schema assuming a newly created database. This + * comes down to creating normal tables, their full-text variants (if + * applicable), and their indices. + */ + _createSchema(aDBConnection) { + // -- For each table... + for (let tableName in this._schema.tables) { + let tableDef = this._schema.tables[tableName]; + this._createTableSchema(aDBConnection, tableName, tableDef); + } + + aDBConnection.schemaVersion = this._actualSchemaVersion = + this._schemaVersion; + }, + + /** + * Create a table for a noun, replete with data binding. + */ + createNounTable(aNounDef) { + // give it a _jsonText attribute if appropriate... + if (aNounDef.allowsArbitraryAttrs) { + aNounDef.schema.columns.push(["jsonAttributes", "STRING", "_jsonText"]); + } + // check if the table exists + if (!this.asyncConnection.tableExists(aNounDef.tableName)) { + // it doesn't! 
create it (and its potentially many variants) + try { + this._createTableSchema( + this.asyncConnection, + aNounDef.tableName, + aNounDef.schema + ); + } catch (ex) { + this._log.error( + "Problem creating table " + + aNounDef.tableName + + " " + + "because: " + + ex + + " at " + + ex.fileName + + ":" + + ex.lineNumber + ); + return; + } + } + + aNounDef._dataBinder = new GlodaDatabind(aNounDef, this); + aNounDef.datastore = aNounDef._dataBinder; + aNounDef.objFromRow = aNounDef._dataBinder.objFromRow; + aNounDef.objInsert = aNounDef._dataBinder.objInsert; + aNounDef.objUpdate = aNounDef._dataBinder.objUpdate; + aNounDef.dbAttribAdjuster = aNounDef._dataBinder.adjustAttributes; + + if (aNounDef.schema.genericAttributes) { + aNounDef.attrTableName = aNounDef.tableName + "Attributes"; + aNounDef.attrIDColumnName = "nounID"; + } + }, + + _nukeMigration(aDBFile, aDBConnection) { + aDBConnection.close(); + aDBFile.remove(false); + this._log.warn( + "Global database has been purged due to schema change. " + + "old version was " + + this._actualSchemaVersion + + ", new version is: " + + this._schemaVersion + ); + return this._createDB(aDBFile); + }, + + /** + * Migrate the database _to the latest version_ from an older version. We + * only keep enough logic around to get us to the recent version. This code + * is not a time machine! If we need to blow away the database to get to the + * most recent version, then that's the sum total of the migration! + */ + _migrate(aDBFile, aDBConnection, aCurVersion, aNewVersion) { + // version 12: + // - notability column added + // version 13: + // - we are adding a new fulltext index column. blow away! + // - note that I screwed up and failed to mark the schema change; apparently + // no database will claim to be version 13... 
+ // version 14ish, still labeled 13?: + // - new attributes: forwarded, repliedTo, bcc, recipients + // - altered fromMeTo and fromMeCc to fromMe + // - altered toMe and ccMe to just be toMe + // - exposes bcc to cc-related attributes + // - MIME type DB schema overhaul + // version 15ish, still labeled 13: + // - change tokenizer to mozporter to support CJK + // (We are slip-streaming this so that only people who want to test CJK + // have to test it. We will properly bump the schema revision when the + // gloda correctness patch lands.) + // version 16ish, labeled 14 and now 16 + // - gloda message id's start from 32 now + // - all kinds of correctness changes (blow away) + // version 17 + // - more correctness fixes. (blow away) + // version 18 + // - significant empty set support (blow away) + // version 19 + // - there was a typo that was resulting in deleted getting set to the + // numeric value of the javascript undefined value. (migrate-able) + // version 20 + // - tokenizer changes to provide for case/accent-folding. (blow away) + // version 21 + // - add the messagesAttribFastDeletion index we thought was already covered + // by an index we removed a while ago (migrate-able) + // version 26 + // - bump page size and also cache size (blow away) + // version 30 + // - recover from bug 732372 that affected TB 11 beta / TB 12 alpha / TB 13 + // trunk. The fix is bug 734507. The revision bump happens + // asynchronously. (migrate-able) + + // nuke if prior to 26 + if (aCurVersion < 26) { + return this._nukeMigration(aDBFile, aDBConnection); + } + + // They must be desiring our "a.contact is undefined" fix! + // This fix runs asynchronously as the first indexing job the indexer ever + // performs. It is scheduled by the enabling of the message indexer and + // it is the one that updates the schema version when done. + + // return the same DB connection since we didn't create a new one or do + // anything. 
+ return aDBConnection; + }, + + /** + * Asynchronously update the schema version; only for use by in-tree callers + * who asynchronously perform migration work triggered by their initial + * indexing sweep and who have properly updated the schema version in all + * the appropriate locations in this file. + * + * This is done without doing anything about the current transaction state, + * which is desired. + */ + _updateSchemaVersion(newSchemaVersion) { + this._actualSchemaVersion = newSchemaVersion; + let stmt = this._createAsyncStatement( + // we need to concat; pragmas don't like "?1" binds + "PRAGMA user_version = " + newSchemaVersion, + true + ); + stmt.executeAsync(this.trackAsync()); + stmt.finalize(); + }, + + _outstandingAsyncStatements: [], + + /** + * Unless debugging, this is just _realCreateAsyncStatement, but in some + * debugging modes this is instead the helpful wrapper + * _createExplainedAsyncStatement. + */ + _createAsyncStatement: null, + + _realCreateAsyncStatement(aSQLString, aWillFinalize) { + let statement = null; + try { + statement = this.asyncConnection.createAsyncStatement(aSQLString); + } catch (ex) { + throw new Error( + "error creating async statement " + + aSQLString + + " - " + + this.asyncConnection.lastError + + ": " + + this.asyncConnection.lastErrorString + + " - " + + ex + ); + } + + if (!aWillFinalize) { + this._outstandingAsyncStatements.push(statement); + } + + return statement; + }, + + /** + * The ExplainedStatementProcessor instance used by + * _createExplainedAsyncStatement. This will be null if + * _createExplainedAsyncStatement is not being used as _createAsyncStatement. + */ + _explainProcessor: null, + + /** + * Wrapped version of _createAsyncStatement that EXPLAINs the statement. When + * used this decorates _createAsyncStatement, in which case we are found at + * that name and the original is at _orig_createAsyncStatement. This is + * controlled by the explainToPath preference (see |_init|). 
+ */ + _createExplainedAsyncStatement(aSQLString, aWillFinalize) { + let realStatement = this._realCreateAsyncStatement( + aSQLString, + aWillFinalize + ); + // don't wrap transaction control statements. + if ( + aSQLString == "COMMIT" || + aSQLString == "BEGIN TRANSACTION" || + aSQLString == "ROLLBACK" + ) { + return realStatement; + } + + let explainSQL = "EXPLAIN " + aSQLString; + let explainStatement = this._realCreateAsyncStatement(explainSQL); + + return new ExplainedStatementWrapper( + realStatement, + explainStatement, + aSQLString, + this._explainProcessor + ); + }, + + _cleanupAsyncStatements() { + this._outstandingAsyncStatements.forEach(stmt => stmt.finalize()); + }, + + _outstandingSyncStatements: [], + + _createSyncStatement(aSQLString, aWillFinalize) { + let statement = null; + try { + statement = this.syncConnection.createStatement(aSQLString); + } catch (ex) { + throw new Error( + "error creating sync statement " + + aSQLString + + " - " + + this.syncConnection.lastError + + ": " + + this.syncConnection.lastErrorString + + " - " + + ex + ); + } + + if (!aWillFinalize) { + this._outstandingSyncStatements.push(statement); + } + + return statement; + }, + + _cleanupSyncStatements() { + this._outstandingSyncStatements.forEach(stmt => stmt.finalize()); + }, + + /** + * Perform a synchronous executeStep on the statement, handling any + * SQLITE_BUSY fallout that could conceivably happen from a collision on our + * read with the async writes. + * Basically we keep trying until we succeed or run out of tries. + * We believe this to be a reasonable course of action because we don't + * expect this to happen much. + */ + _syncStep(aStatement) { + let tries = 0; + while (tries < 32000) { + try { + return aStatement.executeStep(); + } catch (e) { + // SQLITE_BUSY becomes NS_ERROR_FAILURE + if (e.result == Cr.NS_ERROR_FAILURE) { + tries++; + // we really need to delay here, somehow. 
unfortunately, we can't + // allow event processing to happen, and most of the things we could + // do to delay ourselves result in event processing happening. (Use + // of a timer, a synchronous dispatch, etc.) + // in theory, nsIThreadEventFilter could allow us to stop other events + // that aren't our timer from happening, but it seems slightly + // dangerous and 'notxpcom' suggests it ain't happening anyways... + // so, let's just be dumb and hope that the underlying file I/O going + // on makes us more likely to yield to the other thread so it can + // finish what it is doing... + } else { + throw e; + } + } + } + this._log.error("Synchronous step gave up after " + tries + " tries."); + return false; + }, + + _bindVariant(aStatement, aIndex, aVariant) { + aStatement.bindByIndex(aIndex, aVariant); + }, + + /** + * Helper that uses the appropriate getter given the data type; should be + * mooted once we move to 1.9.2 and can use built-in variant support. + */ + _getVariant(aRow, aIndex) { + let typeOfIndex = aRow.getTypeOfIndex(aIndex); + if (typeOfIndex == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) { + // XPConnect would just end up going through an intermediary double stage + // for the int64 case anyways... + return null; + } + if ( + typeOfIndex == Ci.mozIStorageValueArray.VALUE_TYPE_INTEGER || + typeOfIndex == Ci.mozIStorageValueArray.VALUE_TYPE_DOUBLE + ) { + return aRow.getDouble(aIndex); + } + // typeOfIndex == Ci.mozIStorageValueArray.VALUE_TYPE_TEXT + return aRow.getString(aIndex); + }, + + /** Simple nested transaction support as a performance optimization. */ + _transactionDepth: 0, + _transactionGood: false, + + /** + * Self-memoizing BEGIN TRANSACTION statement. + */ + get _beginTransactionStatement() { + let statement = this._createAsyncStatement("BEGIN TRANSACTION"); + this.__defineGetter__("_beginTransactionStatement", () => statement); + return this._beginTransactionStatement; + }, + + /** + * Self-memoizing COMMIT statement. 
+ */ + get _commitTransactionStatement() { + let statement = this._createAsyncStatement("COMMIT"); + this.__defineGetter__("_commitTransactionStatement", () => statement); + return this._commitTransactionStatement; + }, + + /** + * Self-memoizing ROLLBACK statement. + */ + get _rollbackTransactionStatement() { + let statement = this._createAsyncStatement("ROLLBACK"); + this.__defineGetter__("_rollbackTransactionStatement", () => statement); + return this._rollbackTransactionStatement; + }, + + _pendingPostCommitCallbacks: null, + /** + * Register a callback to be invoked when the current transaction's commit + * completes. + */ + runPostCommit(aCallback) { + this._pendingPostCommitCallbacks.push(aCallback); + }, + + /** + * Begin a potentially nested transaction; only the outermost transaction gets + * to be an actual transaction, and the failure of any nested transaction + * results in a rollback of the entire outer transaction. If you really + * need an atomic transaction + */ + _beginTransaction() { + if (this._transactionDepth == 0) { + this._pendingPostCommitCallbacks = []; + this._beginTransactionStatement.executeAsync(this.trackAsync()); + this._transactionGood = true; + } + this._transactionDepth++; + }, + /** + * Commit a potentially nested transaction; if we are the outer-most + * transaction and no sub-transaction issues a rollback + * (via _rollbackTransaction) then we commit, otherwise we rollback. + */ + _commitTransaction() { + this._transactionDepth--; + if (this._transactionDepth == 0) { + try { + if (this._transactionGood) { + this._commitTransactionStatement.executeAsync( + new PostCommitHandler(this._pendingPostCommitCallbacks) + ); + } else { + this._rollbackTransactionStatement.executeAsync(this.trackAsync()); + } + } catch (ex) { + this._log.error("Commit problem:", ex); + } + this._pendingPostCommitCallbacks = []; + } + }, + /** + * Abort the commit of the potentially nested transaction. 
If we are not the + * outermost transaction, we set a flag that tells the outermost transaction + * that it must roll back. + */ + _rollbackTransaction() { + this._transactionDepth--; + this._transactionGood = false; + if (this._transactionDepth == 0) { + try { + this._rollbackTransactionStatement.executeAsync(this.trackAsync()); + } catch (ex) { + this._log.error("Rollback problem:", ex); + } + } + }, + + _pendingAsyncStatements: 0, + /** + * The function to call, if any, when we hit 0 pending async statements. + */ + _pendingAsyncCompletedListener: null, + _asyncCompleted() { + if (--this._pendingAsyncStatements == 0) { + if (this._pendingAsyncCompletedListener !== null) { + this._pendingAsyncCompletedListener(); + this._pendingAsyncCompletedListener = null; + } + } + }, + _asyncTrackerListener: { + handleResult() {}, + handleError(aError) { + GlodaDatastore._log.error( + "got error in _asyncTrackerListener.handleError(): " + + aError.result + + ": " + + aError.message + ); + }, + handleCompletion() { + try { + // the helper method exists because the other classes need to call it too + GlodaDatastore._asyncCompleted(); + } catch (e) { + this._log.error("Exception in handleCompletion:", e); + } + }, + }, + /** + * Increments _pendingAsyncStatements and returns a listener that will + * decrement the value when the statement completes. + */ + trackAsync() { + this._pendingAsyncStatements++; + return this._asyncTrackerListener; + }, + + /* ********** Attribute Definitions ********** */ + /** Maps (attribute def) compound names to the GlodaAttributeDBDef objects. */ + _attributeDBDefs: {}, + /** Map attribute ID to the definition and parameter value that produce it. */ + _attributeIDToDBDefAndParam: {}, + + /** + * This attribute id indicates that we are encoding that a non-singular + * attribute has an empty set. The value payload that goes with this should + * the attribute id of the attribute we are talking about. 
   */
  kEmptySetAttrId: 1,

  /**
   * We maintain the attributeDefinitions next id counter mainly because we can.
   *  Since we mediate the access, there's no real risk to doing so, and it
   *  allows us to keep the writes on the async connection without having to
   *  wait for a completion notification.
   *
   * Start from 32 so we can have a number of sentinel values.
   */
  _nextAttributeId: 32,

  // Seed _nextAttributeId from the highest id already in the
  // attributeDefinitions table (synchronous; used during initialization).
  _populateAttributeDefManagedId() {
    let stmt = this._createSyncStatement(
      "SELECT MAX(id) FROM attributeDefinitions",
      true
    );
    if (stmt.executeStep()) {
      // no chance of this SQLITE_BUSY on this call
      // 0 gets returned even if there are no messages...
      let highestSeen = stmt.getInt64(0);
      if (highestSeen != 0) {
        this._nextAttributeId = highestSeen + 1;
      }
    }
    stmt.finalize();
  },

  /** Self-memoizing INSERT statement for new attribute definitions. */
  get _insertAttributeDefStatement() {
    let statement = this._createAsyncStatement(
      "INSERT INTO attributeDefinitions (id, attributeType, extensionName, \
                                         name, parameter) \
              VALUES (?1, ?2, ?3, ?4, ?5)"
    );
    this.__defineGetter__("_insertAttributeDefStatement", () => statement);
    return this._insertAttributeDefStatement;
  },

  /**
   * Create an attribute definition and return the row ID.  Special/atypical
   *  in that it doesn't directly return a GlodaAttributeDBDef; we leave that up
   *  to the caller since they know much more than actually needs to go in the
   *  database.
   *
   * @returns The attribute id allocated to this attribute.
   */
  _createAttributeDef(aAttrType, aExtensionName, aAttrName, aParameter) {
    let attributeId = this._nextAttributeId++;

    let iads = this._insertAttributeDefStatement;
    iads.bindByIndex(0, attributeId);
    iads.bindByIndex(1, aAttrType);
    iads.bindByIndex(2, aExtensionName);
    iads.bindByIndex(3, aAttrName);
    // parameter is bound as a variant since it may be null or any simple type
    this._bindVariant(iads, 4, aParameter);

    iads.executeAsync(this.trackAsync());

    return attributeId;
  },

  /**
   * Sync-ly look-up all the attribute definitions, populating our authoritative
   *  _attributeDBDefss and _attributeIDToDBDefAndParam maps.  (In other words,
   *  once this method is called, those maps should always be in sync with the
   *  underlying database.)
   */
  getAllAttributes() {
    let stmt = this._createSyncStatement(
      "SELECT id, attributeType, extensionName, name, parameter \
         FROM attributeDefinitions",
      true
    );

    // map compound name to the attribute
    let attribs = {};
    // map the attribute id to [attribute, parameter] where parameter is null
    //  in cases where parameter is unused.
    let idToAttribAndParam = {};

    this._log.info("loading all attribute defs");

    while (stmt.executeStep()) {
      // no chance of this SQLITE_BUSY on this call
      let rowId = stmt.getInt64(0);
      let rowAttributeType = stmt.getInt64(1);
      let rowExtensionName = stmt.getString(2);
      let rowName = stmt.getString(3);
      let rowParameter = this._getVariant(stmt, 4);

      // rows for the same attribute (different parameter bindings) share a
      // compound "extension:name" key
      let compoundName = rowExtensionName + ":" + rowName;

      let attrib;
      if (compoundName in attribs) {
        attrib = attribs[compoundName];
      } else {
        attrib = new GlodaAttributeDBDef(
          this,
          /* aID */ null,
          compoundName,
          rowAttributeType,
          rowExtensionName,
          rowName
        );
        attribs[compoundName] = attrib;
      }
      // if the parameter is null, the id goes on the attribute def, otherwise
      //  it is a parameter binding and goes in the binding map.
      if (rowParameter == null) {
        this._log.debug(compoundName + " primary: " + rowId);
        attrib._id = rowId;
        idToAttribAndParam[rowId] = [attrib, null];
      } else {
        this._log.debug(
          compoundName + " binding: " + rowParameter + " = " + rowId
        );
        attrib._parameterBindings[rowParameter] = rowId;
        idToAttribAndParam[rowId] = [attrib, rowParameter];
      }
    }
    stmt.finalize();

    this._log.info("done loading all attribute defs");

    // swap in the freshly built authoritative maps
    this._attributeDBDefs = attribs;
    this._attributeIDToDBDefAndParam = idToAttribAndParam;
  },

  /**
   * Helper method for GlodaAttributeDBDef to tell us when their bindParameter
   *  method is called and they have created a new binding (using
   *  GlodaDatastore._createAttributeDef).  In theory, that method could take
   *  an additional argument and obviate the need for this method.
   */
  reportBinding(aID, aAttrDef, aParamValue) {
    this._attributeIDToDBDefAndParam[aID] = [aAttrDef, aParamValue];
  },

  /* ********** Folders ********** */
  /** next folder (row) id to issue, populated by _getAllFolderMappings. */
  _nextFolderId: 1,

  /** Self-memoizing INSERT statement for new folder mappings. */
  get _insertFolderLocationStatement() {
    let statement = this._createAsyncStatement(
      "INSERT INTO folderLocations (id, folderURI, dirtyStatus, name, \
                                    indexingPriority) VALUES \
        (?1, ?2, ?3, ?4, ?5)"
    );
    this.__defineGetter__("_insertFolderLocationStatement", () => statement);
    return this._insertFolderLocationStatement;
  },

  /**
   * Authoritative map from folder URI to folder ID.  (Authoritative in the
   *  sense that this map exactly represents the state of the underlying
   *  database.  If it does not, it's a bug in updating the database.)
   */
  _folderByURI: {},
  /** Authoritative map from folder ID to folder URI */
  _folderByID: {},

  /** Initialize our _folderByURI/_folderByID mappings, called by _init(). */
  _getAllFolderMappings() {
    let stmt = this._createSyncStatement(
      "SELECT id, folderURI, dirtyStatus, name, indexingPriority \
         FROM folderLocations",
      true
    );

    while (stmt.executeStep()) {
      // no chance of this SQLITE_BUSY on this call
      let folderID = stmt.getInt64(0);
      let folderURI = stmt.getString(1);
      let dirtyStatus = stmt.getInt32(2);
      let folderName = stmt.getString(3);
      let indexingPriority = stmt.getInt32(4);

      let folder = new GlodaFolder(
        this,
        folderID,
        folderURI,
        dirtyStatus,
        folderName,
        indexingPriority
      );

      this._folderByURI[folderURI] = folder;
      this._folderByID[folderID] = folder;

      // keep the id allocator ahead of every id we have seen
      if (folderID >= this._nextFolderId) {
        this._nextFolderId = folderID + 1;
      }
    }
    stmt.finalize();
  },

  // Is this nsIMsgFolder already known to us (by URI)?
  _folderKnown(aFolder) {
    let folderURI = aFolder.URI;
    return folderURI in this._folderByURI;
  },

  _folderIdKnown(aFolderID) {
    return aFolderID in this._folderByID;
  },

  /**
   * Return the default messaging priority for a folder of this type, based
   * on the folder's flags. If aAllowSpecialFolderIndexing is true, then
   * folders such as Trash and Junk will be indexed.
   *
   * @param {nsIMsgFolder} aFolder
   * @param {boolean} aAllowSpecialFolderIndexing
   * @returns {number}
   */
  getDefaultIndexingPriority(aFolder, aAllowSpecialFolderIndexing) {
    let indexingPriority = GlodaFolder.prototype.kIndexingDefaultPriority;
    // Do not walk into trash/junk folders, unless the user is explicitly
    //  telling us to do so.
    let specialFolderFlags =
      Ci.nsMsgFolderFlags.Trash | Ci.nsMsgFolderFlags.Junk;
    if (aFolder.isSpecialFolder(specialFolderFlags, true)) {
      indexingPriority = aAllowSpecialFolderIndexing
        ? GlodaFolder.prototype.kIndexingDefaultPriority
        : GlodaFolder.prototype.kIndexingNeverPriority;
    } else if (
      aFolder.flags &
      (Ci.nsMsgFolderFlags.Queue | Ci.nsMsgFolderFlags.Newsgroup)
      // In unit testing at least folders can be
      // confusingly labeled ImapPublic when they
      // should not be.
Or at least I don't think they + // should be. So they're legit for now. + // | Ci.nsMsgFolderFlags.ImapPublic + // | Ci.nsMsgFolderFlags.ImapOtherUser + ) { + // Queue folders should always be ignored just because messages should not + // spend much time in there. + // We hate newsgroups, and public IMAP folders are similar. + // Other user IMAP folders should be ignored because it's not this user's + // mail. + indexingPriority = GlodaFolder.prototype.kIndexingNeverPriority; + } else if (aFolder.flags & Ci.nsMsgFolderFlags.Inbox) { + indexingPriority = GlodaFolder.prototype.kIndexingInboxPriority; + } else if (aFolder.flags & Ci.nsMsgFolderFlags.SentMail) { + indexingPriority = GlodaFolder.prototype.kIndexingSentMailPriority; + } else if (aFolder.flags & Ci.nsMsgFolderFlags.Favorite) { + indexingPriority = GlodaFolder.prototype.kIndexingFavoritePriority; + } else if (aFolder.flags & Ci.nsMsgFolderFlags.CheckNew) { + indexingPriority = GlodaFolder.prototype.kIndexingCheckNewPriority; + } + + return indexingPriority; + }, + + /** + * Map a folder URI to a GlodaFolder instance, creating the mapping if it does + * not yet exist. + * + * @param aFolder The nsIMsgFolder instance you would like the GlodaFolder + * instance for. + * @returns The existing or newly created GlodaFolder instance. + */ + _mapFolder(aFolder) { + let folderURI = aFolder.URI; + if (folderURI in this._folderByURI) { + return this._folderByURI[folderURI]; + } + + let folderID = this._nextFolderId++; + + // If there's an indexingPriority stored on the folder, just use that. + // Otherwise, fall back to the default for folders of this type. + let indexingPriority = NaN; + try { + let pri = aFolder.getStringProperty("indexingPriority"); // Might throw. + indexingPriority = parseInt(pri); // Might return NaN. + } catch (ex) {} + if (isNaN(indexingPriority)) { + indexingPriority = this.getDefaultIndexingPriority(aFolder); + } + + // If there are messages in the folder, it is filthy. 
If there are no + // messages, it can be clean. + let dirtyStatus = aFolder.getTotalMessages(false) + ? GlodaFolder.prototype.kFolderFilthy + : GlodaFolder.prototype.kFolderClean; + let folder = new GlodaFolder( + this, + folderID, + folderURI, + dirtyStatus, + aFolder.prettyName, + indexingPriority + ); + + this._insertFolderLocationStatement.bindByIndex(0, folder.id); + this._insertFolderLocationStatement.bindByIndex(1, folder.uri); + this._insertFolderLocationStatement.bindByIndex(2, folder.dirtyStatus); + this._insertFolderLocationStatement.bindByIndex(3, folder.name); + this._insertFolderLocationStatement.bindByIndex(4, folder.indexingPriority); + this._insertFolderLocationStatement.executeAsync(this.trackAsync()); + + this._folderByURI[folderURI] = folder; + this._folderByID[folderID] = folder; + this._log.debug("!! mapped " + folder.id + " from " + folderURI); + return folder; + }, + + /** + * Map an integer gloda folder ID to the corresponding GlodaFolder instance. + * + * @param aFolderID The known valid gloda folder ID for which you would like + * a GlodaFolder instance. + * @returns The GlodaFolder instance with the given id. If no such instance + * exists, we will throw an exception. + */ + _mapFolderID(aFolderID) { + if (aFolderID === null) { + return null; + } + if (aFolderID in this._folderByID) { + return this._folderByID[aFolderID]; + } + throw new Error("Got impossible folder ID: " + aFolderID); + }, + + /** + * Mark the gloda folder as deleted for any outstanding references to it and + * remove it from our tables so we don't hand out any new references. The + * latter is especially important in the case a folder with the same name + * is created afterwards; we don't want to confuse the new one with the old + * one! 
   */
  _killGlodaFolderIntoTombstone(aGlodaFolder) {
    aGlodaFolder._deleted = true;
    delete this._folderByURI[aGlodaFolder.uri];
    delete this._folderByID[aGlodaFolder.id];
  },

  /** Self-memoizing UPDATE statement for a folder's dirtyStatus. */
  get _updateFolderDirtyStatusStatement() {
    let statement = this._createAsyncStatement(
      "UPDATE folderLocations SET dirtyStatus = ?1 \
              WHERE id = ?2"
    );
    this.__defineGetter__("_updateFolderDirtyStatusStatement", () => statement);
    return this._updateFolderDirtyStatusStatement;
  },

  // Persist aFolder.dirtyStatus to the folderLocations row asynchronously.
  updateFolderDirtyStatus(aFolder) {
    let ufds = this._updateFolderDirtyStatusStatement;
    ufds.bindByIndex(1, aFolder.id);
    ufds.bindByIndex(0, aFolder.dirtyStatus);
    ufds.executeAsync(this.trackAsync());
  },

  /** Self-memoizing UPDATE statement for a folder's indexingPriority. */
  get _updateFolderIndexingPriorityStatement() {
    let statement = this._createAsyncStatement(
      "UPDATE folderLocations SET indexingPriority = ?1 \
              WHERE id = ?2"
    );
    this.__defineGetter__(
      "_updateFolderIndexingPriorityStatement",
      () => statement
    );
    return this._updateFolderIndexingPriorityStatement;
  },

  // Persist aFolder.indexingPriority to the folderLocations row.
  updateFolderIndexingPriority(aFolder) {
    let ufip = this._updateFolderIndexingPriorityStatement;
    ufip.bindByIndex(1, aFolder.id);
    ufip.bindByIndex(0, aFolder.indexingPriority);
    ufip.executeAsync(this.trackAsync());
  },

  /** Self-memoizing UPDATE statement for a folder's URI. */
  get _updateFolderLocationStatement() {
    let statement = this._createAsyncStatement(
      "UPDATE folderLocations SET folderURI = ?1 \
              WHERE id = ?2"
    );
    this.__defineGetter__("_updateFolderLocationStatement", () => statement);
    return this._updateFolderLocationStatement;
  },

  /**
   * Non-recursive asynchronous folder renaming based on the URI.
   *
   * @TODO provide a mechanism for recursive folder renames or have a higher
   *  layer deal with it and remove this note.
   */
  renameFolder(aOldFolder, aNewURI) {
    if (!(aOldFolder.URI in this._folderByURI)) {
      // only re-map folders we are already tracking
      return;
    }
    let folder = this._mapFolder(aOldFolder); // ensure the folder is mapped
    let oldURI = folder.uri;
    this._folderByURI[aNewURI] = folder;
    folder._uri = aNewURI;
    this._log.info("renaming folder URI " + oldURI + " to " + aNewURI);
    this._updateFolderLocationStatement.bindByIndex(1, folder.id);
    this._updateFolderLocationStatement.bindByIndex(0, aNewURI);
    this._updateFolderLocationStatement.executeAsync(this.trackAsync());

    delete this._folderByURI[oldURI];
  },

  get _deleteFolderByIDStatement() {
    let statement = this._createAsyncStatement(
      "DELETE FROM folderLocations WHERE id = ?1"
    );
    this.__defineGetter__("_deleteFolderByIDStatement", () => statement);
    return this._deleteFolderByIDStatement;
  },

  // Asynchronously delete the folderLocations row for the given folder id.
  deleteFolderByID(aFolderID) {
    let dfbis = this._deleteFolderByIDStatement;
    dfbis.bindByIndex(0, aFolderID);
    dfbis.executeAsync(this.trackAsync());
  },

  /* ********** Conversation ********** */
  /** The next conversation id to allocate.  Initialize at startup.
*/ + _nextConversationId: 1, + + _populateConversationManagedId() { + let stmt = this._createSyncStatement( + "SELECT MAX(id) FROM conversations", + true + ); + if (stmt.executeStep()) { + // no chance of this SQLITE_BUSY on this call + this._nextConversationId = stmt.getInt64(0) + 1; + } + stmt.finalize(); + }, + + get _insertConversationStatement() { + let statement = this._createAsyncStatement( + "INSERT INTO conversations (id, subject, oldestMessageDate, \ + newestMessageDate) \ + VALUES (?1, ?2, ?3, ?4)" + ); + this.__defineGetter__("_insertConversationStatement", () => statement); + return this._insertConversationStatement; + }, + + get _insertConversationTextStatement() { + let statement = this._createAsyncStatement( + "INSERT INTO conversationsText (docid, subject) \ + VALUES (?1, ?2)" + ); + this.__defineGetter__("_insertConversationTextStatement", () => statement); + return this._insertConversationTextStatement; + }, + + /** + * Asynchronously create a conversation. + */ + createConversation(aSubject, aOldestMessageDate, aNewestMessageDate) { + // create the data row + let conversationID = this._nextConversationId++; + let ics = this._insertConversationStatement; + ics.bindByIndex(0, conversationID); + ics.bindByIndex(1, aSubject); + if (aOldestMessageDate == null) { + ics.bindByIndex(2, null); + } else { + ics.bindByIndex(2, aOldestMessageDate); + } + if (aNewestMessageDate == null) { + ics.bindByIndex(3, null); + } else { + ics.bindByIndex(3, aNewestMessageDate); + } + ics.executeAsync(this.trackAsync()); + + // create the fulltext row, using the same rowid/docid + let icts = this._insertConversationTextStatement; + icts.bindByIndex(0, conversationID); + icts.bindByIndex(1, aSubject); + icts.executeAsync(this.trackAsync()); + + // create it + let conversation = new GlodaConversation( + this, + conversationID, + aSubject, + aOldestMessageDate, + aNewestMessageDate + ); + // it's new! let the collection manager know about it. 
+ GlodaCollectionManager.itemsAdded(conversation.NOUN_ID, [conversation]); + // return it + return conversation; + }, + + get _deleteConversationByIDStatement() { + let statement = this._createAsyncStatement( + "DELETE FROM conversations WHERE id = ?1" + ); + this.__defineGetter__("_deleteConversationByIDStatement", () => statement); + return this._deleteConversationByIDStatement; + }, + + /** + * Asynchronously delete a conversation given its ID. + */ + deleteConversationByID(aConversationID) { + let dcbids = this._deleteConversationByIDStatement; + dcbids.bindByIndex(0, aConversationID); + dcbids.executeAsync(this.trackAsync()); + + GlodaCollectionManager.itemsDeleted(GlodaConversation.prototype.NOUN_ID, [ + aConversationID, + ]); + }, + + _conversationFromRow(aStmt) { + let oldestMessageDate, newestMessageDate; + if (aStmt.getTypeOfIndex(2) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) { + oldestMessageDate = null; + } else { + oldestMessageDate = aStmt.getInt64(2); + } + if (aStmt.getTypeOfIndex(3) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) { + newestMessageDate = null; + } else { + newestMessageDate = aStmt.getInt64(3); + } + return new GlodaConversation( + this, + aStmt.getInt64(0), + aStmt.getString(1), + oldestMessageDate, + newestMessageDate + ); + }, + + /* ********** Message ********** */ + /** + * Next message id, managed because of our use of asynchronous inserts. + * Initialized by _populateMessageManagedId called by _init. + * + * Start from 32 to leave us all kinds of magical sentinel values at the + * bottom. + */ + _nextMessageId: 32, + + _populateMessageManagedId() { + let stmt = this._createSyncStatement("SELECT MAX(id) FROM messages", true); + if (stmt.executeStep()) { + // no chance of this SQLITE_BUSY on this call + // 0 gets returned even if there are no messages... 
      let highestSeen = stmt.getInt64(0);
      if (highestSeen != 0) {
        this._nextMessageId = highestSeen + 1;
      }
    }
    stmt.finalize();
  },

  get _insertMessageStatement() {
    let statement = this._createAsyncStatement(
      "INSERT INTO messages (id, folderID, messageKey, conversationID, date, \
                             headerMessageID, jsonAttributes, notability) \
              VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)"
    );
    this.__defineGetter__("_insertMessageStatement", () => statement);
    return this._insertMessageStatement;
  },

  get _insertMessageTextStatement() {
    let statement = this._createAsyncStatement(
      "INSERT INTO messagesText (docid, subject, body, attachmentNames, \
                                 author, recipients) \
              VALUES (?1, ?2, ?3, ?4, ?5, ?6)"
    );
    this.__defineGetter__("_insertMessageTextStatement", () => statement);
    return this._insertMessageTextStatement;
  },

  /**
   * Create a GlodaMessage with the given properties.  Because this is only half
   *  of the process of creating a message (the attributes still need to be
   *  completed), it's on the caller's head to call GlodaCollectionManager's
   *  itemAdded method once the message is fully created.
   *
   * This method uses the async connection, any downstream logic that depends on
   *  this message actually existing in the database must be done using an
   *  async query.
   */
  createMessage(
    aFolder,
    aMessageKey,
    aConversationID,
    aDatePRTime,
    aHeaderMessageID
  ) {
    let folderID;
    if (aFolder != null) {
      folderID = this._mapFolder(aFolder).id;
    } else {
      // a null folder means a "ghost" message (no location)
      folderID = null;
    }

    let messageID = this._nextMessageId++;

    let message = new GlodaMessage(
      this,
      messageID,
      folderID,
      aMessageKey,
      aConversationID,
      /* conversation */ null,
      // PRTime is in microseconds; Date wants milliseconds.
      aDatePRTime ? new Date(aDatePRTime / 1000) : null,
      aHeaderMessageID,
      /* deleted */ false,
      /* jsonText */ undefined,
      /* notability*/ 0
    );

    // We would love to notify the collection manager about the message at this
    // point (at least if it's not a ghost), but we can't yet.  We need to wait
    // until the attributes have been indexed, which means it's out of our
    // hands.  (Gloda.processMessage does it.)

    return message;
  },

  // Asynchronously insert the messages row for aMessage, plus its full-text
  // row when the message is not a ghost (has a folderID).
  insertMessage(aMessage) {
    this._log.debug("insertMessage " + aMessage);
    let ims = this._insertMessageStatement;
    ims.bindByIndex(0, aMessage.id);
    if (aMessage.folderID == null) {
      ims.bindByIndex(1, null);
    } else {
      ims.bindByIndex(1, aMessage.folderID);
    }
    if (aMessage.messageKey == null) {
      ims.bindByIndex(2, null);
    } else {
      ims.bindByIndex(2, aMessage.messageKey);
    }
    ims.bindByIndex(3, aMessage.conversationID);
    if (aMessage.date == null) {
      ims.bindByIndex(4, null);
    } else {
      // Date valueOf is milliseconds; the column stores PRTime (microseconds).
      ims.bindByIndex(4, aMessage.date * 1000);
    }
    ims.bindByIndex(5, aMessage.headerMessageID);
    if (aMessage._jsonText) {
      ims.bindByIndex(6, aMessage._jsonText);
    } else {
      ims.bindByIndex(6, null);
    }
    ims.bindByIndex(7, aMessage.notability);

    try {
      ims.executeAsync(this.trackAsync());
    } catch (ex) {
      throw new Error(
        "error executing statement... " +
          this.asyncConnection.lastError +
          ": " +
          this.asyncConnection.lastErrorString +
          " - " +
          ex
      );
    }

    // we create the full-text row for any message that isn't a ghost,
    // whether we have the body or not
    if (aMessage.folderID !== null) {
      this._insertMessageText(aMessage);
    }
  },

  /**
   * Inserts a full-text row. This should only be called if you're sure you want
   * to insert a row into the table.
   */
  _insertMessageText(aMessage) {
    // derive the indexed body text: prefer structured content, then raw
    // body lines, else null
    if (aMessage._content && aMessage._content.hasContent()) {
      aMessage._indexedBodyText = aMessage._content.getContentString(true);
    } else if (aMessage._bodyLines) {
      aMessage._indexedBodyText = aMessage._bodyLines.join("\n");
    } else {
      aMessage._indexedBodyText = null;
    }

    let imts = this._insertMessageTextStatement;
    imts.bindByIndex(0, aMessage.id);
    imts.bindByIndex(1, aMessage._subject);
    if (aMessage._indexedBodyText == null) {
      imts.bindByIndex(2, null);
    } else {
      imts.bindByIndex(2, aMessage._indexedBodyText);
    }
    if (aMessage._attachmentNames === null) {
      imts.bindByIndex(3, null);
    } else {
      imts.bindByIndex(3, aMessage._attachmentNames.join("\n"));
    }

    // if (aMessage._indexAuthor)
    imts.bindByIndex(4, aMessage._indexAuthor);
    // if (aMessage._indexRecipients)
    imts.bindByIndex(5, aMessage._indexRecipients);

    try {
      imts.executeAsync(this.trackAsync());
    } catch (ex) {
      throw new Error(
        "error executing fulltext statement... " +
          this.asyncConnection.lastError +
          ": " +
          this.asyncConnection.lastErrorString +
          " - " +
          ex
      );
    }
  },

  get _updateMessageStatement() {
    let statement = this._createAsyncStatement(
      "UPDATE messages SET folderID = ?1, \
                           messageKey = ?2, \
                           conversationID = ?3, \
                           date = ?4, \
                           headerMessageID = ?5, \
                           jsonAttributes = ?6, \
                           notability = ?7, \
                           deleted = ?8 \
              WHERE id = ?9"
    );
    this.__defineGetter__("_updateMessageStatement", () => statement);
    return this._updateMessageStatement;
  },

  get _updateMessageTextStatement() {
    let statement = this._createAsyncStatement(
      "UPDATE messagesText SET body = ?1, \
                               attachmentNames = ?2 \
              WHERE docid = ?3"
    );

    this.__defineGetter__("_updateMessageTextStatement", () => statement);
    return this._updateMessageTextStatement;
  },

  /**
   * Update the database row associated with the message.  If the message is
   * not a ghost and has _isNew defined, messagesText is affected.
   *
   * aMessage._isNew is currently equivalent to the fact that there is no
   * full-text row associated with this message, and we work with this
   * assumption here.  Note that if aMessage._isNew is not defined, then
   * we don't do anything.
   */
  updateMessage(aMessage) {
    this._log.debug("updateMessage " + aMessage);
    let ums = this._updateMessageStatement;
    // bind index 8 == placeholder ?9 (the WHERE id clause)
    ums.bindByIndex(8, aMessage.id);
    if (aMessage.folderID === null) {
      ums.bindByIndex(0, null);
    } else {
      ums.bindByIndex(0, aMessage.folderID);
    }
    if (aMessage.messageKey === null) {
      ums.bindByIndex(1, null);
    } else {
      ums.bindByIndex(1, aMessage.messageKey);
    }
    ums.bindByIndex(2, aMessage.conversationID);
    if (aMessage.date === null) {
      ums.bindByIndex(3, null);
    } else {
      // milliseconds -> PRTime microseconds
      ums.bindByIndex(3, aMessage.date * 1000);
    }
    ums.bindByIndex(4, aMessage.headerMessageID);
    if (aMessage._jsonText) {
      ums.bindByIndex(5, aMessage._jsonText);
    } else {
      ums.bindByIndex(5, null);
    }
    ums.bindByIndex(6, aMessage.notability);
    ums.bindByIndex(7, aMessage._isDeleted ? 1 : 0);

    ums.executeAsync(this.trackAsync());

    if (aMessage.folderID !== null) {
      // new messages get a fresh full-text row; existing ones get updated
      if ("_isNew" in aMessage && aMessage._isNew === true) {
        this._insertMessageText(aMessage);
      } else {
        this._updateMessageText(aMessage);
      }
    }
  },

  /**
   * Updates the full-text row associated with this message.  This only performs
   * the UPDATE query if the indexed body text has changed, which means that if
   * the body hasn't changed but the attachments have, we don't update.
   */
  _updateMessageText(aMessage) {
    let newIndexedBodyText;
    if (aMessage._content && aMessage._content.hasContent()) {
      newIndexedBodyText = aMessage._content.getContentString(true);
    } else if (aMessage._bodyLines) {
      newIndexedBodyText = aMessage._bodyLines.join("\n");
    } else {
      newIndexedBodyText = null;
    }

    // If the body text matches, don't perform an update
    if (newIndexedBodyText == aMessage._indexedBodyText) {
      this._log.debug(
        "in _updateMessageText, skipping update because body matches"
      );
      return;
    }

    aMessage._indexedBodyText = newIndexedBodyText;
    let umts = this._updateMessageTextStatement;
    umts.bindByIndex(2, aMessage.id);

    if (aMessage._indexedBodyText == null) {
      umts.bindByIndex(0, null);
    } else {
      umts.bindByIndex(0, aMessage._indexedBodyText);
    }

    if (aMessage._attachmentNames == null) {
      umts.bindByIndex(1, null);
    } else {
      umts.bindByIndex(1, aMessage._attachmentNames.join("\n"));
    }

    try {
      umts.executeAsync(this.trackAsync());
    } catch (ex) {
      throw new Error(
        "error executing fulltext statement... " +
          this.asyncConnection.lastError +
          ": " +
          this.asyncConnection.lastErrorString +
          " - " +
          ex
      );
    }
  },

  get _updateMessageLocationStatement() {
    let statement = this._createAsyncStatement(
      "UPDATE messages SET folderID = ?1, messageKey = ?2 WHERE id = ?3"
    );
    this.__defineGetter__("_updateMessageLocationStatement", () => statement);
    return this._updateMessageLocationStatement;
  },

  /**
   * Given a list of gloda message ids, and a list of their new message keys in
   *  the given new folder location, asynchronously update the message's
   *  database locations.  Also, update the in-memory representations.
   */
  updateMessageLocations(
    aMessageIds,
    aNewMessageKeys,
    aDestFolder,
    aDoNotNotify
  ) {
    this._log.debug(
      "updateMessageLocations:\n" +
        "ids: " +
        aMessageIds +
        "\n" +
        "keys: " +
        aNewMessageKeys +
        "\n" +
        "dest folder: " +
        aDestFolder +
        "\n" +
        "do not notify?" +
        aDoNotNotify +
        "\n"
    );
    let statement = this._updateMessageLocationStatement;
    // aDestFolder may be a raw gloda folder id or an nsIMsgFolder
    let destFolderID =
      typeof aDestFolder == "number"
        ? aDestFolder
        : this._mapFolder(aDestFolder).id;

    // map gloda id to the new message key for in-memory rep transform below
    let cacheLookupMap = {};

    for (let iMsg = 0; iMsg < aMessageIds.length; iMsg++) {
      let id = aMessageIds[iMsg],
        msgKey = aNewMessageKeys[iMsg];
      statement.bindByIndex(0, destFolderID);
      statement.bindByIndex(1, msgKey);
      statement.bindByIndex(2, id);
      statement.executeAsync(this.trackAsync());

      cacheLookupMap[id] = msgKey;
    }

    // - perform the cache lookup so we can update in-memory representations
    // found in memory items, and converted to list form for notification
    let inMemoryItems = {},
      modifiedItems = [];
    GlodaCollectionManager.cacheLookupMany(
      GlodaMessage.prototype.NOUN_ID,
      cacheLookupMap,
      inMemoryItems,
      /* do not cache */ false
    );
    for (let glodaId in inMemoryItems) {
      let glodaMsg = inMemoryItems[glodaId];
      glodaMsg._folderID = destFolderID;
      glodaMsg._messageKey = cacheLookupMap[glodaId];
      modifiedItems.push(glodaMsg);
    }

    // tell the collection manager about the modified messages so it can update
    // any existing views...
    if (!aDoNotNotify && modifiedItems.length) {
      GlodaCollectionManager.itemsModified(
        GlodaMessage.prototype.NOUN_ID,
        modifiedItems
      );
    }
  },

  get _updateMessageKeyStatement() {
    let statement = this._createAsyncStatement(
      "UPDATE messages SET messageKey = ?1 WHERE id = ?2"
    );
    this.__defineGetter__("_updateMessageKeyStatement", () => statement);
    return this._updateMessageKeyStatement;
  },

  /**
   * Update the message keys for the gloda messages with the given id's.  This
   * is to be used in response to msgKeyChanged notifications and is similar to
   * `updateMessageLocations` except that we do not update the folder and we
   * do not perform itemsModified notifications (because message keys are not
   * intended to be relevant to the gloda message abstraction).
   */
  updateMessageKeys(aMessageIds, aNewMessageKeys) {
    this._log.debug(
      "updateMessageKeys:\n" +
        "ids: " +
        aMessageIds +
        "\n" +
        "keys:" +
        aNewMessageKeys +
        "\n"
    );
    let statement = this._updateMessageKeyStatement;

    // map gloda id to the new message key for in-memory rep transform below
    let cacheLookupMap = {};

    for (let iMsg = 0; iMsg < aMessageIds.length; iMsg++) {
      let id = aMessageIds[iMsg],
        msgKey = aNewMessageKeys[iMsg];
      statement.bindByIndex(0, msgKey);
      statement.bindByIndex(1, id);
      statement.executeAsync(this.trackAsync());

      cacheLookupMap[id] = msgKey;
    }

    // - perform the cache lookup so we can update in-memory representations
    let inMemoryItems = {};
    GlodaCollectionManager.cacheLookupMany(
      GlodaMessage.prototype.NOUN_ID,
      cacheLookupMap,
      inMemoryItems,
      /* do not cache */ false
    );
    for (let glodaId in inMemoryItems) {
      let glodaMsg = inMemoryItems[glodaId];
      glodaMsg._messageKey = cacheLookupMap[glodaId];
    }
  },

  /**
   * Asynchronously mutate message folder id/message keys for the given
   *  messages, indicating that we are moving them to the target folder, but
   *  don't yet know their target message keys.
   *
   * Updates in-memory representations too.
   */
  updateMessageFoldersByKeyPurging(aGlodaIds, aDestFolder) {
    let destFolderID = this._mapFolder(aDestFolder).id;

    // ids are internal integer gloda ids, so inlining them into the SQL
    // string is safe; the statement is finalized after a single use.
    let sqlStr =
      "UPDATE messages SET folderID = ?1, \
                           messageKey = ?2 \
              WHERE id IN (" +
      aGlodaIds.join(", ") +
      ")";
    let statement = this._createAsyncStatement(sqlStr, true);
    statement.bindByIndex(0, destFolderID);
    statement.bindByIndex(1, null);
    statement.executeAsync(this.trackAsync());
    statement.finalize();

    // update any in-memory representations to match
    let cached = GlodaCollectionManager.cacheLookupManyList(
      GlodaMessage.prototype.NOUN_ID,
      aGlodaIds
    );
    for (let id in cached) {
      let glodaMsg = cached[id];
      glodaMsg._folderID = destFolderID;
      glodaMsg._messageKey = null;
    }
  },

  // Build a GlodaMessage from a messages (optionally joined with messagesText)
  // result row; NULL columns become null/undefined as appropriate.
  _messageFromRow(aRow) {
    this._log.debug("_messageFromRow " + aRow);
    let folderId,
      messageKey,
      date,
      jsonText,
      subject,
      indexedBodyText,
      attachmentNames;
    if (aRow.getTypeOfIndex(1) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) {
      folderId = null;
    } else {
      folderId = aRow.getInt64(1);
    }
    if (aRow.getTypeOfIndex(2) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) {
      messageKey = null;
    } else {
      messageKey = aRow.getInt64(2);
    }
    if (aRow.getTypeOfIndex(4) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) {
      date = null;
    } else {
      // stored PRTime microseconds -> Date milliseconds
      date = new Date(aRow.getInt64(4) / 1000);
    }
    if (aRow.getTypeOfIndex(7) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) {
      jsonText = undefined;
    } else {
      jsonText = aRow.getString(7);
    }
    // only queryFromQuery queries will have these columns
    if (aRow.numEntries >= 14) {
      if (aRow.getTypeOfIndex(10) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) {
        subject = undefined;
      } else {
        subject = aRow.getString(10);
      }
      if (aRow.getTypeOfIndex(9) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) {
        indexedBodyText = undefined;
      } else {
        indexedBodyText = aRow.getString(9);
      }
      if (aRow.getTypeOfIndex(11) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) {
        attachmentNames = null;
      } else {
        attachmentNames = aRow.getString(11);
        if (attachmentNames) {
          attachmentNames = attachmentNames.split("\n");
        } else {
          attachmentNames = null;
        }
      }
      // we ignore 12, author
      // we ignore 13, recipients
    }
    return new GlodaMessage(
      this,
      aRow.getInt64(0),
      folderId,
      messageKey,
      aRow.getInt64(3),
      null,
      date,
      aRow.getString(5),
      aRow.getInt64(6),
      jsonText,
      aRow.getInt64(8),
      subject,
      indexedBodyText,
      attachmentNames
    );
  },

  get _updateMessagesMarkDeletedByFolderID() {
    // When marking deleted clear the folderID and messageKey so that the
    // indexing process can reuse it without any location constraints.
    let statement = this._createAsyncStatement(
      "UPDATE messages SET folderID = NULL, messageKey = NULL, \
              deleted = 1 WHERE folderID = ?1"
    );
    this.__defineGetter__(
      "_updateMessagesMarkDeletedByFolderID",
      () => statement
    );
    return this._updateMessagesMarkDeletedByFolderID;
  },

  /**
   * Efficiently mark all the messages in a folder as deleted.  Unfortunately,
   * we obviously do not know the id's of the messages affected by this which
   * complicates in-memory updates.  The options are sending out to the SQL
   * database for a list of the message id's or some form of in-memory
   * traversal.  I/O costs being what they are, users having a propensity to
   * have folders with tens of thousands of messages, and the unlikeliness
   * of all of those messages being gloda-memory-resident, we go with the
   * in-memory traversal.
   */
  markMessagesDeletedByFolderID(aFolderID) {
    let statement = this._updateMessagesMarkDeletedByFolderID;
    statement.bindByIndex(0, aFolderID);
    statement.executeAsync(this.trackAsync());

    // Have the collection manager generate itemsRemoved events for any
    // in-memory messages in that folder.
    GlodaCollectionManager.itemsDeletedByAttribute(
      GlodaMessage.prototype.NOUN_ID,
      aMsg => aMsg._folderID == aFolderID
    );
  },

  /**
   * Mark all the gloda messages as deleted blind-fire.  Check if any of the
   * messages are known to the collection manager and update them to be deleted
   * along with the requisite collection notifications.
   */
  markMessagesDeletedByIDs(aMessageIDs) {
    // When marking deleted clear the folderID and messageKey so that the
    // indexing process can reuse it without any location constraints.
    let sqlString =
      "UPDATE messages SET folderID = NULL, messageKey = NULL, " +
      "deleted = 1 WHERE id IN (" +
      aMessageIDs.join(",") +
      ")";

    let statement = this._createAsyncStatement(sqlString, true);
    statement.executeAsync(this.trackAsync());
    statement.finalize();

    GlodaCollectionManager.itemsDeleted(
      GlodaMessage.prototype.NOUN_ID,
      aMessageIDs
    );
  },

  get _countDeletedMessagesStatement() {
    let statement = this._createAsyncStatement(
      "SELECT COUNT(*) FROM messages WHERE deleted = 1"
    );
    this.__defineGetter__("_countDeletedMessagesStatement", () => statement);
    return this._countDeletedMessagesStatement;
  },

  /**
   * Count how many messages are currently marked as deleted in the database.
   */
  countDeletedMessages(aCallback) {
    // SingletonResultValueHandler extracts the lone COUNT(*) cell and hands
    // it to aCallback asynchronously.
    let cms = this._countDeletedMessagesStatement;
    cms.executeAsync(new SingletonResultValueHandler(aCallback));
  },

  get _deleteMessageByIDStatement() {
    let statement = this._createAsyncStatement(
      "DELETE FROM messages WHERE id = ?1"
    );
    this.__defineGetter__("_deleteMessageByIDStatement", () => statement);
    return this._deleteMessageByIDStatement;
  },

  get _deleteMessageTextByIDStatement() {
    let statement = this._createAsyncStatement(
      "DELETE FROM messagesText WHERE docid = ?1"
    );
    this.__defineGetter__("_deleteMessageTextByIDStatement", () => statement);
    return this._deleteMessageTextByIDStatement;
  },

  /**
   * Delete a message and its fulltext from the database. It is assumed that
   * the message was already marked as deleted and so is not visible to the
   * collection manager and so nothing needs to be done about that.
   */
  deleteMessageByID(aMessageID) {
    let dmbids = this._deleteMessageByIDStatement;
    dmbids.bindByIndex(0, aMessageID);
    dmbids.executeAsync(this.trackAsync());

    // Also remove the message's row from the fulltext (messagesText) table.
    this.deleteMessageTextByID(aMessageID);
  },

  // Delete only the fulltext row for the given message id.
  deleteMessageTextByID(aMessageID) {
    let dmt = this._deleteMessageTextByIDStatement;
    dmt.bindByIndex(0, aMessageID);
    dmt.executeAsync(this.trackAsync());
  },

  get _folderCompactionStatement() {
    // NOTE(review): the unary "+" in "+deleted" reads like the SQLite idiom
    // for preventing the planner from using an index on that column (so the
    // folderID/messageKey ordering drives the scan). Presumably deliberate,
    // but confirm it is not a paste artifact before touching this SQL.
    let statement = this._createAsyncStatement(
      "SELECT id, messageKey, headerMessageID FROM messages \
        WHERE folderID = ?1 AND \
          messageKey >= ?2 AND +deleted = 0 ORDER BY messageKey LIMIT ?3"
    );
    this.__defineGetter__("_folderCompactionStatement", () => statement);
    return this._folderCompactionStatement;
  },

  /**
   * Fetch one block of up to aLimit (id, messageKey, headerMessageID) tuples
   * for non-deleted messages in the given folder, starting at
   * aStartingMessageKey and ordered by messageKey. Results are delivered to
   * aCallback via a CompactionBlockFetcherHandler; used by the folder
   * compaction pass.
   */
  folderCompactionPassBlockFetch(
    aFolderID,
    aStartingMessageKey,
    aLimit,
    aCallback
  ) {
    let fcs = this._folderCompactionStatement;
    fcs.bindByIndex(0, aFolderID);
    fcs.bindByIndex(1, aStartingMessageKey);
    fcs.bindByIndex(2, aLimit);
    fcs.executeAsync(new CompactionBlockFetcherHandler(aCallback));
  },

  /* ********** Message
Attributes ********** */ + get _insertMessageAttributeStatement() { + let statement = this._createAsyncStatement( + "INSERT INTO messageAttributes (conversationID, messageID, attributeID, \ + value) \ + VALUES (?1, ?2, ?3, ?4)" + ); + this.__defineGetter__("_insertMessageAttributeStatement", () => statement); + return this._insertMessageAttributeStatement; + }, + + get _deleteMessageAttributeStatement() { + let statement = this._createAsyncStatement( + "DELETE FROM messageAttributes WHERE attributeID = ?1 AND value = ?2 \ + AND conversationID = ?3 AND messageID = ?4" + ); + this.__defineGetter__("_deleteMessageAttributeStatement", () => statement); + return this._deleteMessageAttributeStatement; + }, + + /** + * Insert and remove attributes relating to a GlodaMessage. This is performed + * inside a pseudo-transaction (we create one if we aren't in one, using + * our _beginTransaction wrapper, but if we are in one, no additional + * meaningful semantics are added). + * No attempt is made to verify uniqueness of inserted attributes, either + * against the current database or within the provided list of attributes. + * The caller is responsible for ensuring that unwanted duplicates are + * avoided. + * + * @param aMessage The GlodaMessage the attributes belong to. This is used + * to provide the message id and conversation id. + * @param aAddDBAttributes A list of attribute tuples to add, where each tuple + * contains an attribute ID and a value. Lest you forget, an attribute ID + * corresponds to a row in the attribute definition table. The attribute + * definition table stores the 'parameter' for the attribute, if any. + * (Which is to say, our frequent Attribute-Parameter-Value triple has + * the Attribute-Parameter part distilled to a single attribute id.) + * @param aRemoveDBAttributes A list of attribute tuples to remove. 
+ */ + adjustMessageAttributes(aMessage, aAddDBAttributes, aRemoveDBAttributes) { + let imas = this._insertMessageAttributeStatement; + let dmas = this._deleteMessageAttributeStatement; + this._beginTransaction(); + try { + for (let iAttrib = 0; iAttrib < aAddDBAttributes.length; iAttrib++) { + let attribValueTuple = aAddDBAttributes[iAttrib]; + + imas.bindByIndex(0, aMessage.conversationID); + imas.bindByIndex(1, aMessage.id); + imas.bindByIndex(2, attribValueTuple[0]); + // use 0 instead of null, otherwise the db gets upset. (and we don't + // really care anyways.) + if (attribValueTuple[1] == null) { + imas.bindByIndex(3, 0); + } else if (Math.floor(attribValueTuple[1]) == attribValueTuple[1]) { + imas.bindByIndex(3, attribValueTuple[1]); + } else { + imas.bindByIndex(3, attribValueTuple[1]); + } + imas.executeAsync(this.trackAsync()); + } + + for (let iAttrib = 0; iAttrib < aRemoveDBAttributes.length; iAttrib++) { + let attribValueTuple = aRemoveDBAttributes[iAttrib]; + + dmas.bindByIndex(0, attribValueTuple[0]); + // use 0 instead of null, otherwise the db gets upset. (and we don't + // really care anyways.) 
+ if (attribValueTuple[1] == null) { + dmas.bindByIndex(1, 0); + } else if (Math.floor(attribValueTuple[1]) == attribValueTuple[1]) { + dmas.bindByIndex(1, attribValueTuple[1]); + } else { + dmas.bindByIndex(1, attribValueTuple[1]); + } + dmas.bindByIndex(2, aMessage.conversationID); + dmas.bindByIndex(3, aMessage.id); + dmas.executeAsync(this.trackAsync()); + } + + this._commitTransaction(); + } catch (ex) { + this._log.error("adjustMessageAttributes:", ex); + this._rollbackTransaction(); + throw ex; + } + }, + + get _deleteMessageAttributesByMessageIDStatement() { + let statement = this._createAsyncStatement( + "DELETE FROM messageAttributes WHERE messageID = ?1" + ); + this.__defineGetter__( + "_deleteMessageAttributesByMessageIDStatement", + () => statement + ); + return this._deleteMessageAttributesByMessageIDStatement; + }, + + /** + * Clear all the message attributes for a given GlodaMessage. No changes + * are made to the in-memory representation of the message; it is up to the + * caller to ensure that it handles things correctly. + * + * @param aMessage The GlodaMessage whose database attributes should be + * purged. 
+ */ + clearMessageAttributes(aMessage) { + if (aMessage.id != null) { + this._deleteMessageAttributesByMessageIDStatement.bindByIndex( + 0, + aMessage.id + ); + this._deleteMessageAttributesByMessageIDStatement.executeAsync( + this.trackAsync() + ); + } + }, + + _stringSQLQuoter(aString) { + return "'" + aString.replace(/\'/g, "''") + "'"; + }, + _numberQuoter(aNum) { + return aNum; + }, + + /* ===== Generic Attribute Support ===== */ + adjustAttributes(aItem, aAddDBAttributes, aRemoveDBAttributes) { + let nounDef = aItem.NOUN_DEF; + let dbMeta = nounDef._dbMeta; + if (dbMeta.insertAttrStatement === undefined) { + dbMeta.insertAttrStatement = this._createAsyncStatement( + "INSERT INTO " + + nounDef.attrTableName + + " (" + + nounDef.attrIDColumnName + + ", attributeID, value) " + + " VALUES (?1, ?2, ?3)" + ); + // we always create this at the same time (right here), no need to check + dbMeta.deleteAttrStatement = this._createAsyncStatement( + "DELETE FROM " + + nounDef.attrTableName + + " WHERE " + + " attributeID = ?1 AND value = ?2 AND " + + nounDef.attrIDColumnName + + " = ?3" + ); + } + + let ias = dbMeta.insertAttrStatement; + let das = dbMeta.deleteAttrStatement; + this._beginTransaction(); + try { + for (let iAttr = 0; iAttr < aAddDBAttributes.length; iAttr++) { + let attribValueTuple = aAddDBAttributes[iAttr]; + + ias.bindByIndex(0, aItem.id); + ias.bindByIndex(1, attribValueTuple[0]); + // use 0 instead of null, otherwise the db gets upset. (and we don't + // really care anyways.) 
+ if (attribValueTuple[1] == null) { + ias.bindByIndex(2, 0); + } else if (Math.floor(attribValueTuple[1]) == attribValueTuple[1]) { + ias.bindByIndex(2, attribValueTuple[1]); + } else { + ias.bindByIndex(2, attribValueTuple[1]); + } + ias.executeAsync(this.trackAsync()); + } + + for (let iAttr = 0; iAttr < aRemoveDBAttributes.length; iAttr++) { + let attribValueTuple = aRemoveDBAttributes[iAttr]; + + das.bindByIndex(0, attribValueTuple[0]); + // use 0 instead of null, otherwise the db gets upset. (and we don't + // really care anyways.) + if (attribValueTuple[1] == null) { + das.bindByIndex(1, 0); + } else if (Math.floor(attribValueTuple[1]) == attribValueTuple[1]) { + das.bindByIndex(1, attribValueTuple[1]); + } else { + das.bindByIndex(1, attribValueTuple[1]); + } + das.bindByIndex(2, aItem.id); + das.executeAsync(this.trackAsync()); + } + + this._commitTransaction(); + } catch (ex) { + this._log.error("adjustAttributes:", ex); + this._rollbackTransaction(); + throw ex; + } + }, + + clearAttributes(aItem) { + let nounDef = aItem.NOUN_DEF; + let dbMeta = nounDef._dbMeta; + if (dbMeta.clearAttrStatement === undefined) { + dbMeta.clearAttrStatement = this._createAsyncStatement( + "DELETE FROM " + + nounDef.attrTableName + + " WHERE " + + nounDef.attrIDColumnName + + " = ?1" + ); + } + + if (aItem.id != null) { + dbMeta.clearAttrstatement.bindByIndex(0, aItem.id); + dbMeta.clearAttrStatement.executeAsync(this.trackAsync()); + } + }, + + /** + * escapeStringForLIKE is only available on statements, and sometimes we want + * to use it before we create our statement, so we create a statement just + * for this reason. 
   */
  get _escapeLikeStatement() {
    // Dummy statement; only its escapeStringForLIKE helper is ever used.
    let statement = this._createAsyncStatement("SELECT 0");
    this.__defineGetter__("_escapeLikeStatement", () => statement);
    return this._escapeLikeStatement;
  },

  /**
   * Generator that converts query values into database values and groups them
   * by the attribute id they must be matched against, yielding
   * [attributeID, dbValues] pairs.
   *
   * For parameter-less nouns everything maps to the single attribute id (or
   * undefined for "special" attributes, which are matched on a table column
   * rather than the attribute table). For parameterized nouns, consecutive
   * values sharing the same parameter are batched under the attribute id
   * that aAttrDef.dbDef.bindParameter returns for that parameter.
   *
   * A null value with emptySetIsSignificant set emits the kEmptySetAttrId
   * sentinel pair instead of a regular value.
   */
  *_convertToDBValuesAndGroupByAttributeID(aAttrDef, aValues) {
    let objectNounDef = aAttrDef.objectNounDef;
    if (!objectNounDef.usesParameter) {
      let dbValues = [];
      for (let iValue = 0; iValue < aValues.length; iValue++) {
        let value = aValues[iValue];
        // If the empty set is significant and it's an empty signifier, emit
        // the appropriate dbvalue.
        if (value == null && aAttrDef.emptySetIsSignificant) {
          yield [this.kEmptySetAttrId, [aAttrDef.id]];
          // Bail if the only value was us; we don't want to add a
          // value-posessing wildcard into the mix.
          if (aValues.length == 1) {
            return;
          }
          continue;
        }
        let dbValue = objectNounDef.toParamAndValue(value)[1];
        if (dbValue != null) {
          dbValues.push(dbValue);
        }
      }
      yield [aAttrDef.special ? undefined : aAttrDef.id, dbValues];
      return;
    }

    // Parameterized case: group runs of values that share a parameter.
    let curParam, attrID, dbValues;
    let attrDBDef = aAttrDef.dbDef;
    for (let iValue = 0; iValue < aValues.length; iValue++) {
      let value = aValues[iValue];
      // If the empty set is significant and it's an empty signifier, emit
      // the appropriate dbvalue.
      if (value == null && aAttrDef.emptySetIsSignificant) {
        yield [this.kEmptySetAttrId, [aAttrDef.id]];
        // Bail if the only value was us; we don't want to add a
        // value-posessing wildcard into the mix.
        if (aValues.length == 1) {
          return;
        }
        continue;
      }
      let [dbParam, dbValue] = objectNounDef.toParamAndValue(value);
      if (curParam === undefined) {
        curParam = dbParam;
        attrID = attrDBDef.bindParameter(curParam);
        if (dbValue != null) {
          dbValues = [dbValue];
        } else {
          dbValues = [];
        }
      } else if (curParam == dbParam) {
        if (dbValue != null) {
          dbValues.push(dbValue);
        }
      } else {
        // Parameter changed: flush the current group and start a new one.
        yield [attrID, dbValues];
        curParam = dbParam;
        attrID = attrDBDef.bindParameter(curParam);
        if (dbValue != null) {
          dbValues = [dbValue];
        } else {
          dbValues = [];
        }
      }
    }
    // Flush the trailing group, if any values were seen.
    if (dbValues !== undefined) {
      yield [attrID, dbValues];
    }
  },

  /**
   * Generator that converts [lower, upper] range tuples into SQL comparison
   * snippets on aValueColumnName ("<=", ">=", or BETWEEN), grouped by
   * attribute id exactly like _convertToDBValuesAndGroupByAttributeID,
   * yielding [attributeID, dbStrings] pairs. One bound of each range may be
   * null (open-ended), but not both.
   */
  *_convertRangesToDBStringsAndGroupByAttributeID(
    aAttrDef,
    aValues,
    aValueColumnName
  ) {
    let objectNounDef = aAttrDef.objectNounDef;
    if (!objectNounDef.usesParameter) {
      let dbStrings = [];
      for (let iValue = 0; iValue < aValues.length; iValue++) {
        let [lowerVal, upperVal] = aValues[iValue];
        // they both can't be null. that is the law.
        if (lowerVal == null) {
          dbStrings.push(
            aValueColumnName +
              " <= " +
              objectNounDef.toParamAndValue(upperVal)[1]
          );
        } else if (upperVal == null) {
          dbStrings.push(
            aValueColumnName +
              " >= " +
              objectNounDef.toParamAndValue(lowerVal)[1]
          );
        } else {
          // No one is null!
          dbStrings.push(
            aValueColumnName +
              " BETWEEN " +
              objectNounDef.toParamAndValue(lowerVal)[1] +
              " AND " +
              objectNounDef.toParamAndValue(upperVal)[1]
          );
        }
      }
      yield [aAttrDef.special ? undefined : aAttrDef.id, dbStrings];
      return;
    }

    // Parameterized case: group runs of ranges that share a parameter.
    let curParam, attrID, dbStrings;
    let attrDBDef = aAttrDef.dbDef;
    for (let iValue = 0; iValue < aValues.length; iValue++) {
      let [lowerVal, upperVal] = aValues[iValue];

      let dbString, dbParam, lowerDBVal, upperDBVal;
      // they both can't be null. that is the law.
      if (lowerVal == null) {
        [dbParam, upperDBVal] = objectNounDef.toParamAndValue(upperVal);
        dbString = aValueColumnName + " <= " + upperDBVal;
      } else if (upperVal == null) {
        [dbParam, lowerDBVal] = objectNounDef.toParamAndValue(lowerVal);
        dbString = aValueColumnName + " >= " + lowerDBVal;
      } else {
        // no one is null!
        [dbParam, lowerDBVal] = objectNounDef.toParamAndValue(lowerVal);
        dbString =
          aValueColumnName +
          " BETWEEN " +
          lowerDBVal +
          " AND " +
          objectNounDef.toParamAndValue(upperVal)[1];
      }

      if (curParam === undefined) {
        curParam = dbParam;
        attrID = attrDBDef.bindParameter(curParam);
        dbStrings = [dbString];
      } else if (curParam === dbParam) {
        dbStrings.push(dbString);
      } else {
        // Parameter changed: flush the current group and start a new one.
        yield [attrID, dbStrings];
        curParam = dbParam;
        attrID = attrDBDef.bindParameter(curParam);
        dbStrings = [dbString];
      }
    }
    if (dbStrings !== undefined) {
      yield [attrID, dbStrings];
    }
  },

  /* eslint-disable complexity */
  /**
   * Perform a database query given a GlodaQueryClass instance that specifies
   * a set of constraints relating to the noun type associated with the query.
   * A GlodaCollection is returned containing the results of the look-up.
   * By default the collection is "live", and will mutate (generating events to
   * its listener) as the state of the database changes.
   * This functionality is made user/extension visible by the Query's
   * getCollection (asynchronous).
   *
   * @param [aArgs] See |GlodaQuery.getCollection| for info.
   */
  queryFromQuery(
    aQuery,
    aListener,
    aListenerData,
    aExistingCollection,
    aMasterCollection,
    aArgs
  ) {
    // when changing this method, be sure that GlodaQuery's testMatch function
    // likewise has its changes made.
    let nounDef = aQuery._nounDef;

    let whereClauses = [];
    // Each query in the union contributes one OR-ed WHERE clause.
    let unionQueries = [aQuery].concat(aQuery._unions);
    // Positional arguments accumulated in the same order the '?' placeholders
    // are appended to the generated SQL.
    let boundArgs = [];

    // Use the dbQueryValidityConstraintSuffix to provide constraints that
    // filter items down to those that are valid for the query mechanism to
    // return. For example, in the case of messages, deleted or ghost
    // messages should not be returned by this query layer. We require
    // hand-rolled SQL to do that for now.
    let validityConstraintSuffix;
    if (
      nounDef.dbQueryValidityConstraintSuffix &&
      !aQuery.options.noDbQueryValidityConstraints
    ) {
      validityConstraintSuffix = nounDef.dbQueryValidityConstraintSuffix;
    } else {
      validityConstraintSuffix = "";
    }

    for (let iUnion = 0; iUnion < unionQueries.length; iUnion++) {
      let curQuery = unionQueries[iUnion];
      let selects = [];

      let lastConstraintWasSpecial = false;
      let curConstraintIsSpecial;

      for (
        let iConstraint = 0;
        iConstraint < curQuery._constraints.length;
        iConstraint++
      ) {
        let constraint = curQuery._constraints[iConstraint];
        let [constraintType, attrDef] = constraint;
        let constraintValues = constraint.slice(2);

        let tableName, idColumnName, valueColumnName;
        if (constraintType == GlodaConstants.kConstraintIdIn) {
          // we don't need any of the next cases' setup code, and we especially
          // would prefer that attrDef isn't accessed since it's null for us.
        } else if (attrDef.special) {
          // "special" attributes live as columns directly on the noun table.
          tableName = nounDef.tableName;
          idColumnName = "id"; // canonical id for a table is "id".
          valueColumnName = attrDef.specialColumnName;
          curConstraintIsSpecial = true;
        } else {
          tableName = nounDef.attrTableName;
          idColumnName = nounDef.attrIDColumnName;
          valueColumnName = "value";
          curConstraintIsSpecial = false;
        }

        let select = null,
          test = null;
        if (constraintType === GlodaConstants.kConstraintIdIn) {
          // this is somewhat of a trick. this does mean that this can be the
          // only constraint. Namely, our idiom is:
          // SELECT * FROM blah WHERE id IN (a INTERSECT b INTERSECT c)
          // but if we only have 'a', then that becomes "...IN (a)", and if
          // 'a' is not a select but a list of id's... tricky, no?
          select = constraintValues.join(",");
        } else if (constraintType === GlodaConstants.kConstraintIn) {
          // @testpoint gloda.datastore.sqlgen.kConstraintIn
          let clauses = [];
          for (let [
            attrID,
            values,
          ] of this._convertToDBValuesAndGroupByAttributeID(
            attrDef,
            constraintValues
          )) {
            let clausePart;
            if (attrID !== undefined) {
              clausePart =
                "(attributeID = " + attrID + (values.length ? " AND " : "");
            } else {
              clausePart = "(";
            }
            if (values.length) {
              // strings need to be escaped, we would use ? binding, except
              // that gets mad if we have too many strings... so we use our
              // own escaping logic. correctly escaping is easy, but it still
              // feels wrong to do it. (just double the quote character...)
              if (
                "special" in attrDef &&
                attrDef.special == GlodaConstants.kSpecialString
              ) {
                clausePart +=
                  valueColumnName +
                  " IN (" +
                  values
                    .map(v => "'" + v.replace(/\'/g, "''") + "'")
                    .join(",") +
                  "))";
              } else {
                clausePart +=
                  valueColumnName + " IN (" + values.join(",") + "))";
              }
            } else {
              clausePart += ")";
            }
            clauses.push(clausePart);
          }
          test = clauses.join(" OR ");
        } else if (constraintType === GlodaConstants.kConstraintRanges) {
          // @testpoint gloda.datastore.sqlgen.kConstraintRanges
          let clauses = [];
          for (let [
            attrID,
            dbStrings,
          ] of this._convertRangesToDBStringsAndGroupByAttributeID(
            attrDef,
            constraintValues,
            valueColumnName
          )) {
            if (attrID !== undefined) {
              clauses.push(
                "(attributeID = " +
                  attrID +
                  " AND (" +
                  dbStrings.join(" OR ") +
                  "))"
              );
            } else {
              clauses.push("(" + dbStrings.join(" OR ") + ")");
            }
          }
          test = clauses.join(" OR ");
        } else if (constraintType === GlodaConstants.kConstraintEquals) {
          // @testpoint gloda.datastore.sqlgen.kConstraintEquals
          let clauses = [];
          for (let [
            attrID,
            values,
          ] of this._convertToDBValuesAndGroupByAttributeID(
            attrDef,
            constraintValues
          )) {
            if (attrID !== undefined) {
              clauses.push(
                "(attributeID = " +
                  attrID +
                  " AND (" +
                  values.map(_ => valueColumnName + " = ?").join(" OR ") +
                  "))"
              );
            } else {
              clauses.push(
                "(" +
                  values.map(_ => valueColumnName + " = ?").join(" OR ") +
                  ")"
              );
            }
            // Values bind positionally; order must match the '?'s just added.
            boundArgs.push.apply(boundArgs, values);
          }
          test = clauses.join(" OR ");
        } else if (constraintType === GlodaConstants.kConstraintStringLike) {
          // @testpoint gloda.datastore.sqlgen.kConstraintStringLike
          let likePayload = "";
          for (let valuePart of constraintValues) {
            if (typeof valuePart == "string") {
              likePayload += this._escapeLikeStatement.escapeStringForLIKE(
                valuePart,
                "/"
              );
            } else {
              // Non-string parts act as wildcards.
              likePayload += "%";
            }
          }
          test = valueColumnName + " LIKE ? ESCAPE '/'";
          boundArgs.push(likePayload);
        } else if (constraintType === GlodaConstants.kConstraintFulltext) {
          // @testpoint gloda.datastore.sqlgen.kConstraintFulltext
          let matchStr = constraintValues[0];
          select =
            "SELECT docid FROM " +
            nounDef.tableName +
            "Text" +
            " WHERE " +
            attrDef.specialColumnName +
            " MATCH ?";
          boundArgs.push(matchStr);
        }

        if (curConstraintIsSpecial && lastConstraintWasSpecial && test) {
          // Consecutive special constraints can share one sub-select.
          selects[selects.length - 1] += " AND " + test;
        } else if (select) {
          selects.push(select);
        } else if (test) {
          select =
            "SELECT " + idColumnName + " FROM " + tableName + " WHERE " + test;
          selects.push(select);
        } else {
          this._log.warn(
            "Unable to translate constraint of type " +
              constraintType +
              " on attribute bound as " +
              nounDef.name
          );
        }

        lastConstraintWasSpecial = curConstraintIsSpecial;
      }

      if (selects.length) {
        whereClauses.push(
          "id IN (" +
            selects.join(" INTERSECT ") +
            ")" +
            validityConstraintSuffix
        );
      }
    }

    let sqlString = "SELECT * FROM " + nounDef.tableName;
    if (!aQuery.options.noMagic) {
      if (
        aQuery.options.noDbQueryValidityConstraints &&
        nounDef.dbQueryJoinMagicWithNoValidityConstraints
      ) {
        sqlString += nounDef.dbQueryJoinMagicWithNoValidityConstraints;
      } else if (nounDef.dbQueryJoinMagic) {
        sqlString += nounDef.dbQueryJoinMagic;
      }
    }

    if (whereClauses.length) {
      sqlString += " WHERE (" + whereClauses.join(") OR (") + ")";
    }

    // explicitSQL completely replaces the generated SQL (but boundArgs built
    // above are still used).
    if (aQuery.options.explicitSQL) {
      sqlString = aQuery.options.explicitSQL;
    }

    if (aQuery.options.outerWrapColumns) {
      sqlString =
        "SELECT *, " +
        aQuery.options.outerWrapColumns.join(", ") +
        " FROM (" +
        sqlString +
        ")";
    }

    if (aQuery._order.length) {
      let orderClauses = [];
      for (let colName of aQuery._order) {
        // A leading "-" means descending order on that column.
        if (colName.startsWith("-")) {
          orderClauses.push(colName.substring(1) + " DESC");
        } else {
          orderClauses.push(colName + " ASC");
        }
      }
      sqlString += " ORDER BY " + orderClauses.join(", ");
    }

    if (aQuery._limit) {
      if (!("limitClauseAlreadyIncluded" in aQuery.options)) {
        sqlString += " LIMIT ?";
      }
      boundArgs.push(aQuery._limit);
    }

    this._log.debug("QUERY FROM QUERY: " + sqlString + " ARGS: " + boundArgs);

    // if we want to become explicit, replace the query (which has already
    // provided our actual SQL query) with an explicit query. This will be
    // what gets attached to the collection in the event we create a new
    // collection. If we are reusing one, we assume that the explicitness,
    // if desired, already happened.
    // (we do not need to pass an argument to the explicitQueryClass constructor
    // because it will be passed in to the collection's constructor, which will
    // ensure that the collection attribute gets set.)
    if (aArgs && "becomeExplicit" in aArgs && aArgs.becomeExplicit) {
      aQuery = new nounDef.explicitQueryClass();
    } else if (aArgs && "becomeNull" in aArgs && aArgs.becomeNull) {
      aQuery = new nounDef.nullQueryClass();
    }

    return this._queryFromSQLString(
      sqlString,
      boundArgs,
      nounDef,
      aQuery,
      aListener,
      aListenerData,
      aExistingCollection,
      aMasterCollection
    );
  },
  /* eslint-enable complexity */

  /**
   * Execute the SQL produced by queryFromQuery, delivering result rows into
   * a (new or existing) GlodaCollection via QueryFromQueryCallback.
   */
  _queryFromSQLString(
    aSqlString,
    aBoundArgs,
    aNounDef,
    aQuery,
    aListener,
    aListenerData,
    aExistingCollection,
    aMasterCollection
  ) {
    let statement = this._createAsyncStatement(aSqlString, true);
    for (let [iBinding, bindingValue] of aBoundArgs.entries()) {
      this._bindVariant(statement, iBinding, bindingValue);
    }

    let collection;
    if (aExistingCollection) {
      collection = aExistingCollection;
    } else {
      collection = new GlodaCollection(
        aNounDef,
        [],
        aQuery,
        aListener,
        aMasterCollection
      );
      GlodaCollectionManager.registerCollection(collection);
      // we don't want to overwrite the existing listener or its data, but this
      // does raise the question about what should happen if we get passed in
      // a different listener and/or data.
      if (aListenerData !== undefined) {
        collection.data = aListenerData;
      }
    }
    if (aListenerData) {
      // Reused collections stack listener data so each pending query's data
      // can be popped off as its results arrive.
      if (collection.dataStack) {
        collection.dataStack.push(aListenerData);
      } else {
        collection.dataStack = [aListenerData];
      }
    }

    statement.executeAsync(
      new QueryFromQueryCallback(statement, aNounDef, collection)
    );
    statement.finalize();
    return collection;
  },

  /* eslint-disable complexity */
  /**
   * Hydrate a freshly-loaded noun item: process its "special load"
   * attributes and its JSON attribute blob, recording any other database
   * objects it references into aReferencesByNounID /
   * aInverseReferencesByNounID so the caller can load them, and stashing the
   * unresolved values on aItem._deps for loadNounDeferredDeps to finish.
   *
   * @returns {boolean} true if the item has (new) dependencies that must be
   *     resolved before it is usable.
   */
  loadNounItem(aItem, aReferencesByNounID, aInverseReferencesByNounID) {
    let attribIDToDBDefAndParam = this._attributeIDToDBDefAndParam;

    // hadDeps: a previous pass already parsed the JSON; only re-check the
    // special load attributes in that case.
    let hadDeps = aItem._deps != null;
    let deps = aItem._deps || {};
    let hasDeps = false;

    for (let attrib of aItem.NOUN_DEF.specialLoadAttribs) {
      let objectNounDef = attrib.objectNounDef;

      if (
        "special" in attrib &&
        attrib.special === GlodaConstants.kSpecialColumnChildren
      ) {
        let invReferences = aInverseReferencesByNounID[objectNounDef.id];
        if (invReferences === undefined) {
          invReferences = aInverseReferencesByNounID[objectNounDef.id] = {};
        }
        // only contribute if it's not already pending or there
        if (
          !(attrib.id in deps) &&
          aItem[attrib.storageAttributeName] == null
        ) {
          // this._log.debug(" Adding inv ref for: " + aItem.id);
          if (!(aItem.id in invReferences)) {
            invReferences[aItem.id] = null;
          }
          deps[attrib.id] = null;
          hasDeps = true;
        }
      } else if (
        "special" in attrib &&
        attrib.special === GlodaConstants.kSpecialColumnParent
      ) {
        let references = aReferencesByNounID[objectNounDef.id];
        if (references === undefined) {
          references = aReferencesByNounID[objectNounDef.id] = {};
        }
        // nothing to contribute if it's already there
        if (
          !(attrib.id in deps) &&
          aItem[attrib.valueStorageAttributeName] == null
        ) {
          let parentID = aItem[attrib.idStorageAttributeName];
          if (!(parentID in references)) {
            references[parentID] = null;
          }
          // this._log.debug(" Adding parent ref for: " +
          //  aItem[attrib.idStorageAttributeName]);
          deps[attrib.id] = null;
          hasDeps = true;
        } else {
          this._log.debug(
            " paranoia value storage: " +
              aItem[attrib.valueStorageAttributeName]
          );
        }
      }
    }

    // bail here if arbitrary values are not allowed, there just is no
    // encoded json, or we already had dependencies for this guy, implying
    // the json pass has already been performed
    if (!aItem.NOUN_DEF.allowsArbitraryAttrs || !aItem._jsonText || hadDeps) {
      if (hasDeps) {
        aItem._deps = deps;
      }
      return hasDeps;
    }

    // this._log.debug(" load json: " + aItem._jsonText);
    let jsonDict = JSON.parse(aItem._jsonText);
    delete aItem._jsonText;

    // Iterate over the attributes on the item
    for (let attribId in jsonDict) {
      let jsonValue = jsonDict[attribId];
      // It is technically impossible for attribute ids to go away at this
      // point in time. This would require someone to monkey around with
      // our schema. But we will introduce this functionality one day, so
      // prepare for it now.
      if (!(attribId in attribIDToDBDefAndParam)) {
        continue;
      }
      // find the attribute definition that corresponds to this key
      let dbAttrib = attribIDToDBDefAndParam[attribId][0];

      let attrib = dbAttrib.attrDef;
      // The attribute definition will fail to exist if no one defines the
      // attribute anymore. This can happen for many reasons: an extension
      // was uninstalled, an extension was changed and no longer defines the
      // attribute, or patches are being applied/unapplied. Ignore this
      // attribute if missing.
      if (attrib == null) {
        continue;
      }
      let objectNounDef = attrib.objectNounDef;

      // If it has a tableName member but no fromJSON, then it's a persistent
      // object that needs to be loaded, which also means we need to hold it in
      // a collection owned by our collection.
      // (If it has a fromJSON method, then it's a special case like
      // MimeTypeNoun where it is authoritatively backed by a table but caches
      // everything into memory. There is no case where fromJSON would be
      // implemented but we should still be doing database lookups.)
      if (objectNounDef.tableName && !objectNounDef.fromJSON) {
        let references = aReferencesByNounID[objectNounDef.id];
        if (references === undefined) {
          references = aReferencesByNounID[objectNounDef.id] = {};
        }

        if (attrib.singular) {
          if (!(jsonValue in references)) {
            references[jsonValue] = null;
          }
        } else {
          for (let key in jsonValue) {
            let anID = jsonValue[key];
            if (!(anID in references)) {
              references[anID] = null;
            }
          }
        }

        deps[attribId] = jsonValue;
        hasDeps = true;
      } else if (objectNounDef.contributeObjDependencies) {
        /* if it has custom contribution logic, use it */
        if (
          objectNounDef.contributeObjDependencies(
            jsonValue,
            aReferencesByNounID,
            aInverseReferencesByNounID
          )
        ) {
          deps[attribId] = jsonValue;
          hasDeps = true;
        } else {
          // just propagate the value, it's some form of simple sentinel
          aItem[attrib.boundName] = jsonValue;
        }
      } else if (objectNounDef.fromJSON) {
        // otherwise, the value just needs to be de-persisted, or...
        if (attrib.singular) {
          // For consistency with the non-singular case, we don't assign the
          // attribute if undefined is returned.
          let deserialized = objectNounDef.fromJSON(jsonValue, aItem);
          if (deserialized !== undefined) {
            aItem[attrib.boundName] = deserialized;
          }
        } else {
          // Convert all the entries in the list filtering out any undefined
          // values. (TagNoun will do this if the tag is now dead.)
          let outList = [];
          for (let key in jsonValue) {
            let val = jsonValue[key];
            let deserialized = objectNounDef.fromJSON(val, aItem);
            if (deserialized !== undefined) {
              outList.push(deserialized);
            }
          }
          // Note: It's possible if we filtered things out that this is an empty
          // list. This is acceptable because this is somewhat of an unusual
          // case and I don't think we want to further complicate our
          // semantics.
+ aItem[attrib.boundName] = outList; + } + } else { + // it's fine as is + aItem[attrib.boundName] = jsonValue; + } + } + + if (hasDeps) { + aItem._deps = deps; + } + return hasDeps; + }, + /* eslint-enable complexity */ + + loadNounDeferredDeps(aItem, aReferencesByNounID, aInverseReferencesByNounID) { + if (aItem._deps === undefined) { + return; + } + + let attribIDToDBDefAndParam = this._attributeIDToDBDefAndParam; + + for (let [attribId, jsonValue] of Object.entries(aItem._deps)) { + let dbAttrib = attribIDToDBDefAndParam[attribId][0]; + let attrib = dbAttrib.attrDef; + + let objectNounDef = attrib.objectNounDef; + let references = aReferencesByNounID[objectNounDef.id]; + if (attrib.special) { + if (attrib.special === GlodaConstants.kSpecialColumnChildren) { + let inverseReferences = aInverseReferencesByNounID[objectNounDef.id]; + // this._log.info("inverse assignment: " + objectNounDef.id + + // " of " + aItem.id) + aItem[attrib.storageAttributeName] = inverseReferences[aItem.id]; + } else if (attrib.special === GlodaConstants.kSpecialColumnParent) { + // this._log.info("parent column load: " + objectNounDef.id + + // " storage value: " + aItem[attrib.idStorageAttributeName]); + aItem[attrib.valueStorageAttributeName] = + references[aItem[attrib.idStorageAttributeName]]; + } + } else if (objectNounDef.tableName) { + if (attrib.singular) { + aItem[attrib.boundName] = references[jsonValue]; + } else { + aItem[attrib.boundName] = Object.keys(jsonValue).map( + key => references[jsonValue[key]] + ); + } + } else if (objectNounDef.contributeObjDependencies) { + aItem[attrib.boundName] = objectNounDef.resolveObjDependencies( + jsonValue, + aReferencesByNounID, + aInverseReferencesByNounID + ); + } + // there is no other case + } + + delete aItem._deps; + }, + + /* ********** Contact ********** */ + _nextContactId: 1, + + _populateContactManagedId() { + let stmt = this._createSyncStatement("SELECT MAX(id) FROM contacts", true); + if (stmt.executeStep()) { + // no 
chance of this SQLITE_BUSY on this call + this._nextContactId = stmt.getInt64(0) + 1; + } + stmt.finalize(); + }, + + get _insertContactStatement() { + let statement = this._createAsyncStatement( + "INSERT INTO contacts (id, directoryUUID, contactUUID, name, popularity,\ + frecency, jsonAttributes) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)" + ); + this.__defineGetter__("_insertContactStatement", () => statement); + return this._insertContactStatement; + }, + + createContact(aDirectoryUUID, aContactUUID, aName, aPopularity, aFrecency) { + let contactID = this._nextContactId++; + let contact = new GlodaContact( + this, + contactID, + aDirectoryUUID, + aContactUUID, + aName, + aPopularity, + aFrecency + ); + return contact; + }, + + insertContact(aContact) { + let ics = this._insertContactStatement; + ics.bindByIndex(0, aContact.id); + if (aContact.directoryUUID == null) { + ics.bindByIndex(1, null); + } else { + ics.bindByIndex(1, aContact.directoryUUID); + } + if (aContact.contactUUID == null) { + ics.bindByIndex(2, null); + } else { + ics.bindByIndex(2, aContact.contactUUID); + } + ics.bindByIndex(3, aContact.name); + ics.bindByIndex(4, aContact.popularity); + ics.bindByIndex(5, aContact.frecency); + if (aContact._jsonText) { + ics.bindByIndex(6, aContact._jsonText); + } else { + ics.bindByIndex(6, null); + } + + ics.executeAsync(this.trackAsync()); + + return aContact; + }, + + get _updateContactStatement() { + let statement = this._createAsyncStatement( + "UPDATE contacts SET directoryUUID = ?1, \ + contactUUID = ?2, \ + name = ?3, \ + popularity = ?4, \ + frecency = ?5, \ + jsonAttributes = ?6 \ + WHERE id = ?7" + ); + this.__defineGetter__("_updateContactStatement", () => statement); + return this._updateContactStatement; + }, + + updateContact(aContact) { + let ucs = this._updateContactStatement; + ucs.bindByIndex(6, aContact.id); + ucs.bindByIndex(0, aContact.directoryUUID); + ucs.bindByIndex(1, aContact.contactUUID); + ucs.bindByIndex(2, aContact.name); + 
ucs.bindByIndex(3, aContact.popularity); + ucs.bindByIndex(4, aContact.frecency); + if (aContact._jsonText) { + ucs.bindByIndex(5, aContact._jsonText); + } else { + ucs.bindByIndex(5, null); + } + + ucs.executeAsync(this.trackAsync()); + }, + + _contactFromRow(aRow) { + let directoryUUID, contactUUID, jsonText; + if (aRow.getTypeOfIndex(1) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) { + directoryUUID = null; + } else { + directoryUUID = aRow.getString(1); + } + if (aRow.getTypeOfIndex(2) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) { + contactUUID = null; + } else { + contactUUID = aRow.getString(2); + } + if (aRow.getTypeOfIndex(6) == Ci.mozIStorageValueArray.VALUE_TYPE_NULL) { + jsonText = undefined; + } else { + jsonText = aRow.getString(6); + } + + return new GlodaContact( + this, + aRow.getInt64(0), + directoryUUID, + contactUUID, + aRow.getString(5), + aRow.getInt64(3), + aRow.getInt64(4), + jsonText + ); + }, + + get _selectContactByIDStatement() { + let statement = this._createSyncStatement( + "SELECT * FROM contacts WHERE id = ?1" + ); + this.__defineGetter__("_selectContactByIDStatement", () => statement); + return this._selectContactByIDStatement; + }, + + /** + * Synchronous contact lookup currently only for use by gloda's creation + * of the concept of "me". It is okay for it to be doing synchronous work + * because it is part of the startup process before any user code could + * have gotten a reference to Gloda, but no one else should do this. + */ + getContactByID(aContactID) { + let contact = GlodaCollectionManager.cacheLookupOne( + GlodaContact.prototype.NOUN_ID, + aContactID + ); + + if (contact === null) { + let scbi = this._selectContactByIDStatement; + scbi.bindByIndex(0, aContactID); + if (this._syncStep(scbi)) { + contact = this._contactFromRow(scbi); + GlodaCollectionManager.itemLoaded(contact); + } + scbi.reset(); + } + + return contact; + }, + + /* ********** Identity ********** */ + /** next identity id, managed for async use reasons. 
*/ + _nextIdentityId: 1, + _populateIdentityManagedId() { + let stmt = this._createSyncStatement( + "SELECT MAX(id) FROM identities", + true + ); + if (stmt.executeStep()) { + // no chance of this SQLITE_BUSY on this call + this._nextIdentityId = stmt.getInt64(0) + 1; + } + stmt.finalize(); + }, + + get _insertIdentityStatement() { + let statement = this._createAsyncStatement( + "INSERT INTO identities (id, contactID, kind, value, description, relay) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6)" + ); + this.__defineGetter__("_insertIdentityStatement", () => statement); + return this._insertIdentityStatement; + }, + + createIdentity(aContactID, aContact, aKind, aValue, aDescription, aIsRelay) { + let identityID = this._nextIdentityId++; + let iis = this._insertIdentityStatement; + iis.bindByIndex(0, identityID); + iis.bindByIndex(1, aContactID); + iis.bindByIndex(2, aKind); + iis.bindByIndex(3, aValue); + iis.bindByIndex(4, aDescription); + iis.bindByIndex(5, aIsRelay ? 1 : 0); + iis.executeAsync(this.trackAsync()); + + let identity = new GlodaIdentity( + this, + identityID, + aContactID, + aContact, + aKind, + aValue, + aDescription, + aIsRelay + ); + GlodaCollectionManager.itemsAdded(identity.NOUN_ID, [identity]); + return identity; + }, + + get _updateIdentityStatement() { + let statement = this._createAsyncStatement( + "UPDATE identities SET contactID = ?1, \ + kind = ?2, \ + value = ?3, \ + description = ?4, \ + relay = ?5 \ + WHERE id = ?6" + ); + this.__defineGetter__("_updateIdentityStatement", () => statement); + return this._updateIdentityStatement; + }, + + updateIdentity(aIdentity) { + let ucs = this._updateIdentityStatement; + ucs.bindByIndex(5, aIdentity.id); + ucs.bindByIndex(0, aIdentity.contactID); + ucs.bindByIndex(1, aIdentity.kind); + ucs.bindByIndex(2, aIdentity.value); + ucs.bindByIndex(3, aIdentity.description); + ucs.bindByIndex(4, aIdentity.relay ? 
1 : 0); + + ucs.executeAsync(this.trackAsync()); + }, + + _identityFromRow(aRow) { + return new GlodaIdentity( + this, + aRow.getInt64(0), + aRow.getInt64(1), + null, + aRow.getString(2), + aRow.getString(3), + aRow.getString(4), + !!aRow.getInt32(5) + ); + }, + + get _selectIdentityByKindValueStatement() { + let statement = this._createSyncStatement( + "SELECT * FROM identities WHERE kind = ?1 AND value = ?2" + ); + this.__defineGetter__( + "_selectIdentityByKindValueStatement", + () => statement + ); + return this._selectIdentityByKindValueStatement; + }, + + /** + * Synchronous lookup of an identity by kind and value, only for use by + * the legacy gloda core code that creates a concept of "me". + * Ex: (email, foo@example.com) + */ + getIdentity(aKind, aValue) { + let identity = GlodaCollectionManager.cacheLookupOneByUniqueValue( + GlodaIdentity.prototype.NOUN_ID, + aKind + "@" + aValue + ); + + let ibkv = this._selectIdentityByKindValueStatement; + ibkv.bindByIndex(0, aKind); + ibkv.bindByIndex(1, aValue); + if (this._syncStep(ibkv)) { + identity = this._identityFromRow(ibkv); + GlodaCollectionManager.itemLoaded(identity); + } + ibkv.reset(); + + return identity; + }, +}; +GlodaAttributeDBDef.prototype._datastore = GlodaDatastore; +GlodaConversation.prototype._datastore = GlodaDatastore; +GlodaFolder.prototype._datastore = GlodaDatastore; +GlodaMessage.prototype._datastore = GlodaDatastore; +GlodaContact.prototype._datastore = GlodaDatastore; +GlodaIdentity.prototype._datastore = GlodaDatastore; diff --git a/comm/mailnews/db/gloda/modules/GlodaExplicitAttr.jsm b/comm/mailnews/db/gloda/modules/GlodaExplicitAttr.jsm new file mode 100644 index 0000000000..7a10b4112e --- /dev/null +++ b/comm/mailnews/db/gloda/modules/GlodaExplicitAttr.jsm @@ -0,0 +1,188 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +/* + * This file provides the "explicit attribute" provider for messages. It is + * concerned with attributes that are the result of user actions. For example, + * whether a message is starred (flagged), message tags, whether it is + * read/unread, etc. + */ + +const EXPORTED_SYMBOLS = ["GlodaExplicitAttr"]; + +const { Gloda } = ChromeUtils.import("resource:///modules/gloda/Gloda.jsm"); +const { GlodaConstants } = ChromeUtils.import( + "resource:///modules/gloda/GlodaConstants.jsm" +); +const { TagNoun } = ChromeUtils.import("resource:///modules/gloda/NounTag.jsm"); + +/** + * @namespace Explicit attribute provider. Indexes/defines attributes that are + * explicitly a result of user action. This dubiously includes marking a + * message as read. + */ +var GlodaExplicitAttr = { + providerName: "gloda.explattr", + strings: Services.strings.createBundle( + "chrome://messenger/locale/gloda.properties" + ), + _log: null, + + init() { + this._log = console.createInstance({ + prefix: "gloda.explattr", + maxLogLevel: "Warn", + maxLogLevelPref: "gloda.loglevel", + }); + + try { + this.defineAttributes(); + } catch (ex) { + this._log.error("Error in init: " + ex); + throw ex; + } + }, + + /** Boost for starred messages. */ + NOTABILITY_STARRED: 16, + /** Boost for tagged messages, first tag. */ + NOTABILITY_TAGGED_FIRST: 8, + /** Boost for tagged messages, each additional tag. 
*/ + NOTABILITY_TAGGED_ADDL: 1, + + defineAttributes() { + // Tag + this._attrTag = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrExplicit, + attributeName: "tag", + bindName: "tags", + singular: false, + emptySetIsSignificant: true, + facet: true, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_TAG, + parameterNoun: null, + // Property change notifications that we care about: + propertyChanges: ["keywords"], + }); // not-tested + + // Star + this._attrStar = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrExplicit, + attributeName: "star", + bindName: "starred", + singular: true, + facet: true, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_BOOLEAN, + parameterNoun: null, + }); // tested-by: test_attributes_explicit + // Read/Unread + this._attrRead = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrExplicit, + attributeName: "read", + // Make the message query-able but without using the database. + canQuery: "truthy-but-not-true", + singular: true, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_BOOLEAN, + parameterNoun: null, + }); // tested-by: test_attributes_explicit + + /** + * Has this message been replied to by the user. + */ + this._attrRepliedTo = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrExplicit, + attributeName: "repliedTo", + singular: true, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_BOOLEAN, + parameterNoun: null, + }); // tested-by: test_attributes_explicit + + /** + * Has this user forwarded this message to someone. 
+ */ + this._attrForwarded = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrExplicit, + attributeName: "forwarded", + singular: true, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_BOOLEAN, + parameterNoun: null, + }); // tested-by: test_attributes_explicit + }, + + *process(aGlodaMessage, aRawReps, aIsNew, aCallbackHandle) { + let aMsgHdr = aRawReps.header; + + aGlodaMessage.starred = aMsgHdr.isFlagged; + if (aGlodaMessage.starred) { + aGlodaMessage.notability += this.NOTABILITY_STARRED; + } + + aGlodaMessage.read = aMsgHdr.isRead; + + let flags = aMsgHdr.flags; + aGlodaMessage.repliedTo = Boolean(flags & Ci.nsMsgMessageFlags.Replied); + aGlodaMessage.forwarded = Boolean(flags & Ci.nsMsgMessageFlags.Forwarded); + + let tags = (aGlodaMessage.tags = []); + + // -- Tag + // build a map of the keywords + let keywords = aMsgHdr.getStringProperty("keywords"); + let keywordList = keywords.split(" "); + let keywordMap = {}; + for (let iKeyword = 0; iKeyword < keywordList.length; iKeyword++) { + let keyword = keywordList[iKeyword]; + keywordMap[keyword] = true; + } + + let tagArray = TagNoun.getAllTags(); + for (let iTag = 0; iTag < tagArray.length; iTag++) { + let tag = tagArray[iTag]; + if (tag.key in keywordMap) { + tags.push(tag); + } + } + + if (tags.length) { + aGlodaMessage.notability += + this.NOTABILITY_TAGGED_FIRST + + (tags.length - 1) * this.NOTABILITY_TAGGED_ADDL; + } + + yield GlodaConstants.kWorkDone; + }, + + /** + * Duplicates the notability logic from process(). Arguably process should + * be factored to call us, grokNounItem should be factored to call us, or we + * should get sufficiently fancy that our code wildly diverges. 
+ */ + score(aMessage, aContext) { + let score = 0; + if (aMessage.starred) { + score += this.NOTABILITY_STARRED; + } + if (aMessage.tags.length) { + score += + this.NOTABILITY_TAGGED_FIRST + + (aMessage.tags.length - 1) * this.NOTABILITY_TAGGED_ADDL; + } + return score; + }, +}; diff --git a/comm/mailnews/db/gloda/modules/GlodaFundAttr.jsm b/comm/mailnews/db/gloda/modules/GlodaFundAttr.jsm new file mode 100644 index 0000000000..364ea61bb0 --- /dev/null +++ b/comm/mailnews/db/gloda/modules/GlodaFundAttr.jsm @@ -0,0 +1,947 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +const EXPORTED_SYMBOLS = ["GlodaFundAttr"]; + +const { MailServices } = ChromeUtils.import( + "resource:///modules/MailServices.jsm" +); +const { GlodaUtils } = ChromeUtils.import( + "resource:///modules/gloda/GlodaUtils.jsm" +); +const { Gloda } = ChromeUtils.import("resource:///modules/gloda/Gloda.jsm"); +const { GlodaAttachment } = ChromeUtils.import( + "resource:///modules/gloda/GlodaDataModel.jsm" +); +const { GlodaConstants } = ChromeUtils.import( + "resource:///modules/gloda/GlodaConstants.jsm" +); +const { MimeTypeNoun } = ChromeUtils.import( + "resource:///modules/gloda/NounMimetype.jsm" +); +const { GlodaContent } = ChromeUtils.import( + "resource:///modules/gloda/GlodaContent.jsm" +); + +/** + * @namespace The Gloda Fundamental Attribute provider is a special attribute + * provider; it provides attributes that the rest of the providers should be + * able to assume exist. Also, it may end up accessing things at a lower level + * than most extension providers should do. In summary, don't mimic this code + * unless you won't complain when your code breaks. 
+ */ +var GlodaFundAttr = { + providerName: "gloda.fundattr", + strings: Services.strings.createBundle( + "chrome://messenger/locale/gloda.properties" + ), + _log: null, + + init() { + this._log = console.createInstance({ + prefix: "gloda.fundattr", + maxLogLevel: "Warn", + maxLogLevelPref: "gloda.loglevel", + }); + + try { + this.defineAttributes(); + } catch (ex) { + this._log.error("Error in init: " + ex); + throw ex; + } + }, + + POPULARITY_FROM_ME_TO: 10, + POPULARITY_FROM_ME_CC: 4, + POPULARITY_FROM_ME_BCC: 3, + POPULARITY_TO_ME: 5, + POPULARITY_CC_ME: 1, + POPULARITY_BCC_ME: 1, + + /** Boost for messages 'I' sent */ + NOTABILITY_FROM_ME: 10, + /** Boost for messages involving 'me'. */ + NOTABILITY_INVOLVING_ME: 1, + /** Boost for message from someone in 'my' address book. */ + NOTABILITY_FROM_IN_ADDR_BOOK: 10, + /** Boost for the first person involved in my address book. */ + NOTABILITY_INVOLVING_ADDR_BOOK_FIRST: 8, + /** Boost for each additional person involved in my address book. 
*/ + NOTABILITY_INVOLVING_ADDR_BOOK_ADDL: 2, + + defineAttributes() { + /* ***** Conversations ***** */ + // conversation: subjectMatches + this._attrConvSubject = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrDerived, + attributeName: "subjectMatches", + singular: true, + special: GlodaConstants.kSpecialFulltext, + specialColumnName: "subject", + subjectNouns: [GlodaConstants.NOUN_CONVERSATION], + objectNoun: GlodaConstants.NOUN_FULLTEXT, + }); + + /* ***** Messages ***** */ + // folder + this._attrFolder = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrFundamental, + attributeName: "folder", + singular: true, + facet: true, + special: GlodaConstants.kSpecialColumn, + specialColumnName: "folderID", + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_FOLDER, + }); // tested-by: test_attributes_fundamental + this._attrAccount = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrDerived, + attributeName: "account", + canQuery: "memory", + singular: true, + facet: true, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_ACCOUNT, + }); + this._attrMessageKey = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrFundamental, + attributeName: "messageKey", + singular: true, + special: GlodaConstants.kSpecialColumn, + specialColumnName: "messageKey", + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_NUMBER, + canQuery: true, + }); // tested-by: test_attributes_fundamental + + // We need to surface the deleted attribute for querying, but there is no + // reason for user code, so let's call it "_deleted" rather than deleted. 
+ // (In fact, our validity constraints require a special query formulation + // that user code should have no clue exists. That's right user code, + // that's a dare.) + Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrFundamental, + attributeName: "_deleted", + singular: true, + special: GlodaConstants.kSpecialColumn, + specialColumnName: "deleted", + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_NUMBER, + }); + + // -- fulltext search helpers + // fulltextMatches. Match over message subject, body, and attachments + // @testpoint gloda.noun.message.attr.fulltextMatches + this._attrFulltext = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrDerived, + attributeName: "fulltextMatches", + singular: true, + special: GlodaConstants.kSpecialFulltext, + specialColumnName: "messagesText", + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_FULLTEXT, + }); + + // subjectMatches. Fulltext match on subject + // @testpoint gloda.noun.message.attr.subjectMatches + this._attrSubjectText = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrDerived, + attributeName: "subjectMatches", + singular: true, + special: GlodaConstants.kSpecialFulltext, + specialColumnName: "subject", + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_FULLTEXT, + }); + + // bodyMatches. super-synthetic full-text matching... 
+ // @testpoint gloda.noun.message.attr.bodyMatches + this._attrBody = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrDerived, + attributeName: "bodyMatches", + singular: true, + special: GlodaConstants.kSpecialFulltext, + specialColumnName: "body", + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_FULLTEXT, + }); + + // attachmentNamesMatch + // @testpoint gloda.noun.message.attr.attachmentNamesMatch + this._attrAttachmentNames = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrDerived, + attributeName: "attachmentNamesMatch", + singular: true, + special: GlodaConstants.kSpecialFulltext, + specialColumnName: "attachmentNames", + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_FULLTEXT, + }); + + // @testpoint gloda.noun.message.attr.authorMatches + this._attrAuthorFulltext = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrDerived, + attributeName: "authorMatches", + singular: true, + special: GlodaConstants.kSpecialFulltext, + specialColumnName: "author", + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_FULLTEXT, + }); + + // @testpoint gloda.noun.message.attr.recipientsMatch + this._attrRecipientsFulltext = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrDerived, + attributeName: "recipientsMatch", + singular: true, + special: GlodaConstants.kSpecialFulltext, + specialColumnName: "recipients", + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_FULLTEXT, + }); + + // --- synthetic stuff for some reason + // conversation + // @testpoint gloda.noun.message.attr.conversation + this._attrConversation = Gloda.defineAttribute({ + provider: this, + extensionName: 
GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrFundamental, + attributeName: "conversation", + singular: true, + special: GlodaConstants.kSpecialColumnParent, + specialColumnName: "conversationID", + idStorageAttributeName: "_conversationID", + valueStorageAttributeName: "_conversation", + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_CONVERSATION, + canQuery: true, + }); + + // --- Fundamental + // From + this._attrFrom = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrFundamental, + attributeName: "from", + singular: true, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_IDENTITY, + }); // tested-by: test_attributes_fundamental + // To + this._attrTo = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrFundamental, + attributeName: "to", + singular: false, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_IDENTITY, + }); // tested-by: test_attributes_fundamental + // Cc + this._attrCc = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrFundamental, + attributeName: "cc", + singular: false, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_IDENTITY, + }); // not-tested + /** + * Bcc'ed recipients; only makes sense for sent messages. + */ + this._attrBcc = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrFundamental, + attributeName: "bcc", + singular: false, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_IDENTITY, + }); // not-tested + + // Date. now lives on the row. 
+ this._attrDate = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrFundamental, + attributeName: "date", + singular: true, + facet: { + type: "date", + }, + special: GlodaConstants.kSpecialColumn, + specialColumnName: "date", + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_DATE, + }); // tested-by: test_attributes_fundamental + + // Header message ID. + this._attrHeaderMessageID = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrFundamental, + attributeName: "headerMessageID", + singular: true, + special: GlodaConstants.kSpecialString, + specialColumnName: "headerMessageID", + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_STRING, + canQuery: true, + }); // tested-by: test_attributes_fundamental + + // Attachment MIME Types + this._attrAttachmentTypes = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrFundamental, + attributeName: "attachmentTypes", + singular: false, + emptySetIsSignificant: true, + facet: { + type: "default", + // This will group the MIME types by their category. 
+ groupIdAttr: "category", + queryHelper: "Category", + }, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_MIME_TYPE, + }); + + // Attachment infos + this._attrIsEncrypted = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrFundamental, + attributeName: "isEncrypted", + singular: true, + emptySetIsSignificant: false, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_NUMBER, + }); + + // Attachment infos + this._attrAttachmentInfos = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrFundamental, + attributeName: "attachmentInfos", + singular: false, + emptySetIsSignificant: false, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_ATTACHMENT, + }); + + // --- Optimization + /** + * Involves means any of from/to/cc/bcc. The queries get ugly enough + * without this that it seems to justify the cost, especially given the + * frequent use case. (In fact, post-filtering for the specific from/to/cc + * is probably justifiable rather than losing this attribute...) + */ + this._attrInvolves = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrOptimization, + attributeName: "involves", + singular: false, + facet: { + type: "default", + /** + * Filter out 'me', as we have other facets that deal with that, and the + * 'me' identities are so likely that they distort things. + * + * @returns true if the identity is not one of my identities, false if it + * is. + */ + filter(aItem) { + return !(aItem.id in Gloda.myIdentities); + }, + }, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_IDENTITY, + }); // not-tested + + /** + * Any of to/cc/bcc. 
+ */ + this._attrRecipients = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrOptimization, + attributeName: "recipients", + singular: false, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_IDENTITY, + }); // not-tested + + // From Me (To/Cc/Bcc) + this._attrFromMe = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrOptimization, + attributeName: "fromMe", + singular: false, + // The interesting thing to a facet is whether the message is from me. + facet: { + type: "nonempty?", + }, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_PARAM_IDENTITY, + }); // not-tested + // To/Cc/Bcc Me + this._attrToMe = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrFundamental, + attributeName: "toMe", + // The interesting thing to a facet is whether the message is to me. + facet: { + type: "nonempty?", + }, + singular: false, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_PARAM_IDENTITY, + }); // not-tested + + // -- Mailing List + // Non-singular, but a hard call. Namely, it is obvious that a message can + // be addressed to multiple mailing lists. However, I don't see how you + // could receive a message with more than one set of List-* headers, + // since each list-serve would each send you a copy. Based on our current + // decision to treat each physical message as separate, it almost seems + // right to limit the list attribute to the copy that originated at the + // list. That may sound entirely wrong, but keep in mind that until we + // have seen a message from the list with the List headers, we can't + // definitely know it's a mailing list (although heuristics could take us + // pretty far). As such, the quasi-singular thing is appealing. 
+ // Of course, the reality is that we really want to know if a message was + // sent to multiple mailing lists and be able to query on that. + // Additionally, our implicit-to logic needs to work on messages that + // weren't relayed by the list-serve, especially messages sent to the list + // by the user. + this._attrList = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrFundamental, + attributeName: "mailing-list", + bindName: "mailingLists", + singular: false, + emptySetIsSignificant: true, + facet: true, + subjectNouns: [GlodaConstants.NOUN_MESSAGE], + objectNoun: GlodaConstants.NOUN_IDENTITY, + }); // not-tested, not-implemented + }, + + RE_LIST_POST: /<mailto:([^>]+)>/, + + /** + * + * Specializations: + * - Mailing Lists. Replies to a message on a mailing list frequently only + * have the list-serve as the 'to', so we try to generate a synthetic 'to' + * based on the author of the parent message when possible. (The 'possible' + * part is that we may not have a copy of the parent message at the time of + * processing.) + * - Newsgroups. Same deal as mailing lists. + */ + *process(aGlodaMessage, aRawReps, aIsNew, aCallbackHandle) { + let aMsgHdr = aRawReps.header; + let aMimeMsg = aRawReps.mime; + + // -- From + // Let's use replyTo if available. + // er, since we are just dealing with mailing lists for now, forget the + // reply-to... 
+ // TODO: deal with default charset issues + let author = null; + /* + try { + author = aMsgHdr.getStringProperty("replyTo"); + } + catch (ex) { + } + */ + if (author == null || author == "") { + author = aMsgHdr.author; + } + + let normalizedListPost = ""; + if (aMimeMsg && aMimeMsg.has("list-post")) { + let match = this.RE_LIST_POST.exec(aMimeMsg.get("list-post")); + if (match) { + normalizedListPost = "<" + match[1] + ">"; + } + } + + // Do not use the MIME decoded variants of any of the email addresses + // because if name is encoded and has a comma in it, it will break the + // address parser (which already knows how to do the decoding anyways). + let [ + authorIdentities, + toIdentities, + ccIdentities, + bccIdentities, + listIdentities, + ] = yield aCallbackHandle.pushAndGo( + Gloda.getOrCreateMailIdentities( + aCallbackHandle, + author, + aMsgHdr.recipients, + aMsgHdr.ccList, + aMsgHdr.bccList, + normalizedListPost + ) + ); + + if (authorIdentities.length != 1) { + throw new Gloda.BadItemContentsError( + "Message with subject '" + + aMsgHdr.mime2DecodedSubject + + "' somehow lacks a valid author. Bailing." + ); + } + let authorIdentity = authorIdentities[0]; + aGlodaMessage.from = authorIdentity; + + // -- To, Cc, Bcc + aGlodaMessage.to = toIdentities; + aGlodaMessage.cc = ccIdentities; + aGlodaMessage.bcc = bccIdentities; + + // -- Mailing List + if (listIdentities.length) { + aGlodaMessage.mailingLists = listIdentities; + } + + let findIsEncrypted = x => + x.isEncrypted || (x.parts ? x.parts.some(findIsEncrypted) : false); + + // -- Encryption + aGlodaMessage.isEncrypted = false; + if (aMimeMsg) { + aGlodaMessage.isEncrypted = findIsEncrypted(aMimeMsg); + } + + // -- Attachments + if (aMimeMsg) { + // nsParseMailbox.cpp puts the attachment flag on msgHdrs as soon as it + // finds a multipart/mixed part. This is a good heuristic, but if it turns + // out the part has no filename, then we don't treat it as an attachment. 
+ // We just streamed the message, and we have all the information to figure + // that out, so now is a good place to clear the flag if needed. + let attachmentTypes = new Set(); + for (let attachment of aMimeMsg.allAttachments) { + // getMimeType expects the content type to contain at least a "/". + if (!attachment.contentType.includes("/")) { + continue; + } + attachmentTypes.add(MimeTypeNoun.getMimeType(attachment.contentType)); + } + if (attachmentTypes.size) { + aGlodaMessage.attachmentTypes = Array.from(attachmentTypes); + } + + let aMsgHdr = aRawReps.header; + let wasStreamed = + aMsgHdr && + !aGlodaMessage.isEncrypted && + (aMsgHdr.flags & Ci.nsMsgMessageFlags.Offline || + aMsgHdr.folder instanceof Ci.nsIMsgLocalMailFolder); + + // Clear the flag if it turns out there's no attachment after all and we + // streamed completely the message (if we didn't, then we have no + // knowledge of attachments, unless bug 673370 is fixed). + if (wasStreamed && !aMimeMsg.allAttachments.length) { + aMsgHdr.markHasAttachments(false); + } + + // This is not the same kind of attachments as above. Now, we want to + // provide convenience attributes to Gloda consumers, so that they can run + // through the list of attachments of a given message, to possibly build a + // visualization on top of it. We still reject bogus mime types, which + // means yencode won't be supported. Oh, I feel really bad. + let attachmentInfos = []; + for (let att of aMimeMsg.allUserAttachments) { + attachmentInfos.push( + this.glodaAttFromMimeAtt(aRawReps.trueGlodaRep, att) + ); + } + aGlodaMessage.attachmentInfos = attachmentInfos; + } + + // TODO: deal with mailing lists, including implicit-to. this will require + // convincing the indexer to pass us in the previous message if it is + // available. (which we'll simply pass to everyone... it can help body + // logic for quoting purposes, etc. too.) 
+ + yield GlodaConstants.kWorkDone; + }, + + glodaAttFromMimeAtt(aGlodaMessage, aAtt) { + // So we don't want to store the URL because it can change over time if + // the message is moved. What we do is store the full URL if it's a + // detached attachment, otherwise just keep the part information, and + // rebuild the URL according to where the message is sitting. + let part, externalUrl; + if (aAtt.isExternal) { + externalUrl = aAtt.url; + } else { + let matches = aAtt.url.match(GlodaUtils.PART_RE); + if (matches && matches.length) { + part = matches[1]; + } else { + this._log.error("Error processing attachment: " + aAtt.url); + } + } + return new GlodaAttachment( + aGlodaMessage, + aAtt.name, + aAtt.contentType, + aAtt.size, + part, + externalUrl, + aAtt.isExternal + ); + }, + + *optimize(aGlodaMessage, aRawReps, aIsNew, aCallbackHandle) { + let aMsgHdr = aRawReps.header; + + // for simplicity this is used for both involves and recipients + let involvesIdentities = {}; + let involves = aGlodaMessage.involves || []; + let recipients = aGlodaMessage.recipients || []; + + // 'me' specialization optimizations + let toMe = aGlodaMessage.toMe || []; + let fromMe = aGlodaMessage.fromMe || []; + + let myIdentities = Gloda.myIdentities; // needless optimization? + let authorIdentity = aGlodaMessage.from; + let isFromMe = authorIdentity.id in myIdentities; + + // The fulltext search column for the author. We want to have in here: + // - The e-mail address and display name as enclosed on the message. + // - The name per the address book card for this e-mail address, if we have + // one. + aGlodaMessage._indexAuthor = aMsgHdr.mime2DecodedAuthor; + // The fulltext search column for the recipients. 
(same deal) + aGlodaMessage._indexRecipients = aMsgHdr.mime2DecodedRecipients; + + if (isFromMe) { + aGlodaMessage.notability += this.NOTABILITY_FROM_ME; + } else { + let authorDisplayName = MailServices.ab.cardForEmailAddress( + authorIdentity.value + )?.displayName; + if (authorDisplayName !== null) { + aGlodaMessage.notability += this.NOTABILITY_FROM_IN_ADDR_BOOK; + // @testpoint gloda.noun.message.attr.authorMatches + aGlodaMessage._indexAuthor += " " + authorDisplayName; + } + } + + involves.push(authorIdentity); + involvesIdentities[authorIdentity.id] = true; + + let involvedAddrBookCount = 0; + + for (let toIdentity of aGlodaMessage.to) { + if (!(toIdentity.id in involvesIdentities)) { + involves.push(toIdentity); + recipients.push(toIdentity); + involvesIdentities[toIdentity.id] = true; + let toDisplayName = MailServices.ab.cardForEmailAddress( + toIdentity.value + )?.displayName; + if (toDisplayName !== null) { + involvedAddrBookCount++; + // @testpoint gloda.noun.message.attr.recipientsMatch + aGlodaMessage._indexRecipients += " " + toDisplayName; + } + } + + // optimization attribute to-me ('I' am the parameter) + if (toIdentity.id in myIdentities) { + toMe.push([toIdentity, authorIdentity]); + if (aIsNew) { + authorIdentity.contact.popularity += this.POPULARITY_TO_ME; + } + } + // optimization attribute from-me-to ('I' am the parameter) + if (isFromMe) { + fromMe.push([authorIdentity, toIdentity]); + // also, popularity + if (aIsNew) { + toIdentity.contact.popularity += this.POPULARITY_FROM_ME_TO; + } + } + } + for (let ccIdentity of aGlodaMessage.cc) { + if (!(ccIdentity.id in involvesIdentities)) { + involves.push(ccIdentity); + recipients.push(ccIdentity); + involvesIdentities[ccIdentity.id] = true; + let ccDisplayName = MailServices.ab.cardForEmailAddress( + ccIdentity.value + )?.displayName; + if (ccDisplayName !== null) { + involvedAddrBookCount++; + // @testpoint gloda.noun.message.attr.recipientsMatch + aGlodaMessage._indexRecipients += " " + 
ccDisplayName; + } + } + // optimization attribute cc-me ('I' am the parameter) + if (ccIdentity.id in myIdentities) { + toMe.push([ccIdentity, authorIdentity]); + if (aIsNew) { + authorIdentity.contact.popularity += this.POPULARITY_CC_ME; + } + } + // optimization attribute from-me-to ('I' am the parameter) + if (isFromMe) { + fromMe.push([authorIdentity, ccIdentity]); + // also, popularity + if (aIsNew) { + ccIdentity.contact.popularity += this.POPULARITY_FROM_ME_CC; + } + } + } + // just treat bcc like cc; the intent is the same although the exact + // semantics differ. + for (let bccIdentity of aGlodaMessage.bcc) { + if (!(bccIdentity.id in involvesIdentities)) { + involves.push(bccIdentity); + recipients.push(bccIdentity); + involvesIdentities[bccIdentity.id] = true; + let bccDisplayName = MailServices.ab.cardForEmailAddress( + bccIdentity.value + )?.displayName; + if (bccDisplayName !== null) { + involvedAddrBookCount++; + // @testpoint gloda.noun.message.attr.recipientsMatch + aGlodaMessage._indexRecipients += " " + bccDisplayName; + } + } + // optimization attribute cc-me ('I' am the parameter) + if (bccIdentity.id in myIdentities) { + toMe.push([bccIdentity, authorIdentity]); + if (aIsNew) { + authorIdentity.contact.popularity += this.POPULARITY_BCC_ME; + } + } + // optimization attribute from-me-to ('I' am the parameter) + if (isFromMe) { + fromMe.push([authorIdentity, bccIdentity]); + // also, popularity + if (aIsNew) { + bccIdentity.contact.popularity += this.POPULARITY_FROM_ME_BCC; + } + } + } + + if (involvedAddrBookCount) { + aGlodaMessage.notability += + this.NOTABILITY_INVOLVING_ADDR_BOOK_FIRST + + (involvedAddrBookCount - 1) * this.NOTABILITY_INVOLVING_ADDR_BOOK_ADDL; + } + + aGlodaMessage.involves = involves; + aGlodaMessage.recipients = recipients; + if (toMe.length) { + aGlodaMessage.toMe = toMe; + aGlodaMessage.notability += this.NOTABILITY_INVOLVING_ME; + } + if (fromMe.length) { + aGlodaMessage.fromMe = fromMe; + } + + // Content + if 
(aRawReps.bodyLines) { + aGlodaMessage._content = aRawReps.content = new GlodaContent(); + if (this.contentWhittle({}, aRawReps.bodyLines, aGlodaMessage._content)) { + // we were going to do something here? + } + } else { + aRawReps.content = null; + } + + yield GlodaConstants.kWorkDone; + }, + + /** + * Duplicates the notability logic from optimize(). Arguably optimize should + * be factored to call us, grokNounItem should be factored to call us, or we + * should get sufficiently fancy that our code wildly diverges. + */ + score(aMessage, aContext) { + let score = 0; + + let authorIdentity = aMessage.from; + if (authorIdentity.id in Gloda.myIdentities) { + score += this.NOTABILITY_FROM_ME; + } else if (authorIdentity.inAddressBook) { + score += this.NOTABILITY_FROM_IN_ADDR_BOOK; + } + if (aMessage.toMe) { + score += this.NOTABILITY_INVOLVING_ME; + } + + let involvedAddrBookCount = 0; + for (let identity of aMessage.to) { + if (identity.inAddressBook) { + involvedAddrBookCount++; + } + } + for (let identity of aMessage.cc) { + if (identity.inAddressBook) { + involvedAddrBookCount++; + } + } + if (involvedAddrBookCount) { + score += + this.NOTABILITY_INVOLVING_ADDR_BOOK_FIRST + + (involvedAddrBookCount - 1) * this.NOTABILITY_INVOLVING_ADDR_BOOK_ADDL; + } + return score; + }, + + _countQuoteDepthAndNormalize(aLine) { + let count = 0; + let lastStartOffset = 0; + + for (let i = 0; i < aLine.length; i++) { + let c = aLine[i]; + if (c == ">") { + count++; + lastStartOffset = i + 1; + } else if (c != " ") { + return [ + count, + lastStartOffset ? aLine.substring(lastStartOffset) : aLine, + ]; + } + } + + return [count, lastStartOffset ? aLine.substring(lastStartOffset) : aLine]; + }, + + /** + * Attempt to understand simple quoting constructs that use ">" with + * obvious phrases to enter the quoting block. No support for other types + * of quoting at this time. 
Also no support for piercing the wrapper of + * forwarded messages to actually be the content of the forwarded message. + */ + contentWhittle(aMeta, aBodyLines, aContent) { + if (!aContent.volunteerContent(aContent.kPriorityBase)) { + return false; + } + + // duplicate the list; we mutate somewhat... + let bodyLines = aBodyLines.concat(); + + // lastNonBlankLine originally was just for detecting quoting idioms where + // the "wrote" line was separated from the quoted block by a blank line. + // Now we also use it for whitespace suppression at the boundaries of + // quoted and un-quoted text. (We keep blank lines within the same + // 'block' of quoted or non-quoted text.) + // Because we now have two goals for it, and we still want to suppress blank + // lines when there is a 'wrote' line involved, we introduce... + // prevLastNonBlankLine! This arguably suggests refactoring should be the + // next step, but things work for now. + let rangeStart = 0, + lastNonBlankLine = null, + prevLastNonBlankLine = null; + let inQuoteDepth = 0; + for (let [iLine, line] of bodyLines.entries()) { + if (!line || line == "\xa0") { + /* unicode non breaking space */ + continue; + } + + if (line.startsWith(">")) { + if (!inQuoteDepth) { + let rangeEnd = iLine - 1; + let quoteRangeStart = iLine; + // see if the last non-blank-line was a lead-in... + if (lastNonBlankLine != null) { + // TODO: localize quote range start detection + if (aBodyLines[lastNonBlankLine].includes("wrote")) { + quoteRangeStart = lastNonBlankLine; + rangeEnd = lastNonBlankLine - 1; + // we 'used up' lastNonBlankLine, let's promote the prev guy to + // be the new lastNonBlankLine for the next logic block + lastNonBlankLine = prevLastNonBlankLine; + } + // eat the trailing whitespace... 
+ if (lastNonBlankLine != null) { + rangeEnd = Math.min(rangeEnd, lastNonBlankLine); + } + } + if (rangeEnd >= rangeStart) { + aContent.content(aBodyLines.slice(rangeStart, rangeEnd + 1)); + } + + [inQuoteDepth, line] = this._countQuoteDepthAndNormalize(line); + bodyLines[iLine] = line; + rangeStart = quoteRangeStart; + } else { + let curQuoteDepth; + [curQuoteDepth, line] = this._countQuoteDepthAndNormalize(line); + bodyLines[iLine] = line; + + if (curQuoteDepth != inQuoteDepth) { + // we could do some "wrote" compensation here, but it's not really + // as important. let's wait for a more clever algorithm. + aContent.quoted(aBodyLines.slice(rangeStart, iLine), inQuoteDepth); + inQuoteDepth = curQuoteDepth; + rangeStart = iLine; + } + } + } else if (inQuoteDepth) { + aContent.quoted(aBodyLines.slice(rangeStart, iLine), inQuoteDepth); + inQuoteDepth = 0; + rangeStart = iLine; + } + + prevLastNonBlankLine = lastNonBlankLine; + lastNonBlankLine = iLine; + } + + if (inQuoteDepth) { + aContent.quoted(aBodyLines.slice(rangeStart), inQuoteDepth); + } else { + aContent.content(aBodyLines.slice(rangeStart, lastNonBlankLine + 1)); + } + + return true; + }, +}; diff --git a/comm/mailnews/db/gloda/modules/GlodaIndexer.jsm b/comm/mailnews/db/gloda/modules/GlodaIndexer.jsm new file mode 100644 index 0000000000..05919e4d67 --- /dev/null +++ b/comm/mailnews/db/gloda/modules/GlodaIndexer.jsm @@ -0,0 +1,1491 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This file currently contains a fairly general implementation of asynchronous + * indexing with a very explicit message indexing implementation. As gloda + * will eventually want to index more than just messages, the message-specific + * things should ideally lose their special hold on this file. This will + * benefit readability/size as well. 
+ */ + +const EXPORTED_SYMBOLS = ["GlodaIndexer", "IndexingJob"]; + +const { GlodaConstants } = ChromeUtils.import( + "resource:///modules/gloda/GlodaConstants.jsm" +); + +const lazy = {}; +ChromeUtils.defineModuleGetter( + lazy, + "GlodaCollectionManager", + "resource:///modules/gloda/Collection.jsm" +); +ChromeUtils.defineModuleGetter( + lazy, + "GlodaDatastore", + "resource:///modules/gloda/GlodaDatastore.jsm" +); + +/** + * @class Capture the indexing batch concept explicitly. + * + * @param aJobType The type of thing we are indexing. Current choices are: + * "folder" and "message". Previous choices included "account". The indexer + * currently knows too much about these; they should be de-coupled. + * @param aID Specific to the job type, but for now only used to hold folder + * IDs. + * + * @ivar items The list of items to process during this job/batch. (For + * example, if this is a "messages" job, this would be the list of messages + * to process, although the specific representation is determined by the + * job.) The list will only be mutated through the addition of extra items. + * @ivar offset The current offset into the 'items' list (if used), updated as + * processing occurs. If 'items' is not used, the processing code can also + * update this in a similar fashion. This is used by the status + * notification code in conjunction with goal. + * @ivar goal The total number of items to index/actions to perform in this job. + * This number may increase during the life of the job, but should not + * decrease. This is used by the status notification code in conjunction + * with the goal. + */ +function IndexingJob(aJobType, aID, aItems) { + this.jobType = aJobType; + this.id = aID; + this.items = aItems != null ? 
aItems : []; + this.offset = 0; + this.goal = null; + this.callback = null; + this.callbackThis = null; +} +IndexingJob.prototype = { + /** + * Invoke the callback associated with this job, passing through all arguments + * received by this function to the callback function. + */ + safelyInvokeCallback(...aArgs) { + if (!this.callback) { + return; + } + try { + this.callback.apply(this.callbackThis, aArgs); + } catch (ex) { + GlodaIndexer._log.warn("job callback invocation problem:", ex); + } + }, + toString() { + return ( + "[job:" + + this.jobType + + " id:" + + this.id + + " items:" + + (this.items ? this.items.length : "no") + + " offset:" + + this.offset + + " goal:" + + this.goal + + "]" + ); + }, +}; + +/** + * @namespace Core indexing logic, plus message-specific indexing logic. + * + * === Indexing Goals + * We have the following goals: + * + * Responsiveness + * - When the user wants to quit, we should be able to stop and quit in a timely + * fashion. + * - We should not interfere with the user's thunderbird usage. + * + * Correctness + * - Quitting should not result in any information loss; we should (eventually) + * end up at the same indexed state regardless of whether a user lets + * indexing run to completion or restarts thunderbird in the middle of the + * process. (It is okay to take slightly longer in the latter case.) + * + * Worst Case Scenario Avoidance + * - We should try to be O(1) memory-wise regardless of what notifications + * are thrown at us. + * + * === Indexing Throttling + * + * Adaptive Indexing + * - The indexer tries to stay out of the way of other running code in + * Thunderbird (autosync) and other code on the system. We try and target + * some number of milliseconds of activity between intentional inactive + * periods. The number of milliseconds of activity varies based on whether we + * believe the user to be actively using the computer or idle. 
We use our + * inactive periods as a way to measure system load; if we receive our + * notification promptly at the end of our inactive period, we believe the + * system is not heavily loaded. If we do not get notified promptly, we + * assume there is other stuff going on and back off. + * + */ +var GlodaIndexer = { + /** + * A partial attempt to generalize to support multiple databases. Each + * database would have its own datastore would have its own indexer. But + * we rather inter-mingle our use of this field with the singleton global + * GlodaDatastore. + */ + _log: console.createInstance({ + prefix: "gloda.indexer", + maxLogLevel: "Warn", + maxLogLevelPref: "gloda.loglevel", + }), + /** + * Our nsITimer that we use to schedule ourselves on the main thread + * intermittently. The timer always exists but may not always be active. + */ + _timer: null, + /** + * Our nsITimer that we use to schedule events in the "far" future. For now, + * this means not compelling an initial indexing sweep until some number of + * seconds after startup. + */ + _longTimer: null, + + /** + * Periodic performance adjustment parameters: The overall goal is to adjust + * our rate of work so that we don't interfere with the user's activities + * when they are around (non-idle), and the system in general (when idle). + * Being nice when idle isn't quite as important, but is a good idea so that + * when the user un-idles we are able to back off nicely. Also, we give + * other processes on the system a chance to do something. + * + * We do this by organizing our work into discrete "tokens" of activity, + * then processing the number of tokens that we have determined will + * not impact the UI. Then we pause to give other activities a chance to get + * some work done, and we measure whether anything happened during our pause. + * If something else is going on in our application during that pause, we + * give it priority (up to a point) by delaying further indexing. 
+ * + * Keep in mind that many of our operations are actually asynchronous, so we + * aren't entirely starving the event queue. However, a lot of the async + * stuff can end up not having any actual delay between events. For + * example, we only index offline message bodies, so there's no network + * latency involved, just disk IO; the only meaningful latency will be the + * initial disk seek (if there is one... pre-fetching may seriously be our + * friend). + * + * In order to maintain responsiveness, I assert that we want to minimize the + * length of the time we are dominating the event queue. This suggests + * that we want break up our blocks of work frequently. But not so + * frequently that there is a lot of waste. Accordingly our algorithm is + * basically: + * + * - Estimate the time that it takes to process a token, and schedule the + * number of tokens that should fit into that time. + * - Detect user activity, and back off immediately if found. + * - Try to delay commits and garbage collection until the user is inactive, + * as these tend to cause a brief pause in the UI. + */ + + /** + * The number of milliseconds before we declare the user idle and step up our + * indexing. + */ + _INDEX_IDLE_ADJUSTMENT_TIME: 5000, + + /** + * The time delay in milliseconds before we should schedule our initial sweep. + */ + _INITIAL_SWEEP_DELAY: 10000, + + /** + * How many milliseconds in the future should we schedule indexing to start + * when turning on indexing (and it was not previously active). + */ + _INDEX_KICKOFF_DELAY: 200, + + /** + * The time interval, in milliseconds, of pause between indexing batches. The + * maximum processor consumption is determined by this constant and the + * active |_cpuTargetIndexTime|. + * + * For current constants, that puts us at 50% while the user is active and 83% + * when idle. + */ + _INDEX_INTERVAL: 32, + + /** + * Number of indexing 'tokens' we are allowed to consume before yielding for + * each incremental pass. 
Consider a single token equal to indexing a single + * medium-sized message. This may be altered by user session (in)activity. + * Because we fetch message bodies, which is potentially asynchronous, this + * is not a precise knob to twiddle. + */ + _indexTokens: 2, + + /** + * Stopwatches used to measure performance during indexing, and during + * pauses between indexing. These help us adapt our indexing constants so + * as to not explode your computer. Kind of us, no? + */ + _perfIndexStopwatch: null, + _perfPauseStopwatch: null, + /** + * Do we have an uncommitted indexer transaction that idle callback should commit? + */ + _idleToCommit: false, + /** + * Target CPU time per batch of tokens, current value (milliseconds). + */ + _cpuTargetIndexTime: 32, + /** + * Target CPU time per batch of tokens, during non-idle (milliseconds). + */ + _CPU_TARGET_INDEX_TIME_ACTIVE: 32, + /** + * Target CPU time per batch of tokens, during idle (milliseconds). + */ + _CPU_TARGET_INDEX_TIME_IDLE: 160, + /** + * Average CPU time per processed token (milliseconds). + */ + _cpuAverageTimePerToken: 16, + /** + * Damping factor for _cpuAverageTimePerToken, as an approximate + * number of tokens to include in the average time. + */ + _CPU_AVERAGE_TIME_DAMPING: 200, + /** + * Maximum tokens per batch. This is normally just a sanity check. + */ + _CPU_MAX_TOKENS_PER_BATCH: 100, + /** + * CPU usage during a pause to declare that system was busy (milliseconds). + * This is typically set as 1.5 times the minimum resolution of the cpu + * usage clock, which is 16 milliseconds on Windows systems, and (I think) + * smaller on other systems, so we take the worst case. + */ + _CPU_IS_BUSY_TIME: 24, + /** + * Time that return from pause may be late before the system is declared + * busy, in milliseconds. (Same issues as _CPU_IS_BUSY_TIME). + */ + _PAUSE_LATE_IS_BUSY_TIME: 24, + /** + * Number of times that we will repeat a pause while waiting for a + * free CPU. 
+ */ + _PAUSE_REPEAT_LIMIT: 10, + /** + * Minimum time delay between commits, in milliseconds. + */ + _MINIMUM_COMMIT_TIME: 5000, + /** + * Maximum time delay between commits, in milliseconds. + */ + _MAXIMUM_COMMIT_TIME: 20000, + + /** + * Unit testing hook to get us to emit additional logging that verges on + * inane for general usage but is helpful in unit test output to get a lay + * of the land and for paranoia reasons. + */ + _unitTestSuperVerbose: false, + /** + * Unit test vector to get notified when a worker has a problem and it has + * a recover helper associated. This gets called with an argument + * indicating whether the recovery helper indicates recovery was possible. + */ + _unitTestHookRecover: null, + /** + * Unit test vector to get notified when a worker runs into an exceptional + * situation (an exception propagates or gets explicitly killed) and needs + * to be cleaned up. This gets called with an argument indicating if there + * was a helper that was used or if we just did the default cleanup thing. + */ + _unitTestHookCleanup: null, + + /** + * Last commit time. Tracked to try and only commit at reasonable intervals. + */ + _lastCommitTime: Date.now(), + + _inited: false, + /** + * Initialize the indexer. 
+ */ + _init() { + if (this._inited) { + return; + } + + this._inited = true; + + this._callbackHandle.init(); + + if (Services.io.offline) { + this._suppressIndexing = true; + } + + // create the timer that drives our intermittent indexing + this._timer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer); + // create the timer for larger offsets independent of indexing + this._longTimer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer); + + this._idleService = Cc["@mozilla.org/widget/useridleservice;1"].getService( + Ci.nsIUserIdleService + ); + + // create our performance stopwatches + try { + this._perfIndexStopwatch = Cc["@mozilla.org/stopwatch;1"].createInstance( + Ci.nsIStopwatch + ); + this._perfPauseStopwatch = Cc["@mozilla.org/stopwatch;1"].createInstance( + Ci.nsIStopwatch + ); + } catch (ex) { + this._log.error("problem creating stopwatch!: " + ex); + } + + // register for shutdown notifications + Services.obs.addObserver(this, "quit-application"); + + // figure out if event-driven indexing should be enabled... + let branch = Services.prefs.getBranch("mailnews.database.global.indexer."); + let eventDrivenEnabled = branch.getBoolPref("enabled", false); + let performInitialSweep = branch.getBoolPref("perform_initial_sweep", true); + // pretend we have already performed an initial sweep... + if (!performInitialSweep) { + this._initialSweepPerformed = true; + } + + this.enabled = eventDrivenEnabled; + }, + + /** + * When shutdown, indexing immediately ceases and no further progress should + * be made. This flag goes true once, and never returns to false. Being + * in this state is a destructive thing from whence we cannot recover. + */ + _indexerIsShutdown: false, + + /** + * Shutdown the indexing process and datastore as quickly as possible in + * a synchronous fashion. 
+ */ + _shutdown() { + // no more timer events, please + try { + this._timer.cancel(); + } catch (ex) {} + this._timer = null; + try { + this._longTimer.cancel(); + } catch (ex) {} + this._longTimer = null; + + this._perfIndexStopwatch = null; + this._perfPauseStopwatch = null; + + // Remove listeners to avoid reference cycles on the off chance one of them + // holds a reference to the indexer object. + this._indexListeners = []; + + this._indexerIsShutdown = true; + + if (this.enabled) { + this._log.info("Shutting Down"); + } + + // don't let anything try and convince us to start indexing again + this.suppressIndexing = true; + + // If there is an active job and it has a cleanup handler, run it. + if (this._curIndexingJob) { + let workerDef = this._curIndexingJob._workerDef; + try { + if (workerDef.cleanup) { + workerDef.cleanup.call(workerDef.indexer, this._curIndexingJob); + } + } catch (ex) { + this._log.error("problem during worker cleanup during shutdown."); + } + } + // Definitely clean out the async call stack and any associated data + this._callbackHandle.cleanup(); + this._workBatchData = undefined; + + // disable ourselves and all of the specific indexers + this.enabled = false; + + lazy.GlodaDatastore.shutdown(); + }, + + /** + * The list of indexers registered with us. If you are a core gloda indexer + * (you ship with gloda), then you can import this file directly and should + * make sure your indexer is imported in 'Everybody.jsm' in the right order. + * If you are not core gloda, then you should import 'GlodaPublic.jsm' and only + * then should you import 'GlodaIndexer.jsm' to get at GlodaIndexer. + */ + _indexers: [], + /** + * Register an indexer with the Gloda indexing mechanism. + * + * @param aIndexer.name The name of your indexer. + * @param aIndexer.enable Your enable function. This will be called during + * the call to registerIndexer if Gloda indexing is already enabled. 
If + * indexing is not yet enabled, you will be called + * @param aIndexer.disable Your disable function. This will be called when + * indexing is disabled or we are shutting down. This will only be called + * if enable has already been called. + * @param aIndexer.workers A list of tuples of the form [worker type code, + * worker generator function, optional scheduling trigger function]. The + * type code is the string used to uniquely identify the job type. If you + * are not core gloda, your job type must start with your extension's name + * and a colon; you can collow that with anything you want. The worker + * generator is not easily explained in here. The trigger function is + * invoked immediately prior to calling the generator to create it. The + * trigger function takes the job as an argument and should perform any + * finalization required on the job. Most workers should not need to use + * the trigger function. + * @param aIndexer.initialSweep We call this to tell each indexer when it is + * its turn to run its indexing sweep. The idea of the indexing sweep is + * that this is when you traverse things eligible for indexing to make + * sure they are indexed. Right now we just call everyone at the same + * time and hope that their jobs don't fight too much. 
+ */ + registerIndexer(aIndexer) { + this._log.info("Registering indexer: " + aIndexer.name); + this._indexers.push(aIndexer); + + try { + for (let workerInfo of aIndexer.workers) { + let workerCode = workerInfo[0]; + let workerDef = workerInfo[1]; + workerDef.name = workerCode; + workerDef.indexer = aIndexer; + this._indexerWorkerDefs[workerCode] = workerDef; + if (!("recover" in workerDef)) { + workerDef.recover = null; + } + if (!("cleanup" in workerDef)) { + workerDef.cleanup = null; + } + if (!("onSchedule" in workerDef)) { + workerDef.onSchedule = null; + } + if (!("jobCanceled" in workerDef)) { + workerDef.jobCanceled = null; + } + } + } catch (ex) { + this._log.warn("Helper indexer threw exception on worker enum."); + } + + if (this._enabled) { + try { + aIndexer.enable(); + } catch (ex) { + this._log.warn("Helper indexer threw exception on enable: " + ex); + } + } + }, + + /** + * Are we enabled, read: are we processing change events? + */ + _enabled: false, + get enabled() { + return this._enabled; + }, + set enabled(aEnable) { + if (!this._enabled && aEnable) { + // register for offline notifications + Services.obs.addObserver(this, "network:offline-status-changed"); + + // register for idle notification + this._idleService.addIdleObserver(this, this._indexIdleThresholdSecs); + + this._enabled = true; + + for (let indexer of this._indexers) { + try { + indexer.enable(); + } catch (ex) { + this._log.warn("Helper indexer threw exception on enable: " + ex); + } + } + + // if we have an accumulated desire to index things, kick it off again. + if (this._indexingDesired) { + this._indexingDesired = false; // it's edge-triggered for now + this.indexing = true; + } + + // if we have not done an initial sweep, schedule scheduling one. 
+ if (!this._initialSweepPerformed) { + this._longTimer.initWithCallback( + this._scheduleInitialSweep, + this._INITIAL_SWEEP_DELAY, + Ci.nsITimer.TYPE_ONE_SHOT + ); + } + } else if (this._enabled && !aEnable) { + for (let indexer of this._indexers) { + try { + indexer.disable(); + } catch (ex) { + this._log.warn("Helper indexer threw exception on disable: " + ex); + } + } + + // remove offline observer + Services.obs.removeObserver(this, "network:offline-status-changed"); + + // remove idle + this._idleService.removeIdleObserver(this, this._indexIdleThresholdSecs); + + this._enabled = false; + } + }, + + /** Track whether indexing is desired (we have jobs to prosecute). */ + _indexingDesired: false, + /** + * Track whether we have an actively pending callback or timer event. We do + * this so we don't experience a transient suppression and accidentally + * get multiple event-chains driving indexing at the same time (which the + * code will not handle correctly). + */ + _indexingActive: false, + /** + * Indicates whether indexing is currently ongoing. This may return false + * while indexing activities are still active, but they will quiesce shortly. + */ + get indexing() { + return this._indexingDesired && !this._suppressIndexing; + }, + /** Indicates whether indexing is desired. */ + get indexingDesired() { + return this._indexingDesired; + }, + /** + * Set this to true to indicate there is indexing work to perform. This does + * not mean indexing will begin immediately (if it wasn't active), however. + * If suppressIndexing has been set, we won't do anything until indexing is + * no longer suppressed. 
   */
  set indexing(aShouldIndex) {
    // Edge-triggered: only the false -> true transition does anything.
    if (!this._indexingDesired && aShouldIndex) {
      this._indexingDesired = true;
      if (this.enabled && !this._indexingActive && !this._suppressIndexing) {
        this._log.info("+++ Indexing Queue Processing Commencing");
        this._indexingActive = true;
        this._timer.initWithCallback(
          this._timerCallbackDriver,
          this._INDEX_KICKOFF_DELAY,
          Ci.nsITimer.TYPE_ONE_SHOT
        );
      }
    }
  },

  _suppressIndexing: false,
  /**
   * Set whether or not indexing should be suppressed. This is to allow us to
   * avoid running down a laptop's battery when it is not on AC. Only code
   * in charge of regulating that tracking should be setting this variable; if
   * other factors want to contribute to such a decision, this logic needs to
   * be changed to track that, since last-write currently wins.
   */
  set suppressIndexing(aShouldSuppress) {
    this._suppressIndexing = aShouldSuppress;

    // re-start processing if we are no longer suppressing, there is work yet
    // to do, and the indexing process had actually stopped.
    if (
      !this._suppressIndexing &&
      this._indexingDesired &&
      !this._indexingActive
    ) {
      this._log.info("+++ Indexing Queue Processing Resuming");
      this._indexingActive = true;
      this._timer.initWithCallback(
        this._timerCallbackDriver,
        this._INDEX_KICKOFF_DELAY,
        Ci.nsITimer.TYPE_ONE_SHOT
      );
    }
  },

  /**
   * Track whether an initial sweep has been performed. This mainly exists so
   * that unit testing can stop us from performing an initial sweep.
   */
  _initialSweepPerformed: false,
  /**
   * Our timer-driven callback to schedule our first initial indexing sweep.
   * Because it is invoked by an nsITimer it operates without the benefit of
   * a 'this' context and must use GlodaIndexer instead of this.
   * Since an initial sweep could have been performed before we get invoked,
   * we need to check whether an initial sweep is still desired before trying
   * to schedule one. We don't need to worry about whether one is active
   * because the indexingSweepNeeded takes care of that.
   */
  _scheduleInitialSweep() {
    if (GlodaIndexer._initialSweepPerformed) {
      return;
    }
    GlodaIndexer._initialSweepPerformed = true;
    for (let indexer of GlodaIndexer._indexers) {
      indexer.initialSweep();
    }
  },

  /**
   * Our current job number. Meaningless value that increments with every job
   * we process that resets to 0 when we run out of jobs. Currently used by
   * the activity manager's gloda listener to tell when we have changed jobs.
   * We really need a better listener mechanism.
   */
  _indexingJobCount: 0,

  /**
   * A list of IndexingJob instances to process.
   */
  _indexQueue: [],

  /**
   * The current indexing job.
   */
  _curIndexingJob: null,

  /**
   * The number of seconds before we declare the user idle and commit if
   * needed.
   */
  _indexIdleThresholdSecs: 3,

  _indexListeners: [],
  /**
   * Add an indexing progress listener. The listener will be notified of at
   * least all major status changes (idle -> indexing, indexing -> idle), plus
   * arbitrary progress updates during the indexing process.
   * If indexing is not active when the listener is added, a synthetic idle
   * notification will be generated.
   *
   * @param aListener A listener function, taking arguments: status (Gloda.
   *     kIndexer*), the folder name if a folder is involved (string or null),
   *     current zero-based job number (int),
   *     current item number being indexed in this job (int), total number
   *     of items in this job to be indexed (int).
   * @returns aListener, unchanged, for caller convenience.
   *
   * @TODO should probably allow for a 'this' value to be provided
   * @TODO generalize to not be folder/message specific. use nouns!
   */
  addListener(aListener) {
    // should we weakify?
    if (!this._indexListeners.includes(aListener)) {
      this._indexListeners.push(aListener);
    }
    // if we aren't indexing, give them an idle indicator, otherwise they can
    // just be happy when we hit the next actual status point.
    if (!this.indexing) {
      aListener(GlodaConstants.kIndexerIdle, null, 0, 0, 1);
    }
    return aListener;
  },
  /**
   * Remove the given listener so that it no longer receives indexing progress
   * updates.
   */
  removeListener(aListener) {
    let index = this._indexListeners.indexOf(aListener);
    if (index != -1) {
      this._indexListeners.splice(index, 1);
    }
  },
  /**
   * Helper method to tell listeners what we're up to. For code simplicity,
   * the caller is just deciding when to send this update (preferably at
   * reasonable intervals), and doesn't need to provide any indication of
   * state... we figure that out ourselves.
   *
   * This was not pretty but got ugly once we moved the message indexing out
   * to its own indexer. Some generalization is required but will likely
   * require string hooks.
   */
  _notifyListeners() {
    let status, prettyName, jobIndex, jobItemIndex, jobItemGoal, jobType;

    if (this.indexing && this._curIndexingJob) {
      let job = this._curIndexingJob;
      status = GlodaConstants.kIndexerIndexing;

      // Only folder-based indexers expose _indexingFolder; others get null.
      let indexer = this._indexerWorkerDefs[job.jobType].indexer;
      if ("_indexingFolder" in indexer) {
        prettyName =
          indexer._indexingFolder != null
            ? indexer._indexingFolder.prettyName
            : null;
      } else {
        prettyName = null;
      }

      jobIndex = this._indexingJobCount - 1;
      jobItemIndex = job.offset;
      jobItemGoal = job.goal;
      jobType = job.jobType;
    } else {
      status = GlodaConstants.kIndexerIdle;
      prettyName = null;
      jobIndex = 0;
      jobItemIndex = 0;
      jobItemGoal = 1;
      jobType = null;
    }

    // Some people ascribe to the belief that the most you can give is 100%.
    // We know better, but let's humor them.
    if (jobItemIndex > jobItemGoal) {
      jobItemGoal = jobItemIndex;
    }

    // Iterate backwards so a listener removing itself mid-notification does
    // not cause us to skip a neighbor.
    for (
      let iListener = this._indexListeners.length - 1;
      iListener >= 0;
      iListener--
    ) {
      let listener = this._indexListeners[iListener];
      try {
        listener(
          status,
          prettyName,
          jobIndex,
          jobItemIndex,
          jobItemGoal,
          jobType
        );
      } catch (ex) {
        // One throwing listener must not starve the others.
        this._log.error(ex);
      }
    }
  },

  /**
   * A wrapped callback driver intended to be used by timers that provide
   * arguments we really do not care about.
   */
  _timerCallbackDriver() {
    GlodaIndexer.callbackDriver();
  },

  /**
   * A simple callback driver wrapper to provide 'this'.
   */
  _wrapCallbackDriver(...aArgs) {
    GlodaIndexer.callbackDriver(...aArgs);
  },

  /**
   * The current processing 'batch' generator, produced by a call to workBatch()
   * and used by callbackDriver to drive execution.
   */
  _batch: null,
  _inCallback: false,
  _savedCallbackArgs: null,
  /**
   * The root work-driver. callbackDriver creates workBatch generator instances
   * (stored in _batch) which run until they are done (kWorkDone) or they
   * (really the embedded activeIterator) encounter something asynchronous.
   * The convention is that all the callback handlers end up calling us,
   * ensuring that control-flow properly resumes. If the batch completes,
   * we re-schedule ourselves after a time delay (controlled by _INDEX_INTERVAL)
   * and return. (We use one-shot timers because repeating-slack does not
   * know enough to deal with our (current) asynchronous nature.)
   */
  callbackDriver(...aArgs) {
    // just bail if we are shutdown
    if (this._indexerIsShutdown) {
      return;
    }

    // it is conceivable that someone we call will call something that in some
    // cases might be asynchronous, and in other cases immediately generate
    // events without returning. In the interest of (stack-depth) sanity,
    // let's handle this by performing a minimal time-delay callback.
    // this is also now a good thing sequencing-wise. if we get our callback
    // with data before the underlying function has yielded, we obviously can't
    // cram the data in yet. Our options in this case are to either mark the
    // fact that the callback has already happened and immediately return to
    // the iterator when it does bubble up the kWorkAsync, or we can do as we
    // have been doing, but save the callback arguments in _savedCallbackArgs
    // and re-trigger ourselves via a zero-delay one-shot timer, which is what
    // the reentrancy guard below implements.
    if (this._inCallback) {
      this._savedCallbackArgs = aArgs;
      this._timer.initWithCallback(
        this._timerCallbackDriver,
        0,
        Ci.nsITimer.TYPE_ONE_SHOT
      );
      return;
    }
    this._inCallback = true;

    try {
      if (this._batch === null) {
        this._batch = this.workBatch();
      }

      // kWorkAsync, kWorkDone, kWorkPause are allowed out; kWorkSync is not
      // On kWorkDone, we want to schedule another timer to fire on us if we are
      // not done indexing. (On kWorkAsync, we don't care what happens, because
      // someone else will be receiving the callback, and they will call us when
      // they are done doing their thing.
      let args;
      if (this._savedCallbackArgs != null) {
        args = this._savedCallbackArgs;
        this._savedCallbackArgs = null;
      } else {
        args = aArgs;
      }

      let result;
      if (args.length == 0) {
        result = this._batch.next().value;
      } else if (args.length == 1) {
        result = this._batch.next(args[0]).value;
      } else {
        // Arguments works with destructuring assignment.
        result = this._batch.next(args).value;
      }
      switch (result) {
        // job's done, close the batch and re-schedule ourselves if there's more
        // to do.
        case GlodaConstants.kWorkDone:
          this._batch.return();
          this._batch = null;
        // the batch wants to get re-scheduled, do so.
        // (intentional fall-through to re-scheduling logic)
        case GlodaConstants.kWorkPause:
          if (this.indexing) {
            this._timer.initWithCallback(
              this._timerCallbackDriver,
              this._INDEX_INTERVAL,
              Ci.nsITimer.TYPE_ONE_SHOT
            );
          } else {
            // it's important to indicate no more callbacks are in flight
            this._indexingActive = false;
          }
          break;
        case GlodaConstants.kWorkAsync:
          // there is nothing to do. some other code is now responsible for
          // calling us.
          break;
      }
    } finally {
      this._inCallback = false;
    }
  },

  _callbackHandle: {
    init() {
      this.wrappedCallback = GlodaIndexer._wrapCallbackDriver;
      this.callbackThis = GlodaIndexer;
      this.callback = GlodaIndexer.callbackDriver;
    },
    /**
     * The stack of generators we are processing. The (numerically) last one is
     * also the |activeIterator|.
     */
    activeStack: [],
    /**
     * The generator at the top of the |activeStack| and that we will call next
     * or send on next if nothing changes.
     */
    activeIterator: null,
    /**
     * Meta-information about the generators at each level of the stack.
     */
    contextStack: [],
    /**
     * Push a new generator onto the stack. It becomes the active generator.
     */
    push(aIterator, aContext) {
      this.activeStack.push(aIterator);
      this.contextStack.push(aContext);
      this.activeIterator = aIterator;
    },
    /**
     * For use by generators that want to call another asynchronous process
     * implemented as a generator. They should do
     * "yield aCallbackHandle.pushAndGo(someGenerator(arg1, arg2));".
     *
     * @public
     */
    pushAndGo(aIterator, aContext) {
      this.push(aIterator, aContext);
      return GlodaConstants.kWorkSync;
    },
    /**
     * Pop the active generator off the stack.
   * It drives the
   * activeIterator generator which is doing the work.
   * workBatch will only produce kWorkAsync, kWorkPause, and kWorkDone
   * notifications. If activeIterator returns kWorkSync and there are still
   * tokens available, workBatch will keep driving the activeIterator until it
   * encounters a kWorkAsync (which workBatch will yield to callbackDriver), or
   * it runs out of tokens and yields a kWorkPause or kWorkDone.
   */
  *workBatch() {
    // Do we still have an open transaction? If not, start a new one.
    if (!this._idleToCommit) {
      lazy.GlodaDatastore._beginTransaction();
    } else {
      // We'll manage commit ourself while this routine is active.
      this._idleToCommit = false;
    }

    // NOTE(review): this outer start() looks redundant with the per-iteration
    // start() at the top of the while loop below (start presumably resets the
    // stopwatch) — confirm against the stopwatch implementation.
    this._perfIndexStopwatch.start();
    let batchCount;
    let haveMoreWork = true;
    let transactionToCommit = true;
    let inIdle;

    let notifyDecimator = 0;

    while (haveMoreWork) {
      // Both explicit work activity points (sync + async) and transfer of
      // control return (via kWorkDone*) results in a token being eaten. The
      // idea now is to make tokens less precious so that the adaptive logic
      // can adjust them with less impact. (Before this change, doing 1
      // token's work per cycle ended up being an entire non-idle time-slice's
      // work.)
      // During this loop we track the clock real-time used even though we
      // frequently yield to asynchronous operations. These asynchronous
      // operations are either database queries or message streaming requests.
      // Both may involve disk I/O but no network I/O (since we only stream
      // messages that are already available offline), but in an ideal
      // situation will come from cache and so the work this function kicks off
      // will dominate.
      // We do not use the CPU time to this end because...
      // 1) Our timer granularity on linux is worse for CPU than for wall time.
      // 2) That can fail to account for our I/O cost.
      // 3) If something with a high priority / low latency need (like playing
      //    a video) is fighting us, although using CPU time will accurately
      //    express how much time we are actually spending to index, our goal
      //    is to control the duration of our time slices, not be "right" about
      //    the actual CPU cost. In that case, if we attempted to take on more
      //    work, we would likely interfere with the higher priority process or
      //    make ourselves less responsive by drawing out the period of time we
      //    are dominating the main thread.
      this._perfIndexStopwatch.start();
      // For telemetry purposes, we want to know how many messages we've been
      // processing during that batch, and how long it took, pauses included.
      let t0 = Date.now();
      this._indexedMessageCount = 0;
      batchCount = 0;
      while (batchCount < this._indexTokens) {
        // No active worker generator? Hire one; if there are no jobs left,
        // we are done with this batch.
        if (
          this._callbackHandle.activeIterator === null &&
          !this._hireJobWorker()
        ) {
          haveMoreWork = false;
          break;
        }
        batchCount++;

        // XXX for performance, we may want to move the try outside the for loop
        // with a quasi-redundant outer loop that shunts control back inside
        // if we left the loop due to an exception (without consuming all the
        // tokens.)
        try {
          switch (
            this._callbackHandle.activeIterator.next(this._workBatchData).value
          ) {
            case GlodaConstants.kWorkSync:
              this._workBatchData = undefined;
              break;
            case GlodaConstants.kWorkAsync:
              // Suspend ourselves; the async completion callback resumes us
              // via callbackDriver with the async result as our yield value.
              this._workBatchData = yield GlodaConstants.kWorkAsync;
              break;
            case GlodaConstants.kWorkDone:
              this._callbackHandle.pop();
              this._workBatchData = undefined;
              break;
            case GlodaConstants.kWorkDoneWithResult:
              this._workBatchData = this._callbackHandle.popWithResult();
              break;
            default:
              break;
          }
        } catch (ex) {
          this._log.debug("Exception in batch processing:", ex);
          let workerDef = this._curIndexingJob._workerDef;
          if (workerDef.recover) {
            let recoverToDepth;
            try {
              recoverToDepth = workerDef.recover.call(
                workerDef.indexer,
                this._curIndexingJob,
                this._callbackHandle.contextStack,
                ex
              );
            } catch (ex2) {
              this._log.error(
                "Worker '" +
                  workerDef.name +
                  "' recovery function itself failed:",
                ex2
              );
            }
            if (this._unitTestHookRecover) {
              this._unitTestHookRecover(
                recoverToDepth,
                ex,
                this._curIndexingJob,
                this._callbackHandle
              );
            }

            if (recoverToDepth) {
              // Unwind the generator stack to the depth the recovery handler
              // requested and keep consuming tokens.
              this._callbackHandle.cleanup(recoverToDepth);
              continue;
            }
          }
          // (we either did not have a recover handler or it couldn't recover)
          // call the cleanup helper if there is one
          if (workerDef.cleanup) {
            try {
              workerDef.cleanup.call(workerDef.indexer, this._curIndexingJob);
            } catch (ex2) {
              this._log.error(
                "Worker '" +
                  workerDef.name +
                  "' cleanup function itself failed:",
                ex2
              );
            }
            if (this._unitTestHookCleanup) {
              this._unitTestHookCleanup(
                true,
                ex,
                this._curIndexingJob,
                this._callbackHandle
              );
            }
          } else if (this._unitTestHookCleanup) {
            this._unitTestHookCleanup(
              false,
              ex,
              this._curIndexingJob,
              this._callbackHandle
            );
          }

          // Clean out everything on the async stack, warn about the job, kill.
          // We do not log this warning lightly; it will break unit tests and
          // be visible to users. Anything expected should likely have a
          // recovery function or the cleanup logic should be extended to
          // indicate that the failure is acceptable.
          this._callbackHandle.cleanup();
          this._log.warn(
            "Problem during " + this._curIndexingJob + ", bailing:",
            ex
          );
          this._curIndexingJob = null;
          // the data must now be invalid
          this._workBatchData = undefined;
        }
      }
      this._perfIndexStopwatch.stop();

      // idleTime can throw if there is no idle-provider available, such as an
      // X session without the relevant extensions available. In this case
      // we assume that the user is never idle.
      try {
        // We want to stop ASAP when leaving idle, so we can't rely on the
        // standard polled callback. We do the polling ourselves.
        if (this._idleService.idleTime < this._INDEX_IDLE_ADJUSTMENT_TIME) {
          inIdle = false;
          this._cpuTargetIndexTime = this._CPU_TARGET_INDEX_TIME_ACTIVE;
        } else {
          inIdle = true;
          this._cpuTargetIndexTime = this._CPU_TARGET_INDEX_TIME_IDLE;
        }
      } catch (ex) {
        inIdle = false;
      }

      // take a breather by having the caller re-schedule us sometime in the
      // future, but only if we're going to perform another loop iteration.
      if (haveMoreWork) {
        // Only notify listeners every 32nd pass to keep UI churn down.
        notifyDecimator = (notifyDecimator + 1) % 32;
        if (!notifyDecimator) {
          this._notifyListeners();
        }

        for (
          let pauseCount = 0;
          pauseCount < this._PAUSE_REPEAT_LIMIT;
          pauseCount++
        ) {
          this._perfPauseStopwatch.start();

          yield GlodaConstants.kWorkPause;

          this._perfPauseStopwatch.stop();
          // We repeat the pause if the pause was longer than
          // we expected, or if it used a significant amount
          // of cpu, either of which indicate significant other
          // activity.
          if (
            this._perfPauseStopwatch.cpuTimeSeconds * 1000 <
              this._CPU_IS_BUSY_TIME &&
            this._perfPauseStopwatch.realTimeSeconds * 1000 -
              this._INDEX_INTERVAL <
              this._PAUSE_LATE_IS_BUSY_TIME
          ) {
            break;
          }
        }
      }

      // All pauses have been taken, how effective were we? Report!
      // XXX: there's possibly a lot of fluctuation since we go through here
      // every 5 messages or even less
      if (this._indexedMessageCount > 0) {
        let delta = (Date.now() - t0) / 1000; // in seconds
        let v = Math.round(this._indexedMessageCount / delta);
        try {
          let h = Services.telemetry.getHistogramById(
            "THUNDERBIRD_INDEXING_RATE_MSG_PER_S"
          );
          h.add(v);
        } catch (e) {
          this._log.warn("Couldn't report telemetry", e, v);
        }
      }

      // Adapt the token budget for the next pass from the observed
      // time-per-token so a pass targets roughly _cpuTargetIndexTime.
      if (batchCount > 0) {
        let totalTime = this._perfIndexStopwatch.realTimeSeconds * 1000;
        let timePerToken = totalTime / batchCount;
        // Damp the average time since it is a rough estimate only.
        this._cpuAverageTimePerToken =
          (totalTime +
            this._CPU_AVERAGE_TIME_DAMPING * this._cpuAverageTimePerToken) /
          (batchCount + this._CPU_AVERAGE_TIME_DAMPING);
        // We use the larger of the recent or the average time per token, so
        // that we can respond quickly to slow down indexing if there
        // is a sudden increase in time per token.
        let bestTimePerToken = Math.max(
          timePerToken,
          this._cpuAverageTimePerToken
        );
        // Always index at least one token!
        this._indexTokens = Math.max(
          1,
          this._cpuTargetIndexTime / bestTimePerToken
        );
        // But no more than the a maximum limit, just for sanity's sake.
        this._indexTokens = Math.min(
          this._CPU_MAX_TOKENS_PER_BATCH,
          this._indexTokens
        );
        this._indexTokens = Math.ceil(this._indexTokens);
      }

      // Should we try to commit now?
      let elapsed = Date.now() - this._lastCommitTime;
      // Commit tends to cause a brief UI pause, so we try to delay it (but not
      // forever) if the user is active. If we're done and idling, we'll also
      // commit, otherwise we'll let the idle callback do it.
      let doCommit =
        transactionToCommit &&
        (elapsed > this._MAXIMUM_COMMIT_TIME ||
          (inIdle && (elapsed > this._MINIMUM_COMMIT_TIME || !haveMoreWork)));
      if (doCommit) {
        lazy.GlodaCollectionManager.cacheCommitDirty();
        // Set up an async notification to happen after the commit completes so
        // that we can avoid the indexer doing something with the database that
        // causes the main thread to block against the completion of the commit
        // (which can be a while) on 1.9.1.
        lazy.GlodaDatastore.runPostCommit(this._callbackHandle.wrappedCallback);
        // kick off the commit
        lazy.GlodaDatastore._commitTransaction();
        yield GlodaConstants.kWorkAsync;
        this._lastCommitTime = Date.now();
        // Restart the transaction if we still have work.
        if (haveMoreWork) {
          lazy.GlodaDatastore._beginTransaction();
        } else {
          transactionToCommit = false;
        }
      }
    }

    this._notifyListeners();

    // If we still have a transaction to commit, tell idle to do the commit
    // when it gets around to it.
    if (transactionToCommit) {
      this._idleToCommit = true;
    }

    yield GlodaConstants.kWorkDone;
  },
  /* eslint-enable complexity */

  /**
   * Maps indexing job type names to a worker definition.
   * The worker definition is an object with the following attributes where
   * only worker is required:
   * - worker:
   * - onSchedule: A function to be invoked when the worker is scheduled. The
   *   job is passed as an argument.
   * - recover:
   * - cleanup:
   */
  _indexerWorkerDefs: {},
  /**
   * Perform the initialization step and return a generator if there is any
   * steady-state processing to be had.
   *
   * @returns {boolean} true if a worker generator was pushed onto the
   *     callback-handle stack, false if the queue is empty (or the hired
   *     worker produced no generator).
   */
  _hireJobWorker() {
    // In no circumstances should there be data bouncing around from previous
    // calls if we are here. |killActiveJob| depends on this.
    this._workBatchData = undefined;

    if (this._indexQueue.length == 0) {
      this._log.info("--- Done indexing, disabling timer renewal.");

      this._curIndexingJob = null;
      this._indexingDesired = false;
      this._indexingJobCount = 0;
      return false;
    }

    let job = (this._curIndexingJob = this._indexQueue.shift());
    this._indexingJobCount++;

    let generator = null;

    if (job.jobType in this._indexerWorkerDefs) {
      let workerDef = this._indexerWorkerDefs[job.jobType];
      job._workerDef = workerDef;

      // Prior to creating the worker, call the scheduling trigger function
      // if there is one. This is so that jobs can be finalized. The
      // initial use case is event-driven message indexing that accumulates
      // a list of messages to index but wants it locked down once we start
      // processing the list.
      if (workerDef.onSchedule) {
        workerDef.onSchedule.call(workerDef.indexer, job);
      }

      generator = workerDef.worker.call(
        workerDef.indexer,
        job,
        this._callbackHandle
      );
    } else {
      // Nothing we can do about this. Be loud about it and try to schedule
      // something else.
      this._log.error("Unknown job type: " + job.jobType);
      return this._hireJobWorker();
    }

    if (this._unitTestSuperVerbose) {
      this._log.debug("Hired job of type: " + job.jobType);
    }

    this._notifyListeners();

    if (generator) {
      this._callbackHandle.push(generator);
      return true;
    }
    return false;
  },

  /**
   * Schedule a job for indexing.
   *
   * @param aJob The IndexingJob instance to append to the queue.
   */
  indexJob(aJob) {
    this._log.info("Queue-ing job for indexing: " + aJob.jobType);

    this._indexQueue.push(aJob);
    this.indexing = true;
  },

  /**
   * Kill the active job. This means a few things:
   * - Kill all the generators in the callbackHandle stack.
   * - If we are currently waiting on an async return, we need to make sure it
   *   does not screw us up.
   * - Make sure the job's cleanup function gets called if appropriate.
   *
   * The async return case is actually not too troublesome.
   * Since there is an
   * active indexing job and we are not (by fiat) in that call stack, we know
   * that the callback driver is guaranteed to get triggered again somehow.
   * The only issue is to make sure that _workBatchData does not end up with
   * the data. We compel |_hireJobWorker| to erase it to this end.
   *
   * @note You MUST NOT call this function from inside a job or an async function
   *     on the callbackHandle's stack of generators. If you are in that
   *     situation, you should just throw an exception. At the very least,
   *     use a timeout to trigger us.
   */
  killActiveJob() {
    // There is nothing to do if we have no job
    if (!this._curIndexingJob) {
      return;
    }

    // -- Blow away the stack with cleanup.
    let workerDef = this._curIndexingJob._workerDef;
    if (this._unitTestSuperVerbose) {
      this._log.debug("Killing job of type: " + this._curIndexingJob.jobType);
    }
    if (this._unitTestHookCleanup) {
      this._unitTestHookCleanup(
        !!workerDef.cleanup,
        "no exception, this was killActiveJob",
        this._curIndexingJob,
        this._callbackHandle
      );
    }
    this._callbackHandle.cleanup();
    if (workerDef.cleanup) {
      workerDef.cleanup.call(workerDef.indexer, this._curIndexingJob);
    }

    // Eliminate the job.
    this._curIndexingJob = null;
  },

  /**
   * Purge all jobs that the filter function returns true for. This does not
   * kill the active job, use |killActiveJob| to do that.
   *
   * Make sure to call this function before killActiveJob
   *
   * @param aFilterElimFunc A filter function that takes an |IndexingJob| and
   *     returns true if the job should be purged, false if it should not be.
   *     The filter sees the jobs in the order they are scheduled.
   */
  purgeJobsUsingFilter(aFilterElimFunc) {
    for (let iJob = 0; iJob < this._indexQueue.length; iJob++) {
      let job = this._indexQueue[iJob];

      // If the filter says to, splice the job out of existence (and make sure
      // to fixup iJob to compensate.)
      if (aFilterElimFunc(job)) {
        if (this._unitTestSuperVerbose) {
          this._log.debug("Purging job of type: " + job.jobType);
        }
        this._indexQueue.splice(iJob--, 1);
        let workerDef = this._indexerWorkerDefs[job.jobType];
        if (workerDef.jobCanceled) {
          workerDef.jobCanceled.call(workerDef.indexer, job);
        }
      }
    }
  },

  /* *********** Event Processing *********** */
  /**
   * nsIObserver entry point. Handles the "idle" notification (flush a
   * deferred commit), offline status flips (suppress/resume indexing), and
   * "quit-application" (fallback shutdown path).
   */
  observe(aSubject, aTopic, aData) {
    // idle
    if (aTopic == "idle") {
      // Do we need to commit an indexer transaction?
      if (this._idleToCommit) {
        this._idleToCommit = false;
        lazy.GlodaCollectionManager.cacheCommitDirty();
        lazy.GlodaDatastore._commitTransaction();
        this._lastCommitTime = Date.now();
        this._notifyListeners();
      }
    } else if (aTopic == "network:offline-status-changed") {
      // offline status
      if (aData == "offline") {
        this.suppressIndexing = true;
      } else {
        // online
        this.suppressIndexing = false;
      }
    } else if (aTopic == "quit-application") {
      // shutdown fallback
      this._shutdown();
    }
  },
};
// we used to initialize here; now we have GlodaPublic.jsm do it for us after the
// indexers register themselves so we know about all our built-in indexers
// at init-time.
diff --git a/comm/mailnews/db/gloda/modules/GlodaMsgIndexer.jsm b/comm/mailnews/db/gloda/modules/GlodaMsgIndexer.jsm
new file mode 100644
index 0000000000..54ceacb59a
--- /dev/null
+++ b/comm/mailnews/db/gloda/modules/GlodaMsgIndexer.jsm
@@ -0,0 +1,310 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

const EXPORTED_SYMBOLS = ["GlodaABIndexer", "GlodaABAttrs"];

const { GlodaCollectionManager } = ChromeUtils.import(
  "resource:///modules/gloda/Collection.jsm"
);
const { Gloda } = ChromeUtils.import("resource:///modules/gloda/Gloda.jsm");
const { GlodaConstants } = ChromeUtils.import(
  "resource:///modules/gloda/GlodaConstants.jsm"
);
const { GlodaIndexer, IndexingJob } = ChromeUtils.import(
  "resource:///modules/gloda/GlodaIndexer.jsm"
);
const { FreeTagNoun } = ChromeUtils.import(
  "resource:///modules/gloda/NounFreetag.jsm"
);

/**
 * Indexer that keeps Gloda identities in sync with address book cards: it
 * observes card add/change/delete notifications and updates the cached
 * "has an address book card" state (and re-groks the contact on change).
 */
var GlodaABIndexer = {
  _log: null,
  // Address book observer topics we subscribe to while enabled.
  _notifications: [
    "addrbook-contact-created",
    "addrbook-contact-updated",
    "addrbook-contact-deleted",
  ],

  name: "index_ab",
  enable() {
    // Lazily create the logger on first enable.
    if (this._log == null) {
      this._log = console.createInstance({
        prefix: "gloda.index_ab",
        maxLogLevel: "Warn",
        maxLogLevelPref: "gloda.loglevel",
      });
    }

    for (let topic of this._notifications) {
      Services.obs.addObserver(this, topic);
    }
  },

  disable() {
    for (let topic of this._notifications) {
      Services.obs.removeObserver(this, topic);
    }
  },

  // it's a getter so we can reference 'this'
  get workers() {
    return [
      [
        "ab-card",
        {
          worker: this._worker_index_card,
        },
      ],
    ];
  },

  /**
   * Worker generator for "ab-card" jobs: looks up the Gloda identity for the
   * card's primary email and re-indexes the associated contact.
   *
   * @param aJob The IndexingJob; aJob.id carries the card (presumably an
   *     nsIAbCard, as queued by observe() below — TODO confirm against
   *     IndexingJob's constructor).
   * @param aCallbackHandle The GlodaIndexer callback handle driving us.
   */
  *_worker_index_card(aJob, aCallbackHandle) {
    let card = aJob.id;

    if (card.primaryEmail) {
      // load the identity
      let query = Gloda.newQuery(GlodaConstants.NOUN_IDENTITY);
      query.kind("email");
      // we currently normalize all e-mail addresses to be lowercase
      query.value(card.primaryEmail.toLowerCase());
      let identityCollection = query.getCollection(aCallbackHandle);
      // Suspend until the query completes and resumes us.
      yield GlodaConstants.kWorkAsync;

      if (identityCollection.items.length) {
        let identity = identityCollection.items[0];
        // force the identity to know it has an associated ab card.
        identity._hasAddressBookCard = true;

        this._log.debug("Found identity, processing card.");
        yield aCallbackHandle.pushAndGo(
          Gloda.grokNounItem(
            identity.contact,
            { card },
            false,
            false,
            aCallbackHandle
          )
        );
        this._log.debug("Done processing card.");
      }
    }

    yield GlodaConstants.kWorkDone;
  },

  initialSweep() {},

  /**
   * nsIObserver entry point for the addrbook-contact-* topics registered in
   * enable(); subject is the affected card.
   */
  observe(subject, topic, data) {
    subject.QueryInterface(Ci.nsIAbCard);

    switch (topic) {
      case "addrbook-contact-created": {
        // When an address book card is added, update the cached GlodaIdentity
        // object's cached idea of whether the identity has an ab card.
        this._log.debug("Received Card Add Notification");

        // "email@" + address appears to be the identity cache's unique-value
        // key format (kind "@"-joined with value) — see cacheLookupOneByUniqueValue.
        let identity = GlodaCollectionManager.cacheLookupOneByUniqueValue(
          GlodaConstants.NOUN_IDENTITY,
          "email@" + subject.primaryEmail.toLowerCase()
        );
        if (identity) {
          identity._hasAddressBookCard = true;
        }
        break;
      }
      case "addrbook-contact-updated": {
        this._log.debug("Received Card Change Notification");

        let job = new IndexingJob("ab-card", subject);
        GlodaIndexer.indexJob(job);
        break;
      }
      case "addrbook-contact-deleted": {
        // When an address book card is removed, update the cached GlodaIdentity
        // object's cached idea of whether the identity has an ab card.
        this._log.debug("Received Card Removal Notification");

        let identity = GlodaCollectionManager.cacheLookupOneByUniqueValue(
          GlodaConstants.NOUN_IDENTITY,
          "email@" + subject.primaryEmail.toLowerCase()
        );
        if (identity) {
          identity._hasAddressBookCard = false;
        }
        break;
      }
    }
  },
};
GlodaIndexer.registerIndexer(GlodaABIndexer);

var GlodaABAttrs = {
  providerName: "gloda.ab_attr",
  _log: null,

  init() {
    this._log = console.createInstance({
      prefix: "gloda.abattrs",
      maxLogLevel: "Warn",
      maxLogLevelPref: "gloda.loglevel",
    });

    try {
      this.defineAttributes();
    } catch (ex) {
      this._log.error("Error in init: " + ex);
      throw ex;
    }
  },

  defineAttributes() {
    /* ***** Contacts ***** */
    this._attrIdentityContact = Gloda.defineAttribute({
      provider: this,
      extensionName: GlodaConstants.BUILT_IN,
      attributeType: GlodaConstants.kAttrDerived,
      attributeName: "identities",
      singular: false,
      special: GlodaConstants.kSpecialColumnChildren,
      // specialColumnName: "contactID",
      storageAttributeName: "_identities",
      subjectNouns: [GlodaConstants.NOUN_CONTACT],
      objectNoun: GlodaConstants.NOUN_IDENTITY,
    }); // tested-by: test_attributes_fundamental
    this._attrContactName = Gloda.defineAttribute({
      provider: this,
      extensionName: GlodaConstants.BUILT_IN,
      attributeType: GlodaConstants.kAttrFundamental,
      attributeName: "name",
      singular: true,
      special: GlodaConstants.kSpecialString,
      specialColumnName: "name",
      subjectNouns: [GlodaConstants.NOUN_CONTACT],
      objectNoun: GlodaConstants.NOUN_STRING,
      canQuery: true,
    }); // tested-by: test_attributes_fundamental
    this._attrContactPopularity = Gloda.defineAttribute({
      provider: this,
      extensionName: GlodaConstants.BUILT_IN,
      attributeType: GlodaConstants.kAttrDerived,
      attributeName: "popularity",
      singular: true,
      special: GlodaConstants.kSpecialColumn,
      specialColumnName: "popularity",
      subjectNouns: [GlodaConstants.NOUN_CONTACT],
      objectNoun: GlodaConstants.NOUN_NUMBER,
      canQuery: true,
    }); // not-tested
    this._attrContactFrecency = Gloda.defineAttribute({
      provider: this,
      extensionName: GlodaConstants.BUILT_IN,
      attributeType: GlodaConstants.kAttrDerived,
      attributeName: "frecency",
      singular: true,
      special: GlodaConstants.kSpecialColumn,
      specialColumnName: "frecency",
      subjectNouns: [GlodaConstants.NOUN_CONTACT],
      objectNoun: GlodaConstants.NOUN_NUMBER,
      canQuery: true,
    }); // not-tested

    /* ***** Identities ***** */
    this._attrIdentityContact = Gloda.defineAttribute({
      provider: this,
      extensionName: GlodaConstants.BUILT_IN,
      attributeType: GlodaConstants.kAttrDerived,
      attributeName: "contact",
      singular: true,
      special: GlodaConstants.kSpecialColumnParent,
      specialColumnName: "contactID", // the column in the db
      idStorageAttributeName: "_contactID",
      valueStorageAttributeName: "_contact",
      subjectNouns: [GlodaConstants.NOUN_IDENTITY],
      objectNoun: GlodaConstants.NOUN_CONTACT,
      canQuery: true,
    }); // tested-by: test_attributes_fundamental
    this._attrIdentityKind = Gloda.defineAttribute({
      provider: this,
      extensionName: GlodaConstants.BUILT_IN,
      attributeType: GlodaConstants.kAttrFundamental,
      attributeName: "kind",
      singular: true,
      special: GlodaConstants.kSpecialString,
      specialColumnName: "kind",
      subjectNouns: [GlodaConstants.NOUN_IDENTITY],
      objectNoun: GlodaConstants.NOUN_STRING,
      canQuery: true,
    }); // tested-by: test_attributes_fundamental
    this._attrIdentityValue = Gloda.defineAttribute({
      provider: this,
      extensionName: GlodaConstants.BUILT_IN,
      attributeType: GlodaConstants.kAttrFundamental,
      attributeName: "value",
      singular: true,
      special: GlodaConstants.kSpecialString,
      specialColumnName: "value",
      subjectNouns: [GlodaConstants.NOUN_IDENTITY],
      objectNoun: GlodaConstants.NOUN_STRING,
      canQuery: true,
    }); // tested-by: test_attributes_fundamental

    /* ***** Contact Meta ***** */
    // Freeform tags; not explicit like
thunderbird's fundamental tags. + // we differentiate for now because of fundamental implementation + // differences. + this._attrFreeTag = Gloda.defineAttribute({ + provider: this, + extensionName: GlodaConstants.BUILT_IN, + attributeType: GlodaConstants.kAttrExplicit, + attributeName: "freetag", + bind: true, + bindName: "freeTags", + singular: false, + subjectNouns: [GlodaConstants.NOUN_CONTACT], + objectNoun: Gloda.lookupNoun("freetag"), + parameterNoun: null, + canQuery: true, + }); // not-tested + // we need to find any existing bound freetag attributes, and use them to + // populate to FreeTagNoun's understanding + if ("parameterBindings" in this._attrFreeTag) { + for (let freeTagName in this._attrFreeTag.parameterBindings) { + this._log.debug("Telling FreeTagNoun about: " + freeTagName); + FreeTagNoun.getFreeTag(freeTagName); + } + } + }, + + *process(aContact, aRawReps, aIsNew, aCallbackHandle) { + let card = aRawReps.card; + if (aContact.NOUN_ID != GlodaConstants.NOUN_CONTACT) { + this._log.warn("Somehow got a non-contact: " + aContact); + return; // this will produce an exception; we like. + } + + // update the name + if (card.displayName && card.displayName != aContact.name) { + aContact.name = card.displayName; + } + + aContact.freeTags = []; + + let tags = null; + try { + tags = card.getProperty("Categories", null); + } catch (ex) { + this._log.error("Problem accessing property: " + ex); + } + if (tags) { + for (let tagName of tags.split(",")) { + tagName = tagName.trim(); + if (tagName) { + aContact.freeTags.push(FreeTagNoun.getFreeTag(tagName)); + } + } + } + + yield GlodaConstants.kWorkDone; + }, +}; diff --git a/comm/mailnews/db/gloda/modules/GlodaMsgSearcher.jsm b/comm/mailnews/db/gloda/modules/GlodaMsgSearcher.jsm new file mode 100644 index 0000000000..f81def2560 --- /dev/null +++ b/comm/mailnews/db/gloda/modules/GlodaMsgSearcher.jsm @@ -0,0 +1,361 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
/* (license header, continued:) If a copy of the MPL was not distributed with
 * this file, You can obtain one at http://mozilla.org/MPL/2.0/. */

const EXPORTED_SYMBOLS = ["GlodaMsgSearcher"];

const { Gloda } = ChromeUtils.import(
  "resource:///modules/gloda/GlodaPublic.jsm"
);
const { GlodaConstants } = ChromeUtils.import(
  "resource:///modules/gloda/GlodaConstants.jsm"
);

/**
 * How much time boost should a 'score point' amount to? The authoritative,
 * incontrovertible answer, across all time and space, is a week.
 * Note that gloda stores timestamps as PRTimes for no exceedingly good
 * reason.
 */
var FUZZSCORE_TIMESTAMP_FACTOR = 1000 * 1000 * 60 * 60 * 24 * 7;

// SQL rank expression: weights for the five fulltext columns
// (body, subject, attachment names, author, recipients).
var RANK_USAGE = "glodaRank(matchinfo(messagesText), 1.0, 2.0, 2.0, 1.5, 1.5)";

// "Da score": fulltext rank plus static notability, scaled into the
// PRTime domain and added to the message date, so newer messages win ties.
var DASCORE =
  "(((" +
  RANK_USAGE +
  " + messages.notability) * " +
  FUZZSCORE_TIMESTAMP_FACTOR +
  ") + messages.date)";

/**
 * A new optimization decision we are making is that we do not want to carry
 * around any data in our ephemeral tables that is not used for whittling the
 * result set. The idea is that the btree page cache or OS cache is going to
 * save us from the disk seeks and carrying around the extra data is just going
 * to be CPU/memory churn that slows us down.
 *
 * Additionally, we try and avoid row lookups that would have their results
 * discarded by the LIMIT. Because of limitations in FTS3 (which might
 * be addressed in FTS4 by a feature request), we can't avoid the 'messages'
 * lookup since that has the message's date and static notability but we can
 * defer the 'messagesText' lookup.
 *
 * This is the access pattern we are after here:
 * 1) Order the matches with minimized lookup and result storage costs.
 * - The innermost MATCH does the doclist magic and provides us with
 *   matchinfo() support which does not require content row retrieval
 *   from messagesText. Unfortunately, this is not enough to whittle anything
 *   because we still need static interestingness, so...
 * - Based on the match we retrieve the date and notability for that row from
 *   'messages' using this in conjunction with matchinfo() to provide a score
 *   that we can then use to LIMIT our results.
 * 2) We reissue the MATCH query so that we will be able to use offsets(), but
 *    we intersect the results of this MATCH against our LIMITed results from
 *    step 1.
 * - We use 'docid IN (phase 1 query)' to accomplish this because it results in
 *   efficient lookup. If we just use a join, we get O(mn) performance because
 *   a cartesian join ends up being performed where either we end up performing
 *   the fulltext query M times and table scan intersect with the results from
 *   phase 1 or we do the fulltext once but traverse the entire result set from
 *   phase 1 N times.
 * - We believe that the re-execution of the MATCH query should have no disk
 *   costs because it should still be cached by SQLite or the OS. In the case
 *   where memory is so constrained this is not true our behavior is still
 *   probably preferable than the old way because that would have caused lots
 *   of swapping.
 * - This part of the query otherwise resembles the basic gloda query but with
 *   the inclusion of the offsets() invocation. The messages table lookup
 *   should not involve any disk traffic because the pages should still be
 *   cached (SQLite or OS) from phase 1. The messagesText lookup is new, and
 *   this is the major disk-seek reduction optimization we are making. (Since
 *   we avoid this lookup for all of the documents that were excluded by the
 *   LIMIT.) Since offsets() also needs to retrieve the row from messagesText
 *   there is a nice synergy there.
 */
/* (End of the fulltext query-plan discussion above.) */

/**
 * Two-phase fulltext query (see the strategy discussion above): the inner
 * SELECT orders matching docids by DASCORE and applies the LIMIT (?2); the
 * outer query re-runs the MATCH (?1) restricted to those docids and pulls the
 * full rows plus offsets() for later per-message scoring. The '+' prefixes
 * defeat SQLite's term-affinity so the deleted/folderID/messageKey checks
 * stay post-filters instead of driving index selection.
 */
var NUEVO_FULLTEXT_SQL =
  "SELECT messages.*, messagesText.*, offsets(messagesText) AS osets " +
  "FROM messagesText, messages " +
  "WHERE" +
  " messagesText MATCH ?1 " +
  " AND messagesText.docid IN (" +
  "SELECT docid " +
  "FROM messagesText JOIN messages ON messagesText.docid = messages.id " +
  "WHERE messagesText MATCH ?1 " +
  "ORDER BY " +
  DASCORE +
  " DESC " +
  "LIMIT ?2" +
  " )" +
  " AND messages.id = messagesText.docid " +
  " AND +messages.deleted = 0" +
  " AND +messages.folderID IS NOT NULL" +
  " AND +messages.messageKey IS NOT NULL";

/** Identity; used as a truthiness predicate for some()/every()/filter(). */
function identityFunc(x) {
  return x;
}

/** max(x - 1, 0): counts the matches beyond the first one. */
function oneLessMaxZero(x) {
  if (x <= 1) {
    return 0;
  }
  return x - 1;
}

/** Summing reducer for Array.prototype.reduce. */
function reduceSum(accum, curValue) {
  return accum + curValue;
}

/*
 * Columns are: body, subject, attachment names, author, recipients
 */

/**
 * Scores if all search terms match in a column. We bias against author
 * slightly and recipient a bit more in this case because a search that
 * entirely matches just on a person should give a mention of that person
 * in the subject or attachment a fighting chance.
 * Keep in mind that because of our indexing in the face of address book
 * contacts (namely, we index the name used in the e-mail as well as the
 * display name on the address book card associated with the e-mail address)
 * a contact is going to bias towards matching multiple times.
 */
var COLUMN_ALL_MATCH_SCORES = [4, 20, 20, 16, 12];
/**
 * Score for each distinct term that matches in the column. This is capped
 * by COLUMN_ALL_SCORES.
 */
var COLUMN_PARTIAL_PER_MATCH_SCORES = [1, 4, 4, 4, 3];
/**
 * If a term matches multiple times, what is the marginal score for each
 * additional match. We count the total number of matches beyond the
 * first match for each term. In other words, if we have 3 terms which
 * matched 5, 3, and 0 times, then the total from our perspective is
 * (5 - 1) + (3 - 1) + 0 = 4 + 2 + 0 = 6. We take the minimum of that value
 * and the value in COLUMN_MULTIPLE_MATCH_LIMIT and multiply by the value in
 * COLUMN_MULTIPLE_MATCH_SCORES.
 */
var COLUMN_MULTIPLE_MATCH_SCORES = [1, 0, 0, 0, 0];
var COLUMN_MULTIPLE_MATCH_LIMIT = [10, 0, 0, 0, 0];

/**
 * Score the message on its offsets (from stashedColumns).
 *
 * @param aMessage Message whose `id` keys into aContext.stashedColumns.
 * @param aContext Scoring context with `terms` (the parsed search terms) and
 *   `stashedColumns` (per-message stashed offsets() output: space-delimited
 *   quads of "column term byteOffset byteLength").
 * @returns {number} The heuristic relevance score for this message.
 */
function scoreOffsets(aMessage, aContext) {
  let score = 0;

  let termTemplate = aContext.terms.map(_ => 0);
  // for each column, a list of the incidence of each term
  let columnTermIncidence = [
    termTemplate.concat(),
    termTemplate.concat(),
    termTemplate.concat(),
    termTemplate.concat(),
    termTemplate.concat(),
  ];

  // we need a friendlyParseInt because otherwise the radix stuff happens
  // because of the extra arguments map parses. curse you, map!
  // (Radix 10 is passed explicitly; offsets() output is always decimal.)
  let offsetNums = aContext.stashedColumns[aMessage.id][0]
    .split(" ")
    .map(x => parseInt(x, 10));
  // offsets() yields quads; we only need the column and term indices.
  for (let i = 0; i < offsetNums.length; i += 4) {
    let columnIndex = offsetNums[i];
    let termIndex = offsetNums[i + 1];
    columnTermIncidence[columnIndex][termIndex]++;
  }

  for (let iColumn = 0; iColumn < COLUMN_ALL_MATCH_SCORES.length; iColumn++) {
    let termIncidence = columnTermIncidence[iColumn];
    if (termIncidence.every(identityFunc)) {
      // Bestow all match credit.
      score += COLUMN_ALL_MATCH_SCORES[iColumn];
    } else if (termIncidence.some(identityFunc)) {
      // Bestow partial match credit.
      score += Math.min(
        COLUMN_ALL_MATCH_SCORES[iColumn],
        COLUMN_PARTIAL_PER_MATCH_SCORES[iColumn] *
          termIncidence.filter(identityFunc).length
      );
    }
    // Bestow multiple match credit.
    score +=
      Math.min(
        termIncidence.map(oneLessMaxZero).reduce(reduceSum, 0),
        COLUMN_MULTIPLE_MATCH_LIMIT[iColumn]
      ) * COLUMN_MULTIPLE_MATCH_SCORES[iColumn];
  }

  return score;
}

/**
 * The searcher basically looks like a query, but is specialized for fulltext
 * search against messages. (doc comment continues below)
 */
/* (continued from above)
 * Most of the explicit specialization involves crafting a SQL query that
 * attempts to order the matches by likelihood that the user was looking for
 * it. This is based on full-text matches combined with an explicit (generic)
 * interest score value placed on the message at indexing time. This is
 * followed by using the more generic gloda scoring mechanism to explicitly
 * score the messages given the search context in addition to the more generic
 * score adjusting rules.
 */
function GlodaMsgSearcher(aListener, aSearchString, aAndTerms) {
  this.listener = aListener;

  this.searchString = aSearchString;
  this.fulltextTerms = this.parseSearchString(aSearchString);
  // Default to AND-ing terms together unless told otherwise.
  this.andTerms = aAndTerms == null ? true : aAndTerms;

  this.query = null;
  this.collection = null;

  this.scores = null;
}
GlodaMsgSearcher.prototype = {
  /**
   * Number of messages to retrieve initially, read from a user-tunable pref.
   */
  get retrievalLimit() {
    return Services.prefs.getIntPref(
      "mailnews.database.global.search.msg.limit"
    );
  },

  /**
   * Parse the string into terms/phrases by finding matching double-quotes.
   * Bare words are split on spaces; a double-quoted run becomes one phrase.
   *
   * @param aSearchString The raw user-entered search string.
   * @returns {string[]} The list of non-empty terms/phrases.
   */
  parseSearchString(aSearchString) {
    let remainder = aSearchString.trim();
    const terms = [];

    // Keep a piece only if trimming did not obliterate it.
    const collect = piece => {
      if (piece) {
        terms.push(piece);
      }
    };

    while (remainder) {
      if (remainder.startsWith('"')) {
        const closeQuote = remainder.indexOf(remainder[0], 1);
        // A quote with no friend gets eaten and scanning resumes.
        if (closeQuote === -1) {
          remainder = remainder.substring(1);
          continue;
        }

        collect(remainder.substring(1, closeQuote).trim());
        remainder = remainder.substring(closeQuote + 1);
        continue;
      }

      const gap = remainder.indexOf(" ");
      if (gap === -1) {
        collect(remainder);
        break;
      }

      collect(remainder.substring(0, gap));
      remainder = remainder.substring(gap + 1);
    }

    return terms;
  },

  /**
   * Build the specialized fulltext gloda query: hand-rolled SQL plus an FTS
   * MATCH expression assembled from the parsed terms.
   */
  buildFulltextQuery() {
    const query = Gloda.newQuery(GlodaConstants.NOUN_MESSAGE, {
      noMagic: true,
      explicitSQL: NUEVO_FULLTEXT_SQL,
      limitClauseAlreadyIncluded: true,
      // osets is 0-based column number 14 (volatile to column changes)
      // save the offset column for extra analysis
      stashColumns: [14],
    });

    let matchExpr = "";
    const joiner = this.andTerms ? " " : " OR ";

    for (let i = 0; i < this.fulltextTerms.length; i++) {
      const term = this.fulltextTerms[i];
      if (i > 0) {
        matchExpr += joiner;
      }

      const isNearOperator = /^NEAR(\/\d+)?$/.test(term);
      const isSingleCjk = term.length == 1 && term.charCodeAt(0) >= 0x2000;
      const isCjkPair =
        term.length == 2 &&
        term.charCodeAt(0) >= 0x2000 &&
        term.charCodeAt(1) >= 0x2000;

      if (isNearOperator) {
        // Clever people using FTS NEAR get it passed through verbatim.
        matchExpr += term;
      } else if (isSingleCjk) {
        // Single-character CJK query (anything at/above 0x2000 is treated as
        // CJK by our tokenizer): add a wildcard.
        matchExpr += term + "*";
      } else if (isCjkPair || term.length >= 3) {
        // Quote the term so the tokenizer can do useful things with it.
        matchExpr += '"' + term + '"';
      }
      // Shorter non-CJK terms are dropped (only their separator remains).
    }

    query.fulltextMatches(matchExpr);
    query.limit(this.retrievalLimit);

    return query;
  },

  /**
   * Kick off the search; returns the (asynchronously populated) collection.
   */
  getCollection(aListenerOverride, aData) {
    if (aListenerOverride) {
      this.listener = aListenerOverride;
    }

    this.query = this.buildFulltextQuery();
    this.collection = this.query.getCollection(this, aData);
    this.completed = false;

    return this.collection;
  },

  sortBy: "-dascore",

  /** Collection listener: score new items, then forward to our listener. */
  onItemsAdded(aItems, aCollection) {
    const newScores = Gloda.scoreNounItems(
      aItems,
      {
        terms: this.fulltextTerms,
        stashedColumns: aCollection.stashedColumns,
      },
      [scoreOffsets]
    );
    this.scores = this.scores ? this.scores.concat(newScores) : newScores;

    if (!this.listener) {
      return;
    }
    this.listener.onItemsAdded(aItems, aCollection);
  },
  /** Collection listener: forward modifications to our listener. */
  onItemsModified(aItems, aCollection) {
    if (!this.listener) {
      return;
    }
    this.listener.onItemsModified(aItems, aCollection);
  },
  /** Collection listener: forward removals to our listener. */
  onItemsRemoved(aItems, aCollection) {
    if (!this.listener) {
      return;
    }
    this.listener.onItemsRemoved(aItems, aCollection);
  },
  /** Collection listener: mark ourselves complete, then forward. */
  onQueryCompleted(aCollection) {
    this.completed = true;
    if (!this.listener) {
      return;
    }
    this.listener.onQueryCompleted(aCollection);
  },
};

// ==== file boundary (diff residue): comm/mailnews/db/gloda/modules/GlodaPublic.jsm ====
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/* (end of the MPL license header for GlodaPublic.jsm) */

const EXPORTED_SYMBOLS = ["Gloda"];

const { Gloda } = ChromeUtils.import("resource:///modules/gloda/Gloda.jsm");
/* nothing to import, just run some code */ ChromeUtils.import(
  "resource:///modules/gloda/Everybody.jsm"
);
const { GlodaIndexer } = ChromeUtils.import(
  "resource:///modules/gloda/GlodaIndexer.jsm"
);
// initialize the indexer! (who was actually imported as a nested dep by the
// things Everybody.jsm imported.) We waited until now so it could know about
// its indexers.
// NOTE: the statement order in this module is load-bearing; do not reorder.
GlodaIndexer._init();
const { GlodaMsgIndexer } = ChromeUtils.import(
  "resource:///modules/gloda/IndexMsg.jsm"
);

/**
 * Expose some junk: re-publish an indexer method on the public Gloda object
 * under a (possibly different) name, forwarding all arguments.
 *
 * @param aSourceObj Object that owns the real implementation.
 * @param aSourceAttr Method name on aSourceObj to forward to.
 * @param aDestObj Object to install the forwarding method on.
 * @param aDestAttr Name the forwarding method is installed under.
 */
function proxy(aSourceObj, aSourceAttr, aDestObj, aDestAttr) {
  aDestObj[aDestAttr] = function (...aArgs) {
    return aSourceObj[aSourceAttr](...aArgs);
  };
}

proxy(GlodaIndexer, "addListener", Gloda, "addIndexerListener");
proxy(GlodaIndexer, "removeListener", Gloda, "removeIndexerListener");
proxy(GlodaMsgIndexer, "isMessageIndexed", Gloda, "isMessageIndexed");
proxy(
  GlodaMsgIndexer,
  "setFolderIndexingPriority",
  Gloda,
  "setFolderIndexingPriority"
);
proxy(
  GlodaMsgIndexer,
  "resetFolderIndexingPriority",
  Gloda,
  "resetFolderIndexingPriority"
);

// ==== file boundary (diff residue): comm/mailnews/db/gloda/modules/GlodaQueryClassFactory.jsm ====
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

const EXPORTED_SYMBOLS = ["GlodaQueryClassFactory"];

const { GlodaConstants } = ChromeUtils.import(
  "resource:///modules/gloda/GlodaConstants.jsm"
);

/**
 * @class Query class core; each noun gets its own sub-class where attributes
 * have helper methods bound. (doc comment continues below)
 */
/* (GlodaQueryClass @class doc, continued)
 *
 * @param aOptions A dictionary of options. Current legal options are:
 *   - noMagic: Indicates that the noun's dbQueryJoinMagic should be ignored.
 *     Currently, this means that messages will not have their full-text
 *     indexed values re-attached. This is planned to be offset by having
 *     queries/cache lookups that do not request noMagic to ensure that their
 *     data does get loaded.
 *   - explicitSQL: A hand-rolled alternate representation for the core
 *     SELECT portion of the SQL query. The queryFromQuery logic still
 *     generates its normal query, we just ignore its result in favor of
 *     your provided value. This means that the positional parameter
 *     list is still built and you should/must rely on those bound
 *     parameters (using '?'). The replacement occurs prior to the
 *     outerWrapColumns, ORDER BY, and LIMIT contributions to the query.
 *   - outerWrapColumns: If provided, wraps the query in a "SELECT *,blah
 *     FROM (actual query)" where blah is your list of outerWrapColumns
 *     made comma-delimited. The idea is that this allows you to
 *     reference the result of expressions inside the query using their
 *     names rather than having to duplicate the logic. In practice,
 *     this makes things more readable but is unlikely to improve
 *     performance. (Namely, my use of 'offsets' for full-text stuff
 *     ends up in the EXPLAIN plan twice despite this.)
 *   - noDbQueryValidityConstraints: Indicates that any validity constraints
 *     should be ignored. This should be used when you need to get every
 *     match regardless of whether it's valid.
 *
 * @property _owner The query instance that holds the list of unions...
 * @property _constraints A list of (lists of OR constraints) that are ANDed
 *   together. For example [[FROM bob, FROM jim], [DATE last week]] would
 *   be requesting us to find all the messages from either bob or jim, and
 *   sent in the last week.
 * @property _unions A list of other queries whose results are unioned with our
 *   own. There is no concept of nesting or sub-queries apart from this
 *   mechanism.
 */
function GlodaQueryClass(aOptions) {
  this.options = aOptions != null ? aOptions : {};

  // if we are an 'or' clause, who is our parent whom other 'or' clauses should
  // spawn from...
  this._owner = null;
  // our personal chain of and-ing.
  this._constraints = [];
  // the other instances we union with
  this._unions = [];

  this._order = [];
  this._limit = 0;
}

GlodaQueryClass.prototype = {
  // Sentinel value; constraint helpers may use it to mean "match anything".
  WILDCARD: {},

  /** Number of ANDed constraint groups on this query instance. */
  get constraintCount() {
    return this._constraints.length;
  },

  /**
   * Start a new OR branch: returns a fresh query unioned with this one (all
   * branches share the same root owner).
   */
  or() {
    let owner = this._owner || this;
    let orQuery = new this._queryClass();
    orQuery._owner = owner;
    owner._unions.push(orQuery);
    return orQuery;
  },

  /** Append sort keys; chainable. */
  orderBy(...aArgs) {
    this._order.push(...aArgs);
    return this;
  },

  /** Cap the number of results; chainable. */
  limit(aLimit) {
    this._limit = aLimit;
    return this;
  },

  /**
   * Return a collection asynchronously populated by this collection. You must
   * provide a listener to receive notifications from the collection as it
   * receives updates. The listener object should implement onItemsAdded,
   * onItemsModified, and onItemsRemoved methods, all of which take a single
   * argument which is the list of items which have been added, modified, or
   * removed respectively.
   *
   * @param aListener The collection listener.
   * @param [aData] The data attribute to set on the collection.
   * @param [aArgs.becomeExplicit] Make the collection explicit so that the
   *     collection will only ever contain results found from the database
   *     query and the query will not be updated as new items are indexed that
   *     also match the query.
   * @param [aArgs.becomeNull] Change the collection's query to a null query so
   *     that it will never receive any additional added/modified/removed events
   *     apart from the underlying database query. This is really only intended
   *     for gloda internal use but may be acceptable for non-gloda use. Please
   *     ask on mozilla.dev.apps.thunderbird first to make sure there isn't a
   *     better solution for your use-case. (Note: removals will still happen
   *     when things get fully deleted.)
   */
  getCollection(aListener, aData, aArgs) {
    this.completed = false;
    return this._nounDef.datastore.queryFromQuery(
      this,
      aListener,
      aData,
      /* aExistingCollection */ null,
      /* aMasterCollection */ null,
      aArgs
    );
  },

  /* eslint-disable complexity */
  /**
   * Test whether the given first-class noun instance satisfies this query.
   *
   * @testpoint gloda.query.test
   */
  test(aObj) {
    // when changing this method, be sure that GlodaDatastore's queryFromQuery
    // method likewise has any required changes made.
    let unionQueries = [this].concat(this._unions);

    // The object matches if ANY unioned branch has ALL its constraints hold.
    for (let iUnion = 0; iUnion < unionQueries.length; iUnion++) {
      let curQuery = unionQueries[iUnion];

      // assume success until a specific (or) constraint proves us wrong
      let querySatisfied = true;
      for (
        let iConstraint = 0;
        iConstraint < curQuery._constraints.length;
        iConstraint++
      ) {
        let constraint = curQuery._constraints[iConstraint];
        let [constraintType, attrDef] = constraint;
        let boundName = attrDef ? attrDef.boundName : "id";
        // Faceting can mark an attribute as "ignore"; such objects never match.
        if (
          boundName in aObj &&
          aObj[boundName] === GlodaConstants.IGNORE_FACET
        ) {
          querySatisfied = false;
          break;
        }

        // constraint layout: [type, attrDef, ...values]
        let constraintValues = constraint.slice(2);

        if (constraintType === GlodaConstants.kConstraintIdIn) {
          if (!constraintValues.includes(aObj.id)) {
            querySatisfied = false;
            break;
          }
        } else if (
          constraintType === GlodaConstants.kConstraintIn ||
          constraintType === GlodaConstants.kConstraintEquals
        ) {
          // @testpoint gloda.query.test.kConstraintIn
          let objectNounDef = attrDef.objectNounDef;

          // if they provide an equals comparator, use that.
          // (note: the next case has better optimization possibilities than
          // this mechanism, but of course has higher initialization costs or
          // code complexity costs...)
          if (objectNounDef.equals) {
            // Normalize the attribute to a list of values to test.
            let testValues;
            if (!(boundName in aObj)) {
              testValues = [];
            } else if (attrDef.singular) {
              testValues = [aObj[boundName]];
            } else {
              testValues = aObj[boundName];
            }

            // If there are no constraints, then we are just testing for there
            // being a value. Succeed (continue) in that case.
            if (
              constraintValues.length == 0 &&
              testValues.length &&
              testValues[0] != null
            ) {
              continue;
            }

            // If there are no test values and the empty set is significant,
            // then check if any of the constraint values are null (our
            // empty indicator.)
            if (testValues.length == 0 && attrDef.emptySetIsSignificant) {
              let foundEmptySetSignifier = false;
              for (let constraintValue of constraintValues) {
                if (constraintValue == null) {
                  foundEmptySetSignifier = true;
                  break;
                }
              }
              if (foundEmptySetSignifier) {
                continue;
              }
            }

            let foundMatch = false;
            for (let testValue of testValues) {
              for (let value of constraintValues) {
                if (objectNounDef.equals(testValue, value)) {
                  foundMatch = true;
                  break;
                }
              }
              if (foundMatch) {
                break;
              }
            }
            if (!foundMatch) {
              querySatisfied = false;
              break;
            }
          } else {
            // otherwise, we need to convert everyone to their param/value form
            // in order to test for equality
            // let's just do the simple, obvious thing for now. which is
            // what we did in the prior case but exploding values using
            // toParamAndValue, and then comparing.
            let testValues;
            if (!(boundName in aObj)) {
              testValues = [];
            } else if (attrDef.singular) {
              testValues = [aObj[boundName]];
            } else {
              testValues = aObj[boundName];
            }

            // If there are no constraints, then we are just testing for there
            // being a value. Succeed (continue) in that case.
            if (
              constraintValues.length == 0 &&
              testValues.length &&
              testValues[0] != null
            ) {
              continue;
            }
            // If there are no test values and the empty set is significant,
            // then check if any of the constraint values are null (our
            // empty indicator.)
            if (testValues.length == 0 && attrDef.emptySetIsSignificant) {
              let foundEmptySetSignifier = false;
              for (let constraintValue of constraintValues) {
                if (constraintValue == null) {
                  foundEmptySetSignifier = true;
                  break;
                }
              }
              if (foundEmptySetSignifier) {
                continue;
              }
            }

            let foundMatch = false;
            for (let testValue of testValues) {
              let [aParam, aValue] = objectNounDef.toParamAndValue(testValue);
              for (let value of constraintValues) {
                // skip empty set check sentinel values
                if (value == null && attrDef.emptySetIsSignificant) {
                  continue;
                }
                let [bParam, bValue] = objectNounDef.toParamAndValue(value);
                if (aParam == bParam && aValue == bValue) {
                  foundMatch = true;
                  break;
                }
              }
              if (foundMatch) {
                break;
              }
            }
            if (!foundMatch) {
              querySatisfied = false;
              break;
            }
          }
        } else if (constraintType === GlodaConstants.kConstraintRanges) {
          // @testpoint gloda.query.test.kConstraintRanges
          let objectNounDef = attrDef.objectNounDef;

          let testValues;
          if (!(boundName in aObj)) {
            testValues = [];
          } else if (attrDef.singular) {
            testValues = [aObj[boundName]];
          } else {
            testValues = aObj[boundName];
          }

          // Each range tuple is [lower, upper]; a null bound means open-ended.
          let foundMatch = false;
          for (let testValue of testValues) {
            let [tParam, tValue] = objectNounDef.toParamAndValue(testValue);
            for (let rangeTuple of constraintValues) {
              let [lowerRValue, upperRValue] = rangeTuple;
              if (lowerRValue == null) {
                let [upperParam, upperValue] =
                  objectNounDef.toParamAndValue(upperRValue);
                if (tParam == upperParam && tValue <= upperValue) {
                  foundMatch = true;
                  break;
                }
              } else if (upperRValue == null) {
                let [lowerParam, lowerValue] =
                  objectNounDef.toParamAndValue(lowerRValue);
                if (tParam == lowerParam && tValue >= lowerValue) {
                  foundMatch = true;
                  break;
                }
              } else {
                // no one is null
                let [upperParam, upperValue] =
                  objectNounDef.toParamAndValue(upperRValue);
                let [lowerParam, lowerValue] =
                  objectNounDef.toParamAndValue(lowerRValue);
                if (
                  tParam == lowerParam &&
                  tValue >= lowerValue &&
                  tParam == upperParam &&
                  tValue <= upperValue
                ) {
                  foundMatch = true;
                  break;
                }
              }
            }
            if (foundMatch) {
              break;
            }
          }
          if (!foundMatch) {
            querySatisfied = false;
            break;
          }
        } else if (constraintType === GlodaConstants.kConstraintStringLike) {
          // @testpoint gloda.query.test.kConstraintStringLike
          // Anchored LIKE-style matching: constraintValues alternates string
          // literals and non-string "wildcard" markers.
          let curIndex = 0;
          let value = boundName in aObj ? aObj[boundName] : "";
          // the attribute must be singular, we don't support arrays of strings.
          for (let valuePart of constraintValues) {
            if (typeof valuePart == "string") {
              let index = value.indexOf(valuePart);
              // NOTE(review): after a wildcard this searches from offset 0,
              // not from the end of the previous match, so a part can re-match
              // earlier text (e.g. "%b%b" appears to match "ab") — confirm
              // whether the SQL LIKE path agrees before relying on this.
              // if curIndex is null, we just need any match
              // if it's not null, it must match the offset of our found match
              if (curIndex === null) {
                if (index == -1) {
                  querySatisfied = false;
                } else {
                  curIndex = index + valuePart.length;
                }
              } else if (index != curIndex) {
                querySatisfied = false;
              } else {
                curIndex = index + valuePart.length;
              }
              if (!querySatisfied) {
                break;
              }
            } else {
              // wild!
              curIndex = null;
            }
          }
          // curIndex must be null or equal to the length of the string
          if (querySatisfied && curIndex !== null && curIndex != value.length) {
            querySatisfied = false;
          }
        } else if (constraintType === GlodaConstants.kConstraintFulltext) {
          // @testpoint gloda.query.test.kConstraintFulltext
          // this is beyond our powers. Even if we have the fulltext content in
          // memory, which we may not, the tokenization and such to perform
          // the testing gets very complicated in the face of i18n, etc.
          // so, let's fail if the item is not already in the collection, and
          // let the testing continue if it is. (some other constraint may no
          // longer apply...)
          if (!(aObj.id in this.collection._idMap)) {
            querySatisfied = false;
          }
        }

        if (!querySatisfied) {
          break;
        }
      }

      if (querySatisfied) {
        return true;
      }
    }
    return false;
  },
  /* eslint-enable complexity */

  /**
   * Helper code for noun definitions of queryHelpers that want to build a
   * traditional in/equals constraint. The goal is to let them build a range
   * without having to know how we structure |_constraints|.
   *
   * @protected
   */
  _inConstraintHelper(aAttrDef, aValues) {
    let constraint = [GlodaConstants.kConstraintIn, aAttrDef].concat(aValues);
    this._constraints.push(constraint);
    return this;
  },

  /**
   * Helper code for noun definitions of queryHelpers that want to build a
   * range. The goal is to let them build a range without having to know how
   * we structure |_constraints| or requiring them to mark themselves as
   * continuous to get a "Range".
   *
   * @protected
   */
  _rangedConstraintHelper(aAttrDef, aRanges) {
    let constraint = [GlodaConstants.kConstraintRanges, aAttrDef].concat(
      aRanges
    );
    this._constraints.push(constraint);
    return this;
  },
};

/**
 * @class A query that never matches anything.
 *
 * Collections corresponding to this query are intentionally frozen in time and
 * do not want to be notified of any updates. We need the collection to be
 * registered with the collection manager so that the noun instances in the
 * collection are always 'reachable' via the collection for as long as we might
 * be handing out references to the instances. (The other way to avoid updates
 * would be to not register the collection, but then items might not be
 * reachable.)
 * This is intended to be used in implementation details behind the gloda
 * abstraction barrier. For example, the message indexer likes to be able
 * to represent 'ghost' and deleted messages, but these should never be exposed
 * to the user. For code simplicity, it wants to be able to use the query
 * mechanism. But it doesn't want updates that are effectively
 * (comment continues below)
 */
/**
 * A query that never matches anything.
 *
 * Collections tied to this query are deliberately frozen in time and want no
 * update notifications at all.  The collection still registers with the
 * collection manager so that the noun instances it holds stay reachable for
 * as long as references to them may be handed out.  This lives behind the
 * gloda abstraction barrier; e.g. the message indexer uses it to represent
 * 'ghost' and deleted messages which must never surface to the user, while
 * still reusing the query machinery.  Without the freeze, a reused ghost
 * message already present in a collection could trigger a spurious
 * item-added notification on itemsAdded — exactly what we do not want.
 */
function GlodaNullQueryClass() {}

GlodaNullQueryClass.prototype = {
  /**
   * No options; they are currently only needed for SQL query generation,
   * which never happens for null queries.
   */
  options: {},

  /**
   * Duck-typed marker telling GlodaCollectionManager that the associated
   * collection simply does not want anything to change.  The test function
   * conveys most of this, but the special-casing has to live somewhere, so
   * it lives here.
   */
  frozen: true,

  /**
   * A boolean OR involving a query that matches nothing is meaningless, so
   * refuse to build one.
   *
   * @returns null
   */
  or() {
    return null;
  },

  /**
   * It makes no sense to populate a collection from a query that matches
   * nothing, so refuse (you are doing something wrong if you call this).
   * Should an empty sentinel collection ever become useful, this choice can
   * be revisited.
   *
   * @returns null
   */
  getCollection() {
    return null;
  },

  /**
   * Nothing is ever relevant to our associated collection.
   *
   * @param aItem The object someone wants tested.  We don't care — not a fig!
   * @returns false
   */
  test(aItem) {
    return false;
  },
};

/**
 * A query that only 'tests' for already belonging to the collection.
 *
 * Useful when your listener cares about modifications to, and deletions
 * from, an existing collection, but must not be notified about newly
 * indexed items that would match ordinary query constraints.
 *
 * @param aCollection The collection this query belongs to.  Pass it here, or
 *   have the collection set the attribute directly when the query is handed
 *   to a collection's constructor.
 */
function GlodaExplicitQueryClass(aCollection) {
  this.collection = aCollection;
}

GlodaExplicitQueryClass.prototype = {
  /**
   * No options; they are currently only needed for SQL query generation,
   * which never happens for explicit queries.
   */
  options: {},

  /**
   * This query only ever matches its own collection's contents, so a boolean
   * OR involving it is meaningless; refuse to build one.
   *
   * @returns null
   */
  or() {
    return null;
  },

  /**
   * Building a collection from an explicit query makes no sense — such a
   * query is expected to wrap a hand-created collection, or the result of a
   * normal query immediately converted to explicit.  Calling this is almost
   * certainly an error, and returning null makes that error loud.
   *
   * @returns null
   */
  getCollection() {
    return null;
  },

  /**
   * Matches only items already present (by id) in the associated collection.
   *
   * @param aItem The object/item to check for membership.
   * @returns true when the object is in the associated collection, otherwise
   *   false.
   */
  test(aItem) {
    return aItem.id in this.collection._idMap;
  },
};

/**
 * A query that 'tests' true for everything.  Debugging use only.
 */
function GlodaWildcardQueryClass() {}

GlodaWildcardQueryClass.prototype = {
  /** No options; they are currently only needed for SQL query generation. */
  options: {},

  // refuse boolean composition; don't let people mess with us
  or() {
    return null;
  },

  // refuse collection creation until a real use case shows up
  getCollection() {
    return null;
  },

  /** Everybody wins! */
  test(aItem) {
    return true;
  },
};
+ */ +function GlodaQueryClassFactory(aNounDef) { + let newQueryClass = function (aOptions) { + GlodaQueryClass.call(this, aOptions); + }; + newQueryClass.prototype = new GlodaQueryClass(); + newQueryClass.prototype._queryClass = newQueryClass; + newQueryClass.prototype._nounDef = aNounDef; + + let newNullClass = function (aCollection) { + GlodaNullQueryClass.call(this); + this.collection = aCollection; + }; + newNullClass.prototype = new GlodaNullQueryClass(); + newNullClass.prototype._queryClass = newNullClass; + newNullClass.prototype._nounDef = aNounDef; + + let newExplicitClass = function (aCollection) { + GlodaExplicitQueryClass.call(this); + this.collection = aCollection; + }; + newExplicitClass.prototype = new GlodaExplicitQueryClass(); + newExplicitClass.prototype._queryClass = newExplicitClass; + newExplicitClass.prototype._nounDef = aNounDef; + + let newWildcardClass = function (aCollection) { + GlodaWildcardQueryClass.call(this); + this.collection = aCollection; + }; + newWildcardClass.prototype = new GlodaWildcardQueryClass(); + newWildcardClass.prototype._queryClass = newWildcardClass; + newWildcardClass.prototype._nounDef = aNounDef; + + return [newQueryClass, newNullClass, newExplicitClass, newWildcardClass]; +} diff --git a/comm/mailnews/db/gloda/modules/GlodaSyntheticView.jsm b/comm/mailnews/db/gloda/modules/GlodaSyntheticView.jsm new file mode 100644 index 0000000000..2e0fb7b5be --- /dev/null +++ b/comm/mailnews/db/gloda/modules/GlodaSyntheticView.jsm @@ -0,0 +1,175 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * This file is charged with providing you a way to have a pretty gloda-backed + * nsIMsgDBView. + */ + +const EXPORTED_SYMBOLS = ["GlodaSyntheticView"]; + +/** + * Create a synthetic view suitable for passing to |FolderDisplayWidget.show|. 
 * You must pass a query, collection, or conversation in.
 *
 * @param {GlodaQuery} [aArgs.query] A gloda query to run.
 * @param {GlodaCollection} [aArgs.collection] An already-populated collection
 *   to display.  Do not call getCollection on a query and hand us that.  We
 *   will not register ourselves as a listener and things will not work.
 * @param {GlodaConversation} [aArgs.conversation] A conversation whose
 *   messages you want to display.  When used, aArgs.message must also be
 *   supplied (its folder header becomes |selectedMessage|).
 * @param {GlodaMessage} [aArgs.message] Only consulted on the conversation
 *   path; the constructor reads aArgs.message.folderMessage.
 */
function GlodaSyntheticView(aArgs) {
  if ("query" in aArgs) {
    // Run the query ourselves; results arrive via the collection-listener
    // methods at the bottom of the prototype.
    this.query = aArgs.query;
    this.collection = this.query.getCollection(this);
    this.completed = false;
    this.viewType = "global";
  } else if ("collection" in aArgs) {
    // Caller already has the items; nothing further to wait for.
    this.query = null;
    this.collection = aArgs.collection;
    this.completed = true;
    this.viewType = "global";
  } else if ("conversation" in aArgs) {
    this.collection = aArgs.conversation.getMessagesCollection(this);
    this.query = this.collection.query;
    this.completed = false;
    this.viewType = "conversation";
    // NOTE(review): throws if aArgs.message was not provided — the JSDoc now
    // documents this requirement.
    this.selectedMessage = aArgs.message.folderMessage;
  } else {
    throw new Error("You need to pass a query or collection");
  }

  this.customColumns = [];
}
GlodaSyntheticView.prototype = {
  // Sort newest-first by date by default.
  defaultSort: [
    [Ci.nsMsgViewSortType.byDate, Ci.nsMsgViewSortOrder.descending],
  ],

  /**
   * Request the search be performed and notification provided to
   * aSearchListener.  If results are already available, they should
   * be provided to aSearchListener without re-performing the search.
   *
   * @param aSearchListener Receives onNewSearch/onSearchHit/onSearchDone.
   * @param aCompletionCallback Invoked (with no arguments) once the search
   *   finishes or is aborted.
   */
  search(aSearchListener, aCompletionCallback) {
    this.searchListener = aSearchListener;
    this.completionCallback = aCompletionCallback;

    this.searchListener.onNewSearch();
    if (this.completed) {
      this.reportResults(this.collection.items);
      // we're not really aborting, but it closes things out nicely
      this.abortSearch();
    }
  },

  /**
   * Stop the search: tell the listener we are done, fire the completion
   * callback, and drop both references so they cannot be called twice.
   */
  abortSearch() {
    if (this.searchListener) {
      this.searchListener.onSearchDone(Cr.NS_OK);
    }
    if (this.completionCallback) {
      this.completionCallback();
    }
    this.searchListener = null;
    this.completionCallback = null;
  },

  /**
   * Feed each item's underlying folder message header (when one exists) to
   * the search listener as a hit.  Items without a folderMessage are
   * silently skipped.
   */
  reportResults(aItems) {
    for (let item of aItems) {
      let hdr = item.folderMessage;
      if (hdr) {
        this.searchListener.onSearchHit(hdr, hdr.folder);
      }
    }
  },

  /**
   * Helper function used by |DBViewWrapper.getMsgHdrForMessageID| since there
   * are no actual backing folders for it to check.
   *
   * @param aMessageId The message-id header value to look up.
   * @returns The first matching header with a live folderMessage, else null.
   */
  getMsgHdrForMessageID(aMessageId) {
    for (let item of this.collection.items) {
      if (item.headerMessageID == aMessageId) {
        let hdr = item.folderMessage;
        if (hdr) {
          return hdr;
        }
      }
    }
    return null;
  },

  /**
   * The default set of columns to show.  Note: the correspondent/sender
   * visibility prefs are read once, when this prototype is created.
   */
  DEFAULT_COLUMN_STATES: {
    threadCol: {
      visible: true,
    },
    flaggedCol: {
      visible: true,
    },
    subjectCol: {
      visible: true,
    },
    correspondentCol: {
      visible: Services.prefs.getBoolPref("mail.threadpane.use_correspondents"),
    },
    senderCol: {
      visible: !Services.prefs.getBoolPref(
        "mail.threadpane.use_correspondents"
      ),
    },
    dateCol: {
      visible: true,
    },
    locationCol: {
      visible: true,
    },
  },

  // --- settings persistence

  /**
   * Read a per-viewType setting from prefs (stored as JSON).  Falls back to
   * getDefaultSetting on any failure — missing pref or unparsable JSON.
   */
  getPersistedSetting(aSetting) {
    try {
      return JSON.parse(
        Services.prefs.getCharPref(
          "mailnews.database.global.views." + this.viewType + "." + aSetting
        )
      );
    } catch (e) {
      return this.getDefaultSetting(aSetting);
    }
  },

  /** Persist a per-viewType setting to prefs as JSON. */
  setPersistedSetting(aSetting, aValue) {
    Services.prefs.setCharPref(
      "mailnews.database.global.views." + this.viewType + "." + aSetting,
      JSON.stringify(aValue)
    );
  },

  /** Only "columns" has a default; everything else yields undefined. */
  getDefaultSetting(aSetting) {
    if (aSetting == "columns") {
      return this.DEFAULT_COLUMN_STATES;
    }
    return undefined;
  },

  // --- collection listener

  /** Stream late-arriving hits to the listener while a search is active. */
  onItemsAdded(aItems, aCollection) {
    if (this.searchListener) {
      this.reportResults(aItems);
    }
  },
  onItemsModified(aItems, aCollection) {},
  onItemsRemoved(aItems, aCollection) {},

  /**
   * The backing query finished: mark ourselves complete and close out the
   * listener/callback, mirroring abortSearch (but without nulling them).
   */
  onQueryCompleted(aCollection) {
    this.completed = true;
    if (this.searchListener) {
      this.searchListener.onSearchDone(Cr.NS_OK);
    }
    if (this.completionCallback) {
      this.completionCallback();
    }
  },
};

// ===========================================================================
// comm/mailnews/db/gloda/modules/GlodaUtils.jsm
// ===========================================================================

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

const EXPORTED_SYMBOLS = ["GlodaUtils"];

const { MailServices } = ChromeUtils.import(
  "resource:///modules/MailServices.jsm"
);

/**
 * @namespace A holding place for logic that is not gloda-specific and should
 * reside elsewhere.
 */
var GlodaUtils = {
  /**
   * This Regexp is super-complicated and used at least in two different parts
   * of the code, so let's expose it from one single location.  It captures
   * the part=... value out of a message part URL query string.
   */
  PART_RE: new RegExp(
    "^[^?]+\\?(?:/;section=\\d+\\?)?(?:[^&]+&)*part=([^&]+)(?:&[^&]+)*$"
  ),

  /** Decode a MIME-encoded header string (RFC 2047 style). */
  deMime(aString) {
    return MailServices.mimeConverter.decodeMimeHeader(
      aString,
      null,
      false,
      true
    );
  },

  _headerParser: MailServices.headerParser,

  /**
   * Parses an RFC 2822 list of e-mail addresses and returns an object with
   * 4 attributes, as described below.  We will use the example of the user
   * passing an argument of '"Bob Smith" <bob@example.com>'.
   *
   * This method (by way of nsIMsgHeaderParser) takes care of decoding mime
   * headers, but is not aware of folder-level character set overrides.
   *
   * count: the number of addresses parsed. (ex: 1)
   * addresses: a list of e-mail addresses (ex: ["bob@example.com"])
   * names: a list of names (ex: ["Bob Smith"]); an entry is null when the
   *   parsed name is empty/falsy.
   * fullAddresses: aka the list of name and e-mail together (ex: ['"Bob Smith"
   *   <bob@example.com>']).
   *
   * This method is a convenience wrapper around nsIMsgHeaderParser.
   */
  parseMailAddresses(aMailAddresses) {
    let addresses = this._headerParser.parseEncodedHeader(aMailAddresses);
    return {
      names: addresses.map(a => a.name || null),
      addresses: addresses.map(a => a.email),
      fullAddresses: addresses.map(a => a.toString()),
      count: addresses.length,
    };
  },

  /**
   * MD5 hash a string and return the hex-string result. Impl from nsICryptoHash
   * docs.
   *
   * @param aString The string to hash (UTF-8 encoded before hashing).
   * @returns The 32-character lowercase hex digest.
   */
  md5HashString(aString) {
    let data = [...new TextEncoder().encode(aString)];

    let hasher = Cc["@mozilla.org/security/hash;1"].createInstance(
      Ci.nsICryptoHash
    );
    hasher.init(Ci.nsICryptoHash.MD5);
    hasher.update(data, data.length);
    // finish(false) yields the raw binary digest as a string, one byte per
    // char code.
    let hash = hasher.finish(false);

    // return the two-digit hexadecimal code for a byte
    function toHexString(charCode) {
      return ("0" + charCode.toString(16)).slice(-2);
    }

    // convert the binary hash data to a hex string.
    // (Object.keys on a string yields its indices as strings; charCodeAt
    // coerces them back to numbers.)
    let hex = Object.keys(hash).map(i => toHexString(hash.charCodeAt(i)));
    return hex.join("");
  },
};

// ===========================================================================
// comm/mailnews/db/gloda/modules/IndexMsg.jsm
// ===========================================================================

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +"use strict"; + +/* + * This file currently contains a fairly general implementation of asynchronous + * indexing with a very explicit message indexing implementation. As gloda + * will eventually want to index more than just messages, the message-specific + * things should ideally lose their special hold on this file. This will + * benefit readability/size as well. + */ + +const EXPORTED_SYMBOLS = ["GlodaMsgIndexer"]; + +const { MailServices } = ChromeUtils.import( + "resource:///modules/MailServices.jsm" +); +const { GlodaDatastore } = ChromeUtils.import( + "resource:///modules/gloda/GlodaDatastore.jsm" +); +const { GlodaContact, GlodaFolder } = ChromeUtils.import( + "resource:///modules/gloda/GlodaDataModel.jsm" +); +const { Gloda } = ChromeUtils.import("resource:///modules/gloda/Gloda.jsm"); +const { GlodaCollectionManager } = ChromeUtils.import( + "resource:///modules/gloda/Collection.jsm" +); +const { GlodaConstants } = ChromeUtils.import( + "resource:///modules/gloda/GlodaConstants.jsm" +); +const { GlodaIndexer, IndexingJob } = ChromeUtils.import( + "resource:///modules/gloda/GlodaIndexer.jsm" +); +const { MsgHdrToMimeMessage } = ChromeUtils.import( + "resource:///modules/gloda/MimeMessage.jsm" +); + +const lazy = {}; +ChromeUtils.defineModuleGetter( + lazy, + "MailUtils", + "resource:///modules/MailUtils.jsm" +); + +// Cr does not have mailnews error codes! +var NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE = 0x80550005; + +var GLODA_MESSAGE_ID_PROPERTY = "gloda-id"; +/** + * Message header property to track dirty status; one of + * |GlodaIndexer.kMessageClean|, |GlodaIndexer.kMessageDirty|, + * |GlodaIndexer.kMessageFilthy|. + */ +var GLODA_DIRTY_PROPERTY = "gloda-dirty"; + +/** + * The sentinel GLODA_MESSAGE_ID_PROPERTY value indicating that a message fails + * to index and we should not bother trying again, at least not until a new + * release is made. 
+ * + * This should ideally just flip between 1 and 2, with GLODA_OLD_BAD_MESSAGE_ID + * flipping in the other direction. If we start having more trailing badness, + * _indexerGetEnumerator and GLODA_OLD_BAD_MESSAGE_ID will need to be altered. + * + * When flipping this, be sure to update glodaTestHelper.js's copy. + */ +var GLODA_BAD_MESSAGE_ID = 2; +/** + * The gloda id we used to use to mark messages as bad, but now should be + * treated as eligible for indexing. This is only ever used for consideration + * when creating msg header enumerators with `_indexerGetEnumerator` which + * means we only will re-index such messages in an indexing sweep. Accordingly + * event-driven indexing will still treat such messages as unindexed (and + * unindexable) until an indexing sweep picks them up. + */ +var GLODA_OLD_BAD_MESSAGE_ID = 1; +var GLODA_FIRST_VALID_MESSAGE_ID = 32; + +var JUNK_SCORE_PROPERTY = "junkscore"; +var JUNK_SPAM_SCORE_STR = Ci.nsIJunkMailPlugin.IS_SPAM_SCORE.toString(); + +/** + * The processing flags that tell us that a message header has not yet been + * reported to us via msgsClassified. If it has one of these flags, it is + * still being processed. + */ +var NOT_YET_REPORTED_PROCESSING_FLAGS = + Ci.nsMsgProcessingFlags.NotReportedClassified | + Ci.nsMsgProcessingFlags.ClassifyJunk; + +// for list comprehension fun +function* range(begin, end) { + for (let i = begin; i < end; ++i) { + yield i; + } +} + +/** + * We do not set properties on the messages until we perform a DB commit; this + * helper class tracks messages that we have indexed but are not yet marked + * as such on their header. + */ +var PendingCommitTracker = { + /** + * Maps message URIs to their gloda ids. + * + * I am not entirely sure why I chose the URI for the key rather than + * gloda folder ID + message key. Most likely it was to simplify debugging + * since the gloda folder ID is opaque while the URI is very informative. 
It + * is also possible I was afraid of IMAP folder renaming triggering a UID + * renumbering? + */ + _indexedMessagesPendingCommitByKey: {}, + /** + * Map from the pending commit gloda id to a tuple of [the corresponding + * message header, dirtyState]. + */ + _indexedMessagesPendingCommitByGlodaId: {}, + /** + * Do we have a post-commit handler registered with this transaction yet? + */ + _pendingCommit: false, + + /** + * The function gets called when the commit actually happens to flush our + * message id's. + * + * It is very possible that by the time this call happens we have left the + * folder and nulled out msgDatabase on the folder. Since nulling it out + * is what causes the commit, if we set the headers here without somehow + * forcing a commit, we will lose. Badly. + * Accordingly, we make a list of all the folders that the headers belong to + * as we iterate, make sure to re-attach their msgDatabase before forgetting + * the headers, then make sure to zero the msgDatabase again, triggering a + * commit. If there were a way to directly get the nsIMsgDatabase from the + * header we could do that and call commit directly. We don't track + * databases along with the headers since the headers can change because of + * moves and that would increase the number of moving parts. + */ + _commitCallback() { + let foldersByURI = {}; + let lastFolder = null; + + for (let glodaId in PendingCommitTracker._indexedMessagesPendingCommitByGlodaId) { + let [msgHdr, dirtyState] = + PendingCommitTracker._indexedMessagesPendingCommitByGlodaId[glodaId]; + // Mark this message as indexed. + // It's conceivable the database could have gotten blown away, in which + // case the message headers are going to throw exceptions when we try + // and touch them. So we wrap this in a try block that complains about + // this unforeseen circumstance. (noteFolderDatabaseGettingBlownAway + // should have been called and avoided this situation in all known + // situations.) 
+ try { + let curGlodaId = msgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY); + if (curGlodaId != glodaId) { + msgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, glodaId); + } + let headerDirty = msgHdr.getUint32Property(GLODA_DIRTY_PROPERTY); + if (headerDirty != dirtyState) { + msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, dirtyState); + } + + // Make sure this folder is in our foldersByURI map. + if (lastFolder == msgHdr.folder) { + continue; + } + lastFolder = msgHdr.folder; + let folderURI = lastFolder.URI; + if (!(folderURI in foldersByURI)) { + foldersByURI[folderURI] = lastFolder; + } + } catch (ex) { + GlodaMsgIndexer._log.error( + "Exception while attempting to mark message with gloda state after" + + "db commit", + ex + ); + } + } + + // it is vitally important to do this before we forget about the headers! + for (let uri in foldersByURI) { + let folder = foldersByURI[uri]; + // This will not cause a parse. The database is in-memory since we have + // a header that belongs to it. This just causes the folder to + // re-acquire a reference from the database manager. + folder.msgDatabase; + // And this will cause a commit. (And must be done since we don't want + // to cause a leak.) + folder.msgDatabase = null; + } + + PendingCommitTracker._indexedMessagesPendingCommitByGlodaId = {}; + PendingCommitTracker._indexedMessagesPendingCommitByKey = {}; + + PendingCommitTracker._pendingCommit = false; + }, + + /** + * Track a message header that should be marked with the given gloda id when + * the database commits. 
+ */ + track(aMsgHdr, aGlodaId) { + let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey; + this._indexedMessagesPendingCommitByKey[pendingKey] = aGlodaId; + this._indexedMessagesPendingCommitByGlodaId[aGlodaId] = [ + aMsgHdr, + GlodaMsgIndexer.kMessageClean, + ]; + + if (!this._pendingCommit) { + GlodaDatastore.runPostCommit(this._commitCallback); + this._pendingCommit = true; + } + }, + + /** + * Get the current state of a message header given that we cannot rely on just + * looking at the header's properties because we defer setting those + * until the SQLite commit happens. + * + * @returns Tuple of [gloda id, dirty status]. + */ + getGlodaState(aMsgHdr) { + // If it's in the pending commit table, then the message is basically + // clean. Return that info. + let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey; + if (pendingKey in this._indexedMessagesPendingCommitByKey) { + let glodaId = + PendingCommitTracker._indexedMessagesPendingCommitByKey[pendingKey]; + return [glodaId, this._indexedMessagesPendingCommitByGlodaId[glodaId][1]]; + } + + // Otherwise the header's concept of state is correct. + let glodaId = aMsgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY); + let glodaDirty = aMsgHdr.getUint32Property(GLODA_DIRTY_PROPERTY); + return [glodaId, glodaDirty]; + }, + + /** + * Update our structure to reflect moved headers. Moves are currently + * treated as weakly interesting and do not require a reindexing + * although collections will get notified. So our job is to to fix-up + * the pending commit information if the message has a pending commit. 
+ */ + noteMove(aOldHdr, aNewHdr) { + let oldKey = aOldHdr.folder.URI + "#" + aOldHdr.messageKey; + if (!(oldKey in this._indexedMessagesPendingCommitByKey)) { + return; + } + + let glodaId = this._indexedMessagesPendingCommitByKey[oldKey]; + delete this._indexedMessagesPendingCommitByKey[oldKey]; + + let newKey = aNewHdr.folder.URI + "#" + aNewHdr.messageKey; + this._indexedMessagesPendingCommitByKey[newKey] = glodaId; + + // only clobber the header, not the dirty state + this._indexedMessagesPendingCommitByGlodaId[glodaId][0] = aNewHdr; + }, + + /** + * A blind move is one where we have the source header but not the destination + * header. This happens for IMAP messages that do not involve offline fake + * headers. + * XXX Since IMAP moves will propagate the gloda-id/gloda-dirty bits for us, + * we could detect the other side of the move when it shows up as a + * msgsClassified event and restore the mapping information. Since the + * offline fake header case should now cover the bulk of IMAP move + * operations, we probably do not need to pursue this. + * + * We just re-dispatch to noteDirtyHeader because we can't do anything more + * clever. + */ + noteBlindMove(aOldHdr) { + this.noteDirtyHeader(aOldHdr); + }, + + /** + * If a message is dirty we should stop tracking it for post-commit + * purposes. This is not because we don't want to write to its header + * when we commit as much as that we want to avoid |getHeaderGlodaState| + * reporting that the message is clean. We could complicate our state + * by storing that information, but this is easier and ends up the same + * in the end. + */ + noteDirtyHeader(aMsgHdr) { + let pendingKey = aMsgHdr.folder.URI + "#" + aMsgHdr.messageKey; + if (!(pendingKey in this._indexedMessagesPendingCommitByKey)) { + return; + } + + // (It is important that we get the gloda id from our own structure!) 
+ let glodaId = this._indexedMessagesPendingCommitByKey[pendingKey]; + this._indexedMessagesPendingCommitByGlodaId[glodaId][1] = + GlodaMsgIndexer.kMessageDirty; + }, + + /** + * Sometimes a folder database gets blown away. This happens for one of two + * expected reasons right now: + * - Folder compaction. + * - Explicit reindexing of a folder via the folder properties "rebuild index" + * button. + * + * When this happens, we are basically out of luck and need to discard + * everything about the folder. The good news is that the folder compaction + * pass is clever enough to re-establish the linkages that are being lost + * when we drop these things on the floor. Reindexing of a folder is not + * clever enough to deal with this but is an exceptional case of last resort + * (the user should not normally be performing a reindex as part of daily + * operation), so we accept that messages may be redundantly indexed. + */ + noteFolderDatabaseGettingBlownAway(aMsgFolder) { + let uri = aMsgFolder.URI + "#"; + for (let key of Object.keys(this._indexedMessagesPendingCommitByKey)) { + // this is not as efficient as it could be, but compaction is relatively + // rare and the number of pending headers is generally going to be + // small. + if (key.indexOf(uri) == 0) { + delete this._indexedMessagesPendingCommitByKey[key]; + } + } + }, +}; + +/** + * This callback handles processing the asynchronous query results of + * |GlodaMsgIndexer.getMessagesByMessageID|. 
+ */ +function MessagesByMessageIdCallback( + aMsgIDToIndex, + aResults, + aCallback, + aCallbackThis +) { + this.msgIDToIndex = aMsgIDToIndex; + this.results = aResults; + this.callback = aCallback; + this.callbackThis = aCallbackThis; +} + +MessagesByMessageIdCallback.prototype = { + _log: console.createInstance({ + prefix: "gloda.index_msg.mbm", + maxLogLevel: "Warn", + maxLogLevelPref: "gloda.loglevel", + }), + + onItemsAdded(aItems, aCollection) { + // just outright bail if we are shutdown + if (GlodaDatastore.datastoreIsShutdown) { + return; + } + + this._log.debug("getting results..."); + for (let message of aItems) { + this.results[this.msgIDToIndex[message.headerMessageID]].push(message); + } + }, + onItemsModified() {}, + onItemsRemoved() {}, + onQueryCompleted(aCollection) { + // just outright bail if we are shutdown + if (GlodaDatastore.datastoreIsShutdown) { + return; + } + + this._log.debug("query completed, notifying... " + this.results); + + this.callback.call(this.callbackThis, this.results); + }, +}; + +/** + * The message indexer! + * + * === Message Indexing Strategy + * To these ends, we implement things like so: + * + * Message State Tracking + * - We store a property on all indexed headers indicating their gloda message + * id. This allows us to tell whether a message is indexed from the header, + * without having to consult the SQL database. + * - When we receive an event that indicates that a message's meta-data has + * changed and gloda needs to re-index the message, we set a property on the + * header that indicates the message is dirty. This property can indicate + * that the message needs to be re-indexed but the gloda-id is valid (dirty) + * or that the message's gloda-id is invalid (filthy) because the gloda + * database has been blown away. + * - We track whether a folder is up-to-date on our GlodaFolder representation + * using a concept of dirtiness, just like messages. Like messages, a folder + * can be dirty or filthy. 
A dirty folder has at least one dirty message in + * it which means we should scan the folder. A filthy folder means that + * every message in the folder should be considered filthy. Folders start + * out filthy when Gloda is first told about them indicating we cannot + * trust any of the gloda-id's in the folders. Filthy folders are downgraded + * to dirty folders after we mark all of the headers with gloda-id's filthy. + * + * Indexing Message Control + * - We index the headers of all IMAP messages. We index the bodies of all IMAP + * messages that are offline. We index all local messages. We plan to avoid + * indexing news messages. + * - We would like a way to express desires about indexing that either don't + * confound offline storage with indexing, or actually allow some choice. + * + * Indexing Messages + * - We have two major modes of indexing: sweep and event-driven. When we + * start up we kick off an indexing sweep. We use event-driven indexing + * as we receive events for eligible messages, but if we get too many + * events we start dropping them on the floor and just flag that an indexing + * sweep is required. + * - The sweep initiates folder indexing jobs based on the priorities assigned + * to folders. Folder indexing uses a filtered message enumerator to find + * messages that need to be indexed, minimizing wasteful exposure of message + * headers to XPConnect that we would not end up indexing. + * - For local folders, we use GetDatabaseWithReparse to ensure that the .msf + * file exists. For IMAP folders, we simply use GetDatabase because we know + * the auto-sync logic will make sure that the folder is up-to-date and we + * want to avoid creating problems through use of updateFolder. + * + * Junk Mail + * - We do not index junk. We do not index messages until the junk/non-junk + * determination has been made. If a message gets marked as junk, we act like + * it was deleted. 
+ * - We know when a message is actively queued for junk processing thanks to + * folder processing flags. nsMsgDBFolder::CallFilterPlugins does this + * prior to initiating spam processing. Unfortunately, this method does not + * get called until after we receive the notification about the existence of + * the header. How long after can vary on different factors. The longest + * delay is in the IMAP case where there is a filter that requires the + * message body to be present; the method does not get called until all the + * bodies are downloaded. + * + */ +var GlodaMsgIndexer = { + /** + * A partial attempt to generalize to support multiple databases. Each + * database would have its own datastore would have its own indexer. But + * we rather inter-mingle our use of this field with the singleton global + * GlodaDatastore. + */ + _datastore: GlodaDatastore, + _log: console.createInstance({ + prefix: "gloda.index_msg", + maxLogLevel: "Warn", + maxLogLevelPref: "gloda.loglevel", + }), + + _junkService: MailServices.junk, + + name: "index_msg", + /** + * Are we enabled, read: are we processing change events? + */ + _enabled: false, + get enabled() { + return this._enabled; + }, + + enable() { + // initialize our listeners' this pointers + this._databaseAnnouncerListener.indexer = this; + this._msgFolderListener.indexer = this; + + // register for: + // - folder loaded events, so we know when getDatabaseWithReparse has + // finished updating the index/what not (if it wasn't immediately + // available) + // - property changes (so we know when a message's read/starred state have + // changed.) + this._folderListener._init(this); + MailServices.mailSession.AddFolderListener( + this._folderListener, + Ci.nsIFolderListener.intPropertyChanged | + Ci.nsIFolderListener.propertyFlagChanged | + Ci.nsIFolderListener.event + ); + + MailServices.mfn.addListener( + this._msgFolderListener, + // note: intentionally no msgAdded or msgUnincorporatedMoved. 
+ Ci.nsIMsgFolderNotificationService.msgsClassified | + Ci.nsIMsgFolderNotificationService.msgsJunkStatusChanged | + Ci.nsIMsgFolderNotificationService.msgsDeleted | + Ci.nsIMsgFolderNotificationService.msgsMoveCopyCompleted | + Ci.nsIMsgFolderNotificationService.msgKeyChanged | + Ci.nsIMsgFolderNotificationService.folderAdded | + Ci.nsIMsgFolderNotificationService.folderDeleted | + Ci.nsIMsgFolderNotificationService.folderMoveCopyCompleted | + Ci.nsIMsgFolderNotificationService.folderRenamed | + Ci.nsIMsgFolderNotificationService.folderCompactStart | + Ci.nsIMsgFolderNotificationService.folderCompactFinish | + Ci.nsIMsgFolderNotificationService.folderReindexTriggered + ); + + this._enabled = true; + + this._considerSchemaMigration(); + + this._log.info("Event-Driven Indexing is now " + this._enabled); + }, + disable() { + // remove FolderLoaded notification listener + MailServices.mailSession.RemoveFolderListener(this._folderListener); + + MailServices.mfn.removeListener(this._msgFolderListener); + + this._indexerLeaveFolder(); // nop if we aren't "in" a folder + + this._enabled = false; + + this._log.info("Event-Driven Indexing is now " + this._enabled); + }, + + /** + * Indicates that we have pending deletions to process, meaning that there + * are gloda message rows flagged for deletion. If this value is a boolean, + * it means the value is known reliably. If this value is null, it means + * that we don't know, likely because we have started up and have not checked + * the database. + */ + pendingDeletions: null, + + /** + * The message (or folder state) is believed up-to-date. + */ + kMessageClean: 0, + /** + * The message (or folder) is known to not be up-to-date. In the case of + * folders, this means that some of the messages in the folder may be dirty. + * However, because of the way our indexing works, it is possible there may + * actually be no dirty messages in a folder. 
(We attempt to process + * messages in an event-driven fashion for a finite number of messages, but + * because we can quit without completing processing of the queue, we need to + * mark the folder dirty, just-in-case.) (We could do some extra leg-work + * and do a better job of marking the folder clean again.) + */ + kMessageDirty: 1, + /** + * We have not indexed the folder at all, but messages in the folder think + * they are indexed. We downgrade the folder to just kMessageDirty after + * marking all the messages in the folder as dirty. We do this so that if we + * have to stop indexing the folder we can still build on our progress next + * time we enter the folder. + * We mark all folders filthy when (re-)creating the database because there + * may be previous state left over from an earlier database. + */ + kMessageFilthy: 2, + + /** + * A message addition job yet to be (completely) processed. Since message + * addition events come to us one-by-one, in order to aggregate them into a + * job, we need something like this. It's up to the indexing loop to + * decide when to null this out; it can either do it when it first starts + * processing it, or when it has processed the last thing. It's really a + * question of whether we want retrograde motion in the folder progress bar + * or the message progress bar. + */ + _pendingAddJob: null, + + /** + * The number of messages that we should queue for processing before letting + * them fall on the floor and relying on our folder-walking logic to ensure + * that the messages are indexed. + * The reason we allow for queueing messages in an event-driven fashion is + * that once we have reached a steady-state, it is preferable to be able to + * deal with new messages and modified meta-data in a prompt fashion rather + * than having to (potentially) walk every folder in the system just to find + * the message that the user changed the tag on. 
+ */ + _indexMaxEventQueueMessages: 20, + + /** + * Unit testing hook to get us to emit additional logging that verges on + * inane for general usage but is helpful in unit test output to get a lay + * of the land and for paranoia reasons. + */ + _unitTestSuperVerbose: false, + + /** The GlodaFolder corresponding to the folder we are indexing. */ + _indexingGlodaFolder: null, + /** The nsIMsgFolder we are currently indexing. */ + _indexingFolder: null, + /** The nsIMsgDatabase we are currently indexing. */ + _indexingDatabase: null, + /** + * The iterator we are using to iterate over the headers in + * this._indexingDatabase. + */ + _indexingIterator: null, + + /** folder whose entry we are pending on */ + _pendingFolderEntry: null, + + /** + * Async common logic that we want to deal with the given folder ID. Besides + * cutting down on duplicate code, this ensures that we are listening on + * the folder in case it tries to go away when we are using it. + * + * @returns true when the folder was successfully entered, false when we need + * to pend on notification of updating of the folder (due to re-parsing + * or what have you). In the event of an actual problem, an exception + * will escape. + */ + _indexerEnterFolder(aFolderID) { + // leave the folder if we haven't explicitly left it. + if (this._indexingFolder !== null) { + this._indexerLeaveFolder(); + } + + this._indexingGlodaFolder = GlodaDatastore._mapFolderID(aFolderID); + this._indexingFolder = this._indexingGlodaFolder.getXPCOMFolder( + this._indexingGlodaFolder.kActivityIndexing + ); + + if (this._indexingFolder) { + this._log.debug("Entering folder: " + this._indexingFolder.URI); + } + + try { + // The msf may need to be created or otherwise updated for local folders. + // This may require yielding until such time as the msf has been created. 
+ try { + if (this._indexingFolder instanceof Ci.nsIMsgLocalMailFolder) { + this._indexingDatabase = this._indexingFolder.getDatabaseWithReparse( + null, + null + ); + } + // we need do nothing special for IMAP, news, or other + } catch (e) { + // getDatabaseWithReparse can return either NS_ERROR_NOT_INITIALIZED or + // NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE if the net result is that it + // is going to send us a notification when the reparse has completed. + // (note that although internally NS_MSG_ERROR_FOLDER_SUMMARY_MISSING + // might get flung around, it won't make it out to us, and will instead + // be permuted into an NS_ERROR_NOT_INITIALIZED.) + if ( + e.result == Cr.NS_ERROR_NOT_INITIALIZED || + e.result == NS_MSG_ERROR_FOLDER_SUMMARY_OUT_OF_DATE + ) { + // this means that we need to pend on the update; the listener for + // FolderLoaded events will call _indexerCompletePendingFolderEntry. + this._log.debug("Pending on folder load..."); + this._pendingFolderEntry = this._indexingFolder; + return GlodaConstants.kWorkAsync; + } + throw e; + } + // we get an nsIMsgDatabase out of this (unsurprisingly) which + // explicitly inherits from nsIDBChangeAnnouncer, which has the + // addListener call we want. + if (this._indexingDatabase == null) { + this._indexingDatabase = this._indexingFolder.msgDatabase; + } + this._indexingDatabase.addListener(this._databaseAnnouncerListener); + } catch (ex) { + this._log.error( + "Problem entering folder: " + + (this._indexingFolder ? this._indexingFolder.prettyName : "unknown") + + ", skipping. Error was: " + + ex.fileName + + ":" + + ex.lineNumber + + ": " + + ex + ); + this._indexingGlodaFolder.indexing = false; + this._indexingFolder = null; + this._indexingGlodaFolder = null; + this._indexingDatabase = null; + this._indexingEnumerator = null; + + // re-throw, we just wanted to make sure this junk is cleaned up and + // get localized error logging... 
+ throw ex; + } + + return GlodaConstants.kWorkSync; + }, + + /** + * If the folder was still parsing/updating when we tried to enter, then this + * handler will get called by the listener who got the FolderLoaded message. + * All we need to do is get the database reference, register a listener on + * the db, and retrieve an iterator if desired. + */ + _indexerCompletePendingFolderEntry() { + this._indexingDatabase = this._indexingFolder.msgDatabase; + this._indexingDatabase.addListener(this._databaseAnnouncerListener); + this._log.debug("...Folder Loaded!"); + + // the load is no longer pending; we certainly don't want more notifications + this._pendingFolderEntry = null; + // indexerEnterFolder returned kWorkAsync, which means we need to notify + // the callback driver to get things going again. + GlodaIndexer.callbackDriver(); + }, + + /** + * Enumerate all messages in the folder. + */ + kEnumAllMsgs: 0, + /** + * Enumerate messages that look like they need to be indexed. + */ + kEnumMsgsToIndex: 1, + /** + * Enumerate messages that are already indexed. + */ + kEnumIndexedMsgs: 2, + + /** + * Synchronous helper to get an enumerator for the current folder (as found + * in |_indexingFolder|. + * + * @param aEnumKind One of |kEnumAllMsgs|, |kEnumMsgsToIndex|, or + * |kEnumIndexedMsgs|. + * @param [aAllowPreBadIds=false] Only valid for |kEnumIndexedMsgs|, tells us + * that we should treat message with any gloda-id as dirty, not just + * messages that have non-bad message id's. + */ + _indexerGetEnumerator(aEnumKind, aAllowPreBadIds) { + if (aEnumKind == this.kEnumMsgsToIndex) { + // We need to create search terms for messages to index. Messages should + // be indexed if they're indexable (local or offline and not expunged) + // and either: haven't been indexed, are dirty, or are marked with with + // a former GLODA_BAD_MESSAGE_ID that is no longer our bad marker. 
(Our + // bad marker can change on minor schema revs so that we can try and + // reindex those messages exactly once and without needing to go through + // a pass to mark them as needing one more try.) + // The basic search expression is: + // ((GLODA_MESSAGE_ID_PROPERTY Is 0) || + // (GLODA_MESSAGE_ID_PROPERTY Is GLODA_OLD_BAD_MESSAGE_ID) || + // (GLODA_DIRTY_PROPERTY Isnt 0)) && + // (JUNK_SCORE_PROPERTY Isnt 100) + // If the folder !isLocal we add the terms: + // - if the folder is offline -- && (Status Is nsMsgMessageFlags.Offline) + // - && (Status Isnt nsMsgMessageFlags.Expunged) + + let searchSession = Cc[ + "@mozilla.org/messenger/searchSession;1" + ].createInstance(Ci.nsIMsgSearchSession); + let searchTerms = []; + let isLocal = this._indexingFolder instanceof Ci.nsIMsgLocalMailFolder; + + searchSession.addScopeTerm( + Ci.nsMsgSearchScope.offlineMail, + this._indexingFolder + ); + let nsMsgSearchAttrib = Ci.nsMsgSearchAttrib; + let nsMsgSearchOp = Ci.nsMsgSearchOp; + + // first term: (GLODA_MESSAGE_ID_PROPERTY Is 0 + let searchTerm = searchSession.createTerm(); + searchTerm.booleanAnd = false; // actually don't care here + searchTerm.beginsGrouping = true; + searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty; + searchTerm.op = nsMsgSearchOp.Is; + let value = searchTerm.value; + value.attrib = searchTerm.attrib; + value.status = 0; + searchTerm.value = value; + searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY; + searchTerms.push(searchTerm); + + // second term: || GLODA_MESSAGE_ID_PROPERTY Is GLODA_OLD_BAD_MESSAGE_ID + searchTerm = searchSession.createTerm(); + searchTerm.booleanAnd = false; // OR + searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty; + searchTerm.op = nsMsgSearchOp.Is; + value = searchTerm.value; + value.attrib = searchTerm.attrib; + value.status = GLODA_OLD_BAD_MESSAGE_ID; + searchTerm.value = value; + searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY; + searchTerms.push(searchTerm); + + // third term: || GLODA_DIRTY_PROPERTY 
Isnt 0 ) + searchTerm = searchSession.createTerm(); + searchTerm.booleanAnd = false; + searchTerm.endsGrouping = true; + searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty; + searchTerm.op = nsMsgSearchOp.Isnt; + value = searchTerm.value; + value.attrib = searchTerm.attrib; + value.status = 0; + searchTerm.value = value; + searchTerm.hdrProperty = GLODA_DIRTY_PROPERTY; + searchTerms.push(searchTerm); + + // JUNK_SCORE_PROPERTY Isnt 100 + // For symmetry with our event-driven stuff, we just directly deal with + // the header property. + searchTerm = searchSession.createTerm(); + searchTerm.booleanAnd = true; + searchTerm.attrib = nsMsgSearchAttrib.HdrProperty; + searchTerm.op = nsMsgSearchOp.Isnt; + value = searchTerm.value; + value.attrib = searchTerm.attrib; + value.str = JUNK_SPAM_SCORE_STR; + searchTerm.value = value; + searchTerm.hdrProperty = JUNK_SCORE_PROPERTY; + searchTerms.push(searchTerm); + + if (!isLocal) { + // If the folder is offline, then the message should be too + if (this._indexingFolder.getFlag(Ci.nsMsgFolderFlags.Offline)) { + // third term: && Status Is nsMsgMessageFlags.Offline + searchTerm = searchSession.createTerm(); + searchTerm.booleanAnd = true; + searchTerm.attrib = nsMsgSearchAttrib.MsgStatus; + searchTerm.op = nsMsgSearchOp.Is; + value = searchTerm.value; + value.attrib = searchTerm.attrib; + value.status = Ci.nsMsgMessageFlags.Offline; + searchTerm.value = value; + searchTerms.push(searchTerm); + } + + // fourth term: && Status Isnt nsMsgMessageFlags.Expunged + searchTerm = searchSession.createTerm(); + searchTerm.booleanAnd = true; + searchTerm.attrib = nsMsgSearchAttrib.MsgStatus; + searchTerm.op = nsMsgSearchOp.Isnt; + value = searchTerm.value; + value.attrib = searchTerm.attrib; + value.status = Ci.nsMsgMessageFlags.Expunged; + searchTerm.value = value; + searchTerms.push(searchTerm); + } + + this._indexingEnumerator = this._indexingDatabase.getFilterEnumerator( + searchTerms, + true + ); + } else if (aEnumKind == 
this.kEnumIndexedMsgs) { + // Enumerate only messages that are already indexed. This comes out to: + // ((GLODA_MESSAGE_ID_PROPERTY > GLODA_FIRST_VALID_MESSAGE_ID-1) && + // (GLODA_DIRTY_PROPERTY Isnt kMessageFilthy)) + // In English, a message is indexed if (by clause): + // 1) The message has a gloda-id and that gloda-id is in the valid range + // (and not in the bad message marker range). + // 2) The message has not been marked filthy (which invalidates the + // gloda-id.) We also assume that the folder would not have been + // entered at all if it was marked filthy. + let searchSession = Cc[ + "@mozilla.org/messenger/searchSession;1" + ].createInstance(Ci.nsIMsgSearchSession); + let searchTerms = []; + + searchSession.addScopeTerm( + Ci.nsMsgSearchScope.offlineMail, + this._indexingFolder + ); + let nsMsgSearchAttrib = Ci.nsMsgSearchAttrib; + let nsMsgSearchOp = Ci.nsMsgSearchOp; + + // first term: (GLODA_MESSAGE_ID_PROPERTY > GLODA_FIRST_VALID_MESSAGE_ID-1 + let searchTerm = searchSession.createTerm(); + searchTerm.booleanAnd = false; // actually don't care here + searchTerm.beginsGrouping = true; + searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty; + // use != 0 if we're allow pre-bad ids. + searchTerm.op = aAllowPreBadIds + ? nsMsgSearchOp.Isnt + : nsMsgSearchOp.IsGreaterThan; + let value = searchTerm.value; + value.attrib = searchTerm.attrib; + value.status = aAllowPreBadIds ? 
0 : GLODA_FIRST_VALID_MESSAGE_ID - 1; + searchTerm.value = value; + searchTerm.hdrProperty = GLODA_MESSAGE_ID_PROPERTY; + searchTerms.push(searchTerm); + + // second term: && GLODA_DIRTY_PROPERTY Isnt kMessageFilthy) + searchTerm = searchSession.createTerm(); + searchTerm.booleanAnd = true; + searchTerm.endsGrouping = true; + searchTerm.attrib = nsMsgSearchAttrib.Uint32HdrProperty; + searchTerm.op = nsMsgSearchOp.Isnt; + value = searchTerm.value; + value.attrib = searchTerm.attrib; + value.status = this.kMessageFilthy; + searchTerm.value = value; + searchTerm.hdrProperty = GLODA_DIRTY_PROPERTY; + searchTerms.push(searchTerm); + + // The use-case of already indexed messages does not want them reversed; + // we care about seeing the message keys in order. + this._indexingEnumerator = this._indexingDatabase.getFilterEnumerator( + searchTerms, + false + ); + } else if (aEnumKind == this.kEnumAllMsgs) { + this._indexingEnumerator = + this._indexingDatabase.reverseEnumerateMessages(); + } else { + throw new Error("Unknown enumerator type requested:" + aEnumKind); + } + }, + + _indexerLeaveFolder() { + if (this._indexingFolder !== null) { + if (this._indexingDatabase) { + this._indexingDatabase.commit(Ci.nsMsgDBCommitType.kLargeCommit); + // remove our listener! + this._indexingDatabase.removeListener(this._databaseAnnouncerListener); + } + // let the gloda folder know we are done indexing + this._indexingGlodaFolder.indexing = false; + // null everyone out + this._indexingFolder = null; + this._indexingGlodaFolder = null; + this._indexingDatabase = null; + this._indexingEnumerator = null; + } + }, + + /** + * Event fed to us by our nsIFolderListener when a folder is loaded. We use + * this event to know when a folder we were trying to open to index is + * actually ready to be indexed. (The summary may have not existed, may have + * been out of date, or otherwise.) + * + * @param aFolder An nsIMsgFolder, already QI'd. 
+ */ + _onFolderLoaded(aFolder) { + if ( + this._pendingFolderEntry !== null && + aFolder.URI == this._pendingFolderEntry.URI + ) { + this._indexerCompletePendingFolderEntry(); + } + }, + + // it's a getter so we can reference 'this'. we could memoize. + get workers() { + return [ + [ + "folderSweep", + { + worker: this._worker_indexingSweep, + jobCanceled: this._cleanup_indexingSweep, + cleanup: this._cleanup_indexingSweep, + }, + ], + [ + "folder", + { + worker: this._worker_folderIndex, + recover: this._recover_indexMessage, + cleanup: this._cleanup_indexing, + }, + ], + [ + "folderCompact", + { + worker: this._worker_folderCompactionPass, + // compaction enters the folder so needs to know how to leave + cleanup: this._cleanup_indexing, + }, + ], + [ + "message", + { + worker: this._worker_messageIndex, + onSchedule: this._schedule_messageIndex, + jobCanceled: this._canceled_messageIndex, + recover: this._recover_indexMessage, + cleanup: this._cleanup_indexing, + }, + ], + [ + "delete", + { + worker: this._worker_processDeletes, + }, + ], + + [ + "fixMissingContacts", + { + worker: this._worker_fixMissingContacts, + }, + ], + ]; + }, + + _schemaMigrationInitiated: false, + _considerSchemaMigration() { + if ( + !this._schemaMigrationInitiated && + GlodaDatastore._actualSchemaVersion === 26 + ) { + let job = new IndexingJob("fixMissingContacts", null); + GlodaIndexer.indexJob(job); + this._schemaMigrationInitiated = true; + } + }, + + initialSweep() { + this.indexingSweepNeeded = true; + }, + + _indexingSweepActive: false, + /** + * Indicate that an indexing sweep is desired. We kick-off an indexing + * sweep at start-up and whenever we receive an event-based notification + * that we either can't process as an event or that we normally handle + * during the sweep pass anyways. 
+ */ + set indexingSweepNeeded(aNeeded) { + if (!this._indexingSweepActive && aNeeded) { + let job = new IndexingJob("folderSweep", null); + job.mappedFolders = false; + GlodaIndexer.indexJob(job); + this._indexingSweepActive = true; + } + }, + + /** + * Performs the folder sweep, locating folders that should be indexed, and + * creating a folder indexing job for them, and rescheduling itself for + * execution after that job is completed. Once it indexes all the folders, + * if we believe we have deletions to process (or just don't know), it kicks + * off a deletion processing job. + * + * Folder traversal logic is based off the spotlight/vista indexer code; we + * retrieve the list of servers and folders each time want to find a new + * folder to index. This avoids needing to maintain a perfect model of the + * folder hierarchy at all times. (We may eventually want to do that, but + * this is sufficient and safe for now.) Although our use of dirty flags on + * the folders allows us to avoid tracking the 'last folder' we processed, + * we do so to avoid getting 'trapped' in a folder with a high rate of + * changes. + */ + *_worker_indexingSweep(aJob) { + if (!aJob.mappedFolders) { + // Walk the folders and make sure all the folders we would want to index + // are mapped. Build up a list of GlodaFolders as we go, so that we can + // sort them by their indexing priority. 
+ let foldersToProcess = (aJob.foldersToProcess = []); + + for (let folder of MailServices.accounts.allFolders) { + if (this.shouldIndexFolder(folder)) { + foldersToProcess.push(Gloda.getFolderForFolder(folder)); + } + } + + // sort the folders by priority (descending) + foldersToProcess.sort(function (a, b) { + return b.indexingPriority - a.indexingPriority; + }); + + aJob.mappedFolders = true; + } + + // -- process the folders (in sorted order) + while (aJob.foldersToProcess.length) { + let glodaFolder = aJob.foldersToProcess.shift(); + // ignore folders that: + // - have been deleted out of existence! + // - are not dirty/have not been compacted + // - are actively being compacted + if ( + glodaFolder._deleted || + (!glodaFolder.dirtyStatus && !glodaFolder.compacted) || + glodaFolder.compacting + ) { + continue; + } + + // If the folder is marked as compacted, give it a compaction job. + if (glodaFolder.compacted) { + GlodaIndexer.indexJob(new IndexingJob("folderCompact", glodaFolder.id)); + } + + // add a job for the folder indexing if it was dirty + if (glodaFolder.dirtyStatus) { + GlodaIndexer.indexJob(new IndexingJob("folder", glodaFolder.id)); + } + + // re-schedule this job (although this worker will die) + GlodaIndexer.indexJob(aJob); + yield GlodaConstants.kWorkDone; + } + + // consider deletion + if (this.pendingDeletions || this.pendingDeletions === null) { + GlodaIndexer.indexJob(new IndexingJob("delete", null)); + } + + // we don't have any more work to do... + this._indexingSweepActive = false; + yield GlodaConstants.kWorkDone; + }, + + /** + * The only state we need to cleanup is that there is no longer an active + * indexing sweep. + */ + _cleanup_indexingSweep(aJob) { + this._indexingSweepActive = false; + }, + + /** + * The number of headers to look at before yielding with kWorkSync. This + * is for time-slicing purposes so we still yield to the UI periodically. 
+ */ + HEADER_CHECK_SYNC_BLOCK_SIZE: 25, + + FOLDER_COMPACTION_PASS_BATCH_SIZE: 512, + /** + * Special indexing pass for (local) folders than have been compacted. The + * compaction can cause message keys to change because message keys in local + * folders are simply offsets into the mbox file. Accordingly, we need to + * update the gloda records/objects to point them at the new message key. + * + * Our general algorithm is to perform two traversals in parallel. The first + * is a straightforward enumeration of the message headers in the folder that + * apparently have been already indexed. These provide us with the message + * key and the "gloda-id" property. + * The second is a list of tuples containing a gloda message id, its current + * message key per the gloda database, and the message-id header. We re-fill + * the list with batches on-demand. This allows us to both avoid dispatching + * needless UPDATEs as well as deal with messages that were tracked by the + * PendingCommitTracker but were discarded by the compaction notification. + * + * We end up processing two streams of gloda-id's and some extra info. In + * the normal case we expect these two streams to line up exactly and all + * we need to do is update the message key if it has changed. + * + * There are a few exceptional cases where things do not line up: + * 1) The gloda database knows about a message that the enumerator does not + * know about... + * a) This message exists in the folder (identified using its message-id + * header). This means the message got indexed but PendingCommitTracker + * had to forget about the info when the compaction happened. We + * re-establish the link and track the message in PendingCommitTracker + * again. + * b) The message does not exist in the folder. This means the message got + * indexed, PendingCommitTracker had to forget about the info, and + * then the message either got moved or deleted before now. 
We mark + * the message as deleted; this allows the gloda message to be reused + * if the move target has not yet been indexed or purged if it already + * has been and the gloda message is a duplicate. And obviously, if the + * event that happened was actually a delete, then the delete is the + * right thing to do. + * 2) The enumerator knows about a message that the gloda database does not + * know about. This is unexpected and should not happen. We log a + * warning. We are able to differentiate this case from case #1a by + * retrieving the message header associated with the next gloda message + * (using the message-id header per 1a again). If the gloda message's + * message key is after the enumerator's message key then we know this is + * case #2. (It implies an insertion in the enumerator stream which is how + * we define the unexpected case.) + * + * Besides updating the database rows, we also need to make sure that + * in-memory representations are updated. Immediately after dispatching + * UPDATE changes to the database we use the same set of data to walk the + * live collections and update any affected messages. We are then able to + * discard the information. Although this means that we will have to + * potentially walk the live collections multiple times, unless something + * has gone horribly wrong, the number of collections should be reasonable + * and the lookups are cheap. We bias batch sizes accordingly. + * + * Because we operate based on chunks we need to make sure that when we + * actually deal with multiple chunks that we don't step on our own feet with + * our database updates. Since compaction of message key K results in a new + * message key K' such that K' <= K, we can reliably issue database + * updates for all values <= K. Which means our feet are safe no matter + * when we issue the update command. For maximum cache benefit, we issue + * our updates prior to our new query since they should still be maximally + * hot at that point. 
+ */ + *_worker_folderCompactionPass(aJob, aCallbackHandle) { + yield this._indexerEnterFolder(aJob.id); + + // It's conceivable that with a folder sweep we might end up trying to + // compact a folder twice. Bail early in this case. + if (!this._indexingGlodaFolder.compacted) { + yield GlodaConstants.kWorkDone; + } + + // this is a forward enumeration (sometimes we reverse enumerate; not here) + this._indexerGetEnumerator(this.kEnumIndexedMsgs); + + const HEADER_CHECK_SYNC_BLOCK_SIZE = this.HEADER_CHECK_SYNC_BLOCK_SIZE; + const FOLDER_COMPACTION_PASS_BATCH_SIZE = + this.FOLDER_COMPACTION_PASS_BATCH_SIZE; + + // Tuples of [gloda id, message key, message-id header] from + // folderCompactionPassBlockFetch + let glodaIdsMsgKeysHeaderIds = []; + // Unpack each tuple from glodaIdsMsgKeysHeaderIds into these guys. + // (Initialize oldMessageKey because we use it to kickstart our query.) + let oldGlodaId, + oldMessageKey = -1, + oldHeaderMessageId; + // parallel lists of gloda ids and message keys to pass to + // GlodaDatastore.updateMessageLocations + let updateGlodaIds = []; + let updateMessageKeys = []; + // list of gloda id's to mark deleted + let deleteGlodaIds = []; + + // for GC reasons we need to track the number of headers seen + let numHeadersSeen = 0; + + // We are consuming two lists; our loop structure has to reflect that. 
+ let headerIter = this._indexingEnumerator[Symbol.iterator](); + let mayHaveMoreGlodaMessages = true; + let keepIterHeader = false; + let keepGlodaTuple = false; + let msgHdr = null; + while (headerIter || mayHaveMoreGlodaMessages) { + let glodaId; + if (headerIter) { + if (!keepIterHeader) { + let result = headerIter.next(); + if (result.done) { + headerIter = null; + msgHdr = null; + // do the loop check again + continue; + } + msgHdr = result.value; + } else { + keepIterHeader = false; + } + } + + if (msgHdr) { + numHeadersSeen++; + if (numHeadersSeen % HEADER_CHECK_SYNC_BLOCK_SIZE == 0) { + yield GlodaConstants.kWorkSync; + } + + // There is no need to check with PendingCommitTracker. If a message + // somehow got indexed between the time the compaction killed + // everything and the time we run, that is a bug. + glodaId = msgHdr.getUint32Property(GLODA_MESSAGE_ID_PROPERTY); + // (there is also no need to check for gloda dirty since the enumerator + // filtered that for us.) + } + + // get more [gloda id, message key, message-id header] tuples if out + if (!glodaIdsMsgKeysHeaderIds.length && mayHaveMoreGlodaMessages) { + // Since we operate on blocks, getting a new block implies we should + // flush the last block if applicable. + if (updateGlodaIds.length) { + GlodaDatastore.updateMessageLocations( + updateGlodaIds, + updateMessageKeys, + aJob.id, + true + ); + updateGlodaIds = []; + updateMessageKeys = []; + } + + if (deleteGlodaIds.length) { + GlodaDatastore.markMessagesDeletedByIDs(deleteGlodaIds); + deleteGlodaIds = []; + } + + GlodaDatastore.folderCompactionPassBlockFetch( + aJob.id, + oldMessageKey + 1, + FOLDER_COMPACTION_PASS_BATCH_SIZE, + aCallbackHandle.wrappedCallback + ); + glodaIdsMsgKeysHeaderIds = yield GlodaConstants.kWorkAsync; + // Reverse so we can use pop instead of shift and I don't need to be + // paranoid about performance. 
+ glodaIdsMsgKeysHeaderIds.reverse(); + + if (!glodaIdsMsgKeysHeaderIds.length) { + mayHaveMoreGlodaMessages = false; + + // We shouldn't be in the loop anymore if headerIter is dead now. + if (!headerIter) { + break; + } + } + } + + if (!keepGlodaTuple) { + if (mayHaveMoreGlodaMessages) { + [oldGlodaId, oldMessageKey, oldHeaderMessageId] = + glodaIdsMsgKeysHeaderIds.pop(); + } else { + oldGlodaId = oldMessageKey = oldHeaderMessageId = null; + } + } else { + keepGlodaTuple = false; + } + + // -- normal expected case + if (glodaId == oldGlodaId) { + // only need to do something if the key is not right + if (msgHdr.messageKey != oldMessageKey) { + updateGlodaIds.push(glodaId); + updateMessageKeys.push(msgHdr.messageKey); + } + } else { + // -- exceptional cases + // This should always return a value unless something is very wrong. + // We do not want to catch the exception if one happens. + let idBasedHeader = oldHeaderMessageId + ? this._indexingDatabase.getMsgHdrForMessageID(oldHeaderMessageId) + : false; + // - Case 1b. + // We want to mark the message as deleted. + if (idBasedHeader == null) { + deleteGlodaIds.push(oldGlodaId); + } else if ( + idBasedHeader && + ((msgHdr && idBasedHeader.messageKey < msgHdr.messageKey) || !msgHdr) + ) { + // - Case 1a + // The expected case is that the message referenced by the gloda + // database precedes the header the enumerator told us about. This + // is expected because if PendingCommitTracker did not mark the + // message as indexed/clean then the enumerator would not tell us + // about it. + // Also, if we ran out of headers from the enumerator, this is a dead + // giveaway that this is the expected case. 
          // tell the pending commit tracker about the gloda database one
          PendingCommitTracker.track(idBasedHeader, oldGlodaId);
          // and we might need to update the message key too
          if (idBasedHeader.messageKey != oldMessageKey) {
            updateGlodaIds.push(oldGlodaId);
            updateMessageKeys.push(idBasedHeader.messageKey);
          }
          // Take another pass through the loop so that we check the
          // enumerator header against the next message in the gloda
          // database.
          keepIterHeader = true;
        } else if (msgHdr) {
          // - Case 2
          // Whereas if the message referenced by gloda has a message key
          // greater than the one returned by the enumerator, then we have a
          // header claiming to be indexed by gloda that gloda does not
          // actually know about. This is exceptional and gets a warning.
          this._log.warn(
            "Observed header that claims to be gloda indexed " +
              "but that gloda has never heard of during " +
              "compaction." +
              " In folder: " +
              msgHdr.folder.URI +
              " sketchy key: " +
              msgHdr.messageKey +
              " subject: " +
              msgHdr.mime2DecodedSubject
          );
          // Keep this tuple around for the next enumerator provided header
          keepGlodaTuple = true;
        }
      }
    }
    // If we don't flush the update, no one will!
    if (updateGlodaIds.length) {
      GlodaDatastore.updateMessageLocations(
        updateGlodaIds,
        updateMessageKeys,
        aJob.id,
        true
      );
    }
    if (deleteGlodaIds.length) {
      GlodaDatastore.markMessagesDeletedByIDs(deleteGlodaIds);
    }

    // The compaction pass is complete; clear the gloda folder's
    // "compacted" flag so normal indexing may enter it again.
    this._indexingGlodaFolder._setCompactedState(false);

    this._indexerLeaveFolder();
    yield GlodaConstants.kWorkDone;
  },

  /**
   * Index the contents of a folder.
   *
   * Runs in two passes over the folder's enumerator: pass 1 counts the
   * messages to index (so progress can be reported via aJob.goal), pass 2
   * actually indexes them via _indexMessage. A filthy folder is first
   * pre-processed: every message bearing a gloda id is marked filthy, after
   * which the folder is downgraded to plain dirty.
   *
   * @param aJob IndexingJob whose id is the gloda folder id to index; if its
   *     "force" attribute is truthy, all messages are indexed rather than
   *     just the un-indexed ones.
   * @param aCallbackHandle Callback-driver handle used for the async yields.
   */
  *_worker_folderIndex(aJob, aCallbackHandle) {
    yield this._indexerEnterFolder(aJob.id);

    if (!this.shouldIndexFolder(this._indexingFolder)) {
      aJob.safelyInvokeCallback(true);
      yield GlodaConstants.kWorkDone;
    }

    // Make sure listeners get notified about this job.
    GlodaIndexer._notifyListeners();

    // there is of course a cost to all this header investigation even if we
    // don't do something. so we will yield with kWorkSync for every block.
    const HEADER_CHECK_SYNC_BLOCK_SIZE = this.HEADER_CHECK_SYNC_BLOCK_SIZE;

    // we can safely presume if we are here that this folder has been selected
    // for offline processing...

    // -- Filthy Folder
    // A filthy folder may have misleading properties on the message that claim
    // the message is indexed. They are misleading because the database, for
    // whatever reason, does not have the messages (accurately) indexed.
    // We need to walk all the messages and mark them filthy if they have a
    // dirty property. Once we have done this, we can downgrade the folder's
    // dirty status to plain dirty. We do this rather than trying to process
    // everyone in one go in a filthy context because if we have to terminate
    // indexing before we quit, we don't want to have to re-index messages next
    // time. (This could even lead to never completing indexing in a
    // pathological situation.)
    let glodaFolder = GlodaDatastore._mapFolder(this._indexingFolder);
    if (glodaFolder.dirtyStatus == glodaFolder.kFolderFilthy) {
      this._indexerGetEnumerator(this.kEnumIndexedMsgs, true);
      let count = 0;
      for (let msgHdr of this._indexingEnumerator) {
        // we still need to avoid locking up the UI, pause periodically...
        if (++count % HEADER_CHECK_SYNC_BLOCK_SIZE == 0) {
          yield GlodaConstants.kWorkSync;
        }

        let glodaMessageId = msgHdr.getUint32Property(
          GLODA_MESSAGE_ID_PROPERTY
        );
        // if it has a gloda message id, we need to mark it filthy
        if (glodaMessageId != 0) {
          msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, this.kMessageFilthy);
        }
        // if it doesn't have a gloda message id, we will definitely index it,
        // so no action is required.
      }
      // Commit the filthy status changes to the message database.
      this._indexingDatabase.commit(Ci.nsMsgDBCommitType.kLargeCommit);

      // this will automatically persist to the database
      glodaFolder._downgradeDirtyStatus(glodaFolder.kFolderDirty);
    }

    // Figure out whether we're supposed to index _everything_ or just what
    // has not yet been indexed.
    let force = "force" in aJob && aJob.force;
    let enumeratorType = force ? this.kEnumAllMsgs : this.kEnumMsgsToIndex;

    // Pass 1: count the number of messages to index.
    // We do this in order to be able to report to the user what we're doing.
    // TODO: give up after reaching a certain number of messages in folders
    // with ridiculous numbers of messages and make the interface just say
    // something like "over N messages to go."

    this._indexerGetEnumerator(enumeratorType);

    let numMessagesToIndex = 0;
    // eslint-disable-next-line no-unused-vars
    for (let ignore of this._indexingEnumerator) {
      // We're only counting, so do bigger chunks on this pass.
      ++numMessagesToIndex;
      if (numMessagesToIndex % (HEADER_CHECK_SYNC_BLOCK_SIZE * 8) == 0) {
        yield GlodaConstants.kWorkSync;
      }
    }

    aJob.goal = numMessagesToIndex;

    if (numMessagesToIndex > 0) {
      // We used up the iterator, get a new one.
      this._indexerGetEnumerator(enumeratorType);

      // Pass 2: index the messages.
      let count = 0;
      for (let msgHdr of this._indexingEnumerator) {
        // per above, we want to periodically release control while doing all
        // this header traversal/investigation.
        if (++count % HEADER_CHECK_SYNC_BLOCK_SIZE == 0) {
          yield GlodaConstants.kWorkSync;
        }

        // To keep our counts more accurate, increment the offset before
        // potentially skipping any messages.
        ++aJob.offset;

        // Skip messages that have not yet been reported to us as existing via
        // msgsClassified.
        if (
          this._indexingFolder.getProcessingFlags(msgHdr.messageKey) &
          NOT_YET_REPORTED_PROCESSING_FLAGS
        ) {
          continue;
        }

        // Because the gloda id could be in-flight, we need to double-check the
        // enumerator here since it can't know about our in-memory stuff.
        let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr);
        // if the message seems valid and we are not forcing indexing, skip it.
        // (that means good gloda id and not dirty)
        if (
          !force &&
          glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
          glodaDirty == this.kMessageClean
        ) {
          continue;
        }

        this._log.debug(">>> calling _indexMessage");
        yield aCallbackHandle.pushAndGo(
          this._indexMessage(msgHdr, aCallbackHandle),
          { what: "indexMessage", msgHdr }
        );
        GlodaIndexer._indexedMessageCount++;
        this._log.debug("<<< back from _indexMessage");
      }
    }

    // This will trigger an (async) db update which cannot hit the disk prior to
    // the actual database records that constitute the clean state.
    // XXX There is the slight possibility that, in the event of a crash, this
    // will hit the disk but the gloda-id properties on the headers will not
    // get set. This should ideally be resolved by detecting a non-clean
    // shutdown and marking all folders as dirty.
    glodaFolder._downgradeDirtyStatus(glodaFolder.kFolderClean);

    // by definition, it's not likely we'll visit this folder again anytime soon
    this._indexerLeaveFolder();

    aJob.safelyInvokeCallback(true);

    yield GlodaConstants.kWorkDone;
  },

  /**
   * Invoked when a "message" job is scheduled so that we can clear
   * _pendingAddJob if that is the job. We do this so that work items are not
   * added to _pendingAddJob while it is being processed.
   */
  _schedule_messageIndex(aJob, aCallbackHandle) {
    // we do not want new work items to be added as we are processing, so
    // clear _pendingAddJob. A new job will be created as needed.
    if (aJob === this._pendingAddJob) {
      this._pendingAddJob = null;
    }
    // update our goal from the items length
    aJob.goal = aJob.items.length;
  },
  /**
   * If the job gets canceled, we need to make sure that we clear out pending
   * add job or our state will get wonky.
   */
  _canceled_messageIndex(aJob) {
    if (aJob === this._pendingAddJob) {
      this._pendingAddJob = null;
    }
  },

  /**
   * Index a specific list of messages that we know to index from
   * event-notification hints.
   *
   * @param aJob IndexingJob whose items are [gloda folder id, message key]
   *     or [gloda folder id, message-id string] pairs (see below).
   * @param aCallbackHandle Callback-driver handle used for async steps.
   */
  *_worker_messageIndex(aJob, aCallbackHandle) {
    // if we are already in the correct folder, our "get in the folder" clause
    // will not execute, so we need to make sure this value is accurate in
    // that case. (and we want to avoid multiple checks...)
    for (; aJob.offset < aJob.items.length; aJob.offset++) {
      let item = aJob.items[aJob.offset];
      // item is either [folder ID, message key] or
      // [folder ID, message ID]

      let glodaFolderId = item[0];
      // If the folder has been deleted since we queued, skip this message
      if (!GlodaDatastore._folderIdKnown(glodaFolderId)) {
        continue;
      }
      let glodaFolder = GlodaDatastore._mapFolderID(glodaFolderId);

      // Stay out of folders that:
      // - are compacting / compacted and not yet processed
      // - got deleted (this would be redundant if we had a stance on id nukage)
      // (these things could have changed since we queued the event)
      if (
        glodaFolder.compacting ||
        glodaFolder.compacted ||
        glodaFolder._deleted
      ) {
        continue;
      }

      // get in the folder
      if (this._indexingGlodaFolder != glodaFolder) {
        yield this._indexerEnterFolder(glodaFolderId);

        // Now that we have the real nsIMsgFolder, sanity-check that we should
        // be indexing it. (There are some checks that require the
        // nsIMsgFolder.)
        if (!this.shouldIndexFolder(this._indexingFolder)) {
          continue;
        }
      }

      let msgHdr;
      // GetMessageHeader can be affected by the use cache, so we need to check
      // ContainsKey first to see if the header is really actually there.
      if (typeof item[1] == "number") {
        // (msgHdr may end up as the boolean false here rather than a header;
        // only its truthiness is consulted below.)
        msgHdr =
          this._indexingDatabase.containsKey(item[1]) &&
          this._indexingFolder.GetMessageHeader(item[1]);
      } else {
        // Same deal as in move processing.
        // TODO fixme to not assume singular message-id's.
        msgHdr = this._indexingDatabase.getMsgHdrForMessageID(item[1]);
      }

      if (msgHdr) {
        yield aCallbackHandle.pushAndGo(
          this._indexMessage(msgHdr, aCallbackHandle),
          { what: "indexMessage", msgHdr }
        );
      } else {
        yield GlodaConstants.kWorkSync;
      }
    }

    // There is no real reason to stay 'in' the folder. If we are going to get
    // more events from the folder, its database would have to be open for us
    // to get the events, so it's not like we're creating an efficiency
    // problem where we unload a folder just to load it again in 2 seconds.
    // (Well, at least assuming the views are good about holding onto the
    // database references even though they go out of their way to avoid
    // holding onto message header references.)
    this._indexerLeaveFolder();

    yield GlodaConstants.kWorkDone;
  },

  /**
   * Recover from a "folder" or "message" job failing inside a call to
   * |_indexMessage|, marking the message bad. If we were not in an
   * |_indexMessage| call, then fail to recover.
   *
   * @param aJob The job that was being worked. We ignore this for now.
   * @param aContextStack The callbackHandle mechanism's context stack. When we
   *     invoke pushAndGo for _indexMessage we put something in so we can
   *     detect when it is on the async stack.
   * @param aException The exception that is necessitating we attempt to
   *     recover.
   *
   * @returns 1 if we were able to recover (because we want the call stack
   *     popped down to our worker), false if we can't.
   */
  _recover_indexMessage(aJob, aContextStack, aException) {
    // See if indexMessage is on the stack...
    if (
      aContextStack.length >= 2 &&
      aContextStack[1] &&
      "what" in aContextStack[1] &&
      aContextStack[1].what == "indexMessage"
    ) {
      // it is, so this is probably recoverable.

      this._log.debug(
        "Exception while indexing message, marking it bad (gloda id of 1)."
      );

      // -- Mark the message as bad
      let msgHdr = aContextStack[1].msgHdr;
      // (In the worst case, the header is no longer valid, which will result in
      // exceptions. We need to be prepared for that.)
      try {
        msgHdr.setUint32Property(
          GLODA_MESSAGE_ID_PROPERTY,
          GLODA_BAD_MESSAGE_ID
        );
        // clear the dirty bit if it has one
        if (msgHdr.getUint32Property(GLODA_DIRTY_PROPERTY)) {
          msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, 0);
        }
      } catch (ex) {
        // If we are indexing a folder and the message header is no longer
        // valid, then it's quite likely the whole folder is no longer valid.
        // But since in the event-driven message indexing case we could have
        // other valid things to look at, let's try and recover. The folder
        // indexing case will come back to us shortly and we will indicate
        // recovery is not possible at that point.
        // So do nothing here since by popping the indexing of the specific
        // message out of existence we are recovering.
      }
      return 1;
    }
    return false;
  },

  /**
   * Cleanup after an aborted "folder" or "message" job.
   */
  _cleanup_indexing(aJob) {
    this._indexerLeaveFolder();
    // Signal abort to the job's callback (first argument false = the task
    // did not run to completion).
    aJob.safelyInvokeCallback(false);
  },

  /**
   * Maximum number of deleted messages to process at a time. Arbitrary; there
   * are no real known performance constraints at this point.
   */
  DELETED_MESSAGE_BLOCK_SIZE: 32,

  /**
   * Process pending deletes...
   */
  *_worker_processDeletes(aJob, aCallbackHandle) {
    // Count the number of messages we will eventually process.
    // People freak out when the number is constantly increasing because they
    // think gloda has gone rogue. (Note: new deletions can still accumulate
    // during our execution, so we may 'expand' our count a little still.)
    this._datastore.countDeletedMessages(aCallbackHandle.wrappedCallback);
    aJob.goal = yield GlodaConstants.kWorkAsync;
    this._log.debug(
      "There are currently " +
        aJob.goal +
        " messages awaiting" +
        " deletion processing."
    );

    // get a block of messages to delete.
    let query = Gloda.newQuery(GlodaConstants.NOUN_MESSAGE, {
      noDbQueryValidityConstraints: true,
    });
    query._deleted(1);
    query.limit(this.DELETED_MESSAGE_BLOCK_SIZE);
    let deletedCollection = query.getCollection(aCallbackHandle);
    yield GlodaConstants.kWorkAsync;

    while (deletedCollection.items.length) {
      for (let message of deletedCollection.items) {
        // If it turns out our count is wrong (because some new deletions
        // happened since we entered this worker), let's issue a new count
        // and use that to accurately update our goal.
        if (aJob.offset >= aJob.goal) {
          this._datastore.countDeletedMessages(aCallbackHandle.wrappedCallback);
          aJob.goal += yield GlodaConstants.kWorkAsync;
        }

        yield aCallbackHandle.pushAndGo(
          this._deleteMessage(message, aCallbackHandle)
        );
        aJob.offset++;
        yield GlodaConstants.kWorkSync;
      }

      // Re-run the same limited query to fetch the next block of deleted
      // messages; the loop terminates when a fetch comes back empty.
      deletedCollection = query.getCollection(aCallbackHandle);
      yield GlodaConstants.kWorkAsync;
    }
    // No deleted messages remain (for now); clear the pending-deletions flag.
    this.pendingDeletions = false;

    yield GlodaConstants.kWorkDone;
  },

  *_worker_fixMissingContacts(aJob, aCallbackHandle) {
    let identityContactInfos = [];

    // -- asynchronously get a list of all identities without contacts
    // The upper bound on the number of messed up contacts is the number of
    // contacts in the user's address book. This should be small enough
    // (and the data size small enough) that this won't explode thunderbird.
    let queryStmt = GlodaDatastore._createAsyncStatement(
      "SELECT identities.id, identities.contactID, identities.value " +
        "FROM identities " +
        "LEFT JOIN contacts ON identities.contactID = contacts.id " +
        "WHERE identities.kind = 'email' AND contacts.id IS NULL",
      true
    );
    queryStmt.executeAsync({
      handleResult(aResultSet) {
        let row;
        while ((row = aResultSet.getNextRow())) {
          identityContactInfos.push({
            identityId: row.getInt64(0),
            contactId: row.getInt64(1),
            email: row.getString(2),
          });
        }
      },
      // NOTE(review): SQL errors are silently swallowed here;
      // handleCompletion still fires and resumes the generator regardless.
      handleError(aError) {},
      handleCompletion(aReason) {
        GlodaDatastore._asyncCompleted();
        aCallbackHandle.wrappedCallback();
      },
    });
    queryStmt.finalize();
    // Manually bump the pending-statement count; presumably balanced by the
    // _asyncCompleted() call in handleCompletion — TODO confirm.
    GlodaDatastore._pendingAsyncStatements++;
    yield GlodaConstants.kWorkAsync;

    // -- perform fixes only if there were missing contacts
    if (identityContactInfos.length) {
      const yieldEvery = 64;
      // - create the missing contacts
      for (let i = 0; i < identityContactInfos.length; i++) {
        if (i % yieldEvery === 0) {
          yield GlodaConstants.kWorkSync;
        }

        let info = identityContactInfos[i],
          card = MailServices.ab.cardForEmailAddress(info.email),
          contact = new GlodaContact(
            GlodaDatastore,
            info.contactId,
            null,
            null,
            card ? card.displayName || info.email : info.email,
            0,
            0
          );
        GlodaDatastore.insertContact(contact);

        // update the in-memory rep of the identity to know about the contact
        // if there is one.
        let identity = GlodaCollectionManager.cacheLookupOne(
          GlodaConstants.NOUN_IDENTITY,
          info.identityId,
          false
        );
        if (identity) {
          // Unfortunately, although this fixes the (reachable) Identity and
          // exposes the Contact, it does not make the Contact reachable from
          // the collection manager. This will make explicit queries that look
          // up the contact potentially see the case where
          // contact.identities[0].contact !== contact. Alternately, that
          // may not happen and instead the "contact" object we created above
          // may become unlinked. (I'd have to trace some logic I don't feel
          // like tracing.) Either way, The potential fallout is minimal
          // since the object identity invariant will just lapse and
          // popularity on the contact may become stale, and neither of those
          // meaningfully affect the operation of anything in Thunderbird.
          // If we really cared, we could find all the dominant collections
          // that reference the identity and update their corresponding
          // contact collection to make it reachable. That use-case does not
          // exist outside of here, which is why we're punting.
          identity._contact = contact;
          contact._identities = [identity];
        }

        // NOTE: If the addressbook indexer did anything useful other than
        // adapting to name changes, we could schedule indexing of the cards at
        // this time. However, as of this writing, it doesn't, and this task
        // is a one-off relevant only to the time of this writing.
      }

      // - mark all folders as dirty, initiate indexing sweep
      this.dirtyAllKnownFolders();
      this.indexingSweepNeeded = true;
    }

    // -- mark the schema upgrade, be done
    GlodaDatastore._updateSchemaVersion(GlodaDatastore._schemaVersion);
    yield GlodaConstants.kWorkDone;
  },

  /**
   * Determine whether a folder is suitable for indexing.
   *
   * @param aMsgFolder An nsIMsgFolder you want to see if we should index.
   *
   * @returns true if we want to index messages in this type of folder, false if
   *     we do not.
   */
  shouldIndexFolder(aMsgFolder) {
    let folderFlags = aMsgFolder.flags;
    // Completely ignore non-mail and virtual folders. They should never even
    // get to be GlodaFolder instances.
    if (
      !(folderFlags & Ci.nsMsgFolderFlags.Mail) ||
      folderFlags & Ci.nsMsgFolderFlags.Virtual
    ) {
      return false;
    }

    // Some folders do not really exist; we can detect this by getStringProperty
    // exploding when we call it.
    // This is primarily a concern because _mapFolder calls said exploding
    // method, but we also don't want to even think about indexing folders
    // that don't exist. (Such folders are likely the result of a messed up
    // profile.)
    try {
      // flags is used because it should always be in the cache avoiding a miss
      // which would compel an msf open.
      aMsgFolder.getStringProperty("flags");
    } catch (ex) {
      return false;
    }

    // Now see what our gloda folder information has to say about the folder.
    let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder);
    return glodaFolder.indexingPriority != glodaFolder.kIndexingNeverPriority;
  },

  /**
   * Sets the indexing priority for this folder and persists it both to Gloda,
   * and, for backup purposes, to the nsIMsgFolder via string property as well.
   *
   * Setting this priority may cause the indexer to either reindex this folder,
   * or remove this folder from the existing index.
   *
   * @param {nsIMsgFolder} aFolder
   * @param {number} aPriority (one of the priority constants from GlodaFolder)
   */
  setFolderIndexingPriority(aFolder, aPriority) {
    let glodaFolder = GlodaDatastore._mapFolder(aFolder);

    // if there's been no change, we're done
    if (aPriority == glodaFolder.indexingPriority) {
      return;
    }

    // save off the old priority, and set the new one
    let previousPrio = glodaFolder.indexingPriority;
    glodaFolder._indexingPriority = aPriority;

    // persist the new priority
    GlodaDatastore.updateFolderIndexingPriority(glodaFolder);
    // (the string property is the backup copy mentioned in the doc above)
    aFolder.setStringProperty("indexingPriority", Number(aPriority).toString());

    // if we've been told never to index this folder...
    if (aPriority == glodaFolder.kIndexingNeverPriority) {
      // stop doing so
      if (this._indexingFolder == aFolder) {
        GlodaIndexer.killActiveJob();
      }

      // mark all existing messages as deleted
      GlodaDatastore.markMessagesDeletedByFolderID(glodaFolder.id);

      // re-index
      GlodaMsgIndexer.indexingSweepNeeded = true;
    } else if (previousPrio == glodaFolder.kIndexingNeverPriority) {
      // there's no existing index, but the user now wants one
      // Mark the folder filthy so the next sweep rebuilds its index from
      // scratch.
      glodaFolder._dirtyStatus = glodaFolder.kFolderFilthy;
      GlodaDatastore.updateFolderDirtyStatus(glodaFolder);
      GlodaMsgIndexer.indexingSweepNeeded = true;
    }
  },

  /**
   * Resets the indexing priority on the given folder to whatever the default
   * is for folders of that type.
   *
   * @note Calls setFolderIndexingPriority under the hood, so has identical
   *     potential reindexing side-effects
   *
   * @param {nsIMsgFolder} aFolder
   * @param {boolean} aAllowSpecialFolderIndexing
   */
  resetFolderIndexingPriority(aFolder, aAllowSpecialFolderIndexing) {
    this.setFolderIndexingPriority(
      aFolder,
      GlodaDatastore.getDefaultIndexingPriority(
        aFolder,
        aAllowSpecialFolderIndexing
      )
    );
  },

  /**
   * Queue all of the folders of all of the accounts of the current profile
   * for indexing. We traverse all folders and queue them immediately to try
   * and have an accurate estimate of the number of folders that need to be
   * indexed. (We previously queued accounts rather than immediately
   * walking their list of folders.)
   */
  indexEverything() {
    this._log.info("Queueing all accounts for indexing.");

    GlodaDatastore._beginTransaction();
    for (let account of MailServices.accounts.accounts) {
      this.indexAccount(account);
    }
    GlodaDatastore._commitTransaction();
  },

  /**
   * Queue all of the folders belonging to an account for indexing.
   */
  indexAccount(aAccount) {
    let rootFolder = aAccount.incomingServer.rootFolder;
    if (rootFolder instanceof Ci.nsIMsgFolder) {
      this._log.info("Queueing account folders for indexing: " + aAccount.key);

      for (let folder of rootFolder.descendants) {
        if (this.shouldIndexFolder(folder)) {
          GlodaIndexer.indexJob(
            new IndexingJob("folder", GlodaDatastore._mapFolder(folder).id)
          );
        }
      }
    } else {
      this._log.info("Skipping Account, root folder not nsIMsgFolder");
    }
  },

  /**
   * Queue a single folder for indexing given an nsIMsgFolder.
   *
   * @param [aOptions.callback] A callback to invoke when the folder finishes
   *     indexing. First argument is true if the task ran to completion
   *     successfully, false if we had to abort for some reason.
   * @param [aOptions.force=false] Should we force the indexing of all messages
   *     in the folder (true) or just index what hasn't been indexed (false).
   * @returns true if we are going to index the folder, false if not.
   */
  indexFolder(aMsgFolder, aOptions) {
    if (!this.shouldIndexFolder(aMsgFolder)) {
      return false;
    }
    let glodaFolder = GlodaDatastore._mapFolder(aMsgFolder);
    // stay out of compacting/compacted folders
    if (glodaFolder.compacting || glodaFolder.compacted) {
      return false;
    }

    this._log.info("Queue-ing folder for indexing: " + aMsgFolder.prettyName);
    let job = new IndexingJob("folder", glodaFolder.id);
    if (aOptions) {
      if ("callback" in aOptions) {
        job.callback = aOptions.callback;
      }
      // NOTE(review): the mere presence of a "force" key forces indexing —
      // an explicit aOptions.force === false still results in job.force set
      // to true here. Confirm no caller ever passes force: false.
      if ("force" in aOptions) {
        job.force = true;
      }
    }
    GlodaIndexer.indexJob(job);
    return true;
  },

  /**
   * Queue a list of messages for indexing.
   *
   * @param aFoldersAndMessages List of [nsIMsgFolder, message key] tuples.
   */
  indexMessages(aFoldersAndMessages) {
    let job = new IndexingJob("message", null);
    // Each queued item becomes [gloda folder id, message key], the tuple
    // shape the "message" worker consumes.
    job.items = aFoldersAndMessages.map(fm => [
      GlodaDatastore._mapFolder(fm[0]).id,
      fm[1],
    ]);
    GlodaIndexer.indexJob(job);
  },

  /**
   * Mark all known folders as dirty so that the next indexing sweep goes
   * into all folders and checks their contents to see if they need to be
   * indexed.
   *
   * This is being added for the migration case where we want to try and reindex
   * all of the messages that had been marked with GLODA_BAD_MESSAGE_ID but
   * which is now GLODA_OLD_BAD_MESSAGE_ID and so we should attempt to reindex
   * them.
   */
  dirtyAllKnownFolders() {
    // Just iterate over the datastore's folder map and tell each folder to
    // be dirty if its priority is not disabled.
    for (let folderID in GlodaDatastore._folderByID) {
      let glodaFolder = GlodaDatastore._folderByID[folderID];
      if (glodaFolder.indexingPriority !== glodaFolder.kIndexingNeverPriority) {
        glodaFolder._ensureFolderDirty();
      }
    }
  },

  /**
   * Given a message header, return whether this message is likely to have
   * been indexed or not.
   *
   * This means the message must:
   * - Be in a folder eligible for gloda indexing. (Not News, etc.)
   * - Be in a non-filthy folder.
   * - Be gloda-indexed and non-filthy.
   *
   * @param aMsgHdr A message header.
   * @returns true if the message is likely to have been indexed.
   */
  isMessageIndexed(aMsgHdr) {
    // If it's in a folder that we flat out do not index, say no.
    if (!this.shouldIndexFolder(aMsgHdr.folder)) {
      return false;
    }
    let glodaFolder = GlodaDatastore._mapFolder(aMsgHdr.folder);
    let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(aMsgHdr);
    return (
      glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
      glodaDirty != GlodaMsgIndexer.kMessageFilthy &&
      // (defensive truthiness check on the _mapFolder result — presumably it
      // can come back null/undefined; confirm against GlodaDatastore)
      glodaFolder &&
      glodaFolder.dirtyStatus != glodaFolder.kFolderFilthy
    );
  },

  /* *********** Event Processing *********** */

  /**
   * Tracks messages we have received msgKeyChanged notifications for in order
   * to provide batching and to suppress needless reindexing when we receive
   * the expected follow-up msgsClassified notification.
   *
   * The entries in this dictionary should be extremely short-lived as we
   * receive the msgKeyChanged notification as the offline fake header is
   * converted into a real header (which is accompanied by a msgAdded
   * notification we don't pay attention to). Once the headers finish
   * updating, the message classifier will get its at-bat and should likely
   * find that the messages have already been classified and so fast-path
   * them.
   *
   * The keys in this dictionary are chosen to be consistent with those of
   * PendingCommitTracker: the folder.URI + "#" + the (new) message key.
   * The values in the dictionary are either an object with "id" (the gloda
   * id), "key" (the new message key), and "dirty" (is it dirty and so
   * should still be queued for indexing) attributes, or null indicating that
   * no change in message key occurred and so no database changes are required.
   */
  _keyChangedBatchInfo: {},

  /**
   * Common logic for things that want to feed event-driven indexing. This gets
   * called by both |_msgFolderListener.msgsClassified| when we are first
   * seeing a message as well as by |_folderListener| when things happen to
   * existing messages.
   * Although we could slightly specialize for the new-to-us case, it works
   * out to be cleaner to just treat them the same and take a very small
   * performance hit.
   *
   * @param aMsgHdrs array of messages to treat as potentially changed.
   * @param aDirtyingEvent Is this event inherently dirtying? Receiving a
   *     msgsClassified notification is not inherently dirtying because it is
   *     just telling us that a message exists. We use this knowledge to
   *     ignore the msgsClassified notifications for messages we have received
   *     msgKeyChanged notifications for and fast-pathed. Since it is possible
   *     for user action to do something that dirties the message between the
   *     time we get the msgKeyChanged notification and when we receive the
   *     msgsClassified notification, we want to make sure we don't get
   *     confused. (Although since we remove the message from our ignore-set
   *     after the first notification, we would likely just mistakenly treat
   *     the msgsClassified notification as something dirtying, so it would
   *     still work out...)
   */
  _reindexChangedMessages(aMsgHdrs, aDirtyingEvent) {
    let glodaIdsNeedingDeletion = null;
    let messageKeyChangedIds = null,
      messageKeyChangedNewKeys = null;
    for (let msgHdr of aMsgHdrs) {
      // -- Index this folder?
      let msgFolder = msgHdr.folder;
      if (!this.shouldIndexFolder(msgFolder)) {
        continue;
      }
      // -- Ignore messages in filthy folders!
      // A filthy folder can only be processed by an indexing sweep, and at
      // that point the message will get indexed.
      let glodaFolder = GlodaDatastore._mapFolder(msgHdr.folder);
      if (glodaFolder.dirtyStatus == glodaFolder.kFolderFilthy) {
        continue;
      }

      // -- msgKeyChanged event follow-up
      if (!aDirtyingEvent) {
        let keyChangedKey = msgHdr.folder.URI + "#" + msgHdr.messageKey;
        if (keyChangedKey in this._keyChangedBatchInfo) {
          // NOTE(review): `var` (function-scoped) rather than `let`; only
          // used within this block, so behavior is unaffected.
          var keyChangedInfo = this._keyChangedBatchInfo[keyChangedKey];
          delete this._keyChangedBatchInfo[keyChangedKey];

          // Null means to ignore this message because the key did not change
          // (and the message was not dirty so it is safe to ignore.)
          if (keyChangedInfo == null) {
            continue;
          }
          // (the key may be null if we only generated the entry because the
          // message was dirty)
          if (keyChangedInfo.key !== null) {
            if (messageKeyChangedIds == null) {
              messageKeyChangedIds = [];
              messageKeyChangedNewKeys = [];
            }
            messageKeyChangedIds.push(keyChangedInfo.id);
            messageKeyChangedNewKeys.push(keyChangedInfo.key);
          }
          // ignore the message because it was not dirty
          if (!keyChangedInfo.isDirty) {
            continue;
          }
        }
      }

      // -- Index this message?
      // We index local messages, IMAP messages that are offline, and IMAP
      // messages that aren't offline but whose folders aren't offline either
      let isFolderLocal = msgFolder instanceof Ci.nsIMsgLocalMailFolder;
      if (!isFolderLocal) {
        if (
          !(msgHdr.flags & Ci.nsMsgMessageFlags.Offline) &&
          msgFolder.getFlag(Ci.nsMsgFolderFlags.Offline)
        ) {
          continue;
        }
      }
      // Ignore messages whose processing flags indicate it has not yet been
      // classified. In the IMAP case if the Offline flag is going to get set
      // we are going to see it before the msgsClassified event so this is
      // very important.
      if (
        msgFolder.getProcessingFlags(msgHdr.messageKey) &
        NOT_YET_REPORTED_PROCESSING_FLAGS
      ) {
        continue;
      }

      let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr);

      let isSpam =
        msgHdr.getStringProperty(JUNK_SCORE_PROPERTY) == JUNK_SPAM_SCORE_STR;

      // -- Is the message currently gloda indexed?
      if (
        glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
        glodaDirty != this.kMessageFilthy
      ) {
        // - Is the message spam?
        if (isSpam) {
          // Treat this as a deletion...
          if (!glodaIdsNeedingDeletion) {
            glodaIdsNeedingDeletion = [];
          }
          glodaIdsNeedingDeletion.push(glodaId);
          // and skip to the next message
          continue;
        }

        // - Mark the message dirty if it is clean.
        // (This is the only case in which we need to mark dirty so that the
        // indexing sweep takes care of things if we don't process this in
        // an event-driven fashion. If the message has no gloda-id or does
        // and it's already dirty or filthy, it is already marked for
        // indexing.)
        if (glodaDirty == this.kMessageClean) {
          msgHdr.setUint32Property(GLODA_DIRTY_PROPERTY, this.kMessageDirty);
        }
        // if the message is pending clean, this change invalidates that.
        PendingCommitTracker.noteDirtyHeader(msgHdr);
      } else if (isSpam) {
        // If it's not indexed but is spam, ignore it.
+ continue; + } + // (we want to index the message if we are here) + + // mark the folder dirty too, so we know to look inside + glodaFolder._ensureFolderDirty(); + + if (this._pendingAddJob == null) { + this._pendingAddJob = new IndexingJob("message", null); + GlodaIndexer.indexJob(this._pendingAddJob); + } + // only queue the message if we haven't overflowed our event-driven budget + if (this._pendingAddJob.items.length < this._indexMaxEventQueueMessages) { + this._pendingAddJob.items.push([ + GlodaDatastore._mapFolder(msgFolder).id, + msgHdr.messageKey, + ]); + } else { + this.indexingSweepNeeded = true; + } + } + + // Process any message key changes (from earlier msgKeyChanged events) + if (messageKeyChangedIds != null) { + GlodaDatastore.updateMessageKeys( + messageKeyChangedIds, + messageKeyChangedNewKeys + ); + } + + // If we accumulated any deletions in there, batch them off now. + if (glodaIdsNeedingDeletion) { + GlodaDatastore.markMessagesDeletedByIDs(glodaIdsNeedingDeletion); + this.pendingDeletions = true; + } + }, + + /* ***** Folder Changes ***** */ + /** + * All additions and removals are queued for processing. Indexing messages + * is potentially phenomenally expensive, and deletion can still be + * relatively expensive due to our need to delete the message, its + * attributes, and all attributes that reference it. Additionally, + * attribute deletion costs are higher than attribute look-up because + * there is the actual row plus its 3 indices, and our covering indices are + * no help there. + * + */ + _msgFolderListener: { + indexer: null, + + /** + * We no longer use the msgAdded notification, instead opting to wait until + * junk/trait classification has run (or decided not to run) and all + * filters have run. The msgsClassified notification provides that for us. + */ + msgAdded(aMsgHdr) { + // we are never called! we do not enable this bit! 
    },

    /**
     * Process (apparently newly added) messages that have been looked at by
     * the message classifier. This ensures that if the message was going
     * to get marked as spam, this will have already happened.
     *
     * Besides truly new (to us) messages, we will also receive this event for
     * messages that are the result of IMAP message move/copy operations,
     * including both moves that generated offline fake headers and those that
     * did not. In the offline fake header case, however, we are able to
     * ignore their msgsClassified events because we will have received a
     * msgKeyChanged notification sometime in the recent past.
     *
     * @param aMsgHdrs - The headers that were just classified.
     * @param aJunkClassified - Unused here; part of the notification contract.
     * @param aTraitClassified - Unused here; part of the notification contract.
     */
    msgsClassified(aMsgHdrs, aJunkClassified, aTraitClassified) {
      this.indexer._log.debug("msgsClassified notification");
      try {
        // false = this is not a dirtying event, so queued msgKeyChanged
        // follow-up entries are consumed by the reindex pass.
        GlodaMsgIndexer._reindexChangedMessages(aMsgHdrs, false);
      } catch (ex) {
        this.indexer._log.error("Explosion in msgsClassified handling:", ex);
      }
    },

    /**
     * Any messages which have had their junk state changed are marked for
     * reindexing.
     */
    msgsJunkStatusChanged(messages) {
      this.indexer._log.debug("JunkStatusChanged notification");
      // true = dirtying event; skip the msgKeyChanged follow-up bookkeeping.
      GlodaMsgIndexer._reindexChangedMessages(messages, true);
    },

    /**
     * Handle real, actual deletion (move to trash and IMAP deletion model
     * don't count); we only see the deletion here when it becomes forever,
     * or rather _just before_ it becomes forever. Because the header is
     * going away, we need to either process things immediately or extract the
     * information required to purge it later without the header.
     * To this end, we mark all messages that were indexed in the gloda message
     * database as deleted. We set our pending deletions flag to let our
     * indexing logic know that after its next wave of folder traversal, it
     * should perform a deletion pass. If it turns out the messages are coming
     * back, the fact that deletion is thus deferred can be handy, as we can
     * reuse the existing gloda message.
     */
    msgsDeleted(aMsgHdrs) {
      this.indexer._log.debug("msgsDeleted notification");
      let glodaMessageIds = [];

      for (let msgHdr of aMsgHdrs) {
        let [glodaId, glodaDirty] = PendingCommitTracker.getGlodaState(msgHdr);
        // Only messages with a valid gloda id and not marked filthy have
        // database rows worth tombstoning.
        if (
          glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
          glodaDirty != GlodaMsgIndexer.kMessageFilthy
        ) {
          glodaMessageIds.push(glodaId);
        }
      }

      if (glodaMessageIds.length) {
        GlodaMsgIndexer._datastore.markMessagesDeletedByIDs(glodaMessageIds);
        GlodaMsgIndexer.pendingDeletions = true;
      }
    },

    /**
     * Process a move or copy.
     *
     * Moves to a local folder or an IMAP folder where we are generating offline
     * fake headers are dealt with efficiently because we get both the source
     * and destination headers. The main ingredient to having offline fake
     * headers is that allowUndo was true when the operation was performed.
     * The only non-obvious thing is that we need to make sure that we deal
     * with the impact of filthy folders and messages on gloda-id's (they
     * invalidate the gloda-id).
     *
     * Moves to an IMAP folder that do not generate offline fake headers do not
     * provide us with the target header, but the IMAP SetPendingAttributes
     * logic will still attempt to propagate the properties on the message
     * header so when we eventually see it in the msgsClassified notification,
     * it should have the properties of the source message copied over.
     * We make sure that gloda-id's do not get propagated when messages are
     * moved from IMAP folders that are marked filthy or are marked as not
     * supposed to be indexed by clearing the pending attributes for the header
     * being tracked by the destination IMAP folder.
     * We could fast-path the IMAP move case in msgsClassified by noticing that
     * a message is showing up with a gloda-id header already and just
     * performing an async location update.
     *
     * Moves that occur involving 'compacted' folders are fine and do not
     * require special handling here.
     * The one tricky super-edge-case that
     * can happen (and gets handled by the compaction pass) is the move of a
     * message that got gloda indexed that did not already have a gloda-id and
     * PendingCommitTracker did not get to flush the gloda-id before the
     * compaction happened. In that case our move logic cannot know to do
     * anything and the gloda database still thinks the message lives in our
     * folder. The compaction pass will deal with this by marking the message
     * as deleted. The rationale being that marking it deleted allows the
     * message to be re-used if it gets indexed in the target location, or if
     * the target location has already been indexed, we no longer need the
     * duplicate and it should be deleted. (Also, it is unable to distinguish
     * between a case where the message got deleted versus moved.)
     *
     * Because copied messages are, by their nature, duplicate messages, we
     * do not particularly care about them. As such, we defer their processing
     * to the automatic sync logic that will happen much later on. This is
     * potentially desirable in case the user deletes some of the original
     * messages, allowing us to reuse the gloda message representations when
     * we finally get around to indexing the messages. We do need to mark the
     * folder as dirty, though, to clue in the sync logic.
     */
    msgsMoveCopyCompleted(aMove, aSrcMsgHdrs, aDestFolder, aDestMsgHdrs) {
      this.indexer._log.debug("MoveCopy notification. Move: " + aMove);
      try {
        // ---- Move
        if (aMove) {
          // -- Effectively a deletion?
          // If the destination folder is not indexed, it's like these messages
          // are being deleted.
          if (!GlodaMsgIndexer.shouldIndexFolder(aDestFolder)) {
            this.msgsDeleted(aSrcMsgHdrs);
            return;
          }

          // -- Avoid propagation of filthy gloda-id's.
          // If the source folder is filthy or should not be indexed (and so
          // any gloda-id's found in there are gibberish), our only job is to
          // strip the gloda-id's off of all the destination headers because
          // none of the gloda-id's are valid (and so we certainly don't want
          // to try and use them as a basis for updating message keys.)
          let srcMsgFolder = aSrcMsgHdrs[0].folder;
          if (
            !this.indexer.shouldIndexFolder(srcMsgFolder) ||
            GlodaDatastore._mapFolder(srcMsgFolder).dirtyStatus ==
              GlodaFolder.prototype.kFolderFilthy
          ) {
            // Local case, just modify the destination headers directly.
            if (aDestMsgHdrs.length > 0) {
              for (let destMsgHdr of aDestMsgHdrs) {
                // zero it out if it exists
                // (no need to deal with pending commit issues here; a filthy
                // folder by definition has nothing indexed in it.)
                let glodaId = destMsgHdr.getUint32Property(
                  GLODA_MESSAGE_ID_PROPERTY
                );
                if (glodaId) {
                  destMsgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, 0);
                }
              }

              // Since we are moving messages from a folder where they were
              // effectively not indexed, it is up to us to make sure the
              // messages now get indexed.
              // NOTE(review): the second argument (aDirtyingEvent) is omitted
              // here, so it arrives as undefined (falsy) — confirm that is
              // intentional versus an explicit `false` as at other call sites.
              this.indexer._reindexChangedMessages(aDestMsgHdrs);
              return;
            }

            // IMAP move case, we need to operate on the pending headers using
            // the source header to get the pending header and as the
            // indication of what has been already set on the pending header.
            let destDb;
            // so, this can fail, and there's not much we can do about it.
            try {
              destDb = aDestFolder.msgDatabase;
            } catch (ex) {
              this.indexer._log.warn(
                "Destination database for " +
                  aDestFolder.prettyName +
                  " not ready on IMAP move." +
                  " Gloda corruption possible."
              );
              return;
            }
            for (let srcMsgHdr of aSrcMsgHdrs) {
              // zero it out if it exists
              // (no need to deal with pending commit issues here; a filthy
              // folder by definition has nothing indexed in it.)
              let glodaId = srcMsgHdr.getUint32Property(
                GLODA_MESSAGE_ID_PROPERTY
              );
              if (glodaId) {
                destDb.setUint32AttributeOnPendingHdr(
                  srcMsgHdr,
                  GLODA_MESSAGE_ID_PROPERTY,
                  0
                );
              }
            }

            // Nothing remains to be done. The msgClassified event will take
            // care of making sure the message gets indexed.
            return;
          }

          // --- Have destination headers (local case):
          if (aDestMsgHdrs.length > 0) {
            // -- Update message keys for valid gloda-id's.
            // (Which means ignore filthy gloda-id's.)
            let glodaIds = [];
            let newMessageKeys = [];
            // Track whether we see any messages that are not gloda indexed so
            // we know if we have to mark the destination folder dirty.
            let sawNonGlodaMessage = false;
            // NOTE(review): source and destination header arrays are assumed
            // to correspond index-for-index — verify against the notifier.
            for (let iMsg = 0; iMsg < aSrcMsgHdrs.length; iMsg++) {
              let srcMsgHdr = aSrcMsgHdrs[iMsg];
              let destMsgHdr = aDestMsgHdrs[iMsg];

              let [glodaId, dirtyStatus] =
                PendingCommitTracker.getGlodaState(srcMsgHdr);
              if (
                glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
                dirtyStatus != GlodaMsgIndexer.kMessageFilthy
              ) {
                // we may need to update the pending commit map (it checks)
                PendingCommitTracker.noteMove(srcMsgHdr, destMsgHdr);
                // but we always need to update our database
                glodaIds.push(glodaId);
                newMessageKeys.push(destMsgHdr.messageKey);
              } else {
                sawNonGlodaMessage = true;
              }
            }

            // this method takes care to update the in-memory representations
            // too; we don't need to do anything
            if (glodaIds.length) {
              GlodaDatastore.updateMessageLocations(
                glodaIds,
                newMessageKeys,
                aDestFolder
              );
            }

            // Mark the destination folder dirty if we saw any messages that
            // were not already gloda indexed.
            if (sawNonGlodaMessage) {
              let destGlodaFolder = GlodaDatastore._mapFolder(aDestFolder);
              destGlodaFolder._ensureFolderDirty();
              this.indexer.indexingSweepNeeded = true;
            }
          } else {
            // --- No dest headers (IMAP case):
            // Update any valid gloda indexed messages into their new folder to
            // make the indexer's life easier when it sees the messages in their
            // new folder.
            let glodaIds = [];

            let srcFolderIsLocal =
              srcMsgFolder instanceof Ci.nsIMsgLocalMailFolder;
            for (let msgHdr of aSrcMsgHdrs) {
              let [glodaId, dirtyStatus] =
                PendingCommitTracker.getGlodaState(msgHdr);
              if (
                glodaId >= GLODA_FIRST_VALID_MESSAGE_ID &&
                dirtyStatus != GlodaMsgIndexer.kMessageFilthy
              ) {
                // we may need to update the pending commit map (it checks)
                PendingCommitTracker.noteBlindMove(msgHdr);
                // but we always need to update our database
                glodaIds.push(glodaId);

                // XXX UNDO WORKAROUND
                // This constitutes a move from a local folder to an IMAP
                // folder. Undo does not currently do the right thing for us,
                // but we have a chance of not orphaning the message if we
                // mark the source header as dirty so that when the message
                // gets re-added we see it. (This does require that we enter
                // the folder; we set the folder dirty after the loop to
                // increase the probability of this but it's not foolproof
                // depending on when the next indexing sweep happens and when
                // the user performs an undo.)
                msgHdr.setUint32Property(
                  GLODA_DIRTY_PROPERTY,
                  GlodaMsgIndexer.kMessageDirty
                );
              }
            }
            // XXX ALSO UNDO WORKAROUND
            if (srcFolderIsLocal) {
              let srcGlodaFolder = GlodaDatastore._mapFolder(srcMsgFolder);
              srcGlodaFolder._ensureFolderDirty();
            }

            // quickly move them to the right folder, zeroing their message keys
            GlodaDatastore.updateMessageFoldersByKeyPurging(
              glodaIds,
              aDestFolder
            );
            // we _do not_ need to mark the folder as dirty, because the
            // message added events will cause that to happen.
          }
        } else {
          // ---- Copy case
          // -- Do not propagate gloda-id's for copies
          // (Only applies if we have the destination header, which means local)
          for (let destMsgHdr of aDestMsgHdrs) {
            let glodaId = destMsgHdr.getUint32Property(
              GLODA_MESSAGE_ID_PROPERTY
            );
            if (glodaId) {
              destMsgHdr.setUint32Property(GLODA_MESSAGE_ID_PROPERTY, 0);
            }
          }

          // mark the folder as dirty; we'll get to it later.
          let destGlodaFolder = GlodaDatastore._mapFolder(aDestFolder);
          destGlodaFolder._ensureFolderDirty();
          this.indexer.indexingSweepNeeded = true;
        }
      } catch (ex) {
        // NOTE(review): only ex.stack is logged here (not ex itself), unlike
        // the other handlers — confirm the message text is not wanted.
        this.indexer._log.error(
          "Problem encountered during message move/copy:",
          ex.stack
        );
      }
    },

    /**
     * Queue up message key changes that are a result of offline fake headers
     * being made real for the actual update during the msgsClassified
     * notification that is expected after this. We defer the
     * actual work (if there is any to be done; the fake header might have
     * guessed the right UID correctly) so that we can batch our work.
     *
     * The expectation is that there will be no meaningful time window between
     * this notification and the msgsClassified notification since the message
     * classifier should not actually need to classify the messages (they
     * should already have been classified) and so can fast-path them.
     */
    msgKeyChanged(aOldMsgKey, aNewMsgHdr) {
      try {
        let val = null,
          newKey = aNewMsgHdr.messageKey;
        let [glodaId, glodaDirty] =
          PendingCommitTracker.getGlodaState(aNewMsgHdr);
        // If we haven't indexed this message yet, take no action, and leave it
        // up to msgsClassified to take proper action.
        if (glodaId < GLODA_FIRST_VALID_MESSAGE_ID) {
          return;
        }
        // take no action on filthy messages,
        // generate an entry if dirty or the keys don't match.
        if (
          glodaDirty !== GlodaMsgIndexer.kMessageFilthy &&
          (glodaDirty === GlodaMsgIndexer.kMessageDirty ||
            aOldMsgKey !== newKey)
        ) {
          val = {
            id: glodaId,
            // key stays null when only dirtiness (not the key) forced the
            // entry; consumers use null to mean "no key update needed".
            key: aOldMsgKey !== newKey ? newKey : null,
            isDirty: glodaDirty === GlodaMsgIndexer.kMessageDirty,
          };
        }

        // A null val is stored deliberately: it tells the msgsClassified
        // follow-up to skip this (unchanged, clean) message entirely.
        let key = aNewMsgHdr.folder.URI + "#" + aNewMsgHdr.messageKey;
        this.indexer._keyChangedBatchInfo[key] = val;
      } catch (ex) {
        // this is more for the unit test to fail rather than user error reporting
        this.indexer._log.error(
          "Problem encountered during msgKeyChanged" +
            " notification handling: " +
            ex +
            "\n\n" +
            ex.stack +
            " \n\n"
        );
      }
    },

    /**
     * Detect newly added folders before they get messages so we map them before
     * they get any messages added to them. If we only hear about them after
     * they get their 1st message, then we will mark them filthy, but if we mark
     * them before that, they get marked clean.
     */
    folderAdded(aMsgFolder) {
      // This is invoked for its side-effect of invoking _mapFolder and doing so
      // only after filtering out folders we don't care about.
      GlodaMsgIndexer.shouldIndexFolder(aMsgFolder);
    },

    /**
     * Handles folder no-longer-exists-ence. We mark all messages as deleted
     * and remove the folder from our URI table. Currently, if a folder that
     * contains other folders is deleted, we may either receive one
     * notification for the folder that is deleted, or a notification for the
     * folder and one for each of its descendents. This depends upon the
     * underlying account implementation, so we explicitly handle each case.
     * Namely, we treat it as if we're only planning on getting one, but we
     * handle if the children are already gone for some reason.
     */
    folderDeleted(aFolder) {
      this.indexer._log.debug("folderDeleted notification");
      try {
        // Shared per-folder deletion worker; safe to call on folders gloda
        // has never seen (it just logs and returns).
        let delFunc = function (aFolder, indexer) {
          if (indexer._datastore._folderKnown(aFolder)) {
            indexer._log.info(
              "Processing deletion of folder " + aFolder.prettyName + "."
            );
            let glodaFolder = GlodaDatastore._mapFolder(aFolder);
            indexer._datastore.markMessagesDeletedByFolderID(glodaFolder.id);
            indexer._datastore.deleteFolderByID(glodaFolder.id);
            GlodaDatastore._killGlodaFolderIntoTombstone(glodaFolder);
          } else {
            indexer._log.info(
              "Ignoring deletion of folder " +
                aFolder.prettyName +
                " because it is unknown to gloda."
            );
          }
        };

        let descendentFolders = aFolder.descendants;
        // (the order of operations does not matter; child, non-child, whatever.)
        // delete the parent
        delFunc(aFolder, this.indexer);
        // delete all its descendents
        for (let folder of descendentFolders) {
          delFunc(folder, this.indexer);
        }

        this.indexer.pendingDeletions = true;
      } catch (ex) {
        this.indexer._log.error(
          "Problem encountered during folder deletion" +
            ": " +
            ex +
            "\n\n" +
            ex.stack +
            "\n\n"
        );
      }
    },

    /**
     * Handle a folder being copied or moved.
     * Moves are handled by a helper function shared with _folderRenameHelper
     * (which takes care of any nesting involved).
     * Copies are actually ignored, because our periodic indexing traversal
     * should discover these automatically. We could hint ourselves into
     * action, but arguably a set of completely duplicate messages is not
     * a high priority for indexing.
     */
    folderMoveCopyCompleted(aMove, aSrcFolder, aDestFolder) {
      this.indexer._log.debug(
        "folderMoveCopy notification (Move: " + aMove + ")"
      );
      if (aMove) {
        let srcURI = aSrcFolder.URI;
        // Synthesize the post-move URI: destination URI plus the moved
        // folder's own leaf segment.
        let targetURI =
          aDestFolder.URI + srcURI.substring(srcURI.lastIndexOf("/"));
        this._folderRenameHelper(aSrcFolder, targetURI);
      } else {
        this.indexer.indexingSweepNeeded = true;
      }
    },

    /**
     * We just need to update the URI <-> ID maps and the row in the database,
     * all of which is actually done by the datastore for us.
     * This method needs to deal with the complexity where local folders will
     * generate a rename notification for each sub-folder, but IMAP folders
     * will generate only a single notification. Our logic primarily handles
     * this by not exploding if the original folder no longer exists.
     */
    _folderRenameHelper(aOrigFolder, aNewURI) {
      let newFolder = lazy.MailUtils.getOrCreateFolder(aNewURI);
      let specialFolderFlags =
        Ci.nsMsgFolderFlags.Trash | Ci.nsMsgFolderFlags.Junk;
      if (newFolder.isSpecialFolder(specialFolderFlags, true)) {
        // The rename landed the folder in Trash/Junk territory: stop
        // indexing it and purge what we had.
        let descendentFolders = newFolder.descendants;

        // First thing to do: make sure we don't index the resulting folder and
        // its descendents.
        GlodaMsgIndexer.resetFolderIndexingPriority(newFolder);
        for (let folder of descendentFolders) {
          GlodaMsgIndexer.resetFolderIndexingPriority(folder);
        }

        // Remove from the index messages from the original folder
        this.folderDeleted(aOrigFolder);
      } else {
        let descendentFolders = aOrigFolder.descendants;

        let origURI = aOrigFolder.URI;
        // this rename is straightforward.
        GlodaDatastore.renameFolder(aOrigFolder, aNewURI);

        for (let folder of descendentFolders) {
          let oldSubURI = folder.URI;
          // mangle a new URI from the old URI. we could also try and do a
          // parallel traversal of the new folder hierarchy, but that seems like
          // more work.
          let newSubURI = aNewURI + oldSubURI.substring(origURI.length);
          this.indexer._datastore.renameFolder(oldSubURI, newSubURI);
        }

        this.indexer._log.debug(
          "folder renamed: " + origURI + " to " + aNewURI
        );
      }
    },

    /**
     * Handle folder renames, dispatching to our rename helper (which also
     * takes care of any nested folder issues.)
     */
    folderRenamed(aOrigFolder, aNewFolder) {
      this._folderRenameHelper(aOrigFolder, aNewFolder.URI);
    },

    /**
     * Helper used by folderCompactStart/folderReindexTriggered.
     */
    _reindexFolderHelper(folder, isCompacting) {
      // ignore folders we ignore...
      if (!GlodaMsgIndexer.shouldIndexFolder(folder)) {
        return;
      }

      let glodaFolder = GlodaDatastore._mapFolder(folder);
      if (isCompacting) {
        glodaFolder.compacting = true;
      }

      // Purge any explicit indexing of said folder.
      GlodaIndexer.purgeJobsUsingFilter(function (aJob) {
        return aJob.jobType == "folder" && aJob.id == folder.id;
      });

      // Abort the active job if it's in the folder (this covers both
      // event-driven indexing that happens to be in the folder as well as
      // explicit folder indexing of the folder).
      if (GlodaMsgIndexer._indexingFolder == folder) {
        GlodaIndexer.killActiveJob();
      }

      // Tell the PendingCommitTracker to throw away anything it is tracking
      // about the folder. We will pick up the pieces in the compaction
      // pass.
      PendingCommitTracker.noteFolderDatabaseGettingBlownAway(folder);

      // (We do not need to mark the folder dirty because if we were indexing
      // it, it already must have been marked dirty.)
    },

    /**
     * folderCompactStart: Mark the folder as compacting in our in-memory
     * representation. This should keep any new indexing out of the folder
     * until it is done compacting. Also, kill any active or existing jobs
     * to index the folder.
     */
    folderCompactStart(folder) {
      this._reindexFolderHelper(folder, true);
    },

    /**
     * folderReindexTriggered: We do the same thing as folderCompactStart
     * but don't mark the folder as compacting.
     */
    folderReindexTriggered(folder) {
      this._reindexFolderHelper(folder, false);
    },

    /**
     * folderCompactFinish: Mark the folder as done compacting in our
     * in-memory representation. Assuming the folder was known to us and
     * not marked filthy, queue a compaction job.
     */
    folderCompactFinish(folder) {
      // ignore folders we ignore...
      if (!GlodaMsgIndexer.shouldIndexFolder(folder)) {
        return;
      }

      let glodaFolder = GlodaDatastore._mapFolder(folder);
      glodaFolder.compacting = false;
      glodaFolder._setCompactedState(true);

      // Queue compaction unless the folder was filthy (in which case there
      // are no valid gloda-id's to update.)
      if (glodaFolder.dirtyStatus != glodaFolder.kFolderFilthy) {
        GlodaIndexer.indexJob(new IndexingJob("folderCompact", glodaFolder.id));
      }

      // Queue indexing of the folder if it is dirty. We are doing this
      // mainly in case we were indexing it before the compaction started.
      // It should be reasonably harmless if we weren't.
      // (It would probably be better to just make sure that there is an
      // indexing sweep queued or active, and if it's already active that
      // this folder is in the queue to be processed.)
      if (glodaFolder.dirtyStatus == glodaFolder.kFolderDirty) {
        GlodaIndexer.indexJob(new IndexingJob("folder", glodaFolder.id));
      }
    },
  },

  /**
   * A nsIFolderListener (listening on nsIMsgMailSession so we get all of
   * these events) PRIMARILY to get folder loaded notifications. Because of
   * deficiencies in the nsIMsgFolderListener's events at this time, we also
   * get our folder-added and newsgroup notifications from here for now. (This
   * will be rectified.)
   */
  _folderListener: {
    // The owning GlodaMsgIndexer, wired up via _init below.
    indexer: null,

    _init(aIndexer) {
      this.indexer = aIndexer;
    },

    // Deliberate no-ops: we only care about a subset of nsIFolderListener.
    onFolderAdded(parentFolder, child) {},
    onMessageAdded(parentFolder, msg) {},
    onFolderRemoved(parentFolder, child) {},
    onMessageRemoved(parentFolder, msg) {},
    onFolderPropertyChanged(aItem, aProperty, aOldValue, aNewValue) {},
    /**
     * Detect changes to folder flags and reset our indexing priority. This
     * is important because (all?) folders start out without any flags and
     * then get their flags added to them.
     */
    onFolderIntPropertyChanged(aFolderItem, aProperty, aOldValue, aNewValue) {
      if (aProperty !== "FolderFlag") {
        return;
      }
      if (!GlodaMsgIndexer.shouldIndexFolder(aFolderItem)) {
        return;
      }
      // Only reset priority if folder Special Use changes.
      if (
        (aOldValue & Ci.nsMsgFolderFlags.SpecialUse) ==
        (aNewValue & Ci.nsMsgFolderFlags.SpecialUse)
      ) {
        return;
      }
      GlodaMsgIndexer.resetFolderIndexingPriority(aFolderItem);
    },
    onFolderBoolPropertyChanged(aItem, aProperty, aOldValue, aNewValue) {},
    onFolderUnicharPropertyChanged(aItem, aProperty, aOldValue, aNewValue) {},
    /**
     * Notice when user activity adds/removes tags or changes a message's
     * status.
     */
    onFolderPropertyFlagChanged(aMsgHdr, aProperty, aOldValue, aNewValue) {
      // XOR of old/new isolates exactly which flag bit toggled.
      if (
        aProperty == "Keywords" ||
        // We could care less about the new flag changing.
        (aProperty == "Status" &&
          (aOldValue ^ aNewValue) != Ci.nsMsgMessageFlags.New &&
          // We do care about IMAP deletion, but msgsDeleted tells us that, so
          // ignore IMAPDeleted too...
          (aOldValue ^ aNewValue) != Ci.nsMsgMessageFlags.IMAPDeleted) ||
        aProperty == "Flagged"
      ) {
        GlodaMsgIndexer._reindexChangedMessages([aMsgHdr], true);
      }
    },

    /**
     * Get folder loaded notifications for folders that had to do some
     * (asynchronous) processing before they could be opened.
     */
    onFolderEvent(aFolder, aEvent) {
      if (aEvent == "FolderLoaded") {
        this.indexer._onFolderLoaded(aFolder);
      }
    },
  },

  /* ***** Rebuilding / Reindexing ***** */
  /**
   * Allow us to invalidate an outstanding folder traversal because the
   * underlying database is going away. We use other means for detecting
   * modifications of the message (labeling, marked (un)read, starred, etc.)
   *
   * This is an nsIDBChangeListener listening to an nsIDBChangeAnnouncer. To
   * add ourselves, we get us a nice nsMsgDatabase, query it to the announcer,
   * then call addListener.
   */
  _databaseAnnouncerListener: {
    // Set externally to the owning GlodaMsgIndexer instance.
    indexer: null,
    /**
     * XXX We really should define the operations under which we expect this to
     * occur. While we know this must be happening as the result of a
     * ForceClosed call, we don't have a comprehensive list of when this is
     * expected to occur. Some reasons:
     * - Compaction (although we should already have killed the job thanks to
     *   our compaction notification)
     * - UID validity rolls.
     * - Folder Rename
     * - Folder Delete
     * The fact that we already have the database open when getting this means
     * that it had to be valid before we opened it, which hopefully rules out
     * modification of the mbox file by an external process (since that is
     * forbidden when we are running) and many other exotic things.
     *
     * So this really ends up just being a correctness / safety protection
     * mechanism. At least now that we have better compaction support.
     */
    onAnnouncerGoingAway(aDBChangeAnnouncer) {
      // The fact that we are getting called means we have an active folder and
      // that we therefore are the active job. As such, we must kill the
      // active job.
      // XXX In the future, when we support interleaved event-driven indexing
      // that bumps long-running indexing tasks, the semantics of this will
      // have to change a bit since we will want to maintain being active in a
      // folder even when bumped. However, we will probably have a more
      // complex notion of indexing contexts on a per-job basis.
      GlodaIndexer.killActiveJob();
    },

    // Remaining nsIDBChangeListener methods are intentional no-ops.
    onHdrFlagsChanged(aHdrChanged, aOldFlags, aNewFlags, aInstigator) {},
    onHdrDeleted(aHdrChanged, aParentKey, aFlags, aInstigator) {},
    onHdrAdded(aHdrChanged, aParentKey, aFlags, aInstigator) {},
    onParentChanged(aKeyChanged, aOldParent, aNewParent, aInstigator) {},
    onReadChanged(aInstigator) {},
    onJunkScoreChanged(aInstigator) {},
    onHdrPropertyChanged(aHdrToChange, aPreChange, aStatus, aInstigator) {},
    onEvent(aDB, aEvent) {},
  },

  /**
   * Given a list of Message-ID's, return a matching list of lists of messages
   * matching those Message-ID's. So if you pass an array with three
   * Message-ID's ["a", "b", "c"], you would get back an array containing
   * 3 lists, where the first list contains all the messages with a message-id
   * of "a", and so forth. The reason a list is returned rather than null/a
   * message is that we accept the reality that we have multiple copies of
   * messages with the same ID.
   * This call is asynchronous because it depends on previously created messages
   * to be reflected in our results, which requires us to execute on the async
   * thread where all our writes happen. This also turns out to be a
   * reasonable thing because we could imagine pathological cases where there
   * could be a lot of message-id's and/or a lot of messages with those
   * message-id's.
   *
   * The returned collection will include both 'ghost' messages (messages
   * that exist for conversation-threading purposes only) as well as deleted
   * messages in addition to the normal 'live' messages that non-privileged
   * queries might return.
   *
   * @param aMessageIDs - Array of Message-ID strings to look up.
   * @param aCallback - Invoked with the list-of-lists result.
   * @param aCallbackThis - `this` for the callback invocation.
   * @returns The (pending) gloda collection for the query.
   */
  getMessagesByMessageID(aMessageIDs, aCallback, aCallbackThis) {
    // Map each Message-ID to its slot so the listener can fan results back
    // out into positional lists.
    let msgIDToIndex = {};
    let results = [];
    for (let iID = 0; iID < aMessageIDs.length; ++iID) {
      let msgID = aMessageIDs[iID];
      results.push([]);
      msgIDToIndex[msgID] = iID;
    }

    // (Note: although we are performing a lookup with no validity constraints
    // and using the same object-relational-mapper-ish layer used by things
    // that do have constraints, we are not at risk of exposing deleted
    // messages to other code and getting it confused. The only way code
    // can find a message is if it shows up in their queries or gets announced
    // via GlodaCollectionManager.itemsAdded, neither of which will happen.)
    let query = Gloda.newQuery(GlodaConstants.NOUN_MESSAGE, {
      noDbQueryValidityConstraints: true,
    });
    query.headerMessageID.apply(query, aMessageIDs);
    query.frozen = true;

    let listener = new MessagesByMessageIdCallback(
      msgIDToIndex,
      results,
      aCallback,
      aCallbackThis
    );
    return query.getCollection(listener, null, { becomeNull: true });
  },

  /**
   * A reference to MsgHdrToMimeMessage that unit testing can clobber when it
   * wants to cause us to hang or inject a fault. If you are not
   * glodaTestHelper.js then _do not touch this_.
   */
  _MsgHdrToMimeMessageFunc: MsgHdrToMimeMessage,
  /**
   * Primary message indexing logic. This method is mainly concerned with
   * getting all the information about the message required for threading /
   * conversation building and subsequent processing. It is responsible for
   * determining whether to reuse existing gloda messages or whether a new one
   * should be created. Most attribute stuff happens in fund_attr.js or
   * expl_attr.js.
   *
   * Prior to calling this method, the caller must have invoked
   * |_indexerEnterFolder|, leaving us with the following true invariants
   * below.
+ * + * @pre aMsgHdr.folder == this._indexingFolder + * @pre aMsgHdr.folder.msgDatabase == this._indexingDatabase + */ + *_indexMessage(aMsgHdr, aCallbackHandle) { + this._log.debug( + "*** Indexing message: " + aMsgHdr.messageKey + " : " + aMsgHdr.subject + ); + + // If the message is offline, then get the message body as well + let aMimeMsg; + if ( + aMsgHdr.flags & Ci.nsMsgMessageFlags.Offline || + aMsgHdr.folder instanceof Ci.nsIMsgLocalMailFolder + ) { + this._MsgHdrToMimeMessageFunc( + aMsgHdr, + aCallbackHandle.callbackThis, + aCallbackHandle.callback, + false, + { + saneBodySize: true, + } + ); + aMimeMsg = (yield GlodaConstants.kWorkAsync)[1]; + } else { + this._log.debug(" * Message is not offline -- only headers indexed"); + } + + this._log.debug(" * Got message, subject " + aMsgHdr.subject); + + if (this._unitTestSuperVerbose) { + if (aMimeMsg) { + this._log.debug(" * Got Mime " + aMimeMsg.prettyString()); + } else { + this._log.debug(" * NO MIME MESSAGE!!!\n"); + } + } + + // -- Find/create the conversation the message belongs to. + // Our invariant is that all messages that exist in the database belong to + // a conversation. + + // - See if any of the ancestors exist and have a conversationID... + // (references are ordered from old [0] to new [n-1]) + let references = Array.from(range(0, aMsgHdr.numReferences)).map(i => + aMsgHdr.getStringReference(i) + ); + // also see if we already know about the message... 
+ references.push(aMsgHdr.messageId); + + this.getMessagesByMessageID( + references, + aCallbackHandle.callback, + aCallbackHandle.callbackThis + ); + // (ancestorLists has a direct correspondence to the message ids) + let ancestorLists = yield GlodaConstants.kWorkAsync; + + this._log.debug("ancestors raw: " + ancestorLists); + this._log.debug( + "ref len: " + references.length + " anc len: " + ancestorLists.length + ); + this._log.debug("references: " + references); + this._log.debug("ancestors: " + ancestorLists); + + // pull our current message lookup results off + references.pop(); + let candidateCurMsgs = ancestorLists.pop(); + + let conversationID = null; + let conversation = null; + // -- figure out the conversation ID + // if we have a clone/already exist, just use his conversation ID + if (candidateCurMsgs.length > 0) { + conversationID = candidateCurMsgs[0].conversationID; + conversation = candidateCurMsgs[0].conversation; + } else { + // otherwise check out our ancestors + // (walk from closest to furthest ancestor) + for ( + let iAncestor = ancestorLists.length - 1; + iAncestor >= 0; + --iAncestor + ) { + let ancestorList = ancestorLists[iAncestor]; + + if (ancestorList.length > 0) { + // we only care about the first instance of the message because we are + // able to guarantee the invariant that all messages with the same + // message id belong to the same conversation. + let ancestor = ancestorList[0]; + if (conversationID === null) { + conversationID = ancestor.conversationID; + conversation = ancestor.conversation; + } else if (conversationID != ancestor.conversationID) { + // XXX this inconsistency is known and understood and tracked by + // bug 478162 https://bugzilla.mozilla.org/show_bug.cgi?id=478162 + // this._log.error("Inconsistency in conversations invariant on " + + // ancestor.headerMessageID + ". It has conv id " + + // ancestor.conversationID + " but expected " + + // conversationID + ". 
ID: " + ancestor.id); + } + } + } + } + + // nobody had one? create a new conversation + if (conversationID === null) { + // (the create method could issue the id, making the call return + // without waiting for the database...) + conversation = this._datastore.createConversation( + aMsgHdr.mime2DecodedSubject, + null, + null + ); + conversationID = conversation.id; + } + + // Walk from furthest to closest ancestor, creating the ancestors that don't + // exist. (This is possible if previous messages that were consumed in this + // thread only had an in-reply-to or for some reason did not otherwise + // provide the full references chain.) + for (let iAncestor = 0; iAncestor < ancestorLists.length; ++iAncestor) { + let ancestorList = ancestorLists[iAncestor]; + + if (ancestorList.length == 0) { + this._log.debug( + "creating message with: null, " + + conversationID + + ", " + + references[iAncestor] + + ", null." + ); + let ancestor = this._datastore.createMessage( + null, + null, // ghost + conversationID, + null, + references[iAncestor], + null, // no subject + null, // no body + null + ); // no attachments + this._datastore.insertMessage(ancestor); + ancestorLists[iAncestor].push(ancestor); + } + } + // now all our ancestors exist, though they may be ghost-like... + + // find if there's a ghost version of our message or we already have indexed + // this message. + let curMsg = null; + this._log.debug(candidateCurMsgs.length + " candidate messages"); + for (let iCurCand = 0; iCurCand < candidateCurMsgs.length; iCurCand++) { + let candMsg = candidateCurMsgs[iCurCand]; + + this._log.debug( + "candidate folderID: " + + candMsg.folderID + + " messageKey: " + + candMsg.messageKey + ); + + if (candMsg.folderURI == this._indexingFolder.URI) { + // if we are in the same folder and we have the same message key, we + // are definitely the same, stop looking. 
+ if (candMsg.messageKey == aMsgHdr.messageKey) { + curMsg = candMsg; + break; + } + // if (we are in the same folder and) the candidate message has a null + // message key, we treat it as our best option unless we find an exact + // key match. (this would happen because the 'move' notification case + // has to deal with not knowing the target message key. this case + // will hopefully be somewhat improved in the future to not go through + // this path which mandates re-indexing of the message in its entirety) + if (candMsg.messageKey === null) { + curMsg = candMsg; + } else if ( + curMsg === null && + !this._indexingDatabase.containsKey(candMsg.messageKey) + ) { + // (We are in the same folder and) the candidate message's underlying + // message no longer exists/matches. Assume we are the same but + // were betrayed by a re-indexing or something, but we have to make + // sure a perfect match doesn't turn up. + curMsg = candMsg; + } + } else if (curMsg === null && candMsg.folderID === null) { + // a ghost/deleted message is fine + curMsg = candMsg; + } + } + + let attachmentNames = aMimeMsg?.allAttachments.map(att => att.name) || null; + + let isConceptuallyNew, isRecordNew, insertFulltext; + if (curMsg === null) { + curMsg = this._datastore.createMessage( + aMsgHdr.folder, + aMsgHdr.messageKey, + conversationID, + aMsgHdr.date, + aMsgHdr.messageId + ); + curMsg._conversation = conversation; + isConceptuallyNew = isRecordNew = insertFulltext = true; + } else { + isRecordNew = false; + // the message is conceptually new if it was a ghost or dead. + isConceptuallyNew = curMsg._isGhost || curMsg._isDeleted; + // insert fulltext if it was a ghost + insertFulltext = curMsg._isGhost; + curMsg._folderID = this._datastore._mapFolder(aMsgHdr.folder).id; + curMsg._messageKey = aMsgHdr.messageKey; + curMsg.date = new Date(aMsgHdr.date / 1000); + // the message may have been deleted; tell it to make sure it's not. 
+ curMsg._ensureNotDeleted(); + // note: we are assuming that our matching logic is flawless in that + // if this message was not a ghost, we are assuming the 'body' + // associated with the id is still exactly the same. It is conceivable + // that there are cases where this is not true. + } + + if (aMimeMsg) { + let bodyPlain = aMimeMsg.coerceBodyToPlaintext(aMsgHdr.folder); + if (bodyPlain) { + curMsg._bodyLines = bodyPlain.split(/\r?\n/); + // curMsg._content gets set by GlodaFundAttr.jsm + } + } + + // Mark the message as new (for the purposes of fulltext insertion) + if (insertFulltext) { + curMsg._isNew = true; + } + + curMsg._subject = aMsgHdr.mime2DecodedSubject; + curMsg._attachmentNames = attachmentNames; + + // curMsg._indexAuthor gets set by GlodaFundAttr.jsm + // curMsg._indexRecipients gets set by GlodaFundAttr.jsm + + // zero the notability so everything in grokNounItem can just increment + curMsg.notability = 0; + + yield aCallbackHandle.pushAndGo( + Gloda.grokNounItem( + curMsg, + { header: aMsgHdr, mime: aMimeMsg, bodyLines: curMsg._bodyLines }, + isConceptuallyNew, + isRecordNew, + aCallbackHandle + ) + ); + + delete curMsg._bodyLines; + delete curMsg._content; + delete curMsg._isNew; + delete curMsg._indexAuthor; + delete curMsg._indexRecipients; + + // we want to update the header for messages only after the transaction + // irrevocably hits the disk. otherwise we could get confused if the + // transaction rolls back or what not. + PendingCommitTracker.track(aMsgHdr, curMsg.id); + + yield GlodaConstants.kWorkDone; + }, + + /** + * Wipe a message out of existence from our index. This is slightly more + * tricky than one would first expect because there are potentially + * attributes not immediately associated with this message that reference + * the message. 
Not only that, but deletion of messages may leave a + * conversation possessing only ghost messages, which we don't want, so we + * need to nuke the moot conversation and its moot ghost messages. + * For now, we are actually punting on that trickiness, and the exact + * nuances aren't defined yet because we have not decided whether to store + * such attributes redundantly. For example, if we have subject-pred-object, + * we could actually store this as attributes (subject, id, object) and + * (object, id, subject). In such a case, we could query on (subject, *) + * and use the results to delete the (object, id, subject) case. If we + * don't redundantly store attributes, we can deal with the problem by + * collecting up all the attributes that accept a message as their object + * type and issuing a delete against that. For example, delete (*, [1,2,3], + * message id). + * (We are punting because we haven't implemented support for generating + * attributes like that yet.) + * + * @TODO: implement deletion of attributes that reference (deleted) messages + */ + *_deleteMessage(aMessage, aCallbackHandle) { + this._log.debug("*** Deleting message: " + aMessage); + + // -- delete our attributes + // delete the message's attributes (if we implement the cascade delete, that + // could do the honors for us... right now we define the trigger in our + // schema but the back-end ignores it) + GlodaDatastore.clearMessageAttributes(aMessage); + + // -- delete our message or ghost us, and maybe nuke the whole conversation + // Look at the other messages in the conversation. + // (Note: although we are performing a lookup with no validity constraints + // and using the same object-relational-mapper-ish layer used by things + // that do have constraints, we are not at risk of exposing deleted + // messages to other code and getting it confused. 
The only way code + // can find a message is if it shows up in their queries or gets announced + // via GlodaCollectionManager.itemsAdded, neither of which will happen.) + let convPrivQuery = Gloda.newQuery(GlodaConstants.NOUN_MESSAGE, { + noDbQueryValidityConstraints: true, + }); + convPrivQuery.conversation(aMessage.conversation); + let conversationCollection = convPrivQuery.getCollection(aCallbackHandle); + yield GlodaConstants.kWorkAsync; + + let conversationMsgs = conversationCollection.items; + + // Count the number of ghosts messages we see to determine if we are + // the last message alive. + let ghostCount = 0; + let twinMessageExists = false; + for (let convMsg of conversationMsgs) { + // ignore our own message + if (convMsg.id == aMessage.id) { + continue; + } + + if (convMsg._isGhost) { + ghostCount++; + } else if ( + // This message is our (living) twin if it is not a ghost, not deleted, + // and has the same message-id header. + !convMsg._isDeleted && + convMsg.headerMessageID == aMessage.headerMessageID + ) { + twinMessageExists = true; + } + } + + // -- If everyone else is a ghost, blow away the conversation. + // If there are messages still alive or deleted but we have not yet gotten + // to them yet _deleteMessage, then do not do this. (We will eventually + // hit this case if they are all deleted.) + if (conversationMsgs.length - 1 == ghostCount) { + // - Obliterate each message + for (let msg of conversationMsgs) { + GlodaDatastore.deleteMessageByID(msg.id); + } + // - Obliterate the conversation + GlodaDatastore.deleteConversationByID(aMessage.conversationID); + // *no one* should hold a reference or use aMessage after this point, + // trash it so such ne'er do'wells are made plain. + aMessage._objectPurgedMakeYourselfUnpleasant(); + } else if (twinMessageExists) { + // -- Ghost or purge us as appropriate + // Purge us if we have a (living) twin; no ghost required. 
+ GlodaDatastore.deleteMessageByID(aMessage.id); + // *no one* should hold a reference or use aMessage after this point, + // trash it so such ne'er do'wells are made plain. + aMessage._objectPurgedMakeYourselfUnpleasant(); + } else { + // No twin, a ghost is required, we become the ghost. + aMessage._ghost(); + GlodaDatastore.updateMessage(aMessage); + // ghosts don't have fulltext. purge it. + GlodaDatastore.deleteMessageTextByID(aMessage.id); + } + + yield GlodaConstants.kWorkDone; + }, +}; +GlodaIndexer.registerIndexer(GlodaMsgIndexer); diff --git a/comm/mailnews/db/gloda/modules/MimeMessage.jsm b/comm/mailnews/db/gloda/modules/MimeMessage.jsm new file mode 100644 index 0000000000..8859f10877 --- /dev/null +++ b/comm/mailnews/db/gloda/modules/MimeMessage.jsm @@ -0,0 +1,821 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +const EXPORTED_SYMBOLS = [ + "MsgHdrToMimeMessage", + "MimeMessage", + "MimeContainer", + "MimeBody", + "MimeUnknown", + "MimeMessageAttachment", +]; + +const { MailServices } = ChromeUtils.import( + "resource:///modules/MailServices.jsm" +); + +/** + * The URL listener is surplus because the CallbackStreamListener ends up + * getting the same set of events, effectively. + */ +var dumbUrlListener = { + OnStartRunningUrl(aUrl) {}, + OnStopRunningUrl(aUrl, aExitCode) {}, +}; + +/** + * Maintain a list of all active stream listeners so that we can cancel them all + * during shutdown. If we don't cancel them, we risk calls into javascript + * from C++ after the various XPConnect contexts have already begun their + * teardown process. 
+ */ +var activeStreamListeners = {}; + +var shutdownCleanupObserver = { + _initialized: false, + ensureInitialized() { + if (this._initialized) { + return; + } + + Services.obs.addObserver(this, "quit-application"); + + this._initialized = true; + }, + + observe(aSubject, aTopic, aData) { + if (aTopic == "quit-application") { + Services.obs.removeObserver(this, "quit-application"); + + for (let uri in activeStreamListeners) { + let streamListener = activeStreamListeners[uri]; + if (streamListener._request) { + streamListener._request.cancel(Cr.NS_BINDING_ABORTED); + } + } + } + }, +}; + +function CallbackStreamListener(aMsgHdr, aCallbackThis, aCallback) { + this._msgHdr = aMsgHdr; + // Messages opened from file or attachments do not have a folder property, but + // have their url stored as a string property. + let hdrURI = aMsgHdr.folder + ? aMsgHdr.folder.getUriForMsg(aMsgHdr) + : aMsgHdr.getStringProperty("dummyMsgUrl"); + + this._request = null; + this._stream = null; + if (aCallback === undefined) { + this._callbacksThis = [null]; + this._callbacks = [aCallbackThis]; + } else { + this._callbacksThis = [aCallbackThis]; + this._callbacks = [aCallback]; + } + activeStreamListeners[hdrURI] = this; +} + +/** + * @implements {nsIRequestObserver} + * @implements {nsIStreamListener} + */ +CallbackStreamListener.prototype = { + QueryInterface: ChromeUtils.generateQI(["nsIStreamListener"]), + + // nsIRequestObserver part + onStartRequest(aRequest) { + this._request = aRequest; + }, + onStopRequest(aRequest, aStatusCode) { + // Messages opened from file or attachments do not have a folder property, + // but have their url stored as a string property. + let msgURI = this._msgHdr.folder + ? 
this._msgHdr.folder.getUriForMsg(this._msgHdr) + : this._msgHdr.getStringProperty("dummyMsgUrl"); + delete activeStreamListeners[msgURI]; + + aRequest.QueryInterface(Ci.nsIChannel); + let message = MsgHdrToMimeMessage.RESULT_RENDEVOUZ[aRequest.URI.spec]; + if (message === undefined) { + message = null; + } + + delete MsgHdrToMimeMessage.RESULT_RENDEVOUZ[aRequest.URI.spec]; + + for (let i = 0; i < this._callbacksThis.length; i++) { + try { + this._callbacks[i].call(this._callbacksThis[i], this._msgHdr, message); + } catch (e) { + // Most of the time, exceptions will silently disappear into the endless + // deeps of XPConnect, and never reach the surface ever again. At least + // warn the user if he has dump enabled. + dump( + "The MsgHdrToMimeMessage callback threw an exception: " + e + "\n" + ); + // That one will probably never make it to the original caller. + throw e; + } + } + + this._msgHdr = null; + this._request = null; + this._stream = null; + this._callbacksThis = null; + this._callbacks = null; + }, + + // nsIStreamListener part + + /** + * Our onDataAvailable should actually never be called. The stream converter + * is actually eating everything except the start and stop notification. + */ + onDataAvailable(aRequest, aInputStream, aOffset, aCount) { + throw new Error( + `The stream converter should have grabbed the data for ${aRequest?.URI.spec}` + ); + }, +}; + +function stripEncryptedParts(aPart) { + if (aPart.parts && aPart.isEncrypted) { + aPart.parts = []; // Show an empty container. + } else if (aPart.parts) { + aPart.parts = aPart.parts.map(stripEncryptedParts); + } + return aPart; +} + +/** + * Starts retrieval of a MimeMessage instance for the given message header. + * Your callback will be called with the message header you provide and the + * + * @param aMsgHdr The message header to retrieve the body for and build a MIME + * representation of the message. + * @param aCallbackThis The (optional) 'this' to use for your callback function. 
+ * @param aCallback The callback function to invoke on completion of message + * parsing or failure. The first argument passed will be the nsIMsgDBHdr + * you passed to this function. The second argument will be the MimeMessage + * instance resulting from the processing on success, and null on failure. + * @param [aAllowDownload=false] Should we allow the message to be downloaded + * for this streaming request? The default is false, which means that we + * require that the message be available offline. If false is passed and + * the message is not available offline, we will propagate an exception + * thrown by the underlying code. + * @param [aOptions] Optional options. + * @param [aOptions.saneBodySize] Limit body sizes to a 'reasonable' size in + * order to combat corrupt offline/message stores creating pathological + * situations where we have erroneously multi-megabyte messages. This + * also likely reduces the impact of legitimately ridiculously large + * messages. + * @param [aOptions.examineEncryptedParts] By default, we won't reveal the + * contents of multipart/encrypted parts to the consumers, unless explicitly + * requested. In the case of MIME/PGP messages, for instance, the message + * will appear as an empty multipart/encrypted container, unless this option + * is used. + */ +function MsgHdrToMimeMessage( + aMsgHdr, + aCallbackThis, + aCallback, + aAllowDownload, + aOptions +) { + shutdownCleanupObserver.ensureInitialized(); + + let requireOffline = !aAllowDownload; + // Messages opened from file or attachments do not have a folder property, but + // have their url stored as a string property. + let msgURI = aMsgHdr.folder + ? 
aMsgHdr.folder.getUriForMsg(aMsgHdr) + : aMsgHdr.getStringProperty("dummyMsgUrl"); + + let msgService = MailServices.messageServiceFromURI(msgURI); + + MsgHdrToMimeMessage.OPTION_TUNNEL = aOptions; + // By default, Enigmail only decrypts a message streamed via libmime if it's + // the one currently on display in the message reader. With this option, we're + // letting Enigmail know that it should decrypt the message since the client + // explicitly asked for it. + let encryptedStr = + aOptions && aOptions.examineEncryptedParts + ? "&examineEncryptedParts=true" + : ""; + + // S/MIME, our other encryption backend, is not that smart, and always + // decrypts data. In order to protect sensitive data (e.g. not index it in + // Gloda), unless the client asked for encrypted data, we pass to the client + // callback a stripped-down version of the MIME structure where encrypted + // parts have been removed. + let wrapCallback = function (aCallback, aCallbackThis) { + if (aOptions && aOptions.examineEncryptedParts) { + return aCallback; + } + return (aMsgHdr, aMimeMsg) => + aCallback.call(aCallbackThis, aMsgHdr, stripEncryptedParts(aMimeMsg)); + }; + + // Apparently there used to be an old syntax where the callback was the second + // argument... + let callback = aCallback ? aCallback : aCallbackThis; + let callbackThis = aCallback ? aCallbackThis : null; + + // if we're already streaming this msg, just add the callback + // to the listener. 
+ let listenerForURI = activeStreamListeners[msgURI]; + if (listenerForURI != undefined) { + listenerForURI._callbacks.push(wrapCallback(callback, callbackThis)); + listenerForURI._callbacksThis.push(callbackThis); + return; + } + let streamListener = new CallbackStreamListener( + aMsgHdr, + callbackThis, + wrapCallback(callback, callbackThis) + ); + + try { + msgService.streamMessage( + msgURI, + streamListener, // consumer + null, // nsIMsgWindow + dumbUrlListener, // nsIUrlListener + true, // have them create the converter + // additional uri payload, note that "header=" is prepended automatically + "filter&emitter=js" + encryptedStr, + requireOffline + ); + } catch (ex) { + // If streamMessage throws an exception, we should make sure to clear the + // activeStreamListener, or any subsequent attempt at sreaming this URI + // will silently fail + if (activeStreamListeners[msgURI]) { + delete activeStreamListeners[msgURI]; + } + MsgHdrToMimeMessage.OPTION_TUNNEL = null; + throw ex; + } + + MsgHdrToMimeMessage.OPTION_TUNNEL = null; +} + +/** + * Let the jsmimeemitter provide us with results. The poor emitter (if I am + * understanding things correctly) is evaluated outside of the C.u.import + * world, so if we were to import him, we would not see him, but rather a new + * copy of him. This goes for his globals, etc. (and is why we live in this + * file right here). Also, it appears that the XPCOM JS wrappers aren't + * magically unified so that we can try and pass data as expando properties + * on things like the nsIUri instances either. So we have the jsmimeemitter + * import us and poke things into RESULT_RENDEVOUZ. We put it here on this + * function to try and be stealthy and avoid polluting the namespaces (or + * encouraging bad behaviour) of our importers. + * + * If you can come up with a prettier way to shuttle this data, please do. + */ +MsgHdrToMimeMessage.RESULT_RENDEVOUZ = {}; +/** + * Cram rich options here for the MimeMessageEmitter to grab from. 
We + * leverage the known control-flow to avoid needing a whole dictionary here. + * We set this immediately before constructing the emitter and clear it + * afterwards. Control flow is never yielded during the process and reentrancy + * cannot happen via any other means. + */ +MsgHdrToMimeMessage.OPTION_TUNNEL = null; + +var HeaderHandlerBase = { + /** + * Look-up a header that should be present at most once. + * + * @param aHeaderName The header name to retrieve, case does not matter. + * @param aDefaultValue The value to return if the header was not found, null + * if left unspecified. + * @returns the value of the header if present, and the default value if not + * (defaults to null). If the header was present multiple times, the first + * instance of the header is returned. Use getAll if you want all of the + * values for the multiply-defined header. + */ + get(aHeaderName, aDefaultValue) { + if (aDefaultValue === undefined) { + aDefaultValue = null; + } + let lowerHeader = aHeaderName.toLowerCase(); + if (lowerHeader in this.headers) { + // we require that the list cannot be empty if present + return this.headers[lowerHeader][0]; + } + return aDefaultValue; + }, + /** + * Look-up a header that can be present multiple times. Use get for headers + * that you only expect to be present at most once. + * + * @param aHeaderName The header name to retrieve, case does not matter. + * @returns An array containing the values observed, which may mean a zero + * length array. + */ + getAll(aHeaderName) { + let lowerHeader = aHeaderName.toLowerCase(); + if (lowerHeader in this.headers) { + return this.headers[lowerHeader]; + } + return []; + }, + /** + * @param aHeaderName Header name to test for its presence. + * @returns true if the message has (at least one value for) the given header + * name. 
+ */ + has(aHeaderName) { + let lowerHeader = aHeaderName.toLowerCase(); + return lowerHeader in this.headers; + }, + _prettyHeaderString(aIndent) { + if (aIndent === undefined) { + aIndent = ""; + } + let s = ""; + for (let header in this.headers) { + let values = this.headers[header]; + s += "\n " + aIndent + header + ": " + values; + } + return s; + }, +}; + +/** + * @ivar partName The MIME part, ex "1.2.2.1". The partName of a (top-level) + * message is "1", its first child is "1.1", its second child is "1.2", + * its first child's first child is "1.1.1", etc. + * @ivar headers Maps lower-cased header field names to a list of the values + * seen for the given header. Use get or getAll as convenience helpers. + * @ivar parts The list of the MIME part children of this message. Children + * will be either MimeMessage instances, MimeMessageAttachment instances, + * MimeContainer instances, or MimeUnknown instances. The latter two are + * the result of limitations in the Javascript representation generation + * at this time, combined with the need to most accurately represent the + * MIME structure. + */ +function MimeMessage() { + this.partName = null; + this.headers = {}; + this.parts = []; + this.isEncrypted = false; +} + +MimeMessage.prototype = { + __proto__: HeaderHandlerBase, + contentType: "message/rfc822", + + /** + * @returns a list of all attachments contained in this message and all its + * sub-messages. Only MimeMessageAttachment instances will be present in + * the list (no sub-messages). + */ + get allAttachments() { + let results = []; // messages are not attachments, don't include self + for (let iChild = 0; iChild < this.parts.length; iChild++) { + let child = this.parts[iChild]; + results = results.concat(child.allAttachments); + } + return results; + }, + + /** + * @returns a list of all attachments contained in this message and all its + * sub-messages, including the sub-messages. 
+ */ + get allInlineAttachments() { + // Do not include the top message, but only sub-messages. + let results = this.partName ? [this] : []; + for (let iChild = 0; iChild < this.parts.length; iChild++) { + let child = this.parts[iChild]; + results = results.concat(child.allInlineAttachments); + } + return results; + }, + + /** + * @returns a list of all attachments contained in this message, with + * included/forwarded messages treated as real attachments. Attachments + * contained in inner messages won't be shown. + */ + get allUserAttachments() { + if (this.url) { + // The jsmimeemitter camouflaged us as a MimeAttachment + return [this]; + } + return this.parts + .map(child => child.allUserAttachments) + .reduce((a, b) => a.concat(b), []); + }, + + /** + * @returns the total size of this message, that is, the size of all subparts + */ + get size() { + return this.parts + .map(child => child.size) + .reduce((a, b) => a + Math.max(b, 0), 0); + }, + + /** + * In the case of attached messages, libmime considers them as attachments, + * and if the body is, say, quoted-printable encoded, then libmime will start + * counting bytes and notify the js mime emitter about it. The JS mime emitter + * being a nice guy, it will try to set a size on us. While this is the + * expected behavior for MimeMsgAttachments, we must make sure we can handle + * that (failing to write a setter results in exceptions being thrown). + */ + set size(whatever) { + // nop + }, + + /** + * @param aMsgFolder A message folder, any message folder. Because this is + * a hack. + * @returns The concatenation of all of the body parts where parts + * available as text/plain are pulled as-is, and parts only available + * as text/html are converted to plaintext form first. In other words, + * if we see a multipart/alternative with a text/plain, we take the + * text/plain. If we see a text/html without an alternative, we convert + * that to text. 
+ */ + coerceBodyToPlaintext(aMsgFolder) { + let bodies = []; + for (let part of this.parts) { + // an undefined value for something not having the method is fine + let body = + part.coerceBodyToPlaintext && part.coerceBodyToPlaintext(aMsgFolder); + if (body) { + bodies.push(body); + } + } + if (bodies) { + return bodies.join(""); + } + return ""; + }, + + /** + * Convert the message and its hierarchy into a "pretty string". The message + * and each MIME part get their own line. The string never ends with a + * newline. For a non-multi-part message, only a single line will be + * returned. + * Messages have their subject displayed, attachments have their filename and + * content-type (ex: image/jpeg) displayed. "Filler" classes simply have + * their class displayed. + */ + prettyString(aVerbose, aIndent, aDumpBody) { + if (aIndent === undefined) { + aIndent = ""; + } + let nextIndent = aIndent + " "; + + let s = + "Message " + + (this.isEncrypted ? "[encrypted] " : "") + + "(" + + this.size + + " bytes): " + + "subject" in + this.headers + ? this.headers.subject + : ""; + if (aVerbose) { + s += this._prettyHeaderString(nextIndent); + } + + for (let iPart = 0; iPart < this.parts.length; iPart++) { + let part = this.parts[iPart]; + s += + "\n" + + nextIndent + + (iPart + 1) + + " " + + part.prettyString(aVerbose, nextIndent, aDumpBody); + } + + return s; + }, +}; + +/** + * @ivar contentType The content-type of this container. + * @ivar parts The parts held by this container. These can be instances of any + * of the classes found in this file. 
 */
function MimeContainer(aContentType) {
  this.partName = null;
  this.contentType = aContentType;
  this.headers = {};
  // Child MIME parts, in the order libmime reported them.
  this.parts = [];
  this.isEncrypted = false;
}

MimeContainer.prototype = {
  __proto__: HeaderHandlerBase,
  // Recursively aggregate proper attachments from all child parts.
  get allAttachments() {
    let results = [];
    for (let iChild = 0; iChild < this.parts.length; iChild++) {
      let child = this.parts[iChild];
      results = results.concat(child.allAttachments);
    }
    return results;
  },
  // Recursively aggregate inline attachments from all child parts.
  get allInlineAttachments() {
    let results = [];
    for (let iChild = 0; iChild < this.parts.length; iChild++) {
      let child = this.parts[iChild];
      results = results.concat(child.allInlineAttachments);
    }
    return results;
  },
  // Recursively aggregate user-visible attachments from all child parts.
  get allUserAttachments() {
    return this.parts
      .map(child => child.allUserAttachments)
      .reduce((a, b) => a.concat(b), []);
  },
  // Sum of child sizes; negative (unknown) child sizes are clamped to 0 so
  // they cannot reduce the total.
  get size() {
    return this.parts
      .map(child => child.size)
      .reduce((a, b) => a + Math.max(b, 0), 0);
  },
  set size(whatever) {
    // nop
  },
  /**
   * Coerce this container's content to plain text for snippet purposes.
   *
   * @param aMsgFolder Folder used for its convertMsgSnippetToPlainText helper.
   * @returns a plain-text rendering of the best body candidate.
   */
  coerceBodyToPlaintext(aMsgFolder) {
    if (this.contentType == "multipart/alternative") {
      let htmlPart;
      // pick the text/plain if we can find one, otherwise remember the HTML one
      for (let part of this.parts) {
        if (part.contentType == "text/plain") {
          return part.body;
        }
        if (part.contentType == "text/html") {
          htmlPart = part;
        } else if (!htmlPart && part.contentType == "text/enriched") {
          // text/enriched gets transformed into HTML, so use it if we don't
          // already have an HTML part.
          htmlPart = part;
        }
      }
      // convert the HTML part if we have one
      if (htmlPart) {
        return aMsgFolder.convertMsgSnippetToPlainText(htmlPart.body);
      }
    }
    // if it's not alternative, recurse/aggregate using MimeMessage logic
    return MimeMessage.prototype.coerceBodyToPlaintext.call(this, aMsgFolder);
  },
  /**
   * Debugging representation; recurses into child parts with increased indent.
   */
  prettyString(aVerbose, aIndent, aDumpBody) {
    let nextIndent = aIndent + " ";

    let s =
      "Container " +
      (this.isEncrypted ? "[encrypted] " : "") +
      "(" +
      this.size +
      " bytes): " +
      this.contentType;
    if (aVerbose) {
      s += this._prettyHeaderString(nextIndent);
    }

    for (let iPart = 0; iPart < this.parts.length; iPart++) {
      let part = this.parts[iPart];
      s +=
        "\n" +
        nextIndent +
        (iPart + 1) +
        " " +
        part.prettyString(aVerbose, nextIndent, aDumpBody);
    }

    return s;
  },
  toString() {
    return "Container: " + this.contentType;
  },
};

/**
 * @class Represents a body portion that we understand and do not believe to be
 * a proper attachment. This means text/plain or text/html and it has no
 * filename. (A filename suggests an attachment.)
 *
 * @ivar contentType The content type of this body material; text/plain or
 * text/html.
 * @ivar body The actual body content.
 */
function MimeBody(aContentType) {
  this.partName = null;
  this.contentType = aContentType;
  this.headers = {};
  this.body = "";
  this.isEncrypted = false;
}

MimeBody.prototype = {
  __proto__: HeaderHandlerBase,
  get allAttachments() {
    return []; // we are a leaf
  },
  get allInlineAttachments() {
    return []; // we are a leaf
  },
  get allUserAttachments() {
    return []; // we are a leaf
  },
  get size() {
    return this.body.length;
  },
  set size(whatever) {
    // nop
  },
  // Streaming append used while the body is being received from libmime.
  appendBody(aBuf) {
    this.body += aBuf;
  },
  coerceBodyToPlaintext(aMsgFolder) {
    if (this.contentType == "text/plain") {
      return this.body;
    }
    // text/enriched gets transformed into HTML by libmime
    if (
      this.contentType == "text/html" ||
      this.contentType == "text/enriched"
    ) {
      return aMsgFolder.convertMsgSnippetToPlainText(this.body);
    }
    return "";
  },
  prettyString(aVerbose, aIndent, aDumpBody) {
    let s =
      "Body: " +
      (this.isEncrypted ? "[encrypted] " : "") +
      "" +
      this.contentType +
      " (" +
      this.body.length +
      " bytes" +
      (aDumpBody ? ": '" + this.body + "'" : "") +
      ")";
    if (aVerbose) {
      s += this._prettyHeaderString(aIndent + " ");
    }
    return s;
  },
  toString() {
    return "Body: " + this.contentType + " (" + this.body.length + " bytes)";
  },
};

/**
 * @class A MIME Leaf node that doesn't have a filename so we assume it's not
 * intended to be an attachment proper. This is probably meant for inline
 * display or is the result of someone amusing themselves by composing messages
 * by hand or a bad client. This class should probably be renamed or we should
 * introduce a better named class that we try and use in preference to this
 * class.
 *
 * @ivar contentType The content type of this part.
 */
function MimeUnknown(aContentType) {
  this.partName = null;
  this.contentType = aContentType;
  this.headers = {};
  // Looks like libmime does not always interpret us as an attachment, which
  // means we'll have to have a default size. Returning undefined would cause
  // the recursive size computations to fail.
  this._size = 0;
  this.isEncrypted = false;
  // We want to make sure MimeUnknown has a part property: S/MIME encrypted
  // messages have a topmost MimeUnknown part, with the encrypted bit set to 1,
  // and we need to ensure all other encrypted parts are children of this
  // topmost part.
  this.parts = [];
}

MimeUnknown.prototype = {
  __proto__: HeaderHandlerBase,
  get allAttachments() {
    return this.parts
      .map(child => child.allAttachments)
      .reduce((a, b) => a.concat(b), []);
  },
  get allInlineAttachments() {
    return this.parts
      .map(child => child.allInlineAttachments)
      .reduce((a, b) => a.concat(b), []);
  },
  get allUserAttachments() {
    return this.parts
      .map(child => child.allUserAttachments)
      .reduce((a, b) => a.concat(b), []);
  },
  // Our own (explicitly assigned) size plus the clamped child sizes.
  get size() {
    return (
      this._size +
      this.parts
        .map(child => child.size)
        .reduce((a, b) => a + Math.max(b, 0), 0)
    );
  },
  set size(aSize) {
    this._size = aSize;
  },
  prettyString(aVerbose, aIndent, aDumpBody) {
    let nextIndent = aIndent + " ";

    let s =
      "Unknown: " +
      (this.isEncrypted ? "[encrypted] " : "") +
      "" +
      this.contentType +
      " (" +
      this.size +
      " bytes)";
    if (aVerbose) {
      s += this._prettyHeaderString(aIndent + " ");
    }

    for (let iPart = 0; iPart < this.parts.length; iPart++) {
      let part = this.parts[iPart];
      s +=
        "\n" +
        nextIndent +
        (iPart + 1) +
        " " +
        // part may be null/undefined; guard before recursing.
        (part ? part.prettyString(aVerbose, nextIndent, aDumpBody) : "NULL");
    }
    return s;
  },
  toString() {
    return "Unknown: " + this.contentType;
  },
};

/**
 * @class An attachment proper. We think it's an attachment because it has a
 * filename that libmime was able to figure out.
 *
 * @ivar partName @see{MimeMessage.partName}
 * @ivar name The filename of this attachment.
 * @ivar contentType The MIME content type of this part.
 * @ivar url The URL to stream if you want the contents of this part.
 * @ivar isExternal Is the attachment stored someplace else than in the message?
 * @ivar size The size of the attachment if available, -1 otherwise (size is set
 * after initialization by jsmimeemitter.js)
 */
function MimeMessageAttachment(
  aPartName,
  aName,
  aContentType,
  aUrl,
  aIsExternal
) {
  this.partName = aPartName;
  this.name = aName;
  this.contentType = aContentType;
  this.url = aUrl;
  this.isExternal = aIsExternal;
  this.headers = {};
  this.isEncrypted = false;
  // parts is copied over from the part instance that preceded us
  // headers is copied over from the part instance that preceded us
  // isEncrypted is copied over from the part instance that preceded us
}

MimeMessageAttachment.prototype = {
  __proto__: HeaderHandlerBase,
  get allAttachments() {
    return [this]; // we are a leaf, so just us.
  },
  get allInlineAttachments() {
    return [this]; // we are a leaf, so just us.
  },
  get allUserAttachments() {
    return [this];
  },
  prettyString(aVerbose, aIndent, aDumpBody) {
    let s =
      "Attachment " +
      (this.isEncrypted ? "[encrypted] " : "") +
      "(" +
      this.size +
      " bytes): " +
      this.name +
      ", " +
      this.contentType;
    if (aVerbose) {
      s += this._prettyHeaderString(aIndent + " ");
    }
    return s;
  },
  toString() {
    return this.prettyString(false, "");
  },
};
diff --git a/comm/mailnews/db/gloda/modules/NounFreetag.jsm b/comm/mailnews/db/gloda/modules/NounFreetag.jsm
new file mode 100644
index 0000000000..cb169645f1
--- /dev/null
+++ b/comm/mailnews/db/gloda/modules/NounFreetag.jsm
@@ -0,0 +1,91 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

const EXPORTED_SYMBOLS = ["FreeTag", "FreeTagNoun"];

const { Gloda } = ChromeUtils.import("resource:///modules/gloda/Gloda.jsm");

// Lightweight wrapper around a free-form tag name string.
function FreeTag(aTagName) {
  this.name = aTagName;
}

FreeTag.prototype = {
  toString() {
    return this.name;
  },
};

/**
 * @namespace Tag noun provider. Since the tag unique value is stored as a
 * parameter, we are an odd case and semantically confused.
 */
var FreeTagNoun = {
  _log: console.createInstance({
    prefix: "gloda.noun.freetag",
    maxLogLevel: "Warn",
    maxLogLevelPref: "gloda.loglevel",
  }),

  name: "freetag",
  clazz: FreeTag,
  allowsArbitraryAttrs: false,
  usesParameter: true,

  // Listeners notified (via onFreeTagAdded) whenever a new tag is created.
  _listeners: [],
  addListener(aListener) {
    this._listeners.push(aListener);
  },
  removeListener(aListener) {
    let index = this._listeners.indexOf(aListener);
    if (index >= 0) {
      this._listeners.splice(index, 1);
    }
  },

  // Walk the database parameter bindings of every attribute that uses this
  // noun and instantiate a FreeTag for each known parameter value.
  // NOTE(review): objectNounOfAttributes is populated externally by Gloda;
  // not visible in this file.
  populateKnownFreeTags() {
    for (let attr of this.objectNounOfAttributes) {
      let attrDB = attr.dbDef;
      for (let param in attrDB.parameterBindings) {
        this.getFreeTag(param);
      }
    }
  },

  // Interning map: tag name -> FreeTag instance (one instance per name).
  knownFreeTags: {},
  getFreeTag(aTagName) {
    let tag = this.knownFreeTags[aTagName];
    if (!tag) {
      tag = this.knownFreeTags[aTagName] = new FreeTag(aTagName);
      for (let listener of this._listeners) {
        listener.onFreeTagAdded(tag);
      }
    }
    return tag;
  },

  // Locale-aware ordering by tag name; null sorts after non-null.
  comparator(a, b) {
    if (a == null) {
      if (b == null) {
        return 0;
      }
      return 1;
    } else if (b == null) {
      return -1;
    }
    return a.name.localeCompare(b.name);
  },

  toParamAndValue(aTag) {
    return [aTag.name, null];
  },

  toJSON(aTag) {
    return aTag.name;
  },
  fromJSON(aTagName) {
    return this.getFreeTag(aTagName);
  },
};

Gloda.defineNoun(FreeTagNoun);
diff --git a/comm/mailnews/db/gloda/modules/NounMimetype.jsm b/comm/mailnews/db/gloda/modules/NounMimetype.jsm
new file mode 100644
index 0000000000..fef1a33bc7
--- /dev/null
+++ b/comm/mailnews/db/gloda/modules/NounMimetype.jsm
@@ -0,0 +1,582 @@
/* This Source
Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

const EXPORTED_SYMBOLS = ["MimeType", "MimeTypeNoun"];

const { Gloda } = ChromeUtils.import("resource:///modules/gloda/Gloda.jsm");
const { GlodaConstants } = ChromeUtils.import(
  "resource:///modules/gloda/GlodaConstants.jsm"
);

var LOG = console.createInstance({
  prefix: "gloda.noun.mimetype",
  maxLogLevel: "Warn",
  maxLogLevelPref: "gloda.loglevel",
});

// Maps top-level category name -> localized label; populated by
// MimeTypeNoun._loadCategoryMapping.
var CategoryStringMap = {};

/**
 * Input data structure to allow us to build a fast mapping from mime type to
 * category name. The keys in MimeCategoryMapping are the top-level
 * categories. Each value can either be a list of MIME types or a nested
 * object which recursively defines sub-categories. We currently do not use
 * the sub-categories. They are just there to try and organize the MIME types
 * a little and open the door to future enhancements.
 *
 * Do _not_ add additional top-level categories unless you have added
 * corresponding entries to gloda.properties under the
 * "gloda.mimetype.category" branch and are making sure localizers are aware
 * of the change and have time to localize it.
 *
 * Entries with wildcards in them are part of a fallback strategy by the
 * |mimeTypeNoun| and do not actually use regular expressions or anything like
 * that. Everything is a straight string lookup. Given "foo/bar" we look for
 * "foo/bar", then "foo/*", and finally "*".
 */
var MimeCategoryMapping = {
  archives: [
    "application/java-archive",
    "application/x-java-archive",
    "application/x-jar",
    "application/x-java-jnlp-file",

    "application/mac-binhex40",
    "application/vnd.ms-cab-compressed",

    "application/x-arc",
    "application/x-arj",
    "application/x-compress",
    "application/x-compressed-tar",
    "application/x-cpio",
    "application/x-cpio-compressed",
    "application/x-deb",

    "application/x-bittorrent",

    "application/x-rar",
    "application/x-rar-compressed",
    "application/x-7z-compressed",
    "application/zip",
    "application/x-zip-compressed",
    "application/x-zip",

    "application/x-bzip",
    "application/x-bzip-compressed-tar",
    "application/x-bzip2",
    "application/x-gzip",
    "application/x-tar",
    "application/x-tar-gz",
    "application/x-tarz",
  ],
  documents: {
    database: [
      "application/vnd.ms-access",
      "application/x-msaccess",
      "application/msaccess",
      "application/vnd.msaccess",
      "application/x-msaccess",
      "application/mdb",
      "application/x-mdb",

      "application/vnd.oasis.opendocument.database",
    ],
    graphics: [
      "application/postscript",
      "application/x-bzpostscript",
      "application/x-dvi",
      "application/x-gzdvi",

      "application/illustrator",

      "application/vnd.corel-draw",
      "application/cdr",
      "application/coreldraw",
      "application/x-cdr",
      "application/x-coreldraw",
      "image/cdr",
      "image/x-cdr",
      "zz-application/zz-winassoc-cdr",

      "application/vnd.oasis.opendocument.graphics",
      "application/vnd.oasis.opendocument.graphics-template",
      "application/vnd.oasis.opendocument.image",

      "application/x-dia-diagram",
    ],
    presentation: [
      "application/vnd.ms-powerpoint.presentation.macroenabled.12",
      "application/vnd.ms-powerpoint.template.macroenabled.12",
      "application/vnd.ms-powerpoint",
      "application/powerpoint",
      "application/mspowerpoint",
      "application/x-mspowerpoint",
      "application/vnd.openxmlformats-officedocument.presentationml.presentation",
      "application/vnd.openxmlformats-officedocument.presentationml.template",

      "application/vnd.oasis.opendocument.presentation",
      "application/vnd.oasis.opendocument.presentation-template",
    ],
    spreadsheet: [
      "application/vnd.lotus-1-2-3",
      "application/x-lotus123",
      "application/x-123",
      "application/lotus123",
      "application/wk1",

      "application/x-quattropro",

      "application/vnd.ms-excel.sheet.binary.macroenabled.12",
      "application/vnd.ms-excel.sheet.macroenabled.12",
      "application/vnd.ms-excel.template.macroenabled.12",
      "application/vnd.ms-excel",
      "application/msexcel",
      "application/x-msexcel",
      "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
      "application/vnd.openxmlformats-officedocument.spreadsheetml.template",

      "application/vnd.oasis.opendocument.formula",
      "application/vnd.oasis.opendocument.formula-template",
      "application/vnd.oasis.opendocument.chart",
      "application/vnd.oasis.opendocument.chart-template",
      "application/vnd.oasis.opendocument.spreadsheet",
      "application/vnd.oasis.opendocument.spreadsheet-template",

      "application/x-gnumeric",
    ],
    wordProcessor: [
      "application/msword",
      "application/vnd.ms-word",
      "application/x-msword",
      "application/msword-template",
      "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
      "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
      "application/vnd.ms-word.document.macroenabled.12",
      "application/vnd.ms-word.template.macroenabled.12",
      "application/x-mswrite",
      "application/x-pocket-word",

      "application/rtf",
      "text/rtf",

      "application/vnd.oasis.opendocument.text",
      "application/vnd.oasis.opendocument.text-master",
      "application/vnd.oasis.opendocument.text-template",
      "application/vnd.oasis.opendocument.text-web",

      "application/vnd.wordperfect",

      "application/x-abiword",
      "application/x-amipro",
    ],
    suite: ["application/vnd.ms-works"],
  },
  images: ["image/*"],
  media: {
    audio: ["audio/*"],
    video: ["video/*"],
    container: [
      "application/ogg",

      "application/smil",
      "application/vnd.ms-asf",
      "application/vnd.rn-realmedia",
      "application/x-matroska",
      "application/x-quicktime-media-link",
      "application/x-quicktimeplayer",
    ],
  },
  other: ["*"],
  pdf: [
    "application/pdf",
    "application/x-pdf",
    "image/pdf",
    "file/pdf",
    "application/x-bzpdf",
    "application/x-gzpdf",
  ],
};

/**
 * Mime type abstraction that exists primarily so we can map mime types to
 * integer id's.
 *
 * Instances of this class should only be retrieved via |MimeTypeNoun|; no one
 * should ever create an instance directly.
 */
function MimeType(aID, aType, aSubType, aFullType, aCategory) {
  this._id = aID;
  this._type = aType;
  this._subType = aSubType;
  this._fullType = aFullType;
  this._category = aCategory;
}

MimeType.prototype = {
  /**
   * The integer id we have associated with the mime type. This is stable for
   * the lifetime of the database, which means that anything in the Gloda
   * database can use this without fear. Things not persisted in the database
   * should use the actual string mime type, retrieval via |fullType|.
   */
  get id() {
    return this._id;
  },
  /**
   * The first part of the MIME type; "text/plain" gets you "text".
   */
  get type() {
    return this._type;
  },
  // One-shot setter: only populates if _fullType is not yet set, deriving
  // type, subType and category from the full type string.
  set fullType(aFullType) {
    if (!this._fullType) {
      this._fullType = aFullType;
      [this._type, this._subType] = this._fullType.split("/");
      this._category = MimeTypeNoun._getCategoryForMimeType(
        aFullType,
        this._type
      );
    }
  },
  /**
   * If the |fullType| is "text/plain", subType is "plain".
   */
  get subType() {
    return this._subType;
  },
  /**
   * The full MIME type; "text/plain" returns "text/plain".
   */
  get fullType() {
    return this._fullType;
  },
  toString() {
    return this.fullType;
  },

  /**
   * @returns the category we believe this mime type belongs to.
   This category
   * name should never be shown directly to the user. Instead, use
   * |categoryLabel| to get the localized name for the category. The
   * category mapping comes from mimeTypesCategories.js.
   */
  get category() {
    return this._category;
  },
  /**
   * @returns The localized label for the category from gloda.properties in the
   *   "gloda.mimetype.category.CATEGORY.label" definition using the value
   *   from |category|.
   */
  get categoryLabel() {
    return CategoryStringMap[this._category];
  },
};

/**
 * Mime type noun provider.
 *
 * The set of MIME Types is sufficiently limited that we can keep them all in
 * memory. In theory it is also sufficiently limited that we could use the
 * parameter mechanism in the database. However, it is more efficient, for
 * both space and performance reasons, to store the specific mime type as a
 * value. For future-proofing reasons, we opt to use a database table to
 * persist the mapping rather than a hard-coded list. A preferences file or
 * other text file would arguably suffice, but for consistency reasons, the
 * database is not a bad thing.
 */
var MimeTypeNoun = {
  name: "mime-type",
  clazz: MimeType, // gloda supports clazz as well as class
  allowsArbitraryAttrs: false,

  _strings: Services.strings.createBundle(
    "chrome://messenger/locale/gloda.properties"
  ),

  // note! update test_noun_mimetype if you change our internals!
  // Map: full type string -> MimeType instance.
  _mimeTypes: {},
  // Map: integer id -> MimeType instance.
  _mimeTypesByID: {},
  // Each category gets a contiguous block of this many ids.
  TYPE_BLOCK_SIZE: 16384,
  // Map: category name -> highest id allocated in that category's block.
  _mimeTypeHighID: {},
  _mimeTypeRangeDummyObjects: {},
  // Highest id seen across all categories.
  _highID: 0,

  // we now use the exciting 'schema' mechanism of defineNoun to get our table
  // created for us, plus some helper methods that we simply don't use.
  schema: {
    name: "mimeTypes",
    columns: [
      ["id", "INTEGER PRIMARY KEY", "_id"],
      ["mimeType", "TEXT", "fullType"],
    ],
  },

  _init() {
    LOG.debug("loading MIME types");
    this._loadCategoryMapping();
    this._loadMimeTypes();
  },

  /**
   * A map from MIME type to category name.
   */
  _mimeTypeToCategory: {},
  /**
   * Load the contents of MimeTypeCategories and populate
   */
  _loadCategoryMapping() {
    let mimeTypeToCategory = this._mimeTypeToCategory;

    // Recursively walk the (possibly nested) mapping structure; each MIME
    // type string is mapped to the path of category names above it.
    function procMapObj(aSubTree, aCategories) {
      for (let key in aSubTree) {
        let value = aSubTree[key];
        // Add this category to our nested categories list. Use concat since
        // the list will be long-lived and each list needs to be distinct.
        let categories = aCategories.concat();
        categories.push(key);

        // Only top-level categories have localized labels.
        if (categories.length == 1) {
          CategoryStringMap[key] = MimeTypeNoun._strings.GetStringFromName(
            "gloda.mimetype.category." + key + ".label"
          );
        }

        // Is it an array? If so, just process this depth
        if (Array.isArray(value)) {
          for (let mimeTypeStr of value) {
            mimeTypeToCategory[mimeTypeStr] = categories;
          }
        } else {
          // it's yet another sub-tree branch
          procMapObj(value, categories);
        }
      }
    }
    procMapObj(MimeCategoryMapping, []);
  },

  /**
   * Lookup the category associated with a MIME type given its full type and
   * type. (So, "foo/bar" and "foo" for "foo/bar".)
   * Falls back from "type/subtype" to "type/*" to "*".
   */
  _getCategoryForMimeType(aFullType, aType) {
    if (aFullType in this._mimeTypeToCategory) {
      return this._mimeTypeToCategory[aFullType][0];
    }
    let wildType = aType + "/*";
    if (wildType in this._mimeTypeToCategory) {
      return this._mimeTypeToCategory[wildType][0];
    }
    return this._mimeTypeToCategory["*"][0];
  },

  /**
   * In order to allow the gloda query mechanism to avoid hitting the database,
   * we need to either define the noun type as cacheable and have a super-large
   * cache or simply have a collection with every MIME type in it that stays
   * alive forever.
   * This is that collection. It is initialized by |_loadMimeTypes|. As new
   * MIME types are created, we add them to the collection.
   */
  _universalCollection: null,

  /**
   * Kick off a query of all the mime types in our database, leaving
   * |_processMimeTypes| to actually do the legwork.
   */
  _loadMimeTypes() {
    // get all the existing mime types!
    let query = Gloda.newQuery(this.id);
    let nullFunc = function () {};
    this._universalCollection = query.getCollection(
      {
        onItemsAdded: nullFunc,
        onItemsModified: nullFunc,
        onItemsRemoved: nullFunc,
        onQueryCompleted(aCollection) {
          MimeTypeNoun._processMimeTypes(aCollection.items);
        },
      },
      null
    );
  },

  /**
   * For the benefit of our Category queryHelper, we need dummy ranged objects
   * that cover the numerical address space allocated to the category. We
   * can't use a real object for the upper-bound because the upper-bound is
   * constantly growing and there is the chance the query might get persisted,
   * which means these values need to be long-lived. Unfortunately, our
   * solution to this problem (dummy objects) complicates the second case,
   * should it ever occur. (Because the dummy objects cannot be persisted
   * on their own... but there are other issues that will come up that we will
   * just have to deal with then.)
+ */ + _createCategoryDummies(aId, aCategory) { + let blockBottom = aId - (aId % this.TYPE_BLOCK_SIZE); + let blockTop = blockBottom + this.TYPE_BLOCK_SIZE - 1; + this._mimeTypeRangeDummyObjects[aCategory] = [ + new MimeType( + blockBottom, + "!category-dummy!", + aCategory, + "!category-dummy!/" + aCategory, + aCategory + ), + new MimeType( + blockTop, + "!category-dummy!", + aCategory, + "!category-dummy!/" + aCategory, + aCategory + ), + ]; + }, + + _processMimeTypes(aMimeTypes) { + for (let mimeType of aMimeTypes) { + if (mimeType.id > this._highID) { + this._highID = mimeType.id; + } + this._mimeTypes[mimeType] = mimeType; + this._mimeTypesByID[mimeType.id] = mimeType; + + let blockHighID = + mimeType.category in this._mimeTypeHighID + ? this._mimeTypeHighID[mimeType.category] + : undefined; + // create the dummy range objects + if (blockHighID === undefined) { + this._createCategoryDummies(mimeType.id, mimeType.category); + } + if (blockHighID === undefined || mimeType.id > blockHighID) { + this._mimeTypeHighID[mimeType.category] = mimeType.id; + } + } + }, + + _addNewMimeType(aMimeTypeName) { + let [typeName, subTypeName] = aMimeTypeName.split("/"); + let category = this._getCategoryForMimeType(aMimeTypeName, typeName); + + if (!(category in this._mimeTypeHighID)) { + let nextID = + this._highID - + (this._highID % this.TYPE_BLOCK_SIZE) + + this.TYPE_BLOCK_SIZE; + this._mimeTypeHighID[category] = nextID; + this._createCategoryDummies(nextID, category); + } + + let nextID = ++this._mimeTypeHighID[category]; + + let mimeType = new MimeType( + nextID, + typeName, + subTypeName, + aMimeTypeName, + category + ); + if (mimeType.id > this._highID) { + this._highID = mimeType.id; + } + + this._mimeTypes[aMimeTypeName] = mimeType; + this._mimeTypesByID[nextID] = mimeType; + + // As great as the gloda extension mechanisms are, we don't think it makes + // a lot of sense to use them in this case. 
    // So we directly trigger object
    // insertion without any of the grokNounItem stuff.
    this.objInsert.call(this.datastore, mimeType);
    // Since we bypass grokNounItem and its fun, we need to explicitly add the
    // new MIME-type to _universalCollection ourselves. Don't try this at
    // home, kids.
    this._universalCollection._onItemsAdded([mimeType]);

    return mimeType;
  },

  /**
   * Map a mime type to a |MimeType| instance, creating it if necessary.
   *
   * @param aMimeTypeName The mime type. It may optionally include parameters
   *   (which will be ignored). A mime type is of the form "type/subtype".
   *   A type with parameters would look like 'type/subtype; param="value"'.
   */
  getMimeType(aMimeTypeName) {
    // first, lose any parameters
    let semiIndex = aMimeTypeName.indexOf(";");
    if (semiIndex >= 0) {
      aMimeTypeName = aMimeTypeName.substring(0, semiIndex);
    }
    // normalize so lookups are case-insensitive
    aMimeTypeName = aMimeTypeName.trim().toLowerCase();

    if (aMimeTypeName in this._mimeTypes) {
      return this._mimeTypes[aMimeTypeName];
    }
    return this._addNewMimeType(aMimeTypeName);
  },

  /**
   * Query helpers contribute additional functions to the query object for the
   * attributes that use the noun type. For example, we define Category, so
   * for the "attachmentTypes" attribute, "attachmentTypesCategory" would be
   * exposed.
   */
  queryHelpers: {
    /**
     * Query for MIME type categories based on one or more MIME type objects
     * passed in. We want the range to span the entire block allocated to the
     * category.
     *
     * @param aAttrDef The attribute that is using us.
     * @param aArguments The actual arguments object that
     */
    Category(aAttrDef, aArguments) {
      let rangePairs = [];
      // If there are no arguments then we want to fall back to the 'in'
      // constraint which matches on any attachment.
      if (!aArguments || aArguments.length == 0) {
        return this._inConstraintHelper(aAttrDef, []);
      }

      for (let iArg = 0; iArg < aArguments.length; iArg++) {
        let arg = aArguments[iArg];
        rangePairs.push(MimeTypeNoun._mimeTypeRangeDummyObjects[arg.category]);
      }
      return this._rangedConstraintHelper(aAttrDef, rangePairs);
    },
  },

  // Locale-aware ordering by full type string; null sorts after non-null.
  comparator(a, b) {
    if (a == null) {
      if (b == null) {
        return 0;
      }
      return 1;
    } else if (b == null) {
      return -1;
    }
    return a.fullType.localeCompare(b.fullType);
  },

  toParamAndValue(aMimeType) {
    return [null, aMimeType.id];
  },
  toJSON(aMimeType) {
    return aMimeType.id;
  },
  fromJSON(aMimeTypeID) {
    return this._mimeTypesByID[aMimeTypeID];
  },
};
Gloda.defineNoun(MimeTypeNoun, GlodaConstants.NOUN_MIME_TYPE);
try {
  MimeTypeNoun._init();
} catch (ex) {
  LOG.error(
    "problem init-ing: " + ex.fileName + ":" + ex.lineNumber + ": " + ex
  );
}
diff --git a/comm/mailnews/db/gloda/modules/NounTag.jsm b/comm/mailnews/db/gloda/modules/NounTag.jsm
new file mode 100644
index 0000000000..1e5db85a42
--- /dev/null
+++ b/comm/mailnews/db/gloda/modules/NounTag.jsm
@@ -0,0 +1,97 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

const EXPORTED_SYMBOLS = ["TagNoun"];

const { MailServices } = ChromeUtils.import(
  "resource:///modules/MailServices.jsm"
);

const { Gloda } = ChromeUtils.import("resource:///modules/gloda/Gloda.jsm");
const { GlodaConstants } = ChromeUtils.import(
  "resource:///modules/gloda/GlodaConstants.jsm"
);

/**
 * @namespace Tag noun provider.
 */
var TagNoun = {
  name: "tag",
  clazz: Ci.nsIMsgTag,
  usesParameter: true,
  allowsArbitraryAttrs: false,
  idAttr: "key",
  _msgTagService: null,
  // Map: tag key -> nsIMsgTag; rebuilt by _updateTagMap.
  _tagMap: null,
  _tagList: null,

  _init() {
    // This reference can be substituted for testing purposes.
    this._msgTagService = MailServices.tags;
    this._updateTagMap();
  },

  getAllTags() {
    if (this._tagList == null) {
      this._updateTagMap();
    }
    return this._tagList;
  },

  // Refresh the cached tag list and the key -> tag map from the tag service.
  _updateTagMap() {
    this._tagMap = {};
    let tagArray = (this._tagList = this._msgTagService.getAllTags());
    for (let iTag = 0; iTag < tagArray.length; iTag++) {
      let tag = tagArray[iTag];
      this._tagMap[tag.key] = tag;
    }
  },

  // Locale-aware ordering by tag display string; null sorts after non-null.
  comparator(a, b) {
    if (a == null) {
      if (b == null) {
        return 0;
      }
      return 1;
    } else if (b == null) {
      return -1;
    }
    return a.tag.localeCompare(b.tag);
  },
  userVisibleString(aTag) {
    return aTag.tag;
  },

  // we cannot be an attribute value

  toParamAndValue(aTag) {
    return [aTag.key, null];
  },
  toJSON(aTag) {
    return aTag.key;
  },
  fromJSON(aTagKey, aIgnored) {
    let tag = this._tagMap.hasOwnProperty(aTagKey)
      ? this._tagMap[aTagKey]
      : undefined;
    // you will note that if a tag is removed, we are unable to aggressively
    // deal with this. we are okay with this, but it would be nice to be able
    // to listen to the message tag service to know when we should rebuild.
    if (tag === undefined && this._msgTagService.isValidKey(aTagKey)) {
      // The key is valid but not in our cache yet; rebuild and retry.
      this._updateTagMap();
      tag = this._tagMap[aTagKey];
    }
    // we intentionally are returning undefined if the tag doesn't exist
    return tag;
  },
  /**
   * Convenience helper to turn a tag key into a tag name.
   */
  getTag(aTagKey) {
    return this.fromJSON(aTagKey);
  },
};

TagNoun._init();
Gloda.defineNoun(TagNoun, GlodaConstants.NOUN_TAG);
diff --git a/comm/mailnews/db/gloda/modules/SuffixTree.jsm b/comm/mailnews/db/gloda/modules/SuffixTree.jsm
new file mode 100644
index 0000000000..239993e180
--- /dev/null
+++ b/comm/mailnews/db/gloda/modules/SuffixTree.jsm
@@ -0,0 +1,381 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

const EXPORTED_SYMBOLS = ["SuffixTree", "MultiSuffixTree"];

/**
 * Given a list of strings and a corresponding map of items that those strings
 * correspond to, build a suffix tree.
 */
function MultiSuffixTree(aStrings, aItems) {
  if (aStrings.length != aItems.length) {
    throw new Error("Array lengths need to be the same.");
  }

  // Concatenate all strings into one and remember, per string, the triple
  // [start offset, end offset (exclusive), item] so matches can be mapped
  // back to their originating item.
  let s = "";
  let offsetsToItems = [];
  let lastLength = 0;
  for (let i = 0; i < aStrings.length; i++) {
    s += aStrings[i];
    offsetsToItems.push(lastLength, s.length, aItems[i]);
    lastLength = s.length;
  }

  this._construct(s);
  this._offsetsToItems = offsetsToItems;
  this._numItems = aItems.length;
}

/**
 * @class A suffix-tree node/edge. |start|/|end| delimit the edge label as a
 *   half-open range into the tree's string; |suffix| is the suffix link.
 */
function State(aStartIndex, aEndIndex, aSuffix) {
  this.start = aStartIndex;
  this.end = aEndIndex;
  this.suffix = aSuffix;
}

/**
 * Since objects are basically hash-tables anyways, we simply create an
 * attribute whose name is the first letter of the edge string. (So, the
 * edge string can conceptually be a multi-letter string, but since we would
 * split it were there any ambiguity, it's okay to just use the single letter.)
 * This avoids having to update the attribute name or worry about tripping our
 * implementation up.
 */
State.prototype = {
  get isExplicit() {
    // our end is not inclusive...
    return this.end <= this.start;
  },
  get isImplicit() {
    // our end is not inclusive...
    return this.end > this.start;
  },

  get length() {
    return this.end - this.start;
  },

  toString() {
    return (
      "[Start: " +
      this.start +
      " End: " +
      this.end +
      (this.suffix ? " non-null suffix]" : " null suffix]")
    );
  },
};

/**
 * Suffix tree implemented using Ukkonen's algorithm.
 *
 * @class
 */
function SuffixTree(aStr) {
  this._construct(aStr);
}

/**
 * States are
 */
SuffixTree.prototype = {
  /**
   * Find all items matching the provided substring.
   */
  findMatches(aSubstring) {
    let results = [];
    let state = this._root;
    let index = 0;
    let end = aSubstring.length;
    // Walk edges from the root, matching the pattern letter-block by block.
    while (index < end) {
      state = state[aSubstring[index]];
      // bail if there was no edge
      if (state === undefined) {
        return results;
      }
      // bail if the portion of the edge we traversed is not equal to that
      // portion of our pattern
      let actualTraverseLength = Math.min(state.length, end - index);
      if (
        this._str.substring(state.start, state.start + actualTraverseLength) !=
        aSubstring.substring(index, index + actualTraverseLength)
      ) {
        return results;
      }
      index += state.length;
    }

    // state should now be the node which itself and all its children match...
    // The delta is to adjust us to the offset of the last letter of our match;
    // the edge we traversed to get here may have found us traversing more
    // than we wanted.
    // index - end captures the over-shoot of the edge traversal,
    // index - end + 1 captures the fact that we want to find the last letter
    // that matched, not just the first letter beyond it
    // However, if this state is a leaf node (end == 'infinity'), then 'end'
    // isn't describing an edge at all and we want to avoid accounting for it.
    let delta;
    /*
    if (state.end != this._infinity)
      //delta = index - end + 1;
      delta = end - (index - state.length);
    else */
    delta = index - state.length - end + 1;

    this._resultGather(state, results, {}, end, delta, true);
    return results;
  },

  _resultGather(
    aState,
    aResults,
    aPresence,
    aPatLength,
    aDelta,
    alreadyAdjusted
  ) {
    // find the item that this state originated from based on the state's
    // start character. offsetToItem holds [string start index, string end
    // index (exclusive), item reference]. So we want to binary search to
    // find the string whose start/end index contains the state's start index.
    let low = 0;
    let high = this._numItems - 1;
    let mid, stringStart, stringEnd;

    let patternLast = aState.start - aDelta;
    while (low <= high) {
      mid = low + Math.floor((high - low) / 2); // excessive, especially with js nums
      stringStart = this._offsetsToItems[mid * 3];
      let startDelta = stringStart - patternLast;
      stringEnd = this._offsetsToItems[mid * 3 + 1];
      let endDelta = stringEnd - patternLast;
      if (startDelta > 0) {
        high = mid - 1;
      } else if (endDelta <= 0) {
        low = mid + 1;
      } else {
        break;
      }
    }

    // - The match occurred completely inside a source string. Success.
    // - The match spans more than one source strings, and is therefore not
    //   a match.

    // at this point, we have located the origin string that corresponds to the
    // start index of this state.
    // - The match terminated with the end of the preceding string, and does
    //   not match us at all. We, and potentially our children, are merely
    //   serving as a unique terminal.
    // - The

    let patternFirst = patternLast - (aPatLength - 1);

    // Only record a hit when the whole pattern lies inside one source string;
    // aPresence de-duplicates multiple hits against the same string.
    if (patternFirst >= stringStart) {
      if (!(stringStart in aPresence)) {
        aPresence[stringStart] = true;
        aResults.push(this._offsetsToItems[mid * 3 + 2]);
      }
    }

    // bail if we had it coming OR
    // if the result terminates at/part-way through this state, meaning any
    //  of its children are not going to be actual results, just hangers
    //  on.
    /*
  if (bail || (end <= aState.end)) {
dump("  bailing! (bail was: " + bail + ")\n");
    return;
  }
*/
    // process our children...
    for (let key in aState) {
      // edges have attributes of length 1...
      if (key.length == 1) {
        let statePrime = aState[key];
        this._resultGather(
          statePrime,
          aResults,
          aPresence,
          aPatLength,
          aDelta + aState.length, // (alreadyAdjusted ?
0 : aState.length), + false + ); + } + } + }, + + /** + * Given a reference 'pair' of a state and a string (may be 'empty'=explicit, + * which means no work to do and we return immediately) follow that state + * (and then the successive states)'s transitions until we run out of + * transitions. This happens either when we find an explicit state, or + * find ourselves partially along an edge (conceptually speaking). In + * the partial case, we return the state prior to the edge traversal. + * (The information about the 'edge' is contained on its target State; + * we can do this because a state is only referenced by one other state.) + */ + _canonize(aState, aStart, aEnd) { + if (aEnd <= aStart) { + return [aState, aStart]; + } + + let statePrime; + // we treat an aState of null as 'bottom', which has transitions for every + // letter in the alphabet to 'root'. rather than create all those + // transitions, we special-case here. + if (aState === null) { + statePrime = this._root; + } else { + statePrime = aState[this._str[aStart]]; + } + while (statePrime.length <= aEnd - aStart) { + // (no 1 adjustment required) + aStart += statePrime.length; + aState = statePrime; + if (aStart < aEnd) { + statePrime = aState[this._str[aStart]]; + } + } + return [aState, aStart]; + }, + + /** + * Given a reference 'pair' whose state may or may not be explicit (and for + * which we will perform the required splitting to make it explicit), test + * whether it already possesses a transition corresponding to the provided + * character. + * + * @returns A list of: whether we had to make it explicit, the (potentially) + * new explicit state. + */ + _testAndSplit(aState, aStart, aEnd, aChar) { + if (aStart < aEnd) { + // it's not explicit + let statePrime = aState[this._str[aStart]]; + let length = aEnd - aStart; + if (aChar == this._str[statePrime.start + length]) { + return [true, aState]; + } + + // do splitting... 
aState -> rState -> statePrime + let rState = new State(statePrime.start, statePrime.start + length); + aState[this._str[statePrime.start]] = rState; + statePrime.start += length; + rState[this._str[statePrime.start]] = statePrime; + return [false, rState]; + } + + // it's already explicit + if (aState === null) { + // bottom case... shouldn't happen, but hey. + return [true, aState]; + } + return [aChar in aState, aState]; + }, + + _update(aState, aStart, aIndex) { + let oldR = this._root; + let textAtIndex = this._str[aIndex]; // T sub i (0-based corrected...) + // because of the way we store the 'end' value as a one-past form, we do + // not need to subtract 1 off of aIndex. + let [endPoint, rState] = this._testAndSplit( + aState, + aStart, + aIndex, // no -1 + textAtIndex + ); + while (!endPoint) { + let rPrime = new State(aIndex, this._infinity); + rState[textAtIndex] = rPrime; + if (oldR !== this._root) { + oldR.suffix = rState; + } + oldR = rState; + [aState, aStart] = this._canonize(aState.suffix, aStart, aIndex); // no -1 + [endPoint, rState] = this._testAndSplit( + aState, + aStart, + aIndex, // no -1 + textAtIndex + ); + } + if (oldR !== this._root) { + oldR.suffix = aState; + } + + return [aState, aStart]; + }, + + _construct(aStr) { + this._str = aStr; + // just needs to be longer than the string. + this._infinity = aStr.length + 1; + + // this._bottom = new State(0, -1, null); + this._root = new State(-1, 0, null); // null === bottom + let state = this._root; + let start = 0; + + for (let i = 0; i < aStr.length; i++) { + [state, start] = this._update(state, start, i); // treat as flowing -1... 
+ [state, start] = this._canonize(state, start, i + 1); // 1-length string + } + }, + + dump(aState, aIndent, aKey) { + if (aState === undefined) { + aState = this._root; + } + if (aIndent === undefined) { + aIndent = ""; + aKey = "."; + } + + if (aState.isImplicit) { + let snip; + if (aState.length > 10) { + snip = + this._str.slice( + aState.start, + Math.min(aState.start + 10, this._str.length) + ) + "..."; + } else { + snip = this._str.slice( + aState.start, + Math.min(aState.end, this._str.length) + ); + } + dump( + aIndent + + aKey + + ":" + + snip + + "(" + + aState.start + + ":" + + aState.end + + ")\n" + ); + } else { + dump( + aIndent + + aKey + + ": (explicit:" + + aState.start + + ":" + + aState.end + + ")\n" + ); + } + let nextIndent = aIndent + " "; + let keys = Object.keys(aState).filter(c => c.length == 1); + for (let key of keys) { + this.dump(aState[key], nextIndent, key); + } + }, +}; +MultiSuffixTree.prototype = SuffixTree.prototype; diff --git a/comm/mailnews/db/gloda/modules/moz.build b/comm/mailnews/db/gloda/modules/moz.build new file mode 100644 index 0000000000..54978c24ea --- /dev/null +++ b/comm/mailnews/db/gloda/modules/moz.build @@ -0,0 +1,31 @@ +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +EXTRA_JS_MODULES.gloda += [ + "Collection.jsm", + "Everybody.jsm", + "Facet.jsm", + "Gloda.jsm", + "GlodaConstants.jsm", + "GlodaContent.jsm", + "GlodaDatabind.jsm", + "GlodaDataModel.jsm", + "GlodaDatastore.jsm", + "GlodaExplicitAttr.jsm", + "GlodaFundAttr.jsm", + "GlodaIndexer.jsm", + "GlodaMsgIndexer.jsm", + "GlodaMsgSearcher.jsm", + "GlodaPublic.jsm", + "GlodaQueryClassFactory.jsm", + "GlodaSyntheticView.jsm", + "GlodaUtils.jsm", + "IndexMsg.jsm", + "MimeMessage.jsm", + "NounFreetag.jsm", + "NounMimetype.jsm", + "NounTag.jsm", + "SuffixTree.jsm", +] |