/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
"use strict";

// PersonalityProviderWorker.js imports the following scripts before this.
/* import-globals-from Tokenize.jsm */
/* import-globals-from NaiveBayesTextTagger.jsm */
/* import-globals-from NmfTextTagger.jsm */
/* import-globals-from RecipeExecutor.jsm */

// We load this into a worker using importScripts, and in tests using import.
// We use var to avoid name collision errors.
// eslint-disable-next-line no-var
var EXPORTED_SYMBOLS = ["PersonalityProviderWorker"];

/**
 * A helper function to create a hash out of a file.
 *
 * @param {string} filepath Path of the file to read and hash.
 * @returns {Promise<string>} SHA-256 digest of the file contents, encoded as
 *   lowercase hexadecimal with two characters per byte.
 */
async function _getFileHash(filepath) {
  const data = await IOUtils.read(filepath);
  // File is an instance of Uint8Array
  const digest = await crypto.subtle.digest("SHA-256", data);
  const uint8 = new Uint8Array(digest);
  // return the two-digit hexadecimal code for a byte
  const toHex = b => b.toString(16).padStart(2, "0");
  return Array.from(uint8, toHex).join("");
}

/**
 * V2 provider builds and ranks an interest profile (also called an “interest vector”) off the browse history.
 * This allows Firefox to classify pages into topics, by examining the text found on the page.
 * It does this by looking at the history text content, title, and description.
 */
const PersonalityProviderWorker = class PersonalityProviderWorker {
  /**
   * Resolves the "personality-provider" directory inside the local profile
   * directory.
   *
   * @returns {Promise<string>} Path of the directory.
   */
  async getPersonalityProviderDir() {
    const personalityProviderDir = PathUtils.join(
      await PathUtils.getLocalProfileDir(),
      "personality-provider"
    );

    // Cache this so we don't need to await again: later calls on this
    // instance hit the replacement below instead of the prototype method.
    this.getPersonalityProviderDir = () =>
      Promise.resolve(personalityProviderDir);
    return personalityProviderDir;
  }

  /**
   * Stores the URL prefix that attachment locations are appended to.
   *
   * @param {string} url Base URL for attachment downloads.
   */
  setBaseAttachmentsURL(url) {
    this.baseAttachmentsURL = url;
  }

  /**
   * Stores the recipes read by createInterestVector and
   * calculateItemRelevanceScore.
   *
   * @param {object} interestConfig Recipe configuration.
   */
  setInterestConfig(interestConfig) {
    this.interestConfig = interestConfig;
  }

  /**
   * Stores the interest vector read by calculateItemRelevanceScore.
   *
   * @param {object} interestVector JSON-serializable interest vector.
   */
  setInterestVector(interestVector) {
    this.interestVector = interestVector;
  }

  /**
   * Reacts to a sync event by deleting removed/outdated attachments and
   * downloading new/updated ones.
   *
   * All work is started synchronously, deletions first, and is not awaited.
   * A rejected task is logged with console.error rather than being left as
   * an unhandled promise rejection.
   *
   * @param {object} event Sync event.
   * @param {object} event.data
   * @param {Array} event.data.created Records to download.
   * @param {Array} event.data.updated Pairs of `{ old, new }` records.
   * @param {Array} event.data.deleted Records to delete.
   */
  onSync(event) {
    const {
      data: { created, updated, deleted },
    } = event;

    // Promise.resolve lets this also accept a non-promise result (e.g. from a
    // stubbed method) without throwing.
    const logFailure = (action, result) => {
      Promise.resolve(result).catch(error => {
        console.error(`Failed to ${action} attachment: ${error}`);
      });
    };

    // Remove every removed attachment.
    const toRemove = deleted.concat(updated.map(u => u.old));
    toRemove.forEach(record =>
      logFailure("delete", this.deleteAttachment(record))
    );

    // Download every new/updated attachment.
    const toDownload = created.concat(updated.map(u => u.new));
    // maybeDownloadAttachment is async but we don't care inside onSync.
    toDownload.forEach(record =>
      logFailure("download", this.maybeDownloadAttachment(record))
    );
  }

  /**
   * Attempts to download the attachment, but only if it doesn't already exist.
   *
   * The local copy counts as missing when the file is absent, its size
   * differs from `attachment.size`, or its SHA-256 differs from
   * `attachment.hash`. The check is repeated before each attempt; the result
   * of the last attempt is not re-checked.
   *
   * @param {object} record Record with `attachment.{filename, hash, size}`.
   * @param {number} [retries=3] Maximum number of download attempts.
   */
  async maybeDownloadAttachment(record, retries = 3) {
    const {
      attachment: { filename, hash, size },
    } = record;
    const providerDir = await this.getPersonalityProviderDir();
    await IOUtils.makeDirectory(providerDir);
    const localFilePath = PathUtils.join(providerDir, filename);

    let retry = 0;
    while (
      retry++ < retries &&
      // exists is an issue for perf because I might not need to call it.
      (!(await IOUtils.exists(localFilePath)) ||
        (await IOUtils.stat(localFilePath)).size !== size ||
        (await _getFileHash(localFilePath)) !== hash)
    ) {
      await this._downloadAttachment(record);
    }
  }

  /**
   * Downloads the attachment to disk assuming the dir already exists
   * and any existing files matching the filename are clobbered.
   *
   * A response status other than 200 is logged and nothing is written.
   *
   * @param {object} record Record with `attachment.{location, filename}`.
   */
  async _downloadAttachment(record) {
    const {
      attachment: { location, filename },
    } = record;
    const remoteFilePath = this.baseAttachmentsURL + location;
    const localFilePath = PathUtils.join(
      await this.getPersonalityProviderDir(),
      filename
    );

    const xhr = new XMLHttpRequest();
    // Set false here for a synchronous request, because we're in a worker.
    xhr.open("GET", remoteFilePath, false);
    xhr.setRequestHeader("Accept-Encoding", "gzip");
    xhr.responseType = "arraybuffer";
    xhr.withCredentials = false;
    xhr.send(null);

    if (xhr.status !== 200) {
      console.error(`Failed to fetch ${remoteFilePath}: ${xhr.statusText}`);
      return;
    }

    const buffer = xhr.response;
    const bytes = new Uint8Array(buffer);

    await IOUtils.write(localFilePath, bytes, {
      tmpPath: `${localFilePath}.tmp`,
    });
  }

  /**
   * Deletes the local copy of an attachment, then tries to remove the
   * provider directory as well.
   *
   * @param {object} record Record with `attachment.filename`.
   */
  async deleteAttachment(record) {
    const {
      attachment: { filename },
    } = record;
    const providerDir = await this.getPersonalityProviderDir();
    await IOUtils.makeDirectory(providerDir);
    const path = PathUtils.join(providerDir, filename);

    await IOUtils.remove(path, { ignoreAbsent: true });
    // Cleanup the directory if it is empty, do nothing if it is not empty.
    try {
      await IOUtils.remove(providerDir, {
        ignoreAbsent: true,
      });
    } catch (e) {
      // This is likely because the directory is not empty, so we don't care.
    }
  }

  /**
   * Gets contents of the attachment if it already exists on file,
   * and if not attempts to download it.
   *
   * @param {object} record Record with `attachment.filename`.
   * @returns {Promise<object>} Parsed JSON contents of the attachment, or an
   *   empty object when downloading or reading fails (the error is logged).
   */
  async getAttachment(record) {
    const {
      attachment: { filename },
    } = record;
    const filepath = PathUtils.join(
      await this.getPersonalityProviderDir(),
      filename
    );

    try {
      await this.maybeDownloadAttachment(record);
      return await IOUtils.readJSON(filepath);
    } catch (error) {
      console.error(`Failed to load ${filepath}: ${error.message}`);
    }
    return {};
  }

  /**
   * Loads the attachment of every given record into `this.models`, tagging
   * each loaded model with its record's key as `recordKey`.
   *
   * @param {Array<object>} models Records whose attachments are models.
   * @returns {Promise<{ok: boolean}>} `ok` is false only when no records
   *   were given.
   */
  async fetchModels(models) {
    this.models = await Promise.all(
      models.map(async record => ({
        ...(await this.getAttachment(record)),
        recordKey: record.key,
      }))
    );

    return {
      ok: this.models.length > 0,
    };
  }

  /**
   * Builds `this.taggers` from the loaded models whose `recordKey` is listed
   * in `modelKeys`. Does nothing when taggers already exist.
   *
   * Models with `model_type` "nb" are collected in a list; models with
   * `model_type` "nmf" are stored under their `parent_tag`. Other model
   * types are ignored.
   *
   * @param {Array<string>} modelKeys Record keys of the models to use.
   */
  generateTaggers(modelKeys) {
    if (this.taggers) {
      return;
    }

    const nbTaggers = [];
    const nmfTaggers = {};

    for (const model of this.models) {
      if (!modelKeys.includes(model.recordKey)) {
        continue;
      }
      if (model.model_type === "nb") {
        nbTaggers.push(new NaiveBayesTextTagger(model, toksToTfIdfVector));
      } else if (model.model_type === "nmf") {
        nmfTaggers[model.parent_tag] = new NmfTextTagger(
          model,
          toksToTfIdfVector
        );
      }
    }

    this.taggers = { nbTaggers, nmfTaggers };
  }

  /**
   * Sets and generates a Recipe Executor.
   * A Recipe Executor is a set of actions that can be consumed by a Recipe.
   * The Recipe determines the order and specifics of which the actions are called.
   *
   * Requires `this.taggers` to be set (see generateTaggers).
   */
  generateRecipeExecutor() {
    const recipeExecutor = new RecipeExecutor(
      this.taggers.nbTaggers,
      this.taggers.nmfTaggers,
      tokenize
    );
    this.recipeExecutor = recipeExecutor;
  }

  /**
   * Examines the user's browse history and returns an interest vector that
   * describes the topics the user frequently browses.
   *
   * @param {Iterable<object>} history History records to run through the
   *   `history_item_builder` recipe.
   * @returns {?{ok: boolean, interestVector: object}} The finalized interest
   *   vector, or null when the combiner recipe returns null.
   */
  createInterestVector(history) {
    let interestVector = {};

    for (const historyRec of history) {
      const ivItem = this.recipeExecutor.executeRecipe(
        historyRec,
        this.interestConfig.history_item_builder
      );
      // Records the builder recipe rejects are skipped, not treated as fatal.
      if (ivItem === null) {
        continue;
      }
      interestVector = this.recipeExecutor.executeCombinerRecipe(
        interestVector,
        ivItem,
        this.interestConfig.interest_combiner
      );
      if (interestVector === null) {
        return null;
      }
    }

    const finalResult = this.recipeExecutor.executeRecipe(
      interestVector,
      this.interestConfig.interest_finalizer
    );

    return {
      ok: true,
      interestVector: finalResult,
    };
  }

  /**
   * Calculates a score of a Pocket item when compared to the user's interest
   * vector. Assumes this.interestVector is populated.
   *
   * @param {object} pocketItem Item to score.
   * @returns {?{scorableItem: object, rankingVector: object}} The scorable
   *   form of the item and the output of the `item_ranker` recipe, or null
   *   when a recipe returns null.
   */
  calculateItemRelevanceScore(pocketItem) {
    const { personalization_models } = pocketItem;
    let scorableItem;

    // If the server provides some models, we can just use them,
    // and skip generating them.
    if (personalization_models && Object.keys(personalization_models).length) {
      scorableItem = {
        id: pocketItem.id,
        item_tags: personalization_models,
        item_score: pocketItem.item_score,
        item_sort_id: 1,
      };
    } else {
      scorableItem = this.recipeExecutor.executeRecipe(
        pocketItem,
        this.interestConfig.item_to_rank_builder
      );
      if (scorableItem === null) {
        return null;
      }
    }

    // We're doing a deep copy on an object, so the stored interest vector is
    // not modified by the merge below.
    let rankingVector = JSON.parse(JSON.stringify(this.interestVector));

    for (const key of Object.keys(scorableItem)) {
      rankingVector[key] = scorableItem[key];
    }

    rankingVector = this.recipeExecutor.executeRecipe(
      rankingVector,
      this.interestConfig.item_ranker
    );

    if (rankingVector === null) {
      return null;
    }

    return { scorableItem, rankingVector };
  }
};