From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- .../PersonalityProviderWorkerClass.jsm | 311 +++++++++++++++++++++ 1 file changed, 311 insertions(+) create mode 100644 browser/components/newtab/lib/PersonalityProvider/PersonalityProviderWorkerClass.jsm (limited to 'browser/components/newtab/lib/PersonalityProvider/PersonalityProviderWorkerClass.jsm') diff --git a/browser/components/newtab/lib/PersonalityProvider/PersonalityProviderWorkerClass.jsm b/browser/components/newtab/lib/PersonalityProvider/PersonalityProviderWorkerClass.jsm new file mode 100644 index 0000000000..e761f827d2 --- /dev/null +++ b/browser/components/newtab/lib/PersonalityProvider/PersonalityProviderWorkerClass.jsm @@ -0,0 +1,311 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +"use strict"; + +// PersonalityProviderWorker.js imports the following scripts before this. +/* import-globals-from Tokenize.jsm */ +/* import-globals-from NaiveBayesTextTagger.jsm */ +/* import-globals-from NmfTextTagger.jsm */ +/* import-globals-from RecipeExecutor.jsm */ + +// We load this into a worker using importScripts, and in tests using import. +// We use var to avoid name collision errors. +// eslint-disable-next-line no-var +var EXPORTED_SYMBOLS = ["PersonalityProviderWorker"]; + +// A helper function to create a hash out of a file. +async function _getFileHash(filepath) { + const data = await IOUtils.read(filepath); + // File is an instance of Uint8Array + const digest = await crypto.subtle.digest("SHA-256", data); + const uint8 = new Uint8Array(digest); + // return the two-digit hexadecimal code for a byte + const toHex = b => b.toString(16).padStart(2, "0"); + return Array.from(uint8, toHex).join(""); +} + +/** + * V2 provider builds and ranks an interest profile (also called an “interest vector”) off the browse history. + * This allows Firefox to classify pages into topics, by examining the text found on the page. + * It does this by looking at the history text content, title, and description. + */ +const PersonalityProviderWorker = class PersonalityProviderWorker { + async getPersonalityProviderDir() { + const personalityProviderDir = PathUtils.join( + await PathUtils.getLocalProfileDir(), + "personality-provider" + ); + + // Cache this so we don't need to await again. + this.getPersonalityProviderDir = () => + Promise.resolve(personalityProviderDir); + return personalityProviderDir; + } + + setBaseAttachmentsURL(url) { + this.baseAttachmentsURL = url; + } + + setInterestConfig(interestConfig) { + this.interestConfig = interestConfig; + } + + setInterestVector(interestVector) { + this.interestVector = interestVector; + } + + onSync(event) { + const { + data: { created, updated, deleted }, + } = event; + // Remove every removed attachment. + const toRemove = deleted.concat(updated.map(u => u.old)); + toRemove.forEach(record => this.deleteAttachment(record)); + + // Download every new/updated attachment. + const toDownload = created.concat(updated.map(u => u.new)); + // maybeDownloadAttachment is async but we don't care inside onSync. + toDownload.forEach(record => this.maybeDownloadAttachment(record)); + } + + /** + * Attempts to download the attachment, but only if it doesn't already exist. + */ + async maybeDownloadAttachment(record, retries = 3) { + const { + attachment: { filename, hash, size }, + } = record; + await IOUtils.makeDirectory(await this.getPersonalityProviderDir()); + const localFilePath = PathUtils.join( + await this.getPersonalityProviderDir(), + filename + ); + + let retry = 0; + while ( + retry++ < retries && + // exists is an issue for perf because I might not need to call it. + (!(await IOUtils.exists(localFilePath)) || + (await IOUtils.stat(localFilePath)).size !== size || + (await _getFileHash(localFilePath)) !== hash) + ) { + await this._downloadAttachment(record); + } + } + + /** + * Downloads the attachment to disk assuming the dir already exists + * and any existing files matching the filename are clobbered. + */ + async _downloadAttachment(record) { + const { + attachment: { location, filename }, + } = record; + const remoteFilePath = this.baseAttachmentsURL + location; + const localFilePath = PathUtils.join( + await this.getPersonalityProviderDir(), + filename + ); + + const xhr = new XMLHttpRequest(); + // Set false here for a synchronous request, because we're in a worker. + xhr.open("GET", remoteFilePath, false); + xhr.setRequestHeader("Accept-Encoding", "gzip"); + xhr.responseType = "arraybuffer"; + xhr.withCredentials = false; + xhr.send(null); + + if (xhr.status !== 200) { + console.error(`Failed to fetch ${remoteFilePath}: ${xhr.statusText}`); + return; + } + + const buffer = xhr.response; + const bytes = new Uint8Array(buffer); + + await IOUtils.write(localFilePath, bytes, { + tmpPath: `${localFilePath}.tmp`, + }); + } + + async deleteAttachment(record) { + const { + attachment: { filename }, + } = record; + await IOUtils.makeDirectory(await this.getPersonalityProviderDir()); + const path = PathUtils.join( + await this.getPersonalityProviderDir(), + filename + ); + + await IOUtils.remove(path, { ignoreAbsent: true }); + // Cleanup the directory if it is empty, do nothing if it is not empty. + try { + await IOUtils.remove(await this.getPersonalityProviderDir(), { + ignoreAbsent: true, + }); + } catch (e) { + // This is likely because the directory is not empty, so we don't care. + } + } + + /** + * Gets contents of the attachment if it already exists on file, + * and if not attempts to download it. + */ + async getAttachment(record) { + const { + attachment: { filename }, + } = record; + const filepath = PathUtils.join( + await this.getPersonalityProviderDir(), + filename + ); + + try { + await this.maybeDownloadAttachment(record); + return await IOUtils.readJSON(filepath); + } catch (error) { + console.error(`Failed to load ${filepath}: ${error.message}`); + } + return {}; + } + + async fetchModels(models) { + this.models = await Promise.all( + models.map(async record => ({ + ...(await this.getAttachment(record)), + recordKey: record.key, + })) + ); + if (!this.models.length) { + return { + ok: false, + }; + } + return { + ok: true, + }; + } + + generateTaggers(modelKeys) { + if (!this.taggers) { + let nbTaggers = []; + let nmfTaggers = {}; + + for (let model of this.models) { + if (!modelKeys.includes(model.recordKey)) { + continue; + } + if (model.model_type === "nb") { + nbTaggers.push(new NaiveBayesTextTagger(model, toksToTfIdfVector)); + } else if (model.model_type === "nmf") { + nmfTaggers[model.parent_tag] = new NmfTextTagger( + model, + toksToTfIdfVector + ); + } + } + this.taggers = { nbTaggers, nmfTaggers }; + } + } + + /** + * Sets and generates a Recipe Executor. + * A Recipe Executor is a set of actions that can be consumed by a Recipe. + * The Recipe determines the order and specifics of which the actions are called. + */ + generateRecipeExecutor() { + const recipeExecutor = new RecipeExecutor( + this.taggers.nbTaggers, + this.taggers.nmfTaggers, + tokenize + ); + this.recipeExecutor = recipeExecutor; + } + + /** + * Examines the user's browse history and returns an interest vector that + * describes the topics the user frequently browses. + */ + createInterestVector(history) { + let interestVector = {}; + + for (let historyRec of history) { + let ivItem = this.recipeExecutor.executeRecipe( + historyRec, + this.interestConfig.history_item_builder + ); + if (ivItem === null) { + continue; + } + interestVector = this.recipeExecutor.executeCombinerRecipe( + interestVector, + ivItem, + this.interestConfig.interest_combiner + ); + if (interestVector === null) { + return null; + } + } + + const finalResult = this.recipeExecutor.executeRecipe( + interestVector, + this.interestConfig.interest_finalizer + ); + + return { + ok: true, + interestVector: finalResult, + }; + } + + /** + * Calculates a score of a Pocket item when compared to the user's interest + * vector. Returns the score. Higher scores are better. Assumes this.interestVector + * is populated. + */ + calculateItemRelevanceScore(pocketItem) { + const { personalization_models } = pocketItem; + let scorableItem; + + // If the server provides some models, we can just use them, + // and skip generating them. + if (personalization_models && Object.keys(personalization_models).length) { + scorableItem = { + id: pocketItem.id, + item_tags: personalization_models, + item_score: pocketItem.item_score, + item_sort_id: 1, + }; + } else { + scorableItem = this.recipeExecutor.executeRecipe( + pocketItem, + this.interestConfig.item_to_rank_builder + ); + if (scorableItem === null) { + return null; + } + } + + // We're doing a deep copy on an object. + let rankingVector = JSON.parse(JSON.stringify(this.interestVector)); + + Object.keys(scorableItem).forEach(key => { + rankingVector[key] = scorableItem[key]; + }); + + rankingVector = this.recipeExecutor.executeRecipe( + rankingVector, + this.interestConfig.item_ranker + ); + + if (rankingVector === null) { + return null; + } + + return { scorableItem, rankingVector }; + } +}; -- cgit v1.2.3