summaryrefslogtreecommitdiffstats
path: root/browser/components/newtab/lib/PersonalityProvider/PersonalityProviderWorkerClass.jsm
diff options
context:
space:
mode:
Diffstat (limited to 'browser/components/newtab/lib/PersonalityProvider/PersonalityProviderWorkerClass.jsm')
-rw-r--r--browser/components/newtab/lib/PersonalityProvider/PersonalityProviderWorkerClass.jsm311
1 files changed, 311 insertions, 0 deletions
diff --git a/browser/components/newtab/lib/PersonalityProvider/PersonalityProviderWorkerClass.jsm b/browser/components/newtab/lib/PersonalityProvider/PersonalityProviderWorkerClass.jsm
new file mode 100644
index 0000000000..e761f827d2
--- /dev/null
+++ b/browser/components/newtab/lib/PersonalityProvider/PersonalityProviderWorkerClass.jsm
@@ -0,0 +1,311 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+"use strict";
+
+// PersonalityProviderWorker.js imports the following scripts before this.
+/* import-globals-from Tokenize.jsm */
+/* import-globals-from NaiveBayesTextTagger.jsm */
+/* import-globals-from NmfTextTagger.jsm */
+/* import-globals-from RecipeExecutor.jsm */
+
+// We load this into a worker using importScripts, and in tests using import.
+// We use var to avoid name collision errors.
+// eslint-disable-next-line no-var
+var EXPORTED_SYMBOLS = ["PersonalityProviderWorker"];
+
+// A helper function to create a hash out of a file.
+async function _getFileHash(filepath) {
+ const data = await IOUtils.read(filepath);
+ // File is an instance of Uint8Array
+ const digest = await crypto.subtle.digest("SHA-256", data);
+ const uint8 = new Uint8Array(digest);
+ // return the two-digit hexadecimal code for a byte
+ const toHex = b => b.toString(16).padStart(2, "0");
+ return Array.from(uint8, toHex).join("");
+}
+
+/**
+ * V2 provider builds and ranks an interest profile (also called an “interest vector”) off the browse history.
+ * This allows Firefox to classify pages into topics, by examining the text found on the page.
+ * It does this by looking at the history text content, title, and description.
+ */
+const PersonalityProviderWorker = class PersonalityProviderWorker {
+ async getPersonalityProviderDir() {
+ const personalityProviderDir = PathUtils.join(
+ await PathUtils.getLocalProfileDir(),
+ "personality-provider"
+ );
+
+ // Cache this so we don't need to await again.
+ this.getPersonalityProviderDir = () =>
+ Promise.resolve(personalityProviderDir);
+ return personalityProviderDir;
+ }
+
+ setBaseAttachmentsURL(url) {
+ this.baseAttachmentsURL = url;
+ }
+
+ setInterestConfig(interestConfig) {
+ this.interestConfig = interestConfig;
+ }
+
+ setInterestVector(interestVector) {
+ this.interestVector = interestVector;
+ }
+
+ onSync(event) {
+ const {
+ data: { created, updated, deleted },
+ } = event;
+ // Remove every removed attachment.
+ const toRemove = deleted.concat(updated.map(u => u.old));
+ toRemove.forEach(record => this.deleteAttachment(record));
+
+ // Download every new/updated attachment.
+ const toDownload = created.concat(updated.map(u => u.new));
+ // maybeDownloadAttachment is async but we don't care inside onSync.
+ toDownload.forEach(record => this.maybeDownloadAttachment(record));
+ }
+
+ /**
+ * Attempts to download the attachment, but only if it doesn't already exist.
+ */
+ async maybeDownloadAttachment(record, retries = 3) {
+ const {
+ attachment: { filename, hash, size },
+ } = record;
+ await IOUtils.makeDirectory(await this.getPersonalityProviderDir());
+ const localFilePath = PathUtils.join(
+ await this.getPersonalityProviderDir(),
+ filename
+ );
+
+ let retry = 0;
+ while (
+ retry++ < retries &&
+ // exists is an issue for perf because I might not need to call it.
+ (!(await IOUtils.exists(localFilePath)) ||
+ (await IOUtils.stat(localFilePath)).size !== size ||
+ (await _getFileHash(localFilePath)) !== hash)
+ ) {
+ await this._downloadAttachment(record);
+ }
+ }
+
+ /**
+ * Downloads the attachment to disk assuming the dir already exists
+ * and any existing files matching the filename are clobbered.
+ */
+ async _downloadAttachment(record) {
+ const {
+ attachment: { location, filename },
+ } = record;
+ const remoteFilePath = this.baseAttachmentsURL + location;
+ const localFilePath = PathUtils.join(
+ await this.getPersonalityProviderDir(),
+ filename
+ );
+
+ const xhr = new XMLHttpRequest();
+ // Set false here for a synchronous request, because we're in a worker.
+ xhr.open("GET", remoteFilePath, false);
+ xhr.setRequestHeader("Accept-Encoding", "gzip");
+ xhr.responseType = "arraybuffer";
+ xhr.withCredentials = false;
+ xhr.send(null);
+
+ if (xhr.status !== 200) {
+ console.error(`Failed to fetch ${remoteFilePath}: ${xhr.statusText}`);
+ return;
+ }
+
+ const buffer = xhr.response;
+ const bytes = new Uint8Array(buffer);
+
+ await IOUtils.write(localFilePath, bytes, {
+ tmpPath: `${localFilePath}.tmp`,
+ });
+ }
+
+ async deleteAttachment(record) {
+ const {
+ attachment: { filename },
+ } = record;
+ await IOUtils.makeDirectory(await this.getPersonalityProviderDir());
+ const path = PathUtils.join(
+ await this.getPersonalityProviderDir(),
+ filename
+ );
+
+ await IOUtils.remove(path, { ignoreAbsent: true });
+ // Cleanup the directory if it is empty, do nothing if it is not empty.
+ try {
+ await IOUtils.remove(await this.getPersonalityProviderDir(), {
+ ignoreAbsent: true,
+ });
+ } catch (e) {
+ // This is likely because the directory is not empty, so we don't care.
+ }
+ }
+
+ /**
+ * Gets contents of the attachment if it already exists on file,
+ * and if not attempts to download it.
+ */
+ async getAttachment(record) {
+ const {
+ attachment: { filename },
+ } = record;
+ const filepath = PathUtils.join(
+ await this.getPersonalityProviderDir(),
+ filename
+ );
+
+ try {
+ await this.maybeDownloadAttachment(record);
+ return await IOUtils.readJSON(filepath);
+ } catch (error) {
+ console.error(`Failed to load ${filepath}: ${error.message}`);
+ }
+ return {};
+ }
+
+ async fetchModels(models) {
+ this.models = await Promise.all(
+ models.map(async record => ({
+ ...(await this.getAttachment(record)),
+ recordKey: record.key,
+ }))
+ );
+ if (!this.models.length) {
+ return {
+ ok: false,
+ };
+ }
+ return {
+ ok: true,
+ };
+ }
+
+ generateTaggers(modelKeys) {
+ if (!this.taggers) {
+ let nbTaggers = [];
+ let nmfTaggers = {};
+
+ for (let model of this.models) {
+ if (!modelKeys.includes(model.recordKey)) {
+ continue;
+ }
+ if (model.model_type === "nb") {
+ nbTaggers.push(new NaiveBayesTextTagger(model, toksToTfIdfVector));
+ } else if (model.model_type === "nmf") {
+ nmfTaggers[model.parent_tag] = new NmfTextTagger(
+ model,
+ toksToTfIdfVector
+ );
+ }
+ }
+ this.taggers = { nbTaggers, nmfTaggers };
+ }
+ }
+
+ /**
+ * Sets and generates a Recipe Executor.
+ * A Recipe Executor is a set of actions that can be consumed by a Recipe.
+ * The Recipe determines the order and specifics of which the actions are called.
+ */
+ generateRecipeExecutor() {
+ const recipeExecutor = new RecipeExecutor(
+ this.taggers.nbTaggers,
+ this.taggers.nmfTaggers,
+ tokenize
+ );
+ this.recipeExecutor = recipeExecutor;
+ }
+
+ /**
+ * Examines the user's browse history and returns an interest vector that
+ * describes the topics the user frequently browses.
+ */
+ createInterestVector(history) {
+ let interestVector = {};
+
+ for (let historyRec of history) {
+ let ivItem = this.recipeExecutor.executeRecipe(
+ historyRec,
+ this.interestConfig.history_item_builder
+ );
+ if (ivItem === null) {
+ continue;
+ }
+ interestVector = this.recipeExecutor.executeCombinerRecipe(
+ interestVector,
+ ivItem,
+ this.interestConfig.interest_combiner
+ );
+ if (interestVector === null) {
+ return null;
+ }
+ }
+
+ const finalResult = this.recipeExecutor.executeRecipe(
+ interestVector,
+ this.interestConfig.interest_finalizer
+ );
+
+ return {
+ ok: true,
+ interestVector: finalResult,
+ };
+ }
+
+ /**
+ * Calculates a score of a Pocket item when compared to the user's interest
+ * vector. Returns the score. Higher scores are better. Assumes this.interestVector
+ * is populated.
+ */
+ calculateItemRelevanceScore(pocketItem) {
+ const { personalization_models } = pocketItem;
+ let scorableItem;
+
+ // If the server provides some models, we can just use them,
+ // and skip generating them.
+ if (personalization_models && Object.keys(personalization_models).length) {
+ scorableItem = {
+ id: pocketItem.id,
+ item_tags: personalization_models,
+ item_score: pocketItem.item_score,
+ item_sort_id: 1,
+ };
+ } else {
+ scorableItem = this.recipeExecutor.executeRecipe(
+ pocketItem,
+ this.interestConfig.item_to_rank_builder
+ );
+ if (scorableItem === null) {
+ return null;
+ }
+ }
+
+ // We're doing a deep copy on an object.
+ let rankingVector = JSON.parse(JSON.stringify(this.interestVector));
+
+ Object.keys(scorableItem).forEach(key => {
+ rankingVector[key] = scorableItem[key];
+ });
+
+ rankingVector = this.recipeExecutor.executeRecipe(
+ rankingVector,
+ this.interestConfig.item_ranker
+ );
+
+ if (rankingVector === null) {
+ return null;
+ }
+
+ return { scorableItem, rankingVector };
+ }
+};