1 files changed, 65 insertions, 0 deletions
diff --git a/browser/components/newtab/lib/PersonalityProvider/NmfTextTagger.jsm b/browser/components/newtab/lib/PersonalityProvider/NmfTextTagger.jsm
new file mode 100644
index 0000000000..639c92b6e4
--- /dev/null
+++ b/browser/components/newtab/lib/PersonalityProvider/NmfTextTagger.jsm
@@ -0,0 +1,65 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+"use strict";
+
+// We load this into a worker using importScripts, and in tests using import.
+// We use var to avoid name collision errors.
+// eslint-disable-next-line no-var
+var EXPORTED_SYMBOLS = ["NmfTextTagger"];
+
+const NmfTextTagger = class NmfTextTagger {
+  constructor(model, toksToTfIdfVector) {
+    this.model = model;
+    this.toksToTfIdfVector = toksToTfIdfVector;
+  }
+
+  /**
+   * A multiclass classifier that scores tokenized text against several classes through
+   * inference of a nonnegative matrix factorization of TF-IDF vectors and
+   * class labels. Returns a map of class labels as string keys to scores.
+   * (Higher is more confident.) All classes get scored, so it is up to
+   * consumer of this data determine what classes are most valuable.
+   */
+  tagTokens(tokens) {
+    let fv = this.toksToTfIdfVector(tokens, this.model.vocab_idfs);
+    let fve = Object.values(fv);
+
+    // normalize by the sum of the vector
+    let sum = 0.0;
+    for (let pair of fve) {
+      // eslint-disable-next-line prefer-destructuring
+      sum += pair[1];
+    }
+    for (let i = 0; i < fve.length; i++) {
+      // eslint-disable-next-line prefer-destructuring
+      fve[i][1] /= sum;
+    }
+
+    // dot the document with each topic vector so that we can transform it into
+    // the latent space
+    let toksInLatentSpace = [];
+    for (let topicVect of this.model.topic_word) {
+      let fvDotTwv = 0;
+      // dot fv with each topic word vector
+      for (let pair of fve) {
+        let [termId, tfidf] = pair;
+        fvDotTwv += tfidf * topicVect[termId];
+      }
+      toksInLatentSpace.push(fvDotTwv);
+    }
+
+    // now project toksInLatentSpace back into class space
+    let predictions = {};
+    Object.keys(this.model.document_topic).forEach(topic => {
+      let score = 0;
+      for (let i = 0; i < toksInLatentSpace.length; i++) {
+        score += toksInLatentSpace[i] * this.model.document_topic[topic][i];
+      }
+      predictions[topic] = score;
+    });
+
+    return predictions;
+  }
+};