summary refs log tree commit diff stats
path: root/browser/components/newtab/lib/PersonalityProvider/NmfTextTagger.mjs
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
commit 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree f435a8308119effd964b339f76abb83a57c29483 /browser/components/newtab/lib/PersonalityProvider/NmfTextTagger.mjs
parent Initial commit. (diff)
download firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1. upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'browser/components/newtab/lib/PersonalityProvider/NmfTextTagger.mjs')
-rw-r--r-- browser/components/newtab/lib/PersonalityProvider/NmfTextTagger.mjs 58
1 files changed, 58 insertions, 0 deletions
diff --git a/browser/components/newtab/lib/PersonalityProvider/NmfTextTagger.mjs b/browser/components/newtab/lib/PersonalityProvider/NmfTextTagger.mjs
new file mode 100644
index 0000000000..5c77152d8d
--- /dev/null
+++ b/browser/components/newtab/lib/PersonalityProvider/NmfTextTagger.mjs
@@ -0,0 +1,58 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+export class NmfTextTagger {
+ constructor(model, toksToTfIdfVector) {
+ this.model = model; // tagTokens reads model.vocab_idfs, model.topic_word and model.document_topic
+ this.toksToTfIdfVector = toksToTfIdfVector; // invoked as this.toksToTfIdfVector(tokens, vocab_idfs)
+ }
+
+ /**
+ * A multiclass classifier that scores tokenized text against several classes through
+ * inference of a nonnegative matrix factorization of TF-IDF vectors and class labels.
+ * @param tokens - forwarded, together with this.model.vocab_idfs, to this.toksToTfIdfVector.
+ * @returns an object mapping each this.model.document_topic key to a score (higher is
+ * more confident). All classes get scored; the consumer decides which are most valuable.
+ */
+ tagTokens(tokens) {
+ let fv = this.toksToTfIdfVector(tokens, this.model.vocab_idfs);
+ let fve = Object.values(fv); // each value is consumed below as a [termId, tfidf] pair
+
+ // normalize each pair's tf-idf (index 1) in place by the vector sum; a zero sum is not guarded against
+ let sum = 0.0;
+ for (let pair of fve) {
+ // eslint-disable-next-line prefer-destructuring
+ sum += pair[1];
+ }
+ for (let i = 0; i < fve.length; i++) {
+ // eslint-disable-next-line prefer-destructuring
+ fve[i][1] /= sum;
+ }
+
+ // dot the document with each topic vector (each entry of model.topic_word) so that
+ // we can transform it into the latent space
+ let toksInLatentSpace = [];
+ for (let topicVect of this.model.topic_word) {
+ let fvDotTwv = 0;
+ // accumulate tfidf * topicVect[termId] over every [termId, tfidf] pair of fv
+ for (let pair of fve) {
+ let [termId, tfidf] = pair;
+ fvDotTwv += tfidf * topicVect[termId];
+ }
+ toksInLatentSpace.push(fvDotTwv);
+ }
+
+ // now project toksInLatentSpace back into class space: one score per model.document_topic key
+ let predictions = {};
+ Object.keys(this.model.document_topic).forEach(topic => {
+ let score = 0;
+ for (let i = 0; i < toksInLatentSpace.length; i++) {
+ score += toksInLatentSpace[i] * this.model.document_topic[topic][i];
+ }
+ predictions[topic] = score;
+ });
+
+ return predictions;
+ }
+}