From 6bf0a5cb5034a7e684dcc3500e841785237ce2dd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 19:32:43 +0200 Subject: Adding upstream version 1:115.7.0. Signed-off-by: Daniel Baumann --- .../lib/PersonalityProvider/NmfTextTagger.jsm | 65 ++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 browser/components/newtab/lib/PersonalityProvider/NmfTextTagger.jsm (limited to 'browser/components/newtab/lib/PersonalityProvider/NmfTextTagger.jsm') diff --git a/browser/components/newtab/lib/PersonalityProvider/NmfTextTagger.jsm b/browser/components/newtab/lib/PersonalityProvider/NmfTextTagger.jsm new file mode 100644 index 0000000000..639c92b6e4 --- /dev/null +++ b/browser/components/newtab/lib/PersonalityProvider/NmfTextTagger.jsm @@ -0,0 +1,65 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +"use strict"; + +// We load this into a worker using importScripts, and in tests using import. +// We use var to avoid name collision errors. +// eslint-disable-next-line no-var +var EXPORTED_SYMBOLS = ["NmfTextTagger"]; + +const NmfTextTagger = class NmfTextTagger { + constructor(model, toksToTfIdfVector) { + this.model = model; + this.toksToTfIdfVector = toksToTfIdfVector; + } + + /** + * A multiclass classifier that scores tokenized text against several classes through + * inference of a nonnegative matrix factorization of TF-IDF vectors and + * class labels. Returns a map of class labels as string keys to scores. + * (Higher is more confident.) All classes get scored, so it is up to + * consumer of this data determine what classes are most valuable. + */ + tagTokens(tokens) { + let fv = this.toksToTfIdfVector(tokens, this.model.vocab_idfs); + let fve = Object.values(fv); + + // normalize by the sum of the vector + let sum = 0.0; + for (let pair of fve) { + // eslint-disable-next-line prefer-destructuring + sum += pair[1]; + } + for (let i = 0; i < fve.length; i++) { + // eslint-disable-next-line prefer-destructuring + fve[i][1] /= sum; + } + + // dot the document with each topic vector so that we can transform it into + // the latent space + let toksInLatentSpace = []; + for (let topicVect of this.model.topic_word) { + let fvDotTwv = 0; + // dot fv with each topic word vector + for (let pair of fve) { + let [termId, tfidf] = pair; + fvDotTwv += tfidf * topicVect[termId]; + } + toksInLatentSpace.push(fvDotTwv); + } + + // now project toksInLatentSpace back into class space + let predictions = {}; + Object.keys(this.model.document_topic).forEach(topic => { + let score = 0; + for (let i = 0; i < toksInLatentSpace.length; i++) { + score += toksInLatentSpace[i] * this.model.document_topic[topic][i]; + } + predictions[topic] = score; + }); + + return predictions; + } +}; -- cgit v1.2.3