/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

"use strict";

// We load this into a worker using importScripts, and in tests using import.
// We use var to avoid name collision errors.
// eslint-disable-next-line no-var
var EXPORTED_SYMBOLS = ["NaiveBayesTextTagger"];

/**
 * Binary naive Bayes text classifier operating on pre-tokenized text.
 */
const NaiveBayesTextTagger = class NaiveBayesTextTagger {
  /**
   * @param {object} model
   *   Classifier parameters. This class reads `vocab_idfs`, `classes` (each
   *   entry providing `log_prior` and `feature_log_probs`),
   *   `positive_class_id`, `positive_class_label` and
   *   `positive_class_threshold_log_prob` from it.
   * @param {Function} toksToTfIdfVector
   *   Called as `toksToTfIdfVector(tokens, model.vocab_idfs)`. Must return an
   *   object whose values are `[termId, tfidf]` pairs.
   */
  constructor(model, toksToTfIdfVector) {
    this.model = model;
    this.toksToTfIdfVector = toksToTfIdfVector;
  }

  /**
   * Determines if the tokenized text belongs to class according to binary
   * naive Bayes classifier. Returns an object containing the class label
   * ("label"), the log probability ("logProb") that the text belongs to the
   * best-scoring class, and a boolean ("confident"). If the positive class is
   * more likely, then "label" is the positive class label. If the negative
   * class is matched, then "label" is set to null. "confident" is true only
   * when the positive class wins and its normalized log probability exceeds
   * the model's `positive_class_threshold_log_prob`.
   *
   * @param {Array} tokens
   *   Tokens, passed through unchanged to `toksToTfIdfVector`.
   * @returns {{label: (*|null), logProb: number, confident: boolean}}
   */
  tagTokens(tokens) {
    let fv = this.toksToTfIdfVector(tokens, this.model.vocab_idfs);
    // The feature pairs do not depend on the class, so extract them once.
    let pairs = Object.values(fv);

    let bestLogProb = null;
    let bestClassId = -1;
    let classLogProbs = [];
    for (let classId = 0; classId < this.model.classes.length; classId++) {
      let classModel = this.model.classes[classId];
      let classLogProb = classModel.log_prior;

      // dot fv with the class model
      for (let [termId, tfidf] of pairs) {
        classLogProb += tfidf * classModel.feature_log_probs[termId];
      }

      if (bestLogProb === null || classLogProb > bestLogProb) {
        bestLogProb = classLogProb;
        bestClassId = classId;
      }
      classLogProbs.push(classLogProb);
    }

    // Normalize by P(x) in log space. Exponentiating the raw scores directly
    // underflows to 0 for strongly negative values, which would make the
    // normalizer -Infinity and the result +Infinity. Shifting every score by
    // the maximum keeps the largest term at exp(0) === 1, and since
    // log P(x) = max + log(sum(exp(score - max))), the normalized log
    // probability of the best class is simply -log(sum(exp(score - max))).
    let shiftedSum = 0.0;
    for (let classLogProb of classLogProbs) {
      shiftedSum += Math.exp(classLogProb - bestLogProb);
    }
    let normalizedLogProb = -Math.log(shiftedSum);

    let isPositive = bestClassId === this.model.positive_class_id;
    return {
      label: isPositive ? this.model.positive_class_label : null,
      logProb: normalizedLogProb,
      confident:
        isPositive &&
        normalizedLogProb > this.model.positive_class_threshold_log_prob,
    };
  }
};