summary | refs | log | tree | commit | diff | stats
path: root/browser/components/newtab/lib/PersonalityProvider/NaiveBayesTextTagger.jsm
diff options
context:
space:
mode:
Diffstat (limited to 'browser/components/newtab/lib/PersonalityProvider/NaiveBayesTextTagger.jsm')
-rw-r--r-- browser/components/newtab/lib/PersonalityProvider/NaiveBayesTextTagger.jsm | 67
1 files changed, 67 insertions, 0 deletions
diff --git a/browser/components/newtab/lib/PersonalityProvider/NaiveBayesTextTagger.jsm b/browser/components/newtab/lib/PersonalityProvider/NaiveBayesTextTagger.jsm
new file mode 100644
index 0000000000..cc625076ba
--- /dev/null
+++ b/browser/components/newtab/lib/PersonalityProvider/NaiveBayesTextTagger.jsm
@@ -0,0 +1,67 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+"use strict";
+
+// This file is loaded into a worker via importScripts, and into tests via import.
+// EXPORTED_SYMBOLS is declared with var (not const/let) to avoid name collision errors.
+// eslint-disable-next-line no-var
+var EXPORTED_SYMBOLS = ["NaiveBayesTextTagger"];
+
const NaiveBayesTextTagger = class NaiveBayesTextTagger {
  /**
   * @param {object} model
   *   Classifier model. The fields read by this class are:
   *   `classes` (array of `{ log_prior, feature_log_probs }`),
   *   `vocab_idfs`, `positive_class_id`, `positive_class_label` and
   *   `positive_class_threshold_log_prob`.
   * @param {function} toksToTfIdfVector
   *   Called as `toksToTfIdfVector(tokens, model.vocab_idfs)`. Must return an
   *   object whose values are `[termId, tfidf]` pairs.
   */
  constructor(model, toksToTfIdfVector) {
    this.model = model;
    this.toksToTfIdfVector = toksToTfIdfVector;
  }

  /**
   * Determines if the tokenized text belongs to class according to binary
   * naive Bayes classifier.
   *
   * @param {Array} tokens
   *   Tokenized text; passed through unchanged to `toksToTfIdfVector`.
   * @returns {{label: (string|null), logProb: number, confident: boolean}}
   *   `label` is the model's positive class label when the positive class is
   *   the most likely one, otherwise null. `logProb` is the normalized log
   *   probability of the most likely class (always <= 0 for finite scores).
   *   `confident` is true only when the positive class wins AND `logProb`
   *   exceeds `model.positive_class_threshold_log_prob`.
   *   If the model has no classes, nothing can match and the result is
   *   `{ label: null, logProb: -Infinity, confident: false }`.
   */
  tagTokens(tokens) {
    const fv = this.toksToTfIdfVector(tokens, this.model.vocab_idfs);
    // The feature vector does not depend on the class, so unpack it once.
    const featurePairs = Object.values(fv);

    let bestLogProb = null;
    let bestClassId = -1;
    const classLogProbs = [];
    for (let classId = 0; classId < this.model.classes.length; classId++) {
      const classModel = this.model.classes[classId];
      let classLogProb = classModel.log_prior;

      // dot fv with the class model
      for (const [termId, tfidf] of featurePairs) {
        classLogProb += tfidf * classModel.feature_log_probs[termId];
      }

      if (bestLogProb === null || classLogProb > bestLogProb) {
        bestLogProb = classLogProb;
        bestClassId = classId;
      }
      classLogProbs.push(classLogProb);
    }

    if (bestLogProb === null) {
      // No classes in the model: there is nothing to normalize or match.
      return { label: null, logProb: -Infinity, confident: false };
    }

    // Normalize by P(x) using the max-shifted log-sum-exp:
    //   log(sum_i exp(lp_i)) = max + log(sum_i exp(lp_i - max))
    // so that  best - logSumExp = -log(sum_i exp(lp_i - max)).
    // Exponentiating the raw log probabilities directly underflows to 0 when
    // they are very negative, which would turn the result into +Infinity.
    let shiftedSum = 0.0;
    for (const classLogProb of classLogProbs) {
      shiftedSum += Math.exp(classLogProb - bestLogProb);
    }
    // "0 -" rather than unary minus so a sum of exactly 1 yields 0, not -0.
    const normalizedLogProb = 0 - Math.log(shiftedSum);

    const isPositive = bestClassId === this.model.positive_class_id;
    const confident =
      isPositive &&
      normalizedLogProb > this.model.positive_class_threshold_log_prob;
    return {
      label: isPositive ? this.model.positive_class_label : null,
      logProb: normalizedLogProb,
      confident,
    };
  }
};