Adding upstream version 1:115.7.0.upstream/1%115.7.0 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 17:32:43 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 17:32:43 +0000
commit: 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
tree: a68f146d7fa01f0134297619fbe7e33db084e0aa /comm/mailnews/extensions/bayesian-spam-filter/test
parent: Initial commit. (diff)
download: thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz
thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip
21 files changed, 1734 insertions, 0 deletions
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/head_bayes.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/head_bayes.js
new file mode 100644
index 0000000000..b502dcc2e5
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/head_bayes.js
@@ -0,0 +1,28 @@
+var { MailServices } = ChromeUtils.import(
+  "resource:///modules/MailServices.jsm"
+);
+var { XPCOMUtils } = ChromeUtils.importESModule(
+  "resource://gre/modules/XPCOMUtils.sys.mjs"
+);
+var { mailTestUtils } = ChromeUtils.import(
+  "resource://testing-common/mailnews/MailTestUtils.jsm"
+);
+var { localAccountUtils } = ChromeUtils.import(
+  "resource://testing-common/mailnews/LocalAccountUtils.jsm"
+);
+
+var CC = Components.Constructor; // short alias for Components.Constructor
+
+// Ensure the profile directory is set up
+do_get_profile();
+
+function getSpec(aFileName) {
+  var file = do_get_file("resources/" + aFileName);
+  var uri = Services.io.newFileURI(file).QueryInterface(Ci.nsIURL);
+  uri = uri.mutate().setQuery("type=application/x-message-display").finalize();
+  return uri.spec;
+}
+
+registerCleanupFunction(function () {
+  load("../../../../resources/mailShutdown.js");
+});
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases.dat b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases.dat
new file mode 100644
index 0000000000..31162459e4
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases.dat
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases1.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases1.eml
new file mode 100644
index 0000000000..4720467fe6
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases1.eml
@@ -0,0 +1,6 @@
+From - Sat Jan 26 08:43:42 2008
+Subject: test1
+Content-Type: text/plain; charset=iso-8859-1
+
+important
+
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases2.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases2.eml
new file mode 100644
index 0000000000..9a251486a9
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases2.eml
@@ -0,0 +1,6 @@
+From - Sat Jan 26 08:43:42 2008
+Subject: test2
+Content-Type: text/plain; charset=iso-8859-1
+
+work
+
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases3.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases3.eml
new file mode 100644
index 0000000000..de31992ac5
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases3.eml
@@ -0,0 +1,6 @@
+From - Sat Jan 26 08:43:42 2008
+Subject: test3
+Content-Type: text/plain; charset=iso-8859-1
+
+very important work
+
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham1.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham1.eml
new file mode 100644
index 0000000000..6a63f587b8
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham1.eml
@@ -0,0 +1,7 @@
+Date: Tue, 30 Apr 2008 00:12:17 -0700
+From: Mom <mother@example.com>
+To: Careful Reader <reader@example.org>
+Subject: eat your vegetables
+MIME-Version: 1.0
+
+vegetables are very important for your health and wealth.
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham2.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham2.eml
new file mode 100644
index 0000000000..cd6691b921
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham2.eml
@@ -0,0 +1,8 @@
+Date: Tue, 27 Apr 2006 00:13:23 -0700
+From: Evil Despot <boss@example.com>
+To: Careful Reader <reader@example.org>
+Subject: finish your report
+MIME-Version: 1.0
+
+If you want to keep your sorry job and health, finish that
+important report before the close of business today.
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/msgCorpus.dat b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/msgCorpus.dat
new file mode 100644
index 0000000000..f273a4f10c
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/msgCorpus.dat
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam1.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam1.eml
new file mode 100644
index 0000000000..ea629213cc
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam1.eml
@@ -0,0 +1,7 @@
+Date: Tue, 29 Apr 2008 00:10:07 -0700
+From: Spam King <spammer@example.com>
+To: Careful Reader <reader@example.org>
+Subject: viagra is your nigerian xxx dream
+MIME-Version: 1.0
+
+click here to make lots of money and wealth
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam2.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam2.eml
new file mode 100644
index 0000000000..817d328cf2
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam2.eml
@@ -0,0 +1,8 @@
+Date: Mon, 27 Apr 2008 01:02:03 -0700
+From: Stock Pusher <broker@example.net>
+To: Careful Reader <reader@example.org>
+Subject: ABCD Corporation will soar tomorrow!
+MIME-Version: 1.0
+
+Make lots of money! Put all of your money into ACBD Corporation
+Stock!
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam3.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam3.eml
new file mode 100644
index 0000000000..0a524e604b
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam3.eml
@@ -0,0 +1,7 @@
+Date: Wed, 30 Apr 2008 01:11:17 -0700
+From: Spam King <spammer@example.com>
+To: Careful Reader <reader@example.org>
+Subject: we have your nigerian xxx dream
+MIME-Version: 1.0
+
+Not making lots of money and wealth? Call me!
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam4.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam4.eml
new file mode 100644
index 0000000000..775d3b41fa
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam4.eml
@@ -0,0 +1,8 @@
+Date: Tue, 28 Apr 2008 01:02:04 -0700
+From: Stock Pusher <broker@example.net>
+To: Careful Reader <reader@example.org>
+Subject: ABCD Corporation will really soar this time!
+MIME-Version: 1.0
+
+Make lots of money! Put all of your money into ABCD Corporation
+Stock! (We really mean it this time!)
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/tokenTest.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/tokenTest.eml
new file mode 100644
index 0000000000..d6e7e0ae3d
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/tokenTest.eml
@@ -0,0 +1,14 @@
+Date: Tue, 30 Apr 2008 00:12:17 -0700
+From: Mom <mother@example.com>
+To: Careful Reader <reader@example.org>
+Subject: eat your vegetables to live long
+Received: from c-1-2-3-4.hsd1.wa.example.net ([1.2.3.4] helo=theComputer)
+	by host301.example.com with esmtpa (Exim 4.69)
+	(envelope-from <someone@example.com>)
+	id 1LeEgH-0003GN-Rr
+	for reader@example.org; Mon, 02 Mar 2009 13:24:06 -0700
+MIME-Version: 1.0
+Message-Id: 14159
+Sender: Bugzilla Test Setup <noreply@example.org>
+
+This is a sentence. Important URL is http://www.example.org Check it out!
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/trainingfile.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/trainingfile.js
new file mode 100644
index 0000000000..b6d37e879b
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/trainingfile.js
@@ -0,0 +1,108 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Service class to read the junk filter's training.dat file.
+//  Code is adapted from the Mnenhy Thunderbird extension.
+
+/* exported TrainingData */
+function TrainingData() {
+  // local constants
+
+  const CC = Components.Constructor;
+
+  // public methods
+
+  this.read = read;
+
+  // public variables
+
+  this.mGoodTokens = 0;
+  this.mJunkTokens = 0;
+  this.mGoodMessages = 0;
+  this.mJunkMessages = 0;
+  this.mGoodCounts = {};
+  this.mJunkCounts = {};
+
+  // helper functions
+
+  function getJunkStatFile() {
+    var sBaseDir = Services.dirsvc.get("ProfD", Ci.nsIFile);
+    var CFileByFile = new CC(
+      "@mozilla.org/file/local;1",
+      "nsIFile",
+      "initWithFile"
+    );
+    var oFile = new CFileByFile(sBaseDir);
+    oFile.append("training.dat");
+    return oFile;
+  }
+
+  function getBinStream(oFile) {
+    if (oFile && oFile.exists()) {
+      var oUri = Services.io.newFileURI(oFile);
+      // open stream (channel)
+      let channel = Services.io.newChannelFromURI(
+        oUri,
+        null,
+        Services.scriptSecurityManager.getSystemPrincipal(),
+        null,
+        Ci.nsILoadInfo.SEC_ALLOW_CROSS_ORIGIN_SEC_CONTEXT_IS_NULL,
+        Ci.nsIContentPolicy.TYPE_OTHER
+      );
+      var oStream = channel.open();
+      // buffer it
+      var oBufStream = Cc[
+        "@mozilla.org/network/buffered-input-stream;1"
+      ].createInstance(Ci.nsIBufferedInputStream);
+      oBufStream.init(oStream, oFile.fileSize);
+      // read as binary
+      var oBinStream = Cc["@mozilla.org/binaryinputstream;1"].createInstance(
+        Ci.nsIBinaryInputStream
+      );
+      oBinStream.setInputStream(oBufStream);
+      // return it
+      return oBinStream;
+    }
+    return null;
+  }
+
+  // method specifications
+
+  function read() {
+    var file = getJunkStatFile();
+
+    // does the file exist?
+    Assert.ok(file.exists());
+
+    var fileStream = getBinStream(file);
+
+    // check magic number
+    var iMagicNumber = fileStream.read32();
+    Assert.equal(iMagicNumber, 0xfeedface);
+
+    // get ham'n'spam numbers
+    this.mGoodMessages = fileStream.read32();
+    this.mJunkMessages = fileStream.read32();
+
+    // Read good tokens
+    this.mGoodTokens = fileStream.read32();
+    var iRefCount, iTokenLen, sToken;
+    for (let i = 0; i < this.mGoodTokens; ++i) {
+      iRefCount = fileStream.read32();
+      iTokenLen = fileStream.read32();
+      sToken = fileStream.readBytes(iTokenLen);
+      this.mGoodCounts[sToken] = iRefCount;
+    }
+
+    // we have no further good tokens, so read junk tokens
+    this.mJunkTokens = fileStream.read32();
+    for (let i = 0; i < this.mJunkTokens; i++) {
+      // read token data
+      iRefCount = fileStream.read32();
+      iTokenLen = fileStream.read32();
+      sToken = fileStream.readBytes(iTokenLen);
+      this.mJunkCounts[sToken] = iRefCount;
+    }
+  }
+}
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_bug228675.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_bug228675.js
new file mode 100644
index 0000000000..40180006d7
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_bug228675.js
@@ -0,0 +1,136 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// tests reduction in size of training.dat
+
+// main setup
+
+/* import-globals-from resources/trainingfile.js */
+load("resources/trainingfile.js");
+
+var { MailServices } = ChromeUtils.import(
+  "resource:///modules/MailServices.jsm"
+);
+
+// Before shrink, the trained messages have 76 tokens; a limit of 75 forces a shrink.
+Services.prefs.setIntPref("mailnews.bayesian_spam_filter.junk_maxtokens", 75);
+
+// local constants: classification values taken from the junk service
+var kUnclassified = MailServices.junk.UNCLASSIFIED;
+var kJunk = MailServices.junk.JUNK;
+var kGood = MailServices.junk.GOOD;
+
+var emails = [
+  "ham1.eml",
+  "ham2.eml",
+  "spam1.eml",
+  "spam2.eml",
+  "spam3.eml",
+  "spam4.eml",
+];
+var classifications = [kGood, kGood, kJunk, kJunk, kJunk, kJunk]; // parallel to emails
+var trainingData; // assigned in doTestingListener after all messages are trained
+
+// main test
+function run_test() {
+  localAccountUtils.loadLocalMailAccount();
+  MailServices.junk.resetTrainingData();
+
+  do_test_pending();
+
+  var email = emails.shift();
+  var classification = classifications.shift();
+  // additional calls to setMessageClassifiaction are done in the callback
+  MailServices.junk.setMessageClassification(
+    getSpec(email),
+    kUnclassified,
+    classification,
+    null,
+    doTestingListener
+  );
+}
+
+var doTestingListener = {
+  onMessageClassified(aMsgURI, aClassification, aJunkPercent) {
+    if (!aMsgURI) {
+      // Ignore end-of-batch signal.
+      return;
+    }
+    var email = emails.shift();
+    var classification = classifications.shift();
+    if (email) {
+      MailServices.junk.setMessageClassification(
+        getSpec(email),
+        kUnclassified,
+        classification,
+        null,
+        doTestingListener
+      );
+      return;
+    }
+
+    // all done classifying, time to test
+    MailServices.junk.shutdown(); // just flushes training.dat
+    trainingData = new TrainingData();
+    trainingData.read();
+
+    /*
+    // List training.dat information for debug
+    dump("training.data results: goodMessages=" + trainingData.mGoodMessages
+      + " junkMessages = " + trainingData.mJunkMessages
+      + " goodTokens = " + trainingData.mGoodTokens
+      + " junkTokens = " + trainingData.mJunkTokens
+      + "\n");
+    print("Good counts");
+    for (var token in trainingData.mGoodCounts)
+      dump("count: " + trainingData.mGoodCounts[token] + " token: " + token + "\n");
+    print("Junk Counts");
+    for (var token in trainingData.mJunkCounts)
+      dump("count: " + trainingData.mJunkCounts[token] + " token: " + token + "\n");
+    */
+
+    /* Selected pre-shrink counts after training
+    training.data results: goodMessages=2 junkMessages = 4 tokens = 78
+    Good counts
+    count: 1 token: subject:report
+    count: 2 token: important
+    count: 2 token: to:careful reader <reader@example.org>
+
+    Junk Counts
+    count: 3 token: make
+    count: 4 token: money
+    count: 4 token: to:careful reader <reader@example.org>
+    count: 2 token: money!
+    */
+
+    // Shrinking divides all counts by two. In comments, I show the
+    // calculation for each test, (pre-shrink count)/2.
+
+    Assert.equal(trainingData.mGoodMessages, 1); //  2/2
+    Assert.equal(trainingData.mJunkMessages, 2); //  4/2
+    checkToken("money", 0, 2); // (0/2, 4/2)
+    checkToken("subject:report", 0, 0); // (1/2, 0/2)
+    checkToken("to:careful reader <reader@example.org>", 1, 2); // (2/2, 4/2)
+    checkToken("make", 0, 1); // (0/2, 3/2)
+    checkToken("important", 1, 0); // (2/2, 0/2)
+
+    do_test_finished();
+  },
+};
+
+// helper functions
+
+function checkToken(aToken, aGoodCount, aJunkCount) {
+  print(" checking " + aToken);
+  var goodCount = trainingData.mGoodCounts[aToken];
+  var junkCount = trainingData.mJunkCounts[aToken];
+  if (!goodCount) {
+    goodCount = 0;
+  }
+  if (!junkCount) {
+    junkCount = 0;
+  }
+  Assert.equal(goodCount, aGoodCount);
+  Assert.equal(junkCount, aJunkCount);
+}
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_customTokenization.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_customTokenization.js
new file mode 100644
index 0000000000..222a9557d8
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_customTokenization.js
@@ -0,0 +1,197 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Tests use of custom tokenization, originally introduced in bug 476389
+
+var { MailServices } = ChromeUtils.import(
+  "resource:///modules/MailServices.jsm"
+);
+
+// command codes for test data (the "command" field of each tests entry)
+var kTrain = 0; // train a file
+var kTest = 1; // test headers returned from detail
+var kSetup = 2; // run a setup function
+
+// trait ids
+var kProArray = [3]; // passed as aNewTraits when training; [0] is the pro trait
+var kAntiArray = [4]; // only element [0] is used, as the anti trait for details
+
+var gTest; // currently active test
+
+// The tests array lists the commands that startCommand() runs, in order.
+
+var tests = [
+  // test a few tokens using defaults
+  {
+    command: kTrain,
+    fileName: "tokenTest.eml",
+  },
+  {
+    command: kTest,
+    fileName: "tokenTest.eml",
+    tokens: ["important", "subject:eat", "message-id:14159", "http://www"],
+    nottokens: ["idonotexist", "subject:to"],
+  },
+
+  // enable received, disable message-id
+  // switch tokenization of body to catch full urls (no "." delimiter)
+  // enable sender, keeping full value
+  {
+    command: kSetup,
+    operation() {
+      Services.prefs.setCharPref(
+        "mailnews.bayesian_spam_filter.tokenizeheader.received",
+        "standard"
+      );
+      Services.prefs.setCharPref(
+        "mailnews.bayesian_spam_filter.tokenizeheader.message-id",
+        "false"
+      );
+      Services.prefs.setCharPref(
+        "mailnews.bayesian_spam_filter.body_delimiters",
+        " \t\r\n\v"
+      );
+      Services.prefs.setCharPref(
+        "mailnews.bayesian_spam_filter.tokenizeheader.sender",
+        "full"
+      );
+    },
+  },
+  {
+    command: kTrain,
+    fileName: "tokenTest.eml",
+  },
+  {
+    command: kTest,
+    fileName: "tokenTest.eml",
+    tokens: [
+      "important",
+      "subject:eat",
+      "received:reader@example",
+      "skip:h 20",
+      "sender:bugzilla test setup <noreply@example.org>",
+      "received:<someone@example",
+    ],
+    nottokens: ["message-id:14159", "http://www"],
+  },
+
+  // increase the length of the maximum token to catch full URLs in the body
+  // add <>;, remove . from standard header delimiters to better capture emails
+  // use custom delimiters on sender, without "." or "<>"
+  {
+    command: kSetup,
+    operation() {
+      Services.prefs.setIntPref(
+        "mailnews.bayesian_spam_filter.maxlengthfortoken",
+        50
+      );
+      Services.prefs.setCharPref(
+        "mailnews.bayesian_spam_filter.header_delimiters",
+        " ;<>\t\r\n\v"
+      );
+      Services.prefs.setCharPref(
+        "mailnews.bayesian_spam_filter.tokenizeheader.sender",
+        " \t\r\n\v"
+      );
+    },
+  },
+  {
+    command: kTrain,
+    fileName: "tokenTest.eml",
+  },
+  {
+    command: kTest,
+    fileName: "tokenTest.eml",
+    tokens: [
+      "received:someone@example.com",
+      "http://www.example.org",
+      "received:reader@example.org",
+      "sender:<noreply@example.org>",
+    ],
+    nottokens: ["skip:h 20", "received:<someone@example"],
+  },
+];
+
+// main test: load the local account, then run the queued commands
+function run_test() {
+  localAccountUtils.loadLocalMailAccount();
+  do_test_pending(); // paired with do_test_finished() in startCommand
+
+  startCommand();
+}
+
+var listener = {
+  // nsIMsgTraitClassificationListener implementation
+  onMessageTraitsClassified(aMsgURI, aTraits, aPercents) {
+    startCommand();
+  },
+
+  onMessageTraitDetails(
+    aMsgURI,
+    aProTrait,
+    aTokenString,
+    aTokenPercents,
+    aRunningPercents
+  ) {
+    print("Details for " + aMsgURI);
+    for (let i = 0; i < aTokenString.length; i++) {
+      print("Token " + aTokenString[i]);
+    }
+
+    // we should have these tokens
+    for (let value of gTest.tokens) {
+      print("We should have '" + value + "'? ");
+      Assert.ok(aTokenString.includes(value));
+    }
+
+    // should not have these tokens
+    for (let value of gTest.nottokens) {
+      print("We should not have '" + value + "'? ");
+      Assert.ok(!aTokenString.includes(value));
+    }
+    startCommand();
+  },
+};
+
+// start the next test command
+function startCommand() {
+  if (!tests.length) {
+    // Do we have more commands?
+    // no, all done
+    do_test_finished();
+    return;
+  }
+
+  gTest = tests.shift();
+  // print("StartCommand command = " + gTest.command + ", remaining tests " + tests.length);
+  switch (gTest.command) {
+    case kTrain:
+      // train message
+
+      MailServices.junk.setMsgTraitClassification(
+        getSpec(gTest.fileName), // aMsgURI
+        [], // aOldTraits
+        kProArray, // aNewTraits
+        listener
+      ); // [optional] in nsIMsgTraitClassificationListener aTraitListener
+      // null,      // [optional] in nsIMsgWindow aMsgWindow
+      // null,      // [optional] in nsIJunkMailClassificationListener aJunkListener
+      break;
+
+    case kTest:
+      // test headers from detail message
+      MailServices.junk.detailMessage(
+        getSpec(gTest.fileName), // in string aMsgURI
+        kProArray[0], // proTrait
+        kAntiArray[0], // antiTrait
+        listener
+      ); // in nsIMsgTraitDetailListener aDetailListener
+      break;
+
+    case kSetup:
+      gTest.operation();
+      startCommand();
+      break;
+  }
+}
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_junkAsTraits.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_junkAsTraits.js
new file mode 100644
index 0000000000..a1800b93e7
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_junkAsTraits.js
@@ -0,0 +1,574 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// tests calls to the bayesian filter plugin to train, classify, and forget
+// messages using both the older junk-oriented calls, as well as the newer
+// trait-oriented calls. Only a single trait is tested. The main intent of
+// these tests is to demonstrate that both the old junk-oriented calls and the
+// new trait-oriented calls give the same results on junk processing.
+
+var { MailServices } = ChromeUtils.import(
+  "resource:///modules/MailServices.jsm"
+);
+
+// local constants: values taken from the junk service
+var kUnclassified = MailServices.junk.UNCLASSIFIED;
+var kJunk = MailServices.junk.JUNK;
+var kGood = MailServices.junk.GOOD;
+var kJunkTrait = MailServices.junk.JUNK_TRAIT; // pushed onto gProArray in run_test
+var kGoodTrait = MailServices.junk.GOOD_TRAIT; // pushed onto gAntiArray in run_test
+var kIsHamScore = MailServices.junk.IS_HAM_SCORE; // percent expected when training ham
+var kIsSpamScore = MailServices.junk.IS_SPAM_SCORE; // percent expected when training spam
+
+// command codes for test data (the "command" field of each tests entry)
+var kTrainJ = 0; // train using junk method
+var kTrainT = 1; // train using trait method
+var kClassJ = 2; // classify using junk method
+var kClassT = 3; // classify using trait method
+var kForgetJ = 4; // forget training using junk method
+var kForgetT = 5; // forget training using trait method
+var kCounts = 6; // test token and message counts
+
+var gProArray = [],
+  gAntiArray = []; // traits arrays, pro is junk, anti is good
+var gTest; // currently active test
+
+// The tests array defines the tests to attempt. Format of
+// an element "test" of this array (except for kCounts):
+//
+//   test.command: function to perform, see definitions above
+//   test.fileName: file containing message to test
+//   test.junkPercent: sets the classification (for Class or Forget commands)
+//                     tests the classification (for Class commands)
+//                     As a special case for the no-training tests, if
+//                     junkPercent is negative, test its absolute value
+//                     for percents, but reverse the junk/good classification
+//   test.traitListener: should we use the trait listener call?
+//   test.junkListener: should we use the junk listener call?
+
+var tests = [
+  // test the trait-based calls. We mix trait listeners, junk listeners,
+  // and both
+
+  {
+    // with no training, percents is 50 - but classifies as junk
+    command: kClassT,
+    fileName: "ham1.eml",
+    junkPercent: -50, // negative means classifies as junk
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    // train 1 ham message
+    command: kTrainT,
+    fileName: "ham1.eml",
+    junkPercent: 0,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    // with ham but no spam training, percents are 0 and classifies as ham
+    command: kClassT,
+    fileName: "ham1.eml",
+    junkPercent: 0,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    // train 1 spam message
+    command: kTrainT,
+    fileName: "spam1.eml",
+    junkPercent: 100,
+    traitListener: true,
+    junkListener: false,
+  },
+  {
+    // the trained messages will classify at 0 and 100
+    command: kClassT,
+    fileName: "ham1.eml",
+    junkPercent: 0,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    command: kClassT,
+    fileName: "spam1.eml",
+    junkPercent: 100,
+    traitListener: true,
+    junkListener: false,
+  },
+  {
+    // ham2, spam2, spam4 give partial percents, but still ham
+    command: kClassT,
+    fileName: "ham2.eml",
+    junkPercent: 8,
+    traitListener: true,
+    junkListener: true,
+  },
+  {
+    command: kClassT,
+    fileName: "spam2.eml",
+    junkPercent: 81,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    command: kClassT,
+    fileName: "spam4.eml",
+    junkPercent: 81,
+    traitListener: true,
+    junkListener: false,
+  },
+  {
+    // spam3 evaluates to spam
+    command: kClassT,
+    fileName: "spam3.eml",
+    junkPercent: 98,
+    traitListener: true,
+    junkListener: true,
+  },
+  {
+    // train ham2, then test percents of 0 (clearly good)
+    command: kTrainT,
+    fileName: "ham2.eml",
+    junkPercent: 0,
+    traitListener: true,
+    junkListener: true,
+  },
+  {
+    command: kClassT,
+    fileName: "ham2.eml",
+    junkPercent: 0,
+    traitListener: true,
+    junkListener: true,
+  },
+  {
+    // forget ham2, percents should return to partial value
+    command: kForgetT,
+    fileName: "ham2.eml",
+    junkPercent: 0,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    command: kClassT,
+    fileName: "ham2.eml",
+    junkPercent: 8,
+    traitListener: true,
+    junkListener: true,
+  },
+  {
+    // train, classify, forget, reclassify spam4
+    command: kTrainT,
+    fileName: "spam4.eml",
+    junkPercent: 100,
+    traitListener: true,
+    junkListener: true,
+  },
+  {
+    command: kClassT,
+    fileName: "spam4.eml",
+    junkPercent: 100,
+    traitListener: true,
+    junkListener: true,
+  },
+  {
+    command: kCounts,
+    tokenCount: 66, // count of tokens in the corpus
+    junkCount: 2, // count of junk messages in the corpus
+    goodCount: 1, // count of good messages in the corpus
+  },
+  {
+    command: kForgetT,
+    fileName: "spam4.eml",
+    junkPercent: 100,
+    traitListener: true,
+    junkListener: false,
+  },
+  {
+    command: kClassT,
+    fileName: "spam4.eml",
+    junkPercent: 81,
+    traitListener: true,
+    junkListener: true,
+  },
+  {
+    // forget ham1 and spam1 to empty training
+    command: kForgetT,
+    fileName: "ham1.eml",
+    junkPercent: 0,
+    traitListener: true,
+    junkListener: true,
+  },
+  {
+    command: kForgetT,
+    fileName: "spam1.eml",
+    junkPercent: 100,
+    traitListener: true,
+    junkListener: true,
+  },
+  // repeat the whole sequence using the junk calls
+  {
+    // train 1 ham and 1 spam message
+    command: kTrainJ,
+    fileName: "ham1.eml",
+    junkPercent: 0,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    command: kTrainJ,
+    fileName: "spam1.eml",
+    junkPercent: 100,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    // the trained messages will classify at 0 and 100
+    command: kClassJ,
+    fileName: "ham1.eml",
+    junkPercent: 0,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    command: kClassJ,
+    fileName: "spam1.eml",
+    junkPercent: 100,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    // ham2, spam2, spam4 give partial percents, but still ham
+    command: kClassJ,
+    fileName: "ham2.eml",
+    junkPercent: 8,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    command: kClassJ,
+    fileName: "spam2.eml",
+    junkPercent: 81,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    command: kClassJ,
+    fileName: "spam4.eml",
+    junkPercent: 81,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    // spam3 evaluates to spam
+    command: kClassJ,
+    fileName: "spam3.eml",
+    junkPercent: 98,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    // train ham2, then test percents of 0 (clearly good)
+    command: kTrainJ,
+    fileName: "ham2.eml",
+    junkPercent: 0,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    command: kClassJ,
+    fileName: "ham2.eml",
+    junkPercent: 0,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    // forget ham2, percents should return to partial value
+    command: kForgetJ,
+    fileName: "ham2.eml",
+    junkPercent: 0,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    command: kClassJ,
+    fileName: "ham2.eml",
+    junkPercent: 8,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    // train, classify, forget, reclassify spam4
+    command: kTrainJ,
+    fileName: "spam4.eml",
+    junkPercent: 100,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    command: kClassJ,
+    fileName: "spam4.eml",
+    junkPercent: 100,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    command: kForgetJ,
+    fileName: "spam4.eml",
+    junkPercent: 100,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    command: kClassJ,
+    fileName: "spam4.eml",
+    junkPercent: 81,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    // forget ham1 and spam1 to be empty
+    command: kForgetJ,
+    fileName: "ham1.eml",
+    junkPercent: 0,
+    traitListener: false,
+    junkListener: true,
+  },
+  {
+    command: kForgetJ,
+    fileName: "spam1.eml",
+    junkPercent: 100,
+    traitListener: false,
+    junkListener: true,
+  },
+];
+
+// main test: load the local account, fill the trait arrays, start the commands
+function run_test() {
+  localAccountUtils.loadLocalMailAccount();
+  do_test_pending();
+
+  // setup pro/anti arrays as junk/good
+  gProArray.push(kJunkTrait);
+  gAntiArray.push(kGoodTrait);
+
+  startCommand();
+}
+
+var junkListener = {
+  // nsIJunkMailClassificationListener implementation
+  onMessageClassified(aMsgURI, aClassification, aJunkPercent) {
+    if (!aMsgURI) {
+      // Ignore end-of-batch signal.
+      return;
+    }
+    // print("Message URI is " + aMsgURI);
+    // print("Junk percent is " + aJunkPercent);
+    // print("Classification is " + aClassification);
+    var command = gTest.command;
+    var junkPercent = gTest.junkPercent;
+    // file returned correctly
+    Assert.equal(getSpec(gTest.fileName), aMsgURI);
+
+    // checks of aClassification
+
+    // forget returns unclassified
+    if (command == kForgetJ || command == kForgetT) {
+      Assert.equal(aClassification, kUnclassified);
+    } else {
+      // classification or train should return an actual classification
+      // check junk classification set by default cutoff of 90
+      var isGood = Math.abs(junkPercent) < 90;
+      if (junkPercent < 0) {
+        isGood = !isGood;
+      }
+      Assert.equal(aClassification, isGood ? kGood : kJunk);
+    }
+
+    // checks of aJunkPercent
+
+    if (command == kClassJ || command == kClassT) {
+      // classify returns the actual junk percents
+      Assert.equal(Math.abs(junkPercent), aJunkPercent);
+    } else if (command == kTrainJ || command == kTrainT) {
+      // train returns the ham and spam limits
+      Assert.equal(aJunkPercent, junkPercent < 90 ? kIsHamScore : kIsSpamScore);
+    } else {
+      // Forget always returns 0.
+      Assert.equal(aJunkPercent, 0);
+    }
+
+    // if the current test includes a trait listener, it will
+    // run next, so we defer to it for starting the next command
+    if (gTest.traitListener) {
+      return;
+    }
+    startCommand();
+  },
+};
+
+var traitListener = {
+  // nsIMsgTraitClassificationListener implementation
+  onMessageTraitsClassified(aMsgURI, aTraits, aPercents) {
+    if (!aMsgURI) {
+      // Ignore end-of-batch signal.
+      return;
+    }
+    // print("(Trait Listener)Message URI is " + aMsgURI);
+    // print("(Trait Listener)Junk percent is " + aPercents);
+    var command = gTest.command;
+    var junkPercent = gTest.junkPercent;
+    // print("command, junkPercent is " + command + " , " + junkPercent);
+
+    Assert.equal(getSpec(gTest.fileName), aMsgURI);
+
+    // checks of aPercents
+
+    if (command == kForgetJ || command == kForgetT) {
+      // "forgets" with null newClassifications does not return a percent
+      Assert.equal(aPercents.length, 0);
+    } else {
+      var percent = aPercents[0];
+      // print("Percent is " + percent);
+      if (command == kClassJ || command == kClassT) {
+        // Classify returns actual percents
+        Assert.equal(percent, junkPercent);
+      } else {
+        // Train simply returns 100.
+        Assert.equal(percent, 100);
+      }
+    }
+
+    // checks of aTraits
+
+    if (command == kForgetJ || command == kForgetT) {
+      // "forgets" with null newClassifications does not return a
+      // classification
+      Assert.equal(aTraits.length, 0);
+    } else if (command == kClassJ || command == kClassT) {
+      // classification just returns the tested "Pro" trait (junk)
+      let trait = aTraits[0];
+      Assert.equal(trait, kJunkTrait);
+    } else {
+      // training returns the actual trait trained
+      let trait = aTraits[0];
+      Assert.equal(trait, junkPercent < 90 ? kGoodTrait : kJunkTrait);
+    }
+
+    // All done, start the next test
+    startCommand();
+  },
+};
+
+// start the next test command
+function startCommand() {
+  if (!tests.length) {
+    // Do we have more commands?
+    // no, all done
+    do_test_finished();
+    return;
+  }
+
+  gTest = tests.shift();
+  print(
+    "StartCommand command = " +
+      gTest.command +
+      ", remaining tests " +
+      tests.length
+  );
+  var command = gTest.command;
+  var junkPercent = gTest.junkPercent;
+  var fileName = gTest.fileName;
+  var tListener = gTest.traitListener;
+  var jListener = gTest.junkListener;
+  switch (command) {
+    case kTrainJ:
+      // train message using junk call
+      MailServices.junk.setMessageClassification(
+        getSpec(fileName), // in string aMsgURI
+        null, // in nsMsgJunkStatus aOldUserClassification
+        junkPercent == kIsHamScore ? kGood : kJunk, // in nsMsgJunkStatus aNewClassification
+        null, // in nsIMsgWindow aMsgWindow
+        junkListener
+      ); // in nsIJunkMailClassificationListener aListener);
+      break;
+
+    case kTrainT:
+      // train message using trait call
+      MailServices.junk.setMsgTraitClassification(
+        getSpec(fileName), // aMsgURI
+        [], // aOldTraits
+        junkPercent == kIsSpamScore ? gProArray : gAntiArray, // aNewTraits
+        tListener ? traitListener : null, // aTraitListener
+        null, // aMsgWindow
+        jListener ? junkListener : null
+      );
+      break;
+
+    case kClassJ:
+      // classify message using junk call
+      MailServices.junk.classifyMessage(
+        getSpec(fileName), // in string aMsgURI
+        null, // in nsIMsgWindow aMsgWindow
+        junkListener
+      ); // in nsIJunkMailClassificationListener aListener
+      break;
+
+    case kClassT:
+      // classify message using trait call
+      MailServices.junk.classifyTraitsInMessage(
+        getSpec(fileName), // in string aMsgURI
+        gProArray, // in array aProTraits,
+        gAntiArray, // in array aAntiTraits
+        tListener ? traitListener : null, // in nsIMsgTraitClassificationListener aTraitListener
+        null, // in nsIMsgWindow aMsgWindow
+        jListener ? junkListener : null
+      ); // in nsIJunkMailClassificationListener aJunkListener
+      break;
+
+    case kForgetJ:
+      // forget message using junk call
+      MailServices.junk.setMessageClassification(
+        getSpec(fileName), // in string aMsgURI
+        junkPercent == kIsHamScore ? kGood : kJunk, // in nsMsgJunkStatus aOldUserClassification
+        null, // in nsMsgJunkStatus aNewClassification,
+        null, // in nsIMsgWindow aMsgWindow,
+        junkListener
+      ); // in nsIJunkMailClassificationListener aListener
+      break;
+
+    case kForgetT:
+      // forget message using trait call
+      MailServices.junk.setMsgTraitClassification(
+        getSpec(fileName), // in string aMsgURI
+        junkPercent == kIsSpamScore ? gProArray : gAntiArray, // in array aOldTraits
+        [], // in array aNewTraits
+        tListener ? traitListener : null, // in nsIMsgTraitClassificationListener aTraitListener
+        null, // in nsIMsgWindow aMsgWindow
+        jListener ? junkListener : null
+      ); // in nsIJunkMailClassificationListener aJunkListener
+      break;
+
+    case kCounts:
+      // test counts
+      let msgCount = {};
+      let nsIMsgCorpus = MailServices.junk.QueryInterface(Ci.nsIMsgCorpus);
+      let tokenCount = nsIMsgCorpus.corpusCounts(null, {});
+      nsIMsgCorpus.corpusCounts(kJunkTrait, msgCount);
+      let junkCount = msgCount.value;
+      nsIMsgCorpus.corpusCounts(kGoodTrait, msgCount);
+      let goodCount = msgCount.value;
+      print(
+        "tokenCount, junkCount, goodCount is " + tokenCount,
+        junkCount,
+        goodCount
+      );
+      Assert.equal(tokenCount, gTest.tokenCount);
+      Assert.equal(junkCount, gTest.junkCount);
+      Assert.equal(goodCount, gTest.goodCount);
+      do_timeout(0, startCommand);
+      break;
+  }
+}
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_msgCorpus.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_msgCorpus.js
new file mode 100644
index 0000000000..0c39215fcb
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_msgCorpus.js
@@ -0,0 +1,144 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Tests corpus management functions using nsIMsgCorpus
+
+var { MailServices } = ChromeUtils.import(
+  "resource:///modules/MailServices.jsm"
+);
+
+var msgCorpus = MailServices.junk.QueryInterface(Ci.nsIMsgCorpus);
+
+// Expected token counts after resources/msgCorpus.dat has been loaded once.
+// In that file trait 1001 was trained with 2 messages, and trait 1003 with 1.
+
+var tokenData = [
+  // [traitid, expected count of the token for that trait, token]
+  [1001, 0, "iDoNotExist"],
+  [1001, 1, "linecount"],
+  [1001, 2, "envelope-to:kenttest@caspia.com"],
+  [1003, 0, "iAlsoDoNotExist"],
+  [1003, 0, "isjunk"], // in 1001 but not 1003
+  [1003, 1, "linecount"],
+  [1003, 1, "subject:test"],
+  [1003, 1, "envelope-to:kenttest@caspia.com"],
+];
+
+// list of tests
+
+var gTests = [
+  // train two different combinations of messages
+  function checkLoadOnce() {
+    let fileName = "msgCorpus.dat";
+    let file = do_get_file("resources/" + fileName);
+    msgCorpus.updateData(file, true);
+
+    // check message counts
+    let messageCount = {};
+    msgCorpus.corpusCounts(1001, messageCount);
+    Assert.equal(2, messageCount.value);
+    msgCorpus.corpusCounts(1003, messageCount);
+    Assert.equal(1, messageCount.value);
+
+    for (let i = 0; i < tokenData.length; i++) {
+      let id = tokenData[i][0];
+      let count = tokenData[i][1];
+      let word = tokenData[i][2];
+      Assert.equal(count, msgCorpus.getTokenCount(word, id));
+    }
+  },
+  function checkLoadTwice() {
+    let fileName = "msgCorpus.dat";
+    let file = do_get_file("resources/" + fileName);
+    msgCorpus.updateData(file, true);
+
+    // check message counts
+    let messageCount = {};
+    msgCorpus.corpusCounts(1001, messageCount);
+    Assert.equal(4, messageCount.value);
+    msgCorpus.corpusCounts(1003, messageCount);
+    Assert.equal(2, messageCount.value);
+
+    for (let i = 0; i < tokenData.length; i++) {
+      let id = tokenData[i][0];
+      let count = 2 * tokenData[i][1];
+      let word = tokenData[i][2];
+      Assert.equal(count, msgCorpus.getTokenCount(word, id));
+    }
+  },
+  // remap the ids in the file to different local ids
+  function loadWithRemap() {
+    let fileName = "msgCorpus.dat";
+    let file = do_get_file("resources/" + fileName);
+    msgCorpus.updateData(file, true, [1001, 1003], [1, 3]);
+
+    for (let i = 0; i < tokenData.length; i++) {
+      let id = tokenData[i][0] - 1000;
+      let count = tokenData[i][1];
+      let word = tokenData[i][2];
+      Assert.equal(count, msgCorpus.getTokenCount(word, id));
+    }
+  },
+  // test removing data
+  function checkRemove() {
+    let fileName = "msgCorpus.dat";
+    let file = do_get_file("resources/" + fileName);
+    msgCorpus.updateData(file, false);
+
+    // check message counts
+    let messageCount = {};
+    msgCorpus.corpusCounts(1001, messageCount);
+    Assert.equal(2, messageCount.value);
+    msgCorpus.corpusCounts(1003, messageCount);
+    Assert.equal(1, messageCount.value);
+
+    for (let i = 0; i < tokenData.length; i++) {
+      let id = tokenData[i][0];
+      let count = tokenData[i][1];
+      let word = tokenData[i][2];
+      Assert.equal(count, msgCorpus.getTokenCount(word, id));
+    }
+  },
+  // test clearing a trait
+  function checkClear() {
+    let messageCountObject = {};
+    /*
+    msgCorpus.corpusCounts(1001, messageCountObject);
+    let v1001 = messageCountObject.value;
+    msgCorpus.corpusCounts(1003, messageCountObject);
+    let v1003 = messageCountObject.value;
+    dump("pre-clear value " + v1001 + " " + v1003 + "\n");
+    /**/
+    msgCorpus.clearTrait(1001);
+    // check that the message count is zero
+    msgCorpus.corpusCounts(1001, messageCountObject);
+    Assert.equal(0, messageCountObject.value);
+    // but the other trait should still have counts
+    msgCorpus.corpusCounts(1003, messageCountObject);
+    Assert.equal(1, messageCountObject.value);
+    // check that token count was cleared
+    for (let i = 0; i < tokenData.length; i++) {
+      let id = tokenData[i][0];
+      let count = tokenData[i][1];
+      let word = tokenData[i][2];
+      Assert.equal(id == 1001 ? 0 : count, msgCorpus.getTokenCount(word, id));
+    }
+  },
+];
+
+// main test
+function run_test() {
+  do_test_pending();
+  while (true) {
+    if (!gTests.length) {
+      // Do we have more commands?
+      // no, all done
+      do_test_finished();
+      return;
+    }
+
+    let test = gTests.shift();
+    test();
+  }
+}
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traitAliases.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traitAliases.js
new file mode 100644
index 0000000000..41a9f22a9b
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traitAliases.js
@@ -0,0 +1,172 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Tests bayes trait analysis with aliases. Adapted from test_traits.js
+
+/*
+ * These tests rely on data stored in a file, with the same format as traits.dat,
+ * that was trained in the following manner. There are two training messages,
+ * included here as files aliases1.eml and aliases2.eml. Aliases.dat was trained on
+ * each of these messages, for different trait indices, as follows, with
+ * columns showing the training count for each trait index:
+ *
+ *     file   count(1001)  count(1005) count(1007) count(1009)
+ *
+ *   aliases1.eml      1            0           2           0
+ *   aliases2.eml      0            1           0           1
+ *
+ * There is also a third email file, aliases3.eml, which combines tokens
+ * from aliases1.eml and aliases2.eml
+ *
+ * The goal here is to demonstrate that traits 1001 and 1007, and traits
+ * 1005 and 1009, can be combined using aliases. We classify messages with
+ * trait 1001 as the PRO trait, and 1005 as the ANTI trait.
+ *
+ * With these characteristics, I've run a trait analysis without aliases, and
+ * determined that the following are the correct percentage results from the
+ * analysis for each message. "Train11" means that the training was 1 pro count
+ * from aliases1.eml, and 1 anti count from aliases2.eml. "Train32" is 3 pro
+ * counts and 2 anti counts.
+ *
+ *                 percentage
+ *    file         Train11       Train32
+ *
+ * aliases1.eml      92             98
+ * aliases2.eml       8              3
+ * aliases3.eml      50             53
+ */
+
+var { MailServices } = ChromeUtils.import(
+  "resource:///modules/MailServices.jsm"
+);
+
+var traitService = Cc["@mozilla.org/msg-trait-service;1"].getService(
+  Ci.nsIMsgTraitService
+);
+// Trait ids used in aliases.dat (see the training table in the header comment).
+var kProTrait = 1001; // passed to the classifier as the pro trait
+var kAntiTrait = 1005; // passed to the classifier as the anti trait
+var kProAlias = 1007; // added as an alias of kProTrait by some tests
+var kAntiAlias = 1009; // added as an alias of kAntiTrait by some tests
+
+var gTest; // currently active test
+
+// The tests array defines the tests to attempt. Format of
+// an element "test" of this array:
+//
+//   test.fileName: file containing message to test
+//   test.proAliases: array of aliases for the pro trait
+//   test.antiAliases: array of aliases for the anti trait
+//   test.percent: expected results from the classifier
+//
+// startCommand() empties proAliases and antiAliases as it applies them.
+
+var tests = [
+  // No aliases: the expected percents are the "Train11" column of the table
+  // in the header comment.
+  {
+    fileName: "aliases1.eml",
+    proAliases: [],
+    antiAliases: [],
+    percent: 92,
+  },
+  {
+    fileName: "aliases2.eml",
+    proAliases: [],
+    antiAliases: [],
+    percent: 8,
+  },
+  {
+    fileName: "aliases3.eml",
+    proAliases: [],
+    antiAliases: [],
+    percent: 50,
+  },
+  // Traits 1007 and 1009 aliased to the pro and anti traits: the expected
+  // percents are the "Train32" column.
+  {
+    fileName: "aliases1.eml",
+    proAliases: [kProAlias],
+    antiAliases: [kAntiAlias],
+    percent: 98,
+  },
+  {
+    fileName: "aliases2.eml",
+    proAliases: [kProAlias],
+    antiAliases: [kAntiAlias],
+    percent: 3,
+  },
+  {
+    fileName: "aliases3.eml",
+    proAliases: [kProAlias],
+    antiAliases: [kAntiAlias],
+    percent: 53,
+  },
+];
+
+// main test
+function run_test() {
+  localAccountUtils.loadLocalMailAccount();
+
+  // load in the aliases trait testing file
+  MailServices.junk
+    .QueryInterface(Ci.nsIMsgCorpus)
+    .updateData(do_get_file("resources/aliases.dat"), true);
+  do_test_pending();
+
+  startCommand();
+}
+
+var listener = {
+  // nsIMsgTraitClassificationListener implementation
+  onMessageTraitsClassified(aMsgURI, aTraits, aPercents) {
+    // print("Message URI is " + aMsgURI);
+    if (!aMsgURI) {
+      // Ignore end-of-batch signal.
+      return;
+    }
+
+    Assert.equal(aPercents[0], gTest.percent);
+    // All done, start the next test
+    startCommand();
+  },
+};
+
+// start the next test command
+function startCommand() {
+  if (!tests.length) {
+    // Do we have more commands?
+    // no, all done
+    do_test_finished();
+    return;
+  }
+
+  gTest = tests.shift();
+
+  // classify message
+  var antiArray = [kAntiTrait];
+  var proArray = [kProTrait];
+
+  // remove any existing aliases
+  let proAliases = traitService.getAliases(kProTrait);
+  let antiAliases = traitService.getAliases(kAntiTrait);
+  let proAlias;
+  let antiAlias;
+  while ((proAlias = proAliases.pop())) {
+    traitService.removeAlias(kProTrait, proAlias);
+  }
+  while ((antiAlias = antiAliases.pop())) {
+    traitService.removeAlias(kAntiTrait, antiAlias);
+  }
+
+  // add new aliases
+  while ((proAlias = gTest.proAliases.pop())) {
+    traitService.addAlias(kProTrait, proAlias);
+  }
+  while ((antiAlias = gTest.antiAliases.pop())) {
+    traitService.addAlias(kAntiTrait, antiAlias);
+  }
+
+  MailServices.junk.classifyTraitsInMessage(
+    getSpec(gTest.fileName), // in string aMsgURI
+    proArray, // in array aProTraits,
+    antiArray, // in array aAntiTraits
+    listener
+  ); // in nsIMsgTraitClassificationListener aTraitListener
+  // null,      // [optional] in nsIMsgWindow aMsgWindow
+  // null,      // [optional] in nsIJunkMailClassificationListener aJunkListener
+}
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traits.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traits.js
new file mode 100644
index 0000000000..b005db72cc
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traits.js
@@ -0,0 +1,287 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Tests bayes trait analysis
+
+// I make this an instance so that I know I can reset and get
+// a completely new component. Should be getService in production code.
+// The kReset command shuts this instance down and replaces it with a new one.
+var nsIJunkMailPlugin = Cc[
+  "@mozilla.org/messenger/filter-plugin;1?name=bayesianfilter"
+].createInstance(Ci.nsIJunkMailPlugin);
+
+// command functions for test data
+var kTrain = 0; // train a file as a trait
+var kClass = 1; // classify files with traits
+var kReset = 2; // reload plugin, reading in data from disk
+var kDetail = 3; // test details (per-token percents for one message)
+
+var gTest; // currently active test
+
+// The tests array defines the tests to attempt. Format of
+// an element "test" of this array:
+//
+//   test.command: function to perform, see definitions above
+//   test.fileName: file(s) containing message(s) to test, comma separated
+//   test.traitIds: Array of traits to train (kTrain) or pro traits (kClass);
+//                  kDetail uses only the first entry as its pro trait
+//   test.traitAntiIds: Array of anti traits to classify, paired by index with
+//                  traitIds; kDetail uses only the first entry
+//   test.percents: kClass: array of arrays (1 per message, 1 per trait) of
+//                  expected results from the classifier
+//                  kDetail: object mapping each expected token to its percent
+//   test.runnings: kDetail only: expected running percents, by token position
+
+var tests = [
+  // train two different combinations of messages
+  {
+    command: kTrain,
+    fileName: "ham1.eml",
+    traitIds: [3, 6],
+  },
+  {
+    command: kTrain,
+    fileName: "spam1.eml",
+    traitIds: [4],
+  },
+  {
+    command: kTrain,
+    fileName: "spam4.eml",
+    traitIds: [5],
+  },
+  // test the message classifications using both singular and plural classifier
+  {
+    command: kClass,
+    fileName: "ham1.eml",
+    traitIds: [4, 6],
+    traitAntiIds: [3, 5],
+    // ham1 is trained "anti" for first test, "pro" for second
+    percents: [[0, 100]],
+  },
+  {
+    command: kClass,
+    fileName: "ham2.eml",
+    traitIds: [4, 6],
+    traitAntiIds: [3, 5],
+    // these are partial percents for an untrained message. ham2 is similar to ham1
+    percents: [[8, 95]],
+  },
+  // per-token details for spam2.eml, with trait 4 as pro and trait 3 as anti
+  {
+    command: kDetail,
+    fileName: "spam2.eml",
+    traitIds: [4],
+    traitAntiIds: [3],
+    percents: {
+      lots: 84,
+      money: 84,
+      make: 84,
+      your: 16,
+    },
+    runnings: [84, 92, 95, 81],
+  },
+  {
+    command: kClass,
+    fileName: "spam1.eml,spam2.eml,spam3.eml,spam4.eml",
+    traitIds: [4, 6],
+    traitAntiIds: [3, 5],
+    // spam1 trained as "pro" for first pro/anti pair
+    // spam4 trained as "anti" for second pro/anti pair
+    // others are partials
+    percents: [
+      [100, 50],
+      [81, 0],
+      [98, 50],
+      [81, 0],
+    ],
+  },
+  // reset the plugin, read in data, and retest the classification
+  // this tests the trait file writing
+  {
+    command: kReset,
+  },
+  // the expectations below repeat the pre-reset classification results
+  {
+    command: kClass,
+    fileName: "ham1.eml",
+    traitIds: [4, 6],
+    traitAntiIds: [3, 5],
+    percents: [[0, 100]],
+  },
+  {
+    command: kClass,
+    fileName: "ham2.eml",
+    traitIds: [4, 6],
+    traitAntiIds: [3, 5],
+    percents: [[8, 95]],
+  },
+  {
+    command: kClass,
+    fileName: "spam1.eml,spam2.eml,spam3.eml,spam4.eml",
+    traitIds: [4, 6],
+    traitAntiIds: [3, 5],
+    percents: [
+      [100, 50],
+      [81, 0],
+      [98, 50],
+      [81, 0],
+    ],
+  },
+];
+
+// Main test: load the local mail account, then start working through `tests`.
+function run_test() {
+  localAccountUtils.loadLocalMailAccount();
+  do_test_pending();
+
+  // startCommand() calls do_test_finished() once the tests table is empty.
+  startCommand();
+}
+
+var listener = {
+  // nsIMsgTraitClassificationListener implementation
+  onMessageTraitsClassified(aMsgURI, aTraits, aPercents) {
+    // print("Message URI is " + aMsgURI);
+    if (!aMsgURI) {
+      // Ignore end-of-batch signal.
+      return;
+    }
+
+    switch (gTest.command) {
+      case kClass:
+        Assert.equal(gTest.files[gTest.currentIndex], aMsgURI);
+        var currentPercents = gTest.percents[gTest.currentIndex];
+        for (let i = 0; i < currentPercents.length; i++) {
+          // print("expecting score " + currentPercents[i] +
+          //      " got score " + aPercents[i]);
+          Assert.equal(currentPercents[i], aPercents[i]);
+        }
+        gTest.currentIndex++;
+        break;
+
+      case kTrain: // We tested this some in test_junkAsTraits.js, so let's not bother
+      default:
+        break;
+    }
+    if (!--gTest.callbacks) {
+      // All done, start the next test
+      startCommand();
+    }
+  },
+  onMessageTraitDetails(
+    aMsgURI,
+    aProTrait,
+    aTokenString,
+    aTokenPercents,
+    aRunningPercents
+  ) {
+    print("Details for " + aMsgURI);
+    for (let i = 0; i < aTokenString.length; i++) {
+      print(
+        "Percent " +
+          aTokenPercents[i] +
+          " Running " +
+          aRunningPercents[i] +
+          " Token " +
+          aTokenString[i]
+      );
+      Assert.ok(aTokenString[i] in gTest.percents);
+
+      Assert.equal(gTest.percents[aTokenString[i]], aTokenPercents[i]);
+      Assert.equal(gTest.runnings[i], aRunningPercents[i]);
+      delete gTest.percents[aTokenString[i]];
+    }
+    Assert.equal(Object.keys(gTest.percents).length, 0);
+    if (gTest.command == kClass) {
+      gTest.currentIndex++;
+    }
+    startCommand();
+  },
+};
+
+// start the next test command
+function startCommand() {
+  if (!tests.length) {
+    // Do we have more commands?
+    // no, all done
+    do_test_finished();
+    return;
+  }
+
+  gTest = tests.shift();
+  print(
+    "StartCommand command = " +
+      gTest.command +
+      ", remaining tests " +
+      tests.length
+  );
+  switch (gTest.command) {
+    case kTrain: {
+      // train message
+      let proArray = [];
+      for (let i = 0; i < gTest.traitIds.length; i++) {
+        proArray.push(gTest.traitIds[i]);
+      }
+      gTest.callbacks = 1;
+
+      nsIJunkMailPlugin.setMsgTraitClassification(
+        getSpec(gTest.fileName), // aMsgURI
+        [], // aOldTraits
+        proArray, // aNewTraits
+        listener
+      ); // [optional] in nsIMsgTraitClassificationListener aTraitListener
+      // null,      // [optional] in nsIMsgWindow aMsgWindow
+      // null,      // [optional] in nsIJunkMailClassificationListener aJunkListener
+      break;
+    }
+    case kClass: {
+      // classify message
+      var antiArray = [];
+      let proArray = [];
+      for (let i = 0; i < gTest.traitIds.length; i++) {
+        antiArray.push(gTest.traitAntiIds[i]);
+        proArray.push(gTest.traitIds[i]);
+      }
+      gTest.files = gTest.fileName.split(",");
+      gTest.callbacks = gTest.files.length;
+      gTest.currentIndex = 0;
+      for (let i = 0; i < gTest.files.length; i++) {
+        gTest.files[i] = getSpec(gTest.files[i]);
+      }
+      if (gTest.files.length == 1) {
+        // use the singular classifier
+        nsIJunkMailPlugin.classifyTraitsInMessage(
+          getSpec(gTest.fileName), // in string aMsgURI
+          proArray, // in array aProTraits,
+          antiArray, // in array aAntiTraits
+          listener
+        ); // in nsIMsgTraitClassificationListener aTraitListener
+        // null,      // [optional] in nsIMsgWindow aMsgWindow
+        // null,      // [optional] in nsIJunkMailClassificationListener aJunkListener
+      } else {
+        // use the plural classifier
+        nsIJunkMailPlugin.classifyTraitsInMessages(
+          gTest.files, // in Array<ACString> aMsgURIs,
+          proArray, // in array aProTraits,
+          antiArray, // in array aAntiTraits
+          listener
+        ); // in nsIMsgTraitClassificationListener aTraitListener
+        // null,      // [optional] in nsIMsgWindow aMsgWindow
+        // null,      // [optional] in nsIJunkMailClassificationListener aJunkListener
+      }
+      break;
+    }
+    case kDetail:
+      // detail message
+      nsIJunkMailPlugin.detailMessage(
+        getSpec(gTest.fileName), // in string aMsgURI
+        gTest.traitIds[0], // proTrait
+        gTest.traitAntiIds[0], // antiTrait
+        listener
+      ); // in nsIMsgTraitDetailListener aDetailListener
+      break;
+    case kReset:
+      // reload a new nsIJunkMailPlugin, reading file in the process
+      nsIJunkMailPlugin.shutdown(); // writes files
+      nsIJunkMailPlugin = null;
+      nsIJunkMailPlugin = Cc[
+        "@mozilla.org/messenger/filter-plugin;1?name=bayesianfilter"
+      ].createInstance(Ci.nsIJunkMailPlugin);
+      // does not do a callback, so we must restart next command
+      startCommand();
+      break;
+  }
+}
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/xpcshell.ini b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/xpcshell.ini
new file mode 100644
index 0000000000..86776834ba
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/xpcshell.ini
@@ -0,0 +1,11 @@
+[DEFAULT]
+head = head_bayes.js
+tail =
+support-files = resources/*
+
+[test_bug228675.js]
+[test_customTokenization.js]
+[test_junkAsTraits.js]
+[test_msgCorpus.js]
+[test_traitAliases.js]
+[test_traits.js]
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 17:32:43 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 17:32:43 +0000
commit	6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
tree	a68f146d7fa01f0134297619fbe7e33db084e0aa /comm/mailnews/extensions/bayesian-spam-filter/test
parent	Initial commit. (diff)
download	thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip