summaryrefslogtreecommitdiffstats
path: root/comm/mailnews/extensions/bayesian-spam-filter/test
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 17:32:43 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 17:32:43 +0000
commit6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
treea68f146d7fa01f0134297619fbe7e33db084e0aa /comm/mailnews/extensions/bayesian-spam-filter/test
parentInitial commit. (diff)
downloadthunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz
thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'comm/mailnews/extensions/bayesian-spam-filter/test')
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/head_bayes.js28
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases.datbin0 -> 446 bytes
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases1.eml6
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases2.eml6
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases3.eml6
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham1.eml7
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham2.eml8
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/msgCorpus.datbin0 -> 2447 bytes
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam1.eml7
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam2.eml8
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam3.eml7
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam4.eml8
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/tokenTest.eml14
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/trainingfile.js108
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_bug228675.js136
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_customTokenization.js197
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_junkAsTraits.js574
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_msgCorpus.js144
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traitAliases.js172
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traits.js287
-rw-r--r--comm/mailnews/extensions/bayesian-spam-filter/test/unit/xpcshell.ini11
21 files changed, 1734 insertions, 0 deletions
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/head_bayes.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/head_bayes.js
new file mode 100644
index 0000000000..b502dcc2e5
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/head_bayes.js
@@ -0,0 +1,28 @@
+var { MailServices } = ChromeUtils.import(
+ "resource:///modules/MailServices.jsm"
+);
+var { XPCOMUtils } = ChromeUtils.importESModule(
+ "resource://gre/modules/XPCOMUtils.sys.mjs"
+);
+var { mailTestUtils } = ChromeUtils.import(
+ "resource://testing-common/mailnews/MailTestUtils.jsm"
+);
+var { localAccountUtils } = ChromeUtils.import(
+ "resource://testing-common/mailnews/LocalAccountUtils.jsm"
+);
+
+var CC = Components.Constructor;
+
+// Ensure the profile directory is set up
+do_get_profile();
+
+function getSpec(aFileName) {
+ var file = do_get_file("resources/" + aFileName);
+ var uri = Services.io.newFileURI(file).QueryInterface(Ci.nsIURL);
+ uri = uri.mutate().setQuery("type=application/x-message-display").finalize();
+ return uri.spec;
+}
+
+registerCleanupFunction(function () {
+ load("../../../../resources/mailShutdown.js");
+});
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases.dat b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases.dat
new file mode 100644
index 0000000000..31162459e4
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases.dat
Binary files differ
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases1.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases1.eml
new file mode 100644
index 0000000000..4720467fe6
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases1.eml
@@ -0,0 +1,6 @@
+From - Sat Jan 26 08:43:42 2008
+Subject: test1
+Content-Type: text/plain; charset=iso-8859-1
+
+important
+
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases2.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases2.eml
new file mode 100644
index 0000000000..9a251486a9
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases2.eml
@@ -0,0 +1,6 @@
+From - Sat Jan 26 08:43:42 2008
+Subject: test2
+Content-Type: text/plain; charset=iso-8859-1
+
+work
+
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases3.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases3.eml
new file mode 100644
index 0000000000..de31992ac5
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases3.eml
@@ -0,0 +1,6 @@
+From - Sat Jan 26 08:43:42 2008
+Subject: test3
+Content-Type: text/plain; charset=iso-8859-1
+
+very important work
+
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham1.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham1.eml
new file mode 100644
index 0000000000..6a63f587b8
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham1.eml
@@ -0,0 +1,7 @@
+Date: Tue, 30 Apr 2008 00:12:17 -0700
+From: Mom <mother@example.com>
+To: Careful Reader <reader@example.org>
+Subject: eat your vegetables
+MIME-Version: 1.0
+
+vegetables are very important for your health and wealth.
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham2.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham2.eml
new file mode 100644
index 0000000000..cd6691b921
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham2.eml
@@ -0,0 +1,8 @@
+Date: Tue, 27 Apr 2006 00:13:23 -0700
+From: Evil Despot <boss@example.com>
+To: Careful Reader <reader@example.org>
+Subject: finish your report
+MIME-Version: 1.0
+
+If you want to keep your sorry job and health, finish that
+important report before the close of business today.
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/msgCorpus.dat b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/msgCorpus.dat
new file mode 100644
index 0000000000..f273a4f10c
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/msgCorpus.dat
Binary files differ
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam1.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam1.eml
new file mode 100644
index 0000000000..ea629213cc
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam1.eml
@@ -0,0 +1,7 @@
+Date: Tue, 29 Apr 2008 00:10:07 -0700
+From: Spam King <spammer@example.com>
+To: Careful Reader <reader@example.org>
+Subject: viagra is your nigerian xxx dream
+MIME-Version: 1.0
+
+click here to make lots of money and wealth
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam2.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam2.eml
new file mode 100644
index 0000000000..817d328cf2
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam2.eml
@@ -0,0 +1,8 @@
+Date: Mon, 27 Apr 2008 01:02:03 -0700
+From: Stock Pusher <broker@example.net>
+To: Careful Reader <reader@example.org>
+Subject: ABCD Corporation will soar tomorrow!
+MIME-Version: 1.0
+
+Make lots of money! Put all of your money into ACBD Corporation
+Stock!
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam3.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam3.eml
new file mode 100644
index 0000000000..0a524e604b
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam3.eml
@@ -0,0 +1,7 @@
+Date: Wed, 30 Apr 2008 01:11:17 -0700
+From: Spam King <spammer@example.com>
+To: Careful Reader <reader@example.org>
+Subject: we have your nigerian xxx dream
+MIME-Version: 1.0
+
+Not making lots of money and wealth? Call me!
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam4.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam4.eml
new file mode 100644
index 0000000000..775d3b41fa
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam4.eml
@@ -0,0 +1,8 @@
+Date: Tue, 28 Apr 2008 01:02:04 -0700
+From: Stock Pusher <broker@example.net>
+To: Careful Reader <reader@example.org>
+Subject: ABCD Corporation will really soar this time!
+MIME-Version: 1.0
+
+Make lots of money! Put all of your money into ABCD Corporation
+Stock! (We really mean it this time!)
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/tokenTest.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/tokenTest.eml
new file mode 100644
index 0000000000..d6e7e0ae3d
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/tokenTest.eml
@@ -0,0 +1,14 @@
+Date: Tue, 30 Apr 2008 00:12:17 -0700
+From: Mom <mother@example.com>
+To: Careful Reader <reader@example.org>
+Subject: eat your vegetables to live long
+Received: from c-1-2-3-4.hsd1.wa.example.net ([1.2.3.4] helo=theComputer)
+ by host301.example.com with esmtpa (Exim 4.69)
+ (envelope-from <someone@example.com>)
+ id 1LeEgH-0003GN-Rr
+ for reader@example.org; Mon, 02 Mar 2009 13:24:06 -0700
+MIME-Version: 1.0
+Message-Id: 14159
+Sender: Bugzilla Test Setup <noreply@example.org>
+
+This is a sentence. Important URL is http://www.example.org Check it out!
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/trainingfile.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/trainingfile.js
new file mode 100644
index 0000000000..b6d37e879b
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/trainingfile.js
@@ -0,0 +1,108 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// service class to manipulate the junk training.dat file
+// code is adapted from Mnehy Thunderbird Extension
+
+/* exported TrainingData */
+function TrainingData() {
+ // local constants
+
+ const CC = Components.Constructor;
+
+ // public methods
+
+ this.read = read;
+
+ // public variables
+
+ this.mGoodTokens = 0;
+ this.mJunkTokens = 0;
+ this.mGoodMessages = 0;
+ this.mJunkMessages = 0;
+ this.mGoodCounts = {};
+ this.mJunkCounts = {};
+
+ // helper functions
+
+ function getJunkStatFile() {
+ var sBaseDir = Services.dirsvc.get("ProfD", Ci.nsIFile);
+ var CFileByFile = new CC(
+ "@mozilla.org/file/local;1",
+ "nsIFile",
+ "initWithFile"
+ );
+ var oFile = new CFileByFile(sBaseDir);
+ oFile.append("training.dat");
+ return oFile;
+ }
+
+ function getBinStream(oFile) {
+ if (oFile && oFile.exists()) {
+ var oUri = Services.io.newFileURI(oFile);
+ // open stream (channel)
+ let channel = Services.io.newChannelFromURI(
+ oUri,
+ null,
+ Services.scriptSecurityManager.getSystemPrincipal(),
+ null,
+ Ci.nsILoadInfo.SEC_ALLOW_CROSS_ORIGIN_SEC_CONTEXT_IS_NULL,
+ Ci.nsIContentPolicy.TYPE_OTHER
+ );
+ var oStream = channel.open();
+ // buffer it
+ var oBufStream = Cc[
+ "@mozilla.org/network/buffered-input-stream;1"
+ ].createInstance(Ci.nsIBufferedInputStream);
+ oBufStream.init(oStream, oFile.fileSize);
+ // read as binary
+ var oBinStream = Cc["@mozilla.org/binaryinputstream;1"].createInstance(
+ Ci.nsIBinaryInputStream
+ );
+ oBinStream.setInputStream(oBufStream);
+ // return it
+ return oBinStream;
+ }
+ return null;
+ }
+
+ // method specifications
+
+ function read() {
+ var file = getJunkStatFile();
+
+ // does the file exist?
+ Assert.ok(file.exists());
+
+ var fileStream = getBinStream(file);
+
+ // check magic number
+ var iMagicNumber = fileStream.read32();
+ Assert.equal(iMagicNumber, 0xfeedface);
+
+ // get ham'n'spam numbers
+ this.mGoodMessages = fileStream.read32();
+ this.mJunkMessages = fileStream.read32();
+
+ // Read good tokens
+ this.mGoodTokens = fileStream.read32();
+ var iRefCount, iTokenLen, sToken;
+ for (let i = 0; i < this.mGoodTokens; ++i) {
+ iRefCount = fileStream.read32();
+ iTokenLen = fileStream.read32();
+ sToken = fileStream.readBytes(iTokenLen);
+ this.mGoodCounts[sToken] = iRefCount;
+ }
+
+ // we have no further good tokens, so read junk tokens
+ this.mJunkTokens = fileStream.read32();
+ for (let i = 0; i < this.mJunkTokens; i++) {
+ // read token data
+ iRefCount = fileStream.read32();
+ iTokenLen = fileStream.read32();
+ sToken = fileStream.readBytes(iTokenLen);
+ this.mJunkCounts[sToken] = iRefCount;
+ }
+ }
+}
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_bug228675.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_bug228675.js
new file mode 100644
index 0000000000..40180006d7
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_bug228675.js
@@ -0,0 +1,136 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// tests reduction in size of training.dat
+
+// main setup
+
+/* import-globals-from resources/trainingfile.js */
+load("resources/trainingfile.js");
+
+var { MailServices } = ChromeUtils.import(
+ "resource:///modules/MailServices.jsm"
+);
+
+// before shrink, the trained messages have 76 tokens. Force shrink.
+Services.prefs.setIntPref("mailnews.bayesian_spam_filter.junk_maxtokens", 75);
+
+// local constants
+var kUnclassified = MailServices.junk.UNCLASSIFIED;
+var kJunk = MailServices.junk.JUNK;
+var kGood = MailServices.junk.GOOD;
+
+var emails = [
+ "ham1.eml",
+ "ham2.eml",
+ "spam1.eml",
+ "spam2.eml",
+ "spam3.eml",
+ "spam4.eml",
+];
+var classifications = [kGood, kGood, kJunk, kJunk, kJunk, kJunk];
+var trainingData;
+
+// main test
+function run_test() {
+ localAccountUtils.loadLocalMailAccount();
+ MailServices.junk.resetTrainingData();
+
+ do_test_pending();
+
+ var email = emails.shift();
+ var classification = classifications.shift();
+ // additional calls to setMessageClassifiaction are done in the callback
+ MailServices.junk.setMessageClassification(
+ getSpec(email),
+ kUnclassified,
+ classification,
+ null,
+ doTestingListener
+ );
+}
+
+var doTestingListener = {
+ onMessageClassified(aMsgURI, aClassification, aJunkPercent) {
+ if (!aMsgURI) {
+ // Ignore end-of-batch signal.
+ return;
+ }
+ var email = emails.shift();
+ var classification = classifications.shift();
+ if (email) {
+ MailServices.junk.setMessageClassification(
+ getSpec(email),
+ kUnclassified,
+ classification,
+ null,
+ doTestingListener
+ );
+ return;
+ }
+
+ // all done classifying, time to test
+ MailServices.junk.shutdown(); // just flushes training.dat
+ trainingData = new TrainingData();
+ trainingData.read();
+
+ /*
+ // List training.dat information for debug
+ dump("training.data results: goodMessages=" + trainingData.mGoodMessages
+ + " junkMessages = " + trainingData.mJunkMessages
+ + " goodTokens = " + trainingData.mGoodTokens
+ + " junkTokens = " + trainingData.mJunkTokens
+ + "\n");
+ print("Good counts");
+ for (var token in trainingData.mGoodCounts)
+ dump("count: " + trainingData.mGoodCounts[token] + " token: " + token + "\n");
+ print("Junk Counts");
+ for (var token in trainingData.mJunkCounts)
+ dump("count: " + trainingData.mJunkCounts[token] + " token: " + token + "\n");
+ */
+
+ /* Selected pre-shrink counts after training
+ training.data results: goodMessages=2 junkMessages = 4 tokens = 78
+ Good counts
+ count: 1 token: subject:report
+ count: 2 token: important
+ count: 2 token: to:careful reader <reader@example.org>
+
+ Junk Counts
+ count: 3 token: make
+ count: 4 token: money
+ count: 4 token: to:careful reader <reader@example.org>
+ count: 2 token: money!
+ */
+
+ // Shrinking divides all counts by two. In comments, I show the
+ // calculation for each test, (pre-shrink count)/2.
+
+ Assert.equal(trainingData.mGoodMessages, 1); // 2/2
+ Assert.equal(trainingData.mJunkMessages, 2); // 4/2
+ checkToken("money", 0, 2); // (0/2, 4/2)
+ checkToken("subject:report", 0, 0); // (1/2, 0/2)
+ checkToken("to:careful reader <reader@example.org>", 1, 2); // (2/2, 4/2)
+ checkToken("make", 0, 1); // (0/2, 3/2)
+ checkToken("important", 1, 0); // (2/2, 0/2)
+
+ do_test_finished();
+ },
+};
+
+// helper functions
+
+function checkToken(aToken, aGoodCount, aJunkCount) {
+ print(" checking " + aToken);
+ var goodCount = trainingData.mGoodCounts[aToken];
+ var junkCount = trainingData.mJunkCounts[aToken];
+ if (!goodCount) {
+ goodCount = 0;
+ }
+ if (!junkCount) {
+ junkCount = 0;
+ }
+ Assert.equal(goodCount, aGoodCount);
+ Assert.equal(junkCount, aJunkCount);
+}
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_customTokenization.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_customTokenization.js
new file mode 100644
index 0000000000..222a9557d8
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_customTokenization.js
@@ -0,0 +1,197 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Tests use of custom tokenization, originally introduced in bug 476389
+
+var { MailServices } = ChromeUtils.import(
+ "resource:///modules/MailServices.jsm"
+);
+
+// command functions for test data
+var kTrain = 0; // train a file
+var kTest = 1; // test headers returned from detail
+var kSetup = 2; // run a setup function
+
+// trait ids
+var kProArray = [3];
+var kAntiArray = [4];
+
+var gTest; // currently active test
+
+// The tests array defines the tests to attempt.
+
+var tests = [
+ // test a few tokens using defaults
+ {
+ command: kTrain,
+ fileName: "tokenTest.eml",
+ },
+ {
+ command: kTest,
+ fileName: "tokenTest.eml",
+ tokens: ["important", "subject:eat", "message-id:14159", "http://www"],
+ nottokens: ["idonotexist", "subject:to"],
+ },
+
+ // enable received, disable message-id
+ // switch tokenization of body to catch full urls (no "." delimiter)
+ // enable sender, keeping full value
+ {
+ command: kSetup,
+ operation() {
+ Services.prefs.setCharPref(
+ "mailnews.bayesian_spam_filter.tokenizeheader.received",
+ "standard"
+ );
+ Services.prefs.setCharPref(
+ "mailnews.bayesian_spam_filter.tokenizeheader.message-id",
+ "false"
+ );
+ Services.prefs.setCharPref(
+ "mailnews.bayesian_spam_filter.body_delimiters",
+ " \t\r\n\v"
+ );
+ Services.prefs.setCharPref(
+ "mailnews.bayesian_spam_filter.tokenizeheader.sender",
+ "full"
+ );
+ },
+ },
+ {
+ command: kTrain,
+ fileName: "tokenTest.eml",
+ },
+ {
+ command: kTest,
+ fileName: "tokenTest.eml",
+ tokens: [
+ "important",
+ "subject:eat",
+ "received:reader@example",
+ "skip:h 20",
+ "sender:bugzilla test setup <noreply@example.org>",
+ "received:<someone@example",
+ ],
+ nottokens: ["message-id:14159", "http://www"],
+ },
+
+ // increase the length of the maximum token to catch full URLs in the body
+ // add <>;, remove . from standard header delimiters to better capture emails
+ // use custom delimiters on sender, without "." or "<>"
+ {
+ command: kSetup,
+ operation() {
+ Services.prefs.setIntPref(
+ "mailnews.bayesian_spam_filter.maxlengthfortoken",
+ 50
+ );
+ Services.prefs.setCharPref(
+ "mailnews.bayesian_spam_filter.header_delimiters",
+ " ;<>\t\r\n\v"
+ );
+ Services.prefs.setCharPref(
+ "mailnews.bayesian_spam_filter.tokenizeheader.sender",
+ " \t\r\n\v"
+ );
+ },
+ },
+ {
+ command: kTrain,
+ fileName: "tokenTest.eml",
+ },
+ {
+ command: kTest,
+ fileName: "tokenTest.eml",
+ tokens: [
+ "received:someone@example.com",
+ "http://www.example.org",
+ "received:reader@example.org",
+ "sender:<noreply@example.org>",
+ ],
+ nottokens: ["skip:h 20", "received:<someone@example"],
+ },
+];
+
+// main test
+function run_test() {
+ localAccountUtils.loadLocalMailAccount();
+ do_test_pending();
+
+ startCommand();
+}
+
+var listener = {
+ // nsIMsgTraitClassificationListener implementation
+ onMessageTraitsClassified(aMsgURI, aTraits, aPercents) {
+ startCommand();
+ },
+
+ onMessageTraitDetails(
+ aMsgURI,
+ aProTrait,
+ aTokenString,
+ aTokenPercents,
+ aRunningPercents
+ ) {
+ print("Details for " + aMsgURI);
+ for (let i = 0; i < aTokenString.length; i++) {
+ print("Token " + aTokenString[i]);
+ }
+
+ // we should have these tokens
+ for (let value of gTest.tokens) {
+ print("We should have '" + value + "'? ");
+ Assert.ok(aTokenString.includes(value));
+ }
+
+ // should not have these tokens
+ for (let value of gTest.nottokens) {
+ print("We should not have '" + value + "'? ");
+ Assert.ok(!aTokenString.includes(value));
+ }
+ startCommand();
+ },
+};
+
+// start the next test command
+function startCommand() {
+ if (!tests.length) {
+ // Do we have more commands?
+ // no, all done
+ do_test_finished();
+ return;
+ }
+
+ gTest = tests.shift();
+ // print("StartCommand command = " + gTest.command + ", remaining tests " + tests.length);
+ switch (gTest.command) {
+ case kTrain:
+ // train message
+
+ MailServices.junk.setMsgTraitClassification(
+ getSpec(gTest.fileName), // aMsgURI
+ [], // aOldTraits
+ kProArray, // aNewTraits
+ listener
+ ); // [optional] in nsIMsgTraitClassificationListener aTraitListener
+ // null, // [optional] in nsIMsgWindow aMsgWindow
+ // null, // [optional] in nsIJunkMailClassificationListener aJunkListener
+ break;
+
+ case kTest:
+ // test headers from detail message
+ MailServices.junk.detailMessage(
+ getSpec(gTest.fileName), // in string aMsgURI
+ kProArray[0], // proTrait
+ kAntiArray[0], // antiTrait
+ listener
+ ); // in nsIMsgTraitDetailListener aDetailListener
+ break;
+
+ case kSetup:
+ gTest.operation();
+ startCommand();
+ break;
+ }
+}
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_junkAsTraits.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_junkAsTraits.js
new file mode 100644
index 0000000000..a1800b93e7
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_junkAsTraits.js
@@ -0,0 +1,574 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// tests calls to the bayesian filter plugin to train, classify, and forget
+// messages using both the older junk-oriented calls, as well as the newer
+// trait-oriented calls. Only a single trait is tested. The main intent of
+// these tests is to demonstrate that both the old junk-oriented calls and the
+// new trait-oriented calls give the same results on junk processing.
+
+var { MailServices } = ChromeUtils.import(
+ "resource:///modules/MailServices.jsm"
+);
+
+// local constants
+var kUnclassified = MailServices.junk.UNCLASSIFIED;
+var kJunk = MailServices.junk.JUNK;
+var kGood = MailServices.junk.GOOD;
+var kJunkTrait = MailServices.junk.JUNK_TRAIT;
+var kGoodTrait = MailServices.junk.GOOD_TRAIT;
+var kIsHamScore = MailServices.junk.IS_HAM_SCORE;
+var kIsSpamScore = MailServices.junk.IS_SPAM_SCORE;
+
+// command functions for test data
+var kTrainJ = 0; // train using junk method
+var kTrainT = 1; // train using trait method
+var kClassJ = 2; // classify using junk method
+var kClassT = 3; // classify using trait method
+var kForgetJ = 4; // forget training using junk method
+var kForgetT = 5; // forget training using trait method
+var kCounts = 6; // test token and message counts
+
+var gProArray = [],
+ gAntiArray = []; // traits arrays, pro is junk, anti is good
+var gTest; // currently active test
+
+// The tests array defines the tests to attempt. Format of
+// an element "test" of this array (except for kCounts):
+//
+// test.command: function to perform, see definitions above
+// test.fileName: file containing message to test
+// test.junkPercent: sets the classification (for Class or Forget commands)
+// tests the classification (for Class commands)
+// As a special case for the no-training tests, if
+// junkPercent is negative, test its absolute value
+// for percents, but reverse the junk/good classification
+// test.traitListener: should we use the trait listener call?
+// test.junkListener: should we use the junk listener call?
+
+var tests = [
+ // test the trait-based calls. We mix trait listeners, junk listeners,
+ // and both
+
+ {
+ // with no training, percents is 50 - but classifies as junk
+ command: kClassT,
+ fileName: "ham1.eml",
+ junkPercent: -50, // negative means classifies as junk
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ // train 1 ham message
+ command: kTrainT,
+ fileName: "ham1.eml",
+ junkPercent: 0,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ // with ham but no spam training, percents are 0 and classifies as ham
+ command: kClassT,
+ fileName: "ham1.eml",
+ junkPercent: 0,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ // train 1 spam message
+ command: kTrainT,
+ fileName: "spam1.eml",
+ junkPercent: 100,
+ traitListener: true,
+ junkListener: false,
+ },
+ {
+ // the trained messages will classify at 0 and 100
+ command: kClassT,
+ fileName: "ham1.eml",
+ junkPercent: 0,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ command: kClassT,
+ fileName: "spam1.eml",
+ junkPercent: 100,
+ traitListener: true,
+ junkListener: false,
+ },
+ {
+ // ham2, spam2, spam4 give partial percents, but still ham
+ command: kClassT,
+ fileName: "ham2.eml",
+ junkPercent: 8,
+ traitListener: true,
+ junkListener: true,
+ },
+ {
+ command: kClassT,
+ fileName: "spam2.eml",
+ junkPercent: 81,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ command: kClassT,
+ fileName: "spam4.eml",
+ junkPercent: 81,
+ traitListener: true,
+ junkListener: false,
+ },
+ {
+ // spam3 evaluates to spam
+ command: kClassT,
+ fileName: "spam3.eml",
+ junkPercent: 98,
+ traitListener: true,
+ junkListener: true,
+ },
+ {
+ // train ham2, then test percents of 0 (clearly good)
+ command: kTrainT,
+ fileName: "ham2.eml",
+ junkPercent: 0,
+ traitListener: true,
+ junkListener: true,
+ },
+ {
+ command: kClassT,
+ fileName: "ham2.eml",
+ junkPercent: 0,
+ traitListener: true,
+ junkListener: true,
+ },
+ {
+ // forget ham2, percents should return to partial value
+ command: kForgetT,
+ fileName: "ham2.eml",
+ junkPercent: 0,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ command: kClassT,
+ fileName: "ham2.eml",
+ junkPercent: 8,
+ traitListener: true,
+ junkListener: true,
+ },
+ {
+ // train, classify, forget, reclassify spam4
+ command: kTrainT,
+ fileName: "spam4.eml",
+ junkPercent: 100,
+ traitListener: true,
+ junkListener: true,
+ },
+ {
+ command: kClassT,
+ fileName: "spam4.eml",
+ junkPercent: 100,
+ traitListener: true,
+ junkListener: true,
+ },
+ {
+ command: kCounts,
+ tokenCount: 66, // count of tokens in the corpus
+ junkCount: 2, // count of junk messages in the corpus
+ goodCount: 1, // count of good messages in the corpus
+ },
+ {
+ command: kForgetT,
+ fileName: "spam4.eml",
+ junkPercent: 100,
+ traitListener: true,
+ junkListener: false,
+ },
+ {
+ command: kClassT,
+ fileName: "spam4.eml",
+ junkPercent: 81,
+ traitListener: true,
+ junkListener: true,
+ },
+ {
+ // forget ham1 and spam1 to empty training
+ command: kForgetT,
+ fileName: "ham1.eml",
+ junkPercent: 0,
+ traitListener: true,
+ junkListener: true,
+ },
+ {
+ command: kForgetT,
+ fileName: "spam1.eml",
+ junkPercent: 100,
+ traitListener: true,
+ junkListener: true,
+ },
+ // repeat the whole sequence using the junk calls
+ {
+ // train 1 ham and 1 spam message
+ command: kTrainJ,
+ fileName: "ham1.eml",
+ junkPercent: 0,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ command: kTrainJ,
+ fileName: "spam1.eml",
+ junkPercent: 100,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ // the trained messages will classify at 0 and 100
+ command: kClassJ,
+ fileName: "ham1.eml",
+ junkPercent: 0,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ command: kClassJ,
+ fileName: "spam1.eml",
+ junkPercent: 100,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ // ham2, spam2, spam4 give partial percents, but still ham
+ command: kClassJ,
+ fileName: "ham2.eml",
+ junkPercent: 8,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ command: kClassJ,
+ fileName: "spam2.eml",
+ junkPercent: 81,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ command: kClassJ,
+ fileName: "spam4.eml",
+ junkPercent: 81,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ // spam3 evaluates to spam
+ command: kClassJ,
+ fileName: "spam3.eml",
+ junkPercent: 98,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ // train ham2, then test percents of 0 (clearly good)
+ command: kTrainJ,
+ fileName: "ham2.eml",
+ junkPercent: 0,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ command: kClassJ,
+ fileName: "ham2.eml",
+ junkPercent: 0,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ // forget ham2, percents should return to partial value
+ command: kForgetJ,
+ fileName: "ham2.eml",
+ junkPercent: 0,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ command: kClassJ,
+ fileName: "ham2.eml",
+ junkPercent: 8,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ // train, classify, forget, reclassify spam4
+ command: kTrainJ,
+ fileName: "spam4.eml",
+ junkPercent: 100,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ command: kClassJ,
+ fileName: "spam4.eml",
+ junkPercent: 100,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ command: kForgetJ,
+ fileName: "spam4.eml",
+ junkPercent: 100,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ command: kClassJ,
+ fileName: "spam4.eml",
+ junkPercent: 81,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ // forget ham1 and spam1 to be empty
+ command: kForgetJ,
+ fileName: "ham1.eml",
+ junkPercent: 0,
+ traitListener: false,
+ junkListener: true,
+ },
+ {
+ command: kForgetJ,
+ fileName: "spam1.eml",
+ junkPercent: 100,
+ traitListener: false,
+ junkListener: true,
+ },
+];
+
+// main test
+function run_test() {
+ localAccountUtils.loadLocalMailAccount();
+ do_test_pending();
+
+ // setup pro/anti arrays as junk/good
+ gProArray.push(kJunkTrait);
+ gAntiArray.push(kGoodTrait);
+
+ startCommand();
+}
+
+var junkListener = {
+ // nsIJunkMailClassificationListener implementation
+ onMessageClassified(aMsgURI, aClassification, aJunkPercent) {
+ if (!aMsgURI) {
+ // Ignore end-of-batch signal.
+ return;
+ }
+ // print("Message URI is " + aMsgURI);
+ // print("Junk percent is " + aJunkPercent);
+ // print("Classification is " + aClassification);
+ var command = gTest.command;
+ var junkPercent = gTest.junkPercent;
+ // file returned correctly
+ Assert.equal(getSpec(gTest.fileName), aMsgURI);
+
+ // checks of aClassification
+
+ // forget returns unclassified
+ if (command == kForgetJ || command == kForgetT) {
+ Assert.equal(aClassification, kUnclassified);
+ } else {
+ // classification or train should return an actual classification
+ // check junk classification set by default cutoff of 90
+ var isGood = Math.abs(junkPercent) < 90;
+ if (junkPercent < 0) {
+ isGood = !isGood;
+ }
+ Assert.equal(aClassification, isGood ? kGood : kJunk);
+ }
+
+ // checks of aJunkPercent
+
+ if (command == kClassJ || command == kClassT) {
+ // classify returns the actual junk percents
+ Assert.equal(Math.abs(junkPercent), aJunkPercent);
+ } else if (command == kTrainJ || command == kTrainT) {
+ // train returns the ham and spam limits
+ Assert.equal(aJunkPercent, junkPercent < 90 ? kIsHamScore : kIsSpamScore);
+ } else {
+ // Forget always returns 0.
+ Assert.equal(aJunkPercent, 0);
+ }
+
+ // if the current test includes a trait listener, it will
+ // run next, so we defer to it for starting the next command
+ if (gTest.traitListener) {
+ return;
+ }
+ startCommand();
+ },
+};
+
+var traitListener = {
+ // nsIMsgTraitClassificationListener implementation
+ onMessageTraitsClassified(aMsgURI, aTraits, aPercents) {
+ if (!aMsgURI) {
+ // Ignore end-of-batch signal.
+ return;
+ }
+ // print("(Trait Listener)Message URI is " + aMsgURI);
+ // print("(Trait Listener)Junk percent is " + aPercents);
+ var command = gTest.command;
+ var junkPercent = gTest.junkPercent;
+ // print("command, junkPercent is " + command + " , " + junkPercent);
+
+ Assert.equal(getSpec(gTest.fileName), aMsgURI);
+
+ // checks of aPercents
+
+ if (command == kForgetJ || command == kForgetT) {
+ // "forgets" with null newClassifications does not return a percent
+ Assert.equal(aPercents.length, 0);
+ } else {
+ var percent = aPercents[0];
+ // print("Percent is " + percent);
+ if (command == kClassJ || command == kClassT) {
+ // Classify returns actual percents
+ Assert.equal(percent, junkPercent);
+ } else {
+ // Train simply returns 100.
+ Assert.equal(percent, 100);
+ }
+ }
+
+ // checks of aTraits
+
+ if (command == kForgetJ || command == kForgetT) {
+ // "forgets" with null newClassifications does not return a
+ // classification
+ Assert.equal(aTraits.length, 0);
+ } else if (command == kClassJ || command == kClassT) {
+ // classification just returns the tested "Pro" trait (junk)
+ let trait = aTraits[0];
+ Assert.equal(trait, kJunkTrait);
+ } else {
+ // training returns the actual trait trained
+ let trait = aTraits[0];
+ Assert.equal(trait, junkPercent < 90 ? kGoodTrait : kJunkTrait);
+ }
+
+ // All done, start the next test
+ startCommand();
+ },
+};
+
+// start the next test command
+function startCommand() {
+ if (!tests.length) {
+ // Do we have more commands?
+ // no, all done
+ do_test_finished();
+ return;
+ }
+
+ gTest = tests.shift();
+ print(
+ "StartCommand command = " +
+ gTest.command +
+ ", remaining tests " +
+ tests.length
+ );
+ var command = gTest.command;
+ var junkPercent = gTest.junkPercent;
+ var fileName = gTest.fileName;
+ var tListener = gTest.traitListener;
+ var jListener = gTest.junkListener;
+ switch (command) {
+ case kTrainJ:
+ // train message using junk call
+ MailServices.junk.setMessageClassification(
+ getSpec(fileName), // in string aMsgURI
+ null, // in nsMsgJunkStatus aOldUserClassification
+ junkPercent == kIsHamScore ? kGood : kJunk, // in nsMsgJunkStatus aNewClassification
+ null, // in nsIMsgWindow aMsgWindow
+ junkListener
+ ); // in nsIJunkMailClassificationListener aListener);
+ break;
+
+ case kTrainT:
+ // train message using trait call
+ MailServices.junk.setMsgTraitClassification(
+ getSpec(fileName), // aMsgURI
+ [], // aOldTraits
+ junkPercent == kIsSpamScore ? gProArray : gAntiArray, // aNewTraits
+ tListener ? traitListener : null, // aTraitListener
+ null, // aMsgWindow
+ jListener ? junkListener : null
+ );
+ break;
+
+ case kClassJ:
+ // classify message using junk call
+ MailServices.junk.classifyMessage(
+ getSpec(fileName), // in string aMsgURI
+ null, // in nsIMsgWindow aMsgWindow
+ junkListener
+ ); // in nsIJunkMailClassificationListener aListener
+ break;
+
+ case kClassT:
+ // classify message using trait call
+ MailServices.junk.classifyTraitsInMessage(
+ getSpec(fileName), // in string aMsgURI
+ gProArray, // in array aProTraits,
+ gAntiArray, // in array aAntiTraits
+ tListener ? traitListener : null, // in nsIMsgTraitClassificationListener aTraitListener
+ null, // in nsIMsgWindow aMsgWindow
+ jListener ? junkListener : null
+ ); // in nsIJunkMailClassificationListener aJunkListener
+ break;
+
+ case kForgetJ:
+ // forget message using junk call
+ MailServices.junk.setMessageClassification(
+ getSpec(fileName), // in string aMsgURI
+ junkPercent == kIsHamScore ? kGood : kJunk, // in nsMsgJunkStatus aOldUserClassification
+ null, // in nsMsgJunkStatus aNewClassification,
+ null, // in nsIMsgWindow aMsgWindow,
+ junkListener
+ ); // in nsIJunkMailClassificationListener aListener
+ break;
+
+ case kForgetT:
+ // forget message using trait call
+ MailServices.junk.setMsgTraitClassification(
+ getSpec(fileName), // in string aMsgURI
+ junkPercent == kIsSpamScore ? gProArray : gAntiArray, // in array aOldTraits
+ [], // in array aNewTraits
+ tListener ? traitListener : null, // in nsIMsgTraitClassificationListener aTraitListener
+ null, // in nsIMsgWindow aMsgWindow
+ jListener ? junkListener : null
+ ); // in nsIJunkMailClassificationListener aJunkListener
+ break;
+
+ case kCounts:
+ // test counts
+ let msgCount = {};
+ let nsIMsgCorpus = MailServices.junk.QueryInterface(Ci.nsIMsgCorpus);
+ let tokenCount = nsIMsgCorpus.corpusCounts(null, {});
+ nsIMsgCorpus.corpusCounts(kJunkTrait, msgCount);
+ let junkCount = msgCount.value;
+ nsIMsgCorpus.corpusCounts(kGoodTrait, msgCount);
+ let goodCount = msgCount.value;
+ print(
+ "tokenCount, junkCount, goodCount is " + tokenCount,
+ junkCount,
+ goodCount
+ );
+ Assert.equal(tokenCount, gTest.tokenCount);
+ Assert.equal(junkCount, gTest.junkCount);
+ Assert.equal(goodCount, gTest.goodCount);
+ do_timeout(0, startCommand);
+ break;
+ }
+}
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_msgCorpus.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_msgCorpus.js
new file mode 100644
index 0000000000..0c39215fcb
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_msgCorpus.js
@@ -0,0 +1,144 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Tests corpus management functions using nsIMsgCorpus
+
+var { MailServices } = ChromeUtils.import(
+ "resource:///modules/MailServices.jsm"
+);
+
+var msgCorpus = MailServices.junk.QueryInterface(Ci.nsIMsgCorpus);
+
+// tokens found in the test corpus file. trait 1001 was trained with
+// 2 messages, and trait 1003 with 1.
+
+var tokenData = [
+ // [traitid, count, token]
+ [1001, 0, "iDoNotExist"],
+ [1001, 1, "linecount"],
+ [1001, 2, "envelope-to:kenttest@caspia.com"],
+ [1003, 0, "iAlsoDoNotExist"],
+ [1003, 0, "isjunk"], // in 1001 but not 1003
+ [1003, 1, "linecount"],
+ [1003, 1, "subject:test"],
+ [1003, 1, "envelope-to:kenttest@caspia.com"],
+];
+
+// list of tests
+
+var gTests = [
+ // train two different combinations of messages
+ function checkLoadOnce() {
+ let fileName = "msgCorpus.dat";
+ let file = do_get_file("resources/" + fileName);
+ msgCorpus.updateData(file, true);
+
+ // check message counts
+ let messageCount = {};
+ msgCorpus.corpusCounts(1001, messageCount);
+ Assert.equal(2, messageCount.value);
+ msgCorpus.corpusCounts(1003, messageCount);
+ Assert.equal(1, messageCount.value);
+
+ for (let i = 0; i < tokenData.length; i++) {
+ let id = tokenData[i][0];
+ let count = tokenData[i][1];
+ let word = tokenData[i][2];
+ Assert.equal(count, msgCorpus.getTokenCount(word, id));
+ }
+ },
+ function checkLoadTwice() {
+ let fileName = "msgCorpus.dat";
+ let file = do_get_file("resources/" + fileName);
+ msgCorpus.updateData(file, true);
+
+ // check message counts
+ let messageCount = {};
+ msgCorpus.corpusCounts(1001, messageCount);
+ Assert.equal(4, messageCount.value);
+ msgCorpus.corpusCounts(1003, messageCount);
+ Assert.equal(2, messageCount.value);
+
+ for (let i = 0; i < tokenData.length; i++) {
+ let id = tokenData[i][0];
+ let count = 2 * tokenData[i][1];
+ let word = tokenData[i][2];
+ Assert.equal(count, msgCorpus.getTokenCount(word, id));
+ }
+ },
+ // remap the ids in the file to different local ids
+ function loadWithRemap() {
+ let fileName = "msgCorpus.dat";
+ let file = do_get_file("resources/" + fileName);
+ msgCorpus.updateData(file, true, [1001, 1003], [1, 3]);
+
+ for (let i = 0; i < tokenData.length; i++) {
+ let id = tokenData[i][0] - 1000;
+ let count = tokenData[i][1];
+ let word = tokenData[i][2];
+ Assert.equal(count, msgCorpus.getTokenCount(word, id));
+ }
+ },
+ // test removing data
+ function checkRemove() {
+ let fileName = "msgCorpus.dat";
+ let file = do_get_file("resources/" + fileName);
+ msgCorpus.updateData(file, false);
+
+ // check message counts
+ let messageCount = {};
+ msgCorpus.corpusCounts(1001, messageCount);
+ Assert.equal(2, messageCount.value);
+ msgCorpus.corpusCounts(1003, messageCount);
+ Assert.equal(1, messageCount.value);
+
+ for (let i = 0; i < tokenData.length; i++) {
+ let id = tokenData[i][0];
+ let count = tokenData[i][1];
+ let word = tokenData[i][2];
+ Assert.equal(count, msgCorpus.getTokenCount(word, id));
+ }
+ },
+ // test clearing a trait
+ function checkClear() {
+ let messageCountObject = {};
+ /*
+ msgCorpus.corpusCounts(1001, messageCountObject);
+ let v1001 = messageCountObject.value;
+ msgCorpus.corpusCounts(1003, messageCountObject);
+ let v1003 = messageCountObject.value;
+ dump("pre-clear value " + v1001 + " " + v1003 + "\n");
+ /**/
+ msgCorpus.clearTrait(1001);
+ // check that the message count is zero
+ msgCorpus.corpusCounts(1001, messageCountObject);
+ Assert.equal(0, messageCountObject.value);
+ // but the other trait should still have counts
+ msgCorpus.corpusCounts(1003, messageCountObject);
+ Assert.equal(1, messageCountObject.value);
+ // check that token count was cleared
+ for (let i = 0; i < tokenData.length; i++) {
+ let id = tokenData[i][0];
+ let count = tokenData[i][1];
+ let word = tokenData[i][2];
+ Assert.equal(id == 1001 ? 0 : count, msgCorpus.getTokenCount(word, id));
+ }
+ },
+];
+
+// main test
+function run_test() {
+ do_test_pending();
+ while (true) {
+ if (!gTests.length) {
+ // Do we have more commands?
+ // no, all done
+ do_test_finished();
+ return;
+ }
+
+ let test = gTests.shift();
+ test();
+ }
+}
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traitAliases.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traitAliases.js
new file mode 100644
index 0000000000..41a9f22a9b
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traitAliases.js
@@ -0,0 +1,172 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Tests bayes trait analysis with aliases. Adapted from test_traits.js
+
+/*
+ * These tests rely on data stored in a file, with the same format as traits.dat,
+ * that was trained in the following manner. There are two training messages,
+ * included here as files aliases1.eml and aliases2.eml Aliases.dat was trained on
+ * each of these messages, for different trait indices, as follows, with
+ * columns showing the training count for each trait index:
+ *
+ * file count(1001) count(1005) count(1007) count(1009)
+ *
+ * aliases1.eml 1 0 2 0
+ * aliases2.eml 0 1 0 1
+ *
+ * There is also a third email file, aliases3.eml, which combines tokens
+ * from aliases1.eml and aliases2.eml
+ *
+ * The goal here is to demonstrate that traits 1001 and 1007, and traits
+ * 1005 and 1009, can be combined using aliases. We classify messages with
+ * trait 1001 as the PRO trait, and 1005 as the ANTI trait.
+ *
+ * With these characteristics, I've run a trait analysis without aliases, and
+ * determined that the following is the correct percentage results from the
+ * analysis for each message. "Train11" means that the training was 1 pro count
+ * from aliases1.eml, and 1 anti count from alias2.eml. "Train32" is 3 pro counts,
+ * and 2 anti counts.
+ *
+ * percentage
+ * file Train11 Train32
+ *
+ * alias1.eml 92 98
+ * alias2.eml 8 3
+ * alias3.eml 50 53
+ */
+
+var { MailServices } = ChromeUtils.import(
+ "resource:///modules/MailServices.jsm"
+);
+
+var traitService = Cc["@mozilla.org/msg-trait-service;1"].getService(
+ Ci.nsIMsgTraitService
+);
+var kProTrait = 1001;
+var kAntiTrait = 1005;
+var kProAlias = 1007;
+var kAntiAlias = 1009;
+
+var gTest; // currently active test
+
+// The tests array defines the tests to attempt. Format of
+// an element "test" of this array:
+//
+// test.fileName: file containing message to test
+// test.proAliases: array of aliases for the pro trait
+// test.antiAliases: array of aliases for the anti trait
+// test.percent: expected results from the classifier
+
+var tests = [
+ {
+ fileName: "aliases1.eml",
+ proAliases: [],
+ antiAliases: [],
+ percent: 92,
+ },
+ {
+ fileName: "aliases2.eml",
+ proAliases: [],
+ antiAliases: [],
+ percent: 8,
+ },
+ {
+ fileName: "aliases3.eml",
+ proAliases: [],
+ antiAliases: [],
+ percent: 50,
+ },
+ {
+ fileName: "aliases1.eml",
+ proAliases: [kProAlias],
+ antiAliases: [kAntiAlias],
+ percent: 98,
+ },
+ {
+ fileName: "aliases2.eml",
+ proAliases: [kProAlias],
+ antiAliases: [kAntiAlias],
+ percent: 3,
+ },
+ {
+ fileName: "aliases3.eml",
+ proAliases: [kProAlias],
+ antiAliases: [kAntiAlias],
+ percent: 53,
+ },
+];
+
+// main test
+function run_test() {
+ localAccountUtils.loadLocalMailAccount();
+
+ // load in the aliases trait testing file
+ MailServices.junk
+ .QueryInterface(Ci.nsIMsgCorpus)
+ .updateData(do_get_file("resources/aliases.dat"), true);
+ do_test_pending();
+
+ startCommand();
+}
+
+var listener = {
+ // nsIMsgTraitClassificationListener implementation
+ onMessageTraitsClassified(aMsgURI, aTraits, aPercents) {
+ // print("Message URI is " + aMsgURI);
+ if (!aMsgURI) {
+ // Ignore end-of-batch signal.
+ return;
+ }
+
+ Assert.equal(aPercents[0], gTest.percent);
+ // All done, start the next test
+ startCommand();
+ },
+};
+
+// start the next test command
+function startCommand() {
+ if (!tests.length) {
+ // Do we have more commands?
+ // no, all done
+ do_test_finished();
+ return;
+ }
+
+ gTest = tests.shift();
+
+ // classify message
+ var antiArray = [kAntiTrait];
+ var proArray = [kProTrait];
+
+ // remove any existing aliases
+ let proAliases = traitService.getAliases(kProTrait);
+ let antiAliases = traitService.getAliases(kAntiTrait);
+ let proAlias;
+ let antiAlias;
+ while ((proAlias = proAliases.pop())) {
+ traitService.removeAlias(kProTrait, proAlias);
+ }
+ while ((antiAlias = antiAliases.pop())) {
+ traitService.removeAlias(kAntiTrait, antiAlias);
+ }
+
+ // add new aliases
+ while ((proAlias = gTest.proAliases.pop())) {
+ traitService.addAlias(kProTrait, proAlias);
+ }
+ while ((antiAlias = gTest.antiAliases.pop())) {
+ traitService.addAlias(kAntiTrait, antiAlias);
+ }
+
+ MailServices.junk.classifyTraitsInMessage(
+ getSpec(gTest.fileName), // in string aMsgURI
+ proArray, // in array aProTraits,
+ antiArray, // in array aAntiTraits
+ listener
+ ); // in nsIMsgTraitClassificationListener aTraitListener
+ // null, // [optional] in nsIMsgWindow aMsgWindow
+ // null, // [optional] in nsIJunkMailClassificationListener aJunkListener
+}
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traits.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traits.js
new file mode 100644
index 0000000000..b005db72cc
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traits.js
@@ -0,0 +1,287 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Tests bayes trait analysis
+
+// I make this an instance so that I know I can reset and get
+// a completely new component. Should be getService in production code.
+var nsIJunkMailPlugin = Cc[
+ "@mozilla.org/messenger/filter-plugin;1?name=bayesianfilter"
+].createInstance(Ci.nsIJunkMailPlugin);
+
+// command functions for test data
+var kTrain = 0; // train a file as a trait
+var kClass = 1; // classify files with traits
+var kReset = 2; // reload plugin, reading in data from disk
+var kDetail = 3; // test details
+
+var gTest; // currently active test
+
+// The tests array defines the tests to attempt. Format of
+// an element "test" of this array:
+//
+// test.command: function to perform, see definitions above
+// test.fileName: file(s) containing message(s) to test
+// test.traitIds: Array of traits to train (kTrain) or pro trait (kClass)
+// test.traitAntiIds: Array of anti traits to classify
+// test.percents: array of arrays (1 per message, 1 per trait) of
+// expected results from the classifier
+
+var tests = [
+ // train two different combinations of messages
+ {
+ command: kTrain,
+ fileName: "ham1.eml",
+ traitIds: [3, 6],
+ },
+ {
+ command: kTrain,
+ fileName: "spam1.eml",
+ traitIds: [4],
+ },
+ {
+ command: kTrain,
+ fileName: "spam4.eml",
+ traitIds: [5],
+ },
+ // test the message classifications using both singular and plural classifier
+ {
+ command: kClass,
+ fileName: "ham1.eml",
+ traitIds: [4, 6],
+ traitAntiIds: [3, 5],
+ // ham1 is trained "anti" for first test, "pro" for second
+ percents: [[0, 100]],
+ },
+ {
+ command: kClass,
+ fileName: "ham2.eml",
+ traitIds: [4, 6],
+ traitAntiIds: [3, 5],
+ // these are partial percents for an untrained message. ham2 is similar to ham1
+ percents: [[8, 95]],
+ },
+ {
+ command: kDetail,
+ fileName: "spam2.eml",
+ traitIds: [4],
+ traitAntiIds: [3],
+ percents: {
+ lots: 84,
+ money: 84,
+ make: 84,
+ your: 16,
+ },
+ runnings: [84, 92, 95, 81],
+ },
+ {
+ command: kClass,
+ fileName: "spam1.eml,spam2.eml,spam3.eml,spam4.eml",
+ traitIds: [4, 6],
+ traitAntiIds: [3, 5],
+ // spam1 trained as "pro" for first pro/anti pair
+ // spam4 trained as "anti" for second pro/anti pair
+ // others are partials
+ percents: [
+ [100, 50],
+ [81, 0],
+ [98, 50],
+ [81, 0],
+ ],
+ },
+ // reset the plugin, read in data, and retest the classification
+ // this tests the trait file writing
+ {
+ command: kReset,
+ },
+ {
+ command: kClass,
+ fileName: "ham1.eml",
+ traitIds: [4, 6],
+ traitAntiIds: [3, 5],
+ percents: [[0, 100]],
+ },
+ {
+ command: kClass,
+ fileName: "ham2.eml",
+ traitIds: [4, 6],
+ traitAntiIds: [3, 5],
+ percents: [[8, 95]],
+ },
+ {
+ command: kClass,
+ fileName: "spam1.eml,spam2.eml,spam3.eml,spam4.eml",
+ traitIds: [4, 6],
+ traitAntiIds: [3, 5],
+ percents: [
+ [100, 50],
+ [81, 0],
+ [98, 50],
+ [81, 0],
+ ],
+ },
+];
+
+// main test
+function run_test() {
+ localAccountUtils.loadLocalMailAccount();
+ do_test_pending();
+
+ startCommand();
+}
+
+var listener = {
+ // nsIMsgTraitClassificationListener implementation
+ onMessageTraitsClassified(aMsgURI, aTraits, aPercents) {
+ // print("Message URI is " + aMsgURI);
+ if (!aMsgURI) {
+ // Ignore end-of-batch signal.
+ return;
+ }
+
+ switch (gTest.command) {
+ case kClass:
+ Assert.equal(gTest.files[gTest.currentIndex], aMsgURI);
+ var currentPercents = gTest.percents[gTest.currentIndex];
+ for (let i = 0; i < currentPercents.length; i++) {
+ // print("expecting score " + currentPercents[i] +
+ // " got score " + aPercents[i]);
+ Assert.equal(currentPercents[i], aPercents[i]);
+ }
+ gTest.currentIndex++;
+ break;
+
+ case kTrain: // We tested this some in test_junkAsTraits.js, so let's not bother
+ default:
+ break;
+ }
+ if (!--gTest.callbacks) {
+ // All done, start the next test
+ startCommand();
+ }
+ },
+ onMessageTraitDetails(
+ aMsgURI,
+ aProTrait,
+ aTokenString,
+ aTokenPercents,
+ aRunningPercents
+ ) {
+ print("Details for " + aMsgURI);
+ for (let i = 0; i < aTokenString.length; i++) {
+ print(
+ "Percent " +
+ aTokenPercents[i] +
+ " Running " +
+ aRunningPercents[i] +
+ " Token " +
+ aTokenString[i]
+ );
+ Assert.ok(aTokenString[i] in gTest.percents);
+
+ Assert.equal(gTest.percents[aTokenString[i]], aTokenPercents[i]);
+ Assert.equal(gTest.runnings[i], aRunningPercents[i]);
+ delete gTest.percents[aTokenString[i]];
+ }
+ Assert.equal(Object.keys(gTest.percents).length, 0);
+ if (gTest.command == kClass) {
+ gTest.currentIndex++;
+ }
+ startCommand();
+ },
+};
+
+// start the next test command
+function startCommand() {
+ if (!tests.length) {
+ // Do we have more commands?
+ // no, all done
+ do_test_finished();
+ return;
+ }
+
+ gTest = tests.shift();
+ print(
+ "StartCommand command = " +
+ gTest.command +
+ ", remaining tests " +
+ tests.length
+ );
+ switch (gTest.command) {
+ case kTrain: {
+ // train message
+ let proArray = [];
+ for (let i = 0; i < gTest.traitIds.length; i++) {
+ proArray.push(gTest.traitIds[i]);
+ }
+ gTest.callbacks = 1;
+
+ nsIJunkMailPlugin.setMsgTraitClassification(
+ getSpec(gTest.fileName), // aMsgURI
+ [], // aOldTraits
+ proArray, // aNewTraits
+ listener
+ ); // [optional] in nsIMsgTraitClassificationListener aTraitListener
+ // null, // [optional] in nsIMsgWindow aMsgWindow
+ // null, // [optional] in nsIJunkMailClassificationListener aJunkListener
+ break;
+ }
+ case kClass: {
+ // classify message
+ var antiArray = [];
+ let proArray = [];
+ for (let i = 0; i < gTest.traitIds.length; i++) {
+ antiArray.push(gTest.traitAntiIds[i]);
+ proArray.push(gTest.traitIds[i]);
+ }
+ gTest.files = gTest.fileName.split(",");
+ gTest.callbacks = gTest.files.length;
+ gTest.currentIndex = 0;
+ for (let i = 0; i < gTest.files.length; i++) {
+ gTest.files[i] = getSpec(gTest.files[i]);
+ }
+ if (gTest.files.length == 1) {
+ // use the singular classifier
+ nsIJunkMailPlugin.classifyTraitsInMessage(
+ getSpec(gTest.fileName), // in string aMsgURI
+ proArray, // in array aProTraits,
+ antiArray, // in array aAntiTraits
+ listener
+ ); // in nsIMsgTraitClassificationListener aTraitListener
+ // null, // [optional] in nsIMsgWindow aMsgWindow
+ // null, // [optional] in nsIJunkMailClassificationListener aJunkListener
+ } else {
+ // use the plural classifier
+ nsIJunkMailPlugin.classifyTraitsInMessages(
+ gTest.files, // in Array<ACString> aMsgURIs,
+ proArray, // in array aProTraits,
+ antiArray, // in array aAntiTraits
+ listener
+ ); // in nsIMsgTraitClassificationListener aTraitListener
+ // null, // [optional] in nsIMsgWindow aMsgWindow
+ // null, // [optional] in nsIJunkMailClassificationListener aJunkListener
+ }
+ break;
+ }
+ case kDetail:
+ // detail message
+ nsIJunkMailPlugin.detailMessage(
+ getSpec(gTest.fileName), // in string aMsgURI
+ gTest.traitIds[0], // proTrait
+ gTest.traitAntiIds[0], // antiTrait
+ listener
+ ); // in nsIMsgTraitDetailListener aDetailListener
+ break;
+ case kReset:
+ // reload a new nsIJunkMailPlugin, reading file in the process
+ nsIJunkMailPlugin.shutdown(); // writes files
+ nsIJunkMailPlugin = null;
+ nsIJunkMailPlugin = Cc[
+ "@mozilla.org/messenger/filter-plugin;1?name=bayesianfilter"
+ ].createInstance(Ci.nsIJunkMailPlugin);
+ // does not do a callback, so we must restart next command
+ startCommand();
+ break;
+ }
+}
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/xpcshell.ini b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/xpcshell.ini
new file mode 100644
index 0000000000..86776834ba
--- /dev/null
+++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/xpcshell.ini
@@ -0,0 +1,11 @@
+[DEFAULT]
+head = head_bayes.js
+tail =
+support-files = resources/*
+
+[test_bug228675.js]
+[test_customTokenization.js]
+[test_junkAsTraits.js]
+[test_msgCorpus.js]
+[test_traitAliases.js]
+[test_traits.js]