diff options
Diffstat (limited to '')
21 files changed, 1734 insertions, 0 deletions
diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/head_bayes.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/head_bayes.js new file mode 100644 index 0000000000..b502dcc2e5 --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/head_bayes.js @@ -0,0 +1,28 @@ +var { MailServices } = ChromeUtils.import( + "resource:///modules/MailServices.jsm" +); +var { XPCOMUtils } = ChromeUtils.importESModule( + "resource://gre/modules/XPCOMUtils.sys.mjs" +); +var { mailTestUtils } = ChromeUtils.import( + "resource://testing-common/mailnews/MailTestUtils.jsm" +); +var { localAccountUtils } = ChromeUtils.import( + "resource://testing-common/mailnews/LocalAccountUtils.jsm" +); + +var CC = Components.Constructor; + +// Ensure the profile directory is set up +do_get_profile(); + +function getSpec(aFileName) { + var file = do_get_file("resources/" + aFileName); + var uri = Services.io.newFileURI(file).QueryInterface(Ci.nsIURL); + uri = uri.mutate().setQuery("type=application/x-message-display").finalize(); + return uri.spec; +} + +registerCleanupFunction(function () { + load("../../../../resources/mailShutdown.js"); +}); diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases.dat b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases.dat Binary files differnew file mode 100644 index 0000000000..31162459e4 --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases.dat diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases1.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases1.eml new file mode 100644 index 0000000000..4720467fe6 --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases1.eml @@ -0,0 +1,6 @@ +From - Sat Jan 26 08:43:42 2008 +Subject: test1 +Content-Type: text/plain; charset=iso-8859-1 + +important + diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases2.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases2.eml new file mode 100644 index 0000000000..9a251486a9 --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases2.eml @@ -0,0 +1,6 @@ +From - Sat Jan 26 08:43:42 2008 +Subject: test2 +Content-Type: text/plain; charset=iso-8859-1 + +work + diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases3.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases3.eml new file mode 100644 index 0000000000..de31992ac5 --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/aliases3.eml @@ -0,0 +1,6 @@ +From - Sat Jan 26 08:43:42 2008 +Subject: test3 +Content-Type: text/plain; charset=iso-8859-1 + +very important work + diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham1.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham1.eml new file mode 100644 index 0000000000..6a63f587b8 --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham1.eml @@ -0,0 +1,7 @@ +Date: Tue, 30 Apr 2008 00:12:17 -0700 +From: Mom <mother@example.com> +To: Careful Reader <reader@example.org> +Subject: eat your vegetables +MIME-Version: 1.0 + +vegetables are very important for your health and wealth. diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham2.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham2.eml new file mode 100644 index 0000000000..cd6691b921 --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/ham2.eml @@ -0,0 +1,8 @@ +Date: Tue, 27 Apr 2006 00:13:23 -0700 +From: Evil Despot <boss@example.com> +To: Careful Reader <reader@example.org> +Subject: finish your report +MIME-Version: 1.0 + +If you want to keep your sorry job and health, finish that +important report before the close of business today. diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/msgCorpus.dat b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/msgCorpus.dat Binary files differnew file mode 100644 index 0000000000..f273a4f10c --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/msgCorpus.dat diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam1.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam1.eml new file mode 100644 index 0000000000..ea629213cc --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam1.eml @@ -0,0 +1,7 @@ +Date: Tue, 29 Apr 2008 00:10:07 -0700 +From: Spam King <spammer@example.com> +To: Careful Reader <reader@example.org> +Subject: viagra is your nigerian xxx dream +MIME-Version: 1.0 + +click here to make lots of money and wealth diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam2.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam2.eml new file mode 100644 index 0000000000..817d328cf2 --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam2.eml @@ -0,0 +1,8 @@ +Date: Mon, 27 Apr 2008 01:02:03 -0700 +From: Stock Pusher <broker@example.net> +To: Careful Reader <reader@example.org> +Subject: ABCD Corporation will soar tomorrow! +MIME-Version: 1.0 + +Make lots of money! Put all of your money into ACBD Corporation +Stock! diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam3.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam3.eml new file mode 100644 index 0000000000..0a524e604b --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam3.eml @@ -0,0 +1,7 @@ +Date: Wed, 30 Apr 2008 01:11:17 -0700 +From: Spam King <spammer@example.com> +To: Careful Reader <reader@example.org> +Subject: we have your nigerian xxx dream +MIME-Version: 1.0 + +Not making lots of money and wealth? Call me! diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam4.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam4.eml new file mode 100644 index 0000000000..775d3b41fa --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/spam4.eml @@ -0,0 +1,8 @@ +Date: Tue, 28 Apr 2008 01:02:04 -0700 +From: Stock Pusher <broker@example.net> +To: Careful Reader <reader@example.org> +Subject: ABCD Corporation will really soar this time! +MIME-Version: 1.0 + +Make lots of money! Put all of your money into ABCD Corporation +Stock! (We really mean it this time!) diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/tokenTest.eml b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/tokenTest.eml new file mode 100644 index 0000000000..d6e7e0ae3d --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/tokenTest.eml @@ -0,0 +1,14 @@ +Date: Tue, 30 Apr 2008 00:12:17 -0700 +From: Mom <mother@example.com> +To: Careful Reader <reader@example.org> +Subject: eat your vegetables to live long +Received: from c-1-2-3-4.hsd1.wa.example.net ([1.2.3.4] helo=theComputer) + by host301.example.com with esmtpa (Exim 4.69) + (envelope-from <someone@example.com>) + id 1LeEgH-0003GN-Rr + for reader@example.org; Mon, 02 Mar 2009 13:24:06 -0700 +MIME-Version: 1.0 +Message-Id: 14159 +Sender: Bugzilla Test Setup <noreply@example.org> + +This is a sentence. Important URL is http://www.example.org Check it out! diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/trainingfile.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/trainingfile.js new file mode 100644 index 0000000000..b6d37e879b --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/resources/trainingfile.js @@ -0,0 +1,108 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// service class to manipulate the junk training.dat file +// code is adapted from Mnehy Thunderbird Extension + +/* exported TrainingData */ +function TrainingData() { + // local constants + + const CC = Components.Constructor; + + // public methods + + this.read = read; + + // public variables + + this.mGoodTokens = 0; + this.mJunkTokens = 0; + this.mGoodMessages = 0; + this.mJunkMessages = 0; + this.mGoodCounts = {}; + this.mJunkCounts = {}; + + // helper functions + + function getJunkStatFile() { + var sBaseDir = Services.dirsvc.get("ProfD", Ci.nsIFile); + var CFileByFile = new CC( + "@mozilla.org/file/local;1", + "nsIFile", + "initWithFile" + ); + var oFile = new CFileByFile(sBaseDir); + oFile.append("training.dat"); + return oFile; + } + + function getBinStream(oFile) { + if (oFile && oFile.exists()) { + var oUri = Services.io.newFileURI(oFile); + // open stream (channel) + let channel = Services.io.newChannelFromURI( + oUri, + null, + Services.scriptSecurityManager.getSystemPrincipal(), + null, + Ci.nsILoadInfo.SEC_ALLOW_CROSS_ORIGIN_SEC_CONTEXT_IS_NULL, + Ci.nsIContentPolicy.TYPE_OTHER + ); + var oStream = channel.open(); + // buffer it + var oBufStream = Cc[ + "@mozilla.org/network/buffered-input-stream;1" + ].createInstance(Ci.nsIBufferedInputStream); + oBufStream.init(oStream, oFile.fileSize); + // read as binary + var oBinStream = Cc["@mozilla.org/binaryinputstream;1"].createInstance( + Ci.nsIBinaryInputStream + ); + oBinStream.setInputStream(oBufStream); + // return it + return oBinStream; + } + return null; + } + + // method specifications + + function read() { + var file = getJunkStatFile(); + + // does the file exist? + Assert.ok(file.exists()); + + var fileStream = getBinStream(file); + + // check magic number + var iMagicNumber = fileStream.read32(); + Assert.equal(iMagicNumber, 0xfeedface); + + // get ham'n'spam numbers + this.mGoodMessages = fileStream.read32(); + this.mJunkMessages = fileStream.read32(); + + // Read good tokens + this.mGoodTokens = fileStream.read32(); + var iRefCount, iTokenLen, sToken; + for (let i = 0; i < this.mGoodTokens; ++i) { + iRefCount = fileStream.read32(); + iTokenLen = fileStream.read32(); + sToken = fileStream.readBytes(iTokenLen); + this.mGoodCounts[sToken] = iRefCount; + } + + // we have no further good tokens, so read junk tokens + this.mJunkTokens = fileStream.read32(); + for (let i = 0; i < this.mJunkTokens; i++) { + // read token data + iRefCount = fileStream.read32(); + iTokenLen = fileStream.read32(); + sToken = fileStream.readBytes(iTokenLen); + this.mJunkCounts[sToken] = iRefCount; + } + } +} diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_bug228675.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_bug228675.js new file mode 100644 index 0000000000..40180006d7 --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_bug228675.js @@ -0,0 +1,136 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// tests reduction in size of training.dat + +// main setup + +/* import-globals-from resources/trainingfile.js */ +load("resources/trainingfile.js"); + +var { MailServices } = ChromeUtils.import( + "resource:///modules/MailServices.jsm" +); + +// before shrink, the trained messages have 76 tokens. Force shrink. +Services.prefs.setIntPref("mailnews.bayesian_spam_filter.junk_maxtokens", 75); + +// local constants +var kUnclassified = MailServices.junk.UNCLASSIFIED; +var kJunk = MailServices.junk.JUNK; +var kGood = MailServices.junk.GOOD; + +var emails = [ + "ham1.eml", + "ham2.eml", + "spam1.eml", + "spam2.eml", + "spam3.eml", + "spam4.eml", +]; +var classifications = [kGood, kGood, kJunk, kJunk, kJunk, kJunk]; +var trainingData; + +// main test +function run_test() { + localAccountUtils.loadLocalMailAccount(); + MailServices.junk.resetTrainingData(); + + do_test_pending(); + + var email = emails.shift(); + var classification = classifications.shift(); + // additional calls to setMessageClassifiaction are done in the callback + MailServices.junk.setMessageClassification( + getSpec(email), + kUnclassified, + classification, + null, + doTestingListener + ); +} + +var doTestingListener = { + onMessageClassified(aMsgURI, aClassification, aJunkPercent) { + if (!aMsgURI) { + // Ignore end-of-batch signal. + return; + } + var email = emails.shift(); + var classification = classifications.shift(); + if (email) { + MailServices.junk.setMessageClassification( + getSpec(email), + kUnclassified, + classification, + null, + doTestingListener + ); + return; + } + + // all done classifying, time to test + MailServices.junk.shutdown(); // just flushes training.dat + trainingData = new TrainingData(); + trainingData.read(); + + /* + // List training.dat information for debug + dump("training.data results: goodMessages=" + trainingData.mGoodMessages + + " junkMessages = " + trainingData.mJunkMessages + + " goodTokens = " + trainingData.mGoodTokens + + " junkTokens = " + trainingData.mJunkTokens + + "\n"); + print("Good counts"); + for (var token in trainingData.mGoodCounts) + dump("count: " + trainingData.mGoodCounts[token] + " token: " + token + "\n"); + print("Junk Counts"); + for (var token in trainingData.mJunkCounts) + dump("count: " + trainingData.mJunkCounts[token] + " token: " + token + "\n"); + */ + + /* Selected pre-shrink counts after training + training.data results: goodMessages=2 junkMessages = 4 tokens = 78 + Good counts + count: 1 token: subject:report + count: 2 token: important + count: 2 token: to:careful reader <reader@example.org> + + Junk Counts + count: 3 token: make + count: 4 token: money + count: 4 token: to:careful reader <reader@example.org> + count: 2 token: money! + */ + + // Shrinking divides all counts by two. In comments, I show the + // calculation for each test, (pre-shrink count)/2. + + Assert.equal(trainingData.mGoodMessages, 1); // 2/2 + Assert.equal(trainingData.mJunkMessages, 2); // 4/2 + checkToken("money", 0, 2); // (0/2, 4/2) + checkToken("subject:report", 0, 0); // (1/2, 0/2) + checkToken("to:careful reader <reader@example.org>", 1, 2); // (2/2, 4/2) + checkToken("make", 0, 1); // (0/2, 3/2) + checkToken("important", 1, 0); // (2/2, 0/2) + + do_test_finished(); + }, +}; + +// helper functions + +function checkToken(aToken, aGoodCount, aJunkCount) { + print(" checking " + aToken); + var goodCount = trainingData.mGoodCounts[aToken]; + var junkCount = trainingData.mJunkCounts[aToken]; + if (!goodCount) { + goodCount = 0; + } + if (!junkCount) { + junkCount = 0; + } + Assert.equal(goodCount, aGoodCount); + Assert.equal(junkCount, aJunkCount); +} diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_customTokenization.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_customTokenization.js new file mode 100644 index 0000000000..222a9557d8 --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_customTokenization.js @@ -0,0 +1,197 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Tests use of custom tokenization, originally introduced in bug 476389 + +var { MailServices } = ChromeUtils.import( + "resource:///modules/MailServices.jsm" +); + +// command functions for test data +var kTrain = 0; // train a file +var kTest = 1; // test headers returned from detail +var kSetup = 2; // run a setup function + +// trait ids +var kProArray = [3]; +var kAntiArray = [4]; + +var gTest; // currently active test + +// The tests array defines the tests to attempt. + +var tests = [ + // test a few tokens using defaults + { + command: kTrain, + fileName: "tokenTest.eml", + }, + { + command: kTest, + fileName: "tokenTest.eml", + tokens: ["important", "subject:eat", "message-id:14159", "http://www"], + nottokens: ["idonotexist", "subject:to"], + }, + + // enable received, disable message-id + // switch tokenization of body to catch full urls (no "." delimiter) + // enable sender, keeping full value + { + command: kSetup, + operation() { + Services.prefs.setCharPref( + "mailnews.bayesian_spam_filter.tokenizeheader.received", + "standard" + ); + Services.prefs.setCharPref( + "mailnews.bayesian_spam_filter.tokenizeheader.message-id", + "false" + ); + Services.prefs.setCharPref( + "mailnews.bayesian_spam_filter.body_delimiters", + " \t\r\n\v" + ); + Services.prefs.setCharPref( + "mailnews.bayesian_spam_filter.tokenizeheader.sender", + "full" + ); + }, + }, + { + command: kTrain, + fileName: "tokenTest.eml", + }, + { + command: kTest, + fileName: "tokenTest.eml", + tokens: [ + "important", + "subject:eat", + "received:reader@example", + "skip:h 20", + "sender:bugzilla test setup <noreply@example.org>", + "received:<someone@example", + ], + nottokens: ["message-id:14159", "http://www"], + }, + + // increase the length of the maximum token to catch full URLs in the body + // add <>;, remove . from standard header delimiters to better capture emails + // use custom delimiters on sender, without "." or "<>" + { + command: kSetup, + operation() { + Services.prefs.setIntPref( + "mailnews.bayesian_spam_filter.maxlengthfortoken", + 50 + ); + Services.prefs.setCharPref( + "mailnews.bayesian_spam_filter.header_delimiters", + " ;<>\t\r\n\v" + ); + Services.prefs.setCharPref( + "mailnews.bayesian_spam_filter.tokenizeheader.sender", + " \t\r\n\v" + ); + }, + }, + { + command: kTrain, + fileName: "tokenTest.eml", + }, + { + command: kTest, + fileName: "tokenTest.eml", + tokens: [ + "received:someone@example.com", + "http://www.example.org", + "received:reader@example.org", + "sender:<noreply@example.org>", + ], + nottokens: ["skip:h 20", "received:<someone@example"], + }, +]; + +// main test +function run_test() { + localAccountUtils.loadLocalMailAccount(); + do_test_pending(); + + startCommand(); +} + +var listener = { + // nsIMsgTraitClassificationListener implementation + onMessageTraitsClassified(aMsgURI, aTraits, aPercents) { + startCommand(); + }, + + onMessageTraitDetails( + aMsgURI, + aProTrait, + aTokenString, + aTokenPercents, + aRunningPercents + ) { + print("Details for " + aMsgURI); + for (let i = 0; i < aTokenString.length; i++) { + print("Token " + aTokenString[i]); + } + + // we should have these tokens + for (let value of gTest.tokens) { + print("We should have '" + value + "'? "); + Assert.ok(aTokenString.includes(value)); + } + + // should not have these tokens + for (let value of gTest.nottokens) { + print("We should not have '" + value + "'? "); + Assert.ok(!aTokenString.includes(value)); + } + startCommand(); + }, +}; + +// start the next test command +function startCommand() { + if (!tests.length) { + // Do we have more commands? + // no, all done + do_test_finished(); + return; + } + + gTest = tests.shift(); + // print("StartCommand command = " + gTest.command + ", remaining tests " + tests.length); + switch (gTest.command) { + case kTrain: + // train message + + MailServices.junk.setMsgTraitClassification( + getSpec(gTest.fileName), // aMsgURI + [], // aOldTraits + kProArray, // aNewTraits + listener + ); // [optional] in nsIMsgTraitClassificationListener aTraitListener + // null, // [optional] in nsIMsgWindow aMsgWindow + // null, // [optional] in nsIJunkMailClassificationListener aJunkListener + break; + + case kTest: + // test headers from detail message + MailServices.junk.detailMessage( + getSpec(gTest.fileName), // in string aMsgURI + kProArray[0], // proTrait + kAntiArray[0], // antiTrait + listener + ); // in nsIMsgTraitDetailListener aDetailListener + break; + + case kSetup: + gTest.operation(); + startCommand(); + break; + } +} diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_junkAsTraits.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_junkAsTraits.js new file mode 100644 index 0000000000..a1800b93e7 --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_junkAsTraits.js @@ -0,0 +1,574 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// tests calls to the bayesian filter plugin to train, classify, and forget +// messages using both the older junk-oriented calls, as well as the newer +// trait-oriented calls. Only a single trait is tested. The main intent of +// these tests is to demonstrate that both the old junk-oriented calls and the +// new trait-oriented calls give the same results on junk processing. + +var { MailServices } = ChromeUtils.import( + "resource:///modules/MailServices.jsm" +); + +// local constants +var kUnclassified = MailServices.junk.UNCLASSIFIED; +var kJunk = MailServices.junk.JUNK; +var kGood = MailServices.junk.GOOD; +var kJunkTrait = MailServices.junk.JUNK_TRAIT; +var kGoodTrait = MailServices.junk.GOOD_TRAIT; +var kIsHamScore = MailServices.junk.IS_HAM_SCORE; +var kIsSpamScore = MailServices.junk.IS_SPAM_SCORE; + +// command functions for test data +var kTrainJ = 0; // train using junk method +var kTrainT = 1; // train using trait method +var kClassJ = 2; // classify using junk method +var kClassT = 3; // classify using trait method +var kForgetJ = 4; // forget training using junk method +var kForgetT = 5; // forget training using trait method +var kCounts = 6; // test token and message counts + +var gProArray = [], + gAntiArray = []; // traits arrays, pro is junk, anti is good +var gTest; // currently active test + +// The tests array defines the tests to attempt. Format of +// an element "test" of this array (except for kCounts): +// +// test.command: function to perform, see definitions above +// test.fileName: file containing message to test +// test.junkPercent: sets the classification (for Class or Forget commands) +// tests the classification (for Class commands) +// As a special case for the no-training tests, if +// junkPercent is negative, test its absolute value +// for percents, but reverse the junk/good classification +// test.traitListener: should we use the trait listener call? +// test.junkListener: should we use the junk listener call? + +var tests = [ + // test the trait-based calls. We mix trait listeners, junk listeners, + // and both + + { + // with no training, percents is 50 - but classifies as junk + command: kClassT, + fileName: "ham1.eml", + junkPercent: -50, // negative means classifies as junk + traitListener: false, + junkListener: true, + }, + { + // train 1 ham message + command: kTrainT, + fileName: "ham1.eml", + junkPercent: 0, + traitListener: false, + junkListener: true, + }, + { + // with ham but no spam training, percents are 0 and classifies as ham + command: kClassT, + fileName: "ham1.eml", + junkPercent: 0, + traitListener: false, + junkListener: true, + }, + { + // train 1 spam message + command: kTrainT, + fileName: "spam1.eml", + junkPercent: 100, + traitListener: true, + junkListener: false, + }, + { + // the trained messages will classify at 0 and 100 + command: kClassT, + fileName: "ham1.eml", + junkPercent: 0, + traitListener: false, + junkListener: true, + }, + { + command: kClassT, + fileName: "spam1.eml", + junkPercent: 100, + traitListener: true, + junkListener: false, + }, + { + // ham2, spam2, spam4 give partial percents, but still ham + command: kClassT, + fileName: "ham2.eml", + junkPercent: 8, + traitListener: true, + junkListener: true, + }, + { + command: kClassT, + fileName: "spam2.eml", + junkPercent: 81, + traitListener: false, + junkListener: true, + }, + { + command: kClassT, + fileName: "spam4.eml", + junkPercent: 81, + traitListener: true, + junkListener: false, + }, + { + // spam3 evaluates to spam + command: kClassT, + fileName: "spam3.eml", + junkPercent: 98, + traitListener: true, + junkListener: true, + }, + { + // train ham2, then test percents of 0 (clearly good) + command: kTrainT, + fileName: "ham2.eml", + junkPercent: 0, + traitListener: true, + junkListener: true, + }, + { + command: kClassT, + fileName: "ham2.eml", + junkPercent: 0, + traitListener: true, + junkListener: true, + }, + { + // forget ham2, percents should return to partial value + command: kForgetT, + fileName: "ham2.eml", + junkPercent: 0, + traitListener: false, + junkListener: true, + }, + { + command: kClassT, + fileName: "ham2.eml", + junkPercent: 8, + traitListener: true, + junkListener: true, + }, + { + // train, classify, forget, reclassify spam4 + command: kTrainT, + fileName: "spam4.eml", + junkPercent: 100, + traitListener: true, + junkListener: true, + }, + { + command: kClassT, + fileName: "spam4.eml", + junkPercent: 100, + traitListener: true, + junkListener: true, + }, + { + command: kCounts, + tokenCount: 66, // count of tokens in the corpus + junkCount: 2, // count of junk messages in the corpus + goodCount: 1, // count of good messages in the corpus + }, + { + command: kForgetT, + fileName: "spam4.eml", + junkPercent: 100, + traitListener: true, + junkListener: false, + }, + { + command: kClassT, + fileName: "spam4.eml", + junkPercent: 81, + traitListener: true, + junkListener: true, + }, + { + // forget ham1 and spam1 to empty training + command: kForgetT, + fileName: "ham1.eml", + junkPercent: 0, + traitListener: true, + junkListener: true, + }, + { + command: kForgetT, + fileName: "spam1.eml", + junkPercent: 100, + traitListener: true, + junkListener: true, + }, + // repeat the whole sequence using the junk calls + { + // train 1 ham and 1 spam message + command: kTrainJ, + fileName: "ham1.eml", + junkPercent: 0, + traitListener: false, + junkListener: true, + }, + { + command: kTrainJ, + fileName: "spam1.eml", + junkPercent: 100, + traitListener: false, + junkListener: true, + }, + { + // the trained messages will classify at 0 and 100 + command: kClassJ, + fileName: "ham1.eml", + junkPercent: 0, + traitListener: false, + junkListener: true, + }, + { + command: kClassJ, + fileName: "spam1.eml", + junkPercent: 100, + traitListener: false, + junkListener: true, + }, + { + // ham2, spam2, spam4 give partial percents, but still ham + command: kClassJ, + fileName: "ham2.eml", + junkPercent: 8, + traitListener: false, + junkListener: true, + }, + { + command: kClassJ, + fileName: "spam2.eml", + junkPercent: 81, + traitListener: false, + junkListener: true, + }, + { + command: kClassJ, + fileName: "spam4.eml", + junkPercent: 81, + traitListener: false, + junkListener: true, + }, + { + // spam3 evaluates to spam + command: kClassJ, + fileName: "spam3.eml", + junkPercent: 98, + traitListener: false, + junkListener: true, + }, + { + // train ham2, then test percents of 0 (clearly good) + command: kTrainJ, + fileName: "ham2.eml", + junkPercent: 0, + traitListener: false, + junkListener: true, + }, + { + command: kClassJ, + fileName: "ham2.eml", + junkPercent: 0, + traitListener: false, + junkListener: true, + }, + { + // forget ham2, percents should return to partial value + command: kForgetJ, + fileName: "ham2.eml", + junkPercent: 0, + traitListener: false, + junkListener: true, + }, + { + command: kClassJ, + fileName: "ham2.eml", + junkPercent: 8, + traitListener: false, + junkListener: true, + }, + { + // train, classify, forget, reclassify spam4 + command: kTrainJ, + fileName: "spam4.eml", + junkPercent: 100, + traitListener: false, + junkListener: true, + }, + { + command: kClassJ, + fileName: "spam4.eml", + junkPercent: 100, + traitListener: false, + junkListener: true, + }, + { + command: kForgetJ, + fileName: "spam4.eml", + junkPercent: 100, + traitListener: false, + junkListener: true, + }, + { + command: kClassJ, + fileName: "spam4.eml", + junkPercent: 81, + traitListener: false, + junkListener: true, + }, + { + // forget ham1 and spam1 to be empty + command: kForgetJ, + fileName: "ham1.eml", + junkPercent: 0, + traitListener: false, + junkListener: true, + }, + { + command: kForgetJ, + fileName: "spam1.eml", + junkPercent: 100, + traitListener: false, + junkListener: true, + }, +]; + +// main test +function run_test() { + localAccountUtils.loadLocalMailAccount(); + do_test_pending(); + + // setup pro/anti arrays as junk/good + gProArray.push(kJunkTrait); + gAntiArray.push(kGoodTrait); + + startCommand(); +} + +var junkListener = { + // nsIJunkMailClassificationListener implementation + onMessageClassified(aMsgURI, aClassification, aJunkPercent) { + if (!aMsgURI) { + // Ignore end-of-batch signal. + return; + } + // print("Message URI is " + aMsgURI); + // print("Junk percent is " + aJunkPercent); + // print("Classification is " + aClassification); + var command = gTest.command; + var junkPercent = gTest.junkPercent; + // file returned correctly + Assert.equal(getSpec(gTest.fileName), aMsgURI); + + // checks of aClassification + + // forget returns unclassified + if (command == kForgetJ || command == kForgetT) { + Assert.equal(aClassification, kUnclassified); + } else { + // classification or train should return an actual classification + // check junk classification set by default cutoff of 90 + var isGood = Math.abs(junkPercent) < 90; + if (junkPercent < 0) { + isGood = !isGood; + } + Assert.equal(aClassification, isGood ? kGood : kJunk); + } + + // checks of aJunkPercent + + if (command == kClassJ || command == kClassT) { + // classify returns the actual junk percents + Assert.equal(Math.abs(junkPercent), aJunkPercent); + } else if (command == kTrainJ || command == kTrainT) { + // train returns the ham and spam limits + Assert.equal(aJunkPercent, junkPercent < 90 ? kIsHamScore : kIsSpamScore); + } else { + // Forget always returns 0. + Assert.equal(aJunkPercent, 0); + } + + // if the current test includes a trait listener, it will + // run next, so we defer to it for starting the next command + if (gTest.traitListener) { + return; + } + startCommand(); + }, +}; + +var traitListener = { + // nsIMsgTraitClassificationListener implementation + onMessageTraitsClassified(aMsgURI, aTraits, aPercents) { + if (!aMsgURI) { + // Ignore end-of-batch signal. + return; + } + // print("(Trait Listener)Message URI is " + aMsgURI); + // print("(Trait Listener)Junk percent is " + aPercents); + var command = gTest.command; + var junkPercent = gTest.junkPercent; + // print("command, junkPercent is " + command + " , " + junkPercent); + + Assert.equal(getSpec(gTest.fileName), aMsgURI); + + // checks of aPercents + + if (command == kForgetJ || command == kForgetT) { + // "forgets" with null newClassifications does not return a percent + Assert.equal(aPercents.length, 0); + } else { + var percent = aPercents[0]; + // print("Percent is " + percent); + if (command == kClassJ || command == kClassT) { + // Classify returns actual percents + Assert.equal(percent, junkPercent); + } else { + // Train simply returns 100. + Assert.equal(percent, 100); + } + } + + // checks of aTraits + + if (command == kForgetJ || command == kForgetT) { + // "forgets" with null newClassifications does not return a + // classification + Assert.equal(aTraits.length, 0); + } else if (command == kClassJ || command == kClassT) { + // classification just returns the tested "Pro" trait (junk) + let trait = aTraits[0]; + Assert.equal(trait, kJunkTrait); + } else { + // training returns the actual trait trained + let trait = aTraits[0]; + Assert.equal(trait, junkPercent < 90 ? kGoodTrait : kJunkTrait); + } + + // All done, start the next test + startCommand(); + }, +}; + +// start the next test command +function startCommand() { + if (!tests.length) { + // Do we have more commands? + // no, all done + do_test_finished(); + return; + } + + gTest = tests.shift(); + print( + "StartCommand command = " + + gTest.command + + ", remaining tests " + + tests.length + ); + var command = gTest.command; + var junkPercent = gTest.junkPercent; + var fileName = gTest.fileName; + var tListener = gTest.traitListener; + var jListener = gTest.junkListener; + switch (command) { + case kTrainJ: + // train message using junk call + MailServices.junk.setMessageClassification( + getSpec(fileName), // in string aMsgURI + null, // in nsMsgJunkStatus aOldUserClassification + junkPercent == kIsHamScore ? kGood : kJunk, // in nsMsgJunkStatus aNewClassification + null, // in nsIMsgWindow aMsgWindow + junkListener + ); // in nsIJunkMailClassificationListener aListener); + break; + + case kTrainT: + // train message using trait call + MailServices.junk.setMsgTraitClassification( + getSpec(fileName), // aMsgURI + [], // aOldTraits + junkPercent == kIsSpamScore ? gProArray : gAntiArray, // aNewTraits + tListener ? traitListener : null, // aTraitListener + null, // aMsgWindow + jListener ? junkListener : null + ); + break; + + case kClassJ: + // classify message using junk call + MailServices.junk.classifyMessage( + getSpec(fileName), // in string aMsgURI + null, // in nsIMsgWindow aMsgWindow + junkListener + ); // in nsIJunkMailClassificationListener aListener + break; + + case kClassT: + // classify message using trait call + MailServices.junk.classifyTraitsInMessage( + getSpec(fileName), // in string aMsgURI + gProArray, // in array aProTraits, + gAntiArray, // in array aAntiTraits + tListener ? traitListener : null, // in nsIMsgTraitClassificationListener aTraitListener + null, // in nsIMsgWindow aMsgWindow + jListener ? junkListener : null + ); // in nsIJunkMailClassificationListener aJunkListener + break; + + case kForgetJ: + // forget message using junk call + MailServices.junk.setMessageClassification( + getSpec(fileName), // in string aMsgURI + junkPercent == kIsHamScore ? kGood : kJunk, // in nsMsgJunkStatus aOldUserClassification + null, // in nsMsgJunkStatus aNewClassification, + null, // in nsIMsgWindow aMsgWindow, + junkListener + ); // in nsIJunkMailClassificationListener aListener + break; + + case kForgetT: + // forget message using trait call + MailServices.junk.setMsgTraitClassification( + getSpec(fileName), // in string aMsgURI + junkPercent == kIsSpamScore ? gProArray : gAntiArray, // in array aOldTraits + [], // in array aNewTraits + tListener ? traitListener : null, // in nsIMsgTraitClassificationListener aTraitListener + null, // in nsIMsgWindow aMsgWindow + jListener ? junkListener : null + ); // in nsIJunkMailClassificationListener aJunkListener + break; + + case kCounts: + // test counts + let msgCount = {}; + let nsIMsgCorpus = MailServices.junk.QueryInterface(Ci.nsIMsgCorpus); + let tokenCount = nsIMsgCorpus.corpusCounts(null, {}); + nsIMsgCorpus.corpusCounts(kJunkTrait, msgCount); + let junkCount = msgCount.value; + nsIMsgCorpus.corpusCounts(kGoodTrait, msgCount); + let goodCount = msgCount.value; + print( + "tokenCount, junkCount, goodCount is " + tokenCount, + junkCount, + goodCount + ); + Assert.equal(tokenCount, gTest.tokenCount); + Assert.equal(junkCount, gTest.junkCount); + Assert.equal(goodCount, gTest.goodCount); + do_timeout(0, startCommand); + break; + } +} diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_msgCorpus.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_msgCorpus.js new file mode 100644 index 0000000000..0c39215fcb --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_msgCorpus.js @@ -0,0 +1,144 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Tests corpus management functions using nsIMsgCorpus + +var { MailServices } = ChromeUtils.import( + "resource:///modules/MailServices.jsm" +); + +var msgCorpus = MailServices.junk.QueryInterface(Ci.nsIMsgCorpus); + +// tokens found in the test corpus file. trait 1001 was trained with +// 2 messages, and trait 1003 with 1. + +var tokenData = [ + // [traitid, count, token] + [1001, 0, "iDoNotExist"], + [1001, 1, "linecount"], + [1001, 2, "envelope-to:kenttest@caspia.com"], + [1003, 0, "iAlsoDoNotExist"], + [1003, 0, "isjunk"], // in 1001 but not 1003 + [1003, 1, "linecount"], + [1003, 1, "subject:test"], + [1003, 1, "envelope-to:kenttest@caspia.com"], +]; + +// list of tests + +var gTests = [ + // train two different combinations of messages + function checkLoadOnce() { + let fileName = "msgCorpus.dat"; + let file = do_get_file("resources/" + fileName); + msgCorpus.updateData(file, true); + + // check message counts + let messageCount = {}; + msgCorpus.corpusCounts(1001, messageCount); + Assert.equal(2, messageCount.value); + msgCorpus.corpusCounts(1003, messageCount); + Assert.equal(1, messageCount.value); + + for (let i = 0; i < tokenData.length; i++) { + let id = tokenData[i][0]; + let count = tokenData[i][1]; + let word = tokenData[i][2]; + Assert.equal(count, msgCorpus.getTokenCount(word, id)); + } + }, + function checkLoadTwice() { + let fileName = "msgCorpus.dat"; + let file = do_get_file("resources/" + fileName); + msgCorpus.updateData(file, true); + + // check message counts + let messageCount = {}; + msgCorpus.corpusCounts(1001, messageCount); + Assert.equal(4, messageCount.value); + msgCorpus.corpusCounts(1003, messageCount); + Assert.equal(2, messageCount.value); + + for (let i = 0; i < tokenData.length; i++) { + let id = tokenData[i][0]; + let count = 2 * tokenData[i][1]; + let word = tokenData[i][2]; + Assert.equal(count, msgCorpus.getTokenCount(word, id)); + } + }, + // remap the ids in the file to different local ids + function loadWithRemap() { + let fileName = "msgCorpus.dat"; + let file = do_get_file("resources/" + fileName); + msgCorpus.updateData(file, true, [1001, 1003], [1, 3]); + + for (let i = 0; i < tokenData.length; i++) { + let id = tokenData[i][0] - 1000; + let count = tokenData[i][1]; + let word = tokenData[i][2]; + Assert.equal(count, msgCorpus.getTokenCount(word, id)); + } + }, + // test removing data + function checkRemove() { + let fileName = "msgCorpus.dat"; + let file = do_get_file("resources/" + fileName); + msgCorpus.updateData(file, false); + + // check message counts + let messageCount = {}; + msgCorpus.corpusCounts(1001, messageCount); + Assert.equal(2, messageCount.value); + msgCorpus.corpusCounts(1003, messageCount); + Assert.equal(1, messageCount.value); + + for (let i = 0; i < tokenData.length; i++) { + let id = tokenData[i][0]; + let count = tokenData[i][1]; + let word = tokenData[i][2]; + Assert.equal(count, msgCorpus.getTokenCount(word, id)); + } + }, + // test clearing a trait + function checkClear() { + let messageCountObject = {}; + /* + msgCorpus.corpusCounts(1001, messageCountObject); + let v1001 = messageCountObject.value; + msgCorpus.corpusCounts(1003, messageCountObject); + let v1003 = messageCountObject.value; + dump("pre-clear value " + v1001 + " " + v1003 + "\n"); + /**/ + msgCorpus.clearTrait(1001); + // check that the message count is zero + msgCorpus.corpusCounts(1001, messageCountObject); + Assert.equal(0, messageCountObject.value); + // but the other trait should still have counts + msgCorpus.corpusCounts(1003, messageCountObject); + Assert.equal(1, messageCountObject.value); + // check that token count was cleared + for (let i = 0; i < tokenData.length; i++) { + let id = tokenData[i][0]; + let count = tokenData[i][1]; + let word = tokenData[i][2]; + Assert.equal(id == 1001 ? 0 : count, msgCorpus.getTokenCount(word, id)); + } + }, +]; + +// main test +function run_test() { + do_test_pending(); + while (true) { + if (!gTests.length) { + // Do we have more commands? + // no, all done + do_test_finished(); + return; + } + + let test = gTests.shift(); + test(); + } +} diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traitAliases.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traitAliases.js new file mode 100644 index 0000000000..41a9f22a9b --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traitAliases.js @@ -0,0 +1,172 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Tests bayes trait analysis with aliases. Adapted from test_traits.js + +/* + * These tests rely on data stored in a file, with the same format as traits.dat, + * that was trained in the following manner. There are two training messages, + * included here as files aliases1.eml and aliases2.eml Aliases.dat was trained on + * each of these messages, for different trait indices, as follows, with + * columns showing the training count for each trait index: + * + * file count(1001) count(1005) count(1007) count(1009) + * + * aliases1.eml 1 0 2 0 + * aliases2.eml 0 1 0 1 + * + * There is also a third email file, aliases3.eml, which combines tokens + * from aliases1.eml and aliases2.eml + * + * The goal here is to demonstrate that traits 1001 and 1007, and traits + * 1005 and 1009, can be combined using aliases. We classify messages with + * trait 1001 as the PRO trait, and 1005 as the ANTI trait. + * + * With these characteristics, I've run a trait analysis without aliases, and + * determined that the following is the correct percentage results from the + * analysis for each message. "Train11" means that the training was 1 pro count + * from aliases1.eml, and 1 anti count from alias2.eml. "Train32" is 3 pro counts, + * and 2 anti counts. + * + * percentage + * file Train11 Train32 + * + * alias1.eml 92 98 + * alias2.eml 8 3 + * alias3.eml 50 53 + */ + +var { MailServices } = ChromeUtils.import( + "resource:///modules/MailServices.jsm" +); + +var traitService = Cc["@mozilla.org/msg-trait-service;1"].getService( + Ci.nsIMsgTraitService +); +var kProTrait = 1001; +var kAntiTrait = 1005; +var kProAlias = 1007; +var kAntiAlias = 1009; + +var gTest; // currently active test + +// The tests array defines the tests to attempt. Format of +// an element "test" of this array: +// +// test.fileName: file containing message to test +// test.proAliases: array of aliases for the pro trait +// test.antiAliases: array of aliases for the anti trait +// test.percent: expected results from the classifier + +var tests = [ + { + fileName: "aliases1.eml", + proAliases: [], + antiAliases: [], + percent: 92, + }, + { + fileName: "aliases2.eml", + proAliases: [], + antiAliases: [], + percent: 8, + }, + { + fileName: "aliases3.eml", + proAliases: [], + antiAliases: [], + percent: 50, + }, + { + fileName: "aliases1.eml", + proAliases: [kProAlias], + antiAliases: [kAntiAlias], + percent: 98, + }, + { + fileName: "aliases2.eml", + proAliases: [kProAlias], + antiAliases: [kAntiAlias], + percent: 3, + }, + { + fileName: "aliases3.eml", + proAliases: [kProAlias], + antiAliases: [kAntiAlias], + percent: 53, + }, +]; + +// main test +function run_test() { + localAccountUtils.loadLocalMailAccount(); + + // load in the aliases trait testing file + MailServices.junk + .QueryInterface(Ci.nsIMsgCorpus) + .updateData(do_get_file("resources/aliases.dat"), true); + do_test_pending(); + + startCommand(); +} + +var listener = { + // nsIMsgTraitClassificationListener implementation + onMessageTraitsClassified(aMsgURI, aTraits, aPercents) { + // print("Message URI is " + aMsgURI); + if (!aMsgURI) { + // Ignore end-of-batch signal. + return; + } + + Assert.equal(aPercents[0], gTest.percent); + // All done, start the next test + startCommand(); + }, +}; + +// start the next test command +function startCommand() { + if (!tests.length) { + // Do we have more commands? + // no, all done + do_test_finished(); + return; + } + + gTest = tests.shift(); + + // classify message + var antiArray = [kAntiTrait]; + var proArray = [kProTrait]; + + // remove any existing aliases + let proAliases = traitService.getAliases(kProTrait); + let antiAliases = traitService.getAliases(kAntiTrait); + let proAlias; + let antiAlias; + while ((proAlias = proAliases.pop())) { + traitService.removeAlias(kProTrait, proAlias); + } + while ((antiAlias = antiAliases.pop())) { + traitService.removeAlias(kAntiTrait, antiAlias); + } + + // add new aliases + while ((proAlias = gTest.proAliases.pop())) { + traitService.addAlias(kProTrait, proAlias); + } + while ((antiAlias = gTest.antiAliases.pop())) { + traitService.addAlias(kAntiTrait, antiAlias); + } + + MailServices.junk.classifyTraitsInMessage( + getSpec(gTest.fileName), // in string aMsgURI + proArray, // in array aProTraits, + antiArray, // in array aAntiTraits + listener + ); // in nsIMsgTraitClassificationListener aTraitListener + // null, // [optional] in nsIMsgWindow aMsgWindow + // null, // [optional] in nsIJunkMailClassificationListener aJunkListener +} diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traits.js b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traits.js new file mode 100644 index 0000000000..b005db72cc --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traits.js @@ -0,0 +1,287 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Tests bayes trait analysis + +// I make this an instance so that I know I can reset and get +// a completely new component. Should be getService in production code. +var nsIJunkMailPlugin = Cc[ + "@mozilla.org/messenger/filter-plugin;1?name=bayesianfilter" +].createInstance(Ci.nsIJunkMailPlugin); + +// command functions for test data +var kTrain = 0; // train a file as a trait +var kClass = 1; // classify files with traits +var kReset = 2; // reload plugin, reading in data from disk +var kDetail = 3; // test details + +var gTest; // currently active test + +// The tests array defines the tests to attempt. Format of +// an element "test" of this array: +// +// test.command: function to perform, see definitions above +// test.fileName: file(s) containing message(s) to test +// test.traitIds: Array of traits to train (kTrain) or pro trait (kClass) +// test.traitAntiIds: Array of anti traits to classify +// test.percents: array of arrays (1 per message, 1 per trait) of +// expected results from the classifier + +var tests = [ + // train two different combinations of messages + { + command: kTrain, + fileName: "ham1.eml", + traitIds: [3, 6], + }, + { + command: kTrain, + fileName: "spam1.eml", + traitIds: [4], + }, + { + command: kTrain, + fileName: "spam4.eml", + traitIds: [5], + }, + // test the message classifications using both singular and plural classifier + { + command: kClass, + fileName: "ham1.eml", + traitIds: [4, 6], + traitAntiIds: [3, 5], + // ham1 is trained "anti" for first test, "pro" for second + percents: [[0, 100]], + }, + { + command: kClass, + fileName: "ham2.eml", + traitIds: [4, 6], + traitAntiIds: [3, 5], + // these are partial percents for an untrained message. ham2 is similar to ham1 + percents: [[8, 95]], + }, + { + command: kDetail, + fileName: "spam2.eml", + traitIds: [4], + traitAntiIds: [3], + percents: { + lots: 84, + money: 84, + make: 84, + your: 16, + }, + runnings: [84, 92, 95, 81], + }, + { + command: kClass, + fileName: "spam1.eml,spam2.eml,spam3.eml,spam4.eml", + traitIds: [4, 6], + traitAntiIds: [3, 5], + // spam1 trained as "pro" for first pro/anti pair + // spam4 trained as "anti" for second pro/anti pair + // others are partials + percents: [ + [100, 50], + [81, 0], + [98, 50], + [81, 0], + ], + }, + // reset the plugin, read in data, and retest the classification + // this tests the trait file writing + { + command: kReset, + }, + { + command: kClass, + fileName: "ham1.eml", + traitIds: [4, 6], + traitAntiIds: [3, 5], + percents: [[0, 100]], + }, + { + command: kClass, + fileName: "ham2.eml", + traitIds: [4, 6], + traitAntiIds: [3, 5], + percents: [[8, 95]], + }, + { + command: kClass, + fileName: "spam1.eml,spam2.eml,spam3.eml,spam4.eml", + traitIds: [4, 6], + traitAntiIds: [3, 5], + percents: [ + [100, 50], + [81, 0], + [98, 50], + [81, 0], + ], + }, +]; + +// main test +function run_test() { + localAccountUtils.loadLocalMailAccount(); + do_test_pending(); + + startCommand(); +} + +var listener = { + // nsIMsgTraitClassificationListener implementation + onMessageTraitsClassified(aMsgURI, aTraits, aPercents) { + // print("Message URI is " + aMsgURI); + if (!aMsgURI) { + // Ignore end-of-batch signal. + return; + } + + switch (gTest.command) { + case kClass: + Assert.equal(gTest.files[gTest.currentIndex], aMsgURI); + var currentPercents = gTest.percents[gTest.currentIndex]; + for (let i = 0; i < currentPercents.length; i++) { + // print("expecting score " + currentPercents[i] + + // " got score " + aPercents[i]); + Assert.equal(currentPercents[i], aPercents[i]); + } + gTest.currentIndex++; + break; + + case kTrain: // We tested this some in test_junkAsTraits.js, so let's not bother + default: + break; + } + if (!--gTest.callbacks) { + // All done, start the next test + startCommand(); + } + }, + onMessageTraitDetails( + aMsgURI, + aProTrait, + aTokenString, + aTokenPercents, + aRunningPercents + ) { + print("Details for " + aMsgURI); + for (let i = 0; i < aTokenString.length; i++) { + print( + "Percent " + + aTokenPercents[i] + + " Running " + + aRunningPercents[i] + + " Token " + + aTokenString[i] + ); + Assert.ok(aTokenString[i] in gTest.percents); + + Assert.equal(gTest.percents[aTokenString[i]], aTokenPercents[i]); + Assert.equal(gTest.runnings[i], aRunningPercents[i]); + delete gTest.percents[aTokenString[i]]; + } + Assert.equal(Object.keys(gTest.percents).length, 0); + if (gTest.command == kClass) { + gTest.currentIndex++; + } + startCommand(); + }, +}; + +// start the next test command +function startCommand() { + if (!tests.length) { + // Do we have more commands? + // no, all done + do_test_finished(); + return; + } + + gTest = tests.shift(); + print( + "StartCommand command = " + + gTest.command + + ", remaining tests " + + tests.length + ); + switch (gTest.command) { + case kTrain: { + // train message + let proArray = []; + for (let i = 0; i < gTest.traitIds.length; i++) { + proArray.push(gTest.traitIds[i]); + } + gTest.callbacks = 1; + + nsIJunkMailPlugin.setMsgTraitClassification( + getSpec(gTest.fileName), // aMsgURI + [], // aOldTraits + proArray, // aNewTraits + listener + ); // [optional] in nsIMsgTraitClassificationListener aTraitListener + // null, // [optional] in nsIMsgWindow aMsgWindow + // null, // [optional] in nsIJunkMailClassificationListener aJunkListener + break; + } + case kClass: { + // classify message + var antiArray = []; + let proArray = []; + for (let i = 0; i < gTest.traitIds.length; i++) { + antiArray.push(gTest.traitAntiIds[i]); + proArray.push(gTest.traitIds[i]); + } + gTest.files = gTest.fileName.split(","); + gTest.callbacks = gTest.files.length; + gTest.currentIndex = 0; + for (let i = 0; i < gTest.files.length; i++) { + gTest.files[i] = getSpec(gTest.files[i]); + } + if (gTest.files.length == 1) { + // use the singular classifier + nsIJunkMailPlugin.classifyTraitsInMessage( + getSpec(gTest.fileName), // in string aMsgURI + proArray, // in array aProTraits, + antiArray, // in array aAntiTraits + listener + ); // in nsIMsgTraitClassificationListener aTraitListener + // null, // [optional] in nsIMsgWindow aMsgWindow + // null, // [optional] in nsIJunkMailClassificationListener aJunkListener + } else { + // use the plural classifier + nsIJunkMailPlugin.classifyTraitsInMessages( + gTest.files, // in Array<ACString> aMsgURIs, + proArray, // in array aProTraits, + antiArray, // in array aAntiTraits + listener + ); // in nsIMsgTraitClassificationListener aTraitListener + // null, // [optional] in nsIMsgWindow aMsgWindow + // null, // [optional] in nsIJunkMailClassificationListener aJunkListener + } + break; + } + case kDetail: + // detail message + nsIJunkMailPlugin.detailMessage( + getSpec(gTest.fileName), // in string aMsgURI + gTest.traitIds[0], // proTrait + gTest.traitAntiIds[0], // antiTrait + listener + ); // in nsIMsgTraitDetailListener aDetailListener + break; + case kReset: + // reload a new nsIJunkMailPlugin, reading file in the process + nsIJunkMailPlugin.shutdown(); // writes files + nsIJunkMailPlugin = null; + nsIJunkMailPlugin = Cc[ + "@mozilla.org/messenger/filter-plugin;1?name=bayesianfilter" + ].createInstance(Ci.nsIJunkMailPlugin); + // does not do a callback, so we must restart next command + startCommand(); + break; + } +} diff --git a/comm/mailnews/extensions/bayesian-spam-filter/test/unit/xpcshell.ini b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/xpcshell.ini new file mode 100644 index 0000000000..86776834ba --- /dev/null +++ b/comm/mailnews/extensions/bayesian-spam-filter/test/unit/xpcshell.ini @@ -0,0 +1,11 @@ +[DEFAULT] +head = head_bayes.js +tail = +support-files = resources/* + +[test_bug228675.js] +[test_customTokenization.js] +[test_junkAsTraits.js] +[test_msgCorpus.js] +[test_traitAliases.js] +[test_traits.js] |