summaryrefslogtreecommitdiffstats
path: root/comm/mailnews/extensions/bayesian-spam-filter/test/unit/test_traitAliases.js
blob: 41a9f22a9b76f5f40e5517eb40a27a909bc48c74 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// Tests bayes trait analysis with aliases. Adapted from test_traits.js

/*
 * These tests rely on data stored in a file, with the same format as traits.dat,
 * that was trained in the following manner. There are two training messages,
 * included here as files aliases1.eml and aliases2.eml. aliases.dat was trained on
 * each of these messages, for different trait indices, as follows, with
 * columns showing the training count for each trait index:
 *
 *     file   count(1001)  count(1005) count(1007) count(1009)
 *
 *   aliases1.eml      1            0           2           0
 *   aliases2.eml      0            1           0           1
 *
 * There is also a third email file, aliases3.eml, which combines tokens
 * from aliases1.eml and aliases2.eml
 *
 * The goal here is to demonstrate that traits 1001 and 1007, and traits
 * 1005 and 1009, can be combined using aliases. We classify messages with
 * trait 1001 as the PRO trait, and 1005 as the ANTI trait.
 *
 * With these characteristics, I've run a trait analysis without aliases, and
 * determined that the following are the correct percentage results from the
 * analysis for each message. "Train11" means that the training was 1 pro count
 * from aliases1.eml, and 1 anti count from aliases2.eml. "Train32" is 3 pro counts,
 * and 2 anti counts.
 *
 *                 percentage
 *    file         Train11       Train32
 *
 * aliases1.eml      92             98
 * aliases2.eml       8              3
 * aliases3.eml      50             53
 */

var { MailServices } = ChromeUtils.import(
  "resource:///modules/MailServices.jsm"
);

// Handle to the message trait service. startCommand() uses it to look up,
// remove and add trait aliases before each classification.
var traitService = Cc["@mozilla.org/msg-trait-service;1"].getService(
  Ci.nsIMsgTraitService
);
// Trait ids used by this test. kProAlias and kAntiAlias are the ids that the
// test descriptors below list as aliases of kProTrait and kAntiTrait.
var kProTrait = 1001;
var kAntiTrait = 1005;
var kProAlias = 1007;
var kAntiAlias = 1009;

// Descriptor of the test that is currently running.
var gTest;

// Queue of test descriptors. Each descriptor has the following properties:
//
//   fileName:    file containing the message to test
//   proAliases:  array of aliases for the pro trait
//   antiAliases: array of aliases for the anti trait
//   percent:     expected result from the classifier
//
// The table rows are [fileName, useAliases, percent]. Rows with useAliases
// set get kProAlias / kAntiAlias as their only aliases; the others get empty
// alias lists. Every descriptor receives its own array instances.
var tests = [
  ["aliases1.eml", false, 92],
  ["aliases2.eml", false, 8],
  ["aliases3.eml", false, 50],
  ["aliases1.eml", true, 98],
  ["aliases2.eml", true, 3],
  ["aliases3.eml", true, 53],
].map(([fileName, useAliases, percent]) => ({
  fileName,
  proAliases: useAliases ? [kProAlias] : [],
  antiAliases: useAliases ? [kAntiAlias] : [],
  percent,
}));

// main test
/**
 * Main test entry point: sets up the local mail account, loads the trained
 * alias data into the junk service's corpus, marks the test as pending and
 * kicks off the first queued test.
 */
function run_test() {
  localAccountUtils.loadLocalMailAccount();

  // Load the aliases trait testing file into the corpus.
  const corpus = MailServices.junk.QueryInterface(Ci.nsIMsgCorpus);
  const aliasData = do_get_file("resources/aliases.dat");
  corpus.updateData(aliasData, true);

  // Classification results arrive through the listener, so the test stays
  // pending until startCommand() reports that it is finished.
  do_test_pending();
  startCommand();
}

// nsIMsgTraitClassificationListener implementation handed to the classifier
// by startCommand().
var listener = {
  /**
   * Checks the classification result for the current test and moves on to
   * the next one.
   *
   * @param {string} aMsgURI - URI of the classified message; a falsy value
   *   is the end-of-batch signal and is ignored.
   * @param {Array} aTraits - Unused here.
   * @param {Array} aPercents - Its first entry is compared with the
   *   expected percentage of the current test.
   */
  onMessageTraitsClassified(aMsgURI, aTraits, aPercents) {
    if (aMsgURI) {
      const actualPercent = aPercents[0];
      Assert.equal(actualPercent, gTest.percent);

      // This test is done; start the next one.
      startCommand();
    }
  },
};

// start the next test command
/**
 * Starts the next queued test, or finishes the test run when the queue is
 * empty.
 *
 * Takes the next descriptor off `tests` and stores it in `gTest`, replaces
 * the aliases registered for the pro and anti traits with the ones listed in
 * the descriptor, and then asks the junk service to classify the
 * descriptor's message, passing `listener` as the trait listener.
 */
function startCommand() {
  if (!tests.length) {
    // No more tests queued: all done.
    do_test_finished();
    return;
  }

  gTest = tests.shift();

  // Remove any existing aliases. Iterating over the returned lists (rather
  // than popping until a falsy value shows up) visits every entry regardless
  // of its value.
  const staleProAliases = traitService.getAliases(kProTrait);
  const staleAntiAliases = traitService.getAliases(kAntiTrait);
  for (const staleAlias of staleProAliases) {
    traitService.removeAlias(kProTrait, staleAlias);
  }
  for (const staleAlias of staleAntiAliases) {
    traitService.removeAlias(kAntiTrait, staleAlias);
  }

  // Add the aliases for this test. The descriptor's arrays are only read, so
  // the test data is left intact.
  for (const proAlias of gTest.proAliases) {
    traitService.addAlias(kProTrait, proAlias);
  }
  for (const antiAlias of gTest.antiAliases) {
    traitService.addAlias(kAntiTrait, antiAlias);
  }

  // Classify the message. The optional aMsgWindow and aJunkListener
  // arguments are not supplied.
  MailServices.junk.classifyTraitsInMessage(
    getSpec(gTest.fileName), // in string aMsgURI
    [kProTrait], // in array aProTraits
    [kAntiTrait], // in array aAntiTraits
    listener // in nsIMsgTraitClassificationListener aTraitListener
  );
}