summaryrefslogtreecommitdiffstats
path: root/comm/mailnews/search/public/nsIMsgFilterPlugin.idl
blob: 93934a364a4b45a537c8761149509ddd187e60c1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsISupports.idl"
#include "MailNewsTypes2.idl"

interface nsIMsgWindow;
interface nsIFile;

/**
 * Common interface for mail filter plugins; nsIJunkMailPlugin in this file
 * derives from it.
 *
 * This interface is still very much under development, and is not yet stable.
 */

[scriptable, uuid(e2e56690-a676-11d6-80c9-00008646b737)]
interface nsIMsgFilterPlugin : nsISupports
{
    /**
     * Do any necessary cleanup: flush and close any open files, etc.
     */
    void shutdown();

    /**
     * Some protocols (e.g. IMAP) can, as an optimization, avoid
     * downloading all message header lines. A plugin that needs no more
     * than the minimal set of headers can return false for this attribute;
     * a plugin that needs every header line returns true.
     */
    readonly attribute boolean shouldDownloadAllHeaders;

};

/*
 * These interfaces typically implement a Bayesian classifier of messages.
 *
 * Two sets of interfaces may be used: the older junk-only interfaces, and
 * the newer trait-oriented interfaces that treat junk classification as
 * one of a set of classifications to accomplish.
 */

/**
 * Junk-oriented callback interface: receives the junk classification of
 * each message processed by a classifier such as nsIJunkMailPlugin.
 */
[scriptable, uuid(b15a0f9c-df07-4af0-9ba8-80dca68ac35d)]
interface nsIJunkMailClassificationListener : nsISupports
{
  /**
   * Inform a listener of a message's classification as junk. At the end
   * of a batch of classifications, the end of the batch is signalled by a
   * call with a null aMsgURI (the other parameters are then "don't care"
   * values and must be ignored).
   *
   * @param aMsgURI          URI of the message that was classified.
   * @param aClassification  classification of message as UNCLASSIFIED, GOOD,
   *                         or JUNK (constants declared on nsIJunkMailPlugin).
   * @param aJunkPercent     junk likelihood as a percentage, with 100 being
   *                         probably junk, and 0 probably good
   */
  void onMessageClassified(in AUTF8String aMsgURI,
                           in nsMsgJunkStatus aClassification,
                           in uint32_t aJunkPercent);
};

/**
 * Trait-oriented callback interface: receives, for each classified message,
 * how strongly the message matches each of a list of traits.
 */
[scriptable, uuid(AF247D07-72F0-482d-9EAB-5A786407AA4C)]
interface nsIMsgTraitClassificationListener : nsISupports
{
  /**
   * Inform a listener of a message's match to traits. aTraits and
   * aPercents are parallel arrays: aPercents[i] is the percent match for
   * the trait id aTraits[i]. At the end of a batch of classifications, the
   * end of the batch is signalled by a call with a null aMsgURI (the other
   * parameters are then "don't care" values and must be ignored).
   *
   * @param aMsgURI      URI of the message that was classified
   * @param aTraits      array of matched trait ids
   * @param aPercents    array of percent match (0 is unmatched, 100 is fully
   *                     matched) of the trait with the corresponding array
   *                     index in aTraits
   */
  void onMessageTraitsClassified(in AUTF8String aMsgURI,
    in Array<unsigned long> aTraits,
    in Array<unsigned long> aPercents);
};

/**
 * Callback interface that receives the per-token breakdown of a trait
 * analysis requested through nsIJunkMailPlugin.detailMessage().
 */
[scriptable, uuid(12667532-88D1-44a7-AD48-F73719BE5C92)]
interface nsIMsgTraitDetailListener : nsISupports
{
  /**
   * Inform a listener of details of a message's match to traits.
   * This returns the tokens that were used in the calculation,
   * the calculated percent probability that each token matches the trait,
   * and a running estimate (starting with the strongest tokens) of the
   * combined total probability that a message matches the trait, when
   * only tokens stronger than the current token are used.
   *
   * The three arrays are parallel: the same index in tokenStrings,
   * tokenPercents and runningPercents refers to the same token.
   *
   * @param aMsgUri         URI of the message that was classified
   * @param aProTrait       trait id of pro trait for the calculation
   * @param tokenStrings    array holding the string of each token used
   * @param tokenPercents   for each token, the calculated probability that a
   *                        message with that token matches the trait
   * @param runningPercents for each token, the calculated probability that
   *                        the message matches the trait, accounting for
   *                        this token and all stronger tokens.
   */
    void onMessageTraitDetails(in AUTF8String aMsgUri,
                               in unsigned long aProTrait,
                               in Array<AString> tokenStrings,
                               in Array<unsigned long> tokenPercents,
                               in Array<unsigned long> runningPercents);
};

/**
 * Message classifier interface. It offers both the older junk-only methods
 * (classifyMessage, classifyMessages, setMessageClassification) and the
 * newer trait-oriented methods, which treat junk as one trait among many.
 */
[scriptable, uuid(8EA5BBCA-F735-4d43-8541-D203D8E2FF2F)]
interface nsIJunkMailPlugin : nsIMsgFilterPlugin
{
    /**
     * Message classifications.
     */
    const nsMsgJunkStatus UNCLASSIFIED = 0;
    const nsMsgJunkStatus GOOD = 1;
    const nsMsgJunkStatus JUNK = 2;

    /**
     * Message junk score constants. Junkscore can only be one of these two
     * values (or not set).
     */
    const nsMsgJunkScore IS_SPAM_SCORE = 100; // junk
    const nsMsgJunkScore IS_HAM_SCORE = 0; // not junk

    /**
     * Trait ids for junk analysis. These values are fixed to ensure
     * backwards compatibility with existing junk-oriented classification
     * code. Note that they are numerically equal to the GOOD and JUNK
     * classification constants above.
     */

    const unsigned long GOOD_TRAIT = 1; // good
    const unsigned long JUNK_TRAIT = 2; // junk

    /**
     * Given a message URI, determine what its current classification is
     * according to the current training set.
     *
     * @param aMsgURI      URI of the message to be classified
     * @param aMsgWindow   current message window
     * @param aListener    junk-oriented callback listener that receives
     *                     the result
     */
    void classifyMessage(in AUTF8String aMsgURI, in nsIMsgWindow aMsgWindow,
                         in nsIJunkMailClassificationListener aListener);

    /**
     * Batch form of classifyMessage: given an array of message URIs,
     * determine the current classification of each one according to the
     * current training set. See nsIJunkMailClassificationListener for how
     * the end of a batch is signalled to the listener.
     *
     * @param aMsgURIs     array of URIs of the messages to be classified
     * @param aMsgWindow   current message window
     * @param aListener    junk-oriented callback listener that receives
     *                     the results
     */
    void classifyMessages(in Array<AUTF8String> aMsgURIs,
                          in nsIMsgWindow aMsgWindow,
                          in nsIJunkMailClassificationListener aListener);

    /**
     * Given a message URI, evaluate its relative match to a list of
     * traits according to the current training set.
     *
     * aProTraits and aAntiTraits are parallel arrays: each index describes
     * one pro/anti trait pair to be tested.
     * NOTE(review): the pairing by index is inferred from the parameter
     * descriptions; confirm against the implementation.
     *
     * @param aMsgURI          URI of the message to be evaluated
     * @param aProTraits       array of trait ids for trained messages that
     *                         match the tested trait (for example,
     *                         JUNK_TRAIT if testing for junk)
     * @param aAntiTraits      array of trait ids for trained messages that
     *                         do not match the tested trait (for example,
     *                         GOOD_TRAIT if testing for junk)
     * @param aTraitListener   trait-oriented callback listener (may be null)
     * @param aMsgWindow       current message window (may be null)
     * @param aJunkListener    junk-oriented callback listener (may be null)
     */

    void classifyTraitsInMessage(
           in AUTF8String aMsgURI,
           in Array<unsigned long> aProTraits,
           in Array<unsigned long> aAntiTraits,
           in nsIMsgTraitClassificationListener aTraitListener,
           [optional] in nsIMsgWindow aMsgWindow,
           [optional] in nsIJunkMailClassificationListener aJunkListener);

    /**
     * Given an array of message URIs, evaluate their relative match to a
     * list of traits according to the current training set. This is the
     * batch form of classifyTraitsInMessage.
     *
     * @param aMsgURIs         array of URIs of the messages to be evaluated
     * @param aProTraits       array of trait ids for trained messages that
     *                         match the tested trait (for example,
     *                         JUNK_TRAIT if testing for junk)
     * @param aAntiTraits      array of trait ids for trained messages that
     *                         do not match the tested trait (for example,
     *                         GOOD_TRAIT if testing for junk)
     * @param aTraitListener   trait-oriented callback listener (may be null)
     * @param aMsgWindow       current message window (may be null)
     * @param aJunkListener    junk-oriented callback listener (may be null)
     */

    void classifyTraitsInMessages(
           in Array<AUTF8String> aMsgURIs,
           in Array<unsigned long> aProTraits,
           in Array<unsigned long> aAntiTraits,
           in nsIMsgTraitClassificationListener aTraitListener,
           [optional] in nsIMsgWindow aMsgWindow,
           [optional] in nsIJunkMailClassificationListener aJunkListener);

    /**
     * Called when a user forces the classification of a message. Should
     * cause the training set to be updated appropriately.
     *
     * @param aMsgURI                   URI of the message to be classified
     * @param aOldUserClassification    Was it previously manually classified
     *                                  by the user?  If so, how?
     * @param aNewClassification        New manual classification.
     * @param aMsgWindow                current message window
     * @param aListener                 Callback (may be null)
     */
    void setMessageClassification(
        in AUTF8String aMsgURI, in nsMsgJunkStatus aOldUserClassification,
        in nsMsgJunkStatus aNewClassification,
        in nsIMsgWindow aMsgWindow,
        in nsIJunkMailClassificationListener aListener);

    /**
     * Called when a user forces a change in the classification of a message.
     * Should cause the training set to be updated appropriately.
     *
     * @param aMsgURI           URI of the message to be classified
     * @param aOldTraits        array of trait IDs of the old
     *                          message classification(s), if any
     * @param aNewTraits        array of trait IDs of the new
     *                          message classification(s), if any
     * @param aTraitListener    trait-oriented listener (may be null)
     * @param aMsgWindow        current message window (may be null)
     * @param aJunkListener     junk-oriented listener (may be null)
     */
    void setMsgTraitClassification(
        in AUTF8String aMsgURI,
        in Array<unsigned long> aOldTraits,
        in Array<unsigned long> aNewTraits,
        [optional] in nsIMsgTraitClassificationListener aTraitListener,
        [optional] in nsIMsgWindow aMsgWindow,
        [optional] in nsIJunkMailClassificationListener aJunkListener);

    /**
     * NOTE(review): undocumented in the original. Presumably true once the
     * user has manually classified (trained) at least one message; confirm
     * against the implementation before relying on this.
     */
    readonly attribute boolean userHasClassified;

    /**
     * Removes the training file and clears out any in memory training tokens.
     * User must retrain after doing this.
     */
    void resetTrainingData();

    /**
     * Given a message URI, return a list of tokens and their contribution to
     * the analysis of a message's match to a trait according to the
     * current training set. The results are delivered through
     * aListener (nsIMsgTraitDetailListener.onMessageTraitDetails), not as a
     * return value.
     *
     * @param aMsgURI          URI of the message to be evaluated
     * @param aProTrait        trait id for trained messages that match the
     *                         tested trait (for example, JUNK_TRAIT if testing
     *                         for junk)
     * @param aAntiTrait       trait id for trained messages that do not match
     *                         the tested trait (for example, GOOD_TRAIT
     *                         if testing for junk)
     * @param aListener        callback listener for results
     * @param aMsgWindow       current message window (may be null)
     */
    void detailMessage(
        in AUTF8String aMsgURI,
        in unsigned long aProTrait,
        in unsigned long aAntiTrait,
        in nsIMsgTraitDetailListener aListener,
        [optional] in nsIMsgWindow aMsgWindow);

};

/**
 * The nsIMsgCorpus interface manages a corpus of mail data used for
 * statistical analysis of messages. The corpus holds, per trait id, token
 * counts and a count of trained messages.
 */
[scriptable, uuid(70BAD26F-DFD4-41bd-8FAB-4C09B9C1E845)]
interface nsIMsgCorpus : nsISupports
{
  /**
   * Clear the corpus data for a trait id.
   *
   * @param aTrait       trait id
   */
   void clearTrait(in unsigned long aTrait);

  /**
   * Update corpus data from a file.
   * Uses the parallel arrays aFromTraits and aToTraits. These arrays allow
   * conversion of the trait id stored in the file (which may be originated
   * externally) to the trait id used in the local corpus (which is defined
   * locally using nsIMsgTraitService, and mapped by that interface to a
   * globally unique trait id string).
   *
   * @param aFile       the file with the data, in the format:
   *
   *                    Format of the trait file for version 1:
   *                    [0xFCA93601]  (the 01 is the version)
   *                    for each trait to write:
   *                    [id of trait to write] (0 means end of list)
   *                    [number of messages per trait]
   *                    for each token with non-zero count
   *                    [count]
   *                    [length of word]word
   *
   * @param aIsAdd      should the data be added, or removed? True if
   *                    adding, false if removing.
   *
   * @param aFromTraits array of trait ids used in aFile. If aFile contains
   *                    trait ids that are not in this array, they are not
   *                    remapped, but assumed to be local trait ids.
   *
   * @param aToTraits   array of trait ids, corresponding by index to the
   *                    elements of aFromTraits, that represent the local
   *                    trait ids to be used in storing data from aFile into
   *                    the local corpus.
   */
  void updateData(in nsIFile aFile, in boolean aIsAdd,
                  [optional] in Array<unsigned long> aFromTraits,
                  [optional] in Array<unsigned long> aToTraits);

  /**
   * Get the corpus count for a token, where the token is given as a string.
   *
   * @param aWord    string of characters representing the token
   * @param aTrait   trait id
   *
   * @return         count of that token in the corpus
   *
   */
  unsigned long getTokenCount(in AUTF8String aWord, in unsigned long aTrait);

  /**
   * Gives information on token and message count information in the
   * training data corpus.
   *
   * @param aTrait           trait id (may be null)
   *                         NOTE(review): an unsigned long cannot be null;
   *                         presumably 0 means "no trait" here. Confirm
   *                         against the implementation.
   * @param aMessageCount    count of messages that have been trained with aTrait
   *
   * @return                 token count for all traits
   */

  unsigned long corpusCounts(in unsigned long aTrait, out unsigned long aMessageCount);
};