third_party/rust/relevancy/src/relevancy.udl


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115

// Namespace declaration for the `relevancy` component.
//
// It is empty: no top-level functions are declared here, only the interfaces, enums and
// dictionaries that follow in this file.
namespace relevancy { };

// Error type for the component's public API.
//
// This is the error named by the `[Throws=RelevancyApiError]` annotations on the
// `RelevancyStore` methods.
[Error]
interface RelevancyApiError {
    // The only variant declared here; `reason` carries a textual description of the failure.
    Unexpected(string reason);
};

// Top-level class for the Relevancy component
interface RelevancyStore {
    // Construct a new RelevancyStore
    //
    // This is non-blocking since databases and other resources are lazily opened.
    //
    // `dbpath`: presumably the filesystem path of the database to open -- TODO confirm against
    // the Rust implementation.
    constructor(string dbpath);

    // Close any open resources (for example databases)
    //
    // Calling `close` will interrupt any in-progress queries on other threads.
    void close();

    // Interrupt any current database queries
    void interrupt();

    // Ingest the top URLs by frequency to build up the user's interest vector
    //
    // `top_urls`: the URLs to ingest.
    // Returns an `InterestVector`; failures are reported as `RelevancyApiError`.
    [Throws=RelevancyApiError]
    InterestVector ingest(sequence<string> top_urls);

    // Calculate metrics for the user's interest vector in order to measure how strongly we're
    // identifying interests.  See the `InterestMetrics` struct for details.
    //
    // Failures are reported as `RelevancyApiError`.
    [Throws=RelevancyApiError]
    InterestMetrics calculate_metrics();

    // Get the interest vector for the user.
    //
    // This is intended to be shown to the user in an `about:` page so that users can judge if it
    // feels correct.
    //
    // Failures are reported as `RelevancyApiError`.
    [Throws=RelevancyApiError]
    InterestVector user_interest_vector();
};

// Interest categories known to the component.
//
// Each active variant has a correspondingly named `u32` field in the `InterestVector`
// dictionary (for example "RealEstate" <-> `real_estate`).
//
// NOTE(review): the variant list presumably has to stay in sync with the Rust-side enum, so
// avoid reordering or renaming entries without checking it -- TODO confirm.
enum Interest {
    "Animals",
    "Arts",
    "Autos",
    "Business",
    "Career",
    "Education",
    "Fashion",
    "Finance",
    "Food",
    "Government",
    // "Health" is currently disabled (it is also commented out in `InterestVector`); the reason
    // is not recorded in this file.
    // "Health",
    "Hobbies",
    "Home",
    "News",
    "RealEstate",
    "Society",
    "Sports",
    "Tech",
    "Travel",
    // Listed last; presumably used when no specific interest could be determined -- TODO confirm.
    "Inconclusive",
};

// Interest metrics that we want to send to Glean as part of the validation process.  These contain
// the cosine similarity when comparing the user's interest against various interest vectors that
// consumers may use.
//
// Cosine similarity was chosen because it seems easy to calculate.  This was then matched against
// some semi-plausible real-world interest vectors that consumers might use.  This is all up for
// debate and we may decide to switch to some other metrics.
//
// Similarity values are transformed to integers by multiplying the floating point value by 1000 and
// rounding.  This is to make them compatible with Glean's distribution metrics.
dictionary InterestMetrics {
    // Similarity between the user's interest vector and an interest vector where the element for
    // the user's top interest is copied, but all other interests are set to zero.  This measures
    // the highest possible similarity with consumers that use interest vectors with a single
    // interest set.
    u32 top_single_interest_similarity;

    // The same as before, but the top 2 interests are copied. This measures the highest possible
    // similarity with consumers that use interest vectors with two interests set (note: this
    // means they would need to choose the user's top two interests and have the exact same
    // proportion between them as the user).
    u32 top_2interest_similarity;

    // The same as before, but the top 3 interests are copied.
    u32 top_3interest_similarity;
};

// Vector storing a count value for each interest
//
// Here "vector" refers to the mathematical object, not a Rust `Vec`.  It always has a fixed
// number of elements.
//
// There is one `u32` field per active variant of the `Interest` enum, declared in the same order
// as the enum's variants.
//
// NOTE(review): the field list presumably has to stay in sync with the Rust-side struct, so avoid
// reordering or renaming fields without checking it -- TODO confirm.
dictionary InterestVector {
    u32 animals;
    u32 arts;
    u32 autos;
    u32 business;
    u32 career;
    u32 education;
    u32 fashion;
    u32 finance;
    u32 food;
    u32 government;
    // `health` is currently disabled (the matching "Health" variant of `Interest` is commented
    // out as well); the reason is not recorded in this file.
    // u32 health;
    u32 hobbies;
    u32 home;
    u32 news;
    u32 real_estate;
    u32 society;
    u32 sports;
    u32 tech;
    u32 travel;
    u32 inconclusive;
};