1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
// Namespace for the relevancy component. The body is empty: no free-standing functions are
// declared here; the API is exposed through the interfaces and types declared below.
namespace relevancy { };
// Error type for the relevancy API. It is the type named by the `[Throws=RelevancyApiError]`
// attribute on the `RelevancyStore` constructor and methods.
[Error]
interface RelevancyApiError {
// The only variant declared; carries a string `reason`.
Unexpected(string reason);
};
// Top-level class for the Relevancy component
//
// The constructor and every method are declared with `[Throws=RelevancyApiError]`.
interface RelevancyStore {
// Construct a new RelevancyStore
//
// `dbpath` is passed as a string; presumably the path to the store's database -- confirm
// against the implementation.
[Throws=RelevancyApiError]
constructor(string dbpath);
// Ingest the top URLs by frequency to build up the user's interest vector
//
// `top_urls` is a sequence of URL strings. Nothing is returned.
[Throws=RelevancyApiError]
void ingest(sequence<string> top_urls);
// Calculate metrics for the user's interest vector in order to measure how strongly we're
// identifying interests. See the `InterestMetrics` struct for details.
[Throws=RelevancyApiError]
InterestMetrics calculate_metrics();
// Get the interest vector for the user.
//
// This is intended to be shown to the user in an `about:` page so that users can judge if it
// feels correct.
[Throws=RelevancyApiError]
InterestVector user_interest_vector();
};
// Interest categories.
//
// The variants correspond one-to-one, and in the same order, to the fields of the
// `InterestVector` dictionary (e.g. "RealEstate" <-> `real_estate`).
//
// NOTE(review): "Inconclusive" presumably marks input that could not be assigned to any other
// category -- confirm against the implementation.
enum Interest {
"Animals",
"Arts",
"Autos",
"Business",
"Career",
"Education",
"Fashion",
"Finance",
"Food",
"Government",
"Health",
"Hobbies",
"Home",
"News",
"RealEstate",
"Society",
"Sports",
"Tech",
"Travel",
"Inconclusive",
};
// Interest metrics that we want to send to Glean as part of the validation process. These contain
// the cosine similarity when comparing the user's interest against various interest vectors that
// consumers may use.
//
// Cosine similarity was chosen because it seems easy to calculate. This was then matched against
// some semi-plausible real-world interest vectors that consumers might use. This is all up for
// debate and we may decide to switch to some other metrics.
//
// Similarity values are transformed to integers by multiplying the floating point value by 1000 and
// rounding. This is to make them compatible with Glean's distribution metrics.
dictionary InterestMetrics {
// Similarity between the user's interest vector and an interest vector where the element for
// the user's top interest is copied, but all other interests are set to zero. This measures
// the highest possible similarity with consumers that used interest vectors with a single
// interest set.
u32 top_single_interest_similarity;
// The same as before, but the top 2 interests are copied. This measures the highest possible
// similarity with consumers that used interest vectors with two interests (note: this means
// they would need to choose the user's top two interests and have the exact same proportion
// between them as the user).
u32 top_2interest_similarity;
// The same as before, but the top 3 interests are copied.
u32 top_3interest_similarity;
};
// Vector storing a count value for each interest
//
// Here "vector" refers to the mathematical object, not a Rust `Vec`. It always has a fixed
// number of elements.
//
// There is one `u32` field per variant of the `Interest` enum, declared in the same order as
// the enum's variants.
dictionary InterestVector {
u32 animals;
u32 arts;
u32 autos;
u32 business;
u32 career;
u32 education;
u32 fashion;
u32 finance;
u32 food;
u32 government;
u32 health;
u32 hobbies;
u32 home;
u32 news;
u32 real_estate;
u32 society;
u32 sports;
u32 tech;
u32 travel;
u32 inconclusive;
};
|