summaryrefslogtreecommitdiffstats
path: root/third_party/rust/relevancy/src/relevancy.udl
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/relevancy/src/relevancy.udl')
-rw-r--r--third_party/rust/relevancy/src/relevancy.udl106
1 files changed, 106 insertions, 0 deletions
diff --git a/third_party/rust/relevancy/src/relevancy.udl b/third_party/rust/relevancy/src/relevancy.udl
new file mode 100644
index 0000000000..e07243ec28
--- /dev/null
+++ b/third_party/rust/relevancy/src/relevancy.udl
@@ -0,0 +1,106 @@
+namespace relevancy { };
+
+[Error]
+interface RelevancyApiError {
+ Unexpected(string reason);
+};
+
+// Top-level class for the Relevancy component
+interface RelevancyStore {
+ // Construct a new RelevancyStore
+ [Throws=RelevancyApiError]
+ constructor(string dbpath);
+
+ // Ingest the top URLs by frequency to build up the user's interest vector
+ [Throws=RelevancyApiError]
+ void ingest(sequence<string> top_urls);
+
+ // Calculate metrics for the user's interest vector in order to measure how strongly we're
+ // identifying interests. See the `InterestMetrics` struct for details.
+ [Throws=RelevancyApiError]
+ InterestMetrics calculate_metrics();
+
+ // Get the interest vector for the user.
+ //
+ // This is intended to be show to the user in an `about:` page so that users can judge if it
+ // feels correct.
+ [Throws=RelevancyApiError]
+ InterestVector user_interest_vector();
+};
+
+enum Interest {
+ "Animals",
+ "Arts",
+ "Autos",
+ "Business",
+ "Career",
+ "Education",
+ "Fashion",
+ "Finance",
+ "Food",
+ "Government",
+ "Health",
+ "Hobbies",
+ "Home",
+ "News",
+ "RealEstate",
+ "Society",
+ "Sports",
+ "Tech",
+ "Travel",
+ "Inconclusive",
+};
+
+// Interest metrics that we want to send to Glean as part of the validation process. These contain
+// the cosine similarity when comparing the user's interest against various interest vectors that
+// consumers may use.
+//
+// Cosine similary was chosen because it seems easy to calculate. This was then matched against
+// some semi-plausible real-world interest vectors that consumers might use. This is all up for
+// debate and we may decide to switch to some other metrics.
+//
+// Similarity values are transformed to integers by multiplying the floating point value by 1000 and
+// rounding. This is to make them compatible with Glean's distribution metrics.
+dictionary InterestMetrics {
+ // Similarity between the user's interest vector and an interest vector where the element for
+ // the user's top interest is copied, but all other interests are set to zero. This measures
+ // the highest possible similarity with consumers that used interest vectors with a single
+ // interest set.
+ u32 top_single_interest_similarity;
+
+ // The same as before, but the top 2 interests are copied. This measures the highest possible
+ // similarity with consumers that used interest vectors with a two interests (note: this means
+ // they would need to choose the user's top two interests and have the exact same proportion
+ // between them as the user).
+ u32 top_2interest_similarity;
+
+ // The same as before, but the top 3 interests are copied.
+ u32 top_3interest_similarity;
+};
+
+// Vector storing a count value for each interest
+//
+// Here "vector" refers to the mathematical object, not a Rust `Vec`. It always has a fixed
+// number of elements.
+dictionary InterestVector {
+ u32 animals;
+ u32 arts;
+ u32 autos;
+ u32 business;
+ u32 career;
+ u32 education;
+ u32 fashion;
+ u32 finance;
+ u32 food;
+ u32 government;
+ u32 health;
+ u32 hobbies;
+ u32 home;
+ u32 news;
+ u32 real_estate;
+ u32 society;
+ u32 sports;
+ u32 tech;
+ u32 travel;
+ u32 inconclusive;
+};