summaryrefslogtreecommitdiffstats
path: root/third_party/rust/suggest
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--third_party/rust/suggest/.cargo-checksum.json2
-rw-r--r--third_party/rust/suggest/Cargo.toml28
-rw-r--r--third_party/rust/suggest/README.md110
-rw-r--r--third_party/rust/suggest/benches/benchmark_all.rs25
-rw-r--r--third_party/rust/suggest/src/benchmarks/README.md28
-rw-r--r--third_party/rust/suggest/src/benchmarks/client.rs97
-rw-r--r--third_party/rust/suggest/src/benchmarks/ingest.rs116
-rw-r--r--third_party/rust/suggest/src/benchmarks/mod.rs40
-rw-r--r--third_party/rust/suggest/src/bin/debug_ingestion_sizes.rs9
-rw-r--r--third_party/rust/suggest/src/config.rs5
-rw-r--r--third_party/rust/suggest/src/db.rs720
-rw-r--r--third_party/rust/suggest/src/lib.rs4
-rw-r--r--third_party/rust/suggest/src/pocket.rs2
-rw-r--r--third_party/rust/suggest/src/provider.rs48
-rw-r--r--third_party/rust/suggest/src/rs.rs223
-rw-r--r--third_party/rust/suggest/src/schema.rs187
-rw-r--r--third_party/rust/suggest/src/store.rs1113
-rw-r--r--third_party/rust/suggest/src/suggest.udl11
-rw-r--r--third_party/rust/suggest/src/suggestion.rs34
-rw-r--r--third_party/rust/suggest/src/yelp.rs28
20 files changed, 2366 insertions, 464 deletions
diff --git a/third_party/rust/suggest/.cargo-checksum.json b/third_party/rust/suggest/.cargo-checksum.json
index c8c5fa2566..120a503f29 100644
--- a/third_party/rust/suggest/.cargo-checksum.json
+++ b/third_party/rust/suggest/.cargo-checksum.json
@@ -1 +1 @@
-{"files":{"Cargo.toml":"4aa81cff67e67b08ba3348c1acddaa5aee887df3c35006754c9cda4273a94458","README.md":"8d7457893194e255b87e5a2667ee25c87bd470f5338d7078506f866a67a3fdbd","build.rs":"78780c5cccfe22c3ff4198624b9e188559c437c3e6fa1c8bb66548eee6aa66bf","src/config.rs":"03630b2219b6674e332a1f96f44db74def17f985c850a800299b815fa72241c2","src/db.rs":"d373ad097edac2bbcc6e1b14f51c21b6e2cab2289d27667332798c9cde4dcbef","src/error.rs":"f563210a6c050d98ec85e0f6d9401e7373bfb816e865e8edabbabb23d848ba13","src/keyword.rs":"988d0ab021c0df19cfd3c519df7d37f606bf984cd14d0efca4e5a7aff88344dd","src/lib.rs":"65a035dbfb17e2d2d9f237ad52dc03982ae28c70e3dcf3d96cc9f2d7af79efe3","src/pocket.rs":"c4dda43390d1c39dc795933596b3c1e4e282932cac6c69da53c6e05d39e9ef29","src/provider.rs":"4fe662587efc5a80d000c217ce124506c6800293c50ff460ef95e9e659c764b9","src/rs.rs":"0910368f9e7c4703b00d0de86902d647d70c1f75a256fbeb2126c91f0499a083","src/schema.rs":"8fad4cc624f48946676adbc3de7d061f05fe82531523008f417d6130a2132e34","src/store.rs":"a869971d5593bec2dd40822ba63d0e5a5def96a870ff5a7c33afbcbf5869946b","src/suggest.udl":"d941662596d48793d1570e5b8432b7fd7b4fb1b4550fb38d4e14224fcf4195bc","src/suggestion.rs":"7ee407949f40d88e5d3d4c0da400b987e85ace9f34c648f010cd7f5f2aba0506","src/yelp.rs":"37e77900c12c68cca292a84c6dd6c67d16628c68f4612d8d9bedb1bddf985229","uniffi.toml":"f26317442ddb5b3281245bef6e60ffcb78bb95d29fe4a351a56dbb88d4ec8aab"},"package":null} \ No newline at end of file
+{"files":{"Cargo.toml":"05e4d7f7b3649a3e3fa441c4af53a633d18f20bb04fd761ed33fc9d461fd0dee","README.md":"fb72d0028586cab1421b853ef529d7ce78ad7316818b7733a4f3488b0fba67f7","benches/benchmark_all.rs":"c2343c9197b6d9ccb0798d7701b1b0d2569d494dd31a975d21d7ec6f26e32879","build.rs":"78780c5cccfe22c3ff4198624b9e188559c437c3e6fa1c8bb66548eee6aa66bf","src/benchmarks/README.md":"ee6d50df2c31cfd80a5bc047011b518dcf57f1ef928a811bb770f1a09f41b3de","src/benchmarks/client.rs":"5d5db3f6e132654c06532feba15f98576122f6b9572ab5fa27b0c67d5b39ecb6","src/benchmarks/ingest.rs":"1ffdc403fb945ea0b58353df9773ba45ab0e9082d61dd5330ad49fad8cbb5d9f","src/benchmarks/mod.rs":"fe1898ba4d783213525da10d92858ee84cebfd22749bad7aeb461d338fe5504a","src/bin/debug_ingestion_sizes.rs":"ce6e810be7b3fc19e826d75b622b82cfab5a1a99397a6d0833c2c4eebff2d364","src/config.rs":"206ae9dc768c755649cb0c88a7b1fc3c926c715441784f61e9dc06a8a02fc568","src/db.rs":"734f5fd9f36f03c07a508a9a353872b81107f5fe09f27294ba27d7e1249e3988","src/error.rs":"f563210a6c050d98ec85e0f6d9401e7373bfb816e865e8edabbabb23d848ba13","src/keyword.rs":"988d0ab021c0df19cfd3c519df7d37f606bf984cd14d0efca4e5a7aff88344dd","src/lib.rs":"91ebbe0e1ffb99eefde204f81bc6bb199b4941976347baf1f132fd0ede20479c","src/pocket.rs":"1316668840ec9b4ea886223921dc9d3b5a1731d1a5206c0b1089f2a6c45c1b7b","src/provider.rs":"fe76f19a223f5cac056c7d48525087ca2c26bf0629b0e11b1f8dc98d165c8bb2","src/rs.rs":"e3eabde58c859ebe1154bf8da56ca134ace135934e3f280acc8186b4204399b3","src/schema.rs":"8b21006940e872658d722b52ba171280c96789eecf614b837d8cdbc9153ab576","src/store.rs":"413779074db3ce4589c31cd4fb0a050d44d1cbad1df3c94101d03e98efdf09cb","src/suggest.udl":"de50ea5c7ece0ae0ff4798979e0e12a5227b42bf024d48b6f585ea30a5133eb3","src/suggestion.rs":"f31227779d13d1b03a622e08a417ceba4afb161885a01c2bc87a6a652b5e8be5","src/yelp.rs":"9c0dc02a994cc05df524aa4ef337d10f575d1891259193b6419fed6fe279cb54","uniffi.toml":"f26317442ddb5b3281245bef6e60ffcb78bb95d29fe4a351a56dbb88d4ec8aab"},"package":null} \ No newline at end of file
diff --git a/third_party/rust/suggest/Cargo.toml b/third_party/rust/suggest/Cargo.toml
index 17ce1af26d..cec02ceadf 100644
--- a/third_party/rust/suggest/Cargo.toml
+++ b/third_party/rust/suggest/Cargo.toml
@@ -21,6 +21,15 @@ description = "Manages sponsored and web suggestions for Firefox Suggest"
readme = "README.md"
license = "MPL-2.0"
+[[bin]]
+name = "debug_ingestion_sizes"
+required-features = ["benchmark_api"]
+
+[[bench]]
+name = "benchmark_all"
+harness = false
+required-features = ["benchmark_api"]
+
[dependencies]
anyhow = "1.0"
chrono = "0.4"
@@ -28,7 +37,7 @@ once_cell = "1.5"
parking_lot = ">=0.11,<=0.12"
serde_json = "1"
thiserror = "1"
-uniffi = "0.25.2"
+uniffi = "0.27.1"
[dependencies.error-support]
path = "../support/error"
@@ -53,6 +62,10 @@ features = ["derive"]
[dependencies.sql-support]
path = "../support/sql"
+[dependencies.tempfile]
+version = "3.2.0"
+optional = true
+
[dependencies.url]
version = "2.1"
features = ["serde"]
@@ -60,7 +73,12 @@ features = ["serde"]
[dependencies.viaduct]
path = "../viaduct"
+[dependencies.viaduct-reqwest]
+path = "../support/viaduct-reqwest"
+optional = true
+
[dev-dependencies]
+criterion = "0.5"
expect-test = "1.4"
hex = "0.4"
@@ -72,5 +90,11 @@ default-features = false
path = "../support/rc_crypto"
[build-dependencies.uniffi]
-version = "0.25.2"
+version = "0.27.1"
features = ["build"]
+
+[features]
+benchmark_api = [
+ "tempfile",
+ "viaduct-reqwest",
+]
diff --git a/third_party/rust/suggest/README.md b/third_party/rust/suggest/README.md
index 74716d2ebb..bd68f0d7c4 100644
--- a/third_party/rust/suggest/README.md
+++ b/third_party/rust/suggest/README.md
@@ -1,7 +1,111 @@
# Suggest
-The **Suggest Rust component** powers the [Firefox Suggest](https://support.mozilla.org/en-US/kb/firefox-suggest-faq) feature.
+The **Suggest Rust component** provides address bar search suggestions from Mozilla. This includes suggestions from sponsors, as well as non-sponsored suggestions for other web destinations. These suggestions are part of the [Firefox Suggest](https://support.mozilla.org/en-US/kb/firefox-suggest-faq) feature.
-This component currently supports the basic Suggest experience only. The basic experience shows suggestions for sponsored and web content from a canned dataset. The component downloads the dataset from [Remote Settings](https://remote-settings.readthedocs.io/en/latest/), stores the suggestions in a local database, and makes them available to the Firefox address bar. Because matching is done locally, Mozilla never sees the user's query.
+This component is integrated into Firefox Desktop, Android, and iOS.
-The opt-in "Improved Firefox Suggest Experience", which sends user queries to a [Mozilla-owned proxy server](https://mozilla-services.github.io/merino/intro.html) for server-side matching, is not currently supported.
+## Architecture
+
+Search suggestions from Mozilla are stored in a [Remote Settings](https://remote-settings.readthedocs.io/en/latest/) collection. The Suggest component downloads these suggestions from Remote Settings, stores them in a local SQLite database, and makes them available to the Firefox address bar. Because these suggestions are stored and matched locally, Mozilla never sees the user's search queries.
+
+This component follows the architecture of the other [Application Services Rust components](https://mozilla.github.io/application-services/book/index.html): a cross-platform Rust core, and platform-specific bindings for Firefox Desktop, Android, and iOS. These bindings are generated automatically using the [UniFFI](https://mozilla.github.io/uniffi-rs/) tool.
+
+### For consumers
+
+This section is for application developers. It describes how Firefox Desktop, Android, and iOS consume the Suggest Rust component.
+
+The cornerstone of the component is the `SuggestStore` interface, which is the **store**. The store _ingests_ (downloads and persists) suggestions from Remote Settings, and returns matching suggestions as the user types. This is the main interface that applications use to interact with the component.
+
+While the store provides most of the functionality, the application has a few responsibilities:
+
+**1. Create and manage a `SuggestStore` as a singleton.** Under the hood, the store holds multiple connections to the database: a read-write connection for ingestion, and a read-only connection for queries. The store uses the right connection for each operation, so applications shouldn't create multiple stores. The application is responsible for specifying the correct platform-specific storage directory for the database. The database contains _cached data_, like suggestions, and _user data_, like which suggestions have been dismissed. For this reason, applications should persist the database in their durable storage or "profile" directory. Applications specify the storage directory, and create a store, using the `SuggestStoreBuilder` interface.
+
+**2. Periodically call the store's `ingest()` method to ingest new suggestions.** While the store takes care of efficiently downloading and persisting suggestions from Remote Settings, the application is still responsible for scheduling this work. This is because the Suggest component doesn't have access to the various platform-specific background work scheduling mechanisms, like `nsIUpdateTimerManager` on Desktop, `WorkManager` on Android, or `BGTaskScheduler` on iOS. These are three different APIs with different constraints, written in three different languages. Instead of trying to bind to these different mechanisms, the component leaves it up to the application to use the right one on each platform. Ingestion is network- and disk I/O-bound, and should be done in the background.
+
+**3. Use the store's `query()` and `interrupt()` methods to query for fresh suggestions as the user types.** The application passes the user's input, and additional options like which suggestion types to return, to `query()`. Querying the database is disk I/O-bound, so applications should use their respective platforms' facilities for asynchronous work—Kotlin coroutines on Android, and Swift actors on iOS; the UniFFI bindings for Desktop take care of dispatching work to a background thread pool—to avoid blocking the main thread. Running `query()` off-main-thread also lets applications `interrupt()` those queries from the main thread when the user's input changes. This avoids waiting for a query to return suggestions that are now stale.
+
+### For contributors
+
+This section is a primer for engineers contributing code to the Suggest Rust component.
+
+`suggest.udl` describes the component's interface for foreign language consumers. UniFFI uses this file to generate the language bindings for each platform. If you're adding a new suggestion type, you'll want to update the declarations of `Suggestion` and `SuggestionProvider` in this file, as well as the definitions of those types in `suggestion.rs` and `provider.rs`, respectively.
+
+`store.rs` contains the implementation of `SuggestStore` and most of the Suggest component's tests.
+
+`schema.rs` manages the database schema. **Remember to bump the schema version and add a migration whenever you change the schema.**
+
+`db.rs` interacts with the Suggest database. The `SuggestDao` type in this file is a ["data access object"](https://en.wikipedia.org/wiki/Data_access_object) (DAO) that contains all the SQL statements for querying and updating the SQLite database. By convention, `SuggestDao` methods that can write to the database take `&mut self`, and methods that only read take `&self`. The `SuggestDb::read()` and `SuggestDb::write()` methods take a closure that receives either `&SuggestDao` or `&mut SuggestDao`; this is how the DAO enforces that all writes are done in a transaction. If you're curious to learn about how we use SQLite, or you're diagnosing a slow query or adding a new suggestion type, you'll almost certainly want to look at this file.
+
+`rs.rs` defines all the Remote Settings [record](https://docs.kinto-storage.org/en/stable/concepts.html#buckets-collections-and-records) and [attachment](https://docs.kinto-storage.org/en/stable/faq.html#can-i-store-files-inside-kinto) types. The records in the `quicksuggest` Remote Settings collection don't store the suggestions themselves. Instead, each record has a type, and a pointer to a JSON attachment that contains multiple suggestions of that type. This file defines [Serde](https://serde.rs/)-compatible types for all these records and attachments.
+
+`errors.rs` contains all the errors that this component returns. We use the crate-internal `Error` type for all fallible operations within the component, and the public `SuggestApiError` type for errors that applications should handle.
+
+There are other suggestion provider-specific files, like `yelp.rs`, `pocket.rs`, and `keyword.rs`, that aren't covered in this primer. If you're new to the component, we recommend starting with the highest-level interface in `store.rs` first, and jumping to the other files and types as you encounter them in the code.
+
+## Documentation
+
+Each Rust file contains [inline documentation](https://doc.rust-lang.org/rustdoc/what-is-rustdoc.html) for types, traits, functions, and methods.
+
+Documentation for `pub` symbols is written with application developers in mind: even if you're a Desktop, Android or iOS developer, the Rust documentation is meant to give you an understanding of how the Gecko, Kotlin and Swift bindings work.
+
+You can see the documentation for all public symbols by running from the command line:
+
+```shell
+cargo doc --open
+```
+
+By convention, symbols that are exported as part of the component's foreign language interface have `pub` visibility, and symbols that are only used within the component have `pub(crate)` or private visibility. As an exception to this convention, symbols with [doctests](https://doc.rust-lang.org/rustdoc/write-documentation/documentation-tests.html) are also `pub`, because doctests [can only link against public symbols](https://doc.rust-lang.org/rustdoc/write-documentation/documentation-tests.html#include-items-only-when-collecting-doctests).
+
+If you're working on the component, you can see the documentation for `pub(crate)` and private symbols using:
+
+```shell
+cargo doc --document-private-items --open
+```
+
+💡 If you're adding a new suggestion type, the documentation for the `rs` module is a great place to start.
+
+Please help us keep our documentation useful for everyone, and feel free to file bugs for anything that looks unclear or out-of-date!
+
+## Tests
+
+We use a technique called ["snapshot testing"](https://notlaura.com/what-is-a-snapshot-test/) with the [`expect-test`](https://docs.rs/expect-test/latest/expect_test/) crate for the Suggest component's tests. This technique makes it easier to compare and update all the expected outputs when adding, removing, and changing suggestion types.
+
+The snapshot tests in `store.rs` look like this:
+
+```rs
+expect![["{expected-output}"]].assert_debug_eq(&actual_output);
+```
+
+The `expect-test` crate generates the `{expected-output}` string, and can update it automatically. If you add, remove, or change a suggestion type, or update the schema, and run `cargo test`, you'll likely see a few failures. `expect-test` will print a readable diff in the `cargo test` output, which you can audit for accuracy.
+
+If the diff looks good, you can update the expectations in-place from the command line using:
+
+```shell
+env UPDATE_EXPECT=1 cargo test
+```
+
+Most of the tests in `store.rs` are integration-style tests that use a fake Remote Settings interface.
+
+## ⚠️ Breaking Changes ⚠️
+
+A "breaking change" is any code change that breaks the build, tests, or behavior of a consuming application.
+
+These are some common changes that can break consumers:
+
+* **Changing the signature of a method or function that's currently in use.** Adding, removing, or changing the type of an argument or a return value, or reordering arguments, is a breaking change on Desktop, Android, and iOS.
+* **Removing or renaming a method, function, or type that's currently in use.**
+* **Adding or removing a `[Throws]` attribute**. Changing a non-throwing function to throw an error, or vice versa, is a build breaking change on iOS. On Desktop and Android, changing a non-throwing function to a throwing function won't break the build, but can cause crashes if the consumer doesn't handle the new error.
+* **Changing the fields of a dictionary or an enum.** Adding, removing, reordering, or changing the type of a dictionary or an `[Enum]` interface field is a guaranteed build breaking change on iOS. It may be a breaking change on Desktop and Android if the consumer creates new instances of the dictionary or enum.
+
+When working on the Suggest component, this means:
+
+* Adding a new suggestion type is generally _not_ breaking.
+* Adding a new method to `SuggestStore` is _not_ breaking.
+* Renaming or removing a `SuggestStore` method that's currently in use _is_ breaking.
+* Adding a new field to an existing suggestion type _is_ breaking.
+
+If you need to make a breaking change, don't panic! We have a [process for landing them in Application Services](https://mozilla.github.io/application-services/book/howtos/breaking-changes.html), and you can use a [branch build](https://mozilla.github.io/application-services/book/howtos/branch-builds.html) to verify that Android and iOS build and run their tests with your change.
+
+## Bugs
+
+We use Bugzilla to track bugs and feature work. You can use [this link](https://bugzilla.mozilla.org/enter_bug.cgi?product=Application+Services&component=Suggest) to file bugs in the `Application Services :: Suggest` bug component.
diff --git a/third_party/rust/suggest/benches/benchmark_all.rs b/third_party/rust/suggest/benches/benchmark_all.rs
new file mode 100644
index 0000000000..2e328e5804
--- /dev/null
+++ b/third_party/rust/suggest/benches/benchmark_all.rs
@@ -0,0 +1,25 @@
+use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
+use suggest::benchmarks::{ingest, BenchmarkWithInput};
+
+pub fn ingest_single_provider(c: &mut Criterion) {
+ let mut group = c.benchmark_group("ingest");
+ viaduct_reqwest::use_reqwest_backend();
+ // This needs to be 10 for now, or else the `ingest-amp-wikipedia` benchmark would take around
+ // 100s to run which feels like too long. `ingest-amp-mobile` also would take a around 50s.
+ group.sample_size(10);
+ for (name, benchmark) in ingest::all_benchmarks() {
+ group.bench_function(format!("ingest-{name}"), |b| {
+ b.iter_batched(
+ || benchmark.generate_input(),
+ |input| benchmark.benchmarked_code(input),
+ // See https://docs.rs/criterion/latest/criterion/enum.BatchSize.html#variants for
+ // a discussion of this. PerIteration is chosen for these benchmarks because the
+ // input holds a database file handle
+ BatchSize::PerIteration,
+ );
+ });
+ }
+}
+
+criterion_group!(benches, ingest_single_provider);
+criterion_main!(benches);
diff --git a/third_party/rust/suggest/src/benchmarks/README.md b/third_party/rust/suggest/src/benchmarks/README.md
new file mode 100644
index 0000000000..45150d8413
--- /dev/null
+++ b/third_party/rust/suggest/src/benchmarks/README.md
@@ -0,0 +1,28 @@
+# Suggest benchmarking code
+
+Use `cargo suggest-bench` to run these benchmarks.
+
+The main benchmarking code lives here, while the criterion integration code lives in the `benches/`
+directory.
+
+## Benchmarks
+
+### ingest-[provider-type]
+
+Time it takes to ingest all suggestions for a provider type on an empty database.
+The bechmark downloads network resources in advance in order to exclude the network request time
+from these measurements.
+
+### Benchmarks it would be nice to have
+
+- Ingestion with synthetic data. This would isolate the benchmark from changes to the RS database.
+- Fetching suggestions
+
+## cargo suggest-debug-ingestion-sizes
+
+Run this to get row counts for all database tables. This can be very useful for improving
+benchmarks, since targeting the tables with the largest number of rows will usually lead to the
+largest improvements.
+
+The command also prints out the size of all remote-settings attachments, which can be good to
+optimize on its own since it represents the amount of data user's need to download.
diff --git a/third_party/rust/suggest/src/benchmarks/client.rs b/third_party/rust/suggest/src/benchmarks/client.rs
new file mode 100644
index 0000000000..f5a21fd9cc
--- /dev/null
+++ b/third_party/rust/suggest/src/benchmarks/client.rs
@@ -0,0 +1,97 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use crate::{rs::SuggestRemoteSettingsClient, Result};
+use parking_lot::Mutex;
+use remote_settings::{Client, GetItemsOptions, RemoteSettingsConfig, RemoteSettingsResponse};
+use std::collections::HashMap;
+
+/// Remotes settings client that runs during the benchmark warm-up phase.
+///
+/// This should be used to run a full ingestion.
+/// Then it can be converted into a [RemoteSettingsBenchmarkClient], which allows benchmark code to exclude the network request time.
+/// [RemoteSettingsBenchmarkClient] implements [SuggestRemoteSettingsClient] by getting data from a HashMap rather than hitting the network.
+pub struct RemoteSettingsWarmUpClient {
+ client: Client,
+ pub get_records_responses: Mutex<HashMap<GetItemsOptions, RemoteSettingsResponse>>,
+ pub get_attachment_responses: Mutex<HashMap<String, Vec<u8>>>,
+}
+
+impl RemoteSettingsWarmUpClient {
+ pub fn new() -> Self {
+ Self {
+ client: Client::new(RemoteSettingsConfig {
+ server_url: None,
+ bucket_name: None,
+ collection_name: crate::rs::REMOTE_SETTINGS_COLLECTION.into(),
+ })
+ .unwrap(),
+ get_records_responses: Mutex::new(HashMap::new()),
+ get_attachment_responses: Mutex::new(HashMap::new()),
+ }
+ }
+}
+
+impl Default for RemoteSettingsWarmUpClient {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl SuggestRemoteSettingsClient for RemoteSettingsWarmUpClient {
+ fn get_records_with_options(
+ &self,
+ options: &GetItemsOptions,
+ ) -> Result<RemoteSettingsResponse> {
+ let response = self.client.get_records_with_options(options)?;
+ self.get_records_responses
+ .lock()
+ .insert(options.clone(), response.clone());
+ Ok(response)
+ }
+
+ fn get_attachment(&self, location: &str) -> Result<Vec<u8>> {
+ let response = self.client.get_attachment(location)?;
+ self.get_attachment_responses
+ .lock()
+ .insert(location.to_string(), response.clone());
+ Ok(response)
+ }
+}
+
+#[derive(Clone)]
+pub struct RemoteSettingsBenchmarkClient {
+ pub get_records_responses: HashMap<GetItemsOptions, RemoteSettingsResponse>,
+ pub get_attachment_responses: HashMap<String, Vec<u8>>,
+}
+
+impl SuggestRemoteSettingsClient for RemoteSettingsBenchmarkClient {
+ fn get_records_with_options(
+ &self,
+ options: &GetItemsOptions,
+ ) -> Result<RemoteSettingsResponse> {
+ Ok(self
+ .get_records_responses
+ .get(options)
+ .unwrap_or_else(|| panic!("options not found: {options:?}"))
+ .clone())
+ }
+
+ fn get_attachment(&self, location: &str) -> Result<Vec<u8>> {
+ Ok(self
+ .get_attachment_responses
+ .get(location)
+ .unwrap_or_else(|| panic!("location not found: {location:?}"))
+ .clone())
+ }
+}
+
+impl From<RemoteSettingsWarmUpClient> for RemoteSettingsBenchmarkClient {
+ fn from(warm_up_client: RemoteSettingsWarmUpClient) -> Self {
+ Self {
+ get_records_responses: warm_up_client.get_records_responses.into_inner(),
+ get_attachment_responses: warm_up_client.get_attachment_responses.into_inner(),
+ }
+ }
+}
diff --git a/third_party/rust/suggest/src/benchmarks/ingest.rs b/third_party/rust/suggest/src/benchmarks/ingest.rs
new file mode 100644
index 0000000000..bbefc6a00a
--- /dev/null
+++ b/third_party/rust/suggest/src/benchmarks/ingest.rs
@@ -0,0 +1,116 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use crate::{
+ benchmarks::{
+ client::{RemoteSettingsBenchmarkClient, RemoteSettingsWarmUpClient},
+ BenchmarkWithInput,
+ },
+ rs::SuggestRecordType,
+ store::SuggestStoreInner,
+ SuggestIngestionConstraints,
+};
+use std::sync::atomic::{AtomicU32, Ordering};
+
+static DB_FILE_COUNTER: AtomicU32 = AtomicU32::new(0);
+
+pub struct IngestBenchmark {
+ temp_dir: tempfile::TempDir,
+ client: RemoteSettingsBenchmarkClient,
+ record_type: SuggestRecordType,
+}
+
+impl IngestBenchmark {
+ pub fn new(record_type: SuggestRecordType) -> Self {
+ let temp_dir = tempfile::tempdir().unwrap();
+ let store = SuggestStoreInner::new(
+ temp_dir.path().join("warmup.sqlite"),
+ RemoteSettingsWarmUpClient::new(),
+ );
+ store.benchmark_ingest_records_by_type(record_type);
+ Self {
+ client: RemoteSettingsBenchmarkClient::from(store.into_settings_client()),
+ temp_dir,
+ record_type,
+ }
+ }
+}
+
+// The input for each benchmark is `SuggestStoreInner` with a fresh database.
+//
+// This is wrapped in a newtype so that it can be exposed in the public trait
+pub struct InputType(SuggestStoreInner<RemoteSettingsBenchmarkClient>);
+
+impl BenchmarkWithInput for IngestBenchmark {
+ type Input = InputType;
+
+ fn generate_input(&self) -> Self::Input {
+ let data_path = self.temp_dir.path().join(format!(
+ "db{}.sqlite",
+ DB_FILE_COUNTER.fetch_add(1, Ordering::Relaxed)
+ ));
+ let store = SuggestStoreInner::new(data_path, self.client.clone());
+ store.ensure_db_initialized();
+ InputType(store)
+ }
+
+ fn benchmarked_code(&self, input: Self::Input) {
+ let InputType(store) = input;
+ store.benchmark_ingest_records_by_type(self.record_type);
+ }
+}
+
+/// Get IngestBenchmark instances for all record types
+pub fn all_benchmarks() -> Vec<(&'static str, IngestBenchmark)> {
+ vec![
+ ("icon", IngestBenchmark::new(SuggestRecordType::Icon)),
+ (
+ "amp-wikipedia",
+ IngestBenchmark::new(SuggestRecordType::AmpWikipedia),
+ ),
+ ("amo", IngestBenchmark::new(SuggestRecordType::Amo)),
+ ("pocket", IngestBenchmark::new(SuggestRecordType::Pocket)),
+ ("yelp", IngestBenchmark::new(SuggestRecordType::Yelp)),
+ ("mdn", IngestBenchmark::new(SuggestRecordType::Mdn)),
+ ("weather", IngestBenchmark::new(SuggestRecordType::Weather)),
+ (
+ "global-config",
+ IngestBenchmark::new(SuggestRecordType::GlobalConfig),
+ ),
+ (
+ "amp-mobile",
+ IngestBenchmark::new(SuggestRecordType::AmpMobile),
+ ),
+ ]
+}
+
+pub fn print_debug_ingestion_sizes() {
+ viaduct_reqwest::use_reqwest_backend();
+ let store = SuggestStoreInner::new(
+ "file:debug_ingestion_sizes?mode=memory&cache=shared",
+ RemoteSettingsWarmUpClient::new(),
+ );
+ store
+ .ingest(SuggestIngestionConstraints::default())
+ .unwrap();
+ let table_row_counts = store.table_row_counts();
+ let client = store.into_settings_client();
+ let total_attachment_size: usize = client
+ .get_attachment_responses
+ .lock()
+ .values()
+ .map(|data| data.len())
+ .sum();
+
+ println!(
+ "Total attachment size: {}kb",
+ (total_attachment_size + 500) / 1000
+ );
+ println!();
+ println!("Database table row counts");
+ println!("-------------------------");
+ for (name, count) in table_row_counts {
+ println!("{name:30}: {count}");
+ }
+}
diff --git a/third_party/rust/suggest/src/benchmarks/mod.rs b/third_party/rust/suggest/src/benchmarks/mod.rs
new file mode 100644
index 0000000000..eb3b2e8abe
--- /dev/null
+++ b/third_party/rust/suggest/src/benchmarks/mod.rs
@@ -0,0 +1,40 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Benchmarking support
+//!
+//! Benchmarks are split up into two parts: the functions to be benchmarked live here, which the benchmarking code itself lives in `benches/bench.rs`.
+//! It's easier to write benchmarking code inside the main crate, where we have access to private items.
+//! However, it's easier to integrate with Cargo and criterion if benchmarks live in a separate crate.
+//!
+//! All benchmarks are defined as structs that implement either the [Benchmark] or [BenchmarkWithInput]
+
+pub mod client;
+pub mod ingest;
+
+/// Trait for simple benchmarks
+///
+/// This supports simple benchmarks that don't require any input. Note: global setup can be done
+/// in the `new()` method for the struct.
+pub trait Benchmark {
+ /// Perform the operations that we're benchmarking.
+ fn benchmarked_code(&self);
+}
+
+/// Trait for benchmarks that require input
+///
+/// This will run using Criterion's `iter_batched` function. Criterion will create a batch of
+/// inputs, then pass each one to benchmark.
+///
+/// This supports simple benchmarks that don't require any input. Note: global setup can be done
+/// in the `new()` method for the struct.
+pub trait BenchmarkWithInput {
+ type Input;
+
+ /// Generate the input (this is not included in the benchmark time)
+ fn generate_input(&self) -> Self::Input;
+
+ /// Perform the operations that we're benchmarking.
+ fn benchmarked_code(&self, input: Self::Input);
+}
diff --git a/third_party/rust/suggest/src/bin/debug_ingestion_sizes.rs b/third_party/rust/suggest/src/bin/debug_ingestion_sizes.rs
new file mode 100644
index 0000000000..14ca3d9462
--- /dev/null
+++ b/third_party/rust/suggest/src/bin/debug_ingestion_sizes.rs
@@ -0,0 +1,9 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use suggest::benchmarks::ingest;
+
+fn main() {
+ ingest::print_debug_ingestion_sizes()
+}
diff --git a/third_party/rust/suggest/src/config.rs b/third_party/rust/suggest/src/config.rs
index fcb3c2e256..532ac5b504 100644
--- a/third_party/rust/suggest/src/config.rs
+++ b/third_party/rust/suggest/src/config.rs
@@ -1,3 +1,8 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
use serde::{Deserialize, Serialize};
use crate::rs::{DownloadedGlobalConfig, DownloadedWeatherData};
diff --git a/third_party/rust/suggest/src/db.rs b/third_party/rust/suggest/src/db.rs
index 07fc3ab4a2..0412c50d8f 100644
--- a/third_party/rust/suggest/src/db.rs
+++ b/third_party/rust/suggest/src/db.rs
@@ -23,17 +23,17 @@ use crate::{
rs::{
DownloadedAmoSuggestion, DownloadedAmpSuggestion, DownloadedAmpWikipediaSuggestion,
DownloadedMdnSuggestion, DownloadedPocketSuggestion, DownloadedWeatherData,
- SuggestRecordId,
+ DownloadedWikipediaSuggestion, SuggestRecordId,
},
- schema::{SuggestConnectionInitializer, VERSION},
+ schema::{clear_database, SuggestConnectionInitializer, VERSION},
store::{UnparsableRecord, UnparsableRecords},
suggestion::{cook_raw_suggestion_url, AmpSuggestionType, Suggestion},
Result, SuggestionQuery,
};
-/// The metadata key whose value is the timestamp of the last record ingested
-/// from the Suggest Remote Settings collection.
-pub const LAST_INGEST_META_KEY: &str = "last_quicksuggest_ingest";
+/// The metadata key prefix for the last ingested unparsable record. These are
+/// records that were not parsed properly, or were not of the "approved" types.
+pub const LAST_INGEST_META_UNPARSABLE: &str = "last_quicksuggest_ingest_unparsable";
/// The metadata key whose value keeps track of records of suggestions
/// that aren't parsable and which schema version it was first seen in.
pub const UNPARSABLE_RECORDS_META_KEY: &str = "unparsable_records";
@@ -148,20 +148,28 @@ impl<'a> SuggestDao<'a> {
self.put_unparsable_record_id(&record_id)?;
// Advance the last fetch time, so that we can resume
// fetching after this record if we're interrupted.
- self.put_last_ingest_if_newer(record.last_modified)
+ self.put_last_ingest_if_newer(LAST_INGEST_META_UNPARSABLE, record.last_modified)
}
- pub fn handle_ingested_record(&mut self, record: &RemoteSettingsRecord) -> Result<()> {
+ pub fn handle_ingested_record(
+ &mut self,
+ last_ingest_key: &str,
+ record: &RemoteSettingsRecord,
+ ) -> Result<()> {
let record_id = SuggestRecordId::from(&record.id);
// Remove this record's ID from the list of unparsable
// records, since we understand it now.
self.drop_unparsable_record_id(&record_id)?;
// Advance the last fetch time, so that we can resume
// fetching after this record if we're interrupted.
- self.put_last_ingest_if_newer(record.last_modified)
+ self.put_last_ingest_if_newer(last_ingest_key, record.last_modified)
}
- pub fn handle_deleted_record(&mut self, record: &RemoteSettingsRecord) -> Result<()> {
+ pub fn handle_deleted_record(
+ &mut self,
+ last_ingest_key: &str,
+ record: &RemoteSettingsRecord,
+ ) -> Result<()> {
let record_id = SuggestRecordId::from(&record.id);
// Drop either the icon or suggestions, records only contain one or the other
match record_id.as_icon_id() {
@@ -173,7 +181,7 @@ impl<'a> SuggestDao<'a> {
self.drop_unparsable_record_id(&record_id)?;
// Advance the last fetch time, so that we can resume
// fetching after this record if we're interrupted.
- self.put_last_ingest_if_newer(record.last_modified)
+ self.put_last_ingest_if_newer(last_ingest_key, record.last_modified)
}
// =============== Low level API ===============
@@ -231,15 +239,20 @@ impl<'a> SuggestDao<'a> {
s.title,
s.url,
s.provider,
- s.score
+ s.score,
+ fk.full_keyword
FROM
suggestions s
JOIN
keywords k
ON k.suggestion_id = s.id
+ LEFT JOIN
+ full_keywords fk
+ ON k.full_keyword_id = fk.id
WHERE
s.provider = :provider
AND k.keyword = :keyword
+ AND NOT EXISTS (SELECT 1 FROM dismissed_suggestions WHERE url=s.url)
"#,
named_params! {
":keyword": keyword_lowercased,
@@ -248,8 +261,9 @@ impl<'a> SuggestDao<'a> {
|row| -> Result<Suggestion> {
let suggestion_id: i64 = row.get("id")?;
let title = row.get("title")?;
- let raw_url = row.get::<_, String>("url")?;
- let score = row.get::<_, f64>("score")?;
+ let raw_url: String = row.get("url")?;
+ let score: f64 = row.get("score")?;
+ let full_keyword_from_db: Option<String> = row.get("full_keyword")?;
let keywords: Vec<String> = self.conn.query_rows_and_then_cached(
r#"
@@ -277,9 +291,12 @@ impl<'a> SuggestDao<'a> {
amp.iab_category,
amp.impression_url,
amp.click_url,
- (SELECT i.data FROM icons i WHERE i.id = amp.icon_id) AS icon
+ i.data AS icon,
+ i.mimetype AS icon_mimetype
FROM
amp_custom_details amp
+ LEFT JOIN
+ icons i ON amp.icon_id = i.id
WHERE
amp.suggestion_id = :suggestion_id
"#,
@@ -298,8 +315,10 @@ impl<'a> SuggestDao<'a> {
title,
url: cooked_url,
raw_url,
- full_keyword: full_keyword(keyword_lowercased, &keywords),
+ full_keyword: full_keyword_from_db
+ .unwrap_or_else(|| full_keyword(keyword_lowercased, &keywords)),
icon: row.get("icon")?,
+ icon_mimetype: row.get("icon_mimetype")?,
impression_url: row.get("impression_url")?,
click_url: cooked_click_url,
raw_click_url,
@@ -330,6 +349,7 @@ impl<'a> SuggestDao<'a> {
WHERE
s.provider = :provider
AND k.keyword = :keyword
+ AND NOT EXISTS (SELECT 1 FROM dismissed_suggestions WHERE url=s.url)
"#,
named_params! {
":keyword": keyword_lowercased,
@@ -350,36 +370,51 @@ impl<'a> SuggestDao<'a> {
},
|row| row.get(0),
)?;
- let icon = self.conn.try_query_one(
- "SELECT i.data
+ let (icon, icon_mimetype) = self
+ .conn
+ .try_query_row(
+ "SELECT i.data, i.mimetype
FROM icons i
JOIN wikipedia_custom_details s ON s.icon_id = i.id
- WHERE s.suggestion_id = :suggestion_id",
- named_params! {
- ":suggestion_id": suggestion_id
- },
- true,
- )?;
+ WHERE s.suggestion_id = :suggestion_id
+ LIMIT 1",
+ named_params! {
+ ":suggestion_id": suggestion_id
+ },
+ |row| -> Result<_> {
+ Ok((
+ row.get::<_, Option<Vec<u8>>>(0)?,
+ row.get::<_, Option<String>>(1)?,
+ ))
+ },
+ true,
+ )?
+ .unwrap_or((None, None));
+
Ok(Suggestion::Wikipedia {
title,
url: raw_url,
full_keyword: full_keyword(keyword_lowercased, &keywords),
icon,
+ icon_mimetype,
})
},
)?;
Ok(suggestions)
}
- /// Fetches Suggestions of type Amo provider that match the given query
- pub fn fetch_amo_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
+ /// Query for suggestions using the keyword prefix and provider
+ fn map_prefix_keywords<T>(
+ &self,
+ query: &SuggestionQuery,
+ provider: &SuggestionProvider,
+ mut mapper: impl FnMut(&rusqlite::Row, &str) -> Result<T>,
+ ) -> Result<Vec<T>> {
let keyword_lowercased = &query.keyword.to_lowercase();
let (keyword_prefix, keyword_suffix) = split_keyword(keyword_lowercased);
- let suggestions_limit = &query.limit.unwrap_or(-1);
- let suggestions = self
- .conn
- .query_rows_and_then_cached(
- r#"
+ let suggestions_limit = query.limit.unwrap_or(-1);
+ self.conn.query_rows_and_then_cached(
+ r#"
SELECT
s.id,
MAX(k.rank) AS rank,
@@ -397,6 +432,7 @@ impl<'a> SuggestDao<'a> {
k.keyword_prefix = :keyword_prefix
AND (k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF')
AND s.provider = :provider
+ AND NOT EXISTS (SELECT 1 FROM dismissed_suggestions WHERE url=s.url)
GROUP BY
s.id
ORDER BY
@@ -405,13 +441,23 @@ impl<'a> SuggestDao<'a> {
LIMIT
:suggestions_limit
"#,
- named_params! {
- ":keyword_prefix": keyword_prefix,
- ":keyword_suffix": keyword_suffix,
- ":provider": SuggestionProvider::Amo,
- ":suggestions_limit": suggestions_limit,
- },
- |row| -> Result<Option<Suggestion>> {
+ &[
+ (":keyword_prefix", &keyword_prefix as &dyn ToSql),
+ (":keyword_suffix", &keyword_suffix as &dyn ToSql),
+ (":provider", provider as &dyn ToSql),
+ (":suggestions_limit", &suggestions_limit as &dyn ToSql),
+ ],
+ |row| mapper(row, keyword_suffix),
+ )
+ }
+
+ /// Fetches Suggestions of type Amo provider that match the given query
+ pub fn fetch_amo_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
+ let suggestions = self
+ .map_prefix_keywords(
+ query,
+ &SuggestionProvider::Amo,
+ |row, keyword_suffix| -> Result<Option<Suggestion>> {
let suggestion_id: i64 = row.get("id")?;
let title = row.get("title")?;
let raw_url = row.get::<_, String>("url")?;
@@ -486,6 +532,7 @@ impl<'a> SuggestDao<'a> {
k.keyword_prefix = :keyword_prefix
AND (k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF')
AND s.provider = :provider
+ AND NOT EXISTS (SELECT 1 FROM dismissed_suggestions WHERE url=s.url)
GROUP BY
s.id,
k.confidence
@@ -534,45 +581,11 @@ impl<'a> SuggestDao<'a> {
/// Fetches suggestions for MDN
pub fn fetch_mdn_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
- let keyword_lowercased = &query.keyword.to_lowercase();
- let (keyword_prefix, keyword_suffix) = split_keyword(keyword_lowercased);
- let suggestions_limit = &query.limit.unwrap_or(-1);
let suggestions = self
- .conn
- .query_rows_and_then_cached(
- r#"
- SELECT
- s.id,
- MAX(k.rank) AS rank,
- s.title,
- s.url,
- s.provider,
- s.score,
- k.keyword_suffix
- FROM
- suggestions s
- JOIN
- prefix_keywords k
- ON k.suggestion_id = s.id
- WHERE
- k.keyword_prefix = :keyword_prefix
- AND (k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF')
- AND s.provider = :provider
- GROUP BY
- s.id
- ORDER BY
- s.score DESC,
- rank DESC
- LIMIT
- :suggestions_limit
- "#,
- named_params! {
- ":keyword_prefix": keyword_prefix,
- ":keyword_suffix": keyword_suffix,
- ":provider": SuggestionProvider::Mdn,
- ":suggestions_limit": suggestions_limit,
- },
- |row| -> Result<Option<Suggestion>> {
+ .map_prefix_keywords(
+ query,
+ &SuggestionProvider::Mdn,
+ |row, keyword_suffix| -> Result<Option<Suggestion>> {
let suggestion_id: i64 = row.get("id")?;
let title = row.get("title")?;
let raw_url = row.get::<_, String>("url")?;
@@ -657,35 +670,15 @@ impl<'a> SuggestDao<'a> {
record_id: &SuggestRecordId,
suggestions: &[DownloadedAmoSuggestion],
) -> Result<()> {
+ let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?;
for suggestion in suggestions {
self.scope.err_if_interrupted()?;
- let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
- &format!(
- "INSERT INTO suggestions(
- record_id,
- provider,
- title,
- url,
- score
- )
- VALUES(
- :record_id,
- {},
- :title,
- :url,
- :score
- )
- RETURNING id",
- SuggestionProvider::Amo as u8
- ),
- named_params! {
- ":record_id": record_id.as_str(),
- ":title": suggestion.title,
- ":url": suggestion.url,
- ":score": suggestion.score,
- },
- |row| row.get(0),
- true,
+ let suggestion_id = suggestion_insert.execute(
+ record_id,
+ &suggestion.title,
+ &suggestion.url,
+ suggestion.score,
+ SuggestionProvider::Amo,
)?;
self.conn.execute(
"INSERT INTO amo_custom_details(
@@ -747,105 +740,48 @@ impl<'a> SuggestDao<'a> {
record_id: &SuggestRecordId,
suggestions: &[DownloadedAmpWikipediaSuggestion],
) -> Result<()> {
+ // Prepare statements outside of the loop. This results in a large performance
+ // improvement on a fresh ingest, since there are so many rows.
+ let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?;
+ let mut amp_insert = AmpInsertStatement::new(self.conn)?;
+ let mut wiki_insert = WikipediaInsertStatement::new(self.conn)?;
+ let mut keyword_insert = KeywordInsertStatement::new(self.conn)?;
for suggestion in suggestions {
self.scope.err_if_interrupted()?;
let common_details = suggestion.common_details();
let provider = suggestion.provider();
- let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
- &format!(
- "INSERT INTO suggestions(
- record_id,
- provider,
- title,
- url,
- score
- )
- VALUES(
- :record_id,
- {},
- :title,
- :url,
- :score
- )
- RETURNING id",
- provider as u8
- ),
- named_params! {
- ":record_id": record_id.as_str(),
- ":title": common_details.title,
- ":url": common_details.url,
- ":score": common_details.score.unwrap_or(DEFAULT_SUGGESTION_SCORE)
- },
- |row| row.get(0),
- true,
+ let suggestion_id = suggestion_insert.execute(
+ record_id,
+ &common_details.title,
+ &common_details.url,
+ common_details.score.unwrap_or(DEFAULT_SUGGESTION_SCORE),
+ provider,
)?;
match suggestion {
DownloadedAmpWikipediaSuggestion::Amp(amp) => {
- self.conn.execute(
- "INSERT INTO amp_custom_details(
- suggestion_id,
- advertiser,
- block_id,
- iab_category,
- impression_url,
- click_url,
- icon_id
- )
- VALUES(
- :suggestion_id,
- :advertiser,
- :block_id,
- :iab_category,
- :impression_url,
- :click_url,
- :icon_id
- )",
- named_params! {
- ":suggestion_id": suggestion_id,
- ":advertiser": amp.advertiser,
- ":block_id": amp.block_id,
- ":iab_category": amp.iab_category,
- ":impression_url": amp.impression_url,
- ":click_url": amp.click_url,
- ":icon_id": amp.icon_id,
- },
- )?;
+ amp_insert.execute(suggestion_id, amp)?;
}
DownloadedAmpWikipediaSuggestion::Wikipedia(wikipedia) => {
- self.conn.execute(
- "INSERT INTO wikipedia_custom_details(
- suggestion_id,
- icon_id
- )
- VALUES(
- :suggestion_id,
- :icon_id
- )",
- named_params! {
- ":suggestion_id": suggestion_id,
- ":icon_id": wikipedia.icon_id,
- },
- )?;
+ wiki_insert.execute(suggestion_id, wikipedia)?;
}
}
- for (index, keyword) in common_details.keywords.iter().enumerate() {
- self.conn.execute(
- "INSERT INTO keywords(
- keyword,
- suggestion_id,
- rank
- )
- VALUES(
- :keyword,
- :suggestion_id,
- :rank
- )",
- named_params! {
- ":keyword": keyword,
- ":rank": index,
- ":suggestion_id": suggestion_id,
- },
+ let mut full_keyword_inserter = FullKeywordInserter::new(self.conn, suggestion_id);
+ for keyword in common_details.keywords() {
+ let full_keyword_id = match (suggestion, keyword.full_keyword) {
+ // Try to associate full keyword data. Only do this for AMP, we decided to
+ // skip it for Wikipedia in https://bugzilla.mozilla.org/show_bug.cgi?id=1876217
+ (DownloadedAmpWikipediaSuggestion::Amp(_), Some(full_keyword)) => {
+ Some(full_keyword_inserter.maybe_insert(full_keyword)?)
+ }
+ _ => None,
+ };
+
+ keyword_insert.execute(
+ suggestion_id,
+ keyword.keyword,
+ full_keyword_id,
+ keyword.rank,
)?;
}
}
@@ -859,84 +795,32 @@ impl<'a> SuggestDao<'a> {
record_id: &SuggestRecordId,
suggestions: &[DownloadedAmpSuggestion],
) -> Result<()> {
+ let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?;
+ let mut amp_insert = AmpInsertStatement::new(self.conn)?;
+ let mut keyword_insert = KeywordInsertStatement::new(self.conn)?;
for suggestion in suggestions {
self.scope.err_if_interrupted()?;
let common_details = &suggestion.common_details;
- let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
- &format!(
- "INSERT INTO suggestions(
- record_id,
- provider,
- title,
- url,
- score
- )
- VALUES(
- :record_id,
- {},
- :title,
- :url,
- :score
- )
- RETURNING id",
- SuggestionProvider::AmpMobile as u8
- ),
- named_params! {
- ":record_id": record_id.as_str(),
- ":title": common_details.title,
- ":url": common_details.url,
- ":score": common_details.score.unwrap_or(DEFAULT_SUGGESTION_SCORE)
- },
- |row| row.get(0),
- true,
- )?;
- self.conn.execute(
- "INSERT INTO amp_custom_details(
- suggestion_id,
- advertiser,
- block_id,
- iab_category,
- impression_url,
- click_url,
- icon_id
- )
- VALUES(
- :suggestion_id,
- :advertiser,
- :block_id,
- :iab_category,
- :impression_url,
- :click_url,
- :icon_id
- )",
- named_params! {
- ":suggestion_id": suggestion_id,
- ":advertiser": suggestion.advertiser,
- ":block_id": suggestion.block_id,
- ":iab_category": suggestion.iab_category,
- ":impression_url": suggestion.impression_url,
- ":click_url": suggestion.click_url,
- ":icon_id": suggestion.icon_id,
- },
+ let suggestion_id = suggestion_insert.execute(
+ record_id,
+ &common_details.title,
+ &common_details.url,
+ common_details.score.unwrap_or(DEFAULT_SUGGESTION_SCORE),
+ SuggestionProvider::AmpMobile,
)?;
+ amp_insert.execute(suggestion_id, suggestion)?;
- for (index, keyword) in common_details.keywords.iter().enumerate() {
- self.conn.execute(
- "INSERT INTO keywords(
- keyword,
- suggestion_id,
- rank
- )
- VALUES(
- :keyword,
- :suggestion_id,
- :rank
- )",
- named_params! {
- ":keyword": keyword,
- ":rank": index,
- ":suggestion_id": suggestion_id,
- },
+ let mut full_keyword_inserter = FullKeywordInserter::new(self.conn, suggestion_id);
+ for keyword in common_details.keywords() {
+ let full_keyword_id = keyword
+ .full_keyword
+ .map(|full_keyword| full_keyword_inserter.maybe_insert(full_keyword))
+ .transpose()?;
+ keyword_insert.execute(
+ suggestion_id,
+ keyword.keyword,
+ full_keyword_id,
+ keyword.rank,
)?;
}
}
@@ -950,37 +834,16 @@ impl<'a> SuggestDao<'a> {
record_id: &SuggestRecordId,
suggestions: &[DownloadedPocketSuggestion],
) -> Result<()> {
+ let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?;
for suggestion in suggestions {
self.scope.err_if_interrupted()?;
- let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
- &format!(
- "INSERT INTO suggestions(
- record_id,
- provider,
- title,
- url,
- score
- )
- VALUES(
- :record_id,
- {},
- :title,
- :url,
- :score
- )
- RETURNING id",
- SuggestionProvider::Pocket as u8
- ),
- named_params! {
- ":record_id": record_id.as_str(),
- ":title": suggestion.title,
- ":url": suggestion.url,
- ":score": suggestion.score,
- },
- |row| row.get(0),
- true,
+ let suggestion_id = suggestion_insert.execute(
+ record_id,
+ &suggestion.title,
+ &suggestion.url,
+ suggestion.score,
+ SuggestionProvider::Pocket,
)?;
-
for ((rank, keyword), confidence) in suggestion
.high_confidence_keywords
.iter()
@@ -1030,35 +893,15 @@ impl<'a> SuggestDao<'a> {
record_id: &SuggestRecordId,
suggestions: &[DownloadedMdnSuggestion],
) -> Result<()> {
+ let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?;
for suggestion in suggestions {
self.scope.err_if_interrupted()?;
- let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
- &format!(
- "INSERT INTO suggestions(
- record_id,
- provider,
- title,
- url,
- score
- )
- VALUES(
- :record_id,
- {},
- :title,
- :url,
- :score
- )
- RETURNING id",
- SuggestionProvider::Mdn as u8
- ),
- named_params! {
- ":record_id": record_id.as_str(),
- ":title": suggestion.title,
- ":url": suggestion.url,
- ":score": suggestion.score,
- },
- |row| row.get(0),
- true,
+ let suggestion_id = suggestion_insert.execute(
+ record_id,
+ &suggestion.title,
+ &suggestion.url,
+ suggestion.score,
+ SuggestionProvider::Mdn,
)?;
self.conn.execute_cached(
"INSERT INTO mdn_custom_details(
@@ -1107,20 +950,14 @@ impl<'a> SuggestDao<'a> {
record_id: &SuggestRecordId,
data: &DownloadedWeatherData,
) -> Result<()> {
+ let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?;
self.scope.err_if_interrupted()?;
- let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
- &format!(
- "INSERT INTO suggestions(record_id, provider, title, url, score)
- VALUES(:record_id, {}, '', '', :score)
- RETURNING id",
- SuggestionProvider::Weather as u8
- ),
- named_params! {
- ":record_id": record_id.as_str(),
- ":score": data.weather.score.unwrap_or(DEFAULT_SUGGESTION_SCORE),
- },
- |row| row.get(0),
- true,
+ let suggestion_id = suggestion_insert.execute(
+ record_id,
+ "",
+ "",
+ data.weather.score.unwrap_or(DEFAULT_SUGGESTION_SCORE),
+ SuggestionProvider::Weather,
)?;
for (index, keyword) in data.weather.keywords.iter().enumerate() {
self.conn.execute(
@@ -1141,24 +978,43 @@ impl<'a> SuggestDao<'a> {
}
/// Inserts or replaces an icon for a suggestion into the database.
- pub fn put_icon(&mut self, icon_id: &str, data: &[u8]) -> Result<()> {
+ pub fn put_icon(&mut self, icon_id: &str, data: &[u8], mimetype: &str) -> Result<()> {
self.conn.execute(
"INSERT OR REPLACE INTO icons(
id,
- data
+ data,
+ mimetype
)
VALUES(
:id,
- :data
+ :data,
+ :mimetype
)",
named_params! {
":id": icon_id,
":data": data,
+ ":mimetype": mimetype,
},
)?;
Ok(())
}
+ pub fn insert_dismissal(&self, url: &str) -> Result<()> {
+ self.conn.execute(
+ "INSERT OR IGNORE INTO dismissed_suggestions(url)
+ VALUES(:url)",
+ named_params! {
+ ":url": url,
+ },
+ )?;
+ Ok(())
+ }
+
+ pub fn clear_dismissals(&self) -> Result<()> {
+ self.conn.execute("DELETE FROM dismissed_suggestions", ())?;
+ Ok(())
+ }
+
/// Deletes all suggestions associated with a Remote Settings record from
/// the database.
pub fn drop_suggestions(&mut self, record_id: &SuggestRecordId) -> Result<()> {
@@ -1196,12 +1052,7 @@ impl<'a> SuggestDao<'a> {
/// Clears the database, removing all suggestions, icons, and metadata.
pub fn clear(&mut self) -> Result<()> {
- self.conn.execute_batch(
- "DELETE FROM suggestions;
- DELETE FROM icons;
- DELETE FROM meta;",
- )?;
- Ok(())
+ Ok(clear_database(self.conn)?)
}
/// Returns the value associated with a metadata key.
@@ -1224,12 +1075,14 @@ impl<'a> SuggestDao<'a> {
/// Updates the last ingest timestamp if the given last modified time is
/// newer than the existing one recorded.
- pub fn put_last_ingest_if_newer(&mut self, record_last_modified: u64) -> Result<()> {
- let last_ingest = self
- .get_meta::<u64>(LAST_INGEST_META_KEY)?
- .unwrap_or_default();
+ pub fn put_last_ingest_if_newer(
+ &mut self,
+ last_ingest_key: &str,
+ record_last_modified: u64,
+ ) -> Result<()> {
+ let last_ingest = self.get_meta::<u64>(last_ingest_key)?.unwrap_or_default();
if record_last_modified > last_ingest {
- self.put_meta(LAST_INGEST_META_KEY, record_last_modified)?;
+ self.put_meta(last_ingest_key, record_last_modified)?;
}
Ok(())
@@ -1310,6 +1163,185 @@ impl<'a> SuggestDao<'a> {
}
}
+/// Helper struct to get full_keyword_ids for a suggestion
+///
+/// `FullKeywordInserter` handles repeated full keywords efficiently. The first instance will
+/// cause a row to be inserted into the database. Subsequent instances will return the same
+/// full_keyword_id.
+struct FullKeywordInserter<'a> {
+ conn: &'a Connection,
+ suggestion_id: i64,
+ last_inserted: Option<(&'a str, i64)>,
+}
+
+impl<'a> FullKeywordInserter<'a> {
+ fn new(conn: &'a Connection, suggestion_id: i64) -> Self {
+ Self {
+ conn,
+ suggestion_id,
+ last_inserted: None,
+ }
+ }
+
+ fn maybe_insert(&mut self, full_keyword: &'a str) -> rusqlite::Result<i64> {
+ match self.last_inserted {
+ Some((s, id)) if s == full_keyword => Ok(id),
+ _ => {
+ let full_keyword_id = self.conn.query_row_and_then(
+ "INSERT INTO full_keywords(
+ suggestion_id,
+ full_keyword
+ )
+ VALUES(
+ :suggestion_id,
+ :keyword
+ )
+ RETURNING id",
+ named_params! {
+ ":keyword": full_keyword,
+ ":suggestion_id": self.suggestion_id,
+ },
+ |row| row.get(0),
+ )?;
+ self.last_inserted = Some((full_keyword, full_keyword_id));
+ Ok(full_keyword_id)
+ }
+ }
+ }
+}
+
+// ======================== Statement types ========================
+//
+// During ingestion we can insert hundreds of thousands of rows. These types enable speedups by
+// allowing us to prepare a statement outside a loop and use it many times inside the loop.
+//
+// Each type wraps [Connection::prepare] and [Statement] to provide a simplified interface,
+// tailored to a specific query.
+//
+// This pattern is applicable for whenever we execute the same query repeatedly in a loop.
+// The impact scales with the number of loop iterations, which is why we currently don't do this
+// for providers like Mdn, Pocket, and Weather, which have relatively small number of records
+// compared to Amp/Wikipedia.
+
+struct SuggestionInsertStatement<'conn>(rusqlite::Statement<'conn>);
+
+impl<'conn> SuggestionInsertStatement<'conn> {
+ fn new(conn: &'conn Connection) -> Result<Self> {
+ Ok(Self(conn.prepare(
+ "INSERT INTO suggestions(
+ record_id,
+ title,
+ url,
+ score,
+ provider
+ )
+ VALUES(?, ?, ?, ?, ?)
+ RETURNING id",
+ )?))
+ }
+
+ /// Execute the insert and return the `suggestion_id` for the new row
+ fn execute(
+ &mut self,
+ record_id: &SuggestRecordId,
+ title: &str,
+ url: &str,
+ score: f64,
+ provider: SuggestionProvider,
+ ) -> Result<i64> {
+ Ok(self.0.query_row(
+ (record_id.as_str(), title, url, score, provider as u8),
+ |row| row.get(0),
+ )?)
+ }
+}
+
+struct AmpInsertStatement<'conn>(rusqlite::Statement<'conn>);
+
+impl<'conn> AmpInsertStatement<'conn> {
+ fn new(conn: &'conn Connection) -> Result<Self> {
+ Ok(Self(conn.prepare(
+ "INSERT INTO amp_custom_details(
+ suggestion_id,
+ advertiser,
+ block_id,
+ iab_category,
+ impression_url,
+ click_url,
+ icon_id
+ )
+ VALUES(?, ?, ?, ?, ?, ?, ?)
+ ",
+ )?))
+ }
+
+ fn execute(&mut self, suggestion_id: i64, amp: &DownloadedAmpSuggestion) -> Result<()> {
+ self.0.execute((
+ suggestion_id,
+ &amp.advertiser,
+ amp.block_id,
+ &amp.iab_category,
+ &amp.impression_url,
+ &amp.click_url,
+ &amp.icon_id,
+ ))?;
+ Ok(())
+ }
+}
+
+struct WikipediaInsertStatement<'conn>(rusqlite::Statement<'conn>);
+
+impl<'conn> WikipediaInsertStatement<'conn> {
+ fn new(conn: &'conn Connection) -> Result<Self> {
+ Ok(Self(conn.prepare(
+ "INSERT INTO wikipedia_custom_details(
+ suggestion_id,
+ icon_id
+ )
+ VALUES(?, ?)
+ ",
+ )?))
+ }
+
+ fn execute(
+ &mut self,
+ suggestion_id: i64,
+ wikipedia: &DownloadedWikipediaSuggestion,
+ ) -> Result<()> {
+ self.0.execute((suggestion_id, &wikipedia.icon_id))?;
+ Ok(())
+ }
+}
+
+struct KeywordInsertStatement<'conn>(rusqlite::Statement<'conn>);
+
+impl<'conn> KeywordInsertStatement<'conn> {
+ fn new(conn: &'conn Connection) -> Result<Self> {
+ Ok(Self(conn.prepare(
+ "INSERT INTO keywords(
+ suggestion_id,
+ keyword,
+ full_keyword_id,
+ rank
+ )
+ VALUES(?, ?, ?, ?)
+ ",
+ )?))
+ }
+
+ fn execute(
+ &mut self,
+ suggestion_id: i64,
+ keyword: &str,
+ full_keyword_id: Option<i64>,
+ rank: usize,
+ ) -> Result<()> {
+ self.0
+ .execute((suggestion_id, keyword, full_keyword_id, rank))?;
+ Ok(())
+ }
+}
+
fn provider_config_meta_key(provider: SuggestionProvider) -> String {
format!("{}{}", PROVIDER_CONFIG_META_KEY_PREFIX, provider as u8)
}
diff --git a/third_party/rust/suggest/src/lib.rs b/third_party/rust/suggest/src/lib.rs
index 23775b7dec..15746614d0 100644
--- a/third_party/rust/suggest/src/lib.rs
+++ b/third_party/rust/suggest/src/lib.rs
@@ -4,6 +4,8 @@
*/
use remote_settings::RemoteSettingsConfig;
+#[cfg(feature = "benchmark_api")]
+pub mod benchmarks;
mod config;
mod db;
mod error;
@@ -26,7 +28,7 @@ pub(crate) type Result<T> = std::result::Result<T, error::Error>;
pub type SuggestApiResult<T> = std::result::Result<T, error::SuggestApiError>;
/// A query for suggestions to show in the address bar.
-#[derive(Debug, Default)]
+#[derive(Clone, Debug, Default)]
pub struct SuggestionQuery {
pub keyword: String,
pub providers: Vec<SuggestionProvider>,
diff --git a/third_party/rust/suggest/src/pocket.rs b/third_party/rust/suggest/src/pocket.rs
index cf7070c62a..06fd4b012d 100644
--- a/third_party/rust/suggest/src/pocket.rs
+++ b/third_party/rust/suggest/src/pocket.rs
@@ -12,7 +12,7 @@ use rusqlite::{Result as RusqliteResult, ToSql};
/// substring for the suffix.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
#[repr(u8)]
-pub enum KeywordConfidence {
+pub(crate) enum KeywordConfidence {
Low = 0,
High = 1,
}
diff --git a/third_party/rust/suggest/src/provider.rs b/third_party/rust/suggest/src/provider.rs
index 1449c35c8a..2c0b6674cb 100644
--- a/third_party/rust/suggest/src/provider.rs
+++ b/third_party/rust/suggest/src/provider.rs
@@ -8,6 +8,8 @@ use rusqlite::{
Result as RusqliteResult,
};
+use crate::rs::SuggestRecordType;
+
/// A provider is a source of search suggestions.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
#[repr(u8)]
@@ -46,6 +48,52 @@ impl SuggestionProvider {
_ => None,
}
}
+
+ pub(crate) fn records_for_provider(&self) -> Vec<SuggestRecordType> {
+ match self {
+ SuggestionProvider::Amp => {
+ vec![
+ SuggestRecordType::AmpWikipedia,
+ SuggestRecordType::Icon,
+ SuggestRecordType::GlobalConfig,
+ ]
+ }
+ SuggestionProvider::Wikipedia => {
+ vec![
+ SuggestRecordType::AmpWikipedia,
+ SuggestRecordType::Icon,
+ SuggestRecordType::GlobalConfig,
+ ]
+ }
+ SuggestionProvider::Amo => {
+ vec![SuggestRecordType::Amo, SuggestRecordType::GlobalConfig]
+ }
+ SuggestionProvider::Pocket => {
+ vec![SuggestRecordType::Pocket, SuggestRecordType::GlobalConfig]
+ }
+ SuggestionProvider::Yelp => {
+ vec![
+ SuggestRecordType::Yelp,
+ SuggestRecordType::Icon,
+ SuggestRecordType::GlobalConfig,
+ ]
+ }
+ SuggestionProvider::Mdn => {
+ vec![SuggestRecordType::Mdn, SuggestRecordType::GlobalConfig]
+ }
+ SuggestionProvider::Weather => {
+ vec![SuggestRecordType::Weather, SuggestRecordType::GlobalConfig]
+ }
+ SuggestionProvider::AmpMobile => {
+ vec![
+ SuggestRecordType::AmpMobile,
+ SuggestRecordType::AmpWikipedia,
+ SuggestRecordType::Icon,
+ SuggestRecordType::GlobalConfig,
+ ]
+ }
+ }
+ }
}
impl ToSql for SuggestionProvider {
diff --git a/third_party/rust/suggest/src/rs.rs b/third_party/rust/suggest/src/rs.rs
index 198a8c43f6..4a733ece9d 100644
--- a/third_party/rust/suggest/src/rs.rs
+++ b/third_party/rust/suggest/src/rs.rs
@@ -31,7 +31,7 @@
//! the new suggestion in their results, and return `Suggestion::T` variants
//! as needed.
-use std::borrow::Cow;
+use std::{borrow::Cow, fmt};
use remote_settings::{GetItemsOptions, RemoteSettingsResponse};
use serde::{Deserialize, Deserializer};
@@ -47,6 +47,20 @@ pub(crate) const REMOTE_SETTINGS_COLLECTION: &str = "quicksuggest";
/// `mozilla-services/quicksuggest-rs` repo.
pub(crate) const SUGGESTIONS_PER_ATTACHMENT: u64 = 200;
+/// A list of default record types to download if nothing is specified.
+/// This currently defaults to all of the record types.
+pub(crate) const DEFAULT_RECORDS_TYPES: [SuggestRecordType; 9] = [
+ SuggestRecordType::Icon,
+ SuggestRecordType::AmpWikipedia,
+ SuggestRecordType::Amo,
+ SuggestRecordType::Pocket,
+ SuggestRecordType::Yelp,
+ SuggestRecordType::Mdn,
+ SuggestRecordType::Weather,
+ SuggestRecordType::GlobalConfig,
+ SuggestRecordType::AmpMobile,
+];
+
/// A trait for a client that downloads suggestions from Remote Settings.
///
/// This trait lets tests use a mock client.
@@ -102,6 +116,61 @@ pub(crate) enum SuggestRecord {
AmpMobile,
}
+/// Enum for the different record types that can be consumed.
+/// Extracting this from the serialization enum so that we can
+/// extend it to get type metadata.
+#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord)]
+pub enum SuggestRecordType {
+ Icon,
+ AmpWikipedia,
+ Amo,
+ Pocket,
+ Yelp,
+ Mdn,
+ Weather,
+ GlobalConfig,
+ AmpMobile,
+}
+
+impl From<SuggestRecord> for SuggestRecordType {
+ fn from(suggest_record: SuggestRecord) -> Self {
+ match suggest_record {
+ SuggestRecord::Amo => Self::Amo,
+ SuggestRecord::AmpWikipedia => Self::AmpWikipedia,
+ SuggestRecord::Icon => Self::Icon,
+ SuggestRecord::Mdn => Self::Mdn,
+ SuggestRecord::Pocket => Self::Pocket,
+ SuggestRecord::Weather(_) => Self::Weather,
+ SuggestRecord::Yelp => Self::Yelp,
+ SuggestRecord::GlobalConfig(_) => Self::GlobalConfig,
+ SuggestRecord::AmpMobile => Self::AmpMobile,
+ }
+ }
+}
+
+impl fmt::Display for SuggestRecordType {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self {
+ Self::Icon => write!(f, "icon"),
+ Self::AmpWikipedia => write!(f, "data"),
+ Self::Amo => write!(f, "amo-suggestions"),
+ Self::Pocket => write!(f, "pocket-suggestions"),
+ Self::Yelp => write!(f, "yelp-suggestions"),
+ Self::Mdn => write!(f, "mdn-suggestions"),
+ Self::Weather => write!(f, "weather"),
+ Self::GlobalConfig => write!(f, "configuration"),
+ Self::AmpMobile => write!(f, "amp-mobile-suggestions"),
+ }
+ }
+}
+
+impl SuggestRecordType {
+ /// Return the meta key for the last ingested record.
+ pub fn last_ingest_meta_key(&self) -> String {
+ format!("last_quicksuggest_ingest_{}", self)
+ }
+}
+
/// Represents either a single value, or a list of values. This is used to
/// deserialize downloaded attachments.
#[derive(Clone, Debug, Deserialize)]
@@ -156,16 +225,18 @@ where
}
/// Fields that are common to all downloaded suggestions.
-#[derive(Clone, Debug, Deserialize)]
+#[derive(Clone, Debug, Default, Deserialize)]
pub(crate) struct DownloadedSuggestionCommonDetails {
pub keywords: Vec<String>,
pub title: String,
pub url: String,
pub score: Option<f64>,
+ #[serde(default)]
+ pub full_keywords: Vec<(String, usize)>,
}
/// An AMP suggestion to ingest from an AMP-Wikipedia attachment.
-#[derive(Clone, Debug, Deserialize)]
+#[derive(Clone, Debug, Default, Deserialize)]
pub(crate) struct DownloadedAmpSuggestion {
#[serde(flatten)]
pub common_details: DownloadedSuggestionCommonDetails,
@@ -180,7 +251,7 @@ pub(crate) struct DownloadedAmpSuggestion {
}
/// A Wikipedia suggestion to ingest from an AMP-Wikipedia attachment.
-#[derive(Clone, Debug, Deserialize)]
+#[derive(Clone, Debug, Default, Deserialize)]
pub(crate) struct DownloadedWikipediaSuggestion {
#[serde(flatten)]
pub common_details: DownloadedSuggestionCommonDetails,
@@ -214,6 +285,34 @@ impl DownloadedAmpWikipediaSuggestion {
}
}
+impl DownloadedSuggestionCommonDetails {
+ /// Iterate over all keywords for this suggestion
+ pub fn keywords(&self) -> impl Iterator<Item = AmpKeyword<'_>> {
+ let full_keywords = self
+ .full_keywords
+ .iter()
+ .flat_map(|(full_keyword, repeat_for)| {
+ std::iter::repeat(Some(full_keyword.as_str())).take(*repeat_for)
+ })
+ .chain(std::iter::repeat(None)); // In case of insufficient full keywords, just fill in with infinite `None`s
+ //
+ self.keywords.iter().zip(full_keywords).enumerate().map(
+ move |(i, (keyword, full_keyword))| AmpKeyword {
+ rank: i,
+ keyword,
+ full_keyword,
+ },
+ )
+ }
+}
+
+#[derive(Debug, PartialEq, Eq)]
+pub(crate) struct AmpKeyword<'a> {
+ pub rank: usize,
+ pub keyword: &'a str,
+ pub full_keyword: Option<&'a str>,
+}
+
impl<'de> Deserialize<'de> for DownloadedAmpWikipediaSuggestion {
fn deserialize<D>(
deserializer: D,
@@ -344,3 +443,119 @@ where
{
String::deserialize(deserializer).map(|s| s.parse().ok())
}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn test_full_keywords() {
+ let suggestion = DownloadedAmpWikipediaSuggestion::Amp(DownloadedAmpSuggestion {
+ common_details: DownloadedSuggestionCommonDetails {
+ keywords: vec![
+ String::from("f"),
+ String::from("fo"),
+ String::from("foo"),
+ String::from("foo b"),
+ String::from("foo ba"),
+ String::from("foo bar"),
+ ],
+ full_keywords: vec![(String::from("foo"), 3), (String::from("foo bar"), 3)],
+ ..DownloadedSuggestionCommonDetails::default()
+ },
+ ..DownloadedAmpSuggestion::default()
+ });
+
+ assert_eq!(
+ Vec::from_iter(suggestion.common_details().keywords()),
+ vec![
+ AmpKeyword {
+ rank: 0,
+ keyword: "f",
+ full_keyword: Some("foo"),
+ },
+ AmpKeyword {
+ rank: 1,
+ keyword: "fo",
+ full_keyword: Some("foo"),
+ },
+ AmpKeyword {
+ rank: 2,
+ keyword: "foo",
+ full_keyword: Some("foo"),
+ },
+ AmpKeyword {
+ rank: 3,
+ keyword: "foo b",
+ full_keyword: Some("foo bar"),
+ },
+ AmpKeyword {
+ rank: 4,
+ keyword: "foo ba",
+ full_keyword: Some("foo bar"),
+ },
+ AmpKeyword {
+ rank: 5,
+ keyword: "foo bar",
+ full_keyword: Some("foo bar"),
+ },
+ ],
+ );
+ }
+
+ #[test]
+ fn test_missing_full_keywords() {
+ let suggestion = DownloadedAmpWikipediaSuggestion::Amp(DownloadedAmpSuggestion {
+ common_details: DownloadedSuggestionCommonDetails {
+ keywords: vec![
+ String::from("f"),
+ String::from("fo"),
+ String::from("foo"),
+ String::from("foo b"),
+ String::from("foo ba"),
+ String::from("foo bar"),
+ ],
+ // Only the first 3 keywords have full keywords associated with them
+ full_keywords: vec![(String::from("foo"), 3)],
+ ..DownloadedSuggestionCommonDetails::default()
+ },
+ ..DownloadedAmpSuggestion::default()
+ });
+
+ assert_eq!(
+ Vec::from_iter(suggestion.common_details().keywords()),
+ vec![
+ AmpKeyword {
+ rank: 0,
+ keyword: "f",
+ full_keyword: Some("foo"),
+ },
+ AmpKeyword {
+ rank: 1,
+ keyword: "fo",
+ full_keyword: Some("foo"),
+ },
+ AmpKeyword {
+ rank: 2,
+ keyword: "foo",
+ full_keyword: Some("foo"),
+ },
+ AmpKeyword {
+ rank: 3,
+ keyword: "foo b",
+ full_keyword: None,
+ },
+ AmpKeyword {
+ rank: 4,
+ keyword: "foo ba",
+ full_keyword: None,
+ },
+ AmpKeyword {
+ rank: 5,
+ keyword: "foo bar",
+ full_keyword: None,
+ },
+ ],
+ );
+ }
+}
diff --git a/third_party/rust/suggest/src/schema.rs b/third_party/rust/suggest/src/schema.rs
index 95d987c09e..b304363de5 100644
--- a/third_party/rust/suggest/src/schema.rs
+++ b/third_party/rust/suggest/src/schema.rs
@@ -13,7 +13,9 @@ use sql_support::open_database::{self, ConnectionInitializer};
/// 1. Bump this version.
/// 2. Add a migration from the old version to the new version in
/// [`SuggestConnectionInitializer::upgrade_from`].
-pub const VERSION: u32 = 14;
+/// a. If suggestions should be re-ingested after the migration, call `clear_database()` inside
+/// the migration.
+pub const VERSION: u32 = 18;
/// The current Suggest database schema.
pub const SQL: &str = "
@@ -25,10 +27,19 @@ pub const SQL: &str = "
CREATE TABLE keywords(
keyword TEXT NOT NULL,
suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
+ full_keyword_id INTEGER NULL REFERENCES full_keywords(id) ON DELETE SET NULL,
rank INTEGER NOT NULL,
PRIMARY KEY (keyword, suggestion_id)
) WITHOUT ROWID;
+ -- full keywords are what we display to the user when a (partial) keyword matches
+ -- The FK to suggestion_id makes it so full keywords get deleted when the parent suggestion is deleted.
+ CREATE TABLE full_keywords(
+ id INTEGER PRIMARY KEY,
+ suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
+ full_keyword TEXT NOT NULL
+ );
+
CREATE TABLE prefix_keywords(
keyword_prefix TEXT NOT NULL,
keyword_suffix TEXT NOT NULL DEFAULT '',
@@ -79,7 +90,8 @@ pub const SQL: &str = "
CREATE TABLE icons(
id TEXT PRIMARY KEY,
- data BLOB NOT NULL
+ data BLOB NOT NULL,
+ mimetype TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE yelp_subjects(
@@ -111,6 +123,10 @@ pub const SQL: &str = "
description TEXT NOT NULL,
FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
);
+
+ CREATE TABLE dismissed_suggestions (
+ url TEXT PRIMARY KEY
+ ) WITHOUT ROWID;
";
/// Initializes an SQLite connection to the Suggest database, performing
@@ -139,15 +155,178 @@ impl ConnectionInitializer for SuggestConnectionInitializer {
Ok(db.execute_batch(SQL)?)
}
- fn upgrade_from(&self, _db: &Transaction<'_>, version: u32) -> open_database::Result<()> {
+ fn upgrade_from(&self, tx: &Transaction<'_>, version: u32) -> open_database::Result<()> {
match version {
- 1..=13 => {
+ 1..=15 => {
// Treat databases with these older schema versions as corrupt,
// so that they'll be replaced by a fresh, empty database with
// the current schema.
Err(open_database::Error::Corrupt)
}
+ 16 => {
+ tx.execute(
+ "
+ CREATE TABLE dismissed_suggestions (
+ url_hash INTEGER PRIMARY KEY
+ ) WITHOUT ROWID;",
+ (),
+ )?;
+ Ok(())
+ }
+ 17 => {
+ tx.execute(
+ "
+ DROP TABLE dismissed_suggestions;
+ CREATE TABLE dismissed_suggestions (
+ url TEXT PRIMARY KEY
+ ) WITHOUT ROWID;",
+ (),
+ )?;
+ Ok(())
+ }
_ => Err(open_database::Error::IncompatibleVersion(version)),
}
}
}
+
+/// Clears the database, removing all suggestions, icons, and metadata.
+pub fn clear_database(db: &Connection) -> rusqlite::Result<()> {
+ db.execute_batch(
+ "
+ DELETE FROM meta;
+ DELETE FROM suggestions;
+ DELETE FROM icons;
+ DELETE FROM yelp_subjects;
+ DELETE FROM yelp_modifiers;
+ DELETE FROM yelp_location_signs;
+ DELETE FROM yelp_custom_details;
+ ",
+ )
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use sql_support::open_database::test_utils::MigratedDatabaseFile;
+
+ // Snapshot of the v16 schema. We use this to test that we can migrate from there to the
+ // current schema.
+ const V16_SCHEMA: &str = r#"
+ CREATE TABLE meta(
+ key TEXT PRIMARY KEY,
+ value NOT NULL
+ ) WITHOUT ROWID;
+
+ CREATE TABLE keywords(
+ keyword TEXT NOT NULL,
+ suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
+ full_keyword_id INTEGER NULL REFERENCES full_keywords(id) ON DELETE SET NULL,
+ rank INTEGER NOT NULL,
+ PRIMARY KEY (keyword, suggestion_id)
+ ) WITHOUT ROWID;
+
+ -- full keywords are what we display to the user when a (partial) keyword matches
+ -- The FK to suggestion_id makes it so full keywords get deleted when the parent suggestion is deleted.
+ CREATE TABLE full_keywords(
+ id INTEGER PRIMARY KEY,
+ suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
+ full_keyword TEXT NOT NULL
+ );
+
+ CREATE TABLE prefix_keywords(
+ keyword_prefix TEXT NOT NULL,
+ keyword_suffix TEXT NOT NULL DEFAULT '',
+ confidence INTEGER NOT NULL DEFAULT 0,
+ rank INTEGER NOT NULL,
+ suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
+ PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id)
+ ) WITHOUT ROWID;
+
+ CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank);
+
+ CREATE TABLE suggestions(
+ id INTEGER PRIMARY KEY,
+ record_id TEXT NOT NULL,
+ provider INTEGER NOT NULL,
+ title TEXT NOT NULL,
+ url TEXT NOT NULL,
+ score REAL NOT NULL
+ );
+
+ CREATE TABLE amp_custom_details(
+ suggestion_id INTEGER PRIMARY KEY,
+ advertiser TEXT NOT NULL,
+ block_id INTEGER NOT NULL,
+ iab_category TEXT NOT NULL,
+ impression_url TEXT NOT NULL,
+ click_url TEXT NOT NULL,
+ icon_id TEXT NOT NULL,
+ FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
+ );
+
+ CREATE TABLE wikipedia_custom_details(
+ suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE,
+ icon_id TEXT NOT NULL
+ );
+
+ CREATE TABLE amo_custom_details(
+ suggestion_id INTEGER PRIMARY KEY,
+ description TEXT NOT NULL,
+ guid TEXT NOT NULL,
+ icon_url TEXT NOT NULL,
+ rating TEXT,
+ number_of_ratings INTEGER NOT NULL,
+ FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
+ );
+
+ CREATE INDEX suggestions_record_id ON suggestions(record_id);
+
+ CREATE TABLE icons(
+ id TEXT PRIMARY KEY,
+ data BLOB NOT NULL,
+ mimetype TEXT NOT NULL
+ ) WITHOUT ROWID;
+
+ CREATE TABLE yelp_subjects(
+ keyword TEXT PRIMARY KEY,
+ record_id TEXT NOT NULL
+ ) WITHOUT ROWID;
+
+ CREATE TABLE yelp_modifiers(
+ type INTEGER NOT NULL,
+ keyword TEXT NOT NULL,
+ record_id TEXT NOT NULL,
+ PRIMARY KEY (type, keyword)
+ ) WITHOUT ROWID;
+
+ CREATE TABLE yelp_location_signs(
+ keyword TEXT PRIMARY KEY,
+ need_location INTEGER NOT NULL,
+ record_id TEXT NOT NULL
+ ) WITHOUT ROWID;
+
+ CREATE TABLE yelp_custom_details(
+ icon_id TEXT PRIMARY KEY,
+ score REAL NOT NULL,
+ record_id TEXT NOT NULL
+ ) WITHOUT ROWID;
+
+ CREATE TABLE mdn_custom_details(
+ suggestion_id INTEGER PRIMARY KEY,
+ description TEXT NOT NULL,
+ FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
+ );
+
+ PRAGMA user_version=16;
+"#;
+
+ /// Test running all schema upgrades from V16, which was the first schema with a "real"
+ /// migration.
+ ///
+ /// If an upgrade fails, then this test will fail with a panic.
+ #[test]
+ fn test_all_upgrades() {
+ let db_file = MigratedDatabaseFile::new(SuggestConnectionInitializer, V16_SCHEMA);
+ db_file.run_all_upgrades();
+ }
+}
diff --git a/third_party/rust/suggest/src/store.rs b/third_party/rust/suggest/src/store.rs
index e1f437e8c5..c55cffc7f5 100644
--- a/third_party/rust/suggest/src/store.rs
+++ b/third_party/rust/suggest/src/store.rs
@@ -4,7 +4,7 @@
*/
use std::{
- collections::BTreeMap,
+ collections::{BTreeMap, BTreeSet},
path::{Path, PathBuf},
sync::Arc,
};
@@ -24,13 +24,15 @@ use serde::{de::DeserializeOwned, Deserialize, Serialize};
use crate::{
config::{SuggestGlobalConfig, SuggestProviderConfig},
db::{
- ConnectionType, SuggestDao, SuggestDb, LAST_INGEST_META_KEY, UNPARSABLE_RECORDS_META_KEY,
+ ConnectionType, SuggestDao, SuggestDb, LAST_INGEST_META_UNPARSABLE,
+ UNPARSABLE_RECORDS_META_KEY,
},
error::Error,
provider::SuggestionProvider,
rs::{
- SuggestAttachment, SuggestRecord, SuggestRecordId, SuggestRemoteSettingsClient,
- REMOTE_SETTINGS_COLLECTION, SUGGESTIONS_PER_ATTACHMENT,
+ SuggestAttachment, SuggestRecord, SuggestRecordId, SuggestRecordType,
+ SuggestRemoteSettingsClient, DEFAULT_RECORDS_TYPES, REMOTE_SETTINGS_COLLECTION,
+ SUGGESTIONS_PER_ATTACHMENT,
},
schema::VERSION,
Result, SuggestApiResult, Suggestion, SuggestionQuery,
@@ -48,7 +50,6 @@ pub struct SuggestStoreBuilder(Mutex<SuggestStoreBuilderInner>);
#[derive(Default)]
struct SuggestStoreBuilderInner {
data_path: Option<String>,
- cache_path: Option<String>,
remote_settings_config: Option<RemoteSettingsConfig>,
}
@@ -68,8 +69,8 @@ impl SuggestStoreBuilder {
self
}
- pub fn cache_path(self: Arc<Self>, path: String) -> Arc<Self> {
- self.0.lock().cache_path = Some(path);
+ pub fn cache_path(self: Arc<Self>, _path: String) -> Arc<Self> {
+ // We used to use this, but we're not using it anymore, just ignore the call
self
}
@@ -85,10 +86,6 @@ impl SuggestStoreBuilder {
.data_path
.clone()
.ok_or_else(|| Error::SuggestStoreBuilder("data_path not specified".to_owned()))?;
- let cache_path = inner
- .cache_path
- .clone()
- .ok_or_else(|| Error::SuggestStoreBuilder("cache_path not specified".to_owned()))?;
let settings_client =
remote_settings::Client::new(inner.remote_settings_config.clone().unwrap_or_else(
|| RemoteSettingsConfig {
@@ -98,7 +95,7 @@ impl SuggestStoreBuilder {
},
))?;
Ok(Arc::new(SuggestStore {
- inner: SuggestStoreInner::new(data_path, cache_path, settings_client),
+ inner: SuggestStoreInner::new(data_path, settings_client),
}))
}
}
@@ -182,7 +179,7 @@ impl SuggestStore {
)?)
}()?;
Ok(Self {
- inner: SuggestStoreInner::new("".to_owned(), path.to_owned(), settings_client),
+ inner: SuggestStoreInner::new(path.to_owned(), settings_client),
})
}
@@ -192,6 +189,22 @@ impl SuggestStore {
self.inner.query(query)
}
+ /// Dismiss a suggestion
+ ///
+ /// Dismissed suggestions will not be returned again
+ ///
+ /// In the case of AMP suggestions this should be the raw URL.
+ #[handle_error(Error)]
+ pub fn dismiss_suggestion(&self, suggestion_url: String) -> SuggestApiResult<()> {
+ self.inner.dismiss_suggestion(suggestion_url)
+ }
+
+ /// Clear dismissed suggestions
+ #[handle_error(Error)]
+ pub fn clear_dismissed_suggestions(&self) -> SuggestApiResult<()> {
+ self.inner.clear_dismissed_suggestions()
+ }
+
/// Interrupts any ongoing queries.
///
/// This should be called when the user types new input into the address
@@ -238,6 +251,7 @@ pub struct SuggestIngestionConstraints {
/// Because of how suggestions are partitioned in Remote Settings, this is a
/// soft limit, and the store might ingest more than requested.
pub max_suggestions: Option<u64>,
+ pub providers: Option<Vec<SuggestionProvider>>,
}
/// The implementation of the store. This is generic over the Remote Settings
@@ -250,23 +264,14 @@ pub(crate) struct SuggestStoreInner<S> {
/// It's not currently used because not all consumers pass this in yet.
#[allow(unused)]
data_path: PathBuf,
- /// Path to the temporary SQL database.
- ///
- /// This stores things that should be deleted when the user clears their cache.
- cache_path: PathBuf,
dbs: OnceCell<SuggestStoreDbs>,
settings_client: S,
}
impl<S> SuggestStoreInner<S> {
- fn new(
- data_path: impl Into<PathBuf>,
- cache_path: impl Into<PathBuf>,
- settings_client: S,
- ) -> Self {
+ pub fn new(data_path: impl Into<PathBuf>, settings_client: S) -> Self {
Self {
data_path: data_path.into(),
- cache_path: cache_path.into(),
dbs: OnceCell::new(),
settings_client,
}
@@ -276,7 +281,7 @@ impl<S> SuggestStoreInner<S> {
/// they're not already open.
fn dbs(&self) -> Result<&SuggestStoreDbs> {
self.dbs
- .get_or_try_init(|| SuggestStoreDbs::open(&self.cache_path))
+ .get_or_try_init(|| SuggestStoreDbs::open(&self.data_path))
}
fn query(&self, query: SuggestionQuery) -> Result<Vec<Suggestion>> {
@@ -286,6 +291,17 @@ impl<S> SuggestStoreInner<S> {
self.dbs()?.reader.read(|dao| dao.fetch_suggestions(&query))
}
+ fn dismiss_suggestion(&self, suggestion_url: String) -> Result<()> {
+ self.dbs()?
+ .writer
+ .write(|dao| dao.insert_dismissal(&suggestion_url))
+ }
+
+ fn clear_dismissed_suggestions(&self) -> Result<()> {
+ self.dbs()?.writer.write(|dao| dao.clear_dismissals())?;
+ Ok(())
+ }
+
fn interrupt(&self) {
if let Some(dbs) = self.dbs.get() {
// Only interrupt if the databases are already open.
@@ -315,7 +331,7 @@ impl<S> SuggestStoreInner<S>
where
S: SuggestRemoteSettingsClient,
{
- fn ingest(&self, constraints: SuggestIngestionConstraints) -> Result<()> {
+ pub fn ingest(&self, constraints: SuggestIngestionConstraints) -> Result<()> {
let writer = &self.dbs()?.writer;
if let Some(unparsable_records) =
@@ -330,26 +346,58 @@ where
for unparsable_ids in all_unparsable_ids.chunks(UNPARSABLE_IDS_PER_REQUEST) {
let mut options = GetItemsOptions::new();
for unparsable_id in unparsable_ids {
- options.eq("id", *unparsable_id);
+ options.filter_eq("id", *unparsable_id);
}
let records_chunk = self
.settings_client
.get_records_with_options(&options)?
.records;
- self.ingest_records(writer, &records_chunk)?;
+ self.ingest_records(LAST_INGEST_META_UNPARSABLE, writer, &records_chunk)?;
}
}
+ // use std::collections::BTreeSet;
+ let ingest_record_types = if let Some(rt) = &constraints.providers {
+ rt.iter()
+ .flat_map(|x| x.records_for_provider())
+ .collect::<BTreeSet<_>>()
+ .into_iter()
+ .collect()
+ } else {
+ DEFAULT_RECORDS_TYPES.to_vec()
+ };
+
+ for ingest_record_type in ingest_record_types {
+ self.ingest_records_by_type(ingest_record_type, writer, &constraints)?;
+ }
+
+ Ok(())
+ }
+
+ fn ingest_records_by_type(
+ &self,
+ ingest_record_type: SuggestRecordType,
+ writer: &SuggestDb,
+ constraints: &SuggestIngestionConstraints,
+ ) -> Result<()> {
let mut options = GetItemsOptions::new();
+
// Remote Settings returns records in descending modification order
// (newest first), but we want them in ascending order (oldest first),
// so that we can eventually resume downloading where we left off.
options.sort("last_modified", SortOrder::Ascending);
- if let Some(last_ingest) = writer.read(|dao| dao.get_meta::<u64>(LAST_INGEST_META_KEY))? {
+
+ options.filter_eq("type", ingest_record_type.to_string());
+
+ // Get the last ingest value. This is the max of the last_ingest_keys
+ // that are in the database.
+ if let Some(last_ingest) = writer
+ .read(|dao| dao.get_meta::<u64>(ingest_record_type.last_ingest_meta_key().as_str()))?
+ {
// Only download changes since our last ingest. If our last ingest
// was interrupted, we'll pick up where we left off.
- options.gt("last_modified", last_ingest.to_string());
+ options.filter_gt("last_modified", last_ingest.to_string());
}
if let Some(max_suggestions) = constraints.max_suggestions {
@@ -363,39 +411,56 @@ where
.settings_client
.get_records_with_options(&options)?
.records;
- self.ingest_records(writer, &records)?;
-
+ self.ingest_records(&ingest_record_type.last_ingest_meta_key(), writer, &records)?;
Ok(())
}
- fn ingest_records(&self, writer: &SuggestDb, records: &[RemoteSettingsRecord]) -> Result<()> {
+ fn ingest_records(
+ &self,
+ last_ingest_key: &str,
+ writer: &SuggestDb,
+ records: &[RemoteSettingsRecord],
+ ) -> Result<()> {
for record in records {
let record_id = SuggestRecordId::from(&record.id);
if record.deleted {
// If the entire record was deleted, drop all its suggestions
// and advance the last ingest time.
- writer.write(|dao| dao.handle_deleted_record(record))?;
+ writer.write(|dao| dao.handle_deleted_record(last_ingest_key, record))?;
continue;
}
let Ok(fields) =
serde_json::from_value(serde_json::Value::Object(record.fields.clone()))
else {
// We don't recognize this record's type, so we don't know how
- // to ingest its suggestions. Record this in the meta table.
+ // to ingest its suggestions. Skip processing this record.
writer.write(|dao| dao.handle_unparsable_record(record))?;
continue;
};
match fields {
SuggestRecord::AmpWikipedia => {
- self.ingest_attachment(writer, record, |dao, record_id, suggestions| {
- dao.insert_amp_wikipedia_suggestions(record_id, suggestions)
- })?;
+ self.ingest_attachment(
+ // TODO: Currently re-creating the last_ingest_key because using last_ingest_meta
+ // breaks the tests (particularly the unparsable functionality). So, keeping
+ // a direct reference until we remove the "unparsable" functionality.
+ &SuggestRecordType::AmpWikipedia.last_ingest_meta_key(),
+ writer,
+ record,
+ |dao, record_id, suggestions| {
+ dao.insert_amp_wikipedia_suggestions(record_id, suggestions)
+ },
+ )?;
}
SuggestRecord::AmpMobile => {
- self.ingest_attachment(writer, record, |dao, record_id, suggestions| {
- dao.insert_amp_mobile_suggestions(record_id, suggestions)
- })?;
+ self.ingest_attachment(
+ &SuggestRecordType::AmpMobile.last_ingest_meta_key(),
+ writer,
+ record,
+ |dao, record_id, suggestions| {
+ dao.insert_amp_mobile_suggestions(record_id, suggestions)
+ },
+ )?;
}
SuggestRecord::Icon => {
let (Some(icon_id), Some(attachment)) =
@@ -404,47 +469,79 @@ where
// An icon record should have an icon ID and an
// attachment. Icons that don't have these are
// malformed, so skip to the next record.
- writer.write(|dao| dao.put_last_ingest_if_newer(record.last_modified))?;
+ writer.write(|dao| {
+ dao.put_last_ingest_if_newer(
+ &SuggestRecordType::Icon.last_ingest_meta_key(),
+ record.last_modified,
+ )
+ })?;
continue;
};
let data = self.settings_client.get_attachment(&attachment.location)?;
writer.write(|dao| {
- dao.put_icon(icon_id, &data)?;
- dao.handle_ingested_record(record)
+ dao.put_icon(icon_id, &data, &attachment.mimetype)?;
+ dao.handle_ingested_record(
+ &SuggestRecordType::Icon.last_ingest_meta_key(),
+ record,
+ )
})?;
}
SuggestRecord::Amo => {
- self.ingest_attachment(writer, record, |dao, record_id, suggestions| {
- dao.insert_amo_suggestions(record_id, suggestions)
- })?;
+ self.ingest_attachment(
+ &SuggestRecordType::Amo.last_ingest_meta_key(),
+ writer,
+ record,
+ |dao, record_id, suggestions| {
+ dao.insert_amo_suggestions(record_id, suggestions)
+ },
+ )?;
}
SuggestRecord::Pocket => {
- self.ingest_attachment(writer, record, |dao, record_id, suggestions| {
- dao.insert_pocket_suggestions(record_id, suggestions)
- })?;
+ self.ingest_attachment(
+ &SuggestRecordType::Pocket.last_ingest_meta_key(),
+ writer,
+ record,
+ |dao, record_id, suggestions| {
+ dao.insert_pocket_suggestions(record_id, suggestions)
+ },
+ )?;
}
SuggestRecord::Yelp => {
- self.ingest_attachment(writer, record, |dao, record_id, suggestions| {
- match suggestions.first() {
+ self.ingest_attachment(
+ &SuggestRecordType::Yelp.last_ingest_meta_key(),
+ writer,
+ record,
+ |dao, record_id, suggestions| match suggestions.first() {
Some(suggestion) => dao.insert_yelp_suggestions(record_id, suggestion),
None => Ok(()),
- }
- })?;
+ },
+ )?;
}
SuggestRecord::Mdn => {
- self.ingest_attachment(writer, record, |dao, record_id, suggestions| {
- dao.insert_mdn_suggestions(record_id, suggestions)
- })?;
+ self.ingest_attachment(
+ &SuggestRecordType::Mdn.last_ingest_meta_key(),
+ writer,
+ record,
+ |dao, record_id, suggestions| {
+ dao.insert_mdn_suggestions(record_id, suggestions)
+ },
+ )?;
}
SuggestRecord::Weather(data) => {
- self.ingest_record(writer, record, |dao, record_id| {
- dao.insert_weather_data(record_id, &data)
- })?;
+ self.ingest_record(
+ &SuggestRecordType::Weather.last_ingest_meta_key(),
+ writer,
+ record,
+ |dao, record_id| dao.insert_weather_data(record_id, &data),
+ )?;
}
SuggestRecord::GlobalConfig(config) => {
- self.ingest_record(writer, record, |dao, _| {
- dao.put_global_config(&SuggestGlobalConfig::from(&config))
- })?;
+ self.ingest_record(
+ &SuggestRecordType::GlobalConfig.last_ingest_meta_key(),
+ writer,
+ record,
+ |dao, _| dao.put_global_config(&SuggestGlobalConfig::from(&config)),
+ )?;
}
}
}
@@ -453,6 +550,7 @@ where
fn ingest_record(
&self,
+ last_ingest_key: &str,
writer: &SuggestDb,
record: &RemoteSettingsRecord,
ingestion_handler: impl FnOnce(&mut SuggestDao<'_>, &SuggestRecordId) -> Result<()>,
@@ -469,12 +567,13 @@ where
// Ingest (or re-ingest) all data in the record.
ingestion_handler(dao, &record_id)?;
- dao.handle_ingested_record(record)
+ dao.handle_ingested_record(last_ingest_key, record)
})
}
fn ingest_attachment<T>(
&self,
+ last_ingest_key: &str,
writer: &SuggestDb,
record: &RemoteSettingsRecord,
ingestion_handler: impl FnOnce(&mut SuggestDao<'_>, &SuggestRecordId, &[T]) -> Result<()>,
@@ -486,20 +585,72 @@ where
// This method should be called only when a record is expected to
// have an attachment. If it doesn't have one, it's malformed, so
// skip to the next record.
- writer.write(|dao| dao.put_last_ingest_if_newer(record.last_modified))?;
+ writer
+ .write(|dao| dao.put_last_ingest_if_newer(last_ingest_key, record.last_modified))?;
return Ok(());
};
let attachment_data = self.settings_client.get_attachment(&attachment.location)?;
match serde_json::from_slice::<SuggestAttachment<T>>(&attachment_data) {
- Ok(attachment) => self.ingest_record(writer, record, |dao, record_id| {
- ingestion_handler(dao, record_id, attachment.suggestions())
- }),
+ Ok(attachment) => {
+ self.ingest_record(last_ingest_key, writer, record, |dao, record_id| {
+ ingestion_handler(dao, record_id, attachment.suggestions())
+ })
+ }
Err(_) => writer.write(|dao| dao.handle_unparsable_record(record)),
}
}
}
+#[cfg(feature = "benchmark_api")]
+impl<S> SuggestStoreInner<S>
+where
+ S: SuggestRemoteSettingsClient,
+{
+ pub fn into_settings_client(self) -> S {
+ self.settings_client
+ }
+
+ pub fn ensure_db_initialized(&self) {
+ self.dbs().unwrap();
+ }
+
+ pub fn benchmark_ingest_records_by_type(&self, ingest_record_type: SuggestRecordType) {
+ self.ingest_records_by_type(
+ ingest_record_type,
+ &self.dbs().unwrap().writer,
+ &SuggestIngestionConstraints::default(),
+ )
+ .unwrap()
+ }
+
+ pub fn table_row_counts(&self) -> Vec<(String, u32)> {
+ use sql_support::ConnExt;
+
+ // Note: since this is just used for debugging, use unwrap to simplify the error handling.
+ let reader = &self.dbs().unwrap().reader;
+ let conn = reader.conn.lock();
+ let table_names: Vec<String> = conn
+ .query_rows_and_then(
+ "SELECT name FROM sqlite_master where type = 'table'",
+ (),
+ |row| row.get(0),
+ )
+ .unwrap();
+ let mut table_names_with_counts: Vec<(String, u32)> = table_names
+ .into_iter()
+ .map(|name| {
+ let count: u32 = conn
+ .query_one(&format!("SELECT COUNT(*) FROM {name}"))
+ .unwrap();
+ (name, count)
+ })
+ .collect();
+ table_names_with_counts.sort_by(|a, b| (b.1.cmp(&a.1)));
+ table_names_with_counts
+ }
+}
+
/// Holds a store's open connections to the Suggest database.
struct SuggestStoreDbs {
/// A read-write connection used to update the database with new data.
@@ -549,10 +700,6 @@ mod tests {
"file:test_store_data_{}?mode=memory&cache=shared",
hex::encode(unique_suffix),
),
- format!(
- "file:test_store_cache_{}?mode=memory&cache=shared",
- hex::encode(unique_suffix),
- ),
settings_client,
)
}
@@ -721,7 +868,14 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(15));
+ assert_eq!(
+ dao.get_meta::<u64>(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ Some(15)
+ );
expect![[r#"
[
Amp {
@@ -729,6 +883,7 @@ mod tests {
url: "https://www.lph-nm.biz",
raw_url: "https://www.lph-nm.biz",
icon: None,
+ icon_mimetype: None,
full_keyword: "los",
block_id: 0,
advertiser: "Los Pollos Hermanos",
@@ -834,6 +989,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -872,6 +1030,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "penne",
block_id: 0,
advertiser: "Good Place Eats",
@@ -895,6 +1056,267 @@ mod tests {
Ok(())
}
+ #[test]
+ fn ingest_full_keywords() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "1",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "2",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-2.json",
+ "mimetype": "application/json",
+ "location": "data-2.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "3",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-3.json",
+ "mimetype": "application/json",
+ "location": "data-3.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "4",
+ "type": "amp-mobile-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-4.json",
+ "mimetype": "application/json",
+ "location": "data-4.json",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ // AMP attachment with full keyword data
+ .with_data(
+ "data-1.json",
+ json!([{
+ "id": 0,
+ "advertiser": "Los Pollos Hermanos",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"],
+ "full_keywords": [
+ // Full keyword for the first 4 keywords
+ ("los pollos", 4),
+ // Full keyword for the next 2 keywords
+ ("los pollos hermanos (restaurant)", 2),
+ ],
+ "title": "Los Pollos Hermanos - Albuquerque - 1",
+ "url": "https://www.lph-nm.biz",
+ "icon": "5678",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }]),
+ )?
+ // AMP attachment without a full keyword
+ .with_data(
+ "data-2.json",
+ json!([{
+ "id": 1,
+ "advertiser": "Los Pollos Hermanos",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"],
+ "title": "Los Pollos Hermanos - Albuquerque - 2",
+ "url": "https://www.lph-nm.biz",
+ "icon": "5678",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }]),
+ )?
+ // Wikipedia attachment with full keyword data. We should ignore the full
+ // keyword data for Wikipedia suggestions
+ .with_data(
+ "data-3.json",
+ json!([{
+ "id": 2,
+ "advertiser": "Wikipedia",
+ "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"],
+ "title": "Los Pollos Hermanos - Albuquerque - Wiki",
+ "full_keywords": [
+ ("Los Pollos Hermanos - Albuquerque", 6),
+ ],
+ "url": "https://www.lph-nm.biz",
+ "icon": "5678",
+ "score": 0.3,
+ }]),
+ )?
+ // Amp mobile suggestion, this is essentially the same as 1, except for the SuggestionProvider
+ .with_data(
+ "data-4.json",
+ json!([{
+ "id": 0,
+ "advertiser": "Los Pollos Hermanos",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"],
+ "full_keywords": [
+ // Full keyword for the first 4 keywords
+ ("los pollos", 4),
+ // Full keyword for the next 2 keywords
+ ("los pollos hermanos (restaurant)", 2),
+ ],
+ "title": "Los Pollos Hermanos - Albuquerque - 4",
+ "url": "https://www.lph-nm.biz",
+ "icon": "5678",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }]),
+ )?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ store.dbs()?.reader.read(|dao| {
+ // This one should match the first full keyword for the first AMP item.
+ expect![[r#"
+ [
+ Amp {
+ title: "Los Pollos Hermanos - Albuquerque - 1",
+ url: "https://www.lph-nm.biz",
+ raw_url: "https://www.lph-nm.biz",
+ icon: None,
+ icon_mimetype: None,
+ full_keyword: "los pollos",
+ block_id: 0,
+ advertiser: "Los Pollos Hermanos",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ Amp {
+ title: "Los Pollos Hermanos - Albuquerque - 2",
+ url: "https://www.lph-nm.biz",
+ raw_url: "https://www.lph-nm.biz",
+ icon: None,
+ icon_mimetype: None,
+ full_keyword: "los",
+ block_id: 1,
+ advertiser: "Los Pollos Hermanos",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "lo".into(),
+ providers: vec![SuggestionProvider::Amp],
+ limit: None,
+ })?);
+ // This one should match the second full keyword for the first AMP item.
+ expect![[r#"
+ [
+ Amp {
+ title: "Los Pollos Hermanos - Albuquerque - 1",
+ url: "https://www.lph-nm.biz",
+ raw_url: "https://www.lph-nm.biz",
+ icon: None,
+ icon_mimetype: None,
+ full_keyword: "los pollos hermanos (restaurant)",
+ block_id: 0,
+ advertiser: "Los Pollos Hermanos",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ Amp {
+ title: "Los Pollos Hermanos - Albuquerque - 2",
+ url: "https://www.lph-nm.biz",
+ raw_url: "https://www.lph-nm.biz",
+ icon: None,
+ icon_mimetype: None,
+ full_keyword: "los pollos hermanos",
+ block_id: 1,
+ advertiser: "Los Pollos Hermanos",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "los pollos h".into(),
+ providers: vec![SuggestionProvider::Amp],
+ limit: None,
+ })?);
+ // This one matches a Wikipedia suggestion, so the full keyword should be ignored
+ expect![[r#"
+ [
+ Wikipedia {
+ title: "Los Pollos Hermanos - Albuquerque - Wiki",
+ url: "https://www.lph-nm.biz",
+ icon: None,
+ icon_mimetype: None,
+ full_keyword: "los",
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "los".into(),
+ providers: vec![SuggestionProvider::Wikipedia],
+ limit: None,
+ })?);
+ // This one matches a Wikipedia suggestion, so the full keyword should be ignored
+ expect![[r#"
+ [
+ Amp {
+ title: "Los Pollos Hermanos - Albuquerque - 4",
+ url: "https://www.lph-nm.biz",
+ raw_url: "https://www.lph-nm.biz",
+ icon: None,
+ icon_mimetype: None,
+ full_keyword: "los pollos hermanos (restaurant)",
+ block_id: 0,
+ advertiser: "Los Pollos Hermanos",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "los pollos h".into(),
+ providers: vec![SuggestionProvider::AmpMobile],
+ limit: None,
+ })?);
+
+ Ok(())
+ })?;
+
+ Ok(())
+ }
+
/// Tests ingesting a data attachment containing a single suggestion,
/// instead of an array of suggestions.
#[test]
@@ -941,6 +1363,7 @@ mod tests {
url: "https://www.lasagna.restaurant",
raw_url: "https://www.lasagna.restaurant",
icon: None,
+ icon_mimetype: None,
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -1014,7 +1437,14 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(15u64));
+ assert_eq!(
+ dao.get_meta(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ Some(15u64)
+ );
expect![[r#"
[
Amp {
@@ -1022,6 +1452,7 @@ mod tests {
url: "https://www.lasagna.restaurant",
raw_url: "https://www.lasagna.restaurant",
icon: None,
+ icon_mimetype: None,
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -1084,8 +1515,15 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
- store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(30u64));
+ store.dbs()?.reader.read(|dao: &SuggestDao<'_>| {
+ assert_eq!(
+ dao.get_meta(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ Some(30u64)
+ );
assert!(dao
.fetch_suggestions(&SuggestionQuery {
keyword: "la".into(),
@@ -1100,6 +1538,7 @@ mod tests {
url: "https://www.lph-nm.biz",
raw_url: "https://www.lph-nm.biz",
icon: None,
+ icon_mimetype: None,
full_keyword: "los pollos",
block_id: 0,
advertiser: "Los Pollos Hermanos",
@@ -1123,6 +1562,7 @@ mod tests {
url: "https://penne.biz",
raw_url: "https://penne.biz",
icon: None,
+ icon_mimetype: None,
full_keyword: "penne",
block_id: 0,
advertiser: "Good Place Eats",
@@ -1219,7 +1659,10 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(25u64));
+ assert_eq!(
+ dao.get_meta(SuggestRecordType::Icon.last_ingest_meta_key().as_str())?,
+ Some(25u64)
+ );
assert_eq!(
dao.conn
.query_one::<i64>("SELECT count(*) FROM suggestions")?,
@@ -1259,7 +1702,10 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(35u64));
+ assert_eq!(
+ dao.get_meta(SuggestRecordType::Icon.last_ingest_meta_key().as_str())?,
+ Some(35u64)
+ );
expect![[r#"
[
Amp {
@@ -1286,6 +1732,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -1327,6 +1776,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "los",
block_id: 0,
advertiser: "Los Pollos Hermanos",
@@ -1422,7 +1874,10 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(15u64));
+ assert_eq!(
+ dao.get_meta(SuggestRecordType::Amo.last_ingest_meta_key().as_str())?,
+ Some(15u64)
+ );
expect![[r#"
[
@@ -1513,7 +1968,10 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(30u64));
+ assert_eq!(
+ dao.get_meta(SuggestRecordType::Amo.last_ingest_meta_key().as_str())?,
+ Some(30u64)
+ );
expect![[r#"
[
@@ -1648,13 +2106,24 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(20));
assert_eq!(
dao.conn
.query_one::<i64>("SELECT count(*) FROM suggestions")?,
1
);
assert_eq!(dao.conn.query_one::<i64>("SELECT count(*) FROM icons")?, 1);
+ assert_eq!(
+ dao.get_meta(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ Some(15)
+ );
+ assert_eq!(
+ dao.get_meta(SuggestRecordType::Icon.last_ingest_meta_key().as_str())?,
+ Some(20)
+ );
Ok(())
})?;
@@ -1674,14 +2143,16 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(30));
assert_eq!(
dao.conn
.query_one::<i64>("SELECT count(*) FROM suggestions")?,
0
);
assert_eq!(dao.conn.query_one::<i64>("SELECT count(*) FROM icons")?, 0);
-
+ assert_eq!(
+ dao.get_meta(SuggestRecordType::Icon.last_ingest_meta_key().as_str())?,
+ Some(30)
+ );
Ok(())
})?;
@@ -1717,6 +2188,7 @@ mod tests {
for (max_suggestions, expected_limit) in table {
store.ingest(SuggestIngestionConstraints {
max_suggestions: Some(max_suggestions),
+ providers: Some(vec![SuggestionProvider::Amp]),
})?;
let actual_limit = store
.settings_client
@@ -1772,7 +2244,14 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(15));
+ assert_eq!(
+ dao.get_meta::<u64>(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ Some(15)
+ );
assert_eq!(
dao.conn
.query_one::<i64>("SELECT count(*) FROM suggestions")?,
@@ -1789,7 +2268,14 @@ mod tests {
store.clear()?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, None);
+ assert_eq!(
+ dao.get_meta::<u64>(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ None
+ );
assert_eq!(
dao.conn
.query_one::<i64>("SELECT count(*) FROM suggestions")?,
@@ -2080,6 +2566,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -2143,6 +2632,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "multimatch",
},
]
@@ -2199,6 +2691,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "multimatch",
},
]
@@ -2268,6 +2763,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -2349,6 +2847,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "california",
},
Wikipedia {
@@ -2370,6 +2871,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "california",
},
]
@@ -2403,6 +2907,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "california",
},
]
@@ -2614,6 +3121,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2647,6 +3157,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2680,6 +3193,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2735,6 +3251,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2779,6 +3298,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2812,6 +3334,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2856,6 +3381,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2889,6 +3417,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2933,6 +3464,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2966,6 +3500,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2999,6 +3536,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -3032,6 +3572,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -3076,6 +3619,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -3109,6 +3655,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -3186,6 +3735,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -3219,6 +3771,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -3252,6 +3807,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -3285,6 +3843,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -3318,6 +3879,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -3362,6 +3926,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: false,
@@ -3395,6 +3962,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -3428,6 +3998,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: false,
@@ -3483,6 +4056,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: false,
@@ -3516,6 +4092,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: false,
@@ -3549,6 +4128,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: false,
@@ -3593,6 +4175,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: false,
@@ -3734,6 +4319,7 @@ mod tests {
url: "https://www.lasagna.restaurant",
raw_url: "https://www.lasagna.restaurant",
icon: None,
+ icon_mimetype: None,
full_keyword: "amp wiki match",
block_id: 0,
advertiser: "Good Place Eats",
@@ -3762,6 +4348,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "amp wiki match",
},
Amp {
@@ -3769,6 +4358,7 @@ mod tests {
url: "https://penne.biz",
raw_url: "https://penne.biz",
icon: None,
+ icon_mimetype: None,
full_keyword: "amp wiki match",
block_id: 0,
advertiser: "Good Place Eats",
@@ -3801,6 +4391,7 @@ mod tests {
url: "https://www.lasagna.restaurant",
raw_url: "https://www.lasagna.restaurant",
icon: None,
+ icon_mimetype: None,
full_keyword: "amp wiki match",
block_id: 0,
advertiser: "Good Place Eats",
@@ -3829,6 +4420,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "amp wiki match",
},
]
@@ -3873,6 +4467,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "pocket wiki match",
},
Pocket {
@@ -4193,6 +4790,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -4234,6 +4834,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -4275,6 +4878,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -4304,6 +4910,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -4365,7 +4974,10 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(45));
+ assert_eq!(
+ dao.get_meta::<u64>(SuggestRecordType::Icon.last_ingest_meta_key().as_str())?,
+ Some(45)
+ );
assert_eq!(
dao.conn
.query_one::<i64>("SELECT count(*) FROM suggestions")?,
@@ -4400,16 +5012,19 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(30));
+ assert_eq!(
+ dao.get_meta("last_quicksuggest_ingest_unparsable")?,
+ Some(30)
+ );
expect![[r#"
Some(
UnparsableRecords(
{
"clippy-2": UnparsableRecord {
- schema_version: 14,
+ schema_version: 18,
},
"fancy-new-suggestions-1": UnparsableRecord {
- schema_version: 14,
+ schema_version: 18,
},
},
),
@@ -4469,16 +5084,19 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(30));
+ assert_eq!(
+ dao.get_meta("last_quicksuggest_ingest_unparsable")?,
+ Some(30)
+ );
expect![[r#"
Some(
UnparsableRecords(
{
"clippy-2": UnparsableRecord {
- schema_version: 14,
+ schema_version: 18,
},
"fancy-new-suggestions-1": UnparsableRecord {
- schema_version: 14,
+ schema_version: 18,
},
},
),
@@ -4505,13 +5123,105 @@ mod tests {
let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
store.dbs()?.writer.write(|dao| {
- dao.put_meta(LAST_INGEST_META_KEY, 30)?;
+ dao.put_meta(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str(),
+ 30,
+ )?;
Ok(())
})?;
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(30));
+ assert_eq!(
+ dao.get_meta::<u64>(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ Some(30)
+ );
+ Ok(())
+ })?;
+
+ Ok(())
+ }
+
+ /// Tests that we only ingest providers that we're concerned with.
+ #[test]
+ fn ingest_constraints_provider() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "data",
+ "last_modified": 15,
+ }, {
+ "id": "icon-1",
+ "type": "icon",
+ "last_modified": 30,
+ }]))?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+ store.dbs()?.writer.write(|dao| {
+ // Check that existing data is updated properly.
+ dao.put_meta(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str(),
+ 10,
+ )?;
+ Ok(())
+ })?;
+
+ let constraints = SuggestIngestionConstraints {
+ max_suggestions: Some(100),
+ providers: Some(vec![SuggestionProvider::Amp, SuggestionProvider::Pocket]),
+ };
+ store.ingest(constraints)?;
+
+ store.dbs()?.reader.read(|dao| {
+ assert_eq!(
+ dao.get_meta::<u64>(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ Some(15)
+ );
+ assert_eq!(
+ dao.get_meta::<u64>(SuggestRecordType::Icon.last_ingest_meta_key().as_str())?,
+ Some(30)
+ );
+ assert_eq!(
+ dao.get_meta::<u64>(SuggestRecordType::Pocket.last_ingest_meta_key().as_str())?,
+ None
+ );
+ assert_eq!(
+ dao.get_meta::<u64>(SuggestRecordType::Amo.last_ingest_meta_key().as_str())?,
+ None
+ );
+ assert_eq!(
+ dao.get_meta::<u64>(SuggestRecordType::Yelp.last_ingest_meta_key().as_str())?,
+ None
+ );
+ assert_eq!(
+ dao.get_meta::<u64>(SuggestRecordType::Mdn.last_ingest_meta_key().as_str())?,
+ None
+ );
+ assert_eq!(
+ dao.get_meta::<u64>(SuggestRecordType::AmpMobile.last_ingest_meta_key().as_str())?,
+ None
+ );
+ assert_eq!(
+ dao.get_meta::<u64>(
+ SuggestRecordType::GlobalConfig
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ None
+ );
Ok(())
})?;
@@ -4582,10 +5292,10 @@ mod tests {
UnparsableRecords(
{
"clippy-2": UnparsableRecord {
- schema_version: 14,
+ schema_version: 18,
},
"fancy-new-suggestions-1": UnparsableRecord {
- schema_version: 14,
+ schema_version: 18,
},
},
),
@@ -4671,7 +5381,7 @@ mod tests {
UnparsableRecords(
{
"invalid-attachment": UnparsableRecord {
- schema_version: 14,
+ schema_version: 18,
},
},
),
@@ -4683,7 +5393,14 @@ mod tests {
// Test that the valid record was read
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(15));
+ assert_eq!(
+ dao.get_meta(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ Some(15)
+ );
expect![[r#"
[
Amp {
@@ -4691,6 +5408,7 @@ mod tests {
url: "https://www.lph-nm.biz",
raw_url: "https://www.lph-nm.biz",
icon: None,
+ icon_mimetype: None,
full_keyword: "los",
block_id: 0,
advertiser: "Los Pollos Hermanos",
@@ -4899,6 +5617,7 @@ mod tests {
url: "https://www.yelp.com/search?find_desc=ramen",
title: "ramen",
icon: None,
+ icon_mimetype: None,
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -5313,4 +6032,204 @@ mod tests {
Ok(())
}
+
+ #[test]
+ fn remove_dismissed_suggestions() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+
+ }, {
+ "id": "data-2",
+ "type": "amo-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-2.json",
+ "mimetype": "application/json",
+ "location": "data-2.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "data-3",
+ "type": "pocket-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-3.json",
+ "mimetype": "application/json",
+ "location": "data-3.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "data-5",
+ "type": "mdn-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-5.json",
+ "mimetype": "application/json",
+ "location": "data-5.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "data-6",
+ "type": "amp-mobile-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-6.json",
+ "mimetype": "application/json",
+ "location": "data-6.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "icon-2",
+ "type": "icon",
+ "last_modified": 20,
+ "attachment": {
+ "filename": "icon-2.png",
+ "mimetype": "image/png",
+ "location": "icon-2.png",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "icon-3",
+ "type": "icon",
+ "last_modified": 25,
+ "attachment": {
+ "filename": "icon-3.png",
+ "mimetype": "image/png",
+ "location": "icon-3.png",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ .with_data(
+ "data-1.json",
+ json!([{
+ "id": 0,
+ "advertiser": "Good Place Eats",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["cats"],
+ "title": "Lasagna Come Out Tomorrow",
+ "url": "https://www.lasagna.restaurant",
+ "icon": "2",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.31
+ }, {
+ "id": 0,
+ "advertiser": "Wikipedia",
+ "iab_category": "5 - Education",
+ "keywords": ["cats"],
+ "title": "California",
+ "url": "https://wikipedia.org/California",
+ "icon": "3"
+ }]),
+ )?
+ .with_data(
+ "data-2.json",
+ json!([
+ {
+ "description": "amo suggestion",
+ "url": "https://addons.mozilla.org/en-US/firefox/addon/example",
+ "guid": "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
+ "keywords": ["cats"],
+ "title": "Firefox Relay",
+ "icon": "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b",
+ "rating": "4.9",
+ "number_of_ratings": 888,
+ "score": 0.32
+ },
+ ]),
+ )?
+ .with_data(
+ "data-3.json",
+ json!([
+ {
+ "description": "pocket suggestion",
+ "url": "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women",
+ "lowConfidenceKeywords": [],
+ "highConfidenceKeywords": ["cats"],
+ "title": "‘It’s Not Just Burnout:’ How Grind Culture Fails Women",
+ "score": 0.33
+ },
+ ]),
+ )?
+ .with_data(
+ "data-5.json",
+ json!([
+ {
+ "description": "Javascript Array",
+ "url": "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array",
+ "keywords": ["cats"],
+ "title": "Array",
+ "score": 0.24
+ },
+ ]),
+ )?
+ .with_data(
+ "data-6.json",
+ json!([
+ {
+ "id": 0,
+ "advertiser": "Good Place Eats",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["cats"],
+ "title": "Mobile - Lasagna Come Out Tomorrow",
+ "url": "https://www.lasagna.restaurant",
+ "icon": "3",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.26
+ }
+ ]),
+ )?
+ .with_icon("icon-2.png", "i-am-an-icon".as_bytes().into())
+ .with_icon("icon-3.png", "also-an-icon".as_bytes().into());
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ // A query for cats should return all suggestions
+ let query = SuggestionQuery {
+ keyword: "cats".into(),
+ providers: vec![
+ SuggestionProvider::Amp,
+ SuggestionProvider::Wikipedia,
+ SuggestionProvider::Amo,
+ SuggestionProvider::Pocket,
+ SuggestionProvider::Mdn,
+ SuggestionProvider::AmpMobile,
+ ],
+ limit: None,
+ };
+ let results = store.query(query.clone())?;
+ assert_eq!(results.len(), 6);
+
+ for result in results {
+ store.dismiss_suggestion(result.raw_url().unwrap().to_string())?;
+ }
+
+ // After dismissing the suggestions, the next query shouldn't return them
+ assert_eq!(store.query(query.clone())?.len(), 0);
+
+ // Clearing the dismissals should cause them to be returned again
+ store.clear_dismissed_suggestions()?;
+ assert_eq!(store.query(query.clone())?.len(), 6);
+
+ Ok(())
+ }
}
diff --git a/third_party/rust/suggest/src/suggest.udl b/third_party/rust/suggest/src/suggest.udl
index 1cd8911a48..4a4e3fe9a0 100644
--- a/third_party/rust/suggest/src/suggest.udl
+++ b/third_party/rust/suggest/src/suggest.udl
@@ -40,6 +40,7 @@ interface Suggestion {
string url,
string raw_url,
sequence<u8>? icon,
+ string? icon_mimetype,
string full_keyword,
i64 block_id,
string advertiser,
@@ -59,6 +60,7 @@ interface Suggestion {
string title,
string url,
sequence<u8>? icon,
+ string? icon_mimetype,
string full_keyword
);
Amo(
@@ -75,6 +77,7 @@ interface Suggestion {
string url,
string title,
sequence<u8>? icon,
+ string? icon_mimetype,
f64 score,
boolean has_location_sign,
boolean subject_exact_match,
@@ -99,6 +102,7 @@ dictionary SuggestionQuery {
dictionary SuggestIngestionConstraints {
u64? max_suggestions = null;
+ sequence<SuggestionProvider>? providers = null;
};
dictionary SuggestGlobalConfig {
@@ -119,6 +123,12 @@ interface SuggestStore {
[Throws=SuggestApiError]
sequence<Suggestion> query(SuggestionQuery query);
+ [Throws=SuggestApiError]
+ void dismiss_suggestion(string raw_suggestion_url);
+
+ [Throws=SuggestApiError]
+ void clear_dismissed_suggestions();
+
void interrupt();
[Throws=SuggestApiError]
@@ -140,6 +150,7 @@ interface SuggestStoreBuilder {
[Self=ByArc]
SuggestStoreBuilder data_path(string path);
+ // Deprecated: this is no longer used by the suggest component.
[Self=ByArc]
SuggestStoreBuilder cache_path(string path);
diff --git a/third_party/rust/suggest/src/suggestion.rs b/third_party/rust/suggest/src/suggestion.rs
index f5425e3c73..c0b45524c7 100644
--- a/third_party/rust/suggest/src/suggestion.rs
+++ b/third_party/rust/suggest/src/suggestion.rs
@@ -29,6 +29,7 @@ pub enum Suggestion {
url: String,
raw_url: String,
icon: Option<Vec<u8>>,
+ icon_mimetype: Option<String>,
full_keyword: String,
block_id: i64,
advertiser: String,
@@ -48,6 +49,7 @@ pub enum Suggestion {
title: String,
url: String,
icon: Option<Vec<u8>>,
+ icon_mimetype: Option<String>,
full_keyword: String,
},
Amo {
@@ -64,6 +66,7 @@ pub enum Suggestion {
url: String,
title: String,
icon: Option<Vec<u8>>,
+ icon_mimetype: Option<String>,
score: f64,
has_location_sign: bool,
subject_exact_match: bool,
@@ -106,6 +109,37 @@ impl Ord for Suggestion {
}
}
+impl Suggestion {
+ /// Get the URL for this suggestion, if present
+ pub fn url(&self) -> Option<&str> {
+ match self {
+ Self::Amp { url, .. }
+ | Self::Pocket { url, .. }
+ | Self::Wikipedia { url, .. }
+ | Self::Amo { url, .. }
+ | Self::Yelp { url, .. }
+ | Self::Mdn { url, .. } => Some(url),
+ _ => None,
+ }
+ }
+
+ /// Get the raw URL for this suggestion, if present
+ ///
+ /// This is the same as `url` except for Amp. In that case, `url` is the URL after being
+ /// "cooked" using template interpolation, while `raw_url` is the URL template.
+ pub fn raw_url(&self) -> Option<&str> {
+ match self {
+ Self::Amp { raw_url: url, .. }
+ | Self::Pocket { url, .. }
+ | Self::Wikipedia { url, .. }
+ | Self::Amo { url, .. }
+ | Self::Yelp { url, .. }
+ | Self::Mdn { url, .. } => Some(url),
+ _ => None,
+ }
+ }
+}
+
impl Eq for Suggestion {}
/// Replaces all template parameters in a "raw" sponsored suggestion URL,
/// producing a "cooked" URL with real values.
diff --git a/third_party/rust/suggest/src/yelp.rs b/third_party/rust/suggest/src/yelp.rs
index 2413709c67..0eb6c10d56 100644
--- a/third_party/rust/suggest/src/yelp.rs
+++ b/third_party/rust/suggest/src/yelp.rs
@@ -52,7 +52,7 @@ const SUBJECT_PREFIX_MATCH_THRESHOLD: usize = 2;
impl<'a> SuggestDao<'a> {
/// Inserts the suggestions for Yelp attachment into the database.
- pub fn insert_yelp_suggestions(
+ pub(crate) fn insert_yelp_suggestions(
&mut self,
record_id: &SuggestRecordId,
suggestion: &DownloadedYelpSuggestion,
@@ -130,7 +130,10 @@ impl<'a> SuggestDao<'a> {
}
/// Fetch Yelp suggestion from given user's query.
- pub fn fetch_yelp_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
+ pub(crate) fn fetch_yelp_suggestions(
+ &self,
+ query: &SuggestionQuery,
+ ) -> Result<Vec<Suggestion>> {
if !query.providers.contains(&SuggestionProvider::Yelp) {
return Ok(vec![]);
}
@@ -144,7 +147,7 @@ impl<'a> SuggestDao<'a> {
let Some((subject, subject_exact_match)) = self.find_subject(query_string)? else {
return Ok(vec![]);
};
- let (icon, score) = self.fetch_custom_details()?;
+ let (icon, icon_mimetype, score) = self.fetch_custom_details()?;
let builder = SuggestionBuilder {
subject: &subject,
subject_exact_match,
@@ -154,6 +157,7 @@ impl<'a> SuggestDao<'a> {
location: None,
need_location: false,
icon,
+ icon_mimetype,
score,
};
return Ok(vec![builder.into()]);
@@ -185,7 +189,7 @@ impl<'a> SuggestDao<'a> {
return Ok(vec![]);
};
- let (icon, score) = self.fetch_custom_details()?;
+ let (icon, icon_mimetype, score) = self.fetch_custom_details()?;
let builder = SuggestionBuilder {
subject: &subject,
subject_exact_match,
@@ -195,6 +199,7 @@ impl<'a> SuggestDao<'a> {
location,
need_location,
icon,
+ icon_mimetype,
score,
};
Ok(vec![builder.into()])
@@ -204,6 +209,7 @@ impl<'a> SuggestDao<'a> {
/// It returns the location tuple as follows:
/// (
/// Option<Vec<u8>>: Icon data. If not found, returns None.
+ /// Option<String>: Mimetype of the icon data. If not found, returns None.
/// f64: Reflects score field in the yelp_custom_details table.
/// )
///
@@ -212,11 +218,11 @@ impl<'a> SuggestDao<'a> {
/// on Remote Settings. The following query will perform a table scan against
/// `yelp_custom_details` followed by an index search against `icons`,
/// which should be fine since there is only one record in the first table.
- fn fetch_custom_details(&self) -> Result<(Option<Vec<u8>>, f64)> {
+ fn fetch_custom_details(&self) -> Result<(Option<Vec<u8>>, Option<String>, f64)> {
let result = self.conn.query_row_and_then_cachable(
r#"
SELECT
- i.data, y.score
+ i.data, i.mimetype, y.score
FROM
yelp_custom_details y
LEFT JOIN
@@ -226,7 +232,13 @@ impl<'a> SuggestDao<'a> {
1
"#,
(),
- |row| -> Result<_> { Ok((row.get::<_, Option<Vec<u8>>>(0)?, row.get::<_, f64>(1)?)) },
+ |row| -> Result<_> {
+ Ok((
+ row.get::<_, Option<Vec<u8>>>(0)?,
+ row.get::<_, Option<String>>(1)?,
+ row.get::<_, f64>(2)?,
+ ))
+ },
true,
)?;
@@ -439,6 +451,7 @@ struct SuggestionBuilder<'a> {
location: Option<String>,
need_location: bool,
icon: Option<Vec<u8>>,
+ icon_mimetype: Option<String>,
score: f64,
}
@@ -488,6 +501,7 @@ impl<'a> From<SuggestionBuilder<'a>> for Suggestion {
url,
title,
icon: builder.icon,
+ icon_mimetype: builder.icon_mimetype,
score: builder.score,
has_location_sign: location_modifier.is_none() && builder.location_sign.is_some(),
subject_exact_match: builder.subject_exact_match,