summaryrefslogtreecommitdiffstats
path: root/third_party/rust/suggest/src
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/suggest/src')
-rw-r--r--third_party/rust/suggest/src/benchmarks/README.md28
-rw-r--r--third_party/rust/suggest/src/benchmarks/client.rs97
-rw-r--r--third_party/rust/suggest/src/benchmarks/ingest.rs116
-rw-r--r--third_party/rust/suggest/src/benchmarks/mod.rs40
-rw-r--r--third_party/rust/suggest/src/bin/debug_ingestion_sizes.rs9
-rw-r--r--third_party/rust/suggest/src/config.rs5
-rw-r--r--third_party/rust/suggest/src/db.rs720
-rw-r--r--third_party/rust/suggest/src/lib.rs4
-rw-r--r--third_party/rust/suggest/src/pocket.rs2
-rw-r--r--third_party/rust/suggest/src/provider.rs48
-rw-r--r--third_party/rust/suggest/src/rs.rs223
-rw-r--r--third_party/rust/suggest/src/schema.rs187
-rw-r--r--third_party/rust/suggest/src/store.rs1113
-rw-r--r--third_party/rust/suggest/src/suggest.udl11
-rw-r--r--third_party/rust/suggest/src/suggestion.rs34
-rw-r--r--third_party/rust/suggest/src/yelp.rs28
16 files changed, 2207 insertions, 458 deletions
diff --git a/third_party/rust/suggest/src/benchmarks/README.md b/third_party/rust/suggest/src/benchmarks/README.md
new file mode 100644
index 0000000000..45150d8413
--- /dev/null
+++ b/third_party/rust/suggest/src/benchmarks/README.md
@@ -0,0 +1,28 @@
+# Suggest benchmarking code
+
+Use `cargo suggest-bench` to run these benchmarks.
+
+The main benchmarking code lives here, while the criterion integration code lives in the `benches/`
+directory.
+
+## Benchmarks
+
+### ingest-[provider-type]
+
+Time it takes to ingest all suggestions for a provider type on an empty database.
+The bechmark downloads network resources in advance in order to exclude the network request time
+from these measurements.
+
+### Benchmarks it would be nice to have
+
+- Ingestion with synthetic data. This would isolate the benchmark from changes to the RS database.
+- Fetching suggestions
+
+## cargo suggest-debug-ingestion-sizes
+
+Run this to get row counts for all database tables. This can be very useful for improving
+benchmarks, since targeting the tables with the largest number of rows will usually lead to the
+largest improvements.
+
+The command also prints out the size of all remote-settings attachments, which can be good to
+optimize on its own since it represents the amount of data user's need to download.
diff --git a/third_party/rust/suggest/src/benchmarks/client.rs b/third_party/rust/suggest/src/benchmarks/client.rs
new file mode 100644
index 0000000000..f5a21fd9cc
--- /dev/null
+++ b/third_party/rust/suggest/src/benchmarks/client.rs
@@ -0,0 +1,97 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use crate::{rs::SuggestRemoteSettingsClient, Result};
+use parking_lot::Mutex;
+use remote_settings::{Client, GetItemsOptions, RemoteSettingsConfig, RemoteSettingsResponse};
+use std::collections::HashMap;
+
+/// Remotes settings client that runs during the benchmark warm-up phase.
+///
+/// This should be used to run a full ingestion.
+/// Then it can be converted into a [RemoteSettingsBenchmarkClient], which allows benchmark code to exclude the network request time.
+/// [RemoteSettingsBenchmarkClient] implements [SuggestRemoteSettingsClient] by getting data from a HashMap rather than hitting the network.
+pub struct RemoteSettingsWarmUpClient {
+ client: Client,
+ pub get_records_responses: Mutex<HashMap<GetItemsOptions, RemoteSettingsResponse>>,
+ pub get_attachment_responses: Mutex<HashMap<String, Vec<u8>>>,
+}
+
+impl RemoteSettingsWarmUpClient {
+ pub fn new() -> Self {
+ Self {
+ client: Client::new(RemoteSettingsConfig {
+ server_url: None,
+ bucket_name: None,
+ collection_name: crate::rs::REMOTE_SETTINGS_COLLECTION.into(),
+ })
+ .unwrap(),
+ get_records_responses: Mutex::new(HashMap::new()),
+ get_attachment_responses: Mutex::new(HashMap::new()),
+ }
+ }
+}
+
+impl Default for RemoteSettingsWarmUpClient {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl SuggestRemoteSettingsClient for RemoteSettingsWarmUpClient {
+ fn get_records_with_options(
+ &self,
+ options: &GetItemsOptions,
+ ) -> Result<RemoteSettingsResponse> {
+ let response = self.client.get_records_with_options(options)?;
+ self.get_records_responses
+ .lock()
+ .insert(options.clone(), response.clone());
+ Ok(response)
+ }
+
+ fn get_attachment(&self, location: &str) -> Result<Vec<u8>> {
+ let response = self.client.get_attachment(location)?;
+ self.get_attachment_responses
+ .lock()
+ .insert(location.to_string(), response.clone());
+ Ok(response)
+ }
+}
+
+#[derive(Clone)]
+pub struct RemoteSettingsBenchmarkClient {
+ pub get_records_responses: HashMap<GetItemsOptions, RemoteSettingsResponse>,
+ pub get_attachment_responses: HashMap<String, Vec<u8>>,
+}
+
+impl SuggestRemoteSettingsClient for RemoteSettingsBenchmarkClient {
+ fn get_records_with_options(
+ &self,
+ options: &GetItemsOptions,
+ ) -> Result<RemoteSettingsResponse> {
+ Ok(self
+ .get_records_responses
+ .get(options)
+ .unwrap_or_else(|| panic!("options not found: {options:?}"))
+ .clone())
+ }
+
+ fn get_attachment(&self, location: &str) -> Result<Vec<u8>> {
+ Ok(self
+ .get_attachment_responses
+ .get(location)
+ .unwrap_or_else(|| panic!("location not found: {location:?}"))
+ .clone())
+ }
+}
+
+impl From<RemoteSettingsWarmUpClient> for RemoteSettingsBenchmarkClient {
+ fn from(warm_up_client: RemoteSettingsWarmUpClient) -> Self {
+ Self {
+ get_records_responses: warm_up_client.get_records_responses.into_inner(),
+ get_attachment_responses: warm_up_client.get_attachment_responses.into_inner(),
+ }
+ }
+}
diff --git a/third_party/rust/suggest/src/benchmarks/ingest.rs b/third_party/rust/suggest/src/benchmarks/ingest.rs
new file mode 100644
index 0000000000..bbefc6a00a
--- /dev/null
+++ b/third_party/rust/suggest/src/benchmarks/ingest.rs
@@ -0,0 +1,116 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use crate::{
+ benchmarks::{
+ client::{RemoteSettingsBenchmarkClient, RemoteSettingsWarmUpClient},
+ BenchmarkWithInput,
+ },
+ rs::SuggestRecordType,
+ store::SuggestStoreInner,
+ SuggestIngestionConstraints,
+};
+use std::sync::atomic::{AtomicU32, Ordering};
+
+static DB_FILE_COUNTER: AtomicU32 = AtomicU32::new(0);
+
+pub struct IngestBenchmark {
+ temp_dir: tempfile::TempDir,
+ client: RemoteSettingsBenchmarkClient,
+ record_type: SuggestRecordType,
+}
+
+impl IngestBenchmark {
+ pub fn new(record_type: SuggestRecordType) -> Self {
+ let temp_dir = tempfile::tempdir().unwrap();
+ let store = SuggestStoreInner::new(
+ temp_dir.path().join("warmup.sqlite"),
+ RemoteSettingsWarmUpClient::new(),
+ );
+ store.benchmark_ingest_records_by_type(record_type);
+ Self {
+ client: RemoteSettingsBenchmarkClient::from(store.into_settings_client()),
+ temp_dir,
+ record_type,
+ }
+ }
+}
+
+// The input for each benchmark is `SuggestStoreInner` with a fresh database.
+//
+// This is wrapped in a newtype so that it can be exposed in the public trait
+pub struct InputType(SuggestStoreInner<RemoteSettingsBenchmarkClient>);
+
+impl BenchmarkWithInput for IngestBenchmark {
+ type Input = InputType;
+
+ fn generate_input(&self) -> Self::Input {
+ let data_path = self.temp_dir.path().join(format!(
+ "db{}.sqlite",
+ DB_FILE_COUNTER.fetch_add(1, Ordering::Relaxed)
+ ));
+ let store = SuggestStoreInner::new(data_path, self.client.clone());
+ store.ensure_db_initialized();
+ InputType(store)
+ }
+
+ fn benchmarked_code(&self, input: Self::Input) {
+ let InputType(store) = input;
+ store.benchmark_ingest_records_by_type(self.record_type);
+ }
+}
+
+/// Get IngestBenchmark instances for all record types
+pub fn all_benchmarks() -> Vec<(&'static str, IngestBenchmark)> {
+ vec![
+ ("icon", IngestBenchmark::new(SuggestRecordType::Icon)),
+ (
+ "amp-wikipedia",
+ IngestBenchmark::new(SuggestRecordType::AmpWikipedia),
+ ),
+ ("amo", IngestBenchmark::new(SuggestRecordType::Amo)),
+ ("pocket", IngestBenchmark::new(SuggestRecordType::Pocket)),
+ ("yelp", IngestBenchmark::new(SuggestRecordType::Yelp)),
+ ("mdn", IngestBenchmark::new(SuggestRecordType::Mdn)),
+ ("weather", IngestBenchmark::new(SuggestRecordType::Weather)),
+ (
+ "global-config",
+ IngestBenchmark::new(SuggestRecordType::GlobalConfig),
+ ),
+ (
+ "amp-mobile",
+ IngestBenchmark::new(SuggestRecordType::AmpMobile),
+ ),
+ ]
+}
+
+pub fn print_debug_ingestion_sizes() {
+ viaduct_reqwest::use_reqwest_backend();
+ let store = SuggestStoreInner::new(
+ "file:debug_ingestion_sizes?mode=memory&cache=shared",
+ RemoteSettingsWarmUpClient::new(),
+ );
+ store
+ .ingest(SuggestIngestionConstraints::default())
+ .unwrap();
+ let table_row_counts = store.table_row_counts();
+ let client = store.into_settings_client();
+ let total_attachment_size: usize = client
+ .get_attachment_responses
+ .lock()
+ .values()
+ .map(|data| data.len())
+ .sum();
+
+ println!(
+ "Total attachment size: {}kb",
+ (total_attachment_size + 500) / 1000
+ );
+ println!();
+ println!("Database table row counts");
+ println!("-------------------------");
+ for (name, count) in table_row_counts {
+ println!("{name:30}: {count}");
+ }
+}
diff --git a/third_party/rust/suggest/src/benchmarks/mod.rs b/third_party/rust/suggest/src/benchmarks/mod.rs
new file mode 100644
index 0000000000..eb3b2e8abe
--- /dev/null
+++ b/third_party/rust/suggest/src/benchmarks/mod.rs
@@ -0,0 +1,40 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Benchmarking support
+//!
+//! Benchmarks are split up into two parts: the functions to be benchmarked live here, which the benchmarking code itself lives in `benches/bench.rs`.
+//! It's easier to write benchmarking code inside the main crate, where we have access to private items.
+//! However, it's easier to integrate with Cargo and criterion if benchmarks live in a separate crate.
+//!
+//! All benchmarks are defined as structs that implement either the [Benchmark] or [BenchmarkWithInput]
+
+pub mod client;
+pub mod ingest;
+
+/// Trait for simple benchmarks
+///
+/// This supports simple benchmarks that don't require any input. Note: global setup can be done
+/// in the `new()` method for the struct.
+pub trait Benchmark {
+ /// Perform the operations that we're benchmarking.
+ fn benchmarked_code(&self);
+}
+
+/// Trait for benchmarks that require input
+///
+/// This will run using Criterion's `iter_batched` function. Criterion will create a batch of
+/// inputs, then pass each one to benchmark.
+///
+/// This supports simple benchmarks that don't require any input. Note: global setup can be done
+/// in the `new()` method for the struct.
+pub trait BenchmarkWithInput {
+ type Input;
+
+ /// Generate the input (this is not included in the benchmark time)
+ fn generate_input(&self) -> Self::Input;
+
+ /// Perform the operations that we're benchmarking.
+ fn benchmarked_code(&self, input: Self::Input);
+}
diff --git a/third_party/rust/suggest/src/bin/debug_ingestion_sizes.rs b/third_party/rust/suggest/src/bin/debug_ingestion_sizes.rs
new file mode 100644
index 0000000000..14ca3d9462
--- /dev/null
+++ b/third_party/rust/suggest/src/bin/debug_ingestion_sizes.rs
@@ -0,0 +1,9 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use suggest::benchmarks::ingest;
+
+fn main() {
+ ingest::print_debug_ingestion_sizes()
+}
diff --git a/third_party/rust/suggest/src/config.rs b/third_party/rust/suggest/src/config.rs
index fcb3c2e256..532ac5b504 100644
--- a/third_party/rust/suggest/src/config.rs
+++ b/third_party/rust/suggest/src/config.rs
@@ -1,3 +1,8 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
use serde::{Deserialize, Serialize};
use crate::rs::{DownloadedGlobalConfig, DownloadedWeatherData};
diff --git a/third_party/rust/suggest/src/db.rs b/third_party/rust/suggest/src/db.rs
index 07fc3ab4a2..0412c50d8f 100644
--- a/third_party/rust/suggest/src/db.rs
+++ b/third_party/rust/suggest/src/db.rs
@@ -23,17 +23,17 @@ use crate::{
rs::{
DownloadedAmoSuggestion, DownloadedAmpSuggestion, DownloadedAmpWikipediaSuggestion,
DownloadedMdnSuggestion, DownloadedPocketSuggestion, DownloadedWeatherData,
- SuggestRecordId,
+ DownloadedWikipediaSuggestion, SuggestRecordId,
},
- schema::{SuggestConnectionInitializer, VERSION},
+ schema::{clear_database, SuggestConnectionInitializer, VERSION},
store::{UnparsableRecord, UnparsableRecords},
suggestion::{cook_raw_suggestion_url, AmpSuggestionType, Suggestion},
Result, SuggestionQuery,
};
-/// The metadata key whose value is the timestamp of the last record ingested
-/// from the Suggest Remote Settings collection.
-pub const LAST_INGEST_META_KEY: &str = "last_quicksuggest_ingest";
+/// The metadata key prefix for the last ingested unparsable record. These are
+/// records that were not parsed properly, or were not of the "approved" types.
+pub const LAST_INGEST_META_UNPARSABLE: &str = "last_quicksuggest_ingest_unparsable";
/// The metadata key whose value keeps track of records of suggestions
/// that aren't parsable and which schema version it was first seen in.
pub const UNPARSABLE_RECORDS_META_KEY: &str = "unparsable_records";
@@ -148,20 +148,28 @@ impl<'a> SuggestDao<'a> {
self.put_unparsable_record_id(&record_id)?;
// Advance the last fetch time, so that we can resume
// fetching after this record if we're interrupted.
- self.put_last_ingest_if_newer(record.last_modified)
+ self.put_last_ingest_if_newer(LAST_INGEST_META_UNPARSABLE, record.last_modified)
}
- pub fn handle_ingested_record(&mut self, record: &RemoteSettingsRecord) -> Result<()> {
+ pub fn handle_ingested_record(
+ &mut self,
+ last_ingest_key: &str,
+ record: &RemoteSettingsRecord,
+ ) -> Result<()> {
let record_id = SuggestRecordId::from(&record.id);
// Remove this record's ID from the list of unparsable
// records, since we understand it now.
self.drop_unparsable_record_id(&record_id)?;
// Advance the last fetch time, so that we can resume
// fetching after this record if we're interrupted.
- self.put_last_ingest_if_newer(record.last_modified)
+ self.put_last_ingest_if_newer(last_ingest_key, record.last_modified)
}
- pub fn handle_deleted_record(&mut self, record: &RemoteSettingsRecord) -> Result<()> {
+ pub fn handle_deleted_record(
+ &mut self,
+ last_ingest_key: &str,
+ record: &RemoteSettingsRecord,
+ ) -> Result<()> {
let record_id = SuggestRecordId::from(&record.id);
// Drop either the icon or suggestions, records only contain one or the other
match record_id.as_icon_id() {
@@ -173,7 +181,7 @@ impl<'a> SuggestDao<'a> {
self.drop_unparsable_record_id(&record_id)?;
// Advance the last fetch time, so that we can resume
// fetching after this record if we're interrupted.
- self.put_last_ingest_if_newer(record.last_modified)
+ self.put_last_ingest_if_newer(last_ingest_key, record.last_modified)
}
// =============== Low level API ===============
@@ -231,15 +239,20 @@ impl<'a> SuggestDao<'a> {
s.title,
s.url,
s.provider,
- s.score
+ s.score,
+ fk.full_keyword
FROM
suggestions s
JOIN
keywords k
ON k.suggestion_id = s.id
+ LEFT JOIN
+ full_keywords fk
+ ON k.full_keyword_id = fk.id
WHERE
s.provider = :provider
AND k.keyword = :keyword
+ AND NOT EXISTS (SELECT 1 FROM dismissed_suggestions WHERE url=s.url)
"#,
named_params! {
":keyword": keyword_lowercased,
@@ -248,8 +261,9 @@ impl<'a> SuggestDao<'a> {
|row| -> Result<Suggestion> {
let suggestion_id: i64 = row.get("id")?;
let title = row.get("title")?;
- let raw_url = row.get::<_, String>("url")?;
- let score = row.get::<_, f64>("score")?;
+ let raw_url: String = row.get("url")?;
+ let score: f64 = row.get("score")?;
+ let full_keyword_from_db: Option<String> = row.get("full_keyword")?;
let keywords: Vec<String> = self.conn.query_rows_and_then_cached(
r#"
@@ -277,9 +291,12 @@ impl<'a> SuggestDao<'a> {
amp.iab_category,
amp.impression_url,
amp.click_url,
- (SELECT i.data FROM icons i WHERE i.id = amp.icon_id) AS icon
+ i.data AS icon,
+ i.mimetype AS icon_mimetype
FROM
amp_custom_details amp
+ LEFT JOIN
+ icons i ON amp.icon_id = i.id
WHERE
amp.suggestion_id = :suggestion_id
"#,
@@ -298,8 +315,10 @@ impl<'a> SuggestDao<'a> {
title,
url: cooked_url,
raw_url,
- full_keyword: full_keyword(keyword_lowercased, &keywords),
+ full_keyword: full_keyword_from_db
+ .unwrap_or_else(|| full_keyword(keyword_lowercased, &keywords)),
icon: row.get("icon")?,
+ icon_mimetype: row.get("icon_mimetype")?,
impression_url: row.get("impression_url")?,
click_url: cooked_click_url,
raw_click_url,
@@ -330,6 +349,7 @@ impl<'a> SuggestDao<'a> {
WHERE
s.provider = :provider
AND k.keyword = :keyword
+ AND NOT EXISTS (SELECT 1 FROM dismissed_suggestions WHERE url=s.url)
"#,
named_params! {
":keyword": keyword_lowercased,
@@ -350,36 +370,51 @@ impl<'a> SuggestDao<'a> {
},
|row| row.get(0),
)?;
- let icon = self.conn.try_query_one(
- "SELECT i.data
+ let (icon, icon_mimetype) = self
+ .conn
+ .try_query_row(
+ "SELECT i.data, i.mimetype
FROM icons i
JOIN wikipedia_custom_details s ON s.icon_id = i.id
- WHERE s.suggestion_id = :suggestion_id",
- named_params! {
- ":suggestion_id": suggestion_id
- },
- true,
- )?;
+ WHERE s.suggestion_id = :suggestion_id
+ LIMIT 1",
+ named_params! {
+ ":suggestion_id": suggestion_id
+ },
+ |row| -> Result<_> {
+ Ok((
+ row.get::<_, Option<Vec<u8>>>(0)?,
+ row.get::<_, Option<String>>(1)?,
+ ))
+ },
+ true,
+ )?
+ .unwrap_or((None, None));
+
Ok(Suggestion::Wikipedia {
title,
url: raw_url,
full_keyword: full_keyword(keyword_lowercased, &keywords),
icon,
+ icon_mimetype,
})
},
)?;
Ok(suggestions)
}
- /// Fetches Suggestions of type Amo provider that match the given query
- pub fn fetch_amo_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
+ /// Query for suggestions using the keyword prefix and provider
+ fn map_prefix_keywords<T>(
+ &self,
+ query: &SuggestionQuery,
+ provider: &SuggestionProvider,
+ mut mapper: impl FnMut(&rusqlite::Row, &str) -> Result<T>,
+ ) -> Result<Vec<T>> {
let keyword_lowercased = &query.keyword.to_lowercase();
let (keyword_prefix, keyword_suffix) = split_keyword(keyword_lowercased);
- let suggestions_limit = &query.limit.unwrap_or(-1);
- let suggestions = self
- .conn
- .query_rows_and_then_cached(
- r#"
+ let suggestions_limit = query.limit.unwrap_or(-1);
+ self.conn.query_rows_and_then_cached(
+ r#"
SELECT
s.id,
MAX(k.rank) AS rank,
@@ -397,6 +432,7 @@ impl<'a> SuggestDao<'a> {
k.keyword_prefix = :keyword_prefix
AND (k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF')
AND s.provider = :provider
+ AND NOT EXISTS (SELECT 1 FROM dismissed_suggestions WHERE url=s.url)
GROUP BY
s.id
ORDER BY
@@ -405,13 +441,23 @@ impl<'a> SuggestDao<'a> {
LIMIT
:suggestions_limit
"#,
- named_params! {
- ":keyword_prefix": keyword_prefix,
- ":keyword_suffix": keyword_suffix,
- ":provider": SuggestionProvider::Amo,
- ":suggestions_limit": suggestions_limit,
- },
- |row| -> Result<Option<Suggestion>> {
+ &[
+ (":keyword_prefix", &keyword_prefix as &dyn ToSql),
+ (":keyword_suffix", &keyword_suffix as &dyn ToSql),
+ (":provider", provider as &dyn ToSql),
+ (":suggestions_limit", &suggestions_limit as &dyn ToSql),
+ ],
+ |row| mapper(row, keyword_suffix),
+ )
+ }
+
+ /// Fetches Suggestions of type Amo provider that match the given query
+ pub fn fetch_amo_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
+ let suggestions = self
+ .map_prefix_keywords(
+ query,
+ &SuggestionProvider::Amo,
+ |row, keyword_suffix| -> Result<Option<Suggestion>> {
let suggestion_id: i64 = row.get("id")?;
let title = row.get("title")?;
let raw_url = row.get::<_, String>("url")?;
@@ -486,6 +532,7 @@ impl<'a> SuggestDao<'a> {
k.keyword_prefix = :keyword_prefix
AND (k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF')
AND s.provider = :provider
+ AND NOT EXISTS (SELECT 1 FROM dismissed_suggestions WHERE url=s.url)
GROUP BY
s.id,
k.confidence
@@ -534,45 +581,11 @@ impl<'a> SuggestDao<'a> {
/// Fetches suggestions for MDN
pub fn fetch_mdn_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
- let keyword_lowercased = &query.keyword.to_lowercase();
- let (keyword_prefix, keyword_suffix) = split_keyword(keyword_lowercased);
- let suggestions_limit = &query.limit.unwrap_or(-1);
let suggestions = self
- .conn
- .query_rows_and_then_cached(
- r#"
- SELECT
- s.id,
- MAX(k.rank) AS rank,
- s.title,
- s.url,
- s.provider,
- s.score,
- k.keyword_suffix
- FROM
- suggestions s
- JOIN
- prefix_keywords k
- ON k.suggestion_id = s.id
- WHERE
- k.keyword_prefix = :keyword_prefix
- AND (k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF')
- AND s.provider = :provider
- GROUP BY
- s.id
- ORDER BY
- s.score DESC,
- rank DESC
- LIMIT
- :suggestions_limit
- "#,
- named_params! {
- ":keyword_prefix": keyword_prefix,
- ":keyword_suffix": keyword_suffix,
- ":provider": SuggestionProvider::Mdn,
- ":suggestions_limit": suggestions_limit,
- },
- |row| -> Result<Option<Suggestion>> {
+ .map_prefix_keywords(
+ query,
+ &SuggestionProvider::Mdn,
+ |row, keyword_suffix| -> Result<Option<Suggestion>> {
let suggestion_id: i64 = row.get("id")?;
let title = row.get("title")?;
let raw_url = row.get::<_, String>("url")?;
@@ -657,35 +670,15 @@ impl<'a> SuggestDao<'a> {
record_id: &SuggestRecordId,
suggestions: &[DownloadedAmoSuggestion],
) -> Result<()> {
+ let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?;
for suggestion in suggestions {
self.scope.err_if_interrupted()?;
- let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
- &format!(
- "INSERT INTO suggestions(
- record_id,
- provider,
- title,
- url,
- score
- )
- VALUES(
- :record_id,
- {},
- :title,
- :url,
- :score
- )
- RETURNING id",
- SuggestionProvider::Amo as u8
- ),
- named_params! {
- ":record_id": record_id.as_str(),
- ":title": suggestion.title,
- ":url": suggestion.url,
- ":score": suggestion.score,
- },
- |row| row.get(0),
- true,
+ let suggestion_id = suggestion_insert.execute(
+ record_id,
+ &suggestion.title,
+ &suggestion.url,
+ suggestion.score,
+ SuggestionProvider::Amo,
)?;
self.conn.execute(
"INSERT INTO amo_custom_details(
@@ -747,105 +740,48 @@ impl<'a> SuggestDao<'a> {
record_id: &SuggestRecordId,
suggestions: &[DownloadedAmpWikipediaSuggestion],
) -> Result<()> {
+ // Prepare statements outside of the loop. This results in a large performance
+ // improvement on a fresh ingest, since there are so many rows.
+ let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?;
+ let mut amp_insert = AmpInsertStatement::new(self.conn)?;
+ let mut wiki_insert = WikipediaInsertStatement::new(self.conn)?;
+ let mut keyword_insert = KeywordInsertStatement::new(self.conn)?;
for suggestion in suggestions {
self.scope.err_if_interrupted()?;
let common_details = suggestion.common_details();
let provider = suggestion.provider();
- let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
- &format!(
- "INSERT INTO suggestions(
- record_id,
- provider,
- title,
- url,
- score
- )
- VALUES(
- :record_id,
- {},
- :title,
- :url,
- :score
- )
- RETURNING id",
- provider as u8
- ),
- named_params! {
- ":record_id": record_id.as_str(),
- ":title": common_details.title,
- ":url": common_details.url,
- ":score": common_details.score.unwrap_or(DEFAULT_SUGGESTION_SCORE)
- },
- |row| row.get(0),
- true,
+ let suggestion_id = suggestion_insert.execute(
+ record_id,
+ &common_details.title,
+ &common_details.url,
+ common_details.score.unwrap_or(DEFAULT_SUGGESTION_SCORE),
+ provider,
)?;
match suggestion {
DownloadedAmpWikipediaSuggestion::Amp(amp) => {
- self.conn.execute(
- "INSERT INTO amp_custom_details(
- suggestion_id,
- advertiser,
- block_id,
- iab_category,
- impression_url,
- click_url,
- icon_id
- )
- VALUES(
- :suggestion_id,
- :advertiser,
- :block_id,
- :iab_category,
- :impression_url,
- :click_url,
- :icon_id
- )",
- named_params! {
- ":suggestion_id": suggestion_id,
- ":advertiser": amp.advertiser,
- ":block_id": amp.block_id,
- ":iab_category": amp.iab_category,
- ":impression_url": amp.impression_url,
- ":click_url": amp.click_url,
- ":icon_id": amp.icon_id,
- },
- )?;
+ amp_insert.execute(suggestion_id, amp)?;
}
DownloadedAmpWikipediaSuggestion::Wikipedia(wikipedia) => {
- self.conn.execute(
- "INSERT INTO wikipedia_custom_details(
- suggestion_id,
- icon_id
- )
- VALUES(
- :suggestion_id,
- :icon_id
- )",
- named_params! {
- ":suggestion_id": suggestion_id,
- ":icon_id": wikipedia.icon_id,
- },
- )?;
+ wiki_insert.execute(suggestion_id, wikipedia)?;
}
}
- for (index, keyword) in common_details.keywords.iter().enumerate() {
- self.conn.execute(
- "INSERT INTO keywords(
- keyword,
- suggestion_id,
- rank
- )
- VALUES(
- :keyword,
- :suggestion_id,
- :rank
- )",
- named_params! {
- ":keyword": keyword,
- ":rank": index,
- ":suggestion_id": suggestion_id,
- },
+ let mut full_keyword_inserter = FullKeywordInserter::new(self.conn, suggestion_id);
+ for keyword in common_details.keywords() {
+ let full_keyword_id = match (suggestion, keyword.full_keyword) {
+ // Try to associate full keyword data. Only do this for AMP, we decided to
+ // skip it for Wikipedia in https://bugzilla.mozilla.org/show_bug.cgi?id=1876217
+ (DownloadedAmpWikipediaSuggestion::Amp(_), Some(full_keyword)) => {
+ Some(full_keyword_inserter.maybe_insert(full_keyword)?)
+ }
+ _ => None,
+ };
+
+ keyword_insert.execute(
+ suggestion_id,
+ keyword.keyword,
+ full_keyword_id,
+ keyword.rank,
)?;
}
}
@@ -859,84 +795,32 @@ impl<'a> SuggestDao<'a> {
record_id: &SuggestRecordId,
suggestions: &[DownloadedAmpSuggestion],
) -> Result<()> {
+ let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?;
+ let mut amp_insert = AmpInsertStatement::new(self.conn)?;
+ let mut keyword_insert = KeywordInsertStatement::new(self.conn)?;
for suggestion in suggestions {
self.scope.err_if_interrupted()?;
let common_details = &suggestion.common_details;
- let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
- &format!(
- "INSERT INTO suggestions(
- record_id,
- provider,
- title,
- url,
- score
- )
- VALUES(
- :record_id,
- {},
- :title,
- :url,
- :score
- )
- RETURNING id",
- SuggestionProvider::AmpMobile as u8
- ),
- named_params! {
- ":record_id": record_id.as_str(),
- ":title": common_details.title,
- ":url": common_details.url,
- ":score": common_details.score.unwrap_or(DEFAULT_SUGGESTION_SCORE)
- },
- |row| row.get(0),
- true,
- )?;
- self.conn.execute(
- "INSERT INTO amp_custom_details(
- suggestion_id,
- advertiser,
- block_id,
- iab_category,
- impression_url,
- click_url,
- icon_id
- )
- VALUES(
- :suggestion_id,
- :advertiser,
- :block_id,
- :iab_category,
- :impression_url,
- :click_url,
- :icon_id
- )",
- named_params! {
- ":suggestion_id": suggestion_id,
- ":advertiser": suggestion.advertiser,
- ":block_id": suggestion.block_id,
- ":iab_category": suggestion.iab_category,
- ":impression_url": suggestion.impression_url,
- ":click_url": suggestion.click_url,
- ":icon_id": suggestion.icon_id,
- },
+ let suggestion_id = suggestion_insert.execute(
+ record_id,
+ &common_details.title,
+ &common_details.url,
+ common_details.score.unwrap_or(DEFAULT_SUGGESTION_SCORE),
+ SuggestionProvider::AmpMobile,
)?;
+ amp_insert.execute(suggestion_id, suggestion)?;
- for (index, keyword) in common_details.keywords.iter().enumerate() {
- self.conn.execute(
- "INSERT INTO keywords(
- keyword,
- suggestion_id,
- rank
- )
- VALUES(
- :keyword,
- :suggestion_id,
- :rank
- )",
- named_params! {
- ":keyword": keyword,
- ":rank": index,
- ":suggestion_id": suggestion_id,
- },
+ let mut full_keyword_inserter = FullKeywordInserter::new(self.conn, suggestion_id);
+ for keyword in common_details.keywords() {
+ let full_keyword_id = keyword
+ .full_keyword
+ .map(|full_keyword| full_keyword_inserter.maybe_insert(full_keyword))
+ .transpose()?;
+ keyword_insert.execute(
+ suggestion_id,
+ keyword.keyword,
+ full_keyword_id,
+ keyword.rank,
)?;
}
}
@@ -950,37 +834,16 @@ impl<'a> SuggestDao<'a> {
record_id: &SuggestRecordId,
suggestions: &[DownloadedPocketSuggestion],
) -> Result<()> {
+ let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?;
for suggestion in suggestions {
self.scope.err_if_interrupted()?;
- let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
- &format!(
- "INSERT INTO suggestions(
- record_id,
- provider,
- title,
- url,
- score
- )
- VALUES(
- :record_id,
- {},
- :title,
- :url,
- :score
- )
- RETURNING id",
- SuggestionProvider::Pocket as u8
- ),
- named_params! {
- ":record_id": record_id.as_str(),
- ":title": suggestion.title,
- ":url": suggestion.url,
- ":score": suggestion.score,
- },
- |row| row.get(0),
- true,
+ let suggestion_id = suggestion_insert.execute(
+ record_id,
+ &suggestion.title,
+ &suggestion.url,
+ suggestion.score,
+ SuggestionProvider::Pocket,
)?;
-
for ((rank, keyword), confidence) in suggestion
.high_confidence_keywords
.iter()
@@ -1030,35 +893,15 @@ impl<'a> SuggestDao<'a> {
record_id: &SuggestRecordId,
suggestions: &[DownloadedMdnSuggestion],
) -> Result<()> {
+ let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?;
for suggestion in suggestions {
self.scope.err_if_interrupted()?;
- let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
- &format!(
- "INSERT INTO suggestions(
- record_id,
- provider,
- title,
- url,
- score
- )
- VALUES(
- :record_id,
- {},
- :title,
- :url,
- :score
- )
- RETURNING id",
- SuggestionProvider::Mdn as u8
- ),
- named_params! {
- ":record_id": record_id.as_str(),
- ":title": suggestion.title,
- ":url": suggestion.url,
- ":score": suggestion.score,
- },
- |row| row.get(0),
- true,
+ let suggestion_id = suggestion_insert.execute(
+ record_id,
+ &suggestion.title,
+ &suggestion.url,
+ suggestion.score,
+ SuggestionProvider::Mdn,
)?;
self.conn.execute_cached(
"INSERT INTO mdn_custom_details(
@@ -1107,20 +950,14 @@ impl<'a> SuggestDao<'a> {
record_id: &SuggestRecordId,
data: &DownloadedWeatherData,
) -> Result<()> {
+ let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?;
self.scope.err_if_interrupted()?;
- let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
- &format!(
- "INSERT INTO suggestions(record_id, provider, title, url, score)
- VALUES(:record_id, {}, '', '', :score)
- RETURNING id",
- SuggestionProvider::Weather as u8
- ),
- named_params! {
- ":record_id": record_id.as_str(),
- ":score": data.weather.score.unwrap_or(DEFAULT_SUGGESTION_SCORE),
- },
- |row| row.get(0),
- true,
+ let suggestion_id = suggestion_insert.execute(
+ record_id,
+ "",
+ "",
+ data.weather.score.unwrap_or(DEFAULT_SUGGESTION_SCORE),
+ SuggestionProvider::Weather,
)?;
for (index, keyword) in data.weather.keywords.iter().enumerate() {
self.conn.execute(
@@ -1141,24 +978,43 @@ impl<'a> SuggestDao<'a> {
}
/// Inserts or replaces an icon for a suggestion into the database.
- pub fn put_icon(&mut self, icon_id: &str, data: &[u8]) -> Result<()> {
+ pub fn put_icon(&mut self, icon_id: &str, data: &[u8], mimetype: &str) -> Result<()> {
self.conn.execute(
"INSERT OR REPLACE INTO icons(
id,
- data
+ data,
+ mimetype
)
VALUES(
:id,
- :data
+ :data,
+ :mimetype
)",
named_params! {
":id": icon_id,
":data": data,
+ ":mimetype": mimetype,
},
)?;
Ok(())
}
+ pub fn insert_dismissal(&self, url: &str) -> Result<()> {
+ self.conn.execute(
+ "INSERT OR IGNORE INTO dismissed_suggestions(url)
+ VALUES(:url)",
+ named_params! {
+ ":url": url,
+ },
+ )?;
+ Ok(())
+ }
+
+ pub fn clear_dismissals(&self) -> Result<()> {
+ self.conn.execute("DELETE FROM dismissed_suggestions", ())?;
+ Ok(())
+ }
+
/// Deletes all suggestions associated with a Remote Settings record from
/// the database.
pub fn drop_suggestions(&mut self, record_id: &SuggestRecordId) -> Result<()> {
@@ -1196,12 +1052,7 @@ impl<'a> SuggestDao<'a> {
/// Clears the database, removing all suggestions, icons, and metadata.
pub fn clear(&mut self) -> Result<()> {
- self.conn.execute_batch(
- "DELETE FROM suggestions;
- DELETE FROM icons;
- DELETE FROM meta;",
- )?;
- Ok(())
+ Ok(clear_database(self.conn)?)
}
/// Returns the value associated with a metadata key.
@@ -1224,12 +1075,14 @@ impl<'a> SuggestDao<'a> {
/// Updates the last ingest timestamp if the given last modified time is
/// newer than the existing one recorded.
- pub fn put_last_ingest_if_newer(&mut self, record_last_modified: u64) -> Result<()> {
- let last_ingest = self
- .get_meta::<u64>(LAST_INGEST_META_KEY)?
- .unwrap_or_default();
+ pub fn put_last_ingest_if_newer(
+ &mut self,
+ last_ingest_key: &str,
+ record_last_modified: u64,
+ ) -> Result<()> {
+ let last_ingest = self.get_meta::<u64>(last_ingest_key)?.unwrap_or_default();
if record_last_modified > last_ingest {
- self.put_meta(LAST_INGEST_META_KEY, record_last_modified)?;
+ self.put_meta(last_ingest_key, record_last_modified)?;
}
Ok(())
@@ -1310,6 +1163,185 @@ impl<'a> SuggestDao<'a> {
}
}
+/// Helper struct to get full_keyword_ids for a suggestion
+///
+/// `FullKeywordInserter` handles repeated full keywords efficiently. The first instance will
+/// cause a row to be inserted into the database. Subsequent instances will return the same
+/// full_keyword_id.
+struct FullKeywordInserter<'a> {
+ conn: &'a Connection,
+ suggestion_id: i64,
+ last_inserted: Option<(&'a str, i64)>,
+}
+
+impl<'a> FullKeywordInserter<'a> {
+ fn new(conn: &'a Connection, suggestion_id: i64) -> Self {
+ Self {
+ conn,
+ suggestion_id,
+ last_inserted: None,
+ }
+ }
+
+ fn maybe_insert(&mut self, full_keyword: &'a str) -> rusqlite::Result<i64> {
+ match self.last_inserted {
+ Some((s, id)) if s == full_keyword => Ok(id),
+ _ => {
+ let full_keyword_id = self.conn.query_row_and_then(
+ "INSERT INTO full_keywords(
+ suggestion_id,
+ full_keyword
+ )
+ VALUES(
+ :suggestion_id,
+ :keyword
+ )
+ RETURNING id",
+ named_params! {
+ ":keyword": full_keyword,
+ ":suggestion_id": self.suggestion_id,
+ },
+ |row| row.get(0),
+ )?;
+ self.last_inserted = Some((full_keyword, full_keyword_id));
+ Ok(full_keyword_id)
+ }
+ }
+ }
+}
+
+// ======================== Statement types ========================
+//
+// During ingestion we can insert hundreds of thousands of rows. These types enable speedups by
+// allowing us to prepare a statement outside a loop and use it many times inside the loop.
+//
+// Each type wraps [Connection::prepare] and [Statement] to provide a simplified interface,
+// tailored to a specific query.
+//
+// This pattern is applicable for whenever we execute the same query repeatedly in a loop.
+// The impact scales with the number of loop iterations, which is why we currently don't do this
+// for providers like Mdn, Pocket, and Weather, which have relatively small number of records
+// compared to Amp/Wikipedia.
+
+struct SuggestionInsertStatement<'conn>(rusqlite::Statement<'conn>);
+
+impl<'conn> SuggestionInsertStatement<'conn> {
+ fn new(conn: &'conn Connection) -> Result<Self> {
+ Ok(Self(conn.prepare(
+ "INSERT INTO suggestions(
+ record_id,
+ title,
+ url,
+ score,
+ provider
+ )
+ VALUES(?, ?, ?, ?, ?)
+ RETURNING id",
+ )?))
+ }
+
+ /// Execute the insert and return the `suggestion_id` for the new row
+ fn execute(
+ &mut self,
+ record_id: &SuggestRecordId,
+ title: &str,
+ url: &str,
+ score: f64,
+ provider: SuggestionProvider,
+ ) -> Result<i64> {
+ Ok(self.0.query_row(
+ (record_id.as_str(), title, url, score, provider as u8),
+ |row| row.get(0),
+ )?)
+ }
+}
+
+struct AmpInsertStatement<'conn>(rusqlite::Statement<'conn>);
+
+impl<'conn> AmpInsertStatement<'conn> {
+ fn new(conn: &'conn Connection) -> Result<Self> {
+ Ok(Self(conn.prepare(
+ "INSERT INTO amp_custom_details(
+ suggestion_id,
+ advertiser,
+ block_id,
+ iab_category,
+ impression_url,
+ click_url,
+ icon_id
+ )
+ VALUES(?, ?, ?, ?, ?, ?, ?)
+ ",
+ )?))
+ }
+
+ fn execute(&mut self, suggestion_id: i64, amp: &DownloadedAmpSuggestion) -> Result<()> {
+ self.0.execute((
+ suggestion_id,
+ &amp.advertiser,
+ amp.block_id,
+ &amp.iab_category,
+ &amp.impression_url,
+ &amp.click_url,
+ &amp.icon_id,
+ ))?;
+ Ok(())
+ }
+}
+
+struct WikipediaInsertStatement<'conn>(rusqlite::Statement<'conn>);
+
+impl<'conn> WikipediaInsertStatement<'conn> {
+ fn new(conn: &'conn Connection) -> Result<Self> {
+ Ok(Self(conn.prepare(
+ "INSERT INTO wikipedia_custom_details(
+ suggestion_id,
+ icon_id
+ )
+ VALUES(?, ?)
+ ",
+ )?))
+ }
+
+ fn execute(
+ &mut self,
+ suggestion_id: i64,
+ wikipedia: &DownloadedWikipediaSuggestion,
+ ) -> Result<()> {
+ self.0.execute((suggestion_id, &wikipedia.icon_id))?;
+ Ok(())
+ }
+}
+
+struct KeywordInsertStatement<'conn>(rusqlite::Statement<'conn>);
+
+impl<'conn> KeywordInsertStatement<'conn> {
+ fn new(conn: &'conn Connection) -> Result<Self> {
+ Ok(Self(conn.prepare(
+ "INSERT INTO keywords(
+ suggestion_id,
+ keyword,
+ full_keyword_id,
+ rank
+ )
+ VALUES(?, ?, ?, ?)
+ ",
+ )?))
+ }
+
+ fn execute(
+ &mut self,
+ suggestion_id: i64,
+ keyword: &str,
+ full_keyword_id: Option<i64>,
+ rank: usize,
+ ) -> Result<()> {
+ self.0
+ .execute((suggestion_id, keyword, full_keyword_id, rank))?;
+ Ok(())
+ }
+}
+
fn provider_config_meta_key(provider: SuggestionProvider) -> String {
format!("{}{}", PROVIDER_CONFIG_META_KEY_PREFIX, provider as u8)
}
diff --git a/third_party/rust/suggest/src/lib.rs b/third_party/rust/suggest/src/lib.rs
index 23775b7dec..15746614d0 100644
--- a/third_party/rust/suggest/src/lib.rs
+++ b/third_party/rust/suggest/src/lib.rs
@@ -4,6 +4,8 @@
*/
use remote_settings::RemoteSettingsConfig;
+#[cfg(feature = "benchmark_api")]
+pub mod benchmarks;
mod config;
mod db;
mod error;
@@ -26,7 +28,7 @@ pub(crate) type Result<T> = std::result::Result<T, error::Error>;
pub type SuggestApiResult<T> = std::result::Result<T, error::SuggestApiError>;
/// A query for suggestions to show in the address bar.
-#[derive(Debug, Default)]
+#[derive(Clone, Debug, Default)]
pub struct SuggestionQuery {
pub keyword: String,
pub providers: Vec<SuggestionProvider>,
diff --git a/third_party/rust/suggest/src/pocket.rs b/third_party/rust/suggest/src/pocket.rs
index cf7070c62a..06fd4b012d 100644
--- a/third_party/rust/suggest/src/pocket.rs
+++ b/third_party/rust/suggest/src/pocket.rs
@@ -12,7 +12,7 @@ use rusqlite::{Result as RusqliteResult, ToSql};
/// substring for the suffix.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
#[repr(u8)]
-pub enum KeywordConfidence {
+pub(crate) enum KeywordConfidence {
Low = 0,
High = 1,
}
diff --git a/third_party/rust/suggest/src/provider.rs b/third_party/rust/suggest/src/provider.rs
index 1449c35c8a..2c0b6674cb 100644
--- a/third_party/rust/suggest/src/provider.rs
+++ b/third_party/rust/suggest/src/provider.rs
@@ -8,6 +8,8 @@ use rusqlite::{
Result as RusqliteResult,
};
+use crate::rs::SuggestRecordType;
+
/// A provider is a source of search suggestions.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
#[repr(u8)]
@@ -46,6 +48,52 @@ impl SuggestionProvider {
_ => None,
}
}
+
+ pub(crate) fn records_for_provider(&self) -> Vec<SuggestRecordType> {
+ match self {
+ SuggestionProvider::Amp => {
+ vec![
+ SuggestRecordType::AmpWikipedia,
+ SuggestRecordType::Icon,
+ SuggestRecordType::GlobalConfig,
+ ]
+ }
+ SuggestionProvider::Wikipedia => {
+ vec![
+ SuggestRecordType::AmpWikipedia,
+ SuggestRecordType::Icon,
+ SuggestRecordType::GlobalConfig,
+ ]
+ }
+ SuggestionProvider::Amo => {
+ vec![SuggestRecordType::Amo, SuggestRecordType::GlobalConfig]
+ }
+ SuggestionProvider::Pocket => {
+ vec![SuggestRecordType::Pocket, SuggestRecordType::GlobalConfig]
+ }
+ SuggestionProvider::Yelp => {
+ vec![
+ SuggestRecordType::Yelp,
+ SuggestRecordType::Icon,
+ SuggestRecordType::GlobalConfig,
+ ]
+ }
+ SuggestionProvider::Mdn => {
+ vec![SuggestRecordType::Mdn, SuggestRecordType::GlobalConfig]
+ }
+ SuggestionProvider::Weather => {
+ vec![SuggestRecordType::Weather, SuggestRecordType::GlobalConfig]
+ }
+ SuggestionProvider::AmpMobile => {
+ vec![
+ SuggestRecordType::AmpMobile,
+ SuggestRecordType::AmpWikipedia,
+ SuggestRecordType::Icon,
+ SuggestRecordType::GlobalConfig,
+ ]
+ }
+ }
+ }
}
impl ToSql for SuggestionProvider {
diff --git a/third_party/rust/suggest/src/rs.rs b/third_party/rust/suggest/src/rs.rs
index 198a8c43f6..4a733ece9d 100644
--- a/third_party/rust/suggest/src/rs.rs
+++ b/third_party/rust/suggest/src/rs.rs
@@ -31,7 +31,7 @@
//! the new suggestion in their results, and return `Suggestion::T` variants
//! as needed.
-use std::borrow::Cow;
+use std::{borrow::Cow, fmt};
use remote_settings::{GetItemsOptions, RemoteSettingsResponse};
use serde::{Deserialize, Deserializer};
@@ -47,6 +47,20 @@ pub(crate) const REMOTE_SETTINGS_COLLECTION: &str = "quicksuggest";
/// `mozilla-services/quicksuggest-rs` repo.
pub(crate) const SUGGESTIONS_PER_ATTACHMENT: u64 = 200;
+/// A list of default record types to download if nothing is specified.
+/// This currently defaults to all of the record types.
+pub(crate) const DEFAULT_RECORDS_TYPES: [SuggestRecordType; 9] = [
+ SuggestRecordType::Icon,
+ SuggestRecordType::AmpWikipedia,
+ SuggestRecordType::Amo,
+ SuggestRecordType::Pocket,
+ SuggestRecordType::Yelp,
+ SuggestRecordType::Mdn,
+ SuggestRecordType::Weather,
+ SuggestRecordType::GlobalConfig,
+ SuggestRecordType::AmpMobile,
+];
+
/// A trait for a client that downloads suggestions from Remote Settings.
///
/// This trait lets tests use a mock client.
@@ -102,6 +116,61 @@ pub(crate) enum SuggestRecord {
AmpMobile,
}
+/// Enum for the different record types that can be consumed.
+/// Extracting this from the serialization enum so that we can
+/// extend it to get type metadata.
+#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord)]
+pub enum SuggestRecordType {
+ Icon,
+ AmpWikipedia,
+ Amo,
+ Pocket,
+ Yelp,
+ Mdn,
+ Weather,
+ GlobalConfig,
+ AmpMobile,
+}
+
+impl From<SuggestRecord> for SuggestRecordType {
+ fn from(suggest_record: SuggestRecord) -> Self {
+ match suggest_record {
+ SuggestRecord::Amo => Self::Amo,
+ SuggestRecord::AmpWikipedia => Self::AmpWikipedia,
+ SuggestRecord::Icon => Self::Icon,
+ SuggestRecord::Mdn => Self::Mdn,
+ SuggestRecord::Pocket => Self::Pocket,
+ SuggestRecord::Weather(_) => Self::Weather,
+ SuggestRecord::Yelp => Self::Yelp,
+ SuggestRecord::GlobalConfig(_) => Self::GlobalConfig,
+ SuggestRecord::AmpMobile => Self::AmpMobile,
+ }
+ }
+}
+
+impl fmt::Display for SuggestRecordType {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self {
+ Self::Icon => write!(f, "icon"),
+ Self::AmpWikipedia => write!(f, "data"),
+ Self::Amo => write!(f, "amo-suggestions"),
+ Self::Pocket => write!(f, "pocket-suggestions"),
+ Self::Yelp => write!(f, "yelp-suggestions"),
+ Self::Mdn => write!(f, "mdn-suggestions"),
+ Self::Weather => write!(f, "weather"),
+ Self::GlobalConfig => write!(f, "configuration"),
+ Self::AmpMobile => write!(f, "amp-mobile-suggestions"),
+ }
+ }
+}
+
+impl SuggestRecordType {
+ /// Return the meta key for the last ingested record.
+ pub fn last_ingest_meta_key(&self) -> String {
+ format!("last_quicksuggest_ingest_{}", self)
+ }
+}
+
/// Represents either a single value, or a list of values. This is used to
/// deserialize downloaded attachments.
#[derive(Clone, Debug, Deserialize)]
@@ -156,16 +225,18 @@ where
}
/// Fields that are common to all downloaded suggestions.
-#[derive(Clone, Debug, Deserialize)]
+#[derive(Clone, Debug, Default, Deserialize)]
pub(crate) struct DownloadedSuggestionCommonDetails {
pub keywords: Vec<String>,
pub title: String,
pub url: String,
pub score: Option<f64>,
+ #[serde(default)]
+ pub full_keywords: Vec<(String, usize)>,
}
/// An AMP suggestion to ingest from an AMP-Wikipedia attachment.
-#[derive(Clone, Debug, Deserialize)]
+#[derive(Clone, Debug, Default, Deserialize)]
pub(crate) struct DownloadedAmpSuggestion {
#[serde(flatten)]
pub common_details: DownloadedSuggestionCommonDetails,
@@ -180,7 +251,7 @@ pub(crate) struct DownloadedAmpSuggestion {
}
/// A Wikipedia suggestion to ingest from an AMP-Wikipedia attachment.
-#[derive(Clone, Debug, Deserialize)]
+#[derive(Clone, Debug, Default, Deserialize)]
pub(crate) struct DownloadedWikipediaSuggestion {
#[serde(flatten)]
pub common_details: DownloadedSuggestionCommonDetails,
@@ -214,6 +285,34 @@ impl DownloadedAmpWikipediaSuggestion {
}
}
+impl DownloadedSuggestionCommonDetails {
+ /// Iterate over all keywords for this suggestion
+ pub fn keywords(&self) -> impl Iterator<Item = AmpKeyword<'_>> {
+ let full_keywords = self
+ .full_keywords
+ .iter()
+ .flat_map(|(full_keyword, repeat_for)| {
+ std::iter::repeat(Some(full_keyword.as_str())).take(*repeat_for)
+ })
+ .chain(std::iter::repeat(None)); // In case of insufficient full keywords, just fill in with infinite `None`s
+ //
+ self.keywords.iter().zip(full_keywords).enumerate().map(
+ move |(i, (keyword, full_keyword))| AmpKeyword {
+ rank: i,
+ keyword,
+ full_keyword,
+ },
+ )
+ }
+}
+
+#[derive(Debug, PartialEq, Eq)]
+pub(crate) struct AmpKeyword<'a> {
+ pub rank: usize,
+ pub keyword: &'a str,
+ pub full_keyword: Option<&'a str>,
+}
+
impl<'de> Deserialize<'de> for DownloadedAmpWikipediaSuggestion {
fn deserialize<D>(
deserializer: D,
@@ -344,3 +443,119 @@ where
{
String::deserialize(deserializer).map(|s| s.parse().ok())
}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn test_full_keywords() {
+ let suggestion = DownloadedAmpWikipediaSuggestion::Amp(DownloadedAmpSuggestion {
+ common_details: DownloadedSuggestionCommonDetails {
+ keywords: vec![
+ String::from("f"),
+ String::from("fo"),
+ String::from("foo"),
+ String::from("foo b"),
+ String::from("foo ba"),
+ String::from("foo bar"),
+ ],
+ full_keywords: vec![(String::from("foo"), 3), (String::from("foo bar"), 3)],
+ ..DownloadedSuggestionCommonDetails::default()
+ },
+ ..DownloadedAmpSuggestion::default()
+ });
+
+ assert_eq!(
+ Vec::from_iter(suggestion.common_details().keywords()),
+ vec![
+ AmpKeyword {
+ rank: 0,
+ keyword: "f",
+ full_keyword: Some("foo"),
+ },
+ AmpKeyword {
+ rank: 1,
+ keyword: "fo",
+ full_keyword: Some("foo"),
+ },
+ AmpKeyword {
+ rank: 2,
+ keyword: "foo",
+ full_keyword: Some("foo"),
+ },
+ AmpKeyword {
+ rank: 3,
+ keyword: "foo b",
+ full_keyword: Some("foo bar"),
+ },
+ AmpKeyword {
+ rank: 4,
+ keyword: "foo ba",
+ full_keyword: Some("foo bar"),
+ },
+ AmpKeyword {
+ rank: 5,
+ keyword: "foo bar",
+ full_keyword: Some("foo bar"),
+ },
+ ],
+ );
+ }
+
+ #[test]
+ fn test_missing_full_keywords() {
+ let suggestion = DownloadedAmpWikipediaSuggestion::Amp(DownloadedAmpSuggestion {
+ common_details: DownloadedSuggestionCommonDetails {
+ keywords: vec![
+ String::from("f"),
+ String::from("fo"),
+ String::from("foo"),
+ String::from("foo b"),
+ String::from("foo ba"),
+ String::from("foo bar"),
+ ],
+ // Only the first 3 keywords have full keywords associated with them
+ full_keywords: vec![(String::from("foo"), 3)],
+ ..DownloadedSuggestionCommonDetails::default()
+ },
+ ..DownloadedAmpSuggestion::default()
+ });
+
+ assert_eq!(
+ Vec::from_iter(suggestion.common_details().keywords()),
+ vec![
+ AmpKeyword {
+ rank: 0,
+ keyword: "f",
+ full_keyword: Some("foo"),
+ },
+ AmpKeyword {
+ rank: 1,
+ keyword: "fo",
+ full_keyword: Some("foo"),
+ },
+ AmpKeyword {
+ rank: 2,
+ keyword: "foo",
+ full_keyword: Some("foo"),
+ },
+ AmpKeyword {
+ rank: 3,
+ keyword: "foo b",
+ full_keyword: None,
+ },
+ AmpKeyword {
+ rank: 4,
+ keyword: "foo ba",
+ full_keyword: None,
+ },
+ AmpKeyword {
+ rank: 5,
+ keyword: "foo bar",
+ full_keyword: None,
+ },
+ ],
+ );
+ }
+}
diff --git a/third_party/rust/suggest/src/schema.rs b/third_party/rust/suggest/src/schema.rs
index 95d987c09e..b304363de5 100644
--- a/third_party/rust/suggest/src/schema.rs
+++ b/third_party/rust/suggest/src/schema.rs
@@ -13,7 +13,9 @@ use sql_support::open_database::{self, ConnectionInitializer};
/// 1. Bump this version.
/// 2. Add a migration from the old version to the new version in
/// [`SuggestConnectionInitializer::upgrade_from`].
-pub const VERSION: u32 = 14;
+/// a. If suggestions should be re-ingested after the migration, call `clear_database()` inside
+/// the migration.
+pub const VERSION: u32 = 18;
/// The current Suggest database schema.
pub const SQL: &str = "
@@ -25,10 +27,19 @@ pub const SQL: &str = "
CREATE TABLE keywords(
keyword TEXT NOT NULL,
suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
+ full_keyword_id INTEGER NULL REFERENCES full_keywords(id) ON DELETE SET NULL,
rank INTEGER NOT NULL,
PRIMARY KEY (keyword, suggestion_id)
) WITHOUT ROWID;
+ -- full keywords are what we display to the user when a (partial) keyword matches
+ -- The FK to suggestion_id makes it so full keywords get deleted when the parent suggestion is deleted.
+ CREATE TABLE full_keywords(
+ id INTEGER PRIMARY KEY,
+ suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
+ full_keyword TEXT NOT NULL
+ );
+
CREATE TABLE prefix_keywords(
keyword_prefix TEXT NOT NULL,
keyword_suffix TEXT NOT NULL DEFAULT '',
@@ -79,7 +90,8 @@ pub const SQL: &str = "
CREATE TABLE icons(
id TEXT PRIMARY KEY,
- data BLOB NOT NULL
+ data BLOB NOT NULL,
+ mimetype TEXT NOT NULL
) WITHOUT ROWID;
CREATE TABLE yelp_subjects(
@@ -111,6 +123,10 @@ pub const SQL: &str = "
description TEXT NOT NULL,
FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
);
+
+ CREATE TABLE dismissed_suggestions (
+ url TEXT PRIMARY KEY
+ ) WITHOUT ROWID;
";
/// Initializes an SQLite connection to the Suggest database, performing
@@ -139,15 +155,178 @@ impl ConnectionInitializer for SuggestConnectionInitializer {
Ok(db.execute_batch(SQL)?)
}
- fn upgrade_from(&self, _db: &Transaction<'_>, version: u32) -> open_database::Result<()> {
+ fn upgrade_from(&self, tx: &Transaction<'_>, version: u32) -> open_database::Result<()> {
match version {
- 1..=13 => {
+ 1..=15 => {
// Treat databases with these older schema versions as corrupt,
// so that they'll be replaced by a fresh, empty database with
// the current schema.
Err(open_database::Error::Corrupt)
}
+ 16 => {
+ tx.execute(
+ "
+ CREATE TABLE dismissed_suggestions (
+ url_hash INTEGER PRIMARY KEY
+ ) WITHOUT ROWID;",
+ (),
+ )?;
+ Ok(())
+ }
+ 17 => {
+ tx.execute(
+ "
+ DROP TABLE dismissed_suggestions;
+ CREATE TABLE dismissed_suggestions (
+ url TEXT PRIMARY KEY
+ ) WITHOUT ROWID;",
+ (),
+ )?;
+ Ok(())
+ }
_ => Err(open_database::Error::IncompatibleVersion(version)),
}
}
}
+
+/// Clears the database, removing all suggestions, icons, and metadata.
+pub fn clear_database(db: &Connection) -> rusqlite::Result<()> {
+ db.execute_batch(
+ "
+ DELETE FROM meta;
+ DELETE FROM suggestions;
+ DELETE FROM icons;
+ DELETE FROM yelp_subjects;
+ DELETE FROM yelp_modifiers;
+ DELETE FROM yelp_location_signs;
+ DELETE FROM yelp_custom_details;
+ ",
+ )
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use sql_support::open_database::test_utils::MigratedDatabaseFile;
+
+ // Snapshot of the v16 schema. We use this to test that we can migrate from there to the
+ // current schema.
+ const V16_SCHEMA: &str = r#"
+ CREATE TABLE meta(
+ key TEXT PRIMARY KEY,
+ value NOT NULL
+ ) WITHOUT ROWID;
+
+ CREATE TABLE keywords(
+ keyword TEXT NOT NULL,
+ suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
+ full_keyword_id INTEGER NULL REFERENCES full_keywords(id) ON DELETE SET NULL,
+ rank INTEGER NOT NULL,
+ PRIMARY KEY (keyword, suggestion_id)
+ ) WITHOUT ROWID;
+
+ -- full keywords are what we display to the user when a (partial) keyword matches
+ -- The FK to suggestion_id makes it so full keywords get deleted when the parent suggestion is deleted.
+ CREATE TABLE full_keywords(
+ id INTEGER PRIMARY KEY,
+ suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
+ full_keyword TEXT NOT NULL
+ );
+
+ CREATE TABLE prefix_keywords(
+ keyword_prefix TEXT NOT NULL,
+ keyword_suffix TEXT NOT NULL DEFAULT '',
+ confidence INTEGER NOT NULL DEFAULT 0,
+ rank INTEGER NOT NULL,
+ suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
+ PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id)
+ ) WITHOUT ROWID;
+
+ CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank);
+
+ CREATE TABLE suggestions(
+ id INTEGER PRIMARY KEY,
+ record_id TEXT NOT NULL,
+ provider INTEGER NOT NULL,
+ title TEXT NOT NULL,
+ url TEXT NOT NULL,
+ score REAL NOT NULL
+ );
+
+ CREATE TABLE amp_custom_details(
+ suggestion_id INTEGER PRIMARY KEY,
+ advertiser TEXT NOT NULL,
+ block_id INTEGER NOT NULL,
+ iab_category TEXT NOT NULL,
+ impression_url TEXT NOT NULL,
+ click_url TEXT NOT NULL,
+ icon_id TEXT NOT NULL,
+ FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
+ );
+
+ CREATE TABLE wikipedia_custom_details(
+ suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE,
+ icon_id TEXT NOT NULL
+ );
+
+ CREATE TABLE amo_custom_details(
+ suggestion_id INTEGER PRIMARY KEY,
+ description TEXT NOT NULL,
+ guid TEXT NOT NULL,
+ icon_url TEXT NOT NULL,
+ rating TEXT,
+ number_of_ratings INTEGER NOT NULL,
+ FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
+ );
+
+ CREATE INDEX suggestions_record_id ON suggestions(record_id);
+
+ CREATE TABLE icons(
+ id TEXT PRIMARY KEY,
+ data BLOB NOT NULL,
+ mimetype TEXT NOT NULL
+ ) WITHOUT ROWID;
+
+ CREATE TABLE yelp_subjects(
+ keyword TEXT PRIMARY KEY,
+ record_id TEXT NOT NULL
+ ) WITHOUT ROWID;
+
+ CREATE TABLE yelp_modifiers(
+ type INTEGER NOT NULL,
+ keyword TEXT NOT NULL,
+ record_id TEXT NOT NULL,
+ PRIMARY KEY (type, keyword)
+ ) WITHOUT ROWID;
+
+ CREATE TABLE yelp_location_signs(
+ keyword TEXT PRIMARY KEY,
+ need_location INTEGER NOT NULL,
+ record_id TEXT NOT NULL
+ ) WITHOUT ROWID;
+
+ CREATE TABLE yelp_custom_details(
+ icon_id TEXT PRIMARY KEY,
+ score REAL NOT NULL,
+ record_id TEXT NOT NULL
+ ) WITHOUT ROWID;
+
+ CREATE TABLE mdn_custom_details(
+ suggestion_id INTEGER PRIMARY KEY,
+ description TEXT NOT NULL,
+ FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
+ );
+
+ PRAGMA user_version=16;
+"#;
+
+ /// Test running all schema upgrades from V16, which was the first schema with a "real"
+ /// migration.
+ ///
+ /// If an upgrade fails, then this test will fail with a panic.
+ #[test]
+ fn test_all_upgrades() {
+ let db_file = MigratedDatabaseFile::new(SuggestConnectionInitializer, V16_SCHEMA);
+ db_file.run_all_upgrades();
+ }
+}
diff --git a/third_party/rust/suggest/src/store.rs b/third_party/rust/suggest/src/store.rs
index e1f437e8c5..c55cffc7f5 100644
--- a/third_party/rust/suggest/src/store.rs
+++ b/third_party/rust/suggest/src/store.rs
@@ -4,7 +4,7 @@
*/
use std::{
- collections::BTreeMap,
+ collections::{BTreeMap, BTreeSet},
path::{Path, PathBuf},
sync::Arc,
};
@@ -24,13 +24,15 @@ use serde::{de::DeserializeOwned, Deserialize, Serialize};
use crate::{
config::{SuggestGlobalConfig, SuggestProviderConfig},
db::{
- ConnectionType, SuggestDao, SuggestDb, LAST_INGEST_META_KEY, UNPARSABLE_RECORDS_META_KEY,
+ ConnectionType, SuggestDao, SuggestDb, LAST_INGEST_META_UNPARSABLE,
+ UNPARSABLE_RECORDS_META_KEY,
},
error::Error,
provider::SuggestionProvider,
rs::{
- SuggestAttachment, SuggestRecord, SuggestRecordId, SuggestRemoteSettingsClient,
- REMOTE_SETTINGS_COLLECTION, SUGGESTIONS_PER_ATTACHMENT,
+ SuggestAttachment, SuggestRecord, SuggestRecordId, SuggestRecordType,
+ SuggestRemoteSettingsClient, DEFAULT_RECORDS_TYPES, REMOTE_SETTINGS_COLLECTION,
+ SUGGESTIONS_PER_ATTACHMENT,
},
schema::VERSION,
Result, SuggestApiResult, Suggestion, SuggestionQuery,
@@ -48,7 +50,6 @@ pub struct SuggestStoreBuilder(Mutex<SuggestStoreBuilderInner>);
#[derive(Default)]
struct SuggestStoreBuilderInner {
data_path: Option<String>,
- cache_path: Option<String>,
remote_settings_config: Option<RemoteSettingsConfig>,
}
@@ -68,8 +69,8 @@ impl SuggestStoreBuilder {
self
}
- pub fn cache_path(self: Arc<Self>, path: String) -> Arc<Self> {
- self.0.lock().cache_path = Some(path);
+ pub fn cache_path(self: Arc<Self>, _path: String) -> Arc<Self> {
+ // We used to use this, but we're not using it anymore, just ignore the call
self
}
@@ -85,10 +86,6 @@ impl SuggestStoreBuilder {
.data_path
.clone()
.ok_or_else(|| Error::SuggestStoreBuilder("data_path not specified".to_owned()))?;
- let cache_path = inner
- .cache_path
- .clone()
- .ok_or_else(|| Error::SuggestStoreBuilder("cache_path not specified".to_owned()))?;
let settings_client =
remote_settings::Client::new(inner.remote_settings_config.clone().unwrap_or_else(
|| RemoteSettingsConfig {
@@ -98,7 +95,7 @@ impl SuggestStoreBuilder {
},
))?;
Ok(Arc::new(SuggestStore {
- inner: SuggestStoreInner::new(data_path, cache_path, settings_client),
+ inner: SuggestStoreInner::new(data_path, settings_client),
}))
}
}
@@ -182,7 +179,7 @@ impl SuggestStore {
)?)
}()?;
Ok(Self {
- inner: SuggestStoreInner::new("".to_owned(), path.to_owned(), settings_client),
+ inner: SuggestStoreInner::new(path.to_owned(), settings_client),
})
}
@@ -192,6 +189,22 @@ impl SuggestStore {
self.inner.query(query)
}
+ /// Dismiss a suggestion
+ ///
+ /// Dismissed suggestions will not be returned again
+ ///
+ /// In the case of AMP suggestions this should be the raw URL.
+ #[handle_error(Error)]
+ pub fn dismiss_suggestion(&self, suggestion_url: String) -> SuggestApiResult<()> {
+ self.inner.dismiss_suggestion(suggestion_url)
+ }
+
+ /// Clear dismissed suggestions
+ #[handle_error(Error)]
+ pub fn clear_dismissed_suggestions(&self) -> SuggestApiResult<()> {
+ self.inner.clear_dismissed_suggestions()
+ }
+
/// Interrupts any ongoing queries.
///
/// This should be called when the user types new input into the address
@@ -238,6 +251,7 @@ pub struct SuggestIngestionConstraints {
/// Because of how suggestions are partitioned in Remote Settings, this is a
/// soft limit, and the store might ingest more than requested.
pub max_suggestions: Option<u64>,
+ pub providers: Option<Vec<SuggestionProvider>>,
}
/// The implementation of the store. This is generic over the Remote Settings
@@ -250,23 +264,14 @@ pub(crate) struct SuggestStoreInner<S> {
/// It's not currently used because not all consumers pass this in yet.
#[allow(unused)]
data_path: PathBuf,
- /// Path to the temporary SQL database.
- ///
- /// This stores things that should be deleted when the user clears their cache.
- cache_path: PathBuf,
dbs: OnceCell<SuggestStoreDbs>,
settings_client: S,
}
impl<S> SuggestStoreInner<S> {
- fn new(
- data_path: impl Into<PathBuf>,
- cache_path: impl Into<PathBuf>,
- settings_client: S,
- ) -> Self {
+ pub fn new(data_path: impl Into<PathBuf>, settings_client: S) -> Self {
Self {
data_path: data_path.into(),
- cache_path: cache_path.into(),
dbs: OnceCell::new(),
settings_client,
}
@@ -276,7 +281,7 @@ impl<S> SuggestStoreInner<S> {
/// they're not already open.
fn dbs(&self) -> Result<&SuggestStoreDbs> {
self.dbs
- .get_or_try_init(|| SuggestStoreDbs::open(&self.cache_path))
+ .get_or_try_init(|| SuggestStoreDbs::open(&self.data_path))
}
fn query(&self, query: SuggestionQuery) -> Result<Vec<Suggestion>> {
@@ -286,6 +291,17 @@ impl<S> SuggestStoreInner<S> {
self.dbs()?.reader.read(|dao| dao.fetch_suggestions(&query))
}
+ fn dismiss_suggestion(&self, suggestion_url: String) -> Result<()> {
+ self.dbs()?
+ .writer
+ .write(|dao| dao.insert_dismissal(&suggestion_url))
+ }
+
+ fn clear_dismissed_suggestions(&self) -> Result<()> {
+ self.dbs()?.writer.write(|dao| dao.clear_dismissals())?;
+ Ok(())
+ }
+
fn interrupt(&self) {
if let Some(dbs) = self.dbs.get() {
// Only interrupt if the databases are already open.
@@ -315,7 +331,7 @@ impl<S> SuggestStoreInner<S>
where
S: SuggestRemoteSettingsClient,
{
- fn ingest(&self, constraints: SuggestIngestionConstraints) -> Result<()> {
+ pub fn ingest(&self, constraints: SuggestIngestionConstraints) -> Result<()> {
let writer = &self.dbs()?.writer;
if let Some(unparsable_records) =
@@ -330,26 +346,58 @@ where
for unparsable_ids in all_unparsable_ids.chunks(UNPARSABLE_IDS_PER_REQUEST) {
let mut options = GetItemsOptions::new();
for unparsable_id in unparsable_ids {
- options.eq("id", *unparsable_id);
+ options.filter_eq("id", *unparsable_id);
}
let records_chunk = self
.settings_client
.get_records_with_options(&options)?
.records;
- self.ingest_records(writer, &records_chunk)?;
+ self.ingest_records(LAST_INGEST_META_UNPARSABLE, writer, &records_chunk)?;
}
}
+ // use std::collections::BTreeSet;
+ let ingest_record_types = if let Some(rt) = &constraints.providers {
+ rt.iter()
+ .flat_map(|x| x.records_for_provider())
+ .collect::<BTreeSet<_>>()
+ .into_iter()
+ .collect()
+ } else {
+ DEFAULT_RECORDS_TYPES.to_vec()
+ };
+
+ for ingest_record_type in ingest_record_types {
+ self.ingest_records_by_type(ingest_record_type, writer, &constraints)?;
+ }
+
+ Ok(())
+ }
+
+ fn ingest_records_by_type(
+ &self,
+ ingest_record_type: SuggestRecordType,
+ writer: &SuggestDb,
+ constraints: &SuggestIngestionConstraints,
+ ) -> Result<()> {
let mut options = GetItemsOptions::new();
+
// Remote Settings returns records in descending modification order
// (newest first), but we want them in ascending order (oldest first),
// so that we can eventually resume downloading where we left off.
options.sort("last_modified", SortOrder::Ascending);
- if let Some(last_ingest) = writer.read(|dao| dao.get_meta::<u64>(LAST_INGEST_META_KEY))? {
+
+ options.filter_eq("type", ingest_record_type.to_string());
+
+ // Get the last ingest value. This is the max of the last_ingest_keys
+ // that are in the database.
+ if let Some(last_ingest) = writer
+ .read(|dao| dao.get_meta::<u64>(ingest_record_type.last_ingest_meta_key().as_str()))?
+ {
// Only download changes since our last ingest. If our last ingest
// was interrupted, we'll pick up where we left off.
- options.gt("last_modified", last_ingest.to_string());
+ options.filter_gt("last_modified", last_ingest.to_string());
}
if let Some(max_suggestions) = constraints.max_suggestions {
@@ -363,39 +411,56 @@ where
.settings_client
.get_records_with_options(&options)?
.records;
- self.ingest_records(writer, &records)?;
-
+ self.ingest_records(&ingest_record_type.last_ingest_meta_key(), writer, &records)?;
Ok(())
}
- fn ingest_records(&self, writer: &SuggestDb, records: &[RemoteSettingsRecord]) -> Result<()> {
+ fn ingest_records(
+ &self,
+ last_ingest_key: &str,
+ writer: &SuggestDb,
+ records: &[RemoteSettingsRecord],
+ ) -> Result<()> {
for record in records {
let record_id = SuggestRecordId::from(&record.id);
if record.deleted {
// If the entire record was deleted, drop all its suggestions
// and advance the last ingest time.
- writer.write(|dao| dao.handle_deleted_record(record))?;
+ writer.write(|dao| dao.handle_deleted_record(last_ingest_key, record))?;
continue;
}
let Ok(fields) =
serde_json::from_value(serde_json::Value::Object(record.fields.clone()))
else {
// We don't recognize this record's type, so we don't know how
- // to ingest its suggestions. Record this in the meta table.
+ // to ingest its suggestions. Skip processing this record.
writer.write(|dao| dao.handle_unparsable_record(record))?;
continue;
};
match fields {
SuggestRecord::AmpWikipedia => {
- self.ingest_attachment(writer, record, |dao, record_id, suggestions| {
- dao.insert_amp_wikipedia_suggestions(record_id, suggestions)
- })?;
+ self.ingest_attachment(
+ // TODO: Currently re-creating the last_ingest_key because using last_ingest_meta
+ // breaks the tests (particularly the unparsable functionality). So, keeping
+ // a direct reference until we remove the "unparsable" functionality.
+ &SuggestRecordType::AmpWikipedia.last_ingest_meta_key(),
+ writer,
+ record,
+ |dao, record_id, suggestions| {
+ dao.insert_amp_wikipedia_suggestions(record_id, suggestions)
+ },
+ )?;
}
SuggestRecord::AmpMobile => {
- self.ingest_attachment(writer, record, |dao, record_id, suggestions| {
- dao.insert_amp_mobile_suggestions(record_id, suggestions)
- })?;
+ self.ingest_attachment(
+ &SuggestRecordType::AmpMobile.last_ingest_meta_key(),
+ writer,
+ record,
+ |dao, record_id, suggestions| {
+ dao.insert_amp_mobile_suggestions(record_id, suggestions)
+ },
+ )?;
}
SuggestRecord::Icon => {
let (Some(icon_id), Some(attachment)) =
@@ -404,47 +469,79 @@ where
// An icon record should have an icon ID and an
// attachment. Icons that don't have these are
// malformed, so skip to the next record.
- writer.write(|dao| dao.put_last_ingest_if_newer(record.last_modified))?;
+ writer.write(|dao| {
+ dao.put_last_ingest_if_newer(
+ &SuggestRecordType::Icon.last_ingest_meta_key(),
+ record.last_modified,
+ )
+ })?;
continue;
};
let data = self.settings_client.get_attachment(&attachment.location)?;
writer.write(|dao| {
- dao.put_icon(icon_id, &data)?;
- dao.handle_ingested_record(record)
+ dao.put_icon(icon_id, &data, &attachment.mimetype)?;
+ dao.handle_ingested_record(
+ &SuggestRecordType::Icon.last_ingest_meta_key(),
+ record,
+ )
})?;
}
SuggestRecord::Amo => {
- self.ingest_attachment(writer, record, |dao, record_id, suggestions| {
- dao.insert_amo_suggestions(record_id, suggestions)
- })?;
+ self.ingest_attachment(
+ &SuggestRecordType::Amo.last_ingest_meta_key(),
+ writer,
+ record,
+ |dao, record_id, suggestions| {
+ dao.insert_amo_suggestions(record_id, suggestions)
+ },
+ )?;
}
SuggestRecord::Pocket => {
- self.ingest_attachment(writer, record, |dao, record_id, suggestions| {
- dao.insert_pocket_suggestions(record_id, suggestions)
- })?;
+ self.ingest_attachment(
+ &SuggestRecordType::Pocket.last_ingest_meta_key(),
+ writer,
+ record,
+ |dao, record_id, suggestions| {
+ dao.insert_pocket_suggestions(record_id, suggestions)
+ },
+ )?;
}
SuggestRecord::Yelp => {
- self.ingest_attachment(writer, record, |dao, record_id, suggestions| {
- match suggestions.first() {
+ self.ingest_attachment(
+ &SuggestRecordType::Yelp.last_ingest_meta_key(),
+ writer,
+ record,
+ |dao, record_id, suggestions| match suggestions.first() {
Some(suggestion) => dao.insert_yelp_suggestions(record_id, suggestion),
None => Ok(()),
- }
- })?;
+ },
+ )?;
}
SuggestRecord::Mdn => {
- self.ingest_attachment(writer, record, |dao, record_id, suggestions| {
- dao.insert_mdn_suggestions(record_id, suggestions)
- })?;
+ self.ingest_attachment(
+ &SuggestRecordType::Mdn.last_ingest_meta_key(),
+ writer,
+ record,
+ |dao, record_id, suggestions| {
+ dao.insert_mdn_suggestions(record_id, suggestions)
+ },
+ )?;
}
SuggestRecord::Weather(data) => {
- self.ingest_record(writer, record, |dao, record_id| {
- dao.insert_weather_data(record_id, &data)
- })?;
+ self.ingest_record(
+ &SuggestRecordType::Weather.last_ingest_meta_key(),
+ writer,
+ record,
+ |dao, record_id| dao.insert_weather_data(record_id, &data),
+ )?;
}
SuggestRecord::GlobalConfig(config) => {
- self.ingest_record(writer, record, |dao, _| {
- dao.put_global_config(&SuggestGlobalConfig::from(&config))
- })?;
+ self.ingest_record(
+ &SuggestRecordType::GlobalConfig.last_ingest_meta_key(),
+ writer,
+ record,
+ |dao, _| dao.put_global_config(&SuggestGlobalConfig::from(&config)),
+ )?;
}
}
}
@@ -453,6 +550,7 @@ where
fn ingest_record(
&self,
+ last_ingest_key: &str,
writer: &SuggestDb,
record: &RemoteSettingsRecord,
ingestion_handler: impl FnOnce(&mut SuggestDao<'_>, &SuggestRecordId) -> Result<()>,
@@ -469,12 +567,13 @@ where
// Ingest (or re-ingest) all data in the record.
ingestion_handler(dao, &record_id)?;
- dao.handle_ingested_record(record)
+ dao.handle_ingested_record(last_ingest_key, record)
})
}
fn ingest_attachment<T>(
&self,
+ last_ingest_key: &str,
writer: &SuggestDb,
record: &RemoteSettingsRecord,
ingestion_handler: impl FnOnce(&mut SuggestDao<'_>, &SuggestRecordId, &[T]) -> Result<()>,
@@ -486,20 +585,72 @@ where
// This method should be called only when a record is expected to
// have an attachment. If it doesn't have one, it's malformed, so
// skip to the next record.
- writer.write(|dao| dao.put_last_ingest_if_newer(record.last_modified))?;
+ writer
+ .write(|dao| dao.put_last_ingest_if_newer(last_ingest_key, record.last_modified))?;
return Ok(());
};
let attachment_data = self.settings_client.get_attachment(&attachment.location)?;
match serde_json::from_slice::<SuggestAttachment<T>>(&attachment_data) {
- Ok(attachment) => self.ingest_record(writer, record, |dao, record_id| {
- ingestion_handler(dao, record_id, attachment.suggestions())
- }),
+ Ok(attachment) => {
+ self.ingest_record(last_ingest_key, writer, record, |dao, record_id| {
+ ingestion_handler(dao, record_id, attachment.suggestions())
+ })
+ }
Err(_) => writer.write(|dao| dao.handle_unparsable_record(record)),
}
}
}
+#[cfg(feature = "benchmark_api")]
+impl<S> SuggestStoreInner<S>
+where
+ S: SuggestRemoteSettingsClient,
+{
+ pub fn into_settings_client(self) -> S {
+ self.settings_client
+ }
+
+ pub fn ensure_db_initialized(&self) {
+ self.dbs().unwrap();
+ }
+
+ pub fn benchmark_ingest_records_by_type(&self, ingest_record_type: SuggestRecordType) {
+ self.ingest_records_by_type(
+ ingest_record_type,
+ &self.dbs().unwrap().writer,
+ &SuggestIngestionConstraints::default(),
+ )
+ .unwrap()
+ }
+
+ pub fn table_row_counts(&self) -> Vec<(String, u32)> {
+ use sql_support::ConnExt;
+
+ // Note: since this is just used for debugging, use unwrap to simplify the error handling.
+ let reader = &self.dbs().unwrap().reader;
+ let conn = reader.conn.lock();
+ let table_names: Vec<String> = conn
+ .query_rows_and_then(
+ "SELECT name FROM sqlite_master where type = 'table'",
+ (),
+ |row| row.get(0),
+ )
+ .unwrap();
+ let mut table_names_with_counts: Vec<(String, u32)> = table_names
+ .into_iter()
+ .map(|name| {
+ let count: u32 = conn
+ .query_one(&format!("SELECT COUNT(*) FROM {name}"))
+ .unwrap();
+ (name, count)
+ })
+ .collect();
+ table_names_with_counts.sort_by(|a, b| (b.1.cmp(&a.1)));
+ table_names_with_counts
+ }
+}
+
/// Holds a store's open connections to the Suggest database.
struct SuggestStoreDbs {
/// A read-write connection used to update the database with new data.
@@ -549,10 +700,6 @@ mod tests {
"file:test_store_data_{}?mode=memory&cache=shared",
hex::encode(unique_suffix),
),
- format!(
- "file:test_store_cache_{}?mode=memory&cache=shared",
- hex::encode(unique_suffix),
- ),
settings_client,
)
}
@@ -721,7 +868,14 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(15));
+ assert_eq!(
+ dao.get_meta::<u64>(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ Some(15)
+ );
expect![[r#"
[
Amp {
@@ -729,6 +883,7 @@ mod tests {
url: "https://www.lph-nm.biz",
raw_url: "https://www.lph-nm.biz",
icon: None,
+ icon_mimetype: None,
full_keyword: "los",
block_id: 0,
advertiser: "Los Pollos Hermanos",
@@ -834,6 +989,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -872,6 +1030,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "penne",
block_id: 0,
advertiser: "Good Place Eats",
@@ -895,6 +1056,267 @@ mod tests {
Ok(())
}
+ #[test]
+ fn ingest_full_keywords() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "1",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "2",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-2.json",
+ "mimetype": "application/json",
+ "location": "data-2.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "3",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-3.json",
+ "mimetype": "application/json",
+ "location": "data-3.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "4",
+ "type": "amp-mobile-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-4.json",
+ "mimetype": "application/json",
+ "location": "data-4.json",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ // AMP attachment with full keyword data
+ .with_data(
+ "data-1.json",
+ json!([{
+ "id": 0,
+ "advertiser": "Los Pollos Hermanos",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"],
+ "full_keywords": [
+ // Full keyword for the first 4 keywords
+ ("los pollos", 4),
+ // Full keyword for the next 2 keywords
+ ("los pollos hermanos (restaurant)", 2),
+ ],
+ "title": "Los Pollos Hermanos - Albuquerque - 1",
+ "url": "https://www.lph-nm.biz",
+ "icon": "5678",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }]),
+ )?
+ // AMP attachment without a full keyword
+ .with_data(
+ "data-2.json",
+ json!([{
+ "id": 1,
+ "advertiser": "Los Pollos Hermanos",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"],
+ "title": "Los Pollos Hermanos - Albuquerque - 2",
+ "url": "https://www.lph-nm.biz",
+ "icon": "5678",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }]),
+ )?
+ // Wikipedia attachment with full keyword data. We should ignore the full
+ // keyword data for Wikipedia suggestions
+ .with_data(
+ "data-3.json",
+ json!([{
+ "id": 2,
+ "advertiser": "Wikipedia",
+ "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"],
+ "title": "Los Pollos Hermanos - Albuquerque - Wiki",
+ "full_keywords": [
+ ("Los Pollos Hermanos - Albuquerque", 6),
+ ],
+ "url": "https://www.lph-nm.biz",
+ "icon": "5678",
+ "score": 0.3,
+ }]),
+ )?
+ // Amp mobile suggestion, this is essentially the same as 1, except for the SuggestionProvider
+ .with_data(
+ "data-4.json",
+ json!([{
+ "id": 0,
+ "advertiser": "Los Pollos Hermanos",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"],
+ "full_keywords": [
+ // Full keyword for the first 4 keywords
+ ("los pollos", 4),
+ // Full keyword for the next 2 keywords
+ ("los pollos hermanos (restaurant)", 2),
+ ],
+ "title": "Los Pollos Hermanos - Albuquerque - 4",
+ "url": "https://www.lph-nm.biz",
+ "icon": "5678",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }]),
+ )?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ store.dbs()?.reader.read(|dao| {
+ // This one should match the first full keyword for the first AMP item.
+ expect![[r#"
+ [
+ Amp {
+ title: "Los Pollos Hermanos - Albuquerque - 1",
+ url: "https://www.lph-nm.biz",
+ raw_url: "https://www.lph-nm.biz",
+ icon: None,
+ icon_mimetype: None,
+ full_keyword: "los pollos",
+ block_id: 0,
+ advertiser: "Los Pollos Hermanos",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ Amp {
+ title: "Los Pollos Hermanos - Albuquerque - 2",
+ url: "https://www.lph-nm.biz",
+ raw_url: "https://www.lph-nm.biz",
+ icon: None,
+ icon_mimetype: None,
+ full_keyword: "los",
+ block_id: 1,
+ advertiser: "Los Pollos Hermanos",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "lo".into(),
+ providers: vec![SuggestionProvider::Amp],
+ limit: None,
+ })?);
+ // This one should match the second full keyword for the first AMP item.
+ expect![[r#"
+ [
+ Amp {
+ title: "Los Pollos Hermanos - Albuquerque - 1",
+ url: "https://www.lph-nm.biz",
+ raw_url: "https://www.lph-nm.biz",
+ icon: None,
+ icon_mimetype: None,
+ full_keyword: "los pollos hermanos (restaurant)",
+ block_id: 0,
+ advertiser: "Los Pollos Hermanos",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ Amp {
+ title: "Los Pollos Hermanos - Albuquerque - 2",
+ url: "https://www.lph-nm.biz",
+ raw_url: "https://www.lph-nm.biz",
+ icon: None,
+ icon_mimetype: None,
+ full_keyword: "los pollos hermanos",
+ block_id: 1,
+ advertiser: "Los Pollos Hermanos",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "los pollos h".into(),
+ providers: vec![SuggestionProvider::Amp],
+ limit: None,
+ })?);
+ // This one matches a Wikipedia suggestion, so the full keyword should be ignored
+ expect![[r#"
+ [
+ Wikipedia {
+ title: "Los Pollos Hermanos - Albuquerque - Wiki",
+ url: "https://www.lph-nm.biz",
+ icon: None,
+ icon_mimetype: None,
+ full_keyword: "los",
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "los".into(),
+ providers: vec![SuggestionProvider::Wikipedia],
+ limit: None,
+ })?);
+ // This one matches a Wikipedia suggestion, so the full keyword should be ignored
+ expect![[r#"
+ [
+ Amp {
+ title: "Los Pollos Hermanos - Albuquerque - 4",
+ url: "https://www.lph-nm.biz",
+ raw_url: "https://www.lph-nm.biz",
+ icon: None,
+ icon_mimetype: None,
+ full_keyword: "los pollos hermanos (restaurant)",
+ block_id: 0,
+ advertiser: "Los Pollos Hermanos",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "los pollos h".into(),
+ providers: vec![SuggestionProvider::AmpMobile],
+ limit: None,
+ })?);
+
+ Ok(())
+ })?;
+
+ Ok(())
+ }
+
/// Tests ingesting a data attachment containing a single suggestion,
/// instead of an array of suggestions.
#[test]
@@ -941,6 +1363,7 @@ mod tests {
url: "https://www.lasagna.restaurant",
raw_url: "https://www.lasagna.restaurant",
icon: None,
+ icon_mimetype: None,
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -1014,7 +1437,14 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(15u64));
+ assert_eq!(
+ dao.get_meta(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ Some(15u64)
+ );
expect![[r#"
[
Amp {
@@ -1022,6 +1452,7 @@ mod tests {
url: "https://www.lasagna.restaurant",
raw_url: "https://www.lasagna.restaurant",
icon: None,
+ icon_mimetype: None,
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -1084,8 +1515,15 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
- store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(30u64));
+ store.dbs()?.reader.read(|dao: &SuggestDao<'_>| {
+ assert_eq!(
+ dao.get_meta(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ Some(30u64)
+ );
assert!(dao
.fetch_suggestions(&SuggestionQuery {
keyword: "la".into(),
@@ -1100,6 +1538,7 @@ mod tests {
url: "https://www.lph-nm.biz",
raw_url: "https://www.lph-nm.biz",
icon: None,
+ icon_mimetype: None,
full_keyword: "los pollos",
block_id: 0,
advertiser: "Los Pollos Hermanos",
@@ -1123,6 +1562,7 @@ mod tests {
url: "https://penne.biz",
raw_url: "https://penne.biz",
icon: None,
+ icon_mimetype: None,
full_keyword: "penne",
block_id: 0,
advertiser: "Good Place Eats",
@@ -1219,7 +1659,10 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(25u64));
+ assert_eq!(
+ dao.get_meta(SuggestRecordType::Icon.last_ingest_meta_key().as_str())?,
+ Some(25u64)
+ );
assert_eq!(
dao.conn
.query_one::<i64>("SELECT count(*) FROM suggestions")?,
@@ -1259,7 +1702,10 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(35u64));
+ assert_eq!(
+ dao.get_meta(SuggestRecordType::Icon.last_ingest_meta_key().as_str())?,
+ Some(35u64)
+ );
expect![[r#"
[
Amp {
@@ -1286,6 +1732,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -1327,6 +1776,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "los",
block_id: 0,
advertiser: "Los Pollos Hermanos",
@@ -1422,7 +1874,10 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(15u64));
+ assert_eq!(
+ dao.get_meta(SuggestRecordType::Amo.last_ingest_meta_key().as_str())?,
+ Some(15u64)
+ );
expect![[r#"
[
@@ -1513,7 +1968,10 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(30u64));
+ assert_eq!(
+ dao.get_meta(SuggestRecordType::Amo.last_ingest_meta_key().as_str())?,
+ Some(30u64)
+ );
expect![[r#"
[
@@ -1648,13 +2106,24 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(20));
assert_eq!(
dao.conn
.query_one::<i64>("SELECT count(*) FROM suggestions")?,
1
);
assert_eq!(dao.conn.query_one::<i64>("SELECT count(*) FROM icons")?, 1);
+ assert_eq!(
+ dao.get_meta(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ Some(15)
+ );
+ assert_eq!(
+ dao.get_meta(SuggestRecordType::Icon.last_ingest_meta_key().as_str())?,
+ Some(20)
+ );
Ok(())
})?;
@@ -1674,14 +2143,16 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(30));
assert_eq!(
dao.conn
.query_one::<i64>("SELECT count(*) FROM suggestions")?,
0
);
assert_eq!(dao.conn.query_one::<i64>("SELECT count(*) FROM icons")?, 0);
-
+ assert_eq!(
+ dao.get_meta(SuggestRecordType::Icon.last_ingest_meta_key().as_str())?,
+ Some(30)
+ );
Ok(())
})?;
@@ -1717,6 +2188,7 @@ mod tests {
for (max_suggestions, expected_limit) in table {
store.ingest(SuggestIngestionConstraints {
max_suggestions: Some(max_suggestions),
+ providers: Some(vec![SuggestionProvider::Amp]),
})?;
let actual_limit = store
.settings_client
@@ -1772,7 +2244,14 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(15));
+ assert_eq!(
+ dao.get_meta::<u64>(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ Some(15)
+ );
assert_eq!(
dao.conn
.query_one::<i64>("SELECT count(*) FROM suggestions")?,
@@ -1789,7 +2268,14 @@ mod tests {
store.clear()?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, None);
+ assert_eq!(
+ dao.get_meta::<u64>(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ None
+ );
assert_eq!(
dao.conn
.query_one::<i64>("SELECT count(*) FROM suggestions")?,
@@ -2080,6 +2566,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -2143,6 +2632,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "multimatch",
},
]
@@ -2199,6 +2691,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "multimatch",
},
]
@@ -2268,6 +2763,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -2349,6 +2847,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "california",
},
Wikipedia {
@@ -2370,6 +2871,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "california",
},
]
@@ -2403,6 +2907,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "california",
},
]
@@ -2614,6 +3121,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2647,6 +3157,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2680,6 +3193,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2735,6 +3251,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2779,6 +3298,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2812,6 +3334,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2856,6 +3381,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2889,6 +3417,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2933,6 +3464,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2966,6 +3500,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -2999,6 +3536,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -3032,6 +3572,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -3076,6 +3619,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -3109,6 +3655,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -3186,6 +3735,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -3219,6 +3771,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -3252,6 +3807,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -3285,6 +3843,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: true,
subject_exact_match: true,
@@ -3318,6 +3879,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -3362,6 +3926,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: false,
@@ -3395,6 +3962,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -3428,6 +3998,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: false,
@@ -3483,6 +4056,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: false,
@@ -3516,6 +4092,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: false,
@@ -3549,6 +4128,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: false,
@@ -3593,6 +4175,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/svg+xml",
+ ),
score: 0.5,
has_location_sign: false,
subject_exact_match: false,
@@ -3734,6 +4319,7 @@ mod tests {
url: "https://www.lasagna.restaurant",
raw_url: "https://www.lasagna.restaurant",
icon: None,
+ icon_mimetype: None,
full_keyword: "amp wiki match",
block_id: 0,
advertiser: "Good Place Eats",
@@ -3762,6 +4348,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "amp wiki match",
},
Amp {
@@ -3769,6 +4358,7 @@ mod tests {
url: "https://penne.biz",
raw_url: "https://penne.biz",
icon: None,
+ icon_mimetype: None,
full_keyword: "amp wiki match",
block_id: 0,
advertiser: "Good Place Eats",
@@ -3801,6 +4391,7 @@ mod tests {
url: "https://www.lasagna.restaurant",
raw_url: "https://www.lasagna.restaurant",
icon: None,
+ icon_mimetype: None,
full_keyword: "amp wiki match",
block_id: 0,
advertiser: "Good Place Eats",
@@ -3829,6 +4420,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "amp wiki match",
},
]
@@ -3873,6 +4467,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "pocket wiki match",
},
Pocket {
@@ -4193,6 +4790,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -4234,6 +4834,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -4275,6 +4878,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -4304,6 +4910,9 @@ mod tests {
110,
],
),
+ icon_mimetype: Some(
+ "image/png",
+ ),
full_keyword: "lasagna",
block_id: 0,
advertiser: "Good Place Eats",
@@ -4365,7 +4974,10 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(45));
+ assert_eq!(
+ dao.get_meta::<u64>(SuggestRecordType::Icon.last_ingest_meta_key().as_str())?,
+ Some(45)
+ );
assert_eq!(
dao.conn
.query_one::<i64>("SELECT count(*) FROM suggestions")?,
@@ -4400,16 +5012,19 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(30));
+ assert_eq!(
+ dao.get_meta("last_quicksuggest_ingest_unparsable")?,
+ Some(30)
+ );
expect![[r#"
Some(
UnparsableRecords(
{
"clippy-2": UnparsableRecord {
- schema_version: 14,
+ schema_version: 18,
},
"fancy-new-suggestions-1": UnparsableRecord {
- schema_version: 14,
+ schema_version: 18,
},
},
),
@@ -4469,16 +5084,19 @@ mod tests {
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(30));
+ assert_eq!(
+ dao.get_meta("last_quicksuggest_ingest_unparsable")?,
+ Some(30)
+ );
expect![[r#"
Some(
UnparsableRecords(
{
"clippy-2": UnparsableRecord {
- schema_version: 14,
+ schema_version: 18,
},
"fancy-new-suggestions-1": UnparsableRecord {
- schema_version: 14,
+ schema_version: 18,
},
},
),
@@ -4505,13 +5123,105 @@ mod tests {
let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
store.dbs()?.writer.write(|dao| {
- dao.put_meta(LAST_INGEST_META_KEY, 30)?;
+ dao.put_meta(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str(),
+ 30,
+ )?;
Ok(())
})?;
store.ingest(SuggestIngestionConstraints::default())?;
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(30));
+ assert_eq!(
+ dao.get_meta::<u64>(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ Some(30)
+ );
+ Ok(())
+ })?;
+
+ Ok(())
+ }
+
+ /// Tests that we only ingest providers that we're concerned with.
+ #[test]
+ fn ingest_constraints_provider() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "data",
+ "last_modified": 15,
+ }, {
+ "id": "icon-1",
+ "type": "icon",
+ "last_modified": 30,
+ }]))?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+ store.dbs()?.writer.write(|dao| {
+ // Check that existing data is updated properly.
+ dao.put_meta(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str(),
+ 10,
+ )?;
+ Ok(())
+ })?;
+
+ let constraints = SuggestIngestionConstraints {
+ max_suggestions: Some(100),
+ providers: Some(vec![SuggestionProvider::Amp, SuggestionProvider::Pocket]),
+ };
+ store.ingest(constraints)?;
+
+ store.dbs()?.reader.read(|dao| {
+ assert_eq!(
+ dao.get_meta::<u64>(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ Some(15)
+ );
+ assert_eq!(
+ dao.get_meta::<u64>(SuggestRecordType::Icon.last_ingest_meta_key().as_str())?,
+ Some(30)
+ );
+ assert_eq!(
+ dao.get_meta::<u64>(SuggestRecordType::Pocket.last_ingest_meta_key().as_str())?,
+ None
+ );
+ assert_eq!(
+ dao.get_meta::<u64>(SuggestRecordType::Amo.last_ingest_meta_key().as_str())?,
+ None
+ );
+ assert_eq!(
+ dao.get_meta::<u64>(SuggestRecordType::Yelp.last_ingest_meta_key().as_str())?,
+ None
+ );
+ assert_eq!(
+ dao.get_meta::<u64>(SuggestRecordType::Mdn.last_ingest_meta_key().as_str())?,
+ None
+ );
+ assert_eq!(
+ dao.get_meta::<u64>(SuggestRecordType::AmpMobile.last_ingest_meta_key().as_str())?,
+ None
+ );
+ assert_eq!(
+ dao.get_meta::<u64>(
+ SuggestRecordType::GlobalConfig
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ None
+ );
Ok(())
})?;
@@ -4582,10 +5292,10 @@ mod tests {
UnparsableRecords(
{
"clippy-2": UnparsableRecord {
- schema_version: 14,
+ schema_version: 18,
},
"fancy-new-suggestions-1": UnparsableRecord {
- schema_version: 14,
+ schema_version: 18,
},
},
),
@@ -4671,7 +5381,7 @@ mod tests {
UnparsableRecords(
{
"invalid-attachment": UnparsableRecord {
- schema_version: 14,
+ schema_version: 18,
},
},
),
@@ -4683,7 +5393,14 @@ mod tests {
// Test that the valid record was read
store.dbs()?.reader.read(|dao| {
- assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(15));
+ assert_eq!(
+ dao.get_meta(
+ SuggestRecordType::AmpWikipedia
+ .last_ingest_meta_key()
+ .as_str()
+ )?,
+ Some(15)
+ );
expect![[r#"
[
Amp {
@@ -4691,6 +5408,7 @@ mod tests {
url: "https://www.lph-nm.biz",
raw_url: "https://www.lph-nm.biz",
icon: None,
+ icon_mimetype: None,
full_keyword: "los",
block_id: 0,
advertiser: "Los Pollos Hermanos",
@@ -4899,6 +5617,7 @@ mod tests {
url: "https://www.yelp.com/search?find_desc=ramen",
title: "ramen",
icon: None,
+ icon_mimetype: None,
score: 0.5,
has_location_sign: false,
subject_exact_match: true,
@@ -5313,4 +6032,204 @@ mod tests {
Ok(())
}
+
+ #[test]
+ fn remove_dismissed_suggestions() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+
+ }, {
+ "id": "data-2",
+ "type": "amo-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-2.json",
+ "mimetype": "application/json",
+ "location": "data-2.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "data-3",
+ "type": "pocket-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-3.json",
+ "mimetype": "application/json",
+ "location": "data-3.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "data-5",
+ "type": "mdn-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-5.json",
+ "mimetype": "application/json",
+ "location": "data-5.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "data-6",
+ "type": "amp-mobile-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-6.json",
+ "mimetype": "application/json",
+ "location": "data-6.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "icon-2",
+ "type": "icon",
+ "last_modified": 20,
+ "attachment": {
+ "filename": "icon-2.png",
+ "mimetype": "image/png",
+ "location": "icon-2.png",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "icon-3",
+ "type": "icon",
+ "last_modified": 25,
+ "attachment": {
+ "filename": "icon-3.png",
+ "mimetype": "image/png",
+ "location": "icon-3.png",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ .with_data(
+ "data-1.json",
+ json!([{
+ "id": 0,
+ "advertiser": "Good Place Eats",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["cats"],
+ "title": "Lasagna Come Out Tomorrow",
+ "url": "https://www.lasagna.restaurant",
+ "icon": "2",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.31
+ }, {
+ "id": 0,
+ "advertiser": "Wikipedia",
+ "iab_category": "5 - Education",
+ "keywords": ["cats"],
+ "title": "California",
+ "url": "https://wikipedia.org/California",
+ "icon": "3"
+ }]),
+ )?
+ .with_data(
+ "data-2.json",
+ json!([
+ {
+ "description": "amo suggestion",
+ "url": "https://addons.mozilla.org/en-US/firefox/addon/example",
+ "guid": "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
+ "keywords": ["cats"],
+ "title": "Firefox Relay",
+ "icon": "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b",
+ "rating": "4.9",
+ "number_of_ratings": 888,
+ "score": 0.32
+ },
+ ]),
+ )?
+ .with_data(
+ "data-3.json",
+ json!([
+ {
+ "description": "pocket suggestion",
+ "url": "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women",
+ "lowConfidenceKeywords": [],
+ "highConfidenceKeywords": ["cats"],
+ "title": "‘It’s Not Just Burnout:’ How Grind Culture Fails Women",
+ "score": 0.33
+ },
+ ]),
+ )?
+ .with_data(
+ "data-5.json",
+ json!([
+ {
+ "description": "Javascript Array",
+ "url": "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array",
+ "keywords": ["cats"],
+ "title": "Array",
+ "score": 0.24
+ },
+ ]),
+ )?
+ .with_data(
+ "data-6.json",
+ json!([
+ {
+ "id": 0,
+ "advertiser": "Good Place Eats",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["cats"],
+ "title": "Mobile - Lasagna Come Out Tomorrow",
+ "url": "https://www.lasagna.restaurant",
+ "icon": "3",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.26
+ }
+ ]),
+ )?
+ .with_icon("icon-2.png", "i-am-an-icon".as_bytes().into())
+ .with_icon("icon-3.png", "also-an-icon".as_bytes().into());
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ // A query for cats should return all suggestions
+ let query = SuggestionQuery {
+ keyword: "cats".into(),
+ providers: vec![
+ SuggestionProvider::Amp,
+ SuggestionProvider::Wikipedia,
+ SuggestionProvider::Amo,
+ SuggestionProvider::Pocket,
+ SuggestionProvider::Mdn,
+ SuggestionProvider::AmpMobile,
+ ],
+ limit: None,
+ };
+ let results = store.query(query.clone())?;
+ assert_eq!(results.len(), 6);
+
+ for result in results {
+ store.dismiss_suggestion(result.raw_url().unwrap().to_string())?;
+ }
+
+ // After dismissing the suggestions, the next query shouldn't return them
+ assert_eq!(store.query(query.clone())?.len(), 0);
+
+ // Clearing the dismissals should cause them to be returned again
+ store.clear_dismissed_suggestions()?;
+ assert_eq!(store.query(query.clone())?.len(), 6);
+
+ Ok(())
+ }
}
diff --git a/third_party/rust/suggest/src/suggest.udl b/third_party/rust/suggest/src/suggest.udl
index 1cd8911a48..4a4e3fe9a0 100644
--- a/third_party/rust/suggest/src/suggest.udl
+++ b/third_party/rust/suggest/src/suggest.udl
@@ -40,6 +40,7 @@ interface Suggestion {
string url,
string raw_url,
sequence<u8>? icon,
+ string? icon_mimetype,
string full_keyword,
i64 block_id,
string advertiser,
@@ -59,6 +60,7 @@ interface Suggestion {
string title,
string url,
sequence<u8>? icon,
+ string? icon_mimetype,
string full_keyword
);
Amo(
@@ -75,6 +77,7 @@ interface Suggestion {
string url,
string title,
sequence<u8>? icon,
+ string? icon_mimetype,
f64 score,
boolean has_location_sign,
boolean subject_exact_match,
@@ -99,6 +102,7 @@ dictionary SuggestionQuery {
dictionary SuggestIngestionConstraints {
u64? max_suggestions = null;
+ sequence<SuggestionProvider>? providers = null;
};
dictionary SuggestGlobalConfig {
@@ -119,6 +123,12 @@ interface SuggestStore {
[Throws=SuggestApiError]
sequence<Suggestion> query(SuggestionQuery query);
+ [Throws=SuggestApiError]
+ void dismiss_suggestion(string raw_suggestion_url);
+
+ [Throws=SuggestApiError]
+ void clear_dismissed_suggestions();
+
void interrupt();
[Throws=SuggestApiError]
@@ -140,6 +150,7 @@ interface SuggestStoreBuilder {
[Self=ByArc]
SuggestStoreBuilder data_path(string path);
+ // Deprecated: this is no longer used by the suggest component.
[Self=ByArc]
SuggestStoreBuilder cache_path(string path);
diff --git a/third_party/rust/suggest/src/suggestion.rs b/third_party/rust/suggest/src/suggestion.rs
index f5425e3c73..c0b45524c7 100644
--- a/third_party/rust/suggest/src/suggestion.rs
+++ b/third_party/rust/suggest/src/suggestion.rs
@@ -29,6 +29,7 @@ pub enum Suggestion {
url: String,
raw_url: String,
icon: Option<Vec<u8>>,
+ icon_mimetype: Option<String>,
full_keyword: String,
block_id: i64,
advertiser: String,
@@ -48,6 +49,7 @@ pub enum Suggestion {
title: String,
url: String,
icon: Option<Vec<u8>>,
+ icon_mimetype: Option<String>,
full_keyword: String,
},
Amo {
@@ -64,6 +66,7 @@ pub enum Suggestion {
url: String,
title: String,
icon: Option<Vec<u8>>,
+ icon_mimetype: Option<String>,
score: f64,
has_location_sign: bool,
subject_exact_match: bool,
@@ -106,6 +109,37 @@ impl Ord for Suggestion {
}
}
+impl Suggestion {
+ /// Get the URL for this suggestion, if present
+ pub fn url(&self) -> Option<&str> {
+ match self {
+ Self::Amp { url, .. }
+ | Self::Pocket { url, .. }
+ | Self::Wikipedia { url, .. }
+ | Self::Amo { url, .. }
+ | Self::Yelp { url, .. }
+ | Self::Mdn { url, .. } => Some(url),
+ _ => None,
+ }
+ }
+
+ /// Get the raw URL for this suggestion, if present
+ ///
+ /// This is the same as `url` except for Amp. In that case, `url` is the URL after being
+ /// "cooked" using template interpolation, while `raw_url` is the URL template.
+ pub fn raw_url(&self) -> Option<&str> {
+ match self {
+ Self::Amp { raw_url: url, .. }
+ | Self::Pocket { url, .. }
+ | Self::Wikipedia { url, .. }
+ | Self::Amo { url, .. }
+ | Self::Yelp { url, .. }
+ | Self::Mdn { url, .. } => Some(url),
+ _ => None,
+ }
+ }
+}
+
impl Eq for Suggestion {}
/// Replaces all template parameters in a "raw" sponsored suggestion URL,
/// producing a "cooked" URL with real values.
diff --git a/third_party/rust/suggest/src/yelp.rs b/third_party/rust/suggest/src/yelp.rs
index 2413709c67..0eb6c10d56 100644
--- a/third_party/rust/suggest/src/yelp.rs
+++ b/third_party/rust/suggest/src/yelp.rs
@@ -52,7 +52,7 @@ const SUBJECT_PREFIX_MATCH_THRESHOLD: usize = 2;
impl<'a> SuggestDao<'a> {
/// Inserts the suggestions for Yelp attachment into the database.
- pub fn insert_yelp_suggestions(
+ pub(crate) fn insert_yelp_suggestions(
&mut self,
record_id: &SuggestRecordId,
suggestion: &DownloadedYelpSuggestion,
@@ -130,7 +130,10 @@ impl<'a> SuggestDao<'a> {
}
/// Fetch Yelp suggestion from given user's query.
- pub fn fetch_yelp_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
+ pub(crate) fn fetch_yelp_suggestions(
+ &self,
+ query: &SuggestionQuery,
+ ) -> Result<Vec<Suggestion>> {
if !query.providers.contains(&SuggestionProvider::Yelp) {
return Ok(vec![]);
}
@@ -144,7 +147,7 @@ impl<'a> SuggestDao<'a> {
let Some((subject, subject_exact_match)) = self.find_subject(query_string)? else {
return Ok(vec![]);
};
- let (icon, score) = self.fetch_custom_details()?;
+ let (icon, icon_mimetype, score) = self.fetch_custom_details()?;
let builder = SuggestionBuilder {
subject: &subject,
subject_exact_match,
@@ -154,6 +157,7 @@ impl<'a> SuggestDao<'a> {
location: None,
need_location: false,
icon,
+ icon_mimetype,
score,
};
return Ok(vec![builder.into()]);
@@ -185,7 +189,7 @@ impl<'a> SuggestDao<'a> {
return Ok(vec![]);
};
- let (icon, score) = self.fetch_custom_details()?;
+ let (icon, icon_mimetype, score) = self.fetch_custom_details()?;
let builder = SuggestionBuilder {
subject: &subject,
subject_exact_match,
@@ -195,6 +199,7 @@ impl<'a> SuggestDao<'a> {
location,
need_location,
icon,
+ icon_mimetype,
score,
};
Ok(vec![builder.into()])
@@ -204,6 +209,7 @@ impl<'a> SuggestDao<'a> {
/// It returns the location tuple as follows:
/// (
/// Option<Vec<u8>>: Icon data. If not found, returns None.
+ /// Option<String>: Mimetype of the icon data. If not found, returns None.
/// f64: Reflects score field in the yelp_custom_details table.
/// )
///
@@ -212,11 +218,11 @@ impl<'a> SuggestDao<'a> {
/// on Remote Settings. The following query will perform a table scan against
/// `yelp_custom_details` followed by an index search against `icons`,
/// which should be fine since there is only one record in the first table.
- fn fetch_custom_details(&self) -> Result<(Option<Vec<u8>>, f64)> {
+ fn fetch_custom_details(&self) -> Result<(Option<Vec<u8>>, Option<String>, f64)> {
let result = self.conn.query_row_and_then_cachable(
r#"
SELECT
- i.data, y.score
+ i.data, i.mimetype, y.score
FROM
yelp_custom_details y
LEFT JOIN
@@ -226,7 +232,13 @@ impl<'a> SuggestDao<'a> {
1
"#,
(),
- |row| -> Result<_> { Ok((row.get::<_, Option<Vec<u8>>>(0)?, row.get::<_, f64>(1)?)) },
+ |row| -> Result<_> {
+ Ok((
+ row.get::<_, Option<Vec<u8>>>(0)?,
+ row.get::<_, Option<String>>(1)?,
+ row.get::<_, f64>(2)?,
+ ))
+ },
true,
)?;
@@ -439,6 +451,7 @@ struct SuggestionBuilder<'a> {
location: Option<String>,
need_location: bool,
icon: Option<Vec<u8>>,
+ icon_mimetype: Option<String>,
score: f64,
}
@@ -488,6 +501,7 @@ impl<'a> From<SuggestionBuilder<'a>> for Suggestion {
url,
title,
icon: builder.icon,
+ icon_mimetype: builder.icon_mimetype,
score: builder.score,
has_location_sign: location_modifier.is_none() && builder.location_sign.is_some(),
subject_exact_match: builder.subject_exact_match,