From 8dd16259287f58f9273002717ec4d27e97127719 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 12 Jun 2024 07:43:14 +0200 Subject: Merging upstream version 127.0. Signed-off-by: Daniel Baumann --- third_party/rust/suggest/src/benchmarks/client.rs | 1 + third_party/rust/suggest/src/db.rs | 6 + third_party/rust/suggest/src/lib.rs | 2 +- third_party/rust/suggest/src/schema.rs | 454 +++++++++++----------- third_party/rust/suggest/src/store.rs | 216 +++++++++- third_party/rust/suggest/src/suggest.udl | 15 + 6 files changed, 455 insertions(+), 239 deletions(-) (limited to 'third_party/rust/suggest/src') diff --git a/third_party/rust/suggest/src/benchmarks/client.rs b/third_party/rust/suggest/src/benchmarks/client.rs index f5a21fd9cc..713bd7752b 100644 --- a/third_party/rust/suggest/src/benchmarks/client.rs +++ b/third_party/rust/suggest/src/benchmarks/client.rs @@ -22,6 +22,7 @@ impl RemoteSettingsWarmUpClient { pub fn new() -> Self { Self { client: Client::new(RemoteSettingsConfig { + server: None, server_url: None, bucket_name: None, collection_name: crate::rs::REMOTE_SETTINGS_COLLECTION.into(), diff --git a/third_party/rust/suggest/src/db.rs b/third_party/rust/suggest/src/db.rs index 0412c50d8f..6b6603ab71 100644 --- a/third_party/rust/suggest/src/db.rs +++ b/third_party/rust/suggest/src/db.rs @@ -188,6 +188,12 @@ impl<'a> SuggestDao<'a> { // // These methods implement CRUD operations + pub fn suggestions_table_empty(&self) -> Result { + Ok(self + .conn + .query_one::("SELECT NOT EXISTS (SELECT 1 FROM suggestions)")?) + } + /// Fetches suggestions that match the given query from the database. 
pub fn fetch_suggestions(&self, query: &SuggestionQuery) -> Result> { let unique_providers = query.providers.iter().collect::>(); diff --git a/third_party/rust/suggest/src/lib.rs b/third_party/rust/suggest/src/lib.rs index 15746614d0..93b456b8b4 100644 --- a/third_party/rust/suggest/src/lib.rs +++ b/third_party/rust/suggest/src/lib.rs @@ -3,7 +3,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -use remote_settings::RemoteSettingsConfig; +use remote_settings::{RemoteSettingsConfig, RemoteSettingsServer}; #[cfg(feature = "benchmark_api")] pub mod benchmarks; mod config; diff --git a/third_party/rust/suggest/src/schema.rs b/third_party/rust/suggest/src/schema.rs index b304363de5..76a0deed39 100644 --- a/third_party/rust/suggest/src/schema.rs +++ b/third_party/rust/suggest/src/schema.rs @@ -15,118 +15,118 @@ use sql_support::open_database::{self, ConnectionInitializer}; /// [`SuggestConnectionInitializer::upgrade_from`]. /// a. If suggestions should be re-ingested after the migration, call `clear_database()` inside /// the migration. -pub const VERSION: u32 = 18; +pub const VERSION: u32 = 19; /// The current Suggest database schema. pub const SQL: &str = " - CREATE TABLE meta( - key TEXT PRIMARY KEY, - value NOT NULL - ) WITHOUT ROWID; - - CREATE TABLE keywords( - keyword TEXT NOT NULL, - suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE, - full_keyword_id INTEGER NULL REFERENCES full_keywords(id) ON DELETE SET NULL, - rank INTEGER NOT NULL, - PRIMARY KEY (keyword, suggestion_id) - ) WITHOUT ROWID; - - -- full keywords are what we display to the user when a (partial) keyword matches - -- The FK to suggestion_id makes it so full keywords get deleted when the parent suggestion is deleted. 
- CREATE TABLE full_keywords( - id INTEGER PRIMARY KEY, - suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE, - full_keyword TEXT NOT NULL - ); - - CREATE TABLE prefix_keywords( - keyword_prefix TEXT NOT NULL, - keyword_suffix TEXT NOT NULL DEFAULT '', - confidence INTEGER NOT NULL DEFAULT 0, - rank INTEGER NOT NULL, - suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE, - PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id) - ) WITHOUT ROWID; - - CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank); - - CREATE TABLE suggestions( - id INTEGER PRIMARY KEY, - record_id TEXT NOT NULL, - provider INTEGER NOT NULL, - title TEXT NOT NULL, - url TEXT NOT NULL, - score REAL NOT NULL - ); - - CREATE TABLE amp_custom_details( - suggestion_id INTEGER PRIMARY KEY, - advertiser TEXT NOT NULL, - block_id INTEGER NOT NULL, - iab_category TEXT NOT NULL, - impression_url TEXT NOT NULL, - click_url TEXT NOT NULL, - icon_id TEXT NOT NULL, - FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE - ); - - CREATE TABLE wikipedia_custom_details( - suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE, - icon_id TEXT NOT NULL - ); - - CREATE TABLE amo_custom_details( - suggestion_id INTEGER PRIMARY KEY, - description TEXT NOT NULL, - guid TEXT NOT NULL, - icon_url TEXT NOT NULL, - rating TEXT, - number_of_ratings INTEGER NOT NULL, - FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE - ); - - CREATE INDEX suggestions_record_id ON suggestions(record_id); - - CREATE TABLE icons( - id TEXT PRIMARY KEY, - data BLOB NOT NULL, - mimetype TEXT NOT NULL - ) WITHOUT ROWID; - - CREATE TABLE yelp_subjects( - keyword TEXT PRIMARY KEY, - record_id TEXT NOT NULL - ) WITHOUT ROWID; - - CREATE TABLE yelp_modifiers( - type INTEGER NOT NULL, - keyword TEXT NOT NULL, - record_id TEXT NOT NULL, - PRIMARY KEY (type, keyword) - ) WITHOUT ROWID; - - CREATE TABLE 
yelp_location_signs( - keyword TEXT PRIMARY KEY, - need_location INTEGER NOT NULL, - record_id TEXT NOT NULL - ) WITHOUT ROWID; - - CREATE TABLE yelp_custom_details( - icon_id TEXT PRIMARY KEY, - score REAL NOT NULL, - record_id TEXT NOT NULL - ) WITHOUT ROWID; - - CREATE TABLE mdn_custom_details( - suggestion_id INTEGER PRIMARY KEY, - description TEXT NOT NULL, - FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE - ); - - CREATE TABLE dismissed_suggestions ( - url TEXT PRIMARY KEY - ) WITHOUT ROWID; +CREATE TABLE meta( + key TEXT PRIMARY KEY, + value NOT NULL +) WITHOUT ROWID; + +CREATE TABLE keywords( + keyword TEXT NOT NULL, + suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE, + full_keyword_id INTEGER NULL REFERENCES full_keywords(id) ON DELETE SET NULL, + rank INTEGER NOT NULL, + PRIMARY KEY (keyword, suggestion_id) +) WITHOUT ROWID; + +-- full keywords are what we display to the user when a (partial) keyword matches +-- The FK to suggestion_id makes it so full keywords get deleted when the parent suggestion is deleted. 
+CREATE TABLE full_keywords( + id INTEGER PRIMARY KEY, + suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE, + full_keyword TEXT NOT NULL +); + +CREATE TABLE prefix_keywords( + keyword_prefix TEXT NOT NULL, + keyword_suffix TEXT NOT NULL DEFAULT '', + confidence INTEGER NOT NULL DEFAULT 0, + rank INTEGER NOT NULL, + suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE, + PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id) +) WITHOUT ROWID; + +CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank); + +CREATE TABLE suggestions( + id INTEGER PRIMARY KEY, + record_id TEXT NOT NULL, + provider INTEGER NOT NULL, + title TEXT NOT NULL, + url TEXT NOT NULL, + score REAL NOT NULL +); + +CREATE TABLE amp_custom_details( + suggestion_id INTEGER PRIMARY KEY, + advertiser TEXT NOT NULL, + block_id INTEGER NOT NULL, + iab_category TEXT NOT NULL, + impression_url TEXT NOT NULL, + click_url TEXT NOT NULL, + icon_id TEXT NOT NULL, + FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE +); + +CREATE TABLE wikipedia_custom_details( + suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE, + icon_id TEXT NOT NULL +); + +CREATE TABLE amo_custom_details( + suggestion_id INTEGER PRIMARY KEY, + description TEXT NOT NULL, + guid TEXT NOT NULL, + icon_url TEXT NOT NULL, + rating TEXT, + number_of_ratings INTEGER NOT NULL, + FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE +); + +CREATE INDEX suggestions_record_id ON suggestions(record_id); + +CREATE TABLE icons( + id TEXT PRIMARY KEY, + data BLOB NOT NULL, + mimetype TEXT NOT NULL +) WITHOUT ROWID; + +CREATE TABLE yelp_subjects( + keyword TEXT PRIMARY KEY, + record_id TEXT NOT NULL +) WITHOUT ROWID; + +CREATE TABLE yelp_modifiers( + type INTEGER NOT NULL, + keyword TEXT NOT NULL, + record_id TEXT NOT NULL, + PRIMARY KEY (type, keyword) +) WITHOUT ROWID; + +CREATE TABLE yelp_location_signs( + 
keyword TEXT PRIMARY KEY, + need_location INTEGER NOT NULL, + record_id TEXT NOT NULL +) WITHOUT ROWID; + +CREATE TABLE yelp_custom_details( + icon_id TEXT PRIMARY KEY, + score REAL NOT NULL, + record_id TEXT NOT NULL +) WITHOUT ROWID; + +CREATE TABLE mdn_custom_details( + suggestion_id INTEGER PRIMARY KEY, + description TEXT NOT NULL, + FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE +); + +CREATE TABLE dismissed_suggestions ( + url TEXT PRIMARY KEY +) WITHOUT ROWID; "; /// Initializes an SQLite connection to the Suggest database, performing @@ -166,9 +166,9 @@ impl ConnectionInitializer for SuggestConnectionInitializer { 16 => { tx.execute( " - CREATE TABLE dismissed_suggestions ( - url_hash INTEGER PRIMARY KEY - ) WITHOUT ROWID;", +CREATE TABLE dismissed_suggestions ( + url_hash INTEGER PRIMARY KEY +) WITHOUT ROWID;", (), )?; Ok(()) @@ -176,14 +176,23 @@ impl ConnectionInitializer for SuggestConnectionInitializer { 17 => { tx.execute( " - DROP TABLE dismissed_suggestions; - CREATE TABLE dismissed_suggestions ( - url TEXT PRIMARY KEY - ) WITHOUT ROWID;", +DROP TABLE dismissed_suggestions; +CREATE TABLE dismissed_suggestions ( + url TEXT PRIMARY KEY +) WITHOUT ROWID;", (), )?; Ok(()) } + 18 => { + tx.execute_batch( + " +CREATE TABLE IF NOT EXISTS dismissed_suggestions ( + url TEXT PRIMARY KEY +) WITHOUT ROWID;", + )?; + Ok(()) + } _ => Err(open_database::Error::IncompatibleVersion(version)), } } @@ -212,112 +221,112 @@ mod test { // Snapshot of the v16 schema. We use this to test that we can migrate from there to the // current schema. 
const V16_SCHEMA: &str = r#" - CREATE TABLE meta( - key TEXT PRIMARY KEY, - value NOT NULL - ) WITHOUT ROWID; - - CREATE TABLE keywords( - keyword TEXT NOT NULL, - suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE, - full_keyword_id INTEGER NULL REFERENCES full_keywords(id) ON DELETE SET NULL, - rank INTEGER NOT NULL, - PRIMARY KEY (keyword, suggestion_id) - ) WITHOUT ROWID; - - -- full keywords are what we display to the user when a (partial) keyword matches - -- The FK to suggestion_id makes it so full keywords get deleted when the parent suggestion is deleted. - CREATE TABLE full_keywords( - id INTEGER PRIMARY KEY, - suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE, - full_keyword TEXT NOT NULL - ); - - CREATE TABLE prefix_keywords( - keyword_prefix TEXT NOT NULL, - keyword_suffix TEXT NOT NULL DEFAULT '', - confidence INTEGER NOT NULL DEFAULT 0, - rank INTEGER NOT NULL, - suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE, - PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id) - ) WITHOUT ROWID; - - CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank); - - CREATE TABLE suggestions( - id INTEGER PRIMARY KEY, - record_id TEXT NOT NULL, - provider INTEGER NOT NULL, - title TEXT NOT NULL, - url TEXT NOT NULL, - score REAL NOT NULL - ); - - CREATE TABLE amp_custom_details( - suggestion_id INTEGER PRIMARY KEY, - advertiser TEXT NOT NULL, - block_id INTEGER NOT NULL, - iab_category TEXT NOT NULL, - impression_url TEXT NOT NULL, - click_url TEXT NOT NULL, - icon_id TEXT NOT NULL, - FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE - ); - - CREATE TABLE wikipedia_custom_details( - suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE, - icon_id TEXT NOT NULL - ); - - CREATE TABLE amo_custom_details( - suggestion_id INTEGER PRIMARY KEY, - description TEXT NOT NULL, - guid TEXT NOT NULL, - icon_url TEXT NOT NULL, - 
rating TEXT, - number_of_ratings INTEGER NOT NULL, - FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE - ); - - CREATE INDEX suggestions_record_id ON suggestions(record_id); - - CREATE TABLE icons( - id TEXT PRIMARY KEY, - data BLOB NOT NULL, - mimetype TEXT NOT NULL - ) WITHOUT ROWID; - - CREATE TABLE yelp_subjects( - keyword TEXT PRIMARY KEY, - record_id TEXT NOT NULL - ) WITHOUT ROWID; - - CREATE TABLE yelp_modifiers( - type INTEGER NOT NULL, - keyword TEXT NOT NULL, - record_id TEXT NOT NULL, - PRIMARY KEY (type, keyword) - ) WITHOUT ROWID; - - CREATE TABLE yelp_location_signs( - keyword TEXT PRIMARY KEY, - need_location INTEGER NOT NULL, - record_id TEXT NOT NULL - ) WITHOUT ROWID; - - CREATE TABLE yelp_custom_details( - icon_id TEXT PRIMARY KEY, - score REAL NOT NULL, - record_id TEXT NOT NULL - ) WITHOUT ROWID; - - CREATE TABLE mdn_custom_details( - suggestion_id INTEGER PRIMARY KEY, - description TEXT NOT NULL, - FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE - ); - - PRAGMA user_version=16; +CREATE TABLE meta( + key TEXT PRIMARY KEY, + value NOT NULL +) WITHOUT ROWID; + +CREATE TABLE keywords( + keyword TEXT NOT NULL, + suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE, + full_keyword_id INTEGER NULL REFERENCES full_keywords(id) ON DELETE SET NULL, + rank INTEGER NOT NULL, + PRIMARY KEY (keyword, suggestion_id) +) WITHOUT ROWID; + +-- full keywords are what we display to the user when a (partial) keyword matches +-- The FK to suggestion_id makes it so full keywords get deleted when the parent suggestion is deleted. 
+CREATE TABLE full_keywords( + id INTEGER PRIMARY KEY, + suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE, + full_keyword TEXT NOT NULL +); + +CREATE TABLE prefix_keywords( + keyword_prefix TEXT NOT NULL, + keyword_suffix TEXT NOT NULL DEFAULT '', + confidence INTEGER NOT NULL DEFAULT 0, + rank INTEGER NOT NULL, + suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE, + PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id) +) WITHOUT ROWID; + +CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank); + +CREATE TABLE suggestions( + id INTEGER PRIMARY KEY, + record_id TEXT NOT NULL, + provider INTEGER NOT NULL, + title TEXT NOT NULL, + url TEXT NOT NULL, + score REAL NOT NULL +); + +CREATE TABLE amp_custom_details( + suggestion_id INTEGER PRIMARY KEY, + advertiser TEXT NOT NULL, + block_id INTEGER NOT NULL, + iab_category TEXT NOT NULL, + impression_url TEXT NOT NULL, + click_url TEXT NOT NULL, + icon_id TEXT NOT NULL, + FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE +); + +CREATE TABLE wikipedia_custom_details( + suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE, + icon_id TEXT NOT NULL +); + +CREATE TABLE amo_custom_details( + suggestion_id INTEGER PRIMARY KEY, + description TEXT NOT NULL, + guid TEXT NOT NULL, + icon_url TEXT NOT NULL, + rating TEXT, + number_of_ratings INTEGER NOT NULL, + FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE +); + +CREATE INDEX suggestions_record_id ON suggestions(record_id); + +CREATE TABLE icons( + id TEXT PRIMARY KEY, + data BLOB NOT NULL, + mimetype TEXT NOT NULL +) WITHOUT ROWID; + +CREATE TABLE yelp_subjects( + keyword TEXT PRIMARY KEY, + record_id TEXT NOT NULL +) WITHOUT ROWID; + +CREATE TABLE yelp_modifiers( + type INTEGER NOT NULL, + keyword TEXT NOT NULL, + record_id TEXT NOT NULL, + PRIMARY KEY (type, keyword) +) WITHOUT ROWID; + +CREATE TABLE yelp_location_signs( + 
keyword TEXT PRIMARY KEY, + need_location INTEGER NOT NULL, + record_id TEXT NOT NULL +) WITHOUT ROWID; + +CREATE TABLE yelp_custom_details( + icon_id TEXT PRIMARY KEY, + score REAL NOT NULL, + record_id TEXT NOT NULL +) WITHOUT ROWID; + +CREATE TABLE mdn_custom_details( + suggestion_id INTEGER PRIMARY KEY, + description TEXT NOT NULL, + FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE +); + +PRAGMA user_version=16; "#; /// Test running all schema upgrades from V16, which was the first schema with a "real" @@ -328,5 +337,6 @@ mod test { fn test_all_upgrades() { let db_file = MigratedDatabaseFile::new(SuggestConnectionInitializer, V16_SCHEMA); db_file.run_all_upgrades(); + db_file.assert_schema_matches_new_database(); } } diff --git a/third_party/rust/suggest/src/store.rs b/third_party/rust/suggest/src/store.rs index c55cffc7f5..19886b22b8 100644 --- a/third_party/rust/suggest/src/store.rs +++ b/third_party/rust/suggest/src/store.rs @@ -13,7 +13,8 @@ use error_support::handle_error; use once_cell::sync::OnceCell; use parking_lot::Mutex; use remote_settings::{ - self, GetItemsOptions, RemoteSettingsConfig, RemoteSettingsRecord, SortOrder, + self, GetItemsOptions, RemoteSettingsConfig, RemoteSettingsRecord, RemoteSettingsServer, + SortOrder, }; use rusqlite::{ types::{FromSql, ToSqlOutput}, @@ -50,6 +51,7 @@ pub struct SuggestStoreBuilder(Mutex); #[derive(Default)] struct SuggestStoreBuilderInner { data_path: Option, + remote_settings_server: Option, remote_settings_config: Option, } @@ -79,6 +81,11 @@ impl SuggestStoreBuilder { self } + pub fn remote_settings_server(self: Arc, server: RemoteSettingsServer) -> Arc { + self.0.lock().remote_settings_server = Some(server); + self + } + #[handle_error(Error)] pub fn build(&self) -> SuggestApiResult> { let inner = self.0.lock(); @@ -86,14 +93,29 @@ impl SuggestStoreBuilder { .data_path .clone() .ok_or_else(|| Error::SuggestStoreBuilder("data_path not specified".to_owned()))?; - let settings_client = 
- remote_settings::Client::new(inner.remote_settings_config.clone().unwrap_or_else( - || RemoteSettingsConfig { - server_url: None, - bucket_name: None, - collection_name: REMOTE_SETTINGS_COLLECTION.into(), - }, - ))?; + let remote_settings_config = match ( + inner.remote_settings_server.as_ref(), + inner.remote_settings_config.as_ref(), + ) { + (Some(server), None) => RemoteSettingsConfig { + server: Some(server.clone()), + server_url: None, + bucket_name: None, + collection_name: REMOTE_SETTINGS_COLLECTION.into(), + }, + (None, Some(remote_settings_config)) => remote_settings_config.clone(), + (None, None) => RemoteSettingsConfig { + server: None, + server_url: None, + bucket_name: None, + collection_name: REMOTE_SETTINGS_COLLECTION.into(), + }, + (Some(_), Some(_)) => Err(Error::SuggestStoreBuilder( + "can't specify both `remote_settings_server` and `remote_settings_config`" + .to_owned(), + ))?, + }; + let settings_client = remote_settings::Client::new(remote_settings_config)?; Ok(Arc::new(SuggestStore { inner: SuggestStoreInner::new(data_path, settings_client), })) @@ -172,6 +194,7 @@ impl SuggestStore { let settings_client = || -> Result<_> { Ok(remote_settings::Client::new( settings_config.unwrap_or_else(|| RemoteSettingsConfig { + server: None, server_url: None, bucket_name: None, collection_name: REMOTE_SETTINGS_COLLECTION.into(), @@ -252,6 +275,8 @@ pub struct SuggestIngestionConstraints { /// soft limit, and the store might ingest more than requested. pub max_suggestions: Option, pub providers: Option>, + /// Only run ingestion if the table `suggestions` is empty + pub empty_only: bool, } /// The implementation of the store. This is generic over the Remote Settings @@ -334,6 +359,10 @@ where pub fn ingest(&self, constraints: SuggestIngestionConstraints) -> Result<()> { let writer = &self.dbs()?.writer; + if constraints.empty_only && !writer.read(|dao| dao.suggestions_table_empty())? 
{ + return Ok(()); + } + if let Some(unparsable_records) = writer.read(|dao| dao.get_meta::(UNPARSABLE_RECORDS_META_KEY))? { @@ -865,6 +894,12 @@ mod tests { let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + // suggestions_table_empty returns true before the ingestion is complete + assert!(store + .dbs()? + .reader + .read(|dao| dao.suggestions_table_empty())?); + store.ingest(SuggestIngestionConstraints::default())?; store.dbs()?.reader.read(|dao| { @@ -904,6 +939,153 @@ mod tests { Ok(()) })?; + // suggestions_table_empty returns false after the ingestion is complete + assert!(!store + .dbs()? + .reader + .read(|dao| dao.suggestions_table_empty())?); + + Ok(()) + } + + /// Tests ingesting suggestions into an empty database. + #[test] + fn ingest_empty_only() -> anyhow::Result<()> { + before_each(); + + // This ingestion should run, since the DB is empty + let snapshot = Snapshot::with_records(json!([{ + "id": "1234", + "type": "data", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }]))? 
+ .with_data( + "data-1.json", + json!([{ + "id": 0, + "advertiser": "Los Pollos Hermanos", + "iab_category": "8 - Food & Drink", + "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"], + "title": "Los Pollos Hermanos - Albuquerque", + "url": "https://www.lph-nm.biz", + "icon": "5678", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + }]), + )?; + let mut store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + store.ingest(SuggestIngestionConstraints { + empty_only: true, + ..SuggestIngestionConstraints::default() + })?; + + store.dbs()?.reader.read(|dao| { + expect![[r#" + [ + Amp { + title: "Los Pollos Hermanos - Albuquerque", + url: "https://www.lph-nm.biz", + raw_url: "https://www.lph-nm.biz", + icon: None, + icon_mimetype: None, + full_keyword: "los", + block_id: 0, + advertiser: "Los Pollos Hermanos", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.3, + }, + ] + "#]] + .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "lo".into(), + providers: vec![SuggestionProvider::Amp], + limit: None, + })?); + + Ok(()) + })?; + + // ingestion should not run with SuggestIngestionConstraints::empty_only = true, since the + // DB is no longer empty + store.settings_client = SnapshotSettingsClient::with_snapshot(Snapshot::with_records(json!([{ + "id": "1234", + "type": "data", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }, { + "id": "12345", + "type": "data", + "last_modified": 15, + "attachment": { + "filename": "data-2.json", + "mimetype": "application/json", + "location": "data-2.json", + "hash": "", + "size": 0, + }, + }]))? 
+ .with_data( + "data-1.json", + json!([{ + "id": 0, + "advertiser": "Los Pollos Hermanos", + "iab_category": "8 - Food & Drink", + "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"], + "title": "Los Pollos Hermanos - Albuquerque", + "url": "https://www.lph-nm.biz", + "icon": "5678", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + }]) + )? + .with_data("data-2.json", json!([{ + "id": 1, + "advertiser": "Good Place Eats", + "iab_category": "8 - Food & Drink", + "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"], + "title": "Lasagna Come Out Tomorrow", + "url": "https://www.lasagna.restaurant", + "icon": "2", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url" + }]), + )?); + store.ingest(SuggestIngestionConstraints { + empty_only: true, + ..SuggestIngestionConstraints::default() + })?; + + store.dbs()?.reader.read(|dao| { + expect![[r#" + [] + "#]] + .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "la".into(), + providers: vec![SuggestionProvider::Amp], + limit: None, + })?); + + Ok(()) + })?; + Ok(()) } @@ -2189,6 +2371,7 @@ mod tests { store.ingest(SuggestIngestionConstraints { max_suggestions: Some(max_suggestions), providers: Some(vec![SuggestionProvider::Amp]), + ..SuggestIngestionConstraints::default() })?; let actual_limit = store .settings_client @@ -5021,10 +5204,10 @@ mod tests { UnparsableRecords( { "clippy-2": UnparsableRecord { - schema_version: 18, + schema_version: 19, }, "fancy-new-suggestions-1": UnparsableRecord { - schema_version: 18, + schema_version: 19, }, }, ), @@ -5093,10 +5276,10 @@ mod tests { UnparsableRecords( { "clippy-2": UnparsableRecord { - schema_version: 18, + schema_version: 19, }, "fancy-new-suggestions-1": UnparsableRecord { - schema_version: 18, + schema_version: 19, }, }, ), @@ -5178,6 +5361,7 @@ mod tests { let 
constraints = SuggestIngestionConstraints { max_suggestions: Some(100), providers: Some(vec![SuggestionProvider::Amp, SuggestionProvider::Pocket]), + ..SuggestIngestionConstraints::default() }; store.ingest(constraints)?; @@ -5292,10 +5476,10 @@ mod tests { UnparsableRecords( { "clippy-2": UnparsableRecord { - schema_version: 18, + schema_version: 19, }, "fancy-new-suggestions-1": UnparsableRecord { - schema_version: 18, + schema_version: 19, }, }, ), @@ -5381,7 +5565,7 @@ mod tests { UnparsableRecords( { "invalid-attachment": UnparsableRecord { - schema_version: 18, + schema_version: 19, }, }, ), diff --git a/third_party/rust/suggest/src/suggest.udl b/third_party/rust/suggest/src/suggest.udl index 4a4e3fe9a0..0c4781b951 100644 --- a/third_party/rust/suggest/src/suggest.udl +++ b/third_party/rust/suggest/src/suggest.udl @@ -6,6 +6,9 @@ [External="remote_settings"] typedef extern RemoteSettingsConfig; +[External="remote_settings"] +typedef extern RemoteSettingsServer; + namespace suggest { boolean raw_suggestion_url_matches([ByRef] string raw_url, [ByRef] string url); @@ -103,6 +106,14 @@ dictionary SuggestionQuery { dictionary SuggestIngestionConstraints { u64? max_suggestions = null; sequence? providers = null; + // Only ingest if the table `suggestions` is empty. + // + // This is intended to handle periodic updates. Consumers can schedule an ingest with + // `empty_only=true` on startup and a regular ingest with `empty_only=false` to run on a long periodic schedule (maybe + // once a day). This allows ingestion to normally be run at a slow, periodic rate. However, if + // there is a schema upgrade that causes the database to be thrown away, then the + // `empty_only=true` ingestion that runs on startup will repopulate it. 
+ boolean empty_only = false; }; dictionary SuggestGlobalConfig { @@ -154,6 +165,10 @@ interface SuggestStoreBuilder { [Self=ByArc] SuggestStoreBuilder cache_path(string path); + [Self=ByArc] + SuggestStoreBuilder remote_settings_server(RemoteSettingsServer server); + + // Deprecated: Use `remote_settings_server()` instead. [Self=ByArc] SuggestStoreBuilder remote_settings_config(RemoteSettingsConfig config); -- cgit v1.2.3