diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/suggest | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/suggest')
-rw-r--r-- | third_party/rust/suggest/.cargo-checksum.json | 1 | ||||
-rw-r--r-- | third_party/rust/suggest/Cargo.toml | 76 | ||||
-rw-r--r-- | third_party/rust/suggest/README.md | 7 | ||||
-rw-r--r-- | third_party/rust/suggest/build.rs | 7 | ||||
-rw-r--r-- | third_party/rust/suggest/src/config.rs | 31 | ||||
-rw-r--r-- | third_party/rust/suggest/src/db.rs | 1315 | ||||
-rw-r--r-- | third_party/rust/suggest/src/error.rs | 79 | ||||
-rw-r--r-- | third_party/rust/suggest/src/keyword.rs | 102 | ||||
-rw-r--r-- | third_party/rust/suggest/src/lib.rs | 36 | ||||
-rw-r--r-- | third_party/rust/suggest/src/pocket.rs | 59 | ||||
-rw-r--r-- | third_party/rust/suggest/src/provider.rs | 55 | ||||
-rw-r--r-- | third_party/rust/suggest/src/rs.rs | 346 | ||||
-rw-r--r-- | third_party/rust/suggest/src/schema.rs | 153 | ||||
-rw-r--r-- | third_party/rust/suggest/src/store.rs | 5316 | ||||
-rw-r--r-- | third_party/rust/suggest/src/suggest.udl | 151 | ||||
-rw-r--r-- | third_party/rust/suggest/src/suggestion.rs | 250 | ||||
-rw-r--r-- | third_party/rust/suggest/src/yelp.rs | 497 | ||||
-rw-r--r-- | third_party/rust/suggest/uniffi.toml | 10 |
18 files changed, 8491 insertions, 0 deletions
diff --git a/third_party/rust/suggest/.cargo-checksum.json b/third_party/rust/suggest/.cargo-checksum.json new file mode 100644 index 0000000000..c8c5fa2566 --- /dev/null +++ b/third_party/rust/suggest/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.toml":"4aa81cff67e67b08ba3348c1acddaa5aee887df3c35006754c9cda4273a94458","README.md":"8d7457893194e255b87e5a2667ee25c87bd470f5338d7078506f866a67a3fdbd","build.rs":"78780c5cccfe22c3ff4198624b9e188559c437c3e6fa1c8bb66548eee6aa66bf","src/config.rs":"03630b2219b6674e332a1f96f44db74def17f985c850a800299b815fa72241c2","src/db.rs":"d373ad097edac2bbcc6e1b14f51c21b6e2cab2289d27667332798c9cde4dcbef","src/error.rs":"f563210a6c050d98ec85e0f6d9401e7373bfb816e865e8edabbabb23d848ba13","src/keyword.rs":"988d0ab021c0df19cfd3c519df7d37f606bf984cd14d0efca4e5a7aff88344dd","src/lib.rs":"65a035dbfb17e2d2d9f237ad52dc03982ae28c70e3dcf3d96cc9f2d7af79efe3","src/pocket.rs":"c4dda43390d1c39dc795933596b3c1e4e282932cac6c69da53c6e05d39e9ef29","src/provider.rs":"4fe662587efc5a80d000c217ce124506c6800293c50ff460ef95e9e659c764b9","src/rs.rs":"0910368f9e7c4703b00d0de86902d647d70c1f75a256fbeb2126c91f0499a083","src/schema.rs":"8fad4cc624f48946676adbc3de7d061f05fe82531523008f417d6130a2132e34","src/store.rs":"a869971d5593bec2dd40822ba63d0e5a5def96a870ff5a7c33afbcbf5869946b","src/suggest.udl":"d941662596d48793d1570e5b8432b7fd7b4fb1b4550fb38d4e14224fcf4195bc","src/suggestion.rs":"7ee407949f40d88e5d3d4c0da400b987e85ace9f34c648f010cd7f5f2aba0506","src/yelp.rs":"37e77900c12c68cca292a84c6dd6c67d16628c68f4612d8d9bedb1bddf985229","uniffi.toml":"f26317442ddb5b3281245bef6e60ffcb78bb95d29fe4a351a56dbb88d4ec8aab"},"package":null}
\ No newline at end of file diff --git a/third_party/rust/suggest/Cargo.toml b/third_party/rust/suggest/Cargo.toml new file mode 100644 index 0000000000..17ce1af26d --- /dev/null +++ b/third_party/rust/suggest/Cargo.toml @@ -0,0 +1,76 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +name = "suggest" +version = "0.1.0" +exclude = [ + "/android", + "/ios", +] +description = "Manages sponsored and web suggestions for Firefox Suggest" +readme = "README.md" +license = "MPL-2.0" + +[dependencies] +anyhow = "1.0" +chrono = "0.4" +once_cell = "1.5" +parking_lot = ">=0.11,<=0.12" +serde_json = "1" +thiserror = "1" +uniffi = "0.25.2" + +[dependencies.error-support] +path = "../support/error" + +[dependencies.interrupt-support] +path = "../support/interrupt" + +[dependencies.remote_settings] +path = "../remote_settings" + +[dependencies.rusqlite] +version = "0.30.0" +features = [ + "functions", + "bundled", +] + +[dependencies.serde] +version = "1" +features = ["derive"] + +[dependencies.sql-support] +path = "../support/sql" + +[dependencies.url] +version = "2.1" +features = ["serde"] + +[dependencies.viaduct] +path = "../viaduct" + +[dev-dependencies] +expect-test = "1.4" +hex = "0.4" + +[dev-dependencies.env_logger] +version = "0.10" +default-features = false + +[dev-dependencies.rc_crypto] +path = "../support/rc_crypto" + +[build-dependencies.uniffi] +version = "0.25.2" +features = ["build"] diff --git a/third_party/rust/suggest/README.md b/third_party/rust/suggest/README.md new file mode 100644 index 
0000000000..74716d2ebb --- /dev/null +++ b/third_party/rust/suggest/README.md @@ -0,0 +1,7 @@ +# Suggest + +The **Suggest Rust component** powers the [Firefox Suggest](https://support.mozilla.org/en-US/kb/firefox-suggest-faq) feature. + +This component currently supports the basic Suggest experience only. The basic experience shows suggestions for sponsored and web content from a canned dataset. The component downloads the dataset from [Remote Settings](https://remote-settings.readthedocs.io/en/latest/), stores the suggestions in a local database, and makes them available to the Firefox address bar. Because matching is done locally, Mozilla never sees the user's query. + +The opt-in "Improved Firefox Suggest Experience", which sends user queries to a [Mozilla-owned proxy server](https://mozilla-services.github.io/merino/intro.html) for server-side matching, is not currently supported. diff --git a/third_party/rust/suggest/build.rs b/third_party/rust/suggest/build.rs new file mode 100644 index 0000000000..5364bad4c1 --- /dev/null +++ b/third_party/rust/suggest/build.rs @@ -0,0 +1,7 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +fn main() { + uniffi::generate_scaffolding("./src/suggest.udl").unwrap(); +} diff --git a/third_party/rust/suggest/src/config.rs b/third_party/rust/suggest/src/config.rs new file mode 100644 index 0000000000..fcb3c2e256 --- /dev/null +++ b/third_party/rust/suggest/src/config.rs @@ -0,0 +1,31 @@ +use serde::{Deserialize, Serialize}; + +use crate::rs::{DownloadedGlobalConfig, DownloadedWeatherData}; + +/// Global Suggest configuration data. 
+#[derive(Clone, Default, Debug, Deserialize, Serialize)] +pub struct SuggestGlobalConfig { + pub show_less_frequently_cap: i32, +} + +impl From<&DownloadedGlobalConfig> for SuggestGlobalConfig { + fn from(config: &DownloadedGlobalConfig) -> Self { + Self { + show_less_frequently_cap: config.configuration.show_less_frequently_cap, + } + } +} + +/// Per-provider configuration data. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub enum SuggestProviderConfig { + Weather { min_keyword_length: i32 }, +} + +impl From<&DownloadedWeatherData> for SuggestProviderConfig { + fn from(data: &DownloadedWeatherData) -> Self { + Self::Weather { + min_keyword_length: data.weather.min_keyword_length, + } + } +} diff --git a/third_party/rust/suggest/src/db.rs b/third_party/rust/suggest/src/db.rs new file mode 100644 index 0000000000..07fc3ab4a2 --- /dev/null +++ b/third_party/rust/suggest/src/db.rs @@ -0,0 +1,1315 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +use std::{collections::HashSet, path::Path, sync::Arc}; + +use interrupt_support::{SqlInterruptHandle, SqlInterruptScope}; +use parking_lot::Mutex; +use remote_settings::RemoteSettingsRecord; +use rusqlite::{ + named_params, + types::{FromSql, ToSql}, + Connection, OpenFlags, +}; +use sql_support::{open_database::open_database_with_flags, ConnExt}; + +use crate::{ + config::{SuggestGlobalConfig, SuggestProviderConfig}, + keyword::full_keyword, + pocket::{split_keyword, KeywordConfidence}, + provider::SuggestionProvider, + rs::{ + DownloadedAmoSuggestion, DownloadedAmpSuggestion, DownloadedAmpWikipediaSuggestion, + DownloadedMdnSuggestion, DownloadedPocketSuggestion, DownloadedWeatherData, + SuggestRecordId, + }, + schema::{SuggestConnectionInitializer, VERSION}, + store::{UnparsableRecord, UnparsableRecords}, + suggestion::{cook_raw_suggestion_url, AmpSuggestionType, Suggestion}, + Result, SuggestionQuery, +}; + +/// The metadata key whose value is the timestamp of the last record ingested +/// from the Suggest Remote Settings collection. +pub const LAST_INGEST_META_KEY: &str = "last_quicksuggest_ingest"; +/// The metadata key whose value keeps track of records of suggestions +/// that aren't parsable and which schema version it was first seen in. +pub const UNPARSABLE_RECORDS_META_KEY: &str = "unparsable_records"; +/// The metadata key whose value is a JSON string encoding a +/// `SuggestGlobalConfig`, which contains global Suggest configuration data. +pub const GLOBAL_CONFIG_META_KEY: &str = "global_config"; +/// Prefix of metadata keys whose values are JSON strings encoding +/// `SuggestProviderConfig`, which contains per-provider configuration data. The +/// full key is this prefix plus the `SuggestionProvider` value as a u8. +pub const PROVIDER_CONFIG_META_KEY_PREFIX: &str = "provider_config_"; + +// Default value when Suggestion does not have a value for score +pub const DEFAULT_SUGGESTION_SCORE: f64 = 0.2; + +/// The database connection type. 
+#[derive(Clone, Copy)] +pub(crate) enum ConnectionType { + ReadOnly, + ReadWrite, +} + +impl From<ConnectionType> for OpenFlags { + fn from(type_: ConnectionType) -> Self { + match type_ { + ConnectionType::ReadOnly => { + OpenFlags::SQLITE_OPEN_URI + | OpenFlags::SQLITE_OPEN_NO_MUTEX + | OpenFlags::SQLITE_OPEN_READ_ONLY + } + ConnectionType::ReadWrite => { + OpenFlags::SQLITE_OPEN_URI + | OpenFlags::SQLITE_OPEN_NO_MUTEX + | OpenFlags::SQLITE_OPEN_CREATE + | OpenFlags::SQLITE_OPEN_READ_WRITE + } + } + } +} + +/// A thread-safe wrapper around an SQLite connection to the Suggest database, +/// and its interrupt handle. +pub(crate) struct SuggestDb { + pub conn: Mutex<Connection>, + + /// An object that's used to interrupt an ongoing database operation. + /// + /// When this handle is interrupted, the thread that's currently accessing + /// the database will be told to stop and release the `conn` lock as soon + /// as possible. + pub interrupt_handle: Arc<SqlInterruptHandle>, +} + +impl SuggestDb { + /// Opens a read-only or read-write connection to a Suggest database at the + /// given path. + pub fn open(path: impl AsRef<Path>, type_: ConnectionType) -> Result<Self> { + let conn = open_database_with_flags(path, type_.into(), &SuggestConnectionInitializer)?; + Ok(Self::with_connection(conn)) + } + + fn with_connection(conn: Connection) -> Self { + let interrupt_handle = Arc::new(SqlInterruptHandle::new(&conn)); + Self { + conn: Mutex::new(conn), + interrupt_handle, + } + } + + /// Accesses the Suggest database for reading. + pub fn read<T>(&self, op: impl FnOnce(&SuggestDao) -> Result<T>) -> Result<T> { + let conn = self.conn.lock(); + let scope = self.interrupt_handle.begin_interrupt_scope()?; + let dao = SuggestDao::new(&conn, scope); + op(&dao) + } + + /// Accesses the Suggest database in a transaction for reading and writing. 
+ pub fn write<T>(&self, op: impl FnOnce(&mut SuggestDao) -> Result<T>) -> Result<T> { + let mut conn = self.conn.lock(); + let scope = self.interrupt_handle.begin_interrupt_scope()?; + let tx = conn.transaction()?; + let mut dao = SuggestDao::new(&tx, scope); + let result = op(&mut dao)?; + tx.commit()?; + Ok(result) + } +} + +/// A data access object (DAO) that wraps a connection to the Suggest database +/// with methods for reading and writing suggestions, icons, and metadata. +/// +/// Methods that only read from the database take an immutable reference to +/// `self` (`&self`), and methods that write to the database take a mutable +/// reference (`&mut self`). +pub(crate) struct SuggestDao<'a> { + pub conn: &'a Connection, + pub scope: SqlInterruptScope, +} + +impl<'a> SuggestDao<'a> { + fn new(conn: &'a Connection, scope: SqlInterruptScope) -> Self { + Self { conn, scope } + } + + // =============== High level API =============== + // + // These methods combine several low-level calls into one logical operation. + + pub fn handle_unparsable_record(&mut self, record: &RemoteSettingsRecord) -> Result<()> { + let record_id = SuggestRecordId::from(&record.id); + // Remember this record's ID so that we will try again later + self.put_unparsable_record_id(&record_id)?; + // Advance the last fetch time, so that we can resume + // fetching after this record if we're interrupted. + self.put_last_ingest_if_newer(record.last_modified) + } + + pub fn handle_ingested_record(&mut self, record: &RemoteSettingsRecord) -> Result<()> { + let record_id = SuggestRecordId::from(&record.id); + // Remove this record's ID from the list of unparsable + // records, since we understand it now. + self.drop_unparsable_record_id(&record_id)?; + // Advance the last fetch time, so that we can resume + // fetching after this record if we're interrupted. 
+ self.put_last_ingest_if_newer(record.last_modified) + } + + pub fn handle_deleted_record(&mut self, record: &RemoteSettingsRecord) -> Result<()> { + let record_id = SuggestRecordId::from(&record.id); + // Drop either the icon or suggestions, records only contain one or the other + match record_id.as_icon_id() { + Some(icon_id) => self.drop_icon(icon_id)?, + None => self.drop_suggestions(&record_id)?, + }; + // Remove this record's ID from the list of unparsable + // records, since we understand it now. + self.drop_unparsable_record_id(&record_id)?; + // Advance the last fetch time, so that we can resume + // fetching after this record if we're interrupted. + self.put_last_ingest_if_newer(record.last_modified) + } + + // =============== Low level API =============== + // + // These methods implement CRUD operations + + /// Fetches suggestions that match the given query from the database. + pub fn fetch_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> { + let unique_providers = query.providers.iter().collect::<HashSet<_>>(); + unique_providers + .iter() + .try_fold(vec![], |mut acc, provider| { + let suggestions = match provider { + SuggestionProvider::Amp => { + self.fetch_amp_suggestions(query, AmpSuggestionType::Desktop) + } + SuggestionProvider::AmpMobile => { + self.fetch_amp_suggestions(query, AmpSuggestionType::Mobile) + } + SuggestionProvider::Wikipedia => self.fetch_wikipedia_suggestions(query), + SuggestionProvider::Amo => self.fetch_amo_suggestions(query), + SuggestionProvider::Pocket => self.fetch_pocket_suggestions(query), + SuggestionProvider::Yelp => self.fetch_yelp_suggestions(query), + SuggestionProvider::Mdn => self.fetch_mdn_suggestions(query), + SuggestionProvider::Weather => self.fetch_weather_suggestions(query), + }?; + acc.extend(suggestions); + Ok(acc) + }) + .map(|mut suggestions| { + suggestions.sort(); + if let Some(limit) = query.limit.and_then(|limit| usize::try_from(limit).ok()) { + suggestions.truncate(limit); + 
} + suggestions + }) + } + + /// Fetches Suggestions of type Amp provider that match the given query + pub fn fetch_amp_suggestions( + &self, + query: &SuggestionQuery, + suggestion_type: AmpSuggestionType, + ) -> Result<Vec<Suggestion>> { + let keyword_lowercased = &query.keyword.to_lowercase(); + let provider = match suggestion_type { + AmpSuggestionType::Mobile => SuggestionProvider::AmpMobile, + AmpSuggestionType::Desktop => SuggestionProvider::Amp, + }; + let suggestions = self.conn.query_rows_and_then_cached( + r#" + SELECT + s.id, + k.rank, + s.title, + s.url, + s.provider, + s.score + FROM + suggestions s + JOIN + keywords k + ON k.suggestion_id = s.id + WHERE + s.provider = :provider + AND k.keyword = :keyword + "#, + named_params! { + ":keyword": keyword_lowercased, + ":provider": provider + }, + |row| -> Result<Suggestion> { + let suggestion_id: i64 = row.get("id")?; + let title = row.get("title")?; + let raw_url = row.get::<_, String>("url")?; + let score = row.get::<_, f64>("score")?; + + let keywords: Vec<String> = self.conn.query_rows_and_then_cached( + r#" + SELECT + keyword + FROM + keywords + WHERE + suggestion_id = :suggestion_id + AND rank >= :rank + ORDER BY + rank ASC + "#, + named_params! { + ":suggestion_id": suggestion_id, + ":rank": row.get::<_, i64>("rank")?, + }, + |row| row.get(0), + )?; + self.conn.query_row_and_then( + r#" + SELECT + amp.advertiser, + amp.block_id, + amp.iab_category, + amp.impression_url, + amp.click_url, + (SELECT i.data FROM icons i WHERE i.id = amp.icon_id) AS icon + FROM + amp_custom_details amp + WHERE + amp.suggestion_id = :suggestion_id + "#, + named_params! 
{ + ":suggestion_id": suggestion_id + }, + |row| { + let cooked_url = cook_raw_suggestion_url(&raw_url); + let raw_click_url = row.get::<_, String>("click_url")?; + let cooked_click_url = cook_raw_suggestion_url(&raw_click_url); + + Ok(Suggestion::Amp { + block_id: row.get("block_id")?, + advertiser: row.get("advertiser")?, + iab_category: row.get("iab_category")?, + title, + url: cooked_url, + raw_url, + full_keyword: full_keyword(keyword_lowercased, &keywords), + icon: row.get("icon")?, + impression_url: row.get("impression_url")?, + click_url: cooked_click_url, + raw_click_url, + score, + }) + }, + ) + }, + )?; + Ok(suggestions) + } + + /// Fetches Suggestions of type Wikipedia provider that match the given query + pub fn fetch_wikipedia_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> { + let keyword_lowercased = &query.keyword.to_lowercase(); + let suggestions = self.conn.query_rows_and_then_cached( + r#" + SELECT + s.id, + k.rank, + s.title, + s.url + FROM + suggestions s + JOIN + keywords k + ON k.suggestion_id = s.id + WHERE + s.provider = :provider + AND k.keyword = :keyword + "#, + named_params! { + ":keyword": keyword_lowercased, + ":provider": SuggestionProvider::Wikipedia + }, + |row| -> Result<Suggestion> { + let suggestion_id: i64 = row.get("id")?; + let title = row.get("title")?; + let raw_url = row.get::<_, String>("url")?; + + let keywords: Vec<String> = self.conn.query_rows_and_then_cached( + "SELECT keyword FROM keywords + WHERE suggestion_id = :suggestion_id AND rank >= :rank + ORDER BY rank ASC", + named_params! { + ":suggestion_id": suggestion_id, + ":rank": row.get::<_, i64>("rank")?, + }, + |row| row.get(0), + )?; + let icon = self.conn.try_query_one( + "SELECT i.data + FROM icons i + JOIN wikipedia_custom_details s ON s.icon_id = i.id + WHERE s.suggestion_id = :suggestion_id", + named_params! 
{ + ":suggestion_id": suggestion_id + }, + true, + )?; + Ok(Suggestion::Wikipedia { + title, + url: raw_url, + full_keyword: full_keyword(keyword_lowercased, &keywords), + icon, + }) + }, + )?; + Ok(suggestions) + } + + /// Fetches Suggestions of type Amo provider that match the given query + pub fn fetch_amo_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> { + let keyword_lowercased = &query.keyword.to_lowercase(); + let (keyword_prefix, keyword_suffix) = split_keyword(keyword_lowercased); + let suggestions_limit = &query.limit.unwrap_or(-1); + let suggestions = self + .conn + .query_rows_and_then_cached( + r#" + SELECT + s.id, + MAX(k.rank) AS rank, + s.title, + s.url, + s.provider, + s.score, + k.keyword_suffix + FROM + suggestions s + JOIN + prefix_keywords k + ON k.suggestion_id = s.id + WHERE + k.keyword_prefix = :keyword_prefix + AND (k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF') + AND s.provider = :provider + GROUP BY + s.id + ORDER BY + s.score DESC, + rank DESC + LIMIT + :suggestions_limit + "#, + named_params! { + ":keyword_prefix": keyword_prefix, + ":keyword_suffix": keyword_suffix, + ":provider": SuggestionProvider::Amo, + ":suggestions_limit": suggestions_limit, + }, + |row| -> Result<Option<Suggestion>> { + let suggestion_id: i64 = row.get("id")?; + let title = row.get("title")?; + let raw_url = row.get::<_, String>("url")?; + let score = row.get::<_, f64>("score")?; + + let full_suffix = row.get::<_, String>("keyword_suffix")?; + full_suffix + .starts_with(keyword_suffix) + .then(|| { + self.conn.query_row_and_then( + r#" + SELECT + amo.description, + amo.guid, + amo.rating, + amo.icon_url, + amo.number_of_ratings + FROM + amo_custom_details amo + WHERE + amo.suggestion_id = :suggestion_id + "#, + named_params! 
{ + ":suggestion_id": suggestion_id + }, + |row| { + Ok(Suggestion::Amo { + title, + url: raw_url, + icon_url: row.get("icon_url")?, + description: row.get("description")?, + rating: row.get("rating")?, + number_of_ratings: row.get("number_of_ratings")?, + guid: row.get("guid")?, + score, + }) + }, + ) + }) + .transpose() + }, + )? + .into_iter() + .flatten() + .collect(); + Ok(suggestions) + } + + /// Fetches Suggestions of type pocket provider that match the given query + pub fn fetch_pocket_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> { + let keyword_lowercased = &query.keyword.to_lowercase(); + let (keyword_prefix, keyword_suffix) = split_keyword(keyword_lowercased); + let suggestions = self + .conn + .query_rows_and_then_cached( + r#" + SELECT + s.id, + MAX(k.rank) AS rank, + s.title, + s.url, + s.provider, + s.score, + k.confidence, + k.keyword_suffix + FROM + suggestions s + JOIN + prefix_keywords k + ON k.suggestion_id = s.id + WHERE + k.keyword_prefix = :keyword_prefix + AND (k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF') + AND s.provider = :provider + GROUP BY + s.id, + k.confidence + ORDER BY + s.score DESC, + rank DESC + "#, + named_params! { + ":keyword_prefix": keyword_prefix, + ":keyword_suffix": keyword_suffix, + ":provider": SuggestionProvider::Pocket, + }, + |row| -> Result<Option<Suggestion>> { + let title = row.get("title")?; + let raw_url = row.get::<_, String>("url")?; + let score = row.get::<_, f64>("score")?; + let confidence = row.get("confidence")?; + let full_suffix = row.get::<_, String>("keyword_suffix")?; + let suffixes_match = match confidence { + KeywordConfidence::Low => full_suffix.starts_with(keyword_suffix), + KeywordConfidence::High => full_suffix == keyword_suffix, + }; + if suffixes_match { + Ok(Some(Suggestion::Pocket { + title, + url: raw_url, + score, + is_top_pick: matches!(confidence, KeywordConfidence::High), + })) + } else { + Ok(None) + } + }, + )? 
+ .into_iter() + .flatten() + .take( + query + .limit + .and_then(|limit| usize::try_from(limit).ok()) + .unwrap_or(usize::MAX), + ) + .collect(); + Ok(suggestions) + } + + /// Fetches suggestions for MDN + pub fn fetch_mdn_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> { + let keyword_lowercased = &query.keyword.to_lowercase(); + let (keyword_prefix, keyword_suffix) = split_keyword(keyword_lowercased); + let suggestions_limit = &query.limit.unwrap_or(-1); + let suggestions = self + .conn + .query_rows_and_then_cached( + r#" + SELECT + s.id, + MAX(k.rank) AS rank, + s.title, + s.url, + s.provider, + s.score, + k.keyword_suffix + FROM + suggestions s + JOIN + prefix_keywords k + ON k.suggestion_id = s.id + WHERE + k.keyword_prefix = :keyword_prefix + AND (k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF') + AND s.provider = :provider + GROUP BY + s.id + ORDER BY + s.score DESC, + rank DESC + LIMIT + :suggestions_limit + "#, + named_params! { + ":keyword_prefix": keyword_prefix, + ":keyword_suffix": keyword_suffix, + ":provider": SuggestionProvider::Mdn, + ":suggestions_limit": suggestions_limit, + }, + |row| -> Result<Option<Suggestion>> { + let suggestion_id: i64 = row.get("id")?; + let title = row.get("title")?; + let raw_url = row.get::<_, String>("url")?; + let score = row.get::<_, f64>("score")?; + + let full_suffix = row.get::<_, String>("keyword_suffix")?; + full_suffix + .starts_with(keyword_suffix) + .then(|| { + self.conn.query_row_and_then( + r#" + SELECT + description + FROM + mdn_custom_details + WHERE + suggestion_id = :suggestion_id + "#, + named_params! { + ":suggestion_id": suggestion_id + }, + |row| { + Ok(Suggestion::Mdn { + title, + url: raw_url, + description: row.get("description")?, + score, + }) + }, + ) + }) + .transpose() + }, + )? 
+ .into_iter() + .flatten() + .collect(); + + Ok(suggestions) + } + + /// Fetches weather suggestions + pub fn fetch_weather_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> { + // Weather keywords are matched by prefix but the query must be at least + // three chars long. Unlike the prefix matching of other suggestion + // types, the query doesn't need to contain the first full word. + if query.keyword.len() < 3 { + return Ok(vec![]); + } + + let keyword_lowercased = &query.keyword.trim().to_lowercase(); + let suggestions = self.conn.query_rows_and_then_cached( + r#" + SELECT + s.score + FROM + suggestions s + JOIN + keywords k + ON k.suggestion_id = s.id + WHERE + s.provider = :provider + AND (k.keyword BETWEEN :keyword AND :keyword || X'FFFF') + "#, + named_params! { + ":keyword": keyword_lowercased, + ":provider": SuggestionProvider::Weather + }, + |row| -> Result<Suggestion> { + Ok(Suggestion::Weather { + score: row.get::<_, f64>("score")?, + }) + }, + )?; + Ok(suggestions) + } + + /// Inserts all suggestions from a downloaded AMO attachment into + /// the database. + pub fn insert_amo_suggestions( + &mut self, + record_id: &SuggestRecordId, + suggestions: &[DownloadedAmoSuggestion], + ) -> Result<()> { + for suggestion in suggestions { + self.scope.err_if_interrupted()?; + let suggestion_id: i64 = self.conn.query_row_and_then_cachable( + &format!( + "INSERT INTO suggestions( + record_id, + provider, + title, + url, + score + ) + VALUES( + :record_id, + {}, + :title, + :url, + :score + ) + RETURNING id", + SuggestionProvider::Amo as u8 + ), + named_params! 
{ + ":record_id": record_id.as_str(), + ":title": suggestion.title, + ":url": suggestion.url, + ":score": suggestion.score, + }, + |row| row.get(0), + true, + )?; + self.conn.execute( + "INSERT INTO amo_custom_details( + suggestion_id, + description, + guid, + icon_url, + rating, + number_of_ratings + ) + VALUES( + :suggestion_id, + :description, + :guid, + :icon_url, + :rating, + :number_of_ratings + )", + named_params! { + ":suggestion_id": suggestion_id, + ":description": suggestion.description, + ":guid": suggestion.guid, + ":icon_url": suggestion.icon_url, + ":rating": suggestion.rating, + ":number_of_ratings": suggestion.number_of_ratings + }, + )?; + for (index, keyword) in suggestion.keywords.iter().enumerate() { + let (keyword_prefix, keyword_suffix) = split_keyword(keyword); + self.conn.execute( + "INSERT INTO prefix_keywords( + keyword_prefix, + keyword_suffix, + suggestion_id, + rank + ) + VALUES( + :keyword_prefix, + :keyword_suffix, + :suggestion_id, + :rank + )", + named_params! { + ":keyword_prefix": keyword_prefix, + ":keyword_suffix": keyword_suffix, + ":rank": index, + ":suggestion_id": suggestion_id, + }, + )?; + } + } + Ok(()) + } + + /// Inserts all suggestions from a downloaded AMP-Wikipedia attachment into + /// the database. + pub fn insert_amp_wikipedia_suggestions( + &mut self, + record_id: &SuggestRecordId, + suggestions: &[DownloadedAmpWikipediaSuggestion], + ) -> Result<()> { + for suggestion in suggestions { + self.scope.err_if_interrupted()?; + let common_details = suggestion.common_details(); + let provider = suggestion.provider(); + + let suggestion_id: i64 = self.conn.query_row_and_then_cachable( + &format!( + "INSERT INTO suggestions( + record_id, + provider, + title, + url, + score + ) + VALUES( + :record_id, + {}, + :title, + :url, + :score + ) + RETURNING id", + provider as u8 + ), + named_params! 
{ + ":record_id": record_id.as_str(), + ":title": common_details.title, + ":url": common_details.url, + ":score": common_details.score.unwrap_or(DEFAULT_SUGGESTION_SCORE) + }, + |row| row.get(0), + true, + )?; + match suggestion { + DownloadedAmpWikipediaSuggestion::Amp(amp) => { + self.conn.execute( + "INSERT INTO amp_custom_details( + suggestion_id, + advertiser, + block_id, + iab_category, + impression_url, + click_url, + icon_id + ) + VALUES( + :suggestion_id, + :advertiser, + :block_id, + :iab_category, + :impression_url, + :click_url, + :icon_id + )", + named_params! { + ":suggestion_id": suggestion_id, + ":advertiser": amp.advertiser, + ":block_id": amp.block_id, + ":iab_category": amp.iab_category, + ":impression_url": amp.impression_url, + ":click_url": amp.click_url, + ":icon_id": amp.icon_id, + }, + )?; + } + DownloadedAmpWikipediaSuggestion::Wikipedia(wikipedia) => { + self.conn.execute( + "INSERT INTO wikipedia_custom_details( + suggestion_id, + icon_id + ) + VALUES( + :suggestion_id, + :icon_id + )", + named_params! { + ":suggestion_id": suggestion_id, + ":icon_id": wikipedia.icon_id, + }, + )?; + } + } + for (index, keyword) in common_details.keywords.iter().enumerate() { + self.conn.execute( + "INSERT INTO keywords( + keyword, + suggestion_id, + rank + ) + VALUES( + :keyword, + :suggestion_id, + :rank + )", + named_params! { + ":keyword": keyword, + ":rank": index, + ":suggestion_id": suggestion_id, + }, + )?; + } + } + Ok(()) + } + + /// Inserts all suggestions from a downloaded AMP-Mobile attachment into + /// the database. 
+ pub fn insert_amp_mobile_suggestions( + &mut self, + record_id: &SuggestRecordId, + suggestions: &[DownloadedAmpSuggestion], + ) -> Result<()> { + for suggestion in suggestions { + self.scope.err_if_interrupted()?; + let common_details = &suggestion.common_details; + let suggestion_id: i64 = self.conn.query_row_and_then_cachable( + &format!( + "INSERT INTO suggestions( + record_id, + provider, + title, + url, + score + ) + VALUES( + :record_id, + {}, + :title, + :url, + :score + ) + RETURNING id", + SuggestionProvider::AmpMobile as u8 + ), + named_params! { + ":record_id": record_id.as_str(), + ":title": common_details.title, + ":url": common_details.url, + ":score": common_details.score.unwrap_or(DEFAULT_SUGGESTION_SCORE) + }, + |row| row.get(0), + true, + )?; + self.conn.execute( + "INSERT INTO amp_custom_details( + suggestion_id, + advertiser, + block_id, + iab_category, + impression_url, + click_url, + icon_id + ) + VALUES( + :suggestion_id, + :advertiser, + :block_id, + :iab_category, + :impression_url, + :click_url, + :icon_id + )", + named_params! { + ":suggestion_id": suggestion_id, + ":advertiser": suggestion.advertiser, + ":block_id": suggestion.block_id, + ":iab_category": suggestion.iab_category, + ":impression_url": suggestion.impression_url, + ":click_url": suggestion.click_url, + ":icon_id": suggestion.icon_id, + }, + )?; + + for (index, keyword) in common_details.keywords.iter().enumerate() { + self.conn.execute( + "INSERT INTO keywords( + keyword, + suggestion_id, + rank + ) + VALUES( + :keyword, + :suggestion_id, + :rank + )", + named_params! { + ":keyword": keyword, + ":rank": index, + ":suggestion_id": suggestion_id, + }, + )?; + } + } + Ok(()) + } + + /// Inserts all suggestions from a downloaded Pocket attachment into + /// the database. 
+ pub fn insert_pocket_suggestions( + &mut self, + record_id: &SuggestRecordId, + suggestions: &[DownloadedPocketSuggestion], + ) -> Result<()> { + for suggestion in suggestions { + self.scope.err_if_interrupted()?; + let suggestion_id: i64 = self.conn.query_row_and_then_cachable( + &format!( + "INSERT INTO suggestions( + record_id, + provider, + title, + url, + score + ) + VALUES( + :record_id, + {}, + :title, + :url, + :score + ) + RETURNING id", + SuggestionProvider::Pocket as u8 + ), + named_params! { + ":record_id": record_id.as_str(), + ":title": suggestion.title, + ":url": suggestion.url, + ":score": suggestion.score, + }, + |row| row.get(0), + true, + )?; + + for ((rank, keyword), confidence) in suggestion + .high_confidence_keywords + .iter() + .enumerate() + .zip(std::iter::repeat(KeywordConfidence::High)) + .chain( + suggestion + .low_confidence_keywords + .iter() + .enumerate() + .zip(std::iter::repeat(KeywordConfidence::Low)), + ) + { + let (keyword_prefix, keyword_suffix) = split_keyword(keyword); + self.conn.execute( + "INSERT INTO prefix_keywords( + keyword_prefix, + keyword_suffix, + confidence, + rank, + suggestion_id + ) + VALUES( + :keyword_prefix, + :keyword_suffix, + :confidence, + :rank, + :suggestion_id + )", + named_params! { + ":keyword_prefix": keyword_prefix, + ":keyword_suffix": keyword_suffix, + ":confidence": confidence, + ":rank": rank, + ":suggestion_id": suggestion_id, + }, + )?; + } + } + Ok(()) + } + + /// Inserts all suggestions from a downloaded MDN attachment into + /// the database. 
+ pub fn insert_mdn_suggestions( + &mut self, + record_id: &SuggestRecordId, + suggestions: &[DownloadedMdnSuggestion], + ) -> Result<()> { + for suggestion in suggestions { + self.scope.err_if_interrupted()?; + let suggestion_id: i64 = self.conn.query_row_and_then_cachable( + &format!( + "INSERT INTO suggestions( + record_id, + provider, + title, + url, + score + ) + VALUES( + :record_id, + {}, + :title, + :url, + :score + ) + RETURNING id", + SuggestionProvider::Mdn as u8 + ), + named_params! { + ":record_id": record_id.as_str(), + ":title": suggestion.title, + ":url": suggestion.url, + ":score": suggestion.score, + }, + |row| row.get(0), + true, + )?; + self.conn.execute_cached( + "INSERT INTO mdn_custom_details( + suggestion_id, + description + ) + VALUES( + :suggestion_id, + :description + )", + named_params! { + ":suggestion_id": suggestion_id, + ":description": suggestion.description, + }, + )?; + for (index, keyword) in suggestion.keywords.iter().enumerate() { + let (keyword_prefix, keyword_suffix) = split_keyword(keyword); + self.conn.execute_cached( + "INSERT INTO prefix_keywords( + keyword_prefix, + keyword_suffix, + suggestion_id, + rank + ) + VALUES( + :keyword_prefix, + :keyword_suffix, + :suggestion_id, + :rank + )", + named_params! { + ":keyword_prefix": keyword_prefix, + ":keyword_suffix": keyword_suffix, + ":rank": index, + ":suggestion_id": suggestion_id, + }, + )?; + } + } + Ok(()) + } + + /// Inserts weather record data into the database. + pub fn insert_weather_data( + &mut self, + record_id: &SuggestRecordId, + data: &DownloadedWeatherData, + ) -> Result<()> { + self.scope.err_if_interrupted()?; + let suggestion_id: i64 = self.conn.query_row_and_then_cachable( + &format!( + "INSERT INTO suggestions(record_id, provider, title, url, score) + VALUES(:record_id, {}, '', '', :score) + RETURNING id", + SuggestionProvider::Weather as u8 + ), + named_params! 
{ + ":record_id": record_id.as_str(), + ":score": data.weather.score.unwrap_or(DEFAULT_SUGGESTION_SCORE), + }, + |row| row.get(0), + true, + )?; + for (index, keyword) in data.weather.keywords.iter().enumerate() { + self.conn.execute( + "INSERT INTO keywords(keyword, suggestion_id, rank) + VALUES(:keyword, :suggestion_id, :rank)", + named_params! { + ":keyword": keyword, + ":suggestion_id": suggestion_id, + ":rank": index, + }, + )?; + } + self.put_provider_config( + SuggestionProvider::Weather, + &SuggestProviderConfig::from(data), + )?; + Ok(()) + } + + /// Inserts or replaces an icon for a suggestion into the database. + pub fn put_icon(&mut self, icon_id: &str, data: &[u8]) -> Result<()> { + self.conn.execute( + "INSERT OR REPLACE INTO icons( + id, + data + ) + VALUES( + :id, + :data + )", + named_params! { + ":id": icon_id, + ":data": data, + }, + )?; + Ok(()) + } + + /// Deletes all suggestions associated with a Remote Settings record from + /// the database. + pub fn drop_suggestions(&mut self, record_id: &SuggestRecordId) -> Result<()> { + self.conn.execute_cached( + "DELETE FROM suggestions WHERE record_id = :record_id", + named_params! { ":record_id": record_id.as_str() }, + )?; + self.conn.execute_cached( + "DELETE FROM yelp_subjects WHERE record_id = :record_id", + named_params! { ":record_id": record_id.as_str() }, + )?; + self.conn.execute_cached( + "DELETE FROM yelp_modifiers WHERE record_id = :record_id", + named_params! { ":record_id": record_id.as_str() }, + )?; + self.conn.execute_cached( + "DELETE FROM yelp_location_signs WHERE record_id = :record_id", + named_params! { ":record_id": record_id.as_str() }, + )?; + self.conn.execute_cached( + "DELETE FROM yelp_custom_details WHERE record_id = :record_id", + named_params! { ":record_id": record_id.as_str() }, + )?; + Ok(()) + } + + /// Deletes an icon for a suggestion from the database. 
+ pub fn drop_icon(&mut self, icon_id: &str) -> Result<()> { + self.conn.execute_cached( + "DELETE FROM icons WHERE id = :id", + named_params! { ":id": icon_id }, + )?; + Ok(()) + } + + /// Clears the database, removing all suggestions, icons, and metadata. + pub fn clear(&mut self) -> Result<()> { + self.conn.execute_batch( + "DELETE FROM suggestions; + DELETE FROM icons; + DELETE FROM meta;", + )?; + Ok(()) + } + + /// Returns the value associated with a metadata key. + pub fn get_meta<T: FromSql>(&self, key: &str) -> Result<Option<T>> { + Ok(self.conn.try_query_one( + "SELECT value FROM meta WHERE key = :key", + named_params! { ":key": key }, + true, + )?) + } + + /// Sets the value for a metadata key. + pub fn put_meta(&mut self, key: &str, value: impl ToSql) -> Result<()> { + self.conn.execute_cached( + "INSERT OR REPLACE INTO meta(key, value) VALUES(:key, :value)", + named_params! { ":key": key, ":value": value }, + )?; + Ok(()) + } + + /// Updates the last ingest timestamp if the given last modified time is + /// newer than the existing one recorded. + pub fn put_last_ingest_if_newer(&mut self, record_last_modified: u64) -> Result<()> { + let last_ingest = self + .get_meta::<u64>(LAST_INGEST_META_KEY)? + .unwrap_or_default(); + if record_last_modified > last_ingest { + self.put_meta(LAST_INGEST_META_KEY, record_last_modified)?; + } + + Ok(()) + } + + /// Adds an entry for a Suggest Remote Settings record to the list of + /// unparsable records. + /// + /// This is used to note records that we don't understand how to parse and + /// ingest yet. + pub fn put_unparsable_record_id(&mut self, record_id: &SuggestRecordId) -> Result<()> { + let mut unparsable_records = self + .get_meta::<UnparsableRecords>(UNPARSABLE_RECORDS_META_KEY)? 
+ .unwrap_or_default(); + unparsable_records.0.insert( + record_id.as_str().to_string(), + UnparsableRecord { + schema_version: VERSION, + }, + ); + self.put_meta(UNPARSABLE_RECORDS_META_KEY, unparsable_records)?; + Ok(()) + } + + /// Removes an entry for a Suggest Remote Settings record from the list of + /// unparsable records. Does nothing if the record was not previously marked + /// as unparsable. + /// + /// This indicates that we now understand how to parse and ingest the + /// record, or that the record was deleted. + pub fn drop_unparsable_record_id(&mut self, record_id: &SuggestRecordId) -> Result<()> { + let Some(mut unparsable_records) = + self.get_meta::<UnparsableRecords>(UNPARSABLE_RECORDS_META_KEY)? + else { + return Ok(()); + }; + if unparsable_records.0.remove(record_id.as_str()).is_none() { + return Ok(()); + }; + self.put_meta(UNPARSABLE_RECORDS_META_KEY, unparsable_records) + } + + /// Stores global Suggest configuration data. + pub fn put_global_config(&mut self, config: &SuggestGlobalConfig) -> Result<()> { + self.put_meta(GLOBAL_CONFIG_META_KEY, serde_json::to_string(config)?) + } + + /// Gets the stored global Suggest configuration data or a default config if + /// none is stored. + pub fn get_global_config(&self) -> Result<SuggestGlobalConfig> { + self.get_meta::<String>(GLOBAL_CONFIG_META_KEY)? + .map_or_else( + || Ok(SuggestGlobalConfig::default()), + |json| Ok(serde_json::from_str(&json)?), + ) + } + + /// Stores configuration data for a given provider. + pub fn put_provider_config( + &mut self, + provider: SuggestionProvider, + config: &SuggestProviderConfig, + ) -> Result<()> { + self.put_meta( + &provider_config_meta_key(provider), + serde_json::to_string(config)?, + ) + } + + /// Gets the stored configuration data for a given provider or None if none + /// is stored. 
    ///
    /// # Errors
    ///
    /// Returns an error if the query fails or the stored JSON cannot be
    /// deserialized.
    pub fn get_provider_config(
        &self,
        provider: SuggestionProvider,
    ) -> Result<Option<SuggestProviderConfig>> {
        // The stored JSON is deserialized straight into the
        // `Option<SuggestProviderConfig>` that this function returns.
        self.get_meta::<String>(&provider_config_meta_key(provider))?
            .map_or_else(|| Ok(None), |json| Ok(serde_json::from_str(&json)?))
    }
}

/// Builds the metadata key under which a provider's configuration is stored:
/// the shared key prefix followed by the provider's numeric discriminant.
fn provider_config_meta_key(provider: SuggestionProvider) -> String {
    format!("{}{}", PROVIDER_CONFIG_META_KEY_PREFIX, provider as u8)
}
diff --git a/third_party/rust/suggest/src/error.rs b/third_party/rust/suggest/src/error.rs new file mode 100644 index 0000000000..cd07c3591c --- /dev/null +++ b/third_party/rust/suggest/src/error.rs @@ -0,0 +1,79 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

use error_support::{ErrorHandling, GetErrorHandling};
use remote_settings::RemoteSettingsError;

/// A list of errors that are internal to the component. This is the error
/// type for private and crate-internal methods, and is never returned to the
/// application.
///
/// Most variants wrap an error from a dependency and are created through the
/// `#[from]` conversions, so `?` can be used directly on those error types.
#[derive(Debug, thiserror::Error)]
pub(crate) enum Error {
    #[error("Error opening database: {0}")]
    OpenDatabase(#[from] sql_support::open_database::Error),

    #[error("Error executing SQL: {0}")]
    Sql(#[from] rusqlite::Error),

    #[error("JSON error: {0}")]
    Json(#[from] serde_json::Error),

    #[error("Error from Remote Settings: {0}")]
    RemoteSettings(#[from] RemoteSettingsError),

    #[error("Operation interrupted")]
    Interrupted(#[from] interrupt_support::Interrupted),

    // Carries a free-form message; unlike the other variants it has no
    // `#[from]` conversion.
    #[error("SuggestStoreBuilder {0}")]
    SuggestStoreBuilder(String),
}

/// The error type for all Suggest component operations. These errors are
/// exposed to your application, which should handle them as needed.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum SuggestApiError {
    // A request failed at the network level; `reason` is the underlying
    // error's message.
    #[error("Network error: {reason}")]
    Network { reason: String },
    // The server requested a backoff after too many requests
    #[error("Backoff")]
    Backoff { seconds: u64 },
    // The application interrupted a request
    #[error("Interrupted")]
    Interrupted,
    // Any other internal error; `reason` is the internal error's message.
    #[error("Other error: {reason}")]
    Other { reason: String },
}

// Define how our internal errors are handled and converted to external errors
// See `support/error/README.md` for how this works, especially the warning about PII.
impl GetErrorHandling for Error {
    type ExternalError = SuggestApiError;

    /// Maps an internal error to the public error returned to the
    /// application, and selects whether it is silently converted, logged as
    /// a warning, or reported.
    fn get_error_handling(&self) -> ErrorHandling<Self::ExternalError> {
        match self {
            // Do nothing for interrupted errors, this is just normal operation.
            Self::Interrupted(_) => ErrorHandling::convert(SuggestApiError::Interrupted),
            // Network errors are expected to happen in practice. Let's log, but not report them.
            Self::RemoteSettings(RemoteSettingsError::RequestError(
                viaduct::Error::NetworkError(e),
            )) => ErrorHandling::convert(SuggestApiError::Network {
                reason: e.to_string(),
            })
            .log_warning(),
            // Backoff error shouldn't happen in practice, so let's report them for now.
            // If these do happen in practice and we decide that there is a valid reason for them,
            // then consider switching from reporting to Sentry to counting in Glean.
            Self::RemoteSettings(RemoteSettingsError::BackoffError(seconds)) => {
                ErrorHandling::convert(SuggestApiError::Backoff { seconds: *seconds })
                    .report_error("suggest-backoff")
            }
            // Everything else, including Remote Settings errors not matched
            // above, is surfaced as `Other` and reported as unexpected.
            _ => ErrorHandling::convert(SuggestApiError::Other {
                reason: self.to_string(),
            })
            .report_error("suggest-unexpected"),
        }
    }
}
diff --git a/third_party/rust/suggest/src/keyword.rs b/third_party/rust/suggest/src/keyword.rs new file mode 100644 index 0000000000..d15688d016 --- /dev/null +++ b/third_party/rust/suggest/src/keyword.rs @@ -0,0 +1,102 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

/// Given a list of keywords for a suggestion, returns a phrase that best
/// completes the user's query. This function uses two heuristics to pick the
/// best match:
///
/// 1. Find the first keyword in the list that starts with the query and has
///    more words than the query needs, then trim the keyword to that many
///    words. A query that ends with whitespace needs one extra word.
/// 2. If there isn't a keyword with more words, pick the longest keyword
///    that starts with the query and is longer than it. If there is no such
///    keyword either, the query itself is returned.
///
/// Matching uses `str::starts_with`, so it is case-sensitive, and words in
/// the returned phrase are joined with single spaces.
pub fn full_keyword(query: &str, keywords: &[impl AsRef<str>]) -> String {
    let query_words_len = query.split_whitespace().count();
    let min_phrase_words_len = if query.ends_with(char::is_whitespace) {
        // If the query ends with a space, find a keyword with at least one more
        // word, so that the completed phrase can show a word after the space.
        query_words_len + 1
    } else {
        query_words_len
    };
    keywords
        .iter()
        .map(AsRef::as_ref)
        .filter(|phrase| phrase.starts_with(query))
        .map(|phrase| phrase.split_whitespace().collect::<Vec<_>>())
        // Heuristic 1: the first matching keyword with extra words, cut down
        // to the number of words the query needs.
        .find(|phrase_words| phrase_words.len() > min_phrase_words_len)
        .map(|phrase_words| phrase_words[..min_phrase_words_len].join(" "))
        .unwrap_or_else(|| {
            // Heuristic 2: the longest keyword that strictly extends the
            // query, falling back to the query itself.
            keywords
                .iter()
                .map(AsRef::as_ref)
                .filter(|phrase| phrase.starts_with(query) && query.len() < phrase.len())
                .max_by_key(|phrase| phrase.trim().len())
                .unwrap_or(query)
                .to_owned()
        })
}

#[cfg(test)]
mod tests {
    use super::*;

    // Heuristic 1: a keyword with more words than the query is trimmed to
    // the query's word count.
    #[test]
    fn keywords_with_more_words() {
        assert_eq!(
            full_keyword(
                "moz",
                &[
                    "moz",
                    "mozi",
                    "mozil",
                    "mozill",
                    "mozilla",
                    "mozilla firefox"
                ]
            ),
            "mozilla".to_owned(),
        );
        assert_eq!(
            full_keyword(
                "mozilla",
                &[
                    "moz",
                    "mozi",
                    "mozil",
                    "mozill",
                    "mozilla",
                    "mozilla firefox"
                ]
            ),
            "mozilla".to_owned(),
        );
    }

    // Heuristic 2: with no keyword that has extra words, the longest
    // keyword extending the query wins.
    #[test]
    fn keywords_with_longer_phrase() {
        assert_eq!(
            full_keyword("moz", &["moz", "mozi", "mozil", "mozill", "mozilla"]),
            "mozilla".to_owned()
        );
        assert_eq!(
            full_keyword(
                "mozilla f",
                &["moz", "mozi", "mozil", "mozill", "mozilla firefox"]
            ),
            "mozilla firefox".to_owned()
        );
    }

    // A trailing space in the query asks for one more word in the result.
    #[test]
    fn query_ends_with_space() {
        assert_eq!(
            full_keyword(
                "mozilla ",
                &["moz", "mozi", "mozil", "mozill", "mozilla firefox"]
            ),
            "mozilla firefox".to_owned()
        );
    }
}
diff --git a/third_party/rust/suggest/src/lib.rs b/third_party/rust/suggest/src/lib.rs new file mode 100644 index 0000000000..23775b7dec --- /dev/null +++ b/third_party/rust/suggest/src/lib.rs @@ -0,0 +1,36 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

// NOTE(review): not referenced elsewhere in this file; presumably needed by
// the scaffolding included at the bottom — confirm.
use remote_settings::RemoteSettingsConfig;
mod config;
mod db;
mod error;
mod keyword;
pub mod pocket;
mod provider;
mod rs;
mod schema;
mod store;
mod suggestion;
mod yelp;

pub use config::{SuggestGlobalConfig, SuggestProviderConfig};
pub use error::SuggestApiError;
pub use provider::SuggestionProvider;
pub use store::{SuggestIngestionConstraints, SuggestStore, SuggestStoreBuilder};
pub use suggestion::{raw_suggestion_url_matches, Suggestion};

/// Result type for crate-internal operations, using the internal error type.
pub(crate) type Result<T> = std::result::Result<T, error::Error>;
/// Result type for the public API, using the application-facing error type.
pub type SuggestApiResult<T> = std::result::Result<T, error::SuggestApiError>;

/// A query for suggestions to show in the address bar.
#[derive(Debug, Default)]
pub struct SuggestionQuery {
    pub keyword: String,
    pub providers: Vec<SuggestionProvider>,
    pub limit: Option<i32>,
}

uniffi::include_scaffolding!("suggest");
diff --git a/third_party/rust/suggest/src/pocket.rs b/third_party/rust/suggest/src/pocket.rs new file mode 100644 index 0000000000..cf7070c62a --- /dev/null +++ b/third_party/rust/suggest/src/pocket.rs @@ -0,0 +1,59 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

use rusqlite::types::{FromSql, FromSqlError, FromSqlResult, ToSqlOutput, ValueRef};
use rusqlite::{Result as RusqliteResult, ToSql};

/// Classification of Pocket keywords by confidence. High-confidence keywords
/// require an exact match on both the keyword prefix and suffix, while
/// low-confidence keywords require a match on the prefix and a substring
/// match on the suffix.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] +#[repr(u8)] +pub enum KeywordConfidence { + Low = 0, + High = 1, +} + +impl FromSql for KeywordConfidence { + fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> { + let v = value.as_i64()?; + u8::try_from(v) + .ok() + .and_then(KeywordConfidence::from_u8) + .ok_or_else(|| FromSqlError::OutOfRange(v)) + } +} + +impl KeywordConfidence { + #[inline] + pub(crate) fn from_u8(v: u8) -> Option<Self> { + match v { + 0 => Some(KeywordConfidence::Low), + 1 => Some(KeywordConfidence::High), + _ => None, + } + } +} + +impl ToSql for KeywordConfidence { + fn to_sql(&self) -> RusqliteResult<ToSqlOutput<'_>> { + Ok(ToSqlOutput::from(*self as u8)) + } +} + +/// Split the keyword by the first whitespace into the prefix and the suffix. +/// Return an empty string as the suffix if there is no whitespace. +/// +/// # Examples +/// +/// ``` +/// # use suggest::pocket::split_keyword; +/// assert_eq!(split_keyword("foo"), ("foo", "")); +/// assert_eq!(split_keyword("foo bar baz"), ("foo", "bar baz")); +/// ``` +pub fn split_keyword(keyword: &str) -> (&str, &str) { + keyword.split_once(' ').unwrap_or((keyword, "")) +} diff --git a/third_party/rust/suggest/src/provider.rs b/third_party/rust/suggest/src/provider.rs new file mode 100644 index 0000000000..1449c35c8a --- /dev/null +++ b/third_party/rust/suggest/src/provider.rs @@ -0,0 +1,55 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +use rusqlite::{ + types::{FromSql, FromSqlError, FromSqlResult, ToSql, ToSqlOutput, ValueRef}, + Result as RusqliteResult, +}; + +/// A provider is a source of search suggestions. 
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] +#[repr(u8)] +pub enum SuggestionProvider { + Amp = 1, + Wikipedia = 2, + Amo = 3, + Pocket = 4, + Yelp = 5, + Mdn = 6, + Weather = 7, + AmpMobile = 8, +} + +impl FromSql for SuggestionProvider { + fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> { + let v = value.as_i64()?; + u8::try_from(v) + .ok() + .and_then(SuggestionProvider::from_u8) + .ok_or_else(|| FromSqlError::OutOfRange(v)) + } +} + +impl SuggestionProvider { + #[inline] + pub(crate) fn from_u8(v: u8) -> Option<Self> { + match v { + 1 => Some(SuggestionProvider::Amp), + 2 => Some(SuggestionProvider::Wikipedia), + 3 => Some(SuggestionProvider::Amo), + 4 => Some(SuggestionProvider::Pocket), + 5 => Some(SuggestionProvider::Yelp), + 6 => Some(SuggestionProvider::Mdn), + 7 => Some(SuggestionProvider::Weather), + _ => None, + } + } +} + +impl ToSql for SuggestionProvider { + fn to_sql(&self) -> RusqliteResult<ToSqlOutput<'_>> { + Ok(ToSqlOutput::from(*self as u8)) + } +} diff --git a/third_party/rust/suggest/src/rs.rs b/third_party/rust/suggest/src/rs.rs new file mode 100644 index 0000000000..198a8c43f6 --- /dev/null +++ b/third_party/rust/suggest/src/rs.rs @@ -0,0 +1,346 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +//! Crate-internal types for interacting with Remote Settings (`rs`). Types in +//! this module describe records and attachments in the Suggest Remote Settings +//! collection. +//! +//! To add a new suggestion `T` to this component, you'll generally need to: +//! +//! 1. Add a variant named `T` to [`SuggestRecord`]. The variant must have a +//! `#[serde(rename)]` attribute that matches the suggestion record's +//! `type` field. +//! 2. Define a `DownloadedTSuggestion` type with the new suggestion's fields, +//! matching their attachment's schema. 
//!     Your new type must derive or
//!     implement [`serde::Deserialize`].
//!  3. Update the database schema in the [`schema`] module to store the new
//!     suggestion.
//!  4. Add an `insert_t_suggestions()` method to [`db::SuggestDao`] that
//!     inserts `DownloadedTSuggestion`s into the database.
//!  5. Update [`store::SuggestStoreInner::ingest()`] to download, deserialize,
//!     and store the new suggestion.
//!  6. Add a variant named `T` to [`suggestion::Suggestion`], with the fields
//!     that you'd like to expose to the application. These can be the same
//!     fields as `DownloadedTSuggestion`, or slightly different, depending on
//!     what the application needs to show the suggestion.
//!  7. Update the `Suggestion` enum definition in `suggest.udl` to match your
//!     new [`suggestion::Suggestion`] variant.
//!  8. Update any [`db::SuggestDao`] methods that query the database to include
//!     the new suggestion in their results, and return `Suggestion::T` variants
//!     as needed.

use std::borrow::Cow;

use remote_settings::{GetItemsOptions, RemoteSettingsResponse};
use serde::{Deserialize, Deserializer};

use crate::{provider::SuggestionProvider, Result};

/// The Suggest Remote Settings collection name.
pub(crate) const REMOTE_SETTINGS_COLLECTION: &str = "quicksuggest";

/// The maximum number of suggestions in a Suggest record's attachment.
///
/// This should be the same as the `BUCKET_SIZE` constant in the
/// `mozilla-services/quicksuggest-rs` repo.
pub(crate) const SUGGESTIONS_PER_ATTACHMENT: u64 = 200;

/// A trait for a client that downloads suggestions from Remote Settings.
///
/// This trait lets tests use a mock client.
pub(crate) trait SuggestRemoteSettingsClient {
    /// Fetches records from the Suggest Remote Settings collection.
    fn get_records_with_options(&self, options: &GetItemsOptions)
        -> Result<RemoteSettingsResponse>;

    /// Fetches a record's attachment from the Suggest Remote Settings
    /// collection.
    fn get_attachment(&self, location: &str) -> Result<Vec<u8>>;
}

// The real client: each method delegates to the inherent method of the same
// name and converts the Remote Settings error into the crate's error type.
impl SuggestRemoteSettingsClient for remote_settings::Client {
    fn get_records_with_options(
        &self,
        options: &GetItemsOptions,
    ) -> Result<RemoteSettingsResponse> {
        Ok(remote_settings::Client::get_records_with_options(
            self, options,
        )?)
    }

    fn get_attachment(&self, location: &str) -> Result<Vec<u8>> {
        Ok(remote_settings::Client::get_attachment(self, location)?)
    }
}

/// A record in the Suggest Remote Settings collection.
///
/// The variant is selected by the record's `type` field. Most records carry
/// no additional fields, and their suggestions are stored in the record's
/// attachment. The weather and configuration variants are the exceptions:
/// they deserialize their data from the record itself.
#[derive(Clone, Debug, Deserialize)]
#[serde(tag = "type")]
pub(crate) enum SuggestRecord {
    #[serde(rename = "icon")]
    Icon,
    #[serde(rename = "data")]
    AmpWikipedia,
    #[serde(rename = "amo-suggestions")]
    Amo,
    #[serde(rename = "pocket-suggestions")]
    Pocket,
    #[serde(rename = "yelp-suggestions")]
    Yelp,
    #[serde(rename = "mdn-suggestions")]
    Mdn,
    #[serde(rename = "weather")]
    Weather(DownloadedWeatherData),
    #[serde(rename = "configuration")]
    GlobalConfig(DownloadedGlobalConfig),
    #[serde(rename = "amp-mobile-suggestions")]
    AmpMobile,
}

/// Represents either a single value, or a list of values. This is used to
/// deserialize downloaded attachments.
///
/// The enum is untagged, so the shape of the input decides which variant is
/// used.
#[derive(Clone, Debug, Deserialize)]
#[serde(untagged)]
enum OneOrMany<T> {
    One(T),
    Many(Vec<T>),
}

/// A downloaded Remote Settings attachment that contains suggestions.
#[derive(Clone, Debug, Deserialize)]
#[serde(transparent)]
pub(crate) struct SuggestAttachment<T>(OneOrMany<T>);

impl<T> SuggestAttachment<T> {
    /// Returns a slice of suggestions to ingest from the downloaded attachment.
    ///
    /// A single-value attachment is returned as a one-element slice.
    pub fn suggestions(&self) -> &[T] {
        match &self.0 {
            OneOrMany::One(value) => std::slice::from_ref(value),
            OneOrMany::Many(values) => values,
        }
    }
}

/// The ID of a record in the Suggest Remote Settings collection.
///
/// The ID is held in a `Cow`, so it can either borrow an existing string or
/// own its own copy.
#[derive(Clone, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[serde(transparent)]
pub(crate) struct SuggestRecordId<'a>(Cow<'a, str>);

impl<'a> SuggestRecordId<'a> {
    /// Returns the record ID as a string slice.
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// If this ID is for an icon record, extracts and returns the icon ID.
    ///
    /// The icon ID is the primary key for an ingested icon. Downloaded
    /// suggestions also reference these icon IDs, in
    /// [`DownloadedSuggestion::icon_id`].
    ///
    /// An ID is treated as an icon record ID when it starts with `icon-`;
    /// the icon ID is the remainder after that prefix.
    pub fn as_icon_id(&self) -> Option<&str> {
        self.0.strip_prefix("icon-")
    }
}

// Lets callers build an ID from anything convertible into `Cow<str>`.
impl<'a, T> From<T> for SuggestRecordId<'a>
where
    T: Into<Cow<'a, str>>,
{
    fn from(value: T) -> Self {
        Self(value.into())
    }
}

/// Fields that are common to all downloaded suggestions.
#[derive(Clone, Debug, Deserialize)]
pub(crate) struct DownloadedSuggestionCommonDetails {
    pub keywords: Vec<String>,
    pub title: String,
    pub url: String,
    // Optional in the attachment.
    pub score: Option<f64>,
}

/// An AMP suggestion to ingest from an AMP-Wikipedia attachment.
#[derive(Clone, Debug, Deserialize)]
pub(crate) struct DownloadedAmpSuggestion {
    // The common fields are read from the same JSON object as the
    // AMP-specific ones.
    #[serde(flatten)]
    pub common_details: DownloadedSuggestionCommonDetails,
    pub advertiser: String,
    // Named `id` in the attachment.
    #[serde(rename = "id")]
    pub block_id: i32,
    pub iab_category: String,
    pub click_url: String,
    pub impression_url: String,
    // Named `icon` in the attachment.
    #[serde(rename = "icon")]
    pub icon_id: String,
}

/// A Wikipedia suggestion to ingest from an AMP-Wikipedia attachment.
#[derive(Clone, Debug, Deserialize)]
pub(crate) struct DownloadedWikipediaSuggestion {
    // The common fields are read from the same JSON object as `icon`.
    #[serde(flatten)]
    pub common_details: DownloadedSuggestionCommonDetails,
    // Named `icon` in the attachment.
    #[serde(rename = "icon")]
    pub icon_id: String,
}

/// A suggestion to ingest from an AMP-Wikipedia attachment downloaded from
/// Remote Settings.
///
/// `Deserialize` is implemented by hand below, because the two kinds share
/// a schema and are told apart by the value of the `advertiser` field.
#[derive(Clone, Debug)]
pub(crate) enum DownloadedAmpWikipediaSuggestion {
    Amp(DownloadedAmpSuggestion),
    Wikipedia(DownloadedWikipediaSuggestion),
}

impl DownloadedAmpWikipediaSuggestion {
    /// Returns the details that are common to AMP and Wikipedia suggestions.
    pub fn common_details(&self) -> &DownloadedSuggestionCommonDetails {
        match self {
            Self::Amp(DownloadedAmpSuggestion { common_details, .. }) => common_details,
            Self::Wikipedia(DownloadedWikipediaSuggestion { common_details, .. }) => common_details,
        }
    }

    /// Returns the provider of this suggestion.
    pub fn provider(&self) -> SuggestionProvider {
        match self {
            DownloadedAmpWikipediaSuggestion::Amp(_) => SuggestionProvider::Amp,
            DownloadedAmpWikipediaSuggestion::Wikipedia(_) => SuggestionProvider::Wikipedia,
        }
    }
}

impl<'de> Deserialize<'de> for DownloadedAmpWikipediaSuggestion {
    /// Deserializes a suggestion as Wikipedia when its `advertiser` field is
    /// `"Wikipedia"`, and as AMP otherwise.
    fn deserialize<D>(
        deserializer: D,
    ) -> std::result::Result<DownloadedAmpWikipediaSuggestion, D::Error>
    where
        D: Deserializer<'de>,
    {
        // AMP and Wikipedia suggestions use the same schema. To separate them,
        // we use a "maybe tagged" outer enum with tagged and untagged variants,
        // and a "tagged" inner enum.
        //
        // Wikipedia suggestions will deserialize successfully into the tagged
        // variant. AMP suggestions will try the tagged variant, fail, and fall
        // back to the untagged variant.
        //
        // This approach works around serde-rs/serde#912.

        // Variant order matters: the tagged variant is listed first so that
        // it is attempted before the AMP fallback.
        #[derive(Deserialize)]
        #[serde(untagged)]
        enum MaybeTagged {
            Tagged(Tagged),
            Untagged(DownloadedAmpSuggestion),
        }

        // The `advertiser` field is the tag; only `"Wikipedia"` is accepted.
        #[derive(Deserialize)]
        #[serde(tag = "advertiser")]
        enum Tagged {
            #[serde(rename = "Wikipedia")]
            Wikipedia(DownloadedWikipediaSuggestion),
        }

        Ok(match MaybeTagged::deserialize(deserializer)? {
            MaybeTagged::Tagged(Tagged::Wikipedia(wikipedia)) => Self::Wikipedia(wikipedia),
            MaybeTagged::Untagged(amp) => Self::Amp(amp),
        })
    }
}

/// An AMO suggestion to ingest from an attachment
#[derive(Clone, Debug, Deserialize)]
pub(crate) struct DownloadedAmoSuggestion {
    pub description: String,
    pub url: String,
    pub guid: String,
    // Named `icon` in the attachment.
    #[serde(rename = "icon")]
    pub icon_url: String,
    // Optional, and delivered as a string rather than a number.
    pub rating: Option<String>,
    pub number_of_ratings: i64,
    pub title: String,
    pub keywords: Vec<String>,
    pub score: f64,
}
/// A Pocket suggestion to ingest from a Pocket Suggestion Attachment
#[derive(Clone, Debug, Deserialize)]
pub(crate) struct DownloadedPocketSuggestion {
    pub url: String,
    pub title: String,
    #[serde(rename = "lowConfidenceKeywords")]
    pub low_confidence_keywords: Vec<String>,
    #[serde(rename = "highConfidenceKeywords")]
    pub high_confidence_keywords: Vec<String>,
    pub score: f64,
}
/// A location sign for Yelp to ingest from a Yelp Attachment
#[derive(Clone, Debug, Deserialize)]
pub(crate) struct DownloadedYelpLocationSign {
    pub keyword: String,
    #[serde(rename = "needLocation")]
    pub need_location: bool,
}
/// A Yelp suggestion to ingest from a Yelp Attachment
#[derive(Clone, Debug, Deserialize)]
pub(crate) struct DownloadedYelpSuggestion {
    pub subjects: Vec<String>,
    #[serde(rename = "preModifiers")]
    pub pre_modifiers: Vec<String>,
    #[serde(rename = "postModifiers")]
    pub post_modifiers: Vec<String>,
    #[serde(rename = "locationSigns")]
    pub location_signs: Vec<DownloadedYelpLocationSign>,
    #[serde(rename = "yelpModifiers")]
    pub yelp_modifiers: Vec<String>,
    // Named `icon` in the attachment.
    #[serde(rename = "icon")]
    pub icon_id: String,
    pub score: f64,
}

/// An MDN suggestion to ingest from an attachment
#[derive(Clone, Debug, Deserialize)]
pub(crate) struct DownloadedMdnSuggestion {
    pub url: String,
    pub title: String,
    pub description: String,
    pub keywords: Vec<String>,
    pub score: f64,
}

/// Weather data to ingest from a weather record
#[derive(Clone, Debug, Deserialize)]
pub(crate) struct DownloadedWeatherData {
    pub weather: DownloadedWeatherDataInner,
}
/// The contents of the `weather` field of a weather record.
#[derive(Clone, Debug, Deserialize)]
pub(crate) struct DownloadedWeatherDataInner {
    pub min_keyword_length: i32,
    pub keywords: Vec<String>,
    // Remote settings doesn't support floats in record JSON so we use a
    // stringified float instead. If a float can't be parsed, this will be None.
    // `default` also makes this None when the field is absent.
    #[serde(default, deserialize_with = "de_stringified_f64")]
    pub score: Option<f64>,
}

/// Global Suggest configuration data to ingest from a configuration record
#[derive(Clone, Debug, Deserialize)]
pub(crate) struct DownloadedGlobalConfig {
    pub configuration: DownloadedGlobalConfigInner,
}
/// The contents of the `configuration` field of a configuration record.
#[derive(Clone, Debug, Deserialize)]
pub(crate) struct DownloadedGlobalConfigInner {
    /// The maximum number of times the user can click "Show less frequently"
    /// for a suggestion in the UI.
    pub show_less_frequently_cap: i32,
}

/// Deserializes a float that is encoded as a JSON string.
///
/// Returns `Ok(None)` when the string does not parse as an `f64`. A value
/// that is not a string at all is still a deserialization error.
fn de_stringified_f64<'de, D>(deserializer: D) -> std::result::Result<Option<f64>, D::Error>
where
    D: Deserializer<'de>,
{
    String::deserialize(deserializer).map(|s| s.parse().ok())
}
diff --git a/third_party/rust/suggest/src/schema.rs b/third_party/rust/suggest/src/schema.rs new file mode 100644 index 0000000000..95d987c09e --- /dev/null +++ b/third_party/rust/suggest/src/schema.rs @@ -0,0 +1,153 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

use rusqlite::{Connection, Transaction};
use sql_support::open_database::{self, ConnectionInitializer};

/// The current database schema version.
///
/// For any changes to the schema [`SQL`], please make sure to:
///
///  1. Bump this version.
///  2. Add a migration from the old version to the new version in
///     [`SuggestConnectionInitializer::upgrade_from`].
+pub const VERSION: u32 = 14; + +/// The current Suggest database schema. +pub const SQL: &str = " + CREATE TABLE meta( + key TEXT PRIMARY KEY, + value NOT NULL + ) WITHOUT ROWID; + + CREATE TABLE keywords( + keyword TEXT NOT NULL, + suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE, + rank INTEGER NOT NULL, + PRIMARY KEY (keyword, suggestion_id) + ) WITHOUT ROWID; + + CREATE TABLE prefix_keywords( + keyword_prefix TEXT NOT NULL, + keyword_suffix TEXT NOT NULL DEFAULT '', + confidence INTEGER NOT NULL DEFAULT 0, + rank INTEGER NOT NULL, + suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE, + PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id) + ) WITHOUT ROWID; + + CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank); + + CREATE TABLE suggestions( + id INTEGER PRIMARY KEY, + record_id TEXT NOT NULL, + provider INTEGER NOT NULL, + title TEXT NOT NULL, + url TEXT NOT NULL, + score REAL NOT NULL + ); + + CREATE TABLE amp_custom_details( + suggestion_id INTEGER PRIMARY KEY, + advertiser TEXT NOT NULL, + block_id INTEGER NOT NULL, + iab_category TEXT NOT NULL, + impression_url TEXT NOT NULL, + click_url TEXT NOT NULL, + icon_id TEXT NOT NULL, + FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE + ); + + CREATE TABLE wikipedia_custom_details( + suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE, + icon_id TEXT NOT NULL + ); + + CREATE TABLE amo_custom_details( + suggestion_id INTEGER PRIMARY KEY, + description TEXT NOT NULL, + guid TEXT NOT NULL, + icon_url TEXT NOT NULL, + rating TEXT, + number_of_ratings INTEGER NOT NULL, + FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE + ); + + CREATE INDEX suggestions_record_id ON suggestions(record_id); + + CREATE TABLE icons( + id TEXT PRIMARY KEY, + data BLOB NOT NULL + ) WITHOUT ROWID; + + CREATE TABLE yelp_subjects( + keyword TEXT PRIMARY KEY, + record_id TEXT NOT NULL + 
) WITHOUT ROWID; + + CREATE TABLE yelp_modifiers( + type INTEGER NOT NULL, + keyword TEXT NOT NULL, + record_id TEXT NOT NULL, + PRIMARY KEY (type, keyword) + ) WITHOUT ROWID; + + CREATE TABLE yelp_location_signs( + keyword TEXT PRIMARY KEY, + need_location INTEGER NOT NULL, + record_id TEXT NOT NULL + ) WITHOUT ROWID; + + CREATE TABLE yelp_custom_details( + icon_id TEXT PRIMARY KEY, + score REAL NOT NULL, + record_id TEXT NOT NULL + ) WITHOUT ROWID; + + CREATE TABLE mdn_custom_details( + suggestion_id INTEGER PRIMARY KEY, + description TEXT NOT NULL, + FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE + ); +"; + +/// Initializes an SQLite connection to the Suggest database, performing +/// migrations as needed. +pub struct SuggestConnectionInitializer; + +impl ConnectionInitializer for SuggestConnectionInitializer { + const NAME: &'static str = "suggest db"; + const END_VERSION: u32 = VERSION; + + fn prepare(&self, conn: &Connection, _db_empty: bool) -> open_database::Result<()> { + let initial_pragmas = " + -- Use in-memory storage for TEMP tables. + PRAGMA temp_store = 2; + + PRAGMA journal_mode = WAL; + PRAGMA foreign_keys = ON; + "; + conn.execute_batch(initial_pragmas)?; + sql_support::debug_tools::define_debug_functions(conn)?; + + Ok(()) + } + + fn init(&self, db: &Transaction<'_>) -> open_database::Result<()> { + Ok(db.execute_batch(SQL)?) + } + + fn upgrade_from(&self, _db: &Transaction<'_>, version: u32) -> open_database::Result<()> { + match version { + 1..=13 => { + // Treat databases with these older schema versions as corrupt, + // so that they'll be replaced by a fresh, empty database with + // the current schema. 
+ Err(open_database::Error::Corrupt) + } + _ => Err(open_database::Error::IncompatibleVersion(version)), + } + } +} diff --git a/third_party/rust/suggest/src/store.rs b/third_party/rust/suggest/src/store.rs new file mode 100644 index 0000000000..e1f437e8c5 --- /dev/null +++ b/third_party/rust/suggest/src/store.rs @@ -0,0 +1,5316 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +use std::{ + collections::BTreeMap, + path::{Path, PathBuf}, + sync::Arc, +}; + +use error_support::handle_error; +use once_cell::sync::OnceCell; +use parking_lot::Mutex; +use remote_settings::{ + self, GetItemsOptions, RemoteSettingsConfig, RemoteSettingsRecord, SortOrder, +}; +use rusqlite::{ + types::{FromSql, ToSqlOutput}, + ToSql, +}; +use serde::{de::DeserializeOwned, Deserialize, Serialize}; + +use crate::{ + config::{SuggestGlobalConfig, SuggestProviderConfig}, + db::{ + ConnectionType, SuggestDao, SuggestDb, LAST_INGEST_META_KEY, UNPARSABLE_RECORDS_META_KEY, + }, + error::Error, + provider::SuggestionProvider, + rs::{ + SuggestAttachment, SuggestRecord, SuggestRecordId, SuggestRemoteSettingsClient, + REMOTE_SETTINGS_COLLECTION, SUGGESTIONS_PER_ATTACHMENT, + }, + schema::VERSION, + Result, SuggestApiResult, Suggestion, SuggestionQuery, +}; + +/// The chunk size used to request unparsable records. +pub const UNPARSABLE_IDS_PER_REQUEST: usize = 150; + +/// Builder for [SuggestStore] +/// +/// Using a builder is preferred to calling the constructor directly since it's harder to confuse +/// the data_path and cache_path strings. 
+pub struct SuggestStoreBuilder(Mutex<SuggestStoreBuilderInner>); + +#[derive(Default)] +struct SuggestStoreBuilderInner { + data_path: Option<String>, + cache_path: Option<String>, + remote_settings_config: Option<RemoteSettingsConfig>, +} + +impl Default for SuggestStoreBuilder { + fn default() -> Self { + Self::new() + } +} + +impl SuggestStoreBuilder { + pub fn new() -> SuggestStoreBuilder { + Self(Mutex::new(SuggestStoreBuilderInner::default())) + } + + pub fn data_path(self: Arc<Self>, path: String) -> Arc<Self> { + self.0.lock().data_path = Some(path); + self + } + + pub fn cache_path(self: Arc<Self>, path: String) -> Arc<Self> { + self.0.lock().cache_path = Some(path); + self + } + + pub fn remote_settings_config(self: Arc<Self>, config: RemoteSettingsConfig) -> Arc<Self> { + self.0.lock().remote_settings_config = Some(config); + self + } + + #[handle_error(Error)] + pub fn build(&self) -> SuggestApiResult<Arc<SuggestStore>> { + let inner = self.0.lock(); + let data_path = inner + .data_path + .clone() + .ok_or_else(|| Error::SuggestStoreBuilder("data_path not specified".to_owned()))?; + let cache_path = inner + .cache_path + .clone() + .ok_or_else(|| Error::SuggestStoreBuilder("cache_path not specified".to_owned()))?; + let settings_client = + remote_settings::Client::new(inner.remote_settings_config.clone().unwrap_or_else( + || RemoteSettingsConfig { + server_url: None, + bucket_name: None, + collection_name: REMOTE_SETTINGS_COLLECTION.into(), + }, + ))?; + Ok(Arc::new(SuggestStore { + inner: SuggestStoreInner::new(data_path, cache_path, settings_client), + })) + } +} + +/// The store is the entry point to the Suggest component. It incrementally +/// downloads suggestions from the Remote Settings service, stores them in a +/// local database, and returns them in response to user queries. +/// +/// Your application should create a single store, and manage it as a singleton. +/// The store is thread-safe, and supports concurrent queries and ingests. 
We +/// expect that your application will call [`SuggestStore::query()`] to show +/// suggestions as the user types into the address bar, and periodically call +/// [`SuggestStore::ingest()`] in the background to update the database with +/// new suggestions from Remote Settings. +/// +/// For responsiveness, we recommend always calling `query()` on a worker +/// thread. When the user types new input into the address bar, call +/// [`SuggestStore::interrupt()`] on the main thread to cancel the query +/// for the old input, and unblock the worker thread for the new query. +/// +/// The store keeps track of the state needed to support incremental ingestion, +/// but doesn't schedule the ingestion work itself, or decide how many +/// suggestions to ingest at once. This is for two reasons: +/// +/// 1. The primitives for scheduling background work vary between platforms, and +/// aren't available to the lower-level Rust layer. You might use an idle +/// timer on Desktop, `WorkManager` on Android, or `BGTaskScheduler` on iOS. +/// 2. Ingestion constraints can change, depending on the platform and the needs +/// of your application. A mobile device on a metered connection might want +/// to request a small subset of the Suggest data and download the rest +/// later, while a desktop on a fast link might download the entire dataset +/// on the first launch. +pub struct SuggestStore { + inner: SuggestStoreInner<remote_settings::Client>, +} + +/// For records that aren't currently parsable, +/// the record ID and the schema version it's first seen in +/// is recorded in the meta table using `UNPARSABLE_RECORDS_META_KEY` as its key. +/// On the first ingest after an upgrade, re-request those records from Remote Settings, +/// and try to ingest them again. 
+#[derive(Deserialize, Serialize, Default, Debug)] +#[serde(transparent)] +pub(crate) struct UnparsableRecords(pub BTreeMap<String, UnparsableRecord>); + +impl FromSql for UnparsableRecords { + fn column_result(value: rusqlite::types::ValueRef<'_>) -> rusqlite::types::FromSqlResult<Self> { + serde_json::from_str(value.as_str()?) + .map_err(|err| rusqlite::types::FromSqlError::Other(Box::new(err))) + } +} + +impl ToSql for UnparsableRecords { + fn to_sql(&self) -> rusqlite::Result<rusqlite::types::ToSqlOutput<'_>> { + Ok(ToSqlOutput::from(serde_json::to_string(self).map_err( + |err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)), + )?)) + } +} + +#[derive(Deserialize, Serialize, Debug)] +pub(crate) struct UnparsableRecord { + #[serde(rename = "v")] + pub schema_version: u32, +} + +impl SuggestStore { + /// Creates a Suggest store. + #[handle_error(Error)] + pub fn new( + path: &str, + settings_config: Option<RemoteSettingsConfig>, + ) -> SuggestApiResult<Self> { + let settings_client = || -> Result<_> { + Ok(remote_settings::Client::new( + settings_config.unwrap_or_else(|| RemoteSettingsConfig { + server_url: None, + bucket_name: None, + collection_name: REMOTE_SETTINGS_COLLECTION.into(), + }), + )?) + }()?; + Ok(Self { + inner: SuggestStoreInner::new("".to_owned(), path.to_owned(), settings_client), + }) + } + + /// Queries the database for suggestions. + #[handle_error(Error)] + pub fn query(&self, query: SuggestionQuery) -> SuggestApiResult<Vec<Suggestion>> { + self.inner.query(query) + } + + /// Interrupts any ongoing queries. + /// + /// This should be called when the user types new input into the address + /// bar, to ensure that they see fresh suggestions as they type. This + /// method does not interrupt any ongoing ingests. + pub fn interrupt(&self) { + self.inner.interrupt() + } + + /// Ingests new suggestions from Remote Settings. 
+ #[handle_error(Error)] + pub fn ingest(&self, constraints: SuggestIngestionConstraints) -> SuggestApiResult<()> { + self.inner.ingest(constraints) + } + + /// Removes all content from the database. + #[handle_error(Error)] + pub fn clear(&self) -> SuggestApiResult<()> { + self.inner.clear() + } + + // Returns global Suggest configuration data. + #[handle_error(Error)] + pub fn fetch_global_config(&self) -> SuggestApiResult<SuggestGlobalConfig> { + self.inner.fetch_global_config() + } + + // Returns per-provider Suggest configuration data. + #[handle_error(Error)] + pub fn fetch_provider_config( + &self, + provider: SuggestionProvider, + ) -> SuggestApiResult<Option<SuggestProviderConfig>> { + self.inner.fetch_provider_config(provider) + } +} + +/// Constraints limit which suggestions to ingest from Remote Settings. +#[derive(Clone, Default, Debug)] +pub struct SuggestIngestionConstraints { + /// The approximate maximum number of suggestions to ingest. Set to [`None`] + /// for "no limit". + /// + /// Because of how suggestions are partitioned in Remote Settings, this is a + /// soft limit, and the store might ingest more than requested. + pub max_suggestions: Option<u64>, +} + +/// The implementation of the store. This is generic over the Remote Settings +/// client, and is split out from the concrete [`SuggestStore`] for testing +/// with a mock client. +pub(crate) struct SuggestStoreInner<S> { + /// Path to the persistent SQL database. + /// + /// This stores things that should persist when the user clears their cache. + /// It's not currently used because not all consumers pass this in yet. + #[allow(unused)] + data_path: PathBuf, + /// Path to the temporary SQL database. + /// + /// This stores things that should be deleted when the user clears their cache. 
+ cache_path: PathBuf, + dbs: OnceCell<SuggestStoreDbs>, + settings_client: S, +} + +impl<S> SuggestStoreInner<S> { + fn new( + data_path: impl Into<PathBuf>, + cache_path: impl Into<PathBuf>, + settings_client: S, + ) -> Self { + Self { + data_path: data_path.into(), + cache_path: cache_path.into(), + dbs: OnceCell::new(), + settings_client, + } + } + + /// Returns this store's database connections, initializing them if + /// they're not already open. + fn dbs(&self) -> Result<&SuggestStoreDbs> { + self.dbs + .get_or_try_init(|| SuggestStoreDbs::open(&self.cache_path)) + } + + fn query(&self, query: SuggestionQuery) -> Result<Vec<Suggestion>> { + if query.keyword.is_empty() || query.providers.is_empty() { + return Ok(Vec::new()); + } + self.dbs()?.reader.read(|dao| dao.fetch_suggestions(&query)) + } + + fn interrupt(&self) { + if let Some(dbs) = self.dbs.get() { + // Only interrupt if the databases are already open. + dbs.reader.interrupt_handle.interrupt(); + } + } + + fn clear(&self) -> Result<()> { + self.dbs()?.writer.write(|dao| dao.clear()) + } + + pub fn fetch_global_config(&self) -> Result<SuggestGlobalConfig> { + self.dbs()?.reader.read(|dao| dao.get_global_config()) + } + + pub fn fetch_provider_config( + &self, + provider: SuggestionProvider, + ) -> Result<Option<SuggestProviderConfig>> { + self.dbs()? + .reader + .read(|dao| dao.get_provider_config(provider)) + } +} + +impl<S> SuggestStoreInner<S> +where + S: SuggestRemoteSettingsClient, +{ + fn ingest(&self, constraints: SuggestIngestionConstraints) -> Result<()> { + let writer = &self.dbs()?.writer; + + if let Some(unparsable_records) = + writer.read(|dao| dao.get_meta::<UnparsableRecords>(UNPARSABLE_RECORDS_META_KEY))? 
+ { + let all_unparsable_ids = unparsable_records + .0 + .iter() + .filter(|(_, unparsable_record)| unparsable_record.schema_version < VERSION) + .map(|(record_id, _)| record_id) + .collect::<Vec<_>>(); + for unparsable_ids in all_unparsable_ids.chunks(UNPARSABLE_IDS_PER_REQUEST) { + let mut options = GetItemsOptions::new(); + for unparsable_id in unparsable_ids { + options.eq("id", *unparsable_id); + } + let records_chunk = self + .settings_client + .get_records_with_options(&options)? + .records; + + self.ingest_records(writer, &records_chunk)?; + } + } + + let mut options = GetItemsOptions::new(); + // Remote Settings returns records in descending modification order + // (newest first), but we want them in ascending order (oldest first), + // so that we can eventually resume downloading where we left off. + options.sort("last_modified", SortOrder::Ascending); + if let Some(last_ingest) = writer.read(|dao| dao.get_meta::<u64>(LAST_INGEST_META_KEY))? { + // Only download changes since our last ingest. If our last ingest + // was interrupted, we'll pick up where we left off. + options.gt("last_modified", last_ingest.to_string()); + } + + if let Some(max_suggestions) = constraints.max_suggestions { + // Each record's attachment has 200 suggestions, so download enough + // records to cover the requested maximum. + let max_records = (max_suggestions.saturating_sub(1) / SUGGESTIONS_PER_ATTACHMENT) + 1; + options.limit(max_records); + } + + let records = self + .settings_client + .get_records_with_options(&options)? + .records; + self.ingest_records(writer, &records)?; + + Ok(()) + } + + fn ingest_records(&self, writer: &SuggestDb, records: &[RemoteSettingsRecord]) -> Result<()> { + for record in records { + let record_id = SuggestRecordId::from(&record.id); + if record.deleted { + // If the entire record was deleted, drop all its suggestions + // and advance the last ingest time. 
+ writer.write(|dao| dao.handle_deleted_record(record))?; + continue; + } + let Ok(fields) = + serde_json::from_value(serde_json::Value::Object(record.fields.clone())) + else { + // We don't recognize this record's type, so we don't know how + // to ingest its suggestions. Record this in the meta table. + writer.write(|dao| dao.handle_unparsable_record(record))?; + continue; + }; + + match fields { + SuggestRecord::AmpWikipedia => { + self.ingest_attachment(writer, record, |dao, record_id, suggestions| { + dao.insert_amp_wikipedia_suggestions(record_id, suggestions) + })?; + } + SuggestRecord::AmpMobile => { + self.ingest_attachment(writer, record, |dao, record_id, suggestions| { + dao.insert_amp_mobile_suggestions(record_id, suggestions) + })?; + } + SuggestRecord::Icon => { + let (Some(icon_id), Some(attachment)) = + (record_id.as_icon_id(), record.attachment.as_ref()) + else { + // An icon record should have an icon ID and an + // attachment. Icons that don't have these are + // malformed, so skip to the next record. 
+ writer.write(|dao| dao.put_last_ingest_if_newer(record.last_modified))?; + continue; + }; + let data = self.settings_client.get_attachment(&attachment.location)?; + writer.write(|dao| { + dao.put_icon(icon_id, &data)?; + dao.handle_ingested_record(record) + })?; + } + SuggestRecord::Amo => { + self.ingest_attachment(writer, record, |dao, record_id, suggestions| { + dao.insert_amo_suggestions(record_id, suggestions) + })?; + } + SuggestRecord::Pocket => { + self.ingest_attachment(writer, record, |dao, record_id, suggestions| { + dao.insert_pocket_suggestions(record_id, suggestions) + })?; + } + SuggestRecord::Yelp => { + self.ingest_attachment(writer, record, |dao, record_id, suggestions| { + match suggestions.first() { + Some(suggestion) => dao.insert_yelp_suggestions(record_id, suggestion), + None => Ok(()), + } + })?; + } + SuggestRecord::Mdn => { + self.ingest_attachment(writer, record, |dao, record_id, suggestions| { + dao.insert_mdn_suggestions(record_id, suggestions) + })?; + } + SuggestRecord::Weather(data) => { + self.ingest_record(writer, record, |dao, record_id| { + dao.insert_weather_data(record_id, &data) + })?; + } + SuggestRecord::GlobalConfig(config) => { + self.ingest_record(writer, record, |dao, _| { + dao.put_global_config(&SuggestGlobalConfig::from(&config)) + })?; + } + } + } + Ok(()) + } + + fn ingest_record( + &self, + writer: &SuggestDb, + record: &RemoteSettingsRecord, + ingestion_handler: impl FnOnce(&mut SuggestDao<'_>, &SuggestRecordId) -> Result<()>, + ) -> Result<()> { + let record_id = SuggestRecordId::from(&record.id); + + writer.write(|dao| { + // Drop any data that we previously ingested from this record. + // Suggestions in particular don't have a stable identifier, and + // determining which suggestions in the record actually changed is + // more complicated than dropping and re-ingesting all of them. + dao.drop_suggestions(&record_id)?; + + // Ingest (or re-ingest) all data in the record. 
+ ingestion_handler(dao, &record_id)?; + + dao.handle_ingested_record(record) + }) + } + + fn ingest_attachment<T>( + &self, + writer: &SuggestDb, + record: &RemoteSettingsRecord, + ingestion_handler: impl FnOnce(&mut SuggestDao<'_>, &SuggestRecordId, &[T]) -> Result<()>, + ) -> Result<()> + where + T: DeserializeOwned, + { + let Some(attachment) = record.attachment.as_ref() else { + // This method should be called only when a record is expected to + // have an attachment. If it doesn't have one, it's malformed, so + // skip to the next record. + writer.write(|dao| dao.put_last_ingest_if_newer(record.last_modified))?; + return Ok(()); + }; + + let attachment_data = self.settings_client.get_attachment(&attachment.location)?; + match serde_json::from_slice::<SuggestAttachment<T>>(&attachment_data) { + Ok(attachment) => self.ingest_record(writer, record, |dao, record_id| { + ingestion_handler(dao, record_id, attachment.suggestions()) + }), + Err(_) => writer.write(|dao| dao.handle_unparsable_record(record)), + } + } +} + +/// Holds a store's open connections to the Suggest database. +struct SuggestStoreDbs { + /// A read-write connection used to update the database with new data. + writer: SuggestDb, + /// A read-only connection used to query the database. + reader: SuggestDb, +} + +impl SuggestStoreDbs { + fn open(path: &Path) -> Result<Self> { + // Order is important here: the writer must be opened first, so that it + // can set up the database and run any migrations. 
+ let writer = SuggestDb::open(path, ConnectionType::ReadWrite)?; + let reader = SuggestDb::open(path, ConnectionType::ReadOnly)?; + Ok(Self { writer, reader }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::{cell::RefCell, collections::HashMap}; + + use anyhow::{anyhow, Context}; + use expect_test::expect; + use parking_lot::Once; + use rc_crypto::rand; + use remote_settings::{RemoteSettingsRecord, RemoteSettingsResponse}; + use serde_json::json; + use sql_support::ConnExt; + + use crate::SuggestionProvider; + + /// Creates a unique in-memory Suggest store. + fn unique_test_store<S>(settings_client: S) -> SuggestStoreInner<S> + where + S: SuggestRemoteSettingsClient, + { + let mut unique_suffix = [0u8; 8]; + rand::fill(&mut unique_suffix).expect("Failed to generate unique suffix for test store"); + // A store opens separate connections to the same database for reading + // and writing, so we must give our in-memory database a name, and open + // it in shared-cache mode so that both connections can access it. + SuggestStoreInner::new( + format!( + "file:test_store_data_{}?mode=memory&cache=shared", + hex::encode(unique_suffix), + ), + format!( + "file:test_store_cache_{}?mode=memory&cache=shared", + hex::encode(unique_suffix), + ), + settings_client, + ) + } + + /// A snapshot containing fake Remote Settings records and attachments for + /// the store to ingest. We use snapshots to test the store's behavior in a + /// data-driven way. + struct Snapshot { + records: Vec<RemoteSettingsRecord>, + attachments: HashMap<&'static str, Vec<u8>>, + } + + impl Snapshot { + /// Creates a snapshot from a JSON value that represents a collection of + /// Suggest Remote Settings records. + /// + /// You can use the [`serde_json::json!`] macro to construct the JSON + /// value, then pass it to this function. It's easier to use the + /// `Snapshot::with_records(json!(...))` idiom than to construct the + /// records by hand. 
+ fn with_records(value: serde_json::Value) -> anyhow::Result<Self> { + Ok(Self { + records: serde_json::from_value(value) + .context("Couldn't create snapshot with Remote Settings records")?, + attachments: HashMap::new(), + }) + } + + /// Adds a data attachment with one or more suggestions to the snapshot. + fn with_data( + mut self, + location: &'static str, + value: serde_json::Value, + ) -> anyhow::Result<Self> { + self.attachments.insert( + location, + serde_json::to_vec(&value).context("Couldn't add data attachment to snapshot")?, + ); + Ok(self) + } + + /// Adds an icon attachment to the snapshot. + fn with_icon(mut self, location: &'static str, bytes: Vec<u8>) -> Self { + self.attachments.insert(location, bytes); + self + } + } + + /// A fake Remote Settings client that returns records and attachments from + /// a snapshot. + struct SnapshotSettingsClient { + /// The current snapshot. You can modify it using + /// [`RefCell::borrow_mut()`] to simulate remote updates in tests. + snapshot: RefCell<Snapshot>, + + /// The options passed to the last [`Self::get_records_with_options()`] + /// call. + last_get_records_options: RefCell<Option<GetItemsOptions>>, + } + + impl SnapshotSettingsClient { + /// Creates a client with an initial snapshot. + fn with_snapshot(snapshot: Snapshot) -> Self { + Self { + snapshot: RefCell::new(snapshot), + last_get_records_options: RefCell::default(), + } + } + + /// Returns the most recent value of an option passed to + /// [`Self::get_records_with_options()`]. 
+ fn last_get_records_option(&self, option: &str) -> Option<String> { + self.last_get_records_options + .borrow() + .as_ref() + .and_then(|options| { + options + .iter_query_pairs() + .find(|(key, _)| key == option) + .map(|(_, value)| value.into()) + }) + } + } + + impl SuggestRemoteSettingsClient for SnapshotSettingsClient { + fn get_records_with_options( + &self, + options: &GetItemsOptions, + ) -> Result<RemoteSettingsResponse> { + *self.last_get_records_options.borrow_mut() = Some(options.clone()); + let records = self.snapshot.borrow().records.clone(); + let last_modified = records + .iter() + .map(|record| record.last_modified) + .max() + .unwrap_or(0); + Ok(RemoteSettingsResponse { + records, + last_modified, + }) + } + + fn get_attachment(&self, location: &str) -> Result<Vec<u8>> { + Ok(self + .snapshot + .borrow() + .attachments + .get(location) + .unwrap_or_else(|| unreachable!("Unexpected request for attachment `{}`", location)) + .clone()) + } + } + + fn before_each() { + static ONCE: Once = Once::new(); + ONCE.call_once(|| { + env_logger::init(); + }); + } + + /// Tests that `SuggestStore` is usable with UniFFI, which requires exposed + /// interfaces to be `Send` and `Sync`. + #[test] + fn is_thread_safe() { + before_each(); + + fn is_send_sync<T: Send + Sync>() {} + is_send_sync::<SuggestStore>(); + } + + /// Tests ingesting suggestions into an empty database. + #[test] + fn ingest_suggestions() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([{ + "id": "1234", + "type": "data", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }]))? 
+ .with_data( + "data-1.json", + json!([{ + "id": 0, + "advertiser": "Los Pollos Hermanos", + "iab_category": "8 - Food & Drink", + "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"], + "title": "Los Pollos Hermanos - Albuquerque", + "url": "https://www.lph-nm.biz", + "icon": "5678", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + }]), + )?; + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + store.dbs()?.reader.read(|dao| { + assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(15)); + expect![[r#" + [ + Amp { + title: "Los Pollos Hermanos - Albuquerque", + url: "https://www.lph-nm.biz", + raw_url: "https://www.lph-nm.biz", + icon: None, + full_keyword: "los", + block_id: 0, + advertiser: "Los Pollos Hermanos", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.3, + }, + ] + "#]] + .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "lo".into(), + providers: vec![SuggestionProvider::Amp], + limit: None, + })?); + + Ok(()) + })?; + + Ok(()) + } + + /// Tests ingesting suggestions with icons. + #[test] + fn ingest_icons() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([{ + "id": "data-1", + "type": "data", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }, { + "id": "icon-2", + "type": "icon", + "last_modified": 20, + "attachment": { + "filename": "icon-2.png", + "mimetype": "image/png", + "location": "icon-2.png", + "hash": "", + "size": 0, + }, + }]))? 
+ .with_data( + "data-1.json", + json!([{ + "id": 0, + "advertiser": "Good Place Eats", + "iab_category": "8 - Food & Drink", + "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"], + "title": "Lasagna Come Out Tomorrow", + "url": "https://www.lasagna.restaurant", + "icon": "2", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url" + }, { + "id": 0, + "advertiser": "Good Place Eats", + "iab_category": "8 - Food & Drink", + "keywords": ["pe", "pen", "penne", "penne for your thoughts"], + "title": "Penne for Your Thoughts", + "url": "https://penne.biz", + "icon": "2", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + }]), + )? + .with_icon("icon-2.png", "i-am-an-icon".as_bytes().into()); + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + store.dbs()?.reader.read(|dao| { + expect![[r#" + [ + Amp { + title: "Lasagna Come Out Tomorrow", + url: "https://www.lasagna.restaurant", + raw_url: "https://www.lasagna.restaurant", + icon: Some( + [ + 105, + 45, + 97, + 109, + 45, + 97, + 110, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: "lasagna", + block_id: 0, + advertiser: "Good Place Eats", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.2, + }, + ] + "#]] + .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "la".into(), + providers: vec![SuggestionProvider::Amp], + limit: None, + })?); + expect![[r#" + [ + Amp { + title: "Penne for Your Thoughts", + url: "https://penne.biz", + raw_url: "https://penne.biz", + icon: Some( + [ + 105, + 45, + 97, + 109, + 45, + 97, + 110, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: "penne", + block_id: 0, + 
advertiser: "Good Place Eats", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.3, + }, + ] + "#]] + .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "pe".into(), + providers: vec![SuggestionProvider::Amp], + limit: None, + })?); + + Ok(()) + })?; + + Ok(()) + } + + /// Tests ingesting a data attachment containing a single suggestion, + /// instead of an array of suggestions. + #[test] + fn ingest_one_suggestion_in_data_attachment() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([{ + "id": "data-1", + "type": "data", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }]))? + .with_data( + "data-1.json", + json!({ + "id": 0, + "advertiser": "Good Place Eats", + "iab_category": "8 - Food & Drink", + "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"], + "title": "Lasagna Come Out Tomorrow", + "url": "https://www.lasagna.restaurant", + "icon": "2", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + }), + )?; + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + store.dbs()?.reader.read(|dao| { + expect![[r#" + [ + Amp { + title: "Lasagna Come Out Tomorrow", + url: "https://www.lasagna.restaurant", + raw_url: "https://www.lasagna.restaurant", + icon: None, + full_keyword: "lasagna", + block_id: 0, + advertiser: "Good Place Eats", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.3, + }, + ] + "#]] + 
.assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "la".into(), + providers: vec![SuggestionProvider::Amp], + limit: None, + })?); + + Ok(()) + })?; + + Ok(()) + } + + /// Tests re-ingesting suggestions from an updated attachment. + #[test] + fn reingest_amp_suggestions() -> anyhow::Result<()> { + before_each(); + + // Ingest suggestions from the initial snapshot. + let initial_snapshot = Snapshot::with_records(json!([{ + "id": "data-1", + "type": "data", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }]))? + .with_data( + "data-1.json", + json!([{ + "id": 0, + "advertiser": "Good Place Eats", + "iab_category": "8 - Food & Drink", + "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"], + "title": "Lasagna Come Out Tomorrow", + "url": "https://www.lasagna.restaurant", + "icon": "1", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + }, { + "id": 0, + "advertiser": "Los Pollos Hermanos", + "iab_category": "8 - Food & Drink", + "keywords": ["lo", "los p", "los pollos h"], + "title": "Los Pollos Hermanos - Albuquerque", + "url": "https://www.lph-nm.biz", + "icon": "2", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + }]), + )?; + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(initial_snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + store.dbs()?.reader.read(|dao| { + assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(15u64)); + expect![[r#" + [ + Amp { + title: "Lasagna Come Out Tomorrow", + url: "https://www.lasagna.restaurant", + raw_url: "https://www.lasagna.restaurant", + icon: None, + full_keyword: "lasagna", + block_id: 0, + advertiser: "Good Place Eats", + iab_category: "8 - Food & Drink", + impression_url: 
"https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.3, + }, + ] + "#]] + .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "la".into(), + providers: vec![SuggestionProvider::Amp], + limit: None, + })?); + Ok(()) + })?; + + // Update the snapshot with new suggestions: drop Lasagna, update Los + // Pollos, and add Penne. + *store.settings_client.snapshot.borrow_mut() = Snapshot::with_records(json!([{ + "id": "data-1", + "type": "data", + "last_modified": 30, + "attachment": { + "filename": "data-1-1.json", + "mimetype": "application/json", + "location": "data-1-1.json", + "hash": "", + "size": 0, + }, + }]))? + .with_data( + "data-1-1.json", + json!([{ + "id": 0, + "advertiser": "Los Pollos Hermanos", + "iab_category": "8 - Food & Drink", + "keywords": ["los ", "los pollos", "los pollos hermanos"], + "title": "Los Pollos Hermanos - Now Serving at 14 Locations!", + "url": "https://www.lph-nm.biz", + "icon": "2", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + }, { + "id": 0, + "advertiser": "Good Place Eats", + "iab_category": "8 - Food & Drink", + "keywords": ["pe", "pen", "penne", "penne for your thoughts"], + "title": "Penne for Your Thoughts", + "url": "https://penne.biz", + "icon": "2", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + }]), + )?; + + store.ingest(SuggestIngestionConstraints::default())?; + + store.dbs()?.reader.read(|dao| { + assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(30u64)); + assert!(dao + .fetch_suggestions(&SuggestionQuery { + keyword: "la".into(), + providers: vec![SuggestionProvider::Amp], + limit: None, + })? 
+ .is_empty()); + expect![[r#" + [ + Amp { + title: "Los Pollos Hermanos - Now Serving at 14 Locations!", + url: "https://www.lph-nm.biz", + raw_url: "https://www.lph-nm.biz", + icon: None, + full_keyword: "los pollos", + block_id: 0, + advertiser: "Los Pollos Hermanos", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.3, + }, + ] + "#]] + .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "los ".into(), + providers: vec![SuggestionProvider::Amp], + limit: None, + })?); + expect![[r#" + [ + Amp { + title: "Penne for Your Thoughts", + url: "https://penne.biz", + raw_url: "https://penne.biz", + icon: None, + full_keyword: "penne", + block_id: 0, + advertiser: "Good Place Eats", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.3, + }, + ] + "#]] + .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "pe".into(), + providers: vec![SuggestionProvider::Amp], + limit: None, + })?); + Ok(()) + })?; + + Ok(()) + } + + /// Tests re-ingesting icons from an updated attachment. + #[test] + fn reingest_icons() -> anyhow::Result<()> { + before_each(); + + // Ingest suggestions and icons from the initial snapshot. 
+ let initial_snapshot = Snapshot::with_records(json!([{ + "id": "data-1", + "type": "data", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }, { + "id": "icon-2", + "type": "icon", + "last_modified": 20, + "attachment": { + "filename": "icon-2.png", + "mimetype": "image/png", + "location": "icon-2.png", + "hash": "", + "size": 0, + }, + }, { + "id": "icon-3", + "type": "icon", + "last_modified": 25, + "attachment": { + "filename": "icon-3.png", + "mimetype": "image/png", + "location": "icon-3.png", + "hash": "", + "size": 0, + }, + }]))? + .with_data( + "data-1.json", + json!([{ + "id": 0, + "advertiser": "Good Place Eats", + "iab_category": "8 - Food & Drink", + "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"], + "title": "Lasagna Come Out Tomorrow", + "url": "https://www.lasagna.restaurant", + "icon": "2", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + }, { + "id": 0, + "advertiser": "Los Pollos Hermanos", + "iab_category": "8 - Food & Drink", + "keywords": ["lo", "los", "los pollos", "los pollos hermanos"], + "title": "Los Pollos Hermanos - Albuquerque", + "url": "https://www.lph-nm.biz", + "icon": "3", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + }]), + )? 
+ .with_icon("icon-2.png", "lasagna-icon".as_bytes().into()) + .with_icon("icon-3.png", "pollos-icon".as_bytes().into()); + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(initial_snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + store.dbs()?.reader.read(|dao| { + assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(25u64)); + assert_eq!( + dao.conn + .query_one::<i64>("SELECT count(*) FROM suggestions")?, + 2 + ); + assert_eq!(dao.conn.query_one::<i64>("SELECT count(*) FROM icons")?, 2); + Ok(()) + })?; + + // Update the snapshot with new icons. + *store.settings_client.snapshot.borrow_mut() = Snapshot::with_records(json!([{ + "id": "icon-2", + "type": "icon", + "last_modified": 30, + "attachment": { + "filename": "icon-2.png", + "mimetype": "image/png", + "location": "icon-2.png", + "hash": "", + "size": 0, + }, + }, { + "id": "icon-3", + "type": "icon", + "last_modified": 35, + "attachment": { + "filename": "icon-3.png", + "mimetype": "image/png", + "location": "icon-3.png", + "hash": "", + "size": 0, + } + }]))? 
+ .with_icon("icon-2.png", "new-lasagna-icon".as_bytes().into()) + .with_icon("icon-3.png", "new-pollos-icon".as_bytes().into()); + + store.ingest(SuggestIngestionConstraints::default())?; + + store.dbs()?.reader.read(|dao| { + assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(35u64)); + expect![[r#" + [ + Amp { + title: "Lasagna Come Out Tomorrow", + url: "https://www.lasagna.restaurant", + raw_url: "https://www.lasagna.restaurant", + icon: Some( + [ + 110, + 101, + 119, + 45, + 108, + 97, + 115, + 97, + 103, + 110, + 97, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: "lasagna", + block_id: 0, + advertiser: "Good Place Eats", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.3, + }, + ] + "#]] + .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "la".into(), + providers: vec![SuggestionProvider::Amp], + limit: None, + })?); + expect![[r#" + [ + Amp { + title: "Los Pollos Hermanos - Albuquerque", + url: "https://www.lph-nm.biz", + raw_url: "https://www.lph-nm.biz", + icon: Some( + [ + 110, + 101, + 119, + 45, + 112, + 111, + 108, + 108, + 111, + 115, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: "los", + block_id: 0, + advertiser: "Los Pollos Hermanos", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.3, + }, + ] + "#]] + .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "lo".into(), + providers: vec![SuggestionProvider::Amp], + limit: None, + })?); + Ok(()) + })?; + + Ok(()) + } + + /// Tests re-ingesting AMO suggestions from an updated attachment. + #[test] + fn reingest_amo_suggestions() -> anyhow::Result<()> { + before_each(); + + // Ingest suggestions from the initial snapshot. 
+ let initial_snapshot = Snapshot::with_records(json!([{ + "id": "data-1", + "type": "amo-suggestions", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }, { + "id": "data-2", + "type": "amo-suggestions", + "last_modified": 15, + "attachment": { + "filename": "data-2.json", + "mimetype": "application/json", + "location": "data-2.json", + "hash": "", + "size": 0, + }, + }]))? + .with_data( + "data-1.json", + json!({ + "description": "First suggestion", + "url": "https://example.org/amo-suggestion-1", + "guid": "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}", + "keywords": ["relay", "spam", "masking email", "alias"], + "title": "AMO suggestion", + "icon": "https://example.org/amo-suggestion-1/icon.png", + "rating": "4.9", + "number_of_ratings": 800, + "score": 0.25 + }), + )? + .with_data( + "data-2.json", + json!([{ + "description": "Second suggestion", + "url": "https://example.org/amo-suggestion-2", + "guid": "{6d24e3b8-1400-4d37-9440-c798f9b79b1a}", + "keywords": ["dark mode", "dark theme", "night mode"], + "title": "Another AMO suggestion", + "icon": "https://example.org/amo-suggestion-2/icon.png", + "rating": "4.6", + "number_of_ratings": 750, + "score": 0.25 + }, { + "description": "Third suggestion", + "url": "https://example.org/amo-suggestion-3", + "guid": "{1e9d493b-0498-48bb-9b9a-8b45a44df146}", + "keywords": ["grammar", "spelling", "edit"], + "title": "Yet another AMO suggestion", + "icon": "https://example.org/amo-suggestion-3/icon.png", + "rating": "4.8", + "number_of_ratings": 900, + "score": 0.25 + }]), + )?; + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(initial_snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + store.dbs()?.reader.read(|dao| { + assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(15u64)); + + expect![[r#" + [ + Amo { + title: "AMO suggestion", + url: 
"https://example.org/amo-suggestion-1", + icon_url: "https://example.org/amo-suggestion-1/icon.png", + description: "First suggestion", + rating: Some( + "4.9", + ), + number_of_ratings: 800, + guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}", + score: 0.25, + }, + ] + "#]] + .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "masking e".into(), + providers: vec![SuggestionProvider::Amo], + limit: None, + })?); + + expect![[r#" + [ + Amo { + title: "Another AMO suggestion", + url: "https://example.org/amo-suggestion-2", + icon_url: "https://example.org/amo-suggestion-2/icon.png", + description: "Second suggestion", + rating: Some( + "4.6", + ), + number_of_ratings: 750, + guid: "{6d24e3b8-1400-4d37-9440-c798f9b79b1a}", + score: 0.25, + }, + ] + "#]] + .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "night".into(), + providers: vec![SuggestionProvider::Amo], + limit: None, + })?); + + Ok(()) + })?; + + // Update the snapshot with new suggestions: update the second, drop the + // third, and add the fourth. + *store.settings_client.snapshot.borrow_mut() = Snapshot::with_records(json!([{ + "id": "data-2", + "type": "amo-suggestions", + "last_modified": 30, + "attachment": { + "filename": "data-2-1.json", + "mimetype": "application/json", + "location": "data-2-1.json", + "hash": "", + "size": 0, + }, + }]))? 
+ .with_data( + "data-2-1.json", + json!([{ + "description": "Updated second suggestion", + "url": "https://example.org/amo-suggestion-2", + "guid": "{6d24e3b8-1400-4d37-9440-c798f9b79b1a}", + "keywords": ["dark mode", "night mode"], + "title": "Another AMO suggestion", + "icon": "https://example.org/amo-suggestion-2/icon.png", + "rating": "4.7", + "number_of_ratings": 775, + "score": 0.25 + }, { + "description": "Fourth suggestion", + "url": "https://example.org/amo-suggestion-4", + "guid": "{1ea82ebd-a1ba-4f57-b8bb-3824ead837bd}", + "keywords": ["image search", "visual search"], + "title": "New AMO suggestion", + "icon": "https://example.org/amo-suggestion-4/icon.png", + "rating": "5.0", + "number_of_ratings": 100, + "score": 0.25 + }]), + )?; + + store.ingest(SuggestIngestionConstraints::default())?; + + store.dbs()?.reader.read(|dao| { + assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(30u64)); + + expect![[r#" + [ + Amo { + title: "AMO suggestion", + url: "https://example.org/amo-suggestion-1", + icon_url: "https://example.org/amo-suggestion-1/icon.png", + description: "First suggestion", + rating: Some( + "4.9", + ), + number_of_ratings: 800, + guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}", + score: 0.25, + }, + ] + "#]] + .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "masking e".into(), + providers: vec![SuggestionProvider::Amo], + limit: None, + })?); + + expect![[r#" + [] + "#]] + .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "dark t".into(), + providers: vec![SuggestionProvider::Amo], + limit: None, + })?); + + expect![[r#" + [ + Amo { + title: "Another AMO suggestion", + url: "https://example.org/amo-suggestion-2", + icon_url: "https://example.org/amo-suggestion-2/icon.png", + description: "Updated second suggestion", + rating: Some( + "4.7", + ), + number_of_ratings: 775, + guid: "{6d24e3b8-1400-4d37-9440-c798f9b79b1a}", + score: 0.25, + }, + ] + "#]] + 
.assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "night".into(), + providers: vec![SuggestionProvider::Amo], + limit: None, + })?); + + expect![[r#" + [ + Amo { + title: "New AMO suggestion", + url: "https://example.org/amo-suggestion-4", + icon_url: "https://example.org/amo-suggestion-4/icon.png", + description: "Fourth suggestion", + rating: Some( + "5.0", + ), + number_of_ratings: 100, + guid: "{1ea82ebd-a1ba-4f57-b8bb-3824ead837bd}", + score: 0.25, + }, + ] + "#]] + .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "image search".into(), + providers: vec![SuggestionProvider::Amo], + limit: None, + })?); + + Ok(()) + })?; + + Ok(()) + } + + /// Tests ingesting tombstones for previously-ingested suggestions and + /// icons. + #[test] + fn ingest_tombstones() -> anyhow::Result<()> { + before_each(); + + // Ingest suggestions and icons from the initial snapshot. + let initial_snapshot = Snapshot::with_records(json!([{ + "id": "data-1", + "type": "data", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }, { + "id": "icon-2", + "type": "icon", + "last_modified": 20, + "attachment": { + "filename": "icon-2.png", + "mimetype": "image/png", + "location": "icon-2.png", + "hash": "", + "size": 0, + }, + }]))? + .with_data( + "data-1.json", + json!([{ + "id": 0, + "advertiser": "Good Place Eats", + "iab_category": "8 - Food & Drink", + "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"], + "title": "Lasagna Come Out Tomorrow", + "url": "https://www.lasagna.restaurant", + "icon": "2", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + }]), + )? 
+ .with_icon("icon-2.png", "i-am-an-icon".as_bytes().into()); + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(initial_snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + store.dbs()?.reader.read(|dao| { + assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(20)); + assert_eq!( + dao.conn + .query_one::<i64>("SELECT count(*) FROM suggestions")?, + 1 + ); + assert_eq!(dao.conn.query_one::<i64>("SELECT count(*) FROM icons")?, 1); + + Ok(()) + })?; + + // Replace the records with tombstones. Ingesting these should remove + // all their suggestions and icons. + *store.settings_client.snapshot.borrow_mut() = Snapshot::with_records(json!([{ + "id": "data-1", + "last_modified": 25, + "deleted": true, + }, { + "id": "icon-2", + "last_modified": 30, + "deleted": true, + }]))?; + + store.ingest(SuggestIngestionConstraints::default())?; + + store.dbs()?.reader.read(|dao| { + assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(30)); + assert_eq!( + dao.conn + .query_one::<i64>("SELECT count(*) FROM suggestions")?, + 0 + ); + assert_eq!(dao.conn.query_one::<i64>("SELECT count(*) FROM icons")?, 0); + + Ok(()) + })?; + + Ok(()) + } + + /// Tests ingesting suggestions with constraints. + #[test] + fn ingest_with_constraints() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([]))?; + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + assert_eq!( + store.settings_client.last_get_records_option("_limit"), + None, + ); + + // 200 suggestions per record, so test with numbers around that + // boundary. 
+ let table = [ + (0, "1"), + (199, "1"), + (200, "1"), + (201, "2"), + (300, "2"), + (400, "2"), + (401, "3"), + ]; + for (max_suggestions, expected_limit) in table { + store.ingest(SuggestIngestionConstraints { + max_suggestions: Some(max_suggestions), + })?; + let actual_limit = store + .settings_client + .last_get_records_option("_limit") + .ok_or_else(|| { + anyhow!("Want limit = {} for {}", expected_limit, max_suggestions) + })?; + assert_eq!( + actual_limit, expected_limit, + "Want limit = {} for {}; got limit = {}", + expected_limit, max_suggestions, actual_limit + ); + } + + Ok(()) + } + + /// Tests clearing the store. + #[test] + fn clear() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([{ + "id": "data-1", + "type": "data", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }]))? + .with_data( + "data-1.json", + json!([{ + "id": 0, + "advertiser": "Los Pollos Hermanos", + "iab_category": "8 - Food & Drink", + "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"], + "title": "Los Pollos Hermanos - Albuquerque", + "url": "https://www.lph-nm.biz", + "icon": "2", + "impression_url": "https://example.com", + "click_url": "https://example.com", + "score": 0.3 + }]), + )?; + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + store.dbs()?.reader.read(|dao| { + assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(15)); + assert_eq!( + dao.conn + .query_one::<i64>("SELECT count(*) FROM suggestions")?, + 1 + ); + assert_eq!( + dao.conn.query_one::<i64>("SELECT count(*) FROM keywords")?, + 6 + ); + + Ok(()) + })?; + + store.clear()?; + + store.dbs()?.reader.read(|dao| { + assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, None); + assert_eq!( + dao.conn + 
.query_one::<i64>("SELECT count(*) FROM suggestions")?, + 0 + ); + assert_eq!( + dao.conn.query_one::<i64>("SELECT count(*) FROM keywords")?, + 0 + ); + + Ok(()) + })?; + + Ok(()) + } + + /// Tests querying suggestions. + #[test] + fn query() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([{ + "id": "data-1", + "type": "data", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + + }, { + "id": "data-2", + "type": "amo-suggestions", + "last_modified": 15, + "attachment": { + "filename": "data-2.json", + "mimetype": "application/json", + "location": "data-2.json", + "hash": "", + "size": 0, + }, + }, { + "id": "data-3", + "type": "pocket-suggestions", + "last_modified": 15, + "attachment": { + "filename": "data-3.json", + "mimetype": "application/json", + "location": "data-3.json", + "hash": "", + "size": 0, + }, + }, { + "id": "data-4", + "type": "yelp-suggestions", + "last_modified": 15, + "attachment": { + "filename": "data-4.json", + "mimetype": "application/json", + "location": "data-4.json", + "hash": "", + "size": 0, + }, + }, { + "id": "data-5", + "type": "mdn-suggestions", + "last_modified": 15, + "attachment": { + "filename": "data-5.json", + "mimetype": "application/json", + "location": "data-5.json", + "hash": "", + "size": 0, + }, + }, { + "id": "icon-2", + "type": "icon", + "last_modified": 20, + "attachment": { + "filename": "icon-2.png", + "mimetype": "image/png", + "location": "icon-2.png", + "hash": "", + "size": 0, + }, + }, { + "id": "icon-3", + "type": "icon", + "last_modified": 25, + "attachment": { + "filename": "icon-3.png", + "mimetype": "image/png", + "location": "icon-3.png", + "hash": "", + "size": 0, + }, + }, { + "id": "icon-yelp-favicon", + "type": "icon", + "last_modified": 25, + "attachment": { + "filename": "yelp-favicon.svg", + "mimetype": "image/svg+xml", + "location": 
"yelp-favicon.svg", + "hash": "", + "size": 0, + }, + }]))? + .with_data( + "data-1.json", + json!([{ + "id": 0, + "advertiser": "Good Place Eats", + "iab_category": "8 - Food & Drink", + "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"], + "title": "Lasagna Come Out Tomorrow", + "url": "https://www.lasagna.restaurant", + "icon": "2", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + }, { + "id": 0, + "advertiser": "Wikipedia", + "iab_category": "5 - Education", + "keywords": ["cal", "cali", "california"], + "title": "California", + "url": "https://wikipedia.org/California", + "icon": "3" + }, { + "id": 0, + "advertiser": "Wikipedia", + "iab_category": "5 - Education", + "keywords": ["cal", "cali", "california", "institute", "technology"], + "title": "California Institute of Technology", + "url": "https://wikipedia.org/California_Institute_of_Technology", + "icon": "3" + },{ + "id": 0, + "advertiser": "Wikipedia", + "iab_category": "5 - Education", + "keywords": ["multimatch"], + "title": "Multimatch", + "url": "https://wikipedia.org/Multimatch", + "icon": "3" + }]), + )? 
+ .with_data( + "data-2.json", + json!([ + { + "description": "amo suggestion", + "url": "https://addons.mozilla.org/en-US/firefox/addon/example", + "guid": "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}", + "keywords": ["relay", "spam", "masking email", "alias"], + "title": "Firefox Relay", + "icon": "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b", + "rating": "4.9", + "number_of_ratings": 888, + "score": 0.25 + }, + { + "description": "amo suggestion multi-match", + "url": "https://addons.mozilla.org/en-US/firefox/addon/multimatch", + "guid": "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}", + "keywords": ["multimatch"], + "title": "Firefox Multimatch", + "icon": "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b", + "rating": "4.9", + "number_of_ratings": 888, + "score": 0.25 + }, + ]), + )? + .with_data( + "data-3.json", + json!([ + { + "description": "pocket suggestion", + "url": "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women", + "lowConfidenceKeywords": ["soft life", "workaholism", "toxic work culture", "work-life balance"], + "highConfidenceKeywords": ["burnout women", "grind culture", "women burnout"], + "title": "‘It’s Not Just Burnout:’ How Grind Culture Fails Women", + "score": 0.25 + }, + { + "description": "pocket suggestion multi-match", + "url": "https://getpocket.com/collections/multimatch", + "lowConfidenceKeywords": [], + "highConfidenceKeywords": ["multimatch"], + "title": "Multimatching", + "score": 0.88 + }, + ]), + )? 
+ .with_data( + "data-4.json", + json!({ + "subjects": ["ramen", "spicy ramen", "spicy random ramen", "rats", "raven", "raccoon", "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789", "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789Z"], + "preModifiers": ["best", "super best", "same_modifier"], + "postModifiers": ["delivery", "super delivery", "same_modifier"], + "locationSigns": [ + { "keyword": "in", "needLocation": true }, + { "keyword": "near", "needLocation": true }, + { "keyword": "near by", "needLocation": false }, + { "keyword": "near me", "needLocation": false }, + ], + "yelpModifiers": ["yelp", "yelp keyword"], + "icon": "yelp-favicon", + "score": 0.5 + }), + )? + .with_data( + "data-5.json", + json!([ + { + "description": "Javascript Array", + "url": "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array", + "keywords": ["array javascript", "javascript array", "wildcard"], + "title": "Array", + "score": 0.24 + }, + ]), + )? 
+ .with_icon("icon-2.png", "i-am-an-icon".as_bytes().into()) + .with_icon("icon-3.png", "also-an-icon".as_bytes().into()) + .with_icon("yelp-favicon.svg", "yelp-icon".as_bytes().into()); + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + let table = [ + ( + "empty keyword; all providers", + SuggestionQuery { + keyword: String::new(), + providers: vec![ + SuggestionProvider::Amp, + SuggestionProvider::Wikipedia, + SuggestionProvider::Amo, + SuggestionProvider::Pocket, + SuggestionProvider::Yelp, + SuggestionProvider::Weather, + ], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `la`; all providers", + SuggestionQuery { + keyword: "la".into(), + providers: vec![ + SuggestionProvider::Amp, + SuggestionProvider::Wikipedia, + SuggestionProvider::Amo, + SuggestionProvider::Pocket, + SuggestionProvider::Yelp, + SuggestionProvider::Weather, + ], + limit: None, + }, + expect![[r#" + [ + Amp { + title: "Lasagna Come Out Tomorrow", + url: "https://www.lasagna.restaurant", + raw_url: "https://www.lasagna.restaurant", + icon: Some( + [ + 105, + 45, + 97, + 109, + 45, + 97, + 110, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: "lasagna", + block_id: 0, + advertiser: "Good Place Eats", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.3, + }, + ] + "#]], + ), + ( + "multimatch; all providers", + SuggestionQuery { + keyword: "multimatch".into(), + providers: vec![ + SuggestionProvider::Amp, + SuggestionProvider::Wikipedia, + SuggestionProvider::Amo, + SuggestionProvider::Pocket, + ], + limit: None, + }, + expect![[r#" + [ + Pocket { + title: "Multimatching", + url: "https://getpocket.com/collections/multimatch", + score: 0.88, + is_top_pick: true, + }, + Amo { + title: "Firefox Multimatch", + url: 
"https://addons.mozilla.org/en-US/firefox/addon/multimatch", + icon_url: "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b", + description: "amo suggestion multi-match", + rating: Some( + "4.9", + ), + number_of_ratings: 888, + guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}", + score: 0.25, + }, + Wikipedia { + title: "Multimatch", + url: "https://wikipedia.org/Multimatch", + icon: Some( + [ + 97, + 108, + 115, + 111, + 45, + 97, + 110, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: "multimatch", + }, + ] + "#]], + ), + ( + "MultiMatch; all providers, mixed case", + SuggestionQuery { + keyword: "MultiMatch".into(), + providers: vec![ + SuggestionProvider::Amp, + SuggestionProvider::Wikipedia, + SuggestionProvider::Amo, + SuggestionProvider::Pocket, + ], + limit: None, + }, + expect![[r#" + [ + Pocket { + title: "Multimatching", + url: "https://getpocket.com/collections/multimatch", + score: 0.88, + is_top_pick: true, + }, + Amo { + title: "Firefox Multimatch", + url: "https://addons.mozilla.org/en-US/firefox/addon/multimatch", + icon_url: "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b", + description: "amo suggestion multi-match", + rating: Some( + "4.9", + ), + number_of_ratings: 888, + guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}", + score: 0.25, + }, + Wikipedia { + title: "Multimatch", + url: "https://wikipedia.org/Multimatch", + icon: Some( + [ + 97, + 108, + 115, + 111, + 45, + 97, + 110, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: "multimatch", + }, + ] + "#]], + ), + ( + "multimatch; all providers, limit 2", + SuggestionQuery { + keyword: "multimatch".into(), + providers: vec![ + SuggestionProvider::Amp, + SuggestionProvider::Wikipedia, + SuggestionProvider::Amo, + SuggestionProvider::Pocket, + ], + limit: Some(2), + }, + expect![[r#" + [ + Pocket { + title: "Multimatching", + url: "https://getpocket.com/collections/multimatch", + score: 0.88, + 
is_top_pick: true, + }, + Amo { + title: "Firefox Multimatch", + url: "https://addons.mozilla.org/en-US/firefox/addon/multimatch", + icon_url: "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b", + description: "amo suggestion multi-match", + rating: Some( + "4.9", + ), + number_of_ratings: 888, + guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}", + score: 0.25, + }, + ] + "#]], + ), + ( + "keyword = `la`; AMP only", + SuggestionQuery { + keyword: "la".into(), + providers: vec![SuggestionProvider::Amp], + limit: None, + }, + expect![[r#" + [ + Amp { + title: "Lasagna Come Out Tomorrow", + url: "https://www.lasagna.restaurant", + raw_url: "https://www.lasagna.restaurant", + icon: Some( + [ + 105, + 45, + 97, + 109, + 45, + 97, + 110, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: "lasagna", + block_id: 0, + advertiser: "Good Place Eats", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.3, + }, + ] + "#]], + ), + ( + "keyword = `la`; Wikipedia, AMO, and Pocket", + SuggestionQuery { + keyword: "la".into(), + providers: vec![ + SuggestionProvider::Wikipedia, + SuggestionProvider::Amo, + SuggestionProvider::Pocket, + ], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `la`; no providers", + SuggestionQuery { + keyword: "la".into(), + providers: vec![], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `cal`; AMP, AMO, and Pocket", + SuggestionQuery { + keyword: "cal".into(), + providers: vec![ + SuggestionProvider::Amp, + SuggestionProvider::Amo, + SuggestionProvider::Pocket, + ], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `cal`; Wikipedia only", + SuggestionQuery { + keyword: "cal".into(), + providers: vec![SuggestionProvider::Wikipedia], + limit: None, + }, + expect![[r#" + [ + Wikipedia { + title: 
"California", + url: "https://wikipedia.org/California", + icon: Some( + [ + 97, + 108, + 115, + 111, + 45, + 97, + 110, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: "california", + }, + Wikipedia { + title: "California Institute of Technology", + url: "https://wikipedia.org/California_Institute_of_Technology", + icon: Some( + [ + 97, + 108, + 115, + 111, + 45, + 97, + 110, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: "california", + }, + ] + "#]], + ), + ( + "keyword = `cal`; Wikipedia with limit 1", + SuggestionQuery { + keyword: "cal".into(), + providers: vec![SuggestionProvider::Wikipedia], + limit: Some(1), + }, + expect![[r#" + [ + Wikipedia { + title: "California", + url: "https://wikipedia.org/California", + icon: Some( + [ + 97, + 108, + 115, + 111, + 45, + 97, + 110, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: "california", + }, + ] + "#]], + ), + ( + "keyword = `cal`; no providers", + SuggestionQuery { + keyword: "cal".into(), + providers: vec![], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `spam`; AMO only", + SuggestionQuery { + keyword: "spam".into(), + providers: vec![SuggestionProvider::Amo], + limit: None, + }, + expect![[r#" + [ + Amo { + title: "Firefox Relay", + url: "https://addons.mozilla.org/en-US/firefox/addon/example", + icon_url: "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b", + description: "amo suggestion", + rating: Some( + "4.9", + ), + number_of_ratings: 888, + guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}", + score: 0.25, + }, + ] + "#]], + ), + ( + "keyword = `masking`; AMO only", + SuggestionQuery { + keyword: "masking".into(), + providers: vec![SuggestionProvider::Amo], + limit: None, + }, + expect![[r#" + [ + Amo { + title: "Firefox Relay", + url: "https://addons.mozilla.org/en-US/firefox/addon/example", + icon_url: "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b", + description: 
"amo suggestion", + rating: Some( + "4.9", + ), + number_of_ratings: 888, + guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}", + score: 0.25, + }, + ] + "#]], + ), + ( + "keyword = `masking e`; AMO only", + SuggestionQuery { + keyword: "masking e".into(), + providers: vec![SuggestionProvider::Amo], + limit: None, + }, + expect![[r#" + [ + Amo { + title: "Firefox Relay", + url: "https://addons.mozilla.org/en-US/firefox/addon/example", + icon_url: "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b", + description: "amo suggestion", + rating: Some( + "4.9", + ), + number_of_ratings: 888, + guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}", + score: 0.25, + }, + ] + "#]], + ), + ( + "keyword = `masking s`; AMO only", + SuggestionQuery { + keyword: "masking s".into(), + providers: vec![SuggestionProvider::Amo], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `soft`; AMP and Wikipedia", + SuggestionQuery { + keyword: "soft".into(), + providers: vec![SuggestionProvider::Amp, SuggestionProvider::Wikipedia], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `soft`; Pocket only", + SuggestionQuery { + keyword: "soft".into(), + providers: vec![SuggestionProvider::Pocket], + limit: None, + }, + expect![[r#" + [ + Pocket { + title: "‘It’s Not Just Burnout:’ How Grind Culture Fails Women", + url: "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women", + score: 0.25, + is_top_pick: false, + }, + ] + "#]], + ), + ( + "keyword = `soft l`; Pocket only", + SuggestionQuery { + keyword: "soft l".into(), + providers: vec![SuggestionProvider::Pocket], + limit: None, + }, + expect![[r#" + [ + Pocket { + title: "‘It’s Not Just Burnout:’ How Grind Culture Fails Women", + url: "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women", + score: 0.25, + is_top_pick: false, + }, + ] + "#]], + ), + ( + "keyword = `sof`; Pocket only", + SuggestionQuery { + 
keyword: "sof".into(), + providers: vec![SuggestionProvider::Pocket], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `burnout women`; Pocket only", + SuggestionQuery { + keyword: "burnout women".into(), + providers: vec![SuggestionProvider::Pocket], + limit: None, + }, + expect![[r#" + [ + Pocket { + title: "‘It’s Not Just Burnout:’ How Grind Culture Fails Women", + url: "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women", + score: 0.25, + is_top_pick: true, + }, + ] + "#]], + ), + ( + "keyword = `burnout person`; Pocket only", + SuggestionQuery { + keyword: "burnout person".into(), + providers: vec![SuggestionProvider::Pocket], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `best spicy ramen delivery in tokyo`; Yelp only", + SuggestionQuery { + keyword: "best spicy ramen delivery in tokyo".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=best+spicy+ramen+delivery&find_loc=tokyo", + title: "best spicy ramen delivery in tokyo", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: true, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `BeSt SpIcY rAmEn DeLiVeRy In ToKyO`; Yelp only", + SuggestionQuery { + keyword: "BeSt SpIcY rAmEn DeLiVeRy In ToKyO".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=BeSt+SpIcY+rAmEn+DeLiVeRy&find_loc=ToKyO", + title: "BeSt SpIcY rAmEn DeLiVeRy In ToKyO", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: true, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `best ramen delivery in tokyo`; Yelp only", + SuggestionQuery { + 
keyword: "best ramen delivery in tokyo".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=best+ramen+delivery&find_loc=tokyo", + title: "best ramen delivery in tokyo", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: true, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `best invalid_ramen delivery in tokyo`; Yelp only", + SuggestionQuery { + keyword: "best invalid_ramen delivery in tokyo".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `best delivery in tokyo`; Yelp only", + SuggestionQuery { + keyword: "best delivery in tokyo".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `super best ramen delivery in tokyo`; Yelp only", + SuggestionQuery { + keyword: "super best ramen delivery in tokyo".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=super+best+ramen+delivery&find_loc=tokyo", + title: "super best ramen delivery in tokyo", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: true, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `invalid_best ramen delivery in tokyo`; Yelp only", + SuggestionQuery { + keyword: "invalid_best ramen delivery in tokyo".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `ramen delivery in tokyo`; Yelp only", + SuggestionQuery { + keyword: "ramen delivery in tokyo".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: 
"https://www.yelp.com/search?find_desc=ramen+delivery&find_loc=tokyo", + title: "ramen delivery in tokyo", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: true, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `ramen super delivery in tokyo`; Yelp only", + SuggestionQuery { + keyword: "ramen super delivery in tokyo".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=ramen+super+delivery&find_loc=tokyo", + title: "ramen super delivery in tokyo", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: true, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `ramen invalid_delivery in tokyo`; Yelp only", + SuggestionQuery { + keyword: "ramen invalid_delivery in tokyo".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `ramen in tokyo`; Yelp only", + SuggestionQuery { + keyword: "ramen in tokyo".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=ramen&find_loc=tokyo", + title: "ramen in tokyo", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: true, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `ramen near tokyo`; Yelp only", + SuggestionQuery { + keyword: "ramen near tokyo".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=ramen&find_loc=tokyo", + title: "ramen near tokyo", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + 
], + ), + score: 0.5, + has_location_sign: true, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `ramen invalid_in tokyo`; Yelp only", + SuggestionQuery { + keyword: "ramen invalid_in tokyo".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `ramen in San Francisco`; Yelp only", + SuggestionQuery { + keyword: "ramen in San Francisco".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=ramen&find_loc=San+Francisco", + title: "ramen in San Francisco", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: true, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `ramen in`; Yelp only", + SuggestionQuery { + keyword: "ramen in".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=ramen", + title: "ramen in", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: true, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `ramen near by`; Yelp only", + SuggestionQuery { + keyword: "ramen near by".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=ramen+near+by", + title: "ramen near by", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: false, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `ramen near me`; Yelp only", + SuggestionQuery { + keyword: "ramen near me".into(), + providers: vec![SuggestionProvider::Yelp], + 
limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=ramen+near+me", + title: "ramen near me", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: false, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `ramen near by tokyo`; Yelp only", + SuggestionQuery { + keyword: "ramen near by tokyo".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `ramen`; Yelp only", + SuggestionQuery { + keyword: "ramen".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=ramen", + title: "ramen", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: false, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = maximum chars; Yelp only", + SuggestionQuery { + keyword: "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789", + title: "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: false, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = over chars; Yelp only", + SuggestionQuery { + keyword: 
"012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789Z".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `best delivery`; Yelp only", + SuggestionQuery { + keyword: "best delivery".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `same_modifier same_modifier`; Yelp only", + SuggestionQuery { + keyword: "same_modifier same_modifier".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `same_modifier `; Yelp only", + SuggestionQuery { + keyword: "same_modifier ".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `yelp ramen`; Yelp only", + SuggestionQuery { + keyword: "yelp ramen".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=ramen", + title: "ramen", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: false, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `yelp keyword ramen`; Yelp only", + SuggestionQuery { + keyword: "yelp keyword ramen".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=ramen", + title: "ramen", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: false, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `ramen in tokyo yelp`; Yelp only", + SuggestionQuery { + keyword: "ramen in tokyo yelp".into(), + providers: 
vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=ramen&find_loc=tokyo", + title: "ramen in tokyo", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: true, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `ramen in tokyo yelp keyword`; Yelp only", + SuggestionQuery { + keyword: "ramen in tokyo yelp keyword".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=ramen&find_loc=tokyo", + title: "ramen in tokyo", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: true, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `yelp ramen yelp`; Yelp only", + SuggestionQuery { + keyword: "yelp ramen yelp".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=ramen", + title: "ramen", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: false, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `best yelp ramen`; Yelp only", + SuggestionQuery { + keyword: "best yelp ramen".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `Spicy R`; Yelp only", + SuggestionQuery { + keyword: "Spicy R".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=Spicy+Ramen", + title: "Spicy Ramen", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: false, + 
subject_exact_match: false, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `BeSt Ramen`; Yelp only", + SuggestionQuery { + keyword: "BeSt Ramen".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=BeSt+Ramen", + title: "BeSt Ramen", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: false, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `BeSt Spicy R`; Yelp only", + SuggestionQuery { + keyword: "BeSt Spicy R".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=BeSt+Spicy+Ramen", + title: "BeSt Spicy Ramen", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: false, + subject_exact_match: false, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `BeSt R`; Yelp only", + SuggestionQuery { + keyword: "BeSt R".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `r`; Yelp only", + SuggestionQuery { + keyword: "r".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `ra`; Yelp only", + SuggestionQuery { + keyword: "ra".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=rats", + title: "rats", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: false, + subject_exact_match: false, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `ram`; Yelp only", + SuggestionQuery { + keyword: "ram".into(), + providers: 
vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=ramen", + title: "ramen", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: false, + subject_exact_match: false, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `rac`; Yelp only", + SuggestionQuery { + keyword: "rac".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=raccoon", + title: "raccoon", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: false, + subject_exact_match: false, + location_param: "find_loc", + }, + ] + "#]], + ), + ( + "keyword = `best r`; Yelp only", + SuggestionQuery { + keyword: "best r".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = `best ra`; Yelp only", + SuggestionQuery { + keyword: "best ra".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=best+rats", + title: "best rats", + icon: Some( + [ + 121, + 101, + 108, + 112, + 45, + 105, + 99, + 111, + 110, + ], + ), + score: 0.5, + has_location_sign: false, + subject_exact_match: false, + location_param: "find_loc", + }, + ] + "#]], + ), + ]; + for (what, query, expect) in table { + expect.assert_debug_eq( + &store + .query(query) + .with_context(|| format!("Couldn't query store for {}", what))?, + ); + } + + Ok(()) + } + + // Tests querying AMP, Wikipedia, and Pocket suggestions that share keywords but have different scores + #[test] + fn query_with_multiple_providers_and_diff_scores() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([{ + "id": "data-1", + "type": "data", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": 
"application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }, { + "id": "data-2", + "type": "pocket-suggestions", + "last_modified": 15, + "attachment": { + "filename": "data-2.json", + "mimetype": "application/json", + "location": "data-2.json", + "hash": "", + "size": 0, + }, + }, { + "id": "icon-3", + "type": "icon", + "last_modified": 25, + "attachment": { + "filename": "icon-3.png", + "mimetype": "image/png", + "location": "icon-3.png", + "hash": "", + "size": 0, + }, + }]))? + .with_data( + "data-1.json", + json!([{ + "id": 0, + "advertiser": "Good Place Eats", + "iab_category": "8 - Food & Drink", + "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow", "amp wiki match"], + "title": "Lasagna Come Out Tomorrow", + "url": "https://www.lasagna.restaurant", + "icon": "2", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + }, { + "id": 0, + "advertiser": "Good Place Eats", + "iab_category": "8 - Food & Drink", + "keywords": ["pe", "pen", "penne", "penne for your thoughts", "amp wiki match"], + "title": "Penne for Your Thoughts", + "url": "https://penne.biz", + "icon": "2", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.1 + }, { + "id": 0, + "advertiser": "Wikipedia", + "iab_category": "5 - Education", + "keywords": ["amp wiki match", "pocket wiki match"], + "title": "Multimatch", + "url": "https://wikipedia.org/Multimatch", + "icon": "3" + }]), + )? 
+ .with_data( + "data-2.json", + json!([ + { + "description": "pocket suggestion", + "url": "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women", + "lowConfidenceKeywords": ["soft life", "workaholism", "toxic work culture", "work-life balance", "pocket wiki match"], + "highConfidenceKeywords": ["burnout women", "grind culture", "women burnout"], + "title": "‘It’s Not Just Burnout:’ How Grind Culture Fails Women", + "score": 0.05 + }, + { + "description": "pocket suggestion multi-match", + "url": "https://getpocket.com/collections/multimatch", + "lowConfidenceKeywords": [], + "highConfidenceKeywords": ["pocket wiki match"], + "title": "Pocket wiki match", + "score": 0.88 + }, + ]), + )? + .with_icon("icon-3.png", "also-an-icon".as_bytes().into()); + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + let table = [ + ( + "keyword = `amp wiki match`; all providers", + SuggestionQuery { + keyword: "amp wiki match".into(), + providers: vec![ + SuggestionProvider::Amp, + SuggestionProvider::Wikipedia, + SuggestionProvider::Amo, + SuggestionProvider::Pocket, + SuggestionProvider::Yelp, + ], + limit: None, + }, + expect![[r#" + [ + Amp { + title: "Lasagna Come Out Tomorrow", + url: "https://www.lasagna.restaurant", + raw_url: "https://www.lasagna.restaurant", + icon: None, + full_keyword: "amp wiki match", + block_id: 0, + advertiser: "Good Place Eats", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.3, + }, + Wikipedia { + title: "Multimatch", + url: "https://wikipedia.org/Multimatch", + icon: Some( + [ + 97, + 108, + 115, + 111, + 45, + 97, + 110, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: "amp wiki match", + }, + Amp { + title: "Penne for Your Thoughts", + url: 
"https://penne.biz", + raw_url: "https://penne.biz", + icon: None, + full_keyword: "amp wiki match", + block_id: 0, + advertiser: "Good Place Eats", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.1, + }, + ] + "#]], + ), + ( + "keyword = `amp wiki match`; all providers, limit 2", + SuggestionQuery { + keyword: "amp wiki match".into(), + providers: vec![ + SuggestionProvider::Amp, + SuggestionProvider::Wikipedia, + SuggestionProvider::Amo, + SuggestionProvider::Pocket, + SuggestionProvider::Yelp, + ], + limit: Some(2), + }, + expect![[r#" + [ + Amp { + title: "Lasagna Come Out Tomorrow", + url: "https://www.lasagna.restaurant", + raw_url: "https://www.lasagna.restaurant", + icon: None, + full_keyword: "amp wiki match", + block_id: 0, + advertiser: "Good Place Eats", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.3, + }, + Wikipedia { + title: "Multimatch", + url: "https://wikipedia.org/Multimatch", + icon: Some( + [ + 97, + 108, + 115, + 111, + 45, + 97, + 110, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: "amp wiki match", + }, + ] + "#]], + ), + ( + "pocket wiki match; all providers", + SuggestionQuery { + keyword: "pocket wiki match".into(), + providers: vec![ + SuggestionProvider::Amp, + SuggestionProvider::Wikipedia, + SuggestionProvider::Amo, + SuggestionProvider::Pocket, + ], + limit: None, + }, + expect![[r#" + [ + Pocket { + title: "Pocket wiki match", + url: "https://getpocket.com/collections/multimatch", + score: 0.88, + is_top_pick: true, + }, + Wikipedia { + title: "Multimatch", + url: "https://wikipedia.org/Multimatch", + icon: Some( + [ + 97, + 108, + 115, + 111, + 45, + 97, + 110, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: 
"pocket wiki match", + }, + Pocket { + title: "‘It’s Not Just Burnout:’ How Grind Culture Fails Women", + url: "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women", + score: 0.05, + is_top_pick: false, + }, + ] + "#]], + ), + ( + "pocket wiki match; all providers limit 1", + SuggestionQuery { + keyword: "pocket wiki match".into(), + providers: vec![ + SuggestionProvider::Amp, + SuggestionProvider::Wikipedia, + SuggestionProvider::Amo, + SuggestionProvider::Pocket, + ], + limit: Some(1), + }, + expect![[r#" + [ + Pocket { + title: "Pocket wiki match", + url: "https://getpocket.com/collections/multimatch", + score: 0.88, + is_top_pick: true, + }, + ] + "#]], + ), + ( + "work-life balance; duplicate providers", + SuggestionQuery { + keyword: "work-life balance".into(), + providers: vec![SuggestionProvider::Pocket, SuggestionProvider::Pocket], + limit: Some(-1), + }, + expect![[r#" + [ + Pocket { + title: "‘It’s Not Just Burnout:’ How Grind Culture Fails Women", + url: "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women", + score: 0.05, + is_top_pick: false, + }, + ] + "#]], + ), + ]; + for (what, query, expect) in table { + expect.assert_debug_eq( + &store + .query(query) + .with_context(|| format!("Couldn't query store for {}", what))?, + ); + } + + Ok(()) + } + + // Tests querying multiple suggestions with multiple keywords with same prefix keyword + #[test] + fn query_with_multiple_suggestions_with_same_prefix() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([{ + "id": "data-1", + "type": "amo-suggestions", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }, { + "id": "data-2", + "type": "pocket-suggestions", + "last_modified": 15, + "attachment": { + "filename": "data-2.json", + "mimetype": "application/json", + "location": "data-2.json", 
+ "hash": "", + "size": 0, + }, + }, { + "id": "icon-3", + "type": "icon", + "last_modified": 25, + "attachment": { + "filename": "icon-3.png", + "mimetype": "image/png", + "location": "icon-3.png", + "hash": "", + "size": 0, + }, + }]))? + .with_data( + "data-1.json", + json!([ + { + "description": "amo suggestion", + "url": "https://addons.mozilla.org/en-US/firefox/addon/example", + "guid": "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}", + "keywords": ["relay", "spam", "masking email", "masking emails", "masking accounts", "alias" ], + "title": "Firefox Relay", + "icon": "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b", + "rating": "4.9", + "number_of_ratings": 888, + "score": 0.25 + } + ]), + )? + .with_data( + "data-2.json", + json!([ + { + "description": "pocket suggestion", + "url": "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women", + "lowConfidenceKeywords": ["soft life", "soft living", "soft work", "workaholism", "toxic work culture"], + "highConfidenceKeywords": ["burnout women", "grind culture", "women burnout", "soft lives"], + "title": "‘It’s Not Just Burnout:’ How Grind Culture Fails Women", + "score": 0.05 + } + ]), + )? 
+ .with_icon("icon-3.png", "also-an-icon".as_bytes().into()); + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + let table = [ + ( + "keyword = `soft li`; pocket", + SuggestionQuery { + keyword: "soft li".into(), + providers: vec![SuggestionProvider::Pocket], + limit: None, + }, + expect![[r#" + [ + Pocket { + title: "‘It’s Not Just Burnout:’ How Grind Culture Fails Women", + url: "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women", + score: 0.05, + is_top_pick: false, + }, + ] + "#]], + ), + ( + "keyword = `soft lives`; pocket", + SuggestionQuery { + keyword: "soft lives".into(), + providers: vec![SuggestionProvider::Pocket], + limit: None, + }, + expect![[r#" + [ + Pocket { + title: "‘It’s Not Just Burnout:’ How Grind Culture Fails Women", + url: "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women", + score: 0.05, + is_top_pick: true, + }, + ] + "#]], + ), + ( + "keyword = `masking `; amo provider", + SuggestionQuery { + keyword: "masking ".into(), + providers: vec![SuggestionProvider::Amo], + limit: None, + }, + expect![[r#" + [ + Amo { + title: "Firefox Relay", + url: "https://addons.mozilla.org/en-US/firefox/addon/example", + icon_url: "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b", + description: "amo suggestion", + rating: Some( + "4.9", + ), + number_of_ratings: 888, + guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}", + score: 0.25, + }, + ] + "#]], + ), + ]; + for (what, query, expect) in table { + expect.assert_debug_eq( + &store + .query(query) + .with_context(|| format!("Couldn't query store for {}", what))?, + ); + } + + Ok(()) + } + + // Tests querying the Amp and AmpMobile providers, both separately and together + #[test] + fn query_with_amp_mobile_provider() -> anyhow::Result<()> { + before_each(); + + let snapshot = 
Snapshot::with_records(json!([{ + "id": "data-1", + "type": "amp-mobile-suggestions", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }, { + "id": "data-2", + "type": "data", + "last_modified": 15, + "attachment": { + "filename": "data-2.json", + "mimetype": "application/json", + "location": "data-2.json", + "hash": "", + "size": 0, + }, + }, { + "id": "icon-3", + "type": "icon", + "last_modified": 25, + "attachment": { + "filename": "icon-3.png", + "mimetype": "image/png", + "location": "icon-3.png", + "hash": "", + "size": 0, + }, + }]))? + .with_data( + "data-1.json", + json!([ + { + "id": 0, + "advertiser": "Good Place Eats", + "iab_category": "8 - Food & Drink", + "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"], + "title": "Mobile - Lasagna Come Out Tomorrow", + "url": "https://www.lasagna.restaurant", + "icon": "3", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + } + ]), + )? + .with_data( + "data-2.json", + json!([ + { + "id": 0, + "advertiser": "Good Place Eats", + "iab_category": "8 - Food & Drink", + "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"], + "title": "Desktop - Lasagna Come Out Tomorrow", + "url": "https://www.lasagna.restaurant", + "icon": "3", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.2 + } + ]), + )? 
+ .with_icon("icon-3.png", "also-an-icon".as_bytes().into()); + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + let table = [ + ( + "keyword = `las`; Amp Mobile", + SuggestionQuery { + keyword: "las".into(), + providers: vec![SuggestionProvider::AmpMobile], + limit: None, + }, + expect![[r#" + [ + Amp { + title: "Mobile - Lasagna Come Out Tomorrow", + url: "https://www.lasagna.restaurant", + raw_url: "https://www.lasagna.restaurant", + icon: Some( + [ + 97, + 108, + 115, + 111, + 45, + 97, + 110, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: "lasagna", + block_id: 0, + advertiser: "Good Place Eats", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.3, + }, + ] + "#]], + ), + ( + "keyword = `las`; Amp", + SuggestionQuery { + keyword: "las".into(), + providers: vec![SuggestionProvider::Amp], + limit: None, + }, + expect![[r#" + [ + Amp { + title: "Desktop - Lasagna Come Out Tomorrow", + url: "https://www.lasagna.restaurant", + raw_url: "https://www.lasagna.restaurant", + icon: Some( + [ + 97, + 108, + 115, + 111, + 45, + 97, + 110, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: "lasagna", + block_id: 0, + advertiser: "Good Place Eats", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.2, + }, + ] + "#]], + ), + ( + "keyword = `las `; amp and amp mobile", + SuggestionQuery { + keyword: "las".into(), + providers: vec![SuggestionProvider::Amp, SuggestionProvider::AmpMobile], + limit: None, + }, + expect![[r#" + [ + Amp { + title: "Mobile - Lasagna Come Out Tomorrow", + url: "https://www.lasagna.restaurant", + raw_url: "https://www.lasagna.restaurant", + 
icon: Some( + [ + 97, + 108, + 115, + 111, + 45, + 97, + 110, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: "lasagna", + block_id: 0, + advertiser: "Good Place Eats", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.3, + }, + Amp { + title: "Desktop - Lasagna Come Out Tomorrow", + url: "https://www.lasagna.restaurant", + raw_url: "https://www.lasagna.restaurant", + icon: Some( + [ + 97, + 108, + 115, + 111, + 45, + 97, + 110, + 45, + 105, + 99, + 111, + 110, + ], + ), + full_keyword: "lasagna", + block_id: 0, + advertiser: "Good Place Eats", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.2, + }, + ] + "#]], + ), + ]; + for (what, query, expect) in table { + expect.assert_debug_eq( + &store + .query(query) + .with_context(|| format!("Couldn't query store for {}", what))?, + ); + } + + Ok(()) + } + + /// Tests ingesting malformed Remote Settings records that we understand, + /// but that are missing fields, or aren't in the format we expect. + #[test] + fn ingest_malformed() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([{ + // Data record without an attachment. + "id": "missing-data-attachment", + "type": "data", + "last_modified": 15, + }, { + // Icon record without an attachment. + "id": "missing-icon-attachment", + "type": "icon", + "last_modified": 30, + }, { + // Icon record with an ID that's not `icon-{id}`, so suggestions in + // the data attachment won't be able to reference it. + "id": "bad-icon-id", + "type": "icon", + "last_modified": 45, + "attachment": { + "filename": "icon-1.png", + "mimetype": "image/png", + "location": "icon-1.png", + "hash": "", + "size": 0, + }, + }]))? 
+ .with_icon("icon-1.png", "i-am-an-icon".as_bytes().into()); + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + store.dbs()?.reader.read(|dao| { + assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(45)); + assert_eq!( + dao.conn + .query_one::<i64>("SELECT count(*) FROM suggestions")?, + 0 + ); + assert_eq!(dao.conn.query_one::<i64>("SELECT count(*) FROM icons")?, 0); + + Ok(()) + })?; + + Ok(()) + } + + /// Tests unparsable Remote Settings records, which we don't know how to + /// ingest at all. + #[test] + fn ingest_unparsable() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([{ + "id": "fancy-new-suggestions-1", + "type": "fancy-new-suggestions", + "last_modified": 15, + }, { + "id": "clippy-2", + "type": "clippy", + "last_modified": 30, + }]))?; + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + store.dbs()?.reader.read(|dao| { + assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(30)); + expect![[r#" + Some( + UnparsableRecords( + { + "clippy-2": UnparsableRecord { + schema_version: 14, + }, + "fancy-new-suggestions-1": UnparsableRecord { + schema_version: 14, + }, + }, + ), + ) + "#]] + .assert_debug_eq(&dao.get_meta::<UnparsableRecords>(UNPARSABLE_RECORDS_META_KEY)?); + Ok(()) + })?; + + Ok(()) + } + + #[test] + fn ingest_mixed_parsable_unparsable_records() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([{ + "id": "fancy-new-suggestions-1", + "type": "fancy-new-suggestions", + "last_modified": 15, + }, + { + "id": "data-1", + "type": "data", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }, + { + "id": "clippy-2", + "type": "clippy", + "last_modified": 
30, + }]))? + .with_data( + "data-1.json", + json!([{ + "id": 0, + "advertiser": "Los Pollos Hermanos", + "iab_category": "8 - Food & Drink", + "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"], + "title": "Los Pollos Hermanos - Albuquerque", + "url": "https://www.lph-nm.biz", + "icon": "5678", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3, + }]), + )?; + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + store.dbs()?.reader.read(|dao| { + assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(30)); + expect![[r#" + Some( + UnparsableRecords( + { + "clippy-2": UnparsableRecord { + schema_version: 14, + }, + "fancy-new-suggestions-1": UnparsableRecord { + schema_version: 14, + }, + }, + ), + ) + "#]] + .assert_debug_eq(&dao.get_meta::<UnparsableRecords>(UNPARSABLE_RECORDS_META_KEY)?); + Ok(()) + })?; + + Ok(()) + } + + /// Tests meta update field isn't updated for old unparsable Remote Settings + /// records. 
+ #[test] + fn ingest_unparsable_and_meta_update_stays_the_same() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([{ + "id": "fancy-new-suggestions-1", + "type": "fancy-new-suggestions", + "last_modified": 15, + }]))?; + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + store.dbs()?.writer.write(|dao| { + dao.put_meta(LAST_INGEST_META_KEY, 30)?; + Ok(()) + })?; + store.ingest(SuggestIngestionConstraints::default())?; + + store.dbs()?.reader.read(|dao| { + assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(30)); + Ok(()) + })?; + + Ok(()) + } + + #[test] + fn remove_known_records_out_of_meta_table() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([{ + "id": "fancy-new-suggestions-1", + "type": "fancy-new-suggestions", + "last_modified": 15, + }, + { + "id": "data-1", + "type": "data", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }, + { + "id": "clippy-2", + "type": "clippy", + "last_modified": 15, + }]))? 
+ .with_data( + "data-1.json", + json!([{ + "id": 0, + "advertiser": "Los Pollos Hermanos", + "iab_category": "8 - Food & Drink", + "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"], + "title": "Los Pollos Hermanos - Albuquerque", + "url": "https://www.lph-nm.biz", + "icon": "5678", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + }]), + )?; + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + let mut initial_data = UnparsableRecords::default(); + initial_data + .0 + .insert("data-1".to_string(), UnparsableRecord { schema_version: 1 }); + initial_data.0.insert( + "clippy-2".to_string(), + UnparsableRecord { schema_version: 1 }, + ); + store.dbs()?.writer.write(|dao| { + dao.put_meta(UNPARSABLE_RECORDS_META_KEY, initial_data)?; + Ok(()) + })?; + + store.ingest(SuggestIngestionConstraints::default())?; + + store.dbs()?.reader.read(|dao| { + expect![[r#" + Some( + UnparsableRecords( + { + "clippy-2": UnparsableRecord { + schema_version: 14, + }, + "fancy-new-suggestions-1": UnparsableRecord { + schema_version: 14, + }, + }, + ), + ) + "#]] + .assert_debug_eq(&dao.get_meta::<UnparsableRecords>(UNPARSABLE_RECORDS_META_KEY)?); + Ok(()) + })?; + + Ok(()) + } + + /// Tests that records with invalid attachments are ignored and marked as unparsable. + #[test] + fn skip_over_invalid_records() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([ + { + "id": "invalid-attachment", + "type": "data", + "last_modified": 15, + "attachment": { + "filename": "data-2.json", + "mimetype": "application/json", + "location": "data-2.json", + "hash": "", + "size": 0, + }, + }, + { + "id": "valid-record", + "type": "data", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }, + ]))? 
+ .with_data( + "data-1.json", + json!([{ + "id": 0, + "advertiser": "Los Pollos Hermanos", + "iab_category": "8 - Food & Drink", + "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"], + "title": "Los Pollos Hermanos - Albuquerque", + "url": "https://www.lph-nm.biz", + "icon": "5678", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + }]), + )? + // This attachment is missing the `keywords` field and is invalid + .with_data( + "data-2.json", + json!([{ + "id": 1, + "advertiser": "Los Pollos Hermanos", + "iab_category": "8 - Food & Drink", + "title": "Los Pollos Hermanos - Albuquerque", + "url": "https://www.lph-nm.biz", + "icon": "5678", + "impression_url": "https://example.com/impression_url", + "click_url": "https://example.com/click_url", + "score": 0.3 + }]), + )?; + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + // Test that the invalid record marked as unparsable + store.dbs()?.reader.read(|dao| { + expect![[r#" + Some( + UnparsableRecords( + { + "invalid-attachment": UnparsableRecord { + schema_version: 14, + }, + }, + ), + ) + "#]] + .assert_debug_eq(&dao.get_meta::<UnparsableRecords>(UNPARSABLE_RECORDS_META_KEY)?); + Ok(()) + })?; + + // Test that the valid record was read + store.dbs()?.reader.read(|dao| { + assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(15)); + expect![[r#" + [ + Amp { + title: "Los Pollos Hermanos - Albuquerque", + url: "https://www.lph-nm.biz", + raw_url: "https://www.lph-nm.biz", + icon: None, + full_keyword: "los", + block_id: 0, + advertiser: "Los Pollos Hermanos", + iab_category: "8 - Food & Drink", + impression_url: "https://example.com/impression_url", + click_url: "https://example.com/click_url", + raw_click_url: "https://example.com/click_url", + score: 0.3, + }, + ] + "#]] + 
.assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery { + keyword: "lo".into(), + providers: vec![SuggestionProvider::Amp], + limit: None, + })?); + + Ok(()) + })?; + + Ok(()) + } + + #[test] + fn unparsable_record_serialized_correctly() -> anyhow::Result<()> { + let unparseable_record = UnparsableRecord { schema_version: 1 }; + assert_eq!(serde_json::to_value(unparseable_record)?, json!({ "v": 1 }),); + Ok(()) + } + + #[test] + fn query_mdn() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([{ + "id": "data-1", + "type": "mdn-suggestions", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }]))? + .with_data( + "data-1.json", + json!([ + { + "description": "Javascript Array", + "url": "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array", + "keywords": ["array javascript", "javascript array", "wildcard"], + "title": "Array", + "score": 0.24 + }, + ]), + )?; + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + let table = [ + ( + "keyword = prefix; MDN only", + SuggestionQuery { + keyword: "array".into(), + providers: vec![SuggestionProvider::Mdn], + limit: None, + }, + expect![[r#" + [ + Mdn { + title: "Array", + url: "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array", + description: "Javascript Array", + score: 0.24, + }, + ] + "#]], + ), + ( + "keyword = prefix + partial suffix; MDN only", + SuggestionQuery { + keyword: "array java".into(), + providers: vec![SuggestionProvider::Mdn], + limit: None, + }, + expect![[r#" + [ + Mdn { + title: "Array", + url: "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array", + description: "Javascript Array", + score: 0.24, + }, + ] + "#]], + ), + ( + "keyword = prefix + 
entire suffix; MDN only", + SuggestionQuery { + keyword: "javascript array".into(), + providers: vec![SuggestionProvider::Mdn], + limit: None, + }, + expect![[r#" + [ + Mdn { + title: "Array", + url: "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array", + description: "Javascript Array", + score: 0.24, + }, + ] + "#]], + ), + ( + "keyword = `partial prefix word`; MDN only", + SuggestionQuery { + keyword: "wild".into(), + providers: vec![SuggestionProvider::Mdn], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = single word; MDN only", + SuggestionQuery { + keyword: "wildcard".into(), + providers: vec![SuggestionProvider::Mdn], + limit: None, + }, + expect![[r#" + [ + Mdn { + title: "Array", + url: "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array", + description: "Javascript Array", + score: 0.24, + }, + ] + "#]], + ), + ]; + + for (what, query, expect) in table { + expect.assert_debug_eq( + &store + .query(query) + .with_context(|| format!("Couldn't query store for {}", what))?, + ); + } + + Ok(()) + } + + #[test] + fn query_no_yelp_icon_data() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([{ + "id": "data-1", + "type": "yelp-suggestions", + "last_modified": 15, + "attachment": { + "filename": "data-1.json", + "mimetype": "application/json", + "location": "data-1.json", + "hash": "", + "size": 0, + }, + }]))? 
+ .with_data( + "data-1.json", + json!([ + { + "subjects": ["ramen"], + "preModifiers": [], + "postModifiers": [], + "locationSigns": [], + "yelpModifiers": [], + "icon": "yelp-favicon", + "score": 0.5 + }, + ]), + )?; + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + + store.ingest(SuggestIngestionConstraints::default())?; + + let table = [( + "keyword = ramen; Yelp only", + SuggestionQuery { + keyword: "ramen".into(), + providers: vec![SuggestionProvider::Yelp], + limit: None, + }, + expect![[r#" + [ + Yelp { + url: "https://www.yelp.com/search?find_desc=ramen", + title: "ramen", + icon: None, + score: 0.5, + has_location_sign: false, + subject_exact_match: true, + location_param: "find_loc", + }, + ] + "#]], + )]; + + for (what, query, expect) in table { + expect.assert_debug_eq( + &store + .query(query) + .with_context(|| format!("Couldn't query store for {}", what))?, + ); + } + + Ok(()) + } + + #[test] + fn weather() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([{ + "id": "data-1", + "type": "weather", + "last_modified": 15, + "weather": { + "min_keyword_length": 3, + "keywords": ["ab", "xyz", "weather"], + "score": "0.24" + } + }]))?; + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + store.ingest(SuggestIngestionConstraints::default())?; + + let table = [ + ( + "keyword = 'ab'; Weather only, no match since query is too short", + SuggestionQuery { + keyword: "ab".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = 'xab'; Weather only, no matching keyword", + SuggestionQuery { + keyword: "xab".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = 'abx'; Weather only, no matching keyword", + SuggestionQuery { + keyword: "abx".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + 
expect![[r#" + [] + "#]], + ), + ( + "keyword = 'xy'; Weather only, no match since query is too short", + SuggestionQuery { + keyword: "xy".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = 'xyz'; Weather only, match", + SuggestionQuery { + keyword: "xyz".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [ + Weather { + score: 0.24, + }, + ] + "#]], + ), + ( + "keyword = 'xxyz'; Weather only, no matching keyword", + SuggestionQuery { + keyword: "xxyz".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = 'xyzx'; Weather only, no matching keyword", + SuggestionQuery { + keyword: "xyzx".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = 'we'; Weather only, no match since query is too short", + SuggestionQuery { + keyword: "we".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = 'wea'; Weather only, match", + SuggestionQuery { + keyword: "wea".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [ + Weather { + score: 0.24, + }, + ] + "#]], + ), + ( + "keyword = 'weat'; Weather only, match", + SuggestionQuery { + keyword: "weat".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [ + Weather { + score: 0.24, + }, + ] + "#]], + ), + ( + "keyword = 'weath'; Weather only, match", + SuggestionQuery { + keyword: "weath".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [ + Weather { + score: 0.24, + }, + ] + "#]], + ), + ( + "keyword = 'weathe'; Weather only, match", + SuggestionQuery { + keyword: "weathe".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [ + Weather { + 
score: 0.24, + }, + ] + "#]], + ), + ( + "keyword = 'weather'; Weather only, match", + SuggestionQuery { + keyword: "weather".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [ + Weather { + score: 0.24, + }, + ] + "#]], + ), + ( + "keyword = 'weatherx'; Weather only, no matching keyword", + SuggestionQuery { + keyword: "weatherx".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = 'xweather'; Weather only, no matching keyword", + SuggestionQuery { + keyword: "xweather".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = 'xwea'; Weather only, no matching keyword", + SuggestionQuery { + keyword: "xwea".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = ' weather '; Weather only, match", + SuggestionQuery { + keyword: " weather ".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [ + Weather { + score: 0.24, + }, + ] + "#]], + ), + ( + "keyword = 'x weather '; Weather only, no matching keyword", + SuggestionQuery { + keyword: "x weather ".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ( + "keyword = ' weather x'; Weather only, no matching keyword", + SuggestionQuery { + keyword: " weather x".into(), + providers: vec![SuggestionProvider::Weather], + limit: None, + }, + expect![[r#" + [] + "#]], + ), + ]; + + for (what, query, expect) in table { + expect.assert_debug_eq( + &store + .query(query) + .with_context(|| format!("Couldn't query store for {}", what))?, + ); + } + + expect![[r#" + Some( + Weather { + min_keyword_length: 3, + }, + ) + "#]] + .assert_debug_eq( + &store + .fetch_provider_config(SuggestionProvider::Weather) + .with_context(|| "Couldn't fetch provider config")?, + ); + + Ok(()) + } + 
+ #[test] + fn fetch_global_config() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([{ + "id": "data-1", + "type": "configuration", + "last_modified": 15, + "configuration": { + "show_less_frequently_cap": 3, + } + }]))?; + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + store.ingest(SuggestIngestionConstraints::default())?; + + expect![[r#" + SuggestGlobalConfig { + show_less_frequently_cap: 3, + } + "#]] + .assert_debug_eq( + &store + .fetch_global_config() + .with_context(|| "fetch_global_config failed")?, + ); + + Ok(()) + } + + #[test] + fn fetch_global_config_default() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([]))?; + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + store.ingest(SuggestIngestionConstraints::default())?; + + expect![[r#" + SuggestGlobalConfig { + show_less_frequently_cap: 0, + } + "#]] + .assert_debug_eq( + &store + .fetch_global_config() + .with_context(|| "fetch_global_config failed")?, + ); + + Ok(()) + } + + #[test] + fn fetch_provider_config_none() -> anyhow::Result<()> { + before_each(); + + let snapshot = Snapshot::with_records(json!([]))?; + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + store.ingest(SuggestIngestionConstraints::default())?; + + expect![[r#" + None + "#]] + .assert_debug_eq( + &store + .fetch_provider_config(SuggestionProvider::Amp) + .with_context(|| "fetch_provider_config failed for Amp")?, + ); + + expect![[r#" + None + "#]] + .assert_debug_eq( + &store + .fetch_provider_config(SuggestionProvider::Weather) + .with_context(|| "fetch_provider_config failed for Weather")?, + ); + + Ok(()) + } + + #[test] + fn fetch_provider_config_other() -> anyhow::Result<()> { + before_each(); + + // Add some weather config. 
+ let snapshot = Snapshot::with_records(json!([{ + "id": "data-1", + "type": "weather", + "last_modified": 15, + "weather": { + "min_keyword_length": 3, + "keywords": ["weather"], + "score": "0.24" + } + }]))?; + + let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot)); + store.ingest(SuggestIngestionConstraints::default())?; + + // Getting the config for a different provider should return None. + expect![[r#" + None + "#]] + .assert_debug_eq( + &store + .fetch_provider_config(SuggestionProvider::Amp) + .with_context(|| "fetch_provider_config failed for Amp")?, + ); + + Ok(()) + } +} diff --git a/third_party/rust/suggest/src/suggest.udl b/third_party/rust/suggest/src/suggest.udl new file mode 100644 index 0000000000..1cd8911a48 --- /dev/null +++ b/third_party/rust/suggest/src/suggest.udl @@ -0,0 +1,151 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +[External="remote_settings"] +typedef extern RemoteSettingsConfig; + +namespace suggest { + +boolean raw_suggestion_url_matches([ByRef] string raw_url, [ByRef] string url); + +}; + +[Error] +interface SuggestApiError { + // An operation was interrupted by calling `SuggestStore.interrupt()` + Interrupted(); + // The server requested a backoff after too many requests + Backoff(u64 seconds); + Network(string reason); + Other(string reason); +}; + +enum SuggestionProvider { + "Amp", + "Pocket", + "Wikipedia", + "Amo", + "Yelp", + "Mdn", + "Weather", + "AmpMobile", +}; + +[Enum] +interface Suggestion { + Amp( + string title, + string url, + string raw_url, + sequence<u8>? 
icon, + string full_keyword, + i64 block_id, + string advertiser, + string iab_category, + string impression_url, + string click_url, + string raw_click_url, + f64 score + ); + Pocket( + string title, + string url, + f64 score, + boolean is_top_pick + ); + Wikipedia( + string title, + string url, + sequence<u8>? icon, + string full_keyword + ); + Amo( + string title, + string url, + string icon_url, + string description, + string? rating, + i64 number_of_ratings, + string guid, + f64 score + ); + Yelp( + string url, + string title, + sequence<u8>? icon, + f64 score, + boolean has_location_sign, + boolean subject_exact_match, + string location_param + ); + Mdn( + string title, + string url, + string description, + f64 score + ); + Weather( + f64 score + ); +}; + +dictionary SuggestionQuery { + string keyword; + sequence<SuggestionProvider> providers; + i32? limit = null; +}; + +dictionary SuggestIngestionConstraints { + u64? max_suggestions = null; +}; + +dictionary SuggestGlobalConfig { + i32 show_less_frequently_cap; +}; + +[Enum] +interface SuggestProviderConfig { + Weather( + i32 min_keyword_length + ); +}; + +interface SuggestStore { + [Throws=SuggestApiError] + constructor([ByRef] string path, optional RemoteSettingsConfig? settings_config = null); + + [Throws=SuggestApiError] + sequence<Suggestion> query(SuggestionQuery query); + + void interrupt(); + + [Throws=SuggestApiError] + void ingest(SuggestIngestionConstraints constraints); + + [Throws=SuggestApiError] + void clear(); + + [Throws=SuggestApiError] + SuggestGlobalConfig fetch_global_config(); + + [Throws=SuggestApiError] + SuggestProviderConfig? 
fetch_provider_config(SuggestionProvider provider); +}; + +interface SuggestStoreBuilder { + constructor(); + + [Self=ByArc] + SuggestStoreBuilder data_path(string path); + + [Self=ByArc] + SuggestStoreBuilder cache_path(string path); + + [Self=ByArc] + SuggestStoreBuilder remote_settings_config(RemoteSettingsConfig config); + + [Throws=SuggestApiError] + SuggestStore build(); +}; diff --git a/third_party/rust/suggest/src/suggestion.rs b/third_party/rust/suggest/src/suggestion.rs new file mode 100644 index 0000000000..f5425e3c73 --- /dev/null +++ b/third_party/rust/suggest/src/suggestion.rs @@ -0,0 +1,250 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +use chrono::Local; + +use crate::db::DEFAULT_SUGGESTION_SCORE; + +/// The template parameter for a timestamp in a "raw" sponsored suggestion URL. +const TIMESTAMP_TEMPLATE: &str = "%YYYYMMDDHH%"; + +/// The length, in bytes, of a timestamp in a "cooked" sponsored suggestion URL. +/// +/// Cooked timestamps don't include the leading or trailing `%`, so this is +/// 2 bytes shorter than [`TIMESTAMP_TEMPLATE`]. +const TIMESTAMP_LENGTH: usize = 10; + +/// Suggestion Types for Amp +pub(crate) enum AmpSuggestionType { + Mobile, + Desktop, +} +/// A suggestion from the database to show in the address bar. 
+#[derive(Clone, Debug, PartialEq)] +pub enum Suggestion { + Amp { + title: String, + url: String, + raw_url: String, + icon: Option<Vec<u8>>, + full_keyword: String, + block_id: i64, + advertiser: String, + iab_category: String, + impression_url: String, + click_url: String, + raw_click_url: String, + score: f64, + }, + Pocket { + title: String, + url: String, + score: f64, + is_top_pick: bool, + }, + Wikipedia { + title: String, + url: String, + icon: Option<Vec<u8>>, + full_keyword: String, + }, + Amo { + title: String, + url: String, + icon_url: String, + description: String, + rating: Option<String>, + number_of_ratings: i64, + guid: String, + score: f64, + }, + Yelp { + url: String, + title: String, + icon: Option<Vec<u8>>, + score: f64, + has_location_sign: bool, + subject_exact_match: bool, + location_param: String, + }, + Mdn { + title: String, + url: String, + description: String, + score: f64, + }, + Weather { + score: f64, + }, +} + +impl PartialOrd for Suggestion { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for Suggestion { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + let a_score = match self { + Suggestion::Amp { score, .. } + | Suggestion::Pocket { score, .. } + | Suggestion::Amo { score, .. } => score, + _ => &DEFAULT_SUGGESTION_SCORE, + }; + let b_score = match other { + Suggestion::Amp { score, .. } + | Suggestion::Pocket { score, .. } + | Suggestion::Amo { score, .. } => score, + _ => &DEFAULT_SUGGESTION_SCORE, + }; + b_score + .partial_cmp(a_score) + .unwrap_or(std::cmp::Ordering::Equal) + } +} + +impl Eq for Suggestion {} +/// Replaces all template parameters in a "raw" sponsored suggestion URL, +/// producing a "cooked" URL with real values. 
+pub(crate) fn cook_raw_suggestion_url(raw_url: &str) -> String { + let timestamp = Local::now().format("%Y%m%d%H").to_string(); + debug_assert!(timestamp.len() == TIMESTAMP_LENGTH); + // "Raw" sponsored suggestion URLs must not contain more than one timestamp + // template parameter, so we replace just the first occurrence. + raw_url.replacen(TIMESTAMP_TEMPLATE, &timestamp, 1) +} + +/// Determines whether a "raw" sponsored suggestion URL is equivalent to a +/// "cooked" URL. The two URLs are equivalent if they are identical except for +/// their replaced template parameters, which can be different. +pub fn raw_suggestion_url_matches(raw_url: &str, cooked_url: &str) -> bool { + let Some((raw_url_prefix, raw_url_suffix)) = raw_url.split_once(TIMESTAMP_TEMPLATE) else { + return raw_url == cooked_url; + }; + let (Some(cooked_url_prefix), Some(cooked_url_suffix)) = ( + cooked_url.get(..raw_url_prefix.len()), + cooked_url.get(raw_url_prefix.len() + TIMESTAMP_LENGTH..), + ) else { + return false; + }; + if raw_url_prefix != cooked_url_prefix || raw_url_suffix != cooked_url_suffix { + return false; + } + let maybe_timestamp = + &cooked_url[raw_url_prefix.len()..raw_url_prefix.len() + TIMESTAMP_LENGTH]; + maybe_timestamp.bytes().all(|b| b.is_ascii_digit()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn cook_url_with_template_parameters() { + let raw_url_with_one_timestamp = "https://example.com?a=%YYYYMMDDHH%"; + let cooked_url_with_one_timestamp = cook_raw_suggestion_url(raw_url_with_one_timestamp); + assert_eq!( + cooked_url_with_one_timestamp.len(), + raw_url_with_one_timestamp.len() - 2 + ); + assert_ne!(raw_url_with_one_timestamp, cooked_url_with_one_timestamp); + + let raw_url_with_trailing_segment = "https://example.com?a=%YYYYMMDDHH%&b=c"; + let cooked_url_with_trailing_segment = + cook_raw_suggestion_url(raw_url_with_trailing_segment); + assert_eq!( + cooked_url_with_trailing_segment.len(), + raw_url_with_trailing_segment.len() - 2 + ); + assert_ne!( 
+ raw_url_with_trailing_segment, + cooked_url_with_trailing_segment + ); + } + + #[test] + fn cook_url_without_template_parameters() { + let raw_url_without_timestamp = "https://example.com?b=c"; + let cooked_url_without_timestamp = cook_raw_suggestion_url(raw_url_without_timestamp); + assert_eq!(raw_url_without_timestamp, cooked_url_without_timestamp); + } + + #[test] + fn url_with_template_parameters_matches() { + let raw_url_with_one_timestamp = "https://example.com?a=%YYYYMMDDHH%"; + let raw_url_with_trailing_segment = "https://example.com?a=%YYYYMMDDHH%&b=c"; + + // Equivalent, except for their replaced template parameters. + assert!(raw_suggestion_url_matches( + raw_url_with_one_timestamp, + "https://example.com?a=0000000000" + )); + assert!(raw_suggestion_url_matches( + raw_url_with_trailing_segment, + "https://example.com?a=1111111111&b=c" + )); + + // Different lengths. + assert!(!raw_suggestion_url_matches( + raw_url_with_one_timestamp, + "https://example.com?a=1234567890&c=d" + )); + assert!(!raw_suggestion_url_matches( + raw_url_with_one_timestamp, + "https://example.com?a=123456789" + )); + assert!(!raw_suggestion_url_matches( + raw_url_with_trailing_segment, + "https://example.com?a=0987654321" + )); + assert!(!raw_suggestion_url_matches( + raw_url_with_trailing_segment, + "https://example.com?a=0987654321&b=c&d=e" + )); + + // Different query parameter names. + assert!(!raw_suggestion_url_matches( + raw_url_with_one_timestamp, // `a`. + "https://example.com?b=4444444444" // `b`. + )); + assert!(!raw_suggestion_url_matches( + raw_url_with_trailing_segment, // `a&b`. + "https://example.com?a=5555555555&c=c" // `a&c`. + )); + + // Not a timestamp. 
+ assert!(!raw_suggestion_url_matches( + raw_url_with_one_timestamp, + "https://example.com?a=bcdefghijk" + )); + assert!(!raw_suggestion_url_matches( + raw_url_with_trailing_segment, + "https://example.com?a=bcdefghijk&b=c" + )); + } + + #[test] + fn url_without_template_parameters_matches() { + let raw_url_without_timestamp = "https://example.com?b=c"; + + assert!(raw_suggestion_url_matches( + raw_url_without_timestamp, + "https://example.com?b=c" + )); + assert!(!raw_suggestion_url_matches( + raw_url_without_timestamp, + "http://example.com" + )); + assert!(!raw_suggestion_url_matches( + raw_url_without_timestamp, // `a`. + "http://example.com?a=c" // `b`. + )); + assert!(!raw_suggestion_url_matches( + raw_url_without_timestamp, + "https://example.com?b=c&d=e" + )); + } +} diff --git a/third_party/rust/suggest/src/yelp.rs b/third_party/rust/suggest/src/yelp.rs new file mode 100644 index 0000000000..2413709c67 --- /dev/null +++ b/third_party/rust/suggest/src/yelp.rs @@ -0,0 +1,497 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +use rusqlite::types::ToSqlOutput; +use rusqlite::{named_params, Result as RusqliteResult, ToSql}; +use sql_support::ConnExt; +use url::form_urlencoded; + +use crate::{ + db::SuggestDao, + provider::SuggestionProvider, + rs::{DownloadedYelpSuggestion, SuggestRecordId}, + suggestion::Suggestion, + Result, SuggestionQuery, +}; + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] +#[repr(u8)] +enum Modifier { + Pre = 0, + Post = 1, + Yelp = 2, +} + +impl ToSql for Modifier { + fn to_sql(&self) -> RusqliteResult<ToSqlOutput<'_>> { + Ok(ToSqlOutput::from(*self as u8)) + } +} + +/// This module assumes like following query. +/// "Yelp-modifier? Pre-modifier? Subject Post-modifier? (Location-modifier | Location-sign Location?)? Yelp-modifier?" 
+/// For example, the query below is valid. +/// "Yelp (Yelp-modifier) Best(Pre-modifier) Ramen(Subject) Delivery(Post-modifier) In(Location-sign) Tokyo(Location)" +/// Also, as everything except Subject is optional, "Ramen" will be also valid query. +/// However, "Best Best Ramen" and "Ramen Best" is out of the above appearance order rule, +/// parsing will be failed. Also, every words except Location needs to be registered in DB. +/// Please refer to the query test in store.rs for all of combination. +/// Currently, the maximum query length is determined while refering to having word lengths in DB +/// and location names. +/// max subject: 50 + pre-modifier: 10 + post-modifier: 10 + location-sign: 7 + location: 50 = 127 = 150. +const MAX_QUERY_LENGTH: usize = 150; + +/// The max number of words consisting the modifier. To improve the SQL performance by matching with +/// "keyword=:modifier" (please see is_modifier()), define this how many words we should check. +const MAX_MODIFIER_WORDS_NUMBER: usize = 2; + +/// At least this many characters must be typed for a subject to be matched. +const SUBJECT_PREFIX_MATCH_THRESHOLD: usize = 2; + +impl<'a> SuggestDao<'a> { + /// Inserts the suggestions for Yelp attachment into the database. + pub fn insert_yelp_suggestions( + &mut self, + record_id: &SuggestRecordId, + suggestion: &DownloadedYelpSuggestion, + ) -> Result<()> { + for keyword in &suggestion.subjects { + self.scope.err_if_interrupted()?; + self.conn.execute_cached( + "INSERT INTO yelp_subjects(record_id, keyword) VALUES(:record_id, :keyword)", + named_params! { + ":record_id": record_id.as_str(), + ":keyword": keyword, + }, + )?; + } + + for keyword in &suggestion.pre_modifiers { + self.scope.err_if_interrupted()?; + self.conn.execute_cached( + "INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)", + named_params! 
{ + ":record_id": record_id.as_str(), + ":type": Modifier::Pre, + ":keyword": keyword, + }, + )?; + } + + for keyword in &suggestion.post_modifiers { + self.scope.err_if_interrupted()?; + self.conn.execute_cached( + "INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)", + named_params! { + ":record_id": record_id.as_str(), + ":type": Modifier::Post, + ":keyword": keyword, + }, + )?; + } + + for keyword in &suggestion.yelp_modifiers { + self.scope.err_if_interrupted()?; + self.conn.execute_cached( + "INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)", + named_params! { + ":record_id": record_id.as_str(), + ":type": Modifier::Yelp, + ":keyword": keyword, + }, + )?; + } + + for sign in &suggestion.location_signs { + self.scope.err_if_interrupted()?; + self.conn.execute_cached( + "INSERT INTO yelp_location_signs(record_id, keyword, need_location) VALUES(:record_id, :keyword, :need_location)", + named_params! { + ":record_id": record_id.as_str(), + ":keyword": sign.keyword, + ":need_location": sign.need_location, + }, + )?; + } + + self.scope.err_if_interrupted()?; + self.conn.execute_cached( + "INSERT INTO yelp_custom_details(record_id, icon_id, score) VALUES(:record_id, :icon_id, :score)", + named_params! { + ":record_id": record_id.as_str(), + ":icon_id": suggestion.icon_id, + ":score": suggestion.score, + }, + )?; + + Ok(()) + } + + /// Fetch Yelp suggestion from given user's query. + pub fn fetch_yelp_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> { + if !query.providers.contains(&SuggestionProvider::Yelp) { + return Ok(vec![]); + } + + if query.keyword.len() > MAX_QUERY_LENGTH { + return Ok(vec![]); + } + + let query_string = &query.keyword.trim(); + if !query_string.contains(' ') { + let Some((subject, subject_exact_match)) = self.find_subject(query_string)? 
else { + return Ok(vec![]); + }; + let (icon, score) = self.fetch_custom_details()?; + let builder = SuggestionBuilder { + subject: &subject, + subject_exact_match, + pre_modifier: None, + post_modifier: None, + location_sign: None, + location: None, + need_location: false, + icon, + score, + }; + return Ok(vec![builder.into()]); + } + + // Find the yelp keyword modifier and remove them from the query. + let (query_without_yelp_modifiers, _, _) = + self.find_modifiers(query_string, Modifier::Yelp, Modifier::Yelp)?; + + // Find the location sign and the location. + let (query_without_location, location_sign, location, need_location) = + self.find_location(&query_without_yelp_modifiers)?; + + if let (Some(_), false) = (&location, need_location) { + // The location sign does not need the specific location, but user is setting something. + return Ok(vec![]); + } + + if query_without_location.is_empty() { + // No remained query. + return Ok(vec![]); + } + + // Find the modifiers. + let (subject_candidate, pre_modifier, post_modifier) = + self.find_modifiers(&query_without_location, Modifier::Pre, Modifier::Post)?; + + let Some((subject, subject_exact_match)) = self.find_subject(&subject_candidate)? else { + return Ok(vec![]); + }; + + let (icon, score) = self.fetch_custom_details()?; + let builder = SuggestionBuilder { + subject: &subject, + subject_exact_match, + pre_modifier, + post_modifier, + location_sign, + location, + need_location, + icon, + score, + }; + Ok(vec![builder.into()]) + } + + /// Fetch the custom details for Yelp suggestions. + /// It returns the location tuple as follows: + /// ( + /// Option<Vec<u8>>: Icon data. If not found, returns None. + /// f64: Reflects score field in the yelp_custom_details table. + /// ) + /// + /// Note that there should be only one record in `yelp_custom_details` + /// as all the Yelp assets are stored in the attachment of a single record + /// on Remote Settings. 
The following query will perform a table scan against + /// `yelp_custom_details` followed by an index search against `icons`, + /// which should be fine since there is only one record in the first table. + fn fetch_custom_details(&self) -> Result<(Option<Vec<u8>>, f64)> { + let result = self.conn.query_row_and_then_cachable( + r#" + SELECT + i.data, y.score + FROM + yelp_custom_details y + LEFT JOIN + icons i + ON y.icon_id = i.id + LIMIT + 1 + "#, + (), + |row| -> Result<_> { Ok((row.get::<_, Option<Vec<u8>>>(0)?, row.get::<_, f64>(1)?)) }, + true, + )?; + + Ok(result) + } + + /// Find the location information from the given query string. + /// It returns the location tuple as follows: + /// ( + /// String: Query string that is removed found location information. + /// Option<String>: Location sign found in yelp_location_signs table. If not found, returns None. + /// Option<String>: Specific location name after location sign. If not found, returns None. + /// bool: Reflects need_location field in the table. + /// ) + fn find_location(&self, query: &str) -> Result<(String, Option<String>, Option<String>, bool)> { + let query_with_spaces = format!(" {} ", query); + let mut results: Vec<(usize, usize, i8)> = self.conn.query_rows_and_then_cached( + " + SELECT + INSTR(:query, ' ' || keyword || ' ') AS sign_index, + LENGTH(keyword) AS sign_length, + need_location + FROM yelp_location_signs + WHERE + sign_index > 0 + ORDER BY + sign_length DESC + LIMIT 1 + ", + named_params! 
{ + ":query": &query_with_spaces.to_lowercase(), + }, + |row| -> Result<_> { + Ok(( + row.get::<_, usize>("sign_index")?, + row.get::<_, usize>("sign_length")?, + row.get::<_, i8>("need_location")?, + )) + }, + )?; + + let (sign_index, sign_length, need_location) = if let Some(res) = results.pop() { + res + } else { + return Ok((query.trim().to_string(), None, None, false)); + }; + + let pre_location = query_with_spaces + .get(..sign_index) + .map(str::trim) + .map(str::to_string) + .unwrap_or_default(); + let location_sign = query_with_spaces + .get(sign_index..sign_index + sign_length) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_string); + let location = query_with_spaces + .get(sign_index + sign_length..) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_string); + + Ok((pre_location, location_sign, location, need_location == 1)) + } + + /// Find the pre/post modifier from the given query string. + /// It returns the modifiers tuple as follows: + /// ( + /// String: Query string that is removed found the modifiers. + /// Option<String>: Pre-modifier found in the yelp_modifiers table. If not found, returns None. + /// Option<String>: Post-modifier found in the yelp_modifiers table. If not found, returns None. + /// ) + fn find_modifiers( + &self, + query: &str, + pre_modifier_type: Modifier, + post_modifier_type: Modifier, + ) -> Result<(String, Option<String>, Option<String>)> { + if !query.contains(' ') { + return Ok((query.to_string(), None, None)); + } + + let words: Vec<_> = query.split_whitespace().collect(); + + let mut pre_modifier = None; + for n in (1..=MAX_MODIFIER_WORDS_NUMBER).rev() { + let mut candidate_chunks = words.chunks(n); + let candidate = candidate_chunks.next().unwrap_or(&[""]).join(" "); + if self.is_modifier(&candidate, pre_modifier_type)? 
{ + pre_modifier = Some(candidate); + break; + } + } + + let mut post_modifier = None; + for n in (1..=MAX_MODIFIER_WORDS_NUMBER).rev() { + let mut candidate_chunks = words.rchunks(n); + let candidate = candidate_chunks.next().unwrap_or(&[""]).join(" "); + if self.is_modifier(&candidate, post_modifier_type)? { + post_modifier = Some(candidate); + break; + } + } + + let mut without_modifiers = query; + if let Some(ref modifier) = pre_modifier { + without_modifiers = &without_modifiers[modifier.len()..]; + } + if let Some(ref modifier) = post_modifier { + without_modifiers = &without_modifiers[..without_modifiers.len() - modifier.len()]; + } + + Ok(( + without_modifiers.trim().to_string(), + pre_modifier, + post_modifier, + )) + } + + /// Find the subject from the given string. + /// It returns the Option. If it is not none, it contains the tuple as follows: + /// ( + /// String: Subject. + /// bool: Whether the subject matched exactly with the paramter. + /// ) + fn find_subject(&self, candidate: &str) -> Result<Option<(String, bool)>> { + if candidate.is_empty() { + return Ok(None); + } + + // If the length of subject candidate is less than + // SUBJECT_PREFIX_MATCH_THRESHOLD, should exact match. + if candidate.len() < SUBJECT_PREFIX_MATCH_THRESHOLD { + return Ok(if self.is_subject(candidate)? { + Some((candidate.to_string(), true)) + } else { + None + }); + } + + // Otherwise, apply prefix-match. + Ok( + match self.conn.query_row_and_then_cachable( + "SELECT keyword + FROM yelp_subjects + WHERE keyword BETWEEN :candidate AND :candidate || x'FFFF' + ORDER BY LENGTH(keyword) ASC, keyword ASC + LIMIT 1", + named_params! 
{ + ":candidate": candidate.to_lowercase(), + }, + |row| row.get::<_, String>(0), + true, + ) { + Ok(keyword) => { + debug_assert!(candidate.len() <= keyword.len()); + Some(( + format!("{}{}", candidate, &keyword[candidate.len()..]), + candidate.len() == keyword.len(), + )) + } + Err(_) => None, + }, + ) + } + + fn is_modifier(&self, word: &str, modifier_type: Modifier) -> Result<bool> { + let result = self.conn.query_row_and_then_cachable( + " + SELECT EXISTS ( + SELECT 1 FROM yelp_modifiers WHERE type = :type AND keyword = :word LIMIT 1 + ) + ", + named_params! { + ":type": modifier_type, + ":word": word.to_lowercase(), + }, + |row| row.get::<_, bool>(0), + true, + )?; + + Ok(result) + } + + fn is_subject(&self, word: &str) -> Result<bool> { + let result = self.conn.query_row_and_then_cachable( + " + SELECT EXISTS ( + SELECT 1 FROM yelp_subjects WHERE keyword = :word LIMIT 1 + ) + ", + named_params! { + ":word": word.to_lowercase(), + }, + |row| row.get::<_, bool>(0), + true, + )?; + + Ok(result) + } +} + +struct SuggestionBuilder<'a> { + subject: &'a str, + subject_exact_match: bool, + pre_modifier: Option<String>, + post_modifier: Option<String>, + location_sign: Option<String>, + location: Option<String>, + need_location: bool, + icon: Option<Vec<u8>>, + score: f64, +} + +impl<'a> From<SuggestionBuilder<'a>> for Suggestion { + fn from(builder: SuggestionBuilder<'a>) -> Suggestion { + // This location sign such the 'near by' needs to add as a description parameter. 
+ let location_modifier = if !builder.need_location { + builder.location_sign.as_deref() + } else { + None + }; + let description = [ + builder.pre_modifier.as_deref(), + Some(builder.subject), + builder.post_modifier.as_deref(), + location_modifier, + ] + .iter() + .flatten() + .copied() + .collect::<Vec<_>>() + .join(" "); + + // https://www.yelp.com/search?find_desc={description}&find_loc={location} + let mut url = String::from("https://www.yelp.com/search?"); + let mut parameters = form_urlencoded::Serializer::new(String::new()); + parameters.append_pair("find_desc", &description); + if let (Some(location), true) = (&builder.location, builder.need_location) { + parameters.append_pair("find_loc", location); + } + url.push_str(&parameters.finish()); + + let title = [ + builder.pre_modifier.as_deref(), + Some(builder.subject), + builder.post_modifier.as_deref(), + builder.location_sign.as_deref(), + builder.location.as_deref(), + ] + .iter() + .flatten() + .copied() + .collect::<Vec<_>>() + .join(" "); + + Suggestion::Yelp { + url, + title, + icon: builder.icon, + score: builder.score, + has_location_sign: location_modifier.is_none() && builder.location_sign.is_some(), + subject_exact_match: builder.subject_exact_match, + location_param: "find_loc".to_string(), + } + } +} diff --git a/third_party/rust/suggest/uniffi.toml b/third_party/rust/suggest/uniffi.toml new file mode 100644 index 0000000000..273b6e9f1e --- /dev/null +++ b/third_party/rust/suggest/uniffi.toml @@ -0,0 +1,10 @@ +[bindings.kotlin] +package_name = "mozilla.appservices.suggest" +cdylib_name = "megazord" + +[bindings.kotlin.external_packages] +remote_settings = "mozilla.appservices.remotesettings" + +[bindings.swift] +ffi_module_name = "MozillaRustComponents" +ffi_module_filename = "suggestFFI" |