summary refs log tree commit diff stats
path: root/third_party/rust/suggest/src
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/suggest/src')
-rw-r--r-- third_party/rust/suggest/src/config.rs 31
-rw-r--r-- third_party/rust/suggest/src/db.rs 1315
-rw-r--r-- third_party/rust/suggest/src/error.rs 79
-rw-r--r-- third_party/rust/suggest/src/keyword.rs 102
-rw-r--r-- third_party/rust/suggest/src/lib.rs 36
-rw-r--r-- third_party/rust/suggest/src/pocket.rs 59
-rw-r--r-- third_party/rust/suggest/src/provider.rs 55
-rw-r--r-- third_party/rust/suggest/src/rs.rs 346
-rw-r--r-- third_party/rust/suggest/src/schema.rs 153
-rw-r--r-- third_party/rust/suggest/src/store.rs 5316
-rw-r--r-- third_party/rust/suggest/src/suggest.udl 151
-rw-r--r-- third_party/rust/suggest/src/suggestion.rs 250
-rw-r--r-- third_party/rust/suggest/src/yelp.rs 497
13 files changed, 8390 insertions, 0 deletions
diff --git a/third_party/rust/suggest/src/config.rs b/third_party/rust/suggest/src/config.rs
new file mode 100644
index 0000000000..fcb3c2e256
--- /dev/null
+++ b/third_party/rust/suggest/src/config.rs
@@ -0,0 +1,31 @@
+use serde::{Deserialize, Serialize};
+
+use crate::rs::{DownloadedGlobalConfig, DownloadedWeatherData};
+
+/// Global Suggest configuration data; derives serde `Serialize`/`Deserialize` so it can be stored as JSON.
+#[derive(Clone, Default, Debug, Deserialize, Serialize)]
+pub struct SuggestGlobalConfig {
+ pub show_less_frequently_cap: i32, // copied from the downloaded config's `configuration.show_less_frequently_cap`
+}
+
+impl From<&DownloadedGlobalConfig> for SuggestGlobalConfig {
+    /// Builds the stored global configuration from a downloaded one by
+    /// copying its `show_less_frequently_cap` value.
+    fn from(config: &DownloadedGlobalConfig) -> Self {
+        let show_less_frequently_cap = config.configuration.show_less_frequently_cap;
+        Self {
+            show_less_frequently_cap,
+        }
+    }
+}
+
+/// Per-provider configuration data; each variant carries the settings of one provider.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub enum SuggestProviderConfig {
+ Weather { min_keyword_length: i32 }, // filled from the downloaded weather data's `weather.min_keyword_length`
+}
+
+impl From<&DownloadedWeatherData> for SuggestProviderConfig {
+    /// Extracts the weather provider configuration from downloaded weather
+    /// data by copying its minimum keyword length.
+    fn from(data: &DownloadedWeatherData) -> Self {
+        let min_keyword_length = data.weather.min_keyword_length;
+        Self::Weather { min_keyword_length }
+    }
+}
diff --git a/third_party/rust/suggest/src/db.rs b/third_party/rust/suggest/src/db.rs
new file mode 100644
index 0000000000..07fc3ab4a2
--- /dev/null
+++ b/third_party/rust/suggest/src/db.rs
@@ -0,0 +1,1315 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+use std::{collections::HashSet, path::Path, sync::Arc};
+
+use interrupt_support::{SqlInterruptHandle, SqlInterruptScope};
+use parking_lot::Mutex;
+use remote_settings::RemoteSettingsRecord;
+use rusqlite::{
+ named_params,
+ types::{FromSql, ToSql},
+ Connection, OpenFlags,
+};
+use sql_support::{open_database::open_database_with_flags, ConnExt};
+
+use crate::{
+ config::{SuggestGlobalConfig, SuggestProviderConfig},
+ keyword::full_keyword,
+ pocket::{split_keyword, KeywordConfidence},
+ provider::SuggestionProvider,
+ rs::{
+ DownloadedAmoSuggestion, DownloadedAmpSuggestion, DownloadedAmpWikipediaSuggestion,
+ DownloadedMdnSuggestion, DownloadedPocketSuggestion, DownloadedWeatherData,
+ SuggestRecordId,
+ },
+ schema::{SuggestConnectionInitializer, VERSION},
+ store::{UnparsableRecord, UnparsableRecords},
+ suggestion::{cook_raw_suggestion_url, AmpSuggestionType, Suggestion},
+ Result, SuggestionQuery,
+};
+
+/// The metadata key whose value is the timestamp of the last record ingested
+/// from the Suggest Remote Settings collection.
+pub const LAST_INGEST_META_KEY: &str = "last_quicksuggest_ingest";
+/// The metadata key whose value tracks the suggestion records that could not
+/// be parsed, together with the schema version each one was first seen in.
+pub const UNPARSABLE_RECORDS_META_KEY: &str = "unparsable_records";
+/// The metadata key whose value is a JSON string encoding a
+/// `SuggestGlobalConfig`, which contains global Suggest configuration data.
+pub const GLOBAL_CONFIG_META_KEY: &str = "global_config";
+/// Prefix of metadata keys whose values are JSON strings encoding
+/// `SuggestProviderConfig`, which contains per-provider configuration data. The
+/// full key is this prefix plus the `SuggestionProvider` value as a u8.
+pub const PROVIDER_CONFIG_META_KEY_PREFIX: &str = "provider_config_";
+
+/// Default score stored for a suggestion whose downloaded data does not provide a score.
+pub const DEFAULT_SUGGESTION_SCORE: f64 = 0.2;
+
+/// The database connection type; it selects the SQLite open flags used when opening the database.
+#[derive(Clone, Copy)]
+pub(crate) enum ConnectionType {
+ ReadOnly, // opened with SQLITE_OPEN_READ_ONLY
+ ReadWrite, // opened with SQLITE_OPEN_READ_WRITE | SQLITE_OPEN_CREATE
+}
+
+impl From<ConnectionType> for OpenFlags {
+    /// Translates a connection type into the SQLite flags used to open the
+    /// database. Both types share the URI and no-mutex flags and differ only
+    /// in their access-mode flags.
+    fn from(type_: ConnectionType) -> Self {
+        // Flags common to every connection.
+        let shared = OpenFlags::SQLITE_OPEN_URI | OpenFlags::SQLITE_OPEN_NO_MUTEX;
+        // Flags that depend on whether the connection may write.
+        let access = match type_ {
+            ConnectionType::ReadOnly => OpenFlags::SQLITE_OPEN_READ_ONLY,
+            ConnectionType::ReadWrite => {
+                OpenFlags::SQLITE_OPEN_CREATE | OpenFlags::SQLITE_OPEN_READ_WRITE
+            }
+        };
+        shared | access
+    }
+}
+
+/// A thread-safe wrapper around an SQLite connection to the Suggest database,
+/// and its interrupt handle.
+pub(crate) struct SuggestDb {
+ pub conn: Mutex<Connection>, // locked for the duration of every `read` and `write` operation
+
+ /// An object that's used to interrupt an ongoing database operation.
+ ///
+ /// When this handle is interrupted, the thread that's currently accessing
+ /// the database will be told to stop and release the `conn` lock as soon
+ /// as possible.
+ pub interrupt_handle: Arc<SqlInterruptHandle>,
+}
+
+impl SuggestDb {
+    /// Opens the Suggest database at `path`, either read-only or read-write
+    /// depending on `type_`.
+    pub fn open(path: impl AsRef<Path>, type_: ConnectionType) -> Result<Self> {
+        let flags = OpenFlags::from(type_);
+        let conn = open_database_with_flags(path, flags, &SuggestConnectionInitializer)?;
+        Ok(Self::with_connection(conn))
+    }
+
+    /// Wraps an already-open connection together with a fresh interrupt
+    /// handle for it.
+    fn with_connection(conn: Connection) -> Self {
+        Self {
+            // The handle is created from a borrow of `conn` before the
+            // connection is moved into the mutex.
+            interrupt_handle: Arc::new(SqlInterruptHandle::new(&conn)),
+            conn: Mutex::new(conn),
+        }
+    }
+
+    /// Runs `op` against the database for reading, holding the connection
+    /// lock for the duration of the call.
+    pub fn read<T>(&self, op: impl FnOnce(&SuggestDao) -> Result<T>) -> Result<T> {
+        let guard = self.conn.lock();
+        let scope = self.interrupt_handle.begin_interrupt_scope()?;
+        op(&SuggestDao::new(&guard, scope))
+    }
+
+    /// Runs `op` inside a transaction for reading and writing. The
+    /// transaction is committed only if `op` succeeds; on error it is
+    /// dropped without being committed.
+    pub fn write<T>(&self, op: impl FnOnce(&mut SuggestDao) -> Result<T>) -> Result<T> {
+        let mut guard = self.conn.lock();
+        let scope = self.interrupt_handle.begin_interrupt_scope()?;
+        let tx = guard.transaction()?;
+        let mut dao = SuggestDao::new(&tx, scope);
+        match op(&mut dao) {
+            Ok(value) => {
+                tx.commit()?;
+                Ok(value)
+            }
+            Err(err) => Err(err),
+        }
+    }
+}
+
+/// A data access object (DAO) that wraps a connection to the Suggest database
+/// with methods for reading and writing suggestions, icons, and metadata.
+///
+/// Methods that only read from the database take an immutable reference to
+/// `self` (`&self`), and methods that write to the database take a mutable
+/// reference (`&mut self`).
+pub(crate) struct SuggestDao<'a> {
+ pub conn: &'a Connection, // the locked connection for reads, or the open transaction for writes
+ pub scope: SqlInterruptScope, // polled by the insert methods so ingestion can stop when interrupted
+}
+
+impl<'a> SuggestDao<'a> {
+ /// Creates a DAO over `conn` that reports interruption through `scope`.
+ fn new(conn: &'a Connection, scope: SqlInterruptScope) -> Self {
+     SuggestDao { conn, scope }
+ }
+
+ // =============== High level API ===============
+ //
+ // These methods combine several low-level calls into one logical operation.
+
+ /// Records that `record` could not be parsed and advances the last
+ /// ingest timestamp past it.
+ pub fn handle_unparsable_record(&mut self, record: &RemoteSettingsRecord) -> Result<()> {
+     let id = SuggestRecordId::from(&record.id);
+     // Keep the ID around so the record can be retried later.
+     self.put_unparsable_record_id(&id)?;
+     // Move the ingest marker forward so an interrupted ingest resumes
+     // after this record.
+     self.put_last_ingest_if_newer(record.last_modified)
+ }
+
+ /// Marks `record` as successfully ingested: it is removed from the
+ /// unparsable list and the last ingest timestamp is advanced past it.
+ pub fn handle_ingested_record(&mut self, record: &RemoteSettingsRecord) -> Result<()> {
+     let id = SuggestRecordId::from(&record.id);
+     // The record is understood now, so it no longer counts as unparsable.
+     self.drop_unparsable_record_id(&id)?;
+     // Move the ingest marker forward so an interrupted ingest resumes
+     // after this record.
+     self.put_last_ingest_if_newer(record.last_modified)
+ }
+
+ /// Removes everything stored for a deleted record (its icon or its
+ /// suggestions), drops it from the unparsable list, and advances the
+ /// last ingest timestamp past it.
+ pub fn handle_deleted_record(&mut self, record: &RemoteSettingsRecord) -> Result<()> {
+     let id = SuggestRecordId::from(&record.id);
+     // A record holds either an icon or suggestions, never both.
+     if let Some(icon_id) = id.as_icon_id() {
+         self.drop_icon(icon_id)?;
+     } else {
+         self.drop_suggestions(&id)?;
+     }
+     // A deleted record no longer needs to be retried.
+     self.drop_unparsable_record_id(&id)?;
+     // Move the ingest marker forward so an interrupted ingest resumes
+     // after this record.
+     self.put_last_ingest_if_newer(record.last_modified)
+ }
+
+ // =============== Low level API ===============
+ //
+ // These methods implement CRUD operations
+
+ /// Fetches suggestions that match the given query from the database.
+ ///
+ /// Every provider listed in `query.providers` is queried once, in the
+ /// order of its first appearance. The combined results are sorted and, if
+ /// `query.limit` holds a non-negative value, truncated to that many
+ /// entries. A negative limit is ignored.
+ ///
+ /// # Errors
+ ///
+ /// Returns the first error reported by any provider-specific fetch.
+ pub fn fetch_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
+     // Deduplicate providers while keeping the caller's order. Iterating a
+     // `HashSet` directly visits providers in an unspecified order, which
+     // made the relative order of suggestions that compare equal (and
+     // therefore which of them survive `limit`) vary between runs.
+     let mut seen = HashSet::new();
+     let mut suggestions = Vec::new();
+     for provider in query
+         .providers
+         .iter()
+         .filter(|provider| seen.insert(*provider))
+     {
+         let fetched = match provider {
+             SuggestionProvider::Amp => {
+                 self.fetch_amp_suggestions(query, AmpSuggestionType::Desktop)
+             }
+             SuggestionProvider::AmpMobile => {
+                 self.fetch_amp_suggestions(query, AmpSuggestionType::Mobile)
+             }
+             SuggestionProvider::Wikipedia => self.fetch_wikipedia_suggestions(query),
+             SuggestionProvider::Amo => self.fetch_amo_suggestions(query),
+             SuggestionProvider::Pocket => self.fetch_pocket_suggestions(query),
+             SuggestionProvider::Yelp => self.fetch_yelp_suggestions(query),
+             SuggestionProvider::Mdn => self.fetch_mdn_suggestions(query),
+             SuggestionProvider::Weather => self.fetch_weather_suggestions(query),
+         }?;
+         suggestions.extend(fetched);
+     }
+     // `sort` is stable, so suggestions that compare equal keep the
+     // provider order established above.
+     suggestions.sort();
+     if let Some(limit) = query.limit.and_then(|limit| usize::try_from(limit).ok()) {
+         suggestions.truncate(limit);
+     }
+     Ok(suggestions)
+ }
+
+ /// Fetches AMP suggestions (desktop or mobile, chosen by `suggestion_type`) that have a keyword exactly equal to the lowercased query keyword.
+ pub fn fetch_amp_suggestions(
+ &self,
+ query: &SuggestionQuery,
+ suggestion_type: AmpSuggestionType,
+ ) -> Result<Vec<Suggestion>> {
+ let keyword_lowercased = &query.keyword.to_lowercase();
+ let provider = match suggestion_type { // pick the provider value to filter `suggestions.provider` on
+ AmpSuggestionType::Mobile => SuggestionProvider::AmpMobile,
+ AmpSuggestionType::Desktop => SuggestionProvider::Amp,
+ };
+ let suggestions = self.conn.query_rows_and_then_cached(
+ r#"
+ SELECT
+ s.id,
+ k.rank,
+ s.title,
+ s.url,
+ s.provider,
+ s.score
+ FROM
+ suggestions s
+ JOIN
+ keywords k
+ ON k.suggestion_id = s.id
+ WHERE
+ s.provider = :provider
+ AND k.keyword = :keyword
+ "#,
+ named_params! {
+ ":keyword": keyword_lowercased,
+ ":provider": provider
+ },
+ |row| -> Result<Suggestion> { // runs once per matching (suggestion, keyword) row
+ let suggestion_id: i64 = row.get("id")?;
+ let title = row.get("title")?;
+ let raw_url = row.get::<_, String>("url")?;
+ let score = row.get::<_, f64>("score")?;
+
+ let keywords: Vec<String> = self.conn.query_rows_and_then_cached( // this suggestion's keywords ranked at or after the matched one, in rank order
+ r#"
+ SELECT
+ keyword
+ FROM
+ keywords
+ WHERE
+ suggestion_id = :suggestion_id
+ AND rank >= :rank
+ ORDER BY
+ rank ASC
+ "#,
+ named_params! {
+ ":suggestion_id": suggestion_id,
+ ":rank": row.get::<_, i64>("rank")?,
+ },
+ |row| row.get(0),
+ )?;
+ self.conn.query_row_and_then( // load the AMP-specific columns plus the icon data via a scalar subquery
+ r#"
+ SELECT
+ amp.advertiser,
+ amp.block_id,
+ amp.iab_category,
+ amp.impression_url,
+ amp.click_url,
+ (SELECT i.data FROM icons i WHERE i.id = amp.icon_id) AS icon
+ FROM
+ amp_custom_details amp
+ WHERE
+ amp.suggestion_id = :suggestion_id
+ "#,
+ named_params! {
+ ":suggestion_id": suggestion_id
+ },
+ |row| {
+ let cooked_url = cook_raw_suggestion_url(&raw_url); // the result carries both the raw and the cooked form of each URL
+ let raw_click_url = row.get::<_, String>("click_url")?;
+ let cooked_click_url = cook_raw_suggestion_url(&raw_click_url);
+
+ Ok(Suggestion::Amp {
+ block_id: row.get("block_id")?,
+ advertiser: row.get("advertiser")?,
+ iab_category: row.get("iab_category")?,
+ title,
+ url: cooked_url,
+ raw_url,
+ full_keyword: full_keyword(keyword_lowercased, &keywords),
+ icon: row.get("icon")?,
+ impression_url: row.get("impression_url")?,
+ click_url: cooked_click_url,
+ raw_click_url,
+ score,
+ })
+ },
+ )
+ },
+ )?;
+ Ok(suggestions)
+ }
+
+ /// Fetches Wikipedia suggestions that have a keyword exactly equal to the lowercased query keyword.
+ pub fn fetch_wikipedia_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
+ let keyword_lowercased = &query.keyword.to_lowercase();
+ let suggestions = self.conn.query_rows_and_then_cached(
+ r#"
+ SELECT
+ s.id,
+ k.rank,
+ s.title,
+ s.url
+ FROM
+ suggestions s
+ JOIN
+ keywords k
+ ON k.suggestion_id = s.id
+ WHERE
+ s.provider = :provider
+ AND k.keyword = :keyword
+ "#,
+ named_params! {
+ ":keyword": keyword_lowercased,
+ ":provider": SuggestionProvider::Wikipedia
+ },
+ |row| -> Result<Suggestion> { // runs once per matching (suggestion, keyword) row
+ let suggestion_id: i64 = row.get("id")?;
+ let title = row.get("title")?;
+ let raw_url = row.get::<_, String>("url")?;
+
+ let keywords: Vec<String> = self.conn.query_rows_and_then_cached( // this suggestion's keywords ranked at or after the matched one, in rank order
+ "SELECT keyword FROM keywords
+ WHERE suggestion_id = :suggestion_id AND rank >= :rank
+ ORDER BY rank ASC",
+ named_params! {
+ ":suggestion_id": suggestion_id,
+ ":rank": row.get::<_, i64>("rank")?,
+ },
+ |row| row.get(0),
+ )?;
+ let icon = self.conn.try_query_one( // icon data looked up through `wikipedia_custom_details.icon_id`
+ "SELECT i.data
+ FROM icons i
+ JOIN wikipedia_custom_details s ON s.icon_id = i.id
+ WHERE s.suggestion_id = :suggestion_id",
+ named_params! {
+ ":suggestion_id": suggestion_id
+ },
+ true,
+ )?;
+ Ok(Suggestion::Wikipedia {
+ title,
+ url: raw_url, // unlike AMP, the URL is returned as stored, without cooking
+ full_keyword: full_keyword(keyword_lowercased, &keywords),
+ icon,
+ })
+ },
+ )?;
+ Ok(suggestions)
+ }
+
+ /// Fetches AMO suggestions whose prefix keyword matches the lowercased query: equal `keyword_prefix`, and a `keyword_suffix` starting with the query's suffix.
+ pub fn fetch_amo_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
+ let keyword_lowercased = &query.keyword.to_lowercase();
+ let (keyword_prefix, keyword_suffix) = split_keyword(keyword_lowercased);
+ let suggestions_limit = &query.limit.unwrap_or(-1); // a missing limit is bound as -1 (SQLite treats a negative LIMIT as "no limit")
+ let suggestions = self
+ .conn
+ .query_rows_and_then_cached(
+ r#"
+ SELECT
+ s.id,
+ MAX(k.rank) AS rank,
+ s.title,
+ s.url,
+ s.provider,
+ s.score,
+ k.keyword_suffix
+ FROM
+ suggestions s
+ JOIN
+ prefix_keywords k
+ ON k.suggestion_id = s.id
+ WHERE
+ k.keyword_prefix = :keyword_prefix
+ AND (k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF')
+ AND s.provider = :provider
+ GROUP BY
+ s.id
+ ORDER BY
+ s.score DESC,
+ rank DESC
+ LIMIT
+ :suggestions_limit
+ "#,
+ named_params! {
+ ":keyword_prefix": keyword_prefix,
+ ":keyword_suffix": keyword_suffix,
+ ":provider": SuggestionProvider::Amo,
+ ":suggestions_limit": suggestions_limit,
+ },
+ |row| -> Result<Option<Suggestion>> { // yields `None` for rows that fail the suffix re-check below
+ let suggestion_id: i64 = row.get("id")?;
+ let title = row.get("title")?;
+ let raw_url = row.get::<_, String>("url")?;
+ let score = row.get::<_, f64>("score")?;
+
+ let full_suffix = row.get::<_, String>("keyword_suffix")?; // re-checked in Rust; NOTE: the SQL LIMIT was already applied before this filter
+ full_suffix
+ .starts_with(keyword_suffix)
+ .then(|| {
+ self.conn.query_row_and_then( // load the AMO-specific columns for this suggestion
+ r#"
+ SELECT
+ amo.description,
+ amo.guid,
+ amo.rating,
+ amo.icon_url,
+ amo.number_of_ratings
+ FROM
+ amo_custom_details amo
+ WHERE
+ amo.suggestion_id = :suggestion_id
+ "#,
+ named_params! {
+ ":suggestion_id": suggestion_id
+ },
+ |row| {
+ Ok(Suggestion::Amo {
+ title,
+ url: raw_url,
+ icon_url: row.get("icon_url")?,
+ description: row.get("description")?,
+ rating: row.get("rating")?,
+ number_of_ratings: row.get("number_of_ratings")?,
+ guid: row.get("guid")?,
+ score,
+ })
+ },
+ )
+ })
+ .transpose() // Option<Result<_>> -> Result<Option<_>>
+ },
+ )?
+ .into_iter()
+ .flatten() // drop the `None`s produced for non-matching rows
+ .collect();
+ Ok(suggestions)
+ }
+
+ /// Fetches Pocket suggestions whose prefix keyword matches the lowercased query; high-confidence keywords must match exactly, low-confidence ones by prefix.
+ pub fn fetch_pocket_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
+ let keyword_lowercased = &query.keyword.to_lowercase();
+ let (keyword_prefix, keyword_suffix) = split_keyword(keyword_lowercased);
+ let suggestions = self
+ .conn
+ .query_rows_and_then_cached(
+ r#"
+ SELECT
+ s.id,
+ MAX(k.rank) AS rank,
+ s.title,
+ s.url,
+ s.provider,
+ s.score,
+ k.confidence,
+ k.keyword_suffix
+ FROM
+ suggestions s
+ JOIN
+ prefix_keywords k
+ ON k.suggestion_id = s.id
+ WHERE
+ k.keyword_prefix = :keyword_prefix
+ AND (k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF')
+ AND s.provider = :provider
+ GROUP BY
+ s.id,
+ k.confidence
+ ORDER BY
+ s.score DESC,
+ rank DESC
+ "#,
+ named_params! {
+ ":keyword_prefix": keyword_prefix,
+ ":keyword_suffix": keyword_suffix,
+ ":provider": SuggestionProvider::Pocket,
+ },
+ |row| -> Result<Option<Suggestion>> { // yields `None` for rows that fail the confidence-specific check below
+ let title = row.get("title")?;
+ let raw_url = row.get::<_, String>("url")?;
+ let score = row.get::<_, f64>("score")?;
+ let confidence = row.get("confidence")?;
+ let full_suffix = row.get::<_, String>("keyword_suffix")?;
+ let suffixes_match = match confidence { // low confidence: prefix match; high confidence: exact suffix match
+ KeywordConfidence::Low => full_suffix.starts_with(keyword_suffix),
+ KeywordConfidence::High => full_suffix == keyword_suffix,
+ };
+ if suffixes_match {
+ Ok(Some(Suggestion::Pocket {
+ title,
+ url: raw_url,
+ score,
+ is_top_pick: matches!(confidence, KeywordConfidence::High), // only high-confidence matches are top picks
+ }))
+ } else {
+ Ok(None)
+ }
+ },
+ )?
+ .into_iter()
+ .flatten() // drop the `None`s produced for non-matching rows
+ .take( // the limit is applied after filtering; a missing or negative limit means no cap
+ query
+ .limit
+ .and_then(|limit| usize::try_from(limit).ok())
+ .unwrap_or(usize::MAX),
+ )
+ .collect();
+ Ok(suggestions)
+ }
+
+ /// Fetches MDN suggestions whose prefix keyword matches the lowercased query: equal `keyword_prefix`, and a `keyword_suffix` starting with the query's suffix.
+ pub fn fetch_mdn_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
+ let keyword_lowercased = &query.keyword.to_lowercase();
+ let (keyword_prefix, keyword_suffix) = split_keyword(keyword_lowercased);
+ let suggestions_limit = &query.limit.unwrap_or(-1); // a missing limit is bound as -1 (SQLite treats a negative LIMIT as "no limit")
+ let suggestions = self
+ .conn
+ .query_rows_and_then_cached(
+ r#"
+ SELECT
+ s.id,
+ MAX(k.rank) AS rank,
+ s.title,
+ s.url,
+ s.provider,
+ s.score,
+ k.keyword_suffix
+ FROM
+ suggestions s
+ JOIN
+ prefix_keywords k
+ ON k.suggestion_id = s.id
+ WHERE
+ k.keyword_prefix = :keyword_prefix
+ AND (k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF')
+ AND s.provider = :provider
+ GROUP BY
+ s.id
+ ORDER BY
+ s.score DESC,
+ rank DESC
+ LIMIT
+ :suggestions_limit
+ "#,
+ named_params! {
+ ":keyword_prefix": keyword_prefix,
+ ":keyword_suffix": keyword_suffix,
+ ":provider": SuggestionProvider::Mdn,
+ ":suggestions_limit": suggestions_limit,
+ },
+ |row| -> Result<Option<Suggestion>> { // yields `None` for rows that fail the suffix re-check below
+ let suggestion_id: i64 = row.get("id")?;
+ let title = row.get("title")?;
+ let raw_url = row.get::<_, String>("url")?;
+ let score = row.get::<_, f64>("score")?;
+
+ let full_suffix = row.get::<_, String>("keyword_suffix")?; // re-checked in Rust; NOTE: the SQL LIMIT was already applied before this filter
+ full_suffix
+ .starts_with(keyword_suffix)
+ .then(|| {
+ self.conn.query_row_and_then( // load the MDN description for this suggestion
+ r#"
+ SELECT
+ description
+ FROM
+ mdn_custom_details
+ WHERE
+ suggestion_id = :suggestion_id
+ "#,
+ named_params! {
+ ":suggestion_id": suggestion_id
+ },
+ |row| {
+ Ok(Suggestion::Mdn {
+ title,
+ url: raw_url,
+ description: row.get("description")?,
+ score,
+ })
+ },
+ )
+ })
+ .transpose() // Option<Result<_>> -> Result<Option<_>>
+ },
+ )?
+ .into_iter()
+ .flatten() // drop the `None`s produced for non-matching rows
+ .collect();
+
+ Ok(suggestions)
+ }
+
+ /// Fetches weather suggestions.
+ ///
+ /// The query keyword is trimmed and lowercased, then matched as a prefix
+ /// of the stored weather keywords. One `Suggestion::Weather` is returned
+ /// per matching keyword row, carrying the suggestion's score.
+ ///
+ /// Returns an empty list when the trimmed keyword is shorter than three
+ /// bytes.
+ pub fn fetch_weather_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
+     // Weather keywords are matched by prefix but the query must be at least
+     // three chars long. Unlike the prefix matching of other suggestion
+     // types, the query doesn't need to contain the first full word.
+     //
+     // The length is checked on the trimmed keyword, since that is what is
+     // actually matched below. Checking the untrimmed keyword would let a
+     // whitespace-padded one- or two-character query slip past the minimum.
+     let trimmed = query.keyword.trim();
+     if trimmed.len() < 3 {
+         return Ok(vec![]);
+     }
+
+     let keyword_lowercased = &trimmed.to_lowercase();
+     let suggestions = self.conn.query_rows_and_then_cached(
+         r#"
+         SELECT
+           s.score
+         FROM
+           suggestions s
+         JOIN
+           keywords k
+           ON k.suggestion_id = s.id
+         WHERE
+           s.provider = :provider
+           AND (k.keyword BETWEEN :keyword AND :keyword || X'FFFF')
+         "#,
+         named_params! {
+             ":keyword": keyword_lowercased,
+             ":provider": SuggestionProvider::Weather
+         },
+         |row| -> Result<Suggestion> {
+             Ok(Suggestion::Weather {
+                 score: row.get::<_, f64>("score")?,
+             })
+         },
+     )?;
+     Ok(suggestions)
+ }
+
+ /// Inserts all suggestions from a downloaded AMO attachment into
+ /// the database.
+ ///
+ /// For each suggestion this adds one `suggestions` row, one
+ /// `amo_custom_details` row, and one `prefix_keywords` row per keyword,
+ /// ranked by the keyword's position in the suggestion's keyword list.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the operation is interrupted or a statement fails.
+ pub fn insert_amo_suggestions(
+     &mut self,
+     record_id: &SuggestRecordId,
+     suggestions: &[DownloadedAmoSuggestion],
+ ) -> Result<()> {
+     // The provider is fixed for this method, so the statement text is
+     // built once instead of being re-formatted for every suggestion.
+     let insert_suggestion_sql = format!(
+         "INSERT INTO suggestions(
+              record_id,
+              provider,
+              title,
+              url,
+              score
+          )
+          VALUES(
+              :record_id,
+              {},
+              :title,
+              :url,
+              :score
+          )
+          RETURNING id",
+         SuggestionProvider::Amo as u8
+     );
+     for suggestion in suggestions {
+         // Stop between suggestions if an interrupt was requested.
+         self.scope.err_if_interrupted()?;
+         let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
+             &insert_suggestion_sql,
+             named_params! {
+                 ":record_id": record_id.as_str(),
+                 ":title": suggestion.title,
+                 ":url": suggestion.url,
+                 ":score": suggestion.score,
+             },
+             |row| row.get(0),
+             true,
+         )?;
+         // The statements below run once per suggestion or keyword, so they
+         // use the statement cache rather than being prepared every time.
+         self.conn.execute_cached(
+             "INSERT INTO amo_custom_details(
+                  suggestion_id,
+                  description,
+                  guid,
+                  icon_url,
+                  rating,
+                  number_of_ratings
+              )
+              VALUES(
+                  :suggestion_id,
+                  :description,
+                  :guid,
+                  :icon_url,
+                  :rating,
+                  :number_of_ratings
+              )",
+             named_params! {
+                 ":suggestion_id": suggestion_id,
+                 ":description": suggestion.description,
+                 ":guid": suggestion.guid,
+                 ":icon_url": suggestion.icon_url,
+                 ":rating": suggestion.rating,
+                 ":number_of_ratings": suggestion.number_of_ratings
+             },
+         )?;
+         for (index, keyword) in suggestion.keywords.iter().enumerate() {
+             let (keyword_prefix, keyword_suffix) = split_keyword(keyword);
+             self.conn.execute_cached(
+                 "INSERT INTO prefix_keywords(
+                      keyword_prefix,
+                      keyword_suffix,
+                      suggestion_id,
+                      rank
+                  )
+                  VALUES(
+                      :keyword_prefix,
+                      :keyword_suffix,
+                      :suggestion_id,
+                      :rank
+                  )",
+                 named_params! {
+                     ":keyword_prefix": keyword_prefix,
+                     ":keyword_suffix": keyword_suffix,
+                     ":rank": index,
+                     ":suggestion_id": suggestion_id,
+                 },
+             )?;
+         }
+     }
+     Ok(())
+ }
+
+ /// Inserts all suggestions from a downloaded AMP-Wikipedia attachment into
+ /// the database.
+ ///
+ /// For each suggestion this adds one `suggestions` row, one row in either
+ /// `amp_custom_details` or `wikipedia_custom_details` depending on the
+ /// suggestion's variant, and one `keywords` row per keyword, ranked by the
+ /// keyword's position in the suggestion's keyword list. A missing score
+ /// is stored as `DEFAULT_SUGGESTION_SCORE`.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the operation is interrupted or a statement fails.
+ pub fn insert_amp_wikipedia_suggestions(
+     &mut self,
+     record_id: &SuggestRecordId,
+     suggestions: &[DownloadedAmpWikipediaSuggestion],
+ ) -> Result<()> {
+     for suggestion in suggestions {
+         // Stop between suggestions if an interrupt was requested.
+         self.scope.err_if_interrupted()?;
+         let common_details = suggestion.common_details();
+         // The provider depends on the suggestion, so the statement text
+         // has to be formatted per suggestion here.
+         let provider = suggestion.provider();
+
+         let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
+             &format!(
+                 "INSERT INTO suggestions(
+                      record_id,
+                      provider,
+                      title,
+                      url,
+                      score
+                  )
+                  VALUES(
+                      :record_id,
+                      {},
+                      :title,
+                      :url,
+                      :score
+                  )
+                  RETURNING id",
+                 provider as u8
+             ),
+             named_params! {
+                 ":record_id": record_id.as_str(),
+                 ":title": common_details.title,
+                 ":url": common_details.url,
+                 ":score": common_details.score.unwrap_or(DEFAULT_SUGGESTION_SCORE)
+             },
+             |row| row.get(0),
+             true,
+         )?;
+         // The statements below run once per suggestion or keyword, so they
+         // use the statement cache rather than being prepared every time.
+         match suggestion {
+             DownloadedAmpWikipediaSuggestion::Amp(amp) => {
+                 self.conn.execute_cached(
+                     "INSERT INTO amp_custom_details(
+                          suggestion_id,
+                          advertiser,
+                          block_id,
+                          iab_category,
+                          impression_url,
+                          click_url,
+                          icon_id
+                      )
+                      VALUES(
+                          :suggestion_id,
+                          :advertiser,
+                          :block_id,
+                          :iab_category,
+                          :impression_url,
+                          :click_url,
+                          :icon_id
+                      )",
+                     named_params! {
+                         ":suggestion_id": suggestion_id,
+                         ":advertiser": amp.advertiser,
+                         ":block_id": amp.block_id,
+                         ":iab_category": amp.iab_category,
+                         ":impression_url": amp.impression_url,
+                         ":click_url": amp.click_url,
+                         ":icon_id": amp.icon_id,
+                     },
+                 )?;
+             }
+             DownloadedAmpWikipediaSuggestion::Wikipedia(wikipedia) => {
+                 self.conn.execute_cached(
+                     "INSERT INTO wikipedia_custom_details(
+                          suggestion_id,
+                          icon_id
+                      )
+                      VALUES(
+                          :suggestion_id,
+                          :icon_id
+                      )",
+                     named_params! {
+                         ":suggestion_id": suggestion_id,
+                         ":icon_id": wikipedia.icon_id,
+                     },
+                 )?;
+             }
+         }
+         for (index, keyword) in common_details.keywords.iter().enumerate() {
+             self.conn.execute_cached(
+                 "INSERT INTO keywords(
+                      keyword,
+                      suggestion_id,
+                      rank
+                  )
+                  VALUES(
+                      :keyword,
+                      :suggestion_id,
+                      :rank
+                  )",
+                 named_params! {
+                     ":keyword": keyword,
+                     ":rank": index,
+                     ":suggestion_id": suggestion_id,
+                 },
+             )?;
+         }
+     }
+     Ok(())
+ }
+
+ /// Inserts all suggestions from a downloaded AMP-Mobile attachment into
+ /// the database.
+ ///
+ /// For each suggestion this adds one `suggestions` row, one
+ /// `amp_custom_details` row, and one `keywords` row per keyword, ranked by
+ /// the keyword's position in the suggestion's keyword list. A missing
+ /// score is stored as `DEFAULT_SUGGESTION_SCORE`.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the operation is interrupted or a statement fails.
+ pub fn insert_amp_mobile_suggestions(
+     &mut self,
+     record_id: &SuggestRecordId,
+     suggestions: &[DownloadedAmpSuggestion],
+ ) -> Result<()> {
+     // The provider is fixed for this method, so the statement text is
+     // built once instead of being re-formatted for every suggestion.
+     let insert_suggestion_sql = format!(
+         "INSERT INTO suggestions(
+              record_id,
+              provider,
+              title,
+              url,
+              score
+          )
+          VALUES(
+              :record_id,
+              {},
+              :title,
+              :url,
+              :score
+          )
+          RETURNING id",
+         SuggestionProvider::AmpMobile as u8
+     );
+     for suggestion in suggestions {
+         // Stop between suggestions if an interrupt was requested.
+         self.scope.err_if_interrupted()?;
+         let common_details = &suggestion.common_details;
+         let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
+             &insert_suggestion_sql,
+             named_params! {
+                 ":record_id": record_id.as_str(),
+                 ":title": common_details.title,
+                 ":url": common_details.url,
+                 ":score": common_details.score.unwrap_or(DEFAULT_SUGGESTION_SCORE)
+             },
+             |row| row.get(0),
+             true,
+         )?;
+         // The statements below run once per suggestion or keyword, so they
+         // use the statement cache rather than being prepared every time.
+         self.conn.execute_cached(
+             "INSERT INTO amp_custom_details(
+                  suggestion_id,
+                  advertiser,
+                  block_id,
+                  iab_category,
+                  impression_url,
+                  click_url,
+                  icon_id
+              )
+              VALUES(
+                  :suggestion_id,
+                  :advertiser,
+                  :block_id,
+                  :iab_category,
+                  :impression_url,
+                  :click_url,
+                  :icon_id
+              )",
+             named_params! {
+                 ":suggestion_id": suggestion_id,
+                 ":advertiser": suggestion.advertiser,
+                 ":block_id": suggestion.block_id,
+                 ":iab_category": suggestion.iab_category,
+                 ":impression_url": suggestion.impression_url,
+                 ":click_url": suggestion.click_url,
+                 ":icon_id": suggestion.icon_id,
+             },
+         )?;
+
+         for (index, keyword) in common_details.keywords.iter().enumerate() {
+             self.conn.execute_cached(
+                 "INSERT INTO keywords(
+                      keyword,
+                      suggestion_id,
+                      rank
+                  )
+                  VALUES(
+                      :keyword,
+                      :suggestion_id,
+                      :rank
+                  )",
+                 named_params! {
+                     ":keyword": keyword,
+                     ":rank": index,
+                     ":suggestion_id": suggestion_id,
+                 },
+             )?;
+         }
+     }
+     Ok(())
+ }
+
+ /// Inserts all suggestions from a downloaded Pocket attachment into
+ /// the database.
+ ///
+ /// For each suggestion this adds one `suggestions` row and one
+ /// `prefix_keywords` row per keyword. High-confidence keywords are
+ /// inserted first, then low-confidence ones; each list is ranked
+ /// independently, starting from zero.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the operation is interrupted or a statement fails.
+ pub fn insert_pocket_suggestions(
+     &mut self,
+     record_id: &SuggestRecordId,
+     suggestions: &[DownloadedPocketSuggestion],
+ ) -> Result<()> {
+     // The provider is fixed for this method, so the statement text is
+     // built once instead of being re-formatted for every suggestion.
+     let insert_suggestion_sql = format!(
+         "INSERT INTO suggestions(
+              record_id,
+              provider,
+              title,
+              url,
+              score
+          )
+          VALUES(
+              :record_id,
+              {},
+              :title,
+              :url,
+              :score
+          )
+          RETURNING id",
+         SuggestionProvider::Pocket as u8
+     );
+     for suggestion in suggestions {
+         // Stop between suggestions if an interrupt was requested.
+         self.scope.err_if_interrupted()?;
+         let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
+             &insert_suggestion_sql,
+             named_params! {
+                 ":record_id": record_id.as_str(),
+                 ":title": suggestion.title,
+                 ":url": suggestion.url,
+                 ":score": suggestion.score,
+             },
+             |row| row.get(0),
+             true,
+         )?;
+
+         // Pair every keyword with its rank within its own list and with
+         // the confidence of the list it came from.
+         for ((rank, keyword), confidence) in suggestion
+             .high_confidence_keywords
+             .iter()
+             .enumerate()
+             .zip(std::iter::repeat(KeywordConfidence::High))
+             .chain(
+                 suggestion
+                     .low_confidence_keywords
+                     .iter()
+                     .enumerate()
+                     .zip(std::iter::repeat(KeywordConfidence::Low)),
+             )
+         {
+             let (keyword_prefix, keyword_suffix) = split_keyword(keyword);
+             // Runs once per keyword, so use the statement cache rather
+             // than preparing the statement every time.
+             self.conn.execute_cached(
+                 "INSERT INTO prefix_keywords(
+                      keyword_prefix,
+                      keyword_suffix,
+                      confidence,
+                      rank,
+                      suggestion_id
+                  )
+                  VALUES(
+                      :keyword_prefix,
+                      :keyword_suffix,
+                      :confidence,
+                      :rank,
+                      :suggestion_id
+                  )",
+                 named_params! {
+                     ":keyword_prefix": keyword_prefix,
+                     ":keyword_suffix": keyword_suffix,
+                     ":confidence": confidence,
+                     ":rank": rank,
+                     ":suggestion_id": suggestion_id,
+                 },
+             )?;
+         }
+     }
+     Ok(())
+ }
+
+ /// Inserts all suggestions from a downloaded MDN attachment into
+ /// the database.
+ ///
+ /// For each suggestion this adds one `suggestions` row, one
+ /// `mdn_custom_details` row, and one `prefix_keywords` row per keyword,
+ /// ranked by the keyword's position in the suggestion's keyword list.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the operation is interrupted or a statement fails.
+ pub fn insert_mdn_suggestions(
+     &mut self,
+     record_id: &SuggestRecordId,
+     suggestions: &[DownloadedMdnSuggestion],
+ ) -> Result<()> {
+     // The provider is fixed for this method, so the statement text is
+     // built once instead of being re-formatted for every suggestion.
+     let insert_suggestion_sql = format!(
+         "INSERT INTO suggestions(
+              record_id,
+              provider,
+              title,
+              url,
+              score
+          )
+          VALUES(
+              :record_id,
+              {},
+              :title,
+              :url,
+              :score
+          )
+          RETURNING id",
+         SuggestionProvider::Mdn as u8
+     );
+     for suggestion in suggestions {
+         // Stop between suggestions if an interrupt was requested.
+         self.scope.err_if_interrupted()?;
+         let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
+             &insert_suggestion_sql,
+             named_params! {
+                 ":record_id": record_id.as_str(),
+                 ":title": suggestion.title,
+                 ":url": suggestion.url,
+                 ":score": suggestion.score,
+             },
+             |row| row.get(0),
+             true,
+         )?;
+         self.conn.execute_cached(
+             "INSERT INTO mdn_custom_details(
+                  suggestion_id,
+                  description
+              )
+              VALUES(
+                  :suggestion_id,
+                  :description
+              )",
+             named_params! {
+                 ":suggestion_id": suggestion_id,
+                 ":description": suggestion.description,
+             },
+         )?;
+         for (index, keyword) in suggestion.keywords.iter().enumerate() {
+             let (keyword_prefix, keyword_suffix) = split_keyword(keyword);
+             self.conn.execute_cached(
+                 "INSERT INTO prefix_keywords(
+                      keyword_prefix,
+                      keyword_suffix,
+                      suggestion_id,
+                      rank
+                  )
+                  VALUES(
+                      :keyword_prefix,
+                      :keyword_suffix,
+                      :suggestion_id,
+                      :rank
+                  )",
+                 named_params! {
+                     ":keyword_prefix": keyword_prefix,
+                     ":keyword_suffix": keyword_suffix,
+                     ":rank": index,
+                     ":suggestion_id": suggestion_id,
+                 },
+             )?;
+         }
+     }
+     Ok(())
+ }
+
+ /// Inserts weather record data into the database.
+ ///
+ /// This adds a single `suggestions` row with an empty title and URL, one
+ /// `keywords` row per weather keyword (ranked by position), and stores the
+ /// weather provider configuration derived from `data`. A missing score is
+ /// stored as `DEFAULT_SUGGESTION_SCORE`.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the operation is interrupted or a statement fails.
+ pub fn insert_weather_data(
+     &mut self,
+     record_id: &SuggestRecordId,
+     data: &DownloadedWeatherData,
+ ) -> Result<()> {
+     self.scope.err_if_interrupted()?;
+     let suggestion_id: i64 = self.conn.query_row_and_then_cachable(
+         &format!(
+             "INSERT INTO suggestions(record_id, provider, title, url, score)
+              VALUES(:record_id, {}, '', '', :score)
+              RETURNING id",
+             SuggestionProvider::Weather as u8
+         ),
+         named_params! {
+             ":record_id": record_id.as_str(),
+             ":score": data.weather.score.unwrap_or(DEFAULT_SUGGESTION_SCORE),
+         },
+         |row| row.get(0),
+         true,
+     )?;
+     for (index, keyword) in data.weather.keywords.iter().enumerate() {
+         // Runs once per keyword, so use the statement cache rather than
+         // preparing the statement every time.
+         self.conn.execute_cached(
+             "INSERT INTO keywords(keyword, suggestion_id, rank)
+              VALUES(:keyword, :suggestion_id, :rank)",
+             named_params! {
+                 ":keyword": keyword,
+                 ":suggestion_id": suggestion_id,
+                 ":rank": index,
+             },
+         )?;
+     }
+     self.put_provider_config(
+         SuggestionProvider::Weather,
+         &SuggestProviderConfig::from(data),
+     )?;
+     Ok(())
+ }
+
+ /// Inserts or replaces an icon for a suggestion into the database.
+ ///
+ /// `icon_id` is the key in the `icons` table and `data` is stored as the
+ /// icon's content. An existing icon with the same ID is overwritten.
+ pub fn put_icon(&mut self, icon_id: &str, data: &[u8]) -> Result<()> {
+     // Uses the statement cache, like `drop_icon`, so the statement is not
+     // prepared again for every icon.
+     self.conn.execute_cached(
+         "INSERT OR REPLACE INTO icons(
+              id,
+              data
+          )
+          VALUES(
+              :id,
+              :data
+          )",
+         named_params! {
+             ":id": icon_id,
+             ":data": data,
+         },
+     )?;
+     Ok(())
+ }
+
+ /// Deletes everything stored for a Remote Settings record: its rows in
+ /// `suggestions` and in each of the Yelp tables.
+ pub fn drop_suggestions(&mut self, record_id: &SuggestRecordId) -> Result<()> {
+     // One statement per table keyed by `record_id`, run in this order.
+     const DELETE_STATEMENTS: [&str; 5] = [
+         "DELETE FROM suggestions WHERE record_id = :record_id",
+         "DELETE FROM yelp_subjects WHERE record_id = :record_id",
+         "DELETE FROM yelp_modifiers WHERE record_id = :record_id",
+         "DELETE FROM yelp_location_signs WHERE record_id = :record_id",
+         "DELETE FROM yelp_custom_details WHERE record_id = :record_id",
+     ];
+     for sql in DELETE_STATEMENTS {
+         self.conn
+             .execute_cached(sql, named_params! { ":record_id": record_id.as_str() })?;
+     }
+     Ok(())
+ }
+
+ /// Removes the icon with the given ID from the `icons` table.
+ pub fn drop_icon(&mut self, icon_id: &str) -> Result<()> {
+     let sql = "DELETE FROM icons WHERE id = :id";
+     self.conn
+         .execute_cached(sql, named_params! { ":id": icon_id })?;
+     Ok(())
+ }
+
+ /// Clears the database, removing all suggestions, icons, Yelp data, and
+ /// metadata.
+ pub fn clear(&mut self) -> Result<()> {
+     // The Yelp tables are keyed by `record_id` and are deleted explicitly
+     // in `drop_suggestions`, so deleting from `suggestions` alone would
+     // leave their rows behind. Clear them here as well.
+     self.conn.execute_batch(
+         "DELETE FROM suggestions;
+          DELETE FROM icons;
+          DELETE FROM yelp_subjects;
+          DELETE FROM yelp_modifiers;
+          DELETE FROM yelp_location_signs;
+          DELETE FROM yelp_custom_details;
+          DELETE FROM meta;",
+     )?;
+     Ok(())
+ }
+
+ /// Looks up the value stored under a metadata key, or `None` if the key
+ /// is not present.
+ pub fn get_meta<T: FromSql>(&self, key: &str) -> Result<Option<T>> {
+     let value = self.conn.try_query_one(
+         "SELECT value FROM meta WHERE key = :key",
+         named_params! { ":key": key },
+         true,
+     )?;
+     Ok(value)
+ }
+
+ /// Stores `value` under a metadata key, replacing any existing value.
+ pub fn put_meta(&mut self, key: &str, value: impl ToSql) -> Result<()> {
+     let sql = "INSERT OR REPLACE INTO meta(key, value) VALUES(:key, :value)";
+     self.conn
+         .execute_cached(sql, named_params! { ":key": key, ":value": value })?;
+     Ok(())
+ }
+
+ /// Advances the stored last ingest timestamp to `record_last_modified`,
+ /// but only if that is strictly newer than the stored value. A missing
+ /// stored value counts as zero.
+ pub fn put_last_ingest_if_newer(&mut self, record_last_modified: u64) -> Result<()> {
+     let stored = self
+         .get_meta::<u64>(LAST_INGEST_META_KEY)?
+         .unwrap_or_default();
+     // Nothing to do when the stored timestamp is already at least as new.
+     if record_last_modified <= stored {
+         return Ok(());
+     }
+     self.put_meta(LAST_INGEST_META_KEY, record_last_modified)
+ }
+
+ /// Notes that a Suggest Remote Settings record could not be parsed.
+ ///
+ /// The record's ID is added to the stored list of unparsable records
+ /// together with the current schema `VERSION`, replacing any earlier
+ /// entry for the same ID.
+ pub fn put_unparsable_record_id(&mut self, record_id: &SuggestRecordId) -> Result<()> {
+     let mut records = self
+         .get_meta::<UnparsableRecords>(UNPARSABLE_RECORDS_META_KEY)?
+         .unwrap_or_default();
+     let entry = UnparsableRecord {
+         schema_version: VERSION,
+     };
+     records.0.insert(record_id.as_str().to_string(), entry);
+     self.put_meta(UNPARSABLE_RECORDS_META_KEY, records)
+ }
+
+ /// Takes a Suggest Remote Settings record off the stored list of
+ /// unparsable records.
+ ///
+ /// The list is written back only when the record was actually on it; if
+ /// no list is stored, or the record is not listed, nothing is written.
+ ///
+ /// Callers use this once a record has been ingested successfully or has
+ /// been deleted.
+ pub fn drop_unparsable_record_id(&mut self, record_id: &SuggestRecordId) -> Result<()> {
+     let stored = self.get_meta::<UnparsableRecords>(UNPARSABLE_RECORDS_META_KEY)?;
+     if let Some(mut records) = stored {
+         if records.0.remove(record_id.as_str()).is_some() {
+             self.put_meta(UNPARSABLE_RECORDS_META_KEY, records)?;
+         }
+     }
+     Ok(())
+ }
+
+ /// Serializes the global Suggest configuration to JSON and stores it in
+ /// the metadata table.
+ pub fn put_global_config(&mut self, config: &SuggestGlobalConfig) -> Result<()> {
+     let json = serde_json::to_string(config)?;
+     self.put_meta(GLOBAL_CONFIG_META_KEY, json)
+ }
+
+ /// Reads the stored global Suggest configuration, falling back to
+ /// `SuggestGlobalConfig::default()` when nothing is stored.
+ pub fn get_global_config(&self) -> Result<SuggestGlobalConfig> {
+     match self.get_meta::<String>(GLOBAL_CONFIG_META_KEY)? {
+         Some(json) => Ok(serde_json::from_str(&json)?),
+         None => Ok(SuggestGlobalConfig::default()),
+     }
+ }
+
+ /// Serializes a provider's configuration to JSON and stores it under that
+ /// provider's metadata key.
+ pub fn put_provider_config(
+     &mut self,
+     provider: SuggestionProvider,
+     config: &SuggestProviderConfig,
+ ) -> Result<()> {
+     let key = provider_config_meta_key(provider);
+     let json = serde_json::to_string(config)?;
+     self.put_meta(&key, json)
+ }
+
+ /// Reads the stored configuration for a provider, or `None` when nothing
+ /// is stored for it.
+ pub fn get_provider_config(
+     &self,
+     provider: SuggestionProvider,
+ ) -> Result<Option<SuggestProviderConfig>> {
+     let key = provider_config_meta_key(provider);
+     match self.get_meta::<String>(&key)? {
+         Some(json) => Ok(serde_json::from_str(&json)?),
+         None => Ok(None),
+     }
+ }
+}
+
+/// Builds the metadata key for a provider's configuration: the shared key
+/// prefix followed by the provider's value as a `u8`.
+fn provider_config_meta_key(provider: SuggestionProvider) -> String {
+    let code = provider as u8;
+    format!("{PROVIDER_CONFIG_META_KEY_PREFIX}{code}")
+}
diff --git a/third_party/rust/suggest/src/error.rs b/third_party/rust/suggest/src/error.rs
new file mode 100644
index 0000000000..cd07c3591c
--- /dev/null
+++ b/third_party/rust/suggest/src/error.rs
@@ -0,0 +1,79 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+use error_support::{ErrorHandling, GetErrorHandling};
+use remote_settings::RemoteSettingsError;
+
+/// A list of errors that are internal to the component. This is the error
+/// type for private and crate-internal methods, and is never returned to the
+/// application.
+///
+/// Variants marked `#[from]` can be created from their source error with `?`.
+#[derive(Debug, thiserror::Error)]
+pub(crate) enum Error {
+ /// An error reported by `sql_support::open_database`.
+ #[error("Error opening database: {0}")]
+ OpenDatabase(#[from] sql_support::open_database::Error),
+
+ /// An error reported by `rusqlite`.
+ #[error("Error executing SQL: {0}")]
+ Sql(#[from] rusqlite::Error),
+
+ /// An error reported by `serde_json`.
+ #[error("JSON error: {0}")]
+ Json(#[from] serde_json::Error),
+
+ /// An error reported by the Remote Settings client.
+ #[error("Error from Remote Settings: {0}")]
+ RemoteSettings(#[from] RemoteSettingsError),
+
+ /// The operation was interrupted.
+ #[error("Operation interrupted")]
+ Interrupted(#[from] interrupt_support::Interrupted),
+
+ /// A store builder error; the string is appended to the error message.
+ #[error("SuggestStoreBuilder {0}")]
+ SuggestStoreBuilder(String),
+}
+
+/// The error type for all Suggest component operations. These errors are
+/// exposed to your application, which should handle them as needed.
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum SuggestApiError {
+ /// A network error occurred; `reason` holds its message.
+ #[error("Network error: {reason}")]
+ Network { reason: String },
+ /// The server requested a backoff after too many requests.
+ #[error("Backoff")]
+ Backoff { seconds: u64 },
+ /// The application interrupted a request.
+ #[error("Interrupted")]
+ Interrupted,
+ /// Any other error; `reason` holds the internal error's message.
+ #[error("Other error: {reason}")]
+ Other { reason: String },
+}
+
+// Define how our internal errors are handled and converted to external errors
+// See `support/error/README.md` for how this works, especially the warning about PII.
+impl GetErrorHandling for Error {
+ type ExternalError = SuggestApiError;
+
+ fn get_error_handling(&self) -> ErrorHandling<Self::ExternalError> {
+ match self {
+ // Do nothing for interrupted errors, this is just normal operation.
+ Self::Interrupted(_) => ErrorHandling::convert(SuggestApiError::Interrupted),
+ // Network errors are expected to happen in practice. Let's log, but not report them.
+ Self::RemoteSettings(RemoteSettingsError::RequestError(
+ viaduct::Error::NetworkError(e),
+ )) => ErrorHandling::convert(SuggestApiError::Network {
+ reason: e.to_string(),
+ })
+ .log_warning(),
+ // Backoff error shouldn't happen in practice, so let's report them for now.
+ // If these do happen in practice and we decide that there is a valid reason for them,
+ // then consider switching from reporting to Sentry to counting in Glean.
+ Self::RemoteSettings(RemoteSettingsError::BackoffError(seconds)) => {
+ ErrorHandling::convert(SuggestApiError::Backoff { seconds: *seconds })
+ .report_error("suggest-backoff")
+ }
+ _ => ErrorHandling::convert(SuggestApiError::Other {
+ reason: self.to_string(),
+ })
+ .report_error("suggest-unexpected"),
+ }
+ }
+}
diff --git a/third_party/rust/suggest/src/keyword.rs b/third_party/rust/suggest/src/keyword.rs
new file mode 100644
index 0000000000..d15688d016
--- /dev/null
+++ b/third_party/rust/suggest/src/keyword.rs
@@ -0,0 +1,102 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+/// Given a list of keywords for a suggestion, returns a phrase that best
+/// completes the user's query. Only keywords that start with `query` are
+/// considered. Two heuristics are tried in order:
+///
+/// 1. Take the first keyword in the list that has more words than the query
+///    (counting one extra word when the query ends with whitespace), and
+///    return its leading words, joined by single spaces, up to that count.
+/// 2. Otherwise, return the longest keyword that is strictly longer than the
+///    query; on ties the later keyword wins. If there is none, return the
+///    query itself.
+pub fn full_keyword(query: &str, keywords: &[impl AsRef<str>]) -> String {
+    // If the query ends with whitespace, ask for one more word, so that the
+    // completed phrase can show a word after the space.
+    let trailing_space = query.ends_with(char::is_whitespace);
+    let wanted_words = query.split_whitespace().count() + usize::from(trailing_space);
+
+    // Heuristic 1: first matching keyword with more words than we want.
+    for keyword in keywords {
+        let phrase = keyword.as_ref();
+        if !phrase.starts_with(query) {
+            continue;
+        }
+        let words: Vec<&str> = phrase.split_whitespace().collect();
+        if words.len() > wanted_words {
+            return words[..wanted_words].join(" ");
+        }
+    }
+
+    // Heuristic 2: longest matching keyword that extends the query. `>=` makes
+    // the last of several equally long candidates win.
+    let mut best: Option<&str> = None;
+    for keyword in keywords {
+        let phrase = keyword.as_ref();
+        if phrase.len() <= query.len() || !phrase.starts_with(query) {
+            continue;
+        }
+        let replaces_best = match best {
+            Some(current) => phrase.trim().len() >= current.trim().len(),
+            None => true,
+        };
+        if replaces_best {
+            best = Some(phrase);
+        }
+    }
+    best.unwrap_or(query).to_owned()
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // When a keyword has more words than the query, the result is that
+ // keyword cut down to the query's word count.
+ #[test]
+ fn keywords_with_more_words() {
+ assert_eq!(
+ full_keyword(
+ "moz",
+ &[
+ "moz",
+ "mozi",
+ "mozil",
+ "mozill",
+ "mozilla",
+ "mozilla firefox"
+ ]
+ ),
+ "mozilla".to_owned(),
+ );
+ assert_eq!(
+ full_keyword(
+ "mozilla",
+ &[
+ "moz",
+ "mozi",
+ "mozil",
+ "mozill",
+ "mozilla",
+ "mozilla firefox"
+ ]
+ ),
+ "mozilla".to_owned(),
+ );
+ }
+
+ // When no keyword has more words than the query, the longest keyword that
+ // starts with the query is returned.
+ #[test]
+ fn keywords_with_longer_phrase() {
+ assert_eq!(
+ full_keyword("moz", &["moz", "mozi", "mozil", "mozill", "mozilla"]),
+ "mozilla".to_owned()
+ );
+ assert_eq!(
+ full_keyword(
+ "mozilla f",
+ &["moz", "mozi", "mozil", "mozill", "mozilla firefox"]
+ ),
+ "mozilla firefox".to_owned()
+ );
+ }
+
+ // A trailing space in the query asks for a completion that includes the
+ // next word.
+ #[test]
+ fn query_ends_with_space() {
+ assert_eq!(
+ full_keyword(
+ "mozilla ",
+ &["moz", "mozi", "mozil", "mozill", "mozilla firefox"]
+ ),
+ "mozilla firefox".to_owned()
+ );
+ }
+}
diff --git a/third_party/rust/suggest/src/lib.rs b/third_party/rust/suggest/src/lib.rs
new file mode 100644
index 0000000000..23775b7dec
--- /dev/null
+++ b/third_party/rust/suggest/src/lib.rs
@@ -0,0 +1,36 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+use remote_settings::RemoteSettingsConfig;
+mod config;
+mod db;
+mod error;
+mod keyword;
+pub mod pocket;
+mod provider;
+mod rs;
+mod schema;
+mod store;
+mod suggestion;
+mod yelp;
+
+pub use config::{SuggestGlobalConfig, SuggestProviderConfig};
+pub use error::SuggestApiError;
+pub use provider::SuggestionProvider;
+pub use store::{SuggestIngestionConstraints, SuggestStore, SuggestStoreBuilder};
+pub use suggestion::{raw_suggestion_url_matches, Suggestion};
+
+/// Result type for crate-internal operations, using the internal error type.
+pub(crate) type Result<T> = std::result::Result<T, error::Error>;
+/// Result type for public operations, using the public `SuggestApiError`.
+pub type SuggestApiResult<T> = std::result::Result<T, error::SuggestApiError>;
+
+/// A query for suggestions to show in the address bar.
+#[derive(Debug, Default)]
+pub struct SuggestionQuery {
+ // NOTE(review): presumably the text typed by the user; confirm against the
+ // store's query code.
+ pub keyword: String,
+ // NOTE(review): presumably the providers to fetch suggestions from; confirm.
+ pub providers: Vec<SuggestionProvider>,
+ // NOTE(review): presumably the maximum number of suggestions to return,
+ // with `None` meaning no limit; confirm.
+ pub limit: Option<i32>,
+}
+
+uniffi::include_scaffolding!("suggest");
diff --git a/third_party/rust/suggest/src/pocket.rs b/third_party/rust/suggest/src/pocket.rs
new file mode 100644
index 0000000000..cf7070c62a
--- /dev/null
+++ b/third_party/rust/suggest/src/pocket.rs
@@ -0,0 +1,59 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+use rusqlite::types::{FromSql, FromSqlError, FromSqlResult, ToSqlOutput, ValueRef};
+use rusqlite::{Result as RusqliteResult, ToSql};
+
+/// Confidence classification of Pocket keywords. A high-confidence keyword
+/// requires an exact match on both the keyword prefix and the suffix, while
+/// a low-confidence keyword requires a match on the prefix and only a
+/// substring match on the suffix.
+///
+/// The explicit discriminants are the values written to the database by the
+/// `ToSql` impl.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
+#[repr(u8)]
+pub enum KeywordConfidence {
+ Low = 0,
+ High = 1,
+}
+
+impl FromSql for KeywordConfidence {
+    /// Reads a confidence level from an integer column. A value that is not a
+    /// known discriminant is rejected with `FromSqlError::OutOfRange`.
+    fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
+        let raw = value.as_i64()?;
+        let confidence = u8::try_from(raw).ok().and_then(Self::from_u8);
+        match confidence {
+            Some(confidence) => Ok(confidence),
+            None => Err(FromSqlError::OutOfRange(raw)),
+        }
+    }
+}
+
+impl KeywordConfidence {
+    /// Converts a numeric discriminant back into a confidence level, or
+    /// returns `None` if the value matches no variant.
+    #[inline]
+    pub(crate) fn from_u8(v: u8) -> Option<Self> {
+        let confidence = match v {
+            0 => Self::Low,
+            1 => Self::High,
+            _ => return None,
+        };
+        Some(confidence)
+    }
+}
+
+impl ToSql for KeywordConfidence {
+    /// Writes the confidence level as its `u8` discriminant.
+    fn to_sql(&self) -> RusqliteResult<ToSqlOutput<'_>> {
+        let discriminant = *self as u8;
+        Ok(ToSqlOutput::from(discriminant))
+    }
+}
+
+/// Splits the keyword at its first space character (`' '`) into a prefix and
+/// a suffix. The space itself is dropped. If the keyword contains no space,
+/// the whole keyword is the prefix and the suffix is the empty string.
+///
+/// # Examples
+///
+/// ```
+/// # use suggest::pocket::split_keyword;
+/// assert_eq!(split_keyword("foo"), ("foo", ""));
+/// assert_eq!(split_keyword("foo bar baz"), ("foo", "bar baz"));
+/// ```
+pub fn split_keyword(keyword: &str) -> (&str, &str) {
+    match keyword.split_once(' ') {
+        Some((prefix, suffix)) => (prefix, suffix),
+        None => (keyword, ""),
+    }
+}
diff --git a/third_party/rust/suggest/src/provider.rs b/third_party/rust/suggest/src/provider.rs
new file mode 100644
index 0000000000..1449c35c8a
--- /dev/null
+++ b/third_party/rust/suggest/src/provider.rs
@@ -0,0 +1,55 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+use rusqlite::{
+ types::{FromSql, FromSqlError, FromSqlResult, ToSql, ToSqlOutput, ValueRef},
+ Result as RusqliteResult,
+};
+
+/// A provider is a source of search suggestions.
+///
+/// The explicit discriminants are the values written to the database by the
+/// `ToSql` impl, so renumbering an existing variant would change the meaning
+/// of data that is already stored.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
+#[repr(u8)]
+pub enum SuggestionProvider {
+ Amp = 1,
+ Wikipedia = 2,
+ Amo = 3,
+ Pocket = 4,
+ Yelp = 5,
+ Mdn = 6,
+ Weather = 7,
+ AmpMobile = 8,
+}
+
+impl FromSql for SuggestionProvider {
+    /// Reads a provider from an integer column. A value that `from_u8` does
+    /// not recognize is rejected with `FromSqlError::OutOfRange`.
+    fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
+        let raw = value.as_i64()?;
+        let provider = u8::try_from(raw).ok().and_then(Self::from_u8);
+        match provider {
+            Some(provider) => Ok(provider),
+            None => Err(FromSqlError::OutOfRange(raw)),
+        }
+    }
+}
+
+impl SuggestionProvider {
+    /// Converts a numeric discriminant back into a provider.
+    ///
+    /// Returns `None` if `v` matches no variant. Every variant of
+    /// [`SuggestionProvider`] needs an arm here: the `ToSql` impl writes the
+    /// discriminant, and the `FromSql` impl relies on this function to read it
+    /// back, so a missing arm makes a stored provider unreadable.
+    #[inline]
+    pub(crate) fn from_u8(v: u8) -> Option<Self> {
+        match v {
+            1 => Some(SuggestionProvider::Amp),
+            2 => Some(SuggestionProvider::Wikipedia),
+            3 => Some(SuggestionProvider::Amo),
+            4 => Some(SuggestionProvider::Pocket),
+            5 => Some(SuggestionProvider::Yelp),
+            6 => Some(SuggestionProvider::Mdn),
+            7 => Some(SuggestionProvider::Weather),
+            // Previously missing: `AmpMobile = 8` could be written to the
+            // database but not read back.
+            8 => Some(SuggestionProvider::AmpMobile),
+            _ => None,
+        }
+    }
+}
+
+impl ToSql for SuggestionProvider {
+    /// Writes the provider as its `u8` discriminant.
+    fn to_sql(&self) -> RusqliteResult<ToSqlOutput<'_>> {
+        let discriminant = *self as u8;
+        Ok(ToSqlOutput::from(discriminant))
+    }
+}
diff --git a/third_party/rust/suggest/src/rs.rs b/third_party/rust/suggest/src/rs.rs
new file mode 100644
index 0000000000..198a8c43f6
--- /dev/null
+++ b/third_party/rust/suggest/src/rs.rs
@@ -0,0 +1,346 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+//! Crate-internal types for interacting with Remote Settings (`rs`). Types in
+//! this module describe records and attachments in the Suggest Remote Settings
+//! collection.
+//!
+//! To add a new suggestion `T` to this component, you'll generally need to:
+//!
+//! 1. Add a variant named `T` to [`SuggestRecord`]. The variant must have a
+//! `#[serde(rename)]` attribute that matches the suggestion record's
+//! `type` field.
+//! 2. Define a `DownloadedTSuggestion` type with the new suggestion's fields,
+//! matching their attachment's schema. Your new type must derive or
+//! implement [`serde::Deserialize`].
+//! 3. Update the database schema in the [`schema`] module to store the new
+//! suggestion.
+//! 4. Add an `insert_t_suggestions()` method to [`db::SuggestDao`] that
+//! inserts `DownloadedTSuggestion`s into the database.
+//! 5. Update [`store::SuggestStoreInner::ingest()`] to download, deserialize,
+//! and store the new suggestion.
+//! 6. Add a variant named `T` to [`suggestion::Suggestion`], with the fields
+//! that you'd like to expose to the application. These can be the same
+//! fields as `DownloadedTSuggestion`, or slightly different, depending on
+//! what the application needs to show the suggestion.
+//! 7. Update the `Suggestion` enum definition in `suggest.udl` to match your
+//! new [`suggestion::Suggestion`] variant.
+//! 8. Update any [`db::SuggestDao`] methods that query the database to include
+//! the new suggestion in their results, and return `Suggestion::T` variants
+//! as needed.
+
+use std::borrow::Cow;
+
+use remote_settings::{GetItemsOptions, RemoteSettingsResponse};
+use serde::{Deserialize, Deserializer};
+
+use crate::{provider::SuggestionProvider, Result};
+
+/// The Suggest Remote Settings collection name.
+pub(crate) const REMOTE_SETTINGS_COLLECTION: &str = "quicksuggest";
+
+/// The maximum number of suggestions in a Suggest record's attachment.
+///
+/// This should be the same as the `BUCKET_SIZE` constant in the
+/// `mozilla-services/quicksuggest-rs` repo.
+pub(crate) const SUGGESTIONS_PER_ATTACHMENT: u64 = 200;
+
+/// A trait for a client that downloads suggestions from Remote Settings.
+///
+/// This trait lets tests use a mock client.
+pub(crate) trait SuggestRemoteSettingsClient {
+ /// Fetches records from the Suggest Remote Settings collection.
+ ///
+ /// NOTE(review): `options` presumably filters and orders the records that
+ /// are returned; confirm against the `remote_settings` crate.
+ fn get_records_with_options(&self, options: &GetItemsOptions)
+ -> Result<RemoteSettingsResponse>;
+
+ /// Fetches a record's attachment from the Suggest Remote Settings
+ /// collection.
+ ///
+ /// Returns the attachment's raw bytes.
+ fn get_attachment(&self, location: &str) -> Result<Vec<u8>>;
+}
+
+// The production client: both methods forward to the inherent methods of
+// `remote_settings::Client` and convert the error into the crate's error type.
+impl SuggestRemoteSettingsClient for remote_settings::Client {
+    fn get_records_with_options(
+        &self,
+        options: &GetItemsOptions,
+    ) -> Result<RemoteSettingsResponse> {
+        let response = remote_settings::Client::get_records_with_options(self, options)?;
+        Ok(response)
+    }
+
+    fn get_attachment(&self, location: &str) -> Result<Vec<u8>> {
+        let bytes = remote_settings::Client::get_attachment(self, location)?;
+        Ok(bytes)
+    }
+}
+
+/// A record in the Suggest Remote Settings collection.
+///
+/// The variant is selected by the record's `type` field. Most variants carry
+/// no additional fields. The `Weather` and `GlobalConfig` variants are the
+/// exception: they carry their data inline in the record.
+#[derive(Clone, Debug, Deserialize)]
+#[serde(tag = "type")]
+pub(crate) enum SuggestRecord {
+ #[serde(rename = "icon")]
+ Icon,
+ #[serde(rename = "data")]
+ AmpWikipedia,
+ #[serde(rename = "amo-suggestions")]
+ Amo,
+ #[serde(rename = "pocket-suggestions")]
+ Pocket,
+ #[serde(rename = "yelp-suggestions")]
+ Yelp,
+ #[serde(rename = "mdn-suggestions")]
+ Mdn,
+ #[serde(rename = "weather")]
+ Weather(DownloadedWeatherData),
+ #[serde(rename = "configuration")]
+ GlobalConfig(DownloadedGlobalConfig),
+ #[serde(rename = "amp-mobile-suggestions")]
+ AmpMobile,
+}
+
+/// Represents either a single value, or a list of values. This is used to
+/// deserialize downloaded attachments.
+///
+/// The enum is untagged, so serde tries `One` first and then `Many`.
+#[derive(Clone, Debug, Deserialize)]
+#[serde(untagged)]
+enum OneOrMany<T> {
+ One(T),
+ Many(Vec<T>),
+}
+
+/// A downloaded Remote Settings attachment that contains suggestions.
+#[derive(Clone, Debug, Deserialize)]
+#[serde(transparent)]
+pub(crate) struct SuggestAttachment<T>(OneOrMany<T>);
+
+impl<T> SuggestAttachment<T> {
+    /// Returns the suggestions to ingest from the downloaded attachment as a
+    /// slice. A single value is exposed as a one-element slice.
+    pub fn suggestions(&self) -> &[T] {
+        let Self(contents) = self;
+        match contents {
+            OneOrMany::One(single) => std::slice::from_ref(single),
+            OneOrMany::Many(list) => list.as_slice(),
+        }
+    }
+}
+
+/// The ID of a record in the Suggest Remote Settings collection.
+#[derive(Clone, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd)]
+#[serde(transparent)]
+pub(crate) struct SuggestRecordId<'a>(Cow<'a, str>);
+
+impl<'a> SuggestRecordId<'a> {
+    /// Returns the record ID as a string slice.
+    pub fn as_str(&self) -> &str {
+        &*self.0
+    }
+
+    /// If this ID is for an icon record, extracts and returns the icon ID,
+    /// which is the part of the record ID after the `icon-` prefix. Returns
+    /// `None` for any ID without that prefix.
+    ///
+    /// The icon ID is the primary key for an ingested icon. Downloaded
+    /// suggestions also reference these icon IDs, in
+    /// [`DownloadedSuggestion::icon_id`].
+    pub fn as_icon_id(&self) -> Option<&str> {
+        self.as_str().strip_prefix("icon-")
+    }
+}
+
+/// Builds a record ID from anything that converts into a `Cow<'a, str>`.
+impl<'a, T: Into<Cow<'a, str>>> From<T> for SuggestRecordId<'a> {
+    fn from(value: T) -> Self {
+        let id: Cow<'a, str> = value.into();
+        SuggestRecordId(id)
+    }
+}
+
+/// Fields that are shared by downloaded AMP and Wikipedia suggestions. This
+/// struct is flattened into both `DownloadedAmpSuggestion` and
+/// `DownloadedWikipediaSuggestion`.
+#[derive(Clone, Debug, Deserialize)]
+pub(crate) struct DownloadedSuggestionCommonDetails {
+ pub keywords: Vec<String>,
+ pub title: String,
+ pub url: String,
+ // Optional: deserializes to `None` when the attachment omits it.
+ pub score: Option<f64>,
+}
+
+/// An AMP suggestion to ingest from an AMP-Wikipedia attachment.
+#[derive(Clone, Debug, Deserialize)]
+pub(crate) struct DownloadedAmpSuggestion {
+ // The common fields sit at the same JSON level as the AMP-specific ones.
+ #[serde(flatten)]
+ pub common_details: DownloadedSuggestionCommonDetails,
+ pub advertiser: String,
+ // Read from the attachment's `id` field.
+ #[serde(rename = "id")]
+ pub block_id: i32,
+ pub iab_category: String,
+ pub click_url: String,
+ pub impression_url: String,
+ // Read from the attachment's `icon` field.
+ #[serde(rename = "icon")]
+ pub icon_id: String,
+}
+
+/// A Wikipedia suggestion to ingest from an AMP-Wikipedia attachment.
+#[derive(Clone, Debug, Deserialize)]
+pub(crate) struct DownloadedWikipediaSuggestion {
+ // The common fields sit at the same JSON level as `icon`.
+ #[serde(flatten)]
+ pub common_details: DownloadedSuggestionCommonDetails,
+ // Read from the attachment's `icon` field.
+ #[serde(rename = "icon")]
+ pub icon_id: String,
+}
+
+/// A suggestion to ingest from an AMP-Wikipedia attachment downloaded from
+/// Remote Settings.
+#[derive(Clone, Debug)]
+pub(crate) enum DownloadedAmpWikipediaSuggestion {
+ Amp(DownloadedAmpSuggestion),
+ Wikipedia(DownloadedWikipediaSuggestion),
+}
+
+impl DownloadedAmpWikipediaSuggestion {
+    /// Returns the details that AMP and Wikipedia suggestions have in common.
+    pub fn common_details(&self) -> &DownloadedSuggestionCommonDetails {
+        match self {
+            Self::Amp(amp) => &amp.common_details,
+            Self::Wikipedia(wikipedia) => &wikipedia.common_details,
+        }
+    }
+
+    /// Returns the provider that this suggestion belongs to.
+    pub fn provider(&self) -> SuggestionProvider {
+        match self {
+            Self::Amp(_) => SuggestionProvider::Amp,
+            Self::Wikipedia(_) => SuggestionProvider::Wikipedia,
+        }
+    }
+}
+
+impl<'de> Deserialize<'de> for DownloadedAmpWikipediaSuggestion {
+ /// Deserializes a suggestion as Wikipedia when its `advertiser` field is
+ /// `"Wikipedia"`, and as AMP otherwise.
+ fn deserialize<D>(
+ deserializer: D,
+ ) -> std::result::Result<DownloadedAmpWikipediaSuggestion, D::Error>
+ where
+ D: Deserializer<'de>,
+ {
+ // AMP and Wikipedia suggestions use the same schema. To separate them,
+ // we use a "maybe tagged" outer enum with tagged and untagged variants,
+ // and a "tagged" inner enum.
+ //
+ // Wikipedia suggestions will deserialize successfully into the tagged
+ // variant. AMP suggestions will try the tagged variant, fail, and fall
+ // back to the untagged variant.
+ //
+ // This approach works around serde-rs/serde#912.
+
+ // Variant order matters: `Tagged` must come first so it is tried first.
+ #[derive(Deserialize)]
+ #[serde(untagged)]
+ enum MaybeTagged {
+ Tagged(Tagged),
+ Untagged(DownloadedAmpSuggestion),
+ }
+
+ // The `advertiser` field acts as the tag; only "Wikipedia" is accepted.
+ #[derive(Deserialize)]
+ #[serde(tag = "advertiser")]
+ enum Tagged {
+ #[serde(rename = "Wikipedia")]
+ Wikipedia(DownloadedWikipediaSuggestion),
+ }
+
+ Ok(match MaybeTagged::deserialize(deserializer)? {
+ MaybeTagged::Tagged(Tagged::Wikipedia(wikipedia)) => Self::Wikipedia(wikipedia),
+ MaybeTagged::Untagged(amp) => Self::Amp(amp),
+ })
+ }
+}
+
+/// An AMO suggestion to ingest from an attachment.
+#[derive(Clone, Debug, Deserialize)]
+pub(crate) struct DownloadedAmoSuggestion {
+ pub description: String,
+ pub url: String,
+ pub guid: String,
+ // Read from the attachment's `icon` field.
+ #[serde(rename = "icon")]
+ pub icon_url: String,
+ // Optional: deserializes to `None` when the attachment omits it.
+ pub rating: Option<String>,
+ pub number_of_ratings: i64,
+ pub title: String,
+ pub keywords: Vec<String>,
+ pub score: f64,
+}
+/// A Pocket suggestion to ingest from a Pocket suggestion attachment.
+#[derive(Clone, Debug, Deserialize)]
+pub(crate) struct DownloadedPocketSuggestion {
+ pub url: String,
+ pub title: String,
+ // Read from the attachment's `lowConfidenceKeywords` field.
+ #[serde(rename = "lowConfidenceKeywords")]
+ pub low_confidence_keywords: Vec<String>,
+ // Read from the attachment's `highConfidenceKeywords` field.
+ #[serde(rename = "highConfidenceKeywords")]
+ pub high_confidence_keywords: Vec<String>,
+ pub score: f64,
+}
+/// A location sign for Yelp to ingest from a Yelp attachment.
+#[derive(Clone, Debug, Deserialize)]
+pub(crate) struct DownloadedYelpLocationSign {
+ pub keyword: String,
+ // Read from the attachment's `needLocation` field.
+ #[serde(rename = "needLocation")]
+ pub need_location: bool,
+}
+/// A Yelp suggestion to ingest from a Yelp attachment.
+///
+/// The attachment uses camelCase field names, which are mapped to the
+/// snake_case fields below through `#[serde(rename)]`.
+#[derive(Clone, Debug, Deserialize)]
+pub(crate) struct DownloadedYelpSuggestion {
+ pub subjects: Vec<String>,
+ #[serde(rename = "preModifiers")]
+ pub pre_modifiers: Vec<String>,
+ #[serde(rename = "postModifiers")]
+ pub post_modifiers: Vec<String>,
+ #[serde(rename = "locationSigns")]
+ pub location_signs: Vec<DownloadedYelpLocationSign>,
+ #[serde(rename = "yelpModifiers")]
+ pub yelp_modifiers: Vec<String>,
+ // Read from the attachment's `icon` field.
+ #[serde(rename = "icon")]
+ pub icon_id: String,
+ pub score: f64,
+}
+
+/// An MDN suggestion to ingest from an attachment.
+///
+/// Every field is required when deserializing.
+#[derive(Clone, Debug, Deserialize)]
+pub(crate) struct DownloadedMdnSuggestion {
+ pub url: String,
+ pub title: String,
+ pub description: String,
+ pub keywords: Vec<String>,
+ pub score: f64,
+}
+
+/// Weather data to ingest from a weather record.
+///
+/// The data is nested under the record's `weather` field.
+#[derive(Clone, Debug, Deserialize)]
+pub(crate) struct DownloadedWeatherData {
+ pub weather: DownloadedWeatherDataInner,
+}
+/// The contents of a weather record's `weather` field.
+#[derive(Clone, Debug, Deserialize)]
+pub(crate) struct DownloadedWeatherDataInner {
+ pub min_keyword_length: i32,
+ pub keywords: Vec<String>,
+ // Remote settings doesn't support floats in record JSON so we use a
+ // stringified float instead. If a float can't be parsed, this will be None.
+ // Because of `default`, a missing field also becomes None.
+ #[serde(default, deserialize_with = "de_stringified_f64")]
+ pub score: Option<f64>,
+}
+
+/// Global Suggest configuration data to ingest from a configuration record.
+///
+/// The data is nested under the record's `configuration` field.
+#[derive(Clone, Debug, Deserialize)]
+pub(crate) struct DownloadedGlobalConfig {
+ pub configuration: DownloadedGlobalConfigInner,
+}
+/// The contents of a configuration record's `configuration` field.
+#[derive(Clone, Debug, Deserialize)]
+pub(crate) struct DownloadedGlobalConfigInner {
+ /// The maximum number of times the user can click "Show less frequently"
+ /// for a suggestion in the UI.
+ pub show_less_frequently_cap: i32,
+}
+
+/// Deserializes a float that is stored as a JSON string.
+///
+/// Returns `Ok(None)` when the string does not parse as an `f64`. Fails with
+/// the deserializer's error when the value is not a string at all.
+fn de_stringified_f64<'de, D>(deserializer: D) -> std::result::Result<Option<f64>, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    let raw = String::deserialize(deserializer)?;
+    Ok(raw.parse::<f64>().ok())
+}
diff --git a/third_party/rust/suggest/src/schema.rs b/third_party/rust/suggest/src/schema.rs
new file mode 100644
index 0000000000..95d987c09e
--- /dev/null
+++ b/third_party/rust/suggest/src/schema.rs
@@ -0,0 +1,153 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+use rusqlite::{Connection, Transaction};
+use sql_support::open_database::{self, ConnectionInitializer};
+
+/// The current database schema version.
+///
+/// For any changes to the schema [`SQL`], please make sure to:
+///
+/// 1. Bump this version.
+/// 2. Add a migration from the old version to the new version in
+/// [`SuggestConnectionInitializer::upgrade_from`].
+pub const VERSION: u32 = 14;
+
+/// The current Suggest database schema.
+pub const SQL: &str = "
+ CREATE TABLE meta(
+ key TEXT PRIMARY KEY,
+ value NOT NULL
+ ) WITHOUT ROWID;
+
+ CREATE TABLE keywords(
+ keyword TEXT NOT NULL,
+ suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
+ rank INTEGER NOT NULL,
+ PRIMARY KEY (keyword, suggestion_id)
+ ) WITHOUT ROWID;
+
+ CREATE TABLE prefix_keywords(
+ keyword_prefix TEXT NOT NULL,
+ keyword_suffix TEXT NOT NULL DEFAULT '',
+ confidence INTEGER NOT NULL DEFAULT 0,
+ rank INTEGER NOT NULL,
+ suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
+ PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id)
+ ) WITHOUT ROWID;
+
+ CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank);
+
+ CREATE TABLE suggestions(
+ id INTEGER PRIMARY KEY,
+ record_id TEXT NOT NULL,
+ provider INTEGER NOT NULL,
+ title TEXT NOT NULL,
+ url TEXT NOT NULL,
+ score REAL NOT NULL
+ );
+
+ CREATE TABLE amp_custom_details(
+ suggestion_id INTEGER PRIMARY KEY,
+ advertiser TEXT NOT NULL,
+ block_id INTEGER NOT NULL,
+ iab_category TEXT NOT NULL,
+ impression_url TEXT NOT NULL,
+ click_url TEXT NOT NULL,
+ icon_id TEXT NOT NULL,
+ FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
+ );
+
+ CREATE TABLE wikipedia_custom_details(
+ suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE,
+ icon_id TEXT NOT NULL
+ );
+
+ CREATE TABLE amo_custom_details(
+ suggestion_id INTEGER PRIMARY KEY,
+ description TEXT NOT NULL,
+ guid TEXT NOT NULL,
+ icon_url TEXT NOT NULL,
+ rating TEXT,
+ number_of_ratings INTEGER NOT NULL,
+ FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
+ );
+
+ CREATE INDEX suggestions_record_id ON suggestions(record_id);
+
+ CREATE TABLE icons(
+ id TEXT PRIMARY KEY,
+ data BLOB NOT NULL
+ ) WITHOUT ROWID;
+
+ CREATE TABLE yelp_subjects(
+ keyword TEXT PRIMARY KEY,
+ record_id TEXT NOT NULL
+ ) WITHOUT ROWID;
+
+ CREATE TABLE yelp_modifiers(
+ type INTEGER NOT NULL,
+ keyword TEXT NOT NULL,
+ record_id TEXT NOT NULL,
+ PRIMARY KEY (type, keyword)
+ ) WITHOUT ROWID;
+
+ CREATE TABLE yelp_location_signs(
+ keyword TEXT PRIMARY KEY,
+ need_location INTEGER NOT NULL,
+ record_id TEXT NOT NULL
+ ) WITHOUT ROWID;
+
+ CREATE TABLE yelp_custom_details(
+ icon_id TEXT PRIMARY KEY,
+ score REAL NOT NULL,
+ record_id TEXT NOT NULL
+ ) WITHOUT ROWID;
+
+ CREATE TABLE mdn_custom_details(
+ suggestion_id INTEGER PRIMARY KEY,
+ description TEXT NOT NULL,
+ FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
+ );
+";
+
+/// Initializes an SQLite connection to the Suggest database, performing
+/// migrations as needed.
+pub struct SuggestConnectionInitializer;
+
+impl ConnectionInitializer for SuggestConnectionInitializer {
+ const NAME: &'static str = "suggest db";
+ const END_VERSION: u32 = VERSION;
+
+ /// Applies the connection pragmas and registers the debug SQL functions.
+ fn prepare(&self, conn: &Connection, _db_empty: bool) -> open_database::Result<()> {
+ let initial_pragmas = "
+ -- Use in-memory storage for TEMP tables.
+ PRAGMA temp_store = 2;
+
+ PRAGMA journal_mode = WAL;
+ PRAGMA foreign_keys = ON;
+ ";
+ conn.execute_batch(initial_pragmas)?;
+ sql_support::debug_tools::define_debug_functions(conn)?;
+
+ Ok(())
+ }
+
+ /// Creates the current schema by executing [`SQL`] as a batch.
+ fn init(&self, db: &Transaction<'_>) -> open_database::Result<()> {
+ Ok(db.execute_batch(SQL)?)
+ }
+
+ /// No in-place migrations exist: versions 1 through 13 are reported as
+ /// corrupt, and any other version as incompatible.
+ fn upgrade_from(&self, _db: &Transaction<'_>, version: u32) -> open_database::Result<()> {
+ match version {
+ 1..=13 => {
+ // Treat databases with these older schema versions as corrupt,
+ // so that they'll be replaced by a fresh, empty database with
+ // the current schema.
+ Err(open_database::Error::Corrupt)
+ }
+ _ => Err(open_database::Error::IncompatibleVersion(version)),
+ }
+ }
+}
diff --git a/third_party/rust/suggest/src/store.rs b/third_party/rust/suggest/src/store.rs
new file mode 100644
index 0000000000..e1f437e8c5
--- /dev/null
+++ b/third_party/rust/suggest/src/store.rs
@@ -0,0 +1,5316 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+use std::{
+ collections::BTreeMap,
+ path::{Path, PathBuf},
+ sync::Arc,
+};
+
+use error_support::handle_error;
+use once_cell::sync::OnceCell;
+use parking_lot::Mutex;
+use remote_settings::{
+ self, GetItemsOptions, RemoteSettingsConfig, RemoteSettingsRecord, SortOrder,
+};
+use rusqlite::{
+ types::{FromSql, ToSqlOutput},
+ ToSql,
+};
+use serde::{de::DeserializeOwned, Deserialize, Serialize};
+
+use crate::{
+ config::{SuggestGlobalConfig, SuggestProviderConfig},
+ db::{
+ ConnectionType, SuggestDao, SuggestDb, LAST_INGEST_META_KEY, UNPARSABLE_RECORDS_META_KEY,
+ },
+ error::Error,
+ provider::SuggestionProvider,
+ rs::{
+ SuggestAttachment, SuggestRecord, SuggestRecordId, SuggestRemoteSettingsClient,
+ REMOTE_SETTINGS_COLLECTION, SUGGESTIONS_PER_ATTACHMENT,
+ },
+ schema::VERSION,
+ Result, SuggestApiResult, Suggestion, SuggestionQuery,
+};
+
+/// The chunk size used to request unparsable records: at most this many
+/// record IDs are added as filters to a single Remote Settings request when
+/// previously unparsable records are requested again during an ingest.
+pub const UNPARSABLE_IDS_PER_REQUEST: usize = 150;
+
+/// Builder for [SuggestStore]
+///
+/// Using a builder is preferred to calling the constructor directly since it's harder to confuse
+/// the data_path and cache_path strings.
+///
+/// The settings live behind a mutex because the chained setters receive the
+/// builder as `Arc<Self>` and therefore only have shared access to it.
+pub struct SuggestStoreBuilder(Mutex<SuggestStoreBuilderInner>);
+
+/// The settings collected by a [`SuggestStoreBuilder`]. Every field starts
+/// out as `None`.
+#[derive(Default)]
+struct SuggestStoreBuilderInner {
+ /// Set by `data_path()`; `build()` fails if this is still `None`.
+ data_path: Option<String>,
+ /// Set by `cache_path()`; `build()` fails if this is still `None`.
+ cache_path: Option<String>,
+ /// Set by `remote_settings_config()`; when `None`, `build()` falls back
+ /// to a configuration that only names the Suggest collection.
+ remote_settings_config: Option<RemoteSettingsConfig>,
+}
+
+impl Default for SuggestStoreBuilder {
+    /// Same as [`SuggestStoreBuilder::new()`]: nothing is configured yet.
+    fn default() -> Self {
+        SuggestStoreBuilder::new()
+    }
+}
+
+impl SuggestStoreBuilder {
+    /// Creates a builder with no paths and no Remote Settings configuration.
+    pub fn new() -> SuggestStoreBuilder {
+        SuggestStoreBuilder(Mutex::new(Default::default()))
+    }
+
+    /// Sets the path that `build()` passes to the store as its data path.
+    pub fn data_path(self: Arc<Self>, path: String) -> Arc<Self> {
+        {
+            let mut settings = self.0.lock();
+            settings.data_path = Some(path);
+        }
+        self
+    }
+
+    /// Sets the path that `build()` passes to the store as its cache path.
+    pub fn cache_path(self: Arc<Self>, path: String) -> Arc<Self> {
+        {
+            let mut settings = self.0.lock();
+            settings.cache_path = Some(path);
+        }
+        self
+    }
+
+    /// Sets the Remote Settings configuration used to create the client.
+    pub fn remote_settings_config(self: Arc<Self>, config: RemoteSettingsConfig) -> Arc<Self> {
+        {
+            let mut settings = self.0.lock();
+            settings.remote_settings_config = Some(config);
+        }
+        self
+    }
+
+    /// Builds the store from the collected settings.
+    ///
+    /// Fails with `Error::SuggestStoreBuilder` when the data path or the
+    /// cache path was never set, and propagates any error from creating the
+    /// Remote Settings client.
+    #[handle_error(Error)]
+    pub fn build(&self) -> SuggestApiResult<Arc<SuggestStore>> {
+        let settings = self.0.lock();
+        let required = |value: &Option<String>, message: &str| {
+            value
+                .clone()
+                .ok_or_else(|| Error::SuggestStoreBuilder(message.to_owned()))
+        };
+        let data_path = required(&settings.data_path, "data_path not specified")?;
+        let cache_path = required(&settings.cache_path, "cache_path not specified")?;
+
+        // Without an explicit configuration, only the collection is named.
+        let config = settings
+            .remote_settings_config
+            .clone()
+            .unwrap_or_else(|| RemoteSettingsConfig {
+                server_url: None,
+                bucket_name: None,
+                collection_name: REMOTE_SETTINGS_COLLECTION.into(),
+            });
+        let settings_client = remote_settings::Client::new(config)?;
+
+        let store = SuggestStore {
+            inner: SuggestStoreInner::new(data_path, cache_path, settings_client),
+        };
+        Ok(Arc::new(store))
+    }
+}
+
+/// The store is the entry point to the Suggest component. It incrementally
+/// downloads suggestions from the Remote Settings service, stores them in a
+/// local database, and returns them in response to user queries.
+///
+/// Your application should create a single store, and manage it as a singleton.
+/// The store is thread-safe, and supports concurrent queries and ingests. We
+/// expect that your application will call [`SuggestStore::query()`] to show
+/// suggestions as the user types into the address bar, and periodically call
+/// [`SuggestStore::ingest()`] in the background to update the database with
+/// new suggestions from Remote Settings.
+///
+/// For responsiveness, we recommend always calling `query()` on a worker
+/// thread. When the user types new input into the address bar, call
+/// [`SuggestStore::interrupt()`] on the main thread to cancel the query
+/// for the old input, and unblock the worker thread for the new query.
+///
+/// The store keeps track of the state needed to support incremental ingestion,
+/// but doesn't schedule the ingestion work itself, or decide how many
+/// suggestions to ingest at once. This is for two reasons:
+///
+/// 1. The primitives for scheduling background work vary between platforms, and
+/// aren't available to the lower-level Rust layer. You might use an idle
+/// timer on Desktop, `WorkManager` on Android, or `BGTaskScheduler` on iOS.
+/// 2. Ingestion constraints can change, depending on the platform and the needs
+/// of your application. A mobile device on a metered connection might want
+/// to request a small subset of the Suggest data and download the rest
+/// later, while a desktop on a fast link might download the entire dataset
+/// on the first launch.
+pub struct SuggestStore {
+ /// The generic store implementation, instantiated with the real Remote
+ /// Settings client. Every public method delegates to it.
+ inner: SuggestStoreInner<remote_settings::Client>,
+}
+
+/// Tracks records that aren't currently parsable.
+///
+/// Each entry maps a record ID to the schema version in which the record was
+/// first seen. The whole map is stored in the meta table under
+/// `UNPARSABLE_RECORDS_META_KEY`, serialized as JSON text.
+/// When ingesting, records that were recorded under a schema version older
+/// than the current one are re-requested from Remote Settings, and ingested
+/// again.
+#[derive(Deserialize, Serialize, Default, Debug)]
+#[serde(transparent)]
+pub(crate) struct UnparsableRecords(pub BTreeMap<String, UnparsableRecord>);
+
+impl FromSql for UnparsableRecords {
+    /// Decodes the records from the JSON text stored in the column. A JSON
+    /// error is reported as `FromSqlError::Other`.
+    fn column_result(value: rusqlite::types::ValueRef<'_>) -> rusqlite::types::FromSqlResult<Self> {
+        let json = value.as_str()?;
+        match serde_json::from_str(json) {
+            Ok(records) => Ok(records),
+            Err(err) => Err(rusqlite::types::FromSqlError::Other(Box::new(err))),
+        }
+    }
+}
+
+impl ToSql for UnparsableRecords {
+    /// Encodes the records as JSON text. A JSON error is reported as
+    /// `rusqlite::Error::ToSqlConversionFailure`.
+    fn to_sql(&self) -> rusqlite::Result<rusqlite::types::ToSqlOutput<'_>> {
+        let json = serde_json::to_string(self)
+            .map_err(|err| rusqlite::Error::ToSqlConversionFailure(Box::new(err)))?;
+        Ok(ToSqlOutput::from(json))
+    }
+}
+
+/// What is remembered about a single unparsable record.
+#[derive(Deserialize, Serialize, Debug)]
+pub(crate) struct UnparsableRecord {
+ /// The schema version recorded for the record; serialized under the
+ /// short key `"v"`.
+ #[serde(rename = "v")]
+ pub schema_version: u32,
+}
+
+impl SuggestStore {
+    /// Creates a Suggest store whose database is located at `path`.
+    ///
+    /// When `settings_config` is `None`, the Remote Settings client is
+    /// created from a configuration that only names the Suggest collection.
+    #[handle_error(Error)]
+    pub fn new(
+        path: &str,
+        settings_config: Option<RemoteSettingsConfig>,
+    ) -> SuggestApiResult<Self> {
+        let config = settings_config.unwrap_or_else(|| RemoteSettingsConfig {
+            server_url: None,
+            bucket_name: None,
+            collection_name: REMOTE_SETTINGS_COLLECTION.into(),
+        });
+        let settings_client = remote_settings::Client::new(config)?;
+        // This constructor only receives one path: it becomes the cache
+        // path, and the data path is left empty.
+        let inner = SuggestStoreInner::new(String::new(), path.to_owned(), settings_client);
+        Ok(Self { inner })
+    }
+
+    /// Queries the database for suggestions matching `query`.
+    #[handle_error(Error)]
+    pub fn query(&self, query: SuggestionQuery) -> SuggestApiResult<Vec<Suggestion>> {
+        self.inner.query(query)
+    }
+
+    /// Interrupts any ongoing queries.
+    ///
+    /// Call this when the user types new input into the address bar, so
+    /// that they see fresh suggestions as they type. Ongoing ingests are
+    /// not interrupted.
+    pub fn interrupt(&self) {
+        self.inner.interrupt()
+    }
+
+    /// Ingests new suggestions from Remote Settings, subject to
+    /// `constraints`.
+    #[handle_error(Error)]
+    pub fn ingest(&self, constraints: SuggestIngestionConstraints) -> SuggestApiResult<()> {
+        self.inner.ingest(constraints)
+    }
+
+    /// Removes all content from the database.
+    #[handle_error(Error)]
+    pub fn clear(&self) -> SuggestApiResult<()> {
+        self.inner.clear()
+    }
+
+    /// Returns global Suggest configuration data.
+    #[handle_error(Error)]
+    pub fn fetch_global_config(&self) -> SuggestApiResult<SuggestGlobalConfig> {
+        self.inner.fetch_global_config()
+    }
+
+    /// Returns per-provider Suggest configuration data.
+    #[handle_error(Error)]
+    pub fn fetch_provider_config(
+        &self,
+        provider: SuggestionProvider,
+    ) -> SuggestApiResult<Option<SuggestProviderConfig>> {
+        self.inner.fetch_provider_config(provider)
+    }
+}
+
+/// Constraints limit which suggestions to ingest from Remote Settings.
+///
+/// The default value sets no constraints.
+#[derive(Clone, Default, Debug)]
+pub struct SuggestIngestionConstraints {
+ /// The approximate maximum number of suggestions to ingest. Set to [`None`]
+ /// for "no limit".
+ ///
+ /// Because of how suggestions are partitioned in Remote Settings, this is a
+ /// soft limit, and the store might ingest more than requested.
+ pub max_suggestions: Option<u64>,
+}
+
+/// The implementation of the store. This is generic over the Remote Settings
+/// client, and is split out from the concrete [`SuggestStore`] for testing
+/// with a mock client.
+pub(crate) struct SuggestStoreInner<S> {
+ /// Path to the persistent SQL database.
+ ///
+ /// This stores things that should persist when the user clears their cache.
+ /// It's not currently used because not all consumers pass this in yet.
+ #[allow(unused)]
+ data_path: PathBuf,
+ /// Path to the temporary SQL database.
+ ///
+ /// This stores things that should be deleted when the user clears their cache.
+ cache_path: PathBuf,
+ /// The database connections, opened on `cache_path` the first time
+ /// they're needed.
+ dbs: OnceCell<SuggestStoreDbs>,
+ /// The client used to fetch records and attachments during ingestion.
+ settings_client: S,
+}
+
+impl<S> SuggestStoreInner<S> {
+    /// Creates a store for the given paths and client. No database
+    /// connection is opened yet.
+    fn new(
+        data_path: impl Into<PathBuf>,
+        cache_path: impl Into<PathBuf>,
+        settings_client: S,
+    ) -> Self {
+        let data_path = data_path.into();
+        let cache_path = cache_path.into();
+        Self {
+            data_path,
+            cache_path,
+            dbs: OnceCell::new(),
+            settings_client,
+        }
+    }
+
+    /// Returns this store's database connections, opening them on the
+    /// cache path if that hasn't happened yet.
+    fn dbs(&self) -> Result<&SuggestStoreDbs> {
+        let open = || SuggestStoreDbs::open(&self.cache_path);
+        self.dbs.get_or_try_init(open)
+    }
+
+    /// Fetches the suggestions matching `query`. An empty keyword or an
+    /// empty provider list yields no suggestions, without touching the
+    /// database.
+    fn query(&self, query: SuggestionQuery) -> Result<Vec<Suggestion>> {
+        let nothing_to_match = query.keyword.is_empty() || query.providers.is_empty();
+        if nothing_to_match {
+            return Ok(vec![]);
+        }
+        let dbs = self.dbs()?;
+        dbs.reader.read(|dao| dao.fetch_suggestions(&query))
+    }
+
+    /// Interrupts work on the reader connection.
+    fn interrupt(&self) {
+        // Only interrupt if the databases are already open.
+        let Some(dbs) = self.dbs.get() else {
+            return;
+        };
+        dbs.reader.interrupt_handle.interrupt();
+    }
+
+    /// Clears the database through the writer connection.
+    fn clear(&self) -> Result<()> {
+        let dbs = self.dbs()?;
+        dbs.writer.write(|dao| dao.clear())
+    }
+
+    /// Reads the global configuration through the reader connection.
+    pub fn fetch_global_config(&self) -> Result<SuggestGlobalConfig> {
+        let dbs = self.dbs()?;
+        dbs.reader.read(|dao| dao.get_global_config())
+    }
+
+    /// Reads the configuration for `provider` through the reader
+    /// connection.
+    pub fn fetch_provider_config(
+        &self,
+        provider: SuggestionProvider,
+    ) -> Result<Option<SuggestProviderConfig>> {
+        let dbs = self.dbs()?;
+        dbs.reader.read(|dao| dao.get_provider_config(provider))
+    }
+}
+
+impl<S> SuggestStoreInner<S>
+where
+ S: SuggestRemoteSettingsClient,
+{
+ /// Ingests records from Remote Settings into the database.
+ ///
+ /// Records previously stored as unparsable under an older schema version
+ /// are requested again first, in chunks of `UNPARSABLE_IDS_PER_REQUEST`
+ /// IDs. After that, the records modified since the last ingest are
+ /// requested, limited according to `constraints`.
+ fn ingest(&self, constraints: SuggestIngestionConstraints) -> Result<()> {
+     let writer = &self.dbs()?.writer;
+
+     let stored_unparsable =
+         writer.read(|dao| dao.get_meta::<UnparsableRecords>(UNPARSABLE_RECORDS_META_KEY))?;
+     if let Some(unparsable_records) = stored_unparsable {
+         // Only retry records stored under an older schema version.
+         let retry_ids: Vec<_> = unparsable_records
+             .0
+             .iter()
+             .filter_map(|(record_id, unparsable)| {
+                 (unparsable.schema_version < VERSION).then_some(record_id)
+             })
+             .collect();
+         for id_chunk in retry_ids.chunks(UNPARSABLE_IDS_PER_REQUEST) {
+             let mut retry_options = GetItemsOptions::new();
+             for &record_id in id_chunk {
+                 retry_options.eq("id", record_id);
+             }
+             let response = self
+                 .settings_client
+                 .get_records_with_options(&retry_options)?;
+             self.ingest_records(writer, &response.records)?;
+         }
+     }
+
+     let mut options = GetItemsOptions::new();
+     // Remote Settings returns records in descending modification order
+     // (newest first), but we want them in ascending order (oldest first),
+     // so that we can eventually resume downloading where we left off.
+     options.sort("last_modified", SortOrder::Ascending);
+     let last_ingest = writer.read(|dao| dao.get_meta::<u64>(LAST_INGEST_META_KEY))?;
+     if let Some(timestamp) = last_ingest {
+         // Only download changes since our last ingest. If our last ingest
+         // was interrupted, we'll pick up where we left off.
+         options.gt("last_modified", timestamp.to_string());
+     }
+
+     if let Some(max_suggestions) = constraints.max_suggestions {
+         // Round up to whole records: download enough attachments of
+         // `SUGGESTIONS_PER_ATTACHMENT` suggestions each to cover the
+         // requested maximum, and always at least one record.
+         let max_records = 1 + max_suggestions.saturating_sub(1) / SUGGESTIONS_PER_ATTACHMENT;
+         options.limit(max_records);
+     }
+
+     let response = self.settings_client.get_records_with_options(&options)?;
+     self.ingest_records(writer, &response.records)?;
+
+     Ok(())
+ }
+
+ /// Ingests each record in `records`, in order, through `writer`.
+ ///
+ /// Deleted records and records whose fields can't be parsed are handed to
+ /// the matching DAO handler. Every other record is dispatched on its
+ /// parsed type.
+ fn ingest_records(&self, writer: &SuggestDb, records: &[RemoteSettingsRecord]) -> Result<()> {
+     for record in records {
+         let record_id = SuggestRecordId::from(&record.id);
+         if record.deleted {
+             // If the entire record was deleted, drop all its suggestions
+             // and advance the last ingest time.
+             writer.write(|dao| dao.handle_deleted_record(record))?;
+             continue;
+         }
+         let fields: SuggestRecord =
+             match serde_json::from_value(serde_json::Value::Object(record.fields.clone())) {
+                 Ok(fields) => fields,
+                 Err(_) => {
+                     // We don't recognize this record's type, so we don't
+                     // know how to ingest its suggestions. Record this in
+                     // the meta table.
+                     writer.write(|dao| dao.handle_unparsable_record(record))?;
+                     continue;
+                 }
+             };
+
+         match fields {
+             SuggestRecord::AmpWikipedia => {
+                 self.ingest_attachment(writer, record, |dao, id, suggestions| {
+                     dao.insert_amp_wikipedia_suggestions(id, suggestions)
+                 })?;
+             }
+             SuggestRecord::AmpMobile => {
+                 self.ingest_attachment(writer, record, |dao, id, suggestions| {
+                     dao.insert_amp_mobile_suggestions(id, suggestions)
+                 })?;
+             }
+             SuggestRecord::Icon => {
+                 let (icon_id, attachment) =
+                     match (record_id.as_icon_id(), record.attachment.as_ref()) {
+                         (Some(icon_id), Some(attachment)) => (icon_id, attachment),
+                         _ => {
+                             // An icon record should have an icon ID and an
+                             // attachment. Icons that don't have these are
+                             // malformed, so skip to the next record.
+                             writer.write(|dao| {
+                                 dao.put_last_ingest_if_newer(record.last_modified)
+                             })?;
+                             continue;
+                         }
+                     };
+                 let icon_bytes = self.settings_client.get_attachment(&attachment.location)?;
+                 writer.write(|dao| {
+                     dao.put_icon(icon_id, &icon_bytes)?;
+                     dao.handle_ingested_record(record)
+                 })?;
+             }
+             SuggestRecord::Amo => {
+                 self.ingest_attachment(writer, record, |dao, id, suggestions| {
+                     dao.insert_amo_suggestions(id, suggestions)
+                 })?;
+             }
+             SuggestRecord::Pocket => {
+                 self.ingest_attachment(writer, record, |dao, id, suggestions| {
+                     dao.insert_pocket_suggestions(id, suggestions)
+                 })?;
+             }
+             SuggestRecord::Yelp => {
+                 // Only the first entry of a Yelp attachment is ingested.
+                 self.ingest_attachment(writer, record, |dao, id, suggestions| {
+                     if let Some(suggestion) = suggestions.first() {
+                         dao.insert_yelp_suggestions(id, suggestion)
+                     } else {
+                         Ok(())
+                     }
+                 })?;
+             }
+             SuggestRecord::Mdn => {
+                 self.ingest_attachment(writer, record, |dao, id, suggestions| {
+                     dao.insert_mdn_suggestions(id, suggestions)
+                 })?;
+             }
+             SuggestRecord::Weather(data) => {
+                 self.ingest_record(writer, record, |dao, id| {
+                     dao.insert_weather_data(id, &data)
+                 })?;
+             }
+             SuggestRecord::GlobalConfig(config) => {
+                 self.ingest_record(writer, record, |dao, _| {
+                     dao.put_global_config(&SuggestGlobalConfig::from(&config))
+                 })?;
+             }
+         }
+     }
+     Ok(())
+ }
+
+ /// Replaces the data previously ingested from `record` with whatever
+ /// `ingestion_handler` inserts, then marks the record as ingested. All
+ /// three steps run inside a single `writer.write()` call.
+ fn ingest_record(
+     &self,
+     writer: &SuggestDb,
+     record: &RemoteSettingsRecord,
+     ingestion_handler: impl FnOnce(&mut SuggestDao<'_>, &SuggestRecordId) -> Result<()>,
+ ) -> Result<()> {
+     let id = SuggestRecordId::from(&record.id);
+
+     writer.write(|dao| {
+         // Suggestions have no stable identifier, so working out which ones
+         // changed would be more complicated than dropping everything that
+         // came from this record and ingesting it all again.
+         dao.drop_suggestions(&id)?;
+
+         // Ingest (or re-ingest) all data in the record.
+         ingestion_handler(dao, &id)?;
+
+         dao.handle_ingested_record(record)
+     })
+ }
+
+ /// Downloads and parses the attachment of `record`, then ingests its
+ /// suggestions with `ingestion_handler`.
+ ///
+ /// A record without an attachment only advances the last ingest time. An
+ /// attachment that can't be parsed marks the record as unparsable.
+ fn ingest_attachment<T>(
+     &self,
+     writer: &SuggestDb,
+     record: &RemoteSettingsRecord,
+     ingestion_handler: impl FnOnce(&mut SuggestDao<'_>, &SuggestRecordId, &[T]) -> Result<()>,
+ ) -> Result<()>
+ where
+     T: DeserializeOwned,
+ {
+     let attachment = match record.attachment.as_ref() {
+         Some(attachment) => attachment,
+         None => {
+             // This method should be called only when a record is expected
+             // to have an attachment. If it doesn't have one, it's
+             // malformed, so skip to the next record.
+             writer.write(|dao| dao.put_last_ingest_if_newer(record.last_modified))?;
+             return Ok(());
+         }
+     };
+
+     let bytes = self.settings_client.get_attachment(&attachment.location)?;
+     let Ok(parsed) = serde_json::from_slice::<SuggestAttachment<T>>(&bytes) else {
+         return writer.write(|dao| dao.handle_unparsable_record(record));
+     };
+     self.ingest_record(writer, record, |dao, record_id| {
+         ingestion_handler(dao, record_id, parsed.suggestions())
+     })
+ }
+}
+
+/// Holds a store's open connections to the Suggest database. Both
+/// connections are opened on the same path.
+struct SuggestStoreDbs {
+ /// A read-write connection used to update the database with new data.
+ writer: SuggestDb,
+ /// A read-only connection used to query the database.
+ reader: SuggestDb,
+}
+
+impl SuggestStoreDbs {
+    /// Opens the read-write and read-only connections to the database at
+    /// `path`.
+    fn open(path: &Path) -> Result<Self> {
+        // Order is important here: the writer must be opened first, so that
+        // it can set up the database and run any migrations. Struct fields
+        // are initialized in the order they're written below.
+        Ok(Self {
+            writer: SuggestDb::open(path, ConnectionType::ReadWrite)?,
+            reader: SuggestDb::open(path, ConnectionType::ReadOnly)?,
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ use std::{cell::RefCell, collections::HashMap};
+
+ use anyhow::{anyhow, Context};
+ use expect_test::expect;
+ use parking_lot::Once;
+ use rc_crypto::rand;
+ use remote_settings::{RemoteSettingsRecord, RemoteSettingsResponse};
+ use serde_json::json;
+ use sql_support::ConnExt;
+
+ use crate::SuggestionProvider;
+
+ /// Creates a Suggest store backed by a uniquely named in-memory database.
+ fn unique_test_store<S>(settings_client: S) -> SuggestStoreInner<S>
+ where
+     S: SuggestRemoteSettingsClient,
+ {
+     let mut suffix_bytes = [0u8; 8];
+     rand::fill(&mut suffix_bytes).expect("Failed to generate unique suffix for test store");
+     let suffix = hex::encode(suffix_bytes);
+     // A store opens separate connections to the same database for reading
+     // and writing, so we must give our in-memory database a name, and open
+     // it in shared-cache mode so that both connections can access it.
+     let data_path = format!("file:test_store_data_{}?mode=memory&cache=shared", suffix);
+     let cache_path = format!("file:test_store_cache_{}?mode=memory&cache=shared", suffix);
+     SuggestStoreInner::new(data_path, cache_path, settings_client)
+ }
+
+ /// A snapshot containing fake Remote Settings records and attachments for
+ /// the store to ingest. We use snapshots to test the store's behavior in a
+ /// data-driven way.
+ struct Snapshot {
+ /// The records that the fake client returns.
+ records: Vec<RemoteSettingsRecord>,
+ /// The raw attachment bytes, keyed by attachment location.
+ attachments: HashMap<&'static str, Vec<u8>>,
+ }
+
+ impl Snapshot {
+     /// Creates a snapshot from a JSON value that represents a collection of
+     /// Suggest Remote Settings records. The snapshot starts out without any
+     /// attachments.
+     ///
+     /// The easiest way to build the value is the [`serde_json::json!`]
+     /// macro: `Snapshot::with_records(json!(...))` is simpler than
+     /// constructing the records by hand.
+     fn with_records(value: serde_json::Value) -> anyhow::Result<Self> {
+         let records: Vec<RemoteSettingsRecord> = serde_json::from_value(value)
+             .context("Couldn't create snapshot with Remote Settings records")?;
+         Ok(Self {
+             records,
+             attachments: HashMap::new(),
+         })
+     }
+
+     /// Adds a data attachment with one or more suggestions to the snapshot.
+     /// The value is stored as its JSON serialization, under `location`.
+     fn with_data(
+         mut self,
+         location: &'static str,
+         value: serde_json::Value,
+     ) -> anyhow::Result<Self> {
+         let bytes =
+             serde_json::to_vec(&value).context("Couldn't add data attachment to snapshot")?;
+         self.attachments.insert(location, bytes);
+         Ok(self)
+     }
+
+     /// Adds an icon attachment to the snapshot. The bytes are stored as
+     /// given, under `location`.
+     fn with_icon(mut self, location: &'static str, bytes: Vec<u8>) -> Self {
+         self.attachments.insert(location, bytes);
+         self
+     }
+ }
+
+ /// A fake Remote Settings client that returns records and attachments from
+ /// a snapshot.
+ ///
+ /// The options passed to `get_records_with_options()` are remembered but
+ /// not applied: every call returns all of the snapshot's records.
+ struct SnapshotSettingsClient {
+ /// The current snapshot. You can modify it using
+ /// [`RefCell::borrow_mut()`] to simulate remote updates in tests.
+ snapshot: RefCell<Snapshot>,
+
+ /// The options passed to the last [`Self::get_records_with_options()`]
+ /// call.
+ last_get_records_options: RefCell<Option<GetItemsOptions>>,
+ }
+
+ impl SnapshotSettingsClient {
+     /// Creates a client that serves `snapshot` and has not recorded any
+     /// request options yet.
+     fn with_snapshot(snapshot: Snapshot) -> Self {
+         Self {
+             last_get_records_options: RefCell::default(),
+             snapshot: RefCell::new(snapshot),
+         }
+     }
+
+     /// Returns the value of the first query pair named `option` in the
+     /// options passed to the most recent
+     /// [`Self::get_records_with_options()`] call, if there is one.
+     fn last_get_records_option(&self, option: &str) -> Option<String> {
+         let last_options = self.last_get_records_options.borrow();
+         let (_, value) = last_options
+             .as_ref()?
+             .iter_query_pairs()
+             .find(|(key, _)| key == option)?;
+         Some(value.into())
+     }
+ }
+
+ impl SuggestRemoteSettingsClient for SnapshotSettingsClient {
+     /// Remembers `options` and returns every record in the current
+     /// snapshot. The response's `last_modified` is the newest record's
+     /// timestamp, or 0 when the snapshot has no records.
+     fn get_records_with_options(
+         &self,
+         options: &GetItemsOptions,
+     ) -> Result<RemoteSettingsResponse> {
+         *self.last_get_records_options.borrow_mut() = Some(options.clone());
+         let records = self.snapshot.borrow().records.clone();
+         let newest = records.iter().map(|record| record.last_modified).max();
+         Ok(RemoteSettingsResponse {
+             last_modified: newest.unwrap_or(0),
+             records,
+         })
+     }
+
+     /// Returns a copy of the attachment stored under `location`.
+     ///
+     /// Panics if the snapshot has no attachment for `location`.
+     fn get_attachment(&self, location: &str) -> Result<Vec<u8>> {
+         let snapshot = self.snapshot.borrow();
+         match snapshot.attachments.get(location) {
+             Some(bytes) => Ok(bytes.clone()),
+             None => unreachable!("Unexpected request for attachment `{}`", location),
+         }
+     }
+ }
+
+ /// Initializes logging exactly once, no matter how many tests call this.
+ fn before_each() {
+     static ONCE: Once = Once::new();
+     ONCE.call_once(env_logger::init);
+ }
+
+ /// Tests that `SuggestStore` is `Send` and `Sync`, which UniFFI requires
+ /// of the interfaces it exposes.
+ #[test]
+ fn is_thread_safe() {
+     before_each();
+
+     // This only needs to compile; nothing is checked at run time.
+     fn assert_send_sync<T: Send + Sync>() {}
+     assert_send_sync::<SuggestStore>();
+ }
+
+ /// Tests ingesting suggestions into an empty database.
+ #[test]
+ fn ingest_suggestions() -> anyhow::Result<()> {
+ before_each();
+
+ // One "data" record whose attachment holds a single suggestion.
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "1234",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ .with_data(
+ "data-1.json",
+ json!([{
+ "id": 0,
+ "advertiser": "Los Pollos Hermanos",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"],
+ "title": "Los Pollos Hermanos - Albuquerque",
+ "url": "https://www.lph-nm.biz",
+ "icon": "5678",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }]),
+ )?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ store.dbs()?.reader.read(|dao| {
+ // The last ingest time should match the record's `last_modified`.
+ assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(15));
+ // The suggestion names icon "5678", but the snapshot contains no
+ // icon record, and the expected `icon` is `None`.
+ expect![[r#"
+ [
+ Amp {
+ title: "Los Pollos Hermanos - Albuquerque",
+ url: "https://www.lph-nm.biz",
+ raw_url: "https://www.lph-nm.biz",
+ icon: None,
+ full_keyword: "los",
+ block_id: 0,
+ advertiser: "Los Pollos Hermanos",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "lo".into(),
+ providers: vec![SuggestionProvider::Amp],
+ limit: None,
+ })?);
+
+ Ok(())
+ })?;
+
+ Ok(())
+ }
+
+ /// Tests ingesting suggestions with icons.
+ #[test]
+ fn ingest_icons() -> anyhow::Result<()> {
+ before_each();
+
+ // One "data" record with two suggestions that both name icon "2", and
+ // one "icon" record with the ID "icon-2".
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "icon-2",
+ "type": "icon",
+ "last_modified": 20,
+ "attachment": {
+ "filename": "icon-2.png",
+ "mimetype": "image/png",
+ "location": "icon-2.png",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ .with_data(
+ "data-1.json",
+ json!([{
+ "id": 0,
+ "advertiser": "Good Place Eats",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"],
+ "title": "Lasagna Come Out Tomorrow",
+ "url": "https://www.lasagna.restaurant",
+ "icon": "2",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url"
+ }, {
+ "id": 0,
+ "advertiser": "Good Place Eats",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["pe", "pen", "penne", "penne for your thoughts"],
+ "title": "Penne for Your Thoughts",
+ "url": "https://penne.biz",
+ "icon": "2",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }]),
+ )?
+ .with_icon("icon-2.png", "i-am-an-icon".as_bytes().into());
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ store.dbs()?.reader.read(|dao| {
+ // The expected icon bytes below are the ASCII bytes of
+ // "i-am-an-icon". This suggestion has no "score" in the attachment;
+ // the expected 0.2 is presumably the default score (TODO confirm).
+ expect![[r#"
+ [
+ Amp {
+ title: "Lasagna Come Out Tomorrow",
+ url: "https://www.lasagna.restaurant",
+ raw_url: "https://www.lasagna.restaurant",
+ icon: Some(
+ [
+ 105,
+ 45,
+ 97,
+ 109,
+ 45,
+ 97,
+ 110,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "lasagna",
+ block_id: 0,
+ advertiser: "Good Place Eats",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.2,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "la".into(),
+ providers: vec![SuggestionProvider::Amp],
+ limit: None,
+ })?);
+ // The second suggestion shares the same icon and has an explicit
+ // score.
+ expect![[r#"
+ [
+ Amp {
+ title: "Penne for Your Thoughts",
+ url: "https://penne.biz",
+ raw_url: "https://penne.biz",
+ icon: Some(
+ [
+ 105,
+ 45,
+ 97,
+ 109,
+ 45,
+ 97,
+ 110,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "penne",
+ block_id: 0,
+ advertiser: "Good Place Eats",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "pe".into(),
+ providers: vec![SuggestionProvider::Amp],
+ limit: None,
+ })?);
+
+ Ok(())
+ })?;
+
+ Ok(())
+ }
+
+ /// Tests ingesting a data attachment containing a single suggestion,
+ /// instead of an array of suggestions.
+ #[test]
+ fn ingest_one_suggestion_in_data_attachment() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ .with_data(
+ "data-1.json",
+ // Note: the attachment is a bare JSON object, not a one-element
+ // array.
+ json!({
+ "id": 0,
+ "advertiser": "Good Place Eats",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"],
+ "title": "Lasagna Come Out Tomorrow",
+ "url": "https://www.lasagna.restaurant",
+ "icon": "2",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }),
+ )?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ store.dbs()?.reader.read(|dao| {
+ // The single suggestion should be queryable like any other.
+ expect![[r#"
+ [
+ Amp {
+ title: "Lasagna Come Out Tomorrow",
+ url: "https://www.lasagna.restaurant",
+ raw_url: "https://www.lasagna.restaurant",
+ icon: None,
+ full_keyword: "lasagna",
+ block_id: 0,
+ advertiser: "Good Place Eats",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "la".into(),
+ providers: vec![SuggestionProvider::Amp],
+ limit: None,
+ })?);
+
+ Ok(())
+ })?;
+
+ Ok(())
+ }
+
+ /// Tests re-ingesting suggestions from an updated attachment.
+ ///
+ /// The same record ID is served twice with different attachments; after the
+ /// second ingest, only the suggestions from the newer attachment should
+ /// remain.
+ #[test]
+ fn reingest_amp_suggestions() -> anyhow::Result<()> {
+ before_each();
+
+ // Ingest suggestions from the initial snapshot.
+ let initial_snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ .with_data(
+ "data-1.json",
+ json!([{
+ "id": 0,
+ "advertiser": "Good Place Eats",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"],
+ "title": "Lasagna Come Out Tomorrow",
+ "url": "https://www.lasagna.restaurant",
+ "icon": "1",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }, {
+ "id": 0,
+ "advertiser": "Los Pollos Hermanos",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["lo", "los p", "los pollos h"],
+ "title": "Los Pollos Hermanos - Albuquerque",
+ "url": "https://www.lph-nm.biz",
+ "icon": "2",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }]),
+ )?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(initial_snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ store.dbs()?.reader.read(|dao| {
+ assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(15u64));
+ expect![[r#"
+ [
+ Amp {
+ title: "Lasagna Come Out Tomorrow",
+ url: "https://www.lasagna.restaurant",
+ raw_url: "https://www.lasagna.restaurant",
+ icon: None,
+ full_keyword: "lasagna",
+ block_id: 0,
+ advertiser: "Good Place Eats",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "la".into(),
+ providers: vec![SuggestionProvider::Amp],
+ limit: None,
+ })?);
+ Ok(())
+ })?;
+
+ // Update the snapshot with new suggestions: drop Lasagna, update Los
+ // Pollos, and add Penne.
+ *store.settings_client.snapshot.borrow_mut() = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "data",
+ "last_modified": 30,
+ "attachment": {
+ "filename": "data-1-1.json",
+ "mimetype": "application/json",
+ "location": "data-1-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ .with_data(
+ "data-1-1.json",
+ json!([{
+ "id": 0,
+ "advertiser": "Los Pollos Hermanos",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["los ", "los pollos", "los pollos hermanos"],
+ "title": "Los Pollos Hermanos - Now Serving at 14 Locations!",
+ "url": "https://www.lph-nm.biz",
+ "icon": "2",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }, {
+ "id": 0,
+ "advertiser": "Good Place Eats",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["pe", "pen", "penne", "penne for your thoughts"],
+ "title": "Penne for Your Thoughts",
+ "url": "https://penne.biz",
+ "icon": "2",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }]),
+ )?;
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ store.dbs()?.reader.read(|dao| {
+ // The last ingest time should have advanced to the updated record's
+ // `last_modified`.
+ assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(30u64));
+ // Lasagna was dropped from the attachment, so it should be gone.
+ assert!(dao
+ .fetch_suggestions(&SuggestionQuery {
+ keyword: "la".into(),
+ providers: vec![SuggestionProvider::Amp],
+ limit: None,
+ })?
+ .is_empty());
+ // Los Pollos should come back with its updated title.
+ expect![[r#"
+ [
+ Amp {
+ title: "Los Pollos Hermanos - Now Serving at 14 Locations!",
+ url: "https://www.lph-nm.biz",
+ raw_url: "https://www.lph-nm.biz",
+ icon: None,
+ full_keyword: "los pollos",
+ block_id: 0,
+ advertiser: "Los Pollos Hermanos",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "los ".into(),
+ providers: vec![SuggestionProvider::Amp],
+ limit: None,
+ })?);
+ // Penne was added by the updated attachment.
+ expect![[r#"
+ [
+ Amp {
+ title: "Penne for Your Thoughts",
+ url: "https://penne.biz",
+ raw_url: "https://penne.biz",
+ icon: None,
+ full_keyword: "penne",
+ block_id: 0,
+ advertiser: "Good Place Eats",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "pe".into(),
+ providers: vec![SuggestionProvider::Amp],
+ limit: None,
+ })?);
+ Ok(())
+ })?;
+
+ Ok(())
+ }
+
+ /// Tests re-ingesting icons from an updated attachment: after the icon records are replaced, queries for the already-ingested AMP suggestions must return the new icon bytes.
+ #[test]
+ fn reingest_icons() -> anyhow::Result<()> {
+ before_each();
+
+ // Ingest suggestions and icons from the initial snapshot: one data record with two AMP suggestions, plus the two icon records ("icon-2", "icon-3") whose bytes they are expected to pick up.
+ let initial_snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "icon-2",
+ "type": "icon",
+ "last_modified": 20,
+ "attachment": {
+ "filename": "icon-2.png",
+ "mimetype": "image/png",
+ "location": "icon-2.png",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "icon-3",
+ "type": "icon",
+ "last_modified": 25,
+ "attachment": {
+ "filename": "icon-3.png",
+ "mimetype": "image/png",
+ "location": "icon-3.png",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ .with_data(
+ "data-1.json",
+ json!([{
+ "id": 0,
+ "advertiser": "Good Place Eats",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"],
+ "title": "Lasagna Come Out Tomorrow",
+ "url": "https://www.lasagna.restaurant",
+ "icon": "2",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }, {
+ "id": 0,
+ "advertiser": "Los Pollos Hermanos",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["lo", "los", "los pollos", "los pollos hermanos"],
+ "title": "Los Pollos Hermanos - Albuquerque",
+ "url": "https://www.lph-nm.biz",
+ "icon": "3",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }]),
+ )?
+ .with_icon("icon-2.png", "lasagna-icon".as_bytes().into())
+ .with_icon("icon-3.png", "pollos-icon".as_bytes().into());
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(initial_snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ store.dbs()?.reader.read(|dao| {
+ assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(25u64)); // 25 is the largest `last_modified` in the initial snapshot
+ assert_eq!(
+ dao.conn
+ .query_one::<i64>("SELECT count(*) FROM suggestions")?,
+ 2
+ );
+ assert_eq!(dao.conn.query_one::<i64>("SELECT count(*) FROM icons")?, 2); // both icons were stored
+ Ok(())
+ })?;
+
+ // Update the snapshot with new icons: same record ids and filenames, newer `last_modified` values, different bytes. The data record is deliberately absent from this snapshot.
+ *store.settings_client.snapshot.borrow_mut() = Snapshot::with_records(json!([{
+ "id": "icon-2",
+ "type": "icon",
+ "last_modified": 30,
+ "attachment": {
+ "filename": "icon-2.png",
+ "mimetype": "image/png",
+ "location": "icon-2.png",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "icon-3",
+ "type": "icon",
+ "last_modified": 35,
+ "attachment": {
+ "filename": "icon-3.png",
+ "mimetype": "image/png",
+ "location": "icon-3.png",
+ "hash": "",
+ "size": 0,
+ }
+ }]))?
+ .with_icon("icon-2.png", "new-lasagna-icon".as_bytes().into())
+ .with_icon("icon-3.png", "new-pollos-icon".as_bytes().into());
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ store.dbs()?.reader.read(|dao| {
+ assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(35u64)); // now the newest icon record's `last_modified`
+ expect![[r#"
+ [
+ Amp {
+ title: "Lasagna Come Out Tomorrow",
+ url: "https://www.lasagna.restaurant",
+ raw_url: "https://www.lasagna.restaurant",
+ icon: Some(
+ [
+ 110,
+ 101,
+ 119,
+ 45,
+ 108,
+ 97,
+ 115,
+ 97,
+ 103,
+ 110,
+ 97,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "lasagna",
+ block_id: 0,
+ advertiser: "Good Place Eats",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "la".into(), // the icon bytes in the snapshot above are the ASCII codes of "new-lasagna-icon"
+ providers: vec![SuggestionProvider::Amp],
+ limit: None,
+ })?);
+ expect![[r#"
+ [
+ Amp {
+ title: "Los Pollos Hermanos - Albuquerque",
+ url: "https://www.lph-nm.biz",
+ raw_url: "https://www.lph-nm.biz",
+ icon: Some(
+ [
+ 110,
+ 101,
+ 119,
+ 45,
+ 112,
+ 111,
+ 108,
+ 108,
+ 111,
+ 115,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "los",
+ block_id: 0,
+ advertiser: "Los Pollos Hermanos",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "lo".into(), // the icon bytes in the snapshot above are the ASCII codes of "new-pollos-icon"
+ providers: vec![SuggestionProvider::Amp],
+ limit: None,
+ })?);
+ Ok(())
+ })?;
+
+ Ok(())
+ }
+
+ /// Tests re-ingesting AMO suggestions from an updated attachment.
+ ///
+ /// Two `amo-suggestions` records are ingested first. The second record is
+ /// then replaced by a new attachment that updates one suggestion, drops
+ /// another, and adds a new one. The first record is left out of the update
+ /// and its suggestion is expected to remain queryable.
+ #[test]
+ fn reingest_amo_suggestions() -> anyhow::Result<()> {
+ before_each();
+
+ // Ingest suggestions from the initial snapshot.
+ let initial_snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "amo-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "data-2",
+ "type": "amo-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-2.json",
+ "mimetype": "application/json",
+ "location": "data-2.json",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ .with_data(
+ "data-1.json",
+ // This attachment is a single JSON object rather than an array; the
+ // assertions below expect it to be ingested like a one-element list.
+ json!({
+ "description": "First suggestion",
+ "url": "https://example.org/amo-suggestion-1",
+ "guid": "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
+ "keywords": ["relay", "spam", "masking email", "alias"],
+ "title": "AMO suggestion",
+ "icon": "https://example.org/amo-suggestion-1/icon.png",
+ "rating": "4.9",
+ "number_of_ratings": 800,
+ "score": 0.25
+ }),
+ )?
+ .with_data(
+ "data-2.json",
+ json!([{
+ "description": "Second suggestion",
+ "url": "https://example.org/amo-suggestion-2",
+ "guid": "{6d24e3b8-1400-4d37-9440-c798f9b79b1a}",
+ "keywords": ["dark mode", "dark theme", "night mode"],
+ "title": "Another AMO suggestion",
+ "icon": "https://example.org/amo-suggestion-2/icon.png",
+ "rating": "4.6",
+ "number_of_ratings": 750,
+ "score": 0.25
+ }, {
+ "description": "Third suggestion",
+ "url": "https://example.org/amo-suggestion-3",
+ "guid": "{1e9d493b-0498-48bb-9b9a-8b45a44df146}",
+ "keywords": ["grammar", "spelling", "edit"],
+ "title": "Yet another AMO suggestion",
+ "icon": "https://example.org/amo-suggestion-3/icon.png",
+ "rating": "4.8",
+ "number_of_ratings": 900,
+ "score": 0.25
+ }]),
+ )?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(initial_snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ store.dbs()?.reader.read(|dao| {
+ // Both records carry `last_modified` = 15.
+ assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(15u64));
+
+ expect![[r#"
+ [
+ Amo {
+ title: "AMO suggestion",
+ url: "https://example.org/amo-suggestion-1",
+ icon_url: "https://example.org/amo-suggestion-1/icon.png",
+ description: "First suggestion",
+ rating: Some(
+ "4.9",
+ ),
+ number_of_ratings: 800,
+ guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
+ score: 0.25,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "masking e".into(),
+ providers: vec![SuggestionProvider::Amo],
+ limit: None,
+ })?);
+
+ expect![[r#"
+ [
+ Amo {
+ title: "Another AMO suggestion",
+ url: "https://example.org/amo-suggestion-2",
+ icon_url: "https://example.org/amo-suggestion-2/icon.png",
+ description: "Second suggestion",
+ rating: Some(
+ "4.6",
+ ),
+ number_of_ratings: 750,
+ guid: "{6d24e3b8-1400-4d37-9440-c798f9b79b1a}",
+ score: 0.25,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "night".into(),
+ providers: vec![SuggestionProvider::Amo],
+ limit: None,
+ })?);
+
+ // Precondition for the "drop the third" check after re-ingestion:
+ // the third suggestion is queryable before the update.
+ expect![[r#"
+ [
+ Amo {
+ title: "Yet another AMO suggestion",
+ url: "https://example.org/amo-suggestion-3",
+ icon_url: "https://example.org/amo-suggestion-3/icon.png",
+ description: "Third suggestion",
+ rating: Some(
+ "4.8",
+ ),
+ number_of_ratings: 900,
+ guid: "{1e9d493b-0498-48bb-9b9a-8b45a44df146}",
+ score: 0.25,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "grammar".into(),
+ providers: vec![SuggestionProvider::Amo],
+ limit: None,
+ })?);
+
+ Ok(())
+ })?;
+
+ // Update the snapshot with new suggestions: update the second, drop the
+ // third, and add the fourth.
+ *store.settings_client.snapshot.borrow_mut() = Snapshot::with_records(json!([{
+ "id": "data-2",
+ "type": "amo-suggestions",
+ "last_modified": 30,
+ "attachment": {
+ "filename": "data-2-1.json",
+ "mimetype": "application/json",
+ "location": "data-2-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ .with_data(
+ "data-2-1.json",
+ json!([{
+ "description": "Updated second suggestion",
+ "url": "https://example.org/amo-suggestion-2",
+ "guid": "{6d24e3b8-1400-4d37-9440-c798f9b79b1a}",
+ "keywords": ["dark mode", "night mode"],
+ "title": "Another AMO suggestion",
+ "icon": "https://example.org/amo-suggestion-2/icon.png",
+ "rating": "4.7",
+ "number_of_ratings": 775,
+ "score": 0.25
+ }, {
+ "description": "Fourth suggestion",
+ "url": "https://example.org/amo-suggestion-4",
+ "guid": "{1ea82ebd-a1ba-4f57-b8bb-3824ead837bd}",
+ "keywords": ["image search", "visual search"],
+ "title": "New AMO suggestion",
+ "icon": "https://example.org/amo-suggestion-4/icon.png",
+ "rating": "5.0",
+ "number_of_ratings": 100,
+ "score": 0.25
+ }]),
+ )?;
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ store.dbs()?.reader.read(|dao| {
+ assert_eq!(dao.get_meta(LAST_INGEST_META_KEY)?, Some(30u64));
+
+ // The first record was not part of the update, so its suggestion
+ // must still be there.
+ expect![[r#"
+ [
+ Amo {
+ title: "AMO suggestion",
+ url: "https://example.org/amo-suggestion-1",
+ icon_url: "https://example.org/amo-suggestion-1/icon.png",
+ description: "First suggestion",
+ rating: Some(
+ "4.9",
+ ),
+ number_of_ratings: 800,
+ guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
+ score: 0.25,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "masking e".into(),
+ providers: vec![SuggestionProvider::Amo],
+ limit: None,
+ })?);
+
+ // "dark theme" was removed from the second suggestion's keywords.
+ expect![[r#"
+ []
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "dark t".into(),
+ providers: vec![SuggestionProvider::Amo],
+ limit: None,
+ })?);
+
+ // The third suggestion only existed in the old attachment, so its
+ // keywords must no longer match anything.
+ expect![[r#"
+ []
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "grammar".into(),
+ providers: vec![SuggestionProvider::Amo],
+ limit: None,
+ })?);
+
+ expect![[r#"
+ [
+ Amo {
+ title: "Another AMO suggestion",
+ url: "https://example.org/amo-suggestion-2",
+ icon_url: "https://example.org/amo-suggestion-2/icon.png",
+ description: "Updated second suggestion",
+ rating: Some(
+ "4.7",
+ ),
+ number_of_ratings: 775,
+ guid: "{6d24e3b8-1400-4d37-9440-c798f9b79b1a}",
+ score: 0.25,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "night".into(),
+ providers: vec![SuggestionProvider::Amo],
+ limit: None,
+ })?);
+
+ expect![[r#"
+ [
+ Amo {
+ title: "New AMO suggestion",
+ url: "https://example.org/amo-suggestion-4",
+ icon_url: "https://example.org/amo-suggestion-4/icon.png",
+ description: "Fourth suggestion",
+ rating: Some(
+ "5.0",
+ ),
+ number_of_ratings: 100,
+ guid: "{1ea82ebd-a1ba-4f57-b8bb-3824ead837bd}",
+ score: 0.25,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "image search".into(),
+ providers: vec![SuggestionProvider::Amo],
+ limit: None,
+ })?);
+
+ Ok(())
+ })?;
+
+ Ok(())
+ }
+
+    /// Verifies that tombstone records remove the suggestions and icons that
+    /// were ingested earlier under the same record ids.
+    #[test]
+    fn ingest_tombstones() -> anyhow::Result<()> {
+        before_each();
+
+        // Seed the store: one data record carrying a single AMP suggestion,
+        // plus one icon record.
+        let seed_records = json!([{
+            "id": "data-1",
+            "type": "data",
+            "last_modified": 15,
+            "attachment": {
+                "filename": "data-1.json",
+                "mimetype": "application/json",
+                "location": "data-1.json",
+                "hash": "",
+                "size": 0,
+            },
+        }, {
+            "id": "icon-2",
+            "type": "icon",
+            "last_modified": 20,
+            "attachment": {
+                "filename": "icon-2.png",
+                "mimetype": "image/png",
+                "location": "icon-2.png",
+                "hash": "",
+                "size": 0,
+            },
+        }]);
+        let seed_suggestions = json!([{
+            "id": 0,
+            "advertiser": "Good Place Eats",
+            "iab_category": "8 - Food & Drink",
+            "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"],
+            "title": "Lasagna Come Out Tomorrow",
+            "url": "https://www.lasagna.restaurant",
+            "icon": "2",
+            "impression_url": "https://example.com/impression_url",
+            "click_url": "https://example.com/click_url",
+            "score": 0.3
+        }]);
+        let seeded = Snapshot::with_records(seed_records)?
+            .with_data("data-1.json", seed_suggestions)?
+            .with_icon("icon-2.png", "i-am-an-icon".as_bytes().into());
+
+        let store = unique_test_store(SnapshotSettingsClient::with_snapshot(seeded));
+
+        // Shared verification: the last-ingest marker, then the row count of
+        // the `suggestions` table, then the row count of the `icons` table.
+        let assert_state = |last_ingest: Option<u64>, rows: i64| -> anyhow::Result<()> {
+            store.dbs()?.reader.read(|dao| {
+                assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, last_ingest);
+                let suggestion_rows = dao
+                    .conn
+                    .query_one::<i64>("SELECT count(*) FROM suggestions")?;
+                assert_eq!(suggestion_rows, rows);
+                let icon_rows = dao.conn.query_one::<i64>("SELECT count(*) FROM icons")?;
+                assert_eq!(icon_rows, rows);
+
+                Ok(())
+            })?;
+            Ok(())
+        };
+
+        store.ingest(SuggestIngestionConstraints::default())?;
+        assert_state(Some(20), 1)?;
+
+        // Swap both records for tombstones; ingesting them is expected to
+        // leave no suggestions and no icons behind.
+        let tombstones = json!([{
+            "id": "data-1",
+            "last_modified": 25,
+            "deleted": true,
+        }, {
+            "id": "icon-2",
+            "last_modified": 30,
+            "deleted": true,
+        }]);
+        *store.settings_client.snapshot.borrow_mut() = Snapshot::with_records(tombstones)?;
+
+        store.ingest(SuggestIngestionConstraints::default())?;
+        assert_state(Some(30), 0)?;
+
+        Ok(())
+    }
+
+    /// Checks how `max_suggestions` constraints translate into the `_limit`
+    /// option passed to the settings client when fetching records.
+    #[test]
+    fn ingest_with_constraints() -> anyhow::Result<()> {
+        before_each();
+
+        let empty = Snapshot::with_records(json!([]))?;
+        let store = unique_test_store(SnapshotSettingsClient::with_snapshot(empty));
+
+        // Without constraints, no `_limit` option is expected.
+        store.ingest(SuggestIngestionConstraints::default())?;
+        let unconstrained_limit = store.settings_client.last_get_records_option("_limit");
+        assert_eq!(unconstrained_limit, None);
+
+        // Each case is (max_suggestions, expected `_limit`). The values sit
+        // around multiples of 200, the number of suggestions per record.
+        let cases = [
+            (0, "1"),
+            (199, "1"),
+            (200, "1"),
+            (201, "2"),
+            (300, "2"),
+            (400, "2"),
+            (401, "3"),
+        ];
+        for (cap, want) in cases {
+            let constraints = SuggestIngestionConstraints {
+                max_suggestions: Some(cap),
+            };
+            store.ingest(constraints)?;
+
+            let got = match store.settings_client.last_get_records_option("_limit") {
+                Some(limit) => limit,
+                None => return Err(anyhow!("Want limit = {} for {}", want, cap)),
+            };
+            assert_eq!(
+                got, want,
+                "Want limit = {} for {}; got limit = {}",
+                want, cap, got
+            );
+        }
+
+        Ok(())
+    }
+
+    /// Checks that `clear()` wipes ingested data: afterwards the last-ingest
+    /// marker is unset and the `suggestions` and `keywords` tables are empty.
+    #[test]
+    fn clear() -> anyhow::Result<()> {
+        before_each();
+
+        // A single data record whose attachment holds one AMP suggestion
+        // with six keywords.
+        let records = json!([{
+            "id": "data-1",
+            "type": "data",
+            "last_modified": 15,
+            "attachment": {
+                "filename": "data-1.json",
+                "mimetype": "application/json",
+                "location": "data-1.json",
+                "hash": "",
+                "size": 0,
+            },
+        }]);
+        let attachment = json!([{
+            "id": 0,
+            "advertiser": "Los Pollos Hermanos",
+            "iab_category": "8 - Food & Drink",
+            "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"],
+            "title": "Los Pollos Hermanos - Albuquerque",
+            "url": "https://www.lph-nm.biz",
+            "icon": "2",
+            "impression_url": "https://example.com",
+            "click_url": "https://example.com",
+            "score": 0.3
+        }]);
+        let populated = Snapshot::with_records(records)?.with_data("data-1.json", attachment)?;
+
+        let store = unique_test_store(SnapshotSettingsClient::with_snapshot(populated));
+
+        store.ingest(SuggestIngestionConstraints::default())?;
+
+        // Before clearing: marker set, one suggestion, six keywords.
+        store.dbs()?.reader.read(|dao| {
+            let last_ingest = dao.get_meta::<u64>(LAST_INGEST_META_KEY)?;
+            assert_eq!(last_ingest, Some(15));
+            let suggestion_rows = dao
+                .conn
+                .query_one::<i64>("SELECT count(*) FROM suggestions")?;
+            assert_eq!(suggestion_rows, 1);
+            let keyword_rows = dao.conn.query_one::<i64>("SELECT count(*) FROM keywords")?;
+            assert_eq!(keyword_rows, 6);
+
+            Ok(())
+        })?;
+
+        store.clear()?;
+
+        // After clearing: marker gone, both tables empty.
+        store.dbs()?.reader.read(|dao| {
+            let last_ingest = dao.get_meta::<u64>(LAST_INGEST_META_KEY)?;
+            assert_eq!(last_ingest, None);
+            let suggestion_rows = dao
+                .conn
+                .query_one::<i64>("SELECT count(*) FROM suggestions")?;
+            assert_eq!(suggestion_rows, 0);
+            let keyword_rows = dao.conn.query_one::<i64>("SELECT count(*) FROM keywords")?;
+            assert_eq!(keyword_rows, 0);
+
+            Ok(())
+        })?;
+
+        Ok(())
+    }
+
+ /// Tests querying suggestions.
+ #[test]
+ fn query() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+
+ }, {
+ "id": "data-2",
+ "type": "amo-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-2.json",
+ "mimetype": "application/json",
+ "location": "data-2.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "data-3",
+ "type": "pocket-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-3.json",
+ "mimetype": "application/json",
+ "location": "data-3.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "data-4",
+ "type": "yelp-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-4.json",
+ "mimetype": "application/json",
+ "location": "data-4.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "data-5",
+ "type": "mdn-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-5.json",
+ "mimetype": "application/json",
+ "location": "data-5.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "icon-2",
+ "type": "icon",
+ "last_modified": 20,
+ "attachment": {
+ "filename": "icon-2.png",
+ "mimetype": "image/png",
+ "location": "icon-2.png",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "icon-3",
+ "type": "icon",
+ "last_modified": 25,
+ "attachment": {
+ "filename": "icon-3.png",
+ "mimetype": "image/png",
+ "location": "icon-3.png",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "icon-yelp-favicon",
+ "type": "icon",
+ "last_modified": 25,
+ "attachment": {
+ "filename": "yelp-favicon.svg",
+ "mimetype": "image/svg+xml",
+ "location": "yelp-favicon.svg",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ .with_data(
+ "data-1.json",
+ json!([{
+ "id": 0,
+ "advertiser": "Good Place Eats",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"],
+ "title": "Lasagna Come Out Tomorrow",
+ "url": "https://www.lasagna.restaurant",
+ "icon": "2",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }, {
+ "id": 0,
+ "advertiser": "Wikipedia",
+ "iab_category": "5 - Education",
+ "keywords": ["cal", "cali", "california"],
+ "title": "California",
+ "url": "https://wikipedia.org/California",
+ "icon": "3"
+ }, {
+ "id": 0,
+ "advertiser": "Wikipedia",
+ "iab_category": "5 - Education",
+ "keywords": ["cal", "cali", "california", "institute", "technology"],
+ "title": "California Institute of Technology",
+ "url": "https://wikipedia.org/California_Institute_of_Technology",
+ "icon": "3"
+ },{
+ "id": 0,
+ "advertiser": "Wikipedia",
+ "iab_category": "5 - Education",
+ "keywords": ["multimatch"],
+ "title": "Multimatch",
+ "url": "https://wikipedia.org/Multimatch",
+ "icon": "3"
+ }]),
+ )?
+ .with_data(
+ "data-2.json",
+ json!([
+ {
+ "description": "amo suggestion",
+ "url": "https://addons.mozilla.org/en-US/firefox/addon/example",
+ "guid": "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
+ "keywords": ["relay", "spam", "masking email", "alias"],
+ "title": "Firefox Relay",
+ "icon": "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b",
+ "rating": "4.9",
+ "number_of_ratings": 888,
+ "score": 0.25
+ },
+ {
+ "description": "amo suggestion multi-match",
+ "url": "https://addons.mozilla.org/en-US/firefox/addon/multimatch",
+ "guid": "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
+ "keywords": ["multimatch"],
+ "title": "Firefox Multimatch",
+ "icon": "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b",
+ "rating": "4.9",
+ "number_of_ratings": 888,
+ "score": 0.25
+ },
+ ]),
+ )?
+ .with_data(
+ "data-3.json",
+ json!([
+ {
+ "description": "pocket suggestion",
+ "url": "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women",
+ "lowConfidenceKeywords": ["soft life", "workaholism", "toxic work culture", "work-life balance"],
+ "highConfidenceKeywords": ["burnout women", "grind culture", "women burnout"],
+ "title": "‘It’s Not Just Burnout:’ How Grind Culture Fails Women",
+ "score": 0.25
+ },
+ {
+ "description": "pocket suggestion multi-match",
+ "url": "https://getpocket.com/collections/multimatch",
+ "lowConfidenceKeywords": [],
+ "highConfidenceKeywords": ["multimatch"],
+ "title": "Multimatching",
+ "score": 0.88
+ },
+ ]),
+ )?
+ .with_data(
+ "data-4.json",
+ json!({
+ "subjects": ["ramen", "spicy ramen", "spicy random ramen", "rats", "raven", "raccoon", "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789", "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789Z"],
+ "preModifiers": ["best", "super best", "same_modifier"],
+ "postModifiers": ["delivery", "super delivery", "same_modifier"],
+ "locationSigns": [
+ { "keyword": "in", "needLocation": true },
+ { "keyword": "near", "needLocation": true },
+ { "keyword": "near by", "needLocation": false },
+ { "keyword": "near me", "needLocation": false },
+ ],
+ "yelpModifiers": ["yelp", "yelp keyword"],
+ "icon": "yelp-favicon",
+ "score": 0.5
+ }),
+ )?
+ .with_data(
+ "data-5.json",
+ json!([
+ {
+ "description": "Javascript Array",
+ "url": "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array",
+ "keywords": ["array javascript", "javascript array", "wildcard"],
+ "title": "Array",
+ "score": 0.24
+ },
+ ]),
+ )?
+ .with_icon("icon-2.png", "i-am-an-icon".as_bytes().into())
+ .with_icon("icon-3.png", "also-an-icon".as_bytes().into())
+ .with_icon("yelp-favicon.svg", "yelp-icon".as_bytes().into());
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ let table = [
+ (
+ "empty keyword; all providers",
+ SuggestionQuery {
+ keyword: String::new(),
+ providers: vec![
+ SuggestionProvider::Amp,
+ SuggestionProvider::Wikipedia,
+ SuggestionProvider::Amo,
+ SuggestionProvider::Pocket,
+ SuggestionProvider::Yelp,
+ SuggestionProvider::Weather,
+ ],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `la`; all providers",
+ SuggestionQuery {
+ keyword: "la".into(),
+ providers: vec![
+ SuggestionProvider::Amp,
+ SuggestionProvider::Wikipedia,
+ SuggestionProvider::Amo,
+ SuggestionProvider::Pocket,
+ SuggestionProvider::Yelp,
+ SuggestionProvider::Weather,
+ ],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Amp {
+ title: "Lasagna Come Out Tomorrow",
+ url: "https://www.lasagna.restaurant",
+ raw_url: "https://www.lasagna.restaurant",
+ icon: Some(
+ [
+ 105,
+ 45,
+ 97,
+ 109,
+ 45,
+ 97,
+ 110,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "lasagna",
+ block_id: 0,
+ advertiser: "Good Place Eats",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "multimatch; all providers",
+ SuggestionQuery {
+ keyword: "multimatch".into(),
+ providers: vec![
+ SuggestionProvider::Amp,
+ SuggestionProvider::Wikipedia,
+ SuggestionProvider::Amo,
+ SuggestionProvider::Pocket,
+ ],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Pocket {
+ title: "Multimatching",
+ url: "https://getpocket.com/collections/multimatch",
+ score: 0.88,
+ is_top_pick: true,
+ },
+ Amo {
+ title: "Firefox Multimatch",
+ url: "https://addons.mozilla.org/en-US/firefox/addon/multimatch",
+ icon_url: "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b",
+ description: "amo suggestion multi-match",
+ rating: Some(
+ "4.9",
+ ),
+ number_of_ratings: 888,
+ guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
+ score: 0.25,
+ },
+ Wikipedia {
+ title: "Multimatch",
+ url: "https://wikipedia.org/Multimatch",
+ icon: Some(
+ [
+ 97,
+ 108,
+ 115,
+ 111,
+ 45,
+ 97,
+ 110,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "multimatch",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "MultiMatch; all providers, mixed case",
+ SuggestionQuery {
+ keyword: "MultiMatch".into(),
+ providers: vec![
+ SuggestionProvider::Amp,
+ SuggestionProvider::Wikipedia,
+ SuggestionProvider::Amo,
+ SuggestionProvider::Pocket,
+ ],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Pocket {
+ title: "Multimatching",
+ url: "https://getpocket.com/collections/multimatch",
+ score: 0.88,
+ is_top_pick: true,
+ },
+ Amo {
+ title: "Firefox Multimatch",
+ url: "https://addons.mozilla.org/en-US/firefox/addon/multimatch",
+ icon_url: "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b",
+ description: "amo suggestion multi-match",
+ rating: Some(
+ "4.9",
+ ),
+ number_of_ratings: 888,
+ guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
+ score: 0.25,
+ },
+ Wikipedia {
+ title: "Multimatch",
+ url: "https://wikipedia.org/Multimatch",
+ icon: Some(
+ [
+ 97,
+ 108,
+ 115,
+ 111,
+ 45,
+ 97,
+ 110,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "multimatch",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "multimatch; all providers, limit 2",
+ SuggestionQuery {
+ keyword: "multimatch".into(),
+ providers: vec![
+ SuggestionProvider::Amp,
+ SuggestionProvider::Wikipedia,
+ SuggestionProvider::Amo,
+ SuggestionProvider::Pocket,
+ ],
+ limit: Some(2),
+ },
+ expect![[r#"
+ [
+ Pocket {
+ title: "Multimatching",
+ url: "https://getpocket.com/collections/multimatch",
+ score: 0.88,
+ is_top_pick: true,
+ },
+ Amo {
+ title: "Firefox Multimatch",
+ url: "https://addons.mozilla.org/en-US/firefox/addon/multimatch",
+ icon_url: "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b",
+ description: "amo suggestion multi-match",
+ rating: Some(
+ "4.9",
+ ),
+ number_of_ratings: 888,
+ guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
+ score: 0.25,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `la`; AMP only",
+ SuggestionQuery {
+ keyword: "la".into(),
+ providers: vec![SuggestionProvider::Amp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Amp {
+ title: "Lasagna Come Out Tomorrow",
+ url: "https://www.lasagna.restaurant",
+ raw_url: "https://www.lasagna.restaurant",
+ icon: Some(
+ [
+ 105,
+ 45,
+ 97,
+ 109,
+ 45,
+ 97,
+ 110,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "lasagna",
+ block_id: 0,
+ advertiser: "Good Place Eats",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `la`; Wikipedia, AMO, and Pocket",
+ SuggestionQuery {
+ keyword: "la".into(),
+ providers: vec![
+ SuggestionProvider::Wikipedia,
+ SuggestionProvider::Amo,
+ SuggestionProvider::Pocket,
+ ],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `la`; no providers",
+ SuggestionQuery {
+ keyword: "la".into(),
+ providers: vec![],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `cal`; AMP, AMO, and Pocket",
+ SuggestionQuery {
+ keyword: "cal".into(),
+ providers: vec![
+ SuggestionProvider::Amp,
+ SuggestionProvider::Amo,
+ SuggestionProvider::Pocket,
+ ],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `cal`; Wikipedia only",
+ SuggestionQuery {
+ keyword: "cal".into(),
+ providers: vec![SuggestionProvider::Wikipedia],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Wikipedia {
+ title: "California",
+ url: "https://wikipedia.org/California",
+ icon: Some(
+ [
+ 97,
+ 108,
+ 115,
+ 111,
+ 45,
+ 97,
+ 110,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "california",
+ },
+ Wikipedia {
+ title: "California Institute of Technology",
+ url: "https://wikipedia.org/California_Institute_of_Technology",
+ icon: Some(
+ [
+ 97,
+ 108,
+ 115,
+ 111,
+ 45,
+ 97,
+ 110,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "california",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `cal`; Wikipedia with limit 1",
+ SuggestionQuery {
+ keyword: "cal".into(),
+ providers: vec![SuggestionProvider::Wikipedia],
+ limit: Some(1),
+ },
+ expect![[r#"
+ [
+ Wikipedia {
+ title: "California",
+ url: "https://wikipedia.org/California",
+ icon: Some(
+ [
+ 97,
+ 108,
+ 115,
+ 111,
+ 45,
+ 97,
+ 110,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "california",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `cal`; no providers",
+ SuggestionQuery {
+ keyword: "cal".into(),
+ providers: vec![],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `spam`; AMO only",
+ SuggestionQuery {
+ keyword: "spam".into(),
+ providers: vec![SuggestionProvider::Amo],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Amo {
+ title: "Firefox Relay",
+ url: "https://addons.mozilla.org/en-US/firefox/addon/example",
+ icon_url: "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b",
+ description: "amo suggestion",
+ rating: Some(
+ "4.9",
+ ),
+ number_of_ratings: 888,
+ guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
+ score: 0.25,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `masking`; AMO only",
+ SuggestionQuery {
+ keyword: "masking".into(),
+ providers: vec![SuggestionProvider::Amo],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Amo {
+ title: "Firefox Relay",
+ url: "https://addons.mozilla.org/en-US/firefox/addon/example",
+ icon_url: "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b",
+ description: "amo suggestion",
+ rating: Some(
+ "4.9",
+ ),
+ number_of_ratings: 888,
+ guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
+ score: 0.25,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `masking e`; AMO only",
+ SuggestionQuery {
+ keyword: "masking e".into(),
+ providers: vec![SuggestionProvider::Amo],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Amo {
+ title: "Firefox Relay",
+ url: "https://addons.mozilla.org/en-US/firefox/addon/example",
+ icon_url: "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b",
+ description: "amo suggestion",
+ rating: Some(
+ "4.9",
+ ),
+ number_of_ratings: 888,
+ guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
+ score: 0.25,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `masking s`; AMO only",
+ SuggestionQuery {
+ keyword: "masking s".into(),
+ providers: vec![SuggestionProvider::Amo],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `soft`; AMP and Wikipedia",
+ SuggestionQuery {
+ keyword: "soft".into(),
+ providers: vec![SuggestionProvider::Amp, SuggestionProvider::Wikipedia],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `soft`; Pocket only",
+ SuggestionQuery {
+ keyword: "soft".into(),
+ providers: vec![SuggestionProvider::Pocket],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Pocket {
+ title: "‘It’s Not Just Burnout:’ How Grind Culture Fails Women",
+ url: "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women",
+ score: 0.25,
+ is_top_pick: false,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `soft l`; Pocket only",
+ SuggestionQuery {
+ keyword: "soft l".into(),
+ providers: vec![SuggestionProvider::Pocket],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Pocket {
+ title: "‘It’s Not Just Burnout:’ How Grind Culture Fails Women",
+ url: "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women",
+ score: 0.25,
+ is_top_pick: false,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `sof`; Pocket only",
+ SuggestionQuery {
+ keyword: "sof".into(),
+ providers: vec![SuggestionProvider::Pocket],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `burnout women`; Pocket only",
+ SuggestionQuery {
+ keyword: "burnout women".into(),
+ providers: vec![SuggestionProvider::Pocket],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Pocket {
+ title: "‘It’s Not Just Burnout:’ How Grind Culture Fails Women",
+ url: "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women",
+ score: 0.25,
+ is_top_pick: true,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `burnout person`; Pocket only",
+ SuggestionQuery {
+ keyword: "burnout person".into(),
+ providers: vec![SuggestionProvider::Pocket],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `best spicy ramen delivery in tokyo`; Yelp only",
+ SuggestionQuery {
+ keyword: "best spicy ramen delivery in tokyo".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=best+spicy+ramen+delivery&find_loc=tokyo",
+ title: "best spicy ramen delivery in tokyo",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: true,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `BeSt SpIcY rAmEn DeLiVeRy In ToKyO`; Yelp only",
+ SuggestionQuery {
+ keyword: "BeSt SpIcY rAmEn DeLiVeRy In ToKyO".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=BeSt+SpIcY+rAmEn+DeLiVeRy&find_loc=ToKyO",
+ title: "BeSt SpIcY rAmEn DeLiVeRy In ToKyO",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: true,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `best ramen delivery in tokyo`; Yelp only",
+ SuggestionQuery {
+ keyword: "best ramen delivery in tokyo".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=best+ramen+delivery&find_loc=tokyo",
+ title: "best ramen delivery in tokyo",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: true,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `best invalid_ramen delivery in tokyo`; Yelp only",
+ SuggestionQuery {
+ keyword: "best invalid_ramen delivery in tokyo".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `best delivery in tokyo`; Yelp only",
+ SuggestionQuery {
+ keyword: "best delivery in tokyo".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `super best ramen delivery in tokyo`; Yelp only",
+ SuggestionQuery {
+ keyword: "super best ramen delivery in tokyo".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=super+best+ramen+delivery&find_loc=tokyo",
+ title: "super best ramen delivery in tokyo",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: true,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `invalid_best ramen delivery in tokyo`; Yelp only",
+ SuggestionQuery {
+ keyword: "invalid_best ramen delivery in tokyo".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `ramen delivery in tokyo`; Yelp only",
+ SuggestionQuery {
+ keyword: "ramen delivery in tokyo".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=ramen+delivery&find_loc=tokyo",
+ title: "ramen delivery in tokyo",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: true,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `ramen super delivery in tokyo`; Yelp only",
+ SuggestionQuery {
+ keyword: "ramen super delivery in tokyo".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=ramen+super+delivery&find_loc=tokyo",
+ title: "ramen super delivery in tokyo",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: true,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `ramen invalid_delivery in tokyo`; Yelp only",
+ SuggestionQuery {
+ keyword: "ramen invalid_delivery in tokyo".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `ramen in tokyo`; Yelp only",
+ SuggestionQuery {
+ keyword: "ramen in tokyo".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=ramen&find_loc=tokyo",
+ title: "ramen in tokyo",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: true,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `ramen near tokyo`; Yelp only",
+ SuggestionQuery {
+ keyword: "ramen near tokyo".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=ramen&find_loc=tokyo",
+ title: "ramen near tokyo",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: true,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `ramen invalid_in tokyo`; Yelp only",
+ SuggestionQuery {
+ keyword: "ramen invalid_in tokyo".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `ramen in San Francisco`; Yelp only",
+ SuggestionQuery {
+ keyword: "ramen in San Francisco".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=ramen&find_loc=San+Francisco",
+ title: "ramen in San Francisco",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: true,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `ramen in`; Yelp only",
+ SuggestionQuery {
+ keyword: "ramen in".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=ramen",
+ title: "ramen in",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: true,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `ramen near by`; Yelp only",
+ SuggestionQuery {
+ keyword: "ramen near by".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=ramen+near+by",
+ title: "ramen near by",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: false,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `ramen near me`; Yelp only",
+ SuggestionQuery {
+ keyword: "ramen near me".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=ramen+near+me",
+ title: "ramen near me",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: false,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `ramen near by tokyo`; Yelp only",
+ SuggestionQuery {
+ keyword: "ramen near by tokyo".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `ramen`; Yelp only",
+ SuggestionQuery {
+ keyword: "ramen".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=ramen",
+ title: "ramen",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: false,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = maximum chars; Yelp only",
+ SuggestionQuery {
+ keyword: "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789",
+ title: "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: false,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = over chars; Yelp only",
+ SuggestionQuery {
+ keyword: "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789Z".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `best delivery`; Yelp only",
+ SuggestionQuery {
+ keyword: "best delivery".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `same_modifier same_modifier`; Yelp only",
+ SuggestionQuery {
+ keyword: "same_modifier same_modifier".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `same_modifier `; Yelp only",
+ SuggestionQuery {
+ keyword: "same_modifier ".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `yelp ramen`; Yelp only",
+ SuggestionQuery {
+ keyword: "yelp ramen".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=ramen",
+ title: "ramen",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: false,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `yelp keyword ramen`; Yelp only",
+ SuggestionQuery {
+ keyword: "yelp keyword ramen".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=ramen",
+ title: "ramen",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: false,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `ramen in tokyo yelp`; Yelp only",
+ SuggestionQuery {
+ keyword: "ramen in tokyo yelp".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=ramen&find_loc=tokyo",
+ title: "ramen in tokyo",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: true,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `ramen in tokyo yelp keyword`; Yelp only",
+ SuggestionQuery {
+ keyword: "ramen in tokyo yelp keyword".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=ramen&find_loc=tokyo",
+ title: "ramen in tokyo",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: true,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `yelp ramen yelp`; Yelp only",
+ SuggestionQuery {
+ keyword: "yelp ramen yelp".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=ramen",
+ title: "ramen",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: false,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `best yelp ramen`; Yelp only",
+ SuggestionQuery {
+ keyword: "best yelp ramen".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `Spicy R`; Yelp only",
+ SuggestionQuery {
+ keyword: "Spicy R".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=Spicy+Ramen",
+ title: "Spicy Ramen",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: false,
+ subject_exact_match: false,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `BeSt Ramen`; Yelp only",
+ SuggestionQuery {
+ keyword: "BeSt Ramen".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=BeSt+Ramen",
+ title: "BeSt Ramen",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: false,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `BeSt Spicy R`; Yelp only",
+ SuggestionQuery {
+ keyword: "BeSt Spicy R".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=BeSt+Spicy+Ramen",
+ title: "BeSt Spicy Ramen",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: false,
+ subject_exact_match: false,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `BeSt R`; Yelp only",
+ SuggestionQuery {
+ keyword: "BeSt R".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `r`; Yelp only",
+ SuggestionQuery {
+ keyword: "r".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `ra`; Yelp only",
+ SuggestionQuery {
+ keyword: "ra".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=rats",
+ title: "rats",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: false,
+ subject_exact_match: false,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `ram`; Yelp only",
+ SuggestionQuery {
+ keyword: "ram".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=ramen",
+ title: "ramen",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: false,
+ subject_exact_match: false,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `rac`; Yelp only",
+ SuggestionQuery {
+ keyword: "rac".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=raccoon",
+ title: "raccoon",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: false,
+ subject_exact_match: false,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `best r`; Yelp only",
+ SuggestionQuery {
+ keyword: "best r".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = `best ra`; Yelp only",
+ SuggestionQuery {
+ keyword: "best ra".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=best+rats",
+ title: "best rats",
+ icon: Some(
+ [
+ 121,
+ 101,
+ 108,
+ 112,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ score: 0.5,
+ has_location_sign: false,
+ subject_exact_match: false,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ ),
+ ];
+ for (what, query, expect) in table {
+ expect.assert_debug_eq(
+ &store
+ .query(query)
+ .with_context(|| format!("Couldn't query store for {}", what))?,
+ );
+ }
+
+ Ok(())
+ }
+
+ // Tests querying multiple providers whose matching suggestions have different scores, with and without a `limit`.
+ #[test]
+ fn query_with_multiple_providers_and_diff_scores() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "data-2",
+ "type": "pocket-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-2.json",
+ "mimetype": "application/json",
+ "location": "data-2.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "icon-3",
+ "type": "icon",
+ "last_modified": 25,
+ "attachment": {
+ "filename": "icon-3.png",
+ "mimetype": "image/png",
+ "location": "icon-3.png",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ .with_data(
+ "data-1.json",
+ json!([{
+ "id": 0,
+ "advertiser": "Good Place Eats",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow", "amp wiki match"],
+ "title": "Lasagna Come Out Tomorrow",
+ "url": "https://www.lasagna.restaurant",
+ "icon": "2",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }, {
+ "id": 0,
+ "advertiser": "Good Place Eats",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["pe", "pen", "penne", "penne for your thoughts", "amp wiki match"],
+ "title": "Penne for Your Thoughts",
+ "url": "https://penne.biz",
+ "icon": "2",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.1
+ }, {
+ "id": 0,
+ "advertiser": "Wikipedia",
+ "iab_category": "5 - Education",
+ "keywords": ["amp wiki match", "pocket wiki match"],
+ "title": "Multimatch",
+ "url": "https://wikipedia.org/Multimatch",
+ "icon": "3"
+ }]),
+ )?
+ .with_data(
+ "data-2.json",
+ json!([
+ {
+ "description": "pocket suggestion",
+ "url": "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women",
+ "lowConfidenceKeywords": ["soft life", "workaholism", "toxic work culture", "work-life balance", "pocket wiki match"],
+ "highConfidenceKeywords": ["burnout women", "grind culture", "women burnout"],
+ "title": "‘It’s Not Just Burnout:’ How Grind Culture Fails Women",
+ "score": 0.05
+ },
+ {
+ "description": "pocket suggestion multi-match",
+ "url": "https://getpocket.com/collections/multimatch",
+ "lowConfidenceKeywords": [],
+ "highConfidenceKeywords": ["pocket wiki match"],
+ "title": "Pocket wiki match",
+ "score": 0.88
+ },
+ ]),
+ )?
+ .with_icon("icon-3.png", "also-an-icon".as_bytes().into());
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?; // ingest from the snapshot-backed client with default constraints
+
+ let table = [
+ (
+ "keyword = `amp wiki match`; all providers",
+ SuggestionQuery {
+ keyword: "amp wiki match".into(),
+ providers: vec![
+ SuggestionProvider::Amp,
+ SuggestionProvider::Wikipedia,
+ SuggestionProvider::Amo,
+ SuggestionProvider::Pocket,
+ SuggestionProvider::Yelp,
+ ],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Amp {
+ title: "Lasagna Come Out Tomorrow",
+ url: "https://www.lasagna.restaurant",
+ raw_url: "https://www.lasagna.restaurant",
+ icon: None,
+ full_keyword: "amp wiki match",
+ block_id: 0,
+ advertiser: "Good Place Eats",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ Wikipedia {
+ title: "Multimatch",
+ url: "https://wikipedia.org/Multimatch",
+ icon: Some(
+ [
+ 97,
+ 108,
+ 115,
+ 111,
+ 45,
+ 97,
+ 110,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "amp wiki match",
+ },
+ Amp {
+ title: "Penne for Your Thoughts",
+ url: "https://penne.biz",
+ raw_url: "https://penne.biz",
+ icon: None,
+ full_keyword: "amp wiki match",
+ block_id: 0,
+ advertiser: "Good Place Eats",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.1,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `amp wiki match`; all providers, limit 2",
+ SuggestionQuery {
+ keyword: "amp wiki match".into(),
+ providers: vec![
+ SuggestionProvider::Amp,
+ SuggestionProvider::Wikipedia,
+ SuggestionProvider::Amo,
+ SuggestionProvider::Pocket,
+ SuggestionProvider::Yelp,
+ ],
+ limit: Some(2),
+ },
+ expect![[r#"
+ [
+ Amp {
+ title: "Lasagna Come Out Tomorrow",
+ url: "https://www.lasagna.restaurant",
+ raw_url: "https://www.lasagna.restaurant",
+ icon: None,
+ full_keyword: "amp wiki match",
+ block_id: 0,
+ advertiser: "Good Place Eats",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ Wikipedia {
+ title: "Multimatch",
+ url: "https://wikipedia.org/Multimatch",
+ icon: Some(
+ [
+ 97,
+ 108,
+ 115,
+ 111,
+ 45,
+ 97,
+ 110,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "amp wiki match",
+ },
+ ]
+ "#]],
+ ),
+ (
+ "pocket wiki match; all providers",
+ SuggestionQuery {
+ keyword: "pocket wiki match".into(),
+ providers: vec![
+ SuggestionProvider::Amp,
+ SuggestionProvider::Wikipedia,
+ SuggestionProvider::Amo,
+ SuggestionProvider::Pocket,
+ ],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Pocket {
+ title: "Pocket wiki match",
+ url: "https://getpocket.com/collections/multimatch",
+ score: 0.88,
+ is_top_pick: true,
+ },
+ Wikipedia {
+ title: "Multimatch",
+ url: "https://wikipedia.org/Multimatch",
+ icon: Some(
+ [
+ 97,
+ 108,
+ 115,
+ 111,
+ 45,
+ 97,
+ 110,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "pocket wiki match",
+ },
+ Pocket {
+ title: "‘It’s Not Just Burnout:’ How Grind Culture Fails Women",
+ url: "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women",
+ score: 0.05,
+ is_top_pick: false,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "pocket wiki match; all providers limit 1",
+ SuggestionQuery {
+ keyword: "pocket wiki match".into(),
+ providers: vec![
+ SuggestionProvider::Amp,
+ SuggestionProvider::Wikipedia,
+ SuggestionProvider::Amo,
+ SuggestionProvider::Pocket,
+ ],
+ limit: Some(1),
+ },
+ expect![[r#"
+ [
+ Pocket {
+ title: "Pocket wiki match",
+ url: "https://getpocket.com/collections/multimatch",
+ score: 0.88,
+ is_top_pick: true,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "work-life balance; duplicate providers",
+ SuggestionQuery {
+ keyword: "work-life balance".into(),
+ providers: vec![SuggestionProvider::Pocket, SuggestionProvider::Pocket], // same provider listed twice; the snapshot below expects a single result
+ limit: Some(-1), // negative limit; the snapshot below still expects the match to be returned
+ },
+ expect![[r#"
+ [
+ Pocket {
+ title: "‘It’s Not Just Burnout:’ How Grind Culture Fails Women",
+ url: "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women",
+ score: 0.05,
+ is_top_pick: false,
+ },
+ ]
+ "#]],
+ ),
+ ];
+ for (what, query, expect) in table { // run each query and compare its Debug output with the inline snapshot
+ expect.assert_debug_eq(
+ &store
+ .query(query)
+ .with_context(|| format!("Couldn't query store for {}", what))?,
+ );
+ }
+
+ Ok(())
+ }
+
+ // Tests prefix queries against suggestions whose keywords share a common prefix (e.g. `soft life`, `soft living`, `soft lives`).
+ #[test]
+ fn query_with_multiple_suggestions_with_same_prefix() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "amo-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "data-2",
+ "type": "pocket-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-2.json",
+ "mimetype": "application/json",
+ "location": "data-2.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "icon-3",
+ "type": "icon",
+ "last_modified": 25,
+ "attachment": {
+ "filename": "icon-3.png",
+ "mimetype": "image/png",
+ "location": "icon-3.png",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ .with_data(
+ "data-1.json",
+ json!([
+ {
+ "description": "amo suggestion",
+ "url": "https://addons.mozilla.org/en-US/firefox/addon/example",
+ "guid": "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
+ "keywords": ["relay", "spam", "masking email", "masking emails", "masking accounts", "alias" ],
+ "title": "Firefox Relay",
+ "icon": "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b",
+ "rating": "4.9",
+ "number_of_ratings": 888,
+ "score": 0.25
+ }
+ ]),
+ )?
+ .with_data(
+ "data-2.json",
+ json!([
+ {
+ "description": "pocket suggestion",
+ "url": "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women",
+ "lowConfidenceKeywords": ["soft life", "soft living", "soft work", "workaholism", "toxic work culture"],
+ "highConfidenceKeywords": ["burnout women", "grind culture", "women burnout", "soft lives"],
+ "title": "‘It’s Not Just Burnout:’ How Grind Culture Fails Women",
+ "score": 0.05
+ }
+ ]),
+ )?
+ .with_icon("icon-3.png", "also-an-icon".as_bytes().into());
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?; // ingest from the snapshot-backed client with default constraints
+
+ let table = [
+ (
+ "keyword = `soft li`; pocket",
+ SuggestionQuery {
+ keyword: "soft li".into(),
+ providers: vec![SuggestionProvider::Pocket],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Pocket {
+ title: "‘It’s Not Just Burnout:’ How Grind Culture Fails Women",
+ url: "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women",
+ score: 0.05,
+ is_top_pick: false,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `soft lives`; pocket",
+ SuggestionQuery {
+ keyword: "soft lives".into(),
+ providers: vec![SuggestionProvider::Pocket],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Pocket {
+ title: "‘It’s Not Just Burnout:’ How Grind Culture Fails Women",
+ url: "https://getpocket.com/collections/its-not-just-burnout-how-grind-culture-failed-women",
+ score: 0.05,
+ is_top_pick: true,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `masking `; amo provider",
+ SuggestionQuery {
+ keyword: "masking ".into(),
+ providers: vec![SuggestionProvider::Amo],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Amo {
+ title: "Firefox Relay",
+ url: "https://addons.mozilla.org/en-US/firefox/addon/example",
+ icon_url: "https://addons.mozilla.org/user-media/addon_icons/2633/2633704-64.png?modified=2c11a80b",
+ description: "amo suggestion",
+ rating: Some(
+ "4.9",
+ ),
+ number_of_ratings: 888,
+ guid: "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}",
+ score: 0.25,
+ },
+ ]
+ "#]],
+ ),
+ ];
+ for (what, query, expect) in table { // run each query and compare its Debug output with the inline snapshot
+ expect.assert_debug_eq(
+ &store
+ .query(query)
+ .with_context(|| format!("Couldn't query store for {}", what))?,
+ );
+ }
+
+ Ok(())
+ }
+
+ // Tests querying the AMP mobile provider on its own, the desktop AMP provider on its own, and both together.
+ #[test]
+ fn query_with_amp_mobile_provider() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "amp-mobile-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "data-2",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-2.json",
+ "mimetype": "application/json",
+ "location": "data-2.json",
+ "hash": "",
+ "size": 0,
+ },
+ }, {
+ "id": "icon-3",
+ "type": "icon",
+ "last_modified": 25,
+ "attachment": {
+ "filename": "icon-3.png",
+ "mimetype": "image/png",
+ "location": "icon-3.png",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ .with_data(
+ "data-1.json",
+ json!([
+ {
+ "id": 0,
+ "advertiser": "Good Place Eats",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"],
+ "title": "Mobile - Lasagna Come Out Tomorrow",
+ "url": "https://www.lasagna.restaurant",
+ "icon": "3",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }
+ ]),
+ )?
+ .with_data(
+ "data-2.json",
+ json!([
+ {
+ "id": 0,
+ "advertiser": "Good Place Eats",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["la", "las", "lasa", "lasagna", "lasagna come out tomorrow"],
+ "title": "Desktop - Lasagna Come Out Tomorrow",
+ "url": "https://www.lasagna.restaurant",
+ "icon": "3",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.2
+ }
+ ]),
+ )?
+ .with_icon("icon-3.png", "also-an-icon".as_bytes().into());
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?; // ingest from the snapshot-backed client with default constraints
+
+ let table = [
+ (
+ "keyword = `las`; Amp Mobile",
+ SuggestionQuery {
+ keyword: "las".into(),
+ providers: vec![SuggestionProvider::AmpMobile],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Amp {
+ title: "Mobile - Lasagna Come Out Tomorrow",
+ url: "https://www.lasagna.restaurant",
+ raw_url: "https://www.lasagna.restaurant",
+ icon: Some(
+ [
+ 97,
+ 108,
+ 115,
+ 111,
+ 45,
+ 97,
+ 110,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "lasagna",
+ block_id: 0,
+ advertiser: "Good Place Eats",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `las`; Amp",
+ SuggestionQuery {
+ keyword: "las".into(),
+ providers: vec![SuggestionProvider::Amp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Amp {
+ title: "Desktop - Lasagna Come Out Tomorrow",
+ url: "https://www.lasagna.restaurant",
+ raw_url: "https://www.lasagna.restaurant",
+ icon: Some(
+ [
+ 97,
+ 108,
+ 115,
+ 111,
+ 45,
+ 97,
+ 110,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "lasagna",
+ block_id: 0,
+ advertiser: "Good Place Eats",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.2,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `las`; amp and amp mobile",
+ SuggestionQuery {
+ keyword: "las".into(),
+ providers: vec![SuggestionProvider::Amp, SuggestionProvider::AmpMobile],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Amp {
+ title: "Mobile - Lasagna Come Out Tomorrow",
+ url: "https://www.lasagna.restaurant",
+ raw_url: "https://www.lasagna.restaurant",
+ icon: Some(
+ [
+ 97,
+ 108,
+ 115,
+ 111,
+ 45,
+ 97,
+ 110,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "lasagna",
+ block_id: 0,
+ advertiser: "Good Place Eats",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ Amp {
+ title: "Desktop - Lasagna Come Out Tomorrow",
+ url: "https://www.lasagna.restaurant",
+ raw_url: "https://www.lasagna.restaurant",
+ icon: Some(
+ [
+ 97,
+ 108,
+ 115,
+ 111,
+ 45,
+ 97,
+ 110,
+ 45,
+ 105,
+ 99,
+ 111,
+ 110,
+ ],
+ ),
+ full_keyword: "lasagna",
+ block_id: 0,
+ advertiser: "Good Place Eats",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.2,
+ },
+ ]
+ "#]],
+ ),
+ ];
+ for (what, query, expect) in table { // run each query and compare its Debug output with the inline snapshot
+ expect.assert_debug_eq(
+ &store
+ .query(query)
+ .with_context(|| format!("Couldn't query store for {}", what))?,
+ );
+ }
+
+ Ok(())
+ }
+
+ /// Tests ingesting malformed Remote Settings records that we understand,
+ /// but that are missing fields, or aren't in the format we expect.
+ ///
+ /// The snapshot holds a data record and an icon record that both lack an
+ /// attachment, plus an icon record whose ID isn't of the form `icon-{id}`.
+ #[test]
+ fn ingest_malformed() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ // Data record without an attachment.
+ "id": "missing-data-attachment",
+ "type": "data",
+ "last_modified": 15,
+ }, {
+ // Icon record without an attachment.
+ "id": "missing-icon-attachment",
+ "type": "icon",
+ "last_modified": 30,
+ }, {
+ // Icon record with an ID that's not `icon-{id}`, so suggestions in
+ // the data attachment won't be able to reference it.
+ "id": "bad-icon-id",
+ "type": "icon",
+ "last_modified": 45,
+ "attachment": {
+ "filename": "icon-1.png",
+ "mimetype": "image/png",
+ "location": "icon-1.png",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ .with_icon("icon-1.png", "i-am-an-icon".as_bytes().into());
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ store.dbs()?.reader.read(|dao| {
+ // The last-ingest marker equals the newest `last_modified` in the
+ // snapshot (45), yet no suggestions or icons were stored.
+ assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(45));
+ assert_eq!(
+ dao.conn
+ .query_one::<i64>("SELECT count(*) FROM suggestions")?,
+ 0
+ );
+ assert_eq!(dao.conn.query_one::<i64>("SELECT count(*) FROM icons")?, 0);
+
+ Ok(())
+ })?;
+
+ Ok(())
+ }
+
+ /// Tests unparsable Remote Settings records, which we don't know how to
+ /// ingest at all.
+ ///
+ /// Both records have a `type` this test expects to be unknown; each record
+ /// ID must end up in the unparsable-records meta entry.
+ #[test]
+ fn ingest_unparsable() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "fancy-new-suggestions-1",
+ "type": "fancy-new-suggestions",
+ "last_modified": 15,
+ }, {
+ "id": "clippy-2",
+ "type": "clippy",
+ "last_modified": 30,
+ }]))?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ store.dbs()?.reader.read(|dao| {
+ // The last-ingest marker still reflects the newest record (30).
+ assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(30));
+ expect![[r#"
+ Some(
+ UnparsableRecords(
+ {
+ "clippy-2": UnparsableRecord {
+ schema_version: 14,
+ },
+ "fancy-new-suggestions-1": UnparsableRecord {
+ schema_version: 14,
+ },
+ },
+ ),
+ )
+ "#]]
+ .assert_debug_eq(&dao.get_meta::<UnparsableRecords>(UNPARSABLE_RECORDS_META_KEY)?);
+ Ok(())
+ })?;
+
+ Ok(())
+ }
+
+ /// Tests ingesting a snapshot that mixes a parsable `data` record with two
+ /// records of unknown type: only the two unknown records may be listed in
+ /// the unparsable-records meta entry.
+ #[test]
+ fn ingest_mixed_parsable_unparsable_records() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "fancy-new-suggestions-1",
+ "type": "fancy-new-suggestions",
+ "last_modified": 15,
+ },
+ {
+ "id": "data-1",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ },
+ {
+ "id": "clippy-2",
+ "type": "clippy",
+ "last_modified": 30,
+ }]))?
+ .with_data(
+ "data-1.json",
+ json!([{
+ "id": 0,
+ "advertiser": "Los Pollos Hermanos",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"],
+ "title": "Los Pollos Hermanos - Albuquerque",
+ "url": "https://www.lph-nm.biz",
+ "icon": "5678",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3,
+ }]),
+ )?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ store.dbs()?.reader.read(|dao| {
+ assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(30));
+ // `data-1` must be absent from the expected map below.
+ expect![[r#"
+ Some(
+ UnparsableRecords(
+ {
+ "clippy-2": UnparsableRecord {
+ schema_version: 14,
+ },
+ "fancy-new-suggestions-1": UnparsableRecord {
+ schema_version: 14,
+ },
+ },
+ ),
+ )
+ "#]]
+ .assert_debug_eq(&dao.get_meta::<UnparsableRecords>(UNPARSABLE_RECORDS_META_KEY)?);
+ Ok(())
+ })?;
+
+ Ok(())
+ }
+
+ /// Tests that the last-ingest meta value isn't updated for old unparsable
+ /// Remote Settings records: it is seeded with 30 and must still be 30 after
+ /// ingesting an unparsable record whose `last_modified` is 15.
+ #[test]
+ fn ingest_unparsable_and_meta_update_stays_the_same() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "fancy-new-suggestions-1",
+ "type": "fancy-new-suggestions",
+ "last_modified": 15,
+ }]))?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+ // Seed a last-ingest value newer than the record in the snapshot.
+ store.dbs()?.writer.write(|dao| {
+ dao.put_meta(LAST_INGEST_META_KEY, 30)?;
+ Ok(())
+ })?;
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ store.dbs()?.reader.read(|dao| {
+ assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(30));
+ Ok(())
+ })?;
+
+ Ok(())
+ }
+
+ /// Tests that a record previously listed as unparsable (`data-1`) is dropped
+ /// from the unparsable-records meta entry once it ingests successfully,
+ /// while records that are still unparsable are listed with schema version
+ /// 14 instead of the seeded version 1.
+ #[test]
+ fn remove_known_records_out_of_meta_table() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "fancy-new-suggestions-1",
+ "type": "fancy-new-suggestions",
+ "last_modified": 15,
+ },
+ {
+ "id": "data-1",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ },
+ {
+ "id": "clippy-2",
+ "type": "clippy",
+ "last_modified": 15,
+ }]))?
+ .with_data(
+ "data-1.json",
+ json!([{
+ "id": 0,
+ "advertiser": "Los Pollos Hermanos",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"],
+ "title": "Los Pollos Hermanos - Albuquerque",
+ "url": "https://www.lph-nm.biz",
+ "icon": "5678",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }]),
+ )?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+ // Pretend an earlier ingest (schema version 1) failed to parse both
+ // `data-1` and `clippy-2`.
+ let mut initial_data = UnparsableRecords::default();
+ initial_data
+ .0
+ .insert("data-1".to_string(), UnparsableRecord { schema_version: 1 });
+ initial_data.0.insert(
+ "clippy-2".to_string(),
+ UnparsableRecord { schema_version: 1 },
+ );
+ store.dbs()?.writer.write(|dao| {
+ dao.put_meta(UNPARSABLE_RECORDS_META_KEY, initial_data)?;
+ Ok(())
+ })?;
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ store.dbs()?.reader.read(|dao| {
+ expect![[r#"
+ Some(
+ UnparsableRecords(
+ {
+ "clippy-2": UnparsableRecord {
+ schema_version: 14,
+ },
+ "fancy-new-suggestions-1": UnparsableRecord {
+ schema_version: 14,
+ },
+ },
+ ),
+ )
+ "#]]
+ .assert_debug_eq(&dao.get_meta::<UnparsableRecords>(UNPARSABLE_RECORDS_META_KEY)?);
+ Ok(())
+ })?;
+
+ Ok(())
+ }
+
+ /// Tests that records with invalid attachments are ignored and marked as unparsable.
+ ///
+ /// The snapshot pairs one record whose attachment lacks the `keywords`
+ /// field with one valid record; the valid record must still be queryable.
+ #[test]
+ fn skip_over_invalid_records() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([
+ {
+ "id": "invalid-attachment",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-2.json",
+ "mimetype": "application/json",
+ "location": "data-2.json",
+ "hash": "",
+ "size": 0,
+ },
+ },
+ {
+ "id": "valid-record",
+ "type": "data",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ },
+ ]))?
+ .with_data(
+ "data-1.json",
+ json!([{
+ "id": 0,
+ "advertiser": "Los Pollos Hermanos",
+ "iab_category": "8 - Food & Drink",
+ "keywords": ["lo", "los", "los p", "los pollos", "los pollos h", "los pollos hermanos"],
+ "title": "Los Pollos Hermanos - Albuquerque",
+ "url": "https://www.lph-nm.biz",
+ "icon": "5678",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }]),
+ )?
+ // This attachment is missing the `keywords` field and is invalid
+ .with_data(
+ "data-2.json",
+ json!([{
+ "id": 1,
+ "advertiser": "Los Pollos Hermanos",
+ "iab_category": "8 - Food & Drink",
+ "title": "Los Pollos Hermanos - Albuquerque",
+ "url": "https://www.lph-nm.biz",
+ "icon": "5678",
+ "impression_url": "https://example.com/impression_url",
+ "click_url": "https://example.com/click_url",
+ "score": 0.3
+ }]),
+ )?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ // Test that the invalid record is marked as unparsable
+ store.dbs()?.reader.read(|dao| {
+ expect![[r#"
+ Some(
+ UnparsableRecords(
+ {
+ "invalid-attachment": UnparsableRecord {
+ schema_version: 14,
+ },
+ },
+ ),
+ )
+ "#]]
+ .assert_debug_eq(&dao.get_meta::<UnparsableRecords>(UNPARSABLE_RECORDS_META_KEY)?);
+ Ok(())
+ })?;
+
+ // Test that the valid record was read
+ store.dbs()?.reader.read(|dao| {
+ assert_eq!(dao.get_meta::<u64>(LAST_INGEST_META_KEY)?, Some(15));
+ // The snapshot has no icon record, and the expected suggestion has
+ // `icon: None`.
+ expect![[r#"
+ [
+ Amp {
+ title: "Los Pollos Hermanos - Albuquerque",
+ url: "https://www.lph-nm.biz",
+ raw_url: "https://www.lph-nm.biz",
+ icon: None,
+ full_keyword: "los",
+ block_id: 0,
+ advertiser: "Los Pollos Hermanos",
+ iab_category: "8 - Food & Drink",
+ impression_url: "https://example.com/impression_url",
+ click_url: "https://example.com/click_url",
+ raw_click_url: "https://example.com/click_url",
+ score: 0.3,
+ },
+ ]
+ "#]]
+ .assert_debug_eq(&dao.fetch_suggestions(&SuggestionQuery {
+ keyword: "lo".into(),
+ providers: vec![SuggestionProvider::Amp],
+ limit: None,
+ })?);
+
+ Ok(())
+ })?;
+
+ Ok(())
+ }
+
+ /// Tests that an `UnparsableRecord` serializes to JSON with its
+ /// `schema_version` stored under the short key `v`.
+ #[test]
+ fn unparsable_record_serialized_correctly() -> anyhow::Result<()> {
+ let unparseable_record = UnparsableRecord { schema_version: 1 };
+ assert_eq!(serde_json::to_value(unparseable_record)?, json!({ "v": 1 }),);
+ Ok(())
+ }
+
+ /// Tests querying MDN suggestions ingested from an `mdn-suggestions`
+ /// record.
+ ///
+ /// A query matches when it is the first word of a keyword, the first word
+ /// plus part or all of the rest, or a whole single-word keyword. Part of a
+ /// single-word keyword (`wild` for `wildcard`) does not match.
+ #[test]
+ fn query_mdn() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "mdn-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ .with_data(
+ "data-1.json",
+ json!([
+ {
+ "description": "Javascript Array",
+ "url": "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array",
+ "keywords": ["array javascript", "javascript array", "wildcard"],
+ "title": "Array",
+ "score": 0.24
+ },
+ ]),
+ )?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ // Each entry: (description used in failure context, query, expected result).
+ let table = [
+ (
+ "keyword = prefix; MDN only",
+ SuggestionQuery {
+ keyword: "array".into(),
+ providers: vec![SuggestionProvider::Mdn],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Mdn {
+ title: "Array",
+ url: "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array",
+ description: "Javascript Array",
+ score: 0.24,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = prefix + partial suffix; MDN only",
+ SuggestionQuery {
+ keyword: "array java".into(),
+ providers: vec![SuggestionProvider::Mdn],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Mdn {
+ title: "Array",
+ url: "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array",
+ description: "Javascript Array",
+ score: 0.24,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = prefix + entire suffix; MDN only",
+ SuggestionQuery {
+ keyword: "javascript array".into(),
+ providers: vec![SuggestionProvider::Mdn],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Mdn {
+ title: "Array",
+ url: "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array",
+ description: "Javascript Array",
+ score: 0.24,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = `partial prefix word`; MDN only",
+ SuggestionQuery {
+ keyword: "wild".into(),
+ providers: vec![SuggestionProvider::Mdn],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = single word; MDN only",
+ SuggestionQuery {
+ keyword: "wildcard".into(),
+ providers: vec![SuggestionProvider::Mdn],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Mdn {
+ title: "Array",
+ url: "https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array",
+ description: "Javascript Array",
+ score: 0.24,
+ },
+ ]
+ "#]],
+ ),
+ ];
+
+ for (what, query, expect) in table {
+ expect.assert_debug_eq(
+ &store
+ .query(query)
+ .with_context(|| format!("Couldn't query store for {}", what))?,
+ );
+ }
+
+ Ok(())
+ }
+
+ /// Tests that a Yelp suggestion is still returned, with `icon: None`, when
+ /// the snapshot contains no icon record for the `yelp-favicon` icon that the
+ /// suggestion data references.
+ #[test]
+ fn query_no_yelp_icon_data() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "yelp-suggestions",
+ "last_modified": 15,
+ "attachment": {
+ "filename": "data-1.json",
+ "mimetype": "application/json",
+ "location": "data-1.json",
+ "hash": "",
+ "size": 0,
+ },
+ }]))?
+ .with_data(
+ "data-1.json",
+ json!([
+ {
+ "subjects": ["ramen"],
+ "preModifiers": [],
+ "postModifiers": [],
+ "locationSigns": [],
+ "yelpModifiers": [],
+ "icon": "yelp-favicon",
+ "score": 0.5
+ },
+ ]),
+ )?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ let table = [(
+ "keyword = ramen; Yelp only",
+ SuggestionQuery {
+ keyword: "ramen".into(),
+ providers: vec![SuggestionProvider::Yelp],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Yelp {
+ url: "https://www.yelp.com/search?find_desc=ramen",
+ title: "ramen",
+ icon: None,
+ score: 0.5,
+ has_location_sign: false,
+ subject_exact_match: true,
+ location_param: "find_loc",
+ },
+ ]
+ "#]],
+ )];
+
+ for (what, query, expect) in table {
+ expect.assert_debug_eq(
+ &store
+ .query(query)
+ .with_context(|| format!("Couldn't query store for {}", what))?,
+ );
+ }
+
+ Ok(())
+ }
+
+ /// Tests Weather suggestions ingested from a `weather` record with
+ /// `min_keyword_length` 3 and keywords `ab`, `xyz` and `weather`.
+ ///
+ /// Expected behavior, as pinned by the table:
+ /// - queries shorter than 3 characters never match, even the exact
+ /// keyword `ab`;
+ /// - a query of at least 3 characters matches when it is a keyword or the
+ /// start of one (`wea` through `weather`);
+ /// - extra leading or trailing characters prevent a match, but surrounding
+ /// whitespace does not.
+ ///
+ /// Finally checks that the Weather provider config reports the ingested
+ /// `min_keyword_length`.
+ #[test]
+ fn weather() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "weather",
+ "last_modified": 15,
+ "weather": {
+ "min_keyword_length": 3,
+ "keywords": ["ab", "xyz", "weather"],
+ "score": "0.24"
+ }
+ }]))?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ // Each entry: (description used in failure context, query, expected result).
+ let table = [
+ (
+ "keyword = 'ab'; Weather only, no match since query is too short",
+ SuggestionQuery {
+ keyword: "ab".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = 'xab'; Weather only, no matching keyword",
+ SuggestionQuery {
+ keyword: "xab".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = 'abx'; Weather only, no matching keyword",
+ SuggestionQuery {
+ keyword: "abx".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = 'xy'; Weather only, no match since query is too short",
+ SuggestionQuery {
+ keyword: "xy".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = 'xyz'; Weather only, match",
+ SuggestionQuery {
+ keyword: "xyz".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Weather {
+ score: 0.24,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = 'xxyz'; Weather only, no matching keyword",
+ SuggestionQuery {
+ keyword: "xxyz".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = 'xyzx'; Weather only, no matching keyword",
+ SuggestionQuery {
+ keyword: "xyzx".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = 'we'; Weather only, no match since query is too short",
+ SuggestionQuery {
+ keyword: "we".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = 'wea'; Weather only, match",
+ SuggestionQuery {
+ keyword: "wea".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Weather {
+ score: 0.24,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = 'weat'; Weather only, match",
+ SuggestionQuery {
+ keyword: "weat".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Weather {
+ score: 0.24,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = 'weath'; Weather only, match",
+ SuggestionQuery {
+ keyword: "weath".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Weather {
+ score: 0.24,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = 'weathe'; Weather only, match",
+ SuggestionQuery {
+ keyword: "weathe".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Weather {
+ score: 0.24,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = 'weather'; Weather only, match",
+ SuggestionQuery {
+ keyword: "weather".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Weather {
+ score: 0.24,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = 'weatherx'; Weather only, no matching keyword",
+ SuggestionQuery {
+ keyword: "weatherx".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = 'xweather'; Weather only, no matching keyword",
+ SuggestionQuery {
+ keyword: "xweather".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = 'xwea'; Weather only, no matching keyword",
+ SuggestionQuery {
+ keyword: "xwea".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = ' weather '; Weather only, match",
+ SuggestionQuery {
+ keyword: " weather ".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ [
+ Weather {
+ score: 0.24,
+ },
+ ]
+ "#]],
+ ),
+ (
+ "keyword = 'x weather '; Weather only, no matching keyword",
+ SuggestionQuery {
+ keyword: "x weather ".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ (
+ "keyword = ' weather x'; Weather only, no matching keyword",
+ SuggestionQuery {
+ keyword: " weather x".into(),
+ providers: vec![SuggestionProvider::Weather],
+ limit: None,
+ },
+ expect![[r#"
+ []
+ "#]],
+ ),
+ ];
+
+ for (what, query, expect) in table {
+ expect.assert_debug_eq(
+ &store
+ .query(query)
+ .with_context(|| format!("Couldn't query store for {}", what))?,
+ );
+ }
+
+ // The provider config exposes the `min_keyword_length` from the record.
+ expect![[r#"
+ Some(
+ Weather {
+ min_keyword_length: 3,
+ },
+ )
+ "#]]
+ .assert_debug_eq(
+ &store
+ .fetch_provider_config(SuggestionProvider::Weather)
+ .with_context(|| "Couldn't fetch provider config")?,
+ );
+
+ Ok(())
+ }
+
+ /// Tests that `fetch_global_config` returns the `show_less_frequently_cap`
+ /// from an ingested `configuration` record.
+ #[test]
+ fn fetch_global_config() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "configuration",
+ "last_modified": 15,
+ "configuration": {
+ "show_less_frequently_cap": 3,
+ }
+ }]))?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ expect![[r#"
+ SuggestGlobalConfig {
+ show_less_frequently_cap: 3,
+ }
+ "#]]
+ .assert_debug_eq(
+ &store
+ .fetch_global_config()
+ .with_context(|| "fetch_global_config failed")?,
+ );
+
+ Ok(())
+ }
+
+ /// Tests that `fetch_global_config` returns a config with
+ /// `show_less_frequently_cap: 0` when no `configuration` record was
+ /// ingested (the snapshot is empty).
+ #[test]
+ fn fetch_global_config_default() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([]))?;
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ expect![[r#"
+ SuggestGlobalConfig {
+ show_less_frequently_cap: 0,
+ }
+ "#]]
+ .assert_debug_eq(
+ &store
+ .fetch_global_config()
+ .with_context(|| "fetch_global_config failed")?,
+ );
+
+ Ok(())
+ }
+
+ /// Tests that `fetch_provider_config` returns `None` for both Amp and
+ /// Weather when nothing was ingested (the snapshot is empty).
+ #[test]
+ fn fetch_provider_config_none() -> anyhow::Result<()> {
+ before_each();
+
+ let snapshot = Snapshot::with_records(json!([]))?;
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ expect![[r#"
+ None
+ "#]]
+ .assert_debug_eq(
+ &store
+ .fetch_provider_config(SuggestionProvider::Amp)
+ .with_context(|| "fetch_provider_config failed for Amp")?,
+ );
+
+ expect![[r#"
+ None
+ "#]]
+ .assert_debug_eq(
+ &store
+ .fetch_provider_config(SuggestionProvider::Weather)
+ .with_context(|| "fetch_provider_config failed for Weather")?,
+ );
+
+ Ok(())
+ }
+
+ /// Tests that ingesting a Weather config doesn't make `fetch_provider_config`
+ /// return a config for a different provider (Amp).
+ #[test]
+ fn fetch_provider_config_other() -> anyhow::Result<()> {
+ before_each();
+
+ // Add some weather config.
+ let snapshot = Snapshot::with_records(json!([{
+ "id": "data-1",
+ "type": "weather",
+ "last_modified": 15,
+ "weather": {
+ "min_keyword_length": 3,
+ "keywords": ["weather"],
+ "score": "0.24"
+ }
+ }]))?;
+
+ let store = unique_test_store(SnapshotSettingsClient::with_snapshot(snapshot));
+ store.ingest(SuggestIngestionConstraints::default())?;
+
+ // Getting the config for a different provider should return None.
+ expect![[r#"
+ None
+ "#]]
+ .assert_debug_eq(
+ &store
+ .fetch_provider_config(SuggestionProvider::Amp)
+ .with_context(|| "fetch_provider_config failed for Amp")?,
+ );
+
+ Ok(())
+ }
+}
diff --git a/third_party/rust/suggest/src/suggest.udl b/third_party/rust/suggest/src/suggest.udl
new file mode 100644
index 0000000000..1cd8911a48
--- /dev/null
+++ b/third_party/rust/suggest/src/suggest.udl
@@ -0,0 +1,151 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+// `RemoteSettingsConfig` is declared by the external `remote_settings` crate
+// and is only referenced from this file.
+[External="remote_settings"]
+typedef extern RemoteSettingsConfig;
+
+namespace suggest {
+
+// Reports whether a "raw" sponsored suggestion URL is equivalent to a "cooked"
+// `url`, i.e. identical except for the value that replaced the raw URL's
+// timestamp template parameter. Implemented in `suggestion.rs`.
+boolean raw_suggestion_url_matches([ByRef] string raw_url, [ByRef] string url);
+
+};
+
+// The error type named by the `[Throws=SuggestApiError]` annotations in this
+// file.
+[Error]
+interface SuggestApiError {
+ // An operation was interrupted by calling `SuggestStore.interrupt()`
+ Interrupted();
+ // The server requested a backoff after too many requests
+ Backoff(u64 seconds);
+ // NOTE(review): presumably a network failure described by `reason` — confirm
+ // against the Rust error definition.
+ Network(string reason);
+ // NOTE(review): presumably any other failure described by `reason` — confirm
+ // against the Rust error definition.
+ Other(string reason);
+};
+
+// The providers that a `SuggestionQuery` can select through its `providers`
+// list.
+enum SuggestionProvider {
+ "Amp",
+ "Pocket",
+ "Wikipedia",
+ "Amo",
+ "Yelp",
+ "Mdn",
+ "Weather",
+ // There is no `AmpMobile` suggestion variant; the store tests show queries
+ // for this provider returning `Amp` suggestions.
+ "AmpMobile",
+};
+
+// A suggestion returned by `SuggestStore.query()`. This declaration mirrors
+// the Rust `Suggestion` enum in `suggestion.rs`, variant by variant and field
+// by field; keep the two in step.
+[Enum]
+interface Suggestion {
+ Amp(
+ string title,
+ string url,
+ string raw_url,
+ sequence<u8>? icon,
+ string full_keyword,
+ i64 block_id,
+ string advertiser,
+ string iab_category,
+ string impression_url,
+ string click_url,
+ string raw_click_url,
+ f64 score
+ );
+ Pocket(
+ string title,
+ string url,
+ f64 score,
+ boolean is_top_pick
+ );
+ // The only variant without a `score` field.
+ Wikipedia(
+ string title,
+ string url,
+ sequence<u8>? icon,
+ string full_keyword
+ );
+ Amo(
+ string title,
+ string url,
+ string icon_url,
+ string description,
+ string? rating,
+ i64 number_of_ratings,
+ string guid,
+ f64 score
+ );
+ Yelp(
+ string url,
+ string title,
+ sequence<u8>? icon,
+ f64 score,
+ boolean has_location_sign,
+ boolean subject_exact_match,
+ string location_param
+ );
+ Mdn(
+ string title,
+ string url,
+ string description,
+ f64 score
+ );
+ Weather(
+ f64 score
+ );
+};
+
+// The argument to `SuggestStore.query()`: the typed `keyword`, the providers
+// to consult, and an optional `limit` that defaults to null.
+// NOTE(review): a null `limit` presumably means "no limit" — confirm in db.rs.
+dictionary SuggestionQuery {
+ string keyword;
+ sequence<SuggestionProvider> providers;
+ i32? limit = null;
+};
+
+// The argument to `SuggestStore.ingest()`. `max_suggestions` is optional and
+// defaults to null.
+// NOTE(review): presumably caps how many suggestions one ingest stores —
+// confirm in store.rs.
+dictionary SuggestIngestionConstraints {
+ u64? max_suggestions = null;
+};
+
+// Global Suggest configuration data, as returned by
+// `SuggestStore.fetch_global_config()`. Mirrors the Rust struct of the same
+// name in `config.rs`.
+dictionary SuggestGlobalConfig {
+ i32 show_less_frequently_cap;
+};
+
+// Per-provider configuration data, as returned by
+// `SuggestStore.fetch_provider_config()`. Mirrors the Rust enum of the same
+// name in `config.rs`; Weather is the only provider with a variant here.
+[Enum]
+interface SuggestProviderConfig {
+ Weather(
+ i32 min_keyword_length
+ );
+};
+
+// The store that ingests suggestions and answers queries. Every operation
+// except `interrupt()` can throw `SuggestApiError`.
+interface SuggestStore {
+ // `settings_config` is optional and defaults to null.
+ [Throws=SuggestApiError]
+ constructor([ByRef] string path, optional RemoteSettingsConfig? settings_config = null);
+
+ // Returns the suggestions matching `query`.
+ [Throws=SuggestApiError]
+ sequence<Suggestion> query(SuggestionQuery query);
+
+ // Interrupted operations report `SuggestApiError.Interrupted`.
+ void interrupt();
+
+ [Throws=SuggestApiError]
+ void ingest(SuggestIngestionConstraints constraints);
+
+ [Throws=SuggestApiError]
+ void clear();
+
+ [Throws=SuggestApiError]
+ SuggestGlobalConfig fetch_global_config();
+
+ // Returns null when there is no config for `provider`.
+ [Throws=SuggestApiError]
+ SuggestProviderConfig? fetch_provider_config(SuggestionProvider provider);
+};
+
+// Builder for `SuggestStore`. Each setter returns a `SuggestStoreBuilder`, so
+// calls can be chained before the final `build()`.
+interface SuggestStoreBuilder {
+ constructor();
+
+ [Self=ByArc]
+ SuggestStoreBuilder data_path(string path);
+
+ [Self=ByArc]
+ SuggestStoreBuilder cache_path(string path);
+
+ [Self=ByArc]
+ SuggestStoreBuilder remote_settings_config(RemoteSettingsConfig config);
+
+ // The only builder operation that can throw.
+ [Throws=SuggestApiError]
+ SuggestStore build();
+};
diff --git a/third_party/rust/suggest/src/suggestion.rs b/third_party/rust/suggest/src/suggestion.rs
new file mode 100644
index 0000000000..f5425e3c73
--- /dev/null
+++ b/third_party/rust/suggest/src/suggestion.rs
@@ -0,0 +1,250 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+use chrono::Local;
+
+use crate::db::DEFAULT_SUGGESTION_SCORE;
+
+/// The template parameter for a timestamp in a "raw" sponsored suggestion URL.
+///
+/// [`cook_raw_suggestion_url`] replaces its first occurrence with the current
+/// local time formatted as `%Y%m%d%H`.
+const TIMESTAMP_TEMPLATE: &str = "%YYYYMMDDHH%";
+
+/// The length, in bytes, of a timestamp in a "cooked" sponsored suggestion URL.
+///
+/// Cooked timestamps don't include the leading or trailing `%`, so this is
+/// 2 bytes shorter than [`TIMESTAMP_TEMPLATE`].
+const TIMESTAMP_LENGTH: usize = 10;
+
+/// The kinds of AMP suggestion, crate-internal.
+// NOTE(review): presumably selects between the `AmpMobile` and `Amp`
+// providers' data when fetching — confirm against db.rs.
+pub(crate) enum AmpSuggestionType {
+ Mobile,
+ Desktop,
+}
+/// A suggestion from the database to show in the address bar.
+///
+/// This enum is mirrored, variant by variant and field by field, by the
+/// `Suggestion` interface in `suggest.udl`; keep the two in step.
+///
+/// Every variant except `Wikipedia` carries its own `score`.
+#[derive(Clone, Debug, PartialEq)]
+pub enum Suggestion {
+ Amp {
+ title: String,
+ url: String,
+ raw_url: String,
+ icon: Option<Vec<u8>>,
+ full_keyword: String,
+ block_id: i64,
+ advertiser: String,
+ iab_category: String,
+ impression_url: String,
+ click_url: String,
+ raw_click_url: String,
+ score: f64,
+ },
+ Pocket {
+ title: String,
+ url: String,
+ score: f64,
+ is_top_pick: bool,
+ },
+ Wikipedia {
+ title: String,
+ url: String,
+ icon: Option<Vec<u8>>,
+ full_keyword: String,
+ },
+ Amo {
+ title: String,
+ url: String,
+ icon_url: String,
+ description: String,
+ rating: Option<String>,
+ number_of_ratings: i64,
+ guid: String,
+ score: f64,
+ },
+ Yelp {
+ url: String,
+ title: String,
+ icon: Option<Vec<u8>>,
+ score: f64,
+ has_location_sign: bool,
+ subject_exact_match: bool,
+ location_param: String,
+ },
+ Mdn {
+ title: String,
+ url: String,
+ description: String,
+ score: f64,
+ },
+ Weather {
+ score: f64,
+ },
+}
+
+impl PartialOrd for Suggestion {
+ // Delegates to `Ord::cmp`, so the partial and total orderings always agree.
+ fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl Ord for Suggestion {
+    /// Orders suggestions by descending score, so that sorting a list puts
+    /// the highest-scoring suggestion first.
+    ///
+    /// Every variant that carries a `score` is ordered by it; `Wikipedia`,
+    /// which has none, is ordered by [`DEFAULT_SUGGESTION_SCORE`].
+    ///
+    /// Scores that can't be compared (NaN) are treated as equal. Two
+    /// suggestions with the same score also compare as equal here even if
+    /// their other fields differ.
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        // The match is exhaustive on purpose: adding a variant must force a
+        // decision about its score instead of silently using the default.
+        fn score_of(suggestion: &Suggestion) -> f64 {
+            match suggestion {
+                Suggestion::Amp { score, .. }
+                | Suggestion::Pocket { score, .. }
+                | Suggestion::Amo { score, .. }
+                | Suggestion::Yelp { score, .. }
+                | Suggestion::Mdn { score, .. }
+                | Suggestion::Weather { score, .. } => *score,
+                Suggestion::Wikipedia { .. } => DEFAULT_SUGGESTION_SCORE,
+            }
+        }
+
+        // Comparing `other` against `self` (not the reverse) yields the
+        // descending order.
+        score_of(other)
+            .partial_cmp(&score_of(self))
+            .unwrap_or(std::cmp::Ordering::Equal)
+    }
+}
+
+// `Eq` can't be derived because of the `f64` fields; it is asserted by hand
+// because `Ord` requires it.
+impl Eq for Suggestion {}
+/// Turns a "raw" sponsored suggestion URL into a "cooked" one by substituting
+/// real values for its template parameters.
+///
+/// The only template parameter is the timestamp, which is replaced with the
+/// current local time formatted as `%Y%m%d%H`. A URL without the parameter is
+/// returned unchanged.
+pub(crate) fn cook_raw_suggestion_url(raw_url: &str) -> String {
+    let now = Local::now();
+    let timestamp = now.format("%Y%m%d%H").to_string();
+    debug_assert!(timestamp.len() == TIMESTAMP_LENGTH);
+    // A "raw" URL must not contain more than one timestamp template
+    // parameter, so only the first occurrence is substituted.
+    raw_url.replacen(TIMESTAMP_TEMPLATE, timestamp.as_str(), 1)
+}
+
+/// Checks whether a "cooked" URL could have been produced from a "raw"
+/// sponsored suggestion URL.
+///
+/// When the raw URL has no timestamp template parameter, the two must be
+/// identical. Otherwise the cooked URL must consist of the raw URL's text
+/// before the parameter, exactly [`TIMESTAMP_LENGTH`] ASCII digits, and the
+/// raw URL's text after the parameter. The digits themselves may be anything.
+pub fn raw_suggestion_url_matches(raw_url: &str, cooked_url: &str) -> bool {
+    match raw_url.split_once(TIMESTAMP_TEMPLATE) {
+        // No template parameter: only an exact match counts.
+        None => raw_url == cooked_url,
+        Some((raw_head, raw_tail)) => {
+            let stamp_start = raw_head.len();
+            let stamp_end = stamp_start + TIMESTAMP_LENGTH;
+            // `get` yields `None` when the cooked URL is too short or an
+            // offset isn't on a character boundary.
+            let cooked_head = cooked_url.get(..stamp_start);
+            let cooked_tail = cooked_url.get(stamp_end..);
+            match (cooked_head, cooked_tail) {
+                (Some(head), Some(tail)) if head == raw_head && tail == raw_tail => {
+                    // Both offsets were validated above, so this slice can't
+                    // panic.
+                    cooked_url[stamp_start..stamp_end]
+                        .bytes()
+                        .all(|byte| byte.is_ascii_digit())
+                }
+                _ => false,
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Cooking a URL with a timestamp template parameter replaces the
+    /// 12-byte template with a 10-byte timestamp.
+    #[test]
+    fn cook_url_with_template_parameters() {
+        let raw_url_with_one_timestamp = "https://example.com?a=%YYYYMMDDHH%";
+        let cooked_url_with_one_timestamp = cook_raw_suggestion_url(raw_url_with_one_timestamp);
+        assert_eq!(
+            cooked_url_with_one_timestamp.len(),
+            raw_url_with_one_timestamp.len() - 2
+        );
+        assert_ne!(raw_url_with_one_timestamp, cooked_url_with_one_timestamp);
+
+        let raw_url_with_trailing_segment = "https://example.com?a=%YYYYMMDDHH%&b=c";
+        let cooked_url_with_trailing_segment =
+            cook_raw_suggestion_url(raw_url_with_trailing_segment);
+        assert_eq!(
+            cooked_url_with_trailing_segment.len(),
+            raw_url_with_trailing_segment.len() - 2
+        );
+        assert_ne!(
+            raw_url_with_trailing_segment,
+            cooked_url_with_trailing_segment
+        );
+    }
+
+    /// Cooking a URL without a template parameter leaves it unchanged.
+    #[test]
+    fn cook_url_without_template_parameters() {
+        let raw_url_without_timestamp = "https://example.com?b=c";
+        let cooked_url_without_timestamp = cook_raw_suggestion_url(raw_url_without_timestamp);
+        assert_eq!(raw_url_without_timestamp, cooked_url_without_timestamp);
+    }
+
+    /// A raw URL with a template parameter matches only cooked URLs that
+    /// differ from it in the 10-digit timestamp alone.
+    #[test]
+    fn url_with_template_parameters_matches() {
+        let raw_url_with_one_timestamp = "https://example.com?a=%YYYYMMDDHH%";
+        let raw_url_with_trailing_segment = "https://example.com?a=%YYYYMMDDHH%&b=c";
+
+        // Equivalent, except for their replaced template parameters.
+        assert!(raw_suggestion_url_matches(
+            raw_url_with_one_timestamp,
+            "https://example.com?a=0000000000"
+        ));
+        assert!(raw_suggestion_url_matches(
+            raw_url_with_trailing_segment,
+            "https://example.com?a=1111111111&b=c"
+        ));
+
+        // Different lengths.
+        assert!(!raw_suggestion_url_matches(
+            raw_url_with_one_timestamp,
+            "https://example.com?a=1234567890&c=d"
+        ));
+        assert!(!raw_suggestion_url_matches(
+            raw_url_with_one_timestamp,
+            "https://example.com?a=123456789"
+        ));
+        assert!(!raw_suggestion_url_matches(
+            raw_url_with_trailing_segment,
+            "https://example.com?a=0987654321"
+        ));
+        assert!(!raw_suggestion_url_matches(
+            raw_url_with_trailing_segment,
+            "https://example.com?a=0987654321&b=c&d=e"
+        ));
+
+        // Different query parameter names.
+        assert!(!raw_suggestion_url_matches(
+            raw_url_with_one_timestamp,         // `a`.
+            "https://example.com?b=4444444444"  // `b`.
+        ));
+        assert!(!raw_suggestion_url_matches(
+            raw_url_with_trailing_segment,          // `a&b`.
+            "https://example.com?a=5555555555&c=c"  // `a&c`.
+        ));
+
+        // Not a timestamp.
+        assert!(!raw_suggestion_url_matches(
+            raw_url_with_one_timestamp,
+            "https://example.com?a=bcdefghijk"
+        ));
+        assert!(!raw_suggestion_url_matches(
+            raw_url_with_trailing_segment,
+            "https://example.com?a=bcdefghijk&b=c"
+        ));
+    }
+
+    /// A raw URL without a template parameter matches only an identical
+    /// cooked URL.
+    #[test]
+    fn url_without_template_parameters_matches() {
+        let raw_url_without_timestamp = "https://example.com?b=c";
+
+        assert!(raw_suggestion_url_matches(
+            raw_url_without_timestamp,
+            "https://example.com?b=c"
+        ));
+        assert!(!raw_suggestion_url_matches(
+            raw_url_without_timestamp,
+            "http://example.com"
+        ));
+        // Same scheme and host, so only the query parameter name differs.
+        assert!(!raw_suggestion_url_matches(
+            raw_url_without_timestamp, // `b`.
+            "https://example.com?a=c"  // `a`.
+        ));
+        assert!(!raw_suggestion_url_matches(
+            raw_url_without_timestamp,
+            "https://example.com?b=c&d=e"
+        ));
+    }
+}
diff --git a/third_party/rust/suggest/src/yelp.rs b/third_party/rust/suggest/src/yelp.rs
new file mode 100644
index 0000000000..2413709c67
--- /dev/null
+++ b/third_party/rust/suggest/src/yelp.rs
@@ -0,0 +1,497 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+use rusqlite::types::ToSqlOutput;
+use rusqlite::{named_params, Result as RusqliteResult, ToSql};
+use sql_support::ConnExt;
+use url::form_urlencoded;
+
+use crate::{
+ db::SuggestDao,
+ provider::SuggestionProvider,
+ rs::{DownloadedYelpSuggestion, SuggestRecordId},
+ suggestion::Suggestion,
+ Result, SuggestionQuery,
+};
+
+/// Kind of a modifier keyword.
+///
+/// The numeric value of each variant is what gets written to the `type`
+/// column of the `yelp_modifiers` table (see the `ToSql` implementation), so
+/// the discriminants must stay stable.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
+#[repr(u8)]
+enum Modifier {
+ /// Modifier that is looked up at the start of the query, before the subject.
+ Pre = 0,
+ /// Modifier that is looked up at the end of the query, after the subject.
+ Post = 1,
+ /// Yelp keyword modifier; it is looked up at both ends of the query and
+ /// removed before the rest of the query is parsed.
+ Yelp = 2,
+}
+
+impl ToSql for Modifier {
+    /// Binds the modifier to a statement as its `u8` discriminant.
+    fn to_sql(&self) -> RusqliteResult<ToSqlOutput<'_>> {
+        let discriminant = *self as u8;
+        Ok(discriminant.into())
+    }
+}
+
+/// This module assumes queries of the following form:
+/// "Yelp-modifier? Pre-modifier? Subject Post-modifier? (Location-modifier | Location-sign Location?)? Yelp-modifier?"
+/// For example, the query below is valid:
+/// "Yelp (Yelp-modifier) Best(Pre-modifier) Ramen(Subject) Delivery(Post-modifier) In(Location-sign) Tokyo(Location)"
+/// Since everything except the Subject is optional, "Ramen" alone is also a valid query.
+/// However, "Best Best Ramen" and "Ramen Best" violate the ordering above, so
+/// parsing them fails. Every word except the Location must also be registered in the DB.
+/// Please refer to the query tests in store.rs for all of the combinations.
+/// The maximum query length was chosen with reference to the lengths of the words in the DB
+/// and of location names:
+/// max subject: 50 + pre-modifier: 10 + post-modifier: 10 + location-sign: 7 + location: 50 = 127,
+/// which is rounded up to 150. The limit is compared against the byte length of the query.
+const MAX_QUERY_LENGTH: usize = 150;
+
+/// The maximum number of words a modifier may consist of. Modifiers are looked up with an exact
+/// "keyword=:modifier" match to keep the SQL fast (please see is_modifier()), so this constant
+/// defines how many leading/trailing words of the query are tried as a modifier.
+const MAX_MODIFIER_WORDS_NUMBER: usize = 2;
+
+/// A subject candidate must be at least this long (in bytes) to be prefix-matched against the
+/// registered subjects; shorter candidates must match a subject exactly.
+const SUBJECT_PREFIX_MATCH_THRESHOLD: usize = 2;
+
+impl<'a> SuggestDao<'a> {
+    /// Stores the contents of a downloaded Yelp attachment under `record_id`.
+    ///
+    /// Subjects, modifiers (pre, post and Yelp), location signs and the custom
+    /// details (icon ID and score) each go to their own table. The operation is
+    /// checked for interruption before every single insert.
+    pub fn insert_yelp_suggestions(
+        &mut self,
+        record_id: &SuggestRecordId,
+        suggestion: &DownloadedYelpSuggestion,
+    ) -> Result<()> {
+        let record_id = record_id.as_str();
+
+        for subject in &suggestion.subjects {
+            self.scope.err_if_interrupted()?;
+            self.conn.execute_cached(
+                "INSERT INTO yelp_subjects(record_id, keyword) VALUES(:record_id, :keyword)",
+                named_params! {
+                    ":record_id": record_id,
+                    ":keyword": subject,
+                },
+            )?;
+        }
+
+        // All modifier kinds share one table and differ only in the `type` column.
+        let modifier_groups = [
+            (Modifier::Pre, &suggestion.pre_modifiers),
+            (Modifier::Post, &suggestion.post_modifiers),
+            (Modifier::Yelp, &suggestion.yelp_modifiers),
+        ];
+        for (modifier_type, keywords) in modifier_groups {
+            for keyword in keywords {
+                self.scope.err_if_interrupted()?;
+                self.conn.execute_cached(
+                    "INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)",
+                    named_params! {
+                        ":record_id": record_id,
+                        ":type": modifier_type,
+                        ":keyword": keyword,
+                    },
+                )?;
+            }
+        }
+
+        for sign in &suggestion.location_signs {
+            self.scope.err_if_interrupted()?;
+            self.conn.execute_cached(
+                "INSERT INTO yelp_location_signs(record_id, keyword, need_location) VALUES(:record_id, :keyword, :need_location)",
+                named_params! {
+                    ":record_id": record_id,
+                    ":keyword": sign.keyword,
+                    ":need_location": sign.need_location,
+                },
+            )?;
+        }
+
+        self.scope.err_if_interrupted()?;
+        self.conn.execute_cached(
+            "INSERT INTO yelp_custom_details(record_id, icon_id, score) VALUES(:record_id, :icon_id, :score)",
+            named_params! {
+                ":record_id": record_id,
+                ":icon_id": suggestion.icon_id,
+                ":score": suggestion.score,
+            },
+        )?;
+
+        Ok(())
+    }
+
+    /// Builds the Yelp suggestion, if any, for the keyword typed by the user.
+    ///
+    /// The result holds at most one suggestion. It is empty when the Yelp
+    /// provider was not requested, when the keyword is longer than
+    /// `MAX_QUERY_LENGTH`, or when the keyword cannot be parsed into a known
+    /// subject with optional modifiers and location.
+    pub fn fetch_yelp_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
+        let yelp_requested = query.providers.contains(&SuggestionProvider::Yelp);
+        if !yelp_requested || query.keyword.len() > MAX_QUERY_LENGTH {
+            return Ok(vec![]);
+        }
+
+        let keyword = query.keyword.trim();
+
+        // A single word can only be a subject, so modifier and location parsing
+        // is skipped entirely.
+        if !keyword.contains(' ') {
+            let Some((subject, subject_exact_match)) = self.find_subject(keyword)? else {
+                return Ok(vec![]);
+            };
+            let (icon, score) = self.fetch_custom_details()?;
+            let suggestion = Suggestion::from(SuggestionBuilder {
+                subject: &subject,
+                subject_exact_match,
+                pre_modifier: None,
+                post_modifier: None,
+                location_sign: None,
+                location: None,
+                need_location: false,
+                icon,
+                score,
+            });
+            return Ok(vec![suggestion]);
+        }
+
+        // Strip the Yelp keyword modifiers from both ends of the query.
+        let (without_yelp_modifiers, _, _) =
+            self.find_modifiers(keyword, Modifier::Yelp, Modifier::Yelp)?;
+
+        // Split off the location sign and the location that follows it.
+        let (remainder, location_sign, location, need_location) =
+            self.find_location(&without_yelp_modifiers)?;
+
+        // Give up when the sign does not take a location but the user typed one
+        // anyway, or when nothing is left to look for a subject in.
+        let unexpected_location = location.is_some() && !need_location;
+        if unexpected_location || remainder.is_empty() {
+            return Ok(vec![]);
+        }
+
+        // Whatever sits between the pre- and post-modifier must be the subject.
+        let (subject_candidate, pre_modifier, post_modifier) =
+            self.find_modifiers(&remainder, Modifier::Pre, Modifier::Post)?;
+        let Some((subject, subject_exact_match)) = self.find_subject(&subject_candidate)? else {
+            return Ok(vec![]);
+        };
+
+        let (icon, score) = self.fetch_custom_details()?;
+        let suggestion = Suggestion::from(SuggestionBuilder {
+            subject: &subject,
+            subject_exact_match,
+            pre_modifier,
+            post_modifier,
+            location_sign,
+            location,
+            need_location,
+            icon,
+            score,
+        });
+        Ok(vec![suggestion])
+    }
+
+    /// Reads the custom details shared by every Yelp suggestion.
+    /// It returns the custom details tuple as follows:
+    /// (
+    /// Option<Vec<u8>>: Icon data. If not found, returns None.
+    /// f64: Reflects score field in the yelp_custom_details table.
+    /// )
+    ///
+    /// Only one row is expected in `yelp_custom_details`, because all the Yelp
+    /// assets are stored in the attachment of a single Remote Settings record.
+    /// The query therefore scans `yelp_custom_details` and then does an index
+    /// search against `icons`, which is cheap with a single row to scan.
+    fn fetch_custom_details(&self) -> Result<(Option<Vec<u8>>, f64)> {
+        self.conn.query_row_and_then_cachable(
+            r#"
+ SELECT
+ i.data, y.score
+ FROM
+ yelp_custom_details y
+ LEFT JOIN
+ icons i
+ ON y.icon_id = i.id
+ LIMIT
+ 1
+ "#,
+            (),
+            |row| -> Result<_> {
+                let icon: Option<Vec<u8>> = row.get(0)?;
+                let score: f64 = row.get(1)?;
+                Ok((icon, score))
+            },
+            true,
+        )
+    }
+
+    /// Find the location information in the given query string.
+    /// It returns the location tuple as follows:
+    /// (
+    /// String: Query string with the location sign and location removed.
+    /// Option<String>: Location sign found in yelp_location_signs table. If not found, returns None.
+    /// Option<String>: Specific location name after location sign. If not found, returns None.
+    /// bool: Reflects need_location field in the table.
+    /// )
+    ///
+    /// The sign is searched case-insensitively (the query is lowercased for the
+    /// lookup), but the returned strings keep the casing typed by the user.
+    /// When several signs match, the longest one wins.
+    fn find_location(&self, query: &str) -> Result<(String, Option<String>, Option<String>, bool)> {
+        // Surround the query with spaces so that a sign only matches whole words,
+        // including at the very start or end of the query.
+        let query_with_spaces = format!(" {} ", query);
+        let mut results: Vec<(usize, usize, i8)> = self.conn.query_rows_and_then_cached(
+            "
+ SELECT
+ INSTR(:query, ' ' || keyword || ' ') AS sign_index,
+ LENGTH(keyword) AS sign_length,
+ need_location
+ FROM yelp_location_signs
+ WHERE
+ sign_index > 0
+ ORDER BY
+ sign_length DESC
+ LIMIT 1
+ ",
+            named_params! {
+                ":query": &query_with_spaces.to_lowercase(),
+            },
+            |row| -> Result<_> {
+                Ok((
+                    row.get::<_, usize>("sign_index")?,
+                    row.get::<_, usize>("sign_length")?,
+                    row.get::<_, i8>("need_location")?,
+                ))
+            },
+        )?;
+
+        let Some((sign_index, sign_length, need_location)) = results.pop() else {
+            return Ok((query.trim().to_string(), None, None, false));
+        };
+
+        // `INSTR()` and `LENGTH()` count characters of the *lowercased* query,
+        // whereas Rust slices the original query by byte offset. Map every
+        // character of the lowercased text back to the byte offset of the
+        // original character that produced it, so that non-ASCII queries are cut
+        // at the right place. For ASCII-only queries this mapping is the identity.
+        let mut byte_offsets = Vec::with_capacity(query_with_spaces.len() + 1);
+        for (offset, ch) in query_with_spaces.char_indices() {
+            byte_offsets.extend(ch.to_lowercase().map(|_| offset));
+        }
+        byte_offsets.push(query_with_spaces.len());
+        let byte_offset_at = |char_index: usize| {
+            byte_offsets
+                .get(char_index)
+                .copied()
+                .unwrap_or(query_with_spaces.len())
+        };
+
+        // `sign_index` is the 1-based position of the space in front of the sign,
+        // which equals the 0-based position of the first character of the sign.
+        let sign_start = byte_offset_at(sign_index);
+        let sign_end = byte_offset_at(sign_index + sign_length);
+
+        let pre_location = query_with_spaces
+            .get(..sign_start)
+            .map(str::trim)
+            .map(str::to_string)
+            .unwrap_or_default();
+        let location_sign = query_with_spaces
+            .get(sign_start..sign_end)
+            .map(str::trim)
+            .filter(|s| !s.is_empty())
+            .map(str::to_string);
+        let location = query_with_spaces
+            .get(sign_end..)
+            .map(str::trim)
+            .filter(|s| !s.is_empty())
+            .map(str::to_string);
+
+        Ok((pre_location, location_sign, location, need_location == 1))
+    }
+
+    /// Find the pre/post modifier in the given query string.
+    /// It returns the modifiers tuple as follows:
+    /// (
+    /// String: Query string with the found modifiers removed.
+    /// Option<String>: Pre-modifier found in the yelp_modifiers table. If not found, returns None.
+    /// Option<String>: Post-modifier found in the yelp_modifiers table. If not found, returns None.
+    /// )
+    ///
+    /// Up to `MAX_MODIFIER_WORDS_NUMBER` leading words are tried as the
+    /// pre-modifier and as many trailing words as the post-modifier, longest
+    /// candidate first. The post-modifier is only searched among the words that
+    /// the pre-modifier did not consume, so the two can never overlap. Returned
+    /// modifiers have their words joined by single spaces.
+    fn find_modifiers(
+        &self,
+        query: &str,
+        pre_modifier_type: Modifier,
+        post_modifier_type: Modifier,
+    ) -> Result<(String, Option<String>, Option<String>)> {
+        if !query.contains(' ') {
+            return Ok((query.to_string(), None, None));
+        }
+
+        let words: Vec<&str> = query.split_whitespace().collect();
+
+        let mut pre_modifier = None;
+        let mut pre_word_count = 0;
+        for n in (1..=MAX_MODIFIER_WORDS_NUMBER.min(words.len())).rev() {
+            let candidate = words[..n].join(" ");
+            if self.is_modifier(&candidate, pre_modifier_type)? {
+                pre_modifier = Some(candidate);
+                pre_word_count = n;
+                break;
+            }
+        }
+
+        // Words already claimed by the pre-modifier are not available to the
+        // post-modifier. Letting the two overlap used to make the remainder
+        // computation underflow and panic.
+        let remaining = &words[pre_word_count..];
+
+        let mut post_modifier = None;
+        let mut post_word_count = 0;
+        for n in (1..=MAX_MODIFIER_WORDS_NUMBER.min(remaining.len())).rev() {
+            let candidate = remaining[remaining.len() - n..].join(" ");
+            if self.is_modifier(&candidate, post_modifier_type)? {
+                post_modifier = Some(candidate);
+                post_word_count = n;
+                break;
+            }
+        }
+
+        // Cut the remainder out of the original query on word boundaries rather
+        // than by the byte length of the joined modifiers: the two differ as soon
+        // as the words are separated by anything other than a single space.
+        let subject_words = &remaining[..remaining.len() - post_word_count];
+        let without_modifiers = match (subject_words.first(), subject_words.last()) {
+            (Some(first), Some(last)) => {
+                // `split_whitespace` yields sub-slices of `query`, so the pointer
+                // difference is the byte offset of the word within `query`.
+                let start = first.as_ptr() as usize - query.as_ptr() as usize;
+                let end = last.as_ptr() as usize - query.as_ptr() as usize + last.len();
+                &query[start..end]
+            }
+            _ => "",
+        };
+
+        Ok((without_modifiers.to_string(), pre_modifier, post_modifier))
+    }
+
+    /// Find the subject from the given string.
+    /// It returns the Option. If it is not none, it contains the tuple as follows:
+    /// (
+    /// String: Subject. It starts with the candidate exactly as typed, followed
+    /// by the rest of the matched keyword.
+    /// bool: Whether the subject matched exactly with the parameter.
+    /// )
+    ///
+    /// Candidates shorter than `SUBJECT_PREFIX_MATCH_THRESHOLD` must match a
+    /// registered subject exactly; longer ones are prefix-matched, preferring the
+    /// shortest keyword. Matching is done on the lowercased candidate. Database
+    /// errors are propagated to the caller; only "no matching row" yields `None`.
+    fn find_subject(&self, candidate: &str) -> Result<Option<(String, bool)>> {
+        if candidate.is_empty() {
+            return Ok(None);
+        }
+
+        // If the length of subject candidate is less than
+        // SUBJECT_PREFIX_MATCH_THRESHOLD, should exact match.
+        if candidate.len() < SUBJECT_PREFIX_MATCH_THRESHOLD {
+            return Ok(if self.is_subject(candidate)? {
+                Some((candidate.to_string(), true))
+            } else {
+                None
+            });
+        }
+
+        // Otherwise, apply prefix-match.
+        let lowercased = candidate.to_lowercase();
+        let keyword = match self.conn.query_row_and_then_cachable(
+            "SELECT keyword
+ FROM yelp_subjects
+ WHERE keyword BETWEEN :candidate AND :candidate || x'FFFF'
+ ORDER BY LENGTH(keyword) ASC, keyword ASC
+ LIMIT 1",
+            named_params! {
+                ":candidate": lowercased,
+            },
+            |row| row.get::<_, String>(0),
+            true,
+        ) {
+            Ok(keyword) => keyword,
+            // No registered subject starts with the candidate.
+            Err(rusqlite::Error::QueryReturnedNoRows) => return Ok(None),
+            // Anything else is a genuine database failure and must not be
+            // reported as "no suggestion".
+            Err(error) => return Err(error.into()),
+        };
+
+        // The keyword was matched against the lowercased candidate, whose byte
+        // length can differ from the candidate as typed (lowercasing may change
+        // the encoded length of a character). Strip the lowercased prefix rather
+        // than slicing by `candidate.len()`, which could panic.
+        let Some(completion) = keyword.strip_prefix(lowercased.as_str()) else {
+            return Ok(None);
+        };
+
+        Ok(Some((
+            format!("{}{}", candidate, completion),
+            completion.is_empty(),
+        )))
+    }
+
+    /// Reports whether `word`, once lowercased, is registered in
+    /// `yelp_modifiers` with the given modifier type.
+    fn is_modifier(&self, word: &str, modifier_type: Modifier) -> Result<bool> {
+        let lowercased_word = word.to_lowercase();
+        Ok(self.conn.query_row_and_then_cachable(
+            "
+ SELECT EXISTS (
+ SELECT 1 FROM yelp_modifiers WHERE type = :type AND keyword = :word LIMIT 1
+ )
+ ",
+            named_params! {
+                ":word": lowercased_word,
+                ":type": modifier_type,
+            },
+            |row| row.get::<_, bool>(0),
+            true,
+        )?)
+    }
+
+    /// Reports whether `word`, once lowercased, is exactly one of the keywords
+    /// registered in `yelp_subjects`.
+    fn is_subject(&self, word: &str) -> Result<bool> {
+        let lowercased_word = word.to_lowercase();
+        Ok(self.conn.query_row_and_then_cachable(
+            "
+ SELECT EXISTS (
+ SELECT 1 FROM yelp_subjects WHERE keyword = :word LIMIT 1
+ )
+ ",
+            named_params! {
+                ":word": lowercased_word,
+            },
+            |row| row.get::<_, bool>(0),
+            true,
+        )?)
+    }
+}
+
+/// Parsed pieces of a Yelp query, together with the icon and score, from
+/// which a `Suggestion::Yelp` is assembled (see the `From` implementation).
+struct SuggestionBuilder<'a> {
+ /// Subject of the search; always part of both the title and the URL.
+ subject: &'a str,
+ /// Whether the typed subject matched a registered subject exactly.
+ subject_exact_match: bool,
+ /// Modifier placed before the subject, if any.
+ pre_modifier: Option<String>,
+ /// Modifier placed after the subject, if any.
+ post_modifier: Option<String>,
+ /// Location sign found in the query, if any.
+ location_sign: Option<String>,
+ /// Location typed after the location sign, if any.
+ location: Option<String>,
+ /// Whether the location sign takes a location. When false, the sign itself
+ /// is appended to the `find_desc` URL parameter; when true, the location is
+ /// sent as the `find_loc` URL parameter.
+ need_location: bool,
+ /// Icon data, passed through to the suggestion unchanged.
+ icon: Option<Vec<u8>>,
+ /// Score, passed through to the suggestion unchanged.
+ score: f64,
+}
+
+impl<'a> From<SuggestionBuilder<'a>> for Suggestion {
+    /// Assembles the title and the Yelp search URL from the parsed query parts.
+    ///
+    /// The URL has the form
+    /// `https://www.yelp.com/search?find_desc={description}&find_loc={location}`,
+    /// where `find_loc` is only present when the location sign takes a location
+    /// and one was typed.
+    fn from(builder: SuggestionBuilder<'a>) -> Suggestion {
+        let need_location = builder.need_location;
+
+        // A sign that takes no location (such as 'near by') is part of the
+        // description parameter instead.
+        let location_modifier = builder
+            .location_sign
+            .as_deref()
+            .filter(|_| !need_location);
+
+        let mut description_parts: Vec<&str> = Vec::with_capacity(4);
+        description_parts.extend(builder.pre_modifier.as_deref());
+        description_parts.push(builder.subject);
+        description_parts.extend(builder.post_modifier.as_deref());
+        description_parts.extend(location_modifier);
+        let description = description_parts.join(" ");
+
+        let mut parameters = form_urlencoded::Serializer::new(String::new());
+        parameters.append_pair("find_desc", &description);
+        if need_location {
+            if let Some(location) = &builder.location {
+                parameters.append_pair("find_loc", location);
+            }
+        }
+        let mut url = String::from("https://www.yelp.com/search?");
+        url.push_str(&parameters.finish());
+
+        // The title shows everything the user typed, in query order.
+        let mut title_parts: Vec<&str> = Vec::with_capacity(5);
+        title_parts.extend(builder.pre_modifier.as_deref());
+        title_parts.push(builder.subject);
+        title_parts.extend(builder.post_modifier.as_deref());
+        title_parts.extend(builder.location_sign.as_deref());
+        title_parts.extend(builder.location.as_deref());
+        let title = title_parts.join(" ");
+
+        // True only for a sign that takes a location, i.e. one that was not
+        // folded into the description above.
+        let has_location_sign = need_location && builder.location_sign.is_some();
+
+        Suggestion::Yelp {
+            url,
+            title,
+            icon: builder.icon,
+            score: builder.score,
+            has_location_sign,
+            subject_exact_match: builder.subject_exact_match,
+            location_param: "find_loc".to_string(),
+        }
+    }
+}