From d8bbc7858622b6d9c278469aab701ca0b609cddf Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 15 May 2024 05:35:49 +0200 Subject: Merging upstream version 126.0. Signed-off-by: Daniel Baumann --- third_party/rust/suggest/src/db.rs | 720 +++++++++++++++++++------------------ 1 file changed, 376 insertions(+), 344 deletions(-) (limited to 'third_party/rust/suggest/src/db.rs') diff --git a/third_party/rust/suggest/src/db.rs b/third_party/rust/suggest/src/db.rs index 07fc3ab4a2..0412c50d8f 100644 --- a/third_party/rust/suggest/src/db.rs +++ b/third_party/rust/suggest/src/db.rs @@ -23,17 +23,17 @@ use crate::{ rs::{ DownloadedAmoSuggestion, DownloadedAmpSuggestion, DownloadedAmpWikipediaSuggestion, DownloadedMdnSuggestion, DownloadedPocketSuggestion, DownloadedWeatherData, - SuggestRecordId, + DownloadedWikipediaSuggestion, SuggestRecordId, }, - schema::{SuggestConnectionInitializer, VERSION}, + schema::{clear_database, SuggestConnectionInitializer, VERSION}, store::{UnparsableRecord, UnparsableRecords}, suggestion::{cook_raw_suggestion_url, AmpSuggestionType, Suggestion}, Result, SuggestionQuery, }; -/// The metadata key whose value is the timestamp of the last record ingested -/// from the Suggest Remote Settings collection. -pub const LAST_INGEST_META_KEY: &str = "last_quicksuggest_ingest"; +/// The metadata key prefix for the last ingested unparsable record. These are +/// records that were not parsed properly, or were not of the "approved" types. +pub const LAST_INGEST_META_UNPARSABLE: &str = "last_quicksuggest_ingest_unparsable"; /// The metadata key whose value keeps track of records of suggestions /// that aren't parsable and which schema version it was first seen in. pub const UNPARSABLE_RECORDS_META_KEY: &str = "unparsable_records"; @@ -148,20 +148,28 @@ impl<'a> SuggestDao<'a> { self.put_unparsable_record_id(&record_id)?; // Advance the last fetch time, so that we can resume // fetching after this record if we're interrupted. 
- self.put_last_ingest_if_newer(record.last_modified) + self.put_last_ingest_if_newer(LAST_INGEST_META_UNPARSABLE, record.last_modified) } - pub fn handle_ingested_record(&mut self, record: &RemoteSettingsRecord) -> Result<()> { + pub fn handle_ingested_record( + &mut self, + last_ingest_key: &str, + record: &RemoteSettingsRecord, + ) -> Result<()> { let record_id = SuggestRecordId::from(&record.id); // Remove this record's ID from the list of unparsable // records, since we understand it now. self.drop_unparsable_record_id(&record_id)?; // Advance the last fetch time, so that we can resume // fetching after this record if we're interrupted. - self.put_last_ingest_if_newer(record.last_modified) + self.put_last_ingest_if_newer(last_ingest_key, record.last_modified) } - pub fn handle_deleted_record(&mut self, record: &RemoteSettingsRecord) -> Result<()> { + pub fn handle_deleted_record( + &mut self, + last_ingest_key: &str, + record: &RemoteSettingsRecord, + ) -> Result<()> { let record_id = SuggestRecordId::from(&record.id); // Drop either the icon or suggestions, records only contain one or the other match record_id.as_icon_id() { @@ -173,7 +181,7 @@ impl<'a> SuggestDao<'a> { self.drop_unparsable_record_id(&record_id)?; // Advance the last fetch time, so that we can resume // fetching after this record if we're interrupted. - self.put_last_ingest_if_newer(record.last_modified) + self.put_last_ingest_if_newer(last_ingest_key, record.last_modified) } // =============== Low level API =============== @@ -231,15 +239,20 @@ impl<'a> SuggestDao<'a> { s.title, s.url, s.provider, - s.score + s.score, + fk.full_keyword FROM suggestions s JOIN keywords k ON k.suggestion_id = s.id + LEFT JOIN + full_keywords fk + ON k.full_keyword_id = fk.id WHERE s.provider = :provider AND k.keyword = :keyword + AND NOT EXISTS (SELECT 1 FROM dismissed_suggestions WHERE url=s.url) "#, named_params! 
{ ":keyword": keyword_lowercased, @@ -248,8 +261,9 @@ impl<'a> SuggestDao<'a> { |row| -> Result { let suggestion_id: i64 = row.get("id")?; let title = row.get("title")?; - let raw_url = row.get::<_, String>("url")?; - let score = row.get::<_, f64>("score")?; + let raw_url: String = row.get("url")?; + let score: f64 = row.get("score")?; + let full_keyword_from_db: Option = row.get("full_keyword")?; let keywords: Vec = self.conn.query_rows_and_then_cached( r#" @@ -277,9 +291,12 @@ impl<'a> SuggestDao<'a> { amp.iab_category, amp.impression_url, amp.click_url, - (SELECT i.data FROM icons i WHERE i.id = amp.icon_id) AS icon + i.data AS icon, + i.mimetype AS icon_mimetype FROM amp_custom_details amp + LEFT JOIN + icons i ON amp.icon_id = i.id WHERE amp.suggestion_id = :suggestion_id "#, @@ -298,8 +315,10 @@ impl<'a> SuggestDao<'a> { title, url: cooked_url, raw_url, - full_keyword: full_keyword(keyword_lowercased, &keywords), + full_keyword: full_keyword_from_db + .unwrap_or_else(|| full_keyword(keyword_lowercased, &keywords)), icon: row.get("icon")?, + icon_mimetype: row.get("icon_mimetype")?, impression_url: row.get("impression_url")?, click_url: cooked_click_url, raw_click_url, @@ -330,6 +349,7 @@ impl<'a> SuggestDao<'a> { WHERE s.provider = :provider AND k.keyword = :keyword + AND NOT EXISTS (SELECT 1 FROM dismissed_suggestions WHERE url=s.url) "#, named_params! { ":keyword": keyword_lowercased, @@ -350,36 +370,51 @@ impl<'a> SuggestDao<'a> { }, |row| row.get(0), )?; - let icon = self.conn.try_query_one( - "SELECT i.data + let (icon, icon_mimetype) = self + .conn + .try_query_row( + "SELECT i.data, i.mimetype FROM icons i JOIN wikipedia_custom_details s ON s.icon_id = i.id - WHERE s.suggestion_id = :suggestion_id", - named_params! { - ":suggestion_id": suggestion_id - }, - true, - )?; + WHERE s.suggestion_id = :suggestion_id + LIMIT 1", + named_params! 
{ + ":suggestion_id": suggestion_id + }, + |row| -> Result<_> { + Ok(( + row.get::<_, Option>>(0)?, + row.get::<_, Option>(1)?, + )) + }, + true, + )? + .unwrap_or((None, None)); + Ok(Suggestion::Wikipedia { title, url: raw_url, full_keyword: full_keyword(keyword_lowercased, &keywords), icon, + icon_mimetype, }) }, )?; Ok(suggestions) } - /// Fetches Suggestions of type Amo provider that match the given query - pub fn fetch_amo_suggestions(&self, query: &SuggestionQuery) -> Result> { + /// Query for suggestions using the keyword prefix and provider + fn map_prefix_keywords( + &self, + query: &SuggestionQuery, + provider: &SuggestionProvider, + mut mapper: impl FnMut(&rusqlite::Row, &str) -> Result, + ) -> Result> { let keyword_lowercased = &query.keyword.to_lowercase(); let (keyword_prefix, keyword_suffix) = split_keyword(keyword_lowercased); - let suggestions_limit = &query.limit.unwrap_or(-1); - let suggestions = self - .conn - .query_rows_and_then_cached( - r#" + let suggestions_limit = query.limit.unwrap_or(-1); + self.conn.query_rows_and_then_cached( + r#" SELECT s.id, MAX(k.rank) AS rank, @@ -397,6 +432,7 @@ impl<'a> SuggestDao<'a> { k.keyword_prefix = :keyword_prefix AND (k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF') AND s.provider = :provider + AND NOT EXISTS (SELECT 1 FROM dismissed_suggestions WHERE url=s.url) GROUP BY s.id ORDER BY @@ -405,13 +441,23 @@ impl<'a> SuggestDao<'a> { LIMIT :suggestions_limit "#, - named_params! 
{ - ":keyword_prefix": keyword_prefix, - ":keyword_suffix": keyword_suffix, - ":provider": SuggestionProvider::Amo, - ":suggestions_limit": suggestions_limit, - }, - |row| -> Result> { + &[ + (":keyword_prefix", &keyword_prefix as &dyn ToSql), + (":keyword_suffix", &keyword_suffix as &dyn ToSql), + (":provider", provider as &dyn ToSql), + (":suggestions_limit", &suggestions_limit as &dyn ToSql), + ], + |row| mapper(row, keyword_suffix), + ) + } + + /// Fetches Suggestions of type Amo provider that match the given query + pub fn fetch_amo_suggestions(&self, query: &SuggestionQuery) -> Result> { + let suggestions = self + .map_prefix_keywords( + query, + &SuggestionProvider::Amo, + |row, keyword_suffix| -> Result> { let suggestion_id: i64 = row.get("id")?; let title = row.get("title")?; let raw_url = row.get::<_, String>("url")?; @@ -486,6 +532,7 @@ impl<'a> SuggestDao<'a> { k.keyword_prefix = :keyword_prefix AND (k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF') AND s.provider = :provider + AND NOT EXISTS (SELECT 1 FROM dismissed_suggestions WHERE url=s.url) GROUP BY s.id, k.confidence @@ -534,45 +581,11 @@ impl<'a> SuggestDao<'a> { /// Fetches suggestions for MDN pub fn fetch_mdn_suggestions(&self, query: &SuggestionQuery) -> Result> { - let keyword_lowercased = &query.keyword.to_lowercase(); - let (keyword_prefix, keyword_suffix) = split_keyword(keyword_lowercased); - let suggestions_limit = &query.limit.unwrap_or(-1); let suggestions = self - .conn - .query_rows_and_then_cached( - r#" - SELECT - s.id, - MAX(k.rank) AS rank, - s.title, - s.url, - s.provider, - s.score, - k.keyword_suffix - FROM - suggestions s - JOIN - prefix_keywords k - ON k.suggestion_id = s.id - WHERE - k.keyword_prefix = :keyword_prefix - AND (k.keyword_suffix BETWEEN :keyword_suffix AND :keyword_suffix || x'FFFF') - AND s.provider = :provider - GROUP BY - s.id - ORDER BY - s.score DESC, - rank DESC - LIMIT - :suggestions_limit - "#, - named_params! 
{ - ":keyword_prefix": keyword_prefix, - ":keyword_suffix": keyword_suffix, - ":provider": SuggestionProvider::Mdn, - ":suggestions_limit": suggestions_limit, - }, - |row| -> Result> { + .map_prefix_keywords( + query, + &SuggestionProvider::Mdn, + |row, keyword_suffix| -> Result> { let suggestion_id: i64 = row.get("id")?; let title = row.get("title")?; let raw_url = row.get::<_, String>("url")?; @@ -657,35 +670,15 @@ impl<'a> SuggestDao<'a> { record_id: &SuggestRecordId, suggestions: &[DownloadedAmoSuggestion], ) -> Result<()> { + let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?; for suggestion in suggestions { self.scope.err_if_interrupted()?; - let suggestion_id: i64 = self.conn.query_row_and_then_cachable( - &format!( - "INSERT INTO suggestions( - record_id, - provider, - title, - url, - score - ) - VALUES( - :record_id, - {}, - :title, - :url, - :score - ) - RETURNING id", - SuggestionProvider::Amo as u8 - ), - named_params! { - ":record_id": record_id.as_str(), - ":title": suggestion.title, - ":url": suggestion.url, - ":score": suggestion.score, - }, - |row| row.get(0), - true, + let suggestion_id = suggestion_insert.execute( + record_id, + &suggestion.title, + &suggestion.url, + suggestion.score, + SuggestionProvider::Amo, )?; self.conn.execute( "INSERT INTO amo_custom_details( @@ -747,105 +740,48 @@ impl<'a> SuggestDao<'a> { record_id: &SuggestRecordId, suggestions: &[DownloadedAmpWikipediaSuggestion], ) -> Result<()> { + // Prepare statements outside of the loop. This results in a large performance + // improvement on a fresh ingest, since there are so many rows. 
+ let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?; + let mut amp_insert = AmpInsertStatement::new(self.conn)?; + let mut wiki_insert = WikipediaInsertStatement::new(self.conn)?; + let mut keyword_insert = KeywordInsertStatement::new(self.conn)?; for suggestion in suggestions { self.scope.err_if_interrupted()?; let common_details = suggestion.common_details(); let provider = suggestion.provider(); - let suggestion_id: i64 = self.conn.query_row_and_then_cachable( - &format!( - "INSERT INTO suggestions( - record_id, - provider, - title, - url, - score - ) - VALUES( - :record_id, - {}, - :title, - :url, - :score - ) - RETURNING id", - provider as u8 - ), - named_params! { - ":record_id": record_id.as_str(), - ":title": common_details.title, - ":url": common_details.url, - ":score": common_details.score.unwrap_or(DEFAULT_SUGGESTION_SCORE) - }, - |row| row.get(0), - true, + let suggestion_id = suggestion_insert.execute( + record_id, + &common_details.title, + &common_details.url, + common_details.score.unwrap_or(DEFAULT_SUGGESTION_SCORE), + provider, )?; match suggestion { DownloadedAmpWikipediaSuggestion::Amp(amp) => { - self.conn.execute( - "INSERT INTO amp_custom_details( - suggestion_id, - advertiser, - block_id, - iab_category, - impression_url, - click_url, - icon_id - ) - VALUES( - :suggestion_id, - :advertiser, - :block_id, - :iab_category, - :impression_url, - :click_url, - :icon_id - )", - named_params! { - ":suggestion_id": suggestion_id, - ":advertiser": amp.advertiser, - ":block_id": amp.block_id, - ":iab_category": amp.iab_category, - ":impression_url": amp.impression_url, - ":click_url": amp.click_url, - ":icon_id": amp.icon_id, - }, - )?; + amp_insert.execute(suggestion_id, amp)?; } DownloadedAmpWikipediaSuggestion::Wikipedia(wikipedia) => { - self.conn.execute( - "INSERT INTO wikipedia_custom_details( - suggestion_id, - icon_id - ) - VALUES( - :suggestion_id, - :icon_id - )", - named_params! 
{ - ":suggestion_id": suggestion_id, - ":icon_id": wikipedia.icon_id, - }, - )?; + wiki_insert.execute(suggestion_id, wikipedia)?; } } - for (index, keyword) in common_details.keywords.iter().enumerate() { - self.conn.execute( - "INSERT INTO keywords( - keyword, - suggestion_id, - rank - ) - VALUES( - :keyword, - :suggestion_id, - :rank - )", - named_params! { - ":keyword": keyword, - ":rank": index, - ":suggestion_id": suggestion_id, - }, + let mut full_keyword_inserter = FullKeywordInserter::new(self.conn, suggestion_id); + for keyword in common_details.keywords() { + let full_keyword_id = match (suggestion, keyword.full_keyword) { + // Try to associate full keyword data. Only do this for AMP, we decided to + // skip it for Wikipedia in https://bugzilla.mozilla.org/show_bug.cgi?id=1876217 + (DownloadedAmpWikipediaSuggestion::Amp(_), Some(full_keyword)) => { + Some(full_keyword_inserter.maybe_insert(full_keyword)?) + } + _ => None, + }; + + keyword_insert.execute( + suggestion_id, + keyword.keyword, + full_keyword_id, + keyword.rank, )?; } } @@ -859,84 +795,32 @@ impl<'a> SuggestDao<'a> { record_id: &SuggestRecordId, suggestions: &[DownloadedAmpSuggestion], ) -> Result<()> { + let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?; + let mut amp_insert = AmpInsertStatement::new(self.conn)?; + let mut keyword_insert = KeywordInsertStatement::new(self.conn)?; for suggestion in suggestions { self.scope.err_if_interrupted()?; let common_details = &suggestion.common_details; - let suggestion_id: i64 = self.conn.query_row_and_then_cachable( - &format!( - "INSERT INTO suggestions( - record_id, - provider, - title, - url, - score - ) - VALUES( - :record_id, - {}, - :title, - :url, - :score - ) - RETURNING id", - SuggestionProvider::AmpMobile as u8 - ), - named_params! 
{ - ":record_id": record_id.as_str(), - ":title": common_details.title, - ":url": common_details.url, - ":score": common_details.score.unwrap_or(DEFAULT_SUGGESTION_SCORE) - }, - |row| row.get(0), - true, - )?; - self.conn.execute( - "INSERT INTO amp_custom_details( - suggestion_id, - advertiser, - block_id, - iab_category, - impression_url, - click_url, - icon_id - ) - VALUES( - :suggestion_id, - :advertiser, - :block_id, - :iab_category, - :impression_url, - :click_url, - :icon_id - )", - named_params! { - ":suggestion_id": suggestion_id, - ":advertiser": suggestion.advertiser, - ":block_id": suggestion.block_id, - ":iab_category": suggestion.iab_category, - ":impression_url": suggestion.impression_url, - ":click_url": suggestion.click_url, - ":icon_id": suggestion.icon_id, - }, + let suggestion_id = suggestion_insert.execute( + record_id, + &common_details.title, + &common_details.url, + common_details.score.unwrap_or(DEFAULT_SUGGESTION_SCORE), + SuggestionProvider::AmpMobile, )?; + amp_insert.execute(suggestion_id, suggestion)?; - for (index, keyword) in common_details.keywords.iter().enumerate() { - self.conn.execute( - "INSERT INTO keywords( - keyword, - suggestion_id, - rank - ) - VALUES( - :keyword, - :suggestion_id, - :rank - )", - named_params! 
{ - ":keyword": keyword, - ":rank": index, - ":suggestion_id": suggestion_id, - }, + let mut full_keyword_inserter = FullKeywordInserter::new(self.conn, suggestion_id); + for keyword in common_details.keywords() { + let full_keyword_id = keyword + .full_keyword + .map(|full_keyword| full_keyword_inserter.maybe_insert(full_keyword)) + .transpose()?; + keyword_insert.execute( + suggestion_id, + keyword.keyword, + full_keyword_id, + keyword.rank, )?; } } @@ -950,37 +834,16 @@ impl<'a> SuggestDao<'a> { record_id: &SuggestRecordId, suggestions: &[DownloadedPocketSuggestion], ) -> Result<()> { + let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?; for suggestion in suggestions { self.scope.err_if_interrupted()?; - let suggestion_id: i64 = self.conn.query_row_and_then_cachable( - &format!( - "INSERT INTO suggestions( - record_id, - provider, - title, - url, - score - ) - VALUES( - :record_id, - {}, - :title, - :url, - :score - ) - RETURNING id", - SuggestionProvider::Pocket as u8 - ), - named_params! { - ":record_id": record_id.as_str(), - ":title": suggestion.title, - ":url": suggestion.url, - ":score": suggestion.score, - }, - |row| row.get(0), - true, + let suggestion_id = suggestion_insert.execute( + record_id, + &suggestion.title, + &suggestion.url, + suggestion.score, + SuggestionProvider::Pocket, )?; - for ((rank, keyword), confidence) in suggestion .high_confidence_keywords .iter() @@ -1030,35 +893,15 @@ impl<'a> SuggestDao<'a> { record_id: &SuggestRecordId, suggestions: &[DownloadedMdnSuggestion], ) -> Result<()> { + let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?; for suggestion in suggestions { self.scope.err_if_interrupted()?; - let suggestion_id: i64 = self.conn.query_row_and_then_cachable( - &format!( - "INSERT INTO suggestions( - record_id, - provider, - title, - url, - score - ) - VALUES( - :record_id, - {}, - :title, - :url, - :score - ) - RETURNING id", - SuggestionProvider::Mdn as u8 - ), - named_params! 
{ - ":record_id": record_id.as_str(), - ":title": suggestion.title, - ":url": suggestion.url, - ":score": suggestion.score, - }, - |row| row.get(0), - true, + let suggestion_id = suggestion_insert.execute( + record_id, + &suggestion.title, + &suggestion.url, + suggestion.score, + SuggestionProvider::Mdn, )?; self.conn.execute_cached( "INSERT INTO mdn_custom_details( @@ -1107,20 +950,14 @@ impl<'a> SuggestDao<'a> { record_id: &SuggestRecordId, data: &DownloadedWeatherData, ) -> Result<()> { + let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?; self.scope.err_if_interrupted()?; - let suggestion_id: i64 = self.conn.query_row_and_then_cachable( - &format!( - "INSERT INTO suggestions(record_id, provider, title, url, score) - VALUES(:record_id, {}, '', '', :score) - RETURNING id", - SuggestionProvider::Weather as u8 - ), - named_params! { - ":record_id": record_id.as_str(), - ":score": data.weather.score.unwrap_or(DEFAULT_SUGGESTION_SCORE), - }, - |row| row.get(0), - true, + let suggestion_id = suggestion_insert.execute( + record_id, + "", + "", + data.weather.score.unwrap_or(DEFAULT_SUGGESTION_SCORE), + SuggestionProvider::Weather, )?; for (index, keyword) in data.weather.keywords.iter().enumerate() { self.conn.execute( @@ -1141,24 +978,43 @@ impl<'a> SuggestDao<'a> { } /// Inserts or replaces an icon for a suggestion into the database. - pub fn put_icon(&mut self, icon_id: &str, data: &[u8]) -> Result<()> { + pub fn put_icon(&mut self, icon_id: &str, data: &[u8], mimetype: &str) -> Result<()> { self.conn.execute( "INSERT OR REPLACE INTO icons( id, - data + data, + mimetype ) VALUES( :id, - :data + :data, + :mimetype )", named_params! { ":id": icon_id, ":data": data, + ":mimetype": mimetype, }, )?; Ok(()) } + pub fn insert_dismissal(&self, url: &str) -> Result<()> { + self.conn.execute( + "INSERT OR IGNORE INTO dismissed_suggestions(url) + VALUES(:url)", + named_params! 
{ + ":url": url, + }, + )?; + Ok(()) + } + + pub fn clear_dismissals(&self) -> Result<()> { + self.conn.execute("DELETE FROM dismissed_suggestions", ())?; + Ok(()) + } + /// Deletes all suggestions associated with a Remote Settings record from /// the database. pub fn drop_suggestions(&mut self, record_id: &SuggestRecordId) -> Result<()> { @@ -1196,12 +1052,7 @@ impl<'a> SuggestDao<'a> { /// Clears the database, removing all suggestions, icons, and metadata. pub fn clear(&mut self) -> Result<()> { - self.conn.execute_batch( - "DELETE FROM suggestions; - DELETE FROM icons; - DELETE FROM meta;", - )?; - Ok(()) + Ok(clear_database(self.conn)?) } /// Returns the value associated with a metadata key. @@ -1224,12 +1075,14 @@ impl<'a> SuggestDao<'a> { /// Updates the last ingest timestamp if the given last modified time is /// newer than the existing one recorded. - pub fn put_last_ingest_if_newer(&mut self, record_last_modified: u64) -> Result<()> { - let last_ingest = self - .get_meta::(LAST_INGEST_META_KEY)? - .unwrap_or_default(); + pub fn put_last_ingest_if_newer( + &mut self, + last_ingest_key: &str, + record_last_modified: u64, + ) -> Result<()> { + let last_ingest = self.get_meta::(last_ingest_key)?.unwrap_or_default(); if record_last_modified > last_ingest { - self.put_meta(LAST_INGEST_META_KEY, record_last_modified)?; + self.put_meta(last_ingest_key, record_last_modified)?; } Ok(()) @@ -1310,6 +1163,185 @@ impl<'a> SuggestDao<'a> { } } +/// Helper struct to get full_keyword_ids for a suggestion +/// +/// `FullKeywordInserter` handles repeated full keywords efficiently. The first instance will +/// cause a row to be inserted into the database. Subsequent instances will return the same +/// full_keyword_id. 
+struct FullKeywordInserter<'a> { + conn: &'a Connection, + suggestion_id: i64, + last_inserted: Option<(&'a str, i64)>, +} + +impl<'a> FullKeywordInserter<'a> { + fn new(conn: &'a Connection, suggestion_id: i64) -> Self { + Self { + conn, + suggestion_id, + last_inserted: None, + } + } + + fn maybe_insert(&mut self, full_keyword: &'a str) -> rusqlite::Result<i64> { + match self.last_inserted { + Some((s, id)) if s == full_keyword => Ok(id), + _ => { + let full_keyword_id = self.conn.query_row_and_then( + "INSERT INTO full_keywords( + suggestion_id, + full_keyword + ) + VALUES( + :suggestion_id, + :keyword + ) + RETURNING id", + named_params! { + ":keyword": full_keyword, + ":suggestion_id": self.suggestion_id, + }, + |row| row.get(0), + )?; + self.last_inserted = Some((full_keyword, full_keyword_id)); + Ok(full_keyword_id) + } + } + } +} + +// ======================== Statement types ======================== +// +// During ingestion we can insert hundreds of thousands of rows. These types enable speedups by +// allowing us to prepare a statement outside a loop and use it many times inside the loop. +// +// Each type wraps [Connection::prepare] and [Statement] to provide a simplified interface, +// tailored to a specific query. +// +// This pattern is applicable for whenever we execute the same query repeatedly in a loop. +// The impact scales with the number of loop iterations, which is why we currently don't do this +// for providers like Mdn, Pocket, and Weather, which have relatively small number of records +// compared to Amp/Wikipedia. + +struct SuggestionInsertStatement<'conn>(rusqlite::Statement<'conn>); + +impl<'conn> SuggestionInsertStatement<'conn> { + fn new(conn: &'conn Connection) -> Result<Self> { + Ok(Self(conn.prepare( + "INSERT INTO suggestions( + record_id, + title, + url, + score, + provider + ) + VALUES(?, ?, ?, ?, ?) 
+ RETURNING id", + )?)) + } + + /// Execute the insert and return the `suggestion_id` for the new row + fn execute( + &mut self, + record_id: &SuggestRecordId, + title: &str, + url: &str, + score: f64, + provider: SuggestionProvider, + ) -> Result<i64> { + Ok(self.0.query_row( + (record_id.as_str(), title, url, score, provider as u8), + |row| row.get(0), + )?) + } +} + +struct AmpInsertStatement<'conn>(rusqlite::Statement<'conn>); + +impl<'conn> AmpInsertStatement<'conn> { + fn new(conn: &'conn Connection) -> Result<Self> { + Ok(Self(conn.prepare( + "INSERT INTO amp_custom_details( + suggestion_id, + advertiser, + block_id, + iab_category, + impression_url, + click_url, + icon_id + ) + VALUES(?, ?, ?, ?, ?, ?, ?) + ", + )?)) + } + + fn execute(&mut self, suggestion_id: i64, amp: &DownloadedAmpSuggestion) -> Result<()> { + self.0.execute(( + suggestion_id, + &amp.advertiser, + amp.block_id, + &amp.iab_category, + &amp.impression_url, + &amp.click_url, + &amp.icon_id, + ))?; + Ok(()) + } +} + +struct WikipediaInsertStatement<'conn>(rusqlite::Statement<'conn>); + +impl<'conn> WikipediaInsertStatement<'conn> { + fn new(conn: &'conn Connection) -> Result<Self> { + Ok(Self(conn.prepare( + "INSERT INTO wikipedia_custom_details( + suggestion_id, + icon_id + ) + VALUES(?, ?) + ", + )?)) + } + + fn execute( + &mut self, + suggestion_id: i64, + wikipedia: &DownloadedWikipediaSuggestion, + ) -> Result<()> { + self.0.execute((suggestion_id, &wikipedia.icon_id))?; + Ok(()) + } +} + +struct KeywordInsertStatement<'conn>(rusqlite::Statement<'conn>); + +impl<'conn> KeywordInsertStatement<'conn> { + fn new(conn: &'conn Connection) -> Result<Self> { + Ok(Self(conn.prepare( + "INSERT INTO keywords( + suggestion_id, + keyword, + full_keyword_id, + rank + ) + VALUES(?, ?, ?, ?) 
+ ", + )?)) + } + + fn execute( + &mut self, + suggestion_id: i64, + keyword: &str, + full_keyword_id: Option<i64>, + rank: usize, + ) -> Result<()> { + self.0 + .execute((suggestion_id, keyword, full_keyword_id, rank))?; + Ok(()) + } +} + fn provider_config_meta_key(provider: SuggestionProvider) -> String { format!("{}{}", PROVIDER_CONFIG_META_KEY_PREFIX, provider as u8) } -- cgit v1.2.3