4 files changed, 281 insertions, 0 deletions
diff --git a/third_party/rust/suggest/src/benchmarks/README.md b/third_party/rust/suggest/src/benchmarks/README.md
new file mode 100644
index 0000000000..45150d8413
--- /dev/null
+++ b/third_party/rust/suggest/src/benchmarks/README.md
@@ -0,0 +1,28 @@
+# Suggest benchmarking code
+
+Use `cargo suggest-bench` to run these benchmarks.
+
+The main benchmarking code lives here, while the criterion integration code lives in the `benches/`
+directory.
+
+## Benchmarks
+
+### ingest-[provider-type]
+
+Time it takes to ingest all suggestions for a provider type on an empty database.
+The benchmark downloads network resources in advance in order to exclude the network request time
+from these measurements.
+
+### Benchmarks it would be nice to have
+
+- Ingestion with synthetic data.  This would isolate the benchmark from changes to the RS database.
+- Fetching suggestions
+
+## cargo suggest-debug-ingestion-sizes
+
+Run this to get row counts for all database tables.  This can be very useful for improving
+benchmarks, since targeting the tables with the largest number of rows will usually lead to the
+largest improvements.
+
+The command also prints out the size of all remote-settings attachments, which can be good to
+optimize on its own since it represents the amount of data users need to download.
diff --git a/third_party/rust/suggest/src/benchmarks/client.rs b/third_party/rust/suggest/src/benchmarks/client.rs
new file mode 100644
index 0000000000..f5a21fd9cc
--- /dev/null
+++ b/third_party/rust/suggest/src/benchmarks/client.rs
@@ -0,0 +1,97 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use crate::{rs::SuggestRemoteSettingsClient, Result};
+use parking_lot::Mutex;
+use remote_settings::{Client, GetItemsOptions, RemoteSettingsConfig, RemoteSettingsResponse};
+use std::collections::HashMap;
+
+/// Remote settings client that runs during the benchmark warm-up phase.
+///
+/// This should be used to run a full ingestion.
+/// Then it can be converted into a [RemoteSettingsBenchmarkClient], which allows benchmark code to exclude the network request time.
+/// [RemoteSettingsBenchmarkClient] implements [SuggestRemoteSettingsClient] by getting data from a HashMap rather than hitting the network.
+pub struct RemoteSettingsWarmUpClient {
+    client: Client,
+    pub get_records_responses: Mutex<HashMap<GetItemsOptions, RemoteSettingsResponse>>,
+    pub get_attachment_responses: Mutex<HashMap<String, Vec<u8>>>,
+}
+
+impl RemoteSettingsWarmUpClient {
+    pub fn new() -> Self {
+        Self {
+            client: Client::new(RemoteSettingsConfig {
+                server_url: None,
+                bucket_name: None,
+                collection_name: crate::rs::REMOTE_SETTINGS_COLLECTION.into(),
+            })
+            .unwrap(),
+            get_records_responses: Mutex::new(HashMap::new()),
+            get_attachment_responses: Mutex::new(HashMap::new()),
+        }
+    }
+}
+
+impl Default for RemoteSettingsWarmUpClient {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl SuggestRemoteSettingsClient for RemoteSettingsWarmUpClient {
+    fn get_records_with_options(
+        &self,
+        options: &GetItemsOptions,
+    ) -> Result<RemoteSettingsResponse> {
+        let response = self.client.get_records_with_options(options)?;
+        self.get_records_responses
+            .lock()
+            .insert(options.clone(), response.clone());
+        Ok(response)
+    }
+
+    fn get_attachment(&self, location: &str) -> Result<Vec<u8>> {
+        let response = self.client.get_attachment(location)?;
+        self.get_attachment_responses
+            .lock()
+            .insert(location.to_string(), response.clone());
+        Ok(response)
+    }
+}
+
+#[derive(Clone)]
+pub struct RemoteSettingsBenchmarkClient {
+    pub get_records_responses: HashMap<GetItemsOptions, RemoteSettingsResponse>,
+    pub get_attachment_responses: HashMap<String, Vec<u8>>,
+}
+
+impl SuggestRemoteSettingsClient for RemoteSettingsBenchmarkClient {
+    fn get_records_with_options(
+        &self,
+        options: &GetItemsOptions,
+    ) -> Result<RemoteSettingsResponse> {
+        Ok(self
+            .get_records_responses
+            .get(options)
+            .unwrap_or_else(|| panic!("options not found: {options:?}"))
+            .clone())
+    }
+
+    fn get_attachment(&self, location: &str) -> Result<Vec<u8>> {
+        Ok(self
+            .get_attachment_responses
+            .get(location)
+            .unwrap_or_else(|| panic!("location not found: {location:?}"))
+            .clone())
+    }
+}
+
+impl From<RemoteSettingsWarmUpClient> for RemoteSettingsBenchmarkClient {
+    fn from(warm_up_client: RemoteSettingsWarmUpClient) -> Self {
+        Self {
+            get_records_responses: warm_up_client.get_records_responses.into_inner(),
+            get_attachment_responses: warm_up_client.get_attachment_responses.into_inner(),
+        }
+    }
+}
diff --git a/third_party/rust/suggest/src/benchmarks/ingest.rs b/third_party/rust/suggest/src/benchmarks/ingest.rs
new file mode 100644
index 0000000000..bbefc6a00a
--- /dev/null
+++ b/third_party/rust/suggest/src/benchmarks/ingest.rs
@@ -0,0 +1,116 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use crate::{
+    benchmarks::{
+        client::{RemoteSettingsBenchmarkClient, RemoteSettingsWarmUpClient},
+        BenchmarkWithInput,
+    },
+    rs::SuggestRecordType,
+    store::SuggestStoreInner,
+    SuggestIngestionConstraints,
+};
+use std::sync::atomic::{AtomicU32, Ordering};
+
+static DB_FILE_COUNTER: AtomicU32 = AtomicU32::new(0);
+
+pub struct IngestBenchmark {
+    temp_dir: tempfile::TempDir,
+    client: RemoteSettingsBenchmarkClient,
+    record_type: SuggestRecordType,
+}
+
+impl IngestBenchmark {
+    pub fn new(record_type: SuggestRecordType) -> Self {
+        let temp_dir = tempfile::tempdir().unwrap();
+        let store = SuggestStoreInner::new(
+            temp_dir.path().join("warmup.sqlite"),
+            RemoteSettingsWarmUpClient::new(),
+        );
+        store.benchmark_ingest_records_by_type(record_type);
+        Self {
+            client: RemoteSettingsBenchmarkClient::from(store.into_settings_client()),
+            temp_dir,
+            record_type,
+        }
+    }
+}
+
+// The input for each benchmark is `SuggestStoreInner` with a fresh database.
+//
+// This is wrapped in a newtype so that it can be exposed in the public trait
+pub struct InputType(SuggestStoreInner<RemoteSettingsBenchmarkClient>);
+
+impl BenchmarkWithInput for IngestBenchmark {
+    type Input = InputType;
+
+    fn generate_input(&self) -> Self::Input {
+        let data_path = self.temp_dir.path().join(format!(
+            "db{}.sqlite",
+            DB_FILE_COUNTER.fetch_add(1, Ordering::Relaxed)
+        ));
+        let store = SuggestStoreInner::new(data_path, self.client.clone());
+        store.ensure_db_initialized();
+        InputType(store)
+    }
+
+    fn benchmarked_code(&self, input: Self::Input) {
+        let InputType(store) = input;
+        store.benchmark_ingest_records_by_type(self.record_type);
+    }
+}
+
+/// Get IngestBenchmark instances for all record types
+pub fn all_benchmarks() -> Vec<(&'static str, IngestBenchmark)> {
+    vec![
+        ("icon", IngestBenchmark::new(SuggestRecordType::Icon)),
+        (
+            "amp-wikipedia",
+            IngestBenchmark::new(SuggestRecordType::AmpWikipedia),
+        ),
+        ("amo", IngestBenchmark::new(SuggestRecordType::Amo)),
+        ("pocket", IngestBenchmark::new(SuggestRecordType::Pocket)),
+        ("yelp", IngestBenchmark::new(SuggestRecordType::Yelp)),
+        ("mdn", IngestBenchmark::new(SuggestRecordType::Mdn)),
+        ("weather", IngestBenchmark::new(SuggestRecordType::Weather)),
+        (
+            "global-config",
+            IngestBenchmark::new(SuggestRecordType::GlobalConfig),
+        ),
+        (
+            "amp-mobile",
+            IngestBenchmark::new(SuggestRecordType::AmpMobile),
+        ),
+    ]
+}
+
+pub fn print_debug_ingestion_sizes() {
+    viaduct_reqwest::use_reqwest_backend();
+    let store = SuggestStoreInner::new(
+        "file:debug_ingestion_sizes?mode=memory&cache=shared",
+        RemoteSettingsWarmUpClient::new(),
+    );
+    store
+        .ingest(SuggestIngestionConstraints::default())
+        .unwrap();
+    let table_row_counts = store.table_row_counts();
+    let client = store.into_settings_client();
+    let total_attachment_size: usize = client
+        .get_attachment_responses
+        .lock()
+        .values()
+        .map(|data| data.len())
+        .sum();
+
+    println!(
+        "Total attachment size: {}kb",
+        (total_attachment_size + 500) / 1000
+    );
+    println!();
+    println!("Database table row counts");
+    println!("-------------------------");
+    for (name, count) in table_row_counts {
+        println!("{name:30}: {count}");
+    }
+}
diff --git a/third_party/rust/suggest/src/benchmarks/mod.rs b/third_party/rust/suggest/src/benchmarks/mod.rs
new file mode 100644
index 0000000000..eb3b2e8abe
--- /dev/null
+++ b/third_party/rust/suggest/src/benchmarks/mod.rs
@@ -0,0 +1,40 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Benchmarking support
+//!
+//! Benchmarks are split up into two parts: the functions to be benchmarked live here, while the benchmarking code itself lives in `benches/bench.rs`.
+//! It's easier to write benchmarking code inside the main crate, where we have access to private items.
+//! However, it's easier to integrate with Cargo and criterion if benchmarks live in a separate crate.
+//!
+//! All benchmarks are defined as structs that implement either the [Benchmark] or [BenchmarkWithInput] trait.
+
+pub mod client;
+pub mod ingest;
+
+/// Trait for simple benchmarks
+///
+/// This supports simple benchmarks that don't require any input.  Note: global setup can be done
+/// in the `new()` method for the struct.
+pub trait Benchmark {
+    /// Perform the operations that we're benchmarking.
+    fn benchmarked_code(&self);
+}
+
+/// Trait for benchmarks that require input
+///
+/// This will run using Criterion's `iter_batched` function.  Criterion will create a batch of
+/// inputs, then pass each one to the benchmark.
+///
+/// This supports benchmarks that need a fresh input for each iteration.  Note: global setup can be
+/// done in the `new()` method for the struct.
+pub trait BenchmarkWithInput {
+    type Input;
+
+    /// Generate the input (this is not included in the benchmark time)
+    fn generate_input(&self) -> Self::Input;
+
+    /// Perform the operations that we're benchmarking.
+    fn benchmarked_code(&self, input: Self::Input);
+}