summaryrefslogtreecommitdiffstats
path: root/third_party/rust/suggest/src/benchmarks
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--third_party/rust/suggest/src/benchmarks/README.md28
-rw-r--r--third_party/rust/suggest/src/benchmarks/client.rs97
-rw-r--r--third_party/rust/suggest/src/benchmarks/ingest.rs116
-rw-r--r--third_party/rust/suggest/src/benchmarks/mod.rs40
4 files changed, 281 insertions, 0 deletions
diff --git a/third_party/rust/suggest/src/benchmarks/README.md b/third_party/rust/suggest/src/benchmarks/README.md
new file mode 100644
index 0000000000..45150d8413
--- /dev/null
+++ b/third_party/rust/suggest/src/benchmarks/README.md
@@ -0,0 +1,28 @@
+# Suggest benchmarking code
+
+Use `cargo suggest-bench` to run these benchmarks.
+
+The main benchmarking code lives here, while the criterion integration code lives in the `benches/`
+directory.
+
+## Benchmarks
+
+### ingest-[provider-type]
+
+Time it takes to ingest all suggestions for a provider type on an empty database.
+The bechmark downloads network resources in advance in order to exclude the network request time
+from these measurements.
+
+### Benchmarks it would be nice to have
+
+- Ingestion with synthetic data. This would isolate the benchmark from changes to the RS database.
+- Fetching suggestions
+
+## cargo suggest-debug-ingestion-sizes
+
+Run this to get row counts for all database tables. This can be very useful for improving
+benchmarks, since targeting the tables with the largest number of rows will usually lead to the
+largest improvements.
+
+The command also prints out the size of all remote-settings attachments, which can be good to
+optimize on its own since it represents the amount of data user's need to download.
diff --git a/third_party/rust/suggest/src/benchmarks/client.rs b/third_party/rust/suggest/src/benchmarks/client.rs
new file mode 100644
index 0000000000..f5a21fd9cc
--- /dev/null
+++ b/third_party/rust/suggest/src/benchmarks/client.rs
@@ -0,0 +1,97 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use crate::{rs::SuggestRemoteSettingsClient, Result};
+use parking_lot::Mutex;
+use remote_settings::{Client, GetItemsOptions, RemoteSettingsConfig, RemoteSettingsResponse};
+use std::collections::HashMap;
+
+/// Remotes settings client that runs during the benchmark warm-up phase.
+///
+/// This should be used to run a full ingestion.
+/// Then it can be converted into a [RemoteSettingsBenchmarkClient], which allows benchmark code to exclude the network request time.
+/// [RemoteSettingsBenchmarkClient] implements [SuggestRemoteSettingsClient] by getting data from a HashMap rather than hitting the network.
+pub struct RemoteSettingsWarmUpClient {
+ client: Client,
+ pub get_records_responses: Mutex<HashMap<GetItemsOptions, RemoteSettingsResponse>>,
+ pub get_attachment_responses: Mutex<HashMap<String, Vec<u8>>>,
+}
+
+impl RemoteSettingsWarmUpClient {
+ pub fn new() -> Self {
+ Self {
+ client: Client::new(RemoteSettingsConfig {
+ server_url: None,
+ bucket_name: None,
+ collection_name: crate::rs::REMOTE_SETTINGS_COLLECTION.into(),
+ })
+ .unwrap(),
+ get_records_responses: Mutex::new(HashMap::new()),
+ get_attachment_responses: Mutex::new(HashMap::new()),
+ }
+ }
+}
+
+impl Default for RemoteSettingsWarmUpClient {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl SuggestRemoteSettingsClient for RemoteSettingsWarmUpClient {
+ fn get_records_with_options(
+ &self,
+ options: &GetItemsOptions,
+ ) -> Result<RemoteSettingsResponse> {
+ let response = self.client.get_records_with_options(options)?;
+ self.get_records_responses
+ .lock()
+ .insert(options.clone(), response.clone());
+ Ok(response)
+ }
+
+ fn get_attachment(&self, location: &str) -> Result<Vec<u8>> {
+ let response = self.client.get_attachment(location)?;
+ self.get_attachment_responses
+ .lock()
+ .insert(location.to_string(), response.clone());
+ Ok(response)
+ }
+}
+
+#[derive(Clone)]
+pub struct RemoteSettingsBenchmarkClient {
+ pub get_records_responses: HashMap<GetItemsOptions, RemoteSettingsResponse>,
+ pub get_attachment_responses: HashMap<String, Vec<u8>>,
+}
+
+impl SuggestRemoteSettingsClient for RemoteSettingsBenchmarkClient {
+ fn get_records_with_options(
+ &self,
+ options: &GetItemsOptions,
+ ) -> Result<RemoteSettingsResponse> {
+ Ok(self
+ .get_records_responses
+ .get(options)
+ .unwrap_or_else(|| panic!("options not found: {options:?}"))
+ .clone())
+ }
+
+ fn get_attachment(&self, location: &str) -> Result<Vec<u8>> {
+ Ok(self
+ .get_attachment_responses
+ .get(location)
+ .unwrap_or_else(|| panic!("location not found: {location:?}"))
+ .clone())
+ }
+}
+
+impl From<RemoteSettingsWarmUpClient> for RemoteSettingsBenchmarkClient {
+ fn from(warm_up_client: RemoteSettingsWarmUpClient) -> Self {
+ Self {
+ get_records_responses: warm_up_client.get_records_responses.into_inner(),
+ get_attachment_responses: warm_up_client.get_attachment_responses.into_inner(),
+ }
+ }
+}
diff --git a/third_party/rust/suggest/src/benchmarks/ingest.rs b/third_party/rust/suggest/src/benchmarks/ingest.rs
new file mode 100644
index 0000000000..bbefc6a00a
--- /dev/null
+++ b/third_party/rust/suggest/src/benchmarks/ingest.rs
@@ -0,0 +1,116 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use crate::{
+ benchmarks::{
+ client::{RemoteSettingsBenchmarkClient, RemoteSettingsWarmUpClient},
+ BenchmarkWithInput,
+ },
+ rs::SuggestRecordType,
+ store::SuggestStoreInner,
+ SuggestIngestionConstraints,
+};
+use std::sync::atomic::{AtomicU32, Ordering};
+
+static DB_FILE_COUNTER: AtomicU32 = AtomicU32::new(0);
+
+pub struct IngestBenchmark {
+ temp_dir: tempfile::TempDir,
+ client: RemoteSettingsBenchmarkClient,
+ record_type: SuggestRecordType,
+}
+
+impl IngestBenchmark {
+ pub fn new(record_type: SuggestRecordType) -> Self {
+ let temp_dir = tempfile::tempdir().unwrap();
+ let store = SuggestStoreInner::new(
+ temp_dir.path().join("warmup.sqlite"),
+ RemoteSettingsWarmUpClient::new(),
+ );
+ store.benchmark_ingest_records_by_type(record_type);
+ Self {
+ client: RemoteSettingsBenchmarkClient::from(store.into_settings_client()),
+ temp_dir,
+ record_type,
+ }
+ }
+}
+
+// The input for each benchmark is `SuggestStoreInner` with a fresh database.
+//
+// This is wrapped in a newtype so that it can be exposed in the public trait
+pub struct InputType(SuggestStoreInner<RemoteSettingsBenchmarkClient>);
+
+impl BenchmarkWithInput for IngestBenchmark {
+ type Input = InputType;
+
+ fn generate_input(&self) -> Self::Input {
+ let data_path = self.temp_dir.path().join(format!(
+ "db{}.sqlite",
+ DB_FILE_COUNTER.fetch_add(1, Ordering::Relaxed)
+ ));
+ let store = SuggestStoreInner::new(data_path, self.client.clone());
+ store.ensure_db_initialized();
+ InputType(store)
+ }
+
+ fn benchmarked_code(&self, input: Self::Input) {
+ let InputType(store) = input;
+ store.benchmark_ingest_records_by_type(self.record_type);
+ }
+}
+
+/// Get IngestBenchmark instances for all record types
+pub fn all_benchmarks() -> Vec<(&'static str, IngestBenchmark)> {
+ vec![
+ ("icon", IngestBenchmark::new(SuggestRecordType::Icon)),
+ (
+ "amp-wikipedia",
+ IngestBenchmark::new(SuggestRecordType::AmpWikipedia),
+ ),
+ ("amo", IngestBenchmark::new(SuggestRecordType::Amo)),
+ ("pocket", IngestBenchmark::new(SuggestRecordType::Pocket)),
+ ("yelp", IngestBenchmark::new(SuggestRecordType::Yelp)),
+ ("mdn", IngestBenchmark::new(SuggestRecordType::Mdn)),
+ ("weather", IngestBenchmark::new(SuggestRecordType::Weather)),
+ (
+ "global-config",
+ IngestBenchmark::new(SuggestRecordType::GlobalConfig),
+ ),
+ (
+ "amp-mobile",
+ IngestBenchmark::new(SuggestRecordType::AmpMobile),
+ ),
+ ]
+}
+
+pub fn print_debug_ingestion_sizes() {
+ viaduct_reqwest::use_reqwest_backend();
+ let store = SuggestStoreInner::new(
+ "file:debug_ingestion_sizes?mode=memory&cache=shared",
+ RemoteSettingsWarmUpClient::new(),
+ );
+ store
+ .ingest(SuggestIngestionConstraints::default())
+ .unwrap();
+ let table_row_counts = store.table_row_counts();
+ let client = store.into_settings_client();
+ let total_attachment_size: usize = client
+ .get_attachment_responses
+ .lock()
+ .values()
+ .map(|data| data.len())
+ .sum();
+
+ println!(
+ "Total attachment size: {}kb",
+ (total_attachment_size + 500) / 1000
+ );
+ println!();
+ println!("Database table row counts");
+ println!("-------------------------");
+ for (name, count) in table_row_counts {
+ println!("{name:30}: {count}");
+ }
+}
diff --git a/third_party/rust/suggest/src/benchmarks/mod.rs b/third_party/rust/suggest/src/benchmarks/mod.rs
new file mode 100644
index 0000000000..eb3b2e8abe
--- /dev/null
+++ b/third_party/rust/suggest/src/benchmarks/mod.rs
@@ -0,0 +1,40 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Benchmarking support
+//!
+//! Benchmarks are split up into two parts: the functions to be benchmarked live here, which the benchmarking code itself lives in `benches/bench.rs`.
+//! It's easier to write benchmarking code inside the main crate, where we have access to private items.
+//! However, it's easier to integrate with Cargo and criterion if benchmarks live in a separate crate.
+//!
+//! All benchmarks are defined as structs that implement either the [Benchmark] or [BenchmarkWithInput]
+
+pub mod client;
+pub mod ingest;
+
+/// Trait for simple benchmarks
+///
+/// This supports simple benchmarks that don't require any input. Note: global setup can be done
+/// in the `new()` method for the struct.
+pub trait Benchmark {
+ /// Perform the operations that we're benchmarking.
+ fn benchmarked_code(&self);
+}
+
+/// Trait for benchmarks that require input
+///
+/// This will run using Criterion's `iter_batched` function. Criterion will create a batch of
+/// inputs, then pass each one to benchmark.
+///
+/// This supports simple benchmarks that don't require any input. Note: global setup can be done
+/// in the `new()` method for the struct.
+pub trait BenchmarkWithInput {
+ type Input;
+
+ /// Generate the input (this is not included in the benchmark time)
+ fn generate_input(&self) -> Self::Input;
+
+ /// Perform the operations that we're benchmarking.
+ fn benchmarked_code(&self, input: Self::Input);
+}