From d8bbc7858622b6d9c278469aab701ca0b609cddf Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 15 May 2024 05:35:49 +0200 Subject: Merging upstream version 126.0. Signed-off-by: Daniel Baumann --- third_party/rust/suggest/src/benchmarks/README.md | 28 ++++++ third_party/rust/suggest/src/benchmarks/client.rs | 97 ++++++++++++++++++ third_party/rust/suggest/src/benchmarks/ingest.rs | 116 ++++++++++++++++++++++ third_party/rust/suggest/src/benchmarks/mod.rs | 40 ++++++++ 4 files changed, 281 insertions(+) create mode 100644 third_party/rust/suggest/src/benchmarks/README.md create mode 100644 third_party/rust/suggest/src/benchmarks/client.rs create mode 100644 third_party/rust/suggest/src/benchmarks/ingest.rs create mode 100644 third_party/rust/suggest/src/benchmarks/mod.rs (limited to 'third_party/rust/suggest/src/benchmarks') diff --git a/third_party/rust/suggest/src/benchmarks/README.md b/third_party/rust/suggest/src/benchmarks/README.md new file mode 100644 index 0000000000..45150d8413 --- /dev/null +++ b/third_party/rust/suggest/src/benchmarks/README.md @@ -0,0 +1,28 @@ +# Suggest benchmarking code + +Use `cargo suggest-bench` to run these benchmarks. + +The main benchmarking code lives here, while the criterion integration code lives in the `benches/` +directory. + +## Benchmarks + +### ingest-[provider-type] + +Time it takes to ingest all suggestions for a provider type on an empty database. +The benchmark downloads network resources in advance in order to exclude the network request time +from these measurements. + +### Benchmarks it would be nice to have + +- Ingestion with synthetic data. This would isolate the benchmark from changes to the RS database. +- Fetching suggestions + +## cargo suggest-debug-ingestion-sizes + +Run this to get row counts for all database tables. This can be very useful for improving +benchmarks, since targeting the tables with the largest number of rows will usually lead to the +largest improvements. 
+ +The command also prints out the size of all remote-settings attachments, which can be good to +optimize on its own since it represents the amount of data users need to download. diff --git a/third_party/rust/suggest/src/benchmarks/client.rs b/third_party/rust/suggest/src/benchmarks/client.rs new file mode 100644 index 0000000000..f5a21fd9cc --- /dev/null +++ b/third_party/rust/suggest/src/benchmarks/client.rs @@ -0,0 +1,97 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use crate::{rs::SuggestRemoteSettingsClient, Result}; +use parking_lot::Mutex; +use remote_settings::{Client, GetItemsOptions, RemoteSettingsConfig, RemoteSettingsResponse}; +use std::collections::HashMap; + +/// Remote settings client that runs during the benchmark warm-up phase. +/// +/// This should be used to run a full ingestion. +/// Then it can be converted into a [RemoteSettingsBenchmarkClient], which allows benchmark code to exclude the network request time. +/// [RemoteSettingsBenchmarkClient] implements [SuggestRemoteSettingsClient] by getting data from a HashMap rather than hitting the network. 
+pub struct RemoteSettingsWarmUpClient { + client: Client, + pub get_records_responses: Mutex>, + pub get_attachment_responses: Mutex>>, +} + +impl RemoteSettingsWarmUpClient { + pub fn new() -> Self { + Self { + client: Client::new(RemoteSettingsConfig { + server_url: None, + bucket_name: None, + collection_name: crate::rs::REMOTE_SETTINGS_COLLECTION.into(), + }) + .unwrap(), + get_records_responses: Mutex::new(HashMap::new()), + get_attachment_responses: Mutex::new(HashMap::new()), + } + } +} + +impl Default for RemoteSettingsWarmUpClient { + fn default() -> Self { + Self::new() + } +} + +impl SuggestRemoteSettingsClient for RemoteSettingsWarmUpClient { + fn get_records_with_options( + &self, + options: &GetItemsOptions, + ) -> Result { + let response = self.client.get_records_with_options(options)?; + self.get_records_responses + .lock() + .insert(options.clone(), response.clone()); + Ok(response) + } + + fn get_attachment(&self, location: &str) -> Result> { + let response = self.client.get_attachment(location)?; + self.get_attachment_responses + .lock() + .insert(location.to_string(), response.clone()); + Ok(response) + } +} + +#[derive(Clone)] +pub struct RemoteSettingsBenchmarkClient { + pub get_records_responses: HashMap, + pub get_attachment_responses: HashMap>, +} + +impl SuggestRemoteSettingsClient for RemoteSettingsBenchmarkClient { + fn get_records_with_options( + &self, + options: &GetItemsOptions, + ) -> Result { + Ok(self + .get_records_responses + .get(options) + .unwrap_or_else(|| panic!("options not found: {options:?}")) + .clone()) + } + + fn get_attachment(&self, location: &str) -> Result> { + Ok(self + .get_attachment_responses + .get(location) + .unwrap_or_else(|| panic!("location not found: {location:?}")) + .clone()) + } +} + +impl From for RemoteSettingsBenchmarkClient { + fn from(warm_up_client: RemoteSettingsWarmUpClient) -> Self { + Self { + get_records_responses: warm_up_client.get_records_responses.into_inner(), + 
get_attachment_responses: warm_up_client.get_attachment_responses.into_inner(), + } + } +} diff --git a/third_party/rust/suggest/src/benchmarks/ingest.rs b/third_party/rust/suggest/src/benchmarks/ingest.rs new file mode 100644 index 0000000000..bbefc6a00a --- /dev/null +++ b/third_party/rust/suggest/src/benchmarks/ingest.rs @@ -0,0 +1,116 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use crate::{ + benchmarks::{ + client::{RemoteSettingsBenchmarkClient, RemoteSettingsWarmUpClient}, + BenchmarkWithInput, + }, + rs::SuggestRecordType, + store::SuggestStoreInner, + SuggestIngestionConstraints, +}; +use std::sync::atomic::{AtomicU32, Ordering}; + +static DB_FILE_COUNTER: AtomicU32 = AtomicU32::new(0); + +pub struct IngestBenchmark { + temp_dir: tempfile::TempDir, + client: RemoteSettingsBenchmarkClient, + record_type: SuggestRecordType, +} + +impl IngestBenchmark { + pub fn new(record_type: SuggestRecordType) -> Self { + let temp_dir = tempfile::tempdir().unwrap(); + let store = SuggestStoreInner::new( + temp_dir.path().join("warmup.sqlite"), + RemoteSettingsWarmUpClient::new(), + ); + store.benchmark_ingest_records_by_type(record_type); + Self { + client: RemoteSettingsBenchmarkClient::from(store.into_settings_client()), + temp_dir, + record_type, + } + } +} + +// The input for each benchmark is `SuggestStoreInner` with a fresh database. 
+// +// This is wrapped in a newtype so that it can be exposed in the public trait +pub struct InputType(SuggestStoreInner); + +impl BenchmarkWithInput for IngestBenchmark { + type Input = InputType; + + fn generate_input(&self) -> Self::Input { + let data_path = self.temp_dir.path().join(format!( + "db{}.sqlite", + DB_FILE_COUNTER.fetch_add(1, Ordering::Relaxed) + )); + let store = SuggestStoreInner::new(data_path, self.client.clone()); + store.ensure_db_initialized(); + InputType(store) + } + + fn benchmarked_code(&self, input: Self::Input) { + let InputType(store) = input; + store.benchmark_ingest_records_by_type(self.record_type); + } +} + +/// Get IngestBenchmark instances for all record types +pub fn all_benchmarks() -> Vec<(&'static str, IngestBenchmark)> { + vec![ + ("icon", IngestBenchmark::new(SuggestRecordType::Icon)), + ( + "amp-wikipedia", + IngestBenchmark::new(SuggestRecordType::AmpWikipedia), + ), + ("amo", IngestBenchmark::new(SuggestRecordType::Amo)), + ("pocket", IngestBenchmark::new(SuggestRecordType::Pocket)), + ("yelp", IngestBenchmark::new(SuggestRecordType::Yelp)), + ("mdn", IngestBenchmark::new(SuggestRecordType::Mdn)), + ("weather", IngestBenchmark::new(SuggestRecordType::Weather)), + ( + "global-config", + IngestBenchmark::new(SuggestRecordType::GlobalConfig), + ), + ( + "amp-mobile", + IngestBenchmark::new(SuggestRecordType::AmpMobile), + ), + ] +} + +pub fn print_debug_ingestion_sizes() { + viaduct_reqwest::use_reqwest_backend(); + let store = SuggestStoreInner::new( + "file:debug_ingestion_sizes?mode=memory&cache=shared", + RemoteSettingsWarmUpClient::new(), + ); + store + .ingest(SuggestIngestionConstraints::default()) + .unwrap(); + let table_row_counts = store.table_row_counts(); + let client = store.into_settings_client(); + let total_attachment_size: usize = client + .get_attachment_responses + .lock() + .values() + .map(|data| data.len()) + .sum(); + + println!( + "Total attachment size: {}kb", + (total_attachment_size + 500) / 
1000 + ); + println!(); + println!("Database table row counts"); + println!("-------------------------"); + for (name, count) in table_row_counts { + println!("{name:30}: {count}"); + } +} diff --git a/third_party/rust/suggest/src/benchmarks/mod.rs b/third_party/rust/suggest/src/benchmarks/mod.rs new file mode 100644 index 0000000000..eb3b2e8abe --- /dev/null +++ b/third_party/rust/suggest/src/benchmarks/mod.rs @@ -0,0 +1,40 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//! Benchmarking support +//! +//! Benchmarks are split up into two parts: the functions to be benchmarked live here, while the benchmarking code itself lives in `benches/bench.rs`. +//! It's easier to write benchmarking code inside the main crate, where we have access to private items. +//! However, it's easier to integrate with Cargo and criterion if benchmarks live in a separate crate. +//! +//! All benchmarks are defined as structs that implement either the [Benchmark] or [BenchmarkWithInput] trait. + +pub mod client; +pub mod ingest; + +/// Trait for simple benchmarks +/// +/// This supports simple benchmarks that don't require any input. Note: global setup can be done +/// in the `new()` method for the struct. +pub trait Benchmark { + /// Perform the operations that we're benchmarking. + fn benchmarked_code(&self); +} + +/// Trait for benchmarks that require input +/// +/// This will run using Criterion's `iter_batched` function. Criterion will create a batch of +/// inputs, then pass each one to benchmark. +/// +/// This supports benchmarks that consume a freshly generated input on each run. Note: global setup can be done +/// in the `new()` method for the struct. 
+pub trait BenchmarkWithInput { + type Input; + + /// Generate the input (this is not included in the benchmark time) + fn generate_input(&self) -> Self::Input; + + /// Perform the operations that we're benchmarking. + fn benchmarked_code(&self, input: Self::Input); +} -- cgit v1.2.3