diff options
Diffstat (limited to 'third_party/rust/prio/benches')
-rw-r--r-- | third_party/rust/prio/benches/cycle_counts.rs | 341 | ||||
-rw-r--r-- | third_party/rust/prio/benches/speed_tests.rs | 876 |
2 files changed, 1217 insertions, 0 deletions
diff --git a/third_party/rust/prio/benches/cycle_counts.rs b/third_party/rust/prio/benches/cycle_counts.rs new file mode 100644 index 0000000000..43a4ccdad0 --- /dev/null +++ b/third_party/rust/prio/benches/cycle_counts.rs @@ -0,0 +1,341 @@ +#![cfg_attr(windows, allow(dead_code))] + +use cfg_if::cfg_if; +use iai::black_box; +#[cfg(feature = "experimental")] +use prio::{ + codec::{Decode, Encode, ParameterizedDecode}, + field::{Field255, FieldElement}, + idpf::{Idpf, IdpfInput, IdpfPublicShare, RingBufferCache}, + vdaf::{poplar1::Poplar1IdpfValue, xof::Seed}, +}; +#[cfg(feature = "prio2")] +use prio::{ + field::FieldPrio2, + vdaf::{ + prio2::{Prio2, Prio2PrepareShare}, + Aggregator, Share, + }, +}; +use prio::{ + field::{random_vector, Field128, Field64}, + vdaf::{ + prio3::{Prio3, Prio3InputShare}, + Client, + }, +}; + +fn prng(size: usize) -> Vec<Field128> { + random_vector(size).unwrap() +} + +fn prng_16() -> Vec<Field128> { + prng(16) +} + +fn prng_256() -> Vec<Field128> { + prng(256) +} + +fn prng_1024() -> Vec<Field128> { + prng(1024) +} + +fn prng_4096() -> Vec<Field128> { + prng(4096) +} + +#[cfg(feature = "prio2")] +fn prio2_client(size: usize) -> Vec<Share<FieldPrio2, 32>> { + let prio2 = Prio2::new(size).unwrap(); + let input = vec![0u32; size]; + let nonce = [0; 16]; + prio2.shard(&black_box(input), &black_box(nonce)).unwrap().1 +} + +#[cfg(feature = "prio2")] +fn prio2_client_10() -> Vec<Share<FieldPrio2, 32>> { + prio2_client(10) +} + +#[cfg(feature = "prio2")] +fn prio2_client_100() -> Vec<Share<FieldPrio2, 32>> { + prio2_client(100) +} + +#[cfg(feature = "prio2")] +fn prio2_client_1000() -> Vec<Share<FieldPrio2, 32>> { + prio2_client(1000) +} + +#[cfg(feature = "prio2")] +fn prio2_shard_and_prepare(size: usize) -> Prio2PrepareShare { + let prio2 = Prio2::new(size).unwrap(); + let input = vec![0u32; size]; + let nonce = [0; 16]; + let (public_share, input_shares) = prio2.shard(&black_box(input), &black_box(nonce)).unwrap(); + prio2 + .prepare_init(&[0; 32], 0, &(), &nonce, &public_share, &input_shares[0]) + .unwrap() + .1 +} + +#[cfg(feature = "prio2")] +fn prio2_shard_and_prepare_10() -> Prio2PrepareShare { + prio2_shard_and_prepare(10) +} + +#[cfg(feature = "prio2")] +fn prio2_shard_and_prepare_100() -> Prio2PrepareShare { + prio2_shard_and_prepare(100) +} + +#[cfg(feature = "prio2")] +fn prio2_shard_and_prepare_1000() -> Prio2PrepareShare { + prio2_shard_and_prepare(1000) +} + +fn prio3_client_count() -> Vec<Prio3InputShare<Field64, 16>> { + let prio3 = Prio3::new_count(2).unwrap(); + let measurement = 1; + let nonce = [0; 16]; + prio3 + .shard(&black_box(measurement), &black_box(nonce)) + .unwrap() + .1 +} + +fn prio3_client_histogram_10() -> Vec<Prio3InputShare<Field128, 16>> { + let prio3 = Prio3::new_histogram(2, 10, 3).unwrap(); + let measurement = 9; + let nonce = [0; 16]; + prio3 + .shard(&black_box(measurement), &black_box(nonce)) + .unwrap() + .1 +} + +fn prio3_client_sum_32() -> Vec<Prio3InputShare<Field128, 16>> { + let prio3 = Prio3::new_sum(2, 16).unwrap(); + let measurement = 1337; + let nonce = [0; 16]; + prio3 + .shard(&black_box(measurement), &black_box(nonce)) + .unwrap() + .1 +} + +fn prio3_client_count_vec_1000() -> Vec<Prio3InputShare<Field128, 16>> { + let len = 1000; + let prio3 = Prio3::new_sum_vec(2, 1, len, 31).unwrap(); + let measurement = vec![0; len]; + let nonce = [0; 16]; + prio3 + .shard(&black_box(measurement), &black_box(nonce)) + .unwrap() + .1 +} + +#[cfg(feature = "multithreaded")] +fn prio3_client_count_vec_multithreaded_1000() -> Vec<Prio3InputShare<Field128, 16>> { + let len = 1000; + let prio3 = Prio3::new_sum_vec_multithreaded(2, 1, len, 31).unwrap(); + let measurement = vec![0; len]; + let nonce = [0; 16]; + prio3 + .shard(&black_box(measurement), &black_box(nonce)) + .unwrap() + .1 +} + +#[cfg(feature = "experimental")] +fn idpf_poplar_gen( + input: &IdpfInput, + inner_values: Vec<Poplar1IdpfValue<Field64>>, + leaf_value: Poplar1IdpfValue<Field255>, +) { + let idpf = Idpf::new((), ()); + idpf.gen(input, inner_values, leaf_value, &[0; 16]).unwrap(); +} + +#[cfg(feature = "experimental")] +fn idpf_poplar_gen_8() { + let input = IdpfInput::from_bytes(b"A"); + let one = Field64::one(); + idpf_poplar_gen( + &input, + vec![Poplar1IdpfValue::new([one, one]); 7], + Poplar1IdpfValue::new([Field255::one(), Field255::one()]), + ); +} + +#[cfg(feature = "experimental")] +fn idpf_poplar_gen_128() { + let input = IdpfInput::from_bytes(b"AAAAAAAAAAAAAAAA"); + let one = Field64::one(); + idpf_poplar_gen( + &input, + vec![Poplar1IdpfValue::new([one, one]); 127], + Poplar1IdpfValue::new([Field255::one(), Field255::one()]), + ); +} + +#[cfg(feature = "experimental")] +fn idpf_poplar_gen_2048() { + let input = IdpfInput::from_bytes(&[0x41; 256]); + let one = Field64::one(); + idpf_poplar_gen( + &input, + vec![Poplar1IdpfValue::new([one, one]); 2047], + Poplar1IdpfValue::new([Field255::one(), Field255::one()]), + ); +} + +#[cfg(feature = "experimental")] +fn idpf_poplar_eval( + input: &IdpfInput, + public_share: &IdpfPublicShare<Poplar1IdpfValue<Field64>, Poplar1IdpfValue<Field255>>, + key: &Seed<16>, +) { + let mut cache = RingBufferCache::new(1); + let idpf = Idpf::new((), ()); + idpf.eval(0, public_share, key, input, &[0; 16], &mut cache) + .unwrap(); +} + +#[cfg(feature = "experimental")] +fn idpf_poplar_eval_8() { + let input = IdpfInput::from_bytes(b"A"); + let public_share = IdpfPublicShare::get_decoded_with_param(&8, &[0x7f; 306]).unwrap(); + let key = Seed::get_decoded(&[0xff; 16]).unwrap(); + idpf_poplar_eval(&input, &public_share, &key); +} + +#[cfg(feature = "experimental")] +fn idpf_poplar_eval_128() { + let input = IdpfInput::from_bytes(b"AAAAAAAAAAAAAAAA"); + let public_share = IdpfPublicShare::get_decoded_with_param(&128, &[0x7f; 4176]).unwrap(); + let key = Seed::get_decoded(&[0xff; 16]).unwrap(); + idpf_poplar_eval(&input, &public_share, &key); +} + +#[cfg(feature = "experimental")] +fn idpf_poplar_eval_2048() { + let input = IdpfInput::from_bytes(&[0x41; 256]); + let public_share = IdpfPublicShare::get_decoded_with_param(&2048, &[0x7f; 66096]).unwrap(); + let key = Seed::get_decoded(&[0xff; 16]).unwrap(); + idpf_poplar_eval(&input, &public_share, &key); +} + +#[cfg(feature = "experimental")] +fn idpf_codec() { + let data = hex::decode(concat!( + "9a", + "0000000000000000000000000000000000000000000000", + "01eb3a1bd6b5fa4a4500000000000000000000000000000000", + "ffffffff0000000022522c3fd5a33cac00000000000000000000000000000000", + "ffffffff0000000069f41eee46542b6900000000000000000000000000000000", + "00000000000000000000000000000000000000000000000000000000000000", + "017d1fd6df94280145a0dcc933ceb706e9219d50e7c4f92fd8ca9a0ffb7d819646", + )) + .unwrap(); + let bits = 4; + let public_share = IdpfPublicShare::<Poplar1IdpfValue<Field64>, Poplar1IdpfValue<Field255>>::get_decoded_with_param(&bits, &data).unwrap(); + let encoded = public_share.get_encoded(); + let _ = black_box(encoded.len()); +} + +macro_rules! main_base { + ( $( $func_name:ident ),* $(,)* ) => { + iai::main!( + prng_16, + prng_256, + prng_1024, + prng_4096, + prio3_client_count, + prio3_client_histogram_10, + prio3_client_sum_32, + prio3_client_count_vec_1000, + $( $func_name, )* + ); + }; +} + +#[cfg(feature = "prio2")] +macro_rules! main_add_prio2 { + ( $( $func_name:ident ),* $(,)* ) => { + main_base!( + prio2_client_10, + prio2_client_100, + prio2_client_1000, + prio2_shard_and_prepare_10, + prio2_shard_and_prepare_100, + prio2_shard_and_prepare_1000, + $( $func_name, )* + ); + }; +} + +#[cfg(not(feature = "prio2"))] +macro_rules! main_add_prio2 { + ( $( $func_name:ident ),* $(,)* ) => { + main_base!( + $( $func_name, )* + ); + }; +} + +#[cfg(feature = "multithreaded")] +macro_rules! main_add_multithreaded { + ( $( $func_name:ident ),* $(,)* ) => { + main_add_prio2!( + prio3_client_count_vec_multithreaded_1000, + $( $func_name, )* + ); + }; +} + +#[cfg(not(feature = "multithreaded"))] +macro_rules! main_add_multithreaded { + ( $( $func_name:ident ),* $(,)* ) => { + main_add_prio2!( + $( $func_name, )* + ); + }; +} + +#[cfg(feature = "experimental")] +macro_rules! main_add_experimental { + ( $( $func_name:ident ),* $(,)* ) => { + main_add_multithreaded!( + idpf_codec, + idpf_poplar_gen_8, + idpf_poplar_gen_128, + idpf_poplar_gen_2048, + idpf_poplar_eval_8, + idpf_poplar_eval_128, + idpf_poplar_eval_2048, + $( $func_name, )* + ); + }; +} + +#[cfg(not(feature = "experimental"))] +macro_rules! main_add_experimental { + ( $( $func_name:ident ),* $(,)* ) => { + main_add_multithreaded!( + $( $func_name, )* + ); + }; +} + +cfg_if! { + if #[cfg(windows)] { + fn main() { + eprintln!("Cycle count benchmarks are not supported on Windows."); + } + } + else { + main_add_experimental!(); + } +} diff --git a/third_party/rust/prio/benches/speed_tests.rs b/third_party/rust/prio/benches/speed_tests.rs new file mode 100644 index 0000000000..66458b1ada --- /dev/null +++ b/third_party/rust/prio/benches/speed_tests.rs @@ -0,0 +1,876 @@ +// SPDX-License-Identifier: MPL-2.0 + +use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; +#[cfg(feature = "experimental")] +use criterion::{BatchSize, Throughput}; +#[cfg(feature = "experimental")] +use fixed::types::{I1F15, I1F31}; +#[cfg(feature = "experimental")] +use fixed_macro::fixed; +#[cfg(feature = "experimental")] +use num_bigint::BigUint; +#[cfg(feature = "experimental")] +use num_rational::Ratio; +#[cfg(feature = "experimental")] +use num_traits::ToPrimitive; +#[cfg(feature = "experimental")] +use prio::dp::distributions::DiscreteGaussian; +#[cfg(feature = "prio2")] +use prio::vdaf::prio2::Prio2; +use prio::{ + benchmarked::*, + field::{random_vector, Field128 as F, FieldElement}, + flp::gadgets::Mul, + vdaf::{prio3::Prio3, Aggregator, Client}, +}; +#[cfg(feature = "experimental")] +use prio::{ + field::{Field255, Field64}, + flp::types::fixedpoint_l2::FixedPointBoundedL2VecSum, + idpf::{Idpf, IdpfInput, RingBufferCache}, + vdaf::poplar1::{Poplar1, Poplar1AggregationParam, Poplar1IdpfValue}, +}; +#[cfg(feature = "experimental")] +use rand::prelude::*; +#[cfg(feature = "experimental")] +use std::iter; +use std::time::Duration; +#[cfg(feature = "experimental")] +use zipf::ZipfDistribution; + +/// Seed for generation of random benchmark inputs. +/// +/// A fixed RNG seed is used to generate inputs in order to minimize run-to-run variability. The +/// seed value may be freely changed to get a different set of inputs. +#[cfg(feature = "experimental")] +const RNG_SEED: u64 = 0; + +/// Speed test for generating a seed and deriving a pseudorandom sequence of field elements. +fn prng(c: &mut Criterion) { + let mut group = c.benchmark_group("rand"); + let test_sizes = [16, 256, 1024, 4096]; + for size in test_sizes { + group.bench_with_input(BenchmarkId::from_parameter(size), &size, |b, size| { + b.iter(|| random_vector::<F>(*size)) + }); + } + group.finish(); +} + +/// Speed test for generating samples from the discrete gaussian distribution using different +/// standard deviations. +#[cfg(feature = "experimental")] +pub fn dp_noise(c: &mut Criterion) { + let mut group = c.benchmark_group("dp_noise"); + let mut rng = StdRng::seed_from_u64(RNG_SEED); + + let test_stds = [ + Ratio::<BigUint>::from_integer(BigUint::from(u128::MAX)).pow(2), + Ratio::<BigUint>::from_integer(BigUint::from(u64::MAX)), + Ratio::<BigUint>::from_integer(BigUint::from(u32::MAX)), + Ratio::<BigUint>::from_integer(BigUint::from(5u8)), + Ratio::<BigUint>::new(BigUint::from(10000u32), BigUint::from(23u32)), + ]; + for std in test_stds { + let sampler = DiscreteGaussian::new(std.clone()).unwrap(); + group.bench_function( + BenchmarkId::new("discrete_gaussian", std.to_f64().unwrap_or(f64::INFINITY)), + |b| b.iter(|| sampler.sample(&mut rng)), + ); + } + group.finish(); +} + +/// The asymptotic cost of polynomial multiplication is `O(n log n)` using FFT and `O(n^2)` using +/// the naive method. This benchmark demonstrates that the latter has better concrete performance +/// for small polynomials. The result is used to pick the `FFT_THRESHOLD` constant in +/// `src/flp/gadgets.rs`. +fn poly_mul(c: &mut Criterion) { + let test_sizes = [1_usize, 30, 60, 90, 120, 150]; + + let mut group = c.benchmark_group("poly_mul"); + for size in test_sizes { + group.bench_with_input(BenchmarkId::new("fft", size), &size, |b, size| { + let m = (size + 1).next_power_of_two(); + let mut g: Mul<F> = Mul::new(*size); + let mut outp = vec![F::zero(); 2 * m]; + let inp = vec![random_vector(m).unwrap(); 2]; + + b.iter(|| { + benchmarked_gadget_mul_call_poly_fft(&mut g, &mut outp, &inp).unwrap(); + }) + }); + + group.bench_with_input(BenchmarkId::new("direct", size), &size, |b, size| { + let m = (size + 1).next_power_of_two(); + let mut g: Mul<F> = Mul::new(*size); + let mut outp = vec![F::zero(); 2 * m]; + let inp = vec![random_vector(m).unwrap(); 2]; + + b.iter(|| { + benchmarked_gadget_mul_call_poly_direct(&mut g, &mut outp, &inp).unwrap(); + }) + }); + } + group.finish(); +} + +/// Benchmark prio2. +#[cfg(feature = "prio2")] +fn prio2(c: &mut Criterion) { + let mut group = c.benchmark_group("prio2_shard"); + for input_length in [10, 100, 1_000] { + group.bench_with_input( + BenchmarkId::from_parameter(input_length), + &input_length, + |b, input_length| { + let vdaf = Prio2::new(*input_length).unwrap(); + let measurement = (0..u32::try_from(*input_length).unwrap()) + .map(|i| i & 1) + .collect::<Vec<_>>(); + let nonce = black_box([0u8; 16]); + b.iter(|| vdaf.shard(&measurement, &nonce).unwrap()); + }, + ); + } + group.finish(); + + let mut group = c.benchmark_group("prio2_prepare_init"); + for input_length in [10, 100, 1_000] { + group.bench_with_input( + BenchmarkId::from_parameter(input_length), + &input_length, + |b, input_length| { + let vdaf = Prio2::new(*input_length).unwrap(); + let measurement = (0..u32::try_from(*input_length).unwrap()) + .map(|i| i & 1) + .collect::<Vec<_>>(); + let nonce = black_box([0u8; 16]); + let verify_key = black_box([0u8; 32]); + let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap(); + b.iter(|| { + vdaf.prepare_init(&verify_key, 0, &(), &nonce, &public_share, &input_shares[0]) + .unwrap(); + }); + }, + ); + } + group.finish(); +} + +/// Benchmark prio3. +fn prio3(c: &mut Criterion) { + let num_shares = 2; + + c.bench_function("prio3count_shard", |b| { + let vdaf = Prio3::new_count(num_shares).unwrap(); + let measurement = black_box(1); + let nonce = black_box([0u8; 16]); + b.iter(|| vdaf.shard(&measurement, &nonce).unwrap()); + }); + + c.bench_function("prio3count_prepare_init", |b| { + let vdaf = Prio3::new_count(num_shares).unwrap(); + let measurement = black_box(1); + let nonce = black_box([0u8; 16]); + let verify_key = black_box([0u8; 16]); + let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap(); + b.iter(|| { + vdaf.prepare_init(&verify_key, 0, &(), &nonce, &public_share, &input_shares[0]) + .unwrap() + }); + }); + + let mut group = c.benchmark_group("prio3sum_shard"); + for bits in [8, 32] { + group.bench_with_input(BenchmarkId::from_parameter(bits), &bits, |b, bits| { + let vdaf = Prio3::new_sum(num_shares, *bits).unwrap(); + let measurement = (1 << bits) - 1; + let nonce = black_box([0u8; 16]); + b.iter(|| vdaf.shard(&measurement, &nonce).unwrap()); + }); + } + group.finish(); + + let mut group = c.benchmark_group("prio3sum_prepare_init"); + for bits in [8, 32] { + group.bench_with_input(BenchmarkId::from_parameter(bits), &bits, |b, bits| { + let vdaf = Prio3::new_sum(num_shares, *bits).unwrap(); + let measurement = (1 << bits) - 1; + let nonce = black_box([0u8; 16]); + let verify_key = black_box([0u8; 16]); + let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap(); + b.iter(|| { + vdaf.prepare_init(&verify_key, 0, &(), &nonce, &public_share, &input_shares[0]) + .unwrap() + }); + }); + } + group.finish(); + + let mut group = c.benchmark_group("prio3sumvec_shard"); + for (input_length, chunk_length) in [(10, 3), (100, 10), (1_000, 31)] { + group.bench_with_input( + BenchmarkId::new("serial", input_length), + &(input_length, chunk_length), + |b, (input_length, chunk_length)| { + let vdaf = Prio3::new_sum_vec(num_shares, 1, *input_length, *chunk_length).unwrap(); + let measurement = (0..u128::try_from(*input_length).unwrap()) + .map(|i| i & 1) + .collect::<Vec<_>>(); + let nonce = black_box([0u8; 16]); + b.iter(|| vdaf.shard(&measurement, &nonce).unwrap()); + }, + ); + } + + #[cfg(feature = "multithreaded")] + { + for (input_length, chunk_length) in [(10, 3), (100, 10), (1_000, 31)] { + group.bench_with_input( + BenchmarkId::new("parallel", input_length), + &(input_length, chunk_length), + |b, (input_length, chunk_length)| { + let vdaf = Prio3::new_sum_vec_multithreaded( + num_shares, + 1, + *input_length, + *chunk_length, + ) + .unwrap(); + let measurement = (0..u128::try_from(*input_length).unwrap()) + .map(|i| i & 1) + .collect::<Vec<_>>(); + let nonce = black_box([0u8; 16]); + b.iter(|| vdaf.shard(&measurement, &nonce).unwrap()); + }, + ); + } + } + group.finish(); + + let mut group = c.benchmark_group("prio3sumvec_prepare_init"); + for (input_length, chunk_length) in [(10, 3), (100, 10), (1_000, 31)] { + group.bench_with_input( + BenchmarkId::new("serial", input_length), + &(input_length, chunk_length), + |b, (input_length, chunk_length)| { + let vdaf = Prio3::new_sum_vec(num_shares, 1, *input_length, *chunk_length).unwrap(); + let measurement = (0..u128::try_from(*input_length).unwrap()) + .map(|i| i & 1) + .collect::<Vec<_>>(); + let nonce = black_box([0u8; 16]); + let verify_key = black_box([0u8; 16]); + let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap(); + b.iter(|| { + vdaf.prepare_init(&verify_key, 0, &(), &nonce, &public_share, &input_shares[0]) + .unwrap() + }); + }, + ); + } + + #[cfg(feature = "multithreaded")] + { + for (input_length, chunk_length) in [(10, 3), (100, 10), (1_000, 31)] { + group.bench_with_input( + BenchmarkId::new("parallel", input_length), + &(input_length, chunk_length), + |b, (input_length, chunk_length)| { + let vdaf = Prio3::new_sum_vec_multithreaded( + num_shares, + 1, + *input_length, + *chunk_length, + ) + .unwrap(); + let measurement = (0..u128::try_from(*input_length).unwrap()) + .map(|i| i & 1) + .collect::<Vec<_>>(); + let nonce = black_box([0u8; 16]); + let verify_key = black_box([0u8; 16]); + let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap(); + b.iter(|| { + vdaf.prepare_init( + &verify_key, + 0, + &(), + &nonce, + &public_share, + &input_shares[0], + ) + .unwrap() + }); + }, + ); + } + } + group.finish(); + + let mut group = c.benchmark_group("prio3histogram_shard"); + for (input_length, chunk_length) in [ + (10, 3), + (100, 10), + (1_000, 31), + (10_000, 100), + (100_000, 316), + ] { + if input_length >= 100_000 { + group.measurement_time(Duration::from_secs(15)); + } + group.bench_with_input( + BenchmarkId::new("serial", input_length), + &(input_length, chunk_length), + |b, (input_length, chunk_length)| { + let vdaf = Prio3::new_histogram(num_shares, *input_length, *chunk_length).unwrap(); + let measurement = black_box(0); + let nonce = black_box([0u8; 16]); + b.iter(|| vdaf.shard(&measurement, &nonce).unwrap()); + }, + ); + } + + #[cfg(feature = "multithreaded")] + { + for (input_length, chunk_length) in [ + (10, 3), + (100, 10), + (1_000, 31), + (10_000, 100), + (100_000, 316), + ] { + if input_length >= 100_000 { + group.measurement_time(Duration::from_secs(15)); + } + group.bench_with_input( + BenchmarkId::new("parallel", input_length), + &(input_length, chunk_length), + |b, (input_length, chunk_length)| { + let vdaf = Prio3::new_histogram_multithreaded( + num_shares, + *input_length, + *chunk_length, + ) + .unwrap(); + let measurement = black_box(0); + let nonce = black_box([0u8; 16]); + b.iter(|| vdaf.shard(&measurement, &nonce).unwrap()); + }, + ); + } + } + group.finish(); + + let mut group = c.benchmark_group("prio3histogram_prepare_init"); + for (input_length, chunk_length) in [ + (10, 3), + (100, 10), + (1_000, 31), + (10_000, 100), + (100_000, 316), + ] { + if input_length >= 100_000 { + group.measurement_time(Duration::from_secs(15)); + } + group.bench_with_input( + BenchmarkId::new("serial", input_length), + &(input_length, chunk_length), + |b, (input_length, chunk_length)| { + let vdaf = Prio3::new_histogram(num_shares, *input_length, *chunk_length).unwrap(); + let measurement = black_box(0); + let nonce = black_box([0u8; 16]); + let verify_key = black_box([0u8; 16]); + let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap(); + b.iter(|| { + vdaf.prepare_init(&verify_key, 0, &(), &nonce, &public_share, &input_shares[0]) + .unwrap() + }); + }, + ); + } + + #[cfg(feature = "multithreaded")] + { + for (input_length, chunk_length) in [ + (10, 3), + (100, 10), + (1_000, 31), + (10_000, 100), + (100_000, 316), + ] { + if input_length >= 100_000 { + group.measurement_time(Duration::from_secs(15)); + } + group.bench_with_input( + BenchmarkId::new("parallel", input_length), + &(input_length, chunk_length), + |b, (input_length, chunk_length)| { + let vdaf = Prio3::new_histogram_multithreaded( + num_shares, + *input_length, + *chunk_length, + ) + .unwrap(); + let measurement = black_box(0); + let nonce = black_box([0u8; 16]); + let verify_key = black_box([0u8; 16]); + let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap(); + b.iter(|| { + vdaf.prepare_init( + &verify_key, + 0, + &(), + &nonce, + &public_share, + &input_shares[0], + ) + .unwrap() + }); + }, + ); + } + } + group.finish(); + + #[cfg(feature = "experimental")] + { + let mut group = c.benchmark_group("prio3fixedpointboundedl2vecsum_i1f15_shard"); + for dimension in [10, 100, 1_000] { + group.bench_with_input( + BenchmarkId::new("serial", dimension), + &dimension, + |b, dimension| { + let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _>, _, 16> = + Prio3::new_fixedpoint_boundedl2_vec_sum(num_shares, *dimension).unwrap(); + let mut measurement = vec![fixed!(0: I1F15); *dimension]; + measurement[0] = fixed!(0.5: I1F15); + let nonce = black_box([0u8; 16]); + b.iter(|| vdaf.shard(&measurement, &nonce).unwrap()); + }, + ); + } + + #[cfg(feature = "multithreaded")] + { + for dimension in [10, 100, 1_000] { + group.bench_with_input( + BenchmarkId::new("parallel", dimension), + &dimension, + |b, dimension| { + let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _>, _, 16> = + Prio3::new_fixedpoint_boundedl2_vec_sum_multithreaded( + num_shares, *dimension, + ) + .unwrap(); + let mut measurement = vec![fixed!(0: I1F15); *dimension]; + measurement[0] = fixed!(0.5: I1F15); + let nonce = black_box([0u8; 16]); + b.iter(|| vdaf.shard(&measurement, &nonce).unwrap()); + }, + ); + } + } + group.finish(); + + let mut group = c.benchmark_group("prio3fixedpointboundedl2vecsum_i1f15_prepare_init"); + for dimension in [10, 100, 1_000] { + group.bench_with_input( + BenchmarkId::new("series", dimension), + &dimension, + |b, dimension| { + let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _>, _, 16> = + Prio3::new_fixedpoint_boundedl2_vec_sum(num_shares, *dimension).unwrap(); + let mut measurement = vec![fixed!(0: I1F15); *dimension]; + measurement[0] = fixed!(0.5: I1F15); + let nonce = black_box([0u8; 16]); + let verify_key = black_box([0u8; 16]); + let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap(); + b.iter(|| { + vdaf.prepare_init( + &verify_key, + 0, + &(), + &nonce, + &public_share, + &input_shares[0], + ) + .unwrap() + }); + }, + ); + } + + #[cfg(feature = "multithreaded")] + { + for dimension in [10, 100, 1_000] { + group.bench_with_input( + BenchmarkId::new("parallel", dimension), + &dimension, + |b, dimension| { + let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _>, _, 16> = + Prio3::new_fixedpoint_boundedl2_vec_sum_multithreaded( + num_shares, *dimension, + ) + .unwrap(); + let mut measurement = vec![fixed!(0: I1F15); *dimension]; + measurement[0] = fixed!(0.5: I1F15); + let nonce = black_box([0u8; 16]); + let verify_key = black_box([0u8; 16]); + let (public_share, input_shares) = + vdaf.shard(&measurement, &nonce).unwrap(); + b.iter(|| { + vdaf.prepare_init( + &verify_key, + 0, + &(), + &nonce, + &public_share, + &input_shares[0], + ) + .unwrap() + }); + }, + ); + } + } + group.finish(); + + let mut group = c.benchmark_group("prio3fixedpointboundedl2vecsum_i1f31_shard"); + for dimension in [10, 100, 1_000] { + group.bench_with_input( + BenchmarkId::new("serial", dimension), + &dimension, + |b, dimension| { + let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _>, _, 16> = + Prio3::new_fixedpoint_boundedl2_vec_sum(num_shares, *dimension).unwrap(); + let mut measurement = vec![fixed!(0: I1F31); *dimension]; + measurement[0] = fixed!(0.5: I1F31); + let nonce = black_box([0u8; 16]); + b.iter(|| vdaf.shard(&measurement, &nonce).unwrap()); + }, + ); + } + + #[cfg(feature = "multithreaded")] + { + for dimension in [10, 100, 1_000] { + group.bench_with_input( + BenchmarkId::new("parallel", dimension), + &dimension, + |b, dimension| { + let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _>, _, 16> = + Prio3::new_fixedpoint_boundedl2_vec_sum_multithreaded( + num_shares, *dimension, + ) + .unwrap(); + let mut measurement = vec![fixed!(0: I1F31); *dimension]; + measurement[0] = fixed!(0.5: I1F31); + let nonce = black_box([0u8; 16]); + b.iter(|| vdaf.shard(&measurement, &nonce).unwrap()); + }, + ); + } + } + group.finish(); + + let mut group = c.benchmark_group("prio3fixedpointboundedl2vecsum_i1f31_prepare_init"); + for dimension in [10, 100, 1_000] { + group.bench_with_input( + BenchmarkId::new("series", dimension), + &dimension, + |b, dimension| { + let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _>, _, 16> = + Prio3::new_fixedpoint_boundedl2_vec_sum(num_shares, *dimension).unwrap(); + let mut measurement = vec![fixed!(0: I1F31); *dimension]; + measurement[0] = fixed!(0.5: I1F31); + let nonce = black_box([0u8; 16]); + let verify_key = black_box([0u8; 16]); + let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap(); + b.iter(|| { + vdaf.prepare_init( + &verify_key, + 0, + &(), + &nonce, + &public_share, + &input_shares[0], + ) + .unwrap() + }); + }, + ); + } + + #[cfg(feature = "multithreaded")] + { + for dimension in [10, 100, 1_000] { + group.bench_with_input( + BenchmarkId::new("parallel", dimension), + &dimension, + |b, dimension| { + let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _>, _, 16> = + Prio3::new_fixedpoint_boundedl2_vec_sum_multithreaded( + num_shares, *dimension, + ) + .unwrap(); + let mut measurement = vec![fixed!(0: I1F31); *dimension]; + measurement[0] = fixed!(0.5: I1F31); + let nonce = black_box([0u8; 16]); + let verify_key = black_box([0u8; 16]); + let (public_share, input_shares) = + vdaf.shard(&measurement, &nonce).unwrap(); + b.iter(|| { + vdaf.prepare_init( + &verify_key, + 0, + &(), + &nonce, + &public_share, + &input_shares[0], + ) + .unwrap() + }); + }, + ); + } + } + group.finish(); + } +} + +/// Benchmark IdpfPoplar performance. +#[cfg(feature = "experimental")] +fn idpf(c: &mut Criterion) { + let test_sizes = [8usize, 8 * 16, 8 * 256]; + + let mut group = c.benchmark_group("idpf_gen"); + for size in test_sizes.iter() { + group.throughput(Throughput::Bytes(*size as u64 / 8)); + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let bits = iter::repeat_with(random).take(size).collect::<Vec<bool>>(); + let input = IdpfInput::from_bools(&bits); + + let inner_values = random_vector::<Field64>(size - 1) + .unwrap() + .into_iter() + .map(|random_element| Poplar1IdpfValue::new([Field64::one(), random_element])) + .collect::<Vec<_>>(); + let leaf_value = Poplar1IdpfValue::new([Field255::one(), random_vector(1).unwrap()[0]]); + + let idpf = Idpf::new((), ()); + b.iter(|| { + idpf.gen(&input, inner_values.clone(), leaf_value, &[0; 16]) + .unwrap(); + }); + }); + } + group.finish(); + + let mut group = c.benchmark_group("idpf_eval"); + for size in test_sizes.iter() { + group.throughput(Throughput::Bytes(*size as u64 / 8)); + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let bits = iter::repeat_with(random).take(size).collect::<Vec<bool>>(); + let input = IdpfInput::from_bools(&bits); + + let inner_values = random_vector::<Field64>(size - 1) + .unwrap() + .into_iter() + .map(|random_element| Poplar1IdpfValue::new([Field64::one(), random_element])) + .collect::<Vec<_>>(); + let leaf_value = Poplar1IdpfValue::new([Field255::one(), random_vector(1).unwrap()[0]]); + + let idpf = Idpf::new((), ()); + let (public_share, keys) = idpf + .gen(&input, inner_values, leaf_value, &[0; 16]) + .unwrap(); + + b.iter(|| { + // This is an aggressively small cache, to minimize its impact on the benchmark. + // In this synthetic benchmark, we are only checking one candidate prefix per level + // (typically there are many candidate prefixes per level) so the cache hit rate + // will be unaffected. + let mut cache = RingBufferCache::new(1); + + for prefix_length in 1..=size { + let prefix = input[..prefix_length].to_owned().into(); + idpf.eval(0, &public_share, &keys[0], &prefix, &[0; 16], &mut cache) + .unwrap(); + } + }); + }); + } + group.finish(); +} + +/// Benchmark Poplar1. +#[cfg(feature = "experimental")] +fn poplar1(c: &mut Criterion) { + let test_sizes = [16_usize, 128, 256]; + + let mut group = c.benchmark_group("poplar1_shard"); + for size in test_sizes.iter() { + group.throughput(Throughput::Bytes(*size as u64 / 8)); + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let vdaf = Poplar1::new_shake128(size); + let mut rng = StdRng::seed_from_u64(RNG_SEED); + let nonce = rng.gen::<[u8; 16]>(); + + b.iter_batched( + || { + let bits = iter::repeat_with(|| rng.gen()) + .take(size) + .collect::<Vec<bool>>(); + IdpfInput::from_bools(&bits) + }, + |measurement| { + vdaf.shard(&measurement, &nonce).unwrap(); + }, + BatchSize::SmallInput, + ); + }); + } + group.finish(); + + let mut group = c.benchmark_group("poplar1_prepare_init"); + for size in test_sizes.iter() { + group.measurement_time(Duration::from_secs(30)); // slower benchmark + group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| { + let vdaf = Poplar1::new_shake128(size); + let mut rng = StdRng::seed_from_u64(RNG_SEED); + + b.iter_batched( + || { + let verify_key: [u8; 16] = rng.gen(); + let nonce: [u8; 16] = rng.gen(); + + // Parameters are chosen to match Chris Wood's experimental setup: + // https://github.com/chris-wood/heavy-hitter-comparison + let (measurements, prefix_tree) = poplar1_generate_zipf_distributed_batch( + &mut rng, // rng + size, // bits + 10, // threshold + 1000, // number of measurements + 128, // Zipf support + 1.03, // Zipf exponent + ); + + // We are benchmarking preparation of a single report. For this test, it doesn't matter + // which measurement we generate a report for, so pick the first measurement + // arbitrarily. + let (public_share, input_shares) = + vdaf.shard(&measurements[0], &nonce).unwrap(); + + // For the aggregation paramter, we use the candidate prefixes from the prefix tree + // for the sampled measurements. Run preparation for the last step, which ought to + // represent the worst-case performance. + let agg_param = + Poplar1AggregationParam::try_from_prefixes(prefix_tree[size - 1].clone()) + .unwrap(); + + ( + verify_key, + nonce, + agg_param, + public_share, + input_shares.into_iter().next().unwrap(), + ) + }, + |(verify_key, nonce, agg_param, public_share, input_share)| { + vdaf.prepare_init( + &verify_key, + 0, + &agg_param, + &nonce, + &public_share, + &input_share, + ) + .unwrap(); + }, + BatchSize::SmallInput, + ); + }); + } + group.finish(); +} + +/// Generate a set of Poplar1 measurements with the given bit length `bits`. They are sampled +/// according to the Zipf distribution with parameters `zipf_support` and `zipf_exponent`. Return +/// the measurements, along with the prefix tree for the desired threshold. +/// +/// The prefix tree consists of a sequence of candidate prefixes for each level. For a given level, +/// the candidate prefixes are computed from the hit counts of the prefixes at the previous level: +/// For any prefix `p` whose hit count is at least the desired threshold, add `p || 0` and `p || 1` +/// to the list. +#[cfg(feature = "experimental")] +fn poplar1_generate_zipf_distributed_batch( + rng: &mut impl Rng, + bits: usize, + threshold: usize, + measurement_count: usize, + zipf_support: usize, + zipf_exponent: f64, +) -> (Vec<IdpfInput>, Vec<Vec<IdpfInput>>) { + // Generate random inputs. + let mut inputs = Vec::with_capacity(zipf_support); + for _ in 0..zipf_support { + let bools: Vec<bool> = (0..bits).map(|_| rng.gen()).collect(); + inputs.push(IdpfInput::from_bools(&bools)); + } + + // Sample a number of inputs according to the Zipf distribution. + let mut samples = Vec::with_capacity(measurement_count); + let zipf = ZipfDistribution::new(zipf_support, zipf_exponent).unwrap(); + for _ in 0..measurement_count { + samples.push(inputs[zipf.sample(rng) - 1].clone()); + } + + // Compute the prefix tree for the desired threshold. + let mut prefix_tree = Vec::with_capacity(bits); + prefix_tree.push(vec![ + IdpfInput::from_bools(&[false]), + IdpfInput::from_bools(&[true]), + ]); + + for level in 0..bits - 1 { + // Compute the hit count of each prefix from the previous level. + let mut hit_counts = vec![0; prefix_tree[level].len()]; + for (hit_count, prefix) in hit_counts.iter_mut().zip(prefix_tree[level].iter()) { + for sample in samples.iter() { + let mut is_prefix = true; + for j in 0..prefix.len() { + if prefix[j] != sample[j] { + is_prefix = false; + break; + } + } + if is_prefix { + *hit_count += 1; + } + } + } + + // Compute the next set of candidate prefixes. + let mut next_prefixes = Vec::new(); + for (hit_count, prefix) in hit_counts.iter().zip(prefix_tree[level].iter()) { + if *hit_count >= threshold { + next_prefixes.push(prefix.clone_with_suffix(&[false])); + next_prefixes.push(prefix.clone_with_suffix(&[true])); + } + } + prefix_tree.push(next_prefixes); + } + + (samples, prefix_tree) +} + +#[cfg(all(feature = "prio2", feature = "experimental"))] +criterion_group!(benches, poplar1, prio3, prio2, poly_mul, prng, idpf, dp_noise); +#[cfg(all(not(feature = "prio2"), feature = "experimental"))] +criterion_group!(benches, poplar1, prio3, poly_mul, prng, idpf, dp_noise); +#[cfg(all(feature = "prio2", not(feature = "experimental")))] +criterion_group!(benches, prio3, prio2, prng, poly_mul); +#[cfg(all(not(feature = "prio2"), not(feature = "experimental")))] +criterion_group!(benches, prio3, prng, poly_mul); + +criterion_main!(benches); |