summaryrefslogtreecommitdiffstats
path: root/third_party/rust/prio/benches
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /third_party/rust/prio/benches
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/prio/benches')
-rw-r--r--third_party/rust/prio/benches/cycle_counts.rs341
-rw-r--r--third_party/rust/prio/benches/speed_tests.rs876
2 files changed, 1217 insertions, 0 deletions
diff --git a/third_party/rust/prio/benches/cycle_counts.rs b/third_party/rust/prio/benches/cycle_counts.rs
new file mode 100644
index 0000000000..43a4ccdad0
--- /dev/null
+++ b/third_party/rust/prio/benches/cycle_counts.rs
@@ -0,0 +1,341 @@
+#![cfg_attr(windows, allow(dead_code))]
+
+use cfg_if::cfg_if;
+use iai::black_box;
+#[cfg(feature = "experimental")]
+use prio::{
+ codec::{Decode, Encode, ParameterizedDecode},
+ field::{Field255, FieldElement},
+ idpf::{Idpf, IdpfInput, IdpfPublicShare, RingBufferCache},
+ vdaf::{poplar1::Poplar1IdpfValue, xof::Seed},
+};
+#[cfg(feature = "prio2")]
+use prio::{
+ field::FieldPrio2,
+ vdaf::{
+ prio2::{Prio2, Prio2PrepareShare},
+ Aggregator, Share,
+ },
+};
+use prio::{
+ field::{random_vector, Field128, Field64},
+ vdaf::{
+ prio3::{Prio3, Prio3InputShare},
+ Client,
+ },
+};
+
+fn prng(size: usize) -> Vec<Field128> {
+ random_vector(size).unwrap()
+}
+
+fn prng_16() -> Vec<Field128> {
+ prng(16)
+}
+
+fn prng_256() -> Vec<Field128> {
+ prng(256)
+}
+
+fn prng_1024() -> Vec<Field128> {
+ prng(1024)
+}
+
+fn prng_4096() -> Vec<Field128> {
+ prng(4096)
+}
+
+#[cfg(feature = "prio2")]
+fn prio2_client(size: usize) -> Vec<Share<FieldPrio2, 32>> {
+ let prio2 = Prio2::new(size).unwrap();
+ let input = vec![0u32; size];
+ let nonce = [0; 16];
+ prio2.shard(&black_box(input), &black_box(nonce)).unwrap().1
+}
+
+#[cfg(feature = "prio2")]
+fn prio2_client_10() -> Vec<Share<FieldPrio2, 32>> {
+ prio2_client(10)
+}
+
+#[cfg(feature = "prio2")]
+fn prio2_client_100() -> Vec<Share<FieldPrio2, 32>> {
+ prio2_client(100)
+}
+
+#[cfg(feature = "prio2")]
+fn prio2_client_1000() -> Vec<Share<FieldPrio2, 32>> {
+ prio2_client(1000)
+}
+
+#[cfg(feature = "prio2")]
+fn prio2_shard_and_prepare(size: usize) -> Prio2PrepareShare {
+ let prio2 = Prio2::new(size).unwrap();
+ let input = vec![0u32; size];
+ let nonce = [0; 16];
+ let (public_share, input_shares) = prio2.shard(&black_box(input), &black_box(nonce)).unwrap();
+ prio2
+ .prepare_init(&[0; 32], 0, &(), &nonce, &public_share, &input_shares[0])
+ .unwrap()
+ .1
+}
+
+#[cfg(feature = "prio2")]
+fn prio2_shard_and_prepare_10() -> Prio2PrepareShare {
+ prio2_shard_and_prepare(10)
+}
+
+#[cfg(feature = "prio2")]
+fn prio2_shard_and_prepare_100() -> Prio2PrepareShare {
+ prio2_shard_and_prepare(100)
+}
+
+#[cfg(feature = "prio2")]
+fn prio2_shard_and_prepare_1000() -> Prio2PrepareShare {
+ prio2_shard_and_prepare(1000)
+}
+
+fn prio3_client_count() -> Vec<Prio3InputShare<Field64, 16>> {
+ let prio3 = Prio3::new_count(2).unwrap();
+ let measurement = 1;
+ let nonce = [0; 16];
+ prio3
+ .shard(&black_box(measurement), &black_box(nonce))
+ .unwrap()
+ .1
+}
+
+fn prio3_client_histogram_10() -> Vec<Prio3InputShare<Field128, 16>> {
+ let prio3 = Prio3::new_histogram(2, 10, 3).unwrap();
+ let measurement = 9;
+ let nonce = [0; 16];
+ prio3
+ .shard(&black_box(measurement), &black_box(nonce))
+ .unwrap()
+ .1
+}
+
+fn prio3_client_sum_32() -> Vec<Prio3InputShare<Field128, 16>> {
+ let prio3 = Prio3::new_sum(2, 16).unwrap();
+ let measurement = 1337;
+ let nonce = [0; 16];
+ prio3
+ .shard(&black_box(measurement), &black_box(nonce))
+ .unwrap()
+ .1
+}
+
+fn prio3_client_count_vec_1000() -> Vec<Prio3InputShare<Field128, 16>> {
+ let len = 1000;
+ let prio3 = Prio3::new_sum_vec(2, 1, len, 31).unwrap();
+ let measurement = vec![0; len];
+ let nonce = [0; 16];
+ prio3
+ .shard(&black_box(measurement), &black_box(nonce))
+ .unwrap()
+ .1
+}
+
+#[cfg(feature = "multithreaded")]
+fn prio3_client_count_vec_multithreaded_1000() -> Vec<Prio3InputShare<Field128, 16>> {
+ let len = 1000;
+ let prio3 = Prio3::new_sum_vec_multithreaded(2, 1, len, 31).unwrap();
+ let measurement = vec![0; len];
+ let nonce = [0; 16];
+ prio3
+ .shard(&black_box(measurement), &black_box(nonce))
+ .unwrap()
+ .1
+}
+
+#[cfg(feature = "experimental")]
+fn idpf_poplar_gen(
+ input: &IdpfInput,
+ inner_values: Vec<Poplar1IdpfValue<Field64>>,
+ leaf_value: Poplar1IdpfValue<Field255>,
+) {
+ let idpf = Idpf::new((), ());
+ idpf.gen(input, inner_values, leaf_value, &[0; 16]).unwrap();
+}
+
+#[cfg(feature = "experimental")]
+fn idpf_poplar_gen_8() {
+ let input = IdpfInput::from_bytes(b"A");
+ let one = Field64::one();
+ idpf_poplar_gen(
+ &input,
+ vec![Poplar1IdpfValue::new([one, one]); 7],
+ Poplar1IdpfValue::new([Field255::one(), Field255::one()]),
+ );
+}
+
+#[cfg(feature = "experimental")]
+fn idpf_poplar_gen_128() {
+ let input = IdpfInput::from_bytes(b"AAAAAAAAAAAAAAAA");
+ let one = Field64::one();
+ idpf_poplar_gen(
+ &input,
+ vec![Poplar1IdpfValue::new([one, one]); 127],
+ Poplar1IdpfValue::new([Field255::one(), Field255::one()]),
+ );
+}
+
+#[cfg(feature = "experimental")]
+fn idpf_poplar_gen_2048() {
+ let input = IdpfInput::from_bytes(&[0x41; 256]);
+ let one = Field64::one();
+ idpf_poplar_gen(
+ &input,
+ vec![Poplar1IdpfValue::new([one, one]); 2047],
+ Poplar1IdpfValue::new([Field255::one(), Field255::one()]),
+ );
+}
+
+#[cfg(feature = "experimental")]
+fn idpf_poplar_eval(
+ input: &IdpfInput,
+ public_share: &IdpfPublicShare<Poplar1IdpfValue<Field64>, Poplar1IdpfValue<Field255>>,
+ key: &Seed<16>,
+) {
+ let mut cache = RingBufferCache::new(1);
+ let idpf = Idpf::new((), ());
+ idpf.eval(0, public_share, key, input, &[0; 16], &mut cache)
+ .unwrap();
+}
+
+#[cfg(feature = "experimental")]
+fn idpf_poplar_eval_8() {
+ let input = IdpfInput::from_bytes(b"A");
+ let public_share = IdpfPublicShare::get_decoded_with_param(&8, &[0x7f; 306]).unwrap();
+ let key = Seed::get_decoded(&[0xff; 16]).unwrap();
+ idpf_poplar_eval(&input, &public_share, &key);
+}
+
+#[cfg(feature = "experimental")]
+fn idpf_poplar_eval_128() {
+ let input = IdpfInput::from_bytes(b"AAAAAAAAAAAAAAAA");
+ let public_share = IdpfPublicShare::get_decoded_with_param(&128, &[0x7f; 4176]).unwrap();
+ let key = Seed::get_decoded(&[0xff; 16]).unwrap();
+ idpf_poplar_eval(&input, &public_share, &key);
+}
+
+#[cfg(feature = "experimental")]
+fn idpf_poplar_eval_2048() {
+ let input = IdpfInput::from_bytes(&[0x41; 256]);
+ let public_share = IdpfPublicShare::get_decoded_with_param(&2048, &[0x7f; 66096]).unwrap();
+ let key = Seed::get_decoded(&[0xff; 16]).unwrap();
+ idpf_poplar_eval(&input, &public_share, &key);
+}
+
+#[cfg(feature = "experimental")]
+fn idpf_codec() {
+ let data = hex::decode(concat!(
+ "9a",
+ "0000000000000000000000000000000000000000000000",
+ "01eb3a1bd6b5fa4a4500000000000000000000000000000000",
+ "ffffffff0000000022522c3fd5a33cac00000000000000000000000000000000",
+ "ffffffff0000000069f41eee46542b6900000000000000000000000000000000",
+ "00000000000000000000000000000000000000000000000000000000000000",
+ "017d1fd6df94280145a0dcc933ceb706e9219d50e7c4f92fd8ca9a0ffb7d819646",
+ ))
+ .unwrap();
+ let bits = 4;
+ let public_share = IdpfPublicShare::<Poplar1IdpfValue<Field64>, Poplar1IdpfValue<Field255>>::get_decoded_with_param(&bits, &data).unwrap();
+ let encoded = public_share.get_encoded();
+ let _ = black_box(encoded.len());
+}
+
+macro_rules! main_base {
+ ( $( $func_name:ident ),* $(,)* ) => {
+ iai::main!(
+ prng_16,
+ prng_256,
+ prng_1024,
+ prng_4096,
+ prio3_client_count,
+ prio3_client_histogram_10,
+ prio3_client_sum_32,
+ prio3_client_count_vec_1000,
+ $( $func_name, )*
+ );
+ };
+}
+
+#[cfg(feature = "prio2")]
+macro_rules! main_add_prio2 {
+ ( $( $func_name:ident ),* $(,)* ) => {
+ main_base!(
+ prio2_client_10,
+ prio2_client_100,
+ prio2_client_1000,
+ prio2_shard_and_prepare_10,
+ prio2_shard_and_prepare_100,
+ prio2_shard_and_prepare_1000,
+ $( $func_name, )*
+ );
+ };
+}
+
+#[cfg(not(feature = "prio2"))]
+macro_rules! main_add_prio2 {
+ ( $( $func_name:ident ),* $(,)* ) => {
+ main_base!(
+ $( $func_name, )*
+ );
+ };
+}
+
+#[cfg(feature = "multithreaded")]
+macro_rules! main_add_multithreaded {
+ ( $( $func_name:ident ),* $(,)* ) => {
+ main_add_prio2!(
+ prio3_client_count_vec_multithreaded_1000,
+ $( $func_name, )*
+ );
+ };
+}
+
+#[cfg(not(feature = "multithreaded"))]
+macro_rules! main_add_multithreaded {
+ ( $( $func_name:ident ),* $(,)* ) => {
+ main_add_prio2!(
+ $( $func_name, )*
+ );
+ };
+}
+
+#[cfg(feature = "experimental")]
+macro_rules! main_add_experimental {
+ ( $( $func_name:ident ),* $(,)* ) => {
+ main_add_multithreaded!(
+ idpf_codec,
+ idpf_poplar_gen_8,
+ idpf_poplar_gen_128,
+ idpf_poplar_gen_2048,
+ idpf_poplar_eval_8,
+ idpf_poplar_eval_128,
+ idpf_poplar_eval_2048,
+ $( $func_name, )*
+ );
+ };
+}
+
+#[cfg(not(feature = "experimental"))]
+macro_rules! main_add_experimental {
+ ( $( $func_name:ident ),* $(,)* ) => {
+ main_add_multithreaded!(
+ $( $func_name, )*
+ );
+ };
+}
+
+cfg_if! {
+ if #[cfg(windows)] {
+ fn main() {
+ eprintln!("Cycle count benchmarks are not supported on Windows.");
+ }
+ }
+ else {
+ main_add_experimental!();
+ }
+}
diff --git a/third_party/rust/prio/benches/speed_tests.rs b/third_party/rust/prio/benches/speed_tests.rs
new file mode 100644
index 0000000000..66458b1ada
--- /dev/null
+++ b/third_party/rust/prio/benches/speed_tests.rs
@@ -0,0 +1,876 @@
+// SPDX-License-Identifier: MPL-2.0
+
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+#[cfg(feature = "experimental")]
+use criterion::{BatchSize, Throughput};
+#[cfg(feature = "experimental")]
+use fixed::types::{I1F15, I1F31};
+#[cfg(feature = "experimental")]
+use fixed_macro::fixed;
+#[cfg(feature = "experimental")]
+use num_bigint::BigUint;
+#[cfg(feature = "experimental")]
+use num_rational::Ratio;
+#[cfg(feature = "experimental")]
+use num_traits::ToPrimitive;
+#[cfg(feature = "experimental")]
+use prio::dp::distributions::DiscreteGaussian;
+#[cfg(feature = "prio2")]
+use prio::vdaf::prio2::Prio2;
+use prio::{
+ benchmarked::*,
+ field::{random_vector, Field128 as F, FieldElement},
+ flp::gadgets::Mul,
+ vdaf::{prio3::Prio3, Aggregator, Client},
+};
+#[cfg(feature = "experimental")]
+use prio::{
+ field::{Field255, Field64},
+ flp::types::fixedpoint_l2::FixedPointBoundedL2VecSum,
+ idpf::{Idpf, IdpfInput, RingBufferCache},
+ vdaf::poplar1::{Poplar1, Poplar1AggregationParam, Poplar1IdpfValue},
+};
+#[cfg(feature = "experimental")]
+use rand::prelude::*;
+#[cfg(feature = "experimental")]
+use std::iter;
+use std::time::Duration;
+#[cfg(feature = "experimental")]
+use zipf::ZipfDistribution;
+
+/// Seed for generation of random benchmark inputs.
+///
+/// A fixed RNG seed is used to generate inputs in order to minimize run-to-run variability. The
+/// seed value may be freely changed to get a different set of inputs.
+#[cfg(feature = "experimental")]
+const RNG_SEED: u64 = 0;
+
+/// Speed test for generating a seed and deriving a pseudorandom sequence of field elements.
+fn prng(c: &mut Criterion) {
+ let mut group = c.benchmark_group("rand");
+ let test_sizes = [16, 256, 1024, 4096];
+ for size in test_sizes {
+ group.bench_with_input(BenchmarkId::from_parameter(size), &size, |b, size| {
+ b.iter(|| random_vector::<F>(*size))
+ });
+ }
+ group.finish();
+}
+
+/// Speed test for generating samples from the discrete gaussian distribution using different
+/// standard deviations.
+#[cfg(feature = "experimental")]
+pub fn dp_noise(c: &mut Criterion) {
+ let mut group = c.benchmark_group("dp_noise");
+ let mut rng = StdRng::seed_from_u64(RNG_SEED);
+
+ let test_stds = [
+ Ratio::<BigUint>::from_integer(BigUint::from(u128::MAX)).pow(2),
+ Ratio::<BigUint>::from_integer(BigUint::from(u64::MAX)),
+ Ratio::<BigUint>::from_integer(BigUint::from(u32::MAX)),
+ Ratio::<BigUint>::from_integer(BigUint::from(5u8)),
+ Ratio::<BigUint>::new(BigUint::from(10000u32), BigUint::from(23u32)),
+ ];
+ for std in test_stds {
+ let sampler = DiscreteGaussian::new(std.clone()).unwrap();
+ group.bench_function(
+ BenchmarkId::new("discrete_gaussian", std.to_f64().unwrap_or(f64::INFINITY)),
+ |b| b.iter(|| sampler.sample(&mut rng)),
+ );
+ }
+ group.finish();
+}
+
+/// The asymptotic cost of polynomial multiplication is `O(n log n)` using FFT and `O(n^2)` using
+/// the naive method. This benchmark demonstrates that the latter has better concrete performance
+/// for small polynomials. The result is used to pick the `FFT_THRESHOLD` constant in
+/// `src/flp/gadgets.rs`.
+fn poly_mul(c: &mut Criterion) {
+ let test_sizes = [1_usize, 30, 60, 90, 120, 150];
+
+ let mut group = c.benchmark_group("poly_mul");
+ for size in test_sizes {
+ group.bench_with_input(BenchmarkId::new("fft", size), &size, |b, size| {
+ let m = (size + 1).next_power_of_two();
+ let mut g: Mul<F> = Mul::new(*size);
+ let mut outp = vec![F::zero(); 2 * m];
+ let inp = vec![random_vector(m).unwrap(); 2];
+
+ b.iter(|| {
+ benchmarked_gadget_mul_call_poly_fft(&mut g, &mut outp, &inp).unwrap();
+ })
+ });
+
+ group.bench_with_input(BenchmarkId::new("direct", size), &size, |b, size| {
+ let m = (size + 1).next_power_of_two();
+ let mut g: Mul<F> = Mul::new(*size);
+ let mut outp = vec![F::zero(); 2 * m];
+ let inp = vec![random_vector(m).unwrap(); 2];
+
+ b.iter(|| {
+ benchmarked_gadget_mul_call_poly_direct(&mut g, &mut outp, &inp).unwrap();
+ })
+ });
+ }
+ group.finish();
+}
+
+/// Benchmark prio2.
+#[cfg(feature = "prio2")]
+fn prio2(c: &mut Criterion) {
+ let mut group = c.benchmark_group("prio2_shard");
+ for input_length in [10, 100, 1_000] {
+ group.bench_with_input(
+ BenchmarkId::from_parameter(input_length),
+ &input_length,
+ |b, input_length| {
+ let vdaf = Prio2::new(*input_length).unwrap();
+ let measurement = (0..u32::try_from(*input_length).unwrap())
+ .map(|i| i & 1)
+ .collect::<Vec<_>>();
+ let nonce = black_box([0u8; 16]);
+ b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
+ },
+ );
+ }
+ group.finish();
+
+ let mut group = c.benchmark_group("prio2_prepare_init");
+ for input_length in [10, 100, 1_000] {
+ group.bench_with_input(
+ BenchmarkId::from_parameter(input_length),
+ &input_length,
+ |b, input_length| {
+ let vdaf = Prio2::new(*input_length).unwrap();
+ let measurement = (0..u32::try_from(*input_length).unwrap())
+ .map(|i| i & 1)
+ .collect::<Vec<_>>();
+ let nonce = black_box([0u8; 16]);
+ let verify_key = black_box([0u8; 32]);
+ let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
+ b.iter(|| {
+ vdaf.prepare_init(&verify_key, 0, &(), &nonce, &public_share, &input_shares[0])
+ .unwrap();
+ });
+ },
+ );
+ }
+ group.finish();
+}
+
+/// Benchmark prio3.
+fn prio3(c: &mut Criterion) {
+ let num_shares = 2;
+
+ c.bench_function("prio3count_shard", |b| {
+ let vdaf = Prio3::new_count(num_shares).unwrap();
+ let measurement = black_box(1);
+ let nonce = black_box([0u8; 16]);
+ b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
+ });
+
+ c.bench_function("prio3count_prepare_init", |b| {
+ let vdaf = Prio3::new_count(num_shares).unwrap();
+ let measurement = black_box(1);
+ let nonce = black_box([0u8; 16]);
+ let verify_key = black_box([0u8; 16]);
+ let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
+ b.iter(|| {
+ vdaf.prepare_init(&verify_key, 0, &(), &nonce, &public_share, &input_shares[0])
+ .unwrap()
+ });
+ });
+
+ let mut group = c.benchmark_group("prio3sum_shard");
+ for bits in [8, 32] {
+ group.bench_with_input(BenchmarkId::from_parameter(bits), &bits, |b, bits| {
+ let vdaf = Prio3::new_sum(num_shares, *bits).unwrap();
+ let measurement = (1 << bits) - 1;
+ let nonce = black_box([0u8; 16]);
+ b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
+ });
+ }
+ group.finish();
+
+ let mut group = c.benchmark_group("prio3sum_prepare_init");
+ for bits in [8, 32] {
+ group.bench_with_input(BenchmarkId::from_parameter(bits), &bits, |b, bits| {
+ let vdaf = Prio3::new_sum(num_shares, *bits).unwrap();
+ let measurement = (1 << bits) - 1;
+ let nonce = black_box([0u8; 16]);
+ let verify_key = black_box([0u8; 16]);
+ let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
+ b.iter(|| {
+ vdaf.prepare_init(&verify_key, 0, &(), &nonce, &public_share, &input_shares[0])
+ .unwrap()
+ });
+ });
+ }
+ group.finish();
+
+ let mut group = c.benchmark_group("prio3sumvec_shard");
+ for (input_length, chunk_length) in [(10, 3), (100, 10), (1_000, 31)] {
+ group.bench_with_input(
+ BenchmarkId::new("serial", input_length),
+ &(input_length, chunk_length),
+ |b, (input_length, chunk_length)| {
+ let vdaf = Prio3::new_sum_vec(num_shares, 1, *input_length, *chunk_length).unwrap();
+ let measurement = (0..u128::try_from(*input_length).unwrap())
+ .map(|i| i & 1)
+ .collect::<Vec<_>>();
+ let nonce = black_box([0u8; 16]);
+ b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
+ },
+ );
+ }
+
+ #[cfg(feature = "multithreaded")]
+ {
+ for (input_length, chunk_length) in [(10, 3), (100, 10), (1_000, 31)] {
+ group.bench_with_input(
+ BenchmarkId::new("parallel", input_length),
+ &(input_length, chunk_length),
+ |b, (input_length, chunk_length)| {
+ let vdaf = Prio3::new_sum_vec_multithreaded(
+ num_shares,
+ 1,
+ *input_length,
+ *chunk_length,
+ )
+ .unwrap();
+ let measurement = (0..u128::try_from(*input_length).unwrap())
+ .map(|i| i & 1)
+ .collect::<Vec<_>>();
+ let nonce = black_box([0u8; 16]);
+ b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
+ },
+ );
+ }
+ }
+ group.finish();
+
+ let mut group = c.benchmark_group("prio3sumvec_prepare_init");
+ for (input_length, chunk_length) in [(10, 3), (100, 10), (1_000, 31)] {
+ group.bench_with_input(
+ BenchmarkId::new("serial", input_length),
+ &(input_length, chunk_length),
+ |b, (input_length, chunk_length)| {
+ let vdaf = Prio3::new_sum_vec(num_shares, 1, *input_length, *chunk_length).unwrap();
+ let measurement = (0..u128::try_from(*input_length).unwrap())
+ .map(|i| i & 1)
+ .collect::<Vec<_>>();
+ let nonce = black_box([0u8; 16]);
+ let verify_key = black_box([0u8; 16]);
+ let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
+ b.iter(|| {
+ vdaf.prepare_init(&verify_key, 0, &(), &nonce, &public_share, &input_shares[0])
+ .unwrap()
+ });
+ },
+ );
+ }
+
+ #[cfg(feature = "multithreaded")]
+ {
+ for (input_length, chunk_length) in [(10, 3), (100, 10), (1_000, 31)] {
+ group.bench_with_input(
+ BenchmarkId::new("parallel", input_length),
+ &(input_length, chunk_length),
+ |b, (input_length, chunk_length)| {
+ let vdaf = Prio3::new_sum_vec_multithreaded(
+ num_shares,
+ 1,
+ *input_length,
+ *chunk_length,
+ )
+ .unwrap();
+ let measurement = (0..u128::try_from(*input_length).unwrap())
+ .map(|i| i & 1)
+ .collect::<Vec<_>>();
+ let nonce = black_box([0u8; 16]);
+ let verify_key = black_box([0u8; 16]);
+ let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
+ b.iter(|| {
+ vdaf.prepare_init(
+ &verify_key,
+ 0,
+ &(),
+ &nonce,
+ &public_share,
+ &input_shares[0],
+ )
+ .unwrap()
+ });
+ },
+ );
+ }
+ }
+ group.finish();
+
+ let mut group = c.benchmark_group("prio3histogram_shard");
+ for (input_length, chunk_length) in [
+ (10, 3),
+ (100, 10),
+ (1_000, 31),
+ (10_000, 100),
+ (100_000, 316),
+ ] {
+ if input_length >= 100_000 {
+ group.measurement_time(Duration::from_secs(15));
+ }
+ group.bench_with_input(
+ BenchmarkId::new("serial", input_length),
+ &(input_length, chunk_length),
+ |b, (input_length, chunk_length)| {
+ let vdaf = Prio3::new_histogram(num_shares, *input_length, *chunk_length).unwrap();
+ let measurement = black_box(0);
+ let nonce = black_box([0u8; 16]);
+ b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
+ },
+ );
+ }
+
+ #[cfg(feature = "multithreaded")]
+ {
+ for (input_length, chunk_length) in [
+ (10, 3),
+ (100, 10),
+ (1_000, 31),
+ (10_000, 100),
+ (100_000, 316),
+ ] {
+ if input_length >= 100_000 {
+ group.measurement_time(Duration::from_secs(15));
+ }
+ group.bench_with_input(
+ BenchmarkId::new("parallel", input_length),
+ &(input_length, chunk_length),
+ |b, (input_length, chunk_length)| {
+ let vdaf = Prio3::new_histogram_multithreaded(
+ num_shares,
+ *input_length,
+ *chunk_length,
+ )
+ .unwrap();
+ let measurement = black_box(0);
+ let nonce = black_box([0u8; 16]);
+ b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
+ },
+ );
+ }
+ }
+ group.finish();
+
+ let mut group = c.benchmark_group("prio3histogram_prepare_init");
+ for (input_length, chunk_length) in [
+ (10, 3),
+ (100, 10),
+ (1_000, 31),
+ (10_000, 100),
+ (100_000, 316),
+ ] {
+ if input_length >= 100_000 {
+ group.measurement_time(Duration::from_secs(15));
+ }
+ group.bench_with_input(
+ BenchmarkId::new("serial", input_length),
+ &(input_length, chunk_length),
+ |b, (input_length, chunk_length)| {
+ let vdaf = Prio3::new_histogram(num_shares, *input_length, *chunk_length).unwrap();
+ let measurement = black_box(0);
+ let nonce = black_box([0u8; 16]);
+ let verify_key = black_box([0u8; 16]);
+ let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
+ b.iter(|| {
+ vdaf.prepare_init(&verify_key, 0, &(), &nonce, &public_share, &input_shares[0])
+ .unwrap()
+ });
+ },
+ );
+ }
+
+ #[cfg(feature = "multithreaded")]
+ {
+ for (input_length, chunk_length) in [
+ (10, 3),
+ (100, 10),
+ (1_000, 31),
+ (10_000, 100),
+ (100_000, 316),
+ ] {
+ if input_length >= 100_000 {
+ group.measurement_time(Duration::from_secs(15));
+ }
+ group.bench_with_input(
+ BenchmarkId::new("parallel", input_length),
+ &(input_length, chunk_length),
+ |b, (input_length, chunk_length)| {
+ let vdaf = Prio3::new_histogram_multithreaded(
+ num_shares,
+ *input_length,
+ *chunk_length,
+ )
+ .unwrap();
+ let measurement = black_box(0);
+ let nonce = black_box([0u8; 16]);
+ let verify_key = black_box([0u8; 16]);
+ let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
+ b.iter(|| {
+ vdaf.prepare_init(
+ &verify_key,
+ 0,
+ &(),
+ &nonce,
+ &public_share,
+ &input_shares[0],
+ )
+ .unwrap()
+ });
+ },
+ );
+ }
+ }
+ group.finish();
+
+ #[cfg(feature = "experimental")]
+ {
+ let mut group = c.benchmark_group("prio3fixedpointboundedl2vecsum_i1f15_shard");
+ for dimension in [10, 100, 1_000] {
+ group.bench_with_input(
+ BenchmarkId::new("serial", dimension),
+ &dimension,
+ |b, dimension| {
+ let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _>, _, 16> =
+ Prio3::new_fixedpoint_boundedl2_vec_sum(num_shares, *dimension).unwrap();
+ let mut measurement = vec![fixed!(0: I1F15); *dimension];
+ measurement[0] = fixed!(0.5: I1F15);
+ let nonce = black_box([0u8; 16]);
+ b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
+ },
+ );
+ }
+
+ #[cfg(feature = "multithreaded")]
+ {
+ for dimension in [10, 100, 1_000] {
+ group.bench_with_input(
+ BenchmarkId::new("parallel", dimension),
+ &dimension,
+ |b, dimension| {
+ let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _>, _, 16> =
+ Prio3::new_fixedpoint_boundedl2_vec_sum_multithreaded(
+ num_shares, *dimension,
+ )
+ .unwrap();
+ let mut measurement = vec![fixed!(0: I1F15); *dimension];
+ measurement[0] = fixed!(0.5: I1F15);
+ let nonce = black_box([0u8; 16]);
+ b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
+ },
+ );
+ }
+ }
+ group.finish();
+
+ let mut group = c.benchmark_group("prio3fixedpointboundedl2vecsum_i1f15_prepare_init");
+ for dimension in [10, 100, 1_000] {
+ group.bench_with_input(
+ BenchmarkId::new("series", dimension),
+ &dimension,
+ |b, dimension| {
+ let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _>, _, 16> =
+ Prio3::new_fixedpoint_boundedl2_vec_sum(num_shares, *dimension).unwrap();
+ let mut measurement = vec![fixed!(0: I1F15); *dimension];
+ measurement[0] = fixed!(0.5: I1F15);
+ let nonce = black_box([0u8; 16]);
+ let verify_key = black_box([0u8; 16]);
+ let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
+ b.iter(|| {
+ vdaf.prepare_init(
+ &verify_key,
+ 0,
+ &(),
+ &nonce,
+ &public_share,
+ &input_shares[0],
+ )
+ .unwrap()
+ });
+ },
+ );
+ }
+
+ #[cfg(feature = "multithreaded")]
+ {
+ for dimension in [10, 100, 1_000] {
+ group.bench_with_input(
+ BenchmarkId::new("parallel", dimension),
+ &dimension,
+ |b, dimension| {
+ let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _>, _, 16> =
+ Prio3::new_fixedpoint_boundedl2_vec_sum_multithreaded(
+ num_shares, *dimension,
+ )
+ .unwrap();
+ let mut measurement = vec![fixed!(0: I1F15); *dimension];
+ measurement[0] = fixed!(0.5: I1F15);
+ let nonce = black_box([0u8; 16]);
+ let verify_key = black_box([0u8; 16]);
+ let (public_share, input_shares) =
+ vdaf.shard(&measurement, &nonce).unwrap();
+ b.iter(|| {
+ vdaf.prepare_init(
+ &verify_key,
+ 0,
+ &(),
+ &nonce,
+ &public_share,
+ &input_shares[0],
+ )
+ .unwrap()
+ });
+ },
+ );
+ }
+ }
+ group.finish();
+
+ let mut group = c.benchmark_group("prio3fixedpointboundedl2vecsum_i1f31_shard");
+ for dimension in [10, 100, 1_000] {
+ group.bench_with_input(
+ BenchmarkId::new("serial", dimension),
+ &dimension,
+ |b, dimension| {
+ let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _>, _, 16> =
+ Prio3::new_fixedpoint_boundedl2_vec_sum(num_shares, *dimension).unwrap();
+ let mut measurement = vec![fixed!(0: I1F31); *dimension];
+ measurement[0] = fixed!(0.5: I1F31);
+ let nonce = black_box([0u8; 16]);
+ b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
+ },
+ );
+ }
+
+ #[cfg(feature = "multithreaded")]
+ {
+ for dimension in [10, 100, 1_000] {
+ group.bench_with_input(
+ BenchmarkId::new("parallel", dimension),
+ &dimension,
+ |b, dimension| {
+ let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _>, _, 16> =
+ Prio3::new_fixedpoint_boundedl2_vec_sum_multithreaded(
+ num_shares, *dimension,
+ )
+ .unwrap();
+ let mut measurement = vec![fixed!(0: I1F31); *dimension];
+ measurement[0] = fixed!(0.5: I1F31);
+ let nonce = black_box([0u8; 16]);
+ b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
+ },
+ );
+ }
+ }
+ group.finish();
+
+ let mut group = c.benchmark_group("prio3fixedpointboundedl2vecsum_i1f31_prepare_init");
+ for dimension in [10, 100, 1_000] {
+ group.bench_with_input(
+ BenchmarkId::new("series", dimension),
+ &dimension,
+ |b, dimension| {
+ let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _>, _, 16> =
+ Prio3::new_fixedpoint_boundedl2_vec_sum(num_shares, *dimension).unwrap();
+ let mut measurement = vec![fixed!(0: I1F31); *dimension];
+ measurement[0] = fixed!(0.5: I1F31);
+ let nonce = black_box([0u8; 16]);
+ let verify_key = black_box([0u8; 16]);
+ let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
+ b.iter(|| {
+ vdaf.prepare_init(
+ &verify_key,
+ 0,
+ &(),
+ &nonce,
+ &public_share,
+ &input_shares[0],
+ )
+ .unwrap()
+ });
+ },
+ );
+ }
+
+ #[cfg(feature = "multithreaded")]
+ {
+ for dimension in [10, 100, 1_000] {
+ group.bench_with_input(
+ BenchmarkId::new("parallel", dimension),
+ &dimension,
+ |b, dimension| {
+ let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _>, _, 16> =
+ Prio3::new_fixedpoint_boundedl2_vec_sum_multithreaded(
+ num_shares, *dimension,
+ )
+ .unwrap();
+ let mut measurement = vec![fixed!(0: I1F31); *dimension];
+ measurement[0] = fixed!(0.5: I1F31);
+ let nonce = black_box([0u8; 16]);
+ let verify_key = black_box([0u8; 16]);
+ let (public_share, input_shares) =
+ vdaf.shard(&measurement, &nonce).unwrap();
+ b.iter(|| {
+ vdaf.prepare_init(
+ &verify_key,
+ 0,
+ &(),
+ &nonce,
+ &public_share,
+ &input_shares[0],
+ )
+ .unwrap()
+ });
+ },
+ );
+ }
+ }
+ group.finish();
+ }
+}
+
+/// Benchmark IdpfPoplar performance.
+#[cfg(feature = "experimental")]
+fn idpf(c: &mut Criterion) {
+ let test_sizes = [8usize, 8 * 16, 8 * 256];
+
+ let mut group = c.benchmark_group("idpf_gen");
+ for size in test_sizes.iter() {
+ group.throughput(Throughput::Bytes(*size as u64 / 8));
+ group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| {
+ let bits = iter::repeat_with(random).take(size).collect::<Vec<bool>>();
+ let input = IdpfInput::from_bools(&bits);
+
+ let inner_values = random_vector::<Field64>(size - 1)
+ .unwrap()
+ .into_iter()
+ .map(|random_element| Poplar1IdpfValue::new([Field64::one(), random_element]))
+ .collect::<Vec<_>>();
+ let leaf_value = Poplar1IdpfValue::new([Field255::one(), random_vector(1).unwrap()[0]]);
+
+ let idpf = Idpf::new((), ());
+ b.iter(|| {
+ idpf.gen(&input, inner_values.clone(), leaf_value, &[0; 16])
+ .unwrap();
+ });
+ });
+ }
+ group.finish();
+
+ let mut group = c.benchmark_group("idpf_eval");
+ for size in test_sizes.iter() {
+ group.throughput(Throughput::Bytes(*size as u64 / 8));
+ group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| {
+ let bits = iter::repeat_with(random).take(size).collect::<Vec<bool>>();
+ let input = IdpfInput::from_bools(&bits);
+
+ let inner_values = random_vector::<Field64>(size - 1)
+ .unwrap()
+ .into_iter()
+ .map(|random_element| Poplar1IdpfValue::new([Field64::one(), random_element]))
+ .collect::<Vec<_>>();
+ let leaf_value = Poplar1IdpfValue::new([Field255::one(), random_vector(1).unwrap()[0]]);
+
+ let idpf = Idpf::new((), ());
+ let (public_share, keys) = idpf
+ .gen(&input, inner_values, leaf_value, &[0; 16])
+ .unwrap();
+
+ b.iter(|| {
+ // This is an aggressively small cache, to minimize its impact on the benchmark.
+ // In this synthetic benchmark, we are only checking one candidate prefix per level
+ // (typically there are many candidate prefixes per level) so the cache hit rate
+ // will be unaffected.
+ let mut cache = RingBufferCache::new(1);
+
+ for prefix_length in 1..=size {
+ let prefix = input[..prefix_length].to_owned().into();
+ idpf.eval(0, &public_share, &keys[0], &prefix, &[0; 16], &mut cache)
+ .unwrap();
+ }
+ });
+ });
+ }
+ group.finish();
+}
+
+/// Benchmark Poplar1.
+#[cfg(feature = "experimental")]
+fn poplar1(c: &mut Criterion) {
+ let test_sizes = [16_usize, 128, 256];
+
+ let mut group = c.benchmark_group("poplar1_shard");
+ for size in test_sizes.iter() {
+ group.throughput(Throughput::Bytes(*size as u64 / 8));
+ group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| {
+ let vdaf = Poplar1::new_shake128(size);
+ let mut rng = StdRng::seed_from_u64(RNG_SEED);
+ let nonce = rng.gen::<[u8; 16]>();
+
+ b.iter_batched(
+ || {
+ let bits = iter::repeat_with(|| rng.gen())
+ .take(size)
+ .collect::<Vec<bool>>();
+ IdpfInput::from_bools(&bits)
+ },
+ |measurement| {
+ vdaf.shard(&measurement, &nonce).unwrap();
+ },
+ BatchSize::SmallInput,
+ );
+ });
+ }
+ group.finish();
+
+ let mut group = c.benchmark_group("poplar1_prepare_init");
+ for size in test_sizes.iter() {
+ group.measurement_time(Duration::from_secs(30)); // slower benchmark
+ group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| {
+ let vdaf = Poplar1::new_shake128(size);
+ let mut rng = StdRng::seed_from_u64(RNG_SEED);
+
+ b.iter_batched(
+ || {
+ let verify_key: [u8; 16] = rng.gen();
+ let nonce: [u8; 16] = rng.gen();
+
+ // Parameters are chosen to match Chris Wood's experimental setup:
+ // https://github.com/chris-wood/heavy-hitter-comparison
+ let (measurements, prefix_tree) = poplar1_generate_zipf_distributed_batch(
+ &mut rng, // rng
+ size, // bits
+ 10, // threshold
+ 1000, // number of measurements
+ 128, // Zipf support
+ 1.03, // Zipf exponent
+ );
+
+ // We are benchmarking preparation of a single report. For this test, it doesn't matter
+ // which measurement we generate a report for, so pick the first measurement
+ // arbitrarily.
+ let (public_share, input_shares) =
+ vdaf.shard(&measurements[0], &nonce).unwrap();
+
+ // For the aggregation paramter, we use the candidate prefixes from the prefix tree
+ // for the sampled measurements. Run preparation for the last step, which ought to
+ // represent the worst-case performance.
+ let agg_param =
+ Poplar1AggregationParam::try_from_prefixes(prefix_tree[size - 1].clone())
+ .unwrap();
+
+ (
+ verify_key,
+ nonce,
+ agg_param,
+ public_share,
+ input_shares.into_iter().next().unwrap(),
+ )
+ },
+ |(verify_key, nonce, agg_param, public_share, input_share)| {
+ vdaf.prepare_init(
+ &verify_key,
+ 0,
+ &agg_param,
+ &nonce,
+ &public_share,
+ &input_share,
+ )
+ .unwrap();
+ },
+ BatchSize::SmallInput,
+ );
+ });
+ }
+ group.finish();
+}
+
+/// Generate a set of Poplar1 measurements with the given bit length `bits`. They are sampled
+/// according to the Zipf distribution with parameters `zipf_support` and `zipf_exponent`. Return
+/// the measurements, along with the prefix tree for the desired threshold.
+///
+/// The prefix tree consists of a sequence of candidate prefixes for each level. For a given level,
+/// the candidate prefixes are computed from the hit counts of the prefixes at the previous level:
+/// For any prefix `p` whose hit count is at least the desired threshold, add `p || 0` and `p || 1`
+/// to the list.
+#[cfg(feature = "experimental")]
+fn poplar1_generate_zipf_distributed_batch(
+ rng: &mut impl Rng,
+ bits: usize,
+ threshold: usize,
+ measurement_count: usize,
+ zipf_support: usize,
+ zipf_exponent: f64,
+) -> (Vec<IdpfInput>, Vec<Vec<IdpfInput>>) {
+ // Generate random inputs.
+ let mut inputs = Vec::with_capacity(zipf_support);
+ for _ in 0..zipf_support {
+ let bools: Vec<bool> = (0..bits).map(|_| rng.gen()).collect();
+ inputs.push(IdpfInput::from_bools(&bools));
+ }
+
+ // Sample a number of inputs according to the Zipf distribution.
+ let mut samples = Vec::with_capacity(measurement_count);
+ let zipf = ZipfDistribution::new(zipf_support, zipf_exponent).unwrap();
+ for _ in 0..measurement_count {
+ samples.push(inputs[zipf.sample(rng) - 1].clone());
+ }
+
+ // Compute the prefix tree for the desired threshold.
+ let mut prefix_tree = Vec::with_capacity(bits);
+ prefix_tree.push(vec![
+ IdpfInput::from_bools(&[false]),
+ IdpfInput::from_bools(&[true]),
+ ]);
+
+ for level in 0..bits - 1 {
+ // Compute the hit count of each prefix from the previous level.
+ let mut hit_counts = vec![0; prefix_tree[level].len()];
+ for (hit_count, prefix) in hit_counts.iter_mut().zip(prefix_tree[level].iter()) {
+ for sample in samples.iter() {
+ let mut is_prefix = true;
+ for j in 0..prefix.len() {
+ if prefix[j] != sample[j] {
+ is_prefix = false;
+ break;
+ }
+ }
+ if is_prefix {
+ *hit_count += 1;
+ }
+ }
+ }
+
+ // Compute the next set of candidate prefixes.
+ let mut next_prefixes = Vec::new();
+ for (hit_count, prefix) in hit_counts.iter().zip(prefix_tree[level].iter()) {
+ if *hit_count >= threshold {
+ next_prefixes.push(prefix.clone_with_suffix(&[false]));
+ next_prefixes.push(prefix.clone_with_suffix(&[true]));
+ }
+ }
+ prefix_tree.push(next_prefixes);
+ }
+
+ (samples, prefix_tree)
+}
+
+#[cfg(all(feature = "prio2", feature = "experimental"))]
+criterion_group!(benches, poplar1, prio3, prio2, poly_mul, prng, idpf, dp_noise);
+#[cfg(all(not(feature = "prio2"), feature = "experimental"))]
+criterion_group!(benches, poplar1, prio3, poly_mul, prng, idpf, dp_noise);
+#[cfg(all(feature = "prio2", not(feature = "experimental")))]
+criterion_group!(benches, prio3, prio2, prng, poly_mul);
+#[cfg(all(not(feature = "prio2"), not(feature = "experimental")))]
+criterion_group!(benches, prio3, prng, poly_mul);
+
+criterion_main!(benches);