847 lines
33 KiB
Rust
847 lines
33 KiB
Rust
// SPDX-License-Identifier: MPL-2.0
|
|
|
|
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
|
|
#[cfg(feature = "experimental")]
|
|
use criterion::{BatchSize, Throughput};
|
|
#[cfg(feature = "experimental")]
|
|
use fixed::types::{I1F15, I1F31};
|
|
#[cfg(feature = "experimental")]
|
|
use fixed_macro::fixed;
|
|
#[cfg(feature = "experimental")]
|
|
use num_bigint::BigUint;
|
|
#[cfg(feature = "experimental")]
|
|
use num_rational::Ratio;
|
|
#[cfg(feature = "experimental")]
|
|
use num_traits::ToPrimitive;
|
|
#[cfg(feature = "experimental")]
|
|
use prio::dp::distributions::DiscreteGaussian;
|
|
#[cfg(feature = "experimental")]
|
|
use prio::idpf::test_utils::generate_zipf_distributed_batch;
|
|
#[cfg(feature = "experimental")]
|
|
use prio::vdaf::prio2::Prio2;
|
|
use prio::{
|
|
benchmarked::*,
|
|
field::{random_vector, Field128 as F, FieldElement},
|
|
flp::gadgets::Mul,
|
|
vdaf::{prio3::Prio3, Aggregator, Client},
|
|
};
|
|
#[cfg(feature = "experimental")]
|
|
use prio::{
|
|
field::{Field255, Field64},
|
|
flp::types::fixedpoint_l2::FixedPointBoundedL2VecSum,
|
|
idpf::{Idpf, IdpfInput, RingBufferCache},
|
|
vdaf::poplar1::{Poplar1, Poplar1AggregationParam, Poplar1IdpfValue},
|
|
};
|
|
#[cfg(feature = "experimental")]
|
|
use rand::prelude::*;
|
|
#[cfg(feature = "experimental")]
|
|
use std::iter;
|
|
use std::time::Duration;
|
|
|
|
/// Seed for generation of random benchmark inputs.
|
|
///
|
|
/// A fixed RNG seed is used to generate inputs in order to minimize run-to-run variability. The
|
|
/// seed value may be freely changed to get a different set of inputs.
|
|
#[cfg(feature = "experimental")]
|
|
const RNG_SEED: u64 = 0;
|
|
|
|
/// Speed test for generating a seed and deriving a pseudorandom sequence of field elements.
|
|
fn prng(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("rand");
|
|
let test_sizes = [16, 256, 1024, 4096];
|
|
for size in test_sizes {
|
|
group.bench_with_input(BenchmarkId::from_parameter(size), &size, |b, size| {
|
|
b.iter(|| random_vector::<F>(*size))
|
|
});
|
|
}
|
|
group.finish();
|
|
}
|
|
|
|
/// Speed test for generating samples from the discrete gaussian distribution using different
|
|
/// standard deviations.
|
|
#[cfg(feature = "experimental")]
|
|
pub fn dp_noise(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("dp_noise");
|
|
let mut rng = StdRng::seed_from_u64(RNG_SEED);
|
|
|
|
let test_stds = [
|
|
Ratio::<BigUint>::from_integer(BigUint::from(u128::MAX)).pow(2),
|
|
Ratio::<BigUint>::from_integer(BigUint::from(u64::MAX)),
|
|
Ratio::<BigUint>::from_integer(BigUint::from(u32::MAX)),
|
|
Ratio::<BigUint>::from_integer(BigUint::from(5u8)),
|
|
Ratio::<BigUint>::new(BigUint::from(10000u32), BigUint::from(23u32)),
|
|
];
|
|
for std in test_stds {
|
|
let sampler = DiscreteGaussian::new(std.clone()).unwrap();
|
|
group.bench_function(
|
|
BenchmarkId::new("discrete_gaussian", std.to_f64().unwrap_or(f64::INFINITY)),
|
|
|b| b.iter(|| sampler.sample(&mut rng)),
|
|
);
|
|
}
|
|
group.finish();
|
|
}
|
|
|
|
/// The asymptotic cost of polynomial multiplication is `O(n log n)` using FFT and `O(n^2)` using
|
|
/// the naive method. This benchmark demonstrates that the latter has better concrete performance
|
|
/// for small polynomials. The result is used to pick the `FFT_THRESHOLD` constant in
|
|
/// `src/flp/gadgets.rs`.
|
|
fn poly_mul(c: &mut Criterion) {
|
|
let test_sizes = [1_usize, 30, 60, 90, 120, 150];
|
|
|
|
let mut group = c.benchmark_group("poly_mul");
|
|
for size in test_sizes {
|
|
group.bench_with_input(BenchmarkId::new("fft", size), &size, |b, size| {
|
|
let m = (size + 1).next_power_of_two();
|
|
let mut g: Mul<F> = Mul::new(*size);
|
|
let mut outp = vec![F::zero(); 2 * m];
|
|
let inp = vec![random_vector(m).unwrap(); 2];
|
|
|
|
b.iter(|| {
|
|
benchmarked_gadget_mul_call_poly_fft(&mut g, &mut outp, &inp).unwrap();
|
|
})
|
|
});
|
|
|
|
group.bench_with_input(BenchmarkId::new("direct", size), &size, |b, size| {
|
|
let m = (size + 1).next_power_of_two();
|
|
let mut g: Mul<F> = Mul::new(*size);
|
|
let mut outp = vec![F::zero(); 2 * m];
|
|
let inp = vec![random_vector(m).unwrap(); 2];
|
|
|
|
b.iter(|| {
|
|
benchmarked_gadget_mul_call_poly_direct(&mut g, &mut outp, &inp).unwrap();
|
|
})
|
|
});
|
|
}
|
|
group.finish();
|
|
}
|
|
|
|
/// Benchmark prio2.
|
|
#[cfg(feature = "experimental")]
|
|
fn prio2(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("prio2_shard");
|
|
for input_length in [10, 100, 1_000] {
|
|
group.bench_with_input(
|
|
BenchmarkId::from_parameter(input_length),
|
|
&input_length,
|
|
|b, input_length| {
|
|
let vdaf = Prio2::new(*input_length).unwrap();
|
|
let measurement = (0..u32::try_from(*input_length).unwrap())
|
|
.map(|i| i & 1)
|
|
.collect::<Vec<_>>();
|
|
let nonce = black_box([0u8; 16]);
|
|
b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
|
|
},
|
|
);
|
|
}
|
|
group.finish();
|
|
|
|
let mut group = c.benchmark_group("prio2_prepare_init");
|
|
for input_length in [10, 100, 1_000] {
|
|
group.bench_with_input(
|
|
BenchmarkId::from_parameter(input_length),
|
|
&input_length,
|
|
|b, input_length| {
|
|
let vdaf = Prio2::new(*input_length).unwrap();
|
|
let measurement = (0..u32::try_from(*input_length).unwrap())
|
|
.map(|i| i & 1)
|
|
.collect::<Vec<_>>();
|
|
let nonce = black_box([0u8; 16]);
|
|
let verify_key = black_box([0u8; 32]);
|
|
let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
|
|
b.iter(|| {
|
|
vdaf.prepare_init(&verify_key, 0, &(), &nonce, &public_share, &input_shares[0])
|
|
.unwrap();
|
|
});
|
|
},
|
|
);
|
|
}
|
|
group.finish();
|
|
}
|
|
|
|
/// Benchmark prio3.
|
|
fn prio3(c: &mut Criterion) {
|
|
let num_shares = 2;
|
|
|
|
c.bench_function("prio3count_shard", |b| {
|
|
let vdaf = Prio3::new_count(num_shares).unwrap();
|
|
let measurement = black_box(true);
|
|
let nonce = black_box([0u8; 16]);
|
|
b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
|
|
});
|
|
|
|
c.bench_function("prio3count_prepare_init", |b| {
|
|
let vdaf = Prio3::new_count(num_shares).unwrap();
|
|
let measurement = black_box(true);
|
|
let nonce = black_box([0u8; 16]);
|
|
let verify_key = black_box([0u8; 16]);
|
|
let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
|
|
b.iter(|| {
|
|
vdaf.prepare_init(&verify_key, 0, &(), &nonce, &public_share, &input_shares[0])
|
|
.unwrap()
|
|
});
|
|
});
|
|
|
|
let mut group = c.benchmark_group("prio3sum_shard");
|
|
for bits in [8, 32] {
|
|
group.bench_with_input(BenchmarkId::from_parameter(bits), &bits, |b, bits| {
|
|
let vdaf = Prio3::new_sum(num_shares, *bits).unwrap();
|
|
let measurement = (1 << bits) - 1;
|
|
let nonce = black_box([0u8; 16]);
|
|
b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
|
|
});
|
|
}
|
|
group.finish();
|
|
|
|
let mut group = c.benchmark_group("prio3sum_prepare_init");
|
|
for bits in [8, 32] {
|
|
group.bench_with_input(BenchmarkId::from_parameter(bits), &bits, |b, bits| {
|
|
let vdaf = Prio3::new_sum(num_shares, *bits).unwrap();
|
|
let measurement = (1 << bits) - 1;
|
|
let nonce = black_box([0u8; 16]);
|
|
let verify_key = black_box([0u8; 16]);
|
|
let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
|
|
b.iter(|| {
|
|
vdaf.prepare_init(&verify_key, 0, &(), &nonce, &public_share, &input_shares[0])
|
|
.unwrap()
|
|
});
|
|
});
|
|
}
|
|
group.finish();
|
|
|
|
let mut group = c.benchmark_group("prio3sumvec_shard");
|
|
for (input_length, chunk_length) in [(10, 3), (100, 10), (1_000, 31)] {
|
|
group.bench_with_input(
|
|
BenchmarkId::new("serial", input_length),
|
|
&(input_length, chunk_length),
|
|
|b, (input_length, chunk_length)| {
|
|
let vdaf = Prio3::new_sum_vec(num_shares, 1, *input_length, *chunk_length).unwrap();
|
|
let measurement = (0..u128::try_from(*input_length).unwrap())
|
|
.map(|i| i & 1)
|
|
.collect::<Vec<_>>();
|
|
let nonce = black_box([0u8; 16]);
|
|
b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
|
|
},
|
|
);
|
|
}
|
|
|
|
#[cfg(feature = "multithreaded")]
|
|
{
|
|
for (input_length, chunk_length) in [(10, 3), (100, 10), (1_000, 31)] {
|
|
group.bench_with_input(
|
|
BenchmarkId::new("parallel", input_length),
|
|
&(input_length, chunk_length),
|
|
|b, (input_length, chunk_length)| {
|
|
let vdaf = Prio3::new_sum_vec_multithreaded(
|
|
num_shares,
|
|
1,
|
|
*input_length,
|
|
*chunk_length,
|
|
)
|
|
.unwrap();
|
|
let measurement = (0..u128::try_from(*input_length).unwrap())
|
|
.map(|i| i & 1)
|
|
.collect::<Vec<_>>();
|
|
let nonce = black_box([0u8; 16]);
|
|
b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
|
|
},
|
|
);
|
|
}
|
|
}
|
|
group.finish();
|
|
|
|
let mut group = c.benchmark_group("prio3sumvec_prepare_init");
|
|
for (input_length, chunk_length) in [(10, 3), (100, 10), (1_000, 31)] {
|
|
group.bench_with_input(
|
|
BenchmarkId::new("serial", input_length),
|
|
&(input_length, chunk_length),
|
|
|b, (input_length, chunk_length)| {
|
|
let vdaf = Prio3::new_sum_vec(num_shares, 1, *input_length, *chunk_length).unwrap();
|
|
let measurement = (0..u128::try_from(*input_length).unwrap())
|
|
.map(|i| i & 1)
|
|
.collect::<Vec<_>>();
|
|
let nonce = black_box([0u8; 16]);
|
|
let verify_key = black_box([0u8; 16]);
|
|
let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
|
|
b.iter(|| {
|
|
vdaf.prepare_init(&verify_key, 0, &(), &nonce, &public_share, &input_shares[0])
|
|
.unwrap()
|
|
});
|
|
},
|
|
);
|
|
}
|
|
|
|
#[cfg(feature = "multithreaded")]
|
|
{
|
|
for (input_length, chunk_length) in [(10, 3), (100, 10), (1_000, 31)] {
|
|
group.bench_with_input(
|
|
BenchmarkId::new("parallel", input_length),
|
|
&(input_length, chunk_length),
|
|
|b, (input_length, chunk_length)| {
|
|
let vdaf = Prio3::new_sum_vec_multithreaded(
|
|
num_shares,
|
|
1,
|
|
*input_length,
|
|
*chunk_length,
|
|
)
|
|
.unwrap();
|
|
let measurement = (0..u128::try_from(*input_length).unwrap())
|
|
.map(|i| i & 1)
|
|
.collect::<Vec<_>>();
|
|
let nonce = black_box([0u8; 16]);
|
|
let verify_key = black_box([0u8; 16]);
|
|
let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
|
|
b.iter(|| {
|
|
vdaf.prepare_init(
|
|
&verify_key,
|
|
0,
|
|
&(),
|
|
&nonce,
|
|
&public_share,
|
|
&input_shares[0],
|
|
)
|
|
.unwrap()
|
|
});
|
|
},
|
|
);
|
|
}
|
|
}
|
|
group.finish();
|
|
|
|
let mut group = c.benchmark_group("prio3histogram_shard");
|
|
for (input_length, chunk_length) in [
|
|
(10, 3),
|
|
(100, 10),
|
|
(1_000, 31),
|
|
(10_000, 100),
|
|
(100_000, 316),
|
|
] {
|
|
if input_length >= 100_000 {
|
|
group.measurement_time(Duration::from_secs(15));
|
|
}
|
|
group.bench_with_input(
|
|
BenchmarkId::new("serial", input_length),
|
|
&(input_length, chunk_length),
|
|
|b, (input_length, chunk_length)| {
|
|
let vdaf = Prio3::new_histogram(num_shares, *input_length, *chunk_length).unwrap();
|
|
let measurement = black_box(0);
|
|
let nonce = black_box([0u8; 16]);
|
|
b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
|
|
},
|
|
);
|
|
}
|
|
|
|
#[cfg(feature = "multithreaded")]
|
|
{
|
|
for (input_length, chunk_length) in [
|
|
(10, 3),
|
|
(100, 10),
|
|
(1_000, 31),
|
|
(10_000, 100),
|
|
(100_000, 316),
|
|
] {
|
|
if input_length >= 100_000 {
|
|
group.measurement_time(Duration::from_secs(15));
|
|
}
|
|
group.bench_with_input(
|
|
BenchmarkId::new("parallel", input_length),
|
|
&(input_length, chunk_length),
|
|
|b, (input_length, chunk_length)| {
|
|
let vdaf = Prio3::new_histogram_multithreaded(
|
|
num_shares,
|
|
*input_length,
|
|
*chunk_length,
|
|
)
|
|
.unwrap();
|
|
let measurement = black_box(0);
|
|
let nonce = black_box([0u8; 16]);
|
|
b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
|
|
},
|
|
);
|
|
}
|
|
}
|
|
group.finish();
|
|
|
|
let mut group = c.benchmark_group("prio3histogram_prepare_init");
|
|
for (input_length, chunk_length) in [
|
|
(10, 3),
|
|
(100, 10),
|
|
(1_000, 31),
|
|
(10_000, 100),
|
|
(100_000, 316),
|
|
] {
|
|
if input_length >= 100_000 {
|
|
group.measurement_time(Duration::from_secs(15));
|
|
}
|
|
group.bench_with_input(
|
|
BenchmarkId::new("serial", input_length),
|
|
&(input_length, chunk_length),
|
|
|b, (input_length, chunk_length)| {
|
|
let vdaf = Prio3::new_histogram(num_shares, *input_length, *chunk_length).unwrap();
|
|
let measurement = black_box(0);
|
|
let nonce = black_box([0u8; 16]);
|
|
let verify_key = black_box([0u8; 16]);
|
|
let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
|
|
b.iter(|| {
|
|
vdaf.prepare_init(&verify_key, 0, &(), &nonce, &public_share, &input_shares[0])
|
|
.unwrap()
|
|
});
|
|
},
|
|
);
|
|
}
|
|
|
|
#[cfg(feature = "multithreaded")]
|
|
{
|
|
for (input_length, chunk_length) in [
|
|
(10, 3),
|
|
(100, 10),
|
|
(1_000, 31),
|
|
(10_000, 100),
|
|
(100_000, 316),
|
|
] {
|
|
if input_length >= 100_000 {
|
|
group.measurement_time(Duration::from_secs(15));
|
|
}
|
|
group.bench_with_input(
|
|
BenchmarkId::new("parallel", input_length),
|
|
&(input_length, chunk_length),
|
|
|b, (input_length, chunk_length)| {
|
|
let vdaf = Prio3::new_histogram_multithreaded(
|
|
num_shares,
|
|
*input_length,
|
|
*chunk_length,
|
|
)
|
|
.unwrap();
|
|
let measurement = black_box(0);
|
|
let nonce = black_box([0u8; 16]);
|
|
let verify_key = black_box([0u8; 16]);
|
|
let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
|
|
b.iter(|| {
|
|
vdaf.prepare_init(
|
|
&verify_key,
|
|
0,
|
|
&(),
|
|
&nonce,
|
|
&public_share,
|
|
&input_shares[0],
|
|
)
|
|
.unwrap()
|
|
});
|
|
},
|
|
);
|
|
}
|
|
}
|
|
group.finish();
|
|
|
|
#[cfg(feature = "experimental")]
|
|
{
|
|
let mut group = c.benchmark_group("prio3fixedpointboundedl2vecsum_i1f15_shard");
|
|
for dimension in [10, 100, 1_000] {
|
|
group.bench_with_input(
|
|
BenchmarkId::new("serial", dimension),
|
|
&dimension,
|
|
|b, dimension| {
|
|
let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _>, _, 16> =
|
|
Prio3::new_fixedpoint_boundedl2_vec_sum(num_shares, *dimension).unwrap();
|
|
let mut measurement = vec![fixed!(0: I1F15); *dimension];
|
|
measurement[0] = fixed!(0.5: I1F15);
|
|
let nonce = black_box([0u8; 16]);
|
|
b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
|
|
},
|
|
);
|
|
}
|
|
|
|
#[cfg(feature = "multithreaded")]
|
|
{
|
|
for dimension in [10, 100, 1_000] {
|
|
group.bench_with_input(
|
|
BenchmarkId::new("parallel", dimension),
|
|
&dimension,
|
|
|b, dimension| {
|
|
let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _>, _, 16> =
|
|
Prio3::new_fixedpoint_boundedl2_vec_sum_multithreaded(
|
|
num_shares, *dimension,
|
|
)
|
|
.unwrap();
|
|
let mut measurement = vec![fixed!(0: I1F15); *dimension];
|
|
measurement[0] = fixed!(0.5: I1F15);
|
|
let nonce = black_box([0u8; 16]);
|
|
b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
|
|
},
|
|
);
|
|
}
|
|
}
|
|
group.finish();
|
|
|
|
let mut group = c.benchmark_group("prio3fixedpointboundedl2vecsum_i1f15_prepare_init");
|
|
for dimension in [10, 100, 1_000] {
|
|
group.bench_with_input(
|
|
BenchmarkId::new("series", dimension),
|
|
&dimension,
|
|
|b, dimension| {
|
|
let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _>, _, 16> =
|
|
Prio3::new_fixedpoint_boundedl2_vec_sum(num_shares, *dimension).unwrap();
|
|
let mut measurement = vec![fixed!(0: I1F15); *dimension];
|
|
measurement[0] = fixed!(0.5: I1F15);
|
|
let nonce = black_box([0u8; 16]);
|
|
let verify_key = black_box([0u8; 16]);
|
|
let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
|
|
b.iter(|| {
|
|
vdaf.prepare_init(
|
|
&verify_key,
|
|
0,
|
|
&(),
|
|
&nonce,
|
|
&public_share,
|
|
&input_shares[0],
|
|
)
|
|
.unwrap()
|
|
});
|
|
},
|
|
);
|
|
}
|
|
|
|
#[cfg(feature = "multithreaded")]
|
|
{
|
|
for dimension in [10, 100, 1_000] {
|
|
group.bench_with_input(
|
|
BenchmarkId::new("parallel", dimension),
|
|
&dimension,
|
|
|b, dimension| {
|
|
let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F15, _, _>, _, 16> =
|
|
Prio3::new_fixedpoint_boundedl2_vec_sum_multithreaded(
|
|
num_shares, *dimension,
|
|
)
|
|
.unwrap();
|
|
let mut measurement = vec![fixed!(0: I1F15); *dimension];
|
|
measurement[0] = fixed!(0.5: I1F15);
|
|
let nonce = black_box([0u8; 16]);
|
|
let verify_key = black_box([0u8; 16]);
|
|
let (public_share, input_shares) =
|
|
vdaf.shard(&measurement, &nonce).unwrap();
|
|
b.iter(|| {
|
|
vdaf.prepare_init(
|
|
&verify_key,
|
|
0,
|
|
&(),
|
|
&nonce,
|
|
&public_share,
|
|
&input_shares[0],
|
|
)
|
|
.unwrap()
|
|
});
|
|
},
|
|
);
|
|
}
|
|
}
|
|
group.finish();
|
|
|
|
let mut group = c.benchmark_group("prio3fixedpointboundedl2vecsum_i1f31_shard");
|
|
for dimension in [10, 100, 1_000] {
|
|
group.bench_with_input(
|
|
BenchmarkId::new("serial", dimension),
|
|
&dimension,
|
|
|b, dimension| {
|
|
let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _>, _, 16> =
|
|
Prio3::new_fixedpoint_boundedl2_vec_sum(num_shares, *dimension).unwrap();
|
|
let mut measurement = vec![fixed!(0: I1F31); *dimension];
|
|
measurement[0] = fixed!(0.5: I1F31);
|
|
let nonce = black_box([0u8; 16]);
|
|
b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
|
|
},
|
|
);
|
|
}
|
|
|
|
#[cfg(feature = "multithreaded")]
|
|
{
|
|
for dimension in [10, 100, 1_000] {
|
|
group.bench_with_input(
|
|
BenchmarkId::new("parallel", dimension),
|
|
&dimension,
|
|
|b, dimension| {
|
|
let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _>, _, 16> =
|
|
Prio3::new_fixedpoint_boundedl2_vec_sum_multithreaded(
|
|
num_shares, *dimension,
|
|
)
|
|
.unwrap();
|
|
let mut measurement = vec![fixed!(0: I1F31); *dimension];
|
|
measurement[0] = fixed!(0.5: I1F31);
|
|
let nonce = black_box([0u8; 16]);
|
|
b.iter(|| vdaf.shard(&measurement, &nonce).unwrap());
|
|
},
|
|
);
|
|
}
|
|
}
|
|
group.finish();
|
|
|
|
let mut group = c.benchmark_group("prio3fixedpointboundedl2vecsum_i1f31_prepare_init");
|
|
for dimension in [10, 100, 1_000] {
|
|
group.bench_with_input(
|
|
BenchmarkId::new("series", dimension),
|
|
&dimension,
|
|
|b, dimension| {
|
|
let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _>, _, 16> =
|
|
Prio3::new_fixedpoint_boundedl2_vec_sum(num_shares, *dimension).unwrap();
|
|
let mut measurement = vec![fixed!(0: I1F31); *dimension];
|
|
measurement[0] = fixed!(0.5: I1F31);
|
|
let nonce = black_box([0u8; 16]);
|
|
let verify_key = black_box([0u8; 16]);
|
|
let (public_share, input_shares) = vdaf.shard(&measurement, &nonce).unwrap();
|
|
b.iter(|| {
|
|
vdaf.prepare_init(
|
|
&verify_key,
|
|
0,
|
|
&(),
|
|
&nonce,
|
|
&public_share,
|
|
&input_shares[0],
|
|
)
|
|
.unwrap()
|
|
});
|
|
},
|
|
);
|
|
}
|
|
|
|
#[cfg(feature = "multithreaded")]
|
|
{
|
|
for dimension in [10, 100, 1_000] {
|
|
group.bench_with_input(
|
|
BenchmarkId::new("parallel", dimension),
|
|
&dimension,
|
|
|b, dimension| {
|
|
let vdaf: Prio3<FixedPointBoundedL2VecSum<I1F31, _, _>, _, 16> =
|
|
Prio3::new_fixedpoint_boundedl2_vec_sum_multithreaded(
|
|
num_shares, *dimension,
|
|
)
|
|
.unwrap();
|
|
let mut measurement = vec![fixed!(0: I1F31); *dimension];
|
|
measurement[0] = fixed!(0.5: I1F31);
|
|
let nonce = black_box([0u8; 16]);
|
|
let verify_key = black_box([0u8; 16]);
|
|
let (public_share, input_shares) =
|
|
vdaf.shard(&measurement, &nonce).unwrap();
|
|
b.iter(|| {
|
|
vdaf.prepare_init(
|
|
&verify_key,
|
|
0,
|
|
&(),
|
|
&nonce,
|
|
&public_share,
|
|
&input_shares[0],
|
|
)
|
|
.unwrap()
|
|
});
|
|
},
|
|
);
|
|
}
|
|
}
|
|
group.finish();
|
|
}
|
|
}
|
|
|
|
/// Benchmark IdpfPoplar performance.
|
|
#[cfg(feature = "experimental")]
|
|
fn idpf(c: &mut Criterion) {
|
|
let test_sizes = [8usize, 8 * 16, 8 * 256];
|
|
|
|
let mut group = c.benchmark_group("idpf_gen");
|
|
for size in test_sizes.iter() {
|
|
group.throughput(Throughput::Bytes(*size as u64 / 8));
|
|
group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| {
|
|
let bits = iter::repeat_with(random).take(size).collect::<Vec<bool>>();
|
|
let input = IdpfInput::from_bools(&bits);
|
|
|
|
let inner_values = random_vector::<Field64>(size - 1)
|
|
.unwrap()
|
|
.into_iter()
|
|
.map(|random_element| Poplar1IdpfValue::new([Field64::one(), random_element]))
|
|
.collect::<Vec<_>>();
|
|
let leaf_value = Poplar1IdpfValue::new([Field255::one(), random_vector(1).unwrap()[0]]);
|
|
|
|
let idpf = Idpf::new((), ());
|
|
b.iter(|| {
|
|
idpf.gen(&input, inner_values.clone(), leaf_value, &[0; 16])
|
|
.unwrap();
|
|
});
|
|
});
|
|
}
|
|
group.finish();
|
|
|
|
let mut group = c.benchmark_group("idpf_eval");
|
|
for size in test_sizes.iter() {
|
|
group.throughput(Throughput::Bytes(*size as u64 / 8));
|
|
group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| {
|
|
let bits = iter::repeat_with(random).take(size).collect::<Vec<bool>>();
|
|
let input = IdpfInput::from_bools(&bits);
|
|
|
|
let inner_values = random_vector::<Field64>(size - 1)
|
|
.unwrap()
|
|
.into_iter()
|
|
.map(|random_element| Poplar1IdpfValue::new([Field64::one(), random_element]))
|
|
.collect::<Vec<_>>();
|
|
let leaf_value = Poplar1IdpfValue::new([Field255::one(), random_vector(1).unwrap()[0]]);
|
|
|
|
let idpf = Idpf::new((), ());
|
|
let (public_share, keys) = idpf
|
|
.gen(&input, inner_values, leaf_value, &[0; 16])
|
|
.unwrap();
|
|
|
|
b.iter(|| {
|
|
// This is an aggressively small cache, to minimize its impact on the benchmark.
|
|
// In this synthetic benchmark, we are only checking one candidate prefix per level
|
|
// (typically there are many candidate prefixes per level) so the cache hit rate
|
|
// will be unaffected.
|
|
let mut cache = RingBufferCache::new(1);
|
|
|
|
for prefix_length in 1..=size {
|
|
let prefix = input[..prefix_length].to_owned().into();
|
|
idpf.eval(0, &public_share, &keys[0], &prefix, &[0; 16], &mut cache)
|
|
.unwrap();
|
|
}
|
|
});
|
|
});
|
|
}
|
|
group.finish();
|
|
}
|
|
|
|
/// Benchmark Poplar1.
|
|
#[cfg(feature = "experimental")]
|
|
fn poplar1(c: &mut Criterion) {
|
|
let test_sizes = [16_usize, 128, 256];
|
|
|
|
let mut group = c.benchmark_group("poplar1_shard");
|
|
for size in test_sizes.iter() {
|
|
group.throughput(Throughput::Bytes(*size as u64 / 8));
|
|
group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| {
|
|
let vdaf = Poplar1::new_turboshake128(size);
|
|
let mut rng = StdRng::seed_from_u64(RNG_SEED);
|
|
let nonce = rng.gen::<[u8; 16]>();
|
|
|
|
b.iter_batched(
|
|
|| {
|
|
let bits = iter::repeat_with(|| rng.gen())
|
|
.take(size)
|
|
.collect::<Vec<bool>>();
|
|
IdpfInput::from_bools(&bits)
|
|
},
|
|
|measurement| {
|
|
vdaf.shard(&measurement, &nonce).unwrap();
|
|
},
|
|
BatchSize::SmallInput,
|
|
);
|
|
});
|
|
}
|
|
group.finish();
|
|
|
|
let mut group = c.benchmark_group("poplar1_prepare_init");
|
|
for size in test_sizes.iter() {
|
|
group.measurement_time(Duration::from_secs(30)); // slower benchmark
|
|
group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| {
|
|
let vdaf = Poplar1::new_turboshake128(size);
|
|
let mut rng = StdRng::seed_from_u64(RNG_SEED);
|
|
|
|
b.iter_batched(
|
|
|| {
|
|
let verify_key: [u8; 16] = rng.gen();
|
|
let nonce: [u8; 16] = rng.gen();
|
|
|
|
// Parameters are chosen to match Chris Wood's experimental setup:
|
|
// https://github.com/chris-wood/heavy-hitter-comparison
|
|
let (measurements, prefix_tree) = generate_zipf_distributed_batch(
|
|
&mut rng, // rng
|
|
size, // bits
|
|
10, // threshold
|
|
1000, // number of measurements
|
|
128, // Zipf support
|
|
1.03, // Zipf exponent
|
|
);
|
|
|
|
// We are benchmarking preparation of a single report. For this test, it doesn't matter
|
|
// which measurement we generate a report for, so pick the first measurement
|
|
// arbitrarily.
|
|
let (public_share, input_shares) =
|
|
vdaf.shard(&measurements[0], &nonce).unwrap();
|
|
|
|
// For the aggregation paramter, we use the candidate prefixes from the prefix tree
|
|
// for the sampled measurements. Run preparation for the last step, which ought to
|
|
// represent the worst-case performance.
|
|
let agg_param =
|
|
Poplar1AggregationParam::try_from_prefixes(prefix_tree[size - 1].clone())
|
|
.unwrap();
|
|
|
|
(
|
|
verify_key,
|
|
nonce,
|
|
agg_param,
|
|
public_share,
|
|
input_shares.into_iter().next().unwrap(),
|
|
)
|
|
},
|
|
|(verify_key, nonce, agg_param, public_share, input_share)| {
|
|
vdaf.prepare_init(
|
|
&verify_key,
|
|
0,
|
|
&agg_param,
|
|
&nonce,
|
|
&public_share,
|
|
&input_share,
|
|
)
|
|
.unwrap();
|
|
},
|
|
BatchSize::SmallInput,
|
|
);
|
|
});
|
|
}
|
|
group.finish();
|
|
}
|
|
|
|
/// Benchmark VIDPF performance.
|
|
#[cfg(feature = "experimental")]
|
|
fn vidpf(c: &mut Criterion) {
|
|
use prio::vidpf::{Vidpf, VidpfInput, VidpfWeight};
|
|
|
|
let test_sizes = [8usize, 8 * 16, 8 * 256];
|
|
const NONCE_SIZE: usize = 16;
|
|
const NONCE: &[u8; NONCE_SIZE] = b"Test Nonce VIDPF";
|
|
|
|
let mut group = c.benchmark_group("vidpf_gen");
|
|
for size in test_sizes.iter() {
|
|
group.throughput(Throughput::Bytes(*size as u64 / 8));
|
|
group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| {
|
|
let bits = iter::repeat_with(random).take(size).collect::<Vec<bool>>();
|
|
let input = VidpfInput::from_bools(&bits);
|
|
let weight = VidpfWeight::from(vec![Field255::one(), Field255::one()]);
|
|
|
|
let vidpf = Vidpf::<VidpfWeight<Field255>, NONCE_SIZE>::new(2);
|
|
|
|
b.iter(|| {
|
|
let _ = vidpf.gen(&input, &weight, NONCE).unwrap();
|
|
});
|
|
});
|
|
}
|
|
group.finish();
|
|
|
|
let mut group = c.benchmark_group("vidpf_eval");
|
|
for size in test_sizes.iter() {
|
|
group.throughput(Throughput::Bytes(*size as u64 / 8));
|
|
group.bench_with_input(BenchmarkId::from_parameter(size), size, |b, &size| {
|
|
let bits = iter::repeat_with(random).take(size).collect::<Vec<bool>>();
|
|
let input = VidpfInput::from_bools(&bits);
|
|
let weight = VidpfWeight::from(vec![Field255::one(), Field255::one()]);
|
|
let vidpf = Vidpf::<VidpfWeight<Field255>, NONCE_SIZE>::new(2);
|
|
|
|
let (public, keys) = vidpf.gen(&input, &weight, NONCE).unwrap();
|
|
|
|
b.iter(|| {
|
|
let _ = vidpf.eval(&keys[0], &public, &input, NONCE).unwrap();
|
|
});
|
|
});
|
|
}
|
|
group.finish();
|
|
}
|
|
|
|
#[cfg(feature = "experimental")]
|
|
criterion_group!(benches, poplar1, prio3, prio2, poly_mul, prng, idpf, dp_noise, vidpf);
|
|
#[cfg(not(feature = "experimental"))]
|
|
criterion_group!(benches, prio3, prng, poly_mul);
|
|
|
|
criterion_main!(benches);
|