summaryrefslogtreecommitdiffstats
path: root/third_party/rust/once_cell/examples/bench_acquire.rs
blob: 1518047460477b6cd2d73256adc4d1bd906903ca (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
//! Benchmark the overhead that the synchronization of `OnceCell::get` causes.
//! We do some other operations that write to memory to get an imprecise but somewhat realistic
//! measurement.

use once_cell::sync::OnceCell;
use std::sync::atomic::{AtomicUsize, Ordering};

const N_THREADS: usize = 16;
const N_ROUNDS: usize = 1_000_000;

static CELL: OnceCell<usize> = OnceCell::new();
static OTHER: AtomicUsize = AtomicUsize::new(0);

fn main() {
    let start = std::time::Instant::now();
    let threads =
        (0..N_THREADS).map(|i| std::thread::spawn(move || thread_main(i))).collect::<Vec<_>>();
    for thread in threads {
        thread.join().unwrap();
    }
    println!("{:?}", start.elapsed());
    println!("{:?}", OTHER.load(Ordering::Relaxed));
}

#[inline(never)]
fn thread_main(i: usize) {
    // The operations we do here don't really matter, as long as we do multiple writes, and
    // everything is messy enough to prevent the compiler from optimizing the loop away.
    let mut data = [i; 128];
    let mut accum = 0usize;
    for _ in 0..N_ROUNDS {
        let _value = CELL.get_or_init(|| i + 1);
        let k = OTHER.fetch_add(data[accum & 0x7F] as usize, Ordering::Relaxed);
        for j in data.iter_mut() {
            *j = (*j).wrapping_add(accum);
            accum = accum.wrapping_add(k);
        }
    }
}