author     Daniel Baumann <daniel.baumann@progress-linux.org>    2024-05-30 18:31:44 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>    2024-05-30 18:31:44 +0000
commit     c23a457e72abe608715ac76f076f47dc42af07a5 (patch)
tree       2772049aaf84b5c9d0ed12ec8d86812f7a7904b6 /vendor/portable-atomic/src/imp/fallback
parent     Releasing progress-linux version 1.73.0+dfsg1-1~progress7.99u1. (diff)
Merging upstream version 1.74.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/portable-atomic/src/imp/fallback')
-rw-r--r--  vendor/portable-atomic/src/imp/fallback/mod.rs              426
-rw-r--r--  vendor/portable-atomic/src/imp/fallback/outline_atomics.rs  170
-rw-r--r--  vendor/portable-atomic/src/imp/fallback/seq_lock.rs         145
-rw-r--r--  vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs    178
-rw-r--r--  vendor/portable-atomic/src/imp/fallback/utils.rs            141
5 files changed, 1060 insertions, 0 deletions
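The diff below adds a seqlock-based fallback: each atomic value is guarded by one lock from a fixed striped array, readers first attempt an optimistic read of the value as relaxed-atomic chunks and validate it against the lock's stamp, and only fall back to taking the write lock if a writer intervened. The following is a minimal, illustrative sketch of that read/write protocol, not the crate's actual code: it protects a single u64 split into two AtomicU32 chunks, uses the textbook odd/even stamp instead of the swap-to-1 locking used in mod.rs, and omits the backoff, cache padding, and striped lock table seen in the diff.

// Illustrative seqlock sketch (assumed names; not part of the diff).
use std::sync::atomic::{fence, AtomicU32, AtomicUsize, Ordering};

struct SeqLocked {
    stamp: AtomicUsize,     // even = unlocked, odd = writer active
    chunks: [AtomicU32; 2], // the protected value, stored as relaxed-atomic chunks
}

impl SeqLocked {
    const fn new(v: u64) -> Self {
        Self {
            stamp: AtomicUsize::new(0),
            chunks: [AtomicU32::new(v as u32), AtomicU32::new((v >> 32) as u32)],
        }
    }

    fn load(&self) -> u64 {
        loop {
            let s1 = self.stamp.load(Ordering::Acquire);
            if s1 % 2 == 1 {
                // A writer holds the lock; spin and retry. (The real code
                // instead falls back to taking the write lock so that
                // writers cannot starve this load.)
                std::hint::spin_loop();
                continue;
            }
            // Optimistic read of the chunks.
            let lo = self.chunks[0].load(Ordering::Relaxed);
            let hi = self.chunks[1].load(Ordering::Relaxed);
            // Validate: if the stamp is unchanged, no writer ran concurrently.
            fence(Ordering::Acquire);
            if self.stamp.load(Ordering::Relaxed) == s1 {
                return (hi as u64) << 32 | lo as u64;
            }
        }
    }

    fn store(&self, v: u64) {
        // Acquire the write lock by making the stamp odd.
        let mut s = self.stamp.load(Ordering::Relaxed);
        loop {
            if s % 2 == 0 {
                match self.stamp.compare_exchange_weak(
                    s,
                    s + 1,
                    Ordering::Acquire,
                    Ordering::Relaxed,
                ) {
                    Ok(_) => break,
                    Err(cur) => s = cur,
                }
            } else {
                std::hint::spin_loop();
                s = self.stamp.load(Ordering::Relaxed);
            }
        }
        // Pairs with the acquire fence in `load`: if a reader observes any of
        // the chunk stores below, it must also observe the odd stamp.
        fence(Ordering::Release);
        self.chunks[0].store(v as u32, Ordering::Relaxed);
        self.chunks[1].store((v >> 32) as u32, Ordering::Relaxed);
        // Release the lock and advance the stamp past any optimistic readers.
        self.stamp.fetch_add(1, Ordering::Release);
    }
}

fn main() {
    let x = SeqLocked::new(1);
    x.store(u64::MAX);
    assert_eq!(x.load(), u64::MAX);
}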
diff --git a/vendor/portable-atomic/src/imp/fallback/mod.rs b/vendor/portable-atomic/src/imp/fallback/mod.rs new file mode 100644 index 000000000..e4875deac --- /dev/null +++ b/vendor/portable-atomic/src/imp/fallback/mod.rs @@ -0,0 +1,426 @@ +// Fallback implementation using global locks. +// +// This implementation uses seqlock for global locks. +// +// This is basically based on global locks in crossbeam-utils's `AtomicCell`, +// but seqlock is implemented in a way that does not depend on UB +// (see comments in optimistic_read method in atomic! macro for details). +// +// Note that we cannot use a lock per atomic type, since the in-memory representation of the atomic +// type and the value type must be the same. + +#![cfg_attr( + any( + all( + target_arch = "x86_64", + not(portable_atomic_no_cmpxchg16b_target_feature), + not(portable_atomic_no_outline_atomics), + not(any(target_env = "sgx", miri)), + ), + all( + target_arch = "powerpc64", + feature = "fallback", + not(portable_atomic_no_outline_atomics), + portable_atomic_outline_atomics, // TODO(powerpc64): currently disabled by default + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all(target_env = "musl", not(target_feature = "crt-static")), + portable_atomic_outline_atomics, + ), + ), + target_os = "freebsd", + ), + not(any(miri, portable_atomic_sanitize_thread)), + ), + all( + any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), + target_arch = "arm", + any(target_os = "linux", target_os = "android"), + not(portable_atomic_no_outline_atomics), + ), + ), + allow(dead_code) +)] + +#[macro_use] +pub(crate) mod utils; + +// Use "wide" sequence lock if the pointer width <= 32 for preventing its counter against wrap +// around. +// +// In narrow architectures (pointer width <= 16), the counter is still <= 32-bit and may be +// vulnerable to wrap around. But it's mostly okay, since in such a primitive hardware, the +// counter will not be increased that fast. +// +// Some 64-bit architectures have ABI with 32-bit pointer width (e.g., x86_64 X32 ABI, +// aarch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is available and fast, +// so use it to implement normal sequence lock. +cfg_has_fast_atomic_64! { + mod seq_lock; +} +cfg_no_fast_atomic_64! { + #[path = "seq_lock_wide.rs"] + mod seq_lock; +} + +use core::{cell::UnsafeCell, mem, sync::atomic::Ordering}; + +use seq_lock::{SeqLock, SeqLockWriteGuard}; +use utils::CachePadded; + +// Some 64-bit architectures have ABI with 32-bit pointer width (e.g., x86_64 X32 ABI, +// aarch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is fast, +// so use it to reduce chunks of byte-wise atomic memcpy. +use seq_lock::{AtomicChunk, Chunk}; + +// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L969-L1016. +#[inline] +#[must_use] +fn lock(addr: usize) -> &'static SeqLock { + // The number of locks is a prime number because we want to make sure `addr % LEN` gets + // dispersed across all locks. + // + // crossbeam-utils 0.8.7 uses 97 here but does not use CachePadded, + // so the actual concurrency level will be smaller. 
+ const LEN: usize = 67; + #[allow(clippy::declare_interior_mutable_const)] + const L: CachePadded<SeqLock> = CachePadded::new(SeqLock::new()); + static LOCKS: [CachePadded<SeqLock>; LEN] = [ + L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, + L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, + L, L, L, L, L, L, L, + ]; + + // If the modulus is a constant number, the compiler will use crazy math to transform this into + // a sequence of cheap arithmetic operations rather than using the slow modulo instruction. + &LOCKS[addr % LEN] +} + +macro_rules! atomic { + ($atomic_type:ident, $int_type:ident, $align:literal) => { + #[repr(C, align($align))] + pub(crate) struct $atomic_type { + v: UnsafeCell<$int_type>, + } + + impl $atomic_type { + const LEN: usize = mem::size_of::<$int_type>() / mem::size_of::<Chunk>(); + + #[inline] + unsafe fn chunks(&self) -> &[AtomicChunk; Self::LEN] { + static_assert!($atomic_type::LEN > 1); + static_assert!(mem::size_of::<$int_type>() % mem::size_of::<Chunk>() == 0); + + // SAFETY: the caller must uphold the safety contract for `chunks`. + unsafe { &*(self.v.get() as *const $int_type as *const [AtomicChunk; Self::LEN]) } + } + + #[inline] + fn optimistic_read(&self) -> $int_type { + // Using `MaybeUninit<[usize; Self::LEN]>` here doesn't change codegen: https://godbolt.org/z/86f8s733M + let mut dst: [Chunk; Self::LEN] = [0; Self::LEN]; + // SAFETY: + // - There are no threads that perform non-atomic concurrent write operations. + // - There is no writer that updates the value using atomic operations of different granularity. + // + // If the atomic operation is not used here, it will cause a data race + // when `write` performs concurrent write operation. + // Such a data race is sometimes considered virtually unproblematic + // in SeqLock implementations: + // + // - https://github.com/Amanieu/seqlock/issues/2 + // - https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L1111-L1116 + // - https://rust-lang.zulipchat.com/#narrow/stream/136281-t-lang.2Fwg-unsafe-code-guidelines/topic/avoiding.20UB.20due.20to.20races.20by.20discarding.20result.3F + // + // However, in our use case, the implementation that loads/stores value as + // chunks of usize is enough fast and sound, so we use that implementation. + // + // See also atomic-memcpy crate, a generic implementation of this pattern: + // https://github.com/taiki-e/atomic-memcpy + let chunks = unsafe { self.chunks() }; + for i in 0..Self::LEN { + dst[i] = chunks[i].load(Ordering::Relaxed); + } + // SAFETY: integers are plain old datatypes so we can always transmute to them. + unsafe { mem::transmute::<[Chunk; Self::LEN], $int_type>(dst) } + } + + #[inline] + fn read(&self, _guard: &SeqLockWriteGuard<'static>) -> $int_type { + // SAFETY: + // - The guard guarantees that we hold the lock to write. + // - The raw pointer is valid because we got it from a reference. + // + // Unlike optimistic_read/write, the atomic operation is not required, + // because we hold the lock to write so that other threads cannot + // perform concurrent write operations. 
+ // + // At the hardware level, core::sync::atomic::Atomic*::load used in optimistic_read + // may be lowered to atomic write operations by LLVM, but it is still considered a + // read operation from the view of the (software) memory model, except that it is + // not allowed in read-only memory (due to UnsafeCell, self.v is not read-only memory). + // See also https://github.com/rust-lang/miri/issues/2463. + // (Note that the above property is about the assembly generated by inline assembly + // or LLVM's backend. Doing it using write operations written in normal Rust code + // or LLVM IR is considered UB, even if it never mutates the value. See also the + // above Miri issue and https://github.com/rust-lang/rust/issues/32976#issuecomment-446775360) + // + // Also, according to atomic-memcpy's asm test, there seems + // to be no tier 1 or tier 2 platform that generates such code + // for a pointer-width relaxed load + acquire fence: + // https://github.com/taiki-e/atomic-memcpy/tree/v0.1.3/tests/asm-test/asm + unsafe { self.v.get().read() } + } + + #[inline] + fn write(&self, val: $int_type, _guard: &SeqLockWriteGuard<'static>) { + // SAFETY: integers are plain old datatypes so we can always transmute them to arrays of integers. + let val = unsafe { mem::transmute::<$int_type, [Chunk; Self::LEN]>(val) }; + // SAFETY: + // - The guard guarantees that we hold the lock to write. + // - There are no threads that perform non-atomic concurrent read or write operations. + // + // See optimistic_read for the reason that atomic operations are used here. + let chunks = unsafe { self.chunks() }; + for i in 0..Self::LEN { + chunks[i].store(val[i], Ordering::Relaxed); + } + } + } + + // Send is implicitly implemented. + // SAFETY: any data races are prevented by the lock and atomic operation. + unsafe impl Sync for $atomic_type {} + + impl_default_no_fetch_ops!($atomic_type, $int_type); + impl_default_bit_opts!($atomic_type, $int_type); + impl $atomic_type { + #[inline] + pub(crate) const fn new(v: $int_type) -> Self { + Self { v: UnsafeCell::new(v) } + } + + #[inline] + pub(crate) fn is_lock_free() -> bool { + Self::is_always_lock_free() + } + #[inline] + pub(crate) const fn is_always_lock_free() -> bool { + false + } + + #[inline] + pub(crate) fn get_mut(&mut self) -> &mut $int_type { + // SAFETY: the mutable reference guarantees unique ownership. + // (UnsafeCell::get_mut requires Rust 1.50) + unsafe { &mut *self.v.get() } + } + + #[inline] + pub(crate) fn into_inner(self) -> $int_type { + self.v.into_inner() + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn load(&self, order: Ordering) -> $int_type { + crate::utils::assert_load_ordering(order); + let lock = lock(self.v.get() as usize); + + // Try doing an optimistic read first. + if let Some(stamp) = lock.optimistic_read() { + let val = self.optimistic_read(); + + if lock.validate_read(stamp) { + return val; + } + } + + // Grab a regular write lock so that writers don't starve this load. + let guard = lock.write(); + let val = self.read(&guard); + // The value hasn't been changed. Drop the guard without incrementing the stamp. 
+ guard.abort(); + val + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn store(&self, val: $int_type, order: Ordering) { + crate::utils::assert_store_ordering(order); + let guard = lock(self.v.get() as usize).write(); + self.write(val, &guard) + } + + #[inline] + pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(val, &guard); + result + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + crate::utils::assert_compare_exchange_ordering(success, failure); + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + if result == current { + self.write(new, &guard); + Ok(result) + } else { + // The value hasn't been changed. Drop the guard without incrementing the stamp. + guard.abort(); + Err(result) + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange_weak( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + self.compare_exchange(current, new, success, failure) + } + + #[inline] + pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(result.wrapping_add(val), &guard); + result + } + + #[inline] + pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(result.wrapping_sub(val), &guard); + result + } + + #[inline] + pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(result & val, &guard); + result + } + + #[inline] + pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(!(result & val), &guard); + result + } + + #[inline] + pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(result | val, &guard); + result + } + + #[inline] + pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(result ^ val, &guard); + result + } + + #[inline] + pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(core::cmp::max(result, val), &guard); + result + } + + #[inline] + pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(core::cmp::min(result, val), &guard); + result + } + + #[inline] + pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + 
self.write(!result, &guard); + result + } + #[inline] + pub(crate) fn not(&self, order: Ordering) { + self.fetch_not(order); + } + + #[inline] + pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(result.wrapping_neg(), &guard); + result + } + #[inline] + pub(crate) fn neg(&self, order: Ordering) { + self.fetch_neg(order); + } + + #[inline] + pub(crate) const fn as_ptr(&self) -> *mut $int_type { + self.v.get() + } + } + }; +} + +#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_64)))] +#[cfg_attr( + not(portable_atomic_no_cfg_target_has_atomic), + cfg(any(test, not(target_has_atomic = "64"))) +)] +cfg_no_fast_atomic_64! { + atomic!(AtomicI64, i64, 8); + atomic!(AtomicU64, u64, 8); +} + +atomic!(AtomicI128, i128, 16); +atomic!(AtomicU128, u128, 16); + +#[cfg(test)] +mod tests { + use super::*; + + cfg_no_fast_atomic_64! { + test_atomic_int!(i64); + test_atomic_int!(u64); + } + test_atomic_int!(i128); + test_atomic_int!(u128); + + // load/store/swap implementation is not affected by signedness, so it is + // enough to test only unsigned types. + cfg_no_fast_atomic_64! { + stress_test!(u64); + } + stress_test!(u128); +} diff --git a/vendor/portable-atomic/src/imp/fallback/outline_atomics.rs b/vendor/portable-atomic/src/imp/fallback/outline_atomics.rs new file mode 100644 index 000000000..985d9ce83 --- /dev/null +++ b/vendor/portable-atomic/src/imp/fallback/outline_atomics.rs @@ -0,0 +1,170 @@ +// Helper for outline-atomics. +// +// On architectures where DW atomics are not supported on older CPUs, we use +// fallback implementation when DW atomic instructions are not supported and +// outline-atomics is enabled. +// +// This module provides helpers to implement them. + +use core::sync::atomic::Ordering; + +#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64"))] +pub(crate) type Udw = u128; +#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64"))] +pub(crate) type AtomicUdw = super::super::fallback::AtomicU128; +#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64"))] +pub(crate) type AtomicIdw = super::super::fallback::AtomicI128; + +#[cfg(target_arch = "arm")] +pub(crate) type Udw = u64; +#[cfg(target_arch = "arm")] +pub(crate) type AtomicUdw = super::super::fallback::AtomicU64; +#[cfg(target_arch = "arm")] +pub(crate) type AtomicIdw = super::super::fallback::AtomicI64; + +// Asserts that the function is called in the correct context. +macro_rules! debug_assert_outline_atomics { + () => { + #[cfg(target_arch = "x86_64")] + { + debug_assert!(!super::detect::detect().has_cmpxchg16b()); + } + #[cfg(target_arch = "powerpc64")] + { + debug_assert!(!super::detect::detect().has_quadword_atomics()); + } + #[cfg(target_arch = "arm")] + { + debug_assert!(!super::has_kuser_cmpxchg64()); + } + }; +} + +#[cold] +pub(crate) unsafe fn atomic_load(src: *mut Udw, order: Ordering) -> Udw { + debug_assert_outline_atomics!(); + #[allow(clippy::cast_ptr_alignment)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + (*(src as *const AtomicUdw)).load(order) + } +} +fn_alias! { + #[cold] + pub(crate) unsafe fn(src: *mut Udw) -> Udw; + // fallback's atomic load has at least acquire semantics. 
+ #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))] + atomic_load_non_seqcst = atomic_load(Ordering::Acquire); + atomic_load_seqcst = atomic_load(Ordering::SeqCst); +} + +#[cold] +pub(crate) unsafe fn atomic_store(dst: *mut Udw, val: Udw, order: Ordering) { + debug_assert_outline_atomics!(); + #[allow(clippy::cast_ptr_alignment)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + (*(dst as *const AtomicUdw)).store(val, order); + } +} +fn_alias! { + #[cold] + pub(crate) unsafe fn(dst: *mut Udw, val: Udw); + // fallback's atomic store has at least release semantics. + #[cfg(not(target_arch = "arm"))] + atomic_store_non_seqcst = atomic_store(Ordering::Release); + atomic_store_seqcst = atomic_store(Ordering::SeqCst); +} + +#[cold] +pub(crate) unsafe fn atomic_compare_exchange( + dst: *mut Udw, + old: Udw, + new: Udw, + success: Ordering, + failure: Ordering, +) -> (Udw, bool) { + debug_assert_outline_atomics!(); + #[allow(clippy::cast_ptr_alignment)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + match (*(dst as *const AtomicUdw)).compare_exchange(old, new, success, failure) { + Ok(v) => (v, true), + Err(v) => (v, false), + } + } +} +fn_alias! { + #[cold] + pub(crate) unsafe fn(dst: *mut Udw, old: Udw, new: Udw) -> (Udw, bool); + // fallback's atomic CAS has at least AcqRel semantics. + #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))] + atomic_compare_exchange_non_seqcst + = atomic_compare_exchange(Ordering::AcqRel, Ordering::Acquire); + atomic_compare_exchange_seqcst + = atomic_compare_exchange(Ordering::SeqCst, Ordering::SeqCst); +} + +macro_rules! atomic_rmw_3 { + ( + $name:ident($atomic_type:ident::$method_name:ident), + $non_seqcst_alias:ident, $seqcst_alias:ident + ) => { + #[cold] + pub(crate) unsafe fn $name(dst: *mut Udw, val: Udw, order: Ordering) -> Udw { + debug_assert_outline_atomics!(); + #[allow(clippy::cast_ptr_alignment)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + (*(dst as *const $atomic_type)).$method_name(val as _, order) as Udw + } + } + fn_alias! { + #[cold] + pub(crate) unsafe fn(dst: *mut Udw, val: Udw) -> Udw; + // fallback's atomic RMW has at least AcqRel semantics. + #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))] + $non_seqcst_alias = $name(Ordering::AcqRel); + $seqcst_alias = $name(Ordering::SeqCst); + } + }; +} +macro_rules! atomic_rmw_2 { + ( + $name:ident($atomic_type:ident::$method_name:ident), + $non_seqcst_alias:ident, $seqcst_alias:ident + ) => { + #[cold] + pub(crate) unsafe fn $name(dst: *mut Udw, order: Ordering) -> Udw { + debug_assert_outline_atomics!(); + #[allow(clippy::cast_ptr_alignment)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + (*(dst as *const $atomic_type)).$method_name(order) as Udw + } + } + fn_alias! { + #[cold] + pub(crate) unsafe fn(dst: *mut Udw) -> Udw; + // fallback's atomic RMW has at least AcqRel semantics. 
+ #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))] + $non_seqcst_alias = $name(Ordering::AcqRel); + $seqcst_alias = $name(Ordering::SeqCst); + } + }; +} + +atomic_rmw_3!(atomic_swap(AtomicUdw::swap), atomic_swap_non_seqcst, atomic_swap_seqcst); +atomic_rmw_3!(atomic_add(AtomicUdw::fetch_add), atomic_add_non_seqcst, atomic_add_seqcst); +atomic_rmw_3!(atomic_sub(AtomicUdw::fetch_sub), atomic_sub_non_seqcst, atomic_sub_seqcst); +atomic_rmw_3!(atomic_and(AtomicUdw::fetch_and), atomic_and_non_seqcst, atomic_and_seqcst); +atomic_rmw_3!(atomic_nand(AtomicUdw::fetch_nand), atomic_nand_non_seqcst, atomic_nand_seqcst); +atomic_rmw_3!(atomic_or(AtomicUdw::fetch_or), atomic_or_non_seqcst, atomic_or_seqcst); +atomic_rmw_3!(atomic_xor(AtomicUdw::fetch_xor), atomic_xor_non_seqcst, atomic_xor_seqcst); +atomic_rmw_3!(atomic_max(AtomicIdw::fetch_max), atomic_max_non_seqcst, atomic_max_seqcst); +atomic_rmw_3!(atomic_umax(AtomicUdw::fetch_max), atomic_umax_non_seqcst, atomic_umax_seqcst); +atomic_rmw_3!(atomic_min(AtomicIdw::fetch_min), atomic_min_non_seqcst, atomic_min_seqcst); +atomic_rmw_3!(atomic_umin(AtomicUdw::fetch_min), atomic_umin_non_seqcst, atomic_umin_seqcst); + +atomic_rmw_2!(atomic_not(AtomicUdw::fetch_not), atomic_not_non_seqcst, atomic_not_seqcst); +atomic_rmw_2!(atomic_neg(AtomicUdw::fetch_neg), atomic_neg_non_seqcst, atomic_neg_seqcst); diff --git a/vendor/portable-atomic/src/imp/fallback/seq_lock.rs b/vendor/portable-atomic/src/imp/fallback/seq_lock.rs new file mode 100644 index 000000000..d86b02e10 --- /dev/null +++ b/vendor/portable-atomic/src/imp/fallback/seq_lock.rs @@ -0,0 +1,145 @@ +// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/seq_lock.rs. + +use core::{ + mem::ManuallyDrop, + sync::atomic::{self, Ordering}, +}; + +use super::utils::Backoff; + +// See mod.rs for details. +#[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))] +pub(super) use core::sync::atomic::AtomicU64 as AtomicStamp; +#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))] +pub(super) use core::sync::atomic::AtomicUsize as AtomicStamp; +#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))] +pub(super) type Stamp = usize; +#[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))] +pub(super) type Stamp = u64; + +// See mod.rs for details. +pub(super) type AtomicChunk = AtomicStamp; +pub(super) type Chunk = Stamp; + +/// A simple stamped lock. +pub(super) struct SeqLock { + /// The current state of the lock. + /// + /// All bits except the least significant one hold the current stamp. When locked, the state + /// equals 1 and doesn't contain a valid stamp. + state: AtomicStamp, +} + +impl SeqLock { + #[inline] + pub(super) const fn new() -> Self { + Self { state: AtomicStamp::new(0) } + } + + /// If not locked, returns the current stamp. + /// + /// This method should be called before optimistic reads. + #[inline] + pub(super) fn optimistic_read(&self) -> Option<Stamp> { + let state = self.state.load(Ordering::Acquire); + if state == 1 { + None + } else { + Some(state) + } + } + + /// Returns `true` if the current stamp is equal to `stamp`. + /// + /// This method should be called after optimistic reads to check whether they are valid. The + /// argument `stamp` should correspond to the one returned by method `optimistic_read`. 
+ #[inline] + pub(super) fn validate_read(&self, stamp: Stamp) -> bool { + atomic::fence(Ordering::Acquire); + self.state.load(Ordering::Relaxed) == stamp + } + + /// Grabs the lock for writing. + #[inline] + pub(super) fn write(&self) -> SeqLockWriteGuard<'_> { + let mut backoff = Backoff::new(); + loop { + let previous = self.state.swap(1, Ordering::Acquire); + + if previous != 1 { + atomic::fence(Ordering::Release); + + return SeqLockWriteGuard { lock: self, state: previous }; + } + + while self.state.load(Ordering::Relaxed) == 1 { + backoff.snooze(); + } + } + } +} + +/// An RAII guard that releases the lock and increments the stamp when dropped. +#[must_use] +pub(super) struct SeqLockWriteGuard<'a> { + /// The parent lock. + lock: &'a SeqLock, + + /// The stamp before locking. + state: Stamp, +} + +impl SeqLockWriteGuard<'_> { + /// Releases the lock without incrementing the stamp. + #[inline] + pub(super) fn abort(self) { + // We specifically don't want to call drop(), since that's + // what increments the stamp. + let this = ManuallyDrop::new(self); + + // Restore the stamp. + // + // Release ordering for synchronizing with `optimistic_read`. + this.lock.state.store(this.state, Ordering::Release); + } +} + +impl Drop for SeqLockWriteGuard<'_> { + #[inline] + fn drop(&mut self) { + // Release the lock and increment the stamp. + // + // Release ordering for synchronizing with `optimistic_read`. + self.lock.state.store(self.state.wrapping_add(2), Ordering::Release); + } +} + +#[cfg(test)] +mod tests { + use super::SeqLock; + + #[test] + fn smoke() { + let lock = SeqLock::new(); + let before = lock.optimistic_read().unwrap(); + assert!(lock.validate_read(before)); + { + let _guard = lock.write(); + } + assert!(!lock.validate_read(before)); + let after = lock.optimistic_read().unwrap(); + assert_ne!(before, after); + } + + #[test] + fn test_abort() { + let lock = SeqLock::new(); + let before = lock.optimistic_read().unwrap(); + { + let guard = lock.write(); + guard.abort(); + } + let after = lock.optimistic_read().unwrap(); + assert_eq!(before, after, "aborted write does not update the stamp"); + } +} diff --git a/vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs b/vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs new file mode 100644 index 000000000..74b08d24f --- /dev/null +++ b/vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs @@ -0,0 +1,178 @@ +// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/seq_lock_wide.rs. + +use core::{ + mem::ManuallyDrop, + sync::atomic::{self, AtomicUsize, Ordering}, +}; + +use super::utils::Backoff; + +// See mod.rs for details. +pub(super) type AtomicChunk = AtomicUsize; +pub(super) type Chunk = usize; + +/// A simple stamped lock. +/// +/// The state is represented as two `AtomicUsize`: `state_hi` for high bits and `state_lo` for low +/// bits. +pub(super) struct SeqLock { + /// The high bits of the current state of the lock. + state_hi: AtomicUsize, + + /// The low bits of the current state of the lock. + /// + /// All bits except the least significant one hold the current stamp. When locked, the state_lo + /// equals 1 and doesn't contain a valid stamp. + state_lo: AtomicUsize, +} + +impl SeqLock { + #[inline] + pub(super) const fn new() -> Self { + Self { state_hi: AtomicUsize::new(0), state_lo: AtomicUsize::new(0) } + } + + /// If not locked, returns the current stamp. + /// + /// This method should be called before optimistic reads. 
+ #[inline] + pub(super) fn optimistic_read(&self) -> Option<(usize, usize)> { + // The acquire loads from `state_hi` and `state_lo` synchronize with the release stores in + // `SeqLockWriteGuard::drop` and `SeqLockWriteGuard::abort`. + // + // As a consequence, we can make sure that (1) all writes within the era of `state_hi - 1` + // happens before now; and therefore, (2) if `state_lo` is even, all writes within the + // critical section of (`state_hi`, `state_lo`) happens before now. + let state_hi = self.state_hi.load(Ordering::Acquire); + let state_lo = self.state_lo.load(Ordering::Acquire); + if state_lo == 1 { + None + } else { + Some((state_hi, state_lo)) + } + } + + /// Returns `true` if the current stamp is equal to `stamp`. + /// + /// This method should be called after optimistic reads to check whether they are valid. The + /// argument `stamp` should correspond to the one returned by method `optimistic_read`. + #[inline] + pub(super) fn validate_read(&self, stamp: (usize, usize)) -> bool { + // Thanks to the fence, if we're noticing any modification to the data at the critical + // section of `(stamp.0, stamp.1)`, then the critical section's write of 1 to state_lo should be + // visible. + atomic::fence(Ordering::Acquire); + + // So if `state_lo` coincides with `stamp.1`, then either (1) we're noticing no modification + // to the data after the critical section of `(stamp.0, stamp.1)`, or (2) `state_lo` wrapped + // around. + // + // If (2) is the case, the acquire ordering ensures we see the new value of `state_hi`. + let state_lo = self.state_lo.load(Ordering::Acquire); + + // If (2) is the case and `state_hi` coincides with `stamp.0`, then `state_hi` also wrapped + // around, which we give up to correctly validate the read. + let state_hi = self.state_hi.load(Ordering::Relaxed); + + // Except for the case that both `state_hi` and `state_lo` wrapped around, the following + // condition implies that we're noticing no modification to the data after the critical + // section of `(stamp.0, stamp.1)`. + (state_hi, state_lo) == stamp + } + + /// Grabs the lock for writing. + #[inline] + pub(super) fn write(&self) -> SeqLockWriteGuard<'_> { + let mut backoff = Backoff::new(); + loop { + let previous = self.state_lo.swap(1, Ordering::Acquire); + + if previous != 1 { + // To synchronize with the acquire fence in `validate_read` via any modification to + // the data at the critical section of `(state_hi, previous)`. + atomic::fence(Ordering::Release); + + return SeqLockWriteGuard { lock: self, state_lo: previous }; + } + + while self.state_lo.load(Ordering::Relaxed) == 1 { + backoff.snooze(); + } + } + } +} + +/// An RAII guard that releases the lock and increments the stamp when dropped. +#[must_use] +pub(super) struct SeqLockWriteGuard<'a> { + /// The parent lock. + lock: &'a SeqLock, + + /// The stamp before locking. + state_lo: usize, +} + +impl SeqLockWriteGuard<'_> { + /// Releases the lock without incrementing the stamp. + #[inline] + pub(super) fn abort(self) { + // We specifically don't want to call drop(), since that's + // what increments the stamp. + let this = ManuallyDrop::new(self); + + // Restore the stamp. + // + // Release ordering for synchronizing with `optimistic_read`. + this.lock.state_lo.store(this.state_lo, Ordering::Release); + } +} + +impl Drop for SeqLockWriteGuard<'_> { + #[inline] + fn drop(&mut self) { + let state_lo = self.state_lo.wrapping_add(2); + + // Increase the high bits if the low bits wrap around. 
+ // + // Release ordering for synchronizing with `optimistic_read`. + if state_lo == 0 { + let state_hi = self.lock.state_hi.load(Ordering::Relaxed); + self.lock.state_hi.store(state_hi.wrapping_add(1), Ordering::Release); + } + + // Release the lock and increment the stamp. + // + // Release ordering for synchronizing with `optimistic_read`. + self.lock.state_lo.store(state_lo, Ordering::Release); + } +} + +#[cfg(test)] +mod tests { + use super::SeqLock; + + #[test] + fn smoke() { + let lock = SeqLock::new(); + let before = lock.optimistic_read().unwrap(); + assert!(lock.validate_read(before)); + { + let _guard = lock.write(); + } + assert!(!lock.validate_read(before)); + let after = lock.optimistic_read().unwrap(); + assert_ne!(before, after); + } + + #[test] + fn test_abort() { + let lock = SeqLock::new(); + let before = lock.optimistic_read().unwrap(); + { + let guard = lock.write(); + guard.abort(); + } + let after = lock.optimistic_read().unwrap(); + assert_eq!(before, after, "aborted write does not update the stamp"); + } +} diff --git a/vendor/portable-atomic/src/imp/fallback/utils.rs b/vendor/portable-atomic/src/imp/fallback/utils.rs new file mode 100644 index 000000000..c78c625b0 --- /dev/null +++ b/vendor/portable-atomic/src/imp/fallback/utils.rs @@ -0,0 +1,141 @@ +use core::ops; + +// TODO: mips32r6, mips64r6 +// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/d49a0f8454499ced8af0b61aeb661379c4eb0588/crossbeam-utils/src/cache_padded.rs. +/// Pads and aligns a value to the length of a cache line. +// Starting from Intel's Sandy Bridge, spatial prefetcher is now pulling pairs of 64-byte cache +// lines at a time, so we have to align to 128 bytes rather than 64. +// +// Sources: +// - https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf +// - https://github.com/facebook/folly/blob/1b5288e6eea6df074758f877c849b6e73bbb9fbb/folly/lang/Align.h#L107 +// +// ARM's big.LITTLE architecture has asymmetric cores and "big" cores have 128-byte cache line size. +// +// Sources: +// - https://www.mono-project.com/news/2016/09/12/arm64-icache/ +// +// powerpc64 has 128-byte cache line size. +// +// Sources: +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_ppc64x.go#L9 +#[cfg_attr( + any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "powerpc64"), + repr(align(128)) +)] +// arm, mips, mips64, riscv64, sparc, and hexagon have 32-byte cache line size. +// +// Sources: +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_arm.go#L7 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips.go#L7 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mipsle.go#L7 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips64x.go#L9 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_riscv64.go#L7 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L17 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/hexagon/include/asm/cache.h#L12 +// +// riscv32 is assumed not to exceed the cache line size of riscv64. 
+#[cfg_attr( + any( + target_arch = "arm", + target_arch = "mips", + target_arch = "mips64", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "sparc", + target_arch = "hexagon", + ), + repr(align(32)) +)] +// m68k has 16-byte cache line size. +// +// Sources: +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/m68k/include/asm/cache.h#L9 +#[cfg_attr(target_arch = "m68k", repr(align(16)))] +// s390x has 256-byte cache line size. +// +// Sources: +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_s390x.go#L7 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/s390/include/asm/cache.h#L13 +#[cfg_attr(target_arch = "s390x", repr(align(256)))] +// x86, wasm, and sparc64 have 64-byte cache line size. +// +// Sources: +// - https://github.com/golang/go/blob/dda2991c2ea0c5914714469c4defc2562a907230/src/internal/cpu/cpu_x86.go#L9 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_wasm.go#L7 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L19 +// +// All others are assumed to have 64-byte cache line size. +#[cfg_attr( + not(any( + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "powerpc64", + target_arch = "arm", + target_arch = "mips", + target_arch = "mips64", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "sparc", + target_arch = "hexagon", + target_arch = "m68k", + target_arch = "s390x", + )), + repr(align(64)) +)] +pub(crate) struct CachePadded<T> { + value: T, +} + +impl<T> CachePadded<T> { + #[inline] + pub(crate) const fn new(value: T) -> Self { + Self { value } + } +} + +impl<T> ops::Deref for CachePadded<T> { + type Target = T; + + #[inline] + fn deref(&self) -> &T { + &self.value + } +} + +// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/backoff.rs. +// Adjusted to reduce spinning. +/// Performs exponential backoff in spin loops. +pub(crate) struct Backoff { + step: u32, +} + +// https://github.com/oneapi-src/oneTBB/blob/v2021.5.0/include/oneapi/tbb/detail/_utils.h#L46-L48 +const SPIN_LIMIT: u32 = 4; + +impl Backoff { + #[inline] + pub(crate) const fn new() -> Self { + Self { step: 0 } + } + + #[inline] + pub(crate) fn snooze(&mut self) { + if self.step <= SPIN_LIMIT { + for _ in 0..1 << self.step { + #[allow(deprecated)] + core::sync::atomic::spin_loop_hint(); + } + self.step += 1; + } else { + #[cfg(not(feature = "std"))] + for _ in 0..1 << self.step { + #[allow(deprecated)] + core::sync::atomic::spin_loop_hint(); + } + + #[cfg(feature = "std")] + std::thread::yield_now(); + } + } +} |
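Usage note (a sketch, assuming portable-atomic is used as a normal dependency rather than through this vendored copy): on targets without native 128-bit atomic instructions, the public AtomicU128/AtomicI128 types are backed by the seqlock fallback added in this diff, while the API is identical to the lock-free path; is_lock_free() tells callers which implementation they got.

// Sketch: exercising portable_atomic::AtomicU128 through its public API.
// On targets lacking native 128-bit atomics these calls route to the
// seqlock fallback above; the observable behavior is the same either way.
use portable_atomic::AtomicU128;
use std::sync::atomic::Ordering;

fn main() {
    let a = AtomicU128::new(1);
    a.store(2, Ordering::SeqCst);
    assert_eq!(a.swap(40, Ordering::SeqCst), 2);
    assert_eq!(a.compare_exchange(40, 42, Ordering::SeqCst, Ordering::SeqCst), Ok(40));
    assert_eq!(a.fetch_add(0, Ordering::SeqCst), 42);
    // Reports false when the seqlock fallback is in use.
    println!("128-bit atomics lock-free: {}", AtomicU128::is_lock_free());
}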