diff options
Diffstat (limited to 'vendor/portable-atomic/src/imp')
34 files changed, 10273 insertions, 0 deletions
diff --git a/vendor/portable-atomic/src/imp/arm_linux.rs b/vendor/portable-atomic/src/imp/arm_linux.rs new file mode 100644 index 000000000..e506254d9 --- /dev/null +++ b/vendor/portable-atomic/src/imp/arm_linux.rs @@ -0,0 +1,484 @@ +// 64-bit atomic implementation using kuser_cmpxchg64 on pre-v6 ARM Linux/Android. +// +// Refs: +// - https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt +// - https://github.com/rust-lang/compiler-builtins/blob/0.1.88/src/arm_linux.rs +// +// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use +// this module and use fallback implementation instead. + +// TODO: Since Rust 1.64, the Linux kernel requirement for Rust when using std is 3.2+, so it should +// be possible to omit the dynamic kernel version check if the std feature is enabled on Rust 1.64+. +// https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html + +#[path = "fallback/outline_atomics.rs"] +mod fallback; + +#[cfg(not(portable_atomic_no_asm))] +use core::arch::asm; +use core::{cell::UnsafeCell, mem, sync::atomic::Ordering}; + +/// A 64-bit value represented as a pair of 32-bit values. +/// +/// This type is `#[repr(C)]`, both fields have the same in-memory representation +/// and are plain old datatypes, so access to the fields is always safe. 
+#[derive(Clone, Copy)] +#[repr(C)] +union U64 { + whole: u64, + pair: Pair, +} +#[derive(Clone, Copy)] +#[repr(C)] +struct Pair { + lo: u32, + hi: u32, +} + +// https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt +const KUSER_HELPER_VERSION: usize = 0xFFFF0FFC; +// __kuser_helper_version >= 5 (kernel version 3.1+) +const KUSER_CMPXCHG64: usize = 0xFFFF0F60; +#[inline] +fn __kuser_helper_version() -> i32 { + use core::sync::atomic::AtomicI32; + + static CACHE: AtomicI32 = AtomicI32::new(0); + let mut v = CACHE.load(Ordering::Relaxed); + if v != 0 { + return v; + } + // SAFETY: core assumes that at least __kuser_cmpxchg (__kuser_helper_version >= 2) is available + // on this platform. __kuser_helper_version is always available on such a platform. + v = unsafe { (KUSER_HELPER_VERSION as *const i32).read() }; + CACHE.store(v, Ordering::Relaxed); + v +} +#[inline] +fn has_kuser_cmpxchg64() -> bool { + // Note: detect_false cfg is intended to make it easy for portable-atomic developers to + // test cases such as has_cmpxchg16b == false, has_lse == false, + // __kuser_helper_version < 5, etc., and is not a public API. + if cfg!(portable_atomic_test_outline_atomics_detect_false) { + return false; + } + __kuser_helper_version() >= 5 +} +#[inline] +unsafe fn __kuser_cmpxchg64(old_val: *const u64, new_val: *const u64, ptr: *mut u64) -> bool { + // SAFETY: the caller must uphold the safety contract. + unsafe { + let f: extern "C" fn(*const u64, *const u64, *mut u64) -> u32 = + mem::transmute(KUSER_CMPXCHG64 as *const ()); + f(old_val, new_val, ptr) == 0 + } +} + +// 64-bit atomic load by two 32-bit atomic loads. +#[inline] +unsafe fn byte_wise_atomic_load(src: *const u64) -> u64 { + // SAFETY: the caller must uphold the safety contract. 
+ unsafe { + let (prev_lo, prev_hi); + asm!( + "ldr {prev_lo}, [{src}]", + "ldr {prev_hi}, [{src}, #4]", + src = in(reg) src, + prev_lo = out(reg) prev_lo, + prev_hi = out(reg) prev_hi, + options(pure, nostack, preserves_flags, readonly), + ); + U64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } +} + +#[inline(always)] +unsafe fn atomic_update_kuser_cmpxchg64<F>(dst: *mut u64, mut f: F) -> u64 +where + F: FnMut(u64) -> u64, +{ + debug_assert!(dst as usize % 8 == 0); + debug_assert!(has_kuser_cmpxchg64()); + // SAFETY: the caller must uphold the safety contract. + unsafe { + loop { + // This is not a single-copy atomic read, but this is ok because the subsequent + // CAS will check for consistency. + // + // Note that the C++20 memory model does not allow mixed-sized atomic access, + // so we must use inline assembly to implement byte_wise_atomic_load. + // (i.e., byte-wise atomic based on the standard library's atomic types + // cannot be used here). + let old = byte_wise_atomic_load(dst); + let next = f(old); + if __kuser_cmpxchg64(&old, &next, dst) { + return old; + } + } + } +} + +macro_rules! atomic_with_ifunc { + ( + unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)? { $($kuser_cmpxchg64_fn_body:tt)* } + fallback = $seqcst_fallback_fn:ident + ) => { + #[inline] + unsafe fn $name($($arg)*) $(-> $ret_ty)? { + unsafe fn kuser_cmpxchg64_fn($($arg)*) $(-> $ret_ty)? { + $($kuser_cmpxchg64_fn_body)* + } + // SAFETY: the caller must uphold the safety contract. + // We only call __kuser_cmpxchg64 if it is available. + unsafe { + ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? { + if has_kuser_cmpxchg64() { + kuser_cmpxchg64_fn + } else { + // Use SeqCst because __kuser_cmpxchg64 is always SeqCst. + // https://github.com/torvalds/linux/blob/v6.1/arch/arm/kernel/entry-armv.S#L918-L925 + fallback::$seqcst_fallback_fn + } + }) + } + } + }; +} + +atomic_with_ifunc! { + unsafe fn atomic_load(src: *mut u64) -> u64 { + // SAFETY: the caller must uphold the safety contract. 
+ unsafe { atomic_update_kuser_cmpxchg64(src, |old| old) } + } + fallback = atomic_load_seqcst +} +atomic_with_ifunc! { + unsafe fn atomic_store(dst: *mut u64, val: u64) { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update_kuser_cmpxchg64(dst, |_| val); } + } + fallback = atomic_store_seqcst +} +atomic_with_ifunc! { + unsafe fn atomic_swap(dst: *mut u64, val: u64) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update_kuser_cmpxchg64(dst, |_| val) } + } + fallback = atomic_swap_seqcst +} +atomic_with_ifunc! { + unsafe fn atomic_compare_exchange(dst: *mut u64, old: u64, new: u64) -> (u64, bool) { + // SAFETY: the caller must uphold the safety contract. + let res = unsafe { atomic_update_kuser_cmpxchg64(dst, |v| if v == old { new } else { v }) }; + (res, res == old) + } + fallback = atomic_compare_exchange_seqcst +} +atomic_with_ifunc! { + unsafe fn atomic_add(dst: *mut u64, val: u64) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update_kuser_cmpxchg64(dst, |x| x.wrapping_add(val)) } + } + fallback = atomic_add_seqcst +} +atomic_with_ifunc! { + unsafe fn atomic_sub(dst: *mut u64, val: u64) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update_kuser_cmpxchg64(dst, |x| x.wrapping_sub(val)) } + } + fallback = atomic_sub_seqcst +} +atomic_with_ifunc! { + unsafe fn atomic_and(dst: *mut u64, val: u64) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update_kuser_cmpxchg64(dst, |x| x & val) } + } + fallback = atomic_and_seqcst +} +atomic_with_ifunc! { + unsafe fn atomic_nand(dst: *mut u64, val: u64) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update_kuser_cmpxchg64(dst, |x| !(x & val)) } + } + fallback = atomic_nand_seqcst +} +atomic_with_ifunc! { + unsafe fn atomic_or(dst: *mut u64, val: u64) -> u64 { + // SAFETY: the caller must uphold the safety contract. 
+ unsafe { atomic_update_kuser_cmpxchg64(dst, |x| x | val) } + } + fallback = atomic_or_seqcst +} +atomic_with_ifunc! { + unsafe fn atomic_xor(dst: *mut u64, val: u64) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update_kuser_cmpxchg64(dst, |x| x ^ val) } + } + fallback = atomic_xor_seqcst +} +atomic_with_ifunc! { + unsafe fn atomic_max(dst: *mut u64, val: u64) -> u64 { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + atomic_update_kuser_cmpxchg64(dst, |x| core::cmp::max(x as i64, val as i64) as u64) + } + } + fallback = atomic_max_seqcst +} +atomic_with_ifunc! { + unsafe fn atomic_umax(dst: *mut u64, val: u64) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update_kuser_cmpxchg64(dst, |x| core::cmp::max(x, val)) } + } + fallback = atomic_umax_seqcst +} +atomic_with_ifunc! { + unsafe fn atomic_min(dst: *mut u64, val: u64) -> u64 { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + atomic_update_kuser_cmpxchg64(dst, |x| core::cmp::min(x as i64, val as i64) as u64) + } + } + fallback = atomic_min_seqcst +} +atomic_with_ifunc! { + unsafe fn atomic_umin(dst: *mut u64, val: u64) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update_kuser_cmpxchg64(dst, |x| core::cmp::min(x, val)) } + } + fallback = atomic_umin_seqcst +} +atomic_with_ifunc! { + unsafe fn atomic_not(dst: *mut u64) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update_kuser_cmpxchg64(dst, |x| !x) } + } + fallback = atomic_not_seqcst +} +atomic_with_ifunc! { + unsafe fn atomic_neg(dst: *mut u64) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update_kuser_cmpxchg64(dst, u64::wrapping_neg) } + } + fallback = atomic_neg_seqcst +} + +macro_rules! 
atomic64 { + ($atomic_type:ident, $int_type:ident, $atomic_max:ident, $atomic_min:ident) => { + #[repr(C, align(8))] + pub(crate) struct $atomic_type { + v: UnsafeCell<$int_type>, + } + + // Send is implicitly implemented. + // SAFETY: any data races are prevented by the kernel user helper or the lock. + unsafe impl Sync for $atomic_type {} + + impl_default_no_fetch_ops!($atomic_type, $int_type); + impl_default_bit_opts!($atomic_type, $int_type); + impl $atomic_type { + #[inline] + pub(crate) const fn new(v: $int_type) -> Self { + Self { v: UnsafeCell::new(v) } + } + + #[inline] + pub(crate) fn is_lock_free() -> bool { + has_kuser_cmpxchg64() + } + #[inline] + pub(crate) const fn is_always_lock_free() -> bool { + false + } + + #[inline] + pub(crate) fn get_mut(&mut self) -> &mut $int_type { + // SAFETY: the mutable reference guarantees unique ownership. + // (UnsafeCell::get_mut requires Rust 1.50) + unsafe { &mut *self.v.get() } + } + + #[inline] + pub(crate) fn into_inner(self) -> $int_type { + self.v.into_inner() + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn load(&self, order: Ordering) -> $int_type { + crate::utils::assert_load_ordering(order); + // SAFETY: any data races are prevented by the kernel user helper or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { atomic_load(self.v.get().cast::<u64>()) as $int_type } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn store(&self, val: $int_type, order: Ordering) { + crate::utils::assert_store_ordering(order); + // SAFETY: any data races are prevented by the kernel user helper or the lock + // and the raw pointer passed in is valid because we got it from a reference. 
+ unsafe { atomic_store(self.v.get().cast::<u64>(), val as u64) } + } + + #[inline] + pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by the kernel user helper or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { atomic_swap(self.v.get().cast::<u64>(), val as u64) as $int_type } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + crate::utils::assert_compare_exchange_ordering(success, failure); + // SAFETY: any data races are prevented by the kernel user helper or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { + let (res, ok) = atomic_compare_exchange( + self.v.get().cast::<u64>(), + current as u64, + new as u64, + ); + if ok { + Ok(res as $int_type) + } else { + Err(res as $int_type) + } + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange_weak( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + self.compare_exchange(current, new, success, failure) + } + + #[inline] + pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by the kernel user helper or the lock + // and the raw pointer passed in is valid because we got it from a reference. 
+ unsafe { atomic_add(self.v.get().cast::<u64>(), val as u64) as $int_type } + } + + #[inline] + pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by the kernel user helper or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { atomic_sub(self.v.get().cast::<u64>(), val as u64) as $int_type } + } + + #[inline] + pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by the kernel user helper or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { atomic_and(self.v.get().cast::<u64>(), val as u64) as $int_type } + } + + #[inline] + pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by the kernel user helper or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { atomic_nand(self.v.get().cast::<u64>(), val as u64) as $int_type } + } + + #[inline] + pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by the kernel user helper or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { atomic_or(self.v.get().cast::<u64>(), val as u64) as $int_type } + } + + #[inline] + pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by the kernel user helper or the lock + // and the raw pointer passed in is valid because we got it from a reference. 
+ unsafe { atomic_xor(self.v.get().cast::<u64>(), val as u64) as $int_type } + } + + #[inline] + pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by the kernel user helper or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { $atomic_max(self.v.get().cast::<u64>(), val as u64) as $int_type } + } + + #[inline] + pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by the kernel user helper or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { $atomic_min(self.v.get().cast::<u64>(), val as u64) as $int_type } + } + + #[inline] + pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by the kernel user helper or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { atomic_not(self.v.get().cast::<u64>()) as $int_type } + } + #[inline] + pub(crate) fn not(&self, order: Ordering) { + self.fetch_not(order); + } + + #[inline] + pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by the kernel user helper or the lock + // and the raw pointer passed in is valid because we got it from a reference. 
+ unsafe { atomic_neg(self.v.get().cast::<u64>()) as $int_type } + } + #[inline] + pub(crate) fn neg(&self, order: Ordering) { + self.fetch_neg(order); + } + + #[inline] + pub(crate) const fn as_ptr(&self) -> *mut $int_type { + self.v.get() + } + } + }; +} + +atomic64!(AtomicI64, i64, atomic_max, atomic_min); +atomic64!(AtomicU64, u64, atomic_umax, atomic_umin); + +#[allow( + clippy::alloc_instead_of_core, + clippy::std_instead_of_alloc, + clippy::std_instead_of_core, + clippy::undocumented_unsafe_blocks, + clippy::wildcard_imports +)] +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn kuser_helper_version() { + let version = __kuser_helper_version(); + assert!(version >= 5, "{:?}", version); + assert_eq!(version, unsafe { (KUSER_HELPER_VERSION as *const i32).read() }); + } + + test_atomic_int!(i64); + test_atomic_int!(u64); + + // load/store/swap implementation is not affected by signedness, so it is + // enough to test only unsigned types. + stress_test!(u64); +} diff --git a/vendor/portable-atomic/src/imp/atomic128/README.md b/vendor/portable-atomic/src/imp/atomic128/README.md new file mode 100644 index 000000000..6e0c87988 --- /dev/null +++ b/vendor/portable-atomic/src/imp/atomic128/README.md @@ -0,0 +1,58 @@ +# Implementation of 128-bit atomics + +## 128-bit atomics instructions + +Here is the table of targets that support 128-bit atomics and the instructions used: + +| target_arch | load | store | CAS | RMW | note | +| ----------- | ---- | ----- | --- | --- | ---- | +| x86_64 | cmpxchg16b or vmovdqa | cmpxchg16b or vmovdqa | cmpxchg16b | cmpxchg16b | cmpxchg16b target feature required. vmovdqa requires Intel or AMD CPU with AVX. <br> Both compile-time and run-time detection are supported for cmpxchg16b. vmovdqa is currently run-time detection only. 
<br> Requires rustc 1.59+ when cmpxchg16b target feature is enabled at compile-time, otherwise requires rustc 1.69+ | +| aarch64 | ldxp/stxp or casp or ldp | ldxp/stxp or casp or stp | ldxp/stxp or casp | ldxp/stxp or casp | casp requires lse target feature, ldp/stp requires lse2 target feature. <br> Both compile-time and run-time detection are supported for lse. lse2 is currently compile-time detection only. <br> Requires rustc 1.59+ | +| powerpc64 | lq | stq | lqarx/stqcx. | lqarx/stqcx. | Requires target-cpu pwr8+ (powerpc64le is pwr8 by default). Both compile-time and run-time detection are supported (run-time detection is currently disabled by default). <br> Requires nightly | +| s390x | lpq | stpq | cdsg | cdsg | Requires nightly | + +On compiler versions or platforms where these are not supported, the fallback implementation is used. + +See [aarch64.rs](aarch64.rs) module-level comments for more details on the instructions used on aarch64. + +## Comparison with core::intrinsics::atomic_\* (core::sync::atomic::Atomic{I,U}128) + +This directory has target-specific implementations with inline assembly ([aarch64.rs](aarch64.rs), [x86_64.rs](x86_64.rs), [powerpc64.rs](powerpc64.rs), [s390x.rs](s390x.rs)) and an implementation without inline assembly ([intrinsics.rs](intrinsics.rs)). The latter currently always needs nightly compilers and is only used for Miri and ThreadSanitizer, which do not support inline assembly. + +Implementations with inline assembly generate assemblies almost equivalent to the `core::intrinsics::atomic_*` (used in `core::sync::atomic::Atomic{I,U}128`) for many operations, but some operations may or may not generate more efficient code. For example: + +- On x86_64, implementation with inline assembly contains additional optimizations (e.g., [#16](https://github.com/taiki-e/portable-atomic/pull/16)) and is much faster for some operations. 
+- On aarch64, implementation with inline assembly supports outline-atomics on more operating systems, and may be faster in environments where outline-atomics can improve performance. +- On powerpc64 and s390x, LLVM does not support generating some 128-bit atomic operations (see [intrinsics.rs](intrinsics.rs) module-level comments), and we use a CAS loop to implement them, so implementation with inline assembly may be faster for those operations. +- In implementations without inline assembly, the compiler may reuse condition flags that have changed as a result of the operation, or use immediate values instead of registers, depending on the situation. + +As 128-bit atomics-related APIs stabilize in the standard library, implementations with inline assembly are planned to be updated to get the benefits of both. + +## Run-time feature detection + +[detect](detect) module has run-time feature detection implementations. + +Here is the table of targets that support run-time feature detection and the instruction or API used: + +| target_arch | target_os/target_env | instruction/API | note | +| ----------- | -------------------- | --------------- | ---- | +| x86_64 | all (except for sgx) | cpuid | Enabled by default | +| aarch64 | linux | getauxval | Only enabled by default on `*-linux-gnu*`, and `*-linux-musl*` (default is static linking)/`*-linux-ohos*` (default is dynamic linking) with dynamic linking enabled. | +| aarch64 | android | getauxval | Enabled by default | +| aarch64 | freebsd | elf_aux_info | Enabled by default | +| aarch64 | openbsd | sysctl | Enabled by default | +| aarch64 | macos | sysctl | Currently only used in tests because FEAT_LSE and FEAT_LSE2 are always available at compile-time. 
| +| aarch64 | windows | IsProcessorFeaturePresent | Enabled by default | +| aarch64 | fuchsia | zx_system_get_features | Enabled by default | +| powerpc64 | linux | getauxval | Disabled by default | +| powerpc64 | freebsd | elf_aux_info | Disabled by default | + +Run-time detection is enabled by default on most targets and can be disabled with `--cfg portable_atomic_no_outline_atomics`. + +On some targets, run-time detection is disabled by default mainly for compatibility with older versions of operating systems or incomplete build environments, and can be enabled by `--cfg portable_atomic_outline_atomics`. (When both cfg are enabled, `*_no_*` cfg is preferred.) + +For targets not included in the above table, run-time detection is always disabled and works the same as when `--cfg portable_atomic_no_outline_atomics` is set. + +See [detect/auxv.rs](detect/auxv.rs) module-level comments for more details on Linux/Android/FreeBSD. + +See also [docs on `portable_atomic_no_outline_atomics`](https://github.com/taiki-e/portable-atomic/blob/HEAD/README.md#optional-cfg-no-outline-atomics) in the top-level readme. diff --git a/vendor/portable-atomic/src/imp/atomic128/aarch64.rs b/vendor/portable-atomic/src/imp/atomic128/aarch64.rs new file mode 100644 index 000000000..00418dfb0 --- /dev/null +++ b/vendor/portable-atomic/src/imp/atomic128/aarch64.rs @@ -0,0 +1,1223 @@ +// Atomic{I,U}128 implementation on AArch64. +// +// There are a few ways to implement 128-bit atomic operations in AArch64. +// +// - LDXP/STXP loop (DW LL/SC) +// - CASP (DWCAS) added as FEAT_LSE (mandatory from armv8.1-a) +// - LDP/STP (DW load/store) if FEAT_LSE2 (optional from armv8.2-a, mandatory from armv8.4-a) is available +// +// If outline-atomics is not enabled and FEAT_LSE is not available at +// compile-time, we use LDXP/STXP loop. 
+// If outline-atomics is enabled and FEAT_LSE is not available at +// compile-time, we use CASP for CAS if FEAT_LSE is available +// at run-time, otherwise, use LDXP/STXP loop. +// If FEAT_LSE is available at compile-time, we use CASP for load/store/CAS/RMW. +// However, when portable_atomic_ll_sc_rmw cfg is set, use LDXP/STXP loop instead of CASP +// loop for RMW (by default, it is set on Apple hardware; see build script for details). +// If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store. +// +// Note: FEAT_LSE2 doesn't imply FEAT_LSE. +// +// Note that we do not separate LL and SC into separate functions, but handle +// them within a single asm block. This is because it is theoretically possible +// for the compiler to insert operations that might clear the reservation between +// LL and SC. Considering the type of operations we are providing and the fact +// that [progress64](https://github.com/ARM-software/progress64) uses such code, +// this is probably not a problem for aarch64, but it seems that aarch64 doesn't +// guarantee it and hexagon is the only architecture with hardware guarantees +// that such code works. See also: +// +// - https://yarchive.net/comp/linux/cmpxchg_ll_sc_portability.html +// - https://lists.llvm.org/pipermail/llvm-dev/2016-May/099490.html +// - https://lists.llvm.org/pipermail/llvm-dev/2018-June/123993.html +// +// Also, even when using a CAS loop to implement atomic RMW, include the loop itself +// in the asm block because it is more efficient for some codegen backends. +// https://github.com/rust-lang/compiler-builtins/issues/339#issuecomment-1191260474 +// +// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use +// this module and use intrinsics.rs instead. 
+// +// Refs: +// - ARM Compiler armasm User Guide +// https://developer.arm.com/documentation/dui0801/latest +// - Arm A-profile A64 Instruction Set Architecture +// https://developer.arm.com/documentation/ddi0602/latest +// - Arm Architecture Reference Manual for A-profile architecture +// https://developer.arm.com/documentation/ddi0487/latest +// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit +// +// Generated asm: +// - aarch64 https://godbolt.org/z/nds1nWbnq +// - aarch64 msvc https://godbolt.org/z/PTKdhbKqW +// - aarch64 (+lse) https://godbolt.org/z/5GzssfTKc +// - aarch64 msvc (+lse) https://godbolt.org/z/oYE87caM7 +// - aarch64 (+lse,+lse2) https://godbolt.org/z/36dPjMbaG + +include!("macros.rs"); + +// On musl with static linking, it seems that getauxval is not always available. +// See detect/auxv.rs for more. +#[cfg(not(portable_atomic_no_outline_atomics))] +#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))] +#[cfg(any( + all( + target_os = "linux", + any( + target_env = "gnu", + all(any(target_env = "musl", target_env = "ohos"), not(target_feature = "crt-static")), + portable_atomic_outline_atomics, + ), + ), + target_os = "android", + target_os = "freebsd", +))] +#[path = "detect/auxv.rs"] +mod detect; +#[cfg(not(portable_atomic_no_outline_atomics))] +#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))] +#[cfg(target_os = "openbsd")] +#[path = "detect/aarch64_aa64reg.rs"] +mod detect; +#[cfg(not(portable_atomic_no_outline_atomics))] +#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))] +#[cfg(target_os = "fuchsia")] +#[path = "detect/aarch64_fuchsia.rs"] +mod detect; +#[cfg(not(portable_atomic_no_outline_atomics))] +#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))] +#[cfg(target_os = "windows")] +#[path = "detect/aarch64_windows.rs"] +mod detect; + +// test only 
+#[cfg(test)] +#[cfg(not(qemu))] +#[cfg(not(valgrind))] +#[cfg(not(portable_atomic_no_outline_atomics))] +#[cfg(any(target_os = "linux", target_os = "android", target_os = "freebsd"))] +#[path = "detect/aarch64_aa64reg.rs"] +mod detect_aa64reg; +#[cfg(test)] +#[cfg(not(portable_atomic_no_outline_atomics))] +#[cfg(target_os = "macos")] +#[path = "detect/aarch64_macos.rs"] +mod detect_macos; + +#[cfg(not(portable_atomic_no_asm))] +use core::arch::asm; +use core::sync::atomic::Ordering; + +#[cfg(any( + target_feature = "lse", + portable_atomic_target_feature = "lse", + not(portable_atomic_no_outline_atomics), +))] +macro_rules! debug_assert_lse { + () => { + #[cfg(all( + not(portable_atomic_no_outline_atomics), + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + ), + portable_atomic_outline_atomics, + ), + ), + target_os = "android", + target_os = "freebsd", + target_os = "openbsd", + target_os = "fuchsia", + target_os = "windows", + ), + ))] + #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] + { + debug_assert!(detect::detect().has_lse()); + } + }; +} + +#[cfg(target_endian = "little")] +macro_rules! select_le_or_be { + ($le:expr, $be:expr) => { + $le + }; +} +#[cfg(target_endian = "big")] +macro_rules! select_le_or_be { + ($le:expr, $be:expr) => { + $be + }; +} + +/// A 128-bit value represented as a pair of 64-bit values. +/// +/// This type is `#[repr(C)]`, both fields have the same in-memory representation +/// and are plain old datatypes, so access to the fields is always safe. +#[derive(Clone, Copy)] +#[repr(C)] +union U128 { + whole: u128, + pair: Pair, +} +// A pair of 64-bit values in little-endian order (even on big-endian targets). +#[derive(Clone, Copy)] +#[repr(C)] +struct Pair { + lo: u64, + hi: u64, +} + +macro_rules! 
atomic_rmw { + ($op:ident, $order:ident) => { + atomic_rmw!($op, $order, write = $order) + }; + ($op:ident, $order:ident, write = $write:ident) => { + match $order { + Ordering::Relaxed => $op!("", "", ""), + Ordering::Acquire => $op!("a", "", ""), + Ordering::Release => $op!("", "l", ""), + Ordering::AcqRel => $op!("a", "l", ""), + // In MSVC environments, SeqCst stores/writes needs fences after writes. + // https://reviews.llvm.org/D141748 + #[cfg(target_env = "msvc")] + Ordering::SeqCst if $write == Ordering::SeqCst => $op!("a", "l", "dmb ish"), + // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments. + Ordering::SeqCst => $op!("a", "l", ""), + _ => unreachable!("{:?}", $order), + } + }; +} + +#[inline] +unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { + #[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))] + // SAFETY: the caller must uphold the safety contract. + // cfg guarantee that the CPU supports FEAT_LSE2. + unsafe { + atomic_load_ldp(src, order) + } + #[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))] + { + #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] + // SAFETY: the caller must uphold the safety contract. + // cfg guarantee that the CPU supports FEAT_LSE. + unsafe { + _atomic_load_casp(src, order) + } + #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + _atomic_load_ldxp_stxp(src, order) + } + } +} +// If CPU supports FEAT_LSE2, LDP is single-copy atomic reads, +// otherwise it is two single-copy atomic reads. 
+// Refs: B2.2.1 of the Arm Architecture Reference Manual Armv8, for Armv8-A architecture profile +#[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))] +#[inline] +unsafe fn atomic_load_ldp(src: *mut u128, order: Ordering) -> u128 { + debug_assert!(src as usize % 16 == 0); + + // SAFETY: the caller must guarantee that `src` is valid for reads, + // 16-byte aligned, and that there are no concurrent non-atomic operations. + // + // Refs: + // - LDP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDP--A64- + unsafe { + let (prev_lo, prev_hi); + macro_rules! atomic_load_relaxed { + ($acquire:tt $(, $readonly:tt)?) => { + asm!( + "ldp {prev_lo}, {prev_hi}, [{src}]", + $acquire, + src = in(reg) ptr_reg!(src), + prev_hi = lateout(reg) prev_hi, + prev_lo = lateout(reg) prev_lo, + options(nostack, preserves_flags $(, $readonly)?), + ) + }; + } + match order { + Ordering::Relaxed => atomic_load_relaxed!("", readonly), + Ordering::Acquire => atomic_load_relaxed!("dmb ishld"), + Ordering::SeqCst => { + asm!( + // ldar (or dmb ishld) is required to prevent reordering with preceding stlxp. + // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891 for details. + "ldar {tmp}, [{src}]", + "ldp {prev_lo}, {prev_hi}, [{src}]", + "dmb ishld", + src = in(reg) ptr_reg!(src), + prev_hi = lateout(reg) prev_hi, + prev_lo = lateout(reg) prev_lo, + tmp = out(reg) _, + options(nostack, preserves_flags), + ); + } + _ => unreachable!("{:?}", order), + } + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } +} +// Do not use _atomic_compare_exchange_casp because it needs extra MOV to implement load. 
+#[cfg(any(test, not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))))] +#[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] +#[inline] +unsafe fn _atomic_load_casp(src: *mut u128, order: Ordering) -> u128 { + debug_assert!(src as usize % 16 == 0); + debug_assert_lse!(); + + // SAFETY: the caller must uphold the safety contract. + // cfg guarantee that the CPU supports FEAT_LSE. + unsafe { + let (prev_lo, prev_hi); + macro_rules! atomic_load { + ($acquire:tt, $release:tt) => { + asm!( + concat!("casp", $acquire, $release, " x2, x3, x2, x3, [{src}]"), + src = in(reg) ptr_reg!(src), + // must be allocated to even/odd register pair + inout("x2") 0_u64 => prev_lo, + inout("x3") 0_u64 => prev_hi, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_load!("", ""), + Ordering::Acquire => atomic_load!("a", ""), + Ordering::SeqCst => atomic_load!("a", "l"), + _ => unreachable!("{:?}", order), + } + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } +} +#[cfg(any( + test, + all( + not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")), + not(any(target_feature = "lse", portable_atomic_target_feature = "lse")), + ), +))] +#[inline] +unsafe fn _atomic_load_ldxp_stxp(src: *mut u128, order: Ordering) -> u128 { + debug_assert!(src as usize % 16 == 0); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let (mut prev_lo, mut prev_hi); + macro_rules! 
atomic_load { + ($acquire:tt, $release:tt) => { + asm!( + "2:", + concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{src}]"), + concat!("st", $release, "xp {r:w}, {prev_lo}, {prev_hi}, [{src}]"), + // 0 if the store was successful, 1 if no store was performed + "cbnz {r:w}, 2b", + src = in(reg) ptr_reg!(src), + prev_lo = out(reg) prev_lo, + prev_hi = out(reg) prev_hi, + r = out(reg) _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_load!("", ""), + Ordering::Acquire => atomic_load!("a", ""), + Ordering::SeqCst => atomic_load!("a", "l"), + _ => unreachable!("{:?}", order), + } + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } +} + +#[inline] +unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { + #[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))] + // SAFETY: the caller must uphold the safety contract. + // cfg guarantee that the CPU supports FEAT_LSE2. + unsafe { + atomic_store_stp(dst, val, order); + } + #[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + atomic_swap(dst, val, order); + } +} +// If CPU supports FEAT_LSE2, STP is single-copy atomic writes, +// otherwise it is two single-copy atomic writes. +// Refs: B2.2.1 of the Arm Architecture Reference Manual Armv8, for Armv8-A architecture profile +#[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))] +#[inline] +unsafe fn atomic_store_stp(dst: *mut u128, val: u128, order: Ordering) { + debug_assert!(dst as usize % 16 == 0); + + // SAFETY: the caller must guarantee that `dst` is valid for writes, + // 16-byte aligned, that there are no concurrent non-atomic operations. + // + // Refs: + // - STP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STP--A64- + unsafe { + let val = U128 { whole: val }; + macro_rules! 
atomic_store { + ($acquire:tt, $release:tt) => { + asm!( + $release, + "stp {val_lo}, {val_hi}, [{dst}]", + $acquire, + dst = in(reg) ptr_reg!(dst), + val_lo = in(reg) val.pair.lo, + val_hi = in(reg) val.pair.hi, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_store!("", ""), + Ordering::Release => atomic_store!("", "dmb ish"), + Ordering::SeqCst => atomic_store!("dmb ish", "dmb ish"), + _ => unreachable!("{:?}", order), + } + } +} + +#[inline] +unsafe fn atomic_compare_exchange( + dst: *mut u128, + old: u128, + new: u128, + success: Ordering, + failure: Ordering, +) -> Result<u128, u128> { + #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] + // SAFETY: the caller must uphold the safety contract. + // cfg guarantee that the CPU supports FEAT_LSE. + let res = unsafe { _atomic_compare_exchange_casp(dst, old, new, success, failure) }; + #[cfg(not(all( + not(portable_atomic_no_outline_atomics), + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + ), + portable_atomic_outline_atomics, + ), + ), + target_os = "android", + target_os = "freebsd", + target_os = "openbsd", + target_os = "fuchsia", + target_os = "windows", + ), + )))] + #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] + // SAFETY: the caller must uphold the safety contract. 
+ let res = unsafe { _atomic_compare_exchange_ldxp_stxp(dst, old, new, success, failure) }; + #[cfg(all( + not(portable_atomic_no_outline_atomics), + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + ), + portable_atomic_outline_atomics, + ), + ), + target_os = "android", + target_os = "freebsd", + target_os = "openbsd", + target_os = "fuchsia", + target_os = "windows", + ), + ))] + #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] + let res = { + fn_alias! { + #[target_feature(enable = "lse")] + unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128; + atomic_compare_exchange_casp_relaxed + = _atomic_compare_exchange_casp(Ordering::Relaxed, Ordering::Relaxed); + atomic_compare_exchange_casp_acquire + = _atomic_compare_exchange_casp(Ordering::Acquire, Ordering::Acquire); + atomic_compare_exchange_casp_release + = _atomic_compare_exchange_casp(Ordering::Release, Ordering::Relaxed); + atomic_compare_exchange_casp_acqrel + = _atomic_compare_exchange_casp(Ordering::AcqRel, Ordering::Acquire); + // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments. + #[cfg(target_env = "msvc")] + atomic_compare_exchange_casp_seqcst + = _atomic_compare_exchange_casp(Ordering::SeqCst, Ordering::SeqCst); + } + fn_alias! { + unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128; + atomic_compare_exchange_ldxp_stxp_relaxed + = _atomic_compare_exchange_ldxp_stxp(Ordering::Relaxed, Ordering::Relaxed); + atomic_compare_exchange_ldxp_stxp_acquire + = _atomic_compare_exchange_ldxp_stxp(Ordering::Acquire, Ordering::Acquire); + atomic_compare_exchange_ldxp_stxp_release + = _atomic_compare_exchange_ldxp_stxp(Ordering::Release, Ordering::Relaxed); + atomic_compare_exchange_ldxp_stxp_acqrel + = _atomic_compare_exchange_ldxp_stxp(Ordering::AcqRel, Ordering::Acquire); + // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments. 
+ #[cfg(target_env = "msvc")] + atomic_compare_exchange_ldxp_stxp_seqcst + = _atomic_compare_exchange_ldxp_stxp(Ordering::SeqCst, Ordering::SeqCst); + } + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, that there are no concurrent non-atomic operations, + // and we've checked if FEAT_LSE is available. + unsafe { + let success = crate::utils::upgrade_success_ordering(success, failure); + match success { + Ordering::Relaxed => { + ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 { + if detect::detect().has_lse() { + atomic_compare_exchange_casp_relaxed + } else { + atomic_compare_exchange_ldxp_stxp_relaxed + } + }) + } + Ordering::Acquire => { + ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 { + if detect::detect().has_lse() { + atomic_compare_exchange_casp_acquire + } else { + atomic_compare_exchange_ldxp_stxp_acquire + } + }) + } + Ordering::Release => { + ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 { + if detect::detect().has_lse() { + atomic_compare_exchange_casp_release + } else { + atomic_compare_exchange_ldxp_stxp_release + } + }) + } + // AcqRel and SeqCst RMWs are equivalent in both implementations in non-MSVC environments. 
+ #[cfg(not(target_env = "msvc"))] + Ordering::AcqRel | Ordering::SeqCst => { + ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 { + if detect::detect().has_lse() { + atomic_compare_exchange_casp_acqrel + } else { + atomic_compare_exchange_ldxp_stxp_acqrel + } + }) + } + #[cfg(target_env = "msvc")] + Ordering::AcqRel => { + ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 { + if detect::detect().has_lse() { + atomic_compare_exchange_casp_acqrel + } else { + atomic_compare_exchange_ldxp_stxp_acqrel + } + }) + } + #[cfg(target_env = "msvc")] + Ordering::SeqCst => { + ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 { + if detect::detect().has_lse() { + atomic_compare_exchange_casp_seqcst + } else { + atomic_compare_exchange_ldxp_stxp_seqcst + } + }) + } + _ => unreachable!("{:?}", success), + } + } + }; + if res == old { + Ok(res) + } else { + Err(res) + } +} +#[cfg(any( + target_feature = "lse", + portable_atomic_target_feature = "lse", + not(portable_atomic_no_outline_atomics), +))] +#[cfg_attr( + not(any(target_feature = "lse", portable_atomic_target_feature = "lse")), + target_feature(enable = "lse") +)] +#[inline] +unsafe fn _atomic_compare_exchange_casp( + dst: *mut u128, + old: u128, + new: u128, + success: Ordering, + failure: Ordering, +) -> u128 { + debug_assert!(dst as usize % 16 == 0); + debug_assert_lse!(); + let order = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, that there are no concurrent non-atomic operations, + // and the CPU supports FEAT_LSE. 
+ // + // Refs: + // - https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/CASPA--CASPAL--CASP--CASPL--CASPAL--CASP--CASPL--A64- + // - https://developer.arm.com/documentation/ddi0602/2023-06/Base-Instructions/CASP--CASPA--CASPAL--CASPL--Compare-and-Swap-Pair-of-words-or-doublewords-in-memory- + unsafe { + let old = U128 { whole: old }; + let new = U128 { whole: new }; + let (prev_lo, prev_hi); + macro_rules! cmpxchg { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + concat!("casp", $acquire, $release, " x6, x7, x4, x5, [{dst}]"), + $fence, + dst = in(reg) ptr_reg!(dst), + // must be allocated to even/odd register pair + inout("x6") old.pair.lo => prev_lo, + inout("x7") old.pair.hi => prev_hi, + // must be allocated to even/odd register pair + in("x4") new.pair.lo, + in("x5") new.pair.hi, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(cmpxchg, order, write = success); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } +} +#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))] +#[inline] +unsafe fn _atomic_compare_exchange_ldxp_stxp( + dst: *mut u128, + old: u128, + new: u128, + success: Ordering, + failure: Ordering, +) -> u128 { + debug_assert!(dst as usize % 16 == 0); + let order = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. 
+ // + // Refs: + // - LDXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDXP--A64- + // - LDAXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDAXP--A64- + // - STXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STXP--A64- + // - STLXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STLXP--A64- + // + // Note: Load-Exclusive pair (by itself) does not guarantee atomicity; to complete an atomic + // operation (even load/store), a corresponding Store-Exclusive pair must succeed. + // See Arm Architecture Reference Manual for A-profile architecture + // Section B2.2.1 "Requirements for single-copy atomicity", and + // Section B2.9 "Synchronization and semaphores" for more. + unsafe { + let old = U128 { whole: old }; + let new = U128 { whole: new }; + let (mut prev_lo, mut prev_hi); + macro_rules! cmpxchg { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + "2:", + concat!("ld", $acquire, "xp {out_lo}, {out_hi}, [{dst}]"), + "cmp {out_lo}, {old_lo}", + "cset {r:w}, ne", + "cmp {out_hi}, {old_hi}", + "cinc {r:w}, {r:w}, ne", + "cbz {r:w}, 3f", + concat!("st", $release, "xp {r:w}, {out_lo}, {out_hi}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cbnz {r:w}, 2b", + "b 4f", + "3:", + concat!("st", $release, "xp {r:w}, {new_lo}, {new_hi}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cbnz {r:w}, 2b", + "4:", + $fence, + dst = in(reg) ptr_reg!(dst), + old_lo = in(reg) old.pair.lo, + old_hi = in(reg) old.pair.hi, + new_lo = in(reg) new.pair.lo, + new_hi = in(reg) new.pair.hi, + out_lo = out(reg) prev_lo, + out_hi = out(reg) prev_hi, + r = out(reg) _, + // Do not use `preserves_flags` because CMP modifies the condition flags. 
+ options(nostack), + ) + }; + } + atomic_rmw!(cmpxchg, order, write = success); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } +} + +// casp is always strong, and ldxp requires a corresponding (succeed) stxp for +// its atomicity (see code comment in _atomic_compare_exchange_ldxp_stxp). +// (i.e., aarch64 doesn't have 128-bit weak CAS) +use self::atomic_compare_exchange as atomic_compare_exchange_weak; + +// If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set, +// we use CAS-based atomic RMW. +#[cfg(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), +))] +use _atomic_swap_casp as atomic_swap; +#[cfg(not(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), +)))] +use _atomic_swap_ldxp_stxp as atomic_swap; +// Do not use atomic_rmw_cas_3 because it needs extra MOV to implement swap. +#[cfg(any( + test, + all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + ) +))] +#[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] +#[inline] +unsafe fn _atomic_swap_casp(dst: *mut u128, val: u128, order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + debug_assert_lse!(); + + // SAFETY: the caller must uphold the safety contract. + // cfg guarantee that the CPU supports FEAT_LSE. + unsafe { + let val = U128 { whole: val }; + let (mut prev_lo, mut prev_hi); + macro_rules! swap { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + // If FEAT_LSE2 is not supported, this works like byte-wise atomic. + // This is not single-copy atomic reads, but this is ok because subsequent + // CAS will check for consistency. + "ldp x4, x5, [{dst}]", + "2:", + // casp writes the current value to the first register pair, + // so copy the `out`'s value for later comparison. 
+ "mov {tmp_lo}, x4", + "mov {tmp_hi}, x5", + concat!("casp", $acquire, $release, " x4, x5, x2, x3, [{dst}]"), + "cmp {tmp_hi}, x5", + "ccmp {tmp_lo}, x4, #0, eq", + "b.ne 2b", + $fence, + dst = in(reg) ptr_reg!(dst), + tmp_lo = out(reg) _, + tmp_hi = out(reg) _, + // must be allocated to even/odd register pair + out("x4") prev_lo, + out("x5") prev_hi, + // must be allocated to even/odd register pair + in("x2") val.pair.lo, + in("x3") val.pair.hi, + // Do not use `preserves_flags` because CMP and CCMP modify the condition flags. + options(nostack), + ) + }; + } + atomic_rmw!(swap, order); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } +} +// Do not use atomic_rmw_ll_sc_3 because it needs extra MOV to implement swap. +#[cfg(any( + test, + not(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + )) +))] +#[inline] +unsafe fn _atomic_swap_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let val = U128 { whole: val }; + let (mut prev_lo, mut prev_hi); + macro_rules! 
swap { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + "2:", + concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), + concat!("st", $release, "xp {r:w}, {val_lo}, {val_hi}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cbnz {r:w}, 2b", + $fence, + dst = in(reg) ptr_reg!(dst), + val_lo = in(reg) val.pair.lo, + val_hi = in(reg) val.pair.hi, + prev_lo = out(reg) prev_lo, + prev_hi = out(reg) prev_hi, + r = out(reg) _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(swap, order); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } +} + +/// Atomic RMW by LL/SC loop (3 arguments) +/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;` +/// +/// `$op` can use the following registers: +/// - val_lo/val_hi pair: val argument (read-only for `$op`) +/// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`) +/// - new_lo/new_hi pair: new value that will be stored by sc +macro_rules! atomic_rmw_ll_sc_3 { + ($name:ident as $reexport_name:ident $(($preserves_flags:tt))?, $($op:tt)*) => { + // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set, + // we use CAS-based atomic RMW generated by atomic_rmw_cas_3! macro instead. + #[cfg(not(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + )))] + use $name as $reexport_name; + #[cfg(any( + test, + not(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + )) + ))] + #[inline] + unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + // SAFETY: the caller must uphold the safety contract. + unsafe { + let val = U128 { whole: val }; + let (mut prev_lo, mut prev_hi); + macro_rules! 
op { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + "2:", + concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), + $($op)* + concat!("st", $release, "xp {r:w}, {new_lo}, {new_hi}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cbnz {r:w}, 2b", + $fence, + dst = in(reg) ptr_reg!(dst), + val_lo = in(reg) val.pair.lo, + val_hi = in(reg) val.pair.hi, + prev_lo = out(reg) prev_lo, + prev_hi = out(reg) prev_hi, + new_lo = out(reg) _, + new_hi = out(reg) _, + r = out(reg) _, + options(nostack $(, $preserves_flags)?), + ) + }; + } + atomic_rmw!(op, order); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } + } + }; +} +/// Atomic RMW by CAS loop (3 arguments) +/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;` +/// +/// `$op` can use the following registers: +/// - val_lo/val_hi pair: val argument (read-only for `$op`) +/// - x6/x7 pair: previous value loaded (read-only for `$op`) +/// - x4/x5 pair: new value that will be stored +macro_rules! atomic_rmw_cas_3 { + ($name:ident as $reexport_name:ident, $($op:tt)*) => { + // If FEAT_LSE is not available at compile-time or portable_atomic_ll_sc_rmw cfg is set, + // we use LL/SC-based atomic RMW generated by atomic_rmw_ll_sc_3! macro instead. + #[cfg(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + ))] + use $name as $reexport_name; + #[cfg(any( + test, + all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + ) + ))] + #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] + #[inline] + unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + debug_assert_lse!(); + // SAFETY: the caller must uphold the safety contract. + // cfg guarantee that the CPU supports FEAT_LSE. + unsafe { + let val = U128 { whole: val }; + let (mut prev_lo, mut prev_hi); + macro_rules! 
op { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + // If FEAT_LSE2 is not supported, this works like byte-wise atomic. + // This is not single-copy atomic reads, but this is ok because subsequent + // CAS will check for consistency. + "ldp x6, x7, [{dst}]", + "2:", + // casp writes the current value to the first register pair, + // so copy the `out`'s value for later comparison. + "mov {tmp_lo}, x6", + "mov {tmp_hi}, x7", + $($op)* + concat!("casp", $acquire, $release, " x6, x7, x4, x5, [{dst}]"), + "cmp {tmp_hi}, x7", + "ccmp {tmp_lo}, x6, #0, eq", + "b.ne 2b", + $fence, + dst = in(reg) ptr_reg!(dst), + val_lo = in(reg) val.pair.lo, + val_hi = in(reg) val.pair.hi, + tmp_lo = out(reg) _, + tmp_hi = out(reg) _, + // must be allocated to even/odd register pair + out("x6") prev_lo, + out("x7") prev_hi, + // must be allocated to even/odd register pair + out("x4") _, + out("x5") _, + // Do not use `preserves_flags` because CMP and CCMP modify the condition flags. + options(nostack), + ) + }; + } + atomic_rmw!(op, order); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } + } + }; +} + +/// Atomic RMW by LL/SC loop (2 arguments) +/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;` +/// +/// `$op` can use the following registers: +/// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`) +/// - new_lo/new_hi pair: new value that will be stored by sc +macro_rules! atomic_rmw_ll_sc_2 { + ($name:ident as $reexport_name:ident $(($preserves_flags:tt))?, $($op:tt)*) => { + // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set, + // we use CAS-based atomic RMW generated by atomic_rmw_cas_2! macro instead. 
+ #[cfg(not(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + )))] + use $name as $reexport_name; + #[cfg(any( + test, + not(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + )) + ))] + #[inline] + unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + // SAFETY: the caller must uphold the safety contract. + unsafe { + let (mut prev_lo, mut prev_hi); + macro_rules! op { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + "2:", + concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), + $($op)* + concat!("st", $release, "xp {r:w}, {new_lo}, {new_hi}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cbnz {r:w}, 2b", + $fence, + dst = in(reg) ptr_reg!(dst), + prev_lo = out(reg) prev_lo, + prev_hi = out(reg) prev_hi, + new_lo = out(reg) _, + new_hi = out(reg) _, + r = out(reg) _, + options(nostack $(, $preserves_flags)?), + ) + }; + } + atomic_rmw!(op, order); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } + } + }; +} +/// Atomic RMW by CAS loop (2 arguments) +/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;` +/// +/// `$op` can use the following registers: +/// - x6/x7 pair: previous value loaded (read-only for `$op`) +/// - x4/x5 pair: new value that will be stored +macro_rules! atomic_rmw_cas_2 { + ($name:ident as $reexport_name:ident, $($op:tt)*) => { + // If FEAT_LSE is not available at compile-time or portable_atomic_ll_sc_rmw cfg is set, + // we use LL/SC-based atomic RMW generated by atomic_rmw_ll_sc_2! macro instead. 
+ #[cfg(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + ))] + use $name as $reexport_name; + #[cfg(any( + test, + all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + ) + ))] + #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] + #[inline] + unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + debug_assert_lse!(); + // SAFETY: the caller must uphold the safety contract. + // cfg guarantee that the CPU supports FEAT_LSE. + unsafe { + let (mut prev_lo, mut prev_hi); + macro_rules! op { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + // If FEAT_LSE2 is not supported, this works like byte-wise atomic. + // This is not single-copy atomic reads, but this is ok because subsequent + // CAS will check for consistency. + "ldp x6, x7, [{dst}]", + "2:", + // casp writes the current value to the first register pair, + // so copy the `out`'s value for later comparison. + "mov {tmp_lo}, x6", + "mov {tmp_hi}, x7", + $($op)* + concat!("casp", $acquire, $release, " x6, x7, x4, x5, [{dst}]"), + "cmp {tmp_hi}, x7", + "ccmp {tmp_lo}, x6, #0, eq", + "b.ne 2b", + $fence, + dst = in(reg) ptr_reg!(dst), + tmp_lo = out(reg) _, + tmp_hi = out(reg) _, + // must be allocated to even/odd register pair + out("x6") prev_lo, + out("x7") prev_hi, + // must be allocated to even/odd register pair + out("x4") _, + out("x5") _, + // Do not use `preserves_flags` because CMP and CCMP modify the condition flags. + options(nostack), + ) + }; + } + atomic_rmw!(op, order); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } + } + }; +} + +// Do not use `preserves_flags` because ADDS modifies the condition flags. +atomic_rmw_ll_sc_3! 
{ + _atomic_add_ldxp_stxp as atomic_add, + select_le_or_be!("adds {new_lo}, {prev_lo}, {val_lo}", "adds {new_hi}, {prev_hi}, {val_hi}"), + select_le_or_be!("adc {new_hi}, {prev_hi}, {val_hi}", "adc {new_lo}, {prev_lo}, {val_lo}"), +} +atomic_rmw_cas_3! { + _atomic_add_casp as atomic_add, + select_le_or_be!("adds x4, x6, {val_lo}", "adds x5, x7, {val_hi}"), + select_le_or_be!("adc x5, x7, {val_hi}", "adc x4, x6, {val_lo}"), +} + +// Do not use `preserves_flags` because SUBS modifies the condition flags. +atomic_rmw_ll_sc_3! { + _atomic_sub_ldxp_stxp as atomic_sub, + select_le_or_be!("subs {new_lo}, {prev_lo}, {val_lo}", "subs {new_hi}, {prev_hi}, {val_hi}"), + select_le_or_be!("sbc {new_hi}, {prev_hi}, {val_hi}", "sbc {new_lo}, {prev_lo}, {val_lo}"), +} +atomic_rmw_cas_3! { + _atomic_sub_casp as atomic_sub, + select_le_or_be!("subs x4, x6, {val_lo}", "subs x5, x7, {val_hi}"), + select_le_or_be!("sbc x5, x7, {val_hi}", "sbc x4, x6, {val_lo}"), +} + +atomic_rmw_ll_sc_3! { + _atomic_and_ldxp_stxp as atomic_and (preserves_flags), + "and {new_lo}, {prev_lo}, {val_lo}", + "and {new_hi}, {prev_hi}, {val_hi}", +} +atomic_rmw_cas_3! { + _atomic_and_casp as atomic_and, + "and x4, x6, {val_lo}", + "and x5, x7, {val_hi}", +} + +atomic_rmw_ll_sc_3! { + _atomic_nand_ldxp_stxp as atomic_nand (preserves_flags), + "and {new_lo}, {prev_lo}, {val_lo}", + "mvn {new_lo}, {new_lo}", + "and {new_hi}, {prev_hi}, {val_hi}", + "mvn {new_hi}, {new_hi}", +} +atomic_rmw_cas_3! { + _atomic_nand_casp as atomic_nand, + "and x4, x6, {val_lo}", + "mvn x4, x4", + "and x5, x7, {val_hi}", + "mvn x5, x5", +} + +atomic_rmw_ll_sc_3! { + _atomic_or_ldxp_stxp as atomic_or (preserves_flags), + "orr {new_lo}, {prev_lo}, {val_lo}", + "orr {new_hi}, {prev_hi}, {val_hi}", +} +atomic_rmw_cas_3! { + _atomic_or_casp as atomic_or, + "orr x4, x6, {val_lo}", + "orr x5, x7, {val_hi}", +} + +atomic_rmw_ll_sc_3! 
{ + _atomic_xor_ldxp_stxp as atomic_xor (preserves_flags), + "eor {new_lo}, {prev_lo}, {val_lo}", + "eor {new_hi}, {prev_hi}, {val_hi}", +} +atomic_rmw_cas_3! { + _atomic_xor_casp as atomic_xor, + "eor x4, x6, {val_lo}", + "eor x5, x7, {val_hi}", +} + +atomic_rmw_ll_sc_2! { + _atomic_not_ldxp_stxp as atomic_not (preserves_flags), + "mvn {new_lo}, {prev_lo}", + "mvn {new_hi}, {prev_hi}", +} +atomic_rmw_cas_2! { + _atomic_not_casp as atomic_not, + "mvn x4, x6", + "mvn x5, x7", +} + +// Do not use `preserves_flags` because NEGS modifies the condition flags. +atomic_rmw_ll_sc_2! { + _atomic_neg_ldxp_stxp as atomic_neg, + select_le_or_be!("negs {new_lo}, {prev_lo}", "negs {new_hi}, {prev_hi}"), + select_le_or_be!("ngc {new_hi}, {prev_hi}", "ngc {new_lo}, {prev_lo}"), +} +atomic_rmw_cas_2! { + _atomic_neg_casp as atomic_neg, + select_le_or_be!("negs x4, x6", "negs x5, x7"), + select_le_or_be!("ngc x5, x7", "ngc x4, x6"), +} + +// Do not use `preserves_flags` because CMP and SBCS modify the condition flags. +atomic_rmw_ll_sc_3! { + _atomic_max_ldxp_stxp as atomic_max, + select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"), + select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"), + "csel {new_hi}, {prev_hi}, {val_hi}, lt", // select hi 64-bit + "csel {new_lo}, {prev_lo}, {val_lo}, lt", // select lo 64-bit +} +atomic_rmw_cas_3! { + _atomic_max_casp as atomic_max, + select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"), + select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"), + "csel x5, x7, {val_hi}, lt", // select hi 64-bit + "csel x4, x6, {val_lo}, lt", // select lo 64-bit +} + +// Do not use `preserves_flags` because CMP and SBCS modify the condition flags. +atomic_rmw_ll_sc_3! 
{ + _atomic_umax_ldxp_stxp as atomic_umax, + select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"), + select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"), + "csel {new_hi}, {prev_hi}, {val_hi}, lo", // select hi 64-bit + "csel {new_lo}, {prev_lo}, {val_lo}, lo", // select lo 64-bit +} +atomic_rmw_cas_3! { + _atomic_umax_casp as atomic_umax, + select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"), + select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"), + "csel x5, x7, {val_hi}, lo", // select hi 64-bit + "csel x4, x6, {val_lo}, lo", // select lo 64-bit +} + +// Do not use `preserves_flags` because CMP and SBCS modify the condition flags. +atomic_rmw_ll_sc_3! { + _atomic_min_ldxp_stxp as atomic_min, + select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"), + select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"), + "csel {new_hi}, {prev_hi}, {val_hi}, ge", // select hi 64-bit + "csel {new_lo}, {prev_lo}, {val_lo}, ge", // select lo 64-bit +} +atomic_rmw_cas_3! { + _atomic_min_casp as atomic_min, + select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"), + select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"), + "csel x5, x7, {val_hi}, ge", // select hi 64-bit + "csel x4, x6, {val_lo}, ge", // select lo 64-bit +} + +// Do not use `preserves_flags` because CMP and SBCS modify the condition flags. +atomic_rmw_ll_sc_3! { + _atomic_umin_ldxp_stxp as atomic_umin, + select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"), + select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"), + "csel {new_hi}, {prev_hi}, {val_hi}, hs", // select hi 64-bit + "csel {new_lo}, {prev_lo}, {val_lo}, hs", // select lo 64-bit +} +atomic_rmw_cas_3! 
{ + _atomic_umin_casp as atomic_umin, + select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"), + select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"), + "csel x5, x7, {val_hi}, hs", // select hi 64-bit + "csel x4, x6, {val_lo}, hs", // select lo 64-bit +} + +#[inline] +const fn is_lock_free() -> bool { + IS_ALWAYS_LOCK_FREE +} +const IS_ALWAYS_LOCK_FREE: bool = true; + +atomic128!(AtomicI128, i128, atomic_max, atomic_min); +atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); + +#[cfg(test)] +mod tests { + use super::*; + + test_atomic_int!(i128); + test_atomic_int!(u128); + + // load/store/swap implementation is not affected by signedness, so it is + // enough to test only unsigned types. + stress_test!(u128); +} diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_aa64reg.rs b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_aa64reg.rs new file mode 100644 index 000000000..f32c38837 --- /dev/null +++ b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_aa64reg.rs @@ -0,0 +1,316 @@ +// Run-time feature detection on aarch64 Linux/FreeBSD/OpenBSD by parsing system registers. +// +// As of nightly-2023-01-23, is_aarch64_feature_detected doesn't support run-time detection on OpenBSD. 
+// https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/mod.rs +// https://github.com/rust-lang/stdarch/pull/1374 +// +// Refs: +// - https://developer.arm.com/documentation/ddi0601/latest/AArch64-Registers +// - https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt +// - https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/aarch64.rs +// +// Supported platforms: +// - Linux 4.11+ (emulate mrs instruction) +// https://github.com/torvalds/linux/commit/77c97b4ee21290f5f083173d957843b615abbff2 +// - FreeBSD 12.0+ (emulate mrs instruction) +// https://github.com/freebsd/freebsd-src/commit/398810619cb32abf349f8de23f29510b2ee0839b +// - OpenBSD 7.1+ (through sysctl) +// https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8 +// +// For now, this module is only used on OpenBSD. +// On Linux/FreeBSD, this module is test-only: +// - On Linux, this approach requires a higher kernel version than Rust supports, +// and also does not work with qemu-user (as of QEMU 7.2) and Valgrind. +// (Looking into HWCAP_CPUID in auxvec, it appears that Valgrind is setting it +// to false correctly, but qemu-user is setting it to true.) +// - On FreeBSD, this approach does not work on FreeBSD 12 on QEMU (confirmed on +// FreeBSD 12.{2,3,4}), and we got SIGILL (worked on FreeBSD 13 and 14). 
/// Extracts the bit field `x[high:low]` and returns it shifted down to bit 0.
///
/// Both bounds are inclusive and bit 0 is the least significant bit, so
/// `extract(x, 23, 20)` yields the 4-bit field stored in bits 20 through 23.
///
/// Callers must pass `low <= high < 64`; this is checked in debug builds only.
fn extract(x: u64, high: usize, low: usize) -> u64 {
    debug_assert!(low <= high && high < 64);
    let width = high - low + 1;
    // Build the mask by shifting an all-ones value right instead of computing
    // `(1 << width) - 1`: the latter overflows the shift when the field covers
    // all 64 bits, whereas `64 - width` always stays within 0..=63 here.
    (x >> low) & (u64::MAX >> (64 - width))
}
+ // https://github.com/rust-lang/stdarch/pull/611 + unsafe { + let aa64isar0: u64; + asm!( + "mrs {}, ID_AA64ISAR0_EL1", + out(reg) aa64isar0, + options(pure, nomem, nostack, preserves_flags) + ); + #[cfg(test)] + let aa64isar1: u64; + #[cfg(test)] + { + asm!( + "mrs {}, ID_AA64ISAR1_EL1", + out(reg) aa64isar1, + options(pure, nomem, nostack, preserves_flags) + ); + } + #[cfg(test)] + let aa64mmfr2: u64; + #[cfg(test)] + { + asm!( + "mrs {}, ID_AA64MMFR2_EL1", + out(reg) aa64mmfr2, + options(pure, nomem, nostack, preserves_flags) + ); + } + AA64Reg { + aa64isar0, + #[cfg(test)] + aa64isar1, + #[cfg(test)] + aa64mmfr2, + } + } + } +} +#[cfg(target_os = "openbsd")] +mod imp { + // OpenBSD doesn't trap the mrs instruction, but exposes the system registers through sysctl. + // https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8 + // https://github.com/golang/go/commit/cd54ef1f61945459486e9eea2f016d99ef1da925 + + use core::ptr; + + use super::AA64Reg; + + // core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47 + #[allow(non_camel_case_types)] + pub(super) mod ffi { + pub(crate) use super::super::c_types::{c_int, c_size_t, c_uint, c_void}; + + // Defined in sys/sysctl.h. + // https://github.com/openbsd/src/blob/72ccc03bd11da614f31f7ff76e3f6fce99bc1c79/sys/sys/sysctl.h#L82 + pub(crate) const CTL_MACHDEP: c_int = 7; + // Defined in machine/cpu.h. + // https://github.com/openbsd/src/blob/72ccc03bd11da614f31f7ff76e3f6fce99bc1c79/sys/arch/arm64/include/cpu.h#L25-L40 + pub(crate) const CPU_ID_AA64ISAR0: c_int = 2; + #[cfg(test)] + pub(crate) const CPU_ID_AA64ISAR1: c_int = 3; + #[cfg(test)] + pub(crate) const CPU_ID_AA64MMFR2: c_int = 7; + + extern "C" { + // Defined in sys/sysctl.h. 
+ // https://man.openbsd.org/sysctl.2 + // https://github.com/openbsd/src/blob/72ccc03bd11da614f31f7ff76e3f6fce99bc1c79/sys/sys/sysctl.h + // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/bsd/netbsdlike/openbsd/mod.rs#L1817-L1824 + pub(crate) fn sysctl( + name: *const c_int, + name_len: c_uint, + old_p: *mut c_void, + old_len_p: *mut c_size_t, + new_p: *mut c_void, + new_len: c_size_t, + ) -> c_int; + } + } + + // ID_AA64ISAR0_EL1 and ID_AA64ISAR1_EL1 are supported on OpenBSD 7.1+. + // https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8 + // Others are supported on OpenBSD 7.3+. + // https://github.com/openbsd/src/commit/c7654cd65262d532212f65123ee3905ba200365c + // sysctl returns an unsupported error if operation is not supported, + // so we can safely use this function on older versions of OpenBSD. + pub(super) fn aa64reg() -> AA64Reg { + let aa64isar0 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64ISAR0]).unwrap_or(0); + #[cfg(test)] + let aa64isar1 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64ISAR1]).unwrap_or(0); + #[cfg(test)] + let aa64mmfr2 = sysctl64(&[ffi::CTL_MACHDEP, ffi::CPU_ID_AA64MMFR2]).unwrap_or(0); + AA64Reg { + aa64isar0, + #[cfg(test)] + aa64isar1, + #[cfg(test)] + aa64mmfr2, + } + } + + fn sysctl64(mib: &[ffi::c_int]) -> Option<u64> { + const OUT_LEN: ffi::c_size_t = core::mem::size_of::<u64>() as ffi::c_size_t; + let mut out = 0_u64; + let mut out_len = OUT_LEN; + #[allow(clippy::cast_possible_truncation)] + // SAFETY: + // - `mib.len()` does not exceed the size of `mib`. + // - `out_len` does not exceed the size of `out`. + // - `sysctl` is thread-safe. 
+ let res = unsafe { + ffi::sysctl( + mib.as_ptr(), + mib.len() as ffi::c_uint, + (&mut out as *mut u64).cast::<ffi::c_void>(), + &mut out_len, + ptr::null_mut(), + 0, + ) + }; + if res == -1 { + return None; + } + debug_assert_eq!(out_len, OUT_LEN); + Some(out) + } +} + +#[allow( + clippy::alloc_instead_of_core, + clippy::std_instead_of_alloc, + clippy::std_instead_of_core, + clippy::undocumented_unsafe_blocks, + clippy::wildcard_imports +)] +#[cfg(test)] +mod tests { + use std::{ + process::Command, + string::{String, ToString}, + }; + + use super::*; + + #[test] + fn test_aa64reg() { + let AA64Reg { aa64isar0, aa64isar1, aa64mmfr2 } = imp::aa64reg(); + std::eprintln!("aa64isar0={}", aa64isar0); + std::eprintln!("aa64isar1={}", aa64isar1); + std::eprintln!("aa64mmfr2={}", aa64mmfr2); + if cfg!(target_os = "openbsd") { + let output = Command::new("sysctl").arg("machdep").output().unwrap(); + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).unwrap(); + // OpenBSD 7.1+ + assert_eq!( + stdout.lines().find_map(|s| s.strip_prefix("machdep.id_aa64isar0=")).unwrap_or("0"), + aa64isar0.to_string(), + ); + assert_eq!( + stdout.lines().find_map(|s| s.strip_prefix("machdep.id_aa64isar1=")).unwrap_or("0"), + aa64isar1.to_string(), + ); + // OpenBSD 7.3+ + assert_eq!( + stdout.lines().find_map(|s| s.strip_prefix("machdep.id_aa64mmfr2=")).unwrap_or("0"), + aa64mmfr2.to_string(), + ); + } + if detect().test(CpuInfo::HAS_LSE) { + let atomic = extract(aa64isar0, 23, 20); + if detect().test(CpuInfo::HAS_LSE128) { + assert_eq!(atomic, 3); + } else { + assert_eq!(atomic, 2); + } + } + if detect().test(CpuInfo::HAS_LSE2) { + assert_eq!(extract(aa64mmfr2, 35, 32), 1); + } + if detect().test(CpuInfo::HAS_RCPC3) { + assert_eq!(extract(aa64isar1, 23, 20), 3); + } + } + + // Static assertions for FFI bindings. 
+ // This checks that FFI bindings defined in this crate, FFI bindings defined + // in libc, and FFI bindings generated for the platform's latest header file + // using bindgen have compatible signatures (or the same values if constants). + // Since this is static assertion, we can detect problems with + // `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh) + // without actually running tests on these platforms. + // See also tools/codegen/src/ffi.rs. + // TODO(codegen): auto-generate this test + #[cfg(target_os = "openbsd")] + #[allow( + clippy::cast_possible_wrap, + clippy::cast_sign_loss, + clippy::no_effect_underscore_binding + )] + const _: fn() = || { + use imp::ffi; + use test_helper::{libc, sys}; + let mut _sysctl: unsafe extern "C" fn( + *const ffi::c_int, + ffi::c_uint, + *mut ffi::c_void, + *mut ffi::c_size_t, + *mut ffi::c_void, + ffi::c_size_t, + ) -> ffi::c_int = ffi::sysctl; + _sysctl = libc::sysctl; + _sysctl = sys::sysctl; + static_assert!(ffi::CTL_MACHDEP == libc::CTL_MACHDEP); + static_assert!(ffi::CTL_MACHDEP == sys::CTL_MACHDEP as ffi::c_int); + // static_assert!(ffi::CPU_ID_AA64ISAR0 == libc::CPU_ID_AA64ISAR0); // libc doesn't have this + static_assert!(ffi::CPU_ID_AA64ISAR0 == sys::CPU_ID_AA64ISAR0 as ffi::c_int); + // static_assert!(ffi::CPU_ID_AA64ISAR1 == libc::CPU_ID_AA64ISAR1); // libc doesn't have this + static_assert!(ffi::CPU_ID_AA64ISAR1 == sys::CPU_ID_AA64ISAR1 as ffi::c_int); + // static_assert!(ffi::CPU_ID_AA64MMFR2 == libc::CPU_ID_AA64MMFR2); // libc doesn't have this + static_assert!(ffi::CPU_ID_AA64MMFR2 == sys::CPU_ID_AA64MMFR2 as ffi::c_int); + }; +} diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_fuchsia.rs b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_fuchsia.rs new file mode 100644 index 000000000..69aa74ebd --- /dev/null +++ b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_fuchsia.rs @@ -0,0 +1,89 @@ +// Run-time feature detection on aarch64 Fuchsia by using 
zx_system_get_features. +// +// As of nightly-2023-01-23, is_aarch64_feature_detected doesn't support run-time detection on Fuchsia. +// https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/mod.rs +// +// Refs: +// - https://fuchsia.dev/fuchsia-src/reference/syscalls/system_get_features +// - https://github.com/llvm/llvm-project/commit/4e731abc55681751b5d736b613f7720e50eb1ad4 + +include!("common.rs"); + +#[allow(non_camel_case_types)] +mod ffi { + // https://fuchsia.googlesource.com/fuchsia/+/refs/heads/main/zircon/system/public/zircon/types.h + pub(crate) type zx_status_t = i32; + + // https://fuchsia.googlesource.com/fuchsia/+/refs/heads/main/zircon/system/public/zircon/errors.h + pub(crate) const ZX_OK: zx_status_t = 0; + // https://fuchsia.googlesource.com/fuchsia/+/refs/heads/main/zircon/system/public/zircon/features.h + pub(crate) const ZX_FEATURE_KIND_CPU: u32 = 0; + pub(crate) const ZX_ARM64_FEATURE_ISA_ATOMICS: u32 = 1 << 8; + + #[link(name = "zircon")] + extern "C" { + // https://fuchsia.dev/fuchsia-src/reference/syscalls/system_get_features + pub(crate) fn zx_system_get_features(kind: u32, features: *mut u32) -> zx_status_t; + } +} + +fn zx_system_get_features(kind: u32) -> u32 { + let mut out = 0_u32; + // SAFETY: the pointer is valid because we got it from a reference. 
+ let res = unsafe { ffi::zx_system_get_features(kind, &mut out) }; + if res != ffi::ZX_OK { + return 0; + } + out +} + +#[cold] +fn _detect(info: &mut CpuInfo) { + let features = zx_system_get_features(ffi::ZX_FEATURE_KIND_CPU); + if features & ffi::ZX_ARM64_FEATURE_ISA_ATOMICS != 0 { + info.set(CpuInfo::HAS_LSE); + } +} + +#[allow( + clippy::alloc_instead_of_core, + clippy::std_instead_of_alloc, + clippy::std_instead_of_core, + clippy::undocumented_unsafe_blocks, + clippy::wildcard_imports +)] +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_fuchsia() { + let features = zx_system_get_features(ffi::ZX_FEATURE_KIND_CPU); + assert_ne!(features, 0); + std::eprintln!("features: {:b}", features); + } + + // Static assertions for FFI bindings. + // This checks that FFI bindings defined in this crate and FFI bindings + // generated for the platform's latest header file using bindgen have + // compatible signatures (or the same values if constants). + // Since this is static assertion, we can detect problems with + // `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh) + // without actually running tests on these platforms. + // See also tools/codegen/src/ffi.rs. 
+ // TODO(codegen): auto-generate this test + #[allow( + clippy::cast_possible_wrap, + clippy::cast_sign_loss, + clippy::cast_possible_truncation, + clippy::no_effect_underscore_binding + )] + const _: fn() = || { + use test_helper::sys; + // TODO(codegen): zx_system_get_features + let _: ffi::zx_status_t = 0 as sys::zx_status_t; + static_assert!(ffi::ZX_OK == sys::ZX_OK as ffi::zx_status_t); + static_assert!(ffi::ZX_FEATURE_KIND_CPU == sys::ZX_FEATURE_KIND_CPU); + static_assert!(ffi::ZX_ARM64_FEATURE_ISA_ATOMICS == sys::ZX_ARM64_FEATURE_ISA_ATOMICS); + }; +} diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_macos.rs b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_macos.rs new file mode 100644 index 000000000..0bf0e6b0f --- /dev/null +++ b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_macos.rs @@ -0,0 +1,150 @@ +// Run-time feature detection on aarch64 macOS by using sysctl. +// +// This module is currently only enabled on tests because aarch64 macOS always supports FEAT_LSE and FEAT_LSE2. +// https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/include/llvm/TargetParser/AArch64TargetParser.h#L458 +// +// If macOS supporting Armv9.4-a becomes popular in the future, this module will +// be used to support outline atomics for FEAT_LSE128/FEAT_LRCPC3. 
+// +// Refs: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics +// +// Note that iOS doesn't support sysctl: +// - https://developer.apple.com/forums/thread/9440 +// - https://nabla-c0d3.github.io/blog/2015/06/16/ios9-security-privacy + +include!("common.rs"); + +use core::ptr; + +// core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47 +#[allow(non_camel_case_types)] +mod ffi { + pub(crate) use super::c_types::{c_char, c_int, c_size_t, c_void}; + + extern "C" { + // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname + // https://github.com/apple-oss-distributions/xnu/blob/5c2921b07a2480ab43ec66f5b9e41cb872bc554f/bsd/sys/sysctl.h + // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/bsd/apple/mod.rs#L5167-L5173 + pub(crate) fn sysctlbyname( + name: *const c_char, + old_p: *mut c_void, + old_len_p: *mut c_size_t, + new_p: *mut c_void, + new_len: c_size_t, + ) -> c_int; + } +} + +unsafe fn sysctlbyname32(name: &[u8]) -> Option<u32> { + const OUT_LEN: ffi::c_size_t = core::mem::size_of::<u32>() as ffi::c_size_t; + + debug_assert_eq!(name.last(), Some(&0), "{:?}", name); + debug_assert_eq!(name.iter().filter(|&&v| v == 0).count(), 1, "{:?}", name); + + let mut out = 0_u32; + let mut out_len = OUT_LEN; + // SAFETY: + // - the caller must guarantee that `name` a valid C string. + // - `out_len` does not exceed the size of `out`. + // - `sysctlbyname` is thread-safe. + let res = unsafe { + ffi::sysctlbyname( + name.as_ptr().cast::<ffi::c_char>(), + (&mut out as *mut u32).cast::<ffi::c_void>(), + &mut out_len, + ptr::null_mut(), + 0, + ) + }; + if res != 0 { + return None; + } + debug_assert_eq!(out_len, OUT_LEN); + Some(out) +} + +#[cold] +fn _detect(info: &mut CpuInfo) { + // hw.optional.armv8_1_atomics is available on macOS 11+ (note: aarch64 support was added on macOS 11), + // hw.optional.arm.FEAT_* are only available on macOS 12+. 
+ // Query both names in case future versions of macOS remove the old name. + // https://github.com/golang/go/commit/c15593197453b8bf90fc3a9080ba2afeaf7934ea + // https://github.com/google/boringssl/commit/91e0b11eba517d83b910b20fe3740eeb39ecb37e + // SAFETY: we passed a valid C string. + if unsafe { + sysctlbyname32(b"hw.optional.arm.FEAT_LSE\0").unwrap_or(0) != 0 + || sysctlbyname32(b"hw.optional.armv8_1_atomics\0").unwrap_or(0) != 0 + } { + info.set(CpuInfo::HAS_LSE); + } + // we currently only use FEAT_LSE in outline-atomics. + #[cfg(test)] + { + // SAFETY: we passed a valid C string. + if unsafe { sysctlbyname32(b"hw.optional.arm.FEAT_LSE2\0").unwrap_or(0) != 0 } { + info.set(CpuInfo::HAS_LSE2); + } + // SAFETY: we passed a valid C string. + if unsafe { sysctlbyname32(b"hw.optional.arm.FEAT_LSE128\0").unwrap_or(0) != 0 } { + info.set(CpuInfo::HAS_LSE128); + } + // SAFETY: we passed a valid C string. + if unsafe { sysctlbyname32(b"hw.optional.arm.FEAT_LRCPC3\0").unwrap_or(0) != 0 } { + info.set(CpuInfo::HAS_RCPC3); + } + } +} + +#[allow( + clippy::alloc_instead_of_core, + clippy::std_instead_of_alloc, + clippy::std_instead_of_core, + clippy::undocumented_unsafe_blocks, + clippy::wildcard_imports +)] +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_macos() { + unsafe { + assert_eq!(sysctlbyname32(b"hw.optional.armv8_1_atomics\0"), Some(1)); + assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LSE\0"), Some(1)); + assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LSE2\0"), Some(1)); + assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LSE128\0"), None); + assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::NotFound); + assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LRCPC\0"), Some(1)); + assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LRCPC2\0"), Some(1)); + assert_eq!(sysctlbyname32(b"hw.optional.arm.FEAT_LRCPC3\0"), None); + assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::NotFound); + } + } + + // Static 
assertions for FFI bindings. + // This checks that FFI bindings defined in this crate, FFI bindings defined + // in libc, and FFI bindings generated for the platform's latest header file + // using bindgen have compatible signatures (or the same values if constants). + // Since this is static assertion, we can detect problems with + // `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh) + // without actually running tests on these platforms. + // See also tools/codegen/src/ffi.rs. + // TODO(codegen): auto-generate this test + #[allow( + clippy::cast_possible_wrap, + clippy::cast_sign_loss, + clippy::no_effect_underscore_binding + )] + const _: fn() = || { + use test_helper::{libc, sys}; + let mut _sysctlbyname: unsafe extern "C" fn( + *const ffi::c_char, + *mut ffi::c_void, + *mut ffi::c_size_t, + *mut ffi::c_void, + ffi::c_size_t, + ) -> ffi::c_int = ffi::sysctlbyname; + _sysctlbyname = libc::sysctlbyname; + _sysctlbyname = sys::sysctlbyname; + }; +} diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_windows.rs b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_windows.rs new file mode 100644 index 000000000..6922ce4a7 --- /dev/null +++ b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_windows.rs @@ -0,0 +1,79 @@ +// Run-time feature detection on aarch64 Windows by using IsProcessorFeaturePresent. +// +// As of nightly-2023-01-23, is_aarch64_feature_detected doesn't support run-time detection of FEAT_LSE on Windows. 
+// https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/windows/aarch64.rs +// https://github.com/rust-lang/stdarch/pull/1373 +// +// Refs: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent + +include!("common.rs"); + +// windows-sys requires Rust 1.48 +#[allow(clippy::upper_case_acronyms)] +mod ffi { + pub(crate) type DWORD = u32; + pub(crate) type BOOL = i32; + + pub(crate) const FALSE: BOOL = 0; + // Defined in winnt.h of Windows SDK. + pub(crate) const PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE: DWORD = 34; + + extern "system" { + // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent + pub(crate) fn IsProcessorFeaturePresent(ProcessorFeature: DWORD) -> BOOL; + } +} + +#[cold] +fn _detect(info: &mut CpuInfo) { + // SAFETY: calling IsProcessorFeaturePresent is safe, and FALSE is also + // returned if the HAL does not support detection of the specified feature. + if unsafe { + ffi::IsProcessorFeaturePresent(ffi::PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE) != ffi::FALSE + } { + info.set(CpuInfo::HAS_LSE); + } +} + +#[allow( + clippy::alloc_instead_of_core, + clippy::std_instead_of_alloc, + clippy::std_instead_of_core, + clippy::undocumented_unsafe_blocks, + clippy::wildcard_imports +)] +#[cfg(test)] +mod tests { + use super::*; + + // Static assertions for FFI bindings. + // This checks that FFI bindings defined in this crate and FFI bindings defined + // in windows-sys have compatible signatures (or the same values if constants). + // Since this is static assertion, we can detect problems with + // `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh) + // without actually running tests on these platforms. 
+ // (Unlike libc, windows-sys programmatically generates bindings from Windows + // API metadata, so it should be enough to check compatibility with the + // windows-sys' signatures/values.) + // See also tools/codegen/src/ffi.rs. + // TODO(codegen): auto-generate this test + #[allow( + clippy::cast_possible_wrap, + clippy::cast_sign_loss, + clippy::cast_possible_truncation, + clippy::no_effect_underscore_binding + )] + const _: fn() = || { + use test_helper::windows_sys; + let _: ffi::DWORD = 0 as windows_sys::Win32::System::Threading::PROCESSOR_FEATURE_ID; + let _: ffi::BOOL = 0 as windows_sys::Win32::Foundation::BOOL; + let mut _sysctl: unsafe extern "system" fn(ffi::DWORD) -> ffi::BOOL = + ffi::IsProcessorFeaturePresent; + _sysctl = windows_sys::Win32::System::Threading::IsProcessorFeaturePresent; + static_assert!(ffi::FALSE == windows_sys::Win32::Foundation::FALSE); + static_assert!( + ffi::PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE + == windows_sys::Win32::System::Threading::PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE + ); + }; +} diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/auxv.rs b/vendor/portable-atomic/src/imp/atomic128/detect/auxv.rs new file mode 100644 index 000000000..a80350e47 --- /dev/null +++ b/vendor/portable-atomic/src/imp/atomic128/detect/auxv.rs @@ -0,0 +1,327 @@ +// Run-time feature detection on aarch64/powerpc64 Linux/Android/FreeBSD by parsing ELF auxiliary vectors. +// +// # Linux/Android +// +// As of nightly-2023-01-23, is_aarch64_feature_detected always uses dlsym by default +// on aarch64 Linux/Android, but on the following platforms, so we can safely assume +// getauxval is linked to the binary. +// +// - On glibc (*-linux-gnu*), [aarch64 support is available on glibc 2.17+](https://sourceware.org/legacy-ml/libc-announce/2012/msg00001.html) +// and is newer than [glibc 2.16 that added getauxval](https://sourceware.org/legacy-ml/libc-announce/2012/msg00000.html). 
+// - On musl (*-linux-musl*, *-linux-ohos*), [aarch64 support is available on musl 1.1.7+](https://git.musl-libc.org/cgit/musl/tree/WHATSNEW?h=v1.1.7#n1422) +// and is newer than [musl 1.1.0 that added getauxval](https://git.musl-libc.org/cgit/musl/tree/WHATSNEW?h=v1.1.0#n1197). +// https://github.com/rust-lang/rust/commit/9a04ae4997493e9260352064163285cddc43de3c +// - On bionic (*-android*), [64-bit architecture support is available on Android 5.0+ (API level 21+)](https://android-developers.googleblog.com/2014/10/whats-new-in-android-50-lollipop.html) +// and is newer than [Android 4.3 (API level 18) that added getauxval](https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/auxv.h#L49). +// +// However, on musl with static linking, it seems that getauxval is not always available, independent of version requirements: https://github.com/rust-lang/rust/issues/89626 +// (That problem may have been fixed in https://github.com/rust-lang/rust/commit/9a04ae4997493e9260352064163285cddc43de3c, +// but even in the version containing that patch, [there is a report](https://github.com/rust-lang/rust/issues/89626#issuecomment-1242636038) +// of the same error.) +// +// On other Linux targets, we cannot assume that getauxval is always available, so we don't enable +// outline-atomics by default (can be enabled by `--cfg portable_atomic_outline_atomics`). +// +// - On musl with static linking. See the above for more. +// Also, in this case, dlsym(getauxval) always returns null. +// - On uClibc-ng (*-linux-uclibc*, *-l4re-uclibc*), [uClibc-ng 1.0.43 (released on 2023-04-05) added getauxval](https://repo.or.cz/uclibc-ng.git/commitdiff/d869bb1600942c01a77539128f9ba5b5b55ad647). +// - On Picolibc, [Picolibc 1.4.6 added a getauxval stub](https://github.com/picolibc/picolibc#picolibc-version-146). 
+// +// See also https://github.com/rust-lang/stdarch/pull/1375 +// +// # FreeBSD +// +// As of nightly-2023-01-23, is_aarch64_feature_detected always uses mrs on +// aarch64 FreeBSD. However, they do not work on FreeBSD 12 on QEMU (confirmed +// on FreeBSD 12.{2,3,4}), and we got SIGILL (worked on FreeBSD 13 and 14). +// +// So use elf_aux_info instead of mrs like compiler-rt does. +// https://man.freebsd.org/elf_aux_info(3) +// https://reviews.llvm.org/D109330 +// +// elf_aux_info is available on FreeBSD 12.0+ and 11.4+: +// https://github.com/freebsd/freebsd-src/commit/0b08ae2120cdd08c20a2b806e2fcef4d0a36c470 +// https://github.com/freebsd/freebsd-src/blob/release/11.4.0/sys/sys/auxv.h +// On FreeBSD, [aarch64 support is available on FreeBSD 11.0+](https://www.freebsd.org/releases/11.0R/relnotes/#hardware-arm), +// but FreeBSD 11 (11.4) was EoL on 2021-09-30, and FreeBSD 11.3 was EoL on 2020-09-30: +// https://www.freebsd.org/security/unsupported +// See also https://github.com/rust-lang/stdarch/pull/611#issuecomment-445464613 +// +// # PowerPC64 +// +// On PowerPC64, outline-atomics is currently disabled by default mainly for +// compatibility with older versions of operating systems +// (can be enabled by `--cfg portable_atomic_outline_atomics`). 
+ +include!("common.rs"); + +use os::ffi; +#[cfg(any(target_os = "linux", target_os = "android"))] +mod os { + // core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47 + #[cfg_attr(test, allow(dead_code))] + pub(super) mod ffi { + pub(crate) use super::super::c_types::c_ulong; + #[cfg(all(target_arch = "aarch64", target_os = "android"))] + pub(crate) use super::super::c_types::{c_char, c_int}; + + extern "C" { + // https://man7.org/linux/man-pages/man3/getauxval.3.html + // https://github.com/bminor/glibc/blob/801af9fafd4689337ebf27260aa115335a0cb2bc/misc/sys/auxv.h + // https://github.com/bminor/musl/blob/7d756e1c04de6eb3f2b3d3e1141a218bb329fcfb/include/sys/auxv.h + // https://repo.or.cz/uclibc-ng.git/blob/9d549d7bc6a1b78498ee8d1f39f6a324fdfc9e5d:/include/sys/auxv.h + // https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/auxv.h + // https://github.com/picolibc/picolibc/blob/7a8a58aeaa5946cb662577a518051091b691af3a/newlib/libc/picolib/getauxval.c + // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/linux_like/linux/gnu/mod.rs#L1201 + // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/linux_like/linux/musl/mod.rs#L744 + // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/linux_like/android/b64/mod.rs#L333 + pub(crate) fn getauxval(type_: c_ulong) -> c_ulong; + + // Defined in sys/system_properties.h. 
+ // https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/system_properties.h + // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/linux_like/android/mod.rs#L3471 + #[cfg(all(target_arch = "aarch64", target_os = "android"))] + pub(crate) fn __system_property_get(name: *const c_char, value: *mut c_char) -> c_int; + } + + // https://github.com/torvalds/linux/blob/v6.1/include/uapi/linux/auxvec.h + #[cfg(any(test, target_arch = "aarch64"))] + pub(crate) const AT_HWCAP: c_ulong = 16; + #[cfg(any(test, target_arch = "powerpc64"))] + pub(crate) const AT_HWCAP2: c_ulong = 26; + + // Defined in sys/system_properties.h. + // https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/system_properties.h + #[cfg(all(target_arch = "aarch64", target_os = "android"))] + pub(crate) const PROP_VALUE_MAX: c_int = 92; + } + + pub(super) fn getauxval(type_: ffi::c_ulong) -> ffi::c_ulong { + #[cfg(all(target_arch = "aarch64", target_os = "android"))] + { + // Samsung Exynos 9810 has a bug that big and little cores have different + // ISAs. And on older Android (pre-9), the kernel incorrectly reports + // that features available only on some cores are available on all cores. + // https://reviews.llvm.org/D114523 + let mut arch = [0_u8; ffi::PROP_VALUE_MAX as usize]; + // SAFETY: we've passed a valid C string and a buffer with max length. + let len = unsafe { + ffi::__system_property_get( + b"ro.arch\0".as_ptr().cast::<ffi::c_char>(), + arch.as_mut_ptr().cast::<ffi::c_char>(), + ) + }; + // On Exynos, ro.arch is not available on Android 12+, but it is fine + // because Android 9+ includes the fix. + if len > 0 && arch.starts_with(b"exynos9810") { + return 0; + } + } + + // SAFETY: `getauxval` is thread-safe. See also the module level docs. 
+ unsafe { ffi::getauxval(type_) } + } +} +#[cfg(target_os = "freebsd")] +mod os { + // core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47 + #[cfg_attr(test, allow(dead_code))] + pub(super) mod ffi { + pub(crate) use super::super::c_types::{c_int, c_ulong, c_void}; + + extern "C" { + // Defined in sys/auxv.h. + // https://man.freebsd.org/elf_aux_info(3) + // https://github.com/freebsd/freebsd-src/blob/deb63adf945d446ed91a9d84124c71f15ae571d1/sys/sys/auxv.h + pub(crate) fn elf_aux_info(aux: c_int, buf: *mut c_void, buf_len: c_int) -> c_int; + } + + // Defined in sys/elf_common.h. + // https://github.com/freebsd/freebsd-src/blob/deb63adf945d446ed91a9d84124c71f15ae571d1/sys/sys/elf_common.h + #[cfg(any(test, target_arch = "aarch64"))] + pub(crate) const AT_HWCAP: c_int = 25; + #[cfg(any(test, target_arch = "powerpc64"))] + pub(crate) const AT_HWCAP2: c_int = 26; + } + + pub(super) fn getauxval(aux: ffi::c_int) -> ffi::c_ulong { + #[allow(clippy::cast_possible_wrap, clippy::cast_possible_truncation)] + const OUT_LEN: ffi::c_int = core::mem::size_of::<ffi::c_ulong>() as ffi::c_int; + let mut out: ffi::c_ulong = 0; + // SAFETY: + // - the pointer is valid because we got it from a reference. + // - `OUT_LEN` is the same as the size of `out`. + // - `elf_aux_info` is thread-safe. + unsafe { + let res = ffi::elf_aux_info( + aux, + (&mut out as *mut ffi::c_ulong).cast::<ffi::c_void>(), + OUT_LEN, + ); + // If elf_aux_info fails, `out` will be left at zero (which is the proper default value). + debug_assert!(res == 0 || out == 0); + } + out + } +} + +// Basically, Linux and FreeBSD use the same hwcap values. +// FreeBSD supports a subset of the hwcap values supported by Linux. +use arch::_detect; +#[cfg(target_arch = "aarch64")] +mod arch { + use super::{ffi, os, CpuInfo}; + + // Linux + // https://github.com/torvalds/linux/blob/v6.1/arch/arm64/include/uapi/asm/hwcap.h + // FreeBSD + // Defined in machine/elf.h. 
+ // https://github.com/freebsd/freebsd-src/blob/deb63adf945d446ed91a9d84124c71f15ae571d1/sys/arm64/include/elf.h + // available on FreeBSD 13.0+ and 12.2+ + // https://github.com/freebsd/freebsd-src/blob/release/13.0.0/sys/arm64/include/elf.h + // https://github.com/freebsd/freebsd-src/blob/release/12.2.0/sys/arm64/include/elf.h + pub(super) const HWCAP_ATOMICS: ffi::c_ulong = 1 << 8; + #[cfg(test)] + pub(super) const HWCAP_USCAT: ffi::c_ulong = 1 << 25; + + #[cold] + pub(super) fn _detect(info: &mut CpuInfo) { + let hwcap = os::getauxval(ffi::AT_HWCAP); + + if hwcap & HWCAP_ATOMICS != 0 { + info.set(CpuInfo::HAS_LSE); + } + // we currently only use FEAT_LSE in outline-atomics. + #[cfg(test)] + { + if hwcap & HWCAP_USCAT != 0 { + info.set(CpuInfo::HAS_LSE2); + } + } + } +} +#[cfg(target_arch = "powerpc64")] +mod arch { + use super::{ffi, os, CpuInfo}; + + // Linux + // https://github.com/torvalds/linux/blob/v6.1/arch/powerpc/include/uapi/asm/cputable.h + // FreeBSD + // Defined in machine/cpu.h. 
+ // https://github.com/freebsd/freebsd-src/blob/deb63adf945d446ed91a9d84124c71f15ae571d1/sys/powerpc/include/cpu.h + // available on FreeBSD 11.0+ + // https://github.com/freebsd/freebsd-src/commit/b0bf7fcd298133457991b27625bbed766e612730 + pub(super) const PPC_FEATURE2_ARCH_2_07: ffi::c_ulong = 0x80000000; + + #[cold] + pub(super) fn _detect(info: &mut CpuInfo) { + let hwcap2 = os::getauxval(ffi::AT_HWCAP2); + + // power8 + if hwcap2 & PPC_FEATURE2_ARCH_2_07 != 0 { + info.set(CpuInfo::HAS_QUADWORD_ATOMICS); + } + } +} + +#[allow( + clippy::alloc_instead_of_core, + clippy::std_instead_of_alloc, + clippy::std_instead_of_core, + clippy::undocumented_unsafe_blocks, + clippy::wildcard_imports +)] +#[cfg(test)] +mod tests { + use super::*; + + #[allow(clippy::cast_sign_loss)] + #[cfg(all(target_arch = "aarch64", target_os = "android"))] + #[test] + fn test_android() { + unsafe { + let mut arch = [1; ffi::PROP_VALUE_MAX as usize]; + let len = ffi::__system_property_get( + b"ro.arch\0".as_ptr().cast::<ffi::c_char>(), + arch.as_mut_ptr().cast::<ffi::c_char>(), + ); + assert!(len >= 0); + std::eprintln!("len={}", len); + std::eprintln!("arch={:?}", arch); + std::eprintln!( + "arch={:?}", + core::str::from_utf8(core::slice::from_raw_parts(arch.as_ptr(), len as usize)) + .unwrap() + ); + } + } + + // Static assertions for FFI bindings. + // This checks that FFI bindings defined in this crate, FFI bindings defined + // in libc, and FFI bindings generated for the platform's latest header file + // using bindgen have compatible signatures (or the same values if constants). + // Since this is static assertion, we can detect problems with + // `cargo check --tests --target <target>` run in CI (via TESTS=1 build.sh) + // without actually running tests on these platforms. + // See also tools/codegen/src/ffi.rs. 
+ // TODO(codegen): auto-generate this test + #[allow( + clippy::cast_possible_wrap, + clippy::cast_sign_loss, + clippy::cast_possible_truncation, + clippy::no_effect_underscore_binding + )] + const _: fn() = || { + use test_helper::{libc, sys}; + #[cfg(any(target_os = "linux", target_os = "android"))] + { + let mut _getauxval: unsafe extern "C" fn(ffi::c_ulong) -> ffi::c_ulong = ffi::getauxval; + _getauxval = libc::getauxval; + #[cfg(any(target_env = "musl", target_os = "android"))] // TODO(codegen) + { + _getauxval = sys::getauxval; + } + } + #[cfg(all(target_arch = "aarch64", target_os = "android"))] + { + let mut ___system_property_get: unsafe extern "C" fn( + *const ffi::c_char, + *mut ffi::c_char, + ) -> ffi::c_int = ffi::__system_property_get; + ___system_property_get = libc::__system_property_get; + ___system_property_get = sys::__system_property_get; + static_assert!(ffi::PROP_VALUE_MAX == libc::PROP_VALUE_MAX); + static_assert!(ffi::PROP_VALUE_MAX == sys::PROP_VALUE_MAX as _); + } + #[cfg(target_os = "freebsd")] + { + let mut _elf_aux_info: unsafe extern "C" fn( + ffi::c_int, + *mut ffi::c_void, + ffi::c_int, + ) -> ffi::c_int = ffi::elf_aux_info; + _elf_aux_info = libc::elf_aux_info; + _elf_aux_info = sys::elf_aux_info; + } + #[cfg(not(target_os = "freebsd"))] // libc doesn't have this on FreeBSD + static_assert!(ffi::AT_HWCAP == libc::AT_HWCAP); + static_assert!(ffi::AT_HWCAP == sys::AT_HWCAP as _); + #[cfg(not(target_os = "freebsd"))] // libc doesn't have this on FreeBSD + static_assert!(ffi::AT_HWCAP2 == libc::AT_HWCAP2); + static_assert!(ffi::AT_HWCAP2 == sys::AT_HWCAP2 as _); + #[cfg(target_arch = "aarch64")] + { + // static_assert!(arch::HWCAP_ATOMICS == libc::HWCAP_ATOMICS); // libc doesn't have this + static_assert!(arch::HWCAP_ATOMICS == sys::HWCAP_ATOMICS as ffi::c_ulong); + // static_assert!(HWCAP_USCAT == libc::HWCAP_USCAT); // libc doesn't have this + static_assert!(arch::HWCAP_USCAT == sys::HWCAP_USCAT as ffi::c_ulong); + } + 
#[cfg(target_arch = "powerpc64")] + { + // static_assert!(arch::PPC_FEATURE2_ARCH_2_07 == libc::PPC_FEATURE2_ARCH_2_07); // libc doesn't have this + static_assert!( + arch::PPC_FEATURE2_ARCH_2_07 == sys::PPC_FEATURE2_ARCH_2_07 as ffi::c_ulong + ); + } + }; +} diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/common.rs b/vendor/portable-atomic/src/imp/atomic128/detect/common.rs new file mode 100644 index 000000000..504718718 --- /dev/null +++ b/vendor/portable-atomic/src/imp/atomic128/detect/common.rs @@ -0,0 +1,373 @@ +#[derive(Clone, Copy)] +pub(crate) struct CpuInfo(u32); + +impl CpuInfo { + const INIT: u32 = 0; + + #[inline] + fn set(&mut self, bit: u32) { + self.0 = set(self.0, bit); + } + #[inline] + fn test(self, bit: u32) -> bool { + test(self.0, bit) + } +} + +#[inline] +fn set(x: u32, bit: u32) -> u32 { + x | 1 << bit +} +#[inline] +fn test(x: u32, bit: u32) -> bool { + x & (1 << bit) != 0 +} + +#[inline] +pub(crate) fn detect() -> CpuInfo { + use core::sync::atomic::{AtomicU32, Ordering}; + + static CACHE: AtomicU32 = AtomicU32::new(0); + let mut info = CpuInfo(CACHE.load(Ordering::Relaxed)); + if info.0 != 0 { + return info; + } + info.set(CpuInfo::INIT); + // Note: detect_false cfg is intended to make it easy for portable-atomic developers to + // test cases such as has_cmpxchg16b == false, has_lse == false, + // __kuser_helper_version < 5, etc., and is not a public API. + if !cfg!(portable_atomic_test_outline_atomics_detect_false) { + _detect(&mut info); + } + CACHE.store(info.0, Ordering::Relaxed); + info +} + +#[cfg(target_arch = "aarch64")] +impl CpuInfo { + /// Whether FEAT_LSE is available + const HAS_LSE: u32 = 1; + /// Whether FEAT_LSE2 is available + // This is currently only used in tests. + #[cfg(test)] + const HAS_LSE2: u32 = 2; + /// Whether FEAT_LSE128 is available + // This is currently only used in tests. 
+ #[cfg(test)] + const HAS_LSE128: u32 = 3; + /// Whether FEAT_LRCPC3 is available + // This is currently only used in tests. + #[cfg(test)] + const HAS_RCPC3: u32 = 4; + + #[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))] + #[inline] + pub(crate) fn has_lse(self) -> bool { + self.test(CpuInfo::HAS_LSE) + } +} + +#[cfg(target_arch = "x86_64")] +impl CpuInfo { + /// Whether CMPXCHG16B is available + const HAS_CMPXCHG16B: u32 = 1; + /// Whether VMOVDQA is atomic + const HAS_VMOVDQA_ATOMIC: u32 = 2; + + #[cfg(any( + test, + not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), + ))] + #[inline] + pub(crate) fn has_cmpxchg16b(self) -> bool { + self.test(CpuInfo::HAS_CMPXCHG16B) + } + #[inline] + pub(crate) fn has_vmovdqa_atomic(self) -> bool { + self.test(CpuInfo::HAS_VMOVDQA_ATOMIC) + } +} + +#[cfg(target_arch = "powerpc64")] +impl CpuInfo { + /// Whether lqarx and stqcx. instructions are available + const HAS_QUADWORD_ATOMICS: u32 = 1; + + #[cfg(any( + test, + not(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + )), + ))] + #[inline] + pub(crate) fn has_quadword_atomics(self) -> bool { + self.test(CpuInfo::HAS_QUADWORD_ATOMICS) + } +} + +// core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47 +#[cfg(any(target_arch = "aarch64", target_arch = "powerpc64"))] +#[cfg(not(windows))] +#[allow(dead_code, non_camel_case_types)] +mod c_types { + pub(crate) type c_void = core::ffi::c_void; + // c_{,u}int is {i,u}32 on non-16-bit architectures + // https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/ffi/mod.rs#L160 + // (16-bit architectures currently don't use this module) + pub(crate) type c_int = i32; + pub(crate) type c_uint = u32; + // c_{,u}long is {i,u}64 on non-Windows 64-bit targets, otherwise is {i,u}32 + // https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/ffi/mod.rs#L176 + // (Windows 
currently doesn't use this module - this module is cfg(not(windows))) + #[cfg(target_pointer_width = "64")] + pub(crate) type c_long = i64; + #[cfg(not(target_pointer_width = "64"))] + pub(crate) type c_long = i32; + #[cfg(target_pointer_width = "64")] + pub(crate) type c_ulong = u64; + #[cfg(not(target_pointer_width = "64"))] + pub(crate) type c_ulong = u32; + // c_size_t is usize + // https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/ffi/mod.rs#L88 + pub(crate) type c_size_t = usize; + // c_char is u8 on most non-Apple/non-Windows ARM/PowerPC/RISC-V targets + // (Linux/Android/FreeBSD/NetBSD/OpenBSD/VxWorks/Fuchsia/QNX Neutrino/Horizon) + // https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/ffi/mod.rs#L104 + // (macOS is currently the only Apple target that uses this module, and Windows currently doesn't use this module) + #[cfg(not(target_os = "macos"))] + pub(crate) type c_char = u8; + // c_char is i8 on all Apple targets + #[cfg(target_os = "macos")] + pub(crate) type c_char = i8; + + // Static assertions for C type definitions. 
+ #[cfg(test)] + const _: fn() = || { + use test_helper::{libc, sys}; + let _: c_int = 0 as std::os::raw::c_int; + let _: c_uint = 0 as std::os::raw::c_uint; + let _: c_long = 0 as std::os::raw::c_long; + let _: c_ulong = 0 as std::os::raw::c_ulong; + let _: c_size_t = 0 as libc::size_t; // std::os::raw::c_size_t is unstable + let _: c_char = 0 as std::os::raw::c_char; + let _: c_char = 0 as sys::c_char; + }; +} + +#[allow( + clippy::alloc_instead_of_core, + clippy::std_instead_of_alloc, + clippy::std_instead_of_core, + clippy::undocumented_unsafe_blocks, + clippy::wildcard_imports +)] +#[cfg(test)] +mod tests_common { + use super::*; + + #[test] + fn test_bit_flags() { + let mut x = CpuInfo(0); + #[cfg(target_arch = "aarch64")] + { + assert!(!x.test(CpuInfo::INIT)); + assert!(!x.test(CpuInfo::HAS_LSE)); + assert!(!x.test(CpuInfo::HAS_LSE2)); + assert!(!x.test(CpuInfo::HAS_LSE128)); + assert!(!x.test(CpuInfo::HAS_RCPC3)); + x.set(CpuInfo::INIT); + assert!(x.test(CpuInfo::INIT)); + assert!(!x.test(CpuInfo::HAS_LSE)); + assert!(!x.test(CpuInfo::HAS_LSE2)); + assert!(!x.test(CpuInfo::HAS_LSE128)); + assert!(!x.test(CpuInfo::HAS_RCPC3)); + x.set(CpuInfo::HAS_LSE); + assert!(x.test(CpuInfo::INIT)); + assert!(x.test(CpuInfo::HAS_LSE)); + assert!(!x.test(CpuInfo::HAS_LSE2)); + assert!(!x.test(CpuInfo::HAS_LSE128)); + assert!(!x.test(CpuInfo::HAS_RCPC3)); + x.set(CpuInfo::HAS_LSE2); + assert!(x.test(CpuInfo::INIT)); + assert!(x.test(CpuInfo::HAS_LSE)); + assert!(x.test(CpuInfo::HAS_LSE2)); + assert!(!x.test(CpuInfo::HAS_LSE128)); + assert!(!x.test(CpuInfo::HAS_RCPC3)); + x.set(CpuInfo::HAS_LSE128); + assert!(x.test(CpuInfo::INIT)); + assert!(x.test(CpuInfo::HAS_LSE)); + assert!(x.test(CpuInfo::HAS_LSE2)); + assert!(x.test(CpuInfo::HAS_LSE128)); + assert!(!x.test(CpuInfo::HAS_RCPC3)); + x.set(CpuInfo::HAS_RCPC3); + assert!(x.test(CpuInfo::INIT)); + assert!(x.test(CpuInfo::HAS_LSE)); + assert!(x.test(CpuInfo::HAS_LSE2)); + assert!(x.test(CpuInfo::HAS_LSE128)); + 
assert!(x.test(CpuInfo::HAS_RCPC3)); + } + #[cfg(target_arch = "x86_64")] + { + assert!(!x.test(CpuInfo::INIT)); + assert!(!x.test(CpuInfo::HAS_CMPXCHG16B)); + assert!(!x.test(CpuInfo::HAS_VMOVDQA_ATOMIC)); + x.set(CpuInfo::INIT); + assert!(x.test(CpuInfo::INIT)); + assert!(!x.test(CpuInfo::HAS_CMPXCHG16B)); + assert!(!x.test(CpuInfo::HAS_VMOVDQA_ATOMIC)); + x.set(CpuInfo::HAS_CMPXCHG16B); + assert!(x.test(CpuInfo::INIT)); + assert!(x.test(CpuInfo::HAS_CMPXCHG16B)); + assert!(!x.test(CpuInfo::HAS_VMOVDQA_ATOMIC)); + x.set(CpuInfo::HAS_VMOVDQA_ATOMIC); + assert!(x.test(CpuInfo::INIT)); + assert!(x.test(CpuInfo::HAS_CMPXCHG16B)); + assert!(x.test(CpuInfo::HAS_VMOVDQA_ATOMIC)); + } + #[cfg(target_arch = "powerpc64")] + { + assert!(!x.test(CpuInfo::INIT)); + assert!(!x.test(CpuInfo::HAS_QUADWORD_ATOMICS)); + x.set(CpuInfo::INIT); + assert!(x.test(CpuInfo::INIT)); + assert!(!x.test(CpuInfo::HAS_QUADWORD_ATOMICS)); + x.set(CpuInfo::HAS_QUADWORD_ATOMICS); + assert!(x.test(CpuInfo::INIT)); + assert!(x.test(CpuInfo::HAS_QUADWORD_ATOMICS)); + } + } + + #[test] + fn print_features() { + use std::{fmt::Write as _, io::Write, string::String}; + + let mut features = String::new(); + macro_rules! print_feature { + ($name:expr, $enabled:expr $(,)?) 
=> {{ + let _ = writeln!(features, " {}: {}", $name, $enabled); + }}; + } + #[cfg(target_arch = "aarch64")] + { + features.push_str("run-time:\n"); + print_feature!("lse", detect().test(CpuInfo::HAS_LSE)); + print_feature!("lse2", detect().test(CpuInfo::HAS_LSE2)); + print_feature!("lse128", detect().test(CpuInfo::HAS_LSE128)); + print_feature!("rcpc3", detect().test(CpuInfo::HAS_RCPC3)); + features.push_str("compile-time:\n"); + print_feature!( + "lse", + cfg!(any(target_feature = "lse", portable_atomic_target_feature = "lse")), + ); + print_feature!( + "lse2", + cfg!(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")), + ); + } + #[cfg(target_arch = "x86_64")] + { + features.push_str("run-time:\n"); + print_feature!("cmpxchg16b", detect().test(CpuInfo::HAS_CMPXCHG16B)); + print_feature!("vmovdqa-atomic", detect().test(CpuInfo::HAS_VMOVDQA_ATOMIC)); + features.push_str("compile-time:\n"); + print_feature!( + "cmpxchg16b", + cfg!(any( + target_feature = "cmpxchg16b", + portable_atomic_target_feature = "cmpxchg16b", + )), + ); + } + #[cfg(target_arch = "powerpc64")] + { + features.push_str("run-time:\n"); + print_feature!("quadword-atomics", detect().test(CpuInfo::HAS_QUADWORD_ATOMICS)); + features.push_str("compile-time:\n"); + print_feature!( + "quadword-atomics", + cfg!(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + )), + ); + } + let stdout = std::io::stderr(); + let mut stdout = stdout.lock(); + let _ = stdout.write_all(features.as_bytes()); + } + + #[cfg(target_arch = "x86_64")] + #[test] + #[cfg_attr(portable_atomic_test_outline_atomics_detect_false, ignore)] + fn test_detect() { + if detect().has_cmpxchg16b() { + assert!(detect().test(CpuInfo::HAS_CMPXCHG16B)); + } else { + assert!(!detect().test(CpuInfo::HAS_CMPXCHG16B)); + } + if detect().has_vmovdqa_atomic() { + assert!(detect().test(CpuInfo::HAS_VMOVDQA_ATOMIC)); + } else { + assert!(!detect().test(CpuInfo::HAS_VMOVDQA_ATOMIC)); + } 
+ } + #[cfg(target_arch = "aarch64")] + #[test] + #[cfg_attr(portable_atomic_test_outline_atomics_detect_false, ignore)] + fn test_detect() { + let proc_cpuinfo = test_helper::cpuinfo::ProcCpuinfo::new(); + if detect().has_lse() { + assert!(detect().test(CpuInfo::HAS_LSE)); + if let Ok(proc_cpuinfo) = proc_cpuinfo { + assert!(proc_cpuinfo.lse); + } + } else { + assert!(!detect().test(CpuInfo::HAS_LSE)); + if let Ok(proc_cpuinfo) = proc_cpuinfo { + assert!(!proc_cpuinfo.lse); + } + } + if detect().test(CpuInfo::HAS_LSE2) { + assert!(detect().test(CpuInfo::HAS_LSE)); + assert!(detect().test(CpuInfo::HAS_LSE2)); + if let Ok(test_helper::cpuinfo::ProcCpuinfo { lse2: Some(lse2), .. }) = proc_cpuinfo { + assert!(lse2); + } + } else { + assert!(!detect().test(CpuInfo::HAS_LSE2)); + if let Ok(test_helper::cpuinfo::ProcCpuinfo { lse2: Some(lse2), .. }) = proc_cpuinfo { + assert!(!lse2); + } + } + if detect().test(CpuInfo::HAS_LSE128) { + assert!(detect().test(CpuInfo::HAS_LSE)); + assert!(detect().test(CpuInfo::HAS_LSE2)); + assert!(detect().test(CpuInfo::HAS_LSE128)); + } else { + assert!(!detect().test(CpuInfo::HAS_LSE128)); + } + if detect().test(CpuInfo::HAS_RCPC3) { + assert!(detect().test(CpuInfo::HAS_RCPC3)); + } else { + assert!(!detect().test(CpuInfo::HAS_RCPC3)); + } + } + #[cfg(target_arch = "powerpc64")] + #[test] + #[cfg_attr(portable_atomic_test_outline_atomics_detect_false, ignore)] + fn test_detect() { + let proc_cpuinfo = test_helper::cpuinfo::ProcCpuinfo::new(); + if detect().has_quadword_atomics() { + assert!(detect().test(CpuInfo::HAS_QUADWORD_ATOMICS)); + if let Ok(proc_cpuinfo) = proc_cpuinfo { + assert!(proc_cpuinfo.power8); + } + } else { + assert!(!detect().test(CpuInfo::HAS_QUADWORD_ATOMICS)); + if let Ok(proc_cpuinfo) = proc_cpuinfo { + assert!(!proc_cpuinfo.power8); + } + } + } +} diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/x86_64.rs b/vendor/portable-atomic/src/imp/atomic128/detect/x86_64.rs new file mode 100644 index 
000000000..d162d6599 --- /dev/null +++ b/vendor/portable-atomic/src/imp/atomic128/detect/x86_64.rs @@ -0,0 +1,124 @@ +// Adapted from https://github.com/rust-lang/stdarch. + +#![cfg_attr(any(not(target_feature = "sse"), portable_atomic_sanitize_thread), allow(dead_code))] + +// Miri doesn't support inline assembly used in __cpuid: https://github.com/rust-lang/miri/issues/932 +// SGX doesn't support CPUID: https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/core_arch/src/x86/cpuid.rs#L102-L105 +#[cfg(any(target_env = "sgx", miri))] +compile_error!("internal error: this module is not supported on this target"); + +include!("common.rs"); + +#[cfg(not(portable_atomic_no_asm))] +use core::arch::asm; +use core::arch::x86_64::CpuidResult; + +// Workaround for https://github.com/rust-lang/rust/issues/101346 +// It is not clear if our use cases are affected, but we implement this just in case. +// +// Refs: +// - https://www.felixcloutier.com/x86/cpuid +// - https://en.wikipedia.org/wiki/CPUID +// - https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/core_arch/src/x86/cpuid.rs +unsafe fn __cpuid(leaf: u32) -> CpuidResult { + let eax; + let mut ebx; + let ecx; + let edx; + // SAFETY: the caller must guarantee that CPU supports `cpuid`. 
+ unsafe { + asm!( + // rbx is reserved by LLVM + "mov {ebx_tmp:r}, rbx", + "cpuid", + "xchg {ebx_tmp:r}, rbx", // restore rbx + ebx_tmp = out(reg) ebx, + inout("eax") leaf => eax, + inout("ecx") 0 => ecx, + out("edx") edx, + options(nostack, preserves_flags), + ); + } + CpuidResult { eax, ebx, ecx, edx } +} + +// https://en.wikipedia.org/wiki/CPUID +const VENDOR_ID_INTEL: [u8; 12] = *b"GenuineIntel"; +const VENDOR_ID_AMD: [u8; 12] = *b"AuthenticAMD"; + +unsafe fn _vendor_id() -> [u8; 12] { + // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/x86.rs#L40-L59 + // SAFETY: the caller must guarantee that CPU supports `cpuid`. + let CpuidResult { ebx, ecx, edx, .. } = unsafe { __cpuid(0) }; + let vendor_id: [[u8; 4]; 3] = [ebx.to_ne_bytes(), edx.to_ne_bytes(), ecx.to_ne_bytes()]; + // SAFETY: transmute is safe because `[u8; 12]` and `[[u8; 4]; 3]` has the same layout. + unsafe { core::mem::transmute(vendor_id) } +} + +#[cold] +fn _detect(info: &mut CpuInfo) { + // Miri doesn't support inline assembly used in __cpuid + // SGX doesn't support CPUID: https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/core_arch/src/x86/cpuid.rs#L102-L105 + #[cfg(not(any(target_env = "sgx", miri)))] + { + use core::arch::x86_64::_xgetbv; + + // SAFETY: Calling `_vendor_id`` is safe because the CPU has `cpuid` support. + let vendor_id = unsafe { _vendor_id() }; + + // SAFETY: Calling `__cpuid`` is safe because the CPU has `cpuid` support. + let proc_info_ecx = unsafe { __cpuid(0x0000_0001_u32).ecx }; + + // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/x86.rs#L111 + if test(proc_info_ecx, 13) { + info.set(CpuInfo::HAS_CMPXCHG16B); + } + + // VMOVDQA is atomic on Intel and AMD CPUs with AVX. + // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688 for details. 
+ if vendor_id == VENDOR_ID_INTEL || vendor_id == VENDOR_ID_AMD { + // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/x86.rs#L131-L224 + let cpu_xsave = test(proc_info_ecx, 26); + if cpu_xsave { + let cpu_osxsave = test(proc_info_ecx, 27); + if cpu_osxsave { + // SAFETY: Calling `_xgetbv`` is safe because the CPU has `xsave` support + // and OS has set `osxsave`. + let xcr0 = unsafe { _xgetbv(0) }; + let os_avx_support = xcr0 & 6 == 6; + if os_avx_support && test(proc_info_ecx, 28) { + info.set(CpuInfo::HAS_VMOVDQA_ATOMIC); + } + } + } + } + } +} + +#[allow( + clippy::alloc_instead_of_core, + clippy::std_instead_of_alloc, + clippy::std_instead_of_core, + clippy::undocumented_unsafe_blocks, + clippy::wildcard_imports +)] +#[cfg(test)] +mod tests { + #[cfg(not(portable_atomic_test_outline_atomics_detect_false))] + use super::*; + + #[cfg(not(portable_atomic_test_outline_atomics_detect_false))] + #[test] + // SGX doesn't support CPUID. + // Miri doesn't support inline assembly used in __cpuid. + #[cfg_attr(any(target_env = "sgx", miri), ignore)] + fn test_cpuid() { + assert_eq!(std::is_x86_feature_detected!("cmpxchg16b"), detect().has_cmpxchg16b()); + let vendor_id = unsafe { _vendor_id() }; + if vendor_id == VENDOR_ID_INTEL || vendor_id == VENDOR_ID_AMD { + assert_eq!(std::is_x86_feature_detected!("avx"), detect().has_vmovdqa_atomic()); + } else { + assert!(!detect().has_vmovdqa_atomic()); + } + } +} diff --git a/vendor/portable-atomic/src/imp/atomic128/intrinsics.rs b/vendor/portable-atomic/src/imp/atomic128/intrinsics.rs new file mode 100644 index 000000000..0365da555 --- /dev/null +++ b/vendor/portable-atomic/src/imp/atomic128/intrinsics.rs @@ -0,0 +1,498 @@ +// Atomic{I,U}128 implementation without inline assembly. +// +// Note: This module is currently only enabled on Miri and ThreadSanitizer which +// do not support inline assembly. 
+// +// This uses `core::arch::x86_64::cmpxchg16b` on x86_64 and +// `core::intrinsics::atomic_*` on aarch64, powerpc64, and s390x. +// +// See README.md of this directory for performance comparison with the +// implementation with inline assembly. +// +// Note: +// - This currently always needs nightly compilers. On x86_64, the stabilization +// of `core::arch::x86_64::cmpxchg16b` has been recently merged to stdarch: +// https://github.com/rust-lang/stdarch/pull/1358 +// - On powerpc64, this requires LLVM 15+ and pwr8+ (quadword-atomics LLVM target feature): +// https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445 +// - On aarch64 big-endian, LLVM (as of 15) generates broken code. +// (on cfg(miri)/cfg(sanitize) it is fine though) +// - On s390x, LLVM (as of 16) generates libcalls for operations other than load/store/cmpxchg: +// https://godbolt.org/z/5a5T4hxMh +// https://github.com/llvm/llvm-project/blob/2cc0c0de802178dc7e5408497e2ec53b6c9728fa/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll +// https://reviews.llvm.org/D146425 +// - On powerpc64, LLVM (as of 16) doesn't support 128-bit atomic min/max: +// https://godbolt.org/z/3rebKcbdf +// +// Refs: https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/sync/atomic.rs + +include!("macros.rs"); + +#[allow(dead_code)] // we only use compare_exchange. 
+#[cfg(target_arch = "x86_64")] +#[cfg(not(target_feature = "cmpxchg16b"))] +#[path = "../fallback/outline_atomics.rs"] +mod fallback; + +#[cfg(target_arch = "x86_64")] +#[cfg(not(target_feature = "cmpxchg16b"))] +#[path = "detect/x86_64.rs"] +mod detect; + +use core::sync::atomic::Ordering; +#[cfg(not(target_arch = "x86_64"))] +use core::{ + intrinsics, + sync::atomic::Ordering::{AcqRel, Acquire, Relaxed, Release, SeqCst}, +}; + +// https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/sync/atomic.rs#L3128 +#[cfg(target_arch = "x86_64")] +#[inline] +fn strongest_failure_ordering(order: Ordering) -> Ordering { + match order { + Ordering::Release | Ordering::Relaxed => Ordering::Relaxed, + Ordering::SeqCst => Ordering::SeqCst, + Ordering::Acquire | Ordering::AcqRel => Ordering::Acquire, + _ => unreachable!("{:?}", order), + } +} + +#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { + #[cfg(target_arch = "x86_64")] + // SAFETY: the caller must uphold the safety contract. + unsafe { + let fail_order = strongest_failure_ordering(order); + match atomic_compare_exchange(src, 0, 0, order, fail_order) { + Ok(v) | Err(v) => v, + } + } + #[cfg(not(target_arch = "x86_64"))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + match order { + Acquire => intrinsics::atomic_load_acquire(src), + Relaxed => intrinsics::atomic_load_relaxed(src), + SeqCst => intrinsics::atomic_load_seqcst(src), + _ => unreachable!("{:?}", order), + } + } +} + +#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { + #[cfg(target_arch = "x86_64")] + // SAFETY: the caller must uphold the safety contract. + unsafe { + atomic_swap(dst, val, order); + } + #[cfg(not(target_arch = "x86_64"))] + // SAFETY: the caller must uphold the safety contract. 
+ unsafe { + match order { + Release => intrinsics::atomic_store_release(dst, val), + Relaxed => intrinsics::atomic_store_relaxed(dst, val), + SeqCst => intrinsics::atomic_store_seqcst(dst, val), + _ => unreachable!("{:?}", order), + } + } +} + +#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn atomic_compare_exchange( + dst: *mut u128, + old: u128, + new: u128, + success: Ordering, + failure: Ordering, +) -> Result<u128, u128> { + #[cfg(target_arch = "x86_64")] + let (val, ok) = { + #[cfg_attr(not(target_feature = "cmpxchg16b"), target_feature(enable = "cmpxchg16b"))] + #[cfg_attr(target_feature = "cmpxchg16b", inline)] + #[cfg_attr(not(target_feature = "cmpxchg16b"), inline(never))] + unsafe fn cmpxchg16b( + dst: *mut u128, + old: u128, + new: u128, + success: Ordering, + failure: Ordering, + ) -> (u128, bool) { + debug_assert!(dst as usize % 16 == 0); + #[cfg(not(target_feature = "cmpxchg16b"))] + { + debug_assert!(detect::detect().has_cmpxchg16b()); + } + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned (required by CMPXCHG16B), that there are no + // concurrent non-atomic operations, and that the CPU supports CMPXCHG16B. + let res = unsafe { core::arch::x86_64::cmpxchg16b(dst, old, new, success, failure) }; + (res, res == old) + } + #[cfg(portable_atomic_no_cmpxchg16b_intrinsic_stronger_failure_ordering)] + let success = crate::utils::upgrade_success_ordering(success, failure); + #[cfg(target_feature = "cmpxchg16b")] + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, that there are no concurrent non-atomic operations, + // and cfg guarantees that CMPXCHG16B is available at compile-time. 
+ unsafe { + cmpxchg16b(dst, old, new, success, failure) + } + #[cfg(not(target_feature = "cmpxchg16b"))] + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, and that there are no different kinds of concurrent accesses. + unsafe { + ifunc!(unsafe fn( + dst: *mut u128, old: u128, new: u128, success: Ordering, failure: Ordering + ) -> (u128, bool) { + if detect::detect().has_cmpxchg16b() { + cmpxchg16b + } else { + fallback::atomic_compare_exchange + } + }) + } + }; + #[cfg(not(target_arch = "x86_64"))] + // SAFETY: the caller must uphold the safety contract. + let (val, ok) = unsafe { + match (success, failure) { + (Relaxed, Relaxed) => intrinsics::atomic_cxchg_relaxed_relaxed(dst, old, new), + (Relaxed, Acquire) => intrinsics::atomic_cxchg_relaxed_acquire(dst, old, new), + (Relaxed, SeqCst) => intrinsics::atomic_cxchg_relaxed_seqcst(dst, old, new), + (Acquire, Relaxed) => intrinsics::atomic_cxchg_acquire_relaxed(dst, old, new), + (Acquire, Acquire) => intrinsics::atomic_cxchg_acquire_acquire(dst, old, new), + (Acquire, SeqCst) => intrinsics::atomic_cxchg_acquire_seqcst(dst, old, new), + (Release, Relaxed) => intrinsics::atomic_cxchg_release_relaxed(dst, old, new), + (Release, Acquire) => intrinsics::atomic_cxchg_release_acquire(dst, old, new), + (Release, SeqCst) => intrinsics::atomic_cxchg_release_seqcst(dst, old, new), + (AcqRel, Relaxed) => intrinsics::atomic_cxchg_acqrel_relaxed(dst, old, new), + (AcqRel, Acquire) => intrinsics::atomic_cxchg_acqrel_acquire(dst, old, new), + (AcqRel, SeqCst) => intrinsics::atomic_cxchg_acqrel_seqcst(dst, old, new), + (SeqCst, Relaxed) => intrinsics::atomic_cxchg_seqcst_relaxed(dst, old, new), + (SeqCst, Acquire) => intrinsics::atomic_cxchg_seqcst_acquire(dst, old, new), + (SeqCst, SeqCst) => intrinsics::atomic_cxchg_seqcst_seqcst(dst, old, new), + _ => unreachable!("{:?}, {:?}", success, failure), + } + }; + if ok { + Ok(val) + } else { + Err(val) + } +} + 
+#[cfg(target_arch = "x86_64")] +use atomic_compare_exchange as atomic_compare_exchange_weak; +#[cfg(not(target_arch = "x86_64"))] +#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn atomic_compare_exchange_weak( + dst: *mut u128, + old: u128, + new: u128, + success: Ordering, + failure: Ordering, +) -> Result<u128, u128> { + // SAFETY: the caller must uphold the safety contract. + let (val, ok) = unsafe { + match (success, failure) { + (Relaxed, Relaxed) => intrinsics::atomic_cxchgweak_relaxed_relaxed(dst, old, new), + (Relaxed, Acquire) => intrinsics::atomic_cxchgweak_relaxed_acquire(dst, old, new), + (Relaxed, SeqCst) => intrinsics::atomic_cxchgweak_relaxed_seqcst(dst, old, new), + (Acquire, Relaxed) => intrinsics::atomic_cxchgweak_acquire_relaxed(dst, old, new), + (Acquire, Acquire) => intrinsics::atomic_cxchgweak_acquire_acquire(dst, old, new), + (Acquire, SeqCst) => intrinsics::atomic_cxchgweak_acquire_seqcst(dst, old, new), + (Release, Relaxed) => intrinsics::atomic_cxchgweak_release_relaxed(dst, old, new), + (Release, Acquire) => intrinsics::atomic_cxchgweak_release_acquire(dst, old, new), + (Release, SeqCst) => intrinsics::atomic_cxchgweak_release_seqcst(dst, old, new), + (AcqRel, Relaxed) => intrinsics::atomic_cxchgweak_acqrel_relaxed(dst, old, new), + (AcqRel, Acquire) => intrinsics::atomic_cxchgweak_acqrel_acquire(dst, old, new), + (AcqRel, SeqCst) => intrinsics::atomic_cxchgweak_acqrel_seqcst(dst, old, new), + (SeqCst, Relaxed) => intrinsics::atomic_cxchgweak_seqcst_relaxed(dst, old, new), + (SeqCst, Acquire) => intrinsics::atomic_cxchgweak_seqcst_acquire(dst, old, new), + (SeqCst, SeqCst) => intrinsics::atomic_cxchgweak_seqcst_seqcst(dst, old, new), + _ => unreachable!("{:?}, {:?}", success, failure), + } + }; + if ok { + Ok(val) + } else { + Err(val) + } +} + +#[inline(always)] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn 
atomic_update<F>(dst: *mut u128, order: Ordering, mut f: F) -> u128 +where + F: FnMut(u128) -> u128, +{ + // SAFETY: the caller must uphold the safety contract. + unsafe { + // This is a private function and all instances of `f` only operate on the value + // loaded, so there is no need to synchronize the first load/failed CAS. + let mut old = atomic_load(dst, Ordering::Relaxed); + loop { + let next = f(old); + match atomic_compare_exchange_weak(dst, old, next, order, Ordering::Relaxed) { + Ok(x) => return x, + Err(x) => old = x, + } + } + } +} + +// On x86_64, we use core::arch::x86_64::cmpxchg16b instead of core::intrinsics. +// On s390x, LLVM (as of 16) generates libcalls for operations other than load/store/cmpxchg: https://godbolt.org/z/5a5T4hxMh +#[cfg(any(target_arch = "x86_64", target_arch = "s390x"))] +atomic_rmw_by_atomic_update!(); +// On powerpc64, LLVM (as of 16) doesn't support 128-bit atomic min/max: https://godbolt.org/z/3rebKcbdf +#[cfg(target_arch = "powerpc64")] +atomic_rmw_by_atomic_update!(cmp); + +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] +#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { + match order { + Acquire => intrinsics::atomic_xchg_acquire(dst, val), + Release => intrinsics::atomic_xchg_release(dst, val), + AcqRel => intrinsics::atomic_xchg_acqrel(dst, val), + Relaxed => intrinsics::atomic_xchg_relaxed(dst, val), + SeqCst => intrinsics::atomic_xchg_seqcst(dst, val), + _ => unreachable!("{:?}", order), + } + } +} + +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] +#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn atomic_add(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. 
+ unsafe { + match order { + Acquire => intrinsics::atomic_xadd_acquire(dst, val), + Release => intrinsics::atomic_xadd_release(dst, val), + AcqRel => intrinsics::atomic_xadd_acqrel(dst, val), + Relaxed => intrinsics::atomic_xadd_relaxed(dst, val), + SeqCst => intrinsics::atomic_xadd_seqcst(dst, val), + _ => unreachable!("{:?}", order), + } + } +} + +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] +#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn atomic_sub(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { + match order { + Acquire => intrinsics::atomic_xsub_acquire(dst, val), + Release => intrinsics::atomic_xsub_release(dst, val), + AcqRel => intrinsics::atomic_xsub_acqrel(dst, val), + Relaxed => intrinsics::atomic_xsub_relaxed(dst, val), + SeqCst => intrinsics::atomic_xsub_seqcst(dst, val), + _ => unreachable!("{:?}", order), + } + } +} + +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] +#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { + match order { + Acquire => intrinsics::atomic_and_acquire(dst, val), + Release => intrinsics::atomic_and_release(dst, val), + AcqRel => intrinsics::atomic_and_acqrel(dst, val), + Relaxed => intrinsics::atomic_and_relaxed(dst, val), + SeqCst => intrinsics::atomic_and_seqcst(dst, val), + _ => unreachable!("{:?}", order), + } + } +} + +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] +#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn atomic_nand(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. 
+ unsafe { + match order { + Acquire => intrinsics::atomic_nand_acquire(dst, val), + Release => intrinsics::atomic_nand_release(dst, val), + AcqRel => intrinsics::atomic_nand_acqrel(dst, val), + Relaxed => intrinsics::atomic_nand_relaxed(dst, val), + SeqCst => intrinsics::atomic_nand_seqcst(dst, val), + _ => unreachable!("{:?}", order), + } + } +} + +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] +#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { + match order { + Acquire => intrinsics::atomic_or_acquire(dst, val), + Release => intrinsics::atomic_or_release(dst, val), + AcqRel => intrinsics::atomic_or_acqrel(dst, val), + Relaxed => intrinsics::atomic_or_relaxed(dst, val), + SeqCst => intrinsics::atomic_or_seqcst(dst, val), + _ => unreachable!("{:?}", order), + } + } +} + +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] +#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn atomic_xor(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { + match order { + Acquire => intrinsics::atomic_xor_acquire(dst, val), + Release => intrinsics::atomic_xor_release(dst, val), + AcqRel => intrinsics::atomic_xor_acqrel(dst, val), + Relaxed => intrinsics::atomic_xor_relaxed(dst, val), + SeqCst => intrinsics::atomic_xor_seqcst(dst, val), + _ => unreachable!("{:?}", order), + } + } +} + +#[cfg(not(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "s390x")))] +#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn atomic_max(dst: *mut u128, val: u128, order: Ordering) -> i128 { + // SAFETY: the caller must uphold the safety contract. 
+ unsafe { + match order { + Acquire => intrinsics::atomic_max_acquire(dst.cast::<i128>(), val as i128), + Release => intrinsics::atomic_max_release(dst.cast::<i128>(), val as i128), + AcqRel => intrinsics::atomic_max_acqrel(dst.cast::<i128>(), val as i128), + Relaxed => intrinsics::atomic_max_relaxed(dst.cast::<i128>(), val as i128), + SeqCst => intrinsics::atomic_max_seqcst(dst.cast::<i128>(), val as i128), + _ => unreachable!("{:?}", order), + } + } +} + +#[cfg(not(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "s390x")))] +#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn atomic_min(dst: *mut u128, val: u128, order: Ordering) -> i128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { + match order { + Acquire => intrinsics::atomic_min_acquire(dst.cast::<i128>(), val as i128), + Release => intrinsics::atomic_min_release(dst.cast::<i128>(), val as i128), + AcqRel => intrinsics::atomic_min_acqrel(dst.cast::<i128>(), val as i128), + Relaxed => intrinsics::atomic_min_relaxed(dst.cast::<i128>(), val as i128), + SeqCst => intrinsics::atomic_min_seqcst(dst.cast::<i128>(), val as i128), + _ => unreachable!("{:?}", order), + } + } +} + +#[cfg(not(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "s390x")))] +#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn atomic_umax(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. 
+ unsafe { + match order { + Acquire => intrinsics::atomic_umax_acquire(dst, val), + Release => intrinsics::atomic_umax_release(dst, val), + AcqRel => intrinsics::atomic_umax_acqrel(dst, val), + Relaxed => intrinsics::atomic_umax_relaxed(dst, val), + SeqCst => intrinsics::atomic_umax_seqcst(dst, val), + _ => unreachable!("{:?}", order), + } + } +} + +#[cfg(not(any(target_arch = "x86_64", target_arch = "powerpc64", target_arch = "s390x")))] +#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn atomic_umin(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { + match order { + Acquire => intrinsics::atomic_umin_acquire(dst, val), + Release => intrinsics::atomic_umin_release(dst, val), + AcqRel => intrinsics::atomic_umin_acqrel(dst, val), + Relaxed => intrinsics::atomic_umin_relaxed(dst, val), + SeqCst => intrinsics::atomic_umin_seqcst(dst, val), + _ => unreachable!("{:?}", order), + } + } +} + +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] +#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn atomic_not(dst: *mut u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_xor(dst, core::u128::MAX, order) } +} + +#[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] +#[inline] +#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces +unsafe fn atomic_neg(dst: *mut u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. 
+ unsafe { atomic_update(dst, order, u128::wrapping_neg) } +} + +#[cfg(not(target_arch = "x86_64"))] +#[inline] +const fn is_lock_free() -> bool { + IS_ALWAYS_LOCK_FREE +} +#[cfg(not(target_arch = "x86_64"))] +const IS_ALWAYS_LOCK_FREE: bool = true; + +#[cfg(target_arch = "x86_64")] +#[inline] +fn is_lock_free() -> bool { + #[cfg(target_feature = "cmpxchg16b")] + { + // CMPXCHG16B is available at compile-time. + true + } + #[cfg(not(target_feature = "cmpxchg16b"))] + { + detect::detect().has_cmpxchg16b() + } +} +#[cfg(target_arch = "x86_64")] +const IS_ALWAYS_LOCK_FREE: bool = cfg!(target_feature = "cmpxchg16b"); + +atomic128!(AtomicI128, i128, atomic_max, atomic_min); +atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); + +#[cfg(test)] +mod tests { + use super::*; + + test_atomic_int!(i128); + test_atomic_int!(u128); + + // load/store/swap implementation is not affected by signedness, so it is + // enough to test only unsigned types. + stress_test!(u128); +} diff --git a/vendor/portable-atomic/src/imp/atomic128/macros.rs b/vendor/portable-atomic/src/imp/atomic128/macros.rs new file mode 100644 index 000000000..fd71ef63d --- /dev/null +++ b/vendor/portable-atomic/src/imp/atomic128/macros.rs @@ -0,0 +1,321 @@ +macro_rules! atomic128 { + ($atomic_type:ident, $int_type:ident, $atomic_max:ident, $atomic_min:ident) => { + #[repr(C, align(16))] + pub(crate) struct $atomic_type { + v: core::cell::UnsafeCell<$int_type>, + } + + // Send is implicitly implemented. + // SAFETY: any data races are prevented by atomic intrinsics. 
+ unsafe impl Sync for $atomic_type {} + + impl_default_no_fetch_ops!($atomic_type, $int_type); + impl_default_bit_opts!($atomic_type, $int_type); + impl $atomic_type { + #[inline] + pub(crate) const fn new(v: $int_type) -> Self { + Self { v: core::cell::UnsafeCell::new(v) } + } + + #[inline] + pub(crate) fn is_lock_free() -> bool { + is_lock_free() + } + #[inline] + pub(crate) const fn is_always_lock_free() -> bool { + IS_ALWAYS_LOCK_FREE + } + + #[inline] + pub(crate) fn get_mut(&mut self) -> &mut $int_type { + // SAFETY: the mutable reference guarantees unique ownership. + // (UnsafeCell::get_mut requires Rust 1.50) + unsafe { &mut *self.v.get() } + } + + #[inline] + pub(crate) fn into_inner(self) -> $int_type { + self.v.into_inner() + } + + #[inline] + #[cfg_attr( + any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri), + track_caller + )] + pub(crate) fn load(&self, order: Ordering) -> $int_type { + crate::utils::assert_load_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_load(self.v.get().cast::<u128>(), order) as $int_type } + } + + #[inline] + #[cfg_attr( + any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri), + track_caller + )] + pub(crate) fn store(&self, val: $int_type, order: Ordering) { + crate::utils::assert_store_ordering(order); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_store(self.v.get().cast::<u128>(), val as u128, order) } + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn swap(&self, val: $int_type, order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. 
+ unsafe { atomic_swap(self.v.get().cast::<u128>(), val as u128, order) as $int_type } + } + + #[inline] + #[cfg_attr( + any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri), + track_caller + )] + pub(crate) fn compare_exchange( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + crate::utils::assert_compare_exchange_ordering(success, failure); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + match atomic_compare_exchange( + self.v.get().cast::<u128>(), + current as u128, + new as u128, + success, + failure, + ) { + Ok(v) => Ok(v as $int_type), + Err(v) => Err(v as $int_type), + } + } + } + + #[inline] + #[cfg_attr( + any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri), + track_caller + )] + pub(crate) fn compare_exchange_weak( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + crate::utils::assert_compare_exchange_ordering(success, failure); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + match atomic_compare_exchange_weak( + self.v.get().cast::<u128>(), + current as u128, + new as u128, + success, + failure, + ) { + Ok(v) => Ok(v as $int_type), + Err(v) => Err(v as $int_type), + } + } + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_add(&self, val: $int_type, order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. 
+ unsafe { atomic_add(self.v.get().cast::<u128>(), val as u128, order) as $int_type } + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_sub(&self, val: $int_type, order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_sub(self.v.get().cast::<u128>(), val as u128, order) as $int_type } + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_and(&self, val: $int_type, order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_and(self.v.get().cast::<u128>(), val as u128, order) as $int_type } + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_nand(&self, val: $int_type, order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_nand(self.v.get().cast::<u128>(), val as u128, order) as $int_type } + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_or(&self, val: $int_type, order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. 
+ unsafe { atomic_or(self.v.get().cast::<u128>(), val as u128, order) as $int_type } + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_xor(&self, val: $int_type, order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_xor(self.v.get().cast::<u128>(), val as u128, order) as $int_type } + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_max(&self, val: $int_type, order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { $atomic_max(self.v.get().cast::<u128>(), val as u128, order) as $int_type } + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_min(&self, val: $int_type, order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { $atomic_min(self.v.get().cast::<u128>(), val as u128, order) as $int_type } + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. 
+ unsafe { atomic_not(self.v.get().cast::<u128>(), order) as $int_type } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn not(&self, order: Ordering) { + self.fetch_not(order); + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_neg(&self, order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_neg(self.v.get().cast::<u128>(), order) as $int_type } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn neg(&self, order: Ordering) { + self.fetch_neg(order); + } + + #[inline] + pub(crate) const fn as_ptr(&self) -> *mut $int_type { + self.v.get() + } + } + }; +} + +#[cfg(any(target_arch = "powerpc64", target_arch = "s390x", target_arch = "x86_64"))] +#[allow(unused_macros)] // also used by intrinsics.rs +macro_rules! atomic_rmw_by_atomic_update { + () => { + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |_| val) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_add(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |x| x.wrapping_add(val)) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_sub(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. 
+ unsafe { atomic_update(dst, order, |x| x.wrapping_sub(val)) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |x| x & val) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_nand(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |x| !(x & val)) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |x| x | val) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_xor(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |x| x ^ val) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_not(dst: *mut u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |x| !x) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_neg(dst: *mut u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. 
+ unsafe { atomic_update(dst, order, u128::wrapping_neg) } + } + atomic_rmw_by_atomic_update!(cmp); + }; + (cmp) => { + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_max(dst: *mut u128, val: u128, order: Ordering) -> u128 { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + atomic_update(dst, order, |x| core::cmp::max(x as i128, val as i128) as u128) + } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_umax(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |x| core::cmp::max(x, val)) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_min(dst: *mut u128, val: u128, order: Ordering) -> u128 { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + atomic_update(dst, order, |x| core::cmp::min(x as i128, val as i128) as u128) + } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_umin(dst: *mut u128, val: u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |x| core::cmp::min(x, val)) } + } + }; +} diff --git a/vendor/portable-atomic/src/imp/atomic128/powerpc64.rs b/vendor/portable-atomic/src/imp/atomic128/powerpc64.rs new file mode 100644 index 000000000..454f2097f --- /dev/null +++ b/vendor/portable-atomic/src/imp/atomic128/powerpc64.rs @@ -0,0 +1,900 @@ +// Atomic{I,U}128 implementation on PowerPC64. 
+// +// powerpc64 on pwr8+ supports 128-bit atomics: +// https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445 +// https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll +// https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/test/CodeGen/PowerPC/atomics-i128.ll +// +// powerpc64le is pwr8+ by default https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/lib/Target/PowerPC/PPC.td#L663 +// See also https://github.com/rust-lang/rust/issues/59932 +// +// Note that we do not separate LL and SC into separate functions, but handle +// them within a single asm block. This is because it is theoretically possible +// for the compiler to insert operations that might clear the reservation between +// LL and SC. See aarch64.rs for details. +// +// Note: On Miri and ThreadSanitizer, which do not support inline assembly, we don't use +// this module and use intrinsics.rs instead. +// +// Refs: +// - Power ISA https://openpowerfoundation.org/specifications/isa +// - AIX Assembler language reference https://www.ibm.com/docs/en/aix/7.3?topic=aix-assembler-language-reference +// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit +// +// Generated asm: +// - powerpc64 (pwr8) https://godbolt.org/z/sj9ao7qKd +// - powerpc64le https://godbolt.org/z/hY7Wdf6aT + +include!("macros.rs"); + +#[cfg(not(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", +)))] +#[path = "../fallback/outline_atomics.rs"] +mod fallback; + +// On musl with static linking, it seems that getauxval is not always available. +// See detect/auxv.rs for more.
+#[cfg(not(portable_atomic_no_outline_atomics))] +#[cfg(any(test, portable_atomic_outline_atomics))] // TODO(powerpc64): currently disabled by default +#[cfg(any( + test, + not(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + )), +))] +#[cfg(any( + all( + target_os = "linux", + any( + target_env = "gnu", + all(target_env = "musl", not(target_feature = "crt-static")), + portable_atomic_outline_atomics, + ), + ), + target_os = "freebsd", +))] +#[path = "detect/auxv.rs"] +mod detect; + +use core::{arch::asm, sync::atomic::Ordering}; + +// Debug-build sanity check invoked at the top of the *_pwr8 functions. +// When quadword-atomics is not enabled at compile time, it asserts that run-time +// detection reports quadword atomics; otherwise the cfg removes the check entirely. +macro_rules! debug_assert_pwr8 { + () => { + #[cfg(not(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + )))] + { + debug_assert!(detect::detect().has_quadword_atomics()); + } + }; +} + +// Refs: https://www.ibm.com/docs/en/aix/7.3?topic=ops-machine-pseudo-op +// +// This is similar to #[target_feature(enable = "quadword-atomics")], except that there are +// no compiler guarantees regarding (un)inlining, and the scope is within an asm +// block rather than a function. We use this directive because #[target_feature(enable = "quadword-atomics")] +// is not supported as of Rust 1.70-nightly. +// +// start_pwr8 and end_pwr8 must be used in pairs. +// +// Note: If power8 instructions are not available at compile-time, we must guarantee that +// the function that uses them is not inlined into a function where it is not +// clear whether power8 instructions are available. Otherwise, (even if we checked whether +// power8 instructions are available at run-time) optimizations that reorder its +// instructions across the if condition might introduce undefined behavior. +// (see also https://rust-lang.github.io/rfcs/2045-target-feature.html#safely-inlining-target_feature-functions-on-more-contexts) +// However, our code uses the ifunc helper macro that works with function pointers, +// so we usually don't have to worry about this.
+#[cfg(not(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", +)))] +macro_rules! start_pwr8 { + () => { + ".machine push\n.machine power8" + }; +} +#[cfg(not(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", +)))] +macro_rules! end_pwr8 { + () => { + ".machine pop" + }; +} +#[cfg(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", +))] +macro_rules! start_pwr8 { + () => { + "" + }; +} +#[cfg(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", +))] +macro_rules! end_pwr8 { + () => { + "" + }; +} + +/// A 128-bit value represented as a pair of 64-bit values. +/// +/// This type is `#[repr(C)]`, both fields have the same in-memory representation +/// and are plain old datatypes, so access to the fields is always safe. +#[derive(Clone, Copy)] +#[repr(C)] +union U128 { + whole: u128, + pair: Pair, +} +// A pair of 64-bit values in native-endian order. +#[derive(Clone, Copy)] +#[repr(C)] +struct Pair { + #[cfg(target_endian = "big")] + hi: u64, + lo: u64, + #[cfg(target_endian = "little")] + hi: u64, +} + +macro_rules! atomic_rmw { + ($op:ident, $order:ident) => { + match $order { + Ordering::Relaxed => $op!("", ""), + Ordering::Acquire => $op!("lwsync", ""), + Ordering::Release => $op!("", "lwsync"), + Ordering::AcqRel => $op!("lwsync", "lwsync"), + Ordering::SeqCst => $op!("lwsync", "sync"), + _ => unreachable!("{:?}", $order), + } + }; +} + +#[inline] +unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { + #[cfg(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + ))] + // SAFETY: the caller must uphold the safety contract. 
+ unsafe { + atomic_load_pwr8(src, order) + } + #[cfg(not(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + )))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + fn_alias! { + // inline(never) is just a hint and also not strictly necessary + // because we use ifunc helper macro, but used for clarity. + #[inline(never)] + unsafe fn(src: *mut u128) -> u128; + atomic_load_pwr8_relaxed = atomic_load_pwr8(Ordering::Relaxed); + atomic_load_pwr8_acquire = atomic_load_pwr8(Ordering::Acquire); + atomic_load_pwr8_seqcst = atomic_load_pwr8(Ordering::SeqCst); + } + match order { + Ordering::Relaxed => { + ifunc!(unsafe fn(src: *mut u128) -> u128 { + if detect::detect().has_quadword_atomics() { + atomic_load_pwr8_relaxed + } else { + fallback::atomic_load_non_seqcst + } + }) + } + Ordering::Acquire => { + ifunc!(unsafe fn(src: *mut u128) -> u128 { + if detect::detect().has_quadword_atomics() { + atomic_load_pwr8_acquire + } else { + fallback::atomic_load_non_seqcst + } + }) + } + Ordering::SeqCst => { + ifunc!(unsafe fn(src: *mut u128) -> u128 { + if detect::detect().has_quadword_atomics() { + atomic_load_pwr8_seqcst + } else { + fallback::atomic_load_seqcst + } + }) + } + _ => unreachable!("{:?}", order), + } + } +} +#[inline] +unsafe fn atomic_load_pwr8(src: *mut u128, order: Ordering) -> u128 { + debug_assert!(src as usize % 16 == 0); + debug_assert_pwr8!(); + + // SAFETY: the caller must uphold the safety contract. + // + // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA + unsafe { + let (out_hi, out_lo); + macro_rules! 
atomic_load_acquire { + ($release:tt) => { + asm!( + start_pwr8!(), + $release, + "lq %r4, 0({src})", + // Lightweight acquire sync + // Refs: https://github.com/boostorg/atomic/blob/boost-1.79.0/include/boost/atomic/detail/core_arch_ops_gcc_ppc.hpp#L47-L62 + "cmpd %cr7, %r4, %r4", + "bne- %cr7, 2f", + "2:", + "isync", + end_pwr8!(), + src = in(reg_nonzero) ptr_reg!(src), + // Quadword atomic instructions work with an even/odd pair of the specified register and the subsequent register. + // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater. + out("r4") out_hi, + out("r5") out_lo, + out("cr7") _, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => { + asm!( + start_pwr8!(), + "lq %r4, 0({src})", + end_pwr8!(), + src = in(reg_nonzero) ptr_reg!(src), + // Quadword atomic instructions work with an even/odd pair of the specified register and the subsequent register. + // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater. + out("r4") out_hi, + out("r5") out_lo, + options(nostack, preserves_flags, readonly), + ); + } + Ordering::Acquire => atomic_load_acquire!(""), + Ordering::SeqCst => atomic_load_acquire!("sync"), + _ => unreachable!("{:?}", order), + } + U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole + } +} + +#[inline] +unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { + #[cfg(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + ))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + atomic_store_pwr8(dst, val, order); + } + #[cfg(not(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + )))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + fn_alias! { + // inline(never) is just a hint and also not strictly necessary + // because we use ifunc helper macro, but used for clarity.
+ #[inline(never)] + unsafe fn(dst: *mut u128, val: u128); + atomic_store_pwr8_relaxed = atomic_store_pwr8(Ordering::Relaxed); + atomic_store_pwr8_release = atomic_store_pwr8(Ordering::Release); + atomic_store_pwr8_seqcst = atomic_store_pwr8(Ordering::SeqCst); + } + match order { + Ordering::Relaxed => { + ifunc!(unsafe fn(dst: *mut u128, val: u128) { + if detect::detect().has_quadword_atomics() { + atomic_store_pwr8_relaxed + } else { + fallback::atomic_store_non_seqcst + } + }); + } + Ordering::Release => { + ifunc!(unsafe fn(dst: *mut u128, val: u128) { + if detect::detect().has_quadword_atomics() { + atomic_store_pwr8_release + } else { + fallback::atomic_store_non_seqcst + } + }); + } + Ordering::SeqCst => { + ifunc!(unsafe fn(dst: *mut u128, val: u128) { + if detect::detect().has_quadword_atomics() { + atomic_store_pwr8_seqcst + } else { + fallback::atomic_store_seqcst + } + }); + } + _ => unreachable!("{:?}", order), + } + } +} +#[inline] +unsafe fn atomic_store_pwr8(dst: *mut u128, val: u128, order: Ordering) { + debug_assert!(dst as usize % 16 == 0); + debug_assert_pwr8!(); + + // SAFETY: the caller must uphold the safety contract. + // + // Refs: "3.3.4 Fixed Point Load and Store Quadword Instructions" of Power ISA + unsafe { + let val = U128 { whole: val }; + macro_rules! atomic_store { + ($release:tt) => { + asm!( + start_pwr8!(), + $release, + "stq %r4, 0({dst})", + end_pwr8!(), + dst = in(reg_nonzero) ptr_reg!(dst), + // Quadword atomic instructions work with an even/odd pair of the specified register and the subsequent register. + // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater.
+ in("r4") val.pair.hi, + in("r5") val.pair.lo, + options(nostack, preserves_flags), + ) + }; + } + match order { + Ordering::Relaxed => atomic_store!(""), + Ordering::Release => atomic_store!("lwsync"), + Ordering::SeqCst => atomic_store!("sync"), + _ => unreachable!("{:?}", order), + } + } +} + +#[inline] +unsafe fn atomic_compare_exchange( + dst: *mut u128, + old: u128, + new: u128, + success: Ordering, + failure: Ordering, +) -> Result<u128, u128> { + let success = crate::utils::upgrade_success_ordering(success, failure); + + #[cfg(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + ))] + // SAFETY: the caller must uphold the safety contract. + let (res, ok) = unsafe { atomic_compare_exchange_pwr8(dst, old, new, success) }; + #[cfg(not(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + )))] + // SAFETY: the caller must uphold the safety contract. + let (res, ok) = unsafe { atomic_compare_exchange_ifunc(dst, old, new, success) }; + if ok { + Ok(res) + } else { + Err(res) + } +} +#[inline] +unsafe fn atomic_compare_exchange_pwr8( + dst: *mut u128, + old: u128, + new: u128, + order: Ordering, +) -> (u128, bool) { + debug_assert!(dst as usize % 16 == 0); + debug_assert_pwr8!(); + + // SAFETY: the caller must uphold the safety contract. + // + // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA + let res = unsafe { + let old = U128 { whole: old }; + let new = U128 { whole: new }; + let (mut prev_hi, mut prev_lo); + macro_rules! cmpxchg { + ($acquire:tt, $release:tt) => { + asm!( + start_pwr8!(), + $release, + "2:", + "lqarx %r8, 0, {dst}", + "xor {tmp_lo}, %r9, {old_lo}", + "xor {tmp_hi}, %r8, {old_hi}", + "or. {tmp_lo}, {tmp_lo}, {tmp_hi}", + "bne %cr0, 3f", + "stqcx. 
%r6, 0, {dst}", + "bne %cr0, 2b", + "3:", + $acquire, + end_pwr8!(), + dst = in(reg_nonzero) ptr_reg!(dst), + old_hi = in(reg_nonzero) old.pair.hi, + old_lo = in(reg_nonzero) old.pair.lo, + tmp_hi = out(reg_nonzero) _, + tmp_lo = out(reg_nonzero) _, + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater. + in("r6") new.pair.hi, + in("r7") new.pair.lo, + out("r8") prev_hi, + out("r9") prev_lo, + out("cr0") _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(cmpxchg, order); + U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole + }; + (res, res == old) +} + +// TODO: LLVM appears to generate strong CAS for powerpc64 128-bit weak CAS, +// so we always use strong CAS for now. +use atomic_compare_exchange as atomic_compare_exchange_weak; + +#[cfg(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", +))] +use atomic_swap_pwr8 as atomic_swap; +// Do not use atomic_rmw_ll_sc_3 because it needs extra MR to implement swap. +#[inline] +unsafe fn atomic_swap_pwr8(dst: *mut u128, val: u128, order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + debug_assert_pwr8!(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let val = U128 { whole: val }; + let (mut prev_hi, mut prev_lo); + macro_rules! swap { + ($acquire:tt, $release:tt) => { + asm!( + start_pwr8!(), + $release, + "2:", + "lqarx %r6, 0, {dst}", + "stqcx. %r8, 0, {dst}", + "bne %cr0, 2b", + $acquire, + end_pwr8!(), + dst = in(reg_nonzero) ptr_reg!(dst), + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater. 
+ out("r6") prev_hi, + out("r7") prev_lo, + in("r8") val.pair.hi, + in("r9") val.pair.lo, + out("cr0") _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(swap, order); + U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole + } +} + +/// Atomic RMW by LL/SC loop (3 arguments) +/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;` +/// +/// $op can use the following registers: +/// - val_hi/val_lo pair: val argument (read-only for `$op`) +/// - r6/r7 pair: previous value loaded by ll (read-only for `$op`) +/// - r8/r9 pair: new value that will be stored by sc +macro_rules! atomic_rmw_ll_sc_3 { + ($name:ident as $reexport_name:ident, [$($reg:tt)*], $($op:tt)*) => { + #[cfg(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + ))] + use $name as $reexport_name; + #[inline] + unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + debug_assert_pwr8!(); + // SAFETY: the caller must uphold the safety contract. + unsafe { + let val = U128 { whole: val }; + let (mut prev_hi, mut prev_lo); + macro_rules! op { + ($acquire:tt, $release:tt) => { + asm!( + start_pwr8!(), + $release, + "2:", + "lqarx %r6, 0, {dst}", + $($op)* + "stqcx. %r8, 0, {dst}", + "bne %cr0, 2b", + $acquire, + end_pwr8!(), + dst = in(reg_nonzero) ptr_reg!(dst), + val_hi = in(reg_nonzero) val.pair.hi, + val_lo = in(reg_nonzero) val.pair.lo, + $($reg)* + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater. 
+ out("r6") prev_hi, + out("r7") prev_lo, + out("r8") _, // new (hi) + out("r9") _, // new (lo) + out("cr0") _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(op, order); + U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole + } + } + }; +} +/// Atomic RMW by LL/SC loop (2 arguments) +/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;` +/// +/// $op can use the following registers: +/// - r6/r7 pair: previous value loaded by ll (read-only for `$op`) +/// - r8/r9 pair: new value that will be stored by sc +macro_rules! atomic_rmw_ll_sc_2 { + ($name:ident as $reexport_name:ident, [$($reg:tt)*], $($op:tt)*) => { + #[cfg(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + ))] + use $name as $reexport_name; + #[inline] + unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + debug_assert_pwr8!(); + // SAFETY: the caller must uphold the safety contract. + unsafe { + let (mut prev_hi, mut prev_lo); + macro_rules! op { + ($acquire:tt, $release:tt) => { + asm!( + start_pwr8!(), + $release, + "2:", + "lqarx %r6, 0, {dst}", + $($op)* + "stqcx. %r8, 0, {dst}", + "bne %cr0, 2b", + $acquire, + end_pwr8!(), + dst = in(reg_nonzero) ptr_reg!(dst), + $($reg)* + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or greater. + out("r6") prev_hi, + out("r7") prev_lo, + out("r8") _, // new (hi) + out("r9") _, // new (lo) + out("cr0") _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(op, order); + U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole + } + } + }; +} + +atomic_rmw_ll_sc_3! { + atomic_add_pwr8 as atomic_add, [out("xer") _,], + "addc %r9, {val_lo}, %r7", + "adde %r8, {val_hi}, %r6", +} +atomic_rmw_ll_sc_3! 
{ + atomic_sub_pwr8 as atomic_sub, [out("xer") _,], + "subc %r9, %r7, {val_lo}", + "subfe %r8, {val_hi}, %r6", +} +atomic_rmw_ll_sc_3! { + atomic_and_pwr8 as atomic_and, [], + "and %r9, {val_lo}, %r7", + "and %r8, {val_hi}, %r6", +} +atomic_rmw_ll_sc_3! { + atomic_nand_pwr8 as atomic_nand, [], + "nand %r9, {val_lo}, %r7", + "nand %r8, {val_hi}, %r6", +} +atomic_rmw_ll_sc_3! { + atomic_or_pwr8 as atomic_or, [], + "or %r9, {val_lo}, %r7", + "or %r8, {val_hi}, %r6", +} +atomic_rmw_ll_sc_3! { + atomic_xor_pwr8 as atomic_xor, [], + "xor %r9, {val_lo}, %r7", + "xor %r8, {val_hi}, %r6", +} +atomic_rmw_ll_sc_3! { + atomic_max_pwr8 as atomic_max, [out("cr1") _,], + "cmpld %r7, {val_lo}", // (unsigned) compare lo 64-bit, store result to cr0 + "iselgt %r9, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0 + "cmpd %cr1, %r6, {val_hi}", // (signed) compare hi 64-bit, store result to cr1 + "isel %r8, %r7, {val_lo}, 5", // select lo 64-bit based on GT bit in cr1 + "cmpld %r6, {val_hi}", // (unsigned) compare hi 64-bit, store result to cr0 + "iseleq %r9, %r9, %r8", // select lo 64-bit based on EQ bit in cr0 + "isel %r8, %r6, {val_hi}, 5", // select hi 64-bit based on GT bit in cr1 +} +atomic_rmw_ll_sc_3! { + atomic_umax_pwr8 as atomic_umax, [], + "cmpld %r7, {val_lo}", // compare lo 64-bit, store result to cr0 + "iselgt %r9, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0 + "cmpld %r6, {val_hi}", // compare hi 64-bit, store result to cr0 + "iselgt %r8, %r7, {val_lo}", // select lo 64-bit based on GT bit in cr0 + "iseleq %r9, %r9, %r8", // select lo 64-bit based on EQ bit in cr0 + "iselgt %r8, %r6, {val_hi}", // select hi 64-bit based on GT bit in cr0 +} +atomic_rmw_ll_sc_3! 
{ + atomic_min_pwr8 as atomic_min, [out("cr1") _,], + "cmpld %r7, {val_lo}", // (unsigned) compare lo 64-bit, store result to cr0 + "isellt %r9, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0 + "cmpd %cr1, %r6, {val_hi}", // (signed) compare hi 64-bit, store result to cr1 + "isel %r8, %r7, {val_lo}, 4", // select lo 64-bit based on LT bit in cr1 + "cmpld %r6, {val_hi}", // (unsigned) compare hi 64-bit, store result to cr0 + "iseleq %r9, %r9, %r8", // select lo 64-bit based on EQ bit in cr0 + "isel %r8, %r6, {val_hi}, 4", // select hi 64-bit based on LT bit in cr1 +} +atomic_rmw_ll_sc_3! { + atomic_umin_pwr8 as atomic_umin, [], + "cmpld %r7, {val_lo}", // compare lo 64-bit, store result to cr0 + "isellt %r9, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0 + "cmpld %r6, {val_hi}", // compare hi 64-bit, store result to cr0 + "isellt %r8, %r7, {val_lo}", // select lo 64-bit based on LT bit in cr0 + "iseleq %r9, %r9, %r8", // select lo 64-bit based on EQ bit in cr0 + "isellt %r8, %r6, {val_hi}", // select hi 64-bit based on LT bit in cr0 +} + +#[cfg(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", +))] +use atomic_not_pwr8 as atomic_not; +#[inline] +unsafe fn atomic_not_pwr8(dst: *mut u128, order: Ordering) -> u128 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_xor_pwr8(dst, core::u128::MAX, order) } +} + +#[cfg(portable_atomic_llvm_16)] +atomic_rmw_ll_sc_2! { + atomic_neg_pwr8 as atomic_neg, [out("xer") _,], + "subfic %r9, %r7, 0", + "subfze %r8, %r6", +} +// LLVM 15 miscompiles subfic. +#[cfg(not(portable_atomic_llvm_16))] +atomic_rmw_ll_sc_2! { + atomic_neg_pwr8 as atomic_neg, [zero = in(reg_nonzero) 0_u64, out("xer") _,], + "subc %r9, {zero}, %r7", + "subfze %r8, %r6", +} + +macro_rules! 
atomic_rmw_with_ifunc { + ( + unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)?; + pwr8 = $pwr8_fn:ident; + non_seqcst_fallback = $non_seqcst_fallback_fn:ident; + seqcst_fallback = $seqcst_fallback_fn:ident; + ) => { + #[cfg(not(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + )))] + #[inline] + unsafe fn $name($($arg)*, order: Ordering) $(-> $ret_ty)? { + fn_alias! { + // inline(never) is just a hint and also not strictly necessary + // because we use ifunc helper macro, but used for clarity. + #[inline(never)] + unsafe fn($($arg)*) $(-> $ret_ty)?; + pwr8_relaxed_fn = $pwr8_fn(Ordering::Relaxed); + pwr8_acquire_fn = $pwr8_fn(Ordering::Acquire); + pwr8_release_fn = $pwr8_fn(Ordering::Release); + pwr8_acqrel_fn = $pwr8_fn(Ordering::AcqRel); + pwr8_seqcst_fn = $pwr8_fn(Ordering::SeqCst); + } + // SAFETY: the caller must uphold the safety contract. + // We only call pwr8_fn if quadword-atomics is available. + unsafe { + match order { + Ordering::Relaxed => { + ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? { + if detect::detect().has_quadword_atomics() { + pwr8_relaxed_fn + } else { + fallback::$non_seqcst_fallback_fn + } + }) + } + Ordering::Acquire => { + ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? { + if detect::detect().has_quadword_atomics() { + pwr8_acquire_fn + } else { + fallback::$non_seqcst_fallback_fn + } + }) + } + Ordering::Release => { + ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? { + if detect::detect().has_quadword_atomics() { + pwr8_release_fn + } else { + fallback::$non_seqcst_fallback_fn + } + }) + } + Ordering::AcqRel => { + ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? { + if detect::detect().has_quadword_atomics() { + pwr8_acqrel_fn + } else { + fallback::$non_seqcst_fallback_fn + } + }) + } + Ordering::SeqCst => { + ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? 
{ + if detect::detect().has_quadword_atomics() { + pwr8_seqcst_fn + } else { + fallback::$seqcst_fallback_fn + } + }) + } + _ => unreachable!("{:?}", order), + } + } + } + }; +} + +atomic_rmw_with_ifunc! { + unsafe fn atomic_compare_exchange_ifunc(dst: *mut u128, old: u128, new: u128) -> (u128, bool); + pwr8 = atomic_compare_exchange_pwr8; + non_seqcst_fallback = atomic_compare_exchange_non_seqcst; + seqcst_fallback = atomic_compare_exchange_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128; + pwr8 = atomic_swap_pwr8; + non_seqcst_fallback = atomic_swap_non_seqcst; + seqcst_fallback = atomic_swap_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128; + pwr8 = atomic_add_pwr8; + non_seqcst_fallback = atomic_add_non_seqcst; + seqcst_fallback = atomic_add_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128; + pwr8 = atomic_sub_pwr8; + non_seqcst_fallback = atomic_sub_non_seqcst; + seqcst_fallback = atomic_sub_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128; + pwr8 = atomic_and_pwr8; + non_seqcst_fallback = atomic_and_non_seqcst; + seqcst_fallback = atomic_and_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128; + pwr8 = atomic_nand_pwr8; + non_seqcst_fallback = atomic_nand_non_seqcst; + seqcst_fallback = atomic_nand_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128; + pwr8 = atomic_or_pwr8; + non_seqcst_fallback = atomic_or_non_seqcst; + seqcst_fallback = atomic_or_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128; + pwr8 = atomic_xor_pwr8; + non_seqcst_fallback = atomic_xor_non_seqcst; + seqcst_fallback = atomic_xor_seqcst; +} +atomic_rmw_with_ifunc! 
{ + unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128; + pwr8 = atomic_max_pwr8; + non_seqcst_fallback = atomic_max_non_seqcst; + seqcst_fallback = atomic_max_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128; + pwr8 = atomic_umax_pwr8; + non_seqcst_fallback = atomic_umax_non_seqcst; + seqcst_fallback = atomic_umax_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128; + pwr8 = atomic_min_pwr8; + non_seqcst_fallback = atomic_min_non_seqcst; + seqcst_fallback = atomic_min_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128; + pwr8 = atomic_umin_pwr8; + non_seqcst_fallback = atomic_umin_non_seqcst; + seqcst_fallback = atomic_umin_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_not(dst: *mut u128) -> u128; + pwr8 = atomic_not_pwr8; + non_seqcst_fallback = atomic_not_non_seqcst; + seqcst_fallback = atomic_not_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_neg(dst: *mut u128) -> u128; + pwr8 = atomic_neg_pwr8; + non_seqcst_fallback = atomic_neg_non_seqcst; + seqcst_fallback = atomic_neg_seqcst; +} + +#[inline] +fn is_lock_free() -> bool { + #[cfg(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + ))] + { + // lqarx and stqcx. instructions are statically available. 
+ true + } + #[cfg(not(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + )))] + { + detect::detect().has_quadword_atomics() + } +} +const IS_ALWAYS_LOCK_FREE: bool = cfg!(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", +)); + +atomic128!(AtomicI128, i128, atomic_max, atomic_min); +atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); + +#[cfg(test)] +mod tests { + use super::*; + + test_atomic_int!(i128); + test_atomic_int!(u128); + + // load/store/swap implementation is not affected by signedness, so it is + // enough to test only unsigned types. + stress_test!(u128); +} diff --git a/vendor/portable-atomic/src/imp/atomic128/s390x.rs b/vendor/portable-atomic/src/imp/atomic128/s390x.rs new file mode 100644 index 000000000..b6789aea5 --- /dev/null +++ b/vendor/portable-atomic/src/imp/atomic128/s390x.rs @@ -0,0 +1,471 @@ +// Atomic{I,U}128 implementation on s390x. +// +// s390x supports 128-bit atomic load/store/cmpxchg: +// https://github.com/llvm/llvm-project/commit/a11f63a952664f700f076fd754476a2b9eb158cc +// +// As of LLVM 16, LLVM's minimal supported architecture level is z10: +// https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/lib/Target/SystemZ/SystemZProcessors.td) +// This does not appear to have changed since the current s390x backend was added in LLVM 3.3: +// https://github.com/llvm/llvm-project/commit/5f613dfd1f7edb0ae95d521b7107b582d9df5103#diff-cbaef692b3958312e80fd5507a7e2aff071f1acb086f10e8a96bc06a7bb289db +// +// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use +// this module and use intrinsics.rs instead. 
+// +// Refs: +// - z/Architecture Principles of Operation https://publibfp.dhe.ibm.com/epubs/pdf/a227832d.pdf +// - z/Architecture Reference Summary https://www.ibm.com/support/pages/zarchitecture-reference-summary +// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit +// +// Generated asm: +// - s390x https://godbolt.org/z/q4cvbaEYh +// - s390x (z196) https://godbolt.org/z/Tj3vonsoW +// - s390x (z15) https://godbolt.org/z/Pz5sq8fTz + +include!("macros.rs"); + +use core::{arch::asm, sync::atomic::Ordering}; + +/// A 128-bit value represented as a pair of 64-bit values. +/// +/// This type is `#[repr(C)]`, both fields have the same in-memory representation +/// and are plain old datatypes, so access to the fields is always safe. +#[derive(Clone, Copy)] +#[repr(C)] +union U128 { + whole: u128, + pair: Pair, +} +// A pair of 64-bit values in native-endian (big-endian) order. +#[derive(Clone, Copy)] +#[repr(C)] +struct Pair { + hi: u64, + lo: u64, +} + +// Use distinct operands on z196 or later, otherwise split to lgr and $op. +#[cfg(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops"))] +macro_rules! distinct_op { + ($op:tt, $a0:tt, $a1:tt, $a2:tt) => { + concat!($op, "k ", $a0, ", ", $a1, ", ", $a2) + }; +} +#[cfg(not(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops")))] +macro_rules! distinct_op { + ($op:tt, $a0:tt, $a1:tt, $a2:tt) => { + concat!("lgr ", $a0, ", ", $a1, "\n", $op, " ", $a0, ", ", $a2) + }; +} + +// Use selgr$cond on z15 or later, otherwise split to locgr$cond and $op. +#[cfg(any( + target_feature = "miscellaneous-extensions-3", + portable_atomic_target_feature = "miscellaneous-extensions-3", +))] +#[cfg(any( + target_feature = "load-store-on-cond", + portable_atomic_target_feature = "load-store-on-cond", +))] +macro_rules! 
select_op { + ($cond:tt, $a0:tt, $a1:tt, $a2:tt) => { + concat!("selgr", $cond, " ", $a0, ", ", $a1, ", ", $a2) + }; +} +#[cfg(not(any( + target_feature = "miscellaneous-extensions-3", + portable_atomic_target_feature = "miscellaneous-extensions-3", +)))] +#[cfg(any( + target_feature = "load-store-on-cond", + portable_atomic_target_feature = "load-store-on-cond", +))] +macro_rules! select_op { + ($cond:tt, $a0:tt, $a1:tt, $a2:tt) => { + concat!("lgr ", $a0, ", ", $a2, "\n", "locgr", $cond, " ", $a0, ", ", $a1) + }; +} + +#[inline] +unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 { + debug_assert!(src as usize % 16 == 0); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + // atomic load is always SeqCst. + let (out_hi, out_lo); + asm!( + "lpq %r0, 0({src})", + src = in(reg) ptr_reg!(src), + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + out("r0") out_hi, + out("r1") out_lo, + options(nostack, preserves_flags), + ); + U128 { pair: Pair { hi: out_hi, lo: out_lo } }.whole + } +} + +#[inline] +unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { + debug_assert!(dst as usize % 16 == 0); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let val = U128 { whole: val }; + macro_rules! atomic_store { + ($fence:tt) => { + asm!( + "stpq %r0, 0({dst})", + $fence, + dst = in(reg) ptr_reg!(dst), + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + in("r0") val.pair.hi, + in("r1") val.pair.lo, + options(nostack, preserves_flags), + ) + }; + } + match order { + // Relaxed and Release stores are equivalent. + Ordering::Relaxed | Ordering::Release => atomic_store!(""), + // bcr 14,0 (fast-BCR-serialization) requires z196 or later. 
+ #[cfg(any( + target_feature = "fast-serialization", + portable_atomic_target_feature = "fast-serialization", + ))] + Ordering::SeqCst => atomic_store!("bcr 14, 0"), + #[cfg(not(any( + target_feature = "fast-serialization", + portable_atomic_target_feature = "fast-serialization", + )))] + Ordering::SeqCst => atomic_store!("bcr 15, 0"), + _ => unreachable!("{:?}", order), + } + } +} + +#[inline] +unsafe fn atomic_compare_exchange( + dst: *mut u128, + old: u128, + new: u128, + _success: Ordering, + _failure: Ordering, +) -> Result<u128, u128> { + debug_assert!(dst as usize % 16 == 0); + + // SAFETY: the caller must uphold the safety contract. + let res = unsafe { + // atomic CAS is always SeqCst. + let old = U128 { whole: old }; + let new = U128 { whole: new }; + let (prev_hi, prev_lo); + asm!( + "cdsg %r0, %r12, 0({dst})", + dst = in(reg) ptr_reg!(dst), + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + inout("r0") old.pair.hi => prev_hi, + inout("r1") old.pair.lo => prev_lo, + in("r12") new.pair.hi, + in("r13") new.pair.lo, + options(nostack), + ); + U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole + }; + if res == old { + Ok(res) + } else { + Err(res) + } +} + +// cdsg is always strong. +use atomic_compare_exchange as atomic_compare_exchange_weak; + +#[cfg(not(any( + target_feature = "load-store-on-cond", + portable_atomic_target_feature = "load-store-on-cond", +)))] +#[inline(always)] +unsafe fn atomic_update<F>(dst: *mut u128, order: Ordering, mut f: F) -> u128 +where + F: FnMut(u128) -> u128, +{ + // SAFETY: the caller must uphold the safety contract. + unsafe { + // This is a private function and all instances of `f` only operate on the value + // loaded, so there is no need to synchronize the first load/failed CAS. 
+ let mut old = atomic_load(dst, Ordering::Relaxed); + loop { + let next = f(old); + match atomic_compare_exchange_weak(dst, old, next, order, Ordering::Relaxed) { + Ok(x) => return x, + Err(x) => old = x, + } + } + } +} + +#[inline] +unsafe fn atomic_swap(dst: *mut u128, val: u128, _order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + + // SAFETY: the caller must uphold the safety contract. + // + // We could use atomic_update here, but using inline assembly allows omitting + // the comparison of results and the storing/comparing of condition flags. + // + // Do not use atomic_rmw_cas_3 because it needs extra LGR to implement swap. + unsafe { + // atomic swap is always SeqCst. + let val = U128 { whole: val }; + let (mut prev_hi, mut prev_lo); + asm!( + "lpq %r0, 0({dst})", + "2:", + "cdsg %r0, %r12, 0({dst})", + "jl 2b", + dst = in(reg) ptr_reg!(dst), + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + out("r0") prev_hi, + out("r1") prev_lo, + in("r12") val.pair.hi, + in("r13") val.pair.lo, + options(nostack), + ); + U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole + } +} + +/// Atomic RMW by CAS loop (3 arguments) +/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;` +/// +/// `$op` can use the following registers: +/// - val_hi/val_lo pair: val argument (read-only for `$op`) +/// - r0/r1 pair: previous value loaded (read-only for `$op`) +/// - r12/r13 pair: new value that will be stored +// We could use atomic_update here, but using inline assembly allows omitting +// the comparison of results and the storing/comparing of condition flags. +macro_rules! atomic_rmw_cas_3 { + ($name:ident, [$($reg:tt)*], $($op:tt)*) => { + #[inline] + unsafe fn $name(dst: *mut u128, val: u128, _order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + // SAFETY: the caller must uphold the safety contract. + unsafe { + // atomic RMW is always SeqCst. 
+ let val = U128 { whole: val }; + let (mut prev_hi, mut prev_lo); + asm!( + "lpq %r0, 0({dst})", + "2:", + $($op)* + "cdsg %r0, %r12, 0({dst})", + "jl 2b", + dst = in(reg) ptr_reg!(dst), + val_hi = in(reg) val.pair.hi, + val_lo = in(reg) val.pair.lo, + $($reg)* + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + out("r0") prev_hi, + out("r1") prev_lo, + out("r12") _, + out("r13") _, + options(nostack), + ); + U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole + } + } + }; +} +/// Atomic RMW by CAS loop (2 arguments) +/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;` +/// +/// `$op` can use the following registers: +/// - r0/r1 pair: previous value loaded (read-only for `$op`) +/// - r12/r13 pair: new value that will be stored +// We could use atomic_update here, but using inline assembly allows omitting +// the comparison of results and the storing/comparing of condition flags. +macro_rules! atomic_rmw_cas_2 { + ($name:ident, $($op:tt)*) => { + #[inline] + unsafe fn $name(dst: *mut u128, _order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + // SAFETY: the caller must uphold the safety contract. + unsafe { + // atomic RMW is always SeqCst. + let (mut prev_hi, mut prev_lo); + asm!( + "lpq %r0, 0({dst})", + "2:", + $($op)* + "cdsg %r0, %r12, 0({dst})", + "jl 2b", + dst = in(reg) ptr_reg!(dst), + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + out("r0") prev_hi, + out("r1") prev_lo, + out("r12") _, + out("r13") _, + options(nostack), + ); + U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole + } + } + }; +} + +atomic_rmw_cas_3! { + atomic_add, [], + distinct_op!("algr", "%r13", "%r1", "{val_lo}"), + "lgr %r12, %r0", + "alcgr %r12, {val_hi}", +} +atomic_rmw_cas_3! { + atomic_sub, [], + distinct_op!("slgr", "%r13", "%r1", "{val_lo}"), + "lgr %r12, %r0", + "slbgr %r12, {val_hi}", +} +atomic_rmw_cas_3! 
{ + atomic_and, [], + distinct_op!("ngr", "%r13", "%r1", "{val_lo}"), + distinct_op!("ngr", "%r12", "%r0", "{val_hi}"), +} + +// Use nngrk on z15 or later. +#[cfg(any( + target_feature = "miscellaneous-extensions-3", + portable_atomic_target_feature = "miscellaneous-extensions-3", +))] +atomic_rmw_cas_3! { + atomic_nand, [], + "nngrk %r13, %r1, {val_lo}", + "nngrk %r12, %r0, {val_hi}", +} +#[cfg(not(any( + target_feature = "miscellaneous-extensions-3", + portable_atomic_target_feature = "miscellaneous-extensions-3", +)))] +atomic_rmw_cas_3! { + atomic_nand, [], + distinct_op!("ngr", "%r13", "%r1", "{val_lo}"), + "xihf %r13, 4294967295", + "xilf %r13, 4294967295", + distinct_op!("ngr", "%r12", "%r0", "{val_hi}"), + "xihf %r12, 4294967295", + "xilf %r12, 4294967295", +} + +atomic_rmw_cas_3! { + atomic_or, [], + distinct_op!("ogr", "%r13", "%r1", "{val_lo}"), + distinct_op!("ogr", "%r12", "%r0", "{val_hi}"), +} +atomic_rmw_cas_3! { + atomic_xor, [], + distinct_op!("xgr", "%r13", "%r1", "{val_lo}"), + distinct_op!("xgr", "%r12", "%r0", "{val_hi}"), +} + +#[cfg(any( + target_feature = "load-store-on-cond", + portable_atomic_target_feature = "load-store-on-cond", +))] +atomic_rmw_cas_3! { + atomic_max, [], + "clgr %r1, {val_lo}", + select_op!("h", "%r12", "%r1", "{val_lo}"), + "cgr %r0, {val_hi}", + select_op!("h", "%r13", "%r1", "{val_lo}"), + "locgre %r13, %r12", + select_op!("h", "%r12", "%r0", "{val_hi}"), +} +#[cfg(any( + target_feature = "load-store-on-cond", + portable_atomic_target_feature = "load-store-on-cond", +))] +atomic_rmw_cas_3! { + atomic_umax, [tmp = out(reg) _,], + "clgr %r1, {val_lo}", + select_op!("h", "{tmp}", "%r1", "{val_lo}"), + "clgr %r0, {val_hi}", + select_op!("h", "%r12", "%r0", "{val_hi}"), + select_op!("h", "%r13", "%r1", "{val_lo}"), + "cgr %r0, {val_hi}", + "locgre %r13, {tmp}", +} +#[cfg(any( + target_feature = "load-store-on-cond", + portable_atomic_target_feature = "load-store-on-cond", +))] +atomic_rmw_cas_3! 
{ + atomic_min, [], + "clgr %r1, {val_lo}", + select_op!("l", "%r12", "%r1", "{val_lo}"), + "cgr %r0, {val_hi}", + select_op!("l", "%r13", "%r1", "{val_lo}"), + "locgre %r13, %r12", + select_op!("l", "%r12", "%r0", "{val_hi}"), +} +#[cfg(any( + target_feature = "load-store-on-cond", + portable_atomic_target_feature = "load-store-on-cond", +))] +atomic_rmw_cas_3! { + atomic_umin, [tmp = out(reg) _,], + "clgr %r1, {val_lo}", + select_op!("l", "{tmp}", "%r1", "{val_lo}"), + "clgr %r0, {val_hi}", + select_op!("l", "%r12", "%r0", "{val_hi}"), + select_op!("l", "%r13", "%r1", "{val_lo}"), + "cgr %r0, {val_hi}", + "locgre %r13, {tmp}", +} +// We use atomic_update for atomic min/max on pre-z196 because +// z10 doesn't seem to have a good way to implement 128-bit min/max. +// loc{,g}r requires z196 or later. +// https://godbolt.org/z/qodPK45qz +#[cfg(not(any( + target_feature = "load-store-on-cond", + portable_atomic_target_feature = "load-store-on-cond", +)))] +atomic_rmw_by_atomic_update!(cmp); + +atomic_rmw_cas_2! { + atomic_not, + "lgr %r13, %r1", + "xihf %r13, 4294967295", + "xilf %r13, 4294967295", + "lgr %r12, %r0", + "xihf %r12, 4294967295", + "xilf %r12, 4294967295", +} +atomic_rmw_cas_2! { + atomic_neg, + "lghi %r13, 0", + "slgr %r13, %r1", + "lghi %r12, 0", + "slbgr %r12, %r0", +} + +#[inline] +const fn is_lock_free() -> bool { + IS_ALWAYS_LOCK_FREE +} +const IS_ALWAYS_LOCK_FREE: bool = true; + +atomic128!(AtomicI128, i128, atomic_max, atomic_min); +atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); + +#[cfg(test)] +mod tests { + use super::*; + + test_atomic_int!(i128); + test_atomic_int!(u128); + + // load/store/swap implementation is not affected by signedness, so it is + // enough to test only unsigned types. 
+ stress_test!(u128); +} diff --git a/vendor/portable-atomic/src/imp/atomic128/x86_64.rs b/vendor/portable-atomic/src/imp/atomic128/x86_64.rs new file mode 100644 index 000000000..e75540cd1 --- /dev/null +++ b/vendor/portable-atomic/src/imp/atomic128/x86_64.rs @@ -0,0 +1,868 @@ +// Atomic{I,U}128 implementation on x86_64 using CMPXCHG16B (DWCAS). +// +// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use +// this module and use intrinsics.rs instead. +// +// Refs: +// - x86 and amd64 instruction reference https://www.felixcloutier.com/x86 +// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit +// +// Generated asm: +// - x86_64 (+cmpxchg16b) https://godbolt.org/z/WPvfn16sY + +include!("macros.rs"); + +#[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))] +#[path = "../fallback/outline_atomics.rs"] +mod fallback; + +#[cfg(not(portable_atomic_no_outline_atomics))] +#[cfg(not(target_env = "sgx"))] +#[path = "detect/x86_64.rs"] +mod detect; + +#[cfg(not(portable_atomic_no_asm))] +use core::arch::asm; +use core::sync::atomic::Ordering; + +// Asserts that the function is called in the correct context. +macro_rules! debug_assert_cmpxchg16b { + () => { + #[cfg(not(any( + target_feature = "cmpxchg16b", + portable_atomic_target_feature = "cmpxchg16b", + )))] + { + debug_assert!(detect::detect().has_cmpxchg16b()); + } + }; +} +#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))] +#[cfg(target_feature = "sse")] +macro_rules! debug_assert_vmovdqa_atomic { + () => {{ + debug_assert_cmpxchg16b!(); + debug_assert!(detect::detect().has_vmovdqa_atomic()); + }}; +} + +#[allow(unused_macros)] +#[cfg(target_pointer_width = "32")] +macro_rules! ptr_modifier { + () => { + ":e" + }; +} +#[allow(unused_macros)] +#[cfg(target_pointer_width = "64")] +macro_rules! ptr_modifier { + () => { + "" + }; +} + +/// A 128-bit value represented as a pair of 64-bit values. 
+/// +/// This type is `#[repr(C)]`, both fields have the same in-memory representation +/// and are plain old datatypes, so access to the fields is always safe. +#[derive(Clone, Copy)] +#[repr(C)] +union U128 { + whole: u128, + pair: Pair, +} +// A pair of 64-bit values in native-endian (little-endian) order. +#[derive(Clone, Copy)] +#[repr(C)] +struct Pair { + lo: u64, + hi: u64, +} + +#[cfg_attr( + not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), + target_feature(enable = "cmpxchg16b") +)] +#[inline] +unsafe fn cmpxchg16b(dst: *mut u128, old: u128, new: u128) -> (u128, bool) { + debug_assert!(dst as usize % 16 == 0); + debug_assert_cmpxchg16b!(); + + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned (required by CMPXCHG16B), that there are no + // concurrent non-atomic operations, and that the CPU supports CMPXCHG16B. + // + // If the value at `dst` (destination operand) and rdx:rax are equal, the + // 128-bit value in rcx:rbx is stored in the `dst`, otherwise the value at + // `dst` is loaded to rdx:rax. + // + // The ZF flag is set if the value at `dst` and rdx:rax are equal, + // otherwise it is cleared. Other flags are unaffected. + // + // Refs: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b + unsafe { + // cmpxchg16b is always SeqCst. + let r: u8; + let old = U128 { whole: old }; + let new = U128 { whole: new }; + let (prev_lo, prev_hi); + macro_rules! cmpxchg16b { + ($rdi:tt) => { + asm!( + // rbx is reserved by LLVM + "xchg {rbx_tmp}, rbx", + concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), + "sete r8b", + "mov rbx, {rbx_tmp}", // restore rbx + rbx_tmp = inout(reg) new.pair.lo => _, + in("rcx") new.pair.hi, + inout("rax") old.pair.lo => prev_lo, + inout("rdx") old.pair.hi => prev_hi, + in($rdi) dst, + out("r8b") r, + // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag. 
+ options(nostack), + ) + }; + } + #[cfg(target_pointer_width = "32")] + cmpxchg16b!("edi"); + #[cfg(target_pointer_width = "64")] + cmpxchg16b!("rdi"); + (U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole, r != 0) + } +} + +// VMOVDQA is atomic on Intel and AMD CPUs with AVX. +// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688 for details. +// +// Refs: https://www.felixcloutier.com/x86/movdqa:vmovdqa32:vmovdqa64 +// +// Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled. +// https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html +#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))] +#[cfg(target_feature = "sse")] +#[target_feature(enable = "avx")] +#[inline] +unsafe fn atomic_load_vmovdqa(src: *mut u128) -> u128 { + debug_assert!(src as usize % 16 == 0); + debug_assert_vmovdqa_atomic!(); + + // SAFETY: the caller must uphold the safety contract. + // + // atomic load by vmovdqa is always SeqCst. + unsafe { + let out: core::arch::x86_64::__m128; + asm!( + concat!("vmovdqa {out}, xmmword ptr [{src", ptr_modifier!(), "}]"), + src = in(reg) src, + out = out(xmm_reg) out, + options(nostack, preserves_flags), + ); + core::mem::transmute(out) + } +} +#[cfg(not(any(portable_atomic_no_outline_atomics, target_env = "sgx")))] +#[cfg(target_feature = "sse")] +#[target_feature(enable = "avx")] +#[inline] +unsafe fn atomic_store_vmovdqa(dst: *mut u128, val: u128, order: Ordering) { + debug_assert!(dst as usize % 16 == 0); + debug_assert_vmovdqa_atomic!(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let val: core::arch::x86_64::__m128 = core::mem::transmute(val); + match order { + // Relaxed and Release stores are equivalent. 
+ Ordering::Relaxed | Ordering::Release => { + asm!( + concat!("vmovdqa xmmword ptr [{dst", ptr_modifier!(), "}], {val}"), + dst = in(reg) dst, + val = in(xmm_reg) val, + options(nostack, preserves_flags), + ); + } + Ordering::SeqCst => { + asm!( + concat!("vmovdqa xmmword ptr [{dst", ptr_modifier!(), "}], {val}"), + "mfence", + dst = in(reg) dst, + val = in(xmm_reg) val, + options(nostack, preserves_flags), + ); + } + _ => unreachable!("{:?}", order), + } + } +} + +#[cfg(not(all( + any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), +)))] +macro_rules! load_store_detect { + ( + vmovdqa = $vmovdqa:ident + cmpxchg16b = $cmpxchg16b:ident + fallback = $fallback:ident + ) => {{ + let cpuid = detect::detect(); + #[cfg(not(any( + target_feature = "cmpxchg16b", + portable_atomic_target_feature = "cmpxchg16b", + )))] + { + // Check CMPXCHG16B first to prevent mixing atomic and non-atomic access. + if cpuid.has_cmpxchg16b() { + // We do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled. + #[cfg(target_feature = "sse")] + { + if cpuid.has_vmovdqa_atomic() { + $vmovdqa + } else { + $cmpxchg16b + } + } + #[cfg(not(target_feature = "sse"))] + { + $cmpxchg16b + } + } else { + fallback::$fallback + } + } + #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] + { + if cpuid.has_vmovdqa_atomic() { + $vmovdqa + } else { + $cmpxchg16b + } + } + }}; +} + +#[inline] +unsafe fn atomic_load(src: *mut u128, _order: Ordering) -> u128 { + // Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled. + // https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html + // SGX doesn't support CPUID. 
+ #[cfg(all( + any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + ))] + // SAFETY: the caller must uphold the safety contract. + // cfg guarantees that CMPXCHG16B is available at compile-time. + unsafe { + // cmpxchg16b is always SeqCst. + atomic_load_cmpxchg16b(src) + } + #[cfg(not(all( + any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + )))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + ifunc!(unsafe fn(src: *mut u128) -> u128 { + load_store_detect! { + vmovdqa = atomic_load_vmovdqa + cmpxchg16b = atomic_load_cmpxchg16b + // Use SeqCst because cmpxchg16b and atomic load by vmovdqa are always SeqCst. + fallback = atomic_load_seqcst + } + }) + } +} +#[cfg_attr( + not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), + target_feature(enable = "cmpxchg16b") +)] +#[inline] +unsafe fn atomic_load_cmpxchg16b(src: *mut u128) -> u128 { + debug_assert!(src as usize % 16 == 0); + debug_assert_cmpxchg16b!(); + + // SAFETY: the caller must guarantee that `src` is valid for both writes and + // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. + // cfg guarantees that the CPU supports CMPXCHG16B. + // + // See cmpxchg16b function for more. + // + // We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows + // omitting the storing of condition flags and avoiding the use of xchg to handle rbx. + unsafe { + // cmpxchg16b is always SeqCst. + let (prev_lo, prev_hi); + macro_rules! 
cmpxchg16b { + ($rdi:tt) => { + asm!( + // rbx is reserved by LLVM + "mov {rbx_tmp}, rbx", + "xor rbx, rbx", // zeroed rbx + concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), + "mov rbx, {rbx_tmp}", // restore rbx + // set old/new args of cmpxchg16b to 0 (rbx is zeroed after saved to rbx_tmp, to avoid xchg) + rbx_tmp = out(reg) _, + in("rcx") 0_u64, + inout("rax") 0_u64 => prev_lo, + inout("rdx") 0_u64 => prev_hi, + in($rdi) src, + // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag. + options(nostack), + ) + }; + } + #[cfg(target_pointer_width = "32")] + cmpxchg16b!("edi"); + #[cfg(target_pointer_width = "64")] + cmpxchg16b!("rdi"); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } +} + +#[inline] +unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { + // Do not use vector registers on targets such as x86_64-unknown-none unless SSE is explicitly enabled. + // https://doc.rust-lang.org/nightly/rustc/platform-support/x86_64-unknown-none.html + // SGX doesn't support CPUID. + #[cfg(all( + any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + ))] + // SAFETY: the caller must uphold the safety contract. + // cfg guarantees that CMPXCHG16B is available at compile-time. + unsafe { + // cmpxchg16b is always SeqCst. + let _ = order; + atomic_store_cmpxchg16b(dst, val); + } + #[cfg(not(all( + any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + any(portable_atomic_no_outline_atomics, target_env = "sgx", not(target_feature = "sse")), + )))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + #[cfg(target_feature = "sse")] + fn_alias! { + #[target_feature(enable = "avx")] + unsafe fn(dst: *mut u128, val: u128); + // atomic store by vmovdqa has at least release semantics. 
+ atomic_store_vmovdqa_non_seqcst = atomic_store_vmovdqa(Ordering::Release); + atomic_store_vmovdqa_seqcst = atomic_store_vmovdqa(Ordering::SeqCst); + } + match order { + // Relaxed and Release stores are equivalent in all implementations + // that may be called here (vmovdqa, asm-based cmpxchg16b, and fallback). + // core::arch's cmpxchg16b will never be called here. + Ordering::Relaxed | Ordering::Release => { + ifunc!(unsafe fn(dst: *mut u128, val: u128) { + load_store_detect! { + vmovdqa = atomic_store_vmovdqa_non_seqcst + cmpxchg16b = atomic_store_cmpxchg16b + fallback = atomic_store_non_seqcst + } + }); + } + Ordering::SeqCst => { + ifunc!(unsafe fn(dst: *mut u128, val: u128) { + load_store_detect! { + vmovdqa = atomic_store_vmovdqa_seqcst + cmpxchg16b = atomic_store_cmpxchg16b + fallback = atomic_store_seqcst + } + }); + } + _ => unreachable!("{:?}", order), + } + } +} +#[cfg_attr( + not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), + target_feature(enable = "cmpxchg16b") +)] +unsafe fn atomic_store_cmpxchg16b(dst: *mut u128, val: u128) { + // SAFETY: the caller must uphold the safety contract. + unsafe { + // cmpxchg16b is always SeqCst. + atomic_swap_cmpxchg16b(dst, val, Ordering::SeqCst); + } +} + +#[inline] +unsafe fn atomic_compare_exchange( + dst: *mut u128, + old: u128, + new: u128, + _success: Ordering, + _failure: Ordering, +) -> Result<u128, u128> { + #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, that there are no concurrent non-atomic operations, + // and cfg guarantees that CMPXCHG16B is available at compile-time. 
+ let (res, ok) = unsafe { cmpxchg16b(dst, old, new) }; + #[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))] + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, and that there are no different kinds of concurrent accesses. + let (res, ok) = unsafe { + ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> (u128, bool) { + if detect::detect().has_cmpxchg16b() { + cmpxchg16b + } else { + // Use SeqCst because cmpxchg16b is always SeqCst. + fallback::atomic_compare_exchange_seqcst + } + }) + }; + if ok { + Ok(res) + } else { + Err(res) + } +} + +// cmpxchg16b is always strong. +use atomic_compare_exchange as atomic_compare_exchange_weak; + +#[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] +use atomic_swap_cmpxchg16b as atomic_swap; +#[cfg_attr( + not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), + target_feature(enable = "cmpxchg16b") +)] +#[inline] +unsafe fn atomic_swap_cmpxchg16b(dst: *mut u128, val: u128, _order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + debug_assert_cmpxchg16b!(); + + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. + // cfg guarantees that the CPU supports CMPXCHG16B. + // + // See cmpxchg16b function for more. + // + // We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows + // omitting the storing/comparing of condition flags and reducing uses of xchg/mov to handle rbx. + // + // Do not use atomic_rmw_cas_3 because it needs extra MOV to implement swap. + unsafe { + // cmpxchg16b is always SeqCst. + let val = U128 { whole: val }; + let (mut prev_lo, mut prev_hi); + macro_rules! 
cmpxchg16b { + ($rdi:tt) => { + asm!( + // rbx is reserved by LLVM + "xchg {rbx_tmp}, rbx", + // This is not single-copy atomic reads, but this is ok because subsequent + // CAS will check for consistency. + // + // This is based on the code generated for the first load in DW RMWs by LLVM. + // + // Note that the C++20 memory model does not allow mixed-sized atomic access, + // so we must use inline assembly to implement this. + // (i.e., byte-wise atomic based on the standard library's atomic types + // cannot be used here). + concat!("mov rax, qword ptr [", $rdi, "]"), + concat!("mov rdx, qword ptr [", $rdi, " + 8]"), + "2:", + concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), + "jne 2b", + "mov rbx, {rbx_tmp}", // restore rbx + rbx_tmp = inout(reg) val.pair.lo => _, + in("rcx") val.pair.hi, + out("rax") prev_lo, + out("rdx") prev_hi, + in($rdi) dst, + // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag. + options(nostack), + ) + }; + } + #[cfg(target_pointer_width = "32")] + cmpxchg16b!("edi"); + #[cfg(target_pointer_width = "64")] + cmpxchg16b!("rdi"); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } +} + +/// Atomic RMW by CAS loop (3 arguments) +/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;` +/// +/// `$op` can use the following registers: +/// - rsi/r8 pair: val argument (read-only for `$op`) +/// - rax/rdx pair: previous value loaded (read-only for `$op`) +/// - rbx/rcx pair: new value that will be stored +// We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows +// omitting the storing/comparing of condition flags and reducing uses of xchg/mov to handle rbx. +macro_rules! 
atomic_rmw_cas_3 { + ($name:ident as $reexport_name:ident, $($op:tt)*) => { + #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] + use $name as $reexport_name; + #[cfg_attr( + not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), + target_feature(enable = "cmpxchg16b") + )] + #[inline] + unsafe fn $name(dst: *mut u128, val: u128, _order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + debug_assert_cmpxchg16b!(); + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. + // cfg guarantees that the CPU supports CMPXCHG16B. + // + // See cmpxchg16b function for more. + unsafe { + // cmpxchg16b is always SeqCst. + let val = U128 { whole: val }; + let (mut prev_lo, mut prev_hi); + macro_rules! cmpxchg16b { + ($rdi:tt) => { + asm!( + // rbx is reserved by LLVM + "mov {rbx_tmp}, rbx", + // This is not single-copy atomic reads, but this is ok because subsequent + // CAS will check for consistency. + // + // This is based on the code generated for the first load in DW RMWs by LLVM. + // + // Note that the C++20 memory model does not allow mixed-sized atomic access, + // so we must use inline assembly to implement this. + // (i.e., byte-wise atomic based on the standard library's atomic types + // cannot be used here). + concat!("mov rax, qword ptr [", $rdi, "]"), + concat!("mov rdx, qword ptr [", $rdi, " + 8]"), + "2:", + $($op)* + concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), + "jne 2b", + "mov rbx, {rbx_tmp}", // restore rbx + rbx_tmp = out(reg) _, + out("rcx") _, + out("rax") prev_lo, + out("rdx") prev_hi, + in($rdi) dst, + in("rsi") val.pair.lo, + in("r8") val.pair.hi, + // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag. 
+ options(nostack), + ) + }; + } + #[cfg(target_pointer_width = "32")] + cmpxchg16b!("edi"); + #[cfg(target_pointer_width = "64")] + cmpxchg16b!("rdi"); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } + } + }; +} +/// Atomic RMW by CAS loop (2 arguments) +/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;` +/// +/// `$op` can use the following registers: +/// - rax/rdx pair: previous value loaded (read-only for `$op`) +/// - rbx/rcx pair: new value that will be stored +// We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows +// omitting the storing of condition flags and avoiding the use of xchg to handle rbx. +macro_rules! atomic_rmw_cas_2 { + ($name:ident as $reexport_name:ident, $($op:tt)*) => { + #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] + use $name as $reexport_name; + #[cfg_attr( + not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), + target_feature(enable = "cmpxchg16b") + )] + #[inline] + unsafe fn $name(dst: *mut u128, _order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + debug_assert_cmpxchg16b!(); + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, and that there are no concurrent non-atomic operations. + // cfg guarantees that the CPU supports CMPXCHG16B. + // + // See cmpxchg16b function for more. + unsafe { + // cmpxchg16b is always SeqCst. + let (mut prev_lo, mut prev_hi); + macro_rules! cmpxchg16b { + ($rdi:tt) => { + asm!( + // rbx is reserved by LLVM + "mov {rbx_tmp}, rbx", + // This is not single-copy atomic reads, but this is ok because subsequent + // CAS will check for consistency. + // + // This is based on the code generated for the first load in DW RMWs by LLVM. + // + // Note that the C++20 memory model does not allow mixed-sized atomic access, + // so we must use inline assembly to implement this. 
+ // (i.e., byte-wise atomic based on the standard library's atomic types + // cannot be used here). + concat!("mov rax, qword ptr [", $rdi, "]"), + concat!("mov rdx, qword ptr [", $rdi, " + 8]"), + "2:", + $($op)* + concat!("lock cmpxchg16b xmmword ptr [", $rdi, "]"), + "jne 2b", + "mov rbx, {rbx_tmp}", // restore rbx + rbx_tmp = out(reg) _, + out("rcx") _, + out("rax") prev_lo, + out("rdx") prev_hi, + in($rdi) dst, + // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag. + options(nostack), + ) + }; + } + #[cfg(target_pointer_width = "32")] + cmpxchg16b!("edi"); + #[cfg(target_pointer_width = "64")] + cmpxchg16b!("rdi"); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } + } + }; +} + +atomic_rmw_cas_3! { + atomic_add_cmpxchg16b as atomic_add, + "mov rbx, rax", + "add rbx, rsi", + "mov rcx, rdx", + "adc rcx, r8", +} +atomic_rmw_cas_3! { + atomic_sub_cmpxchg16b as atomic_sub, + "mov rbx, rax", + "sub rbx, rsi", + "mov rcx, rdx", + "sbb rcx, r8", +} +atomic_rmw_cas_3! { + atomic_and_cmpxchg16b as atomic_and, + "mov rbx, rax", + "and rbx, rsi", + "mov rcx, rdx", + "and rcx, r8", +} +atomic_rmw_cas_3! { + atomic_nand_cmpxchg16b as atomic_nand, + "mov rbx, rax", + "and rbx, rsi", + "not rbx", + "mov rcx, rdx", + "and rcx, r8", + "not rcx", +} +atomic_rmw_cas_3! { + atomic_or_cmpxchg16b as atomic_or, + "mov rbx, rax", + "or rbx, rsi", + "mov rcx, rdx", + "or rcx, r8", +} +atomic_rmw_cas_3! { + atomic_xor_cmpxchg16b as atomic_xor, + "mov rbx, rax", + "xor rbx, rsi", + "mov rcx, rdx", + "xor rcx, r8", +} + +atomic_rmw_cas_2! { + atomic_not_cmpxchg16b as atomic_not, + "mov rbx, rax", + "not rbx", + "mov rcx, rdx", + "not rcx", +} +atomic_rmw_cas_2! { + atomic_neg_cmpxchg16b as atomic_neg, + "mov rbx, rax", + "neg rbx", + "mov rcx, 0", + "sbb rcx, rdx", +} + +atomic_rmw_cas_3! 
{ + atomic_max_cmpxchg16b as atomic_max, + "cmp rsi, rax", + "mov rcx, r8", + "sbb rcx, rdx", + "mov rcx, r8", + "cmovl rcx, rdx", + "mov rbx, rsi", + "cmovl rbx, rax", +} +atomic_rmw_cas_3! { + atomic_umax_cmpxchg16b as atomic_umax, + "cmp rsi, rax", + "mov rcx, r8", + "sbb rcx, rdx", + "mov rcx, r8", + "cmovb rcx, rdx", + "mov rbx, rsi", + "cmovb rbx, rax", +} +atomic_rmw_cas_3! { + atomic_min_cmpxchg16b as atomic_min, + "cmp rsi, rax", + "mov rcx, r8", + "sbb rcx, rdx", + "mov rcx, r8", + "cmovge rcx, rdx", + "mov rbx, rsi", + "cmovge rbx, rax", +} +atomic_rmw_cas_3! { + atomic_umin_cmpxchg16b as atomic_umin, + "cmp rsi, rax", + "mov rcx, r8", + "sbb rcx, rdx", + "mov rcx, r8", + "cmovae rcx, rdx", + "mov rbx, rsi", + "cmovae rbx, rax", +} + +macro_rules! atomic_rmw_with_ifunc { + ( + unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)?; + cmpxchg16b = $cmpxchg16b_fn:ident; + fallback = $seqcst_fallback_fn:ident; + ) => { + #[cfg(not(any( + target_feature = "cmpxchg16b", + portable_atomic_target_feature = "cmpxchg16b", + )))] + #[inline] + unsafe fn $name($($arg)*, _order: Ordering) $(-> $ret_ty)? { + fn_alias! { + #[cfg_attr( + not(any( + target_feature = "cmpxchg16b", + portable_atomic_target_feature = "cmpxchg16b", + )), + target_feature(enable = "cmpxchg16b") + )] + unsafe fn($($arg)*) $(-> $ret_ty)?; + // cmpxchg16b is always SeqCst. + cmpxchg16b_seqcst_fn = $cmpxchg16b_fn(Ordering::SeqCst); + } + // SAFETY: the caller must uphold the safety contract. + // we only call cmpxchg16b_fn if cmpxchg16b is available. + unsafe { + ifunc!(unsafe fn($($arg)*) $(-> $ret_ty)? { + if detect::detect().has_cmpxchg16b() { + cmpxchg16b_seqcst_fn + } else { + // Use SeqCst because cmpxchg16b is always SeqCst. + fallback::$seqcst_fallback_fn + } + }) + } + } + }; +} + +atomic_rmw_with_ifunc! { + unsafe fn atomic_swap(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_swap_cmpxchg16b; + fallback = atomic_swap_seqcst; +} +atomic_rmw_with_ifunc! 
{ + unsafe fn atomic_add(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_add_cmpxchg16b; + fallback = atomic_add_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_sub(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_sub_cmpxchg16b; + fallback = atomic_sub_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_and(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_and_cmpxchg16b; + fallback = atomic_and_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_nand(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_nand_cmpxchg16b; + fallback = atomic_nand_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_or(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_or_cmpxchg16b; + fallback = atomic_or_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_xor(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_xor_cmpxchg16b; + fallback = atomic_xor_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_max(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_max_cmpxchg16b; + fallback = atomic_max_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_umax(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_umax_cmpxchg16b; + fallback = atomic_umax_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_min(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_min_cmpxchg16b; + fallback = atomic_min_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_umin(dst: *mut u128, val: u128) -> u128; + cmpxchg16b = atomic_umin_cmpxchg16b; + fallback = atomic_umin_seqcst; +} +atomic_rmw_with_ifunc! { + unsafe fn atomic_not(dst: *mut u128) -> u128; + cmpxchg16b = atomic_not_cmpxchg16b; + fallback = atomic_not_seqcst; +} +atomic_rmw_with_ifunc! 
{ + unsafe fn atomic_neg(dst: *mut u128) -> u128; + cmpxchg16b = atomic_neg_cmpxchg16b; + fallback = atomic_neg_seqcst; +} + +#[inline] +fn is_lock_free() -> bool { + #[cfg(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"))] + { + // CMPXCHG16B is available at compile-time. + true + } + #[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))] + { + detect::detect().has_cmpxchg16b() + } +} +const IS_ALWAYS_LOCK_FREE: bool = + cfg!(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")); + +atomic128!(AtomicI128, i128, atomic_max, atomic_min); +atomic128!(AtomicU128, u128, atomic_umax, atomic_umin); + +#[allow(clippy::undocumented_unsafe_blocks, clippy::wildcard_imports)] +#[cfg(test)] +mod tests { + use super::*; + + test_atomic_int!(i128); + test_atomic_int!(u128); + + // load/store/swap implementation is not affected by signedness, so it is + // enough to test only unsigned types. + stress_test!(u128); +} diff --git a/vendor/portable-atomic/src/imp/core_atomic.rs b/vendor/portable-atomic/src/imp/core_atomic.rs new file mode 100644 index 000000000..72da6fa2a --- /dev/null +++ b/vendor/portable-atomic/src/imp/core_atomic.rs @@ -0,0 +1,437 @@ +// Wrap the standard library's atomic types in newtype. +// +// This is not a reexport, because we want to backport changes like +// https://github.com/rust-lang/rust/pull/98383 to old compilers. + +use core::{cell::UnsafeCell, marker::PhantomData, sync::atomic::Ordering}; + +// core::panic::RefUnwindSafe is only available on Rust 1.56+, so on pre-1.56 +// Rust, we implement RefUnwindSafe when "std" feature is enabled. +// However, on pre-1.56 Rust, the standard library's atomic types implement +// RefUnwindSafe when "linked to std", and that's behavior that our other atomic +// implementations can't emulate, so use PhantomData<NoRefUnwindSafe> to match +// conditions where our other atomic implementations implement RefUnwindSafe. 
+// If we do not do this, for example, downstream that is only tested on x86_64 +// may incorrectly assume that AtomicU64 always implements RefUnwindSafe even on +// older rustc, and may be broken on platforms where std AtomicU64 is not available. +struct NoRefUnwindSafe(UnsafeCell<()>); +// SAFETY: this is a marker type and we'll never access the value. +unsafe impl Sync for NoRefUnwindSafe {} + +#[repr(transparent)] +pub(crate) struct AtomicPtr<T> { + inner: core::sync::atomic::AtomicPtr<T>, + // Prevent RefUnwindSafe from being propagated from the std atomic type. + _marker: PhantomData<NoRefUnwindSafe>, +} +impl<T> AtomicPtr<T> { + #[inline] + pub(crate) const fn new(v: *mut T) -> Self { + Self { inner: core::sync::atomic::AtomicPtr::new(v), _marker: PhantomData } + } + #[inline] + pub(crate) fn is_lock_free() -> bool { + Self::is_always_lock_free() + } + #[inline] + pub(crate) const fn is_always_lock_free() -> bool { + true + } + #[inline] + pub(crate) fn get_mut(&mut self) -> &mut *mut T { + self.inner.get_mut() + } + #[inline] + pub(crate) fn into_inner(self) -> *mut T { + self.inner.into_inner() + } + #[inline] + #[cfg_attr( + any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri), + track_caller + )] + pub(crate) fn load(&self, order: Ordering) -> *mut T { + crate::utils::assert_load_ordering(order); // for track_caller (compiler can omit double check) + self.inner.load(order) + } + #[inline] + #[cfg_attr( + any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri), + track_caller + )] + pub(crate) fn store(&self, ptr: *mut T, order: Ordering) { + crate::utils::assert_store_ordering(order); // for track_caller (compiler can omit double check) + self.inner.store(ptr, order); + } + const_fn! { + const_if: #[cfg(not(portable_atomic_no_const_raw_ptr_deref))]; + #[inline] + pub(crate) const fn as_ptr(&self) -> *mut *mut T { + // SAFETY: Self is #[repr(transparent)] and internally UnsafeCell<*mut T>. 
+ // See also https://github.com/rust-lang/rust/pull/66705 and + // https://github.com/rust-lang/rust/issues/66136#issuecomment-557867116. + unsafe { (*(self as *const Self as *const UnsafeCell<*mut T>)).get() } + } + } +} +#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))] +#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))] +impl<T> AtomicPtr<T> { + #[inline] + #[cfg_attr( + any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri), + track_caller + )] + pub(crate) fn compare_exchange( + &self, + current: *mut T, + new: *mut T, + success: Ordering, + failure: Ordering, + ) -> Result<*mut T, *mut T> { + crate::utils::assert_compare_exchange_ordering(success, failure); // for track_caller (compiler can omit double check) + #[cfg(portable_atomic_no_stronger_failure_ordering)] + let success = crate::utils::upgrade_success_ordering(success, failure); + self.inner.compare_exchange(current, new, success, failure) + } + #[inline] + #[cfg_attr( + any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri), + track_caller + )] + pub(crate) fn compare_exchange_weak( + &self, + current: *mut T, + new: *mut T, + success: Ordering, + failure: Ordering, + ) -> Result<*mut T, *mut T> { + crate::utils::assert_compare_exchange_ordering(success, failure); // for track_caller (compiler can omit double check) + #[cfg(portable_atomic_no_stronger_failure_ordering)] + let success = crate::utils::upgrade_success_ordering(success, failure); + self.inner.compare_exchange_weak(current, new, success, failure) + } +} +impl<T> core::ops::Deref for AtomicPtr<T> { + type Target = core::sync::atomic::AtomicPtr<T>; + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +macro_rules! 
atomic_int { + ($atomic_type:ident, $int_type:ident) => { + #[repr(transparent)] + pub(crate) struct $atomic_type { + inner: core::sync::atomic::$atomic_type, + // Prevent RefUnwindSafe from being propagated from the std atomic type. + _marker: PhantomData<NoRefUnwindSafe>, + } + #[cfg_attr( + portable_atomic_no_cfg_target_has_atomic, + cfg(not(portable_atomic_no_atomic_cas)) + )] + #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))] + impl_default_no_fetch_ops!($atomic_type, $int_type); + #[cfg(not(all( + not(any(miri, portable_atomic_sanitize_thread)), + any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), + any(target_arch = "x86", target_arch = "x86_64"), + )))] + #[cfg_attr( + portable_atomic_no_cfg_target_has_atomic, + cfg(not(portable_atomic_no_atomic_cas)) + )] + #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))] + impl_default_bit_opts!($atomic_type, $int_type); + impl $atomic_type { + #[inline] + pub(crate) const fn new(v: $int_type) -> Self { + Self { inner: core::sync::atomic::$atomic_type::new(v), _marker: PhantomData } + } + #[inline] + pub(crate) fn is_lock_free() -> bool { + Self::is_always_lock_free() + } + #[inline] + pub(crate) const fn is_always_lock_free() -> bool { + true + } + #[inline] + pub(crate) fn get_mut(&mut self) -> &mut $int_type { + self.inner.get_mut() + } + #[inline] + pub(crate) fn into_inner(self) -> $int_type { + self.inner.into_inner() + } + #[inline] + #[cfg_attr( + any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri), + track_caller + )] + pub(crate) fn load(&self, order: Ordering) -> $int_type { + crate::utils::assert_load_ordering(order); // for track_caller (compiler can omit double check) + self.inner.load(order) + } + #[inline] + #[cfg_attr( + any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri), + track_caller + )] + pub(crate) fn store(&self, val: $int_type, order: Ordering) { + 
crate::utils::assert_store_ordering(order); // for track_caller (compiler can omit double check) + self.inner.store(val, order); + } + const_fn! { + const_if: #[cfg(not(portable_atomic_no_const_raw_ptr_deref))]; + #[inline] + pub(crate) const fn as_ptr(&self) -> *mut $int_type { + // SAFETY: Self is #[repr(C)] and internally UnsafeCell<$int_type>. + // See also https://github.com/rust-lang/rust/pull/66705 and + // https://github.com/rust-lang/rust/issues/66136#issuecomment-557867116. + unsafe { + (*(self as *const Self as *const UnsafeCell<$int_type>)).get() + } + } + } + } + #[cfg_attr( + portable_atomic_no_cfg_target_has_atomic, + cfg(not(portable_atomic_no_atomic_cas)) + )] + #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))] + impl $atomic_type { + #[inline] + #[cfg_attr( + any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri), + track_caller + )] + pub(crate) fn compare_exchange( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + crate::utils::assert_compare_exchange_ordering(success, failure); // for track_caller (compiler can omit double check) + #[cfg(portable_atomic_no_stronger_failure_ordering)] + let success = crate::utils::upgrade_success_ordering(success, failure); + self.inner.compare_exchange(current, new, success, failure) + } + #[inline] + #[cfg_attr( + any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri), + track_caller + )] + pub(crate) fn compare_exchange_weak( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + crate::utils::assert_compare_exchange_ordering(success, failure); // for track_caller (compiler can omit double check) + #[cfg(portable_atomic_no_stronger_failure_ordering)] + let success = crate::utils::upgrade_success_ordering(success, failure); + self.inner.compare_exchange_weak(current, new, success, 
failure) + } + #[allow(dead_code)] + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + fn fetch_update_<F>(&self, order: Ordering, mut f: F) -> $int_type + where + F: FnMut($int_type) -> $int_type, + { + // This is a private function and all instances of `f` only operate on the value + // loaded, so there is no need to synchronize the first load/failed CAS. + let mut prev = self.load(Ordering::Relaxed); + loop { + let next = f(prev); + match self.compare_exchange_weak(prev, next, order, Ordering::Relaxed) { + Ok(x) => return x, + Err(next_prev) => prev = next_prev, + } + } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_max(&self, val: $int_type, order: Ordering) -> $int_type { + #[cfg(not(portable_atomic_no_atomic_min_max))] + { + #[cfg(any( + all( + target_arch = "aarch64", + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + ), + all( + target_arch = "arm", + not(any( + target_feature = "v6", + portable_atomic_target_feature = "v6", + )), + ), + // TODO: mips32r6, mips64r6? 
+ target_arch = "mips", + target_arch = "mips64", + target_arch = "powerpc", + target_arch = "powerpc64", + ))] + { + // HACK: the following operations are currently broken (at least on qemu-user): + // - aarch64's `AtomicI{8,16}::fetch_{max,min}` (release mode + lse) + // - armv5te's `Atomic{I,U}{8,16}::fetch_{max,min}` + // - mips's `AtomicI8::fetch_{max,min}` (release mode) + // - mipsel's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least) + // - mips64's `AtomicI8::fetch_{max,min}` (release mode) + // - mips64el's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least) + // - powerpc's `AtomicI{8,16}::fetch_{max,min}` + // - powerpc64's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least) + // - powerpc64le's `AtomicU{8,16}::fetch_{max,min}` (release mode + fat LTO) + // See also: + // https://github.com/llvm/llvm-project/issues/61880 + // https://github.com/llvm/llvm-project/issues/61881 + // https://github.com/llvm/llvm-project/issues/61882 + // https://github.com/taiki-e/portable-atomic/issues/2 + // https://github.com/rust-lang/rust/issues/100650 + if core::mem::size_of::<$int_type>() <= 2 { + return self.fetch_update_(order, |x| core::cmp::max(x, val)); + } + } + self.inner.fetch_max(val, order) + } + #[cfg(portable_atomic_no_atomic_min_max)] + { + self.fetch_update_(order, |x| core::cmp::max(x, val)) + } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_min(&self, val: $int_type, order: Ordering) -> $int_type { + #[cfg(not(portable_atomic_no_atomic_min_max))] + { + #[cfg(any( + all( + target_arch = "aarch64", + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + ), + all( + target_arch = "arm", + not(any( + target_feature = "v6", + portable_atomic_target_feature = "v6", + )), + ), + // TODO: mips32r6, mips64r6? 
+ target_arch = "mips", + target_arch = "mips64", + target_arch = "powerpc", + target_arch = "powerpc64", + ))] + { + // HACK: the following operations are currently broken (at least on qemu-user): + // - aarch64's `AtomicI{8,16}::fetch_{max,min}` (release mode + lse) + // - armv5te's `Atomic{I,U}{8,16}::fetch_{max,min}` + // - mips's `AtomicI8::fetch_{max,min}` (release mode) + // - mipsel's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least) + // - mips64's `AtomicI8::fetch_{max,min}` (release mode) + // - mips64el's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least) + // - powerpc's `AtomicI{8,16}::fetch_{max,min}` + // - powerpc64's `AtomicI{8,16}::fetch_{max,min}` (debug mode, at least) + // - powerpc64le's `AtomicU{8,16}::fetch_{max,min}` (release mode + fat LTO) + // See also: + // https://github.com/llvm/llvm-project/issues/61880 + // https://github.com/llvm/llvm-project/issues/61881 + // https://github.com/llvm/llvm-project/issues/61882 + // https://github.com/taiki-e/portable-atomic/issues/2 + // https://github.com/rust-lang/rust/issues/100650 + if core::mem::size_of::<$int_type>() <= 2 { + return self.fetch_update_(order, |x| core::cmp::min(x, val)); + } + } + self.inner.fetch_min(val, order) + } + #[cfg(portable_atomic_no_atomic_min_max)] + { + self.fetch_update_(order, |x| core::cmp::min(x, val)) + } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type { + const NOT_MASK: $int_type = (0 as $int_type).wrapping_sub(1); + self.fetch_xor(NOT_MASK, order) + } + #[cfg(not(all( + not(any(miri, portable_atomic_sanitize_thread)), + any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), + any(target_arch = "x86", target_arch = "x86_64"), + )))] + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn not(&self, order: Ordering) { + self.fetch_not(order); + } + #[inline] + 
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_neg(&self, order: Ordering) -> $int_type { + self.fetch_update_(order, $int_type::wrapping_neg) + } + #[cfg(not(all( + not(any(miri, portable_atomic_sanitize_thread)), + any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), + any(target_arch = "x86", target_arch = "x86_64"), + )))] + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn neg(&self, order: Ordering) { + self.fetch_neg(order); + } + } + impl core::ops::Deref for $atomic_type { + type Target = core::sync::atomic::$atomic_type; + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + fn deref(&self) -> &Self::Target { + &self.inner + } + } + }; +} + +atomic_int!(AtomicIsize, isize); +atomic_int!(AtomicUsize, usize); +#[cfg(not(portable_atomic_no_atomic_load_store))] +atomic_int!(AtomicI8, i8); +#[cfg(not(portable_atomic_no_atomic_load_store))] +atomic_int!(AtomicU8, u8); +#[cfg(not(portable_atomic_no_atomic_load_store))] +atomic_int!(AtomicI16, i16); +#[cfg(not(portable_atomic_no_atomic_load_store))] +atomic_int!(AtomicU16, u16); +#[cfg(not(portable_atomic_no_atomic_load_store))] +#[cfg(not(target_pointer_width = "16"))] +atomic_int!(AtomicI32, i32); +#[cfg(not(portable_atomic_no_atomic_load_store))] +#[cfg(not(target_pointer_width = "16"))] +atomic_int!(AtomicU32, u32); +#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_64)))] +#[cfg_attr( + not(portable_atomic_no_cfg_target_has_atomic), + cfg(any( + target_has_atomic = "64", + not(any(target_pointer_width = "16", target_pointer_width = "32")), + )) +)] +atomic_int!(AtomicI64, i64); +#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_64)))] +#[cfg_attr( + not(portable_atomic_no_cfg_target_has_atomic), + cfg(any( + target_has_atomic = "64", + 
not(any(target_pointer_width = "16", target_pointer_width = "32")), + )) +)] +atomic_int!(AtomicU64, u64); diff --git a/vendor/portable-atomic/src/imp/fallback/mod.rs b/vendor/portable-atomic/src/imp/fallback/mod.rs new file mode 100644 index 000000000..e4875deac --- /dev/null +++ b/vendor/portable-atomic/src/imp/fallback/mod.rs @@ -0,0 +1,426 @@ +// Fallback implementation using global locks. +// +// This implementation uses seqlock for global locks. +// +// This is largely based on global locks in crossbeam-utils's `AtomicCell`, +// but seqlock is implemented in a way that does not depend on UB +// (see comments in optimistic_read method in atomic! macro for details). +// +// Note that we cannot use a lock per atomic type, since the in-memory representation of the atomic +// type and the value type must be the same. + +#![cfg_attr( + any( + all( + target_arch = "x86_64", + not(portable_atomic_no_cmpxchg16b_target_feature), + not(portable_atomic_no_outline_atomics), + not(any(target_env = "sgx", miri)), + ), + all( + target_arch = "powerpc64", + feature = "fallback", + not(portable_atomic_no_outline_atomics), + portable_atomic_outline_atomics, // TODO(powerpc64): currently disabled by default + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all(target_env = "musl", not(target_feature = "crt-static")), + portable_atomic_outline_atomics, + ), + ), + target_os = "freebsd", + ), + not(any(miri, portable_atomic_sanitize_thread)), + ), + all( + any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), + target_arch = "arm", + any(target_os = "linux", target_os = "android"), + not(portable_atomic_no_outline_atomics), + ), + ), + allow(dead_code) +)] + +#[macro_use] +pub(crate) mod utils; + +// Use "wide" sequence lock if the pointer width <= 32 to prevent its counter from wrapping +// around. +// +// In narrow architectures (pointer width <= 16), the counter is still <= 32-bit and may be +// vulnerable to wrap around.
But it's mostly okay, since on such primitive hardware, the +// counter will not be increased that fast. +// +// Some 64-bit architectures have ABI with 32-bit pointer width (e.g., x86_64 X32 ABI, +// aarch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is available and fast, +// so use it to implement normal sequence lock. +cfg_has_fast_atomic_64! { + mod seq_lock; +} +cfg_no_fast_atomic_64! { + #[path = "seq_lock_wide.rs"] + mod seq_lock; +} + +use core::{cell::UnsafeCell, mem, sync::atomic::Ordering}; + +use seq_lock::{SeqLock, SeqLockWriteGuard}; +use utils::CachePadded; + +// Some 64-bit architectures have ABI with 32-bit pointer width (e.g., x86_64 X32 ABI, +// aarch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is fast, +// so use it to reduce chunks of byte-wise atomic memcpy. +use seq_lock::{AtomicChunk, Chunk}; + +// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L969-L1016. +#[inline] +#[must_use] +fn lock(addr: usize) -> &'static SeqLock { + // The number of locks is a prime number because we want to make sure `addr % LEN` gets + // dispersed across all locks. + // + // crossbeam-utils 0.8.7 uses 97 here but does not use CachePadded, + // so the actual concurrency level will be smaller. + const LEN: usize = 67; + #[allow(clippy::declare_interior_mutable_const)] + const L: CachePadded<SeqLock> = CachePadded::new(SeqLock::new()); + static LOCKS: [CachePadded<SeqLock>; LEN] = [ + L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, + L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, + L, L, L, L, L, L, L, + ]; + + // If the modulus is a constant number, the compiler will use crazy math to transform this into + // a sequence of cheap arithmetic operations rather than using the slow modulo instruction. + &LOCKS[addr % LEN] +} + +macro_rules! 
atomic { + ($atomic_type:ident, $int_type:ident, $align:literal) => { + #[repr(C, align($align))] + pub(crate) struct $atomic_type { + v: UnsafeCell<$int_type>, + } + + impl $atomic_type { + const LEN: usize = mem::size_of::<$int_type>() / mem::size_of::<Chunk>(); + + #[inline] + unsafe fn chunks(&self) -> &[AtomicChunk; Self::LEN] { + static_assert!($atomic_type::LEN > 1); + static_assert!(mem::size_of::<$int_type>() % mem::size_of::<Chunk>() == 0); + + // SAFETY: the caller must uphold the safety contract for `chunks`. + unsafe { &*(self.v.get() as *const $int_type as *const [AtomicChunk; Self::LEN]) } + } + + #[inline] + fn optimistic_read(&self) -> $int_type { + // Using `MaybeUninit<[usize; Self::LEN]>` here doesn't change codegen: https://godbolt.org/z/86f8s733M + let mut dst: [Chunk; Self::LEN] = [0; Self::LEN]; + // SAFETY: + // - There are no threads that perform non-atomic concurrent write operations. + // - There is no writer that updates the value using atomic operations of different granularity. + // + // If the atomic operation is not used here, it will cause a data race + // when `write` performs a concurrent write operation. + // Such a data race is sometimes considered virtually unproblematic + // in SeqLock implementations: + // + // - https://github.com/Amanieu/seqlock/issues/2 + // - https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L1111-L1116 + // - https://rust-lang.zulipchat.com/#narrow/stream/136281-t-lang.2Fwg-unsafe-code-guidelines/topic/avoiding.20UB.20due.20to.20races.20by.20discarding.20result.3F + // + // However, in our use case, the implementation that loads/stores the value as + // chunks of usize is fast enough and sound, so we use that implementation.
+ // + // See also atomic-memcpy crate, a generic implementation of this pattern: + // https://github.com/taiki-e/atomic-memcpy + let chunks = unsafe { self.chunks() }; + for i in 0..Self::LEN { + dst[i] = chunks[i].load(Ordering::Relaxed); + } + // SAFETY: integers are plain old datatypes so we can always transmute to them. + unsafe { mem::transmute::<[Chunk; Self::LEN], $int_type>(dst) } + } + + #[inline] + fn read(&self, _guard: &SeqLockWriteGuard<'static>) -> $int_type { + // SAFETY: + // - The guard guarantees that we hold the lock to write. + // - The raw pointer is valid because we got it from a reference. + // + // Unlike optimistic_read/write, the atomic operation is not required, + // because we hold the lock to write so that other threads cannot + // perform concurrent write operations. + // + // At the hardware level, core::sync::atomic::Atomic*::load used in optimistic_read + // may be lowered to atomic write operations by LLVM, but it is still considered a + // read operation from the view of the (software) memory model, except that it is + // not allowed in read-only memory (due to UnsafeCell, self.v is not read-only memory). + // See also https://github.com/rust-lang/miri/issues/2463. + // (Note that the above property is about the assembly generated by inline assembly + // or LLVM's backend. Doing it using write operations written in normal Rust code + // or LLVM IR is considered UB, even if it never mutates the value. 
See also the + // above Miri issue and https://github.com/rust-lang/rust/issues/32976#issuecomment-446775360) + // + // Also, according to atomic-memcpy's asm test, there seems + // to be no tier 1 or tier 2 platform that generates such code + // for a pointer-width relaxed load + acquire fence: + // https://github.com/taiki-e/atomic-memcpy/tree/v0.1.3/tests/asm-test/asm + unsafe { self.v.get().read() } + } + + #[inline] + fn write(&self, val: $int_type, _guard: &SeqLockWriteGuard<'static>) { + // SAFETY: integers are plain old datatypes so we can always transmute them to arrays of integers. + let val = unsafe { mem::transmute::<$int_type, [Chunk; Self::LEN]>(val) }; + // SAFETY: + // - The guard guarantees that we hold the lock to write. + // - There are no threads that perform non-atomic concurrent read or write operations. + // + // See optimistic_read for the reason that atomic operations are used here. + let chunks = unsafe { self.chunks() }; + for i in 0..Self::LEN { + chunks[i].store(val[i], Ordering::Relaxed); + } + } + } + + // Send is implicitly implemented. + // SAFETY: any data races are prevented by the lock and atomic operation. + unsafe impl Sync for $atomic_type {} + + impl_default_no_fetch_ops!($atomic_type, $int_type); + impl_default_bit_opts!($atomic_type, $int_type); + impl $atomic_type { + #[inline] + pub(crate) const fn new(v: $int_type) -> Self { + Self { v: UnsafeCell::new(v) } + } + + #[inline] + pub(crate) fn is_lock_free() -> bool { + Self::is_always_lock_free() + } + #[inline] + pub(crate) const fn is_always_lock_free() -> bool { + false + } + + #[inline] + pub(crate) fn get_mut(&mut self) -> &mut $int_type { + // SAFETY: the mutable reference guarantees unique ownership. 
+ // (UnsafeCell::get_mut requires Rust 1.50) + unsafe { &mut *self.v.get() } + } + + #[inline] + pub(crate) fn into_inner(self) -> $int_type { + self.v.into_inner() + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn load(&self, order: Ordering) -> $int_type { + crate::utils::assert_load_ordering(order); + let lock = lock(self.v.get() as usize); + + // Try doing an optimistic read first. + if let Some(stamp) = lock.optimistic_read() { + let val = self.optimistic_read(); + + if lock.validate_read(stamp) { + return val; + } + } + + // Grab a regular write lock so that writers don't starve this load. + let guard = lock.write(); + let val = self.read(&guard); + // The value hasn't been changed. Drop the guard without incrementing the stamp. + guard.abort(); + val + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn store(&self, val: $int_type, order: Ordering) { + crate::utils::assert_store_ordering(order); + let guard = lock(self.v.get() as usize).write(); + self.write(val, &guard) + } + + #[inline] + pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(val, &guard); + result + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + crate::utils::assert_compare_exchange_ordering(success, failure); + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + if result == current { + self.write(new, &guard); + Ok(result) + } else { + // The value hasn't been changed. Drop the guard without incrementing the stamp. 
+ guard.abort(); + Err(result) + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange_weak( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + self.compare_exchange(current, new, success, failure) + } + + #[inline] + pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(result.wrapping_add(val), &guard); + result + } + + #[inline] + pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(result.wrapping_sub(val), &guard); + result + } + + #[inline] + pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(result & val, &guard); + result + } + + #[inline] + pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(!(result & val), &guard); + result + } + + #[inline] + pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(result | val, &guard); + result + } + + #[inline] + pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(result ^ val, &guard); + result + } + + #[inline] + pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(core::cmp::max(result, val), 
&guard); + result + } + + #[inline] + pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(core::cmp::min(result, val), &guard); + result + } + + #[inline] + pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(!result, &guard); + result + } + #[inline] + pub(crate) fn not(&self, order: Ordering) { + self.fetch_not(order); + } + + #[inline] + pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type { + let guard = lock(self.v.get() as usize).write(); + let result = self.read(&guard); + self.write(result.wrapping_neg(), &guard); + result + } + #[inline] + pub(crate) fn neg(&self, order: Ordering) { + self.fetch_neg(order); + } + + #[inline] + pub(crate) const fn as_ptr(&self) -> *mut $int_type { + self.v.get() + } + } + }; +} + +#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_64)))] +#[cfg_attr( + not(portable_atomic_no_cfg_target_has_atomic), + cfg(any(test, not(target_has_atomic = "64"))) +)] +cfg_no_fast_atomic_64! { + atomic!(AtomicI64, i64, 8); + atomic!(AtomicU64, u64, 8); +} + +atomic!(AtomicI128, i128, 16); +atomic!(AtomicU128, u128, 16); + +#[cfg(test)] +mod tests { + use super::*; + + cfg_no_fast_atomic_64! { + test_atomic_int!(i64); + test_atomic_int!(u64); + } + test_atomic_int!(i128); + test_atomic_int!(u128); + + // load/store/swap implementation is not affected by signedness, so it is + // enough to test only unsigned types. + cfg_no_fast_atomic_64! 
{ + stress_test!(u64); + } + stress_test!(u128); +} diff --git a/vendor/portable-atomic/src/imp/fallback/outline_atomics.rs b/vendor/portable-atomic/src/imp/fallback/outline_atomics.rs new file mode 100644 index 000000000..985d9ce83 --- /dev/null +++ b/vendor/portable-atomic/src/imp/fallback/outline_atomics.rs @@ -0,0 +1,170 @@ +// Helper for outline-atomics. +// +// On architectures where DW atomics are not supported on older CPUs, we use +// fallback implementation when DW atomic instructions are not supported and +// outline-atomics is enabled. +// +// This module provides helpers to implement them. + +use core::sync::atomic::Ordering; + +#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64"))] +pub(crate) type Udw = u128; +#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64"))] +pub(crate) type AtomicUdw = super::super::fallback::AtomicU128; +#[cfg(any(target_arch = "x86_64", target_arch = "powerpc64"))] +pub(crate) type AtomicIdw = super::super::fallback::AtomicI128; + +#[cfg(target_arch = "arm")] +pub(crate) type Udw = u64; +#[cfg(target_arch = "arm")] +pub(crate) type AtomicUdw = super::super::fallback::AtomicU64; +#[cfg(target_arch = "arm")] +pub(crate) type AtomicIdw = super::super::fallback::AtomicI64; + +// Asserts that the function is called in the correct context. +macro_rules! debug_assert_outline_atomics { + () => { + #[cfg(target_arch = "x86_64")] + { + debug_assert!(!super::detect::detect().has_cmpxchg16b()); + } + #[cfg(target_arch = "powerpc64")] + { + debug_assert!(!super::detect::detect().has_quadword_atomics()); + } + #[cfg(target_arch = "arm")] + { + debug_assert!(!super::has_kuser_cmpxchg64()); + } + }; +} + +#[cold] +pub(crate) unsafe fn atomic_load(src: *mut Udw, order: Ordering) -> Udw { + debug_assert_outline_atomics!(); + #[allow(clippy::cast_ptr_alignment)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + (*(src as *const AtomicUdw)).load(order) + } +} +fn_alias! 
{ + #[cold] + pub(crate) unsafe fn(src: *mut Udw) -> Udw; + // fallback's atomic load has at least acquire semantics. + #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))] + atomic_load_non_seqcst = atomic_load(Ordering::Acquire); + atomic_load_seqcst = atomic_load(Ordering::SeqCst); +} + +#[cold] +pub(crate) unsafe fn atomic_store(dst: *mut Udw, val: Udw, order: Ordering) { + debug_assert_outline_atomics!(); + #[allow(clippy::cast_ptr_alignment)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + (*(dst as *const AtomicUdw)).store(val, order); + } +} +fn_alias! { + #[cold] + pub(crate) unsafe fn(dst: *mut Udw, val: Udw); + // fallback's atomic store has at least release semantics. + #[cfg(not(target_arch = "arm"))] + atomic_store_non_seqcst = atomic_store(Ordering::Release); + atomic_store_seqcst = atomic_store(Ordering::SeqCst); +} + +#[cold] +pub(crate) unsafe fn atomic_compare_exchange( + dst: *mut Udw, + old: Udw, + new: Udw, + success: Ordering, + failure: Ordering, +) -> (Udw, bool) { + debug_assert_outline_atomics!(); + #[allow(clippy::cast_ptr_alignment)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + match (*(dst as *const AtomicUdw)).compare_exchange(old, new, success, failure) { + Ok(v) => (v, true), + Err(v) => (v, false), + } + } +} +fn_alias! { + #[cold] + pub(crate) unsafe fn(dst: *mut Udw, old: Udw, new: Udw) -> (Udw, bool); + // fallback's atomic CAS has at least AcqRel semantics. + #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))] + atomic_compare_exchange_non_seqcst + = atomic_compare_exchange(Ordering::AcqRel, Ordering::Acquire); + atomic_compare_exchange_seqcst + = atomic_compare_exchange(Ordering::SeqCst, Ordering::SeqCst); +} + +macro_rules! 
atomic_rmw_3 { + ( + $name:ident($atomic_type:ident::$method_name:ident), + $non_seqcst_alias:ident, $seqcst_alias:ident + ) => { + #[cold] + pub(crate) unsafe fn $name(dst: *mut Udw, val: Udw, order: Ordering) -> Udw { + debug_assert_outline_atomics!(); + #[allow(clippy::cast_ptr_alignment)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + (*(dst as *const $atomic_type)).$method_name(val as _, order) as Udw + } + } + fn_alias! { + #[cold] + pub(crate) unsafe fn(dst: *mut Udw, val: Udw) -> Udw; + // fallback's atomic RMW has at least AcqRel semantics. + #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))] + $non_seqcst_alias = $name(Ordering::AcqRel); + $seqcst_alias = $name(Ordering::SeqCst); + } + }; +} +macro_rules! atomic_rmw_2 { + ( + $name:ident($atomic_type:ident::$method_name:ident), + $non_seqcst_alias:ident, $seqcst_alias:ident + ) => { + #[cold] + pub(crate) unsafe fn $name(dst: *mut Udw, order: Ordering) -> Udw { + debug_assert_outline_atomics!(); + #[allow(clippy::cast_ptr_alignment)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + (*(dst as *const $atomic_type)).$method_name(order) as Udw + } + } + fn_alias! { + #[cold] + pub(crate) unsafe fn(dst: *mut Udw) -> Udw; + // fallback's atomic RMW has at least AcqRel semantics. 
+ #[cfg(not(any(target_arch = "arm", target_arch = "x86_64")))] + $non_seqcst_alias = $name(Ordering::AcqRel); + $seqcst_alias = $name(Ordering::SeqCst); + } + }; +} + +atomic_rmw_3!(atomic_swap(AtomicUdw::swap), atomic_swap_non_seqcst, atomic_swap_seqcst); +atomic_rmw_3!(atomic_add(AtomicUdw::fetch_add), atomic_add_non_seqcst, atomic_add_seqcst); +atomic_rmw_3!(atomic_sub(AtomicUdw::fetch_sub), atomic_sub_non_seqcst, atomic_sub_seqcst); +atomic_rmw_3!(atomic_and(AtomicUdw::fetch_and), atomic_and_non_seqcst, atomic_and_seqcst); +atomic_rmw_3!(atomic_nand(AtomicUdw::fetch_nand), atomic_nand_non_seqcst, atomic_nand_seqcst); +atomic_rmw_3!(atomic_or(AtomicUdw::fetch_or), atomic_or_non_seqcst, atomic_or_seqcst); +atomic_rmw_3!(atomic_xor(AtomicUdw::fetch_xor), atomic_xor_non_seqcst, atomic_xor_seqcst); +atomic_rmw_3!(atomic_max(AtomicIdw::fetch_max), atomic_max_non_seqcst, atomic_max_seqcst); +atomic_rmw_3!(atomic_umax(AtomicUdw::fetch_max), atomic_umax_non_seqcst, atomic_umax_seqcst); +atomic_rmw_3!(atomic_min(AtomicIdw::fetch_min), atomic_min_non_seqcst, atomic_min_seqcst); +atomic_rmw_3!(atomic_umin(AtomicUdw::fetch_min), atomic_umin_non_seqcst, atomic_umin_seqcst); + +atomic_rmw_2!(atomic_not(AtomicUdw::fetch_not), atomic_not_non_seqcst, atomic_not_seqcst); +atomic_rmw_2!(atomic_neg(AtomicUdw::fetch_neg), atomic_neg_non_seqcst, atomic_neg_seqcst); diff --git a/vendor/portable-atomic/src/imp/fallback/seq_lock.rs b/vendor/portable-atomic/src/imp/fallback/seq_lock.rs new file mode 100644 index 000000000..d86b02e10 --- /dev/null +++ b/vendor/portable-atomic/src/imp/fallback/seq_lock.rs @@ -0,0 +1,145 @@ +// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/seq_lock.rs. + +use core::{ + mem::ManuallyDrop, + sync::atomic::{self, Ordering}, +}; + +use super::utils::Backoff; + +// See mod.rs for details. 
+#[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))] +pub(super) use core::sync::atomic::AtomicU64 as AtomicStamp; +#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))] +pub(super) use core::sync::atomic::AtomicUsize as AtomicStamp; +#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))] +pub(super) type Stamp = usize; +#[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))] +pub(super) type Stamp = u64; + +// See mod.rs for details. +pub(super) type AtomicChunk = AtomicStamp; +pub(super) type Chunk = Stamp; + +/// A simple stamped lock. +pub(super) struct SeqLock { + /// The current state of the lock. + /// + /// All bits except the least significant one hold the current stamp. When locked, the state + /// equals 1 and doesn't contain a valid stamp. + state: AtomicStamp, +} + +impl SeqLock { + #[inline] + pub(super) const fn new() -> Self { + Self { state: AtomicStamp::new(0) } + } + + /// If not locked, returns the current stamp. + /// + /// This method should be called before optimistic reads. + #[inline] + pub(super) fn optimistic_read(&self) -> Option<Stamp> { + let state = self.state.load(Ordering::Acquire); + if state == 1 { + None + } else { + Some(state) + } + } + + /// Returns `true` if the current stamp is equal to `stamp`. + /// + /// This method should be called after optimistic reads to check whether they are valid. The + /// argument `stamp` should correspond to the one returned by method `optimistic_read`. + #[inline] + pub(super) fn validate_read(&self, stamp: Stamp) -> bool { + atomic::fence(Ordering::Acquire); + self.state.load(Ordering::Relaxed) == stamp + } + + /// Grabs the lock for writing. 
+ #[inline] + pub(super) fn write(&self) -> SeqLockWriteGuard<'_> { + let mut backoff = Backoff::new(); + loop { + let previous = self.state.swap(1, Ordering::Acquire); + + if previous != 1 { + atomic::fence(Ordering::Release); + + return SeqLockWriteGuard { lock: self, state: previous }; + } + + while self.state.load(Ordering::Relaxed) == 1 { + backoff.snooze(); + } + } + } +} + +/// An RAII guard that releases the lock and increments the stamp when dropped. +#[must_use] +pub(super) struct SeqLockWriteGuard<'a> { + /// The parent lock. + lock: &'a SeqLock, + + /// The stamp before locking. + state: Stamp, +} + +impl SeqLockWriteGuard<'_> { + /// Releases the lock without incrementing the stamp. + #[inline] + pub(super) fn abort(self) { + // We specifically don't want to call drop(), since that's + // what increments the stamp. + let this = ManuallyDrop::new(self); + + // Restore the stamp. + // + // Release ordering for synchronizing with `optimistic_read`. + this.lock.state.store(this.state, Ordering::Release); + } +} + +impl Drop for SeqLockWriteGuard<'_> { + #[inline] + fn drop(&mut self) { + // Release the lock and increment the stamp. + // + // Release ordering for synchronizing with `optimistic_read`. 
+ self.lock.state.store(self.state.wrapping_add(2), Ordering::Release); + } +} + +#[cfg(test)] +mod tests { + use super::SeqLock; + + #[test] + fn smoke() { + let lock = SeqLock::new(); + let before = lock.optimistic_read().unwrap(); + assert!(lock.validate_read(before)); + { + let _guard = lock.write(); + } + assert!(!lock.validate_read(before)); + let after = lock.optimistic_read().unwrap(); + assert_ne!(before, after); + } + + #[test] + fn test_abort() { + let lock = SeqLock::new(); + let before = lock.optimistic_read().unwrap(); + { + let guard = lock.write(); + guard.abort(); + } + let after = lock.optimistic_read().unwrap(); + assert_eq!(before, after, "aborted write does not update the stamp"); + } +} diff --git a/vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs b/vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs new file mode 100644 index 000000000..74b08d24f --- /dev/null +++ b/vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs @@ -0,0 +1,178 @@ +// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/seq_lock_wide.rs. + +use core::{ + mem::ManuallyDrop, + sync::atomic::{self, AtomicUsize, Ordering}, +}; + +use super::utils::Backoff; + +// See mod.rs for details. +pub(super) type AtomicChunk = AtomicUsize; +pub(super) type Chunk = usize; + +/// A simple stamped lock. +/// +/// The state is represented as two `AtomicUsize`: `state_hi` for high bits and `state_lo` for low +/// bits. +pub(super) struct SeqLock { + /// The high bits of the current state of the lock. + state_hi: AtomicUsize, + + /// The low bits of the current state of the lock. + /// + /// All bits except the least significant one hold the current stamp. When locked, the state_lo + /// equals 1 and doesn't contain a valid stamp. 
+ state_lo: AtomicUsize, +} + +impl SeqLock { + #[inline] + pub(super) const fn new() -> Self { + Self { state_hi: AtomicUsize::new(0), state_lo: AtomicUsize::new(0) } + } + + /// If not locked, returns the current stamp; returns `None` while a writer holds the lock. + /// + /// This method should be called before optimistic reads. + #[inline] + pub(super) fn optimistic_read(&self) -> Option<(usize, usize)> { + // The acquire loads from `state_hi` and `state_lo` synchronize with the release stores in + // `SeqLockWriteGuard::drop` and `SeqLockWriteGuard::abort`. + // + // As a consequence, we can make sure that (1) all writes within the era of `state_hi - 1` + // happen before now; and therefore, (2) if `state_lo` is even, all writes within the + // critical section of (`state_hi`, `state_lo`) happen before now. + let state_hi = self.state_hi.load(Ordering::Acquire); + let state_lo = self.state_lo.load(Ordering::Acquire); + if state_lo == 1 { + None + } else { + Some((state_hi, state_lo)) + } + } + + /// Returns `true` if the current stamp is equal to `stamp`. + /// + /// This method should be called after optimistic reads to check whether they are valid. The + /// argument `stamp` should correspond to the one returned by method `optimistic_read`. + #[inline] + pub(super) fn validate_read(&self, stamp: (usize, usize)) -> bool { + // Thanks to the fence, if we're noticing any modification to the data at the critical + // section of `(stamp.0, stamp.1)`, then the critical section's write of 1 to `state_lo` should be + // visible. + atomic::fence(Ordering::Acquire); + + // So if `state_lo` coincides with `stamp.1`, then either (1) we're noticing no modification + // to the data after the critical section of `(stamp.0, stamp.1)`, or (2) `state_lo` wrapped + // around. + // + // If (2) is the case, the acquire ordering ensures we see the new value of `state_hi`. 
+ let state_lo = self.state_lo.load(Ordering::Acquire); + + // If (2) is the case and `state_hi` coincides with `stamp.0`, then `state_hi` also wrapped + // around; in that case we give up on validating the read correctly. + let state_hi = self.state_hi.load(Ordering::Relaxed); + + // Except for the case that both `state_hi` and `state_lo` wrapped around, the following + // condition implies that we're noticing no modification to the data after the critical + // section of `(stamp.0, stamp.1)`. + (state_hi, state_lo) == stamp + } + + /// Grabs the lock for writing. + #[inline] + pub(super) fn write(&self) -> SeqLockWriteGuard<'_> { + let mut backoff = Backoff::new(); + loop { + let previous = self.state_lo.swap(1, Ordering::Acquire); + + if previous != 1 { + // To synchronize with the acquire fence in `validate_read` via any modification to + // the data at the critical section of `(state_hi, previous)`. + atomic::fence(Ordering::Release); + + return SeqLockWriteGuard { lock: self, state_lo: previous }; + } + + // The lock is currently held (`state_lo` was already 1): back off until it changes, + // then retry the swap. + while self.state_lo.load(Ordering::Relaxed) == 1 { + backoff.snooze(); + } + } + } +} + +/// An RAII guard that releases the lock and increments the stamp when dropped. +#[must_use] +pub(super) struct SeqLockWriteGuard<'a> { + /// The parent lock. + lock: &'a SeqLock, + + /// The low bits of the stamp before locking (the value `write` swapped out of `state_lo`). + state_lo: usize, +} + +impl SeqLockWriteGuard<'_> { + /// Releases the lock without incrementing the stamp. + #[inline] + pub(super) fn abort(self) { + // We specifically don't want to call drop(), since that's + // what increments the stamp. + let this = ManuallyDrop::new(self); + + // Restore the stamp. + // + // Release ordering for synchronizing with `optimistic_read`. + this.lock.state_lo.store(this.state_lo, Ordering::Release); + } +} + +impl Drop for SeqLockWriteGuard<'_> { + #[inline] + fn drop(&mut self) { + let state_lo = self.state_lo.wrapping_add(2); + + // Increase the high bits if the low bits wrap around. 
+ // + // Release ordering for synchronizing with `optimistic_read`. + if state_lo == 0 { + let state_hi = self.lock.state_hi.load(Ordering::Relaxed); + self.lock.state_hi.store(state_hi.wrapping_add(1), Ordering::Release); + } + + // Release the lock and increment the stamp. + // + // Release ordering for synchronizing with `optimistic_read`. + self.lock.state_lo.store(state_lo, Ordering::Release); + } +} + +#[cfg(test)] +mod tests { + use super::SeqLock; + + #[test] + fn smoke() { + let lock = SeqLock::new(); + let before = lock.optimistic_read().unwrap(); + assert!(lock.validate_read(before)); + { + let _guard = lock.write(); + } + assert!(!lock.validate_read(before)); + let after = lock.optimistic_read().unwrap(); + assert_ne!(before, after); + } + + #[test] + fn test_abort() { + let lock = SeqLock::new(); + let before = lock.optimistic_read().unwrap(); + { + let guard = lock.write(); + guard.abort(); + } + let after = lock.optimistic_read().unwrap(); + assert_eq!(before, after, "aborted write does not update the stamp"); + } +} diff --git a/vendor/portable-atomic/src/imp/fallback/utils.rs b/vendor/portable-atomic/src/imp/fallback/utils.rs new file mode 100644 index 000000000..c78c625b0 --- /dev/null +++ b/vendor/portable-atomic/src/imp/fallback/utils.rs @@ -0,0 +1,141 @@ +use core::ops; + +// TODO: mips32r6, mips64r6 +// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/d49a0f8454499ced8af0b61aeb661379c4eb0588/crossbeam-utils/src/cache_padded.rs. +/// Pads and aligns a value to the length of a cache line. +// Starting from Intel's Sandy Bridge, spatial prefetcher is now pulling pairs of 64-byte cache +// lines at a time, so we have to align to 128 bytes rather than 64. 
+// +// Sources: +// - https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf +// - https://github.com/facebook/folly/blob/1b5288e6eea6df074758f877c849b6e73bbb9fbb/folly/lang/Align.h#L107 +// +// ARM's big.LITTLE architecture has asymmetric cores and "big" cores have 128-byte cache line size. +// +// Sources: +// - https://www.mono-project.com/news/2016/09/12/arm64-icache/ +// +// powerpc64 has 128-byte cache line size. +// +// Sources: +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_ppc64x.go#L9 +#[cfg_attr( + any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "powerpc64"), + repr(align(128)) +)] +// arm, mips, mips64, riscv64, sparc, and hexagon have 32-byte cache line size. +// +// Sources: +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_arm.go#L7 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips.go#L7 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mipsle.go#L7 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips64x.go#L9 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_riscv64.go#L7 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L17 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/hexagon/include/asm/cache.h#L12 +// +// riscv32 is assumed not to exceed the cache line size of riscv64. +#[cfg_attr( + any( + target_arch = "arm", + target_arch = "mips", + target_arch = "mips64", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "sparc", + target_arch = "hexagon", + ), + repr(align(32)) +)] +// m68k has 16-byte cache line size. 
+// +// Sources: +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/m68k/include/asm/cache.h#L9 +#[cfg_attr(target_arch = "m68k", repr(align(16)))] +// s390x has 256-byte cache line size. +// +// Sources: +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_s390x.go#L7 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/s390/include/asm/cache.h#L13 +#[cfg_attr(target_arch = "s390x", repr(align(256)))] +// x86, wasm, and sparc64 have 64-byte cache line size. +// +// Sources: +// - https://github.com/golang/go/blob/dda2991c2ea0c5914714469c4defc2562a907230/src/internal/cpu/cpu_x86.go#L9 +// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_wasm.go#L7 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L19 +// +// All others are assumed to have 64-byte cache line size. +#[cfg_attr( + not(any( + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "powerpc64", + target_arch = "arm", + target_arch = "mips", + target_arch = "mips64", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "sparc", + target_arch = "hexagon", + target_arch = "m68k", + target_arch = "s390x", + )), + repr(align(64)) +)] +pub(crate) struct CachePadded<T> { + value: T, +} + +impl<T> CachePadded<T> { + #[inline] + pub(crate) const fn new(value: T) -> Self { + Self { value } + } +} + +impl<T> ops::Deref for CachePadded<T> { + type Target = T; + + #[inline] + fn deref(&self) -> &T { + &self.value + } +} + +// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/backoff.rs. +// Adjusted to reduce spinning. +/// Performs exponential backoff in spin loops. 
+pub(crate) struct Backoff { + step: u32, +} + +// https://github.com/oneapi-src/oneTBB/blob/v2021.5.0/include/oneapi/tbb/detail/_utils.h#L46-L48 +const SPIN_LIMIT: u32 = 4; + +impl Backoff { + #[inline] + pub(crate) const fn new() -> Self { + Self { step: 0 } + } + + #[inline] + pub(crate) fn snooze(&mut self) { + if self.step <= SPIN_LIMIT { + for _ in 0..1 << self.step { + #[allow(deprecated)] + core::sync::atomic::spin_loop_hint(); + } + self.step += 1; + } else { + #[cfg(not(feature = "std"))] + for _ in 0..1 << self.step { + #[allow(deprecated)] + core::sync::atomic::spin_loop_hint(); + } + + #[cfg(feature = "std")] + std::thread::yield_now(); + } + } +} diff --git a/vendor/portable-atomic/src/imp/float.rs b/vendor/portable-atomic/src/imp/float.rs new file mode 100644 index 000000000..6d6ac4b07 --- /dev/null +++ b/vendor/portable-atomic/src/imp/float.rs @@ -0,0 +1,216 @@ +// AtomicF{32,64} implementation based on AtomicU{32,64}. +// +// This module provides atomic float implementations using atomic integer. +// +// Note that most of `fetch_*` operations of atomic floats are implemented using +// CAS loops, which can be slower than equivalent operations of atomic integers. +// +// GPU targets have atomic instructions for float, so GPU targets will use +// architecture-specific implementations instead of this implementation in the +// future: https://github.com/taiki-e/portable-atomic/issues/34 + +#![cfg_attr( + all(target_pointer_width = "16", not(feature = "fallback")), + allow(unused_imports, unused_macros) +)] + +use core::{cell::UnsafeCell, sync::atomic::Ordering}; + +macro_rules! atomic_float { + ( + $atomic_type:ident, + $float_type:ident, + $atomic_int_type:ident, + $int_type:ident, + $align:literal + ) => { + #[repr(C, align($align))] + pub(crate) struct $atomic_type { + v: UnsafeCell<$float_type>, + } + + // Send is implicitly implemented. + // SAFETY: any data races are prevented by atomic operations. 
+ unsafe impl Sync for $atomic_type {} + + impl $atomic_type { + #[inline] + pub(crate) const fn new(v: $float_type) -> Self { + Self { v: UnsafeCell::new(v) } + } + + #[inline] + pub(crate) fn is_lock_free() -> bool { + crate::$atomic_int_type::is_lock_free() + } + #[inline] + pub(crate) const fn is_always_lock_free() -> bool { + crate::$atomic_int_type::is_always_lock_free() + } + + #[inline] + pub(crate) fn get_mut(&mut self) -> &mut $float_type { + // SAFETY: the mutable reference guarantees unique ownership. + // (UnsafeCell::get_mut requires Rust 1.50) + unsafe { &mut *self.v.get() } + } + + #[inline] + pub(crate) fn into_inner(self) -> $float_type { + self.v.into_inner() + } + + #[inline] + #[cfg_attr( + any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri), + track_caller + )] + pub(crate) fn load(&self, order: Ordering) -> $float_type { + $float_type::from_bits(self.as_bits().load(order)) + } + + #[inline] + #[cfg_attr( + any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri), + track_caller + )] + pub(crate) fn store(&self, val: $float_type, order: Ordering) { + self.as_bits().store(val.to_bits(), order) + } + + const_fn! { + const_if: #[cfg(not(portable_atomic_no_const_raw_ptr_deref))]; + #[inline] + pub(crate) const fn as_bits(&self) -> &crate::$atomic_int_type { + // SAFETY: $atomic_type and $atomic_int_type have the same layout, + // and there is no concurrent access to the value that does not go through this method. + unsafe { &*(self as *const Self as *const crate::$atomic_int_type) } + } + } + + #[inline] + pub(crate) const fn as_ptr(&self) -> *mut $float_type { + self.v.get() + } + } + + cfg_has_atomic_cas! 
{ + impl $atomic_type { + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn swap(&self, val: $float_type, order: Ordering) -> $float_type { + $float_type::from_bits(self.as_bits().swap(val.to_bits(), order)) + } + + #[inline] + #[cfg_attr( + any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri), + track_caller + )] + pub(crate) fn compare_exchange( + &self, + current: $float_type, + new: $float_type, + success: Ordering, + failure: Ordering, + ) -> Result<$float_type, $float_type> { + match self.as_bits().compare_exchange( + current.to_bits(), + new.to_bits(), + success, + failure, + ) { + Ok(v) => Ok($float_type::from_bits(v)), + Err(v) => Err($float_type::from_bits(v)), + } + } + + #[inline] + #[cfg_attr( + any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri), + track_caller + )] + pub(crate) fn compare_exchange_weak( + &self, + current: $float_type, + new: $float_type, + success: Ordering, + failure: Ordering, + ) -> Result<$float_type, $float_type> { + match self.as_bits().compare_exchange_weak( + current.to_bits(), + new.to_bits(), + success, + failure, + ) { + Ok(v) => Ok($float_type::from_bits(v)), + Err(v) => Err($float_type::from_bits(v)), + } + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_add(&self, val: $float_type, order: Ordering) -> $float_type { + self.fetch_update_(order, |x| x + val) + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_sub(&self, val: $float_type, order: Ordering) -> $float_type { + self.fetch_update_(order, |x| x - val) + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + fn fetch_update_<F>(&self, order: Ordering, mut f: F) -> $float_type + where + F: FnMut($float_type) -> $float_type, + { + // This is a private function 
and all instances of `f` only operate on the value + // loaded, so there is no need to synchronize the first load/failed CAS. + let mut prev = self.load(Ordering::Relaxed); + loop { + let next = f(prev); + match self.compare_exchange_weak(prev, next, order, Ordering::Relaxed) { + Ok(x) => return x, + Err(next_prev) => prev = next_prev, + } + } + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_max(&self, val: $float_type, order: Ordering) -> $float_type { + self.fetch_update_(order, |x| x.max(val)) + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_min(&self, val: $float_type, order: Ordering) -> $float_type { + self.fetch_update_(order, |x| x.min(val)) + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_neg(&self, order: Ordering) -> $float_type { + const NEG_MASK: $int_type = !0 / 2 + 1; + $float_type::from_bits(self.as_bits().fetch_xor(NEG_MASK, order)) + } + + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + pub(crate) fn fetch_abs(&self, order: Ordering) -> $float_type { + const ABS_MASK: $int_type = !0 / 2; + $float_type::from_bits(self.as_bits().fetch_and(ABS_MASK, order)) + } + } + } // cfg_has_atomic_cas! + }; +} + +cfg_has_atomic_32! { + atomic_float!(AtomicF32, f32, AtomicU32, u32, 4); +} +cfg_has_atomic_64! { + atomic_float!(AtomicF64, f64, AtomicU64, u64, 8); +} diff --git a/vendor/portable-atomic/src/imp/interrupt/README.md b/vendor/portable-atomic/src/imp/interrupt/README.md new file mode 100644 index 000000000..32c202a8d --- /dev/null +++ b/vendor/portable-atomic/src/imp/interrupt/README.md @@ -0,0 +1,26 @@ +# Implementation of disabling interrupts + +This module is used to provide atomic CAS for targets where atomic CAS is not available in the standard library. 
+ +- MSP430 and AVR are always single-core, so this module is always used on them. +- ARMv6-M (thumbv6m), pre-v6 ARM (e.g., thumbv4t, thumbv5te), RISC-V without A-extension, and Xtensa could be multi-core, so this module is used when the `unsafe-assume-single-core` feature is enabled. + +The implementation uses privileged instructions to disable interrupts, so it usually doesn't work in unprivileged mode. +Enabling this feature in an environment where privileged instructions are not available, or where the instructions used are not sufficient to disable interrupts in the system, is also usually considered **unsound**, although the details are system-dependent. + +Consider using the [`critical-section` feature](../../../README.md#optional-features-critical-section) for systems that cannot use the `unsafe-assume-single-core` feature. + +For some targets, the implementation can be changed by explicitly enabling features. + +- On ARMv6-M, this disables interrupts by modifying the PRIMASK register. +- On pre-v6 ARM, this disables interrupts by modifying the I (IRQ mask) bit of the CPSR. +- On pre-v6 ARM with the `disable-fiq` feature, this disables interrupts by modifying the I (IRQ mask) bit and F (FIQ mask) bit of the CPSR. +- On RISC-V (without A-extension), this disables interrupts by modifying the MIE (Machine Interrupt Enable) bit of the `mstatus` register. +- On RISC-V (without A-extension) with the `s-mode` feature, this disables interrupts by modifying the SIE (Supervisor Interrupt Enable) bit of the `sstatus` register. +- On MSP430, this disables interrupts by modifying the GIE (Global Interrupt Enable) bit of the status register (SR). +- On AVR, this disables interrupts by modifying the I (Global Interrupt Enable) bit of the status register (SREG). +- On Xtensa, this disables interrupts by modifying the PS special register. 
+ +Some operations don't require disabling interrupts (loads and stores on targets other than AVR, and additionally `add`, `sub`, `and`, `or`, `xor`, `not` on MSP430). However, when the `critical-section` feature is enabled, critical sections are taken for all atomic operations. + +Feel free to submit an issue if your target is not supported yet. diff --git a/vendor/portable-atomic/src/imp/interrupt/armv4t.rs b/vendor/portable-atomic/src/imp/interrupt/armv4t.rs new file mode 100644 index 000000000..85c7ec1b5 --- /dev/null +++ b/vendor/portable-atomic/src/imp/interrupt/armv4t.rs @@ -0,0 +1,152 @@ +// Refs: https://developer.arm.com/documentation/ddi0406/cb/System-Level-Architecture/The-System-Level-Programmers--Model/ARM-processor-modes-and-ARM-core-registers/Program-Status-Registers--PSRs-?lang=en +// +// Generated asm: +// - armv5te https://godbolt.org/z/5arYrfzYc + +#[cfg(not(portable_atomic_no_asm))] +use core::arch::asm; + +#[cfg(not(portable_atomic_disable_fiq))] +macro_rules! if_disable_fiq { + ($tt:tt) => { + "" + }; +} +#[cfg(portable_atomic_disable_fiq)] +macro_rules! if_disable_fiq { + ($tt:tt) => { + $tt + }; +} + +pub(super) type State = u32; + +/// Disables interrupts and returns the previous interrupt state. +#[inline] +#[instruction_set(arm::a32)] +pub(super) fn disable() -> State { + let cpsr: State; + // SAFETY: reading CPSR and disabling interrupts are safe. + // (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions) + unsafe { + // Do not use `nomem` and `readonly` because we need to prevent subsequent memory accesses from being reordered before interrupts are disabled. + asm!( + "mrs {prev}, cpsr", + "orr {new}, {prev}, 0x80", // I (IRQ mask) bit (1 << 7) + // We disable only IRQs by default. See also https://github.com/taiki-e/portable-atomic/pull/28#issuecomment-1214146912. 
+ if_disable_fiq!("orr {new}, {new}, 0x40"), // F (FIQ mask) bit (1 << 6) + "msr cpsr_c, {new}", + prev = out(reg) cpsr, + new = out(reg) _, + options(nostack, preserves_flags), + ); + } + cpsr +} + +/// Restores the previous interrupt state. +/// +/// # Safety +/// +/// The state must be the one retrieved by the previous `disable`. +#[inline] +#[instruction_set(arm::a32)] +pub(super) unsafe fn restore(cpsr: State) { + // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`. + unsafe { + // This clobbers the entire CPSR. See msp430.rs for the safety of this. + // + // Do not use `nomem` and `readonly` because we need to prevent preceding memory accesses from being reordered after interrupts are enabled. + asm!("msr cpsr_c, {0}", in(reg) cpsr, options(nostack)); + } +} + +// On pre-v6 ARM, we cannot use core::sync::atomic here because they call the +// `__sync_*` builtins for non-relaxed load/store (because pre-v6 ARM doesn't +// have Data Memory Barrier). +// +// Generated asm: +// - armv5te https://godbolt.org/z/a7zcs9hKa +pub(crate) mod atomic { + #[cfg(not(portable_atomic_no_asm))] + use core::arch::asm; + use core::{cell::UnsafeCell, sync::atomic::Ordering}; + + macro_rules! atomic { + ($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => { + #[repr(transparent)] + pub(crate) struct $atomic_type $(<$($generics)*>)? { + v: UnsafeCell<$value_type>, + } + + // Send is implicitly implemented for atomic integers, but not for atomic pointers. + // SAFETY: any data races are prevented by atomic operations. + unsafe impl $(<$($generics)*>)? Send for $atomic_type $(<$($generics)*>)? {} + // SAFETY: any data races are prevented by atomic operations. + unsafe impl $(<$($generics)*>)? Sync for $atomic_type $(<$($generics)*>)? {} + + impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? 
{ + #[inline] + pub(crate) fn load(&self, order: Ordering) -> $value_type { + let src = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + let out; + match order { + Ordering::Relaxed => { + asm!( + concat!("ldr", $asm_suffix, " {out}, [{src}]"), + src = in(reg) src, + out = lateout(reg) out, + options(nostack, preserves_flags, readonly), + ); + } + Ordering::Acquire | Ordering::SeqCst => { + // inline asm without nomem/readonly implies compiler fence. + // And compiler fence is fine because the user explicitly declares that + // the system is single-core by using an unsafe cfg. + asm!( + concat!("ldr", $asm_suffix, " {out}, [{src}]"), + src = in(reg) src, + out = lateout(reg) out, + options(nostack, preserves_flags), + ); + } + _ => unreachable!("{:?}", order), + } + out + } + } + + #[inline] + pub(crate) fn store(&self, val: $value_type, _order: Ordering) { + let dst = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { + // inline asm without nomem/readonly implies compiler fence. + // And compiler fence is fine because the user explicitly declares that + // the system is single-core by using an unsafe cfg. 
+ asm!( + concat!("str", $asm_suffix, " {val}, [{dst}]"), + dst = in(reg) dst, + val = in(reg) val, + options(nostack, preserves_flags), + ); + } + } + } + }; + } + + atomic!(AtomicI8, i8, "b"); + atomic!(AtomicU8, u8, "b"); + atomic!(AtomicI16, i16, "h"); + atomic!(AtomicU16, u16, "h"); + atomic!(AtomicI32, i32, ""); + atomic!(AtomicU32, u32, ""); + atomic!(AtomicIsize, isize, ""); + atomic!(AtomicUsize, usize, ""); + atomic!([T] AtomicPtr, *mut T, ""); +} diff --git a/vendor/portable-atomic/src/imp/interrupt/armv6m.rs b/vendor/portable-atomic/src/imp/interrupt/armv6m.rs new file mode 100644 index 000000000..00413128c --- /dev/null +++ b/vendor/portable-atomic/src/imp/interrupt/armv6m.rs @@ -0,0 +1,46 @@ +// Adapted from https://github.com/rust-embedded/cortex-m. +// +// Generated asm: +// - armv6-m https://godbolt.org/z/sTezYnaj9 + +#[cfg(not(portable_atomic_no_asm))] +use core::arch::asm; + +pub(super) use core::sync::atomic; + +pub(super) type State = u32; + +/// Disables interrupts and returns the previous interrupt state. +#[inline] +pub(super) fn disable() -> State { + let r: State; + // SAFETY: reading the priority mask register and disabling interrupts are safe. + // (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions) + unsafe { + // Do not use `nomem` and `readonly` because we need to prevent subsequent memory accesses from being reordered before interrupts are disabled. + asm!( + "mrs {0}, PRIMASK", + "cpsid i", + out(reg) r, + options(nostack, preserves_flags), + ); + } + r +} + +/// Restores the previous interrupt state. +/// +/// # Safety +/// +/// The state must be the one retrieved by the previous `disable`. +#[inline] +pub(super) unsafe fn restore(r: State) { + if r & 0x1 == 0 { + // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`, + // and we've checked that interrupts were enabled before disabling interrupts. 
+ unsafe { + // Do not use `nomem` and `readonly` because we need to prevent preceding memory accesses from being reordered after interrupts are enabled. + asm!("cpsie i", options(nostack, preserves_flags)); + } + } +} diff --git a/vendor/portable-atomic/src/imp/interrupt/avr.rs b/vendor/portable-atomic/src/imp/interrupt/avr.rs new file mode 100644 index 000000000..7cc48c62e --- /dev/null +++ b/vendor/portable-atomic/src/imp/interrupt/avr.rs @@ -0,0 +1,52 @@ +// Adapted from https://github.com/Rahix/avr-device. + +#[cfg(not(portable_atomic_no_asm))] +use core::arch::asm; + +pub(super) type State = u8; + +/// Disables interrupts and returns the previous interrupt state. +#[inline] +pub(super) fn disable() -> State { + let sreg: State; + // SAFETY: reading the status register (SREG) and disabling interrupts are safe. + // (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions) + unsafe { + // Do not use `nomem` and `readonly` because we need to prevent subsequent memory accesses from being reordered before interrupts are disabled. + // Do not use `preserves_flags` because CLI modifies the I bit of the status register (SREG). + // Refs: https://ww1.microchip.com/downloads/en/DeviceDoc/AVR-InstructionSet-Manual-DS40002198.pdf#page=58 + #[cfg(not(portable_atomic_no_asm))] + asm!( + "in {0}, 0x3F", + "cli", + out(reg) sreg, + options(nostack), + ); + #[cfg(portable_atomic_no_asm)] + { + llvm_asm!("in $0, 0x3F" : "=r"(sreg) ::: "volatile"); + llvm_asm!("cli" ::: "memory" : "volatile"); + } + } + sreg +} + +/// Restores the previous interrupt state. +/// +/// # Safety +/// +/// The state must be the one retrieved by the previous `disable`. +#[inline] +pub(super) unsafe fn restore(sreg: State) { + // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`. + unsafe { + // This clobbers the entire status register. See msp430.rs for the safety of this. 
+ // + // Do not use `nomem` and `readonly` because we need to prevent preceding memory accesses from being reordered after interrupts are enabled. + // Do not use `preserves_flags` because OUT modifies the status register (SREG). + #[cfg(not(portable_atomic_no_asm))] + asm!("out 0x3F, {0}", in(reg) sreg, options(nostack)); + #[cfg(portable_atomic_no_asm)] + llvm_asm!("out 0x3F, $0" :: "r"(sreg) : "memory" : "volatile"); + } +} diff --git a/vendor/portable-atomic/src/imp/interrupt/mod.rs b/vendor/portable-atomic/src/imp/interrupt/mod.rs new file mode 100644 index 000000000..a0ead68a6 --- /dev/null +++ b/vendor/portable-atomic/src/imp/interrupt/mod.rs @@ -0,0 +1,630 @@ +// Critical section based fallback implementations +// +// This module supports two different critical section implementations: +// - Built-in "disable all interrupts". +// - Call into the `critical-section` crate (which allows the user to plug any implementation). +// +// The `critical-section`-based fallback is enabled when the user asks for it with the `critical-section` +// Cargo feature. +// +// The "disable interrupts" fallback is not sound on multi-core systems. +// Also, this uses privileged instructions to disable interrupts, so it usually +// doesn't work in unprivileged mode. Using this fallback in an environment where privileged +// instructions are not available is also usually considered **unsound**, +// although the details are system-dependent. +// +// Therefore, this implementation will only be enabled in one of the following cases: +// +// - When the user explicitly declares that the system is single-core and that +// privileged instructions are available using an unsafe cfg. +// - When we can safely assume that the system is single-core and that +// privileged instructions are available on the system. +// +// AVR, which is single-core[^avr1] and for which LLVM also generates code that disables +// interrupts[^avr2] in atomic ops by default, is considered the latter. +// The same applies to MSP430. 
+// +// See also README.md of this directory. +// +// [^avr1]: https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp#LL963 +// [^avr2]: https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/test/CodeGen/AVR/atomics/load16.ll#L5 + +// On some platforms, atomic load/store can be implemented in a more efficient +// way than disabling interrupts. On MSP430, some RMWs that do not return the +// previous value can also be optimized. +// +// Note: On single-core systems, it is okay to use critical section-based +// CAS together with atomic load/store. The load/store will not be +// called while interrupts are disabled, and since the load/store is +// atomic, it is not affected by interrupts even if interrupts are enabled. +#[cfg(not(any(target_arch = "avr", feature = "critical-section")))] +use arch::atomic; + +#[cfg(not(feature = "critical-section"))] +#[cfg_attr( + all( + target_arch = "arm", + any(target_feature = "mclass", portable_atomic_target_feature = "mclass"), + ), + path = "armv6m.rs" +)] +#[cfg_attr( + all( + target_arch = "arm", + not(any(target_feature = "mclass", portable_atomic_target_feature = "mclass")), + ), + path = "armv4t.rs" +)] +#[cfg_attr(target_arch = "avr", path = "avr.rs")] +#[cfg_attr(target_arch = "msp430", path = "msp430.rs")] +#[cfg_attr(any(target_arch = "riscv32", target_arch = "riscv64"), path = "riscv.rs")] +#[cfg_attr(target_arch = "xtensa", path = "xtensa.rs")] +mod arch; + +use core::{cell::UnsafeCell, sync::atomic::Ordering}; + +// Critical section implementations might use locks internally. +#[cfg(feature = "critical-section")] +const IS_ALWAYS_LOCK_FREE: bool = false; + +// Consider atomic operations based on disabling interrupts on single-core +// systems to be lock-free. (We consider the pre-v6 ARM Linux's atomic operations +// provided in a similar way by the Linux kernel to be lock-free.) 
+#[cfg(not(feature = "critical-section"))] +const IS_ALWAYS_LOCK_FREE: bool = true; + +#[cfg(feature = "critical-section")] +#[inline] +fn with<F, R>(f: F) -> R +where + F: FnOnce() -> R, +{ + critical_section::with(|_| f()) +} + +#[cfg(not(feature = "critical-section"))] +#[inline] +fn with<F, R>(f: F) -> R +where + F: FnOnce() -> R, +{ + // Get current interrupt state and disable interrupts + let state = arch::disable(); + + let r = f(); + + // Restore interrupt state + // SAFETY: the state was retrieved by the previous `disable`. + unsafe { arch::restore(state) } + + r +} + +#[cfg_attr(target_pointer_width = "16", repr(C, align(2)))] +#[cfg_attr(target_pointer_width = "32", repr(C, align(4)))] +#[cfg_attr(target_pointer_width = "64", repr(C, align(8)))] +#[cfg_attr(target_pointer_width = "128", repr(C, align(16)))] +pub(crate) struct AtomicPtr<T> { + p: UnsafeCell<*mut T>, +} + +// SAFETY: any data races are prevented by disabling interrupts or +// atomic intrinsics (see module-level comments). +unsafe impl<T> Send for AtomicPtr<T> {} +// SAFETY: any data races are prevented by disabling interrupts or +// atomic intrinsics (see module-level comments). +unsafe impl<T> Sync for AtomicPtr<T> {} + +impl<T> AtomicPtr<T> { + #[inline] + pub(crate) const fn new(p: *mut T) -> Self { + Self { p: UnsafeCell::new(p) } + } + + #[inline] + pub(crate) fn is_lock_free() -> bool { + Self::is_always_lock_free() + } + #[inline] + pub(crate) const fn is_always_lock_free() -> bool { + IS_ALWAYS_LOCK_FREE + } + + #[inline] + pub(crate) fn get_mut(&mut self) -> &mut *mut T { + // SAFETY: the mutable reference guarantees unique ownership. 
+ // (UnsafeCell::get_mut requires Rust 1.50) + unsafe { &mut *self.p.get() } + } + + #[inline] + pub(crate) fn into_inner(self) -> *mut T { + self.p.into_inner() + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn load(&self, order: Ordering) -> *mut T { + crate::utils::assert_load_ordering(order); + #[cfg(not(any(target_arch = "avr", feature = "critical-section")))] + { + self.as_native().load(order) + } + #[cfg(any(target_arch = "avr", feature = "critical-section"))] + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { self.p.get().read() }) + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn store(&self, ptr: *mut T, order: Ordering) { + crate::utils::assert_store_ordering(order); + #[cfg(not(any(target_arch = "avr", feature = "critical-section")))] + { + self.as_native().store(ptr, order); + } + #[cfg(any(target_arch = "avr", feature = "critical-section"))] + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { self.p.get().write(ptr) }); + } + + #[inline] + pub(crate) fn swap(&self, ptr: *mut T, _order: Ordering) -> *mut T { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. 
+ with(|| unsafe { self.p.get().replace(ptr) }) + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange( + &self, + current: *mut T, + new: *mut T, + success: Ordering, + failure: Ordering, + ) -> Result<*mut T, *mut T> { + crate::utils::assert_compare_exchange_ordering(success, failure); + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let result = self.p.get().read(); + if result == current { + self.p.get().write(new); + Ok(result) + } else { + Err(result) + } + }) + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange_weak( + &self, + current: *mut T, + new: *mut T, + success: Ordering, + failure: Ordering, + ) -> Result<*mut T, *mut T> { + self.compare_exchange(current, new, success, failure) + } + + #[inline] + pub(crate) const fn as_ptr(&self) -> *mut *mut T { + self.p.get() + } + + #[cfg(not(any(target_arch = "avr", feature = "critical-section")))] + #[inline] + fn as_native(&self) -> &atomic::AtomicPtr<T> { + // SAFETY: AtomicPtr and atomic::AtomicPtr have the same layout and + // guarantee atomicity in a compatible way. (see module-level comments) + unsafe { &*(self as *const Self as *const atomic::AtomicPtr<T>) } + } +} + +macro_rules! atomic_int { + (base, $atomic_type:ident, $int_type:ident, $align:literal) => { + #[repr(C, align($align))] + pub(crate) struct $atomic_type { + v: UnsafeCell<$int_type>, + } + + // Send is implicitly implemented. + // SAFETY: any data races are prevented by disabling interrupts or + // atomic intrinsics (see module-level comments). 
+ unsafe impl Sync for $atomic_type {} + + impl $atomic_type { + #[inline] + pub(crate) const fn new(v: $int_type) -> Self { + Self { v: UnsafeCell::new(v) } + } + + #[inline] + pub(crate) fn is_lock_free() -> bool { + Self::is_always_lock_free() + } + #[inline] + pub(crate) const fn is_always_lock_free() -> bool { + IS_ALWAYS_LOCK_FREE + } + + #[inline] + pub(crate) fn get_mut(&mut self) -> &mut $int_type { + // SAFETY: the mutable reference guarantees unique ownership. + // (UnsafeCell::get_mut requires Rust 1.50) + unsafe { &mut *self.v.get() } + } + + #[inline] + pub(crate) fn into_inner(self) -> $int_type { + self.v.into_inner() + } + + #[inline] + pub(crate) const fn as_ptr(&self) -> *mut $int_type { + self.v.get() + } + } + }; + (load_store_atomic, $atomic_type:ident, $int_type:ident, $align:literal) => { + atomic_int!(base, $atomic_type, $int_type, $align); + atomic_int!(cas, $atomic_type, $int_type); + impl $atomic_type { + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn load(&self, order: Ordering) -> $int_type { + crate::utils::assert_load_ordering(order); + #[cfg(not(any(target_arch = "avr", feature = "critical-section")))] + { + self.as_native().load(order) + } + #[cfg(any(target_arch = "avr", feature = "critical-section"))] + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. 
+ with(|| unsafe { self.v.get().read() }) + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn store(&self, val: $int_type, order: Ordering) { + crate::utils::assert_store_ordering(order); + #[cfg(not(any(target_arch = "avr", feature = "critical-section")))] + { + self.as_native().store(val, order); + } + #[cfg(any(target_arch = "avr", feature = "critical-section"))] + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { self.v.get().write(val) }); + } + + #[cfg(not(any(target_arch = "avr", feature = "critical-section")))] + #[inline] + fn as_native(&self) -> &atomic::$atomic_type { + // SAFETY: $atomic_type and atomic::$atomic_type have the same layout and + // guarantee atomicity in a compatible way. (see module-level comments) + unsafe { &*(self as *const Self as *const atomic::$atomic_type) } + } + } + + #[cfg(not(all(target_arch = "msp430", not(feature = "critical-section"))))] + impl_default_no_fetch_ops!($atomic_type, $int_type); + impl_default_bit_opts!($atomic_type, $int_type); + #[cfg(not(all(target_arch = "msp430", not(feature = "critical-section"))))] + impl $atomic_type { + #[inline] + pub(crate) fn not(&self, order: Ordering) { + self.fetch_not(order); + } + } + #[cfg(all(target_arch = "msp430", not(feature = "critical-section")))] + impl $atomic_type { + #[inline] + pub(crate) fn add(&self, val: $int_type, order: Ordering) { + self.as_native().add(val, order); + } + #[inline] + pub(crate) fn sub(&self, val: $int_type, order: Ordering) { + self.as_native().sub(val, order); + } + #[inline] + pub(crate) fn and(&self, val: $int_type, order: Ordering) { + self.as_native().and(val, order); + } + #[inline] + pub(crate) fn or(&self, val: $int_type, order: Ordering) { + self.as_native().or(val, order); + } + #[inline] + pub(crate) fn xor(&self, val: $int_type, 
order: Ordering) { + self.as_native().xor(val, order); + } + #[inline] + pub(crate) fn not(&self, order: Ordering) { + self.as_native().not(order); + } + } + }; + (load_store_critical_session, $atomic_type:ident, $int_type:ident, $align:literal) => { + atomic_int!(base, $atomic_type, $int_type, $align); + atomic_int!(cas, $atomic_type, $int_type); + impl_default_no_fetch_ops!($atomic_type, $int_type); + impl_default_bit_opts!($atomic_type, $int_type); + impl $atomic_type { + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn load(&self, order: Ordering) -> $int_type { + crate::utils::assert_load_ordering(order); + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { self.v.get().read() }) + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn store(&self, val: $int_type, order: Ordering) { + crate::utils::assert_store_ordering(order); + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { self.v.get().write(val) }); + } + + #[inline] + pub(crate) fn not(&self, order: Ordering) { + self.fetch_not(order); + } + } + }; + (cas, $atomic_type:ident, $int_type:ident) => { + impl $atomic_type { + #[inline] + pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. 
+ with(|| unsafe { self.v.get().replace(val) }) + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + crate::utils::assert_compare_exchange_ordering(success, failure); + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let result = self.v.get().read(); + if result == current { + self.v.get().write(new); + Ok(result) + } else { + Err(result) + } + }) + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange_weak( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + self.compare_exchange(current, new, success, failure) + } + + #[inline] + pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let result = self.v.get().read(); + self.v.get().write(result.wrapping_add(val)); + result + }) + } + + #[inline] + pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. 
+ with(|| unsafe { + let result = self.v.get().read(); + self.v.get().write(result.wrapping_sub(val)); + result + }) + } + + #[inline] + pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let result = self.v.get().read(); + self.v.get().write(result & val); + result + }) + } + + #[inline] + pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let result = self.v.get().read(); + self.v.get().write(!(result & val)); + result + }) + } + + #[inline] + pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let result = self.v.get().read(); + self.v.get().write(result | val); + result + }) + } + + #[inline] + pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let result = self.v.get().read(); + self.v.get().write(result ^ val); + result + }) + } + + #[inline] + pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. 
+ with(|| unsafe { + let result = self.v.get().read(); + self.v.get().write(core::cmp::max(result, val)); + result + }) + } + + #[inline] + pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let result = self.v.get().read(); + self.v.get().write(core::cmp::min(result, val)); + result + }) + } + + #[inline] + pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let result = self.v.get().read(); + self.v.get().write(!result); + result + }) + } + + #[inline] + pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. 
+ with(|| unsafe { + let result = self.v.get().read(); + self.v.get().write(result.wrapping_neg()); + result + }) + } + #[inline] + pub(crate) fn neg(&self, order: Ordering) { + self.fetch_neg(order); + } + } + }; +} + +#[cfg(target_pointer_width = "16")] +atomic_int!(load_store_atomic, AtomicIsize, isize, 2); +#[cfg(target_pointer_width = "16")] +atomic_int!(load_store_atomic, AtomicUsize, usize, 2); +#[cfg(target_pointer_width = "32")] +atomic_int!(load_store_atomic, AtomicIsize, isize, 4); +#[cfg(target_pointer_width = "32")] +atomic_int!(load_store_atomic, AtomicUsize, usize, 4); +#[cfg(target_pointer_width = "64")] +atomic_int!(load_store_atomic, AtomicIsize, isize, 8); +#[cfg(target_pointer_width = "64")] +atomic_int!(load_store_atomic, AtomicUsize, usize, 8); +#[cfg(target_pointer_width = "128")] +atomic_int!(load_store_atomic, AtomicIsize, isize, 16); +#[cfg(target_pointer_width = "128")] +atomic_int!(load_store_atomic, AtomicUsize, usize, 16); + +atomic_int!(load_store_atomic, AtomicI8, i8, 1); +atomic_int!(load_store_atomic, AtomicU8, u8, 1); +atomic_int!(load_store_atomic, AtomicI16, i16, 2); +atomic_int!(load_store_atomic, AtomicU16, u16, 2); + +#[cfg(not(target_pointer_width = "16"))] +atomic_int!(load_store_atomic, AtomicI32, i32, 4); +#[cfg(not(target_pointer_width = "16"))] +atomic_int!(load_store_atomic, AtomicU32, u32, 4); +#[cfg(target_pointer_width = "16")] +#[cfg(any(test, feature = "fallback"))] +atomic_int!(load_store_critical_session, AtomicI32, i32, 4); +#[cfg(target_pointer_width = "16")] +#[cfg(any(test, feature = "fallback"))] +atomic_int!(load_store_critical_session, AtomicU32, u32, 4); + +#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))] +atomic_int!(load_store_atomic, AtomicI64, i64, 8); +#[cfg(not(any(target_pointer_width = "16", target_pointer_width = "32")))] +atomic_int!(load_store_atomic, AtomicU64, u64, 8); +#[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))] +#[cfg(any(test, feature = 
"fallback"))] +atomic_int!(load_store_critical_session, AtomicI64, i64, 8); +#[cfg(any(target_pointer_width = "16", target_pointer_width = "32"))] +#[cfg(any(test, feature = "fallback"))] +atomic_int!(load_store_critical_session, AtomicU64, u64, 8); + +#[cfg(any(test, feature = "fallback"))] +atomic_int!(load_store_critical_session, AtomicI128, i128, 16); +#[cfg(any(test, feature = "fallback"))] +atomic_int!(load_store_critical_session, AtomicU128, u128, 16); + +#[cfg(test)] +mod tests { + use super::*; + + test_atomic_ptr_single_thread!(); + test_atomic_int_single_thread!(i8); + test_atomic_int_single_thread!(u8); + test_atomic_int_single_thread!(i16); + test_atomic_int_single_thread!(u16); + test_atomic_int_single_thread!(i32); + test_atomic_int_single_thread!(u32); + test_atomic_int_single_thread!(i64); + test_atomic_int_single_thread!(u64); + test_atomic_int_single_thread!(i128); + test_atomic_int_single_thread!(u128); + test_atomic_int_single_thread!(isize); + test_atomic_int_single_thread!(usize); +} diff --git a/vendor/portable-atomic/src/imp/interrupt/msp430.rs b/vendor/portable-atomic/src/imp/interrupt/msp430.rs new file mode 100644 index 000000000..020ed1023 --- /dev/null +++ b/vendor/portable-atomic/src/imp/interrupt/msp430.rs @@ -0,0 +1,61 @@ +// Adapted from https://github.com/rust-embedded/msp430. +// +// See also src/imp/msp430.rs. + +#[cfg(not(portable_atomic_no_asm))] +use core::arch::asm; + +pub(super) use super::super::msp430 as atomic; + +pub(super) type State = u16; + +/// Disables interrupts and returns the previous interrupt state. +#[inline] +pub(super) fn disable() -> State { + let r: State; + // SAFETY: reading the status register and disabling interrupts are safe. + // (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions) + unsafe { + // Do not use `nomem` and `readonly` because prevent subsequent memory accesses from being reordered before interrupts are disabled. 
+ // Do not use `preserves_flags` because DINT modifies the GIE (global interrupt enable) bit of the status register. + // Refs: https://mspgcc.sourceforge.net/manual/x951.html + #[cfg(not(portable_atomic_no_asm))] + asm!( + "mov R2, {0}", + "dint {{ nop", + out(reg) r, + options(nostack), + ); + #[cfg(portable_atomic_no_asm)] + { + llvm_asm!("mov R2, $0" : "=r"(r) ::: "volatile"); + llvm_asm!("dint { nop" ::: "memory" : "volatile"); + } + } + r +} + +/// Restores the previous interrupt state. +/// +/// # Safety +/// +/// The state must be the one retrieved by the previous `disable`. +#[inline] +pub(super) unsafe fn restore(r: State) { + // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`. + unsafe { + // This clobbers the entire status register, but we never explicitly modify + // flags within a critical section, and the only flags that may be changed + // within a critical section are the arithmetic flags that are changed as + // a side effect of arithmetic operations, etc., which LLVM recognizes, + // so it is safe to clobber them here. + // See also the discussion at https://github.com/taiki-e/portable-atomic/pull/40. + // + // Do not use `nomem` and `readonly` because we must prevent preceding memory accesses from being reordered after interrupts are enabled. + // Do not use `preserves_flags` because MOV modifies the status register. 
+ #[cfg(not(portable_atomic_no_asm))] + asm!("nop {{ mov {0}, R2 {{ nop", in(reg) r, options(nostack)); + #[cfg(portable_atomic_no_asm)] + llvm_asm!("nop { mov $0, R2 { nop" :: "r"(r) : "memory" : "volatile"); + } +} diff --git a/vendor/portable-atomic/src/imp/interrupt/riscv.rs b/vendor/portable-atomic/src/imp/interrupt/riscv.rs new file mode 100644 index 000000000..c08545e1d --- /dev/null +++ b/vendor/portable-atomic/src/imp/interrupt/riscv.rs @@ -0,0 +1,79 @@ +// Refs: +// - https://five-embeddev.com/riscv-isa-manual/latest/machine.html#machine-status-registers-mstatus-and-mstatush +// - https://five-embeddev.com/riscv-isa-manual/latest/supervisor.html#sstatus +// +// Generated asm: +// - riscv64gc https://godbolt.org/z/a78zxf5sW + +#[cfg(not(portable_atomic_no_asm))] +use core::arch::asm; + +pub(super) use super::super::riscv as atomic; + +// Status register +#[cfg(not(portable_atomic_s_mode))] +macro_rules! status { + () => { + "mstatus" + }; +} +#[cfg(portable_atomic_s_mode)] +macro_rules! status { + () => { + "sstatus" + }; +} + +// MIE (Machine Interrupt Enable) bit (1 << 3) +#[cfg(not(portable_atomic_s_mode))] +const MASK: State = 0x8; +#[cfg(not(portable_atomic_s_mode))] +macro_rules! mask { + () => { + "0x8" + }; +} +// SIE (Supervisor Interrupt Enable) bit (1 << 1) +#[cfg(portable_atomic_s_mode)] +const MASK: State = 0x2; +#[cfg(portable_atomic_s_mode)] +macro_rules! mask { + () => { + "0x2" + }; +} + +#[cfg(target_arch = "riscv32")] +pub(super) type State = u32; +#[cfg(target_arch = "riscv64")] +pub(super) type State = u64; + +/// Disables interrupts and returns the previous interrupt state. +#[inline] +pub(super) fn disable() -> State { + let r: State; + // SAFETY: reading mstatus and disabling interrupts is safe. 
+ // (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions) + unsafe { + // Do not use `nomem` and `readonly` because prevent subsequent memory accesses from being reordered before interrupts are disabled. + asm!(concat!("csrrci {0}, ", status!(), ", ", mask!()), out(reg) r, options(nostack, preserves_flags)); + } + r +} + +/// Restores the previous interrupt state. +/// +/// # Safety +/// +/// The state must be the one retrieved by the previous `disable`. +#[inline] +pub(super) unsafe fn restore(r: State) { + if r & MASK != 0 { + // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`, + // and we've checked that interrupts were enabled before disabling interrupts. + unsafe { + // Do not use `nomem` and `readonly` because prevent preceding memory accesses from being reordered after interrupts are enabled. + asm!(concat!("csrsi ", status!(), ", ", mask!()), options(nostack, preserves_flags)); + } + } +} diff --git a/vendor/portable-atomic/src/imp/interrupt/xtensa.rs b/vendor/portable-atomic/src/imp/interrupt/xtensa.rs new file mode 100644 index 000000000..3593c25af --- /dev/null +++ b/vendor/portable-atomic/src/imp/interrupt/xtensa.rs @@ -0,0 +1,46 @@ +// Refs: +// - Xtensa Instruction Set Architecture (ISA) Reference Manual https://0x04.net/~mwk/doc/xtensa.pdf +// - Linux kernel's Xtensa atomic implementation https://github.com/torvalds/linux/blob/v6.1/arch/xtensa/include/asm/atomic.h + +#[cfg(not(portable_atomic_no_asm))] +use core::arch::asm; + +pub(super) use core::sync::atomic; + +pub(super) type State = u32; + +/// Disables interrupts and returns the previous interrupt state. +#[inline] +pub(super) fn disable() -> State { + let r: State; + // SAFETY: reading the PS special register and disabling all interrupts is safe. 
+ // (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions) + unsafe { + // Do not use `nomem` and `readonly` because we must prevent subsequent memory accesses from being reordered before interrupts are disabled. + // Interrupt level 15 to disable all interrupts. + // SYNC after RSIL is not required. + asm!("rsil {0}, 15", out(reg) r, options(nostack)); + } + r +} + +/// Restores the previous interrupt state. +/// +/// # Safety +/// +/// The state must be the one retrieved by the previous `disable`. +#[inline] +pub(super) unsafe fn restore(r: State) { + // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`, + // and the saved PS value is written back as-is, so no check of the previous interrupt level is needed. + unsafe { + // Do not use `nomem` and `readonly` because we must prevent preceding memory accesses from being reordered after interrupts are enabled. + // SYNC after WSR is required to guarantee that a subsequent RSIL reads the written value. + asm!( + "wsr.ps {0}", + "rsync", + in(reg) r, + options(nostack), + ); + } +} diff --git a/vendor/portable-atomic/src/imp/mod.rs b/vendor/portable-atomic/src/imp/mod.rs new file mode 100644 index 000000000..3dbe8e6c2 --- /dev/null +++ b/vendor/portable-atomic/src/imp/mod.rs @@ -0,0 +1,488 @@ +// ----------------------------------------------------------------------------- +// Lock-free implementations + +#[cfg(not(any( + all( + portable_atomic_no_atomic_load_store, + not(all(target_arch = "bpf", not(feature = "critical-section"))), + ), + portable_atomic_unsafe_assume_single_core, + target_arch = "avr", + target_arch = "msp430", +)))] +#[cfg_attr( + portable_atomic_no_cfg_target_has_atomic, + cfg(not(all(feature = "critical-section", portable_atomic_no_atomic_cas))) +)] +#[cfg_attr( + not(portable_atomic_no_cfg_target_has_atomic), + cfg(not(all( + any(target_arch = "riscv32", target_arch = "riscv64", feature = "critical-section"), + not(target_has_atomic = "ptr"), + ))) +)] +mod
core_atomic; + +#[cfg(any(not(portable_atomic_no_asm), portable_atomic_unstable_asm))] +#[cfg(target_arch = "aarch64")] +// Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly. +#[cfg_attr( + all(any(miri, portable_atomic_sanitize_thread), portable_atomic_new_atomic_intrinsics), + path = "atomic128/intrinsics.rs" +)] +#[cfg_attr( + not(all(any(miri, portable_atomic_sanitize_thread), portable_atomic_new_atomic_intrinsics)), + path = "atomic128/aarch64.rs" +)] +mod aarch64; + +#[cfg(any(not(portable_atomic_no_asm), portable_atomic_unstable_asm))] +#[cfg(target_arch = "x86_64")] +#[cfg(any( + target_feature = "cmpxchg16b", + portable_atomic_target_feature = "cmpxchg16b", + all( + feature = "fallback", + not(portable_atomic_no_cmpxchg16b_target_feature), + not(portable_atomic_no_outline_atomics), + not(any(target_env = "sgx", miri)), + ), +))] +// Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly. +#[cfg_attr(any(miri, portable_atomic_sanitize_thread), path = "atomic128/intrinsics.rs")] +#[cfg_attr(not(any(miri, portable_atomic_sanitize_thread)), path = "atomic128/x86_64.rs")] +mod x86_64; + +#[cfg(portable_atomic_unstable_asm_experimental_arch)] +#[cfg(target_arch = "powerpc64")] +#[cfg(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + all( + feature = "fallback", + not(portable_atomic_no_outline_atomics), + any(test, portable_atomic_outline_atomics), // TODO(powerpc64): currently disabled by default + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all(target_env = "musl", not(target_feature = "crt-static")), + portable_atomic_outline_atomics, + ), + ), + target_os = "freebsd", + ), + not(any(miri, portable_atomic_sanitize_thread)), + ), +))] +// Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly. 
+#[cfg_attr( + all(any(miri, portable_atomic_sanitize_thread), portable_atomic_llvm_15), + path = "atomic128/intrinsics.rs" +)] +#[cfg_attr( + not(all(any(miri, portable_atomic_sanitize_thread), portable_atomic_llvm_15)), + path = "atomic128/powerpc64.rs" +)] +mod powerpc64; + +#[cfg(portable_atomic_unstable_asm_experimental_arch)] +#[cfg(target_arch = "s390x")] +// Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly. +#[cfg_attr( + all(any(miri, portable_atomic_sanitize_thread), portable_atomic_new_atomic_intrinsics), + path = "atomic128/intrinsics.rs" +)] +#[cfg_attr( + not(all(any(miri, portable_atomic_sanitize_thread), portable_atomic_new_atomic_intrinsics)), + path = "atomic128/s390x.rs" +)] +mod s390x; + +// Miri and Sanitizer do not support inline assembly. +#[cfg(feature = "fallback")] +#[cfg(all( + not(any(miri, portable_atomic_sanitize_thread)), + any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), + target_arch = "arm", + any(target_os = "linux", target_os = "android"), + not(any(target_feature = "v6", portable_atomic_target_feature = "v6")), + not(portable_atomic_no_outline_atomics), +))] +#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_64))] +#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "64")))] +mod arm_linux; + +#[cfg(target_arch = "msp430")] +pub(crate) mod msp430; + +#[cfg(any(test, not(feature = "critical-section")))] +#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_cas)))] +#[cfg_attr( + not(portable_atomic_no_cfg_target_has_atomic), + cfg(any(test, not(target_has_atomic = "ptr"))) +)] +#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +mod riscv; + +// Miri and Sanitizer do not support inline assembly. 
+#[cfg(all( + not(any(miri, portable_atomic_sanitize_thread)), + any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), + any(target_arch = "x86", target_arch = "x86_64"), +))] +mod x86; + +// ----------------------------------------------------------------------------- +// Lock-based fallback implementations + +#[cfg(feature = "fallback")] +#[cfg(any( + test, + not(any( + all( + any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), + target_arch = "aarch64", + ), + all( + any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), + any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), + target_arch = "x86_64", + ), + all( + portable_atomic_unstable_asm_experimental_arch, + any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + ), + target_arch = "powerpc64", + ), + all(portable_atomic_unstable_asm_experimental_arch, target_arch = "s390x"), + )) +))] +#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))] +#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))] +mod fallback; + +// ----------------------------------------------------------------------------- +// Critical section based fallback implementations + +// On AVR, we always use critical section based fallback implementation. +// AVR can be safely assumed to be single-core, so this is sound. +// https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp#LL963 +// MSP430 as well. 
// Critical-section based fallback implementation. It is re-exported further
// down as `self::interrupt::*` for configurations without native CAS.
#[cfg(any(
    all(test, target_os = "none"),
    portable_atomic_unsafe_assume_single_core,
    feature = "critical-section",
    target_arch = "avr",
    target_arch = "msp430",
))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_cas)))]
#[cfg_attr(
    not(portable_atomic_no_cfg_target_has_atomic),
    cfg(any(test, not(target_has_atomic = "ptr")))
)]
#[cfg(any(
    feature = "critical-section",
    target_arch = "arm",
    target_arch = "avr",
    target_arch = "msp430",
    target_arch = "riscv32",
    target_arch = "riscv64",
    target_arch = "xtensa",
))]
mod interrupt;

// -----------------------------------------------------------------------------
// Atomic float implementations

#[cfg(feature = "float")]
pub(crate) mod float;

// -----------------------------------------------------------------------------
// Re-exports: for each atomic width, pick the backing implementation module
// based on the target and the enabled cfgs/features.
// NOTE(review): the cfg sets for a given type name look mutually exclusive
// (otherwise the `use` items would conflict) — confirm when editing any of them.

// Atomic{Isize,Usize,Bool,Ptr}, Atomic{I,U}{8,16}
#[cfg(not(any(
    portable_atomic_no_atomic_load_store,
    portable_atomic_unsafe_assume_single_core,
    target_arch = "avr",
    target_arch = "msp430",
)))]
#[cfg_attr(
    portable_atomic_no_cfg_target_has_atomic,
    cfg(not(all(feature = "critical-section", portable_atomic_no_atomic_cas)))
)]
#[cfg_attr(
    not(portable_atomic_no_cfg_target_has_atomic),
    cfg(not(all(
        any(target_arch = "riscv32", target_arch = "riscv64", feature = "critical-section"),
        not(target_has_atomic = "ptr"),
    )))
)]
pub(crate) use self::core_atomic::{
    AtomicI16, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU8, AtomicUsize,
};
// RISC-V without A-extension
#[cfg(not(any(portable_atomic_unsafe_assume_single_core, feature = "critical-section")))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))]
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
pub(crate) use self::riscv::{
    AtomicI16, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU8, AtomicUsize,
};
// no core Atomic{Isize,Usize,Bool,Ptr}/Atomic{I,U}{8,16} & assume single core => critical section based fallback
#[cfg(any(
    portable_atomic_unsafe_assume_single_core,
    feature = "critical-section",
    target_arch = "avr",
    target_arch = "msp430",
))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))]
pub(crate) use self::interrupt::{
    AtomicI16, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU8, AtomicUsize,
};
// bpf
#[cfg(all(
    target_arch = "bpf",
    portable_atomic_no_atomic_load_store,
    not(feature = "critical-section"),
))]
pub(crate) use self::core_atomic::{AtomicI64, AtomicIsize, AtomicPtr, AtomicU64, AtomicUsize};

// Atomic{I,U}32
#[cfg(not(any(
    portable_atomic_no_atomic_load_store,
    portable_atomic_unsafe_assume_single_core,
    target_arch = "avr",
    target_arch = "msp430",
)))]
#[cfg_attr(
    portable_atomic_no_cfg_target_has_atomic,
    cfg(not(all(feature = "critical-section", portable_atomic_no_atomic_cas)))
)]
#[cfg_attr(
    not(portable_atomic_no_cfg_target_has_atomic),
    cfg(not(all(
        any(target_arch = "riscv32", target_arch = "riscv64", feature = "critical-section"),
        not(target_has_atomic = "ptr"),
    )))
)]
pub(crate) use self::core_atomic::{AtomicI32, AtomicU32};
// RISC-V without A-extension
#[cfg(not(any(portable_atomic_unsafe_assume_single_core, feature = "critical-section")))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))]
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
pub(crate) use self::riscv::{AtomicI32, AtomicU32};
// no core Atomic{I,U}32 & no CAS & assume single core => critical section based fallback
#[cfg(any(not(target_pointer_width = "16"), feature = "fallback"))]
#[cfg(any(
    portable_atomic_unsafe_assume_single_core,
    feature = "critical-section",
    target_arch = "avr",
    target_arch = "msp430",
))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))]
pub(crate) use self::interrupt::{AtomicI32, AtomicU32};

// Atomic{I,U}64
#[cfg(not(any(
    portable_atomic_no_atomic_load_store,
    portable_atomic_unsafe_assume_single_core,
)))]
#[cfg_attr(
    portable_atomic_no_cfg_target_has_atomic,
    cfg(any(
        not(portable_atomic_no_atomic_64),
        all(
            not(any(target_pointer_width = "16", target_pointer_width = "32")),
            not(all(feature = "critical-section", portable_atomic_no_atomic_cas)),
        ),
    ))
)]
#[cfg_attr(
    not(portable_atomic_no_cfg_target_has_atomic),
    cfg(any(
        target_has_atomic = "64",
        all(
            not(any(target_pointer_width = "16", target_pointer_width = "32")),
            not(all(
                any(
                    target_arch = "riscv32",
                    target_arch = "riscv64",
                    feature = "critical-section",
                ),
                not(target_has_atomic = "ptr"),
            )),
        ),
    ))
)]
pub(crate) use self::core_atomic::{AtomicI64, AtomicU64};
// RISC-V without A-extension
#[cfg(not(any(portable_atomic_unsafe_assume_single_core, feature = "critical-section")))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))]
#[cfg(target_arch = "riscv64")]
pub(crate) use self::riscv::{AtomicI64, AtomicU64};
// pre-v6 ARM Linux
#[cfg(feature = "fallback")]
#[cfg(all(
    not(any(miri, portable_atomic_sanitize_thread)),
    any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
    target_arch = "arm",
    any(target_os = "linux", target_os = "android"),
    not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
    not(portable_atomic_no_outline_atomics),
))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_64))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "64")))]
pub(crate) use self::arm_linux::{AtomicI64, AtomicU64};
// no core Atomic{I,U}64 & has CAS => use lock-based fallback
// (the second cfg below is the exact negation of the pre-v6 ARM Linux condition above)
#[cfg(feature = "fallback")]
#[cfg(not(all(
    not(any(miri, portable_atomic_sanitize_thread)),
    any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
    target_arch = "arm",
    any(target_os = "linux", target_os = "android"),
    not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
    not(portable_atomic_no_outline_atomics),
)))]
#[cfg_attr(
    portable_atomic_no_cfg_target_has_atomic,
    cfg(all(portable_atomic_no_atomic_64, not(portable_atomic_no_atomic_cas)))
)]
#[cfg_attr(
    not(portable_atomic_no_cfg_target_has_atomic),
    cfg(all(not(target_has_atomic = "64"), target_has_atomic = "ptr"))
)]
pub(crate) use self::fallback::{AtomicI64, AtomicU64};
// no core Atomic{I,U}64 & no CAS & assume single core => critical section based fallback
#[cfg(any(
    not(any(target_pointer_width = "16", target_pointer_width = "32")),
    feature = "fallback",
))]
#[cfg(any(
    portable_atomic_unsafe_assume_single_core,
    feature = "critical-section",
    target_arch = "avr",
    target_arch = "msp430",
))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))]
pub(crate) use self::interrupt::{AtomicI64, AtomicU64};

// Atomic{I,U}128
// aarch64 stable
#[cfg(all(
    any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
    target_arch = "aarch64",
))]
pub(crate) use self::aarch64::{AtomicI128, AtomicU128};
// no core Atomic{I,U}128 & has cmpxchg16b => use cmpxchg16b
#[cfg(all(
    any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
    any(
        target_feature = "cmpxchg16b",
        portable_atomic_target_feature = "cmpxchg16b",
        all(
            feature = "fallback",
            not(portable_atomic_no_cmpxchg16b_target_feature),
            not(portable_atomic_no_outline_atomics),
            not(any(target_env = "sgx", miri)),
        ),
    ),
    target_arch = "x86_64",
))]
pub(crate) use self::x86_64::{AtomicI128, AtomicU128};
// powerpc64
#[cfg(portable_atomic_unstable_asm_experimental_arch)]
#[cfg(any(
    target_feature = "quadword-atomics",
    portable_atomic_target_feature = "quadword-atomics",
    all(
        feature = "fallback",
        not(portable_atomic_no_outline_atomics),
        portable_atomic_outline_atomics, // TODO(powerpc64): currently disabled by default
        any(
            all(
                target_os = "linux",
                any(
                    target_env = "gnu",
                    all(target_env = "musl", not(target_feature = "crt-static")),
                    portable_atomic_outline_atomics,
                ),
            ),
            target_os = "freebsd",
        ),
        not(any(miri, portable_atomic_sanitize_thread)),
    ),
))]
#[cfg(target_arch = "powerpc64")]
pub(crate) use self::powerpc64::{AtomicI128, AtomicU128};
// s390x
#[cfg(portable_atomic_unstable_asm_experimental_arch)]
#[cfg(target_arch = "s390x")]
pub(crate) use self::s390x::{AtomicI128, AtomicU128};
// no core Atomic{I,U}128 & has CAS => use lock-based fallback
// (the `not(any(...))` below negates the aarch64, x86_64, powerpc64 and s390x
// conditions used by the four re-exports above; keep them in sync)
#[cfg(feature = "fallback")]
#[cfg(not(any(
    all(any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), target_arch = "aarch64"),
    all(
        any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
        any(
            target_feature = "cmpxchg16b",
            portable_atomic_target_feature = "cmpxchg16b",
            all(
                feature = "fallback",
                not(portable_atomic_no_cmpxchg16b_target_feature),
                not(portable_atomic_no_outline_atomics),
                not(any(target_env = "sgx", miri)),
            ),
        ),
        target_arch = "x86_64",
    ),
    all(
        portable_atomic_unstable_asm_experimental_arch,
        any(
            target_feature = "quadword-atomics",
            portable_atomic_target_feature = "quadword-atomics",
            all(
                feature = "fallback",
                not(portable_atomic_no_outline_atomics),
                portable_atomic_outline_atomics, // TODO(powerpc64): currently disabled by default
                any(
                    all(
                        target_os = "linux",
                        any(
                            target_env = "gnu",
                            all(target_env = "musl", not(target_feature = "crt-static")),
                            portable_atomic_outline_atomics,
                        ),
                    ),
                    target_os = "freebsd",
                ),
                not(any(miri, portable_atomic_sanitize_thread)),
            ),
        ),
        target_arch = "powerpc64",
    ),
    all(portable_atomic_unstable_asm_experimental_arch, target_arch = "s390x"),
)))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))]
pub(crate) use self::fallback::{AtomicI128, AtomicU128};
// no core Atomic{I,U}128 & no CAS & assume_single_core => critical section based fallback
#[cfg(feature = "fallback")]
#[cfg(any(
    portable_atomic_unsafe_assume_single_core,
    feature = "critical-section",
    target_arch = "avr",
    target_arch = "msp430",
))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))]
pub(crate) use self::interrupt::{AtomicI128, AtomicU128};

// Patch boundary kept from the original diff; everything below belongs to msp430.rs:
// diff --git a/vendor/portable-atomic/src/imp/msp430.rs b/vendor/portable-atomic/src/imp/msp430.rs new file mode 100644 index 000000000..4928549ab --- /dev/null +++ b/vendor/portable-atomic/src/imp/msp430.rs @@ -0,0 +1,291 @@

// Atomic load/store implementation on MSP430.
//
// Adapted from https://github.com/pftbest/msp430-atomic.
// Including https://github.com/pftbest/msp430-atomic/pull/4 for a compile error fix.
// Including https://github.com/pftbest/msp430-atomic/pull/5 for a soundness bug fix.
//
// Operations not supported here are provided by disabling interrupts.
// See also src/imp/interrupt/msp430.rs.
//
// Note: Ordering is always SeqCst.

#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
#[cfg(any(test, not(feature = "critical-section")))]
use core::cell::UnsafeCell;
use core::sync::atomic::Ordering;

/// An atomic fence.
///
/// Every non-`Relaxed` ordering is forwarded to [`compiler_fence`].
///
/// # Panics
///
/// Panics if `order` is [`Relaxed`](Ordering::Relaxed).
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub fn fence(order: Ordering) {
    match order {
        Ordering::Relaxed => panic!("there is no such thing as a relaxed fence"),
        // MSP430 is single-core and a compiler fence works as an atomic fence.
        _ => compiler_fence(order),
    }
}

/// A compiler memory fence.
///
/// Implemented as an empty inline-asm statement; the same statement is emitted
/// for every non-`Relaxed` ordering.
///
/// # Panics
///
/// Panics if `order` is [`Relaxed`](Ordering::Relaxed).
#[inline]
#[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
pub fn compiler_fence(order: Ordering) {
    // Only validates `order`; the fence itself does not depend on it.
    match order {
        Ordering::Relaxed => panic!("there is no such thing as a relaxed compiler fence"),
        _ => {}
    }
    // SAFETY: using an empty asm is safe.
    unsafe {
        // Do not use `nomem` or `readonly`: the asm must be treated as accessing memory
        // in order to prevent preceding and subsequent memory accesses from being
        // reordered across it.
        #[cfg(not(portable_atomic_no_asm))]
        asm!("", options(nostack, preserves_flags));
        // Legacy form used when `asm!` is unavailable (`portable_atomic_no_asm`).
        #[cfg(portable_atomic_no_asm)]
        llvm_asm!("" ::: "memory" : "volatile");
    }
}

// Defines an atomic type backed by single MSP430 instructions.
//
// Two arms:
// - `atomic!(load_store, [generics]? Name, value_type, suffix)`:
//   the type itself plus `load`/`store` (and test-only helpers).
// - `atomic!([generics]? Name, value_type, suffix)`:
//   everything from `load_store`, plus the in-place, non-returning
//   read-modify-write operations `add`/`sub`/`and`/`or`/`xor`/`not`.
//
// `$asm_suffix` is appended to the instruction mnemonic (".b" or ".w" in the
// invocations at the bottom of this file).
//
// Each asm statement has a `llvm_asm!` twin that is used when the
// `portable_atomic_no_asm` cfg is set.
macro_rules! atomic {
    (load_store, $([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => {
        #[cfg(any(test, not(feature = "critical-section")))]
        #[repr(transparent)]
        pub(crate) struct $atomic_type $(<$($generics)*>)? {
            v: UnsafeCell<$value_type>,
        }

        #[cfg(any(test, not(feature = "critical-section")))]
        // Send is implicitly implemented for atomic integers, but not for atomic pointers.
        // SAFETY: any data races are prevented by atomic operations.
        unsafe impl $(<$($generics)*>)? Send for $atomic_type $(<$($generics)*>)? {}
        #[cfg(any(test, not(feature = "critical-section")))]
        // SAFETY: any data races are prevented by atomic operations.
        unsafe impl $(<$($generics)*>)? Sync for $atomic_type $(<$($generics)*>)? {}

        #[cfg(any(test, not(feature = "critical-section")))]
        impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
            // The constructor and accessors below are only compiled for tests.
            #[cfg(test)]
            #[inline]
            pub(crate) const fn new(v: $value_type) -> Self {
                Self { v: UnsafeCell::new(v) }
            }

            #[cfg(test)]
            #[inline]
            pub(crate) fn is_lock_free() -> bool {
                Self::is_always_lock_free()
            }
            #[cfg(test)]
            #[inline]
            pub(crate) const fn is_always_lock_free() -> bool {
                true
            }

            #[cfg(test)]
            #[inline]
            pub(crate) fn get_mut(&mut self) -> &mut $value_type {
                // SAFETY: the mutable reference guarantees unique ownership.
                // (UnsafeCell::get_mut requires Rust 1.50)
                unsafe { &mut *self.v.get() }
            }

            #[cfg(test)]
            #[inline]
            pub(crate) fn into_inner(self) -> $value_type {
                self.v.into_inner()
            }

            // Loads the value with a single `mov` from memory to a register.
            // `order` is only validated; the emitted code is the same for all orderings.
            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn load(&self, order: Ordering) -> $value_type {
                crate::utils::assert_load_ordering(order);
                let src = self.v.get();
                // SAFETY: any data races are prevented by atomic intrinsics and the raw
                // pointer passed in is valid because we got it from a reference.
                unsafe {
                    let out;
                    #[cfg(not(portable_atomic_no_asm))]
                    asm!(
                        concat!("mov", $asm_suffix, " @{src}, {out}"),
                        src = in(reg) src,
                        out = lateout(reg) out,
                        options(nostack, preserves_flags),
                    );
                    #[cfg(portable_atomic_no_asm)]
                    llvm_asm!(
                        concat!("mov", $asm_suffix, " $1, $0")
                        : "=r"(out) : "*m"(src) : "memory" : "volatile"
                    );
                    out
                }
            }

            // Stores `val` with a single `mov` from a register to memory.
            // `order` is only validated; the emitted code is the same for all orderings.
            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn store(&self, val: $value_type, order: Ordering) {
                crate::utils::assert_store_ordering(order);
                let dst = self.v.get();
                // SAFETY: any data races are prevented by atomic intrinsics and the raw
                // pointer passed in is valid because we got it from a reference.
                unsafe {
                    #[cfg(not(portable_atomic_no_asm))]
                    asm!(
                        concat!("mov", $asm_suffix, " {val}, 0({dst})"),
                        dst = in(reg) dst,
                        val = in(reg) val,
                        options(nostack, preserves_flags),
                    );
                    #[cfg(portable_atomic_no_asm)]
                    llvm_asm!(
                        concat!("mov", $asm_suffix, " $1, $0")
                        :: "*m"(dst), "ir"(val) : "memory" : "volatile"
                    );
                }
            }
        }
    };
    ($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => {
        atomic!(load_store, $([$($generics)*])? $atomic_type, $value_type, $asm_suffix);
        // Read-modify-write operations. Each one is a single instruction with a
        // memory destination, returns nothing, and ignores its ordering argument.
        // Unlike `load`/`store`, none of these asm statements specify
        // `preserves_flags`.
        #[cfg(any(test, not(feature = "critical-section")))]
        impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
            // `*self += val` via `add`.
            #[inline]
            pub(crate) fn add(&self, val: $value_type, _order: Ordering) {
                let dst = self.v.get();
                // SAFETY: any data races are prevented by atomic intrinsics and the raw
                // pointer passed in is valid because we got it from a reference.
                unsafe {
                    #[cfg(not(portable_atomic_no_asm))]
                    asm!(
                        concat!("add", $asm_suffix, " {val}, 0({dst})"),
                        dst = in(reg) dst,
                        val = in(reg) val,
                        options(nostack),
                    );
                    #[cfg(portable_atomic_no_asm)]
                    llvm_asm!(
                        concat!("add", $asm_suffix, " $1, $0")
                        :: "*m"(dst), "ir"(val) : "memory" : "volatile"
                    );
                }
            }

            // `*self -= val` via `sub`.
            #[inline]
            pub(crate) fn sub(&self, val: $value_type, _order: Ordering) {
                let dst = self.v.get();
                // SAFETY: any data races are prevented by atomic intrinsics and the raw
                // pointer passed in is valid because we got it from a reference.
                unsafe {
                    #[cfg(not(portable_atomic_no_asm))]
                    asm!(
                        concat!("sub", $asm_suffix, " {val}, 0({dst})"),
                        dst = in(reg) dst,
                        val = in(reg) val,
                        options(nostack),
                    );
                    #[cfg(portable_atomic_no_asm)]
                    llvm_asm!(
                        concat!("sub", $asm_suffix, " $1, $0")
                        :: "*m"(dst), "ir"(val) : "memory" : "volatile"
                    );
                }
            }

            // `*self &= val` via `and`.
            #[inline]
            pub(crate) fn and(&self, val: $value_type, _order: Ordering) {
                let dst = self.v.get();
                // SAFETY: any data races are prevented by atomic intrinsics and the raw
                // pointer passed in is valid because we got it from a reference.
                unsafe {
                    #[cfg(not(portable_atomic_no_asm))]
                    asm!(
                        concat!("and", $asm_suffix, " {val}, 0({dst})"),
                        dst = in(reg) dst,
                        val = in(reg) val,
                        options(nostack),
                    );
                    #[cfg(portable_atomic_no_asm)]
                    llvm_asm!(
                        concat!("and", $asm_suffix, " $1, $0")
                        :: "*m"(dst), "ir"(val) : "memory" : "volatile"
                    );
                }
            }

            // `*self |= val`; the instruction used is `bis`.
            #[inline]
            pub(crate) fn or(&self, val: $value_type, _order: Ordering) {
                let dst = self.v.get();
                // SAFETY: any data races are prevented by atomic intrinsics and the raw
                // pointer passed in is valid because we got it from a reference.
                unsafe {
                    #[cfg(not(portable_atomic_no_asm))]
                    asm!(
                        concat!("bis", $asm_suffix, " {val}, 0({dst})"),
                        dst = in(reg) dst,
                        val = in(reg) val,
                        options(nostack),
                    );
                    #[cfg(portable_atomic_no_asm)]
                    llvm_asm!(
                        concat!("bis", $asm_suffix, " $1, $0")
                        :: "*m"(dst), "ir"(val) : "memory" : "volatile"
                    );
                }
            }

            // `*self ^= val` via `xor`.
            #[inline]
            pub(crate) fn xor(&self, val: $value_type, _order: Ordering) {
                let dst = self.v.get();
                // SAFETY: any data races are prevented by atomic intrinsics and the raw
                // pointer passed in is valid because we got it from a reference.
                unsafe {
                    #[cfg(not(portable_atomic_no_asm))]
                    asm!(
                        concat!("xor", $asm_suffix, " {val}, 0({dst})"),
                        dst = in(reg) dst,
                        val = in(reg) val,
                        options(nostack),
                    );
                    #[cfg(portable_atomic_no_asm)]
                    llvm_asm!(
                        concat!("xor", $asm_suffix, " $1, $0")
                        :: "*m"(dst), "ir"(val) : "memory" : "volatile"
                    );
                }
            }

            // `*self = !*self`; the instruction used is `inv`.
            #[inline]
            pub(crate) fn not(&self, _order: Ordering) {
                let dst = self.v.get();
                // SAFETY: any data races are prevented by atomic intrinsics and the raw
                // pointer passed in is valid because we got it from a reference.
                unsafe {
                    #[cfg(not(portable_atomic_no_asm))]
                    asm!(
                        concat!("inv", $asm_suffix, " 0({dst})"),
                        dst = in(reg) dst,
                        options(nostack),
                    );
                    #[cfg(portable_atomic_no_asm)]
                    llvm_asm!(
                        concat!("inv", $asm_suffix, " $0")
                        :: "*m"(dst) : "memory" : "volatile"
                    );
                }
            }
        }
    }
}

// 8-bit types use the ".b" suffix; 16-bit and pointer-sized types use ".w".
atomic!(AtomicI8, i8, ".b");
atomic!(AtomicU8, u8, ".b");
atomic!(AtomicI16, i16, ".w");
atomic!(AtomicU16, u16, ".w");
atomic!(AtomicIsize, isize, ".w");
atomic!(AtomicUsize, usize, ".w");
// Pointers only get load/store (no arithmetic/bitwise operations).
atomic!(load_store, [T] AtomicPtr, *mut T, ".w");

// Patch boundary kept from the original diff; everything below belongs to riscv.rs:
// diff --git a/vendor/portable-atomic/src/imp/riscv.rs b/vendor/portable-atomic/src/imp/riscv.rs new file mode 100644 index 000000000..9b4a5cbb7 --- /dev/null +++ b/vendor/portable-atomic/src/imp/riscv.rs @@ -0,0 +1,180 @@

// Atomic load/store implementation on RISC-V.
//
// Refs:
// - "Mappings from C/C++ primitives to RISC-V primitives." table in RISC-V Instruction Set Manual:
//   https://five-embeddev.com/riscv-isa-manual/latest/memory.html#sec:memory:porting
// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
//
// Generated asm:
// - riscv64gc https://godbolt.org/z/hx4Krb91h

#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
use core::{cell::UnsafeCell, sync::atomic::Ordering};

// Defines an atomic type whose `load`/`store` are implemented with plain
// RISC-V load/store instructions plus explicit `fence` instructions.
//
// `atomic!([generics]? Name, value_type, suffix)`: `$asm_suffix` selects the
// access width and is appended to `l`/`s` to form the mnemonic ("b", "h", "w"
// or "d" in the invocations at the bottom of this file).
//
// NOTE(review): `ptr_reg!` is defined outside this file; it wraps the pointer
// operand passed to the asm — confirm its exact behavior at its definition.
macro_rules! atomic {
    ($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => {
        #[repr(transparent)]
        pub(crate) struct $atomic_type $(<$($generics)*>)? {
            v: UnsafeCell<$value_type>,
        }

        // Send is implicitly implemented for atomic integers, but not for atomic pointers.
        // SAFETY: any data races are prevented by atomic operations.
        unsafe impl $(<$($generics)*>)? Send for $atomic_type $(<$($generics)*>)? {}
        // SAFETY: any data races are prevented by atomic operations.
        unsafe impl $(<$($generics)*>)? Sync for $atomic_type $(<$($generics)*>)? {}

        // Constructor and accessors; compiled for tests or when
        // `portable_atomic_unsafe_assume_single_core` is not set.
        #[cfg(any(test, not(portable_atomic_unsafe_assume_single_core)))]
        impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
            #[inline]
            pub(crate) const fn new(v: $value_type) -> Self {
                Self { v: UnsafeCell::new(v) }
            }

            #[inline]
            pub(crate) fn is_lock_free() -> bool {
                Self::is_always_lock_free()
            }
            #[inline]
            pub(crate) const fn is_always_lock_free() -> bool {
                true
            }

            #[inline]
            pub(crate) fn get_mut(&mut self) -> &mut $value_type {
                // SAFETY: the mutable reference guarantees unique ownership.
                // (UnsafeCell::get_mut requires Rust 1.50)
                unsafe { &mut *self.v.get() }
            }

            #[inline]
            pub(crate) fn into_inner(self) -> $value_type {
                self.v.into_inner()
            }

            #[inline]
            pub(crate) const fn as_ptr(&self) -> *mut $value_type {
                self.v.get()
            }
        }
        impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
            // Loads the value. Instruction sequence per ordering:
            // - Relaxed: load
            // - Acquire: load; fence r, rw
            // - SeqCst:  fence rw, rw; load; fence r, rw
            // Any other ordering reaches `unreachable!` (orderings are checked
            // first by `assert_load_ordering`).
            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn load(&self, order: Ordering) -> $value_type {
                crate::utils::assert_load_ordering(order);
                let src = self.v.get();
                // SAFETY: any data races are prevented by atomic intrinsics and the raw
                // pointer passed in is valid because we got it from a reference.
                unsafe {
                    let out;
                    match order {
                        Ordering::Relaxed => {
                            // Only this arm is marked `readonly`; the arms with
                            // fences are not.
                            asm!(
                                concat!("l", $asm_suffix, " {out}, 0({src})"),
                                src = in(reg) ptr_reg!(src),
                                out = lateout(reg) out,
                                options(nostack, preserves_flags, readonly),
                            );
                        }
                        Ordering::Acquire => {
                            asm!(
                                concat!("l", $asm_suffix, " {out}, 0({src})"),
                                "fence r, rw",
                                src = in(reg) ptr_reg!(src),
                                out = lateout(reg) out,
                                options(nostack, preserves_flags),
                            );
                        }
                        Ordering::SeqCst => {
                            asm!(
                                "fence rw, rw",
                                concat!("l", $asm_suffix, " {out}, 0({src})"),
                                "fence r, rw",
                                src = in(reg) ptr_reg!(src),
                                out = lateout(reg) out,
                                options(nostack, preserves_flags),
                            );
                        }
                        _ => unreachable!("{:?}", order),
                    }
                    out
                }
            }

            // Stores `val`. Instruction sequence per ordering:
            // - Relaxed:          store
            // - Release / SeqCst: fence rw, w; store
            // Any other ordering reaches `unreachable!` (orderings are checked
            // first by `assert_store_ordering`).
            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn store(&self, val: $value_type, order: Ordering) {
                crate::utils::assert_store_ordering(order);
                let dst = self.v.get();
                // SAFETY: any data races are prevented by atomic intrinsics and the raw
                // pointer passed in is valid because we got it from a reference.
                unsafe {
                    match order {
                        Ordering::Relaxed => {
                            asm!(
                                concat!("s", $asm_suffix, " {val}, 0({dst})"),
                                dst = in(reg) ptr_reg!(dst),
                                val = in(reg) val,
                                options(nostack, preserves_flags),
                            );
                        }
                        // Release and SeqCst stores are equivalent.
                        Ordering::Release | Ordering::SeqCst => {
                            asm!(
                                "fence rw, w",
                                concat!("s", $asm_suffix, " {val}, 0({dst})"),
                                dst = in(reg) ptr_reg!(dst),
                                val = in(reg) val,
                                options(nostack, preserves_flags),
                            );
                        }
                        _ => unreachable!("{:?}", order),
                    }
                }
            }
        }
    };
}

// Fixed-width types. The 64-bit ones are only defined on riscv64.
atomic!(AtomicI8, i8, "b");
atomic!(AtomicU8, u8, "b");
atomic!(AtomicI16, i16, "h");
atomic!(AtomicU16, u16, "h");
atomic!(AtomicI32, i32, "w");
atomic!(AtomicU32, u32, "w");
#[cfg(target_arch = "riscv64")]
atomic!(AtomicI64, i64, "d");
#[cfg(target_arch = "riscv64")]
atomic!(AtomicU64, u64, "d");
// Pointer-sized types: width suffix follows `target_pointer_width`.
#[cfg(target_pointer_width = "32")]
atomic!(AtomicIsize, isize, "w");
#[cfg(target_pointer_width = "32")]
atomic!(AtomicUsize, usize, "w");
#[cfg(target_pointer_width = "32")]
atomic!([T] AtomicPtr, *mut T, "w");
#[cfg(target_pointer_width = "64")]
atomic!(AtomicIsize, isize, "d");
#[cfg(target_pointer_width = "64")]
atomic!(AtomicUsize, usize, "d");
#[cfg(target_pointer_width = "64")]
atomic!([T] AtomicPtr, *mut T, "d");

// Load/store tests for every type defined above (test macros are defined
// elsewhere in the crate).
#[cfg(test)]
mod tests {
    use super::*;

    test_atomic_ptr_load_store!();
    test_atomic_int_load_store!(i8);
    test_atomic_int_load_store!(u8);
    test_atomic_int_load_store!(i16);
    test_atomic_int_load_store!(u16);
    test_atomic_int_load_store!(i32);
    test_atomic_int_load_store!(u32);
    #[cfg(target_arch = "riscv64")]
    test_atomic_int_load_store!(i64);
    #[cfg(target_arch = "riscv64")]
    test_atomic_int_load_store!(u64);
    test_atomic_int_load_store!(isize);
    test_atomic_int_load_store!(usize);
}

// Patch boundary kept from the original diff; everything below belongs to x86.rs:
// diff --git a/vendor/portable-atomic/src/imp/x86.rs b/vendor/portable-atomic/src/imp/x86.rs new file mode 100644 index 000000000..82afee10b --- /dev/null +++ b/vendor/portable-atomic/src/imp/x86.rs @@ -0,0 +1,228 @@

// Atomic operations implementation on x86/x86_64.
//
// This module provides atomic operations not supported by LLVM or optimizes
// cases where LLVM code generation is not optimal.
//
// Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use
// this module and use CAS loop instead.
//
// Generated asm:
// - x86_64 https://godbolt.org/z/8fve4YP1G

#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
use core::sync::atomic::Ordering;

use super::core_atomic::{
    AtomicI16, AtomicI32, AtomicI64, AtomicI8, AtomicIsize, AtomicU16, AtomicU32, AtomicU64,
    AtomicU8, AtomicUsize,
};

// Template modifier applied to the pointer operand (`{dst...}`) in the asm
// strings below: ":e" when pointers are 32 bits wide, nothing when they are
// 64 bits wide.
#[cfg(target_pointer_width = "32")]
macro_rules! ptr_modifier {
    () => {
        ":e"
    };
}
#[cfg(target_pointer_width = "64")]
macro_rules! ptr_modifier {
    () => {
        ""
    };
}

// Adds non-returning `not` and `neg` to `$atomic_type`, each implemented as a
// single `lock`-prefixed instruction on the memory operand.
//
// `$ptr_size` is the operand-size keyword placed before `ptr` in the asm
// string ("byte", "word", "dword" or "qword" in the invocations below).
// `$int_type` is accepted but not used in this macro body.
macro_rules! atomic_int {
    ($atomic_type:ident, $int_type:ident, $ptr_size:tt) => {
        impl $atomic_type {
            // `*self = !*self`; the ordering argument is ignored.
            #[inline]
            pub(crate) fn not(&self, _order: Ordering) {
                let dst = self.as_ptr();
                // SAFETY: any data races are prevented by atomic intrinsics and the raw
                // pointer passed in is valid because we got it from a reference.
                //
                // https://www.felixcloutier.com/x86/not
                unsafe {
                    // atomic RMW is always SeqCst.
                    asm!(
                        concat!("lock not ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}]"),
                        dst = in(reg) dst,
                        options(nostack, preserves_flags),
                    );
                }
            }
            // `*self = -*self`; the ordering argument is ignored.
            #[inline]
            pub(crate) fn neg(&self, _order: Ordering) {
                let dst = self.as_ptr();
                // SAFETY: any data races are prevented by atomic intrinsics and the raw
                // pointer passed in is valid because we got it from a reference.
                //
                // https://www.felixcloutier.com/x86/neg
                unsafe {
                    // atomic RMW is always SeqCst.
                    asm!(
                        concat!("lock neg ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}]"),
                        dst = in(reg) dst,
                        // Do not use `preserves_flags` because NEG modifies the CF, OF, SF, ZF, AF, and PF flags.
                        options(nostack),
                    );
                }
            }
        }
    };
}

atomic_int!(AtomicI8, i8, "byte");
atomic_int!(AtomicU8, u8, "byte");
atomic_int!(AtomicI16, i16, "word");
atomic_int!(AtomicU16, u16, "word");
atomic_int!(AtomicI32, i32, "dword");
atomic_int!(AtomicU32, u32, "dword");
#[cfg(target_arch = "x86_64")]
atomic_int!(AtomicI64, i64, "qword");
#[cfg(target_arch = "x86_64")]
atomic_int!(AtomicU64, u64, "qword");
#[cfg(target_pointer_width = "32")]
atomic_int!(AtomicIsize, isize, "dword");
#[cfg(target_pointer_width = "32")]
atomic_int!(AtomicUsize, usize, "dword");
#[cfg(target_pointer_width = "64")]
atomic_int!(AtomicIsize, isize, "qword");
#[cfg(target_pointer_width = "64")]
atomic_int!(AtomicUsize, usize, "qword");

// On 32-bit x86 the 64-bit types do not go through `atomic_int!`; `not`/`neg`
// delegate to `fetch_not`/`fetch_neg` and discard the returned value.
#[cfg(target_arch = "x86")]
impl AtomicI64 {
    #[inline]
    pub(crate) fn not(&self, order: Ordering) {
        self.fetch_not(order);
    }
    #[inline]
    pub(crate) fn neg(&self, order: Ordering) {
        self.fetch_neg(order);
    }
}
#[cfg(target_arch = "x86")]
impl AtomicU64 {
    #[inline]
    pub(crate) fn not(&self, order: Ordering) {
        self.fetch_not(order);
    }
    #[inline]
    pub(crate) fn neg(&self, order: Ordering) {
        self.fetch_neg(order);
    }
}

// Adds `bit_set` / `bit_clear` / `bit_toggle` to `$atomic_type`.
//
// Each method masks `bit` to the type's bit width, applies the operation to
// that bit, and returns whether the bit was set beforehand (read from the carry
// flag via `setb`). The ordering argument is ignored.
//
// `$val_modifier` is the template modifier for the bit-index register operand
// and `$ptr_size` is the operand-size keyword placed before `ptr`.
macro_rules! atomic_bit_opts {
    ($atomic_type:ident, $int_type:ident, $val_modifier:tt, $ptr_size:tt) => {
        // LLVM 14 and older don't support generating `lock bt{s,r,c}`.
        // https://godbolt.org/z/G1TMKza97
        // LLVM 15 only supports generating `lock bt{s,r,c}` for immediate bit offsets.
        // https://godbolt.org/z/dzzhr81z6
        // LLVM 16 can generate `lock bt{s,r,c}` for both immediate and register bit offsets.
        // https://godbolt.org/z/7YTvsorn1
        // So, use fetch_* based implementations on LLVM 16+, otherwise use asm based implementations.
        #[cfg(portable_atomic_llvm_16)]
        impl_default_bit_opts!($atomic_type, $int_type);
        #[cfg(not(portable_atomic_llvm_16))]
        impl $atomic_type {
            // `<integer>::BITS` is not available on old nightly.
            const BITS: u32 = (core::mem::size_of::<$int_type>() * 8) as u32;
            // Sets the bit (`lock bts`) and returns its previous state.
            #[inline]
            pub(crate) fn bit_set(&self, bit: u32, _order: Ordering) -> bool {
                let dst = self.as_ptr();
                // SAFETY: any data races are prevented by atomic intrinsics and the raw
                // pointer passed in is valid because we got it from a reference.
                // the masking by the bit size of the type ensures that we do not shift
                // out of bounds.
                //
                // https://www.felixcloutier.com/x86/bts
                unsafe {
                    let out: u8;
                    // atomic RMW is always SeqCst.
                    asm!(
                        concat!("lock bts ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {bit", $val_modifier, "}"),
                        "setb {out}",
                        dst = in(reg) dst,
                        bit = in(reg) (bit & (Self::BITS - 1)) as $int_type,
                        out = out(reg_byte) out,
                        // Do not use `preserves_flags` because BTS modifies the CF flag.
                        options(nostack),
                    );
                    out != 0
                }
            }
            // Clears the bit (`lock btr`) and returns its previous state.
            #[inline]
            pub(crate) fn bit_clear(&self, bit: u32, _order: Ordering) -> bool {
                let dst = self.as_ptr();
                // SAFETY: any data races are prevented by atomic intrinsics and the raw
                // pointer passed in is valid because we got it from a reference.
                // the masking by the bit size of the type ensures that we do not shift
                // out of bounds.
                //
                // https://www.felixcloutier.com/x86/btr
                unsafe {
                    let out: u8;
                    // atomic RMW is always SeqCst.
                    asm!(
                        concat!("lock btr ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {bit", $val_modifier, "}"),
                        "setb {out}",
                        dst = in(reg) dst,
                        bit = in(reg) (bit & (Self::BITS - 1)) as $int_type,
                        out = out(reg_byte) out,
                        // Do not use `preserves_flags` because BTR modifies the CF flag.
                        options(nostack),
                    );
                    out != 0
                }
            }
            // Toggles the bit (`lock btc`) and returns its previous state.
            #[inline]
            pub(crate) fn bit_toggle(&self, bit: u32, _order: Ordering) -> bool {
                let dst = self.as_ptr();
                // SAFETY: any data races are prevented by atomic intrinsics and the raw
                // pointer passed in is valid because we got it from a reference.
                // the masking by the bit size of the type ensures that we do not shift
                // out of bounds.
                //
                // https://www.felixcloutier.com/x86/btc
                unsafe {
                    let out: u8;
                    // atomic RMW is always SeqCst.
                    asm!(
                        concat!("lock btc ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {bit", $val_modifier, "}"),
                        "setb {out}",
                        dst = in(reg) dst,
                        bit = in(reg) (bit & (Self::BITS - 1)) as $int_type,
                        out = out(reg_byte) out,
                        // Do not use `preserves_flags` because BTC modifies the CF flag.
                        options(nostack),
                    );
                    out != 0
                }
            }
        }
    };
}

// 8-bit types always use the default (non-asm) implementation, as do the
// 64-bit types on 32-bit x86.
impl_default_bit_opts!(AtomicI8, i8);
impl_default_bit_opts!(AtomicU8, u8);
atomic_bit_opts!(AtomicI16, i16, ":x", "word");
atomic_bit_opts!(AtomicU16, u16, ":x", "word");
atomic_bit_opts!(AtomicI32, i32, ":e", "dword");
atomic_bit_opts!(AtomicU32, u32, ":e", "dword");
#[cfg(target_arch = "x86_64")]
atomic_bit_opts!(AtomicI64, i64, "", "qword");
#[cfg(target_arch = "x86_64")]
atomic_bit_opts!(AtomicU64, u64, "", "qword");
#[cfg(target_arch = "x86")]
impl_default_bit_opts!(AtomicI64, i64);
#[cfg(target_arch = "x86")]
impl_default_bit_opts!(AtomicU64, u64);
#[cfg(target_pointer_width = "32")]
atomic_bit_opts!(AtomicIsize, isize, ":e", "dword");
#[cfg(target_pointer_width = "32")]
atomic_bit_opts!(AtomicUsize, usize, ":e", "dword");
#[cfg(target_pointer_width = "64")]
atomic_bit_opts!(AtomicIsize, isize, "", "qword");
#[cfg(target_pointer_width = "64")]
atomic_bit_opts!(AtomicUsize, usize, "", "qword");