author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-07 05:48:48 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-06-07 05:48:48 +0000 |
commit | ef24de24a82fe681581cc130f342363c47c0969a (patch) | |
tree | 0d494f7e1a38b95c92426f58fe6eaa877303a86c /vendor/portable-atomic/src/imp | |
parent | Releasing progress-linux version 1.74.1+dfsg1-1~progress7.99u1. (diff) | |
download | rustc-ef24de24a82fe681581cc130f342363c47c0969a.tar.xz rustc-ef24de24a82fe681581cc130f342363c47c0969a.zip |
Merging upstream version 1.75.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/portable-atomic/src/imp')
34 files changed, 2890 insertions, 897 deletions
diff --git a/vendor/portable-atomic/src/imp/arm_linux.rs b/vendor/portable-atomic/src/imp/arm_linux.rs index e506254d9..623a28250 100644 --- a/vendor/portable-atomic/src/imp/arm_linux.rs +++ b/vendor/portable-atomic/src/imp/arm_linux.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // 64-bit atomic implementation using kuser_cmpxchg64 on pre-v6 ARM Linux/Android. // // Refs: @@ -14,26 +16,9 @@ #[path = "fallback/outline_atomics.rs"] mod fallback; -#[cfg(not(portable_atomic_no_asm))] -use core::arch::asm; -use core::{cell::UnsafeCell, mem, sync::atomic::Ordering}; - -/// A 64-bit value represented as a pair of 32-bit values. -/// -/// This type is `#[repr(C)]`, both fields have the same in-memory representation -/// and are plain old datatypes, so access to the fields is always safe. -#[derive(Clone, Copy)] -#[repr(C)] -union U64 { - whole: u64, - pair: Pair, -} -#[derive(Clone, Copy)] -#[repr(C)] -struct Pair { - lo: u32, - hi: u32, -} +use core::{arch::asm, cell::UnsafeCell, mem, sync::atomic::Ordering}; + +use crate::utils::{Pair, U64}; // https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt const KUSER_HELPER_VERSION: usize = 0xFFFF0FFC; @@ -48,8 +33,8 @@ fn __kuser_helper_version() -> i32 { if v != 0 { return v; } - // SAFETY: core assumes that at least __kuser_cmpxchg (__kuser_helper_version >= 2) is available - // on this platform. __kuser_helper_version is always available on such a platform. + // SAFETY: core assumes that at least __kuser_memory_barrier (__kuser_helper_version >= 3) is + // available on this platform. __kuser_helper_version is always available on such a platform. v = unsafe { (KUSER_HELPER_VERSION as *const i32).read() }; CACHE.store(v, Ordering::Relaxed); v @@ -79,16 +64,16 @@ unsafe fn __kuser_cmpxchg64(old_val: *const u64, new_val: *const u64, ptr: *mut unsafe fn byte_wise_atomic_load(src: *const u64) -> u64 { // SAFETY: the caller must uphold the safety contract. unsafe { - let (prev_lo, prev_hi); + let (out_lo, out_hi); asm!( - "ldr {prev_lo}, [{src}]", - "ldr {prev_hi}, [{src}, #4]", + "ldr {out_lo}, [{src}]", + "ldr {out_hi}, [{src}, #4]", src = in(reg) src, - prev_lo = out(reg) prev_lo, - prev_hi = out(reg) prev_hi, + out_lo = out(reg) out_lo, + out_hi = out(reg) out_hi, options(pure, nostack, preserves_flags, readonly), ); - U64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + U64 { pair: Pair { lo: out_lo, hi: out_hi } }.whole } } @@ -109,10 +94,10 @@ where // so we must use inline assembly to implement byte_wise_atomic_load. // (i.e., byte-wise atomic based on the standard library's atomic types // cannot be used here). - let old = byte_wise_atomic_load(dst); - let next = f(old); - if __kuser_cmpxchg64(&old, &next, dst) { - return old; + let prev = byte_wise_atomic_load(dst); + let next = f(prev); + if __kuser_cmpxchg64(&prev, &next, dst) { + return prev; } } } @@ -169,8 +154,10 @@ atomic_with_ifunc! { atomic_with_ifunc! { unsafe fn atomic_compare_exchange(dst: *mut u64, old: u64, new: u64) -> (u64, bool) { // SAFETY: the caller must uphold the safety contract. - let res = unsafe { atomic_update_kuser_cmpxchg64(dst, |v| if v == old { new } else { v }) }; - (res, res == old) + let prev = unsafe { + atomic_update_kuser_cmpxchg64(dst, |v| if v == old { new } else { v }) + }; + (prev, prev == old) } fallback = atomic_compare_exchange_seqcst } @@ -343,15 +330,15 @@ macro_rules! 
atomic64 { // SAFETY: any data races are prevented by the kernel user helper or the lock // and the raw pointer passed in is valid because we got it from a reference. unsafe { - let (res, ok) = atomic_compare_exchange( + let (prev, ok) = atomic_compare_exchange( self.v.get().cast::<u64>(), current as u64, new as u64, ); if ok { - Ok(res as $int_type) + Ok(prev as $int_type) } else { - Err(res as $int_type) + Err(prev as $int_type) } } } diff --git a/vendor/portable-atomic/src/imp/atomic128/README.md b/vendor/portable-atomic/src/imp/atomic128/README.md index 6e0c87988..00a467e80 100644 --- a/vendor/portable-atomic/src/imp/atomic128/README.md +++ b/vendor/portable-atomic/src/imp/atomic128/README.md @@ -6,8 +6,8 @@ Here is the table of targets that support 128-bit atomics and the instructions u | target_arch | load | store | CAS | RMW | note | | ----------- | ---- | ----- | --- | --- | ---- | -| x86_64 | cmpxchg16b or vmovdqa | cmpxchg16b or vmovdqa | cmpxchg16b | cmpxchg16b | cmpxchg16b target feature required. vmovdqa requires Intel or AMD CPU with AVX. <br> Both compile-time and run-time detection are supported for cmpxchg16b. vmovdqa is currently run-time detection only. <br> Requires rustc 1.59+ when cmpxchg16b target feature is enabled at compile-time, otherwise requires rustc 1.69+ | -| aarch64 | ldxp/stxp or casp or ldp | ldxp/stxp or casp or stp | ldxp/stxp or casp | ldxp/stxp or casp | casp requires lse target feature, ldp/stp requires lse2 target feature. <br> Both compile-time and run-time detection are supported for lse. lse2 is currently compile-time detection only. <br> Requires rustc 1.59+ | +| x86_64 | cmpxchg16b or vmovdqa | cmpxchg16b or vmovdqa | cmpxchg16b | cmpxchg16b | cmpxchg16b target feature required. vmovdqa requires Intel or AMD CPU with AVX. <br> Both compile-time and run-time detection are supported for cmpxchg16b. vmovdqa is currently run-time detection only. <br> Requires rustc 1.59+ when cmpxchg16b target feature is enabled at compile-time, otherwise requires rustc 1.69+ | +| aarch64 | ldxp/stxp or casp or ldp/ldiapp | ldxp/stxp or casp or stp/stilp/swpp | ldxp/stxp or casp | ldxp/stxp or casp/swpp/ldclrp/ldsetp | casp requires lse target feature, ldp/stp requires lse2 target feature, ldiapp/stilp requires lse2 and rcpc3 target features, swpp/ldclrp/ldsetp requires lse128 target feature. <br> Both compile-time and run-time detection are supported for lse and lse2. Others are currently compile-time detection only. <br> Requires rustc 1.59+ | | powerpc64 | lq | stq | lqarx/stqcx. | lqarx/stqcx. | Requires target-cpu pwr8+ (powerpc64le is pwr8 by default). Both compile-time and run-time detection are supported (run-time detection is currently disabled by default). <br> Requires nightly | | s390x | lpq | stpq | cdsg | cdsg | Requires nightly | @@ -40,6 +40,7 @@ Here is the table of targets that support run-time feature detection and the ins | aarch64 | linux | getauxval | Only enabled by default on `*-linux-gnu*`, and `*-linux-musl*"` (default is static linking)/`*-linux-ohos*` (default is dynamic linking) with dynamic linking enabled. | | aarch64 | android | getauxval | Enabled by default | | aarch64 | freebsd | elf_aux_info | Enabled by default | +| aarch64 | netbsd | sysctl | Enabled by default | | aarch64 | openbsd | sysctl | Enabled by default | | aarch64 | macos | sysctl | Currently only used in tests because FEAT_LSE and FEAT_LSE2 are always available at compile-time. 
| | aarch64 | windows | IsProcessorFeaturePresent | Enabled by default | diff --git a/vendor/portable-atomic/src/imp/atomic128/aarch64.rs b/vendor/portable-atomic/src/imp/atomic128/aarch64.rs index 00418dfb0..32528a706 100644 --- a/vendor/portable-atomic/src/imp/atomic128/aarch64.rs +++ b/vendor/portable-atomic/src/imp/atomic128/aarch64.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Atomic{I,U}128 implementation on AArch64. // // There are a few ways to implement 128-bit atomic operations in AArch64. @@ -5,6 +7,8 @@ // - LDXP/STXP loop (DW LL/SC) // - CASP (DWCAS) added as FEAT_LSE (mandatory from armv8.1-a) // - LDP/STP (DW load/store) if FEAT_LSE2 (optional from armv8.2-a, mandatory from armv8.4-a) is available +// - LDIAPP/STILP (DW acquire-load/release-store) added as FEAT_LRCPC3 (optional from armv8.9-a/armv9.4-a) (if FEAT_LSE2 is also available) +// - LDCLRP/LDSETP/SWPP (DW RMW) added as FEAT_LSE128 (optional from armv9.4-a) // // If outline-atomics is not enabled and FEAT_LSE is not available at // compile-time, we use LDXP/STXP loop. @@ -15,8 +19,10 @@ // However, when portable_atomic_ll_sc_rmw cfg is set, use LDXP/STXP loop instead of CASP // loop for RMW (by default, it is set on Apple hardware; see build script for details). // If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store. +// If FEAT_LSE128 is available at compile-time, we use LDCLRP/LDSETP/SWPP for fetch_and/fetch_or/swap/{release,seqcst}-store. +// If FEAT_LSE2 and FEAT_LRCPC3 are available at compile-time, we use LDIAPP/STILP for acquire-load/release-store. // -// Note: FEAT_LSE2 doesn't imply FEAT_LSE. +// Note: FEAT_LSE2 doesn't imply FEAT_LSE. FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2. // // Note that we do not separate LL and SC into separate functions, but handle // them within a single asm block. This is because it is theoretically possible @@ -48,18 +54,27 @@ // - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit // // Generated asm: -// - aarch64 https://godbolt.org/z/nds1nWbnq -// - aarch64 msvc https://godbolt.org/z/PTKdhbKqW -// - aarch64 (+lse) https://godbolt.org/z/5GzssfTKc -// - aarch64 msvc (+lse) https://godbolt.org/z/oYE87caM7 -// - aarch64 (+lse,+lse2) https://godbolt.org/z/36dPjMbaG +// - aarch64 https://godbolt.org/z/5Mz1E33vz +// - aarch64 msvc https://godbolt.org/z/P53d1MsGY +// - aarch64 (+lse) https://godbolt.org/z/qvaE8n79K +// - aarch64 msvc (+lse) https://godbolt.org/z/dj4aYerfr +// - aarch64 (+lse,+lse2) https://godbolt.org/z/1E15jjxah +// - aarch64 (+lse,+lse2,+rcpc3) https://godbolt.org/z/YreM4n84o +// - aarch64 (+lse2,+lse128) https://godbolt.org/z/Kfeqs54ox +// - aarch64 (+lse2,+lse128,+rcpc3) https://godbolt.org/z/n6zhjE77s include!("macros.rs"); // On musl with static linking, it seems that getauxval is not always available. // See detect/auxv.rs for more. 
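The outline-atomics paths above depend on run-time feature detection; on Linux the detect/auxv.rs module mentioned just above does this by querying the ELF auxiliary vector. As a point of reference only — not the crate's actual code — here is a minimal sketch, assuming an aarch64 Linux target and the `libc` crate, of a getauxval-based check for FEAT_LSE and FEAT_LSE2; the HWCAP bit positions are the ones defined in the kernel's `asm/hwcap.h`.

```rust
// Sketch only: read AT_HWCAP via getauxval and test the LSE/LSE2 bits.
// HWCAP_ATOMICS and HWCAP_USCAT are the kernel hwcap bits for FEAT_LSE and
// FEAT_LSE2 ("unaligned single-copy atomicity"), respectively.
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
fn detect_lse_lse2() -> (bool, bool) {
    const HWCAP_ATOMICS: libc::c_ulong = 1 << 8;
    const HWCAP_USCAT: libc::c_ulong = 1 << 25;
    // SAFETY: getauxval(AT_HWCAP) is always safe to call on Linux.
    let hwcap = unsafe { libc::getauxval(libc::AT_HWCAP) };
    (hwcap & HWCAP_ATOMICS != 0, hwcap & HWCAP_USCAT != 0)
}
```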
#[cfg(not(portable_atomic_no_outline_atomics))] -#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))] +#[cfg(any( + test, + not(all( + any(target_feature = "lse2", portable_atomic_target_feature = "lse2"), + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + )), +))] #[cfg(any( all( target_os = "linux", @@ -75,8 +90,21 @@ include!("macros.rs"); #[path = "detect/auxv.rs"] mod detect; #[cfg(not(portable_atomic_no_outline_atomics))] -#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))] -#[cfg(target_os = "openbsd")] +#[cfg_attr( + target_os = "netbsd", + cfg(any( + test, + not(all( + any(target_feature = "lse2", portable_atomic_target_feature = "lse2"), + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + )), + )) +)] +#[cfg_attr( + target_os = "openbsd", + cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))) +)] +#[cfg(any(target_os = "netbsd", target_os = "openbsd"))] #[path = "detect/aarch64_aa64reg.rs"] mod detect; #[cfg(not(portable_atomic_no_outline_atomics))] @@ -108,6 +136,8 @@ mod detect_macos; use core::arch::asm; use core::sync::atomic::Ordering; +use crate::utils::{Pair, U128}; + #[cfg(any( target_feature = "lse", portable_atomic_target_feature = "lse", @@ -131,6 +161,7 @@ macro_rules! debug_assert_lse { ), target_os = "android", target_os = "freebsd", + target_os = "netbsd", target_os = "openbsd", target_os = "fuchsia", target_os = "windows", @@ -142,6 +173,82 @@ macro_rules! debug_assert_lse { } }; } +#[rustfmt::skip] +#[cfg(any( + target_feature = "lse2", + portable_atomic_target_feature = "lse2", + not(portable_atomic_no_outline_atomics), +))] +macro_rules! debug_assert_lse2 { + () => { + #[cfg(all( + not(portable_atomic_no_outline_atomics), + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + ), + portable_atomic_outline_atomics, + ), + ), + target_os = "android", + target_os = "freebsd", + target_os = "netbsd", + // These don't support detection of FEAT_LSE2. + // target_os = "openbsd", + // target_os = "fuchsia", + // target_os = "windows", + ), + ))] + #[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))] + { + debug_assert!(detect::detect().has_lse2()); + } + }; +} + +// Refs: https://developer.arm.com/documentation/100067/0612/armclang-Integrated-Assembler/AArch32-Target-selection-directives?lang=en +// +// This is similar to #[target_feature(enable = "lse")], except that there are +// no compiler guarantees regarding (un)inlining, and the scope is within an asm +// block rather than a function. We use this directive to support outline-atomics +// on pre-1.61 rustc (aarch64_target_feature stabilized in Rust 1.61). +// +// The .arch_extension directive is effective until the end of the assembly block and +// is not propagated to subsequent code, so the end_lse macro is unneeded. 
+// https://godbolt.org/z/4oMEW8vWc +// https://github.com/torvalds/linux/commit/e0d5896bd356cd577f9710a02d7a474cdf58426b +// https://github.com/torvalds/linux/commit/dd1f6308b28edf0452dd5dc7877992903ec61e69 +// (It seems GCC effectively ignores this directive and always allow FEAT_LSE instructions: https://godbolt.org/z/W9W6rensG) +// +// The .arch directive has a similar effect, but we don't use it due to the following issue: +// https://github.com/torvalds/linux/commit/dd1f6308b28edf0452dd5dc7877992903ec61e69 +// +// This is also needed for compatibility with rustc_codegen_cranelift: +// https://github.com/rust-lang/rustc_codegen_cranelift/issues/1400#issuecomment-1774599775 +// +// Note: If FEAT_LSE is not available at compile-time, we must guarantee that +// the function that uses it is not inlined into a function where it is not +// clear whether FEAT_LSE is available. Otherwise, (even if we checked whether +// FEAT_LSE is available at run-time) optimizations that reorder its +// instructions across the if condition might introduce undefined behavior. +// (see also https://rust-lang.github.io/rfcs/2045-target-feature.html#safely-inlining-target_feature-functions-on-more-contexts) +// However, our code uses the ifunc helper macro that works with function pointers, +// so we don't have to worry about this unless calling without helper macro. +#[cfg(any( + target_feature = "lse", + portable_atomic_target_feature = "lse", + not(portable_atomic_no_outline_atomics), +))] +macro_rules! start_lse { + () => { + ".arch_extension lse" + }; +} #[cfg(target_endian = "little")] macro_rules! select_le_or_be { @@ -156,24 +263,6 @@ macro_rules! select_le_or_be { }; } -/// A 128-bit value represented as a pair of 64-bit values. -/// -/// This type is `#[repr(C)]`, both fields have the same in-memory representation -/// and are plain old datatypes, so access to the fields is always safe. -#[derive(Clone, Copy)] -#[repr(C)] -union U128 { - whole: u128, - pair: Pair, -} -// A pair of 64-bit values in little-endian order (even on big-endian targets). -#[derive(Clone, Copy)] -#[repr(C)] -struct Pair { - lo: u64, - hi: u64, -} - macro_rules! atomic_rmw { ($op:ident, $order:ident) => { atomic_rmw!($op, $order, write = $order) @@ -195,16 +284,14 @@ macro_rules! atomic_rmw { }; } +// cfg guarantee that the CPU supports FEAT_LSE2. +#[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))] +use _atomic_load_ldp as atomic_load; +#[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))] #[inline] unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { - #[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))] - // SAFETY: the caller must uphold the safety contract. - // cfg guarantee that the CPU supports FEAT_LSE2. - unsafe { - atomic_load_ldp(src, order) - } - #[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))] - { + #[inline] + unsafe fn atomic_load_no_lse2(src: *mut u128, order: Ordering) -> u128 { #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] // SAFETY: the caller must uphold the safety contract. // cfg guarantee that the CPU supports FEAT_LSE. 
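To make the role of `start_lse!` concrete, here is an illustrative sketch (not code from the crate) of how `.arch_extension lse` lets a single `asm!` block emit an LSE instruction without compiling the whole crate with `+lse`. It uses the 64-bit `cas` instruction for brevity rather than the paired `casp` the crate actually uses, and it assumes the caller has already verified FEAT_LSE at run time.

```rust
// Sketch only. `.arch_extension lse` is scoped to this asm block, so the rest
// of the crate can still be assembled without the lse target feature.
#[cfg(target_arch = "aarch64")]
#[inline]
unsafe fn cas_u64_relaxed(dst: *mut u64, old: u64, new: u64) -> u64 {
    use core::arch::asm;
    let prev;
    asm!(
        ".arch_extension lse",
        // Compares *dst with {prev} (initialized to `old`); on match, stores
        // {new}. {prev} always receives the value that was in memory.
        "cas {prev}, {new}, [{dst}]",
        dst = in(reg) dst,
        new = in(reg) new,
        prev = inout(reg) old => prev,
        options(nostack, preserves_flags),
    );
    prev
}
```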
@@ -217,14 +304,123 @@ unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { _atomic_load_ldxp_stxp(src, order) } } + #[cfg(not(all( + not(portable_atomic_no_outline_atomics), + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + ), + portable_atomic_outline_atomics, + ), + ), + target_os = "android", + target_os = "freebsd", + target_os = "netbsd", + // These don't support detection of FEAT_LSE2. + // target_os = "openbsd", + // target_os = "fuchsia", + // target_os = "windows", + ), + )))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + atomic_load_no_lse2(src, order) + } + #[cfg(all( + not(portable_atomic_no_outline_atomics), + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + ), + portable_atomic_outline_atomics, + ), + ), + target_os = "android", + target_os = "freebsd", + target_os = "netbsd", + // These don't support detection of FEAT_LSE2. + // target_os = "openbsd", + // target_os = "fuchsia", + // target_os = "windows", + ), + ))] + { + fn_alias! { + // inline(never) is just a hint and also not strictly necessary + // because we use ifunc helper macro, but used for clarity. + #[inline(never)] + unsafe fn(src: *mut u128) -> u128; + atomic_load_lse2_relaxed = _atomic_load_ldp(Ordering::Relaxed); + atomic_load_lse2_acquire = _atomic_load_ldp(Ordering::Acquire); + atomic_load_lse2_seqcst = _atomic_load_ldp(Ordering::SeqCst); + } + fn_alias! { + unsafe fn(src: *mut u128) -> u128; + atomic_load_no_lse2_relaxed = atomic_load_no_lse2(Ordering::Relaxed); + atomic_load_no_lse2_acquire = atomic_load_no_lse2(Ordering::Acquire); + atomic_load_no_lse2_seqcst = atomic_load_no_lse2(Ordering::SeqCst); + } + // SAFETY: the caller must uphold the safety contract. + // and we've checked if FEAT_LSE2 is available. + unsafe { + match order { + Ordering::Relaxed => { + ifunc!(unsafe fn(src: *mut u128) -> u128 { + let cpuinfo = detect::detect(); + if cpuinfo.has_lse2() { + atomic_load_lse2_relaxed + } else { + atomic_load_no_lse2_relaxed + } + }) + } + Ordering::Acquire => { + ifunc!(unsafe fn(src: *mut u128) -> u128 { + let cpuinfo = detect::detect(); + if cpuinfo.has_lse2() { + atomic_load_lse2_acquire + } else { + atomic_load_no_lse2_acquire + } + }) + } + Ordering::SeqCst => { + ifunc!(unsafe fn(src: *mut u128) -> u128 { + let cpuinfo = detect::detect(); + if cpuinfo.has_lse2() { + atomic_load_lse2_seqcst + } else { + atomic_load_no_lse2_seqcst + } + }) + } + _ => unreachable!("{:?}", order), + } + } + } } -// If CPU supports FEAT_LSE2, LDP is single-copy atomic reads, +// If CPU supports FEAT_LSE2, LDP/LDIAPP is single-copy atomic reads, // otherwise it is two single-copy atomic reads. // Refs: B2.2.1 of the Arm Architecture Reference Manual Armv8, for Armv8-A architecture profile -#[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))] +#[cfg(any( + target_feature = "lse2", + portable_atomic_target_feature = "lse2", + not(portable_atomic_no_outline_atomics), +))] #[inline] -unsafe fn atomic_load_ldp(src: *mut u128, order: Ordering) -> u128 { +unsafe fn _atomic_load_ldp(src: *mut u128, order: Ordering) -> u128 { debug_assert!(src as usize % 16 == 0); + debug_assert_lse2!(); // SAFETY: the caller must guarantee that `dst` is valid for reads, // 16-byte aligned, that there are no concurrent non-atomic operations. 
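The `fn_alias!`/`ifunc!` machinery in the hunk above boils down to resolving an implementation once at run time and then calling through a cached function pointer. A simplified, portable sketch of that shape follows; the names and the placeholder bodies are illustrative, not the crate's actual macro expansion.

```rust
// Sketch only: cache the chosen implementation in an AtomicPtr so that, after
// the first call, a load is a single indirect call with no feature branching.
// The reads below are non-atomic placeholders standing in for the real
// LDP-based and LL/SC- or CAS-based asm implementations.
use core::mem;
use core::ptr;
use core::sync::atomic::{AtomicPtr, Ordering};

type LoadFn = unsafe fn(*mut u128) -> u128;

unsafe fn load_lse2(src: *mut u128) -> u128 { src.read() }    // stand-in for _atomic_load_ldp
unsafe fn load_no_lse2(src: *mut u128) -> u128 { src.read() } // stand-in for the fallback

fn has_lse2() -> bool { false } // stand-in for detect::detect().has_lse2()

static LOAD_RELAXED: AtomicPtr<()> = AtomicPtr::new(ptr::null_mut());

pub unsafe fn atomic_load_relaxed(src: *mut u128) -> u128 {
    let mut f = LOAD_RELAXED.load(Ordering::Relaxed);
    if f.is_null() {
        let chosen: LoadFn = if has_lse2() { load_lse2 } else { load_no_lse2 };
        f = chosen as *mut ();
        LOAD_RELAXED.store(f, Ordering::Relaxed);
    }
    // SAFETY: LOAD_RELAXED only ever holds pointers to functions with
    // LoadFn's signature.
    mem::transmute::<*mut (), LoadFn>(f)(src)
}
```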
@@ -232,39 +428,52 @@ unsafe fn atomic_load_ldp(src: *mut u128, order: Ordering) -> u128 { // Refs: // - LDP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDP--A64- unsafe { - let (prev_lo, prev_hi); + let (out_lo, out_hi); macro_rules! atomic_load_relaxed { ($acquire:tt $(, $readonly:tt)?) => { asm!( - "ldp {prev_lo}, {prev_hi}, [{src}]", + "ldp {out_lo}, {out_hi}, [{src}]", $acquire, src = in(reg) ptr_reg!(src), - prev_hi = lateout(reg) prev_hi, - prev_lo = lateout(reg) prev_lo, + out_hi = lateout(reg) out_hi, + out_lo = lateout(reg) out_lo, options(nostack, preserves_flags $(, $readonly)?), ) }; } match order { Ordering::Relaxed => atomic_load_relaxed!("", readonly), + #[cfg(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))] + Ordering::Acquire => { + // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3. + // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDIAPP--Load-Acquire-RCpc-ordered-Pair-of-registers- + asm!( + "ldiapp {out_lo}, {out_hi}, [{src}]", + src = in(reg) ptr_reg!(src), + out_hi = lateout(reg) out_hi, + out_lo = lateout(reg) out_lo, + options(nostack, preserves_flags), + ); + } + #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))] Ordering::Acquire => atomic_load_relaxed!("dmb ishld"), Ordering::SeqCst => { asm!( // ldar (or dmb ishld) is required to prevent reordering with preceding stlxp. // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891 for details. "ldar {tmp}, [{src}]", - "ldp {prev_lo}, {prev_hi}, [{src}]", + "ldp {out_lo}, {out_hi}, [{src}]", "dmb ishld", src = in(reg) ptr_reg!(src), - prev_hi = lateout(reg) prev_hi, - prev_lo = lateout(reg) prev_lo, + out_hi = lateout(reg) out_hi, + out_lo = lateout(reg) out_lo, tmp = out(reg) _, options(nostack, preserves_flags), ); } _ => unreachable!("{:?}", order), } - U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole } } // Do not use _atomic_compare_exchange_casp because it needs extra MOV to implement load. @@ -278,15 +487,16 @@ unsafe fn _atomic_load_casp(src: *mut u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. // cfg guarantee that the CPU supports FEAT_LSE. unsafe { - let (prev_lo, prev_hi); + let (out_lo, out_hi); macro_rules! atomic_load { ($acquire:tt, $release:tt) => { asm!( + start_lse!(), concat!("casp", $acquire, $release, " x2, x3, x2, x3, [{src}]"), src = in(reg) ptr_reg!(src), // must be allocated to even/odd register pair - inout("x2") 0_u64 => prev_lo, - inout("x3") 0_u64 => prev_hi, + inout("x2") 0_u64 => out_lo, + inout("x3") 0_u64 => out_hi, options(nostack, preserves_flags), ) }; @@ -297,7 +507,7 @@ unsafe fn _atomic_load_casp(src: *mut u128, order: Ordering) -> u128 { Ordering::SeqCst => atomic_load!("a", "l"), _ => unreachable!("{:?}", order), } - U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole } } #[cfg(any( @@ -313,18 +523,18 @@ unsafe fn _atomic_load_ldxp_stxp(src: *mut u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. unsafe { - let (mut prev_lo, mut prev_hi); + let (mut out_lo, mut out_hi); macro_rules! 
atomic_load { ($acquire:tt, $release:tt) => { asm!( "2:", - concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{src}]"), - concat!("st", $release, "xp {r:w}, {prev_lo}, {prev_hi}, [{src}]"), + concat!("ld", $acquire, "xp {out_lo}, {out_hi}, [{src}]"), + concat!("st", $release, "xp {r:w}, {out_lo}, {out_hi}, [{src}]"), // 0 if the store was successful, 1 if no store was performed "cbnz {r:w}, 2b", src = in(reg) ptr_reg!(src), - prev_lo = out(reg) prev_lo, - prev_hi = out(reg) prev_hi, + out_lo = out(reg) out_lo, + out_hi = out(reg) out_hi, r = out(reg) _, options(nostack, preserves_flags), ) @@ -336,31 +546,155 @@ unsafe fn _atomic_load_ldxp_stxp(src: *mut u128, order: Ordering) -> u128 { Ordering::SeqCst => atomic_load!("a", "l"), _ => unreachable!("{:?}", order), } - U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole } } +// cfg guarantee that the CPU supports FEAT_LSE2. +#[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))] +use _atomic_store_stp as atomic_store; +#[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))] #[inline] unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { - #[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))] - // SAFETY: the caller must uphold the safety contract. - // cfg guarantee that the CPU supports FEAT_LSE2. - unsafe { - atomic_store_stp(dst, val, order); + #[inline] + unsafe fn atomic_store_no_lse2(dst: *mut u128, val: u128, order: Ordering) { + // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set, + // we use CAS-based atomic RMW. + #[cfg(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + ))] + // SAFETY: the caller must uphold the safety contract. + // cfg guarantee that the CPU supports FEAT_LSE. + unsafe { + _atomic_swap_casp(dst, val, order); + } + #[cfg(not(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + )))] + // SAFETY: the caller must uphold the safety contract. + unsafe { + _atomic_store_ldxp_stxp(dst, val, order); + } } - #[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))] + #[cfg(not(all( + not(portable_atomic_no_outline_atomics), + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + ), + portable_atomic_outline_atomics, + ), + ), + target_os = "android", + target_os = "freebsd", + target_os = "netbsd", + // These don't support detection of FEAT_LSE2. + // target_os = "openbsd", + // target_os = "fuchsia", + // target_os = "windows", + ), + )))] // SAFETY: the caller must uphold the safety contract. unsafe { - atomic_swap(dst, val, order); + atomic_store_no_lse2(dst, val, order); + } + #[cfg(all( + not(portable_atomic_no_outline_atomics), + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + ), + portable_atomic_outline_atomics, + ), + ), + target_os = "android", + target_os = "freebsd", + target_os = "netbsd", + // These don't support detection of FEAT_LSE2. + // target_os = "openbsd", + // target_os = "fuchsia", + // target_os = "windows", + ), + ))] + { + fn_alias! 
{ + // inline(never) is just a hint and also not strictly necessary + // because we use ifunc helper macro, but used for clarity. + #[inline(never)] + unsafe fn(dst: *mut u128, val: u128); + atomic_store_lse2_relaxed = _atomic_store_stp(Ordering::Relaxed); + atomic_store_lse2_release = _atomic_store_stp(Ordering::Release); + atomic_store_lse2_seqcst = _atomic_store_stp(Ordering::SeqCst); + } + fn_alias! { + unsafe fn(dst: *mut u128, val: u128); + atomic_store_no_lse2_relaxed = atomic_store_no_lse2(Ordering::Relaxed); + atomic_store_no_lse2_release = atomic_store_no_lse2(Ordering::Release); + atomic_store_no_lse2_seqcst = atomic_store_no_lse2(Ordering::SeqCst); + } + // SAFETY: the caller must uphold the safety contract. + // and we've checked if FEAT_LSE2 is available. + unsafe { + match order { + Ordering::Relaxed => { + ifunc!(unsafe fn(dst: *mut u128, val: u128) { + let cpuinfo = detect::detect(); + if cpuinfo.has_lse2() { + atomic_store_lse2_relaxed + } else { + atomic_store_no_lse2_relaxed + } + }); + } + Ordering::Release => { + ifunc!(unsafe fn(dst: *mut u128, val: u128) { + let cpuinfo = detect::detect(); + if cpuinfo.has_lse2() { + atomic_store_lse2_release + } else { + atomic_store_no_lse2_release + } + }); + } + Ordering::SeqCst => { + ifunc!(unsafe fn(dst: *mut u128, val: u128) { + let cpuinfo = detect::detect(); + if cpuinfo.has_lse2() { + atomic_store_lse2_seqcst + } else { + atomic_store_no_lse2_seqcst + } + }); + } + _ => unreachable!("{:?}", order), + } + } } } -// If CPU supports FEAT_LSE2, STP is single-copy atomic writes, +// If CPU supports FEAT_LSE2, STP/STILP is single-copy atomic writes, // otherwise it is two single-copy atomic writes. // Refs: B2.2.1 of the Arm Architecture Reference Manual Armv8, for Armv8-A architecture profile -#[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))] +#[cfg(any( + target_feature = "lse2", + portable_atomic_target_feature = "lse2", + not(portable_atomic_no_outline_atomics), +))] #[inline] -unsafe fn atomic_store_stp(dst: *mut u128, val: u128, order: Ordering) { +unsafe fn _atomic_store_stp(dst: *mut u128, val: u128, order: Ordering) { debug_assert!(dst as usize % 16 == 0); + debug_assert_lse2!(); // SAFETY: the caller must guarantee that `dst` is valid for writes, // 16-byte aligned, that there are no concurrent non-atomic operations. @@ -368,9 +702,10 @@ unsafe fn atomic_store_stp(dst: *mut u128, val: u128, order: Ordering) { // Refs: // - STP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STP--A64- unsafe { - let val = U128 { whole: val }; + #[rustfmt::skip] macro_rules! atomic_store { - ($acquire:tt, $release:tt) => { + ($acquire:tt, $release:tt) => {{ + let val = U128 { whole: val }; asm!( $release, "stp {val_lo}, {val_hi}, [{dst}]", @@ -379,17 +714,83 @@ unsafe fn atomic_store_stp(dst: *mut u128, val: u128, order: Ordering) { val_lo = in(reg) val.pair.lo, val_hi = in(reg) val.pair.hi, options(nostack, preserves_flags), - ) - }; + ); + }}; } match order { Ordering::Relaxed => atomic_store!("", ""), + #[cfg(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))] + Ordering::Release => { + let val = U128 { whole: val }; + // SAFETY: cfg guarantee that the CPU supports FEAT_LRCPC3. 
+ // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/STILP--Store-Release-ordered-Pair-of-registers- + asm!( + "stilp {val_lo}, {val_hi}, [{dst}]", + dst = in(reg) ptr_reg!(dst), + val_lo = in(reg) val.pair.lo, + val_hi = in(reg) val.pair.hi, + options(nostack, preserves_flags), + ); + } + #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))] + #[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))] + Ordering::Release => { + // Use swpp if stp requires fences. + // https://reviews.llvm.org/D143506 + // SAFETY: cfg guarantee that the CPU supports FEAT_LSE128. + _atomic_swap_swpp(dst, val, order); + } + #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))] + #[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))] Ordering::Release => atomic_store!("", "dmb ish"), + #[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))] + Ordering::SeqCst => { + // Use swpp if stp requires fences. + // https://reviews.llvm.org/D143506 + // SAFETY: cfg guarantee that the CPU supports FEAT_LSE128. + _atomic_swap_swpp(dst, val, order); + } + #[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))] Ordering::SeqCst => atomic_store!("dmb ish", "dmb ish"), _ => unreachable!("{:?}", order), } } } +// Do not use _atomic_swap_ldxp_stxp because it needs extra registers to implement store. +#[cfg(any( + test, + not(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + )) +))] +#[inline] +unsafe fn _atomic_store_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) { + debug_assert!(dst as usize % 16 == 0); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let val = U128 { whole: val }; + macro_rules! store { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + "2:", + concat!("ld", $acquire, "xp xzr, {tmp}, [{dst}]"), + concat!("st", $release, "xp {tmp:w}, {val_lo}, {val_hi}, [{dst}]"), + // 0 if the store was successful, 1 if no store was performed + "cbnz {tmp:w}, 2b", + $fence, + dst = in(reg) ptr_reg!(dst), + val_lo = in(reg) val.pair.lo, + val_hi = in(reg) val.pair.hi, + tmp = out(reg) _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(store, order); + } +} #[inline] unsafe fn atomic_compare_exchange( @@ -402,7 +803,7 @@ unsafe fn atomic_compare_exchange( #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] // SAFETY: the caller must uphold the safety contract. // cfg guarantee that the CPU supports FEAT_LSE. - let res = unsafe { _atomic_compare_exchange_casp(dst, old, new, success, failure) }; + let prev = unsafe { _atomic_compare_exchange_casp(dst, old, new, success, failure) }; #[cfg(not(all( not(portable_atomic_no_outline_atomics), any( @@ -419,6 +820,7 @@ unsafe fn atomic_compare_exchange( ), target_os = "android", target_os = "freebsd", + target_os = "netbsd", target_os = "openbsd", target_os = "fuchsia", target_os = "windows", @@ -426,7 +828,7 @@ unsafe fn atomic_compare_exchange( )))] #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] // SAFETY: the caller must uphold the safety contract. 
- let res = unsafe { _atomic_compare_exchange_ldxp_stxp(dst, old, new, success, failure) }; + let prev = unsafe { _atomic_compare_exchange_ldxp_stxp(dst, old, new, success, failure) }; #[cfg(all( not(portable_atomic_no_outline_atomics), any( @@ -443,15 +845,18 @@ unsafe fn atomic_compare_exchange( ), target_os = "android", target_os = "freebsd", + target_os = "netbsd", target_os = "openbsd", target_os = "fuchsia", target_os = "windows", ), ))] #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] - let res = { + let prev = { fn_alias! { - #[target_feature(enable = "lse")] + // inline(never) is just a hint and also not strictly necessary + // because we use ifunc helper macro, but used for clarity. + #[inline(never)] unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128; atomic_compare_exchange_casp_relaxed = _atomic_compare_exchange_casp(Ordering::Relaxed, Ordering::Relaxed); @@ -549,10 +954,10 @@ unsafe fn atomic_compare_exchange( } } }; - if res == old { - Ok(res) + if prev == old { + Ok(prev) } else { - Err(res) + Err(prev) } } #[cfg(any( @@ -560,10 +965,6 @@ unsafe fn atomic_compare_exchange( portable_atomic_target_feature = "lse", not(portable_atomic_no_outline_atomics), ))] -#[cfg_attr( - not(any(target_feature = "lse", portable_atomic_target_feature = "lse")), - target_feature(enable = "lse") -)] #[inline] unsafe fn _atomic_compare_exchange_casp( dst: *mut u128, @@ -590,6 +991,7 @@ unsafe fn _atomic_compare_exchange_casp( macro_rules! cmpxchg { ($acquire:tt, $release:tt, $fence:tt) => { asm!( + start_lse!(), concat!("casp", $acquire, $release, " x6, x7, x4, x5, [{dst}]"), $fence, dst = in(reg) ptr_reg!(dst), @@ -641,13 +1043,13 @@ unsafe fn _atomic_compare_exchange_ldxp_stxp( ($acquire:tt, $release:tt, $fence:tt) => { asm!( "2:", - concat!("ld", $acquire, "xp {out_lo}, {out_hi}, [{dst}]"), - "cmp {out_lo}, {old_lo}", + concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"), + "cmp {prev_lo}, {old_lo}", "cset {r:w}, ne", - "cmp {out_hi}, {old_hi}", + "cmp {prev_hi}, {old_hi}", "cinc {r:w}, {r:w}, ne", "cbz {r:w}, 3f", - concat!("st", $release, "xp {r:w}, {out_lo}, {out_hi}, [{dst}]"), + concat!("st", $release, "xp {r:w}, {prev_lo}, {prev_hi}, [{dst}]"), // 0 if the store was successful, 1 if no store was performed "cbnz {r:w}, 2b", "b 4f", @@ -662,8 +1064,8 @@ unsafe fn _atomic_compare_exchange_ldxp_stxp( old_hi = in(reg) old.pair.hi, new_lo = in(reg) new.pair.lo, new_hi = in(reg) new.pair.hi, - out_lo = out(reg) prev_lo, - out_hi = out(reg) prev_hi, + prev_lo = out(reg) prev_lo, + prev_hi = out(reg) prev_hi, r = out(reg) _, // Do not use `preserves_flags` because CMP modifies the condition flags. options(nostack), @@ -682,24 +1084,52 @@ use self::atomic_compare_exchange as atomic_compare_exchange_weak; // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set, // we use CAS-based atomic RMW. 
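Both the kuser_cmpxchg64 path earlier in the diff and the CAS-based aarch64 RMW path share one shape: load the current value, compute the next one, retry the compare-exchange until it succeeds, and return the previous value. A minimal portable sketch of that loop, using std's `AtomicU64` as a stand-in for the asm-level primitives:

```rust
// Sketch only: a generic update loop from which swap/fetch_add/fetch_and/...
// can all be derived, mirroring atomic_update_kuser_cmpxchg64 in the diff.
use std::sync::atomic::{AtomicU64, Ordering};

fn atomic_update<F: Fn(u64) -> u64>(a: &AtomicU64, order: Ordering, f: F) -> u64 {
    let mut prev = a.load(Ordering::Relaxed);
    loop {
        let next = f(prev);
        match a.compare_exchange_weak(prev, next, order, Ordering::Relaxed) {
            Ok(_) => return prev,
            Err(actual) => prev = actual,
        }
    }
}

fn main() {
    let a = AtomicU64::new(5);
    // fetch_add expressed through the generic update loop.
    let prev = atomic_update(&a, Ordering::SeqCst, |v| v.wrapping_add(3));
    assert_eq!(prev, 5);
    assert_eq!(a.load(Ordering::Relaxed), 8);
}
```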
+#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))] #[cfg(all( any(target_feature = "lse", portable_atomic_target_feature = "lse"), not(portable_atomic_ll_sc_rmw), ))] use _atomic_swap_casp as atomic_swap; +#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))] #[cfg(not(all( any(target_feature = "lse", portable_atomic_target_feature = "lse"), not(portable_atomic_ll_sc_rmw), )))] use _atomic_swap_ldxp_stxp as atomic_swap; +#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))] +use _atomic_swap_swpp as atomic_swap; +#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))] +#[inline] +unsafe fn _atomic_swap_swpp(dst: *mut u128, val: u128, order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, that there are no concurrent non-atomic operations, + // and the CPU supports FEAT_LSE128. + // + // Refs: + // - https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/SWPP--SWPPA--SWPPAL--SWPPL--Swap-quadword-in-memory-?lang=en + unsafe { + let val = U128 { whole: val }; + let (prev_lo, prev_hi); + macro_rules! swap { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), + $fence, + dst = in(reg) ptr_reg!(dst), + val_lo = inout(reg) val.pair.lo => prev_lo, + val_hi = inout(reg) val.pair.hi => prev_hi, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(swap, order); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } +} // Do not use atomic_rmw_cas_3 because it needs extra MOV to implement swap. -#[cfg(any( - test, - all( - any(target_feature = "lse", portable_atomic_target_feature = "lse"), - not(portable_atomic_ll_sc_rmw), - ) -))] +#[cfg(any(test, not(portable_atomic_ll_sc_rmw)))] #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] #[inline] unsafe fn _atomic_swap_casp(dst: *mut u128, val: u128, order: Ordering) -> u128 { @@ -714,6 +1144,7 @@ unsafe fn _atomic_swap_casp(dst: *mut u128, val: u128, order: Ordering) -> u128 macro_rules! swap { ($acquire:tt, $release:tt, $fence:tt) => { asm!( + start_lse!(), // If FEAT_LSE2 is not supported, this works like byte-wise atomic. // This is not single-copy atomic reads, but this is ok because subsequent // CAS will check for consistency. @@ -792,7 +1223,7 @@ unsafe fn _atomic_swap_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) -> /// `$op` can use the following registers: /// - val_lo/val_hi pair: val argument (read-only for `$op`) /// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`) -/// - new_lo/new_hi pair: new value that will to stored by sc +/// - new_lo/new_hi pair: new value that will be stored by sc macro_rules! atomic_rmw_ll_sc_3 { ($name:ident as $reexport_name:ident $(($preserves_flags:tt))?, $($op:tt)*) => { // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set, @@ -850,7 +1281,7 @@ macro_rules! atomic_rmw_ll_sc_3 { /// `$op` can use the following registers: /// - val_lo/val_hi pair: val argument (read-only for `$op`) /// - x6/x7 pair: previous value loaded (read-only for `$op`) -/// - x4/x5 pair: new value that will to stored +/// - x4/x5 pair: new value that will be stored macro_rules! 
atomic_rmw_cas_3 { ($name:ident as $reexport_name:ident, $($op:tt)*) => { // If FEAT_LSE is not available at compile-time or portable_atomic_ll_sc_rmw cfg is set, @@ -860,13 +1291,7 @@ macro_rules! atomic_rmw_cas_3 { not(portable_atomic_ll_sc_rmw), ))] use $name as $reexport_name; - #[cfg(any( - test, - all( - any(target_feature = "lse", portable_atomic_target_feature = "lse"), - not(portable_atomic_ll_sc_rmw), - ) - ))] + #[cfg(any(test, not(portable_atomic_ll_sc_rmw)))] #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] #[inline] unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 { @@ -880,6 +1305,7 @@ macro_rules! atomic_rmw_cas_3 { macro_rules! op { ($acquire:tt, $release:tt, $fence:tt) => { asm!( + start_lse!(), // If FEAT_LSE2 is not supported, this works like byte-wise atomic. // This is not single-copy atomic reads, but this is ok because subsequent // CAS will check for consistency. @@ -923,7 +1349,7 @@ macro_rules! atomic_rmw_cas_3 { /// /// `$op` can use the following registers: /// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`) -/// - new_lo/new_hi pair: new value that will to stored by sc +/// - new_lo/new_hi pair: new value that will be stored by sc macro_rules! atomic_rmw_ll_sc_2 { ($name:ident as $reexport_name:ident $(($preserves_flags:tt))?, $($op:tt)*) => { // If FEAT_LSE is available at compile-time and portable_atomic_ll_sc_rmw cfg is not set, @@ -977,7 +1403,7 @@ macro_rules! atomic_rmw_ll_sc_2 { /// /// `$op` can use the following registers: /// - x6/x7 pair: previous value loaded (read-only for `$op`) -/// - x4/x5 pair: new value that will to stored +/// - x4/x5 pair: new value that will be stored macro_rules! atomic_rmw_cas_2 { ($name:ident as $reexport_name:ident, $($op:tt)*) => { // If FEAT_LSE is not available at compile-time or portable_atomic_ll_sc_rmw cfg is set, @@ -987,13 +1413,7 @@ macro_rules! atomic_rmw_cas_2 { not(portable_atomic_ll_sc_rmw), ))] use $name as $reexport_name; - #[cfg(any( - test, - all( - any(target_feature = "lse", portable_atomic_target_feature = "lse"), - not(portable_atomic_ll_sc_rmw), - ) - ))] + #[cfg(any(test, not(portable_atomic_ll_sc_rmw)))] #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] #[inline] unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 { @@ -1006,6 +1426,7 @@ macro_rules! atomic_rmw_cas_2 { macro_rules! op { ($acquire:tt, $release:tt, $fence:tt) => { asm!( + start_lse!(), // If FEAT_LSE2 is not supported, this works like byte-wise atomic. // This is not single-copy atomic reads, but this is ok because subsequent // CAS will check for consistency. @@ -1066,16 +1487,48 @@ atomic_rmw_cas_3! { select_le_or_be!("sbc x5, x7, {val_hi}", "sbc x4, x6, {val_lo}"), } +#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))] atomic_rmw_ll_sc_3! { _atomic_and_ldxp_stxp as atomic_and (preserves_flags), "and {new_lo}, {prev_lo}, {val_lo}", "and {new_hi}, {prev_hi}, {val_hi}", } +#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))] atomic_rmw_cas_3! 
{ _atomic_and_casp as atomic_and, "and x4, x6, {val_lo}", "and x5, x7, {val_hi}", } +#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))] +#[inline] +unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, that there are no concurrent non-atomic operations, + // and the CPU supports FEAT_LSE128. + // + // Refs: + // - https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDCLRP--LDCLRPA--LDCLRPAL--LDCLRPL--Atomic-bit-clear-on-quadword-in-memory-?lang=en + unsafe { + let val = U128 { whole: !val }; + let (prev_lo, prev_hi); + macro_rules! and { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + concat!("ldclrp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), + $fence, + dst = in(reg) ptr_reg!(dst), + val_lo = inout(reg) val.pair.lo => prev_lo, + val_hi = inout(reg) val.pair.hi => prev_hi, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(and, order); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } +} atomic_rmw_ll_sc_3! { _atomic_nand_ldxp_stxp as atomic_nand (preserves_flags), @@ -1092,16 +1545,48 @@ atomic_rmw_cas_3! { "mvn x5, x5", } +#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))] atomic_rmw_ll_sc_3! { _atomic_or_ldxp_stxp as atomic_or (preserves_flags), "orr {new_lo}, {prev_lo}, {val_lo}", "orr {new_hi}, {prev_hi}, {val_hi}", } +#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))] atomic_rmw_cas_3! { _atomic_or_casp as atomic_or, "orr x4, x6, {val_lo}", "orr x5, x7, {val_hi}", } +#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))] +#[inline] +unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 { + debug_assert!(dst as usize % 16 == 0); + + // SAFETY: the caller must guarantee that `dst` is valid for both writes and + // reads, 16-byte aligned, that there are no concurrent non-atomic operations, + // and the CPU supports FEAT_LSE128. + // + // Refs: + // - https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDSETP--LDSETPA--LDSETPAL--LDSETPL--Atomic-bit-set-on-quadword-in-memory-?lang=en + unsafe { + let val = U128 { whole: val }; + let (prev_lo, prev_hi); + macro_rules! or { + ($acquire:tt, $release:tt, $fence:tt) => { + asm!( + concat!("ldsetp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"), + $fence, + dst = in(reg) ptr_reg!(dst), + val_lo = inout(reg) val.pair.lo => prev_lo, + val_hi = inout(reg) val.pair.hi => prev_hi, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(or, order); + U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + } +} atomic_rmw_ll_sc_3! { _atomic_xor_ldxp_stxp as atomic_xor (preserves_flags), diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_aa64reg.rs b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_aa64reg.rs index f32c38837..4cbdb51ff 100644 --- a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_aa64reg.rs +++ b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_aa64reg.rs @@ -1,6 +1,8 @@ -// Run-time feature detection on aarch64 Linux/FreeBSD/OpenBSD by parsing system registers. +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// Run-time feature detection on aarch64 Linux/FreeBSD/NetBSD/OpenBSD by parsing system registers. 
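A side note on the FEAT_LSE128 `atomic_and` above: LDCLRP atomically clears the bits that are set in its operand, so an AND with `val` is expressed as a clear of `!val` — which is why the diff passes `U128 { whole: !val }`. A trivial, illustrative check of that identity with plain integers:

```rust
// x & val == x & !(!val): an atomic "bit clear" of the complement mask
// behaves exactly like an atomic AND.
fn and_via_clear(current: u128, val: u128) -> u128 {
    let clear_mask = !val;   // what LDCLRP is given
    current & !clear_mask    // what "clear those bits" computes
}

fn main() {
    let x: u128 = 0xF0F0;
    assert_eq!(and_via_clear(x, 0x0FF0), x & 0x0FF0);
    assert_eq!(and_via_clear(x, 0), 0);
}
```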
// -// As of nightly-2023-01-23, is_aarch64_feature_detected doesn't support run-time detection on OpenBSD. +// As of nightly-2023-01-23, is_aarch64_feature_detected doesn't support run-time detection on NetBSD/OpenBSD. // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/mod.rs // https://github.com/rust-lang/stdarch/pull/1374 // @@ -14,10 +16,12 @@ // https://github.com/torvalds/linux/commit/77c97b4ee21290f5f083173d957843b615abbff2 // - FreeBSD 12.0+ (emulate mrs instruction) // https://github.com/freebsd/freebsd-src/commit/398810619cb32abf349f8de23f29510b2ee0839b +// - NetBSD 9.0+ (through sysctl) +// https://github.com/NetBSD/src/commit/0e9d25528729f7fea53e78275d1bc5039dfe8ffb // - OpenBSD 7.1+ (through sysctl) // https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8 // -// For now, this module is only used on OpenBSD. +// For now, this module is only used on NetBSD/OpenBSD. // On Linux/FreeBSD, this module is test-only: // - On Linux, this approach requires a higher kernel version than Rust supports, // and also does not work with qemu-user (as of QEMU 7.2) and Valgrind. @@ -28,11 +32,14 @@ include!("common.rs"); +#[cfg_attr(test, derive(Debug, PartialEq))] struct AA64Reg { aa64isar0: u64, #[cfg(test)] aa64isar1: u64, - #[cfg(test)] + // OpenBSD has an API to get this, but currently always returns 0. + // https://github.com/openbsd/src/blob/6a233889798dc3ecb18acc52dce1e57862af2957/sys/arch/arm64/arm64/machdep.c#L371-L377 + #[cfg_attr(target_os = "openbsd", cfg(test))] aa64mmfr2: u64, } @@ -42,7 +49,7 @@ fn _detect(info: &mut CpuInfo) { aa64isar0, #[cfg(test)] aa64isar1, - #[cfg(test)] + #[cfg_attr(target_os = "openbsd", cfg(test))] aa64mmfr2, } = imp::aa64reg(); @@ -51,7 +58,7 @@ fn _detect(info: &mut CpuInfo) { let atomic = extract(aa64isar0, 23, 20); if atomic >= 2 { info.set(CpuInfo::HAS_LSE); - // we currently only use FEAT_LSE in outline-atomics. + // we currently only use FEAT_LSE and FEAT_LSE2 in outline-atomics. #[cfg(test)] { if atomic >= 3 { @@ -59,7 +66,7 @@ fn _detect(info: &mut CpuInfo) { } } } - // we currently only use FEAT_LSE in outline-atomics. + // we currently only use FEAT_LSE and FEAT_LSE2 in outline-atomics. #[cfg(test)] { // ID_AA64ISAR1_EL1, Instruction Set Attribute Register 1 @@ -67,6 +74,11 @@ fn _detect(info: &mut CpuInfo) { if extract(aa64isar1, 23, 20) >= 3 { info.set(CpuInfo::HAS_RCPC3); } + } + // OpenBSD has an API to get this, but currently always returns 0. + // https://github.com/openbsd/src/blob/6a233889798dc3ecb18acc52dce1e57862af2957/sys/arch/arm64/arm64/machdep.c#L371-L377 + #[cfg_attr(target_os = "openbsd", cfg(test))] + { // ID_AA64MMFR2_EL1, AArch64 Memory Model Feature Register 2 // https://developer.arm.com/documentation/ddi0601/2023-06/AArch64-Registers/ID-AA64MMFR2-EL1--AArch64-Memory-Model-Feature-Register-2?lang=en if extract(aa64mmfr2, 35, 32) >= 1 { @@ -79,7 +91,7 @@ fn extract(x: u64, high: usize, low: usize) -> u64 { (x >> low) & ((1 << (high - low + 1)) - 1) } -#[cfg(not(target_os = "openbsd"))] +#[cfg(not(any(target_os = "netbsd", target_os = "openbsd")))] mod imp { // This module is test-only. See parent module docs for details. 
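The detection logic in `_detect` above is fixed-width bit-field extraction from the ID registers: the Atomic field of ID_AA64ISAR0_EL1 occupies bits [23:20], with a value of 2 meaning FEAT_LSE and 3 additionally meaning FEAT_LSE128. A small self-contained illustration using the same `extract` helper as the diff (the register value here is made up):

```rust
// Take bits [high:low] of x, as in the diff's extract().
fn extract(x: u64, high: usize, low: usize) -> u64 {
    (x >> low) & ((1 << (high - low + 1)) - 1)
}

fn main() {
    // Pretend ID_AA64ISAR0_EL1 reads back with Atomic (bits 23:20) == 3.
    let aa64isar0: u64 = 0b0011 << 20;
    let atomic = extract(aa64isar0, 23, 20);
    assert!(atomic >= 2); // FEAT_LSE
    assert!(atomic >= 3); // FEAT_LSE128
}
```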
@@ -95,7 +107,7 @@ mod imp { unsafe { let aa64isar0: u64; asm!( - "mrs {}, ID_AA64ISAR0_EL1", + "mrs {0}, ID_AA64ISAR0_EL1", out(reg) aa64isar0, options(pure, nomem, nostack, preserves_flags) ); @@ -104,31 +116,140 @@ mod imp { #[cfg(test)] { asm!( - "mrs {}, ID_AA64ISAR1_EL1", + "mrs {0}, ID_AA64ISAR1_EL1", out(reg) aa64isar1, options(pure, nomem, nostack, preserves_flags) ); } - #[cfg(test)] let aa64mmfr2: u64; - #[cfg(test)] - { - asm!( - "mrs {}, ID_AA64MMFR2_EL1", - out(reg) aa64mmfr2, - options(pure, nomem, nostack, preserves_flags) - ); - } + asm!( + "mrs {0}, ID_AA64MMFR2_EL1", + out(reg) aa64mmfr2, + options(pure, nomem, nostack, preserves_flags) + ); AA64Reg { aa64isar0, #[cfg(test)] aa64isar1, - #[cfg(test)] aa64mmfr2, } } } } +#[cfg(target_os = "netbsd")] +mod imp { + // NetBSD doesn't trap the mrs instruction, but exposes the system registers through sysctl. + // https://github.com/NetBSD/src/commit/0e9d25528729f7fea53e78275d1bc5039dfe8ffb + // https://github.com/golang/sys/commit/ef9fd89ba245e184bdd308f7f2b4f3c551fa5b0f + + use core::ptr; + + use super::AA64Reg; + + // core::ffi::c_* (except c_void) requires Rust 1.64, libc will soon require Rust 1.47 + #[allow(non_camel_case_types)] + pub(super) mod ffi { + pub(crate) use super::super::c_types::{c_char, c_int, c_size_t, c_void}; + + extern "C" { + // Defined in sys/sysctl.h. + // https://man.netbsd.org/sysctl.3 + // https://github.com/NetBSD/src/blob/167403557cf60bed09a63fc84d941a1a4bd7d52e/sys/sys/sysctl.h + // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/bsd/netbsdlike/netbsd/mod.rs#L2582 + pub(crate) fn sysctlbyname( + name: *const c_char, + old_p: *mut c_void, + old_len_p: *mut c_size_t, + new_p: *const c_void, + new_len: c_size_t, + ) -> c_int; + } + + // Defined in aarch64/armreg.h. + // https://github.com/NetBSD/src/blob/167403557cf60bed09a63fc84d941a1a4bd7d52e/sys/arch/aarch64/include/armreg.h#L1626 + #[derive(Clone, Copy)] + #[repr(C)] + pub(crate) struct aarch64_sysctl_cpu_id { + // NetBSD 9.0+ + // https://github.com/NetBSD/src/commit/0e9d25528729f7fea53e78275d1bc5039dfe8ffb + pub(crate) midr: u64, + pub(crate) revidr: u64, + pub(crate) mpidr: u64, + pub(crate) aa64dfr0: u64, + pub(crate) aa64dfr1: u64, + pub(crate) aa64isar0: u64, + pub(crate) aa64isar1: u64, + pub(crate) aa64mmfr0: u64, + pub(crate) aa64mmfr1: u64, + pub(crate) aa64mmfr2: u64, + pub(crate) aa64pfr0: u64, + pub(crate) aa64pfr1: u64, + pub(crate) aa64zfr0: u64, + pub(crate) mvfr0: u32, + pub(crate) mvfr1: u32, + pub(crate) mvfr2: u32, + // NetBSD 10.0+ + // https://github.com/NetBSD/src/commit/0c7bdc13f0e332cccec56e307f023b4888638973 + pub(crate) pad: u32, + pub(crate) clidr: u64, + pub(crate) ctr: u64, + } + } + + pub(super) unsafe fn sysctl_cpu_id(name: &[u8]) -> Option<AA64Reg> { + const OUT_LEN: ffi::c_size_t = + core::mem::size_of::<ffi::aarch64_sysctl_cpu_id>() as ffi::c_size_t; + + debug_assert_eq!(name.last(), Some(&0), "{:?}", name); + debug_assert_eq!(name.iter().filter(|&&v| v == 0).count(), 1, "{:?}", name); + + // SAFETY: all fields of aarch64_sysctl_cpu_id are zero-able and we use + // the result when machdep.cpuN.cpu_id sysctl was successful. + let mut buf: ffi::aarch64_sysctl_cpu_id = unsafe { core::mem::zeroed() }; + let mut out_len = OUT_LEN; + // SAFETY: + // - the caller must guarantee that `name` is ` machdep.cpuN.cpu_id` in a C string. + // - `out_len` does not exceed the size of the value at `buf`. + // - `sysctlbyname` is thread-safe. 
+ let res = unsafe { + ffi::sysctlbyname( + name.as_ptr().cast::<ffi::c_char>(), + (&mut buf as *mut ffi::aarch64_sysctl_cpu_id).cast::<ffi::c_void>(), + &mut out_len, + ptr::null_mut(), + 0, + ) + }; + if res != 0 { + return None; + } + Some(AA64Reg { + aa64isar0: buf.aa64isar0, + #[cfg(test)] + aa64isar1: buf.aa64isar1, + aa64mmfr2: buf.aa64mmfr2, + }) + } + + pub(super) fn aa64reg() -> AA64Reg { + // Get system registers for cpu0. + // If failed, returns default because machdep.cpuN.cpu_id sysctl is not available. + // machdep.cpuN.cpu_id sysctl was added on NetBSD 9.0 so it is not available on older versions. + // SAFETY: we passed a valid name in a C string. + // It is ok to check only cpu0, even if there are more CPUs. + // https://github.com/NetBSD/src/commit/bd9707e06ea7d21b5c24df6dfc14cb37c2819416 + // https://github.com/golang/sys/commit/ef9fd89ba245e184bdd308f7f2b4f3c551fa5b0f + match unsafe { sysctl_cpu_id(b"machdep.cpu0.cpu_id\0") } { + Some(cpu_id) => cpu_id, + None => AA64Reg { + aa64isar0: 0, + #[cfg(test)] + aa64isar1: 0, + aa64mmfr2: 0, + }, + } + } +} #[cfg(target_os = "openbsd")] mod imp { // OpenBSD doesn't trap the mrs instruction, but exposes the system registers through sysctl. @@ -152,6 +273,8 @@ mod imp { pub(crate) const CPU_ID_AA64ISAR0: c_int = 2; #[cfg(test)] pub(crate) const CPU_ID_AA64ISAR1: c_int = 3; + // OpenBSD has an API to get this, but currently always returns 0. + // https://github.com/openbsd/src/blob/6a233889798dc3ecb18acc52dce1e57862af2957/sys/arch/arm64/arm64/machdep.c#L371-L377 #[cfg(test)] pub(crate) const CPU_ID_AA64MMFR2: c_int = 7; @@ -276,6 +399,145 @@ mod tests { } } + #[allow(clippy::cast_possible_wrap)] + #[cfg(target_os = "netbsd")] + #[test] + fn test_netbsd() { + use c_types::*; + use core::{arch::asm, mem, ptr}; + use imp::ffi; + use test_helper::sys; + + // Call syscall using asm instead of libc. + // Note that NetBSD does not guarantee the stability of raw syscall as + // much as Linux does (It may actually be stable enough, though: https://lists.llvm.org/pipermail/llvm-dev/2019-June/133393.html). + // + // This is currently used only for testing. + unsafe fn sysctl_cpu_id_asm_syscall(name: &[&[u8]]) -> Result<AA64Reg, c_int> { + // https://github.com/golang/go/blob/4badad8d477ffd7a6b762c35bc69aed82faface7/src/syscall/asm_netbsd_arm64.s + #[inline] + unsafe fn sysctl( + name: *const c_int, + name_len: c_uint, + old_p: *mut c_void, + old_len_p: *mut c_size_t, + new_p: *const c_void, + new_len: c_size_t, + ) -> Result<c_int, c_int> { + #[allow(clippy::cast_possible_truncation)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut n = sys::SYS___sysctl as u64; + let r: i64; + asm!( + "svc 0", + "b.cc 2f", + "mov x17, x0", + "mov x0, #-1", + "2:", + inout("x17") n, + inout("x0") ptr_reg!(name) => r, + inout("x1") name_len as u64 => _, + in("x2") ptr_reg!(old_p), + in("x3") ptr_reg!(old_len_p), + in("x4") ptr_reg!(new_p), + in("x5") new_len as u64, + options(nostack), + ); + if r as c_int == -1 { + Err(n as c_int) + } else { + Ok(r as c_int) + } + } + } + + // https://github.com/golang/sys/blob/4badad8d477ffd7a6b762c35bc69aed82faface7/cpu/cpu_netbsd_arm64.go. 
+ use std::{vec, vec::Vec}; + fn sysctl_nodes(mib: &mut Vec<i32>) -> Result<Vec<sys::sysctlnode>, i32> { + mib.push(sys::CTL_QUERY); + let mut q_node = sys::sysctlnode { + sysctl_flags: sys::SYSCTL_VERS_1, + ..unsafe { mem::zeroed() } + }; + let qp = (&mut q_node as *mut sys::sysctlnode).cast::<ffi::c_void>(); + let sz = mem::size_of::<sys::sysctlnode>(); + let mut olen = 0; + #[allow(clippy::cast_possible_truncation)] + unsafe { + sysctl(mib.as_ptr(), mib.len() as c_uint, ptr::null_mut(), &mut olen, qp, sz)?; + } + + let mut nodes = Vec::<sys::sysctlnode>::with_capacity(olen / sz); + let np = nodes.as_mut_ptr().cast::<ffi::c_void>(); + #[allow(clippy::cast_possible_truncation)] + unsafe { + sysctl(mib.as_ptr(), mib.len() as c_uint, np, &mut olen, qp, sz)?; + nodes.set_len(olen / sz); + } + + mib.pop(); // pop CTL_QUERY + Ok(nodes) + } + fn name_to_mib(parts: &[&[u8]]) -> Result<Vec<i32>, i32> { + let mut mib = vec![]; + for (part_no, &part) in parts.iter().enumerate() { + let nodes = sysctl_nodes(&mut mib)?; + for node in nodes { + let mut n = vec![]; + for b in node.sysctl_name { + if b != 0 { + n.push(b); + } + } + if n == part { + mib.push(node.sysctl_num); + break; + } + } + if mib.len() != part_no + 1 { + return Err(0); + } + } + + Ok(mib) + } + + const OUT_LEN: ffi::c_size_t = + core::mem::size_of::<ffi::aarch64_sysctl_cpu_id>() as ffi::c_size_t; + + let mib = name_to_mib(name)?; + + let mut buf: ffi::aarch64_sysctl_cpu_id = unsafe { core::mem::zeroed() }; + let mut out_len = OUT_LEN; + #[allow(clippy::cast_possible_truncation)] + unsafe { + sysctl( + mib.as_ptr(), + mib.len() as c_uint, + (&mut buf as *mut ffi::aarch64_sysctl_cpu_id).cast::<ffi::c_void>(), + &mut out_len, + ptr::null_mut(), + 0, + )?; + } + Ok(AA64Reg { + aa64isar0: buf.aa64isar0, + #[cfg(test)] + aa64isar1: buf.aa64isar1, + #[cfg(test)] + aa64mmfr2: buf.aa64mmfr2, + }) + } + + unsafe { + assert_eq!( + imp::sysctl_cpu_id(b"machdep.cpu0.cpu_id\0").unwrap(), + sysctl_cpu_id_asm_syscall(&[b"machdep", b"cpu0", b"cpu_id"]).unwrap() + ); + } + } + // Static assertions for FFI bindings. // This checks that FFI bindings defined in this crate, FFI bindings defined // in libc, and FFI bindings generated for the platform's latest header file @@ -285,6 +547,56 @@ mod tests { // without actually running tests on these platforms. // See also tools/codegen/src/ffi.rs. 
// TODO(codegen): auto-generate this test + #[cfg(target_os = "netbsd")] + #[allow( + clippy::cast_possible_wrap, + clippy::cast_sign_loss, + clippy::no_effect_underscore_binding, + clippy::used_underscore_binding + )] + const _: fn() = || { + use core::mem::size_of; + use imp::ffi; + use test_helper::{libc, sys}; + let mut _sysctlbyname: unsafe extern "C" fn( + *const ffi::c_char, + *mut ffi::c_void, + *mut ffi::c_size_t, + *const ffi::c_void, + ffi::c_size_t, + ) -> ffi::c_int = ffi::sysctlbyname; + _sysctlbyname = libc::sysctlbyname; + _sysctlbyname = sys::sysctlbyname; + // libc doesn't have this + // static_assert!( + // size_of::<ffi::aarch64_sysctl_cpu_id>() == size_of::<libc::aarch64_sysctl_cpu_id>() + // ); + static_assert!( + size_of::<ffi::aarch64_sysctl_cpu_id>() == size_of::<sys::aarch64_sysctl_cpu_id>() + ); + let ffi: ffi::aarch64_sysctl_cpu_id = unsafe { core::mem::zeroed() }; + let _ = sys::aarch64_sysctl_cpu_id { + ac_midr: ffi.midr, + ac_revidr: ffi.revidr, + ac_mpidr: ffi.mpidr, + ac_aa64dfr0: ffi.aa64dfr0, + ac_aa64dfr1: ffi.aa64dfr1, + ac_aa64isar0: ffi.aa64isar0, + ac_aa64isar1: ffi.aa64isar1, + ac_aa64mmfr0: ffi.aa64mmfr0, + ac_aa64mmfr1: ffi.aa64mmfr1, + ac_aa64mmfr2: ffi.aa64mmfr2, + ac_aa64pfr0: ffi.aa64pfr0, + ac_aa64pfr1: ffi.aa64pfr1, + ac_aa64zfr0: ffi.aa64zfr0, + ac_mvfr0: ffi.mvfr0, + ac_mvfr1: ffi.mvfr1, + ac_mvfr2: ffi.mvfr2, + ac_pad: ffi.pad, + ac_clidr: ffi.clidr, + ac_ctr: ffi.ctr, + }; + }; #[cfg(target_os = "openbsd")] #[allow( clippy::cast_possible_wrap, diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_fuchsia.rs b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_fuchsia.rs index 69aa74ebd..978418c5b 100644 --- a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_fuchsia.rs +++ b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_fuchsia.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Run-time feature detection on aarch64 Fuchsia by using zx_system_get_features. // // As of nightly-2023-01-23, is_aarch64_feature_detected doesn't support run-time detection on Fuchsia. diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_macos.rs b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_macos.rs index 0bf0e6b0f..d6bf9d002 100644 --- a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_macos.rs +++ b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_macos.rs @@ -1,10 +1,12 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Run-time feature detection on aarch64 macOS by using sysctl. // // This module is currently only enabled on tests because aarch64 macOS always supports FEAT_LSE and FEAT_LSE2. -// https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/include/llvm/TargetParser/AArch64TargetParser.h#L458 +// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/include/llvm/TargetParser/AArch64TargetParser.h#L494 // // If macOS supporting Armv9.4-a becomes popular in the future, this module will -// be used to support outline atomics for FEAT_LSE128/FEAT_LRCPC3. +// be used to support outline-atomics for FEAT_LSE128/FEAT_LRCPC3. // // Refs: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics // @@ -77,14 +79,14 @@ fn _detect(info: &mut CpuInfo) { } { info.set(CpuInfo::HAS_LSE); } - // we currently only use FEAT_LSE in outline-atomics. + // SAFETY: we passed a valid C string. 
+ if unsafe { sysctlbyname32(b"hw.optional.arm.FEAT_LSE2\0").unwrap_or(0) != 0 } { + info.set(CpuInfo::HAS_LSE2); + } + // we currently only use FEAT_LSE and FEAT_LSE2 in outline-atomics. #[cfg(test)] { // SAFETY: we passed a valid C string. - if unsafe { sysctlbyname32(b"hw.optional.arm.FEAT_LSE2\0").unwrap_or(0) != 0 } { - info.set(CpuInfo::HAS_LSE2); - } - // SAFETY: we passed a valid C string. if unsafe { sysctlbyname32(b"hw.optional.arm.FEAT_LSE128\0").unwrap_or(0) != 0 } { info.set(CpuInfo::HAS_LSE128); } diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_windows.rs b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_windows.rs index 6922ce4a7..e19c4b8b7 100644 --- a/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_windows.rs +++ b/vendor/portable-atomic/src/imp/atomic128/detect/aarch64_windows.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Run-time feature detection on aarch64 Windows by using IsProcessorFeaturePresent. // // As of nightly-2023-01-23, is_aarch64_feature_detected doesn't support run-time detection of FEAT_LSE on Windows. diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/auxv.rs b/vendor/portable-atomic/src/imp/atomic128/detect/auxv.rs index a80350e47..a4455911b 100644 --- a/vendor/portable-atomic/src/imp/atomic128/detect/auxv.rs +++ b/vendor/portable-atomic/src/imp/atomic128/detect/auxv.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Run-time feature detection on aarch64/powerpc64 Linux/Android/FreeBSD by parsing ELF auxiliary vectors. // // # Linux/Android @@ -24,11 +26,13 @@ // // - On musl with static linking. See the above for more. // Also, in this case, dlsym(getauxval) always returns null. -// - On uClibc-ng (*-linux-uclibc*, *-l4re-uclibc*), [uClibc-ng 1.0.43 (released in 2023-04-05) added getauxval](https://repo.or.cz/uclibc-ng.git/commitdiff/d869bb1600942c01a77539128f9ba5b5b55ad647). +// - On uClibc-ng (*-linux-uclibc*, *-l4re-uclibc*), [uClibc-ng 1.0.43 (released in 2023-04-05) added getauxval](https://github.com/wbx-github/uclibc-ng/commit/d869bb1600942c01a77539128f9ba5b5b55ad647). // - On Picolibc, [Picolibc 1.4.6 added getauxval stub](https://github.com/picolibc/picolibc#picolibc-version-146). // // See also https://github.com/rust-lang/stdarch/pull/1375 // +// See tests::test_linux_like and aarch64_aa64reg.rs for (test-only) alternative implementations. +// // # FreeBSD // // As of nightly-2023-01-23, is_aarch64_feature_detected always uses mrs on @@ -47,6 +51,8 @@ // https://www.freebsd.org/security/unsupported // See also https://github.com/rust-lang/stdarch/pull/611#issuecomment-445464613 // +// See tests::test_freebsd and aarch64_aa64reg.rs for (test-only) alternative implementations. 
+// // # PowerPC64 // // On PowerPC64, outline-atomics is currently disabled by default mainly for @@ -69,7 +75,7 @@ mod os { // https://man7.org/linux/man-pages/man3/getauxval.3.html // https://github.com/bminor/glibc/blob/801af9fafd4689337ebf27260aa115335a0cb2bc/misc/sys/auxv.h // https://github.com/bminor/musl/blob/7d756e1c04de6eb3f2b3d3e1141a218bb329fcfb/include/sys/auxv.h - // https://repo.or.cz/uclibc-ng.git/blob/9d549d7bc6a1b78498ee8d1f39f6a324fdfc9e5d:/include/sys/auxv.h + // https://github.com/wbx-github/uclibc-ng/blob/cdb07d2cd52af39feb425e6d36c02b30916b9f0a/include/sys/auxv.h // https://github.com/aosp-mirror/platform_bionic/blob/d3ebc2f7c49a9893b114124d4a6b315f3a328764/libc/include/sys/auxv.h // https://github.com/picolibc/picolibc/blob/7a8a58aeaa5946cb662577a518051091b691af3a/newlib/libc/picolib/getauxval.c // https://github.com/rust-lang/libc/blob/0.2.139/src/unix/linux_like/linux/gnu/mod.rs#L1201 @@ -181,7 +187,6 @@ mod arch { // https://github.com/freebsd/freebsd-src/blob/release/13.0.0/sys/arm64/include/elf.h // https://github.com/freebsd/freebsd-src/blob/release/12.2.0/sys/arm64/include/elf.h pub(super) const HWCAP_ATOMICS: ffi::c_ulong = 1 << 8; - #[cfg(test)] pub(super) const HWCAP_USCAT: ffi::c_ulong = 1 << 25; #[cold] @@ -191,12 +196,8 @@ mod arch { if hwcap & HWCAP_ATOMICS != 0 { info.set(CpuInfo::HAS_LSE); } - // we currently only use FEAT_LSE in outline-atomics. - #[cfg(test)] - { - if hwcap & HWCAP_USCAT != 0 { - info.set(CpuInfo::HAS_LSE2); - } + if hwcap & HWCAP_USCAT != 0 { + info.set(CpuInfo::HAS_LSE2); } } } @@ -235,6 +236,120 @@ mod arch { mod tests { use super::*; + #[cfg(any(target_os = "linux", target_os = "android"))] + #[cfg(target_pointer_width = "64")] + #[test] + fn test_linux_like() { + use c_types::*; + use core::{arch::asm, mem}; + use std::vec; + use test_helper::{libc, sys}; + + // Linux kernel 6.4 has added a way to read auxv without depending on either libc or mrs trap. + // https://github.com/torvalds/linux/commit/ddc65971bb677aa9f6a4c21f76d3133e106f88eb + // + // This is currently used only for testing. + fn getauxval_pr_get_auxv(type_: ffi::c_ulong) -> Result<ffi::c_ulong, c_int> { + #[cfg(target_arch = "aarch64")] + unsafe fn prctl_get_auxv(out: *mut c_void, len: usize) -> Result<usize, c_int> { + let r: i64; + unsafe { + asm!( + "svc 0", + in("x8") sys::__NR_prctl as u64, + inout("x0") sys::PR_GET_AUXV as u64 => r, + in("x1") ptr_reg!(out), + in("x2") len as u64, + // arg4 and arg5 must be zero. + in("x3") 0_u64, + in("x4") 0_u64, + options(nostack, preserves_flags) + ); + } + #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] + if (r as c_int) < 0 { + Err(r as c_int) + } else { + Ok(r as usize) + } + } + #[cfg(target_arch = "powerpc64")] + unsafe fn prctl_get_auxv(out: *mut c_void, len: usize) -> Result<usize, c_int> { + let r: i64; + unsafe { + asm!( + "sc", + "bns+ 2f", + "neg %r3, %r3", + "2:", + inout("r0") sys::__NR_prctl as u64 => _, + inout("r3") sys::PR_GET_AUXV as u64 => r, + inout("r4") ptr_reg!(out) => _, + inout("r5") len as u64 => _, + // arg4 and arg5 must be zero. 
+ inout("r6") 0_u64 => _, + inout("r7") 0_u64 => _, + out("r8") _, + out("r9") _, + out("r10") _, + out("r11") _, + out("r12") _, + out("cr0") _, + options(nostack, preserves_flags) + ); + } + #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] + if (r as c_int) < 0 { + Err(r as c_int) + } else { + Ok(r as usize) + } + } + + let mut auxv = vec![unsafe { mem::zeroed::<sys::Elf64_auxv_t>() }; 38]; + + let old_len = auxv.len() * mem::size_of::<sys::Elf64_auxv_t>(); + + // SAFETY: + // - `out_len` does not exceed the size of `auxv`. + let _len = unsafe { prctl_get_auxv(auxv.as_mut_ptr().cast::<c_void>(), old_len)? }; + + for aux in &auxv { + if aux.a_type == type_ { + // SAFETY: aux.a_un is #[repr(C)] union and all fields have + // the same size and can be safely transmuted to integers. + return Ok(unsafe { aux.a_un.a_val }); + } + } + Err(0) + } + + unsafe { + let mut u = mem::zeroed(); + assert_eq!(libc::uname(&mut u), 0); + let release = std::ffi::CStr::from_ptr(u.release.as_ptr()); + let release = core::str::from_utf8(release.to_bytes()).unwrap(); + let mut digits = release.split('.'); + let major = digits.next().unwrap().parse::<u32>().unwrap(); + let minor = digits.next().unwrap().parse::<u32>().unwrap(); + if (major, minor) < (6, 4) { + std::eprintln!("kernel version: {major}.{minor} (no pr_get_auxv)"); + assert_eq!(getauxval_pr_get_auxv(ffi::AT_HWCAP).unwrap_err(), -22); + assert_eq!(getauxval_pr_get_auxv(ffi::AT_HWCAP2).unwrap_err(), -22); + } else { + std::eprintln!("kernel version: {major}.{minor} (has pr_get_auxv)"); + assert_eq!( + os::getauxval(ffi::AT_HWCAP), + getauxval_pr_get_auxv(ffi::AT_HWCAP).unwrap() + ); + assert_eq!( + os::getauxval(ffi::AT_HWCAP2), + getauxval_pr_get_auxv(ffi::AT_HWCAP2).unwrap() + ); + } + } + } + #[allow(clippy::cast_sign_loss)] #[cfg(all(target_arch = "aarch64", target_os = "android"))] #[test] @@ -256,6 +371,262 @@ mod tests { } } + #[allow(clippy::cast_possible_wrap)] + #[cfg(target_os = "freebsd")] + #[test] + fn test_freebsd() { + use c_types::*; + use core::{arch::asm, mem, ptr}; + use test_helper::sys; + + // This is almost equivalent to what elf_aux_info does. + // https://man.freebsd.org/elf_aux_info(3) + // On FreeBSD, [aarch64 support is available on FreeBSD 11.0+](https://www.freebsd.org/releases/11.0R/relnotes/#hardware-arm), + // but elf_aux_info is available on FreeBSD 12.0+ and 11.4+: + // https://github.com/freebsd/freebsd-src/commit/0b08ae2120cdd08c20a2b806e2fcef4d0a36c470 + // https://github.com/freebsd/freebsd-src/blob/release/11.4.0/sys/sys/auxv.h + // so use sysctl instead of elf_aux_info. + // Note that FreeBSD 11 (11.4) was EoL on 2021-09-30, and FreeBSD 11.3 was EoL on 2020-09-30: + // https://www.freebsd.org/security/unsupported + // + // std_detect uses this way, but it appears to be somewhat incorrect + // (the type of arg4 of sysctl, auxv is smaller than AT_COUNT, etc.). + // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/freebsd/auxvec.rs#L52 + // + // This is currently used only for testing. + // If you want us to use this implementation for compatibility with the older FreeBSD + // version that came to EoL a few years ago, please open an issue. + fn getauxval_sysctl_libc(type_: ffi::c_int) -> ffi::c_ulong { + let mut auxv: [sys::Elf64_Auxinfo; sys::AT_COUNT as usize] = unsafe { mem::zeroed() }; + + let mut len = core::mem::size_of_val(&auxv) as c_size_t; + + // SAFETY: calling getpid is safe. 
+ let pid = unsafe { sys::getpid() }; + let mib = [ + sys::CTL_KERN as c_int, + sys::KERN_PROC as c_int, + sys::KERN_PROC_AUXV as c_int, + pid, + ]; + + #[allow(clippy::cast_possible_truncation)] + // SAFETY: + // - `mib.len()` does not exceed the size of `mib`. + // - `len` does not exceed the size of `auxv`. + // - `sysctl` is thread-safe. + let res = unsafe { + sys::sysctl( + mib.as_ptr(), + mib.len() as c_uint, + auxv.as_mut_ptr().cast::<c_void>(), + &mut len, + ptr::null_mut(), + 0, + ) + }; + + if res != -1 { + for aux in &auxv { + if aux.a_type == type_ as c_long { + // SAFETY: aux.a_un is #[repr(C)] union and all fields have + // the same size and can be safely transmuted to integers. + return unsafe { aux.a_un.a_val as c_ulong }; + } + } + } + 0 + } + // Similar to the above, but call syscall using asm instead of libc. + // Note that FreeBSD does not guarantee the stability of raw syscall as + // much as Linux does (It may actually be stable enough, though: + // https://lists.llvm.org/pipermail/llvm-dev/2019-June/133393.html, + // https://github.com/ziglang/zig/issues/16590). + // + // This is currently used only for testing. + fn getauxval_sysctl_asm_syscall(type_: ffi::c_int) -> Result<ffi::c_ulong, c_int> { + #[allow(non_camel_case_types)] + type pid_t = c_int; + + // https://github.com/freebsd/freebsd-src/blob/9888a79adad22ba06b5aff17d05abac0029c537a/lib/libc/aarch64/SYS.h + // https://github.com/golang/go/blob/4badad8d477ffd7a6b762c35bc69aed82faface7/src/syscall/asm_freebsd_arm64.s + #[cfg(target_arch = "aarch64")] + #[inline] + fn getpid() -> pid_t { + #[allow(clippy::cast_possible_truncation)] + // SAFETY: calling getpid is safe. + unsafe { + let n = sys::SYS_getpid; + let r: i64; + asm!( + "svc 0", + in("x8") n as u64, + out("x0") r, + options(nostack, readonly), + ); + r as pid_t + } + } + #[cfg(target_arch = "aarch64")] + #[inline] + unsafe fn sysctl( + name: *const c_int, + name_len: c_uint, + old_p: *mut c_void, + old_len_p: *mut c_size_t, + new_p: *const c_void, + new_len: c_size_t, + ) -> Result<c_int, c_int> { + #[allow(clippy::cast_possible_truncation)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut n = sys::SYS___sysctl as u64; + let r: i64; + asm!( + "svc 0", + "b.cc 2f", + "mov x8, x0", + "mov x0, #-1", + "2:", + inout("x8") n, + inout("x0") ptr_reg!(name) => r, + inout("x1") name_len as u64 => _, + in("x2") ptr_reg!(old_p), + in("x3") ptr_reg!(old_len_p), + in("x4") ptr_reg!(new_p), + in("x5") new_len as u64, + options(nostack), + ); + if r as c_int == -1 { + Err(n as c_int) + } else { + Ok(r as c_int) + } + } + } + + // https://github.com/freebsd/freebsd-src/blob/9888a79adad22ba06b5aff17d05abac0029c537a/lib/libc/powerpc64/SYS.h + #[cfg(target_arch = "powerpc64")] + #[inline] + fn getpid() -> pid_t { + #[allow(clippy::cast_possible_truncation)] + // SAFETY: calling getpid is safe. 
+ unsafe { + let n = sys::SYS_getpid; + let r: i64; + asm!( + "sc", + inout("r0") n as u64 => _, + out("r3") r, + out("r4") _, + out("r5") _, + out("r6") _, + out("r7") _, + out("r8") _, + out("r9") _, + out("r10") _, + out("r11") _, + out("r12") _, + out("cr0") _, + options(nostack, preserves_flags, readonly), + ); + r as pid_t + } + } + #[cfg(target_arch = "powerpc64")] + #[inline] + unsafe fn sysctl( + name: *const c_int, + name_len: c_uint, + old_p: *mut c_void, + old_len_p: *mut c_size_t, + new_p: *const c_void, + new_len: c_size_t, + ) -> Result<c_int, c_int> { + #[allow(clippy::cast_possible_truncation)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut n = sys::SYS___sysctl as u64; + let r: i64; + asm!( + "sc", + "bns+ 2f", + "mr %r0, %r3", + "li %r3, -1", + "2:", + inout("r0") n, + inout("r3") ptr_reg!(name) => r, + inout("r4") name_len as u64 => _, + inout("r5") ptr_reg!(old_p) => _, + inout("r6") ptr_reg!(old_len_p) => _, + inout("r7") ptr_reg!(new_p) => _, + inout("r8") new_len as u64 => _, + out("r9") _, + out("r10") _, + out("r11") _, + out("r12") _, + out("cr0") _, + options(nostack, preserves_flags) + ); + if r as c_int == -1 { + Err(n as c_int) + } else { + Ok(r as c_int) + } + } + } + + let mut auxv: [sys::Elf64_Auxinfo; sys::AT_COUNT as usize] = unsafe { mem::zeroed() }; + + let mut len = core::mem::size_of_val(&auxv) as c_size_t; + + let pid = getpid(); + let mib = [ + sys::CTL_KERN as c_int, + sys::KERN_PROC as c_int, + sys::KERN_PROC_AUXV as c_int, + pid, + ]; + + #[allow(clippy::cast_possible_truncation)] + // SAFETY: + // - `mib.len()` does not exceed the size of `mib`. + // - `len` does not exceed the size of `auxv`. + // - `sysctl` is thread-safe. + unsafe { + sysctl( + mib.as_ptr(), + mib.len() as c_uint, + auxv.as_mut_ptr().cast::<c_void>(), + &mut len, + ptr::null_mut(), + 0, + )?; + } + + for aux in &auxv { + if aux.a_type == type_ as c_long { + // SAFETY: aux.a_un is #[repr(C)] union and all fields have + // the same size and can be safely transmuted to integers. + return Ok(unsafe { aux.a_un.a_val as c_ulong }); + } + } + Err(0) + } + + assert_eq!(os::getauxval(ffi::AT_HWCAP), getauxval_sysctl_libc(ffi::AT_HWCAP)); + assert_eq!(os::getauxval(ffi::AT_HWCAP2), getauxval_sysctl_libc(ffi::AT_HWCAP2)); + assert_eq!( + os::getauxval(ffi::AT_HWCAP), + getauxval_sysctl_asm_syscall(ffi::AT_HWCAP).unwrap() + ); + assert_eq!( + os::getauxval(ffi::AT_HWCAP2), + // AT_HWCAP2 is only available on FreeBSD 13+, at least for aarch64. + getauxval_sysctl_asm_syscall(ffi::AT_HWCAP2).unwrap_or(0) + ); + } + // Static assertions for FFI bindings. 
// This checks that FFI bindings defined in this crate, FFI bindings defined // in libc, and FFI bindings generated for the platform's latest header file @@ -273,14 +644,15 @@ mod tests { )] const _: fn() = || { use test_helper::{libc, sys}; + #[cfg(not(target_os = "freebsd"))] + type AtType = ffi::c_ulong; + #[cfg(target_os = "freebsd")] + type AtType = ffi::c_int; #[cfg(any(target_os = "linux", target_os = "android"))] { let mut _getauxval: unsafe extern "C" fn(ffi::c_ulong) -> ffi::c_ulong = ffi::getauxval; _getauxval = libc::getauxval; - #[cfg(any(target_env = "musl", target_os = "android"))] // TODO(codegen) - { - _getauxval = sys::getauxval; - } + _getauxval = sys::getauxval; } #[cfg(all(target_arch = "aarch64", target_os = "android"))] { @@ -291,7 +663,7 @@ mod tests { ___system_property_get = libc::__system_property_get; ___system_property_get = sys::__system_property_get; static_assert!(ffi::PROP_VALUE_MAX == libc::PROP_VALUE_MAX); - static_assert!(ffi::PROP_VALUE_MAX == sys::PROP_VALUE_MAX as _); + static_assert!(ffi::PROP_VALUE_MAX == sys::PROP_VALUE_MAX as ffi::c_int); } #[cfg(target_os = "freebsd")] { @@ -305,10 +677,10 @@ mod tests { } #[cfg(not(target_os = "freebsd"))] // libc doesn't have this on FreeBSD static_assert!(ffi::AT_HWCAP == libc::AT_HWCAP); - static_assert!(ffi::AT_HWCAP == sys::AT_HWCAP as _); + static_assert!(ffi::AT_HWCAP == sys::AT_HWCAP as AtType); #[cfg(not(target_os = "freebsd"))] // libc doesn't have this on FreeBSD static_assert!(ffi::AT_HWCAP2 == libc::AT_HWCAP2); - static_assert!(ffi::AT_HWCAP2 == sys::AT_HWCAP2 as _); + static_assert!(ffi::AT_HWCAP2 == sys::AT_HWCAP2 as AtType); #[cfg(target_arch = "aarch64")] { // static_assert!(arch::HWCAP_ATOMICS == libc::HWCAP_ATOMICS); // libc doesn't have this diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/common.rs b/vendor/portable-atomic/src/imp/atomic128/detect/common.rs index 504718718..b87caa351 100644 --- a/vendor/portable-atomic/src/imp/atomic128/detect/common.rs +++ b/vendor/portable-atomic/src/imp/atomic128/detect/common.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + #[derive(Clone, Copy)] pub(crate) struct CpuInfo(u32); @@ -48,8 +50,7 @@ impl CpuInfo { /// Whether FEAT_LSE is available const HAS_LSE: u32 = 1; /// Whether FEAT_LSE2 is available - // This is currently only used in tests. - #[cfg(test)] + #[cfg_attr(not(test), allow(dead_code))] const HAS_LSE2: u32 = 2; /// Whether FEAT_LSE128 is available // This is currently only used in tests. 
@@ -65,6 +66,22 @@ impl CpuInfo { pub(crate) fn has_lse(self) -> bool { self.test(CpuInfo::HAS_LSE) } + #[cfg_attr(not(test), allow(dead_code))] + #[cfg(any(test, not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))))] + #[inline] + pub(crate) fn has_lse2(self) -> bool { + self.test(CpuInfo::HAS_LSE2) + } + #[cfg(test)] + #[inline] + pub(crate) fn has_lse128(self) -> bool { + self.test(CpuInfo::HAS_LSE128) + } + #[cfg(test)] + #[inline] + pub(crate) fn has_rcpc3(self) -> bool { + self.test(CpuInfo::HAS_RCPC3) + } } #[cfg(target_arch = "x86_64")] @@ -128,12 +145,17 @@ mod c_types { pub(crate) type c_ulong = u64; #[cfg(not(target_pointer_width = "64"))] pub(crate) type c_ulong = u32; - // c_size_t is usize + // c_size_t is currently always usize // https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/ffi/mod.rs#L88 pub(crate) type c_size_t = usize; - // c_char is u8 on most non-Apple/non-Windows ARM/PowerPC/RISC-V targets - // (Linux/Android/FreeBSD/NetBSD/OpenBSD/VxWorks/Fuchsia/QNX Neutrino/Horizon) + // c_char is u8 by default on most non-Apple/non-Windows ARM/PowerPC/RISC-V/s390x/Hexagon targets + // (Linux/Android/FreeBSD/NetBSD/OpenBSD/VxWorks/Fuchsia/QNX Neutrino/Horizon/AIX/z/OS) // https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/ffi/mod.rs#L104 + // https://github.com/llvm/llvm-project/blob/9734b2256d89cb4c61a4dbf4a3c3f3f942fe9b8c/lldb/source/Utility/ArchSpec.cpp#L712 + // RISC-V https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/HEAD/riscv-cc.adoc#cc-type-representations + // Hexagon https://lists.llvm.org/pipermail/llvm-dev/attachments/20190916/21516a52/attachment-0001.pdf + // AIX https://www.ibm.com/docs/en/xl-c-aix/13.1.2?topic=descriptions-qchars + // z/OS https://www.ibm.com/docs/en/zos/2.5.0?topic=specifiers-character-types // (macOS is currently the only Apple target that uses this module, and Windows currently doesn't use this module) #[cfg(not(target_os = "macos"))] pub(crate) type c_char = u8; @@ -328,7 +350,7 @@ mod tests_common { assert!(!proc_cpuinfo.lse); } } - if detect().test(CpuInfo::HAS_LSE2) { + if detect().has_lse2() { assert!(detect().test(CpuInfo::HAS_LSE)); assert!(detect().test(CpuInfo::HAS_LSE2)); if let Ok(test_helper::cpuinfo::ProcCpuinfo { lse2: Some(lse2), .. }) = proc_cpuinfo { @@ -340,14 +362,14 @@ mod tests_common { assert!(!lse2); } } - if detect().test(CpuInfo::HAS_LSE128) { + if detect().has_lse128() { assert!(detect().test(CpuInfo::HAS_LSE)); assert!(detect().test(CpuInfo::HAS_LSE2)); assert!(detect().test(CpuInfo::HAS_LSE128)); } else { assert!(!detect().test(CpuInfo::HAS_LSE128)); } - if detect().test(CpuInfo::HAS_RCPC3) { + if detect().has_rcpc3() { assert!(detect().test(CpuInfo::HAS_RCPC3)); } else { assert!(!detect().test(CpuInfo::HAS_RCPC3)); diff --git a/vendor/portable-atomic/src/imp/atomic128/detect/x86_64.rs b/vendor/portable-atomic/src/imp/atomic128/detect/x86_64.rs index d162d6599..80eefed53 100644 --- a/vendor/portable-atomic/src/imp/atomic128/detect/x86_64.rs +++ b/vendor/portable-atomic/src/imp/atomic128/detect/x86_64.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Adapted from https://github.com/rust-lang/stdarch. 
#![cfg_attr(any(not(target_feature = "sse"), portable_atomic_sanitize_thread), allow(dead_code))] @@ -5,13 +7,13 @@ // Miri doesn't support inline assembly used in __cpuid: https://github.com/rust-lang/miri/issues/932 // SGX doesn't support CPUID: https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/core_arch/src/x86/cpuid.rs#L102-L105 #[cfg(any(target_env = "sgx", miri))] -compile_error!("internal error: this module is not supported on this target"); +compile_error!("internal error: this module is not supported on this environment"); include!("common.rs"); #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; -use core::arch::x86_64::CpuidResult; +use core::arch::x86_64::{CpuidResult, _xgetbv}; // Workaround for https://github.com/rust-lang/rust/issues/101346 // It is not clear if our use cases are affected, but we implement this just in case. @@ -57,38 +59,31 @@ unsafe fn _vendor_id() -> [u8; 12] { #[cold] fn _detect(info: &mut CpuInfo) { - // Miri doesn't support inline assembly used in __cpuid - // SGX doesn't support CPUID: https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/core_arch/src/x86/cpuid.rs#L102-L105 - #[cfg(not(any(target_env = "sgx", miri)))] - { - use core::arch::x86_64::_xgetbv; - - // SAFETY: Calling `_vendor_id`` is safe because the CPU has `cpuid` support. - let vendor_id = unsafe { _vendor_id() }; + // SAFETY: Calling `_vendor_id`` is safe because the CPU has `cpuid` support. + let vendor_id = unsafe { _vendor_id() }; - // SAFETY: Calling `__cpuid`` is safe because the CPU has `cpuid` support. - let proc_info_ecx = unsafe { __cpuid(0x0000_0001_u32).ecx }; + // SAFETY: Calling `__cpuid`` is safe because the CPU has `cpuid` support. + let proc_info_ecx = unsafe { __cpuid(0x0000_0001_u32).ecx }; - // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/x86.rs#L111 - if test(proc_info_ecx, 13) { - info.set(CpuInfo::HAS_CMPXCHG16B); - } + // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/x86.rs#L111 + if test(proc_info_ecx, 13) { + info.set(CpuInfo::HAS_CMPXCHG16B); + } - // VMOVDQA is atomic on Intel and AMD CPUs with AVX. - // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688 for details. - if vendor_id == VENDOR_ID_INTEL || vendor_id == VENDOR_ID_AMD { - // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/x86.rs#L131-L224 - let cpu_xsave = test(proc_info_ecx, 26); - if cpu_xsave { - let cpu_osxsave = test(proc_info_ecx, 27); - if cpu_osxsave { - // SAFETY: Calling `_xgetbv`` is safe because the CPU has `xsave` support - // and OS has set `osxsave`. - let xcr0 = unsafe { _xgetbv(0) }; - let os_avx_support = xcr0 & 6 == 6; - if os_avx_support && test(proc_info_ecx, 28) { - info.set(CpuInfo::HAS_VMOVDQA_ATOMIC); - } + // VMOVDQA is atomic on Intel and AMD CPUs with AVX. + // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688 for details. + if vendor_id == VENDOR_ID_INTEL || vendor_id == VENDOR_ID_AMD { + // https://github.com/rust-lang/stdarch/blob/a0c30f3e3c75adcd6ee7efc94014ebcead61c507/crates/std_detect/src/detect/os/x86.rs#L131-L224 + let cpu_xsave = test(proc_info_ecx, 26); + if cpu_xsave { + let cpu_osxsave = test(proc_info_ecx, 27); + if cpu_osxsave { + // SAFETY: Calling `_xgetbv`` is safe because the CPU has `xsave` support + // and OS has set `osxsave`. 
+ let xcr0 = unsafe { _xgetbv(0) }; + let os_avx_support = xcr0 & 6 == 6; + if os_avx_support && test(proc_info_ecx, 28) { + info.set(CpuInfo::HAS_VMOVDQA_ATOMIC); } } } @@ -109,9 +104,6 @@ mod tests { #[cfg(not(portable_atomic_test_outline_atomics_detect_false))] #[test] - // SGX doesn't support CPUID. - // Miri doesn't support inline assembly used in __cpuid. - #[cfg_attr(any(target_env = "sgx", miri), ignore)] fn test_cpuid() { assert_eq!(std::is_x86_feature_detected!("cmpxchg16b"), detect().has_cmpxchg16b()); let vendor_id = unsafe { _vendor_id() }; diff --git a/vendor/portable-atomic/src/imp/atomic128/intrinsics.rs b/vendor/portable-atomic/src/imp/atomic128/intrinsics.rs index 0365da555..6982bd1fd 100644 --- a/vendor/portable-atomic/src/imp/atomic128/intrinsics.rs +++ b/vendor/portable-atomic/src/imp/atomic128/intrinsics.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Atomic{I,U}128 implementation without inline assembly. // // Note: This module is currently only enabled on Miri and ThreadSanitizer which @@ -10,19 +12,18 @@ // implementation with inline assembly. // // Note: -// - This currently always needs nightly compilers. On x86_64, the stabilization -// of `core::arch::x86_64::cmpxchg16b` has been recently merged to stdarch: -// https://github.com/rust-lang/stdarch/pull/1358 +// - This currently needs Rust 1.70 on x86_64, otherwise nightly compilers. // - On powerpc64, this requires LLVM 15+ and pwr8+ (quadword-atomics LLVM target feature): // https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445 -// - On aarch64 big-endian, LLVM (as of 15) generates broken code. -// (on cfg(miri)/cfg(sanitize) it is fine though) -// - On s390x, LLVM (as of 16) generates libcalls for operations other than load/store/cmpxchg: +// - On aarch64 big-endian, LLVM (as of 17) generates broken code. (wrong result in stress test) +// (on cfg(miri)/cfg(sanitize) it may be fine though) +// - On s390x, LLVM (as of 17) generates libcalls for operations other than load/store/cmpxchg: // https://godbolt.org/z/5a5T4hxMh -// https://github.com/llvm/llvm-project/blob/2cc0c0de802178dc7e5408497e2ec53b6c9728fa/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll +// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll // https://reviews.llvm.org/D146425 -// - On powerpc64, LLVM (as of 16) doesn't support 128-bit atomic min/max: -// https://godbolt.org/z/3rebKcbdf +// - On powerpc64, LLVM (as of 17) doesn't support 128-bit atomic min/max: +// https://github.com/llvm/llvm-project/issues/68390 +// - On powerpc64le, LLVM (as of 17) generates broken code. (wrong result from fetch_add) // // Refs: https://github.com/rust-lang/rust/blob/1.70.0/library/core/src/sync/atomic.rs @@ -130,10 +131,12 @@ unsafe fn atomic_compare_exchange( // SAFETY: the caller must guarantee that `dst` is valid for both writes and // reads, 16-byte aligned (required by CMPXCHG16B), that there are no // concurrent non-atomic operations, and that the CPU supports CMPXCHG16B. - let res = unsafe { core::arch::x86_64::cmpxchg16b(dst, old, new, success, failure) }; - (res, res == old) + let prev = unsafe { core::arch::x86_64::cmpxchg16b(dst, old, new, success, failure) }; + (prev, prev == old) } - #[cfg(portable_atomic_no_cmpxchg16b_intrinsic_stronger_failure_ordering)] + // The stronger failure ordering in cmpxchg16b_intrinsic is actually supported + // before stabilization, but we do not have a specific cfg for it. 
+ #[cfg(portable_atomic_unstable_cmpxchg16b_intrinsic)] let success = crate::utils::upgrade_success_ordering(success, failure); #[cfg(target_feature = "cmpxchg16b")] // SAFETY: the caller must guarantee that `dst` is valid for both writes and @@ -236,22 +239,22 @@ where unsafe { // This is a private function and all instances of `f` only operate on the value // loaded, so there is no need to synchronize the first load/failed CAS. - let mut old = atomic_load(dst, Ordering::Relaxed); + let mut prev = atomic_load(dst, Ordering::Relaxed); loop { - let next = f(old); - match atomic_compare_exchange_weak(dst, old, next, order, Ordering::Relaxed) { + let next = f(prev); + match atomic_compare_exchange_weak(dst, prev, next, order, Ordering::Relaxed) { Ok(x) => return x, - Err(x) => old = x, + Err(x) => prev = x, } } } } // On x86_64, we use core::arch::x86_64::cmpxchg16b instead of core::intrinsics. -// On s390x, LLVM (as of 16) generates libcalls for operations other than load/store/cmpxchg: https://godbolt.org/z/5a5T4hxMh +// On s390x, LLVM generates libcalls for operations other than load/store/cmpxchg (see also module-level comment). #[cfg(any(target_arch = "x86_64", target_arch = "s390x"))] atomic_rmw_by_atomic_update!(); -// On powerpc64, LLVM (as of 16) doesn't support 128-bit atomic min/max: https://godbolt.org/z/3rebKcbdf +// On powerpc64, LLVM doesn't support 128-bit atomic min/max (see also module-level comment). #[cfg(target_arch = "powerpc64")] atomic_rmw_by_atomic_update!(cmp); @@ -378,6 +381,7 @@ unsafe fn atomic_xor(dst: *mut u128, val: u128, order: Ordering) -> u128 { #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces unsafe fn atomic_max(dst: *mut u128, val: u128, order: Ordering) -> i128 { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] // SAFETY: the caller must uphold the safety contract. unsafe { match order { @@ -395,6 +399,7 @@ unsafe fn atomic_max(dst: *mut u128, val: u128, order: Ordering) -> i128 { #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces unsafe fn atomic_min(dst: *mut u128, val: u128, order: Ordering) -> i128 { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] // SAFETY: the caller must uphold the safety contract. unsafe { match order { @@ -447,7 +452,7 @@ unsafe fn atomic_umin(dst: *mut u128, val: u128, order: Ordering) -> u128 { #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces unsafe fn atomic_not(dst: *mut u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. - unsafe { atomic_xor(dst, core::u128::MAX, order) } + unsafe { atomic_xor(dst, u128::MAX, order) } } #[cfg(not(any(target_arch = "x86_64", target_arch = "s390x")))] diff --git a/vendor/portable-atomic/src/imp/atomic128/macros.rs b/vendor/portable-atomic/src/imp/atomic128/macros.rs index fd71ef63d..d32217e8a 100644 --- a/vendor/portable-atomic/src/imp/atomic128/macros.rs +++ b/vendor/portable-atomic/src/imp/atomic128/macros.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + macro_rules! 
atomic128 { ($atomic_type:ident, $int_type:ident, $atomic_max:ident, $atomic_min:ident) => { #[repr(C, align(16))] diff --git a/vendor/portable-atomic/src/imp/atomic128/powerpc64.rs b/vendor/portable-atomic/src/imp/atomic128/powerpc64.rs index 454f2097f..bb4164ecc 100644 --- a/vendor/portable-atomic/src/imp/atomic128/powerpc64.rs +++ b/vendor/portable-atomic/src/imp/atomic128/powerpc64.rs @@ -1,11 +1,13 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Atomic{I,U}128 implementation on PowerPC64. // // powerpc64 on pwr8+ support 128-bit atomics: // https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445 -// https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll -// https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/test/CodeGen/PowerPC/atomics-i128.ll +// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll +// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/PowerPC/atomics-i128.ll // -// powerpc64le is pwr8+ by default https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/lib/Target/PowerPC/PPC.td#L663 +// powerpc64le is pwr8+ by default https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/Target/PowerPC/PPC.td#L663 // See also https://github.com/rust-lang/rust/issues/59932 // // Note that we do not separate LL and SC into separate functions, but handle @@ -22,8 +24,8 @@ // - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit // // Generated asm: -// - powerpc64 (pwr8) https://godbolt.org/z/sj9ao7qKd -// - powerpc64le https://godbolt.org/z/hY7Wdf6aT +// - powerpc64 (pwr8) https://godbolt.org/z/nG5dGa38a +// - powerpc64le https://godbolt.org/z/6c99s75e4 include!("macros.rs"); @@ -50,10 +52,11 @@ mod fallback; target_os = "linux", any( target_env = "gnu", - all(target_env = "musl", not(target_feature = "crt-static")), + all(any(target_env = "musl", target_env = "ohos"), not(target_feature = "crt-static")), portable_atomic_outline_atomics, ), ), + target_os = "android", target_os = "freebsd", ))] #[path = "detect/auxv.rs"] @@ -61,6 +64,8 @@ mod detect; use core::{arch::asm, sync::atomic::Ordering}; +use crate::utils::{Pair, U128}; + macro_rules! debug_assert_pwr8 { () => { #[cfg(not(any( @@ -89,64 +94,17 @@ macro_rules! debug_assert_pwr8 { // instructions across the if condition might introduce undefined behavior. // (see also https://rust-lang.github.io/rfcs/2045-target-feature.html#safely-inlining-target_feature-functions-on-more-contexts) // However, our code uses the ifunc helper macro that works with function pointers, -// so we usually don't have to worry about this. -#[cfg(not(any( - target_feature = "quadword-atomics", - portable_atomic_target_feature = "quadword-atomics", -)))] +// so we don't have to worry about this unless calling without helper macro. macro_rules! start_pwr8 { () => { ".machine push\n.machine power8" }; } -#[cfg(not(any( - target_feature = "quadword-atomics", - portable_atomic_target_feature = "quadword-atomics", -)))] macro_rules! end_pwr8 { () => { ".machine pop" }; } -#[cfg(any( - target_feature = "quadword-atomics", - portable_atomic_target_feature = "quadword-atomics", -))] -macro_rules! start_pwr8 { - () => { - "" - }; -} -#[cfg(any( - target_feature = "quadword-atomics", - portable_atomic_target_feature = "quadword-atomics", -))] -macro_rules! end_pwr8 { - () => { - "" - }; -} - -/// A 128-bit value represented as a pair of 64-bit values. 
-/// -/// This type is `#[repr(C)]`, both fields have the same in-memory representation -/// and are plain old datatypes, so access to the fields is always safe. -#[derive(Clone, Copy)] -#[repr(C)] -union U128 { - whole: u128, - pair: Pair, -} -// A pair of 64-bit values in native-endian order. -#[derive(Clone, Copy)] -#[repr(C)] -struct Pair { - #[cfg(target_endian = "big")] - hi: u64, - lo: u64, - #[cfg(target_endian = "little")] - hi: u64, -} macro_rules! atomic_rmw { ($op:ident, $order:ident) => { @@ -161,31 +119,35 @@ macro_rules! atomic_rmw { }; } +// Extracts and checks the EQ bit of cr0. +#[inline] +fn extract_cr0(r: u64) -> bool { + r & 0x20000000 != 0 +} + +#[cfg(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", +))] +use atomic_load_pwr8 as atomic_load; +#[cfg(not(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", +)))] #[inline] unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 { - #[cfg(any( - target_feature = "quadword-atomics", - portable_atomic_target_feature = "quadword-atomics", - ))] - // SAFETY: the caller must uphold the safety contract. - unsafe { - atomic_load_pwr8(src, order) + fn_alias! { + // inline(never) is just a hint and also not strictly necessary + // because we use ifunc helper macro, but used for clarity. + #[inline(never)] + unsafe fn(src: *mut u128) -> u128; + atomic_load_pwr8_relaxed = atomic_load_pwr8(Ordering::Relaxed); + atomic_load_pwr8_acquire = atomic_load_pwr8(Ordering::Acquire); + atomic_load_pwr8_seqcst = atomic_load_pwr8(Ordering::SeqCst); } - #[cfg(not(any( - target_feature = "quadword-atomics", - portable_atomic_target_feature = "quadword-atomics", - )))] // SAFETY: the caller must uphold the safety contract. + // we only calls atomic_load_pwr8 if quadword-atomics is available. unsafe { - fn_alias! { - // inline(never) is just a hint and also not strictly necessary - // because we use ifunc helper macro, but used for clarity. - #[inline(never)] - unsafe fn(src: *mut u128) -> u128; - atomic_load_pwr8_relaxed = atomic_load_pwr8(Ordering::Relaxed); - atomic_load_pwr8_acquire = atomic_load_pwr8(Ordering::Acquire); - atomic_load_pwr8_seqcst = atomic_load_pwr8(Ordering::SeqCst); - } match order { Ordering::Relaxed => { ifunc!(unsafe fn(src: *mut u128) -> u128 { @@ -273,31 +235,29 @@ unsafe fn atomic_load_pwr8(src: *mut u128, order: Ordering) -> u128 { } } +#[cfg(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", +))] +use atomic_store_pwr8 as atomic_store; +#[cfg(not(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", +)))] #[inline] unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) { - #[cfg(any( - target_feature = "quadword-atomics", - portable_atomic_target_feature = "quadword-atomics", - ))] - // SAFETY: the caller must uphold the safety contract. - unsafe { - atomic_store_pwr8(dst, val, order); + fn_alias! { + // inline(never) is just a hint and also not strictly necessary + // because we use ifunc helper macro, but used for clarity. 
+ #[inline(never)] + unsafe fn(dst: *mut u128, val: u128); + atomic_store_pwr8_relaxed = atomic_store_pwr8(Ordering::Relaxed); + atomic_store_pwr8_release = atomic_store_pwr8(Ordering::Release); + atomic_store_pwr8_seqcst = atomic_store_pwr8(Ordering::SeqCst); } - #[cfg(not(any( - target_feature = "quadword-atomics", - portable_atomic_target_feature = "quadword-atomics", - )))] // SAFETY: the caller must uphold the safety contract. + // we only calls atomic_store_pwr8 if quadword-atomics is available. unsafe { - fn_alias! { - // inline(never) is just a hint and also not strictly necessary - // because we use ifunc helper macro, but used for clarity. - #[inline(never)] - unsafe fn(dst: *mut u128, val: u128); - atomic_store_pwr8_relaxed = atomic_store_pwr8(Ordering::Relaxed); - atomic_store_pwr8_release = atomic_store_pwr8(Ordering::Release); - atomic_store_pwr8_seqcst = atomic_store_pwr8(Ordering::SeqCst); - } match order { Ordering::Relaxed => { ifunc!(unsafe fn(dst: *mut u128, val: u128) { @@ -380,17 +340,18 @@ unsafe fn atomic_compare_exchange( portable_atomic_target_feature = "quadword-atomics", ))] // SAFETY: the caller must uphold the safety contract. - let (res, ok) = unsafe { atomic_compare_exchange_pwr8(dst, old, new, success) }; + // cfg guarantees that quadword atomics instructions are available at compile-time. + let (prev, ok) = unsafe { atomic_compare_exchange_pwr8(dst, old, new, success) }; #[cfg(not(any( target_feature = "quadword-atomics", portable_atomic_target_feature = "quadword-atomics", )))] // SAFETY: the caller must uphold the safety contract. - let (res, ok) = unsafe { atomic_compare_exchange_ifunc(dst, old, new, success) }; + let (prev, ok) = unsafe { atomic_compare_exchange_ifunc(dst, old, new, success) }; if ok { - Ok(res) + Ok(prev) } else { - Err(res) + Err(prev) } } #[inline] @@ -406,10 +367,11 @@ unsafe fn atomic_compare_exchange_pwr8( // SAFETY: the caller must uphold the safety contract. // // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA - let res = unsafe { + unsafe { let old = U128 { whole: old }; let new = U128 { whole: new }; let (mut prev_hi, mut prev_lo); + let mut r; macro_rules! cmpxchg { ($acquire:tt, $release:tt) => { asm!( @@ -420,17 +382,19 @@ unsafe fn atomic_compare_exchange_pwr8( "xor {tmp_lo}, %r9, {old_lo}", "xor {tmp_hi}, %r8, {old_hi}", "or. {tmp_lo}, {tmp_lo}, {tmp_hi}", - "bne %cr0, 3f", + "bne %cr0, 3f", // jump if compare failed "stqcx. %r6, 0, {dst}", - "bne %cr0, 2b", + "bne %cr0, 2b", // continue loop if store failed "3:", + // if compare failed EQ bit is cleared, if stqcx succeeds EQ bit is set. + "mfcr {tmp_lo}", $acquire, end_pwr8!(), dst = in(reg_nonzero) ptr_reg!(dst), - old_hi = in(reg_nonzero) old.pair.hi, - old_lo = in(reg_nonzero) old.pair.lo, - tmp_hi = out(reg_nonzero) _, - tmp_lo = out(reg_nonzero) _, + old_hi = in(reg) old.pair.hi, + old_lo = in(reg) old.pair.lo, + tmp_hi = out(reg) _, + tmp_lo = out(reg) r, // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or grater. 
in("r6") new.pair.hi, @@ -443,14 +407,97 @@ unsafe fn atomic_compare_exchange_pwr8( }; } atomic_rmw!(cmpxchg, order); - U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole - }; - (res, res == old) + (U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole, extract_cr0(r)) + } } -// TODO: LLVM appears to generate strong CAS for powerpc64 128-bit weak CAS, -// so we always use strong CAS for now. +// Always use strong CAS for outline-atomics. +#[cfg(not(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", +)))] use atomic_compare_exchange as atomic_compare_exchange_weak; +#[cfg(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", +))] +#[inline] +unsafe fn atomic_compare_exchange_weak( + dst: *mut u128, + old: u128, + new: u128, + success: Ordering, + failure: Ordering, +) -> Result<u128, u128> { + let success = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must uphold the safety contract. + // cfg guarantees that quadword atomics instructions are available at compile-time. + let (prev, ok) = unsafe { atomic_compare_exchange_weak_pwr8(dst, old, new, success) }; + if ok { + Ok(prev) + } else { + Err(prev) + } +} +#[cfg(any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", +))] +#[inline] +unsafe fn atomic_compare_exchange_weak_pwr8( + dst: *mut u128, + old: u128, + new: u128, + order: Ordering, +) -> (u128, bool) { + debug_assert!(dst as usize % 16 == 0); + debug_assert_pwr8!(); + + // SAFETY: the caller must uphold the safety contract. + // + // Refs: "4.6.2.2 128-bit Load And Reserve and Store Conditional Instructions" of Power ISA + unsafe { + let old = U128 { whole: old }; + let new = U128 { whole: new }; + let (mut prev_hi, mut prev_lo); + let mut r; + macro_rules! cmpxchg_weak { + ($acquire:tt, $release:tt) => { + asm!( + start_pwr8!(), + $release, + "lqarx %r8, 0, {dst}", + "xor {tmp_lo}, %r9, {old_lo}", + "xor {tmp_hi}, %r8, {old_hi}", + "or. {tmp_lo}, {tmp_lo}, {tmp_hi}", + "bne %cr0, 3f", // jump if compare failed + "stqcx. %r6, 0, {dst}", + "3:", + // if compare or stqcx failed EQ bit is cleared, if stqcx succeeds EQ bit is set. + "mfcr {tmp_lo}", + $acquire, + end_pwr8!(), + dst = in(reg_nonzero) ptr_reg!(dst), + old_hi = in(reg) old.pair.hi, + old_lo = in(reg) old.pair.lo, + tmp_hi = out(reg) _, + tmp_lo = out(reg) r, + // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. + // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or grater. + in("r6") new.pair.hi, + in("r7") new.pair.lo, + out("r8") prev_hi, + out("r9") prev_lo, + out("cr0") _, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw!(cmpxchg_weak, order); + (U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole, extract_cr0(r)) + } +} #[cfg(any( target_feature = "quadword-atomics", @@ -501,7 +548,7 @@ unsafe fn atomic_swap_pwr8(dst: *mut u128, val: u128, order: Ordering) -> u128 { /// $op can use the following registers: /// - val_hi/val_lo pair: val argument (read-only for `$op`) /// - r6/r7 pair: previous value loaded by ll (read-only for `$op`) -/// - r8/r9 pair: new value that will to stored by sc +/// - r8/r9 pair: new value that will be stored by sc macro_rules! atomic_rmw_ll_sc_3 { ($name:ident as $reexport_name:ident, [$($reg:tt)*], $($op:tt)*) => { #[cfg(any( @@ -530,8 +577,8 @@ macro_rules! 
atomic_rmw_ll_sc_3 { $acquire, end_pwr8!(), dst = in(reg_nonzero) ptr_reg!(dst), - val_hi = in(reg_nonzero) val.pair.hi, - val_lo = in(reg_nonzero) val.pair.lo, + val_hi = in(reg) val.pair.hi, + val_lo = in(reg) val.pair.lo, $($reg)* // Quadword atomic instructions work with even/odd pair of specified register and subsequent register. // We cannot use r1 (sp) and r2 (system reserved), so start with r4 or grater. @@ -555,7 +602,7 @@ macro_rules! atomic_rmw_ll_sc_3 { /// /// $op can use the following registers: /// - r6/r7 pair: previous value loaded by ll (read-only for `$op`) -/// - r8/r9 pair: new value that will to stored by sc +/// - r8/r9 pair: new value that will be stored by sc macro_rules! atomic_rmw_ll_sc_2 { ($name:ident as $reexport_name:ident, [$($reg:tt)*], $($op:tt)*) => { #[cfg(any( @@ -679,7 +726,7 @@ use atomic_not_pwr8 as atomic_not; #[inline] unsafe fn atomic_not_pwr8(dst: *mut u128, order: Ordering) -> u128 { // SAFETY: the caller must uphold the safety contract. - unsafe { atomic_xor_pwr8(dst, core::u128::MAX, order) } + unsafe { atomic_xor_pwr8(dst, u128::MAX, order) } } #[cfg(portable_atomic_llvm_16)] @@ -691,7 +738,7 @@ atomic_rmw_ll_sc_2! { // LLVM 15 miscompiles subfic. #[cfg(not(portable_atomic_llvm_16))] atomic_rmw_ll_sc_2! { - atomic_neg_pwr8 as atomic_neg, [zero = in(reg_nonzero) 0_u64, out("xer") _,], + atomic_neg_pwr8 as atomic_neg, [zero = in(reg) 0_u64, out("xer") _,], "subc %r9, {zero}, %r7", "subfze %r8, %r6", } diff --git a/vendor/portable-atomic/src/imp/atomic128/s390x.rs b/vendor/portable-atomic/src/imp/atomic128/s390x.rs index b6789aea5..37c2063aa 100644 --- a/vendor/portable-atomic/src/imp/atomic128/s390x.rs +++ b/vendor/portable-atomic/src/imp/atomic128/s390x.rs @@ -1,10 +1,12 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Atomic{I,U}128 implementation on s390x. // // s390x supports 128-bit atomic load/store/cmpxchg: // https://github.com/llvm/llvm-project/commit/a11f63a952664f700f076fd754476a2b9eb158cc // -// As of LLVM 16, LLVM's minimal supported architecture level is z10: -// https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/lib/Target/SystemZ/SystemZProcessors.td) +// LLVM's minimal supported architecture level is z10: +// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/Target/SystemZ/SystemZProcessors.td) // This does not appear to have changed since the current s390x backend was added in LLVM 3.3: // https://github.com/llvm/llvm-project/commit/5f613dfd1f7edb0ae95d521b7107b582d9df5103#diff-cbaef692b3958312e80fd5507a7e2aff071f1acb086f10e8a96bc06a7bb289db // @@ -17,31 +19,15 @@ // - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit // // Generated asm: -// - s390x https://godbolt.org/z/q4cvbaEYh -// - s390x (z196) https://godbolt.org/z/Tj3vonsoW -// - s390x (z15) https://godbolt.org/z/Pz5sq8fTz +// - s390x https://godbolt.org/z/b11znnEh4 +// - s390x (z196) https://godbolt.org/z/s5n9PGcv6 +// - s390x (z15) https://godbolt.org/z/Wf49h7bPf include!("macros.rs"); use core::{arch::asm, sync::atomic::Ordering}; -/// A 128-bit value represented as a pair of 64-bit values. -/// -/// This type is `#[repr(C)]`, both fields have the same in-memory representation -/// and are plain old datatypes, so access to the fields is always safe. -#[derive(Clone, Copy)] -#[repr(C)] -union U128 { - whole: u128, - pair: Pair, -} -// A pair of 64-bit values in native-endian (big-endian) order. 
-#[derive(Clone, Copy)] -#[repr(C)] -struct Pair { - hi: u64, - lo: u64, -} +use crate::utils::{Pair, U128}; // Use distinct operands on z196 or later, otherwise split to lgr and $op. #[cfg(any(target_feature = "distinct-ops", portable_atomic_target_feature = "distinct-ops"))] @@ -155,7 +141,7 @@ unsafe fn atomic_compare_exchange( debug_assert!(dst as usize % 16 == 0); // SAFETY: the caller must uphold the safety contract. - let res = unsafe { + let prev = unsafe { // atomic CAS is always SeqCst. let old = U128 { whole: old }; let new = U128 { whole: new }; @@ -168,14 +154,15 @@ unsafe fn atomic_compare_exchange( inout("r1") old.pair.lo => prev_lo, in("r12") new.pair.hi, in("r13") new.pair.lo, + // Do not use `preserves_flags` because CDSG modifies the condition code. options(nostack), ); U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole }; - if res == old { - Ok(res) + if prev == old { + Ok(prev) } else { - Err(res) + Err(prev) } } @@ -195,12 +182,12 @@ where unsafe { // This is a private function and all instances of `f` only operate on the value // loaded, so there is no need to synchronize the first load/failed CAS. - let mut old = atomic_load(dst, Ordering::Relaxed); + let mut prev = atomic_load(dst, Ordering::Relaxed); loop { - let next = f(old); - match atomic_compare_exchange_weak(dst, old, next, order, Ordering::Relaxed) { + let next = f(prev); + match atomic_compare_exchange_weak(dst, prev, next, order, Ordering::Relaxed) { Ok(x) => return x, - Err(x) => old = x, + Err(x) => prev = x, } } } @@ -231,6 +218,7 @@ unsafe fn atomic_swap(dst: *mut u128, val: u128, _order: Ordering) -> u128 { out("r1") prev_lo, in("r12") val.pair.hi, in("r13") val.pair.lo, + // Do not use `preserves_flags` because CDSG modifies the condition code. options(nostack), ); U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole @@ -243,7 +231,7 @@ unsafe fn atomic_swap(dst: *mut u128, val: u128, _order: Ordering) -> u128 { /// `$op` can use the following registers: /// - val_hi/val_lo pair: val argument (read-only for `$op`) /// - r0/r1 pair: previous value loaded (read-only for `$op`) -/// - r12/r13 pair: new value that will to stored +/// - r12/r13 pair: new value that will be stored // We could use atomic_update here, but using an inline assembly allows omitting // the comparison of results and the storing/comparing of condition flags. macro_rules! atomic_rmw_cas_3 { @@ -271,6 +259,7 @@ macro_rules! atomic_rmw_cas_3 { out("r1") prev_lo, out("r12") _, out("r13") _, + // Do not use `preserves_flags` because CDSG modifies the condition code. options(nostack), ); U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole @@ -283,7 +272,7 @@ macro_rules! atomic_rmw_cas_3 { /// /// `$op` can use the following registers: /// - r0/r1 pair: previous value loaded (read-only for `$op`) -/// - r12/r13 pair: new value that will to stored +/// - r12/r13 pair: new value that will be stored // We could use atomic_update here, but using an inline assembly allows omitting // the comparison of results and the storing/comparing of condition flags. macro_rules! atomic_rmw_cas_2 { @@ -307,6 +296,7 @@ macro_rules! atomic_rmw_cas_2 { out("r1") prev_lo, out("r12") _, out("r13") _, + // Do not use `preserves_flags` because CDSG modifies the condition code. options(nostack), ); U128 { pair: Pair { hi: prev_hi, lo: prev_lo } }.whole @@ -425,7 +415,7 @@ atomic_rmw_cas_3! { // We use atomic_update for atomic min/max on pre-z196 because // z10 doesn't seem to have a good way to implement 128-bit min/max. 
// loc{,g}r requires z196 or later. -// https://godbolt.org/z/qodPK45qz +// https://godbolt.org/z/j8KG9q5oq #[cfg(not(any( target_feature = "load-store-on-cond", portable_atomic_target_feature = "load-store-on-cond", diff --git a/vendor/portable-atomic/src/imp/atomic128/x86_64.rs b/vendor/portable-atomic/src/imp/atomic128/x86_64.rs index e75540cd1..3b9d141ab 100644 --- a/vendor/portable-atomic/src/imp/atomic128/x86_64.rs +++ b/vendor/portable-atomic/src/imp/atomic128/x86_64.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Atomic{I,U}128 implementation on x86_64 using CMPXCHG16B (DWCAS). // // Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use @@ -8,7 +10,7 @@ // - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit // // Generated asm: -// - x86_64 (+cmpxchg16b) https://godbolt.org/z/WPvfn16sY +// - x86_64 (+cmpxchg16b) https://godbolt.org/z/55n54WeKr include!("macros.rs"); @@ -25,6 +27,8 @@ mod detect; use core::arch::asm; use core::sync::atomic::Ordering; +use crate::utils::{Pair, U128}; + // Asserts that the function is called in the correct context. macro_rules! debug_assert_cmpxchg16b { () => { @@ -61,24 +65,6 @@ macro_rules! ptr_modifier { }; } -/// A 128-bit value represented as a pair of 64-bit values. -/// -/// This type is `#[repr(C)]`, both fields have the same in-memory representation -/// and are plain old datatypes, so access to the fields is always safe. -#[derive(Clone, Copy)] -#[repr(C)] -union U128 { - whole: u128, - pair: Pair, -} -// A pair of 64-bit values in native-endian (little-endian) order. -#[derive(Clone, Copy)] -#[repr(C)] -struct Pair { - lo: u64, - hi: u64, -} - #[cfg_attr( not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")), target_feature(enable = "cmpxchg16b") @@ -293,7 +279,7 @@ unsafe fn atomic_load_cmpxchg16b(src: *mut u128) -> u128 { // omitting the storing of condition flags and avoid use of xchg to handle rbx. unsafe { // cmpxchg16b is always SeqCst. - let (prev_lo, prev_hi); + let (out_lo, out_hi); macro_rules! cmpxchg16b { ($rdi:tt) => { asm!( @@ -305,8 +291,8 @@ unsafe fn atomic_load_cmpxchg16b(src: *mut u128) -> u128 { // set old/new args of cmpxchg16b to 0 (rbx is zeroed after saved to rbx_tmp, to avoid xchg) rbx_tmp = out(reg) _, in("rcx") 0_u64, - inout("rax") 0_u64 => prev_lo, - inout("rdx") 0_u64 => prev_hi, + inout("rax") 0_u64 => out_lo, + inout("rdx") 0_u64 => out_hi, in($rdi) src, // Do not use `preserves_flags` because CMPXCHG16B modifies the ZF flag. options(nostack), @@ -317,7 +303,7 @@ unsafe fn atomic_load_cmpxchg16b(src: *mut u128) -> u128 { cmpxchg16b!("edi"); #[cfg(target_pointer_width = "64")] cmpxchg16b!("rdi"); - U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole } } @@ -401,11 +387,11 @@ unsafe fn atomic_compare_exchange( // SAFETY: the caller must guarantee that `dst` is valid for both writes and // reads, 16-byte aligned, that there are no concurrent non-atomic operations, // and cfg guarantees that CMPXCHG16B is available at compile-time. - let (res, ok) = unsafe { cmpxchg16b(dst, old, new) }; + let (prev, ok) = unsafe { cmpxchg16b(dst, old, new) }; #[cfg(not(any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b")))] // SAFETY: the caller must guarantee that `dst` is valid for both writes and // reads, 16-byte aligned, and that there are no different kinds of concurrent accesses. 
- let (res, ok) = unsafe { + let (prev, ok) = unsafe { ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> (u128, bool) { if detect::detect().has_cmpxchg16b() { cmpxchg16b @@ -416,9 +402,9 @@ unsafe fn atomic_compare_exchange( }) }; if ok { - Ok(res) + Ok(prev) } else { - Err(res) + Err(prev) } } @@ -494,7 +480,7 @@ unsafe fn atomic_swap_cmpxchg16b(dst: *mut u128, val: u128, _order: Ordering) -> /// `$op` can use the following registers: /// - rsi/r8 pair: val argument (read-only for `$op`) /// - rax/rdx pair: previous value loaded (read-only for `$op`) -/// - rbx/rcx pair: new value that will to stored +/// - rbx/rcx pair: new value that will be stored // We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows // omitting the storing/comparing of condition flags and reducing uses of xchg/mov to handle rbx. macro_rules! atomic_rmw_cas_3 { @@ -565,7 +551,7 @@ macro_rules! atomic_rmw_cas_3 { /// /// `$op` can use the following registers: /// - rax/rdx pair: previous value loaded (read-only for `$op`) -/// - rbx/rcx pair: new value that will to stored +/// - rbx/rcx pair: new value that will be stored // We could use CAS loop by atomic_compare_exchange here, but using an inline assembly allows // omitting the storing of condition flags and avoid use of xchg to handle rbx. macro_rules! atomic_rmw_cas_2 { diff --git a/vendor/portable-atomic/src/imp/core_atomic.rs b/vendor/portable-atomic/src/imp/core_atomic.rs index 72da6fa2a..c7f713b53 100644 --- a/vendor/portable-atomic/src/imp/core_atomic.rs +++ b/vendor/portable-atomic/src/imp/core_atomic.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Wrap the standard library's atomic types in newtype. // // This is not a reexport, because we want to backport changes like @@ -136,9 +138,9 @@ macro_rules! atomic_int { #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))] impl_default_no_fetch_ops!($atomic_type, $int_type); #[cfg(not(all( - not(any(miri, portable_atomic_sanitize_thread)), - any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), any(target_arch = "x86", target_arch = "x86_64"), + not(any(miri, portable_atomic_sanitize_thread)), + not(portable_atomic_no_asm), )))] #[cfg_attr( portable_atomic_no_cfg_target_has_atomic, @@ -157,7 +159,12 @@ macro_rules! atomic_int { } #[inline] pub(crate) const fn is_always_lock_free() -> bool { - true + // ESP-IDF targets' 64-bit atomics are not lock-free. + // https://github.com/rust-lang/rust/pull/115577#issuecomment-1732259297 + cfg!(not(all( + any(target_arch = "riscv32", target_arch = "xtensa"), + target_os = "espidf", + ))) | (core::mem::size_of::<$int_type>() < 8) } #[inline] pub(crate) fn get_mut(&mut self) -> &mut $int_type { @@ -273,9 +280,10 @@ macro_rules! atomic_int { portable_atomic_target_feature = "v6", )), ), - // TODO: mips32r6, mips64r6? target_arch = "mips", + target_arch = "mips32r6", target_arch = "mips64", + target_arch = "mips64r6", target_arch = "powerpc", target_arch = "powerpc64", ))] @@ -324,9 +332,10 @@ macro_rules! atomic_int { portable_atomic_target_feature = "v6", )), ), - // TODO: mips32r6, mips64r6? target_arch = "mips", + target_arch = "mips32r6", target_arch = "mips64", + target_arch = "mips64r6", target_arch = "powerpc", target_arch = "powerpc64", ))] @@ -365,9 +374,9 @@ macro_rules! 
atomic_int { self.fetch_xor(NOT_MASK, order) } #[cfg(not(all( - not(any(miri, portable_atomic_sanitize_thread)), - any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), any(target_arch = "x86", target_arch = "x86_64"), + not(any(miri, portable_atomic_sanitize_thread)), + not(portable_atomic_no_asm), )))] #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces @@ -380,9 +389,9 @@ macro_rules! atomic_int { self.fetch_update_(order, $int_type::wrapping_neg) } #[cfg(not(all( - not(any(miri, portable_atomic_sanitize_thread)), - any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), any(target_arch = "x86", target_arch = "x86_64"), + not(any(miri, portable_atomic_sanitize_thread)), + not(portable_atomic_no_asm), )))] #[inline] #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces diff --git a/vendor/portable-atomic/src/imp/fallback/mod.rs b/vendor/portable-atomic/src/imp/fallback/mod.rs index e4875deac..283c98c01 100644 --- a/vendor/portable-atomic/src/imp/fallback/mod.rs +++ b/vendor/portable-atomic/src/imp/fallback/mod.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Fallback implementation using global locks. // // This implementation uses seqlock for global locks. @@ -27,17 +29,21 @@ target_os = "linux", any( target_env = "gnu", - all(target_env = "musl", not(target_feature = "crt-static")), + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + ), portable_atomic_outline_atomics, ), ), + target_os = "android", target_os = "freebsd", ), not(any(miri, portable_atomic_sanitize_thread)), ), all( - any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), target_arch = "arm", + not(portable_atomic_no_asm), any(target_os = "linux", target_os = "android"), not(portable_atomic_no_outline_atomics), ), @@ -144,40 +150,20 @@ macro_rules! atomic { for i in 0..Self::LEN { dst[i] = chunks[i].load(Ordering::Relaxed); } - // SAFETY: integers are plain old datatypes so we can always transmute to them. + // SAFETY: integers are plain old data types so we can always transmute to them. unsafe { mem::transmute::<[Chunk; Self::LEN], $int_type>(dst) } } #[inline] fn read(&self, _guard: &SeqLockWriteGuard<'static>) -> $int_type { - // SAFETY: - // - The guard guarantees that we hold the lock to write. - // - The raw pointer is valid because we got it from a reference. - // - // Unlike optimistic_read/write, the atomic operation is not required, - // because we hold the lock to write so that other threads cannot - // perform concurrent write operations. - // - // At the hardware level, core::sync::atomic::Atomic*::load used in optimistic_read - // may be lowered to atomic write operations by LLVM, but it is still considered a - // read operation from the view of the (software) memory model, except that it is - // not allowed in read-only memory (due to UnsafeCell, self.v is not read-only memory). - // See also https://github.com/rust-lang/miri/issues/2463. - // (Note that the above property is about the assembly generated by inline assembly - // or LLVM's backend. Doing it using write operations written in normal Rust code - // or LLVM IR is considered UB, even if it never mutates the value. 
See also the - // above Miri issue and https://github.com/rust-lang/rust/issues/32976#issuecomment-446775360) - // - // Also, according to atomic-memcpy's asm test, there seems - // to be no tier 1 or tier 2 platform that generates such code - // for a pointer-width relaxed load + acquire fence: - // https://github.com/taiki-e/atomic-memcpy/tree/v0.1.3/tests/asm-test/asm - unsafe { self.v.get().read() } + // This calls optimistic_read that can return teared value, but the resulting value + // is guaranteed not to be teared because we hold the lock to write. + self.optimistic_read() } #[inline] fn write(&self, val: $int_type, _guard: &SeqLockWriteGuard<'static>) { - // SAFETY: integers are plain old datatypes so we can always transmute them to arrays of integers. + // SAFETY: integers are plain old data types so we can always transmute them to arrays of integers. let val = unsafe { mem::transmute::<$int_type, [Chunk; Self::LEN]>(val) }; // SAFETY: // - The guard guarantees that we hold the lock to write. @@ -258,9 +244,9 @@ macro_rules! atomic { #[inline] pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type { let guard = lock(self.v.get() as usize).write(); - let result = self.read(&guard); + let prev = self.read(&guard); self.write(val, &guard); - result + prev } #[inline] @@ -274,14 +260,14 @@ macro_rules! atomic { ) -> Result<$int_type, $int_type> { crate::utils::assert_compare_exchange_ordering(success, failure); let guard = lock(self.v.get() as usize).write(); - let result = self.read(&guard); - if result == current { + let prev = self.read(&guard); + if prev == current { self.write(new, &guard); - Ok(result) + Ok(prev) } else { // The value hasn't been changed. Drop the guard without incrementing the stamp. guard.abort(); - Err(result) + Err(prev) } } @@ -300,73 +286,73 @@ macro_rules! 
atomic { #[inline] pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type { let guard = lock(self.v.get() as usize).write(); - let result = self.read(&guard); - self.write(result.wrapping_add(val), &guard); - result + let prev = self.read(&guard); + self.write(prev.wrapping_add(val), &guard); + prev } #[inline] pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type { let guard = lock(self.v.get() as usize).write(); - let result = self.read(&guard); - self.write(result.wrapping_sub(val), &guard); - result + let prev = self.read(&guard); + self.write(prev.wrapping_sub(val), &guard); + prev } #[inline] pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type { let guard = lock(self.v.get() as usize).write(); - let result = self.read(&guard); - self.write(result & val, &guard); - result + let prev = self.read(&guard); + self.write(prev & val, &guard); + prev } #[inline] pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type { let guard = lock(self.v.get() as usize).write(); - let result = self.read(&guard); - self.write(!(result & val), &guard); - result + let prev = self.read(&guard); + self.write(!(prev & val), &guard); + prev } #[inline] pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type { let guard = lock(self.v.get() as usize).write(); - let result = self.read(&guard); - self.write(result | val, &guard); - result + let prev = self.read(&guard); + self.write(prev | val, &guard); + prev } #[inline] pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type { let guard = lock(self.v.get() as usize).write(); - let result = self.read(&guard); - self.write(result ^ val, &guard); - result + let prev = self.read(&guard); + self.write(prev ^ val, &guard); + prev } #[inline] pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type { let guard = lock(self.v.get() as usize).write(); - let result = self.read(&guard); - self.write(core::cmp::max(result, val), &guard); - result + let prev = self.read(&guard); + self.write(core::cmp::max(prev, val), &guard); + prev } #[inline] pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type { let guard = lock(self.v.get() as usize).write(); - let result = self.read(&guard); - self.write(core::cmp::min(result, val), &guard); - result + let prev = self.read(&guard); + self.write(core::cmp::min(prev, val), &guard); + prev } #[inline] pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type { let guard = lock(self.v.get() as usize).write(); - let result = self.read(&guard); - self.write(!result, &guard); - result + let prev = self.read(&guard); + self.write(!prev, &guard); + prev } #[inline] pub(crate) fn not(&self, order: Ordering) { @@ -376,9 +362,9 @@ macro_rules! 
atomic { #[inline] pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type { let guard = lock(self.v.get() as usize).write(); - let result = self.read(&guard); - self.write(result.wrapping_neg(), &guard); - result + let prev = self.read(&guard); + self.write(prev.wrapping_neg(), &guard); + prev } #[inline] pub(crate) fn neg(&self, order: Ordering) { diff --git a/vendor/portable-atomic/src/imp/fallback/outline_atomics.rs b/vendor/portable-atomic/src/imp/fallback/outline_atomics.rs index 985d9ce83..895b60c85 100644 --- a/vendor/portable-atomic/src/imp/fallback/outline_atomics.rs +++ b/vendor/portable-atomic/src/imp/fallback/outline_atomics.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Helper for outline-atomics. // // On architectures where DW atomics are not supported on older CPUs, we use diff --git a/vendor/portable-atomic/src/imp/fallback/seq_lock.rs b/vendor/portable-atomic/src/imp/fallback/seq_lock.rs index d86b02e10..fb6803f0b 100644 --- a/vendor/portable-atomic/src/imp/fallback/seq_lock.rs +++ b/vendor/portable-atomic/src/imp/fallback/seq_lock.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/seq_lock.rs. use core::{ diff --git a/vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs b/vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs index 74b08d24f..e12996f56 100644 --- a/vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs +++ b/vendor/portable-atomic/src/imp/fallback/seq_lock_wide.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/seq_lock_wide.rs. use core::{ diff --git a/vendor/portable-atomic/src/imp/fallback/utils.rs b/vendor/portable-atomic/src/imp/fallback/utils.rs index c78c625b0..760834058 100644 --- a/vendor/portable-atomic/src/imp/fallback/utils.rs +++ b/vendor/portable-atomic/src/imp/fallback/utils.rs @@ -1,7 +1,8 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + use core::ops; -// TODO: mips32r6, mips64r6 -// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/d49a0f8454499ced8af0b61aeb661379c4eb0588/crossbeam-utils/src/cache_padded.rs. +// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/9384f1eb2b356364e201ad38545e03c837d55f3a/crossbeam-utils/src/cache_padded.rs. /// Pads and aligns a value to the length of a cache line. // Starting from Intel's Sandy Bridge, spatial prefetcher is now pulling pairs of 64-byte cache // lines at a time, so we have to align to 128 bytes rather than 64. @@ -19,29 +20,27 @@ use core::ops; // // Sources: // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_ppc64x.go#L9 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/powerpc/include/asm/cache.h#L26 #[cfg_attr( any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "powerpc64"), repr(align(128)) )] -// arm, mips, mips64, riscv64, sparc, and hexagon have 32-byte cache line size. +// arm, mips, mips64, sparc, and hexagon have 32-byte cache line size. 
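These alignment attributes implement the cache-line padding trick adapted from crossbeam's CachePadded. A minimal sketch of the idea, with a 128-byte alignment hard-coded as an example (the real type picks the alignment per architecture via the cfg_attr list above):

// Wrapping a value in an over-aligned struct guarantees that two adjacent
// CachePadded<T> values never share a cache line, avoiding false sharing.
#[repr(align(128))]
pub struct CachePadded<T> {
    value: T,
}

impl<T> CachePadded<T> {
    pub const fn new(value: T) -> Self {
        CachePadded { value }
    }
    pub fn into_inner(self) -> T {
        self.value
    }
}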
// // Sources: // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_arm.go#L7 // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips.go#L7 // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mipsle.go#L7 // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips64x.go#L9 -// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_riscv64.go#L7 // - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L17 // - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/hexagon/include/asm/cache.h#L12 -// -// riscv32 is assumed not to exceed the cache line size of riscv64. #[cfg_attr( any( target_arch = "arm", target_arch = "mips", + target_arch = "mips32r6", target_arch = "mips64", - target_arch = "riscv32", - target_arch = "riscv64", + target_arch = "mips64r6", target_arch = "sparc", target_arch = "hexagon", ), @@ -58,11 +57,12 @@ use core::ops; // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_s390x.go#L7 // - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/s390/include/asm/cache.h#L13 #[cfg_attr(target_arch = "s390x", repr(align(256)))] -// x86, wasm, and sparc64 have 64-byte cache line size. +// x86, wasm, riscv, and sparc64 have 64-byte cache line size. // // Sources: // - https://github.com/golang/go/blob/dda2991c2ea0c5914714469c4defc2562a907230/src/internal/cpu/cpu_x86.go#L9 // - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_wasm.go#L7 +// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/riscv/include/asm/cache.h#L10 // - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L19 // // All others are assumed to have 64-byte cache line size. @@ -73,9 +73,9 @@ use core::ops; target_arch = "powerpc64", target_arch = "arm", target_arch = "mips", + target_arch = "mips32r6", target_arch = "mips64", - target_arch = "riscv32", - target_arch = "riscv64", + target_arch = "mips64r6", target_arch = "sparc", target_arch = "hexagon", target_arch = "m68k", diff --git a/vendor/portable-atomic/src/imp/float.rs b/vendor/portable-atomic/src/imp/float.rs index 6d6ac4b07..965f98387 100644 --- a/vendor/portable-atomic/src/imp/float.rs +++ b/vendor/portable-atomic/src/imp/float.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // AtomicF{32,64} implementation based on AtomicU{32,64}. // // This module provides atomic float implementations using atomic integer. @@ -18,10 +20,7 @@ use core::{cell::UnsafeCell, sync::atomic::Ordering}; macro_rules! 
atomic_float { ( - $atomic_type:ident, - $float_type:ident, - $atomic_int_type:ident, - $int_type:ident, + $atomic_type:ident, $float_type:ident, $atomic_int_type:ident, $int_type:ident, $align:literal ) => { #[repr(C, align($align))] diff --git a/vendor/portable-atomic/src/imp/interrupt/README.md b/vendor/portable-atomic/src/imp/interrupt/README.md index 32c202a8d..edc5fbf2e 100644 --- a/vendor/portable-atomic/src/imp/interrupt/README.md +++ b/vendor/portable-atomic/src/imp/interrupt/README.md @@ -17,10 +17,11 @@ For some targets, the implementation can be changed by explicitly enabling featu - On pre-v6 ARM with the `disable-fiq` feature, this disables interrupts by modifying the I (IRQ mask) bit and F (FIQ mask) bit of the CPSR. - On RISC-V (without A-extension), this disables interrupts by modifying the MIE (Machine Interrupt Enable) bit of the `mstatus` register. - On RISC-V (without A-extension) with the `s-mode` feature, this disables interrupts by modifying the SIE (Supervisor Interrupt Enable) bit of the `sstatus` register. +- On RISC-V (without A-extension) with the `force-amo` feature, this uses AMO instructions for RMWs that have corresponding AMO instructions even if A-extension is disabled. For other RMWs, this disables interrupts as usual. - On MSP430, this disables interrupts by modifying the GIE (Global Interrupt Enable) bit of the status register (SR). - On AVR, this disables interrupts by modifying the I (Global Interrupt Enable) bit of the status register (SREG). - On Xtensa, this disables interrupts by modifying the PS special register. -Some operations don't require disabling interrupts (loads and stores on targets except for AVR, but additionally on MSP430 `add`, `sub`, `and`, `or`, `xor`, `not`). However, when the `critical-section` feature is enabled, critical sections are taken for all atomic operations. +Some operations don't require disabling interrupts (loads and stores on targets except for AVR, but additionally on MSP430 {8,16}-bit `add,sub,and,or,xor,not`, on RISC-V with the `force-amo` feature 32-bit(RV32)/{32,64}-bit(RV64) `swap,fetch_{add,sub,and,or,xor,not,max,min},add,sub,and,or,xor,not` and {8,16}-bit `fetch_{and,or,xor,not},and,or,xor,not`). However, when the `critical-section` feature is enabled, critical sections are taken for all atomic operations. Feel free to submit an issue if your target is not supported yet. diff --git a/vendor/portable-atomic/src/imp/interrupt/armv4t.rs b/vendor/portable-atomic/src/imp/interrupt/armv4t.rs index 85c7ec1b5..20f7089ce 100644 --- a/vendor/portable-atomic/src/imp/interrupt/armv4t.rs +++ b/vendor/portable-atomic/src/imp/interrupt/armv4t.rs @@ -1,21 +1,26 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Refs: https://developer.arm.com/documentation/ddi0406/cb/System-Level-Architecture/The-System-Level-Programmers--Model/ARM-processor-modes-and-ARM-core-registers/Program-Status-Registers--PSRs-?lang=en // // Generated asm: -// - armv5te https://godbolt.org/z/5arYrfzYc +// - armv5te https://godbolt.org/z/Teh7WajMs #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; +// - 0x80 - I (IRQ mask) bit (1 << 7) +// - 0x40 - F (FIQ mask) bit (1 << 6) +// We disable only IRQs by default. See also https://github.com/taiki-e/portable-atomic/pull/28#issuecomment-1214146912. #[cfg(not(portable_atomic_disable_fiq))] -macro_rules! if_disable_fiq { - ($tt:tt) => { - "" +macro_rules! mask { + () => { + "0x80" }; } #[cfg(portable_atomic_disable_fiq)] -macro_rules! if_disable_fiq { - ($tt:tt) => { - $tt +macro_rules! 
mask { + () => { + "0xC0" // 0x80 | 0x40 }; } @@ -29,15 +34,13 @@ pub(super) fn disable() -> State { // SAFETY: reading CPSR and disabling interrupts are safe. // (see module-level comments of interrupt/mod.rs on the safety of using privileged instructions) unsafe { - // Do not use `nomem` and `readonly` because prevent subsequent memory accesses from being reordered before interrupts are disabled. asm!( "mrs {prev}, cpsr", - "orr {new}, {prev}, 0x80", // I (IRQ mask) bit (1 << 7) - // We disable only IRQs by default. See also https://github.com/taiki-e/portable-atomic/pull/28#issuecomment-1214146912. - if_disable_fiq!("orr {new}, {new}, 0x40"), // F (FIQ mask) bit (1 << 6) + concat!("orr {new}, {prev}, ", mask!()), "msr cpsr_c, {new}", prev = out(reg) cpsr, new = out(reg) _, + // Do not use `nomem` and `readonly` because prevent subsequent memory accesses from being reordered before interrupts are disabled. options(nostack, preserves_flags), ); } @@ -53,11 +56,14 @@ pub(super) fn disable() -> State { #[instruction_set(arm::a32)] pub(super) unsafe fn restore(cpsr: State) { // SAFETY: the caller must guarantee that the state was retrieved by the previous `disable`, + // + // This clobbers the control field mask byte of CPSR. See msp430.rs to safety on this. + // (preserves_flags is fine because we only clobber the I, F, T, and M bits of CPSR.) + // + // Refs: https://developer.arm.com/documentation/dui0473/m/arm-and-thumb-instructions/msr--general-purpose-register-to-psr- unsafe { - // This clobbers the entire CPSR. See msp430.rs to safety on this. - // // Do not use `nomem` and `readonly` because prevent preceding memory accesses from being reordered after interrupts are enabled. - asm!("msr cpsr_c, {0}", in(reg) cpsr, options(nostack)); + asm!("msr cpsr_c, {0}", in(reg) cpsr, options(nostack, preserves_flags)); } } @@ -66,7 +72,7 @@ pub(super) unsafe fn restore(cpsr: State) { // have Data Memory Barrier). // // Generated asm: -// - armv5te https://godbolt.org/z/a7zcs9hKa +// - armv5te https://godbolt.org/z/bMxK7M8Ta pub(crate) mod atomic { #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; diff --git a/vendor/portable-atomic/src/imp/interrupt/armv6m.rs b/vendor/portable-atomic/src/imp/interrupt/armv6m.rs index 00413128c..85037a3ea 100644 --- a/vendor/portable-atomic/src/imp/interrupt/armv6m.rs +++ b/vendor/portable-atomic/src/imp/interrupt/armv6m.rs @@ -1,7 +1,9 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Adapted from https://github.com/rust-embedded/cortex-m. // // Generated asm: -// - armv6-m https://godbolt.org/z/sTezYnaj9 +// - armv6-m https://godbolt.org/z/YxME38xcM #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; diff --git a/vendor/portable-atomic/src/imp/interrupt/avr.rs b/vendor/portable-atomic/src/imp/interrupt/avr.rs index 7cc48c62e..76d99c142 100644 --- a/vendor/portable-atomic/src/imp/interrupt/avr.rs +++ b/vendor/portable-atomic/src/imp/interrupt/avr.rs @@ -1,4 +1,9 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Adapted from https://github.com/Rahix/avr-device. 
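For reference, the mask!() values used by the armv4t disable() above, written out as constants (these constants are illustrative only; the real code emits the literals directly in the asm string):

// I (IRQ mask) and F (FIQ mask) bits of the ARM CPSR.
const IRQ_MASK: u32 = 1 << 7; // 0x80
const FIQ_MASK: u32 = 1 << 6; // 0x40
// With the disable-fiq feature enabled, the combined mask is used.
const _: () = assert!((IRQ_MASK | FIQ_MASK) == 0xC0);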
+// +// Refs: +// - AVR Instruction Set Manual https://ww1.microchip.com/downloads/en/DeviceDoc/AVR-InstructionSet-Manual-DS40002198.pdf #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; diff --git a/vendor/portable-atomic/src/imp/interrupt/mod.rs b/vendor/portable-atomic/src/imp/interrupt/mod.rs index a0ead68a6..e0ed0f6e6 100644 --- a/vendor/portable-atomic/src/imp/interrupt/mod.rs +++ b/vendor/portable-atomic/src/imp/interrupt/mod.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Critical section based fallback implementations // // This module supports two different critical section implementations: @@ -26,8 +28,8 @@ // // See also README.md of this directory. // -// [^avr1]: https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp#LL963 -// [^avr2]: https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/test/CodeGen/AVR/atomics/load16.ll#L5 +// [^avr1]: https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp#L1074 +// [^avr2]: https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/test/CodeGen/AVR/atomics/load16.ll#L5 // On some platforms, atomic load/store can be implemented in a more efficient // way than disabling interrupts. On MSP430, some RMWs that do not return the @@ -173,11 +175,21 @@ impl<T> AtomicPtr<T> { } #[inline] - pub(crate) fn swap(&self, ptr: *mut T, _order: Ordering) -> *mut T { + pub(crate) fn swap(&self, ptr: *mut T, order: Ordering) -> *mut T { + let _ = order; + #[cfg(portable_atomic_force_amo)] + { + self.as_native().swap(ptr, order) + } + #[cfg(not(portable_atomic_force_amo))] // SAFETY: any data races are prevented by disabling interrupts (see // module-level comments) and the raw pointer is valid because we got it // from a reference. - with(|| unsafe { self.p.get().replace(ptr) }) + with(|| unsafe { + let prev = self.p.get().read(); + self.p.get().write(ptr); + prev + }) } #[inline] @@ -194,12 +206,12 @@ impl<T> AtomicPtr<T> { // module-level comments) and the raw pointer is valid because we got it // from a reference. with(|| unsafe { - let result = self.p.get().read(); - if result == current { + let prev = self.p.get().read(); + if prev == current { self.p.get().write(new); - Ok(result) + Ok(prev) } else { - Err(result) + Err(prev) } }) } @@ -275,9 +287,12 @@ macro_rules! atomic_int { } } }; - (load_store_atomic, $atomic_type:ident, $int_type:ident, $align:literal) => { + (load_store_atomic $([$kind:ident])?, $atomic_type:ident, $int_type:ident, $align:literal) => { atomic_int!(base, $atomic_type, $int_type, $align); - atomic_int!(cas, $atomic_type, $int_type); + #[cfg(not(portable_atomic_force_amo))] + atomic_int!(cas[emulate], $atomic_type, $int_type); + #[cfg(portable_atomic_force_amo)] + atomic_int!(cas $([$kind])?, $atomic_type, $int_type); impl $atomic_type { #[inline] #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] @@ -358,7 +373,7 @@ macro_rules! atomic_int { }; (load_store_critical_session, $atomic_type:ident, $int_type:ident, $align:literal) => { atomic_int!(base, $atomic_type, $int_type, $align); - atomic_int!(cas, $atomic_type, $int_type); + atomic_int!(cas[emulate], $atomic_type, $int_type); impl_default_no_fetch_ops!($atomic_type, $int_type); impl_default_bit_opts!($atomic_type, $int_type); impl $atomic_type { @@ -388,14 +403,18 @@ macro_rules! 
atomic_int { } } }; - (cas, $atomic_type:ident, $int_type:ident) => { + (cas[emulate], $atomic_type:ident, $int_type:ident) => { impl $atomic_type { #[inline] pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type { // SAFETY: any data races are prevented by disabling interrupts (see // module-level comments) and the raw pointer is valid because we got it // from a reference. - with(|| unsafe { self.v.get().replace(val) }) + with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(val); + prev + }) } #[inline] @@ -412,12 +431,12 @@ macro_rules! atomic_int { // module-level comments) and the raw pointer is valid because we got it // from a reference. with(|| unsafe { - let result = self.v.get().read(); - if result == current { + let prev = self.v.get().read(); + if prev == current { self.v.get().write(new); - Ok(result) + Ok(prev) } else { - Err(result) + Err(prev) } }) } @@ -440,9 +459,9 @@ macro_rules! atomic_int { // module-level comments) and the raw pointer is valid because we got it // from a reference. with(|| unsafe { - let result = self.v.get().read(); - self.v.get().write(result.wrapping_add(val)); - result + let prev = self.v.get().read(); + self.v.get().write(prev.wrapping_add(val)); + prev }) } @@ -452,9 +471,9 @@ macro_rules! atomic_int { // module-level comments) and the raw pointer is valid because we got it // from a reference. with(|| unsafe { - let result = self.v.get().read(); - self.v.get().write(result.wrapping_sub(val)); - result + let prev = self.v.get().read(); + self.v.get().write(prev.wrapping_sub(val)); + prev }) } @@ -464,9 +483,9 @@ macro_rules! atomic_int { // module-level comments) and the raw pointer is valid because we got it // from a reference. with(|| unsafe { - let result = self.v.get().read(); - self.v.get().write(result & val); - result + let prev = self.v.get().read(); + self.v.get().write(prev & val); + prev }) } @@ -476,9 +495,9 @@ macro_rules! atomic_int { // module-level comments) and the raw pointer is valid because we got it // from a reference. with(|| unsafe { - let result = self.v.get().read(); - self.v.get().write(!(result & val)); - result + let prev = self.v.get().read(); + self.v.get().write(!(prev & val)); + prev }) } @@ -488,9 +507,9 @@ macro_rules! atomic_int { // module-level comments) and the raw pointer is valid because we got it // from a reference. with(|| unsafe { - let result = self.v.get().read(); - self.v.get().write(result | val); - result + let prev = self.v.get().read(); + self.v.get().write(prev | val); + prev }) } @@ -500,9 +519,9 @@ macro_rules! atomic_int { // module-level comments) and the raw pointer is valid because we got it // from a reference. with(|| unsafe { - let result = self.v.get().read(); - self.v.get().write(result ^ val); - result + let prev = self.v.get().read(); + self.v.get().write(prev ^ val); + prev }) } @@ -512,9 +531,9 @@ macro_rules! atomic_int { // module-level comments) and the raw pointer is valid because we got it // from a reference. with(|| unsafe { - let result = self.v.get().read(); - self.v.get().write(core::cmp::max(result, val)); - result + let prev = self.v.get().read(); + self.v.get().write(core::cmp::max(prev, val)); + prev }) } @@ -524,9 +543,9 @@ macro_rules! atomic_int { // module-level comments) and the raw pointer is valid because we got it // from a reference. 
with(|| unsafe { - let result = self.v.get().read(); - self.v.get().write(core::cmp::min(result, val)); - result + let prev = self.v.get().read(); + self.v.get().write(core::cmp::min(prev, val)); + prev }) } @@ -536,21 +555,275 @@ macro_rules! atomic_int { // module-level comments) and the raw pointer is valid because we got it // from a reference. with(|| unsafe { - let result = self.v.get().read(); - self.v.get().write(!result); - result + let prev = self.v.get().read(); + self.v.get().write(!prev); + prev + }) + } + + #[inline] + pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(prev.wrapping_neg()); + prev + }) + } + #[inline] + pub(crate) fn neg(&self, order: Ordering) { + self.fetch_neg(order); + } + } + }; + // cfg(portable_atomic_force_amo) 32-bit(RV32)/{32,64}-bit(RV64) RMW + (cas, $atomic_type:ident, $int_type:ident) => { + impl $atomic_type { + #[inline] + pub(crate) fn swap(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().swap(val, order) + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + crate::utils::assert_compare_exchange_ordering(success, failure); + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let prev = self.v.get().read(); + if prev == current { + self.v.get().write(new); + Ok(prev) + } else { + Err(prev) + } + }) + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange_weak( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + self.compare_exchange(current, new, success, failure) + } + + #[inline] + pub(crate) fn fetch_add(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_add(val, order) + } + #[inline] + pub(crate) fn fetch_sub(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_sub(val, order) + } + #[inline] + pub(crate) fn fetch_and(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_and(val, order) + } + + #[inline] + pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. 
+ with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(!(prev & val)); + prev + }) + } + + #[inline] + pub(crate) fn fetch_or(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_or(val, order) + } + #[inline] + pub(crate) fn fetch_xor(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_xor(val, order) + } + #[inline] + pub(crate) fn fetch_max(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_max(val, order) + } + #[inline] + pub(crate) fn fetch_min(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_min(val, order) + } + #[inline] + pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type { + self.as_native().fetch_not(order) + } + + #[inline] + pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(prev.wrapping_neg()); + prev + }) + } + #[inline] + pub(crate) fn neg(&self, order: Ordering) { + self.fetch_neg(order); + } + } + }; + // cfg(portable_atomic_force_amo) {8,16}-bit RMW + (cas[sub_word], $atomic_type:ident, $int_type:ident) => { + impl $atomic_type { + #[inline] + pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(val); + prev + }) + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + crate::utils::assert_compare_exchange_ordering(success, failure); + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let prev = self.v.get().read(); + if prev == current { + self.v.get().write(new); + Ok(prev) + } else { + Err(prev) + } }) } #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange_weak( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + self.compare_exchange(current, new, success, failure) + } + + #[inline] + pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(prev.wrapping_add(val)); + prev + }) + } + + #[inline] + pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. 
+ with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(prev.wrapping_sub(val)); + prev + }) + } + + #[inline] + pub(crate) fn fetch_and(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_and(val, order) + } + + #[inline] + pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(!(prev & val)); + prev + }) + } + + #[inline] + pub(crate) fn fetch_or(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_or(val, order) + } + #[inline] + pub(crate) fn fetch_xor(&self, val: $int_type, order: Ordering) -> $int_type { + self.as_native().fetch_xor(val, order) + } + + #[inline] + pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(core::cmp::max(prev, val)); + prev + }) + } + + #[inline] + pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type { + // SAFETY: any data races are prevented by disabling interrupts (see + // module-level comments) and the raw pointer is valid because we got it + // from a reference. + with(|| unsafe { + let prev = self.v.get().read(); + self.v.get().write(core::cmp::min(prev, val)); + prev + }) + } + + #[inline] + pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type { + self.as_native().fetch_not(order) + } + + #[inline] pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type { // SAFETY: any data races are prevented by disabling interrupts (see // module-level comments) and the raw pointer is valid because we got it // from a reference. with(|| unsafe { - let result = self.v.get().read(); - self.v.get().write(result.wrapping_neg()); - result + let prev = self.v.get().read(); + self.v.get().write(prev.wrapping_neg()); + prev }) } #[inline] @@ -578,10 +851,10 @@ atomic_int!(load_store_atomic, AtomicIsize, isize, 16); #[cfg(target_pointer_width = "128")] atomic_int!(load_store_atomic, AtomicUsize, usize, 16); -atomic_int!(load_store_atomic, AtomicI8, i8, 1); -atomic_int!(load_store_atomic, AtomicU8, u8, 1); -atomic_int!(load_store_atomic, AtomicI16, i16, 2); -atomic_int!(load_store_atomic, AtomicU16, u16, 2); +atomic_int!(load_store_atomic[sub_word], AtomicI8, i8, 1); +atomic_int!(load_store_atomic[sub_word], AtomicU8, u8, 1); +atomic_int!(load_store_atomic[sub_word], AtomicI16, i16, 2); +atomic_int!(load_store_atomic[sub_word], AtomicU16, u16, 2); #[cfg(not(target_pointer_width = "16"))] atomic_int!(load_store_atomic, AtomicI32, i32, 4); diff --git a/vendor/portable-atomic/src/imp/interrupt/msp430.rs b/vendor/portable-atomic/src/imp/interrupt/msp430.rs index 020ed1023..8c1ca80ee 100644 --- a/vendor/portable-atomic/src/imp/interrupt/msp430.rs +++ b/vendor/portable-atomic/src/imp/interrupt/msp430.rs @@ -1,6 +1,10 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Adapted from https://github.com/rust-embedded/msp430. // // See also src/imp/msp430.rs. 
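The interrupt-based RMW implementations above all follow the same shape: enter a critical section, read the plain value, write the updated value, and return the previous one. A self-contained sketch of that shape (AtomicU32Sketch and the pass-through `with` below are stand-ins invented for this example; the real `with` disables interrupts or uses the critical-section crate):

use core::cell::UnsafeCell;
use core::sync::atomic::Ordering;

// Stand-in for the module's `with`: the real helper disables interrupts
// (or takes a critical-section lock) around the closure.
fn with<T>(f: impl FnOnce() -> T) -> T {
    f()
}

struct AtomicU32Sketch {
    v: UnsafeCell<u32>,
}

impl AtomicU32Sketch {
    fn fetch_add(&self, val: u32, _order: Ordering) -> u32 {
        // In the real code, data races are prevented by disabling interrupts;
        // here the closure simply runs inline, so this sketch is single-threaded only.
        with(|| unsafe {
            let prev = self.v.get().read();
            self.v.get().write(prev.wrapping_add(val));
            prev
        })
    }
}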
+// +// Refs: https://www.ti.com/lit/ug/slau208q/slau208q.pdf #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; @@ -18,7 +22,6 @@ pub(super) fn disable() -> State { unsafe { // Do not use `nomem` and `readonly` because prevent subsequent memory accesses from being reordered before interrupts are disabled. // Do not use `preserves_flags` because DINT modifies the GIE (global interrupt enable) bit of the status register. - // Refs: https://mspgcc.sourceforge.net/manual/x951.html #[cfg(not(portable_atomic_no_asm))] asm!( "mov R2, {0}", diff --git a/vendor/portable-atomic/src/imp/interrupt/riscv.rs b/vendor/portable-atomic/src/imp/interrupt/riscv.rs index c08545e1d..65b1af2ff 100644 --- a/vendor/portable-atomic/src/imp/interrupt/riscv.rs +++ b/vendor/portable-atomic/src/imp/interrupt/riscv.rs @@ -1,9 +1,11 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Refs: // - https://five-embeddev.com/riscv-isa-manual/latest/machine.html#machine-status-registers-mstatus-and-mstatush // - https://five-embeddev.com/riscv-isa-manual/latest/supervisor.html#sstatus // // Generated asm: -// - riscv64gc https://godbolt.org/z/a78zxf5sW +// - riscv64gc https://godbolt.org/z/osbzsT679 #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; diff --git a/vendor/portable-atomic/src/imp/interrupt/xtensa.rs b/vendor/portable-atomic/src/imp/interrupt/xtensa.rs index 3593c25af..6cbb4cffb 100644 --- a/vendor/portable-atomic/src/imp/interrupt/xtensa.rs +++ b/vendor/portable-atomic/src/imp/interrupt/xtensa.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Refs: // - Xtensa Instruction Set Architecture (ISA) Reference Manual https://0x04.net/~mwk/doc/xtensa.pdf // - Linux kernel's Xtensa atomic implementation https://github.com/torvalds/linux/blob/v6.1/arch/xtensa/include/asm/atomic.h diff --git a/vendor/portable-atomic/src/imp/mod.rs b/vendor/portable-atomic/src/imp/mod.rs index 3dbe8e6c2..cea71eb3f 100644 --- a/vendor/portable-atomic/src/imp/mod.rs +++ b/vendor/portable-atomic/src/imp/mod.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // ----------------------------------------------------------------------------- // Lock-free implementations @@ -12,7 +14,10 @@ )))] #[cfg_attr( portable_atomic_no_cfg_target_has_atomic, - cfg(not(all(feature = "critical-section", portable_atomic_no_atomic_cas))) + cfg(not(all( + any(target_arch = "riscv32", target_arch = "riscv64", feature = "critical-section"), + portable_atomic_no_atomic_cas, + ))) )] #[cfg_attr( not(portable_atomic_no_cfg_target_has_atomic), @@ -23,8 +28,11 @@ )] mod core_atomic; -#[cfg(any(not(portable_atomic_no_asm), portable_atomic_unstable_asm))] -#[cfg(target_arch = "aarch64")] +// aarch64 128-bit atomics +#[cfg(all( + target_arch = "aarch64", + any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), +))] // Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly. 
#[cfg_attr( all(any(miri, portable_atomic_sanitize_thread), portable_atomic_new_atomic_intrinsics), @@ -36,16 +44,19 @@ mod core_atomic; )] mod aarch64; -#[cfg(any(not(portable_atomic_no_asm), portable_atomic_unstable_asm))] -#[cfg(target_arch = "x86_64")] -#[cfg(any( - target_feature = "cmpxchg16b", - portable_atomic_target_feature = "cmpxchg16b", - all( - feature = "fallback", - not(portable_atomic_no_cmpxchg16b_target_feature), - not(portable_atomic_no_outline_atomics), - not(any(target_env = "sgx", miri)), +// x86_64 128-bit atomics +#[cfg(all( + target_arch = "x86_64", + any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), + any( + target_feature = "cmpxchg16b", + portable_atomic_target_feature = "cmpxchg16b", + all( + feature = "fallback", + not(portable_atomic_no_cmpxchg16b_target_feature), + not(portable_atomic_no_outline_atomics), + not(any(target_env = "sgx", miri)), + ), ), ))] // Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly. @@ -53,27 +64,34 @@ mod aarch64; #[cfg_attr(not(any(miri, portable_atomic_sanitize_thread)), path = "atomic128/x86_64.rs")] mod x86_64; -#[cfg(portable_atomic_unstable_asm_experimental_arch)] -#[cfg(target_arch = "powerpc64")] -#[cfg(any( - target_feature = "quadword-atomics", - portable_atomic_target_feature = "quadword-atomics", - all( - feature = "fallback", - not(portable_atomic_no_outline_atomics), - any(test, portable_atomic_outline_atomics), // TODO(powerpc64): currently disabled by default - any( - all( - target_os = "linux", - any( - target_env = "gnu", - all(target_env = "musl", not(target_feature = "crt-static")), - portable_atomic_outline_atomics, +// powerpc64 128-bit atomics +#[cfg(all( + target_arch = "powerpc64", + portable_atomic_unstable_asm_experimental_arch, + any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + all( + feature = "fallback", + not(portable_atomic_no_outline_atomics), + any(test, portable_atomic_outline_atomics), // TODO(powerpc64): currently disabled by default + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + ), + portable_atomic_outline_atomics, + ), ), + target_os = "android", + target_os = "freebsd", ), - target_os = "freebsd", + not(any(miri, portable_atomic_sanitize_thread)), ), - not(any(miri, portable_atomic_sanitize_thread)), ), ))] // Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly. @@ -87,25 +105,20 @@ mod x86_64; )] mod powerpc64; -#[cfg(portable_atomic_unstable_asm_experimental_arch)] -#[cfg(target_arch = "s390x")] +// s390x 128-bit atomics +#[cfg(all(target_arch = "s390x", portable_atomic_unstable_asm_experimental_arch))] // Use intrinsics.rs on Miri and Sanitizer that do not support inline assembly. -#[cfg_attr( - all(any(miri, portable_atomic_sanitize_thread), portable_atomic_new_atomic_intrinsics), - path = "atomic128/intrinsics.rs" -)] -#[cfg_attr( - not(all(any(miri, portable_atomic_sanitize_thread), portable_atomic_new_atomic_intrinsics)), - path = "atomic128/s390x.rs" -)] +#[cfg_attr(any(miri, portable_atomic_sanitize_thread), path = "atomic128/intrinsics.rs")] +#[cfg_attr(not(any(miri, portable_atomic_sanitize_thread)), path = "atomic128/s390x.rs")] mod s390x; -// Miri and Sanitizer do not support inline assembly. +// pre-v6 ARM Linux 64-bit atomics #[cfg(feature = "fallback")] +// Miri and Sanitizer do not support inline assembly. 
#[cfg(all( - not(any(miri, portable_atomic_sanitize_thread)), - any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), target_arch = "arm", + not(any(miri, portable_atomic_sanitize_thread)), + not(portable_atomic_no_asm), any(target_os = "linux", target_os = "android"), not(any(target_feature = "v6", portable_atomic_target_feature = "v6")), not(portable_atomic_no_outline_atomics), @@ -114,9 +127,11 @@ mod s390x; #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "64")))] mod arm_linux; +// MSP430 atomics #[cfg(target_arch = "msp430")] pub(crate) mod msp430; +// atomic load/store for RISC-V without A-extension #[cfg(any(test, not(feature = "critical-section")))] #[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_cas)))] #[cfg_attr( @@ -126,11 +141,12 @@ pub(crate) mod msp430; #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] mod riscv; +// x86-specific optimizations // Miri and Sanitizer do not support inline assembly. #[cfg(all( - not(any(miri, portable_atomic_sanitize_thread)), - any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), any(target_arch = "x86", target_arch = "x86_64"), + not(any(miri, portable_atomic_sanitize_thread)), + not(portable_atomic_no_asm), ))] mod x86; @@ -138,31 +154,31 @@ mod x86; // Lock-based fallback implementations #[cfg(feature = "fallback")] +#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))] +#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))] #[cfg(any( test, not(any( all( - any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), target_arch = "aarch64", + any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), ), all( + target_arch = "x86_64", any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), any(target_feature = "cmpxchg16b", portable_atomic_target_feature = "cmpxchg16b"), - target_arch = "x86_64", ), all( + target_arch = "powerpc64", portable_atomic_unstable_asm_experimental_arch, any( target_feature = "quadword-atomics", portable_atomic_target_feature = "quadword-atomics", ), - target_arch = "powerpc64", ), - all(portable_atomic_unstable_asm_experimental_arch, target_arch = "s390x"), + all(target_arch = "s390x", portable_atomic_unstable_asm_experimental_arch), )) ))] -#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))] -#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))] mod fallback; // ----------------------------------------------------------------------------- @@ -170,7 +186,7 @@ mod fallback; // On AVR, we always use critical section based fallback implementation. // AVR can be safely assumed to be single-core, so this is sound. -// https://github.com/llvm/llvm-project/blob/llvmorg-16.0.0/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp#LL963 +// https://github.com/llvm/llvm-project/blob/llvmorg-17.0.0-rc2/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp#L1074 // MSP430 as well. 
#[cfg(any( all(test, target_os = "none"), @@ -185,13 +201,13 @@ mod fallback; cfg(any(test, not(target_has_atomic = "ptr"))) )] #[cfg(any( - feature = "critical-section", target_arch = "arm", target_arch = "avr", target_arch = "msp430", target_arch = "riscv32", target_arch = "riscv64", target_arch = "xtensa", + feature = "critical-section", ))] mod interrupt; @@ -203,7 +219,6 @@ pub(crate) mod float; // ----------------------------------------------------------------------------- -// Atomic{Isize,Usize,Bool,Ptr}, Atomic{I,U}{8,16} #[cfg(not(any( portable_atomic_no_atomic_load_store, portable_atomic_unsafe_assume_single_core, @@ -212,7 +227,10 @@ pub(crate) mod float; )))] #[cfg_attr( portable_atomic_no_cfg_target_has_atomic, - cfg(not(all(feature = "critical-section", portable_atomic_no_atomic_cas))) + cfg(not(all( + any(target_arch = "riscv32", target_arch = "riscv64", feature = "critical-section"), + portable_atomic_no_atomic_cas, + ))) )] #[cfg_attr( not(portable_atomic_no_cfg_target_has_atomic), @@ -221,29 +239,27 @@ pub(crate) mod float; not(target_has_atomic = "ptr"), ))) )] -pub(crate) use self::core_atomic::{ - AtomicI16, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU8, AtomicUsize, -}; -// RISC-V without A-extension -#[cfg(not(any(portable_atomic_unsafe_assume_single_core, feature = "critical-section")))] -#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))] -#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))] -#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] -pub(crate) use self::riscv::{ - AtomicI16, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU8, AtomicUsize, -}; -// no core Atomic{Isize,Usize,Bool,Ptr}/Atomic{I,U}{8,16} & assume single core => critical section based fallback -#[cfg(any( - portable_atomic_unsafe_assume_single_core, - feature = "critical-section", - target_arch = "avr", - target_arch = "msp430", -))] -#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))] -#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))] -pub(crate) use self::interrupt::{ - AtomicI16, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU8, AtomicUsize, -}; +items! 
{ + pub(crate) use self::core_atomic::{ + AtomicI16, AtomicI32, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU32, AtomicU8, + AtomicUsize, + }; + #[cfg_attr( + portable_atomic_no_cfg_target_has_atomic, + cfg(any( + not(portable_atomic_no_atomic_64), + not(any(target_pointer_width = "16", target_pointer_width = "32")), + )) + )] + #[cfg_attr( + not(portable_atomic_no_cfg_target_has_atomic), + cfg(any( + target_has_atomic = "64", + not(any(target_pointer_width = "16", target_pointer_width = "32")), + )) + )] + pub(crate) use self::core_atomic::{AtomicI64, AtomicU64}; +} // bpf #[cfg(all( target_arch = "bpf", @@ -252,33 +268,21 @@ pub(crate) use self::interrupt::{ ))] pub(crate) use self::core_atomic::{AtomicI64, AtomicIsize, AtomicPtr, AtomicU64, AtomicUsize}; -// Atomic{I,U}32 -#[cfg(not(any( - portable_atomic_no_atomic_load_store, - portable_atomic_unsafe_assume_single_core, - target_arch = "avr", - target_arch = "msp430", -)))] -#[cfg_attr( - portable_atomic_no_cfg_target_has_atomic, - cfg(not(all(feature = "critical-section", portable_atomic_no_atomic_cas))) -)] -#[cfg_attr( - not(portable_atomic_no_cfg_target_has_atomic), - cfg(not(all( - any(target_arch = "riscv32", target_arch = "riscv64", feature = "critical-section"), - not(target_has_atomic = "ptr"), - ))) -)] -pub(crate) use self::core_atomic::{AtomicI32, AtomicU32}; -// RISC-V without A-extension +// RISC-V without A-extension & !(assume single core | critical section) #[cfg(not(any(portable_atomic_unsafe_assume_single_core, feature = "critical-section")))] #[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))] #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))] #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] -pub(crate) use self::riscv::{AtomicI32, AtomicU32}; -// no core Atomic{I,U}32 & no CAS & assume single core => critical section based fallback -#[cfg(any(not(target_pointer_width = "16"), feature = "fallback"))] +items! { + pub(crate) use self::riscv::{ + AtomicI16, AtomicI32, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU32, AtomicU8, + AtomicUsize, + }; + #[cfg(target_arch = "riscv64")] + pub(crate) use self::riscv::{AtomicI64, AtomicU64}; +} + +// no core atomic CAS & (assume single core | critical section) => critical section based fallback #[cfg(any( portable_atomic_unsafe_assume_single_core, feature = "critical-section", @@ -287,53 +291,97 @@ pub(crate) use self::riscv::{AtomicI32, AtomicU32}; ))] #[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))] #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))] -pub(crate) use self::interrupt::{AtomicI32, AtomicU32}; +items! 
{ + pub(crate) use self::interrupt::{ + AtomicI16, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU8, AtomicUsize, + }; + #[cfg(any(not(target_pointer_width = "16"), feature = "fallback"))] + pub(crate) use self::interrupt::{AtomicI32, AtomicU32}; + #[cfg(any( + not(any(target_pointer_width = "16", target_pointer_width = "32")), + feature = "fallback", + ))] + pub(crate) use self::interrupt::{AtomicI64, AtomicU64}; + #[cfg(feature = "fallback")] + pub(crate) use self::interrupt::{AtomicI128, AtomicU128}; +} -// Atomic{I,U}64 -#[cfg(not(any( - portable_atomic_no_atomic_load_store, - portable_atomic_unsafe_assume_single_core, -)))] -#[cfg_attr( - portable_atomic_no_cfg_target_has_atomic, - cfg(any( - not(portable_atomic_no_atomic_64), +// no core (64-bit | 128-bit) atomic & has CAS => use lock-base fallback +#[cfg(feature = "fallback")] +#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))] +#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))] +items! { + #[cfg(not(all( + target_arch = "arm", + not(any(miri, portable_atomic_sanitize_thread)), + not(portable_atomic_no_asm), + any(target_os = "linux", target_os = "android"), + not(any(target_feature = "v6", portable_atomic_target_feature = "v6")), + not(portable_atomic_no_outline_atomics), + )))] + #[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_64))] + #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "64")))] + pub(crate) use self::fallback::{AtomicI64, AtomicU64}; + #[cfg(not(any( all( - not(any(target_pointer_width = "16", target_pointer_width = "32")), - not(all(feature = "critical-section", portable_atomic_no_atomic_cas)), + target_arch = "aarch64", + any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), ), - )) -)] -#[cfg_attr( - not(portable_atomic_no_cfg_target_has_atomic), - cfg(any( - target_has_atomic = "64", all( - not(any(target_pointer_width = "16", target_pointer_width = "32")), - not(all( - any( - target_arch = "riscv32", - target_arch = "riscv64", - feature = "critical-section", + target_arch = "x86_64", + any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), + any( + target_feature = "cmpxchg16b", + portable_atomic_target_feature = "cmpxchg16b", + all( + feature = "fallback", + not(portable_atomic_no_cmpxchg16b_target_feature), + not(portable_atomic_no_outline_atomics), + not(any(target_env = "sgx", miri)), ), - not(target_has_atomic = "ptr"), - )), + ), ), - )) -)] -pub(crate) use self::core_atomic::{AtomicI64, AtomicU64}; -// RISC-V without A-extension -#[cfg(not(any(portable_atomic_unsafe_assume_single_core, feature = "critical-section")))] -#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))] -#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))] -#[cfg(target_arch = "riscv64")] -pub(crate) use self::riscv::{AtomicI64, AtomicU64}; + all( + target_arch = "powerpc64", + portable_atomic_unstable_asm_experimental_arch, + any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + all( + feature = "fallback", + not(portable_atomic_no_outline_atomics), + portable_atomic_outline_atomics, // TODO(powerpc64): currently disabled by default + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + ), + portable_atomic_outline_atomics, + ), + 
), + target_os = "android", + target_os = "freebsd", + ), + not(any(miri, portable_atomic_sanitize_thread)), + ), + ), + ), + all(target_arch = "s390x", portable_atomic_unstable_asm_experimental_arch), + )))] + pub(crate) use self::fallback::{AtomicI128, AtomicU128}; +} + +// 64-bit atomics (platform-specific) // pre-v6 ARM Linux #[cfg(feature = "fallback")] #[cfg(all( - not(any(miri, portable_atomic_sanitize_thread)), - any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), target_arch = "arm", + not(any(miri, portable_atomic_sanitize_thread)), + not(portable_atomic_no_asm), any(target_os = "linux", target_os = "android"), not(any(target_feature = "v6", portable_atomic_target_feature = "v6")), not(portable_atomic_no_outline_atomics), @@ -341,49 +389,17 @@ pub(crate) use self::riscv::{AtomicI64, AtomicU64}; #[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_64))] #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "64")))] pub(crate) use self::arm_linux::{AtomicI64, AtomicU64}; -// no core Atomic{I,U}64 & has CAS => use lock-base fallback -#[cfg(feature = "fallback")] -#[cfg(not(all( - not(any(miri, portable_atomic_sanitize_thread)), - any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), - target_arch = "arm", - any(target_os = "linux", target_os = "android"), - not(any(target_feature = "v6", portable_atomic_target_feature = "v6")), - not(portable_atomic_no_outline_atomics), -)))] -#[cfg_attr( - portable_atomic_no_cfg_target_has_atomic, - cfg(all(portable_atomic_no_atomic_64, not(portable_atomic_no_atomic_cas))) -)] -#[cfg_attr( - not(portable_atomic_no_cfg_target_has_atomic), - cfg(all(not(target_has_atomic = "64"), target_has_atomic = "ptr")) -)] -pub(crate) use self::fallback::{AtomicI64, AtomicU64}; -// no core Atomic{I,U}64 & no CAS & assume single core => critical section based fallback -#[cfg(any( - not(any(target_pointer_width = "16", target_pointer_width = "32")), - feature = "fallback", -))] -#[cfg(any( - portable_atomic_unsafe_assume_single_core, - feature = "critical-section", - target_arch = "avr", - target_arch = "msp430", -))] -#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))] -#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))] -pub(crate) use self::interrupt::{AtomicI64, AtomicU64}; -// Atomic{I,U}128 -// aarch64 stable +// 128-bit atomics (platform-specific) +// aarch64 #[cfg(all( - any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), target_arch = "aarch64", + any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), ))] pub(crate) use self::aarch64::{AtomicI128, AtomicU128}; -// no core Atomic{I,U}128 & has cmpxchg16b => use cmpxchg16b +// x86_64 & (cmpxchg16b | outline-atomics) #[cfg(all( + target_arch = "x86_64", any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), any( target_feature = "cmpxchg16b", @@ -395,94 +411,39 @@ pub(crate) use self::aarch64::{AtomicI128, AtomicU128}; not(any(target_env = "sgx", miri)), ), ), - target_arch = "x86_64", ))] pub(crate) use self::x86_64::{AtomicI128, AtomicU128}; -// powerpc64 -#[cfg(portable_atomic_unstable_asm_experimental_arch)] -#[cfg(any( - target_feature = "quadword-atomics", - portable_atomic_target_feature = "quadword-atomics", - all( - feature = "fallback", - not(portable_atomic_no_outline_atomics), - portable_atomic_outline_atomics, // TODO(powerpc64): currently disabled by default - any( - all( - target_os = "linux", - any( 
- target_env = "gnu", - all(target_env = "musl", not(target_feature = "crt-static")), - portable_atomic_outline_atomics, +// powerpc64 & (pwr8 | outline-atomics) +#[cfg(all( + target_arch = "powerpc64", + portable_atomic_unstable_asm_experimental_arch, + any( + target_feature = "quadword-atomics", + portable_atomic_target_feature = "quadword-atomics", + all( + feature = "fallback", + not(portable_atomic_no_outline_atomics), + portable_atomic_outline_atomics, // TODO(powerpc64): currently disabled by default + any( + all( + target_os = "linux", + any( + target_env = "gnu", + all( + any(target_env = "musl", target_env = "ohos"), + not(target_feature = "crt-static"), + ), + portable_atomic_outline_atomics, + ), ), + target_os = "android", + target_os = "freebsd", ), - target_os = "freebsd", + not(any(miri, portable_atomic_sanitize_thread)), ), - not(any(miri, portable_atomic_sanitize_thread)), ), ))] -#[cfg(target_arch = "powerpc64")] pub(crate) use self::powerpc64::{AtomicI128, AtomicU128}; // s390x -#[cfg(portable_atomic_unstable_asm_experimental_arch)] -#[cfg(target_arch = "s390x")] +#[cfg(all(target_arch = "s390x", portable_atomic_unstable_asm_experimental_arch))] pub(crate) use self::s390x::{AtomicI128, AtomicU128}; -// no core Atomic{I,U}128 & has CAS => use lock-base fallback -#[cfg(feature = "fallback")] -#[cfg(not(any( - all(any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), target_arch = "aarch64"), - all( - any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), - any( - target_feature = "cmpxchg16b", - portable_atomic_target_feature = "cmpxchg16b", - all( - feature = "fallback", - not(portable_atomic_no_cmpxchg16b_target_feature), - not(portable_atomic_no_outline_atomics), - not(any(target_env = "sgx", miri)), - ), - ), - target_arch = "x86_64", - ), - all( - portable_atomic_unstable_asm_experimental_arch, - any( - target_feature = "quadword-atomics", - portable_atomic_target_feature = "quadword-atomics", - all( - feature = "fallback", - not(portable_atomic_no_outline_atomics), - portable_atomic_outline_atomics, // TODO(powerpc64): currently disabled by default - any( - all( - target_os = "linux", - any( - target_env = "gnu", - all(target_env = "musl", not(target_feature = "crt-static")), - portable_atomic_outline_atomics, - ), - ), - target_os = "freebsd", - ), - not(any(miri, portable_atomic_sanitize_thread)), - ), - ), - target_arch = "powerpc64", - ), - all(portable_atomic_unstable_asm_experimental_arch, target_arch = "s390x"), -)))] -#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))] -#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))] -pub(crate) use self::fallback::{AtomicI128, AtomicU128}; -// no core Atomic{I,U}128 & no CAS & assume_single_core => critical section based fallback -#[cfg(feature = "fallback")] -#[cfg(any( - portable_atomic_unsafe_assume_single_core, - feature = "critical-section", - target_arch = "avr", - target_arch = "msp430", -))] -#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_cas))] -#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "ptr")))] -pub(crate) use self::interrupt::{AtomicI128, AtomicU128}; diff --git a/vendor/portable-atomic/src/imp/msp430.rs b/vendor/portable-atomic/src/imp/msp430.rs index 4928549ab..f6990ddee 100644 --- a/vendor/portable-atomic/src/imp/msp430.rs +++ b/vendor/portable-atomic/src/imp/msp430.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR 
MIT + // Atomic load/store implementation on MSP430. // // Adapted from https://github.com/pftbest/msp430-atomic. @@ -8,6 +10,8 @@ // See also src/imp/interrupt/msp430.rs. // // Note: Ordering is always SeqCst. +// +// Refs: https://www.ti.com/lit/ug/slau208q/slau208q.pdf #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; @@ -165,6 +169,7 @@ macro_rules! atomic { concat!("add", $asm_suffix, " {val}, 0({dst})"), dst = in(reg) dst, val = in(reg) val, + // Do not use `preserves_flags` because ADD modifies the V, N, Z, and C bits of the status register. options(nostack), ); #[cfg(portable_atomic_no_asm)] @@ -186,6 +191,7 @@ macro_rules! atomic { concat!("sub", $asm_suffix, " {val}, 0({dst})"), dst = in(reg) dst, val = in(reg) val, + // Do not use `preserves_flags` because SUB modifies the V, N, Z, and C bits of the status register. options(nostack), ); #[cfg(portable_atomic_no_asm)] @@ -207,6 +213,7 @@ macro_rules! atomic { concat!("and", $asm_suffix, " {val}, 0({dst})"), dst = in(reg) dst, val = in(reg) val, + // Do not use `preserves_flags` because AND modifies the V, N, Z, and C bits of the status register. options(nostack), ); #[cfg(portable_atomic_no_asm)] @@ -228,7 +235,7 @@ macro_rules! atomic { concat!("bis", $asm_suffix, " {val}, 0({dst})"), dst = in(reg) dst, val = in(reg) val, - options(nostack), + options(nostack, preserves_flags), ); #[cfg(portable_atomic_no_asm)] llvm_asm!( @@ -249,6 +256,7 @@ macro_rules! atomic { concat!("xor", $asm_suffix, " {val}, 0({dst})"), dst = in(reg) dst, val = in(reg) val, + // Do not use `preserves_flags` because XOR modifies the V, N, Z, and C bits of the status register. options(nostack), ); #[cfg(portable_atomic_no_asm)] @@ -269,6 +277,7 @@ macro_rules! atomic { asm!( concat!("inv", $asm_suffix, " 0({dst})"), dst = in(reg) dst, + // Do not use `preserves_flags` because INV modifies the V, N, Z, and C bits of the status register. options(nostack), ); #[cfg(portable_atomic_no_asm)] diff --git a/vendor/portable-atomic/src/imp/riscv.rs b/vendor/portable-atomic/src/imp/riscv.rs index 9b4a5cbb7..373607757 100644 --- a/vendor/portable-atomic/src/imp/riscv.rs +++ b/vendor/portable-atomic/src/imp/riscv.rs @@ -1,18 +1,94 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Atomic load/store implementation on RISC-V. // // Refs: +// - RISC-V Instruction Set Manual Volume I: Unprivileged ISA +// https://riscv.org/wp-content/uploads/2019/12/riscv-spec-20191213.pdf +// - RISC-V Atomics ABI Specification +// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/HEAD/riscv-atomic.adoc // - "Mappings from C/C++ primitives to RISC-V primitives." table in RISC-V Instruction Set Manual: // https://five-embeddev.com/riscv-isa-manual/latest/memory.html#sec:memory:porting // - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit // // Generated asm: -// - riscv64gc https://godbolt.org/z/hx4Krb91h +// - riscv64gc https://godbolt.org/z/EETebx7TE +// - riscv32imac https://godbolt.org/z/8zzv73bKh #[cfg(not(portable_atomic_no_asm))] use core::arch::asm; use core::{cell::UnsafeCell, sync::atomic::Ordering}; -macro_rules! atomic { +#[cfg(any(test, portable_atomic_force_amo))] +macro_rules! atomic_rmw_amo_order { + ($op:ident, $order:ident) => { + match $order { + Ordering::Relaxed => $op!(""), + Ordering::Acquire => $op!(".aq"), + Ordering::Release => $op!(".rl"), + // AcqRel and SeqCst RMWs are equivalent. 
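+            // Both map to the .aqrl suffix, matching the "Mappings from C/C++
+            // primitives to RISC-V primitives" table referenced in the file header.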
+ Ordering::AcqRel | Ordering::SeqCst => $op!(".aqrl"), + _ => unreachable!("{:?}", $order), + } + }; +} +#[cfg(any(test, portable_atomic_force_amo))] +macro_rules! atomic_rmw_amo { + ($op:ident, $dst:ident, $val:ident, $order:ident, $asm_suffix:tt) => {{ + let out; + macro_rules! op { + ($asm_order:tt) => { + // SAFETY: The user guaranteed that the AMO instruction is available in this + // system by setting the portable_atomic_force_amo and + // portable_atomic_unsafe_assume_single_core. + // The caller of this macro must guarantee the validity of the pointer. + asm!( + ".option push", + // https://github.com/riscv-non-isa/riscv-asm-manual/blob/HEAD/riscv-asm.md#arch + ".option arch, +a", + concat!("amo", stringify!($op), ".", $asm_suffix, $asm_order, " {out}, {val}, 0({dst})"), + ".option pop", + dst = in(reg) ptr_reg!($dst), + val = in(reg) $val, + out = lateout(reg) out, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw_amo_order!(op, $order); + out + }}; +} +// 32-bit val.wrapping_shl(shift) but no extra `& (u32::BITS - 1)` +#[cfg(any(test, portable_atomic_force_amo))] +#[inline] +fn sllw(val: u32, shift: u32) -> u32 { + // SAFETY: Calling sll{,w} is safe. + unsafe { + let out; + #[cfg(target_arch = "riscv32")] + asm!("sll {out}, {val}, {shift}", out = lateout(reg) out, val = in(reg) val, shift = in(reg) shift, options(pure, nomem, nostack, preserves_flags)); + #[cfg(target_arch = "riscv64")] + asm!("sllw {out}, {val}, {shift}", out = lateout(reg) out, val = in(reg) val, shift = in(reg) shift, options(pure, nomem, nostack, preserves_flags)); + out + } +} +// 32-bit val.wrapping_shr(shift) but no extra `& (u32::BITS - 1)` +#[cfg(any(test, portable_atomic_force_amo))] +#[inline] +fn srlw(val: u32, shift: u32) -> u32 { + // SAFETY: Calling srl{,w} is safe. + unsafe { + let out; + #[cfg(target_arch = "riscv32")] + asm!("srl {out}, {val}, {shift}", out = lateout(reg) out, val = in(reg) val, shift = in(reg) shift, options(pure, nomem, nostack, preserves_flags)); + #[cfg(target_arch = "riscv64")] + asm!("srlw {out}, {val}, {shift}", out = lateout(reg) out, val = in(reg) val, shift = in(reg) shift, options(pure, nomem, nostack, preserves_flags)); + out + } +} + +macro_rules! atomic_load_store { ($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => { #[repr(transparent)] pub(crate) struct $atomic_type $(<$($generics)*>)? { @@ -137,28 +213,172 @@ macro_rules! atomic { }; } -atomic!(AtomicI8, i8, "b"); -atomic!(AtomicU8, u8, "b"); -atomic!(AtomicI16, i16, "h"); -atomic!(AtomicU16, u16, "h"); -atomic!(AtomicI32, i32, "w"); -atomic!(AtomicU32, u32, "w"); +macro_rules! atomic_ptr { + ($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => { + atomic_load_store!($([$($generics)*])? $atomic_type, $value_type, $asm_suffix); + #[cfg(portable_atomic_force_amo)] + impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? { + #[inline] + pub(crate) fn swap(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_rmw_amo!(swap, dst, val, order, $asm_suffix) } + } + } + }; +} + +macro_rules! atomic { + ($atomic_type:ident, $value_type:ty, $asm_suffix:tt, $max:tt, $min:tt) => { + atomic_load_store!($atomic_type, $value_type, $asm_suffix); + // There is no amo{sub,nand,neg}. 
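+        // fetch_sub below is therefore emulated as fetch_add of the two's-complement
+        // negation (val.wrapping_neg()), and fetch_not as fetch_xor with an all-ones mask.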
+ #[cfg(any(test, portable_atomic_force_amo))] + impl $atomic_type { + #[inline] + pub(crate) fn swap(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_rmw_amo!(swap, dst, val, order, $asm_suffix) } + } + + #[inline] + pub(crate) fn fetch_add(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_rmw_amo!(add, dst, val, order, $asm_suffix) } + } + + #[inline] + pub(crate) fn fetch_sub(&self, val: $value_type, order: Ordering) -> $value_type { + self.fetch_add(val.wrapping_neg(), order) + } + + #[inline] + pub(crate) fn fetch_and(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_rmw_amo!(and, dst, val, order, $asm_suffix) } + } + + #[inline] + pub(crate) fn fetch_or(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_rmw_amo!(or, dst, val, order, $asm_suffix) } + } + + #[inline] + pub(crate) fn fetch_xor(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_rmw_amo!(xor, dst, val, order, $asm_suffix) } + } + + #[inline] + pub(crate) fn fetch_not(&self, order: Ordering) -> $value_type { + const NOT_MASK: $value_type = (0 as $value_type).wrapping_sub(1); + self.fetch_xor(NOT_MASK, order) + } + + #[inline] + pub(crate) fn fetch_max(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_rmw_amo!($max, dst, val, order, $asm_suffix) } + } + + #[inline] + pub(crate) fn fetch_min(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + unsafe { atomic_rmw_amo!($min, dst, val, order, $asm_suffix) } + } + } + }; +} + +macro_rules! atomic_sub_word { + ($atomic_type:ident, $value_type:ty, $unsigned_type:ty, $asm_suffix:tt) => { + atomic_load_store!($atomic_type, $value_type, $asm_suffix); + #[cfg(any(test, portable_atomic_force_amo))] + impl $atomic_type { + #[inline] + pub(crate) fn fetch_and(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + let (dst, shift, mask) = crate::utils::create_sub_word_mask_values(dst); + let mask = !sllw(mask as u32, shift as u32); + // TODO: use zero_extend helper instead of cast for val. + let val = sllw(val as $unsigned_type as u32, shift as u32); + let val = val | mask; + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. 
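+                // `val | mask` sets every bit outside the target sub-word, so the
+                // word-sized AMO `and` below leaves the other bytes of the containing
+                // word unchanged.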
+ let out: u32 = unsafe { atomic_rmw_amo!(and, dst, val, order, "w") }; + srlw(out, shift as u32) as $unsigned_type as $value_type + } + + #[inline] + pub(crate) fn fetch_or(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + let (dst, shift, _mask) = crate::utils::create_sub_word_mask_values(dst); + // TODO: use zero_extend helper instead of cast for val. + let val = sllw(val as $unsigned_type as u32, shift as u32); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + let out: u32 = unsafe { atomic_rmw_amo!(or, dst, val, order, "w") }; + srlw(out, shift as u32) as $unsigned_type as $value_type + } + + #[inline] + pub(crate) fn fetch_xor(&self, val: $value_type, order: Ordering) -> $value_type { + let dst = self.v.get(); + let (dst, shift, _mask) = crate::utils::create_sub_word_mask_values(dst); + // TODO: use zero_extend helper instead of cast for val. + let val = sllw(val as $unsigned_type as u32, shift as u32); + // SAFETY: any data races are prevented by atomic intrinsics and the raw + // pointer passed in is valid because we got it from a reference. + let out: u32 = unsafe { atomic_rmw_amo!(xor, dst, val, order, "w") }; + srlw(out, shift as u32) as $unsigned_type as $value_type + } + + #[inline] + pub(crate) fn fetch_not(&self, order: Ordering) -> $value_type { + const NOT_MASK: $value_type = (0 as $value_type).wrapping_sub(1); + self.fetch_xor(NOT_MASK, order) + } + } + }; +} + +atomic_sub_word!(AtomicI8, i8, u8, "b"); +atomic_sub_word!(AtomicU8, u8, u8, "b"); +atomic_sub_word!(AtomicI16, i16, u16, "h"); +atomic_sub_word!(AtomicU16, u16, u16, "h"); +atomic!(AtomicI32, i32, "w", max, min); +atomic!(AtomicU32, u32, "w", maxu, minu); #[cfg(target_arch = "riscv64")] -atomic!(AtomicI64, i64, "d"); +atomic!(AtomicI64, i64, "d", max, min); #[cfg(target_arch = "riscv64")] -atomic!(AtomicU64, u64, "d"); +atomic!(AtomicU64, u64, "d", maxu, minu); #[cfg(target_pointer_width = "32")] -atomic!(AtomicIsize, isize, "w"); +atomic!(AtomicIsize, isize, "w", max, min); #[cfg(target_pointer_width = "32")] -atomic!(AtomicUsize, usize, "w"); +atomic!(AtomicUsize, usize, "w", maxu, minu); #[cfg(target_pointer_width = "32")] -atomic!([T] AtomicPtr, *mut T, "w"); +atomic_ptr!([T] AtomicPtr, *mut T, "w"); #[cfg(target_pointer_width = "64")] -atomic!(AtomicIsize, isize, "d"); +atomic!(AtomicIsize, isize, "d", max, min); #[cfg(target_pointer_width = "64")] -atomic!(AtomicUsize, usize, "d"); +atomic!(AtomicUsize, usize, "d", maxu, minu); #[cfg(target_pointer_width = "64")] -atomic!([T] AtomicPtr, *mut T, "d"); +atomic_ptr!([T] AtomicPtr, *mut T, "d"); #[cfg(test)] mod tests { @@ -177,4 +397,305 @@ mod tests { test_atomic_int_load_store!(u64); test_atomic_int_load_store!(isize); test_atomic_int_load_store!(usize); + + macro_rules! test_atomic_int_amo { + ($int_type:ident) => { + paste::paste! { + #[allow( + clippy::alloc_instead_of_core, + clippy::std_instead_of_alloc, + clippy::std_instead_of_core, + clippy::undocumented_unsafe_blocks + )] + mod [<test_atomic_ $int_type _amo>] { + use super::*; + test_atomic_int_amo!([<Atomic $int_type:camel>], $int_type); + } + } + }; + ($atomic_type:ty, $int_type:ident) => { + ::quickcheck::quickcheck! 
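+            // Each property checks that the AMO-based RMW returns the previous value and
+            // leaves the expected result in memory, for every supported swap ordering.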
{ + fn quickcheck_swap(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.swap(y, order), x); + assert_eq!(a.swap(x, order), y); + } + true + } + fn quickcheck_fetch_add(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_add(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), x.wrapping_add(y)); + let a = <$atomic_type>::new(y); + assert_eq!(a.fetch_add(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), y.wrapping_add(x)); + } + true + } + fn quickcheck_fetch_sub(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_sub(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), x.wrapping_sub(y)); + let a = <$atomic_type>::new(y); + assert_eq!(a.fetch_sub(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), y.wrapping_sub(x)); + } + true + } + fn quickcheck_fetch_and(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_and(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), x & y); + let a = <$atomic_type>::new(y); + assert_eq!(a.fetch_and(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), y & x); + } + true + } + fn quickcheck_fetch_or(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_or(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), x | y); + let a = <$atomic_type>::new(y); + assert_eq!(a.fetch_or(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), y | x); + } + true + } + fn quickcheck_fetch_xor(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_xor(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), x ^ y); + let a = <$atomic_type>::new(y); + assert_eq!(a.fetch_xor(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), y ^ x); + } + true + } + fn quickcheck_fetch_max(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_max(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), core::cmp::max(x, y)); + let a = <$atomic_type>::new(y); + assert_eq!(a.fetch_max(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), core::cmp::max(y, x)); + } + true + } + fn quickcheck_fetch_min(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_min(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), core::cmp::min(x, y)); + let a = <$atomic_type>::new(y); + assert_eq!(a.fetch_min(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), core::cmp::min(y, x)); + } + true + } + fn quickcheck_fetch_not(x: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + let a = <$atomic_type>::new(x); + assert_eq!(a.fetch_not(order), x); + assert_eq!(a.load(Ordering::Relaxed), !x); + assert_eq!(a.fetch_not(order), !x); + assert_eq!(a.load(Ordering::Relaxed), x); + } + true + } + } + }; + } + macro_rules! test_atomic_int_amo_sub_word { + ($int_type:ident) => { + paste::paste! 
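+            // The sub-word tests below place the target atomic inside an aligned array and
+            // assert that the word-sized AMOs never clobber the neighboring elements.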
{ + #[allow( + clippy::alloc_instead_of_core, + clippy::std_instead_of_alloc, + clippy::std_instead_of_core, + clippy::undocumented_unsafe_blocks + )] + mod [<test_atomic_ $int_type _amo>] { + use super::*; + test_atomic_int_amo_sub_word!([<Atomic $int_type:camel>], $int_type); + } + } + }; + ($atomic_type:ty, $int_type:ident) => { + use crate::tests::helper::*; + ::quickcheck::quickcheck! { + fn quickcheck_fetch_and(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + for base in [0, !0] { + let mut arr = Align16([ + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + ]); + let a_idx = fastrand::usize(3..=6); + arr.0[a_idx] = <$atomic_type>::new(x); + let a = &arr.0[a_idx]; + assert_eq!(a.fetch_and(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), x & y); + for i in 0..a_idx { + assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written"); + } + for i in a_idx + 1..arr.0.len() { + assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written"); + } + arr.0[a_idx] = <$atomic_type>::new(y); + let a = &arr.0[a_idx]; + assert_eq!(a.fetch_and(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), y & x); + for i in 0..a_idx { + assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written"); + } + for i in a_idx + 1..arr.0.len() { + assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written"); + } + } + } + true + } + fn quickcheck_fetch_or(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + for base in [0, !0] { + let mut arr = Align16([ + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + ]); + let a_idx = fastrand::usize(3..=6); + arr.0[a_idx] = <$atomic_type>::new(x); + let a = &arr.0[a_idx]; + assert_eq!(a.fetch_or(y, order), x); + assert_eq!(a.load(Ordering::Relaxed), x | y); + for i in 0..a_idx { + assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written"); + } + for i in a_idx + 1..arr.0.len() { + assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written"); + } + arr.0[a_idx] = <$atomic_type>::new(y); + let a = &arr.0[a_idx]; + assert_eq!(a.fetch_or(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), y | x); + for i in 0..a_idx { + assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written"); + } + for i in a_idx + 1..arr.0.len() { + assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written"); + } + } + } + true + } + fn quickcheck_fetch_xor(x: $int_type, y: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + for base in [0, !0] { + let mut arr = Align16([ + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + ]); + let a_idx = fastrand::usize(3..=6); + arr.0[a_idx] = <$atomic_type>::new(x); + let a = &arr.0[a_idx]; + assert_eq!(a.fetch_xor(y, order), x); + 
assert_eq!(a.load(Ordering::Relaxed), x ^ y); + for i in 0..a_idx { + assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written"); + } + for i in a_idx + 1..arr.0.len() { + assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written"); + } + arr.0[a_idx] = <$atomic_type>::new(y); + let a = &arr.0[a_idx]; + assert_eq!(a.fetch_xor(x, order), y); + assert_eq!(a.load(Ordering::Relaxed), y ^ x); + for i in 0..a_idx { + assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written"); + } + for i in a_idx + 1..arr.0.len() { + assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written"); + } + } + } + true + } + fn quickcheck_fetch_not(x: $int_type) -> bool { + for &order in &test_helper::SWAP_ORDERINGS { + for base in [0, !0] { + let mut arr = Align16([ + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + <$atomic_type>::new(base), + ]); + let a_idx = fastrand::usize(3..=6); + arr.0[a_idx] = <$atomic_type>::new(x); + let a = &arr.0[a_idx]; + assert_eq!(a.fetch_not(order), x); + assert_eq!(a.load(Ordering::Relaxed), !x); + assert_eq!(a.fetch_not(order), !x); + assert_eq!(a.load(Ordering::Relaxed), x); + for i in 0..a_idx { + assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written"); + } + for i in a_idx + 1..arr.0.len() { + assert_eq!(arr.0[i].load(Ordering::Relaxed), base, "invalid value written"); + } + } + } + true + } + } + }; + } + test_atomic_int_amo_sub_word!(i8); + test_atomic_int_amo_sub_word!(u8); + test_atomic_int_amo_sub_word!(i16); + test_atomic_int_amo_sub_word!(u16); + test_atomic_int_amo!(i32); + test_atomic_int_amo!(u32); + #[cfg(target_arch = "riscv64")] + test_atomic_int_amo!(i64); + #[cfg(target_arch = "riscv64")] + test_atomic_int_amo!(u64); + test_atomic_int_amo!(isize); + test_atomic_int_amo!(usize); } diff --git a/vendor/portable-atomic/src/imp/x86.rs b/vendor/portable-atomic/src/imp/x86.rs index 82afee10b..7caa20301 100644 --- a/vendor/portable-atomic/src/imp/x86.rs +++ b/vendor/portable-atomic/src/imp/x86.rs @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + // Atomic operations implementation on x86/x86_64. // // This module provides atomic operations not supported by LLVM or optimizes @@ -6,12 +8,13 @@ // Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use // this module and use CAS loop instead. // +// Refs: +// - x86 and amd64 instruction reference https://www.felixcloutier.com/x86 +// // Generated asm: -// - x86_64 https://godbolt.org/z/8fve4YP1G +// - x86_64 https://godbolt.org/z/d17eTs5Ec -#[cfg(not(portable_atomic_no_asm))] -use core::arch::asm; -use core::sync::atomic::Ordering; +use core::{arch::asm, sync::atomic::Ordering}; use super::core_atomic::{ AtomicI16, AtomicI32, AtomicI64, AtomicI8, AtomicIsize, AtomicU16, AtomicU32, AtomicU64, @@ -32,7 +35,7 @@ macro_rules! ptr_modifier { } macro_rules! atomic_int { - ($atomic_type:ident, $int_type:ident, $ptr_size:tt) => { + ($atomic_type:ident, $ptr_size:tt) => { impl $atomic_type { #[inline] pub(crate) fn not(&self, _order: Ordering) { @@ -71,24 +74,24 @@ macro_rules! 
atomic_int { }; } -atomic_int!(AtomicI8, i8, "byte"); -atomic_int!(AtomicU8, u8, "byte"); -atomic_int!(AtomicI16, i16, "word"); -atomic_int!(AtomicU16, u16, "word"); -atomic_int!(AtomicI32, i32, "dword"); -atomic_int!(AtomicU32, u32, "dword"); +atomic_int!(AtomicI8, "byte"); +atomic_int!(AtomicU8, "byte"); +atomic_int!(AtomicI16, "word"); +atomic_int!(AtomicU16, "word"); +atomic_int!(AtomicI32, "dword"); +atomic_int!(AtomicU32, "dword"); #[cfg(target_arch = "x86_64")] -atomic_int!(AtomicI64, i64, "qword"); +atomic_int!(AtomicI64, "qword"); #[cfg(target_arch = "x86_64")] -atomic_int!(AtomicU64, u64, "qword"); +atomic_int!(AtomicU64, "qword"); #[cfg(target_pointer_width = "32")] -atomic_int!(AtomicIsize, isize, "dword"); +atomic_int!(AtomicIsize, "dword"); #[cfg(target_pointer_width = "32")] -atomic_int!(AtomicUsize, usize, "dword"); +atomic_int!(AtomicUsize, "dword"); #[cfg(target_pointer_width = "64")] -atomic_int!(AtomicIsize, isize, "qword"); +atomic_int!(AtomicIsize, "qword"); #[cfg(target_pointer_width = "64")] -atomic_int!(AtomicUsize, usize, "qword"); +atomic_int!(AtomicUsize, "qword"); #[cfg(target_arch = "x86")] impl AtomicI64 { @@ -116,11 +119,9 @@ impl AtomicU64 { macro_rules! atomic_bit_opts { ($atomic_type:ident, $int_type:ident, $val_modifier:tt, $ptr_size:tt) => { // LLVM 14 and older don't support generating `lock bt{s,r,c}`. - // https://godbolt.org/z/G1TMKza97 // LLVM 15 only supports generating `lock bt{s,r,c}` for immediate bit offsets. - // https://godbolt.org/z/dzzhr81z6 - // LLVM 16 can generate `lock bt{s,r,c}` for both immediate and register bit offsets. - // https://godbolt.org/z/7YTvsorn1 + // LLVM 16+ can generate `lock bt{s,r,c}` for both immediate and register bit offsets. + // https://godbolt.org/z/TGhr5z4ds // So, use fetch_* based implementations on LLVM 16+, otherwise use asm based implementations. #[cfg(portable_atomic_llvm_16)] impl_default_bit_opts!($atomic_type, $int_type); @@ -138,18 +139,18 @@ macro_rules! atomic_bit_opts { // // https://www.felixcloutier.com/x86/bts unsafe { - let out: u8; + let r: u8; // atomic RMW is always SeqCst. asm!( concat!("lock bts ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {bit", $val_modifier, "}"), - "setb {out}", + "setb {r}", dst = in(reg) dst, bit = in(reg) (bit & (Self::BITS - 1)) as $int_type, - out = out(reg_byte) out, + r = out(reg_byte) r, // Do not use `preserves_flags` because BTS modifies the CF flag. options(nostack), ); - out != 0 + r != 0 } } #[inline] @@ -162,18 +163,18 @@ macro_rules! atomic_bit_opts { // // https://www.felixcloutier.com/x86/btr unsafe { - let out: u8; + let r: u8; // atomic RMW is always SeqCst. asm!( concat!("lock btr ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {bit", $val_modifier, "}"), - "setb {out}", + "setb {r}", dst = in(reg) dst, bit = in(reg) (bit & (Self::BITS - 1)) as $int_type, - out = out(reg_byte) out, + r = out(reg_byte) r, // Do not use `preserves_flags` because BTR modifies the CF flag. options(nostack), ); - out != 0 + r != 0 } } #[inline] @@ -186,18 +187,18 @@ macro_rules! atomic_bit_opts { // // https://www.felixcloutier.com/x86/btc unsafe { - let out: u8; + let r: u8; // atomic RMW is always SeqCst. asm!( concat!("lock btc ", $ptr_size, " ptr [{dst", ptr_modifier!(), "}], {bit", $val_modifier, "}"), - "setb {out}", + "setb {r}", dst = in(reg) dst, bit = in(reg) (bit & (Self::BITS - 1)) as $int_type, - out = out(reg_byte) out, + r = out(reg_byte) r, // Do not use `preserves_flags` because BTC modifies the CF flag. 
options(nostack), ); - out != 0 + r != 0 } } }
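For reference, a minimal sketch (not part of this commit; the free-function names are illustrative) of the fetch_*-based bit operations that the impl_default_bit_opts! path above falls back to on LLVM 16+. Each operation builds a single-bit mask, applies the corresponding read-modify-write, and reports whether the bit was previously set, mirroring the setb-based asm implementations:

use core::sync::atomic::{AtomicU32, Ordering};

// Illustrative free functions; the crate itself generates methods on its own atomic types.
fn bit_set(a: &AtomicU32, bit: u32) -> bool {
    // Same `bit & (BITS - 1)` wrapping as the asm implementations above.
    let mask = 1u32 << (bit % u32::BITS);
    // Atomic RMW here is always SeqCst, as in the asm versions.
    a.fetch_or(mask, Ordering::SeqCst) & mask != 0
}
fn bit_clear(a: &AtomicU32, bit: u32) -> bool {
    let mask = 1u32 << (bit % u32::BITS);
    a.fetch_and(!mask, Ordering::SeqCst) & mask != 0
}
fn bit_toggle(a: &AtomicU32, bit: u32) -> bool {
    let mask = 1u32 << (bit % u32::BITS);
    a.fetch_xor(mask, Ordering::SeqCst) & mask != 0
}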