diff options
Diffstat (limited to '')
6 files changed, 42 insertions, 52 deletions
diff --git a/library/stdarch/crates/core_arch/src/x86/avx2.rs b/library/stdarch/crates/core_arch/src/x86/avx2.rs index 081609ece..24f9c0301 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx2.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx2.rs @@ -1195,7 +1195,7 @@ pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>( /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where -/// `scale` should be 1, 2, 4 and 8. +/// `scale` should be 1, 2, 4 or 8. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_epi64) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs index 47d565cea..49d78ed60 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs @@ -8545,9 +8545,6 @@ pub unsafe fn _mm_movm_epi8(k: __mmask16) -> __m128i { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kadd_mask32&expand=3207) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(all(test, target_arch = "x86"), assert_instr(add))] -#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(lea))] // generate normal lea/add code instead of kaddd - //llvm.x86.avx512.kadd.d pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { transmute(a + b) } @@ -8557,9 +8554,6 @@ pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kadd_mask64&expand=3208) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(all(test, target_arch = "x86"), assert_instr(add))] -#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(lea))] // generate normal lea/add code instead of kaddd - //llvm.x86.avx512.kadd.d pub unsafe fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { transmute(a + b) } @@ -8569,7 +8563,6 @@ pub unsafe fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kand_mask32&expand=3213) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandd pub unsafe fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { transmute(a & b) } @@ -8579,7 +8572,6 @@ pub unsafe fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kand_mask64&expand=3214) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandq pub unsafe fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { transmute(a & b) } @@ -8607,7 +8599,6 @@ pub unsafe fn _knot_mask64(a: __mmask64) -> __mmask64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kandn_mask32&expand=3219) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(not))] // generate normal and code instead of kandnd pub unsafe fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { transmute(_knot_mask32(a) & b) } @@ -8617,7 +8608,6 @@ pub unsafe fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kandn_mask64&expand=3220) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(not))] // generate normal and code instead of kandnq pub unsafe fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { transmute(_knot_mask64(a) & b) } @@ -8627,7 +8617,6 @@ pub unsafe fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kor_mask32&expand=3240) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(or))] // generate normal and code instead of kord pub unsafe fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { transmute(a | b) } @@ -8637,7 +8626,6 @@ pub unsafe fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kor_mask64&expand=3241) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(or))] // generate normal and code instead of korq pub unsafe fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { transmute(a | b) } @@ -8647,7 +8635,6 @@ pub unsafe fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxor_mask32&expand=3292) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kxord pub unsafe fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { transmute(a ^ b) } @@ -8657,7 +8644,6 @@ pub unsafe fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxor_mask64&expand=3293) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kxorq pub unsafe fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { transmute(a ^ b) } @@ -8667,7 +8653,6 @@ pub unsafe fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxnor_mask32&expand=3286) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kxnord pub unsafe fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { transmute(_knot_mask32(a ^ b)) } @@ -8677,7 +8662,6 @@ pub unsafe fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxnor_mask64&expand=3287) #[inline] #[target_feature(enable = "avx512bw")] -#[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kxnorq pub unsafe fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { transmute(_knot_mask64(a ^ b)) } diff --git a/library/stdarch/crates/core_arch/src/x86/avx512gfni.rs b/library/stdarch/crates/core_arch/src/x86/avx512gfni.rs index d8ac5c29c..66fd1c2e1 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512gfni.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512gfni.rs @@ -829,21 +829,21 @@ mod tests { #[target_feature(enable = "sse2")] unsafe fn load_m128i_word<T>(data: &[T], word_index: usize) -> __m128i { let byte_offset = word_index * 16 / size_of::<T>(); - let pointer = data.as_ptr().offset(byte_offset as isize) as *const __m128i; + let pointer = data.as_ptr().add(byte_offset) as *const __m128i; _mm_loadu_si128(black_box(pointer)) } #[target_feature(enable = "avx")] unsafe fn load_m256i_word<T>(data: &[T], word_index: usize) -> __m256i { let byte_offset = word_index * 32 / size_of::<T>(); - let pointer = data.as_ptr().offset(byte_offset as isize) as *const __m256i; + let pointer = data.as_ptr().add(byte_offset) as *const __m256i; _mm256_loadu_si256(black_box(pointer)) } #[target_feature(enable = "avx512f")] unsafe fn load_m512i_word<T>(data: &[T], word_index: usize) -> __m512i { let byte_offset = word_index * 64 / size_of::<T>(); - let pointer = data.as_ptr().offset(byte_offset as isize) as *const i32; + let pointer = data.as_ptr().add(byte_offset) as *const i32; _mm512_loadu_si512(black_box(pointer)) } diff --git a/library/stdarch/crates/core_arch/src/x86/sse.rs b/library/stdarch/crates/core_arch/src/x86/sse.rs index 2c4295ef6..03c3a14a5 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse.rs @@ -1185,9 +1185,9 @@ pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 { /// /// ```text /// let a0 = *p; -/// let a1 = *p.offset(1); -/// let a2 = *p.offset(2); -/// let a3 = *p.offset(3); +/// let a1 = *p.add(1); +/// let a2 = *p.add(2); +/// let a3 = *p.add(3); /// __m128::new(a3, a2, a1, a0) /// ``` /// @@ -1241,9 +1241,9 @@ pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) { /// ```text /// let x = a.extract(0); /// *p = x; -/// *p.offset(1) = x; -/// *p.offset(2) = x; -/// *p.offset(3) = x; +/// *p.add(1) = x; +/// *p.add(2) = x; +/// *p.add(3) = x; /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_ps) @@ -1317,9 +1317,9 @@ pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) { /// /// ```text /// *p = a.extract(3); -/// *p.offset(1) = a.extract(2); -/// *p.offset(2) = a.extract(1); -/// *p.offset(3) = a.extract(0); +/// *p.add(1) = a.extract(2); +/// *p.add(2) = a.extract(1); +/// *p.add(3) = a.extract(0); /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_ps) @@ -3006,9 +3006,9 @@ mod tests { let unalignment = (p as usize) & 0xf; if unalignment != 0 { - let delta = ((16 - unalignment) >> 2) as isize; + let delta = (16 - unalignment) >> 2; fixup = delta as f32; - p = p.offset(delta); + p = p.add(delta); } let r = _mm_load_ps(p); @@ -3019,7 +3019,7 @@ mod tests { #[simd_test(enable = "sse")] unsafe fn test_mm_loadu_ps() { let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; - let p = vals.as_ptr().offset(3); + let p = vals.as_ptr().add(3); let r = _mm_loadu_ps(black_box(p)); assert_eq_m128(r, _mm_setr_ps(4.0, 5.0, 6.0, 7.0)); } @@ -3036,9 +3036,9 @@ mod tests { let unalignment = (p as usize) & 0xf; if unalignment != 0 { - let delta = ((16 - unalignment) >> 2) as isize; + let delta = (16 - unalignment) >> 2; fixup = delta as f32; - p = p.offset(delta); + p = p.add(delta); } let r = _mm_loadr_ps(p); @@ -3057,7 +3057,7 @@ mod tests { unsafe fn test_mm_store_ss() { let mut vals = [0.0f32; 8]; let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - _mm_store_ss(vals.as_mut_ptr().offset(1), a); + _mm_store_ss(vals.as_mut_ptr().add(1), a); assert_eq!(vals[0], 0.0); assert_eq!(vals[1], 1.0); @@ -3152,7 +3152,7 @@ mod tests { // Make sure p is **not** aligned to 16-byte boundary if (p as usize) & 0xf == 0 { ofs = 1; - p = p.offset(1); + p = p.add(1); } _mm_storeu_ps(p, *black_box(&a)); diff --git a/library/stdarch/crates/core_arch/src/x86/sse2.rs b/library/stdarch/crates/core_arch/src/x86/sse2.rs index 5a9120042..d82b8641f 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse2.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse2.rs @@ -4518,7 +4518,7 @@ mod tests { // Make sure p is **not** aligned to 16-byte boundary if (p as usize) & 0xf == 0 { ofs = 1; - p = p.offset(1); + p = p.add(1); } _mm_storeu_pd(p, *black_box(&a)); @@ -4606,7 +4606,7 @@ mod tests { let mut offset = 0; if (d as usize) & 0xf == 0 { offset = 1; - d = d.offset(offset as isize); + d = d.add(offset); } let r = _mm_loadu_pd(d); diff --git a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs index 391daed20..a262932af 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs @@ -34,11 +34,11 @@ use stdarch_test::assert_instr; /// support `cmpxchg16b` and the program enters an execution path that /// eventually would reach this function the behavior is undefined. /// -/// The `success` ordering must also be stronger or equal to `failure`, or this -/// function call is undefined. See the `Atomic*` documentation's -/// `compare_exchange` function for more information. When `compare_exchange` -/// panics, this is undefined behavior. Currently this function aborts the -/// process with an undefined instruction. +/// The failure ordering must be [`Ordering::SeqCst`], [`Ordering::Acquire`] or +/// [`Ordering::Relaxed`], or this function call is undefined. See the `Atomic*` +/// documentation's `compare_exchange` function for more information. When +/// `compare_exchange` panics, this is undefined behavior. Currently this +/// function aborts the process with an undefined instruction. #[inline] #[cfg_attr(test, assert_instr(cmpxchg16b, success = Ordering::SeqCst, failure = Ordering::SeqCst))] #[target_feature(enable = "cmpxchg16b")] @@ -54,15 +54,21 @@ pub unsafe fn cmpxchg16b( debug_assert!(dst as usize % 16 == 0); let (val, _ok) = match (success, failure) { - (Acquire, Acquire) => intrinsics::atomic_cxchg_acq(dst, old, new), - (Release, Relaxed) => intrinsics::atomic_cxchg_rel(dst, old, new), - (AcqRel, Acquire) => intrinsics::atomic_cxchg_acqrel(dst, old, new), - (Relaxed, Relaxed) => intrinsics::atomic_cxchg_relaxed(dst, old, new), - (SeqCst, SeqCst) => intrinsics::atomic_cxchg(dst, old, new), - (Acquire, Relaxed) => intrinsics::atomic_cxchg_acq_failrelaxed(dst, old, new), - (AcqRel, Relaxed) => intrinsics::atomic_cxchg_acqrel_failrelaxed(dst, old, new), - (SeqCst, Relaxed) => intrinsics::atomic_cxchg_failrelaxed(dst, old, new), - (SeqCst, Acquire) => intrinsics::atomic_cxchg_failacq(dst, old, new), + (Relaxed, Relaxed) => intrinsics::atomic_cxchg_relaxed_relaxed(dst, old, new), + (Relaxed, Acquire) => intrinsics::atomic_cxchg_relaxed_acquire(dst, old, new), + (Relaxed, SeqCst) => intrinsics::atomic_cxchg_relaxed_seqcst(dst, old, new), + (Acquire, Relaxed) => intrinsics::atomic_cxchg_acquire_relaxed(dst, old, new), + (Acquire, Acquire) => intrinsics::atomic_cxchg_acquire_acquire(dst, old, new), + (Acquire, SeqCst) => intrinsics::atomic_cxchg_acquire_seqcst(dst, old, new), + (Release, Relaxed) => intrinsics::atomic_cxchg_release_relaxed(dst, old, new), + (Release, Acquire) => intrinsics::atomic_cxchg_release_acquire(dst, old, new), + (Release, SeqCst) => intrinsics::atomic_cxchg_release_seqcst(dst, old, new), + (AcqRel, Relaxed) => intrinsics::atomic_cxchg_acqrel_relaxed(dst, old, new), + (AcqRel, Acquire) => intrinsics::atomic_cxchg_acqrel_acquire(dst, old, new), + (AcqRel, SeqCst) => intrinsics::atomic_cxchg_acqrel_seqcst(dst, old, new), + (SeqCst, Relaxed) => intrinsics::atomic_cxchg_seqcst_relaxed(dst, old, new), + (SeqCst, Acquire) => intrinsics::atomic_cxchg_seqcst_acquire(dst, old, new), + (SeqCst, SeqCst) => intrinsics::atomic_cxchg_seqcst_seqcst(dst, old, new), // The above block is all copied from libcore, and this statement is // also copied from libcore except that it's a panic in libcore and we |