Diffstat
9 files changed, 61 insertions(+), 79 deletions(-)
diff --git a/library/stdarch/crates/core_arch/src/x86/avx2.rs b/library/stdarch/crates/core_arch/src/x86/avx2.rs
index 081609ece..16add3dbb 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx2.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx2.rs
@@ -1195,7 +1195,7 @@ pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
 /// Returns values from `slice` at offsets determined by `offsets * scale`,
 /// where
-/// `scale` should be 1, 2, 4 and 8.
+/// `scale` should be 1, 2, 4 or 8.
 ///
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_epi64)
 #[inline]
@@ -2001,7 +2001,7 @@ pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpmovmskb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 {
-    pmovmskb(a.as_i8x32())
+    simd_bitmask::<_, u32>(a.as_i8x32()) as i32
 }
 
 /// Computes the sum of absolute differences (SADs) of quadruplets of unsigned
@@ -3642,8 +3642,6 @@ extern "C" {
     fn pminud(a: u32x8, b: u32x8) -> u32x8;
     #[link_name = "llvm.x86.avx2.pminu.b"]
     fn pminub(a: u8x32, b: u8x32) -> u8x32;
-    #[link_name = "llvm.x86.avx2.pmovmskb"]
-    fn pmovmskb(a: i8x32) -> i32;
     #[link_name = "llvm.x86.avx2.mpsadbw"]
     fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
     #[link_name = "llvm.x86.avx2.pmulhu.w"]
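Note: `_mm256_movemask_epi8` is now implemented via the generic `simd_bitmask` intrinsic instead of the AVX2-specific `llvm.x86.avx2.pmovmskb` link, whose declaration is dropped from the `extern "C"` block; the `assert_instr(vpmovmskb)` attribute is kept, so the test suite still checks that the same instruction is emitted. For reference, a scalar model of what the intrinsic computes (an illustrative sketch, not the stdarch implementation):

    // Bit i of the result is the sign (most significant) bit of byte i.
    fn movemask_epi8_model(bytes: [i8; 32]) -> i32 {
        let mut mask: u32 = 0;
        for (i, b) in bytes.iter().enumerate() {
            if *b < 0 {
                mask |= 1 << i;
            }
        }
        mask as i32
    }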
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
index 47d565cea..49d78ed60 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
@@ -8545,9 +8545,6 @@ pub unsafe fn _mm_movm_epi8(k: __mmask16) -> __m128i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kadd_mask32&expand=3207)
 #[inline]
 #[target_feature(enable = "avx512bw")]
-#[cfg_attr(all(test, target_arch = "x86"), assert_instr(add))]
-#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(lea))] // generate normal lea/add code instead of kaddd
-                                                                  //llvm.x86.avx512.kadd.d
 pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
     transmute(a + b)
 }
@@ -8557,9 +8554,6 @@ pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kadd_mask64&expand=3208)
 #[inline]
 #[target_feature(enable = "avx512bw")]
-#[cfg_attr(all(test, target_arch = "x86"), assert_instr(add))]
-#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(lea))] // generate normal lea/add code instead of kaddd
-                                                                  //llvm.x86.avx512.kadd.d
 pub unsafe fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
     transmute(a + b)
 }
@@ -8569,7 +8563,6 @@ pub unsafe fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kand_mask32&expand=3213)
 #[inline]
 #[target_feature(enable = "avx512bw")]
-#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandd
 pub unsafe fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
     transmute(a & b)
 }
@@ -8579,7 +8572,6 @@ pub unsafe fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kand_mask64&expand=3214)
 #[inline]
 #[target_feature(enable = "avx512bw")]
-#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandq
 pub unsafe fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
     transmute(a & b)
 }
@@ -8607,7 +8599,6 @@ pub unsafe fn _knot_mask64(a: __mmask64) -> __mmask64 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kandn_mask32&expand=3219)
 #[inline]
 #[target_feature(enable = "avx512bw")]
-#[cfg_attr(test, assert_instr(not))] // generate normal and code instead of kandnd
 pub unsafe fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
     transmute(_knot_mask32(a) & b)
 }
@@ -8617,7 +8608,6 @@ pub unsafe fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kandn_mask64&expand=3220)
 #[inline]
 #[target_feature(enable = "avx512bw")]
-#[cfg_attr(test, assert_instr(not))] // generate normal and code instead of kandnq
 pub unsafe fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
     transmute(_knot_mask64(a) & b)
 }
@@ -8627,7 +8617,6 @@ pub unsafe fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kor_mask32&expand=3240)
 #[inline]
 #[target_feature(enable = "avx512bw")]
-#[cfg_attr(test, assert_instr(or))] // generate normal and code instead of kord
 pub unsafe fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
     transmute(a | b)
 }
@@ -8637,7 +8626,6 @@ pub unsafe fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kor_mask64&expand=3241)
 #[inline]
 #[target_feature(enable = "avx512bw")]
-#[cfg_attr(test, assert_instr(or))] // generate normal and code instead of korq
 pub unsafe fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
     transmute(a | b)
 }
@@ -8647,7 +8635,6 @@ pub unsafe fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxor_mask32&expand=3292)
 #[inline]
 #[target_feature(enable = "avx512bw")]
-#[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kxord
 pub unsafe fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
     transmute(a ^ b)
 }
@@ -8657,7 +8644,6 @@ pub unsafe fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxor_mask64&expand=3293)
 #[inline]
 #[target_feature(enable = "avx512bw")]
-#[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kxorq
 pub unsafe fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
     transmute(a ^ b)
 }
@@ -8667,7 +8653,6 @@ pub unsafe fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxnor_mask32&expand=3286)
 #[inline]
 #[target_feature(enable = "avx512bw")]
-#[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kxnord
 pub unsafe fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
     transmute(_knot_mask32(a ^ b))
 }
@@ -8677,7 +8662,6 @@ pub unsafe fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxnor_mask64&expand=3287)
 #[inline]
 #[target_feature(enable = "avx512bw")]
-#[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kxnorq
 pub unsafe fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
     transmute(_knot_mask64(a ^ b))
 }
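Note: the dropped `assert_instr` attributes pinned these k-mask helpers to specific scalar instructions (`add`/`lea`, `and`, `or`, `xor`, `not`); which instruction the compiler picks for a plain integer operation is a codegen detail, which is presumably why the assertions were removed (the patch itself does not state the rationale). Since `__mmask32`/`__mmask64` are transparent `u32`/`u64`, the helpers are ordinary integer operations, e.g. (illustrative sketch):

    fn kand_mask32(a: u32, b: u32) -> u32 {
        a & b // what _kand_mask32 computes
    }
    fn kxnor_mask64(a: u64, b: u64) -> u64 {
        !(a ^ b) // _kxnor_mask64 is NOT of XOR
    }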
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512gfni.rs b/library/stdarch/crates/core_arch/src/x86/avx512gfni.rs
index d8ac5c29c..66fd1c2e1 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512gfni.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512gfni.rs
@@ -829,21 +829,21 @@ mod tests {
     #[target_feature(enable = "sse2")]
     unsafe fn load_m128i_word<T>(data: &[T], word_index: usize) -> __m128i {
         let byte_offset = word_index * 16 / size_of::<T>();
-        let pointer = data.as_ptr().offset(byte_offset as isize) as *const __m128i;
+        let pointer = data.as_ptr().add(byte_offset) as *const __m128i;
         _mm_loadu_si128(black_box(pointer))
     }
 
     #[target_feature(enable = "avx")]
     unsafe fn load_m256i_word<T>(data: &[T], word_index: usize) -> __m256i {
         let byte_offset = word_index * 32 / size_of::<T>();
-        let pointer = data.as_ptr().offset(byte_offset as isize) as *const __m256i;
+        let pointer = data.as_ptr().add(byte_offset) as *const __m256i;
         _mm256_loadu_si256(black_box(pointer))
     }
 
     #[target_feature(enable = "avx512f")]
     unsafe fn load_m512i_word<T>(data: &[T], word_index: usize) -> __m512i {
         let byte_offset = word_index * 64 / size_of::<T>();
-        let pointer = data.as_ptr().offset(byte_offset as isize) as *const i32;
+        let pointer = data.as_ptr().add(byte_offset) as *const i32;
         _mm512_loadu_si512(black_box(pointer))
     }
diff --git a/library/stdarch/crates/core_arch/src/x86/cpuid.rs b/library/stdarch/crates/core_arch/src/x86/cpuid.rs
index 6b90295ef..2624e8bdf 100644
--- a/library/stdarch/crates/core_arch/src/x86/cpuid.rs
+++ b/library/stdarch/crates/core_arch/src/x86/cpuid.rs
@@ -62,27 +62,27 @@ pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult {
     #[cfg(target_arch = "x86")]
     {
         asm!(
-            "movl %ebx, {0}",
+            "mov {0}, ebx",
             "cpuid",
-            "xchgl %ebx, {0}",
-            lateout(reg) ebx,
-            inlateout("eax") leaf => eax,
-            inlateout("ecx") sub_leaf => ecx,
-            lateout("edx") edx,
-            options(nostack, preserves_flags, att_syntax),
+            "xchg {0}, ebx",
+            out(reg) ebx,
+            inout("eax") leaf => eax,
+            inout("ecx") sub_leaf => ecx,
+            out("edx") edx,
+            options(nostack, preserves_flags),
        );
     }
     #[cfg(target_arch = "x86_64")]
     {
         asm!(
-            "movq %rbx, {0:r}",
+            "mov {0:r}, rbx",
             "cpuid",
-            "xchgq %rbx, {0:r}",
-            lateout(reg) ebx,
-            inlateout("eax") leaf => eax,
-            inlateout("ecx") sub_leaf => ecx,
-            lateout("edx") edx,
-            options(nostack, preserves_flags, att_syntax),
+            "xchg {0:r}, rbx",
+            out(reg) ebx,
+            inout("eax") leaf => eax,
+            inout("ecx") sub_leaf => ecx,
+            out("edx") edx,
+            options(nostack, preserves_flags),
        );
     }
     CpuidResult { eax, ebx, ecx, edx }
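Note: the `__cpuid_count` blocks switch from AT&T to the default Intel syntax (dropping `att_syntax`) and from `lateout`/`inlateout` to `out`/`inout` operands. The mov/xchg pair around `cpuid` is still required because LLVM reserves `rbx`/`ebx`, so the EBX output has to round-trip through a scratch register. A hypothetical caller, assuming x86_64 (bit position per Intel's manual: CPUID.(EAX=7,ECX=0):EBX bit 5 = AVX2):

    #[cfg(target_arch = "x86_64")]
    unsafe fn has_avx2() -> bool {
        use core::arch::x86_64::__cpuid_count;
        let r = __cpuid_count(7, 0); // leaf 7, sub-leaf 0: extended features
        (r.ebx >> 5) & 1 == 1
    }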
diff --git a/library/stdarch/crates/core_arch/src/x86/mod.rs b/library/stdarch/crates/core_arch/src/x86/mod.rs
index 547bfe67d..6b50e95b2 100644
--- a/library/stdarch/crates/core_arch/src/x86/mod.rs
+++ b/library/stdarch/crates/core_arch/src/x86/mod.rs
@@ -306,7 +306,7 @@ types! {
     /// 256-bit wide set of 16 'u16' types, x86-specific
     ///
-    /// This type is the same as the `__m128bh` type defined by Intel,
+    /// This type is the same as the `__m256bh` type defined by Intel,
     /// representing a 256-bit SIMD register which internally is consisted of
     /// 16 packed `u16` instances. Its purpose is for bf16 related intrinsic
     /// implementations.
@@ -317,7 +317,7 @@ types! {
     /// 512-bit wide set of 32 'u16' types, x86-specific
     ///
-    /// This type is the same as the `__m128bh` type defined by Intel,
+    /// This type is the same as the `__m512bh` type defined by Intel,
     /// representing a 512-bit SIMD register which internally is consisted of
     /// 32 packed `u16` instances. Its purpose is for bf16 related intrinsic
     /// implementations.
diff --git a/library/stdarch/crates/core_arch/src/x86/sse.rs b/library/stdarch/crates/core_arch/src/x86/sse.rs
index 2c4295ef6..03c3a14a5 100644
--- a/library/stdarch/crates/core_arch/src/x86/sse.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sse.rs
@@ -1185,9 +1185,9 @@ pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 {
 ///
 /// ```text
 /// let a0 = *p;
-/// let a1 = *p.offset(1);
-/// let a2 = *p.offset(2);
-/// let a3 = *p.offset(3);
+/// let a1 = *p.add(1);
+/// let a2 = *p.add(2);
+/// let a3 = *p.add(3);
 /// __m128::new(a3, a2, a1, a0)
 /// ```
 ///
@@ -1241,9 +1241,9 @@ pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) {
 /// ```text
 /// let x = a.extract(0);
 /// *p = x;
-/// *p.offset(1) = x;
-/// *p.offset(2) = x;
-/// *p.offset(3) = x;
+/// *p.add(1) = x;
+/// *p.add(2) = x;
+/// *p.add(3) = x;
 /// ```
 ///
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_ps)
@@ -1317,9 +1317,9 @@ pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) {
 ///
 /// ```text
 /// *p = a.extract(3);
-/// *p.offset(1) = a.extract(2);
-/// *p.offset(2) = a.extract(1);
-/// *p.offset(3) = a.extract(0);
+/// *p.add(1) = a.extract(2);
+/// *p.add(2) = a.extract(1);
+/// *p.add(3) = a.extract(0);
 /// ```
 ///
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_ps)
@@ -3006,9 +3006,9 @@ mod tests {
         let unalignment = (p as usize) & 0xf;
         if unalignment != 0 {
-            let delta = ((16 - unalignment) >> 2) as isize;
+            let delta = (16 - unalignment) >> 2;
             fixup = delta as f32;
-            p = p.offset(delta);
+            p = p.add(delta);
         }
 
         let r = _mm_load_ps(p);
@@ -3019,7 +3019,7 @@ mod tests {
     #[simd_test(enable = "sse")]
     unsafe fn test_mm_loadu_ps() {
         let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
-        let p = vals.as_ptr().offset(3);
+        let p = vals.as_ptr().add(3);
         let r = _mm_loadu_ps(black_box(p));
         assert_eq_m128(r, _mm_setr_ps(4.0, 5.0, 6.0, 7.0));
     }
@@ -3036,9 +3036,9 @@ mod tests {
         let unalignment = (p as usize) & 0xf;
         if unalignment != 0 {
-            let delta = ((16 - unalignment) >> 2) as isize;
+            let delta = (16 - unalignment) >> 2;
             fixup = delta as f32;
-            p = p.offset(delta);
+            p = p.add(delta);
         }
 
         let r = _mm_loadr_ps(p);
@@ -3057,7 +3057,7 @@ mod tests {
     unsafe fn test_mm_store_ss() {
         let mut vals = [0.0f32; 8];
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-        _mm_store_ss(vals.as_mut_ptr().offset(1), a);
+        _mm_store_ss(vals.as_mut_ptr().add(1), a);
 
         assert_eq!(vals[0], 0.0);
         assert_eq!(vals[1], 1.0);
@@ -3152,7 +3152,7 @@ mod tests {
         // Make sure p is **not** aligned to 16-byte boundary
         if (p as usize) & 0xf == 0 {
             ofs = 1;
-            p = p.offset(1);
+            p = p.add(1);
         }
 
         _mm_storeu_ps(p, *black_box(&a));
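Note: the mod.rs hunks fix copy-paste typos in the bf16 type docs (`__m128bh` corrected to `__m256bh` and `__m512bh`), and the sse.rs hunks move doc pseudocode and tests from `ptr::offset` to `ptr::add`. For a non-negative count the two are equivalent, `p.add(n)` being `p.offset(n as isize)`, but `add` takes `usize` and avoids the cast, as in this sketch:

    // Same address either way for an in-bounds forward step.
    unsafe fn fourth(p: *const f32) -> f32 {
        debug_assert_eq!(p.offset(3), p.add(3));
        *p.add(3)
    }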
diff --git a/library/stdarch/crates/core_arch/src/x86/sse2.rs b/library/stdarch/crates/core_arch/src/x86/sse2.rs
index 5a9120042..3e79b3539 100644
--- a/library/stdarch/crates/core_arch/src/x86/sse2.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sse2.rs
@@ -1378,7 +1378,7 @@ pub unsafe fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
 #[cfg_attr(test, assert_instr(pmovmskb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 {
-    pmovmskb(a.as_i8x16())
+    simd_bitmask::<_, u16>(a.as_i8x16()) as u32 as i32
 }
 
 /// Shuffles 32-bit integers in `a` using the control in `IMM8`.
@@ -2856,8 +2856,6 @@ extern "C" {
     fn packssdw(a: i32x4, b: i32x4) -> i16x8;
     #[link_name = "llvm.x86.sse2.packuswb.128"]
     fn packuswb(a: i16x8, b: i16x8) -> u8x16;
-    #[link_name = "llvm.x86.sse2.pmovmskb.128"]
-    fn pmovmskb(a: i8x16) -> i32;
     #[link_name = "llvm.x86.sse2.max.sd"]
     fn maxsd(a: __m128d, b: __m128d) -> __m128d;
     #[link_name = "llvm.x86.sse2.max.pd"]
@@ -4518,7 +4516,7 @@ mod tests {
         // Make sure p is **not** aligned to 16-byte boundary
         if (p as usize) & 0xf == 0 {
             ofs = 1;
-            p = p.offset(1);
+            p = p.add(1);
         }
 
         _mm_storeu_pd(p, *black_box(&a));
@@ -4606,7 +4604,7 @@ mod tests {
         let mut offset = 0;
         if (d as usize) & 0xf == 0 {
             offset = 1;
-            d = d.offset(offset as isize);
+            d = d.add(offset);
         }
 
         let r = _mm_loadu_pd(d);
diff --git a/library/stdarch/crates/core_arch/src/x86/sse3.rs b/library/stdarch/crates/core_arch/src/x86/sse3.rs
index ab0dd38fe..61f8a4e78 100644
--- a/library/stdarch/crates/core_arch/src/x86/sse3.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sse3.rs
@@ -1,11 +1,7 @@
 //! Streaming SIMD Extensions 3 (SSE3)
 
 use crate::{
-    core_arch::{
-        simd::*,
-        simd_llvm::{simd_shuffle2, simd_shuffle4},
-        x86::*,
-    },
+    core_arch::{simd::*, simd_llvm::simd_shuffle, x86::*},
     mem::transmute,
 };
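Note: the sse2.rs movemask change mirrors the avx2 one above (here a `u16` bitmask, zero-extended through `u32` so the upper bits of the `i32` result stay clear), and in sse3.rs the width-specific `simd_shuffle2`/`simd_shuffle4` imports collapse into the single generic `simd_shuffle`. What an N-lane shuffle computes, modeled in plain Rust for N = 4 (illustrative only; the real intrinsic takes compile-time lane indices):

    // Each output lane picks one lane from the concatenation [a, b].
    fn shuffle4_model(a: [f32; 4], b: [f32; 4], idx: [usize; 4]) -> [f32; 4] {
        let cat = [a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]];
        [cat[idx[0]], cat[idx[1]], cat[idx[2]], cat[idx[3]]]
    }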
diff --git a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs
index 391daed20..a262932af 100644
--- a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs
+++ b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs
@@ -34,11 +34,11 @@ use stdarch_test::assert_instr;
 /// support `cmpxchg16b` and the program enters an execution path that
 /// eventually would reach this function the behavior is undefined.
 ///
-/// The `success` ordering must also be stronger or equal to `failure`, or this
-/// function call is undefined. See the `Atomic*` documentation's
-/// `compare_exchange` function for more information. When `compare_exchange`
-/// panics, this is undefined behavior. Currently this function aborts the
-/// process with an undefined instruction.
+/// The failure ordering must be [`Ordering::SeqCst`], [`Ordering::Acquire`] or
+/// [`Ordering::Relaxed`], or this function call is undefined. See the `Atomic*`
+/// documentation's `compare_exchange` function for more information. When
+/// `compare_exchange` panics, this is undefined behavior. Currently this
+/// function aborts the process with an undefined instruction.
 #[inline]
 #[cfg_attr(test, assert_instr(cmpxchg16b, success = Ordering::SeqCst, failure = Ordering::SeqCst))]
 #[target_feature(enable = "cmpxchg16b")]
@@ -54,15 +54,21 @@ pub unsafe fn cmpxchg16b(
     debug_assert!(dst as usize % 16 == 0);
 
     let (val, _ok) = match (success, failure) {
-        (Acquire, Acquire) => intrinsics::atomic_cxchg_acq(dst, old, new),
-        (Release, Relaxed) => intrinsics::atomic_cxchg_rel(dst, old, new),
-        (AcqRel, Acquire) => intrinsics::atomic_cxchg_acqrel(dst, old, new),
-        (Relaxed, Relaxed) => intrinsics::atomic_cxchg_relaxed(dst, old, new),
-        (SeqCst, SeqCst) => intrinsics::atomic_cxchg(dst, old, new),
-        (Acquire, Relaxed) => intrinsics::atomic_cxchg_acq_failrelaxed(dst, old, new),
-        (AcqRel, Relaxed) => intrinsics::atomic_cxchg_acqrel_failrelaxed(dst, old, new),
-        (SeqCst, Relaxed) => intrinsics::atomic_cxchg_failrelaxed(dst, old, new),
-        (SeqCst, Acquire) => intrinsics::atomic_cxchg_failacq(dst, old, new),
+        (Relaxed, Relaxed) => intrinsics::atomic_cxchg_relaxed_relaxed(dst, old, new),
+        (Relaxed, Acquire) => intrinsics::atomic_cxchg_relaxed_acquire(dst, old, new),
+        (Relaxed, SeqCst) => intrinsics::atomic_cxchg_relaxed_seqcst(dst, old, new),
+        (Acquire, Relaxed) => intrinsics::atomic_cxchg_acquire_relaxed(dst, old, new),
+        (Acquire, Acquire) => intrinsics::atomic_cxchg_acquire_acquire(dst, old, new),
+        (Acquire, SeqCst) => intrinsics::atomic_cxchg_acquire_seqcst(dst, old, new),
+        (Release, Relaxed) => intrinsics::atomic_cxchg_release_relaxed(dst, old, new),
+        (Release, Acquire) => intrinsics::atomic_cxchg_release_acquire(dst, old, new),
+        (Release, SeqCst) => intrinsics::atomic_cxchg_release_seqcst(dst, old, new),
+        (AcqRel, Relaxed) => intrinsics::atomic_cxchg_acqrel_relaxed(dst, old, new),
+        (AcqRel, Acquire) => intrinsics::atomic_cxchg_acqrel_acquire(dst, old, new),
+        (AcqRel, SeqCst) => intrinsics::atomic_cxchg_acqrel_seqcst(dst, old, new),
+        (SeqCst, Relaxed) => intrinsics::atomic_cxchg_seqcst_relaxed(dst, old, new),
+        (SeqCst, Acquire) => intrinsics::atomic_cxchg_seqcst_acquire(dst, old, new),
+        (SeqCst, SeqCst) => intrinsics::atomic_cxchg_seqcst_seqcst(dst, old, new),
         // The above block is all copied from libcore, and this statement is
         // also copied from libcore except that it's a panic in libcore and we
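Note: the rewritten match covers all 15 valid `(success, failure)` ordering pairs accepted by `compare_exchange` (failure may not be `Release` or `AcqRel`, which is what the amended doc comment now states) using the renamed `atomic_cxchg_{success}_{failure}` intrinsics. A hypothetical caller of the intrinsic, which was nightly-only at the time of this change:

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "cmpxchg16b")]
    unsafe fn swap_if_equal(dst: *mut u128, current: u128, new: u128) -> u128 {
        use core::arch::x86_64::cmpxchg16b;
        use core::sync::atomic::Ordering::SeqCst;
        // Stores `new` only if *dst == current; returns the previous value.
        cmpxchg16b(dst, current, new, SeqCst, SeqCst)
    }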