Diffstat (limited to 'vendor/compiler_builtins/src/mem')
-rw-r--r--  vendor/compiler_builtins/src/mem/impls.rs    267
-rw-r--r--  vendor/compiler_builtins/src/mem/mod.rs      211
-rw-r--r--  vendor/compiler_builtins/src/mem/x86_64.rs   100
3 files changed, 578 insertions, 0 deletions
diff --git a/vendor/compiler_builtins/src/mem/impls.rs b/vendor/compiler_builtins/src/mem/impls.rs
new file mode 100644
index 000000000..815132425
--- /dev/null
+++ b/vendor/compiler_builtins/src/mem/impls.rs
@@ -0,0 +1,267 @@
+use core::intrinsics::likely;
+
+const WORD_SIZE: usize = core::mem::size_of::<usize>();
+const WORD_MASK: usize = WORD_SIZE - 1;
+
+// If the number of bytes involved exceeds this threshold we will opt for word-wise copy.
+// The value selected here is max(2 * WORD_SIZE, 16):
+// * We need at least 2 * WORD_SIZE bytes to guarantee that at least 1 word will be copied
+//   through word-wise copy.
+// * The word-wise copy logic needs to perform some checks, so it has a small fixed overhead.
+//   A minimum of 16 ensures that even on 32-bit platforms at least 8 bytes are copied
+//   word-wise, so the savings of word-wise copy outweigh that fixed overhead.
+const WORD_COPY_THRESHOLD: usize = if 2 * WORD_SIZE > 16 {
+ 2 * WORD_SIZE
+} else {
+ 16
+};
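+// For example: on a 64-bit target WORD_SIZE is 8, so the threshold is 2 * 8 = 16; on a
+// 32-bit target WORD_SIZE is 4 and the `else` branch keeps the threshold at 16 as well.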
+
+#[cfg(feature = "mem-unaligned")]
+unsafe fn read_usize_unaligned(x: *const usize) -> usize {
+ // Do not use `core::ptr::read_unaligned` here, since it calls `copy_nonoverlapping` which
+ // is translated to memcpy in LLVM.
+ let x_read = (x as *const [u8; core::mem::size_of::<usize>()]).read();
+ core::mem::transmute(x_read)
+}
+
+#[inline(always)]
+pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
+ #[inline(always)]
+ unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
+ let dest_end = dest.add(n);
+ while dest < dest_end {
+ *dest = *src;
+ dest = dest.add(1);
+ src = src.add(1);
+ }
+ }
+
+ #[inline(always)]
+ unsafe fn copy_forward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
+ let mut dest_usize = dest as *mut usize;
+ let mut src_usize = src as *mut usize;
+ let dest_end = dest.add(n) as *mut usize;
+
+ while dest_usize < dest_end {
+ *dest_usize = *src_usize;
+ dest_usize = dest_usize.add(1);
+ src_usize = src_usize.add(1);
+ }
+ }
+
+ #[cfg(not(feature = "mem-unaligned"))]
+ #[inline(always)]
+ unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+ let mut dest_usize = dest as *mut usize;
+ let dest_end = dest.add(n) as *mut usize;
+
+ // Calculate the misalignment offset and shift needed to reassemble value.
+ let offset = src as usize & WORD_MASK;
+ let shift = offset * 8;
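+ // For example, on a little-endian 64-bit target with src misaligned by 3 bytes,
+ // offset == 3 and shift == 24: each destination word combines the upper 5 bytes
+ // of `prev_word` with the low 3 bytes of the next aligned word.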
+
+ // Realign src
+ let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
+ // This will read (but won't use) bytes out of bounds.
+ // cfg needed because not all targets have atomic loads that can be lowered directly
+ // (e.g. BPF, MSP430) or provided by an external library (e.g. RV32I).
+ #[cfg(target_has_atomic_load_store = "ptr")]
+ let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
+ #[cfg(not(target_has_atomic_load_store = "ptr"))]
+ let mut prev_word = core::ptr::read_volatile(src_aligned);
+
+ while dest_usize < dest_end {
+ src_aligned = src_aligned.add(1);
+ let cur_word = *src_aligned;
+ #[cfg(target_endian = "little")]
+ let reassembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
+ #[cfg(target_endian = "big")]
+ let reassembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
+ prev_word = cur_word;
+
+ *dest_usize = reassembled;
+ dest_usize = dest_usize.add(1);
+ }
+ }
+
+ #[cfg(feature = "mem-unaligned")]
+ #[inline(always)]
+ unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+ let mut dest_usize = dest as *mut usize;
+ let mut src_usize = src as *mut usize;
+ let dest_end = dest.add(n) as *mut usize;
+
+ while dest_usize < dest_end {
+ *dest_usize = read_usize_unaligned(src_usize);
+ dest_usize = dest_usize.add(1);
+ src_usize = src_usize.add(1);
+ }
+ }
+
+ if n >= WORD_COPY_THRESHOLD {
+ // Align dest
+ // Because of n >= 2 * WORD_SIZE, dst_misalignment < n
+ let dest_misalignment = (dest as usize).wrapping_neg() & WORD_MASK;
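+ // For example, if dest == 0x1005 on a 64-bit target, wrapping_neg() & 7 == 3,
+ // so 3 bytes are copied byte-wise to reach the next word boundary at 0x1008.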
+ copy_forward_bytes(dest, src, dest_misalignment);
+ dest = dest.add(dest_misalignment);
+ src = src.add(dest_misalignment);
+ n -= dest_misalignment;
+
+ let n_words = n & !WORD_MASK;
+ let src_misalignment = src as usize & WORD_MASK;
+ if likely(src_misalignment == 0) {
+ copy_forward_aligned_words(dest, src, n_words);
+ } else {
+ copy_forward_misaligned_words(dest, src, n_words);
+ }
+ dest = dest.add(n_words);
+ src = src.add(n_words);
+ n -= n_words;
+ }
+ copy_forward_bytes(dest, src, n);
+}
+
+#[inline(always)]
+pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
+ // The following backward copy helper functions use pointers one past the end
+ // of the regions as their inputs instead of pointers to the start!
+ #[inline(always)]
+ unsafe fn copy_backward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
+ let dest_start = dest.sub(n);
+ while dest_start < dest {
+ dest = dest.sub(1);
+ src = src.sub(1);
+ *dest = *src;
+ }
+ }
+
+ #[inline(always)]
+ unsafe fn copy_backward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
+ let mut dest_usize = dest as *mut usize;
+ let mut src_usize = src as *mut usize;
+ let dest_start = dest.sub(n) as *mut usize;
+
+ while dest_start < dest_usize {
+ dest_usize = dest_usize.sub(1);
+ src_usize = src_usize.sub(1);
+ *dest_usize = *src_usize;
+ }
+ }
+
+ #[cfg(not(feature = "mem-unaligned"))]
+ #[inline(always)]
+ unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+ let mut dest_usize = dest as *mut usize;
+ let dest_start = dest.sub(n) as *mut usize;
+
+ // Calculate the misalignment offset and shift needed to reassemble value.
+ let offset = src as usize & WORD_MASK;
+ let shift = offset * 8;
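+ // This mirrors the forward case: e.g. with offset == 3 and shift == 24 on a
+ // little-endian target, each destination word takes the low 3 bytes of `prev_word`
+ // (the word at the higher address) and the upper 5 bytes of `cur_word`.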
+
+ // Realign src
+ let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
+ // This will read (but won't use) bytes out of bounds.
+ // cfg needed because not all targets have atomic loads that can be lowered directly
+ // (e.g. BPF, MSP430) or provided by an external library (e.g. RV32I).
+ #[cfg(target_has_atomic_load_store = "ptr")]
+ let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
+ #[cfg(not(target_has_atomic_load_store = "ptr"))]
+ let mut prev_word = core::ptr::read_volatile(src_aligned);
+
+ while dest_start < dest_usize {
+ src_aligned = src_aligned.sub(1);
+ let cur_word = *src_aligned;
+ #[cfg(target_endian = "little")]
+ let reassembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
+ #[cfg(target_endian = "big")]
+ let reassembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
+ prev_word = cur_word;
+
+ dest_usize = dest_usize.sub(1);
+ *dest_usize = reassembled;
+ }
+ }
+
+ #[cfg(feature = "mem-unaligned")]
+ #[inline(always)]
+ unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
+ let mut dest_usize = dest as *mut usize;
+ let mut src_usize = src as *mut usize;
+ let dest_start = dest.sub(n) as *mut usize;
+
+ while dest_start < dest_usize {
+ dest_usize = dest_usize.sub(1);
+ src_usize = src_usize.sub(1);
+ *dest_usize = read_usize_unaligned(src_usize);
+ }
+ }
+
+ let mut dest = dest.add(n);
+ let mut src = src.add(n);
+
+ if n >= WORD_COPY_THRESHOLD {
+ // Align dest
+ // Because of n >= 2 * WORD_SIZE, dst_misalignment < n
+ let dest_misalignment = dest as usize & WORD_MASK;
+ copy_backward_bytes(dest, src, dest_misalignment);
+ dest = dest.sub(dest_misalignment);
+ src = src.sub(dest_misalignment);
+ n -= dest_misalignment;
+
+ let n_words = n & !WORD_MASK;
+ let src_misalignment = src as usize & WORD_MASK;
+ if likely(src_misalignment == 0) {
+ copy_backward_aligned_words(dest, src, n_words);
+ } else {
+ copy_backward_misaligned_words(dest, src, n_words);
+ }
+ dest = dest.sub(n_words);
+ src = src.sub(n_words);
+ n -= n_words;
+ }
+ copy_backward_bytes(dest, src, n);
+}
+
+#[inline(always)]
+pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) {
+ #[inline(always)]
+ pub unsafe fn set_bytes_bytes(mut s: *mut u8, c: u8, n: usize) {
+ let end = s.add(n);
+ while s < end {
+ *s = c;
+ s = s.add(1);
+ }
+ }
+
+ #[inline(always)]
+ pub unsafe fn set_bytes_words(s: *mut u8, c: u8, n: usize) {
+ let mut broadcast = c as usize;
+ let mut bits = 8;
+ while bits < WORD_SIZE * 8 {
+ broadcast |= broadcast << bits;
+ bits *= 2;
+ }
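+ // For example, with c == 0xAB on a 64-bit target the loop produces
+ // 0xAB -> 0xABAB -> 0xABAB_ABAB -> 0xABAB_ABAB_ABAB_ABAB.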
+
+ let mut s_usize = s as *mut usize;
+ let end = s.add(n) as *mut usize;
+
+ while s_usize < end {
+ *s_usize = broadcast;
+ s_usize = s_usize.add(1);
+ }
+ }
+
+ if likely(n >= WORD_COPY_THRESHOLD) {
+ // Align s
+ // Because of n >= 2 * WORD_SIZE, dst_misalignment < n
+ let misalignment = (s as usize).wrapping_neg() & WORD_MASK;
+ set_bytes_bytes(s, c, misalignment);
+ s = s.add(misalignment);
+ n -= misalignment;
+
+ let n_words = n & !WORD_MASK;
+ set_bytes_words(s, c, n_words);
+ s = s.add(n_words);
+ n -= n_words;
+ }
+ set_bytes_bytes(s, c, n);
+}
diff --git a/vendor/compiler_builtins/src/mem/mod.rs b/vendor/compiler_builtins/src/mem/mod.rs
new file mode 100644
index 000000000..a55113861
--- /dev/null
+++ b/vendor/compiler_builtins/src/mem/mod.rs
@@ -0,0 +1,211 @@
+// Trying to satisfy clippy here is hopeless
+#![allow(clippy::style)]
+
+#[allow(warnings)]
+#[cfg(target_pointer_width = "16")]
+type c_int = i16;
+#[allow(warnings)]
+#[cfg(not(target_pointer_width = "16"))]
+type c_int = i32;
+
+use core::intrinsics::{atomic_load_unordered, atomic_store_unordered, exact_div};
+use core::mem;
+use core::ops::{BitOr, Shl};
+
+// memcpy/memmove/memset have optimized implementations on some architectures
+#[cfg_attr(
+ all(not(feature = "no-asm"), target_arch = "x86_64"),
+ path = "x86_64.rs"
+)]
+mod impls;
+
+intrinsics! {
+ #[mem_builtin]
+ #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")]
+ pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 {
+ impls::copy_forward(dest, src, n);
+ dest
+ }
+
+ #[mem_builtin]
+ #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")]
+ pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 {
+ let delta = (dest as usize).wrapping_sub(src as usize);
+ if delta >= n {
+ // We can copy forwards because either dest is far enough ahead of src,
+ // or src is ahead of dest (and delta overflowed).
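+ // For example, with n == 16: dest == 0x1000, src == 0x1008 gives a wrapped
+ // delta of usize::MAX - 7, so we copy forward; dest == 0x1008, src == 0x1000
+ // gives delta == 8 < 16, so the overlapping region must be copied backward.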
+ impls::copy_forward(dest, src, n);
+ } else {
+ impls::copy_backward(dest, src, n);
+ }
+ dest
+ }
+
+ #[mem_builtin]
+ #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")]
+ pub unsafe extern "C" fn memset(s: *mut u8, c: crate::mem::c_int, n: usize) -> *mut u8 {
+ impls::set_bytes(s, c as u8, n);
+ s
+ }
+
+ #[mem_builtin]
+ #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")]
+ pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
+ let mut i = 0;
+ while i < n {
+ let a = *s1.add(i);
+ let b = *s2.add(i);
+ if a != b {
+ return a as i32 - b as i32;
+ }
+ i += 1;
+ }
+ 0
+ }
+
+ #[mem_builtin]
+ #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")]
+ pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
+ memcmp(s1, s2, n)
+ }
+
+ #[mem_builtin]
+ #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")]
+ pub unsafe extern "C" fn strlen(s: *const core::ffi::c_char) -> usize {
+ let mut n = 0;
+ let mut s = s;
+ while *s != 0 {
+ n += 1;
+ s = s.offset(1);
+ }
+ n
+ }
+}
+
+// `bytes` must be a multiple of `mem::size_of::<T>()`
+#[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))]
+fn memcpy_element_unordered_atomic<T: Copy>(dest: *mut T, src: *const T, bytes: usize) {
+ unsafe {
+ let n = exact_div(bytes, mem::size_of::<T>());
+ let mut i = 0;
+ while i < n {
+ atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i)));
+ i += 1;
+ }
+ }
+}
+
+// `bytes` must be a multiple of `mem::size_of::<T>()`
+#[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))]
+fn memmove_element_unordered_atomic<T: Copy>(dest: *mut T, src: *const T, bytes: usize) {
+ unsafe {
+ let n = exact_div(bytes, mem::size_of::<T>());
+ if src < dest as *const T {
+ // copy from end
+ let mut i = n;
+ while i != 0 {
+ i -= 1;
+ atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i)));
+ }
+ } else {
+ // copy from beginning
+ let mut i = 0;
+ while i < n {
+ atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i)));
+ i += 1;
+ }
+ }
+ }
+}
+
+// `T` must be a primitive integer type, and `bytes` must be a multiple of `mem::size_of::<T>()`
+#[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))]
+fn memset_element_unordered_atomic<T>(s: *mut T, c: u8, bytes: usize)
+where
+ T: Copy + From<u8> + Shl<u32, Output = T> + BitOr<T, Output = T>,
+{
+ unsafe {
+ let n = exact_div(bytes, mem::size_of::<T>());
+
+ // Construct a value of type `T` consisting of repeated `c`
+ // bytes, to let us ensure we write each `T` atomically.
+ let mut x = T::from(c);
+ let mut i = 1;
+ while i < mem::size_of::<T>() {
+ x = x << 8 | T::from(c);
+ i += 1;
+ }
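+ // For example, with T == u32 and c == 0x5A the loop yields
+ // 0x5A -> 0x5A5A -> 0x5A5A5A -> 0x5A5A5A5A.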
+
+ // Write it to `s`
+ let mut i = 0;
+ while i < n {
+ atomic_store_unordered(s.add(i), x);
+ i += 1;
+ }
+ }
+}
+
+intrinsics! {
+ #[cfg(target_has_atomic_load_store = "8")]
+ pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () {
+ memcpy_element_unordered_atomic(dest, src, bytes);
+ }
+ #[cfg(target_has_atomic_load_store = "16")]
+ pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () {
+ memcpy_element_unordered_atomic(dest, src, bytes);
+ }
+ #[cfg(target_has_atomic_load_store = "32")]
+ pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () {
+ memcpy_element_unordered_atomic(dest, src, bytes);
+ }
+ #[cfg(target_has_atomic_load_store = "64")]
+ pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () {
+ memcpy_element_unordered_atomic(dest, src, bytes);
+ }
+ #[cfg(target_has_atomic_load_store = "128")]
+ pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () {
+ memcpy_element_unordered_atomic(dest, src, bytes);
+ }
+
+ #[cfg(target_has_atomic_load_store = "8")]
+ pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () {
+ memmove_element_unordered_atomic(dest, src, bytes);
+ }
+ #[cfg(target_has_atomic_load_store = "16")]
+ pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () {
+ memmove_element_unordered_atomic(dest, src, bytes);
+ }
+ #[cfg(target_has_atomic_load_store = "32")]
+ pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () {
+ memmove_element_unordered_atomic(dest, src, bytes);
+ }
+ #[cfg(target_has_atomic_load_store = "64")]
+ pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () {
+ memmove_element_unordered_atomic(dest, src, bytes);
+ }
+ #[cfg(target_has_atomic_load_store = "128")]
+ pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () {
+ memmove_element_unordered_atomic(dest, src, bytes);
+ }
+
+ #[cfg(target_has_atomic_load_store = "8")]
+ pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_1(s: *mut u8, c: u8, bytes: usize) -> () {
+ memset_element_unordered_atomic(s, c, bytes);
+ }
+ #[cfg(target_has_atomic_load_store = "16")]
+ pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_2(s: *mut u16, c: u8, bytes: usize) -> () {
+ memset_element_unordered_atomic(s, c, bytes);
+ }
+ #[cfg(target_has_atomic_load_store = "32")]
+ pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_4(s: *mut u32, c: u8, bytes: usize) -> () {
+ memset_element_unordered_atomic(s, c, bytes);
+ }
+ #[cfg(target_has_atomic_load_store = "64")]
+ pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_8(s: *mut u64, c: u8, bytes: usize) -> () {
+ memset_element_unordered_atomic(s, c, bytes);
+ }
+ #[cfg(target_has_atomic_load_store = "128")]
+ pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_16(s: *mut u128, c: u8, bytes: usize) -> () {
+ memset_element_unordered_atomic(s, c, bytes);
+ }
+}
diff --git a/vendor/compiler_builtins/src/mem/x86_64.rs b/vendor/compiler_builtins/src/mem/x86_64.rs
new file mode 100644
index 000000000..a7ab6f605
--- /dev/null
+++ b/vendor/compiler_builtins/src/mem/x86_64.rs
@@ -0,0 +1,100 @@
+// On most modern Intel and AMD processors, "rep movsq" and "rep stosq" have
+// been enhanced to perform better than a simple qword loop, making them ideal
+// for implementing memcpy/memset. Note that "rep cmps" has received no such
+// enhancement, so it is not used to implement memcmp.
+//
+// On certain recent Intel processors, "rep movsb" and "rep stosb" have been
+// further enhanced to automatically select the best microarchitectural
+// implementation based on length and alignment. See the following features from
+// the "IntelĀ® 64 and IA-32 Architectures Optimization Reference Manual":
+// - ERMSB - Enhanced REP MOVSB and STOSB (Ivy Bridge and later)
+// - FSRM - Fast Short REP MOV (Ice Lake and later)
+// - Fast Zero-Length MOVSB (On no current hardware)
+// - Fast Short STOSB (On no current hardware)
+//
+// To simplify things, we switch to using the byte-based variants if the "ermsb"
+// feature is present at compile-time. We don't bother detecting other features.
+// Note that ERMSB does not enhance the backwards (DF=1) "rep movsb".
+
+#[inline(always)]
+#[cfg(target_feature = "ermsb")]
+pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) {
+ // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
+ core::arch::asm!(
+ "repe movsb (%rsi), (%rdi)",
+ inout("rcx") count => _,
+ inout("rdi") dest => _,
+ inout("rsi") src => _,
+ options(att_syntax, nostack, preserves_flags)
+ );
+}
+
+#[inline(always)]
+#[cfg(not(target_feature = "ermsb"))]
+pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) {
+ let qword_count = count >> 3;
+ let byte_count = count & 0b111;
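+ // Copy 8-byte chunks with `rep movsq`, then the remaining 0-7 bytes with `rep movsb`.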
+ // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
+ core::arch::asm!(
+ "repe movsq (%rsi), (%rdi)",
+ "mov {byte_count:e}, %ecx",
+ "repe movsb (%rsi), (%rdi)",
+ byte_count = in(reg) byte_count,
+ inout("rcx") qword_count => _,
+ inout("rdi") dest => _,
+ inout("rsi") src => _,
+ options(att_syntax, nostack, preserves_flags)
+ );
+}
+
+#[inline(always)]
+pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) {
+ let qword_count = count >> 3;
+ let byte_count = count & 0b111;
+ // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
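+ // With the direction flag set (`std`), `rep movsq` walks downward: rdi/rsi start at
+ // the last qword of each buffer and copy the trailing qword_count * 8 bytes. The
+ // `addq $7` instructions then move both pointers to the last byte of the remaining
+ // head so that `rep movsb` copies the leading byte_count bytes backward as well.
+ // `cld` restores the direction flag required by the calling convention.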
+ core::arch::asm!(
+ "std",
+ "repe movsq (%rsi), (%rdi)",
+ "movl {byte_count:e}, %ecx",
+ "addq $7, %rdi",
+ "addq $7, %rsi",
+ "repe movsb (%rsi), (%rdi)",
+ "cld",
+ byte_count = in(reg) byte_count,
+ inout("rcx") qword_count => _,
+ inout("rdi") dest.add(count).wrapping_sub(8) => _,
+ inout("rsi") src.add(count).wrapping_sub(8) => _,
+ options(att_syntax, nostack)
+ );
+}
+
+#[inline(always)]
+#[cfg(target_feature = "ermsb")]
+pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) {
+ // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
+ core::arch::asm!(
+ "repe stosb %al, (%rdi)",
+ inout("rcx") count => _,
+ inout("rdi") dest => _,
+ inout("al") c => _,
+ options(att_syntax, nostack, preserves_flags)
+ )
+}
+
+#[inline(always)]
+#[cfg(not(target_feature = "ermsb"))]
+pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) {
+ let qword_count = count >> 3;
+ let byte_count = count & 0b111;
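+ // `rax` below holds `c` broadcast to all 8 bytes (multiplying by 0x0101010101010101
+ // replicates the byte), so `rep stosq` fills 8-byte chunks and `rep stosb` handles
+ // the remaining 0-7 bytes.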
+ // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
+ core::arch::asm!(
+ "repe stosq %rax, (%rdi)",
+ "mov {byte_count:e}, %ecx",
+ "repe stosb %al, (%rdi)",
+ byte_count = in(reg) byte_count,
+ inout("rcx") qword_count => _,
+ inout("rdi") dest => _,
+ in("rax") (c as u64) * 0x0101010101010101,
+ options(att_syntax, nostack, preserves_flags)
+ );
+}