Diffstat (limited to 'vendor/compiler_builtins/src/mem/x86_64.rs')
-rw-r--r-- | vendor/compiler_builtins/src/mem/x86_64.rs | 100
1 file changed, 100 insertions, 0 deletions
diff --git a/vendor/compiler_builtins/src/mem/x86_64.rs b/vendor/compiler_builtins/src/mem/x86_64.rs
new file mode 100644
index 000000000..a7ab6f605
--- /dev/null
+++ b/vendor/compiler_builtins/src/mem/x86_64.rs
@@ -0,0 +1,100 @@
+// On most modern Intel and AMD processors, "rep movsq" and "rep stosq" have
+// been enhanced to perform better than a simple qword loop, making them ideal
+// for implementing memcpy/memset. Note that "rep cmps" has received no such
+// enhancement, so it is not used to implement memcmp.
+//
+// On certain recent Intel processors, "rep movsb" and "rep stosb" have been
+// further enhanced to automatically select the best microarchitectural
+// implementation based on length and alignment. See the following features from
+// the "Intel® 64 and IA-32 Architectures Optimization Reference Manual":
+//  - ERMSB - Enhanced REP MOVSB and STOSB (Ivy Bridge and later)
+//  - FSRM - Fast Short REP MOV (Ice Lake and later)
+//  - Fast Zero-Length MOVSB (On no current hardware)
+//  - Fast Short STOSB (On no current hardware)
+//
+// To simplify things, we switch to using the byte-based variants if the "ermsb"
+// feature is present at compile-time. We don't bother detecting other features.
+// Note that ERMSB does not enhance the backwards (DF=1) "rep movsb".
+
+#[inline(always)]
+#[cfg(target_feature = "ermsb")]
+pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) {
+    // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
+    core::arch::asm!(
+        "repe movsb (%rsi), (%rdi)",
+        inout("rcx") count => _,
+        inout("rdi") dest => _,
+        inout("rsi") src => _,
+        options(att_syntax, nostack, preserves_flags)
+    );
+}
+
+#[inline(always)]
+#[cfg(not(target_feature = "ermsb"))]
+pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) {
+    let qword_count = count >> 3;
+    let byte_count = count & 0b111;
+    // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
+    core::arch::asm!(
+        "repe movsq (%rsi), (%rdi)",
+        "mov {byte_count:e}, %ecx",
+        "repe movsb (%rsi), (%rdi)",
+        byte_count = in(reg) byte_count,
+        inout("rcx") qword_count => _,
+        inout("rdi") dest => _,
+        inout("rsi") src => _,
+        options(att_syntax, nostack, preserves_flags)
+    );
+}
+
+#[inline(always)]
+pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) {
+    let qword_count = count >> 3;
+    let byte_count = count & 0b111;
+    // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
+    core::arch::asm!(
+        "std",
+        "repe movsq (%rsi), (%rdi)",
+        "movl {byte_count:e}, %ecx",
+        "addq $7, %rdi",
+        "addq $7, %rsi",
+        "repe movsb (%rsi), (%rdi)",
+        "cld",
+        byte_count = in(reg) byte_count,
+        inout("rcx") qword_count => _,
+        inout("rdi") dest.add(count).wrapping_sub(8) => _,
+        inout("rsi") src.add(count).wrapping_sub(8) => _,
+        options(att_syntax, nostack)
+    );
+}
+
+#[inline(always)]
+#[cfg(target_feature = "ermsb")]
+pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) {
+    // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
+    core::arch::asm!(
+        "repe stosb %al, (%rdi)",
+        inout("rcx") count => _,
+        inout("rdi") dest => _,
+        inout("al") c => _,
+        options(att_syntax, nostack, preserves_flags)
+    )
+}
+
+#[inline(always)]
+#[cfg(not(target_feature = "ermsb"))]
+pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) {
+    let qword_count = count >> 3;
+    let byte_count = count & 0b111;
+    // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
+    core::arch::asm!(
+        "repe stosq %rax, (%rdi)",
+        "mov {byte_count:e}, %ecx",
+        "repe stosb %al, (%rdi)",
+        byte_count = in(reg) byte_count,
+        inout("rcx") qword_count => _,
+        inout("rdi") dest => _,
+        in("rax") (c as u64) * 0x0101010101010101,
+        options(att_syntax, nostack, preserves_flags)
+    );
+}
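
Editorial note (not part of the vendored file): in the non-ERMSB paths above, count is split into "count >> 3" qwords handled by "rep movsq"/"rep stosq" and a "count & 0b111" byte tail handled by the byte variants, and set_bytes broadcasts the fill byte into a full qword by multiplying it by 0x0101010101010101. A minimal plain-Rust sketch of that same logic is shown below; the name set_bytes_model is hypothetical and does not exist in compiler_builtins.

fn set_bytes_model(dest: &mut [u8], c: u8) {
    // Broadcast the fill byte into every byte of a qword, mirroring the
    // (c as u64) * 0x0101010101010101 operand fed to "rep stosq" above.
    let qword = (c as u64) * 0x0101_0101_0101_0101;
    let qword_count = dest.len() >> 3;   // full 8-byte chunks ("rep stosq")
    let byte_count = dest.len() & 0b111; // 0..=7 trailing bytes ("rep stosb")

    // Write the full qwords first, then the remaining tail bytes.
    let (qwords, tail) = dest.split_at_mut(qword_count * 8);
    for chunk in qwords.chunks_exact_mut(8) {
        chunk.copy_from_slice(&qword.to_le_bytes());
    }
    for b in tail.iter_mut().take(byte_count) {
        *b = c;
    }
}

For example, set_bytes_model(&mut buf, 0xAB) should fill buf with 0xAB, matching what set_bytes(buf.as_mut_ptr(), 0xAB, buf.len()) is intended to do. copy_backward applies the same split in reverse: "std" sets DF so the string instructions walk downward, rdi/rsi start at the last qword (dest.add(count).wrapping_sub(8)), and DF is restored with "cld", which is presumably why its options() omit preserves_flags.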