diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:06:31 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:06:31 +0000 |
commit | 2ff14448863ac1a1dd9533461708e29aae170c2d (patch) | |
tree | 85b9fea2bbfe3f06473cfa381eed11f273b57c5c /vendor/compiler_builtins/src/mem | |
parent | Adding debian version 1.64.0+dfsg1-1. (diff) | |
download | rustc-2ff14448863ac1a1dd9533461708e29aae170c2d.tar.xz rustc-2ff14448863ac1a1dd9533461708e29aae170c2d.zip |
Adding debian version 1.65.0+dfsg1-2.debian/1.65.0+dfsg1-2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | vendor/compiler_builtins/src/mem/impls.rs | 14 | ||||
-rw-r--r-- | vendor/compiler_builtins/src/mem/mod.rs | 11 | ||||
-rw-r--r-- | vendor/compiler_builtins/src/mem/x86_64.rs | 150 |
3 files changed, 132 insertions, 43 deletions
diff --git a/vendor/compiler_builtins/src/mem/impls.rs b/vendor/compiler_builtins/src/mem/impls.rs index 815132425..72003a5c4 100644 --- a/vendor/compiler_builtins/src/mem/impls.rs +++ b/vendor/compiler_builtins/src/mem/impls.rs @@ -265,3 +265,17 @@ pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) { } set_bytes_bytes(s, c, n); } + +#[inline(always)] +pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> i32 { + let mut i = 0; + while i < n { + let a = *s1.add(i); + let b = *s2.add(i); + if a != b { + return a as i32 - b as i32; + } + i += 1; + } + 0 +} diff --git a/vendor/compiler_builtins/src/mem/mod.rs b/vendor/compiler_builtins/src/mem/mod.rs index a55113861..c5b0ddc16 100644 --- a/vendor/compiler_builtins/src/mem/mod.rs +++ b/vendor/compiler_builtins/src/mem/mod.rs @@ -51,16 +51,7 @@ intrinsics! { #[mem_builtin] #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { - let mut i = 0; - while i < n { - let a = *s1.add(i); - let b = *s2.add(i); - if a != b { - return a as i32 - b as i32; - } - i += 1; - } - 0 + impls::compare_bytes(s1, s2, n) } #[mem_builtin] diff --git a/vendor/compiler_builtins/src/mem/x86_64.rs b/vendor/compiler_builtins/src/mem/x86_64.rs index a7ab6f605..17b461f79 100644 --- a/vendor/compiler_builtins/src/mem/x86_64.rs +++ b/vendor/compiler_builtins/src/mem/x86_64.rs @@ -16,6 +16,10 @@ // feature is present at compile-time. We don't bother detecting other features. // Note that ERMSB does not enhance the backwards (DF=1) "rep movsb". +use core::arch::asm; +use core::intrinsics; +use core::mem; + #[inline(always)] #[cfg(target_feature = "ermsb")] pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { @@ -31,16 +35,26 @@ pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { #[inline(always)] #[cfg(not(target_feature = "ermsb"))] -pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { - let qword_count = count >> 3; - let byte_count = count & 0b111; - // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. - core::arch::asm!( - "repe movsq (%rsi), (%rdi)", - "mov {byte_count:e}, %ecx", - "repe movsb (%rsi), (%rdi)", - byte_count = in(reg) byte_count, +pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, count: usize) { + let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count); + // Separating the blocks gives the compiler more freedom to reorder instructions. + asm!( + "rep movsb", + inout("ecx") pre_byte_count => _, + inout("rdi") dest => dest, + inout("rsi") src => src, + options(att_syntax, nostack, preserves_flags) + ); + asm!( + "rep movsq", inout("rcx") qword_count => _, + inout("rdi") dest => dest, + inout("rsi") src => src, + options(att_syntax, nostack, preserves_flags) + ); + asm!( + "rep movsb", + inout("ecx") byte_count => _, inout("rdi") dest => _, inout("rsi") src => _, options(att_syntax, nostack, preserves_flags) @@ -49,22 +63,28 @@ pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { #[inline(always)] pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { - let qword_count = count >> 3; - let byte_count = count & 0b111; - // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. - core::arch::asm!( + let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count); + // We can't separate this block due to std/cld + asm!( "std", - "repe movsq (%rsi), (%rdi)", - "movl {byte_count:e}, %ecx", - "addq $7, %rdi", - "addq $7, %rsi", - "repe movsb (%rsi), (%rdi)", + "rep movsb", + "sub $7, %rsi", + "sub $7, %rdi", + "mov {qword_count}, %rcx", + "rep movsq", + "test {pre_byte_count:e}, {pre_byte_count:e}", + "add $7, %rsi", + "add $7, %rdi", + "mov {pre_byte_count:e}, %ecx", + "rep movsb", "cld", - byte_count = in(reg) byte_count, - inout("rcx") qword_count => _, - inout("rdi") dest.add(count).wrapping_sub(8) => _, - inout("rsi") src.add(count).wrapping_sub(8) => _, - options(att_syntax, nostack) + pre_byte_count = in(reg) pre_byte_count, + qword_count = in(reg) qword_count, + inout("ecx") byte_count => _, + inout("rdi") dest.add(count - 1) => _, + inout("rsi") src.add(count - 1) => _, + // We modify flags, but we restore it afterwards + options(att_syntax, nostack, preserves_flags) ); } @@ -83,18 +103,82 @@ pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { #[inline(always)] #[cfg(not(target_feature = "ermsb"))] -pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { - let qword_count = count >> 3; - let byte_count = count & 0b111; - // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. - core::arch::asm!( - "repe stosq %rax, (%rdi)", - "mov {byte_count:e}, %ecx", - "repe stosb %al, (%rdi)", - byte_count = in(reg) byte_count, +pub unsafe fn set_bytes(mut dest: *mut u8, c: u8, count: usize) { + let c = c as u64 * 0x0101_0101_0101_0101; + let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count); + // Separating the blocks gives the compiler more freedom to reorder instructions. + asm!( + "rep stosb", + inout("ecx") pre_byte_count => _, + inout("rdi") dest => dest, + in("rax") c, + options(att_syntax, nostack, preserves_flags) + ); + asm!( + "rep stosq", inout("rcx") qword_count => _, + inout("rdi") dest => dest, + in("rax") c, + options(att_syntax, nostack, preserves_flags) + ); + asm!( + "rep stosb", + inout("ecx") byte_count => _, inout("rdi") dest => _, - in("rax") (c as u64) * 0x0101010101010101, + in("rax") c, options(att_syntax, nostack, preserves_flags) ); } + +#[inline(always)] +pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { + #[inline(always)] + unsafe fn cmp<T, U, F>(mut a: *const T, mut b: *const T, n: usize, f: F) -> i32 + where + T: Clone + Copy + Eq, + U: Clone + Copy + Eq, + F: FnOnce(*const U, *const U, usize) -> i32, + { + // Ensure T is not a ZST. + const { assert!(mem::size_of::<T>() != 0) }; + + let end = a.add(intrinsics::unchecked_div(n, mem::size_of::<T>())); + while a != end { + if a.read_unaligned() != b.read_unaligned() { + return f(a.cast(), b.cast(), mem::size_of::<T>()); + } + a = a.add(1); + b = b.add(1); + } + f( + a.cast(), + b.cast(), + intrinsics::unchecked_rem(n, mem::size_of::<T>()), + ) + } + let c1 = |mut a: *const u8, mut b: *const u8, n| { + for _ in 0..n { + if a.read() != b.read() { + return i32::from(a.read()) - i32::from(b.read()); + } + a = a.add(1); + b = b.add(1); + } + 0 + }; + let c2 = |a: *const u16, b, n| cmp(a, b, n, c1); + let c4 = |a: *const u32, b, n| cmp(a, b, n, c2); + let c8 = |a: *const u64, b, n| cmp(a, b, n, c4); + let c16 = |a: *const u128, b, n| cmp(a, b, n, c8); + c16(a.cast(), b.cast(), n) +} + +/// Determine optimal parameters for a `rep` instruction. +fn rep_param(dest: *mut u8, mut count: usize) -> (usize, usize, usize) { + // Unaligned writes are still slow on modern processors, so align the destination address. + let pre_byte_count = ((8 - (dest as usize & 0b111)) & 0b111).min(count); + count -= pre_byte_count; + let qword_count = count >> 3; + let byte_count = count & 0b111; + (pre_byte_count, qword_count, byte_count) +} |