diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:20:29 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:20:29 +0000 |
commit | 631cd5845e8de329d0e227aaa707d7ea228b8f8f (patch) | |
tree | a1b87c8f8cad01cf18f7c5f57a08f102771ed303 /vendor/compiler_builtins/src | |
parent | Adding debian version 1.69.0+dfsg1-1. (diff) | |
download | rustc-631cd5845e8de329d0e227aaa707d7ea228b8f8f.tar.xz rustc-631cd5845e8de329d0e227aaa707d7ea228b8f8f.zip |
Merging upstream version 1.70.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/compiler_builtins/src')
-rw-r--r-- | vendor/compiler_builtins/src/int/shift.rs | 18 | ||||
-rw-r--r-- | vendor/compiler_builtins/src/lib.rs | 1 | ||||
-rw-r--r-- | vendor/compiler_builtins/src/math.rs | 32 | ||||
-rw-r--r-- | vendor/compiler_builtins/src/mem/impls.rs | 10 | ||||
-rw-r--r-- | vendor/compiler_builtins/src/mem/mod.rs | 8 | ||||
-rw-r--r-- | vendor/compiler_builtins/src/mem/x86_64.rs | 130 | ||||
-rw-r--r-- | vendor/compiler_builtins/src/riscv.rs | 8 |
7 files changed, 186 insertions, 21 deletions
diff --git a/vendor/compiler_builtins/src/int/shift.rs b/vendor/compiler_builtins/src/int/shift.rs index 2d2c081a6..c90cf1de3 100644 --- a/vendor/compiler_builtins/src/int/shift.rs +++ b/vendor/compiler_builtins/src/int/shift.rs @@ -12,7 +12,7 @@ trait Ashl: DInt { } else { Self::from_lo_hi( self.lo().wrapping_shl(shl), - self.lo().logical_shr(n_h - shl) | self.hi().wrapping_shl(shl), + self.lo().logical_shr(n_h.wrapping_sub(shl)) | self.hi().wrapping_shl(shl), ) } } @@ -36,7 +36,7 @@ trait Ashr: DInt { self } else { Self::from_lo_hi( - self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h - shr), + self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h.wrapping_sub(shr)), self.hi().wrapping_shr(shr), ) } @@ -57,7 +57,7 @@ trait Lshr: DInt { self } else { Self::from_lo_hi( - self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h - shr), + self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h.wrapping_sub(shr)), self.hi().logical_shr(shr), ) } @@ -78,8 +78,8 @@ intrinsics! { #[avr_skip] #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_llsl] - pub extern "C" fn __ashldi3(a: u64, b: u32) -> u64 { - a.ashl(b) + pub extern "C" fn __ashldi3(a: u64, b: core::ffi::c_uint) -> u64 { + a.ashl(b as u32) } #[avr_skip] @@ -96,8 +96,8 @@ intrinsics! { #[avr_skip] #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_lasr] - pub extern "C" fn __ashrdi3(a: i64, b: u32) -> i64 { - a.ashr(b) + pub extern "C" fn __ashrdi3(a: i64, b: core::ffi::c_uint) -> i64 { + a.ashr(b as u32) } #[avr_skip] @@ -114,8 +114,8 @@ intrinsics! 
{ #[avr_skip] #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_llsr] - pub extern "C" fn __lshrdi3(a: u64, b: u32) -> u64 { - a.lshr(b) + pub extern "C" fn __lshrdi3(a: u64, b: core::ffi::c_uint) -> u64 { + a.lshr(b as u32) } #[avr_skip] diff --git a/vendor/compiler_builtins/src/lib.rs b/vendor/compiler_builtins/src/lib.rs index 10b4aafec..71f249c8e 100644 --- a/vendor/compiler_builtins/src/lib.rs +++ b/vendor/compiler_builtins/src/lib.rs @@ -47,6 +47,7 @@ pub mod int; all(target_arch = "x86_64", target_os = "none"), all(target_arch = "x86_64", target_os = "uefi"), all(target_arch = "arm", target_os = "none"), + all(target_arch = "xtensa", target_os = "none"), target_os = "xous", all(target_vendor = "fortanix", target_env = "sgx") ))] diff --git a/vendor/compiler_builtins/src/math.rs b/vendor/compiler_builtins/src/math.rs index c64984e9e..498e4d85f 100644 --- a/vendor/compiler_builtins/src/math.rs +++ b/vendor/compiler_builtins/src/math.rs @@ -86,7 +86,36 @@ no_mangle! { fn tanf(n: f32) -> f32; } -#[cfg(any(target_os = "xous", target_os = "uefi"))] +#[cfg(any( + all( + target_family = "wasm", + target_os = "unknown", + not(target_env = "wasi") + ), + target_os = "xous", + all(target_arch = "x86_64", target_os = "uefi"), + all(target_arch = "xtensa", target_os = "none"), + all(target_vendor = "fortanix", target_env = "sgx") +))] +intrinsics! { + pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 { + let r = self::libm::lgamma_r(x); + *s = r.1; + r.0 + } + + pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 { + let r = self::libm::lgammaf_r(x); + *s = r.1; + r.0 + } +} + +#[cfg(any( + target_os = "xous", + target_os = "uefi", + all(target_arch = "xtensa", target_os = "none"), +))] no_mangle! { fn sqrtf(x: f32) -> f32; fn sqrt(x: f64) -> f64; @@ -94,6 +123,7 @@ no_mangle! 
{ #[cfg(any( all(target_vendor = "fortanix", target_env = "sgx"), + all(target_arch = "xtensa", target_os = "none"), target_os = "xous", target_os = "uefi" ))] diff --git a/vendor/compiler_builtins/src/mem/impls.rs b/vendor/compiler_builtins/src/mem/impls.rs index 72003a5c4..23c9d8d32 100644 --- a/vendor/compiler_builtins/src/mem/impls.rs +++ b/vendor/compiler_builtins/src/mem/impls.rs @@ -279,3 +279,13 @@ pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> i32 { } 0 } + +#[inline(always)] +pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize { + let mut n = 0; + while *s != 0 { + n += 1; + s = s.add(1); + } + n +} diff --git a/vendor/compiler_builtins/src/mem/mod.rs b/vendor/compiler_builtins/src/mem/mod.rs index c5b0ddc16..be118778b 100644 --- a/vendor/compiler_builtins/src/mem/mod.rs +++ b/vendor/compiler_builtins/src/mem/mod.rs @@ -63,13 +63,7 @@ intrinsics! { #[mem_builtin] #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] pub unsafe extern "C" fn strlen(s: *const core::ffi::c_char) -> usize { - let mut n = 0; - let mut s = s; - while *s != 0 { - n += 1; - s = s.offset(1); - } - n + impls::c_string_length(s) } } diff --git a/vendor/compiler_builtins/src/mem/x86_64.rs b/vendor/compiler_builtins/src/mem/x86_64.rs index 17b461f79..40b67093f 100644 --- a/vendor/compiler_builtins/src/mem/x86_64.rs +++ b/vendor/compiler_builtins/src/mem/x86_64.rs @@ -173,6 +173,136 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { c16(a.cast(), b.cast(), n) } +// In order to process more than one byte simultaneously when executing strlen, +// two things must be considered: +// * An n byte read with an n-byte aligned address will never cross +// a page boundary and will always succeed. Any smaller alignment +// may result in a read that will cross a page boundary, which may +// trigger an access violation. 
+// * Surface Rust considers any kind of out-of-bounds read as undefined +// behaviour. To dodge this, memory access operations are written +// using inline assembly. + +#[cfg(target_feature = "sse2")] +#[inline(always)] +pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize { + use core::arch::x86_64::{__m128i, _mm_cmpeq_epi8, _mm_movemask_epi8, _mm_set1_epi8}; + + let mut n = 0; + + // The use of _mm_movemask_epi8 and company allows for speedups, + // but they aren't cheap by themselves. Thus, possibly small strings + // are handled in simple loops. + + for _ in 0..4 { + if *s == 0 { + return n; + } + + n += 1; + s = s.add(1); + } + + // Shave off the least significant bits to align the address to a 16 + // byte boundary. The shaved-off bits are used to correct the first iteration. + + let align = s as usize & 15; + let mut s = ((s as usize) - align) as *const __m128i; + let zero = _mm_set1_epi8(0); + + let x = { + let r; + asm!( + "movdqa ({addr}), {dest}", + addr = in(reg) s, + dest = out(xmm_reg) r, + options(att_syntax, nostack), + ); + r + }; + let cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(x, zero)) >> align; + + if cmp != 0 { + return n + cmp.trailing_zeros() as usize; + } + + n += 16 - align; + s = s.add(1); + + loop { + let x = { + let r; + asm!( + "movdqa ({addr}), {dest}", + addr = in(reg) s, + dest = out(xmm_reg) r, + options(att_syntax, nostack), + ); + r + }; + let cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(x, zero)) as u32; + if cmp == 0 { + n += 16; + s = s.add(1); + } else { + return n + cmp.trailing_zeros() as usize; + } + } +} + +// Provided for scenarios like kernel development, where SSE might not +// be available. +#[cfg(not(target_feature = "sse2"))] +#[inline(always)] +pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize { + let mut n = 0; + + // Check bytes in steps of one until + // either a zero byte is discovered or + // the pointer is aligned to an eight byte boundary. 
+ + while s as usize & 7 != 0 { + if *s == 0 { + return n; + } + n += 1; + s = s.add(1); + } + + // Check bytes in steps of eight until a zero + // byte is discovered. + + let mut s = s as *const u64; + + loop { + let mut cs = { + let r: u64; + asm!( + "mov ({addr}), {dest}", + addr = in(reg) s, + dest = out(reg) r, + options(att_syntax, nostack), + ); + r + }; + // Detect if a word has a zero byte, taken from + // https://graphics.stanford.edu/~seander/bithacks.html + if (cs.wrapping_sub(0x0101010101010101) & !cs & 0x8080808080808080) != 0 { + loop { + if cs & 255 == 0 { + return n; + } else { + cs >>= 8; + n += 1; + } + } + } else { + n += 8; + s = s.add(1); + } + } +} + /// Determine optimal parameters for a `rep` instruction. fn rep_param(dest: *mut u8, mut count: usize) -> (usize, usize, usize) { // Unaligned writes are still slow on modern processors, so align the destination address. diff --git a/vendor/compiler_builtins/src/riscv.rs b/vendor/compiler_builtins/src/riscv.rs index ae361b33a..bf3125533 100644 --- a/vendor/compiler_builtins/src/riscv.rs +++ b/vendor/compiler_builtins/src/riscv.rs @@ -19,11 +19,11 @@ intrinsics! { // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/riscv/int_mul_impl.inc pub extern "C" fn __mulsi3(a: u32, b: u32) -> u32 { let (mut a, mut b) = (a, b); - let mut r = 0; + let mut r: u32 = 0; while a > 0 { if a & 1 > 0 { - r += b; + r = r.wrapping_add(b); } a >>= 1; b <<= 1; @@ -35,11 +35,11 @@ intrinsics! { #[cfg(not(target_feature = "m"))] pub extern "C" fn __muldi3(a: u64, b: u64) -> u64 { let (mut a, mut b) = (a, b); - let mut r = 0; + let mut r: u64 = 0; while a > 0 { if a & 1 > 0 { - r += b; + r = r.wrapping_add(b); } a >>= 1; b <<= 1; |