author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 12:02:58 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 12:02:58 +0000
commit     698f8c2f01ea549d77d7dc3338a12e04c11057b9
tree       173a775858bd501c378080a10dca74132f05bc50  /vendor/compiler_builtins/src/int/specialized_div_rem/mod.rs
parent     Initial commit.
Adding upstream version 1.64.0+dfsg1. (upstream/1.64.0+dfsg1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/compiler_builtins/src/int/specialized_div_rem/mod.rs')
-rw-r--r--  vendor/compiler_builtins/src/int/specialized_div_rem/mod.rs  306
1 file changed, 306 insertions(+), 0 deletions(-)
diff --git a/vendor/compiler_builtins/src/int/specialized_div_rem/mod.rs b/vendor/compiler_builtins/src/int/specialized_div_rem/mod.rs
new file mode 100644
index 000000000..6ec4675df
--- /dev/null
+++ b/vendor/compiler_builtins/src/int/specialized_div_rem/mod.rs
@@ -0,0 +1,306 @@
+// TODO: when `unsafe_block_in_unsafe_fn` is stabilized, remove this
+#![allow(unused_unsafe)]
+// The functions are complex with many branches, and explicit
+// `return`s make it clear where function exit points are
+#![allow(clippy::needless_return)]
+#![allow(clippy::comparison_chain)]
+// Clippy is confused by the complex configuration
+#![allow(clippy::if_same_then_else)]
+#![allow(clippy::needless_bool)]
+
+//! This `specialized_div_rem` module is originally from version 1.0.0 of the
+//! `specialized-div-rem` crate. Note that `for` loops with ranges are not used in this
+//! module, since unoptimized compilation may generate references to `memcpy`.
+//!
+//! The purpose of these macros is to easily change both the division algorithm used
+//! for a given integer size and the half division used by that algorithm. The way
+//! functions call each other is also constructed such that linkers will find the chain of
+//! software and hardware divisions needed for every size of signed and unsigned division.
+//! For example, most target compilations do the following:
+//!
+//! - Many 128 bit division functions like `u128::wrapping_div` use
+//!   `std::intrinsics::unchecked_div`, which gets replaced by `__udivti3` because there
+//!   is no 128 bit by 128 bit hardware division instruction on most architectures.
+//!   `__udivti3` uses `u128_div_rem` (this extra level of function calls exists because
+//!   `__umodti3` and `__udivmodti4` also exist, and `specialized_div_rem` supplies just
+//!   one function to calculate both the quotient and remainder). If configuration flags
+//!   enable it, `impl_trifecta!` defines `u128_div_rem` to use the trifecta algorithm,
+//!   which requires the half sized division `u64_by_u64_div_rem`. If the architecture
+//!   supplies a 64 bit hardware division instruction, `u64_by_u64_div_rem` will be
+//!   reduced to those instructions. Note that we do not specify the half size division
+//!   directly to be `__udivdi3`, because then hardware division would never be introduced.
+//! - If the architecture does not supply a 64 bit hardware division instruction, `u64`
+//!   divisions will use functions such as `__udivdi3`. This will call `u64_div_rem`,
+//!   which is defined by `impl_delegate!`. The half division for this algorithm is
+//!   `u32_by_u32_div_rem`, which in turn becomes hardware division instructions or more
+//!   software division algorithms.
+//! - If the architecture does not supply a 32 bit hardware division instruction, linkers
+//!   will look for `__udivsi3`. `impl_binary_long!` is used, but this algorithm uses no
+//!   half division, so the chain of calls ends here.
+//!
+//! On some architectures like x86_64, an asymmetrically sized division is supplied, in
+//! which 128 bit numbers can be divided by 64 bit numbers. `impl_asymmetric!` is used to
+//! extend the 128 by 64 bit division to a full 128 by 128 bit division.
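+//!
+//! For concreteness, the "extra level of function calls" mentioned above has roughly the
+//! following shape (an illustrative sketch only; the real intrinsics are defined
+//! elsewhere in `compiler-builtins` with additional ABI plumbing):
+//!
+//! ```ignore
+//! // Both intrinsics forward to the single `u128_div_rem`, which computes the
+//! // quotient and the remainder in one pass.
+//! pub extern "C" fn __udivti3(n: u128, d: u128) -> u128 {
+//!     u128_div_rem(n, d).0
+//! }
+//!
+//! pub extern "C" fn __umodti3(n: u128, d: u128) -> u128 {
+//!     u128_div_rem(n, d).1
+//! }
+//! ```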
+
+// `allow(dead_code)` is used in various places, because the configuration code would otherwise be
+// ridiculously complex
+
+#[macro_use]
+mod norm_shift;
+
+#[macro_use]
+mod binary_long;
+
+#[macro_use]
+mod delegate;
+
+// used on SPARC
+#[allow(unused_imports)]
+#[cfg(not(feature = "public-test-deps"))]
+pub(crate) use self::delegate::u128_divide_sparc;
+
+#[cfg(feature = "public-test-deps")]
+pub use self::delegate::u128_divide_sparc;
+
+#[macro_use]
+mod trifecta;
+
+#[macro_use]
+mod asymmetric;
+
+/// The behavior of all divisions by zero is controlled by this function. This function should be
+/// impossible for Rust users to reach, unless the public division functions of `compiler-builtins`
+/// or `core/std::unchecked_div/rem` are used directly without a zero check in front.
+fn zero_div_fn() -> ! {
+    unsafe { core::hint::unreachable_unchecked() }
+}
+
+const USE_LZ: bool = {
+    if cfg!(target_arch = "arm") {
+        if cfg!(target_feature = "thumb-mode") {
+            // ARM thumb targets have CLZ instructions if the instruction set of ARMv6T2 is
+            // supported. This is needed to successfully differentiate between targets like
+            // `thumbv8m.base` and `thumbv8m.main`.
+            cfg!(target_feature = "v6t2")
+        } else {
+            // Regular ARM targets have CLZ instructions if the ARMv5TE instruction set is
+            // supported. Technically, ARMv5T was the first to have CLZ, but the "v5t" target
+            // feature does not seem to work.
+            cfg!(target_feature = "v5te")
+        }
+    } else if cfg!(any(target_arch = "sparc", target_arch = "sparc64")) {
+        // LZD or LZCNT on SPARC only exists for the VIS 3 extension and later.
+        cfg!(target_feature = "vis3")
+    } else if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) {
+        // The `B` extension on RISC-V determines if a CLZ assembly instruction exists
+        cfg!(target_feature = "b")
+    } else {
+        // All other common targets Rust supports should have CLZ instructions
+        true
+    }
+};
+
+impl_normalization_shift!(
+    u32_normalization_shift,
+    USE_LZ,
+    32,
+    u32,
+    i32,
+    allow(dead_code)
+);
+impl_normalization_shift!(
+    u64_normalization_shift,
+    USE_LZ,
+    64,
+    u64,
+    i64,
+    allow(dead_code)
+);
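+
+// For intuition: with `USE_LZ` set, the normalization shift functions generated above
+// essentially count how far the divisor can be shifted left before its most significant
+// set bit lines up with the dividend's. A conceptual sketch (hypothetical helper, not
+// the macro's exact expansion), assuming `duo >= div` and `div != 0`; the generated
+// functions handle the general case and a non-CLZ fallback:
+#[allow(dead_code)]
+fn normalization_shift_sketch(duo: u32, div: u32) -> u32 {
+    // with CLZ this is only a couple of instructions
+    div.leading_zeros() - duo.leading_zeros()
+}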
+
+/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
+/// `checked_div` and `checked_rem` are used to avoid bringing in panic function
+/// dependencies.
+#[inline]
+fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) {
+    if let Some(quo) = duo.checked_div(div) {
+        if let Some(rem) = duo.checked_rem(div) {
+            return (quo, rem);
+        }
+    }
+    zero_div_fn()
+}
+
+// Whether `trifecta` or `delegate` is faster for 128 bit division depends on the speed at which a
+// microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is
+// faster if the target pointer width is at least 64.
+#[cfg(all(
+    not(any(target_pointer_width = "16", target_pointer_width = "32")),
+    not(all(not(feature = "no-asm"), target_arch = "x86_64")),
+    not(any(target_arch = "sparc", target_arch = "sparc64"))
+))]
+impl_trifecta!(
+    u128_div_rem,
+    zero_div_fn,
+    u64_by_u64_div_rem,
+    32,
+    u32,
+    u64,
+    u128
+);
+
+// If the pointer width is less than 64, then the target architecture almost certainly does not
+// have the fast 64 to 128 bit widening multiplication needed for `trifecta` to be faster.
+#[cfg(all(
+    any(target_pointer_width = "16", target_pointer_width = "32"),
+    not(all(not(feature = "no-asm"), target_arch = "x86_64")),
+    not(any(target_arch = "sparc", target_arch = "sparc64"))
+))]
+impl_delegate!(
+    u128_div_rem,
+    zero_div_fn,
+    u64_normalization_shift,
+    u64_by_u64_div_rem,
+    32,
+    u32,
+    u64,
+    u128,
+    i128
+);
+
+/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
+///
+/// # Safety
+///
+/// If the quotient does not fit in a `u64`, a hardware divide error occurs (reported on
+/// Unix as a floating point exception, `SIGFPE`). If `div == 0`, a division by zero
+/// exception occurs.
+#[cfg(all(not(feature = "no-asm"), target_arch = "x86_64"))]
+#[inline]
+unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) {
+    let duo_lo = duo as u64;
+    let duo_hi = (duo >> 64) as u64;
+    let quo: u64;
+    let rem: u64;
+    unsafe {
+        // divides the combined registers rdx:rax (`duo` is split into two 64 bit parts to do
+        // this) by `div`. The quotient is stored in rax and the remainder in rdx.
+        // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
+        core::arch::asm!(
+            "div {0}",
+            in(reg) div,
+            inlateout("rax") duo_lo => quo,
+            inlateout("rdx") duo_hi => rem,
+            options(att_syntax, pure, nomem, nostack)
+        );
+    }
+    (quo, rem)
+}
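+
+// An illustrative sketch (hypothetical helper, not part of this module's API) of the
+// guard a caller of `u128_by_u64_div_rem` needs; `impl_asymmetric!` below arranges the
+// equivalent internally. For nonzero `div`, the quotient fits in a `u64` exactly when
+// the upper 64 bits of `duo` are less than `div`.
+#[cfg(all(not(feature = "no-asm"), target_arch = "x86_64"))]
+#[allow(dead_code)]
+fn u128_by_u64_div_rem_checked(duo: u128, div: u64) -> Option<(u64, u64)> {
+    if div != 0 && ((duo >> 64) as u64) < div {
+        // the quotient fits in 64 bits, so the hardware `div` cannot fault
+        Some(unsafe { u128_by_u64_div_rem(duo, div) })
+    } else {
+        None
+    }
+}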
+
+// use `asymmetric` instead of `trifecta` on x86_64
+#[cfg(all(not(feature = "no-asm"), target_arch = "x86_64"))]
+impl_asymmetric!(
+    u128_div_rem,
+    zero_div_fn,
+    u64_by_u64_div_rem,
+    u128_by_u64_div_rem,
+    32,
+    u32,
+    u64,
+    u128
+);
+
+/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
+/// `checked_div` and `checked_rem` are used to avoid bringing in panic function
+/// dependencies.
+#[inline]
+#[allow(dead_code)]
+fn u32_by_u32_div_rem(duo: u32, div: u32) -> (u32, u32) {
+    if let Some(quo) = duo.checked_div(div) {
+        if let Some(rem) = duo.checked_rem(div) {
+            return (quo, rem);
+        }
+    }
+    zero_div_fn()
+}
+
+// When not on x86 and the pointer width is not 64, use `delegate`, since the division size is
+// larger than the register size.
+#[cfg(all(
+    not(all(not(feature = "no-asm"), target_arch = "x86")),
+    not(target_pointer_width = "64")
+))]
+impl_delegate!(
+    u64_div_rem,
+    zero_div_fn,
+    u32_normalization_shift,
+    u32_by_u32_div_rem,
+    16,
+    u16,
+    u32,
+    u64,
+    i64
+);
+
+// When not on x86 and the pointer width is 64, use `binary_long`.
+#[cfg(all(
+    not(all(not(feature = "no-asm"), target_arch = "x86")),
+    target_pointer_width = "64"
+))]
+impl_binary_long!(
+    u64_div_rem,
+    zero_div_fn,
+    u64_normalization_shift,
+    64,
+    u64,
+    i64
+);
+
+/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
+///
+/// # Safety
+///
+/// If the quotient does not fit in a `u32`, a hardware divide error occurs (reported on
+/// Unix as a floating point exception, `SIGFPE`). If `div == 0`, a division by zero
+/// exception occurs.
+#[cfg(all(not(feature = "no-asm"), target_arch = "x86"))]
+#[inline]
+unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) {
+    let duo_lo = duo as u32;
+    let duo_hi = (duo >> 32) as u32;
+    let quo: u32;
+    let rem: u32;
+    unsafe {
+        // divides the combined registers edx:eax (`duo` is split into two 32 bit parts to do
+        // this) by `div`. The quotient is stored in eax and the remainder in edx.
+        // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust.
+        core::arch::asm!(
+            "div {0}",
+            in(reg) div,
+            inlateout("eax") duo_lo => quo,
+            inlateout("edx") duo_hi => rem,
+            options(att_syntax, pure, nomem, nostack)
+        );
+    }
+    (quo, rem)
+}
+
+// use `asymmetric` instead of `delegate` on x86
+#[cfg(all(not(feature = "no-asm"), target_arch = "x86"))]
+impl_asymmetric!(
+    u64_div_rem,
+    zero_div_fn,
+    u32_by_u32_div_rem,
+    u64_by_u32_div_rem,
+    16,
+    u16,
+    u32,
+    u64
+);
+
+// 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division
+impl_binary_long!(
+    u32_div_rem,
+    zero_div_fn,
+    u32_normalization_shift,
+    32,
+    u32,
+    i32
+);
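+
+// For reference, a conceptual sketch of the restoring binary long division that the
+// `impl_binary_long!` invocations above implement optimized variants of. This naive
+// form is illustrative only (a hypothetical helper, not the macro's expansion) and
+// assumes `div != 0`:
+#[allow(dead_code)]
+fn u32_binary_long_sketch(mut duo: u32, div: u32) -> (u32, u32) {
+    if duo < div {
+        return (0, duo);
+    }
+    // normalization shift: align the divisor's msb with the dividend's msb
+    let shift = div.leading_zeros() - duo.leading_zeros();
+    let mut quo = 0;
+    let mut i = shift as i32;
+    // produce one quotient bit per position, subtracting the shifted divisor
+    // whenever it fits
+    while i >= 0 {
+        let sub = div << i;
+        quo <<= 1;
+        if duo >= sub {
+            duo -= sub;
+            quo |= 1;
+        }
+        i -= 1;
+    }
+    (quo, duo)
+}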