From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/compiler_builtins/src/arm.rs | 183 +++++++ vendor/compiler_builtins/src/arm_linux.rs | 214 ++++++++ vendor/compiler_builtins/src/float/add.rs | 213 ++++++++ vendor/compiler_builtins/src/float/cmp.rs | 253 ++++++++++ vendor/compiler_builtins/src/float/conv.rs | 351 +++++++++++++ vendor/compiler_builtins/src/float/div.rs | 467 ++++++++++++++++++ vendor/compiler_builtins/src/float/extend.rs | 83 ++++ vendor/compiler_builtins/src/float/mod.rs | 175 +++++++ vendor/compiler_builtins/src/float/mul.rs | 209 ++++++++ vendor/compiler_builtins/src/float/pow.rs | 36 ++ vendor/compiler_builtins/src/float/sub.rs | 25 + vendor/compiler_builtins/src/float/trunc.rs | 125 +++++ vendor/compiler_builtins/src/int/addsub.rs | 96 ++++ vendor/compiler_builtins/src/int/leading_zeros.rs | 149 ++++++ vendor/compiler_builtins/src/int/mod.rs | 390 +++++++++++++++ vendor/compiler_builtins/src/int/mul.rs | 138 ++++++ vendor/compiler_builtins/src/int/sdiv.rs | 169 +++++++ vendor/compiler_builtins/src/int/shift.rs | 116 +++++ .../src/int/specialized_div_rem/asymmetric.rs | 69 +++ .../src/int/specialized_div_rem/binary_long.rs | 548 +++++++++++++++++++++ .../src/int/specialized_div_rem/delegate.rs | 319 ++++++++++++ .../src/int/specialized_div_rem/mod.rs | 306 ++++++++++++ .../src/int/specialized_div_rem/norm_shift.rs | 106 ++++ .../src/int/specialized_div_rem/trifecta.rs | 386 +++++++++++++++ vendor/compiler_builtins/src/int/udiv.rs | 106 ++++ vendor/compiler_builtins/src/lib.rs | 72 +++ vendor/compiler_builtins/src/macros.rs | 448 +++++++++++++++++ vendor/compiler_builtins/src/math.rs | 117 +++++ vendor/compiler_builtins/src/mem/impls.rs | 267 ++++++++++ vendor/compiler_builtins/src/mem/mod.rs | 211 ++++++++ vendor/compiler_builtins/src/mem/x86_64.rs | 100 ++++ vendor/compiler_builtins/src/probestack.rs | 350 +++++++++++++ vendor/compiler_builtins/src/riscv.rs | 34 ++ vendor/compiler_builtins/src/x86.rs | 85 ++++ vendor/compiler_builtins/src/x86_64.rs | 94 ++++ 35 files changed, 7010 insertions(+) create mode 100644 vendor/compiler_builtins/src/arm.rs create mode 100644 vendor/compiler_builtins/src/arm_linux.rs create mode 100644 vendor/compiler_builtins/src/float/add.rs create mode 100644 vendor/compiler_builtins/src/float/cmp.rs create mode 100644 vendor/compiler_builtins/src/float/conv.rs create mode 100644 vendor/compiler_builtins/src/float/div.rs create mode 100644 vendor/compiler_builtins/src/float/extend.rs create mode 100644 vendor/compiler_builtins/src/float/mod.rs create mode 100644 vendor/compiler_builtins/src/float/mul.rs create mode 100644 vendor/compiler_builtins/src/float/pow.rs create mode 100644 vendor/compiler_builtins/src/float/sub.rs create mode 100644 vendor/compiler_builtins/src/float/trunc.rs create mode 100644 vendor/compiler_builtins/src/int/addsub.rs create mode 100644 vendor/compiler_builtins/src/int/leading_zeros.rs create mode 100644 vendor/compiler_builtins/src/int/mod.rs create mode 100644 vendor/compiler_builtins/src/int/mul.rs create mode 100644 vendor/compiler_builtins/src/int/sdiv.rs create mode 100644 vendor/compiler_builtins/src/int/shift.rs create mode 100644 vendor/compiler_builtins/src/int/specialized_div_rem/asymmetric.rs create mode 100644 vendor/compiler_builtins/src/int/specialized_div_rem/binary_long.rs create mode 100644 
vendor/compiler_builtins/src/int/specialized_div_rem/delegate.rs create mode 100644 vendor/compiler_builtins/src/int/specialized_div_rem/mod.rs create mode 100644 vendor/compiler_builtins/src/int/specialized_div_rem/norm_shift.rs create mode 100644 vendor/compiler_builtins/src/int/specialized_div_rem/trifecta.rs create mode 100644 vendor/compiler_builtins/src/int/udiv.rs create mode 100644 vendor/compiler_builtins/src/lib.rs create mode 100644 vendor/compiler_builtins/src/macros.rs create mode 100644 vendor/compiler_builtins/src/math.rs create mode 100644 vendor/compiler_builtins/src/mem/impls.rs create mode 100644 vendor/compiler_builtins/src/mem/mod.rs create mode 100644 vendor/compiler_builtins/src/mem/x86_64.rs create mode 100644 vendor/compiler_builtins/src/probestack.rs create mode 100644 vendor/compiler_builtins/src/riscv.rs create mode 100644 vendor/compiler_builtins/src/x86.rs create mode 100644 vendor/compiler_builtins/src/x86_64.rs (limited to 'vendor/compiler_builtins/src') diff --git a/vendor/compiler_builtins/src/arm.rs b/vendor/compiler_builtins/src/arm.rs new file mode 100644 index 000000000..9c1b6ad12 --- /dev/null +++ b/vendor/compiler_builtins/src/arm.rs @@ -0,0 +1,183 @@ +#![cfg(not(feature = "no-asm"))] +#![allow(unused_imports)] + +use core::intrinsics; + +// iOS symbols have a leading underscore. +#[cfg(target_os = "ios")] +macro_rules! bl { + ($func:literal) => { + concat!("bl _", $func) + }; +} +#[cfg(not(target_os = "ios"))] +macro_rules! bl { + ($func:literal) => { + concat!("bl ", $func) + }; +} + +intrinsics! { + // NOTE This function and the ones below are implemented using assembly because they are using a + // custom calling convention which can't be implemented using a normal Rust function. + #[naked] + #[cfg(not(target_env = "msvc"))] + pub unsafe extern "C" fn __aeabi_uidivmod() { + core::arch::asm!( + "push {{lr}}", + "sub sp, sp, #4", + "mov r2, sp", + bl!("__udivmodsi4"), + "ldr r1, [sp]", + "add sp, sp, #4", + "pop {{pc}}", + options(noreturn) + ); + } + + #[naked] + pub unsafe extern "C" fn __aeabi_uldivmod() { + core::arch::asm!( + "push {{r4, lr}}", + "sub sp, sp, #16", + "add r4, sp, #8", + "str r4, [sp]", + bl!("__udivmoddi4"), + "ldr r2, [sp, #8]", + "ldr r3, [sp, #12]", + "add sp, sp, #16", + "pop {{r4, pc}}", + options(noreturn) + ); + } + + #[naked] + pub unsafe extern "C" fn __aeabi_idivmod() { + core::arch::asm!( + "push {{r0, r1, r4, lr}}", + bl!("__aeabi_idiv"), + "pop {{r1, r2}}", + "muls r2, r2, r0", + "subs r1, r1, r2", + "pop {{r4, pc}}", + options(noreturn) + ); + } + + #[naked] + pub unsafe extern "C" fn __aeabi_ldivmod() { + core::arch::asm!( + "push {{r4, lr}}", + "sub sp, sp, #16", + "add r4, sp, #8", + "str r4, [sp]", + bl!("__divmoddi4"), + "ldr r2, [sp, #8]", + "ldr r3, [sp, #12]", + "add sp, sp, #16", + "pop {{r4, pc}}", + options(noreturn) + ); + } + + // The following functions use weak linkage to allow users to override + // with custom implementation. + // FIXME: The `*4` and `*8` variants should be defined as aliases. + + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) { + ::mem::memcpy(dest, src, n); + } + + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) { + // We are guaranteed 4-alignment, so accessing at u32 is okay. 
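// A minimal sketch of the contract that the naked __aeabi_uidivmod shim earlier in this
// file adapts to the AEABI convention (quotient returned in r0, remainder in r1). The
// helper name and signature mirror __udivmodsi4 as called by that shim; treat the exact
// signature as an assumption here, since the real helper lives in int/udiv.rs, not in
// this sketch.
fn udivmodsi4_sketch(n: u32, d: u32, rem: Option<&mut u32>) -> u32 {
    let q = n / d; // panics on d == 0; the real builtin has its own divide-by-zero behavior
    if let Some(r) = rem {
        *r = n - q * d;
    }
    q
}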
+ let mut dest = dest as *mut u32; + let mut src = src as *mut u32; + let mut n = n; + + while n >= 4 { + *dest = *src; + dest = dest.offset(1); + src = src.offset(1); + n -= 4; + } + + __aeabi_memcpy(dest as *mut u8, src as *const u8, n); + } + + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) { + __aeabi_memcpy4(dest, src, n); + } + + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) { + ::mem::memmove(dest, src, n); + } + + #[cfg(not(any(target_os = "ios", target_env = "msvc")))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) { + __aeabi_memmove(dest, src, n); + } + + #[cfg(not(any(target_os = "ios", target_env = "msvc")))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) { + __aeabi_memmove(dest, src, n); + } + + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) { + // Note the different argument order + ::mem::memset(dest, c, n); + } + + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) { + let mut dest = dest as *mut u32; + let mut n = n; + + let byte = (c as u32) & 0xff; + let c = (byte << 24) | (byte << 16) | (byte << 8) | byte; + + while n >= 4 { + *dest = c; + dest = dest.offset(1); + n -= 4; + } + + __aeabi_memset(dest as *mut u8, n, byte as i32); + } + + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) { + __aeabi_memset4(dest, n, c); + } + + #[cfg(not(target_os = "ios"))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) { + __aeabi_memset(dest, n, 0); + } + + #[cfg(not(any(target_os = "ios", target_env = "msvc")))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) { + __aeabi_memset4(dest, n, 0); + } + + #[cfg(not(any(target_os = "ios", target_env = "msvc")))] + #[linkage = "weak"] + pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) { + __aeabi_memset4(dest, n, 0); + } +} diff --git a/vendor/compiler_builtins/src/arm_linux.rs b/vendor/compiler_builtins/src/arm_linux.rs new file mode 100644 index 000000000..8fe09485b --- /dev/null +++ b/vendor/compiler_builtins/src/arm_linux.rs @@ -0,0 +1,214 @@ +use core::intrinsics; +use core::mem; + +// Kernel-provided user-mode helper functions: +// https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt +unsafe fn __kuser_cmpxchg(oldval: u32, newval: u32, ptr: *mut u32) -> bool { + let f: extern "C" fn(u32, u32, *mut u32) -> u32 = mem::transmute(0xffff0fc0usize as *const ()); + f(oldval, newval, ptr) == 0 +} +unsafe fn __kuser_memory_barrier() { + let f: extern "C" fn() = mem::transmute(0xffff0fa0usize as *const ()); + f(); +} + +// Word-align a pointer +fn align_ptr(ptr: *mut T) -> *mut u32 { + // This gives us a mask of 0 when T == u32 since the pointer is already + // supposed to be aligned, which avoids any masking in that case. 
+ let ptr_mask = 3 & (4 - mem::size_of::()); + (ptr as usize & !ptr_mask) as *mut u32 +} + +// Calculate the shift and mask of a value inside an aligned word +fn get_shift_mask(ptr: *mut T) -> (u32, u32) { + // Mask to get the low byte/halfword/word + let mask = match mem::size_of::() { + 1 => 0xff, + 2 => 0xffff, + 4 => 0xffffffff, + _ => unreachable!(), + }; + + // If we are on big-endian then we need to adjust the shift accordingly + let endian_adjust = if cfg!(target_endian = "little") { + 0 + } else { + 4 - mem::size_of::() as u32 + }; + + // Shift to get the desired element in the word + let ptr_mask = 3 & (4 - mem::size_of::()); + let shift = ((ptr as usize & ptr_mask) as u32 ^ endian_adjust) * 8; + + (shift, mask) +} + +// Extract a value from an aligned word +fn extract_aligned(aligned: u32, shift: u32, mask: u32) -> u32 { + (aligned >> shift) & mask +} + +// Insert a value into an aligned word +fn insert_aligned(aligned: u32, val: u32, shift: u32, mask: u32) -> u32 { + (aligned & !(mask << shift)) | ((val & mask) << shift) +} + +// Generic atomic read-modify-write operation +unsafe fn atomic_rmw u32>(ptr: *mut T, f: F) -> u32 { + let aligned_ptr = align_ptr(ptr); + let (shift, mask) = get_shift_mask(ptr); + + loop { + let curval_aligned = intrinsics::atomic_load_unordered(aligned_ptr); + let curval = extract_aligned(curval_aligned, shift, mask); + let newval = f(curval); + let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask); + if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) { + return curval; + } + } +} + +// Generic atomic compare-exchange operation +unsafe fn atomic_cmpxchg(ptr: *mut T, oldval: u32, newval: u32) -> u32 { + let aligned_ptr = align_ptr(ptr); + let (shift, mask) = get_shift_mask(ptr); + + loop { + let curval_aligned = intrinsics::atomic_load_unordered(aligned_ptr); + let curval = extract_aligned(curval_aligned, shift, mask); + if curval != oldval { + return curval; + } + let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask); + if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) { + return oldval; + } + } +} + +macro_rules! atomic_rmw { + ($name:ident, $ty:ty, $op:expr) => { + intrinsics! { + pub unsafe extern "C" fn $name(ptr: *mut $ty, val: $ty) -> $ty { + atomic_rmw(ptr, |x| $op(x as $ty, val) as u32) as $ty + } + } + }; +} +macro_rules! atomic_cmpxchg { + ($name:ident, $ty:ty) => { + intrinsics! 
{ + pub unsafe extern "C" fn $name(ptr: *mut $ty, oldval: $ty, newval: $ty) -> $ty { + atomic_cmpxchg(ptr, oldval as u32, newval as u32) as $ty + } + } + }; +} + +atomic_rmw!(__sync_fetch_and_add_1, u8, |a: u8, b: u8| a.wrapping_add(b)); +atomic_rmw!(__sync_fetch_and_add_2, u16, |a: u16, b: u16| a + .wrapping_add(b)); +atomic_rmw!(__sync_fetch_and_add_4, u32, |a: u32, b: u32| a + .wrapping_add(b)); + +atomic_rmw!(__sync_fetch_and_sub_1, u8, |a: u8, b: u8| a.wrapping_sub(b)); +atomic_rmw!(__sync_fetch_and_sub_2, u16, |a: u16, b: u16| a + .wrapping_sub(b)); +atomic_rmw!(__sync_fetch_and_sub_4, u32, |a: u32, b: u32| a + .wrapping_sub(b)); + +atomic_rmw!(__sync_fetch_and_and_1, u8, |a: u8, b: u8| a & b); +atomic_rmw!(__sync_fetch_and_and_2, u16, |a: u16, b: u16| a & b); +atomic_rmw!(__sync_fetch_and_and_4, u32, |a: u32, b: u32| a & b); + +atomic_rmw!(__sync_fetch_and_or_1, u8, |a: u8, b: u8| a | b); +atomic_rmw!(__sync_fetch_and_or_2, u16, |a: u16, b: u16| a | b); +atomic_rmw!(__sync_fetch_and_or_4, u32, |a: u32, b: u32| a | b); + +atomic_rmw!(__sync_fetch_and_xor_1, u8, |a: u8, b: u8| a ^ b); +atomic_rmw!(__sync_fetch_and_xor_2, u16, |a: u16, b: u16| a ^ b); +atomic_rmw!(__sync_fetch_and_xor_4, u32, |a: u32, b: u32| a ^ b); + +atomic_rmw!(__sync_fetch_and_nand_1, u8, |a: u8, b: u8| !(a & b)); +atomic_rmw!(__sync_fetch_and_nand_2, u16, |a: u16, b: u16| !(a & b)); +atomic_rmw!(__sync_fetch_and_nand_4, u32, |a: u32, b: u32| !(a & b)); + +atomic_rmw!(__sync_fetch_and_max_1, i8, |a: i8, b: i8| if a > b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_max_2, i16, |a: i16, b: i16| if a > b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_max_4, i32, |a: i32, b: i32| if a > b { + a +} else { + b +}); + +atomic_rmw!(__sync_fetch_and_umax_1, u8, |a: u8, b: u8| if a > b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_umax_2, u16, |a: u16, b: u16| if a > b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_umax_4, u32, |a: u32, b: u32| if a > b { + a +} else { + b +}); + +atomic_rmw!(__sync_fetch_and_min_1, i8, |a: i8, b: i8| if a < b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_min_2, i16, |a: i16, b: i16| if a < b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_min_4, i32, |a: i32, b: i32| if a < b { + a +} else { + b +}); + +atomic_rmw!(__sync_fetch_and_umin_1, u8, |a: u8, b: u8| if a < b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_umin_2, u16, |a: u16, b: u16| if a < b { + a +} else { + b +}); +atomic_rmw!(__sync_fetch_and_umin_4, u32, |a: u32, b: u32| if a < b { + a +} else { + b +}); + +atomic_rmw!(__sync_lock_test_and_set_1, u8, |_: u8, b: u8| b); +atomic_rmw!(__sync_lock_test_and_set_2, u16, |_: u16, b: u16| b); +atomic_rmw!(__sync_lock_test_and_set_4, u32, |_: u32, b: u32| b); + +atomic_cmpxchg!(__sync_val_compare_and_swap_1, u8); +atomic_cmpxchg!(__sync_val_compare_and_swap_2, u16); +atomic_cmpxchg!(__sync_val_compare_and_swap_4, u32); + +intrinsics! 
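// A minimal standalone sketch of the sub-word read-modify-write scheme implemented by
// `atomic_rmw` above, using AtomicU32::compare_exchange as a stand-in for the kernel's
// __kuser_cmpxchg helper. Little-endian layout and a byte offset in 0..4 are assumed;
// the generic code above also handles 16-bit elements and big-endian targets.
use core::sync::atomic::{AtomicU32, Ordering};

fn fetch_add_u8_in_word(word: &AtomicU32, off: usize, val: u8) -> u8 {
    let shift = (off as u32) * 8;
    let mask = 0xffu32 << shift;
    loop {
        let cur = word.load(Ordering::Relaxed);
        let old = ((cur & mask) >> shift) as u8;
        let new = (cur & !mask) | ((old.wrapping_add(val) as u32) << shift);
        // Retry until the word has not changed between the load and the CAS.
        if word
            .compare_exchange(cur, new, Ordering::SeqCst, Ordering::Relaxed)
            .is_ok()
        {
            return old;
        }
    }
}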
{ + pub unsafe extern "C" fn __sync_synchronize() { + __kuser_memory_barrier(); + } +} diff --git a/vendor/compiler_builtins/src/float/add.rs b/vendor/compiler_builtins/src/float/add.rs new file mode 100644 index 000000000..67f6c2c14 --- /dev/null +++ b/vendor/compiler_builtins/src/float/add.rs @@ -0,0 +1,213 @@ +use float::Float; +use int::{CastInto, Int}; + +/// Returns `a + b` +fn add(a: F, b: F) -> F +where + u32: CastInto, + F::Int: CastInto, + i32: CastInto, + F::Int: CastInto, +{ + let one = F::Int::ONE; + let zero = F::Int::ZERO; + + let bits = F::BITS.cast(); + let significand_bits = F::SIGNIFICAND_BITS; + let max_exponent = F::EXPONENT_MAX; + + let implicit_bit = F::IMPLICIT_BIT; + let significand_mask = F::SIGNIFICAND_MASK; + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXPONENT_MASK; + let inf_rep = exponent_mask; + let quiet_bit = implicit_bit >> 1; + let qnan_rep = exponent_mask | quiet_bit; + + let mut a_rep = a.repr(); + let mut b_rep = b.repr(); + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + // Detect if a or b is zero, infinity, or NaN. + if a_abs.wrapping_sub(one) >= inf_rep - one || b_abs.wrapping_sub(one) >= inf_rep - one { + // NaN + anything = qNaN + if a_abs > inf_rep { + return F::from_repr(a_abs | quiet_bit); + } + // anything + NaN = qNaN + if b_abs > inf_rep { + return F::from_repr(b_abs | quiet_bit); + } + + if a_abs == inf_rep { + // +/-infinity + -/+infinity = qNaN + if (a.repr() ^ b.repr()) == sign_bit { + return F::from_repr(qnan_rep); + } else { + // +/-infinity + anything remaining = +/- infinity + return a; + } + } + + // anything remaining + +/-infinity = +/-infinity + if b_abs == inf_rep { + return b; + } + + // zero + anything = anything + if a_abs == Int::ZERO { + // but we need to get the sign right for zero + zero + if b_abs == Int::ZERO { + return F::from_repr(a.repr() & b.repr()); + } else { + return b; + } + } + + // anything + zero = anything + if b_abs == Int::ZERO { + return a; + } + } + + // Swap a and b if necessary so that a has the larger absolute value. + if b_abs > a_abs { + // Don't use mem::swap because it may generate references to memcpy in unoptimized code. + let tmp = a_rep; + a_rep = b_rep; + b_rep = tmp; + } + + // Extract the exponent and significand from the (possibly swapped) a and b. + let mut a_exponent: i32 = ((a_rep & exponent_mask) >> significand_bits).cast(); + let mut b_exponent: i32 = ((b_rep & exponent_mask) >> significand_bits).cast(); + let mut a_significand = a_rep & significand_mask; + let mut b_significand = b_rep & significand_mask; + + // normalize any denormals, and adjust the exponent accordingly. + if a_exponent == 0 { + let (exponent, significand) = F::normalize(a_significand); + a_exponent = exponent; + a_significand = significand; + } + if b_exponent == 0 { + let (exponent, significand) = F::normalize(b_significand); + b_exponent = exponent; + b_significand = significand; + } + + // The sign of the result is the sign of the larger operand, a. If they + // have opposite signs, we are performing a subtraction; otherwise addition. + let result_sign = a_rep & sign_bit; + let subtraction = ((a_rep ^ b_rep) & sign_bit) != zero; + + // Shift the significands to give us round, guard and sticky, and or in the + // implicit significand bit. (If we fell through from the denormal path it + // was already set by normalize(), but setting it twice won't hurt + // anything.) 
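// A minimal sketch of the "sticky" right shift used just below when aligning b's
// significand: the bits that fall off the bottom are collapsed into bit 0 so that the
// final rounding step still knows the discarded part was non-zero. Assumes
// 0 < shift < 32; the generic code also handles the shift >= width case.
fn sticky_shift_right(x: u32, shift: u32) -> u32 {
    let lost = x & ((1u32 << shift) - 1); // bits that would be discarded
    (x >> shift) | (lost != 0) as u32
}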
+ a_significand = (a_significand | implicit_bit) << 3; + b_significand = (b_significand | implicit_bit) << 3; + + // Shift the significand of b by the difference in exponents, with a sticky + // bottom bit to get rounding correct. + let align = a_exponent.wrapping_sub(b_exponent).cast(); + if align != Int::ZERO { + if align < bits { + let sticky = + F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != Int::ZERO); + b_significand = (b_significand >> align.cast()) | sticky; + } else { + b_significand = one; // sticky; b is known to be non-zero. + } + } + if subtraction { + a_significand = a_significand.wrapping_sub(b_significand); + // If a == -b, return +zero. + if a_significand == Int::ZERO { + return F::from_repr(Int::ZERO); + } + + // If partial cancellation occured, we need to left-shift the result + // and adjust the exponent: + if a_significand < implicit_bit << 3 { + let shift = + a_significand.leading_zeros() as i32 - (implicit_bit << 3).leading_zeros() as i32; + a_significand <<= shift; + a_exponent -= shift; + } + } else { + // addition + a_significand += b_significand; + + // If the addition carried up, we need to right-shift the result and + // adjust the exponent: + if a_significand & implicit_bit << 4 != Int::ZERO { + let sticky = F::Int::from_bool(a_significand & one != Int::ZERO); + a_significand = a_significand >> 1 | sticky; + a_exponent += 1; + } + } + + // If we have overflowed the type, return +/- infinity: + if a_exponent >= max_exponent as i32 { + return F::from_repr(inf_rep | result_sign); + } + + if a_exponent <= 0 { + // Result is denormal before rounding; the exponent is zero and we + // need to shift the significand. + let shift = (1 - a_exponent).cast(); + let sticky = + F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != Int::ZERO); + a_significand = a_significand >> shift.cast() | sticky; + a_exponent = 0; + } + + // Low three bits are round, guard, and sticky. + let a_significand_i32: i32 = a_significand.cast(); + let round_guard_sticky: i32 = a_significand_i32 & 0x7; + + // Shift the significand into place, and mask off the implicit bit. + let mut result = a_significand >> 3 & significand_mask; + + // Insert the exponent and sign. + result |= a_exponent.cast() << significand_bits; + result |= result_sign; + + // Final rounding. The result may overflow to infinity, but that is the + // correct result in that case. + if round_guard_sticky > 0x4 { + result += one; + } + if round_guard_sticky == 0x4 { + result += result & one; + } + + F::from_repr(result) +} + +intrinsics! 
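// A minimal sketch of the final rounding step of `add` above: the low three bits of the
// working significand are round, guard and sticky, and the value is rounded to nearest
// with ties to even, exactly as the two `if`s at the end of the function do (there the
// increment is applied to the full bit pattern so a carry may overflow into the exponent,
// which is the intended behavior).
fn round_nearest_even(sig_with_rgs: u32) -> u32 {
    let rgs = sig_with_rgs & 0x7; // round, guard, sticky
    let mut result = sig_with_rgs >> 3;
    if rgs > 0x4 {
        result += 1; // more than halfway: round up
    } else if rgs == 0x4 {
        result += result & 1; // exactly halfway: round to even
    }
    result
}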
{ + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_fadd] + pub extern "C" fn __addsf3(a: f32, b: f32) -> f32 { + add(a, b) + } + + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_dadd] + pub extern "C" fn __adddf3(a: f64, b: f64) -> f64 { + add(a, b) + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __addsf3vfp(a: f32, b: f32) -> f32 { + a + b + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __adddf3vfp(a: f64, b: f64) -> f64 { + a + b + } +} diff --git a/vendor/compiler_builtins/src/float/cmp.rs b/vendor/compiler_builtins/src/float/cmp.rs new file mode 100644 index 000000000..1d4e38433 --- /dev/null +++ b/vendor/compiler_builtins/src/float/cmp.rs @@ -0,0 +1,253 @@ +#![allow(unreachable_code)] + +use float::Float; +use int::Int; + +#[derive(Clone, Copy)] +enum Result { + Less, + Equal, + Greater, + Unordered, +} + +impl Result { + fn to_le_abi(self) -> i32 { + match self { + Result::Less => -1, + Result::Equal => 0, + Result::Greater => 1, + Result::Unordered => 1, + } + } + + fn to_ge_abi(self) -> i32 { + match self { + Result::Less => -1, + Result::Equal => 0, + Result::Greater => 1, + Result::Unordered => -1, + } + } +} + +fn cmp(a: F, b: F) -> Result { + let one = F::Int::ONE; + let zero = F::Int::ZERO; + let szero = F::SignedInt::ZERO; + + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXPONENT_MASK; + let inf_rep = exponent_mask; + + let a_rep = a.repr(); + let b_rep = b.repr(); + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + // If either a or b is NaN, they are unordered. + if a_abs > inf_rep || b_abs > inf_rep { + return Result::Unordered; + } + + // If a and b are both zeros, they are equal. + if a_abs | b_abs == zero { + return Result::Equal; + } + + let a_srep = a.signed_repr(); + let b_srep = b.signed_repr(); + + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a fp_ting-point compare. + if a_srep & b_srep >= szero { + if a_srep < b_srep { + Result::Less + } else if a_srep == b_srep { + Result::Equal + } else { + Result::Greater + } + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. (This assumes a twos- or ones- + // complement integer representation; if integers are represented in a + // sign-magnitude representation, then this flip is incorrect). + } else if a_srep > b_srep { + Result::Less + } else if a_srep == b_srep { + Result::Equal + } else { + Result::Greater + } +} + +fn unord(a: F, b: F) -> bool { + let one = F::Int::ONE; + + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXPONENT_MASK; + let inf_rep = exponent_mask; + + let a_rep = a.repr(); + let b_rep = b.repr(); + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + a_abs > inf_rep || b_abs > inf_rep +} + +intrinsics! 
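// A minimal sketch of the ordering trick used by `cmp` above, specialised to f32 and
// restricted to finite, non-zero, non-NaN inputs: when at least one operand is
// non-negative, the IEEE order matches the order of the signed bit patterns; when both
// are negative, the comparison is simply flipped. Zero and NaN handling (done earlier in
// `cmp`) is deliberately omitted here.
fn cmp_finite_f32(a: f32, b: f32) -> core::cmp::Ordering {
    let (ai, bi) = (a.to_bits() as i32, b.to_bits() as i32);
    if ai & bi >= 0 {
        ai.cmp(&bi) // at least one operand non-negative
    } else {
        bi.cmp(&ai) // both negative: flip the sense of the comparison
    }
}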
{ + pub extern "C" fn __lesf2(a: f32, b: f32) -> i32 { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __gesf2(a: f32, b: f32) -> i32 { + cmp(a, b).to_ge_abi() + } + + #[arm_aeabi_alias = __aeabi_fcmpun] + pub extern "C" fn __unordsf2(a: f32, b: f32) -> i32 { + unord(a, b) as i32 + } + + pub extern "C" fn __eqsf2(a: f32, b: f32) -> i32 { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __ltsf2(a: f32, b: f32) -> i32 { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __nesf2(a: f32, b: f32) -> i32 { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __gtsf2(a: f32, b: f32) -> i32 { + cmp(a, b).to_ge_abi() + } + + pub extern "C" fn __ledf2(a: f64, b: f64) -> i32 { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __gedf2(a: f64, b: f64) -> i32 { + cmp(a, b).to_ge_abi() + } + + #[arm_aeabi_alias = __aeabi_dcmpun] + pub extern "C" fn __unorddf2(a: f64, b: f64) -> i32 { + unord(a, b) as i32 + } + + pub extern "C" fn __eqdf2(a: f64, b: f64) -> i32 { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __ltdf2(a: f64, b: f64) -> i32 { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __nedf2(a: f64, b: f64) -> i32 { + cmp(a, b).to_le_abi() + } + + pub extern "C" fn __gtdf2(a: f64, b: f64) -> i32 { + cmp(a, b).to_ge_abi() + } +} + +#[cfg(target_arch = "arm")] +intrinsics! { + pub extern "aapcs" fn __aeabi_fcmple(a: f32, b: f32) -> i32 { + (__lesf2(a, b) <= 0) as i32 + } + + pub extern "aapcs" fn __aeabi_fcmpge(a: f32, b: f32) -> i32 { + (__gesf2(a, b) >= 0) as i32 + } + + pub extern "aapcs" fn __aeabi_fcmpeq(a: f32, b: f32) -> i32 { + (__eqsf2(a, b) == 0) as i32 + } + + pub extern "aapcs" fn __aeabi_fcmplt(a: f32, b: f32) -> i32 { + (__ltsf2(a, b) < 0) as i32 + } + + pub extern "aapcs" fn __aeabi_fcmpgt(a: f32, b: f32) -> i32 { + (__gtsf2(a, b) > 0) as i32 + } + + pub extern "aapcs" fn __aeabi_dcmple(a: f64, b: f64) -> i32 { + (__ledf2(a, b) <= 0) as i32 + } + + pub extern "aapcs" fn __aeabi_dcmpge(a: f64, b: f64) -> i32 { + (__gedf2(a, b) >= 0) as i32 + } + + pub extern "aapcs" fn __aeabi_dcmpeq(a: f64, b: f64) -> i32 { + (__eqdf2(a, b) == 0) as i32 + } + + pub extern "aapcs" fn __aeabi_dcmplt(a: f64, b: f64) -> i32 { + (__ltdf2(a, b) < 0) as i32 + } + + pub extern "aapcs" fn __aeabi_dcmpgt(a: f64, b: f64) -> i32 { + (__gtdf2(a, b) > 0) as i32 + } + + // On hard-float targets LLVM will use native instructions + // for all VFP intrinsics below + + pub extern "C" fn __gesf2vfp(a: f32, b: f32) -> i32 { + (a >= b) as i32 + } + + pub extern "C" fn __gedf2vfp(a: f64, b: f64) -> i32 { + (a >= b) as i32 + } + + pub extern "C" fn __gtsf2vfp(a: f32, b: f32) -> i32 { + (a > b) as i32 + } + + pub extern "C" fn __gtdf2vfp(a: f64, b: f64) -> i32 { + (a > b) as i32 + } + + pub extern "C" fn __ltsf2vfp(a: f32, b: f32) -> i32 { + (a < b) as i32 + } + + pub extern "C" fn __ltdf2vfp(a: f64, b: f64) -> i32 { + (a < b) as i32 + } + + pub extern "C" fn __lesf2vfp(a: f32, b: f32) -> i32 { + (a <= b) as i32 + } + + pub extern "C" fn __ledf2vfp(a: f64, b: f64) -> i32 { + (a <= b) as i32 + } + + pub extern "C" fn __nesf2vfp(a: f32, b: f32) -> i32 { + (a != b) as i32 + } + + pub extern "C" fn __nedf2vfp(a: f64, b: f64) -> i32 { + (a != b) as i32 + } + + pub extern "C" fn __eqsf2vfp(a: f32, b: f32) -> i32 { + (a == b) as i32 + } + + pub extern "C" fn __eqdf2vfp(a: f64, b: f64) -> i32 { + (a == b) as i32 + } +} diff --git a/vendor/compiler_builtins/src/float/conv.rs b/vendor/compiler_builtins/src/float/conv.rs new file mode 100644 index 000000000..07b58f3d2 --- /dev/null +++ b/vendor/compiler_builtins/src/float/conv.rs @@ 
-0,0 +1,351 @@ +/// Conversions from integers to floats. +/// +/// These are hand-optimized bit twiddling code, +/// which unfortunately isn't the easiest kind of code to read. +/// +/// The algorithm is explained here: https://blog.m-ou.se/floats/ +mod int_to_float { + pub fn u32_to_f32_bits(i: u32) -> u32 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + let a = (i << n) >> 8; // Significant bits, with bit 24 still in tact. + let b = (i << n) << 24; // Insignificant bits, only relevant for rounding. + let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. + let e = 157 - n as u32; // Exponent plus 127, minus one. + (e << 23) + m // + not |, so the mantissa can overflow into the exponent. + } + + pub fn u32_to_f64_bits(i: u32) -> u64 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + let m = (i as u64) << (21 + n); // Significant bits, with bit 53 still in tact. + let e = 1053 - n as u64; // Exponent plus 1023, minus one. + (e << 52) + m // Bit 53 of m will overflow into e. + } + + pub fn u64_to_f32_bits(i: u64) -> u32 { + let n = i.leading_zeros(); + let y = i.wrapping_shl(n); + let a = (y >> 40) as u32; // Significant bits, with bit 24 still in tact. + let b = (y >> 8 | y & 0xFFFF) as u32; // Insignificant bits, only relevant for rounding. + let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. + let e = if i == 0 { 0 } else { 189 - n }; // Exponent plus 127, minus one, except for zero. + (e << 23) + m // + not |, so the mantissa can overflow into the exponent. + } + + pub fn u64_to_f64_bits(i: u64) -> u64 { + if i == 0 { + return 0; + } + let n = i.leading_zeros(); + let a = ((i << n) >> 11) as u64; // Significant bits, with bit 53 still in tact. + let b = ((i << n) << 53) as u64; // Insignificant bits, only relevant for rounding. + let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even. + let e = 1085 - n as u64; // Exponent plus 1023, minus one. + (e << 52) + m // + not |, so the mantissa can overflow into the exponent. + } + + pub fn u128_to_f32_bits(i: u128) -> u32 { + let n = i.leading_zeros(); + let y = i.wrapping_shl(n); + let a = (y >> 104) as u32; // Significant bits, with bit 24 still in tact. + let b = (y >> 72) as u32 | ((y << 32) >> 32 != 0) as u32; // Insignificant bits, only relevant for rounding. + let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. + let e = if i == 0 { 0 } else { 253 - n }; // Exponent plus 127, minus one, except for zero. + (e << 23) + m // + not |, so the mantissa can overflow into the exponent. + } + + pub fn u128_to_f64_bits(i: u128) -> u64 { + let n = i.leading_zeros(); + let y = i.wrapping_shl(n); + let a = (y >> 75) as u64; // Significant bits, with bit 53 still in tact. + let b = (y >> 11 | y & 0xFFFF_FFFF) as u64; // Insignificant bits, only relevant for rounding. + let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even. + let e = if i == 0 { 0 } else { 1149 - n as u64 }; // Exponent plus 1023, minus one, except for zero. + (e << 52) + m // + not |, so the mantissa can overflow into the exponent. + } +} + +// Conversions from unsigned integers to floats. +intrinsics! 
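// A quick spot check (assuming a test environment with panic support) that the
// bit-twiddling converter defined in the module above agrees with the compiler's own
// rounding cast, including a value exactly at the 24-bit boundary and one just past it.
fn spot_check_u32_to_f32() {
    for &i in &[0u32, 1, 7, 0x00ff_ffff, 0x0100_0001, u32::MAX] {
        assert_eq!(
            f32::from_bits(int_to_float::u32_to_f32_bits(i)),
            i as f32 // `as` also rounds to nearest, ties to even
        );
    }
}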
{ + #[arm_aeabi_alias = __aeabi_ui2f] + pub extern "C" fn __floatunsisf(i: u32) -> f32 { + f32::from_bits(int_to_float::u32_to_f32_bits(i)) + } + + #[arm_aeabi_alias = __aeabi_ui2d] + pub extern "C" fn __floatunsidf(i: u32) -> f64 { + f64::from_bits(int_to_float::u32_to_f64_bits(i)) + } + + #[arm_aeabi_alias = __aeabi_ul2f] + pub extern "C" fn __floatundisf(i: u64) -> f32 { + f32::from_bits(int_to_float::u64_to_f32_bits(i)) + } + + #[arm_aeabi_alias = __aeabi_ul2d] + pub extern "C" fn __floatundidf(i: u64) -> f64 { + f64::from_bits(int_to_float::u64_to_f64_bits(i)) + } + + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __floatuntisf(i: u128) -> f32 { + f32::from_bits(int_to_float::u128_to_f32_bits(i)) + } + + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __floatuntidf(i: u128) -> f64 { + f64::from_bits(int_to_float::u128_to_f64_bits(i)) + } +} + +// Conversions from signed integers to floats. +intrinsics! { + #[arm_aeabi_alias = __aeabi_i2f] + pub extern "C" fn __floatsisf(i: i32) -> f32 { + let sign_bit = ((i >> 31) as u32) << 31; + f32::from_bits(int_to_float::u32_to_f32_bits(i.unsigned_abs()) | sign_bit) + } + + #[arm_aeabi_alias = __aeabi_i2d] + pub extern "C" fn __floatsidf(i: i32) -> f64 { + let sign_bit = ((i >> 31) as u64) << 63; + f64::from_bits(int_to_float::u32_to_f64_bits(i.unsigned_abs()) | sign_bit) + } + + #[arm_aeabi_alias = __aeabi_l2f] + pub extern "C" fn __floatdisf(i: i64) -> f32 { + let sign_bit = ((i >> 63) as u32) << 31; + f32::from_bits(int_to_float::u64_to_f32_bits(i.unsigned_abs()) | sign_bit) + } + + #[arm_aeabi_alias = __aeabi_l2d] + pub extern "C" fn __floatdidf(i: i64) -> f64 { + let sign_bit = ((i >> 63) as u64) << 63; + f64::from_bits(int_to_float::u64_to_f64_bits(i.unsigned_abs()) | sign_bit) + } + + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __floattisf(i: i128) -> f32 { + let sign_bit = ((i >> 127) as u32) << 31; + f32::from_bits(int_to_float::u128_to_f32_bits(i.unsigned_abs()) | sign_bit) + } + + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __floattidf(i: i128) -> f64 { + let sign_bit = ((i >> 127) as u64) << 63; + f64::from_bits(int_to_float::u128_to_f64_bits(i.unsigned_abs()) | sign_bit) + } +} + +// Conversions from floats to unsigned integers. +intrinsics! { + #[arm_aeabi_alias = __aeabi_f2uiz] + pub extern "C" fn __fixunssfsi(f: f32) -> u32 { + let fbits = f.to_bits(); + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 159 << 23 { // >= 1, < max + let m = 1 << 31 | fbits << 8; // Mantissa and the implicit 1-bit. + let s = 158 - (fbits >> 23); // Shift based on the exponent and bias. + m >> s + } else if fbits <= 255 << 23 { // >= max (incl. inf) + u32::MAX + } else { // Negative or NaN + 0 + } + } + + #[arm_aeabi_alias = __aeabi_f2ulz] + pub extern "C" fn __fixunssfdi(f: f32) -> u64 { + let fbits = f.to_bits(); + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 191 << 23 { // >= 1, < max + let m = 1 << 63 | (fbits as u64) << 40; // Mantissa and the implicit 1-bit. + let s = 190 - (fbits >> 23); // Shift based on the exponent and bias. + m >> s + } else if fbits <= 255 << 23 { // >= max (incl. 
inf) + u64::MAX + } else { // Negative or NaN + 0 + } + } + + #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __fixunssfti(f: f32) -> u128 { + let fbits = f.to_bits(); + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 255 << 23 { // >= 1, < inf + let m = 1 << 127 | (fbits as u128) << 104; // Mantissa and the implicit 1-bit. + let s = 254 - (fbits >> 23); // Shift based on the exponent and bias. + m >> s + } else if fbits == 255 << 23 { // == inf + u128::MAX + } else { // Negative or NaN + 0 + } + } + + #[arm_aeabi_alias = __aeabi_d2uiz] + pub extern "C" fn __fixunsdfsi(f: f64) -> u32 { + let fbits = f.to_bits(); + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1055 << 52 { // >= 1, < max + let m = 1 << 31 | (fbits >> 21) as u32; // Mantissa and the implicit 1-bit. + let s = 1054 - (fbits >> 52); // Shift based on the exponent and bias. + m >> s + } else if fbits <= 2047 << 52 { // >= max (incl. inf) + u32::MAX + } else { // Negative or NaN + 0 + } + } + + #[arm_aeabi_alias = __aeabi_d2ulz] + pub extern "C" fn __fixunsdfdi(f: f64) -> u64 { + let fbits = f.to_bits(); + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1087 << 52 { // >= 1, < max + let m = 1 << 63 | fbits << 11; // Mantissa and the implicit 1-bit. + let s = 1086 - (fbits >> 52); // Shift based on the exponent and bias. + m >> s + } else if fbits <= 2047 << 52 { // >= max (incl. inf) + u64::MAX + } else { // Negative or NaN + 0 + } + } + + #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __fixunsdfti(f: f64) -> u128 { + let fbits = f.to_bits(); + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1151 << 52 { // >= 1, < max + let m = 1 << 127 | (fbits as u128) << 75; // Mantissa and the implicit 1-bit. + let s = 1150 - (fbits >> 52); // Shift based on the exponent and bias. + m >> s + } else if fbits <= 2047 << 52 { // >= max (incl. inf) + u128::MAX + } else { // Negative or NaN + 0 + } + } +} + +// Conversions from floats to signed integers. +intrinsics! { + #[arm_aeabi_alias = __aeabi_f2iz] + pub extern "C" fn __fixsfsi(f: f32) -> i32 { + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 158 << 23 { // >= 1, < max + let m = 1 << 31 | fbits << 8; // Mantissa and the implicit 1-bit. + let s = 158 - (fbits >> 23); // Shift based on the exponent and bias. + let u = (m >> s) as i32; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 255 << 23 { // >= max (incl. inf) + if f.is_sign_negative() { i32::MIN } else { i32::MAX } + } else { // NaN + 0 + } + } + + #[arm_aeabi_alias = __aeabi_f2lz] + pub extern "C" fn __fixsfdi(f: f32) -> i64 { + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 190 << 23 { // >= 1, < max + let m = 1 << 63 | (fbits as u64) << 40; // Mantissa and the implicit 1-bit. + let s = 190 - (fbits >> 23); // Shift based on the exponent and bias. + let u = (m >> s) as i64; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 255 << 23 { // >= max (incl. 
inf) + if f.is_sign_negative() { i64::MIN } else { i64::MAX } + } else { // NaN + 0 + } + } + + #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __fixsfti(f: f32) -> i128 { + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 127 << 23 { // >= 0, < 1 + 0 + } else if fbits < 254 << 23 { // >= 1, < max + let m = 1 << 127 | (fbits as u128) << 104; // Mantissa and the implicit 1-bit. + let s = 254 - (fbits >> 23); // Shift based on the exponent and bias. + let u = (m >> s) as i128; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 255 << 23 { // >= max (incl. inf) + if f.is_sign_negative() { i128::MIN } else { i128::MAX } + } else { // NaN + 0 + } + } + + #[arm_aeabi_alias = __aeabi_d2iz] + pub extern "C" fn __fixdfsi(f: f64) -> i32 { + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1054 << 52 { // >= 1, < max + let m = 1 << 31 | (fbits >> 21) as u32; // Mantissa and the implicit 1-bit. + let s = 1054 - (fbits >> 52); // Shift based on the exponent and bias. + let u = (m >> s) as i32; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 2047 << 52 { // >= max (incl. inf) + if f.is_sign_negative() { i32::MIN } else { i32::MAX } + } else { // NaN + 0 + } + } + + #[arm_aeabi_alias = __aeabi_d2lz] + pub extern "C" fn __fixdfdi(f: f64) -> i64 { + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1086 << 52 { // >= 1, < max + let m = 1 << 63 | fbits << 11; // Mantissa and the implicit 1-bit. + let s = 1086 - (fbits >> 52); // Shift based on the exponent and bias. + let u = (m >> s) as i64; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 2047 << 52 { // >= max (incl. inf) + if f.is_sign_negative() { i64::MIN } else { i64::MAX } + } else { // NaN + 0 + } + } + + #[cfg_attr(target_feature = "llvm14-builtins-abi", win64_128bit_abi_hack)] + #[cfg_attr(not(target_feature = "llvm14-builtins-abi"), unadjusted_on_win64)] + pub extern "C" fn __fixdfti(f: f64) -> i128 { + let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. + if fbits < 1023 << 52 { // >= 0, < 1 + 0 + } else if fbits < 1150 << 52 { // >= 1, < max + let m = 1 << 127 | (fbits as u128) << 75; // Mantissa and the implicit 1-bit. + let s = 1150 - (fbits >> 52); // Shift based on the exponent and bias. + let u = (m >> s) as i128; // Unsigned result. + if f.is_sign_negative() { -u } else { u } + } else if fbits <= 2047 << 52 { // >= max (incl. 
inf) + if f.is_sign_negative() { i128::MIN } else { i128::MAX } + } else { // NaN + 0 + } + } +} diff --git a/vendor/compiler_builtins/src/float/div.rs b/vendor/compiler_builtins/src/float/div.rs new file mode 100644 index 000000000..528a8368d --- /dev/null +++ b/vendor/compiler_builtins/src/float/div.rs @@ -0,0 +1,467 @@ +// The functions are complex with many branches, and explicit +// `return`s makes it clear where function exit points are +#![allow(clippy::needless_return)] + +use float::Float; +use int::{CastInto, DInt, HInt, Int}; + +fn div32(a: F, b: F) -> F +where + u32: CastInto, + F::Int: CastInto, + i32: CastInto, + F::Int: CastInto, + F::Int: HInt, +{ + let one = F::Int::ONE; + let zero = F::Int::ZERO; + + // let bits = F::BITS; + let significand_bits = F::SIGNIFICAND_BITS; + let max_exponent = F::EXPONENT_MAX; + + let exponent_bias = F::EXPONENT_BIAS; + + let implicit_bit = F::IMPLICIT_BIT; + let significand_mask = F::SIGNIFICAND_MASK; + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXPONENT_MASK; + let inf_rep = exponent_mask; + let quiet_bit = implicit_bit >> 1; + let qnan_rep = exponent_mask | quiet_bit; + + #[inline(always)] + fn negate_u32(a: u32) -> u32 { + (::wrapping_neg(a as i32)) as u32 + } + + let a_rep = a.repr(); + let b_rep = b.repr(); + + let a_exponent = (a_rep >> significand_bits) & max_exponent.cast(); + let b_exponent = (b_rep >> significand_bits) & max_exponent.cast(); + let quotient_sign = (a_rep ^ b_rep) & sign_bit; + + let mut a_significand = a_rep & significand_mask; + let mut b_significand = b_rep & significand_mask; + let mut scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. + if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + { + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + // NaN / anything = qNaN + if a_abs > inf_rep { + return F::from_repr(a_rep | quiet_bit); + } + // anything / NaN = qNaN + if b_abs > inf_rep { + return F::from_repr(b_rep | quiet_bit); + } + + if a_abs == inf_rep { + if b_abs == inf_rep { + // infinity / infinity = NaN + return F::from_repr(qnan_rep); + } else { + // infinity / anything else = +/- infinity + return F::from_repr(a_abs | quotient_sign); + } + } + + // anything else / infinity = +/- 0 + if b_abs == inf_rep { + return F::from_repr(quotient_sign); + } + + if a_abs == zero { + if b_abs == zero { + // zero / zero = NaN + return F::from_repr(qnan_rep); + } else { + // zero / anything else = +/- zero + return F::from_repr(quotient_sign); + } + } + + // anything else / zero = +/- infinity + if b_abs == zero { + return F::from_repr(inf_rep | quotient_sign); + } + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if a_abs < implicit_bit { + let (exponent, significand) = F::normalize(a_significand); + scale += exponent; + a_significand = significand; + } + + if b_abs < implicit_bit { + let (exponent, significand) = F::normalize(b_significand); + scale -= exponent; + b_significand = significand; + } + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) 
+ a_significand |= implicit_bit; + b_significand |= implicit_bit; + let mut quotient_exponent: i32 = CastInto::::cast(a_exponent) + .wrapping_sub(CastInto::::cast(b_exponent)) + .wrapping_add(scale); + + // Align the significand of b as a Q31 fixed-point number in the range + // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax + // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This + // is accurate to about 3.5 binary digits. + let q31b = CastInto::::cast(b_significand << 8.cast()); + let mut reciprocal = (0x7504f333u32).wrapping_sub(q31b); + + // Now refine the reciprocal estimate using a Newton-Raphson iteration: + // + // x1 = x0 * (2 - x0 * b) + // + // This doubles the number of correct binary digits in the approximation + // with each iteration, so after three iterations, we have about 28 binary + // digits of accuracy. + + let mut correction: u32 = + negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); + reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32; + correction = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); + reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32; + correction = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32); + reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32; + + // Exhaustive testing shows that the error in reciprocal after three steps + // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our + // expectations. We bump the reciprocal by a tiny value to force the error + // to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to + // be specific). This also causes 1/1 to give a sensible approximation + // instead of zero (due to overflow). + reciprocal = reciprocal.wrapping_sub(2); + + // The numerical reciprocal is accurate to within 2^-28, lies in the + // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller + // than the true reciprocal of b. Multiplying a by this reciprocal thus + // gives a numerical q = a/b in Q24 with the following properties: + // + // 1. q < a/b + // 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0) + // 3. the error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes + // from the fact that we truncate the product, and the 2^27 term + // is the error in the reciprocal of b scaled by the maximum + // possible value of a. As a consequence of this error bound, + // either q or nextafter(q) is the correctly rounded + let mut quotient = (a_significand << 1).widen_mul(reciprocal.cast()).hi(); + + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). + // In either case, we are going to compute a residual of the form + // + // r = a - q*b + // + // We know from the construction of q that r satisfies: + // + // 0 <= r < ulp(q)*b + // + // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // already have the correct result. The exact halfway case cannot occur. + // We also take this time to right shift quotient if it falls in the [1,2) + // range and adjust the exponent accordingly. 
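// A minimal integer-only sketch of the rounding test described above: with q the
// truncated quotient, the remainder r = a - q*b satisfies 0 <= r < b, and the quotient
// must round up exactly when r exceeds half an ulp, i.e. when 2*r > b. (As the comment
// above notes, the exact halfway case cannot occur.) Assumes b is small enough that the
// doubling cannot overflow.
fn rounds_up(a: u64, b: u64) -> bool {
    let q = a / b; // truncated quotient
    let r = a - q * b; // 0 <= r < b
    2 * r > b
}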
+ let residual = if quotient < (implicit_bit << 1) { + quotient_exponent = quotient_exponent.wrapping_sub(1); + (a_significand << (significand_bits + 1)).wrapping_sub(quotient.wrapping_mul(b_significand)) + } else { + quotient >>= 1; + (a_significand << significand_bits).wrapping_sub(quotient.wrapping_mul(b_significand)) + }; + + let written_exponent = quotient_exponent.wrapping_add(exponent_bias as i32); + + if written_exponent >= max_exponent as i32 { + // If we have overflowed the exponent, return infinity. + return F::from_repr(inf_rep | quotient_sign); + } else if written_exponent < 1 { + // Flush denormals to zero. In the future, it would be nice to add + // code to round them correctly. + return F::from_repr(quotient_sign); + } else { + let round = ((residual << 1) > b_significand) as u32; + // Clear the implicit bits + let mut abs_result = quotient & significand_mask; + // Insert the exponent + abs_result |= written_exponent.cast() << significand_bits; + // Round + abs_result = abs_result.wrapping_add(round.cast()); + // Insert the sign and return + return F::from_repr(abs_result | quotient_sign); + } +} + +fn div64(a: F, b: F) -> F +where + u32: CastInto, + F::Int: CastInto, + i32: CastInto, + F::Int: CastInto, + u64: CastInto, + F::Int: CastInto, + i64: CastInto, + F::Int: CastInto, + F::Int: HInt, +{ + let one = F::Int::ONE; + let zero = F::Int::ZERO; + + // let bits = F::BITS; + let significand_bits = F::SIGNIFICAND_BITS; + let max_exponent = F::EXPONENT_MAX; + + let exponent_bias = F::EXPONENT_BIAS; + + let implicit_bit = F::IMPLICIT_BIT; + let significand_mask = F::SIGNIFICAND_MASK; + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXPONENT_MASK; + let inf_rep = exponent_mask; + let quiet_bit = implicit_bit >> 1; + let qnan_rep = exponent_mask | quiet_bit; + // let exponent_bits = F::EXPONENT_BITS; + + #[inline(always)] + fn negate_u32(a: u32) -> u32 { + (::wrapping_neg(a as i32)) as u32 + } + + #[inline(always)] + fn negate_u64(a: u64) -> u64 { + (::wrapping_neg(a as i64)) as u64 + } + + let a_rep = a.repr(); + let b_rep = b.repr(); + + let a_exponent = (a_rep >> significand_bits) & max_exponent.cast(); + let b_exponent = (b_rep >> significand_bits) & max_exponent.cast(); + let quotient_sign = (a_rep ^ b_rep) & sign_bit; + + let mut a_significand = a_rep & significand_mask; + let mut b_significand = b_rep & significand_mask; + let mut scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. 
+ if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + { + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + // NaN / anything = qNaN + if a_abs > inf_rep { + return F::from_repr(a_rep | quiet_bit); + } + // anything / NaN = qNaN + if b_abs > inf_rep { + return F::from_repr(b_rep | quiet_bit); + } + + if a_abs == inf_rep { + if b_abs == inf_rep { + // infinity / infinity = NaN + return F::from_repr(qnan_rep); + } else { + // infinity / anything else = +/- infinity + return F::from_repr(a_abs | quotient_sign); + } + } + + // anything else / infinity = +/- 0 + if b_abs == inf_rep { + return F::from_repr(quotient_sign); + } + + if a_abs == zero { + if b_abs == zero { + // zero / zero = NaN + return F::from_repr(qnan_rep); + } else { + // zero / anything else = +/- zero + return F::from_repr(quotient_sign); + } + } + + // anything else / zero = +/- infinity + if b_abs == zero { + return F::from_repr(inf_rep | quotient_sign); + } + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if a_abs < implicit_bit { + let (exponent, significand) = F::normalize(a_significand); + scale += exponent; + a_significand = significand; + } + + if b_abs < implicit_bit { + let (exponent, significand) = F::normalize(b_significand); + scale -= exponent; + b_significand = significand; + } + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + a_significand |= implicit_bit; + b_significand |= implicit_bit; + let mut quotient_exponent: i32 = CastInto::::cast(a_exponent) + .wrapping_sub(CastInto::::cast(b_exponent)) + .wrapping_add(scale); + + // Align the significand of b as a Q31 fixed-point number in the range + // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax + // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This + // is accurate to about 3.5 binary digits. + let q31b = CastInto::::cast(b_significand >> 21.cast()); + let mut recip32 = (0x7504f333u32).wrapping_sub(q31b); + + // Now refine the reciprocal estimate using a Newton-Raphson iteration: + // + // x1 = x0 * (2 - x0 * b) + // + // This doubles the number of correct binary digits in the approximation + // with each iteration, so after three iterations, we have about 28 binary + // digits of accuracy. + + let mut correction32: u32 = + negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32); + recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32; + correction32 = negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32); + recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32; + correction32 = negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32); + recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32; + + // recip32 might have overflowed to exactly zero in the preceeding + // computation if the high word of b is exactly 1.0. This would sabotage + // the full-width final stage of the computation that follows, so we adjust + // recip32 downward by one bit. + recip32 = recip32.wrapping_sub(1); + + // We need to perform one more iteration to get us to 56 binary digits; + // The last iteration needs to happen with extra precision. 
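// A minimal sketch of the Newton-Raphson refinement described above, written in plain
// f64 arithmetic rather than Q31/Q32 fixed point: starting from the same minimax guess
// 3/4 + 1/sqrt(2) - b/2 for b in [1.0, 2.0), each step x <- x * (2 - b*x) roughly
// doubles the number of correct bits of the reciprocal.
fn nr_reciprocal(b: f64) -> f64 {
    let mut x = 0.75 + core::f64::consts::FRAC_1_SQRT_2 - b / 2.0;
    for _ in 0..3 {
        x = x * (2.0 - b * x); // Newton-Raphson step for 1/b
    }
    x
}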
+ let q63blo = CastInto::::cast(b_significand << 11.cast()); + + let correction: u64 = negate_u64( + (recip32 as u64) + .wrapping_mul(q31b as u64) + .wrapping_add((recip32 as u64).wrapping_mul(q63blo as u64) >> 32), + ); + let c_hi = (correction >> 32) as u32; + let c_lo = correction as u32; + let mut reciprocal: u64 = (recip32 as u64) + .wrapping_mul(c_hi as u64) + .wrapping_add((recip32 as u64).wrapping_mul(c_lo as u64) >> 32); + + // We already adjusted the 32-bit estimate, now we need to adjust the final + // 64-bit reciprocal estimate downward to ensure that it is strictly smaller + // than the infinitely precise exact reciprocal. Because the computation + // of the Newton-Raphson step is truncating at every step, this adjustment + // is small; most of the work is already done. + reciprocal = reciprocal.wrapping_sub(2); + + // The numerical reciprocal is accurate to within 2^-56, lies in the + // interval [0.5, 1.0), and is strictly smaller than the true reciprocal + // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b + // in Q53 with the following properties: + // + // 1. q < a/b + // 2. q is in the interval [0.5, 2.0) + // 3. the error in q is bounded away from 2^-53 (actually, we have a + // couple of bits to spare, but this is all we need). + + // We need a 64 x 64 multiply high to compute q, which isn't a basic + // operation in C, so we need to be a little bit fussy. + // let mut quotient: F::Int = ((((reciprocal as u64) + // .wrapping_mul(CastInto::::cast(a_significand << 1) as u64)) + // >> 32) as u32) + // .cast(); + + // We need a 64 x 64 multiply high to compute q, which isn't a basic + // operation in C, so we need to be a little bit fussy. + let mut quotient = (a_significand << 2).widen_mul(reciprocal.cast()).hi(); + + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). + // In either case, we are going to compute a residual of the form + // + // r = a - q*b + // + // We know from the construction of q that r satisfies: + // + // 0 <= r < ulp(q)*b + // + // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // already have the correct result. The exact halfway case cannot occur. + // We also take this time to right shift quotient if it falls in the [1,2) + // range and adjust the exponent accordingly. + let residual = if quotient < (implicit_bit << 1) { + quotient_exponent = quotient_exponent.wrapping_sub(1); + (a_significand << (significand_bits + 1)).wrapping_sub(quotient.wrapping_mul(b_significand)) + } else { + quotient >>= 1; + (a_significand << significand_bits).wrapping_sub(quotient.wrapping_mul(b_significand)) + }; + + let written_exponent = quotient_exponent.wrapping_add(exponent_bias as i32); + + if written_exponent >= max_exponent as i32 { + // If we have overflowed the exponent, return infinity. + return F::from_repr(inf_rep | quotient_sign); + } else if written_exponent < 1 { + // Flush denormals to zero. In the future, it would be nice to add + // code to round them correctly. + return F::from_repr(quotient_sign); + } else { + let round = ((residual << 1) > b_significand) as u32; + // Clear the implicit bits + let mut abs_result = quotient & significand_mask; + // Insert the exponent + abs_result |= written_exponent.cast() << significand_bits; + // Round + abs_result = abs_result.wrapping_add(round.cast()); + // Insert the sign and return + return F::from_repr(abs_result | quotient_sign); + } +} + +intrinsics! 
{ + #[arm_aeabi_alias = __aeabi_fdiv] + pub extern "C" fn __divsf3(a: f32, b: f32) -> f32 { + div32(a, b) + } + + #[arm_aeabi_alias = __aeabi_ddiv] + pub extern "C" fn __divdf3(a: f64, b: f64) -> f64 { + div64(a, b) + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __divsf3vfp(a: f32, b: f32) -> f32 { + a / b + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __divdf3vfp(a: f64, b: f64) -> f64 { + a / b + } +} diff --git a/vendor/compiler_builtins/src/float/extend.rs b/vendor/compiler_builtins/src/float/extend.rs new file mode 100644 index 000000000..39633773b --- /dev/null +++ b/vendor/compiler_builtins/src/float/extend.rs @@ -0,0 +1,83 @@ +use float::Float; +use int::{CastInto, Int}; + +/// Generic conversion from a narrower to a wider IEEE-754 floating-point type +fn extend(a: F) -> R +where + F::Int: CastInto, + u64: CastInto, + u32: CastInto, + R::Int: CastInto, + R::Int: CastInto, + u64: CastInto, + F::Int: CastInto, +{ + let src_zero = F::Int::ZERO; + let src_one = F::Int::ONE; + let src_bits = F::BITS; + let src_sign_bits = F::SIGNIFICAND_BITS; + let src_exp_bias = F::EXPONENT_BIAS; + let src_min_normal = F::IMPLICIT_BIT; + let src_infinity = F::EXPONENT_MASK; + let src_sign_mask = F::SIGN_MASK as F::Int; + let src_abs_mask = src_sign_mask - src_one; + let src_qnan = F::SIGNIFICAND_MASK; + let src_nan_code = src_qnan - src_one; + + let dst_bits = R::BITS; + let dst_sign_bits = R::SIGNIFICAND_BITS; + let dst_inf_exp = R::EXPONENT_MAX; + let dst_exp_bias = R::EXPONENT_BIAS; + let dst_min_normal = R::IMPLICIT_BIT; + + let sign_bits_delta = dst_sign_bits - src_sign_bits; + let exp_bias_delta = dst_exp_bias - src_exp_bias; + let a_abs = a.repr() & src_abs_mask; + let mut abs_result = R::Int::ZERO; + + if a_abs.wrapping_sub(src_min_normal) < src_infinity.wrapping_sub(src_min_normal) { + // a is a normal number. + // Extend to the destination type by shifting the significand and + // exponent into the proper position and rebiasing the exponent. + let abs_dst: R::Int = a_abs.cast(); + let bias_dst: R::Int = exp_bias_delta.cast(); + abs_result = abs_dst.wrapping_shl(sign_bits_delta); + abs_result += bias_dst.wrapping_shl(dst_sign_bits); + } else if a_abs >= src_infinity { + // a is NaN or infinity. + // Conjure the result by beginning with infinity, then setting the qNaN + // bit (if needed) and right-aligning the rest of the trailing NaN + // payload field. + let qnan_dst: R::Int = (a_abs & src_qnan).cast(); + let nan_code_dst: R::Int = (a_abs & src_nan_code).cast(); + let inf_exp_dst: R::Int = dst_inf_exp.cast(); + abs_result = inf_exp_dst.wrapping_shl(dst_sign_bits); + abs_result |= qnan_dst.wrapping_shl(sign_bits_delta); + abs_result |= nan_code_dst.wrapping_shl(sign_bits_delta); + } else if a_abs != src_zero { + // a is denormal. + // Renormalize the significand and clear the leading bit, then insert + // the correct adjusted exponent in the destination type. + let scale = a_abs.leading_zeros() - src_min_normal.leading_zeros(); + let abs_dst: R::Int = a_abs.cast(); + let bias_dst: R::Int = (exp_bias_delta - scale + 1).cast(); + abs_result = abs_dst.wrapping_shl(sign_bits_delta + scale); + abs_result = (abs_result ^ dst_min_normal) | (bias_dst.wrapping_shl(dst_sign_bits)); + } + + let sign_result: R::Int = (a.repr() & src_sign_mask).cast(); + R::from_repr(abs_result | (sign_result.wrapping_shl(dst_bits - src_bits))) +} + +intrinsics! 
{ + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_f2d] + pub extern "C" fn __extendsfdf2(a: f32) -> f64 { + extend(a) + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __extendsfdf2vfp(a: f32) -> f64 { + a as f64 // LLVM generate 'fcvtds' + } +} diff --git a/vendor/compiler_builtins/src/float/mod.rs b/vendor/compiler_builtins/src/float/mod.rs new file mode 100644 index 000000000..01a5504d5 --- /dev/null +++ b/vendor/compiler_builtins/src/float/mod.rs @@ -0,0 +1,175 @@ +use core::ops; + +use super::int::Int; + +pub mod add; +pub mod cmp; +pub mod conv; +pub mod div; +pub mod extend; +pub mod mul; +pub mod pow; +pub mod sub; +pub mod trunc; + +public_test_dep! { +/// Trait for some basic operations on floats +pub(crate) trait Float: + Copy + + core::fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::MulAssign + + ops::Add + + ops::Sub + + ops::Div + + ops::Rem +{ + /// A uint of the same with as the float + type Int: Int; + + /// A int of the same with as the float + type SignedInt: Int; + + /// An int capable of containing the exponent bits plus a sign bit. This is signed. + type ExpInt: Int; + + const ZERO: Self; + const ONE: Self; + + /// The bitwidth of the float type + const BITS: u32; + + /// The bitwidth of the significand + const SIGNIFICAND_BITS: u32; + + /// The bitwidth of the exponent + const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1; + + /// The maximum value of the exponent + const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1; + + /// The exponent bias value + const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1; + + /// A mask for the sign bit + const SIGN_MASK: Self::Int; + + /// A mask for the significand + const SIGNIFICAND_MASK: Self::Int; + + // The implicit bit of the float format + const IMPLICIT_BIT: Self::Int; + + /// A mask for the exponent + const EXPONENT_MASK: Self::Int; + + /// Returns `self` transmuted to `Self::Int` + fn repr(self) -> Self::Int; + + /// Returns `self` transmuted to `Self::SignedInt` + fn signed_repr(self) -> Self::SignedInt; + + /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be + /// represented in multiple different ways. This method returns `true` if two NaNs are + /// compared. + fn eq_repr(self, rhs: Self) -> bool; + + /// Returns the sign bit + fn sign(self) -> bool; + + /// Returns the exponent with bias + fn exp(self) -> Self::ExpInt; + + /// Returns the significand with no implicit bit (or the "fractional" part) + fn frac(self) -> Self::Int; + + /// Returns the significand with implicit bit + fn imp_frac(self) -> Self::Int; + + /// Returns a `Self::Int` transmuted back to `Self` + fn from_repr(a: Self::Int) -> Self; + + /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. + fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self; + + /// Returns (normalized exponent, normalized significand) + fn normalize(significand: Self::Int) -> (i32, Self::Int); + + /// Returns if `self` is subnormal + fn is_subnormal(self) -> bool; +} +} + +macro_rules! 
float_impl { + ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { + impl Float for $ty { + type Int = $ity; + type SignedInt = $sity; + type ExpInt = $expty; + + const ZERO: Self = 0.0; + const ONE: Self = 1.0; + + const BITS: u32 = $bits; + const SIGNIFICAND_BITS: u32 = $significand_bits; + + const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); + const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1; + const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS; + const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK); + + fn repr(self) -> Self::Int { + self.to_bits() + } + fn signed_repr(self) -> Self::SignedInt { + self.to_bits() as Self::SignedInt + } + fn eq_repr(self, rhs: Self) -> bool { + if self.is_nan() && rhs.is_nan() { + true + } else { + self.repr() == rhs.repr() + } + } + fn sign(self) -> bool { + self.signed_repr() < Self::SignedInt::ZERO + } + fn exp(self) -> Self::ExpInt { + ((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt + } + fn frac(self) -> Self::Int { + self.to_bits() & Self::SIGNIFICAND_MASK + } + fn imp_frac(self) -> Self::Int { + self.frac() | Self::IMPLICIT_BIT + } + fn from_repr(a: Self::Int) -> Self { + Self::from_bits(a) + } + fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self { + Self::from_repr( + ((sign as Self::Int) << (Self::BITS - 1)) + | ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK) + | (significand & Self::SIGNIFICAND_MASK), + ) + } + fn normalize(significand: Self::Int) -> (i32, Self::Int) { + let shift = significand + .leading_zeros() + .wrapping_sub((Self::Int::ONE << Self::SIGNIFICAND_BITS).leading_zeros()); + ( + 1i32.wrapping_sub(shift as i32), + significand << shift as Self::Int, + ) + } + fn is_subnormal(self) -> bool { + (self.repr() & Self::EXPONENT_MASK) == Self::Int::ZERO + } + } + }; +} + +float_impl!(f32, u32, i32, i16, 32, 23); +float_impl!(f64, u64, i64, i16, 64, 52); diff --git a/vendor/compiler_builtins/src/float/mul.rs b/vendor/compiler_builtins/src/float/mul.rs new file mode 100644 index 000000000..c89f22756 --- /dev/null +++ b/vendor/compiler_builtins/src/float/mul.rs @@ -0,0 +1,209 @@ +use float::Float; +use int::{CastInto, DInt, HInt, Int}; + +fn mul(a: F, b: F) -> F +where + u32: CastInto, + F::Int: CastInto, + i32: CastInto, + F::Int: CastInto, + F::Int: HInt, +{ + let one = F::Int::ONE; + let zero = F::Int::ZERO; + + let bits = F::BITS; + let significand_bits = F::SIGNIFICAND_BITS; + let max_exponent = F::EXPONENT_MAX; + + let exponent_bias = F::EXPONENT_BIAS; + + let implicit_bit = F::IMPLICIT_BIT; + let significand_mask = F::SIGNIFICAND_MASK; + let sign_bit = F::SIGN_MASK as F::Int; + let abs_mask = sign_bit - one; + let exponent_mask = F::EXPONENT_MASK; + let inf_rep = exponent_mask; + let quiet_bit = implicit_bit >> 1; + let qnan_rep = exponent_mask | quiet_bit; + let exponent_bits = F::EXPONENT_BITS; + + let a_rep = a.repr(); + let b_rep = b.repr(); + + let a_exponent = (a_rep >> significand_bits) & max_exponent.cast(); + let b_exponent = (b_rep >> significand_bits) & max_exponent.cast(); + let product_sign = (a_rep ^ b_rep) & sign_bit; + + let mut a_significand = a_rep & significand_mask; + let mut b_significand = b_rep & significand_mask; + let mut scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. 
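+    // A single unsigned comparison catches all of these cases: `wrapping_sub(one)`
+    // maps a biased exponent of zero (zero or denormal) to the largest value of
+    // the type and maps `max_exponent` (infinity or NaN) to `max_exponent - 1`,
+    // so only exponents in [1, max_exponent - 1], i.e. non-zero finite normal
+    // values, fail the test below.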
+ if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() + { + let a_abs = a_rep & abs_mask; + let b_abs = b_rep & abs_mask; + + // NaN + anything = qNaN + if a_abs > inf_rep { + return F::from_repr(a_rep | quiet_bit); + } + // anything + NaN = qNaN + if b_abs > inf_rep { + return F::from_repr(b_rep | quiet_bit); + } + + if a_abs == inf_rep { + if b_abs != zero { + // infinity * non-zero = +/- infinity + return F::from_repr(a_abs | product_sign); + } else { + // infinity * zero = NaN + return F::from_repr(qnan_rep); + } + } + + if b_abs == inf_rep { + if a_abs != zero { + // infinity * non-zero = +/- infinity + return F::from_repr(b_abs | product_sign); + } else { + // infinity * zero = NaN + return F::from_repr(qnan_rep); + } + } + + // zero * anything = +/- zero + if a_abs == zero { + return F::from_repr(product_sign); + } + + // anything * zero = +/- zero + if b_abs == zero { + return F::from_repr(product_sign); + } + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if a_abs < implicit_bit { + let (exponent, significand) = F::normalize(a_significand); + scale += exponent; + a_significand = significand; + } + + if b_abs < implicit_bit { + let (exponent, significand) = F::normalize(b_significand); + scale += exponent; + b_significand = significand; + } + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + a_significand |= implicit_bit; + b_significand |= implicit_bit; + + // Get the significand of a*b. Before multiplying the significands, shift + // one of them left to left-align it in the field. Thus, the product will + // have (exponentBits + 2) integral digits, all but two of which must be + // zero. Normalizing this result is just a conditional left-shift by one + // and bumping the exponent accordingly. + let (mut product_low, mut product_high) = a_significand + .widen_mul(b_significand << exponent_bits) + .lo_hi(); + + let a_exponent_i32: i32 = a_exponent.cast(); + let b_exponent_i32: i32 = b_exponent.cast(); + let mut product_exponent: i32 = a_exponent_i32 + .wrapping_add(b_exponent_i32) + .wrapping_add(scale) + .wrapping_sub(exponent_bias as i32); + + // Normalize the significand, adjust exponent if needed. + if (product_high & implicit_bit) != zero { + product_exponent = product_exponent.wrapping_add(1); + } else { + product_high = (product_high << 1) | (product_low >> (bits - 1)); + product_low <<= 1; + } + + // If we have overflowed the type, return +/- infinity. + if product_exponent >= max_exponent as i32 { + return F::from_repr(inf_rep | product_sign); + } + + if product_exponent <= 0 { + // Result is denormal before rounding + // + // If the result is so small that it just underflows to zero, return + // a zero of the appropriate sign. Mathematically there is no need to + // handle this case separately, but we make it a special case to + // simplify the shift logic. + let shift = one.wrapping_sub(product_exponent.cast()).cast(); + if shift >= bits { + return F::from_repr(product_sign); + } + + // Otherwise, shift the significand of the result so that the round + // bit is the high bit of productLo. 
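+        // The low bits that are about to be shifted out are folded back into
+        // `product_low` as "sticky" bits, so the round-to-nearest, ties-to-even
+        // logic at the end can still see whether any non-zero bits were
+        // discarded during the denormalizing shift.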
+ if shift < bits { + let sticky = product_low << (bits - shift); + product_low = product_high << (bits - shift) | product_low >> shift | sticky; + product_high >>= shift; + } else if shift < (2 * bits) { + let sticky = product_high << (2 * bits - shift) | product_low; + product_low = product_high >> (shift - bits) | sticky; + product_high = zero; + } else { + product_high = zero; + } + } else { + // Result is normal before rounding; insert the exponent. + product_high &= significand_mask; + product_high |= product_exponent.cast() << significand_bits; + } + + // Insert the sign of the result: + product_high |= product_sign; + + // Final rounding. The final result may overflow to infinity, or underflow + // to zero, but those are the correct results in those cases. We use the + // default IEEE-754 round-to-nearest, ties-to-even rounding mode. + if product_low > sign_bit { + product_high += one; + } + + if product_low == sign_bit { + product_high += product_high & one; + } + + F::from_repr(product_high) +} + +intrinsics! { + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_fmul] + pub extern "C" fn __mulsf3(a: f32, b: f32) -> f32 { + mul(a, b) + } + + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_dmul] + pub extern "C" fn __muldf3(a: f64, b: f64) -> f64 { + mul(a, b) + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __mulsf3vfp(a: f32, b: f32) -> f32 { + a * b + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __muldf3vfp(a: f64, b: f64) -> f64 { + a * b + } +} diff --git a/vendor/compiler_builtins/src/float/pow.rs b/vendor/compiler_builtins/src/float/pow.rs new file mode 100644 index 000000000..a75340c30 --- /dev/null +++ b/vendor/compiler_builtins/src/float/pow.rs @@ -0,0 +1,36 @@ +use float::Float; +use int::Int; + +/// Returns `a` raised to the power `b` +fn pow(a: F, b: i32) -> F { + let mut a = a; + let recip = b < 0; + let mut pow = Int::abs_diff(b, 0); + let mut mul = F::ONE; + loop { + if (pow & 1) != 0 { + mul *= a; + } + pow >>= 1; + if pow == 0 { + break; + } + a *= a; + } + + if recip { + F::ONE / mul + } else { + mul + } +} + +intrinsics! { + pub extern "C" fn __powisf2(a: f32, b: i32) -> f32 { + pow(a, b) + } + + pub extern "C" fn __powidf2(a: f64, b: i32) -> f64 { + pow(a, b) + } +} diff --git a/vendor/compiler_builtins/src/float/sub.rs b/vendor/compiler_builtins/src/float/sub.rs new file mode 100644 index 000000000..8d300e9d2 --- /dev/null +++ b/vendor/compiler_builtins/src/float/sub.rs @@ -0,0 +1,25 @@ +use float::add::__adddf3; +use float::add::__addsf3; +use float::Float; + +intrinsics! 
{ + #[arm_aeabi_alias = __aeabi_fsub] + pub extern "C" fn __subsf3(a: f32, b: f32) -> f32 { + __addsf3(a, f32::from_repr(b.repr() ^ f32::SIGN_MASK)) + } + + #[arm_aeabi_alias = __aeabi_dsub] + pub extern "C" fn __subdf3(a: f64, b: f64) -> f64 { + __adddf3(a, f64::from_repr(b.repr() ^ f64::SIGN_MASK)) + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __subsf3vfp(a: f32, b: f32) -> f32 { + a - b + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __subdf3vfp(a: f64, b: f64) -> f64 { + a - b + } +} diff --git a/vendor/compiler_builtins/src/float/trunc.rs b/vendor/compiler_builtins/src/float/trunc.rs new file mode 100644 index 000000000..d73713084 --- /dev/null +++ b/vendor/compiler_builtins/src/float/trunc.rs @@ -0,0 +1,125 @@ +use float::Float; +use int::{CastInto, Int}; + +fn trunc(a: F) -> R +where + F::Int: CastInto, + F::Int: CastInto, + u64: CastInto, + u32: CastInto, + + R::Int: CastInto, + u32: CastInto, + F::Int: CastInto, +{ + let src_zero = F::Int::ZERO; + let src_one = F::Int::ONE; + let src_bits = F::BITS; + let src_exp_bias = F::EXPONENT_BIAS; + + let src_min_normal = F::IMPLICIT_BIT; + let src_significand_mask = F::SIGNIFICAND_MASK; + let src_infinity = F::EXPONENT_MASK; + let src_sign_mask = F::SIGN_MASK; + let src_abs_mask = src_sign_mask - src_one; + let round_mask = (src_one << (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)) - src_one; + let halfway = src_one << (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS - 1); + let src_qnan = src_one << (F::SIGNIFICAND_BITS - 1); + let src_nan_code = src_qnan - src_one; + + let dst_zero = R::Int::ZERO; + let dst_one = R::Int::ONE; + let dst_bits = R::BITS; + let dst_inf_exp = R::EXPONENT_MAX; + let dst_exp_bias = R::EXPONENT_BIAS; + + let underflow_exponent: F::Int = (src_exp_bias + 1 - dst_exp_bias).cast(); + let overflow_exponent: F::Int = (src_exp_bias + dst_inf_exp - dst_exp_bias).cast(); + let underflow: F::Int = underflow_exponent << F::SIGNIFICAND_BITS; + let overflow: F::Int = overflow_exponent << F::SIGNIFICAND_BITS; + + let dst_qnan = R::Int::ONE << (R::SIGNIFICAND_BITS - 1); + let dst_nan_code = dst_qnan - dst_one; + + let sign_bits_delta = F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS; + // Break a into a sign and representation of the absolute value. + let a_abs = a.repr() & src_abs_mask; + let sign = a.repr() & src_sign_mask; + let mut abs_result: R::Int; + + if a_abs.wrapping_sub(underflow) < a_abs.wrapping_sub(overflow) { + // The exponent of a is within the range of normal numbers in the + // destination format. We can convert by simply right-shifting with + // rounding and adjusting the exponent. + abs_result = (a_abs >> sign_bits_delta).cast(); + let tmp = src_exp_bias.wrapping_sub(dst_exp_bias) << R::SIGNIFICAND_BITS; + abs_result = abs_result.wrapping_sub(tmp.cast()); + + let round_bits = a_abs & round_mask; + if round_bits > halfway { + // Round to nearest. + abs_result += dst_one; + } else if round_bits == halfway { + // Tie to even. + abs_result += abs_result & dst_one; + }; + } else if a_abs > src_infinity { + // a is NaN. + // Conjure the result by beginning with infinity, setting the qNaN + // bit and inserting the (truncated) trailing NaN field. + abs_result = (dst_inf_exp << R::SIGNIFICAND_BITS).cast(); + abs_result |= dst_qnan; + abs_result |= dst_nan_code + & ((a_abs & src_nan_code) >> (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)).cast(); + } else if a_abs >= overflow { + // a overflows to infinity. 
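+        // The result is the canonical infinity encoding: all-ones exponent with
+        // a zero significand; the sign is applied at the end.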
+ abs_result = (dst_inf_exp << R::SIGNIFICAND_BITS).cast(); + } else { + // a underflows on conversion to the destination type or is an exact + // zero. The result may be a denormal or zero. Extract the exponent + // to get the shift amount for the denormalization. + let a_exp: u32 = (a_abs >> F::SIGNIFICAND_BITS).cast(); + let shift = src_exp_bias - dst_exp_bias - a_exp + 1; + + let significand = (a.repr() & src_significand_mask) | src_min_normal; + + // Right shift by the denormalization amount with sticky. + if shift > F::SIGNIFICAND_BITS { + abs_result = dst_zero; + } else { + let sticky = if (significand << (src_bits - shift)) != src_zero { + src_one + } else { + src_zero + }; + let denormalized_significand: F::Int = significand >> shift | sticky; + abs_result = + (denormalized_significand >> (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)).cast(); + let round_bits = denormalized_significand & round_mask; + // Round to nearest + if round_bits > halfway { + abs_result += dst_one; + } + // Ties to even + else if round_bits == halfway { + abs_result += abs_result & dst_one; + }; + } + } + + // Apply the signbit to the absolute value. + R::from_repr(abs_result | sign.wrapping_shr(src_bits - dst_bits).cast()) +} + +intrinsics! { + #[aapcs_on_arm] + #[arm_aeabi_alias = __aeabi_d2f] + pub extern "C" fn __truncdfsf2(a: f64) -> f32 { + trunc(a) + } + + #[cfg(target_arch = "arm")] + pub extern "C" fn __truncdfsf2vfp(a: f64) -> f32 { + a as f32 + } +} diff --git a/vendor/compiler_builtins/src/int/addsub.rs b/vendor/compiler_builtins/src/int/addsub.rs new file mode 100644 index 000000000..f4841e90f --- /dev/null +++ b/vendor/compiler_builtins/src/int/addsub.rs @@ -0,0 +1,96 @@ +use int::{DInt, Int}; + +trait UAddSub: DInt { + fn uadd(self, other: Self) -> Self { + let (lo, carry) = self.lo().overflowing_add(other.lo()); + let hi = self.hi().wrapping_add(other.hi()); + let carry = if carry { Self::H::ONE } else { Self::H::ZERO }; + Self::from_lo_hi(lo, hi.wrapping_add(carry)) + } + fn uadd_one(self) -> Self { + let (lo, carry) = self.lo().overflowing_add(Self::H::ONE); + let carry = if carry { Self::H::ONE } else { Self::H::ZERO }; + Self::from_lo_hi(lo, self.hi().wrapping_add(carry)) + } + fn usub(self, other: Self) -> Self { + let uneg = (!other).uadd_one(); + self.uadd(uneg) + } +} + +impl UAddSub for u128 {} + +trait AddSub: Int +where + ::UnsignedInt: UAddSub, +{ + fn add(self, other: Self) -> Self { + Self::from_unsigned(self.unsigned().uadd(other.unsigned())) + } + fn sub(self, other: Self) -> Self { + Self::from_unsigned(self.unsigned().usub(other.unsigned())) + } +} + +impl AddSub for u128 {} +impl AddSub for i128 {} + +trait Addo: AddSub +where + ::UnsignedInt: UAddSub, +{ + fn addo(self, other: Self) -> (Self, bool) { + let sum = AddSub::add(self, other); + (sum, (other < Self::ZERO) != (sum < self)) + } +} + +impl Addo for i128 {} +impl Addo for u128 {} + +trait Subo: AddSub +where + ::UnsignedInt: UAddSub, +{ + fn subo(self, other: Self) -> (Self, bool) { + let sum = AddSub::sub(self, other); + (sum, (other < Self::ZERO) != (self < sum)) + } +} + +impl Subo for i128 {} +impl Subo for u128 {} + +intrinsics! 
{ + pub extern "C" fn __rust_i128_add(a: i128, b: i128) -> i128 { + AddSub::add(a,b) + } + + pub extern "C" fn __rust_i128_addo(a: i128, b: i128) -> (i128, bool) { + a.addo(b) + } + + pub extern "C" fn __rust_u128_add(a: u128, b: u128) -> u128 { + AddSub::add(a,b) + } + + pub extern "C" fn __rust_u128_addo(a: u128, b: u128) -> (u128, bool) { + a.addo(b) + } + + pub extern "C" fn __rust_i128_sub(a: i128, b: i128) -> i128 { + AddSub::sub(a,b) + } + + pub extern "C" fn __rust_i128_subo(a: i128, b: i128) -> (i128, bool) { + a.subo(b) + } + + pub extern "C" fn __rust_u128_sub(a: u128, b: u128) -> u128 { + AddSub::sub(a,b) + } + + pub extern "C" fn __rust_u128_subo(a: u128, b: u128) -> (u128, bool) { + a.subo(b) + } +} diff --git a/vendor/compiler_builtins/src/int/leading_zeros.rs b/vendor/compiler_builtins/src/int/leading_zeros.rs new file mode 100644 index 000000000..9e60ab0d7 --- /dev/null +++ b/vendor/compiler_builtins/src/int/leading_zeros.rs @@ -0,0 +1,149 @@ +// Note: these functions happen to produce the correct `usize::leading_zeros(0)` value +// without a explicit zero check. Zero is probably common enough that it could warrant +// adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`. +// Compilers will insert the check for zero in cases where it is needed. + +public_test_dep! { +/// Returns the number of leading binary zeros in `x`. +#[allow(dead_code)] +pub(crate) fn usize_leading_zeros_default(x: usize) -> usize { + // The basic idea is to test if the higher bits of `x` are zero and bisect the number + // of leading zeros. It is possible for all branches of the bisection to use the same + // code path by conditionally shifting the higher parts down to let the next bisection + // step work on the higher or lower parts of `x`. Instead of starting with `z == 0` + // and adding to the number of zeros, it is slightly faster to start with + // `z == usize::MAX.count_ones()` and subtract from the potential number of zeros, + // because it simplifies the final bisection step. + let mut x = x; + // the number of potential leading zeros + let mut z = usize::MAX.count_ones() as usize; + // a temporary + let mut t: usize; + #[cfg(target_pointer_width = "64")] + { + t = x >> 32; + if t != 0 { + z -= 32; + x = t; + } + } + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] + { + t = x >> 16; + if t != 0 { + z -= 16; + x = t; + } + } + t = x >> 8; + if t != 0 { + z -= 8; + x = t; + } + t = x >> 4; + if t != 0 { + z -= 4; + x = t; + } + t = x >> 2; + if t != 0 { + z -= 2; + x = t; + } + // the last two bisections are combined into one conditional + t = x >> 1; + if t != 0 { + z - 2 + } else { + z - x + } + + // We could potentially save a few cycles by using the LUT trick from + // "https://embeddedgurus.com/state-space/2014/09/ + // fast-deterministic-and-portable-counting-leading-zeros/". + // However, 256 bytes for a LUT is too large for embedded use cases. We could remove + // the last 3 bisections and use this 16 byte LUT for the rest of the work: + //const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]; + //z -= LUT[x] as usize; + //z + // However, it ends up generating about the same number of instructions. When benchmarked + // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO + // execution effects. Changing to using a LUT and branching is risky for smaller cores. 
+} +} + +// The above method does not compile well on RISC-V (because of the lack of predicated +// instructions), producing code with many branches or using an excessively long +// branchless solution. This method takes advantage of the set-if-less-than instruction on +// RISC-V that allows `(x >= power-of-two) as usize` to be branchless. + +public_test_dep! { +/// Returns the number of leading binary zeros in `x`. +#[allow(dead_code)] +pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize { + let mut x = x; + // the number of potential leading zeros + let mut z = usize::MAX.count_ones() as usize; + // a temporary + let mut t: usize; + + // RISC-V does not have a set-if-greater-than-or-equal instruction and + // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is + // still the most optimal method. A conditional set can only be turned into a single + // immediate instruction if `x` is compared with an immediate `imm` (that can fit into + // 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the + // right). If we try to save an instruction by using `x < imm` for each bisection, we + // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, + // but the immediate will never fit into 12 bits and never save an instruction. + #[cfg(target_pointer_width = "64")] + { + // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise + // `t` is set to 0. + t = ((x >= (1 << 32)) as usize) << 5; + // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the + // next step to process. + x >>= t; + // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential + // leading zeros + z -= t; + } + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] + { + t = ((x >= (1 << 16)) as usize) << 4; + x >>= t; + z -= t; + } + t = ((x >= (1 << 8)) as usize) << 3; + x >>= t; + z -= t; + t = ((x >= (1 << 4)) as usize) << 2; + x >>= t; + z -= t; + t = ((x >= (1 << 2)) as usize) << 1; + x >>= t; + z -= t; + t = (x >= (1 << 1)) as usize; + x >>= t; + z -= t; + // All bits except the LSB are guaranteed to be zero for this final bisection step. + // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. + z - x +} +} + +intrinsics! { + #[maybe_use_optimized_c_shim] + #[cfg(any( + target_pointer_width = "16", + target_pointer_width = "32", + target_pointer_width = "64" + ))] + /// Returns the number of leading binary zeros in `x`. + pub extern "C" fn __clzsi2(x: usize) -> usize { + if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { + usize_leading_zeros_riscv(x) + } else { + usize_leading_zeros_default(x) + } + } +} diff --git a/vendor/compiler_builtins/src/int/mod.rs b/vendor/compiler_builtins/src/int/mod.rs new file mode 100644 index 000000000..509f9fdae --- /dev/null +++ b/vendor/compiler_builtins/src/int/mod.rs @@ -0,0 +1,390 @@ +use core::ops; + +mod specialized_div_rem; + +pub mod addsub; +pub mod leading_zeros; +pub mod mul; +pub mod sdiv; +pub mod shift; +pub mod udiv; + +pub use self::leading_zeros::__clzsi2; + +public_test_dep! 
{ +/// Trait for some basic operations on integers +pub(crate) trait Int: + Copy + + core::fmt::Debug + + PartialEq + + PartialOrd + + ops::AddAssign + + ops::SubAssign + + ops::BitAndAssign + + ops::BitOrAssign + + ops::BitXorAssign + + ops::ShlAssign + + ops::ShrAssign + + ops::Add + + ops::Sub + + ops::Div + + ops::Shl + + ops::Shr + + ops::BitOr + + ops::BitXor + + ops::BitAnd + + ops::Not +{ + /// Type with the same width but other signedness + type OtherSign: Int; + /// Unsigned version of Self + type UnsignedInt: Int; + + /// If `Self` is a signed integer + const SIGNED: bool; + + /// The bitwidth of the int type + const BITS: u32; + + const ZERO: Self; + const ONE: Self; + const MIN: Self; + const MAX: Self; + + /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing + /// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111, + /// 112,119,120,125,126,127]. + const FUZZ_LENGTHS: [u8; 20]; + /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. + const FUZZ_NUM: usize; + + fn unsigned(self) -> Self::UnsignedInt; + fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; + + fn from_bool(b: bool) -> Self; + + /// Prevents the need for excessive conversions between signed and unsigned + fn logical_shr(self, other: u32) -> Self; + + /// Absolute difference between two integers. + fn abs_diff(self, other: Self) -> Self::UnsignedInt; + + // copied from primitive integers, but put in a trait + fn is_zero(self) -> bool; + fn wrapping_neg(self) -> Self; + fn wrapping_add(self, other: Self) -> Self; + fn wrapping_mul(self, other: Self) -> Self; + fn wrapping_sub(self, other: Self) -> Self; + fn wrapping_shl(self, other: u32) -> Self; + fn wrapping_shr(self, other: u32) -> Self; + fn rotate_left(self, other: u32) -> Self; + fn overflowing_add(self, other: Self) -> (Self, bool); + fn leading_zeros(self) -> u32; +} +} + +macro_rules! int_impl_common { + ($ty:ty) => { + const BITS: u32 = ::ZERO.count_zeros(); + const SIGNED: bool = Self::MIN != Self::ZERO; + + const ZERO: Self = 0; + const ONE: Self = 1; + const MIN: Self = ::MIN; + const MAX: Self = ::MAX; + + const FUZZ_LENGTHS: [u8; 20] = { + let bits = ::BITS; + let mut v = [0u8; 20]; + v[0] = 0; + v[1] = 1; + v[2] = 2; // important for parity and the iX::MIN case when reversed + let mut i = 3; + // No need for any more until the byte boundary, because there should be no algorithms + // that are sensitive to anything not next to byte boundaries after 2. We also scale + // in powers of two, which is important to prevent u128 corner tests from getting too + // big. + let mut l = 8; + loop { + if l >= ((bits / 2) as u8) { + break; + } + // get both sides of the byte boundary + v[i] = l - 1; + i += 1; + v[i] = l; + i += 1; + l *= 2; + } + + if bits != 8 { + // add the lower side of the middle boundary + v[i] = ((bits / 2) - 1) as u8; + i += 1; + } + + // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS + // boundary because of algorithms that split the high part up. We reverse the scaling + // as we go to Self::BITS. + let mid = i; + let mut j = 1; + loop { + v[i] = (bits as u8) - (v[mid - j]) - 1; + if j == mid { + break; + } + i += 1; + j += 1; + } + v + }; + + const FUZZ_NUM: usize = { + let log2 = (::BITS - 1).count_ones() as usize; + if log2 == 3 { + // case for u8 + 6 + } else { + // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate + // boundaries. 
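+                // For example, u32 has log2 == 5 and gets 8 + 4 = 12 entries,
+                // while u128 has log2 == 7 and gets 8 + 12 = 20, the maximum
+                // documented for `FUZZ_NUM` above.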
+ 8 + (4 * (log2 - 4)) + } + }; + + fn from_bool(b: bool) -> Self { + b as $ty + } + + fn logical_shr(self, other: u32) -> Self { + Self::from_unsigned(self.unsigned().wrapping_shr(other)) + } + + fn is_zero(self) -> bool { + self == Self::ZERO + } + + fn wrapping_neg(self) -> Self { + ::wrapping_neg(self) + } + + fn wrapping_add(self, other: Self) -> Self { + ::wrapping_add(self, other) + } + + fn wrapping_mul(self, other: Self) -> Self { + ::wrapping_mul(self, other) + } + + fn wrapping_sub(self, other: Self) -> Self { + ::wrapping_sub(self, other) + } + + fn wrapping_shl(self, other: u32) -> Self { + ::wrapping_shl(self, other) + } + + fn wrapping_shr(self, other: u32) -> Self { + ::wrapping_shr(self, other) + } + + fn rotate_left(self, other: u32) -> Self { + ::rotate_left(self, other) + } + + fn overflowing_add(self, other: Self) -> (Self, bool) { + ::overflowing_add(self, other) + } + + fn leading_zeros(self) -> u32 { + ::leading_zeros(self) + } + }; +} + +macro_rules! int_impl { + ($ity:ty, $uty:ty) => { + impl Int for $uty { + type OtherSign = $ity; + type UnsignedInt = $uty; + + fn unsigned(self) -> $uty { + self + } + + // It makes writing macros easier if this is implemented for both signed and unsigned + #[allow(clippy::wrong_self_convention)] + fn from_unsigned(me: $uty) -> Self { + me + } + + fn abs_diff(self, other: Self) -> Self { + if self < other { + other.wrapping_sub(self) + } else { + self.wrapping_sub(other) + } + } + + int_impl_common!($uty); + } + + impl Int for $ity { + type OtherSign = $uty; + type UnsignedInt = $uty; + + fn unsigned(self) -> $uty { + self as $uty + } + + fn from_unsigned(me: $uty) -> Self { + me as $ity + } + + fn abs_diff(self, other: Self) -> $uty { + self.wrapping_sub(other).wrapping_abs() as $uty + } + + int_impl_common!($ity); + } + }; +} + +int_impl!(isize, usize); +int_impl!(i8, u8); +int_impl!(i16, u16); +int_impl!(i32, u32); +int_impl!(i64, u64); +int_impl!(i128, u128); + +public_test_dep! { +/// Trait for integers twice the bit width of another integer. This is implemented for all +/// primitives except for `u8`, because there is not a smaller primitive. +pub(crate) trait DInt: Int { + /// Integer that is half the bit width of the integer this trait is implemented for + type H: HInt + Int; + + /// Returns the low half of `self` + fn lo(self) -> Self::H; + /// Returns the high half of `self` + fn hi(self) -> Self::H; + /// Returns the low and high halves of `self` as a tuple + fn lo_hi(self) -> (Self::H, Self::H); + /// Constructs an integer using lower and higher half parts + fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self; +} +} + +public_test_dep! { +/// Trait for integers half the bit width of another integer. This is implemented for all +/// primitives except for `u128`, because it there is not a larger primitive. +pub(crate) trait HInt: Int { + /// Integer that is double the bit width of the integer this trait is implemented for + type D: DInt + Int; + + /// Widens (using default extension) the integer to have double bit width + fn widen(self) -> Self::D; + /// Widens (zero extension only) the integer to have double bit width. This is needed to get + /// around problems with associated type bounds (such as `Int`) being unstable + fn zero_widen(self) -> Self::D; + /// Widens the integer to have double bit width and shifts the integer into the higher bits + fn widen_hi(self) -> Self::D; + /// Widening multiplication with zero widening. This cannot overflow. 
+ fn zero_widen_mul(self, rhs: Self) -> Self::D; + /// Widening multiplication. This cannot overflow. + fn widen_mul(self, rhs: Self) -> Self::D; +} +} + +macro_rules! impl_d_int { + ($($X:ident $D:ident),*) => { + $( + impl DInt for $D { + type H = $X; + + fn lo(self) -> Self::H { + self as $X + } + fn hi(self) -> Self::H { + (self >> <$X as Int>::BITS) as $X + } + fn lo_hi(self) -> (Self::H, Self::H) { + (self.lo(), self.hi()) + } + fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { + lo.zero_widen() | hi.widen_hi() + } + } + )* + }; +} + +macro_rules! impl_h_int { + ($($H:ident $uH:ident $X:ident),*) => { + $( + impl HInt for $H { + type D = $X; + + fn widen(self) -> Self::D { + self as $X + } + fn zero_widen(self) -> Self::D { + (self as $uH) as $X + } + fn widen_hi(self) -> Self::D { + (self as $X) << <$H as Int>::BITS + } + fn zero_widen_mul(self, rhs: Self) -> Self::D { + self.zero_widen().wrapping_mul(rhs.zero_widen()) + } + fn widen_mul(self, rhs: Self) -> Self::D { + self.widen().wrapping_mul(rhs.widen()) + } + } + )* + }; +} + +impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128); +impl_h_int!( + u8 u8 u16, + u16 u16 u32, + u32 u32 u64, + u64 u64 u128, + i8 u8 i16, + i16 u16 i32, + i32 u32 i64, + i64 u64 i128 +); + +public_test_dep! { +/// Trait to express (possibly lossy) casting of integers +pub(crate) trait CastInto: Copy { + fn cast(self) -> T; +} +} + +macro_rules! cast_into { + ($ty:ty) => { + cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); + }; + ($ty:ty; $($into:ty),*) => {$( + impl CastInto<$into> for $ty { + fn cast(self) -> $into { + self as $into + } + } + )*}; +} + +cast_into!(usize); +cast_into!(isize); +cast_into!(u8); +cast_into!(i8); +cast_into!(u16); +cast_into!(i16); +cast_into!(u32); +cast_into!(i32); +cast_into!(u64); +cast_into!(i64); +cast_into!(u128); +cast_into!(i128); diff --git a/vendor/compiler_builtins/src/int/mul.rs b/vendor/compiler_builtins/src/int/mul.rs new file mode 100644 index 000000000..07ce061c9 --- /dev/null +++ b/vendor/compiler_builtins/src/int/mul.rs @@ -0,0 +1,138 @@ +use int::{DInt, HInt, Int}; + +trait Mul: DInt +where + Self::H: DInt, +{ + fn mul(self, rhs: Self) -> Self { + // In order to prevent infinite recursion, we cannot use the `widen_mul` in this: + //self.lo().widen_mul(rhs.lo()) + // .wrapping_add(self.lo().wrapping_mul(rhs.hi()).widen_hi()) + // .wrapping_add(self.hi().wrapping_mul(rhs.lo()).widen_hi()) + + let lhs_lo = self.lo(); + let rhs_lo = rhs.lo(); + // construct the widening multiplication using only `Self::H` sized multiplications + let tmp_0 = lhs_lo.lo().zero_widen_mul(rhs_lo.lo()); + let tmp_1 = lhs_lo.lo().zero_widen_mul(rhs_lo.hi()); + let tmp_2 = lhs_lo.hi().zero_widen_mul(rhs_lo.lo()); + let tmp_3 = lhs_lo.hi().zero_widen_mul(rhs_lo.hi()); + // sum up all widening partials + let mul = Self::from_lo_hi(tmp_0, tmp_3) + .wrapping_add(tmp_1.zero_widen() << (Self::BITS / 4)) + .wrapping_add(tmp_2.zero_widen() << (Self::BITS / 4)); + // add the higher partials + mul.wrapping_add(lhs_lo.wrapping_mul(rhs.hi()).widen_hi()) + .wrapping_add(self.hi().wrapping_mul(rhs_lo).widen_hi()) + } +} + +impl Mul for u64 {} +impl Mul for i128 {} + +pub(crate) trait UMulo: Int + DInt { + fn mulo(self, rhs: Self) -> (Self, bool) { + match (self.hi().is_zero(), rhs.hi().is_zero()) { + // overflow is guaranteed + (false, false) => (self.wrapping_mul(rhs), true), + (true, false) => { + let mul_lo = self.lo().widen_mul(rhs.lo()); + let mul_hi = 
self.lo().widen_mul(rhs.hi()); + let (mul, o) = mul_lo.overflowing_add(mul_hi.lo().widen_hi()); + (mul, o || !mul_hi.hi().is_zero()) + } + (false, true) => { + let mul_lo = rhs.lo().widen_mul(self.lo()); + let mul_hi = rhs.lo().widen_mul(self.hi()); + let (mul, o) = mul_lo.overflowing_add(mul_hi.lo().widen_hi()); + (mul, o || !mul_hi.hi().is_zero()) + } + // overflow is guaranteed to not happen, and use a smaller widening multiplication + (true, true) => (self.lo().widen_mul(rhs.lo()), false), + } + } +} + +impl UMulo for u32 {} +impl UMulo for u64 {} +impl UMulo for u128 {} + +macro_rules! impl_signed_mulo { + ($fn:ident, $iD:ident, $uD:ident) => { + fn $fn(lhs: $iD, rhs: $iD) -> ($iD, bool) { + let mut lhs = lhs; + let mut rhs = rhs; + // the test against `mul_neg` below fails without this early return + if lhs == 0 || rhs == 0 { + return (0, false); + } + + let lhs_neg = lhs < 0; + let rhs_neg = rhs < 0; + if lhs_neg { + lhs = lhs.wrapping_neg(); + } + if rhs_neg { + rhs = rhs.wrapping_neg(); + } + let mul_neg = lhs_neg != rhs_neg; + + let (mul, o) = (lhs as $uD).mulo(rhs as $uD); + let mut mul = mul as $iD; + + if mul_neg { + mul = mul.wrapping_neg(); + } + if (mul < 0) != mul_neg { + // this one check happens to catch all edge cases related to `$iD::MIN` + (mul, true) + } else { + (mul, o) + } + } + }; +} + +impl_signed_mulo!(i32_overflowing_mul, i32, u32); +impl_signed_mulo!(i64_overflowing_mul, i64, u64); +impl_signed_mulo!(i128_overflowing_mul, i128, u128); + +intrinsics! { + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_lmul] + #[cfg(any(not(any(target_arch = "riscv32", target_arch = "riscv64")), target_feature = "m"))] + pub extern "C" fn __muldi3(a: u64, b: u64) -> u64 { + a.mul(b) + } + + pub extern "C" fn __multi3(a: i128, b: i128) -> i128 { + a.mul(b) + } + + pub extern "C" fn __mulosi4(a: i32, b: i32, oflow: &mut i32) -> i32 { + let (mul, o) = i32_overflowing_mul(a, b); + *oflow = o as i32; + mul + } + + pub extern "C" fn __mulodi4(a: i64, b: i64, oflow: &mut i32) -> i64 { + let (mul, o) = i64_overflowing_mul(a, b); + *oflow = o as i32; + mul + } + + #[unadjusted_on_win64] + pub extern "C" fn __muloti4(a: i128, b: i128, oflow: &mut i32) -> i128 { + let (mul, o) = i128_overflowing_mul(a, b); + *oflow = o as i32; + mul + } + + pub extern "C" fn __rust_i128_mulo(a: i128, b: i128) -> (i128, bool) { + i128_overflowing_mul(a, b) + } + + pub extern "C" fn __rust_u128_mulo(a: u128, b: u128) -> (u128, bool) { + a.mulo(b) + } +} diff --git a/vendor/compiler_builtins/src/int/sdiv.rs b/vendor/compiler_builtins/src/int/sdiv.rs new file mode 100644 index 000000000..f1822f0f8 --- /dev/null +++ b/vendor/compiler_builtins/src/int/sdiv.rs @@ -0,0 +1,169 @@ +use int::udiv::*; + +macro_rules! sdivmod { + ( + $unsigned_fn:ident, // name of the unsigned division function + $signed_fn:ident, // name of the signed division function + $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` + $iX:ident, // signed integer type for the inputs and outputs of `$signed_name` + $($attr:tt),* // attributes + ) => { + intrinsics! 
{ + #[avr_skip] + $( + #[$attr] + )* + /// Returns `n / d` and sets `*rem = n % d` + pub extern "C" fn $signed_fn(a: $iX, b: $iX, rem: &mut $iX) -> $iX { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + let mut r = *rem as $uX; + let t = $unsigned_fn(a as $uX, b as $uX, Some(&mut r)) as $iX; + let mut r = r as $iX; + if a_neg { + r = r.wrapping_neg(); + } + *rem = r; + if a_neg != b_neg { + t.wrapping_neg() + } else { + t + } + } + } + } +} + +macro_rules! sdiv { + ( + $unsigned_fn:ident, // name of the unsigned division function + $signed_fn:ident, // name of the signed division function + $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` + $iX:ident, // signed integer type for the inputs and outputs of `$signed_name` + $($attr:tt),* // attributes + ) => { + intrinsics! { + #[avr_skip] + $( + #[$attr] + )* + /// Returns `n / d` + pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + let t = $unsigned_fn(a as $uX, b as $uX) as $iX; + if a_neg != b_neg { + t.wrapping_neg() + } else { + t + } + } + } + } +} + +macro_rules! smod { + ( + $unsigned_fn:ident, // name of the unsigned division function + $signed_fn:ident, // name of the signed division function + $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` + $iX:ident, // signed integer type for the inputs and outputs of `$signed_name` + $($attr:tt),* // attributes + ) => { + intrinsics! { + #[avr_skip] + $( + #[$attr] + )* + /// Returns `n % d` + pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + let r = $unsigned_fn(a as $uX, b as $uX) as $iX; + if a_neg { + r.wrapping_neg() + } else { + r + } + } + } + } +} + +sdivmod!( + __udivmodsi4, + __divmodsi4, + u32, + i32, + maybe_use_optimized_c_shim +); +// The `#[arm_aeabi_alias = __aeabi_idiv]` attribute cannot be made to work with `intrinsics!` in macros +intrinsics! 
{ + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_idiv] + /// Returns `n / d` + pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 { + let a_neg = a < 0; + let b_neg = b < 0; + let mut a = a; + let mut b = b; + if a_neg { + a = a.wrapping_neg(); + } + if b_neg { + b = b.wrapping_neg(); + } + let t = __udivsi3(a as u32, b as u32) as i32; + if a_neg != b_neg { + t.wrapping_neg() + } else { + t + } + } +} +smod!(__umodsi3, __modsi3, u32, i32, maybe_use_optimized_c_shim); + +sdivmod!( + __udivmoddi4, + __divmoddi4, + u64, + i64, + maybe_use_optimized_c_shim +); +sdiv!(__udivdi3, __divdi3, u64, i64, maybe_use_optimized_c_shim); +smod!(__umoddi3, __moddi3, u64, i64, maybe_use_optimized_c_shim); + +// LLVM does not currently have a `__divmodti4` function, but GCC does +sdivmod!( + __udivmodti4, + __divmodti4, + u128, + i128, + maybe_use_optimized_c_shim +); +sdiv!(__udivti3, __divti3, u128, i128, win64_128bit_abi_hack); +smod!(__umodti3, __modti3, u128, i128, win64_128bit_abi_hack); diff --git a/vendor/compiler_builtins/src/int/shift.rs b/vendor/compiler_builtins/src/int/shift.rs new file mode 100644 index 000000000..908e619e1 --- /dev/null +++ b/vendor/compiler_builtins/src/int/shift.rs @@ -0,0 +1,116 @@ +use int::{DInt, HInt, Int}; + +trait Ashl: DInt { + /// Returns `a << b`, requires `b < Self::BITS` + fn ashl(self, shl: u32) -> Self { + let n_h = Self::H::BITS; + if shl & n_h != 0 { + // we only need `self.lo()` because `self.hi()` will be shifted out entirely + self.lo().wrapping_shl(shl - n_h).widen_hi() + } else if shl == 0 { + self + } else { + Self::from_lo_hi( + self.lo().wrapping_shl(shl), + self.lo().logical_shr(n_h - shl) | self.hi().wrapping_shl(shl), + ) + } + } +} + +impl Ashl for u32 {} +impl Ashl for u64 {} +impl Ashl for u128 {} + +trait Ashr: DInt { + /// Returns arithmetic `a >> b`, requires `b < Self::BITS` + fn ashr(self, shr: u32) -> Self { + let n_h = Self::H::BITS; + if shr & n_h != 0 { + Self::from_lo_hi( + self.hi().wrapping_shr(shr - n_h), + // smear the sign bit + self.hi().wrapping_shr(n_h - 1), + ) + } else if shr == 0 { + self + } else { + Self::from_lo_hi( + self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h - shr), + self.hi().wrapping_shr(shr), + ) + } + } +} + +impl Ashr for i32 {} +impl Ashr for i64 {} +impl Ashr for i128 {} + +trait Lshr: DInt { + /// Returns logical `a >> b`, requires `b < Self::BITS` + fn lshr(self, shr: u32) -> Self { + let n_h = Self::H::BITS; + if shr & n_h != 0 { + self.hi().logical_shr(shr - n_h).zero_widen() + } else if shr == 0 { + self + } else { + Self::from_lo_hi( + self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h - shr), + self.hi().logical_shr(shr), + ) + } + } +} + +impl Lshr for u32 {} +impl Lshr for u64 {} +impl Lshr for u128 {} + +intrinsics! 
{ + #[maybe_use_optimized_c_shim] + pub extern "C" fn __ashlsi3(a: u32, b: u32) -> u32 { + a.ashl(b) + } + + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_llsl] + pub extern "C" fn __ashldi3(a: u64, b: u32) -> u64 { + a.ashl(b) + } + + pub extern "C" fn __ashlti3(a: u128, b: u32) -> u128 { + a.ashl(b) + } + + #[maybe_use_optimized_c_shim] + pub extern "C" fn __ashrsi3(a: i32, b: u32) -> i32 { + a.ashr(b) + } + + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_lasr] + pub extern "C" fn __ashrdi3(a: i64, b: u32) -> i64 { + a.ashr(b) + } + + pub extern "C" fn __ashrti3(a: i128, b: u32) -> i128 { + a.ashr(b) + } + + #[maybe_use_optimized_c_shim] + pub extern "C" fn __lshrsi3(a: u32, b: u32) -> u32 { + a.lshr(b) + } + + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_llsr] + pub extern "C" fn __lshrdi3(a: u64, b: u32) -> u64 { + a.lshr(b) + } + + pub extern "C" fn __lshrti3(a: u128, b: u32) -> u128 { + a.lshr(b) + } +} diff --git a/vendor/compiler_builtins/src/int/specialized_div_rem/asymmetric.rs b/vendor/compiler_builtins/src/int/specialized_div_rem/asymmetric.rs new file mode 100644 index 000000000..56ce188a3 --- /dev/null +++ b/vendor/compiler_builtins/src/int/specialized_div_rem/asymmetric.rs @@ -0,0 +1,69 @@ +/// Creates an unsigned division function optimized for dividing integers with the same +/// bitwidth as the largest operand in an asymmetrically sized division. For example, x86-64 has an +/// assembly instruction that can divide a 128 bit integer by a 64 bit integer if the quotient fits +/// in 64 bits. The 128 bit version of this algorithm would use that fast hardware division to +/// construct a full 128 bit by 128 bit division. +#[allow(unused_macros)] +macro_rules! impl_asymmetric { + ( + $fn:ident, // name of the unsigned division function + $zero_div_fn:ident, // function called when division by zero is attempted + $half_division:ident, // function for division of a $uX by a $uX + $asymmetric_division:ident, // function for division of a $uD by a $uX + $n_h:expr, // the number of bits in a $iH or $uH + $uH:ident, // unsigned integer with half the bit width of $uX + $uX:ident, // unsigned integer with half the bit width of $uD + $uD:ident // unsigned integer type for the inputs and outputs of `$fn` + ) => { + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) { + let n: u32 = $n_h * 2; + + let duo_lo = duo as $uX; + let duo_hi = (duo >> n) as $uX; + let div_lo = div as $uX; + let div_hi = (div >> n) as $uX; + if div_hi == 0 { + if div_lo == 0 { + $zero_div_fn() + } + if duo_hi < div_lo { + // `$uD` by `$uX` division with a quotient that will fit into a `$uX` + let (quo, rem) = unsafe { $asymmetric_division(duo, div_lo) }; + return (quo as $uD, rem as $uD); + } else { + // Short division using the $uD by $uX division + let (quo_hi, rem_hi) = $half_division(duo_hi, div_lo); + let tmp = unsafe { + $asymmetric_division((duo_lo as $uD) | ((rem_hi as $uD) << n), div_lo) + }; + return ((tmp.0 as $uD) | ((quo_hi as $uD) << n), tmp.1 as $uD); + } + } + + // This has been adapted from + // https://www.codeproject.com/tips/785014/uint-division-modulus which was in turn + // adapted from Hacker's Delight. This is similar to the two possibility algorithm + // in that it uses only more significant parts of `duo` and `div` to divide a large + // integer with a smaller division instruction. 
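+            // `div_sig_n` below is `div` truncated to its `n` most significant bits,
+            // which is small enough for the fast asymmetric division instruction. The
+            // quotient estimated from the truncated divisor is close to the true one;
+            // the unconditional decrement and the single conditional correction at the
+            // end account for the remaining error (see the linked derivation for the
+            // exact bound).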
+ let div_lz = div_hi.leading_zeros(); + let div_extra = n - div_lz; + let div_sig_n = (div >> div_extra) as $uX; + let tmp = unsafe { $asymmetric_division(duo >> 1, div_sig_n) }; + + let mut quo = tmp.0 >> ((n - 1) - div_lz); + if quo != 0 { + quo -= 1; + } + + // Note that this is a full `$uD` multiplication being used here + let mut rem = duo - (quo as $uD).wrapping_mul(div); + if div <= rem { + quo += 1; + rem -= div; + } + return (quo as $uD, rem); + } + }; +} diff --git a/vendor/compiler_builtins/src/int/specialized_div_rem/binary_long.rs b/vendor/compiler_builtins/src/int/specialized_div_rem/binary_long.rs new file mode 100644 index 000000000..0d7822882 --- /dev/null +++ b/vendor/compiler_builtins/src/int/specialized_div_rem/binary_long.rs @@ -0,0 +1,548 @@ +/// Creates an unsigned division function that uses binary long division, designed for +/// computer architectures without division instructions. These functions have good performance for +/// microarchitectures with large branch miss penalties and architectures without the ability to +/// predicate instructions. For architectures with predicated instructions, one of the algorithms +/// described in the documentation of these functions probably has higher performance, and a custom +/// assembly routine should be used instead. +#[allow(unused_macros)] +macro_rules! impl_binary_long { + ( + $fn:ident, // name of the unsigned division function + $zero_div_fn:ident, // function called when division by zero is attempted + $normalization_shift:ident, // function for finding the normalization shift + $n:tt, // the number of bits in a $iX or $uX + $uX:ident, // unsigned integer type for the inputs and outputs of `$fn` + $iX:ident // signed integer type with same bitwidth as `$uX` + ) => { + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + pub fn $fn(duo: $uX, div: $uX) -> ($uX, $uX) { + let mut duo = duo; + // handle edge cases before calling `$normalization_shift` + if div == 0 { + $zero_div_fn() + } + if duo < div { + return (0, duo); + } + + // There are many variations of binary division algorithm that could be used. This + // documentation gives a tour of different methods so that future readers wanting to + // optimize further do not have to painstakingly derive them. The SWAR variation is + // especially hard to understand without reading the less convoluted methods first. + + // You may notice that a `duo < div_original` check is included in many these + // algorithms. A critical optimization that many algorithms miss is handling of + // quotients that will turn out to have many trailing zeros or many leading zeros. This + // happens in cases of exact or close-to-exact divisions, divisions by power of two, and + // in cases where the quotient is small. The `duo < div_original` check handles these + // cases of early returns and ends up replacing other kinds of mundane checks that + // normally terminate a binary division algorithm. + // + // Something you may see in other algorithms that is not special-cased here is checks + // for division by powers of two. The `duo < div_original` check handles this case and + // more, however it can be checked up front before the bisection using the + // `((div > 0) && ((div & (div - 1)) == 0))` trick. This is not special-cased because + // compilers should handle most cases where divisions by power of two occur, and we do + // not want to add on a few cycles for every division operation just to save a few + // cycles rarely. 
+ + // The following example is the most straightforward translation from the way binary + // long division is typically visualized: + // Dividing 178u8 (0b10110010) by 6u8 (0b110). `div` is shifted left by 5, according to + // the result from `$normalization_shift(duo, div, false)`. + // + // Step 0: `sub` is negative, so there is not full normalization, so no `quo` bit is set + // and `duo` is kept unchanged. + // duo:10110010, div_shifted:11000000, sub:11110010, quo:00000000, shl:5 + // + // Step 1: `sub` is positive, set a `quo` bit and update `duo` for next step. + // duo:10110010, div_shifted:01100000, sub:01010010, quo:00010000, shl:4 + // + // Step 2: Continue based on `sub`. The `quo` bits start accumulating. + // duo:01010010, div_shifted:00110000, sub:00100010, quo:00011000, shl:3 + // duo:00100010, div_shifted:00011000, sub:00001010, quo:00011100, shl:2 + // duo:00001010, div_shifted:00001100, sub:11111110, quo:00011100, shl:1 + // duo:00001010, div_shifted:00000110, sub:00000100, quo:00011100, shl:0 + // The `duo < div_original` check terminates the algorithm with the correct quotient of + // 29u8 and remainder of 4u8 + /* + let div_original = div; + let mut shl = $normalization_shift(duo, div, false); + let mut quo = 0; + loop { + let div_shifted = div << shl; + let sub = duo.wrapping_sub(div_shifted); + // it is recommended to use `println!`s like this if functionality is unclear + /* + println!("duo:{:08b}, div_shifted:{:08b}, sub:{:08b}, quo:{:08b}, shl:{}", + duo, + div_shifted, + sub, + quo, + shl + ); + */ + if 0 <= (sub as $iX) { + duo = sub; + quo += 1 << shl; + if duo < div_original { + // this branch is optional + return (quo, duo) + } + } + if shl == 0 { + return (quo, duo) + } + shl -= 1; + } + */ + + // This restoring binary long division algorithm reduces the number of operations + // overall via: + // - `pow` can be shifted right instead of recalculating from `shl` + // - starting `div` shifted left and shifting it right for each step instead of + // recalculating from `shl` + // - The `duo < div_original` branch is used to terminate the algorithm instead of the + // `shl == 0` branch. This check is strong enough to prevent set bits of `pow` and + // `div` from being shifted off the end. This check also only occurs on half of steps + // on average, since it is behind the `(sub as $iX) >= 0` branch. + // - `shl` is now not needed by any aspect of of the loop and thus only 3 variables are + // being updated between steps + // + // There are many variations of this algorithm, but this encompases the largest number + // of architectures and does not rely on carry flags, add-with-carry, or SWAR + // complications to be decently fast. + /* + let div_original = div; + let shl = $normalization_shift(duo, div, false); + let mut div: $uX = div << shl; + let mut pow: $uX = 1 << shl; + let mut quo: $uX = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as $iX) { + duo = sub; + quo |= pow; + if duo < div_original { + return (quo, duo) + } + } + div >>= 1; + pow >>= 1; + } + */ + + // If the architecture has flags and predicated arithmetic instructions, it is possible + // to do binary long division without branching and in only 3 or 4 instructions. This is + // a variation of a 3 instruction central loop from + // http://www.chiark.greenend.org.uk/~theom/riscos/docs/ultimate/a252div.txt. 
+ // + // What allows doing division in only 3 instructions is realizing that instead of + // keeping `duo` in place and shifting `div` right to align bits, `div` can be kept in + // place and `duo` can be shifted left. This means `div` does not have to be updated, + // but causes edge case problems and makes `duo < div_original` tests harder. Some + // architectures have an option to shift an argument in an arithmetic operation, which + // means `duo` can be shifted left and subtracted from in one instruction. The other two + // instructions are updating `quo` and undoing the subtraction if it turns out things + // were not normalized. + + /* + // Perform one binary long division step on the already normalized arguments, because + // the main. Note that this does a full normalization since the central loop needs + // `duo.leading_zeros()` to be at least 1 more than `div.leading_zeros()`. The original + // variation only did normalization to the nearest 4 steps, but this makes handling edge + // cases much harder. We do a full normalization and perform a binary long division + // step. In the edge case where the msbs of `duo` and `div` are set, it clears the msb + // of `duo`, then the edge case handler shifts `div` right and does another long + // division step to always insure `duo.leading_zeros() + 1 >= div.leading_zeros()`. + let div_original = div; + let mut shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + let mut quo: $uX = 1; + duo = duo.wrapping_sub(div); + if duo < div_original { + return (1 << shl, duo); + } + let div_neg: $uX; + if (div as $iX) < 0 { + // A very ugly edge case where the most significant bit of `div` is set (after + // shifting to match `duo` when its most significant bit is at the sign bit), which + // leads to the sign bit of `div_neg` being cut off and carries not happening when + // they should. This branch performs a long division step that keeps `duo` in place + // and shifts `div` down. + div >>= 1; + div_neg = div.wrapping_neg(); + let (sub, carry) = duo.overflowing_add(div_neg); + duo = sub; + quo = quo.wrapping_add(quo).wrapping_add(carry as $uX); + if !carry { + duo = duo.wrapping_add(div); + } + shl -= 1; + } else { + div_neg = div.wrapping_neg(); + } + // The add-with-carry that updates `quo` needs to have the carry set when a normalized + // subtract happens. Using `duo.wrapping_shl(1).overflowing_sub(div)` to do the + // subtraction generates a carry when an unnormalized subtract happens, which is the + // opposite of what we want. Instead, we use + // `duo.wrapping_shl(1).overflowing_add(div_neg)`, where `div_neg` is negative `div`. + let mut i = shl; + loop { + if i == 0 { + break; + } + i -= 1; + // `ADDS duo, div, duo, LSL #1` + // (add `div` to `duo << 1` and set flags) + let (sub, carry) = duo.wrapping_shl(1).overflowing_add(div_neg); + duo = sub; + // `ADC quo, quo, quo` + // (add with carry). Effectively shifts `quo` left by 1 and sets the least + // significant bit to the carry. + quo = quo.wrapping_add(quo).wrapping_add(carry as $uX); + // `ADDCC duo, duo, div` + // (add if carry clear). Undoes the subtraction if no carry was generated. + if !carry { + duo = duo.wrapping_add(div); + } + } + return (quo, duo >> shl); + */ + + // This is the SWAR (SIMD within in a register) restoring division algorithm. 
+ // This combines several ideas of the above algorithms: + // - If `duo` is shifted left instead of shifting `div` right like in the 3 instruction + // restoring division algorithm, some architectures can do the shifting and + // subtraction step in one instruction. + // - `quo` can be constructed by adding powers-of-two to it or shifting it left by one + // and adding one. + // - Every time `duo` is shifted left, there is another unused 0 bit shifted into the + // LSB, so what if we use those bits to store `quo`? + // Through a complex setup, it is possible to manage `duo` and `quo` in the same + // register, and perform one step with 2 or 3 instructions. The only major downsides are + // that there is significant setup (it is only saves instructions if `shl` is + // approximately more than 4), `duo < div_original` checks are impractical once SWAR is + // initiated, and the number of division steps taken has to be exact (we cannot do more + // division steps than `shl`, because it introduces edge cases where quotient bits in + // `duo` start to collide with the real part of `div`. + /* + // first step. The quotient bit is stored in `quo` for now + let div_original = div; + let mut shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + duo = duo.wrapping_sub(div); + let mut quo: $uX = 1 << shl; + if duo < div_original { + return (quo, duo); + } + + let mask: $uX; + if (div as $iX) < 0 { + // deal with same edge case as the 3 instruction restoring division algorithm, but + // the quotient bit from this step also has to be stored in `quo` + div >>= 1; + shl -= 1; + let tmp = 1 << shl; + mask = tmp - 1; + let sub = duo.wrapping_sub(div); + if (sub as $iX) >= 0 { + // restore + duo = sub; + quo |= tmp; + } + if duo < div_original { + return (quo, duo); + } + } else { + mask = quo - 1; + } + // There is now room for quotient bits in `duo`. + + // Note that `div` is already shifted left and has `shl` unset bits. We subtract 1 from + // `div` and end up with the subset of `shl` bits being all being set. This subset acts + // just like a two's complement negative one. The subset of `div` containing the divisor + // had 1 subtracted from it, but a carry will always be generated from the `shl` subset + // as long as the quotient stays positive. + // + // When the modified `div` is subtracted from `duo.wrapping_shl(1)`, the `shl` subset + // adds a quotient bit to the least significant bit. + // For example, 89 (0b01011001) divided by 3 (0b11): + // + // shl:4, div:0b00110000 + // first step: + // duo:0b01011001 + // + div_neg:0b11010000 + // ____________________ + // 0b00101001 + // quo is set to 0b00010000 and mask is set to 0b00001111 for later + // + // 1 is subtracted from `div`. I will differentiate the `shl` part of `div` and the + // quotient part of `duo` with `^`s. + // chars. + // div:0b00110000 + // ^^^^ + // + 0b11111111 + // ________________ + // 0b00101111 + // ^^^^ + // div_neg:0b11010001 + // + // first SWAR step: + // duo_shl1:0b01010010 + // ^ + // + div_neg:0b11010001 + // ____________________ + // 0b00100011 + // ^ + // second: + // duo_shl1:0b01000110 + // ^^ + // + div_neg:0b11010001 + // ____________________ + // 0b00010111 + // ^^ + // third: + // duo_shl1:0b00101110 + // ^^^ + // + div_neg:0b11010001 + // ____________________ + // 0b11111111 + // ^^^ + // 3 steps resulted in the quotient with 3 set bits as expected, but currently the real + // part of `duo` is negative and the third step was an unnormalized step. 
The restore + // branch then restores `duo`. Note that the restore branch does not shift `duo` left. + // + // duo:0b11111111 + // ^^^ + // + div:0b00101111 + // ^^^^ + // ________________ + // 0b00101110 + // ^^^ + // `duo` is now back in the `duo_shl1` state it was at in the the third step, with an + // unset quotient bit. + // + // final step (`shl` was 4, so exactly 4 steps must be taken) + // duo_shl1:0b01011100 + // ^^^^ + // + div_neg:0b11010001 + // ____________________ + // 0b00101101 + // ^^^^ + // The quotient includes the `^` bits added with the `quo` bits from the beginning that + // contained the first step and potential edge case step, + // `quo:0b00010000 + (duo:0b00101101 & mask:0b00001111) == 0b00011101 == 29u8`. + // The remainder is the bits remaining in `duo` that are not part of the quotient bits, + // `duo:0b00101101 >> shl == 0b0010 == 2u8`. + let div: $uX = div.wrapping_sub(1); + let mut i = shl; + loop { + if i == 0 { + break; + } + i -= 1; + duo = duo.wrapping_shl(1).wrapping_sub(div); + if (duo as $iX) < 0 { + // restore + duo = duo.wrapping_add(div); + } + } + // unpack the results of SWAR + return ((duo & mask) | quo, duo >> shl); + */ + + // The problem with the conditional restoring SWAR algorithm above is that, in practice, + // it requires assembly code to bring out its full unrolled potential (It seems that + // LLVM can't use unrolled conditionals optimally and ends up erasing all the benefit + // that my algorithm intends. On architectures without predicated instructions, the code + // gen is especially bad. We need a default software division algorithm that is + // guaranteed to get decent code gen for the central loop. + + // For non-SWAR algorithms, there is a way to do binary long division without + // predication or even branching. This involves creating a mask from the sign bit and + // performing different kinds of steps using that. + /* + let shl = $normalization_shift(duo, div, true); + let mut div: $uX = div << shl; + let mut pow: $uX = 1 << shl; + let mut quo: $uX = 0; + loop { + let sub = duo.wrapping_sub(div); + let sign_mask = !((sub as $iX).wrapping_shr($n - 1) as $uX); + duo -= div & sign_mask; + quo |= pow & sign_mask; + div >>= 1; + pow >>= 1; + if pow == 0 { + break; + } + } + return (quo, duo); + */ + // However, it requires about 4 extra operations (smearing the sign bit, negating the + // mask, and applying the mask twice) on top of the operations done by the actual + // algorithm. With SWAR however, just 2 extra operations are needed, making it + // practical and even the most optimal algorithm for some architectures. + + // What we do is use custom assembly for predicated architectures that need software + // division, and for the default algorithm use a mask based restoring SWAR algorithm + // without conditionals or branches. On almost all architectures, this Rust code is + // guaranteed to compile down to 5 assembly instructions or less for each step, and LLVM + // will unroll it in a decent way. 
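+ //
+ // (The "mask" in these variants is just a smeared sign bit: for `u32`/`i32`,
+ // `(x as i32).wrapping_shr(31) as u32` is `0xFFFF_FFFF` when the sign bit of `x` is
+ // set and `0` otherwise, so `div & mask` adds back (or withholds) `div` without a
+ // branch.)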
+ + // standard opening for SWAR algorithm with first step and edge case handling + let div_original = div; + let mut shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + duo = duo.wrapping_sub(div); + let mut quo: $uX = 1 << shl; + if duo < div_original { + return (quo, duo); + } + let mask: $uX; + if (div as $iX) < 0 { + div >>= 1; + shl -= 1; + let tmp = 1 << shl; + mask = tmp - 1; + let sub = duo.wrapping_sub(div); + if (sub as $iX) >= 0 { + duo = sub; + quo |= tmp; + } + if duo < div_original { + return (quo, duo); + } + } else { + mask = quo - 1; + } + + // central loop + div = div.wrapping_sub(1); + let mut i = shl; + loop { + if i == 0 { + break; + } + i -= 1; + // shift left 1 and subtract + duo = duo.wrapping_shl(1).wrapping_sub(div); + // create mask + let mask = (duo as $iX).wrapping_shr($n - 1) as $uX; + // restore + duo = duo.wrapping_add(div & mask); + } + // unpack + return ((duo & mask) | quo, duo >> shl); + + // miscellanious binary long division algorithms that might be better for specific + // architectures + + // Another kind of long division uses an interesting fact that `div` and `pow` can be + // negated when `duo` is negative to perform a "negated" division step that works in + // place of any normalization mechanism. This is a non-restoring division algorithm that + // is very similar to the non-restoring division algorithms that can be found on the + // internet, except there is only one test for `duo < 0`. The subtraction from `quo` can + // be viewed as shifting the least significant set bit right (e.x. if we enter a series + // of negated binary long division steps starting with `quo == 0b1011_0000` and + // `pow == 0b0000_1000`, `quo` will progress like this: 0b1010_1000, 0b1010_0100, + // 0b1010_0010, 0b1010_0001). + /* + let div_original = div; + let shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + let mut pow: $uX = 1 << shl; + let mut quo: $uX = pow; + duo = duo.wrapping_sub(div); + if duo < div_original { + return (quo, duo); + } + div >>= 1; + pow >>= 1; + loop { + if (duo as $iX) < 0 { + // Negated binary long division step. + duo = duo.wrapping_add(div); + quo = quo.wrapping_sub(pow); + } else { + // Normal long division step. + if duo < div_original { + return (quo, duo) + } + duo = duo.wrapping_sub(div); + quo = quo.wrapping_add(pow); + } + pow >>= 1; + div >>= 1; + } + */ + + // This is the Nonrestoring SWAR algorithm, combining the nonrestoring algorithm with + // SWAR techniques that makes the only difference between steps be negation of `div`. + // If there was an architecture with an instruction that negated inputs to an adder + // based on conditionals, and in place shifting (or a three input addition operation + // that can have `duo` as two of the inputs to effectively shift it left by 1), then a + // single instruction central loop is possible. Microarchitectures often have inputs to + // their ALU that can invert the arguments and carry in of adders, but the architectures + // unfortunately do not have an instruction to dynamically invert this input based on + // conditionals. 
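+ //
+ // (Whichever variant is selected, an instantiation is easy to sanity check against
+ // the native operators; a hypothetical test for a `u32` instantiation named
+ // `u32_div_rem` is sketched here, and the commented-out nonrestoring SWAR code
+ // itself follows below.)
+ /*
+ #[test]
+ fn binary_long_matches_native() {
+     for &duo in &[1u32, 2, 3, 90, 178, 0x8000_0000, u32::MAX] {
+         for &div in &[1u32, 2, 3, 6, 7, 0xFFFF, 0x8000_0000] {
+             assert_eq!(u32_div_rem(duo, div), (duo / div, duo % div));
+         }
+     }
+ }
+ */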
+ /* + // SWAR opening + let div_original = div; + let mut shl = $normalization_shift(duo, div, true); + let mut div: $uX = (div << shl); + duo = duo.wrapping_sub(div); + let mut quo: $uX = 1 << shl; + if duo < div_original { + return (quo, duo); + } + let mask: $uX; + if (div as $iX) < 0 { + div >>= 1; + shl -= 1; + let tmp = 1 << shl; + let sub = duo.wrapping_sub(div); + if (sub as $iX) >= 0 { + // restore + duo = sub; + quo |= tmp; + } + if duo < div_original { + return (quo, duo); + } + mask = tmp - 1; + } else { + mask = quo - 1; + } + + // central loop + let div: $uX = div.wrapping_sub(1); + let mut i = shl; + loop { + if i == 0 { + break; + } + i -= 1; + // note: the `wrapping_shl(1)` can be factored out, but would require another + // restoring division step to prevent `(duo as $iX)` from overflowing + if (duo as $iX) < 0 { + // Negated binary long division step. + duo = duo.wrapping_shl(1).wrapping_add(div); + } else { + // Normal long division step. + duo = duo.wrapping_shl(1).wrapping_sub(div); + } + } + if (duo as $iX) < 0 { + // Restore. This was not needed in the original nonrestoring algorithm because of + // the `duo < div_original` checks. + duo = duo.wrapping_add(div); + } + // unpack + return ((duo & mask) | quo, duo >> shl); + */ + } + }; +} diff --git a/vendor/compiler_builtins/src/int/specialized_div_rem/delegate.rs b/vendor/compiler_builtins/src/int/specialized_div_rem/delegate.rs new file mode 100644 index 000000000..330c6e4f8 --- /dev/null +++ b/vendor/compiler_builtins/src/int/specialized_div_rem/delegate.rs @@ -0,0 +1,319 @@ +/// Creates an unsigned division function that uses a combination of hardware division and +/// binary long division to divide integers larger than what hardware division by itself can do. This +/// function is intended for microarchitectures that have division hardware, but not fast enough +/// multiplication hardware for `impl_trifecta` to be faster. +#[allow(unused_macros)] +macro_rules! impl_delegate { + ( + $fn:ident, // name of the unsigned division function + $zero_div_fn:ident, // function called when division by zero is attempted + $half_normalization_shift:ident, // function for finding the normalization shift of $uX + $half_division:ident, // function for division of a $uX by a $uX + $n_h:expr, // the number of bits in $iH or $uH + $uH:ident, // unsigned integer with half the bit width of $uX + $uX:ident, // unsigned integer with half the bit width of $uD. + $uD:ident, // unsigned integer type for the inputs and outputs of `$fn` + $iD:ident // signed integer type with the same bitwidth as `$uD` + ) => { + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) { + // The two possibility algorithm, undersubtracting long division algorithm, or any kind + // of reciprocal based algorithm will not be fastest, because they involve large + // multiplications that we assume to not be fast enough relative to the divisions to + // outweigh setup times. 
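+
+ // For concreteness: with the `u64` instantiation of this macro (`$uD = u64`,
+ // `$uX = u32`, `$n_h = 16`), the splits below look like the following sketch.
+ /*
+ let duo: u64 = 0x1234_5678_9ABC_DEF0;
+ let duo_lo = duo as u32; // 0x9ABC_DEF0
+ let duo_hi = (duo >> 32) as u32; // 0x1234_5678
+ assert_eq!(((duo_hi as u64) << 32) | (duo_lo as u64), duo);
+ */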
+ + // the number of bits in a $uX + let n = $n_h * 2; + + let duo_lo = duo as $uX; + let duo_hi = (duo >> n) as $uX; + let div_lo = div as $uX; + let div_hi = (div >> n) as $uX; + + match (div_lo == 0, div_hi == 0, duo_hi == 0) { + (true, true, _) => $zero_div_fn(), + (_, false, true) => { + // `duo` < `div` + return (0, duo); + } + (false, true, true) => { + // delegate to smaller division + let tmp = $half_division(duo_lo, div_lo); + return (tmp.0 as $uD, tmp.1 as $uD); + } + (false, true, false) => { + if duo_hi < div_lo { + // `quo_hi` will always be 0. This performs a binary long division algorithm + // to zero `duo_hi` followed by a half division. + + // We can calculate the normalization shift using only `$uX` size functions. + // If we calculated the normalization shift using + // `$half_normalization_shift(duo_hi, div_lo false)`, it would break the + // assumption the function has that the first argument is more than the + // second argument. If the arguments are switched, the assumption holds true + // since `duo_hi < div_lo`. + let norm_shift = $half_normalization_shift(div_lo, duo_hi, false); + let shl = if norm_shift == 0 { + // Consider what happens if the msbs of `duo_hi` and `div_lo` align with + // no shifting. The normalization shift will always return + // `norm_shift == 0` regardless of whether it is fully normalized, + // because `duo_hi < div_lo`. In that edge case, `n - norm_shift` would + // result in shift overflow down the line. For the edge case, because + // both `duo_hi < div_lo` and we are comparing all the significant bits + // of `duo_hi` and `div`, we can make `shl = n - 1`. + n - 1 + } else { + // We also cannot just use `shl = n - norm_shift - 1` in the general + // case, because when we are not in the edge case comparing all the + // significant bits, then the full `duo < div` may not be true and thus + // breaks the division algorithm. + n - norm_shift + }; + + // The 3 variable restoring division algorithm (see binary_long.rs) is ideal + // for this task, since `pow` and `quo` can be `$uX` and the delegation + // check is simple. + let mut div: $uD = div << shl; + let mut pow_lo: $uX = 1 << shl; + let mut quo_lo: $uX = 0; + let mut duo = duo; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as $iD) { + duo = sub; + quo_lo |= pow_lo; + let duo_hi = (duo >> n) as $uX; + if duo_hi == 0 { + // Delegate to get the rest of the quotient. Note that the + // `div_lo` here is the original unshifted `div`. + let tmp = $half_division(duo as $uX, div_lo); + return ((quo_lo | tmp.0) as $uD, tmp.1 as $uD); + } + } + div >>= 1; + pow_lo >>= 1; + } + } else if duo_hi == div_lo { + // `quo_hi == 1`. This branch is cheap and helps with edge cases. 
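+ // Here `div_hi == 0` and `duo_hi == div_lo`, so `duo == (div << n) + duo_lo` and
+ // the quotient is exactly `(1 << n) + duo_lo / div_lo` with remainder
+ // `duo_lo % div_lo`. The `|` below cannot collide with the high bit, because
+ // `duo_lo / div_lo < 2^n`.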
+ let tmp = $half_division(duo as $uX, div as $uX); + return ((1 << n) | (tmp.0 as $uD), tmp.1 as $uD); + } else { + // `div_lo < duo_hi` + // `rem_hi == 0` + if (div_lo >> $n_h) == 0 { + // Short division of $uD by a $uH, using $uX by $uX division + let div_0 = div_lo as $uH as $uX; + let (quo_hi, rem_3) = $half_division(duo_hi, div_0); + + let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h); + let (quo_1, rem_2) = $half_division(duo_mid, div_0); + + let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h); + let (quo_0, rem_1) = $half_division(duo_lo, div_0); + + return ( + (quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n), + rem_1 as $uD, + ); + } + + // This is basically a short division composed of a half division for the hi + // part, specialized 3 variable binary long division in the middle, and + // another half division for the lo part. + let duo_lo = duo as $uX; + let tmp = $half_division(duo_hi, div_lo); + let quo_hi = tmp.0; + let mut duo = (duo_lo as $uD) | ((tmp.1 as $uD) << n); + // This check is required to avoid breaking the long division below. + if duo < div { + return ((quo_hi as $uD) << n, duo); + } + + // The half division handled all shift alignments down to `n`, so this + // division can continue with a shift of `n - 1`. + let mut div: $uD = div << (n - 1); + let mut pow_lo: $uX = 1 << (n - 1); + let mut quo_lo: $uX = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as $iD) { + duo = sub; + quo_lo |= pow_lo; + let duo_hi = (duo >> n) as $uX; + if duo_hi == 0 { + // Delegate to get the rest of the quotient. Note that the + // `div_lo` here is the original unshifted `div`. + let tmp = $half_division(duo as $uX, div_lo); + return ( + (tmp.0) as $uD | (quo_lo as $uD) | ((quo_hi as $uD) << n), + tmp.1 as $uD, + ); + } + } + div >>= 1; + pow_lo >>= 1; + } + } + } + (_, false, false) => { + // Full $uD by $uD binary long division. `quo_hi` will always be 0. + if duo < div { + return (0, duo); + } + let div_original = div; + let shl = $half_normalization_shift(duo_hi, div_hi, false); + let mut duo = duo; + let mut div: $uD = div << shl; + let mut pow_lo: $uX = 1 << shl; + let mut quo_lo: $uX = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as $iD) { + duo = sub; + quo_lo |= pow_lo; + if duo < div_original { + return (quo_lo as $uD, duo); + } + } + div >>= 1; + pow_lo >>= 1; + } + } + } + } + }; +} + +public_test_dep! { +/// Returns `n / d` and sets `*rem = n % d`. +/// +/// This specialization exists because: +/// - The LLVM backend for 32-bit SPARC cannot compile functions that return `(u128, u128)`, +/// so we have to use an old fashioned `&mut u128` argument to return the remainder. +/// - 64-bit SPARC does not have u64 * u64 => u128 widening multiplication, which makes the +/// delegate algorithm strategy the only reasonably fast way to perform `u128` division. 
+// used on SPARC +#[allow(dead_code)] +pub(crate) fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { + use super::*; + let duo_lo = duo as u64; + let duo_hi = (duo >> 64) as u64; + let div_lo = div as u64; + let div_hi = (div >> 64) as u64; + + match (div_lo == 0, div_hi == 0, duo_hi == 0) { + (true, true, _) => zero_div_fn(), + (_, false, true) => { + *rem = duo; + return 0; + } + (false, true, true) => { + let tmp = u64_by_u64_div_rem(duo_lo, div_lo); + *rem = tmp.1 as u128; + return tmp.0 as u128; + } + (false, true, false) => { + if duo_hi < div_lo { + let norm_shift = u64_normalization_shift(div_lo, duo_hi, false); + let shl = if norm_shift == 0 { + 64 - 1 + } else { + 64 - norm_shift + }; + + let mut div: u128 = div << shl; + let mut pow_lo: u64 = 1 << shl; + let mut quo_lo: u64 = 0; + let mut duo = duo; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as i128) { + duo = sub; + quo_lo |= pow_lo; + let duo_hi = (duo >> 64) as u64; + if duo_hi == 0 { + let tmp = u64_by_u64_div_rem(duo as u64, div_lo); + *rem = tmp.1 as u128; + return (quo_lo | tmp.0) as u128; + } + } + div >>= 1; + pow_lo >>= 1; + } + } else if duo_hi == div_lo { + let tmp = u64_by_u64_div_rem(duo as u64, div as u64); + *rem = tmp.1 as u128; + return (1 << 64) | (tmp.0 as u128); + } else { + if (div_lo >> 32) == 0 { + let div_0 = div_lo as u32 as u64; + let (quo_hi, rem_3) = u64_by_u64_div_rem(duo_hi, div_0); + + let duo_mid = ((duo >> 32) as u32 as u64) | (rem_3 << 32); + let (quo_1, rem_2) = u64_by_u64_div_rem(duo_mid, div_0); + + let duo_lo = (duo as u32 as u64) | (rem_2 << 32); + let (quo_0, rem_1) = u64_by_u64_div_rem(duo_lo, div_0); + + *rem = rem_1 as u128; + return (quo_0 as u128) | ((quo_1 as u128) << 32) | ((quo_hi as u128) << 64); + } + + let duo_lo = duo as u64; + let tmp = u64_by_u64_div_rem(duo_hi, div_lo); + let quo_hi = tmp.0; + let mut duo = (duo_lo as u128) | ((tmp.1 as u128) << 64); + if duo < div { + *rem = duo; + return (quo_hi as u128) << 64; + } + + let mut div: u128 = div << (64 - 1); + let mut pow_lo: u64 = 1 << (64 - 1); + let mut quo_lo: u64 = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as i128) { + duo = sub; + quo_lo |= pow_lo; + let duo_hi = (duo >> 64) as u64; + if duo_hi == 0 { + let tmp = u64_by_u64_div_rem(duo as u64, div_lo); + *rem = tmp.1 as u128; + return (tmp.0) as u128 | (quo_lo as u128) | ((quo_hi as u128) << 64); + } + } + div >>= 1; + pow_lo >>= 1; + } + } + } + (_, false, false) => { + if duo < div { + *rem = duo; + return 0; + } + let div_original = div; + let shl = u64_normalization_shift(duo_hi, div_hi, false); + let mut duo = duo; + let mut div: u128 = div << shl; + let mut pow_lo: u64 = 1 << shl; + let mut quo_lo: u64 = 0; + loop { + let sub = duo.wrapping_sub(div); + if 0 <= (sub as i128) { + duo = sub; + quo_lo |= pow_lo; + if duo < div_original { + *rem = duo; + return quo_lo as u128; + } + } + div >>= 1; + pow_lo >>= 1; + } + } + } +} +} diff --git a/vendor/compiler_builtins/src/int/specialized_div_rem/mod.rs b/vendor/compiler_builtins/src/int/specialized_div_rem/mod.rs new file mode 100644 index 000000000..6ec4675df --- /dev/null +++ b/vendor/compiler_builtins/src/int/specialized_div_rem/mod.rs @@ -0,0 +1,306 @@ +// TODO: when `unsafe_block_in_unsafe_fn` is stabilized, remove this +#![allow(unused_unsafe)] +// The functions are complex with many branches, and explicit +// `return`s makes it clear where function exit points are +#![allow(clippy::needless_return)] +#![allow(clippy::comparison_chain)] +// Clippy is 
confused by the complex configuration +#![allow(clippy::if_same_then_else)] +#![allow(clippy::needless_bool)] + +//! This `specialized_div_rem` module is originally from version 1.0.0 of the +//! `specialized-div-rem` crate. Note that `for` loops with ranges are not used in this +//! module, since unoptimized compilation may generate references to `memcpy`. +//! +//! The purpose of these macros is to easily change the both the division algorithm used +//! for a given integer size and the half division used by that algorithm. The way +//! functions call each other is also constructed such that linkers will find the chain of +//! software and hardware divisions needed for every size of signed and unsigned division. +//! For example, most target compilations do the following: +//! +//! - Many 128 bit division functions like `u128::wrapping_div` use +//! `std::intrinsics::unchecked_div`, which gets replaced by `__udivti3` because there +//! is not a 128 bit by 128 bit hardware division function in most architectures. +//! `__udivti3` uses `u128_div_rem` (this extra level of function calls exists because +//! `__umodti3` and `__udivmodti4` also exist, and `specialized_div_rem` supplies just +//! one function to calculate both the quotient and remainder. If configuration flags +//! enable it, `impl_trifecta!` defines `u128_div_rem` to use the trifecta algorithm, +//! which requires the half sized division `u64_by_u64_div_rem`. If the architecture +//! supplies a 64 bit hardware division instruction, `u64_by_u64_div_rem` will be +//! reduced to those instructions. Note that we do not specify the half size division +//! directly to be `__udivdi3`, because hardware division would never be introduced. +//! - If the architecture does not supply a 64 bit hardware division instruction, u64 +//! divisions will use functions such as `__udivdi3`. This will call `u64_div_rem` +//! which is defined by `impl_delegate!`. The half division for this algorithm is +//! `u32_by_u32_div_rem` which in turn becomes hardware division instructions or more +//! software division algorithms. +//! - If the architecture does not supply a 32 bit hardware instruction, linkers will +//! look for `__udivsi3`. `impl_binary_long!` is used, but this algorithm uses no half +//! division, so the chain of calls ends here. +//! +//! On some architectures like x86_64, an asymmetrically sized division is supplied, in +//! which 128 bit numbers can be divided by 64 bit numbers. `impl_asymmetric!` is used to +//! extend the 128 by 64 bit division to a full 128 by 128 bit division. + +// `allow(dead_code)` is used in various places, because the configuration code would otherwise be +// ridiculously complex + +#[macro_use] +mod norm_shift; + +#[macro_use] +mod binary_long; + +#[macro_use] +mod delegate; + +// used on SPARC +#[allow(unused_imports)] +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use self::delegate::u128_divide_sparc; + +#[cfg(feature = "public-test-deps")] +pub use self::delegate::u128_divide_sparc; + +#[macro_use] +mod trifecta; + +#[macro_use] +mod asymmetric; + +/// The behavior of all divisions by zero is controlled by this function. This function should be +/// impossible to reach by Rust users, unless `compiler-builtins` public division functions or +/// `core/std::unchecked_div/rem` are directly used without a zero check in front. +fn zero_div_fn() -> ! 
{ + unsafe { core::hint::unreachable_unchecked() } +} + +const USE_LZ: bool = { + if cfg!(target_arch = "arm") { + if cfg!(target_feature = "thumb-mode") { + // ARM thumb targets have CLZ instructions if the instruction set of ARMv6T2 is + // supported. This is needed to successfully differentiate between targets like + // `thumbv8.base` and `thumbv8.main`. + cfg!(target_feature = "v6t2") + } else { + // Regular ARM targets have CLZ instructions if the ARMv5TE instruction set is + // supported. Technically, ARMv5T was the first to have CLZ, but the "v5t" target + // feature does not seem to work. + cfg!(target_feature = "v5te") + } + } else if cfg!(any(target_arch = "sparc", target_arch = "sparc64")) { + // LZD or LZCNT on SPARC only exists for the VIS 3 extension and later. + cfg!(target_feature = "vis3") + } else if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { + // The `B` extension on RISC-V determines if a CLZ assembly instruction exists + cfg!(target_feature = "b") + } else { + // All other common targets Rust supports should have CLZ instructions + true + } +}; + +impl_normalization_shift!( + u32_normalization_shift, + USE_LZ, + 32, + u32, + i32, + allow(dead_code) +); +impl_normalization_shift!( + u64_normalization_shift, + USE_LZ, + 64, + u64, + i64, + allow(dead_code) +); + +/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. +/// `checked_div` and `checked_rem` are used to avoid bringing in panic function +/// dependencies. +#[inline] +fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) { + if let Some(quo) = duo.checked_div(div) { + if let Some(rem) = duo.checked_rem(div) { + return (quo, rem); + } + } + zero_div_fn() +} + +// Whether `trifecta` or `delegate` is faster for 128 bit division depends on the speed at which a +// microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is +// faster if the target pointer width is at least 64. +#[cfg(all( + not(any(target_pointer_width = "16", target_pointer_width = "32")), + not(all(not(feature = "no-asm"), target_arch = "x86_64")), + not(any(target_arch = "sparc", target_arch = "sparc64")) +))] +impl_trifecta!( + u128_div_rem, + zero_div_fn, + u64_by_u64_div_rem, + 32, + u32, + u64, + u128 +); + +// If the pointer width less than 64, then the target architecture almost certainly does not have +// the fast 64 to 128 bit widening multiplication needed for `trifecta` to be faster. +#[cfg(all( + any(target_pointer_width = "16", target_pointer_width = "32"), + not(all(not(feature = "no-asm"), target_arch = "x86_64")), + not(any(target_arch = "sparc", target_arch = "sparc64")) +))] +impl_delegate!( + u128_div_rem, + zero_div_fn, + u64_normalization_shift, + u64_by_u64_div_rem, + 32, + u32, + u64, + u128, + i128 +); + +/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. +/// +/// # Safety +/// +/// If the quotient does not fit in a `u64`, a floating point exception occurs. +/// If `div == 0`, then a division by zero exception occurs. +#[cfg(all(not(feature = "no-asm"), target_arch = "x86_64"))] +#[inline] +unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) { + let duo_lo = duo as u64; + let duo_hi = (duo >> 64) as u64; + let quo: u64; + let rem: u64; + unsafe { + // divides the combined registers rdx:rax (`duo` is split into two 64 bit parts to do this) + // by `div`. The quotient is stored in rax and the remainder in rdx. + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. 
+ core::arch::asm!( + "div {0}", + in(reg) div, + inlateout("rax") duo_lo => quo, + inlateout("rdx") duo_hi => rem, + options(att_syntax, pure, nomem, nostack) + ); + } + (quo, rem) +} + +// use `asymmetric` instead of `trifecta` on x86_64 +#[cfg(all(not(feature = "no-asm"), target_arch = "x86_64"))] +impl_asymmetric!( + u128_div_rem, + zero_div_fn, + u64_by_u64_div_rem, + u128_by_u64_div_rem, + 32, + u32, + u64, + u128 +); + +/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. +/// `checked_div` and `checked_rem` are used to avoid bringing in panic function +/// dependencies. +#[inline] +#[allow(dead_code)] +fn u32_by_u32_div_rem(duo: u32, div: u32) -> (u32, u32) { + if let Some(quo) = duo.checked_div(div) { + if let Some(rem) = duo.checked_rem(div) { + return (quo, rem); + } + } + zero_div_fn() +} + +// When not on x86 and the pointer width is not 64, use `delegate` since the division size is larger +// than register size. +#[cfg(all( + not(all(not(feature = "no-asm"), target_arch = "x86")), + not(target_pointer_width = "64") +))] +impl_delegate!( + u64_div_rem, + zero_div_fn, + u32_normalization_shift, + u32_by_u32_div_rem, + 16, + u16, + u32, + u64, + i64 +); + +// When not on x86 and the pointer width is 64, use `binary_long`. +#[cfg(all( + not(all(not(feature = "no-asm"), target_arch = "x86")), + target_pointer_width = "64" +))] +impl_binary_long!( + u64_div_rem, + zero_div_fn, + u64_normalization_shift, + 64, + u64, + i64 +); + +/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. +/// +/// # Safety +/// +/// If the quotient does not fit in a `u32`, a floating point exception occurs. +/// If `div == 0`, then a division by zero exception occurs. +#[cfg(all(not(feature = "no-asm"), target_arch = "x86"))] +#[inline] +unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) { + let duo_lo = duo as u32; + let duo_hi = (duo >> 32) as u32; + let quo: u32; + let rem: u32; + unsafe { + // divides the combined registers rdx:rax (`duo` is split into two 32 bit parts to do this) + // by `div`. The quotient is stored in rax and the remainder in rdx. + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. + core::arch::asm!( + "div {0}", + in(reg) div, + inlateout("rax") duo_lo => quo, + inlateout("rdx") duo_hi => rem, + options(att_syntax, pure, nomem, nostack) + ); + } + (quo, rem) +} + +// use `asymmetric` instead of `delegate` on x86 +#[cfg(all(not(feature = "no-asm"), target_arch = "x86"))] +impl_asymmetric!( + u64_div_rem, + zero_div_fn, + u32_by_u32_div_rem, + u64_by_u32_div_rem, + 16, + u16, + u32, + u64 +); + +// 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division +impl_binary_long!( + u32_div_rem, + zero_div_fn, + u32_normalization_shift, + 32, + u32, + i32 +); diff --git a/vendor/compiler_builtins/src/int/specialized_div_rem/norm_shift.rs b/vendor/compiler_builtins/src/int/specialized_div_rem/norm_shift.rs new file mode 100644 index 000000000..61b67b6bc --- /dev/null +++ b/vendor/compiler_builtins/src/int/specialized_div_rem/norm_shift.rs @@ -0,0 +1,106 @@ +/// Creates a function used by some division algorithms to compute the "normalization shift". +#[allow(unused_macros)] +macro_rules! 
impl_normalization_shift { + ( + $name:ident, // name of the normalization shift function + // boolean for if `$uX::leading_zeros` should be used (if an architecture does not have a + // hardware instruction for `usize::leading_zeros`, then this should be `true`) + $use_lz:ident, + $n:tt, // the number of bits in a $iX or $uX + $uX:ident, // unsigned integer type for the inputs of `$name` + $iX:ident, // signed integer type for the inputs of `$name` + $($unsigned_attr:meta),* // attributes for the function + ) => { + /// Finds the shift left that the divisor `div` would need to be normalized for a binary + /// long division step with the dividend `duo`. NOTE: This function assumes that these edge + /// cases have been handled before reaching it: + /// ` + /// if div == 0 { + /// panic!("attempt to divide by zero") + /// } + /// if duo < div { + /// return (0, duo) + /// } + /// ` + /// + /// Normalization is defined as (where `shl` is the output of this function): + /// ` + /// if duo.leading_zeros() != (div << shl).leading_zeros() { + /// // If the most significant bits of `duo` and `div << shl` are not in the same place, + /// // then `div << shl` has one more leading zero than `duo`. + /// assert_eq!(duo.leading_zeros() + 1, (div << shl).leading_zeros()); + /// // Also, `2*(div << shl)` is not more than `duo` (otherwise the first division step + /// // would not be able to clear the msb of `duo`) + /// assert!(duo < (div << (shl + 1))); + /// } + /// if full_normalization { + /// // Some algorithms do not need "full" normalization, which means that `duo` is + /// // larger than `div << shl` when the most significant bits are aligned. + /// assert!((div << shl) <= duo); + /// } + /// ` + /// + /// Note: If the software bisection algorithm is being used in this function, it happens + /// that full normalization always occurs, so be careful that new algorithms are not + /// invisibly depending on this invariant when `full_normalization` is set to `false`. + $( + #[$unsigned_attr] + )* + fn $name(duo: $uX, div: $uX, full_normalization: bool) -> usize { + // We have to find the leading zeros of `div` to know where its msb (most significant + // set bit) is to even begin binary long division. It is also good to know where the msb + // of `duo` is so that useful work can be started instead of shifting `div` for all + // possible quotients (many division steps are wasted if `duo.leading_zeros()` is large + // and `div` starts out being shifted all the way to the msb). Aligning the msbs of + // `div` and `duo` could be done by shifting `div` left by + // `div.leading_zeros() - duo.leading_zeros()`, but some CPUs without division hardware + // also do not have single instructions for calculating `leading_zeros`. Instead of + // software doing two bisections to find the two `leading_zeros`, we do one bisection to + // find `div.leading_zeros() - duo.leading_zeros()` without actually knowing either of + // the leading zeros values. + + let mut shl: usize; + if $use_lz { + shl = (div.leading_zeros() - duo.leading_zeros()) as usize; + if full_normalization { + if duo < (div << shl) { + // when the msb of `duo` and `div` are aligned, the resulting `div` may be + // larger than `duo`, so we decrease the shift by 1. 
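+ // For example (using `u8` widths for brevity): `duo = 0b1001_0000` and
+ // `div = 0b0000_1111` give `shl == 4`, but `div << 4 == 0b1111_0000 > duo`,
+ // so `shl` must drop to 3, where `div << 3 == 0b0111_1000 <= duo`.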
+ shl -= 1; + } + } + } else { + let mut test = duo; + shl = 0usize; + let mut lvl = $n >> 1; + loop { + let tmp = test >> lvl; + // It happens that a final `duo < (div << shl)` check is not needed, because the + // `div <= tmp` check insures that the msb of `test` never passes the msb of + // `div`, and any set bits shifted off the end of `test` would still keep + // `div <= tmp` true. + if div <= tmp { + test = tmp; + shl += lvl; + } + // narrow down bisection + lvl >>= 1; + if lvl == 0 { + break + } + } + } + // tests the invariants that should hold before beginning binary long division + /* + if full_normalization { + assert!((div << shl) <= duo); + } + if duo.leading_zeros() != (div << shl).leading_zeros() { + assert_eq!(duo.leading_zeros() + 1, (div << shl).leading_zeros()); + assert!(duo < (div << (shl + 1))); + } + */ + shl + } + } +} diff --git a/vendor/compiler_builtins/src/int/specialized_div_rem/trifecta.rs b/vendor/compiler_builtins/src/int/specialized_div_rem/trifecta.rs new file mode 100644 index 000000000..7e104053b --- /dev/null +++ b/vendor/compiler_builtins/src/int/specialized_div_rem/trifecta.rs @@ -0,0 +1,386 @@ +/// Creates an unsigned division function optimized for division of integers with bitwidths +/// larger than the largest hardware integer division supported. These functions use large radix +/// division algorithms that require both fast division and very fast widening multiplication on the +/// target microarchitecture. Otherwise, `impl_delegate` should be used instead. +#[allow(unused_macros)] +macro_rules! impl_trifecta { + ( + $fn:ident, // name of the unsigned division function + $zero_div_fn:ident, // function called when division by zero is attempted + $half_division:ident, // function for division of a $uX by a $uX + $n_h:expr, // the number of bits in $iH or $uH + $uH:ident, // unsigned integer with half the bit width of $uX + $uX:ident, // unsigned integer with half the bit width of $uD + $uD:ident // unsigned integer type for the inputs and outputs of `$unsigned_name` + ) => { + /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a + /// tuple. + pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) { + // This is called the trifecta algorithm because it uses three main algorithms: short + // division for small divisors, the two possibility algorithm for large divisors, and an + // undersubtracting long division algorithm for intermediate cases. + + // This replicates `carrying_mul` (rust-lang rfc #2417). LLVM correctly optimizes this + // to use a widening multiply to 128 bits on the relevant architectures. + fn carrying_mul(lhs: $uX, rhs: $uX) -> ($uX, $uX) { + let tmp = (lhs as $uD).wrapping_mul(rhs as $uD); + (tmp as $uX, (tmp >> ($n_h * 2)) as $uX) + } + fn carrying_mul_add(lhs: $uX, mul: $uX, add: $uX) -> ($uX, $uX) { + let tmp = (lhs as $uD) + .wrapping_mul(mul as $uD) + .wrapping_add(add as $uD); + (tmp as $uX, (tmp >> ($n_h * 2)) as $uX) + } + + // the number of bits in a $uX + let n = $n_h * 2; + + if div == 0 { + $zero_div_fn() + } + + // Trying to use a normalization shift function will cause inelegancies in the code and + // inefficiencies for architectures with a native count leading zeros instruction. The + // undersubtracting algorithm needs both values (keeping the original `div_lz` but + // updating `duo_lz` multiple times), so we assume hardware support for fast + // `leading_zeros` calculation. 
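+ //
+ // (For reference, with `$uX = u64` and `$uD = u128` as in the `u128_div_rem`
+ // instantiation of this macro, the helpers above are plain widening multiplies
+ // split into halves, e.g.)
+ /*
+ let (lo, hi) = carrying_mul(u64::MAX, u64::MAX);
+ // lo == 1, hi == 0xFFFF_FFFF_FFFF_FFFE, i.e.
+ // ((hi as u128) << 64) | (lo as u128) == (u64::MAX as u128) * (u64::MAX as u128)
+ */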
+ let div_lz = div.leading_zeros(); + let mut duo_lz = duo.leading_zeros(); + + // the possible ranges of `duo` and `div` at this point: + // `0 <= duo < 2^n_d` + // `1 <= div < 2^n_d` + + // quotient is 0 or 1 branch + if div_lz <= duo_lz { + // The quotient cannot be more than 1. The highest set bit of `duo` needs to be at + // least one place higher than `div` for the quotient to be more than 1. + if duo >= div { + return (1, duo - div); + } else { + return (0, duo); + } + } + + // `_sb` is the number of significant bits (from the ones place to the highest set bit) + // `{2, 2^div_sb} <= duo < 2^n_d` + // `1 <= div < {2^duo_sb, 2^(n_d - 1)}` + // smaller division branch + if duo_lz >= n { + // `duo < 2^n` so it will fit in a $uX. `div` will also fit in a $uX (because of the + // `div_lz <= duo_lz` branch) so no numerical error. + let (quo, rem) = $half_division(duo as $uX, div as $uX); + return (quo as $uD, rem as $uD); + } + + // `{2^n, 2^div_sb} <= duo < 2^n_d` + // `1 <= div < {2^duo_sb, 2^(n_d - 1)}` + // short division branch + if div_lz >= (n + $n_h) { + // `1 <= div < {2^duo_sb, 2^n_h}` + + // It is barely possible to improve the performance of this by calculating the + // reciprocal and removing one `$half_division`, but only if the CPU can do fast + // multiplications in parallel. Other reciprocal based methods can remove two + // `$half_division`s, but have multiplications that cannot be done in parallel and + // reduce performance. I have decided to use this trivial short division method and + // rely on the CPU having quick divisions. + + let duo_hi = (duo >> n) as $uX; + let div_0 = div as $uH as $uX; + let (quo_hi, rem_3) = $half_division(duo_hi, div_0); + + let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h); + let (quo_1, rem_2) = $half_division(duo_mid, div_0); + + let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h); + let (quo_0, rem_1) = $half_division(duo_lo, div_0); + + return ( + (quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n), + rem_1 as $uD, + ); + } + + // relative leading significant bits, cannot overflow because of above branches + let lz_diff = div_lz - duo_lz; + + // `{2^n, 2^div_sb} <= duo < 2^n_d` + // `2^n_h <= div < {2^duo_sb, 2^(n_d - 1)}` + // `mul` or `mul - 1` branch + if lz_diff < $n_h { + // Two possibility division algorithm + + // The most significant bits of `duo` and `div` are within `$n_h` bits of each + // other. If we take the `n` most significant bits of `duo` and divide them by the + // corresponding bits in `div`, it produces a quotient value `quo`. It happens that + // `quo` or `quo - 1` will always be the correct quotient for the whole number. In + // other words, the bits less significant than the `n` most significant bits of + // `duo` and `div` can only influence the quotient to be one of two values. + // Because there are only two possibilities, there only needs to be one `$uH` sized + // division, a `$uH` by `$uD` multiplication, and only one branch with a few simple + // operations. + // + // Proof that the true quotient can only be `quo` or `quo - 1`. + // All `/` operators here are floored divisions. + // + // `shift` is the number of bits not in the higher `n` significant bits of `duo`. + // (definitions) + // 0. shift = n - duo_lz + // 1. duo_sig_n == duo / 2^shift + // 2. div_sig_n == div / 2^shift + // 3. quo == duo_sig_n / div_sig_n + // + // + // We are trying to find the true quotient, `true_quo`. + // 4. true_quo = duo / div. 
(definition) + // + // This is true because of the bits that are cut off during the bit shift. + // 5. duo_sig_n * 2^shift <= duo < (duo_sig_n + 1) * 2^shift. + // 6. div_sig_n * 2^shift <= div < (div_sig_n + 1) * 2^shift. + // + // Dividing each bound of (5) by each bound of (6) gives 4 possibilities for what + // `true_quo == duo / div` is bounded by: + // (duo_sig_n * 2^shift) / (div_sig_n * 2^shift) + // (duo_sig_n * 2^shift) / ((div_sig_n + 1) * 2^shift) + // ((duo_sig_n + 1) * 2^shift) / (div_sig_n * 2^shift) + // ((duo_sig_n + 1) * 2^shift) / ((div_sig_n + 1) * 2^shift) + // + // Simplifying each of these four: + // duo_sig_n / div_sig_n + // duo_sig_n / (div_sig_n + 1) + // (duo_sig_n + 1) / div_sig_n + // (duo_sig_n + 1) / (div_sig_n + 1) + // + // Taking the smallest and the largest of these as the low and high bounds + // and replacing `duo / div` with `true_quo`: + // 7. duo_sig_n / (div_sig_n + 1) <= true_quo < (duo_sig_n + 1) / div_sig_n + // + // The `lz_diff < n_h` conditional on this branch makes sure that `div_sig_n` is at + // least `2^n_h`, and the `div_lz <= duo_lz` branch makes sure that the highest bit + // of `div_sig_n` is not the `2^(n - 1)` bit. + // 8. `2^(n - 1) <= duo_sig_n < 2^n` + // 9. `2^n_h <= div_sig_n < 2^(n - 1)` + // + // We want to prove that either + // `(duo_sig_n + 1) / div_sig_n == duo_sig_n / (div_sig_n + 1)` or that + // `(duo_sig_n + 1) / div_sig_n == duo_sig_n / (div_sig_n + 1) + 1`. + // + // We also want to prove that `quo` is one of these: + // `duo_sig_n / div_sig_n == duo_sig_n / (div_sig_n + 1)` or + // `duo_sig_n / div_sig_n == (duo_sig_n + 1) / div_sig_n`. + // + // When 1 is added to the numerator of `duo_sig_n / div_sig_n` to produce + // `(duo_sig_n + 1) / div_sig_n`, it is not possible that the value increases by + // more than 1 with floored integer arithmetic and `div_sig_n != 0`. Consider + // `x/y + 1 < (x + 1)/y` <=> `x/y + 1 < x/y + 1/y` <=> `1 < 1/y` <=> `y < 1`. + // `div_sig_n` is a nonzero integer. Thus, + // 10. `duo_sig_n / div_sig_n == (duo_sig_n + 1) / div_sig_n` or + // `(duo_sig_n / div_sig_n) + 1 == (duo_sig_n + 1) / div_sig_n. + // + // When 1 is added to the denominator of `duo_sig_n / div_sig_n` to produce + // `duo_sig_n / (div_sig_n + 1)`, it is not possible that the value decreases by + // more than 1 with the bounds (8) and (9). Consider `x/y - 1 <= x/(y + 1)` <=> + // `(x - y)/y < x/(y + 1)` <=> `(y + 1)*(x - y) < x*y` <=> `x*y - y*y + x - y < x*y` + // <=> `x < y*y + y`. The smallest value of `div_sig_n` is `2^n_h` and the largest + // value of `duo_sig_n` is `2^n - 1`. Substituting reveals `2^n - 1 < 2^n + 2^n_h`. + // Thus, + // 11. `duo_sig_n / div_sig_n == duo_sig_n / (div_sig_n + 1)` or + // `(duo_sig_n / div_sig_n) - 1` == duo_sig_n / (div_sig_n + 1)` + // + // Combining both (10) and (11), we know that + // `quo - 1 <= duo_sig_n / (div_sig_n + 1) <= true_quo + // < (duo_sig_n + 1) / div_sig_n <= quo + 1` and therefore: + // 12. quo - 1 <= true_quo < quo + 1 + // + // In a lot of division algorithms using smaller divisions to construct a larger + // division, we often encounter a situation where the approximate `quo` value + // calculated from a smaller division is multiple increments away from the true + // `quo` value. In those algorithms, multiple correction steps have to be applied. + // Those correction steps may need more multiplications to test `duo - (quo*div)` + // again. Because of the fact that our `quo` can only be one of two values, we can + // see if `duo - (quo*div)` overflows. 
If it did overflow, then we know that we have + // the larger of the two values (since the true quotient is unique, and any larger + // quotient will cause `duo - (quo*div)` to be negative). Also because there is only + // one correction needed, we can calculate the remainder `duo - (true_quo*div) == + // duo - ((quo - 1)*div) == duo - (quo*div - div) == duo + div - quo*div`. + // If `duo - (quo*div)` did not overflow, then we have the correct answer. + let shift = n - duo_lz; + let duo_sig_n = (duo >> shift) as $uX; + let div_sig_n = (div >> shift) as $uX; + let quo = $half_division(duo_sig_n, div_sig_n).0; + + // The larger `quo` value can overflow `$uD` in the right circumstances. This is a + // manual `carrying_mul_add` with overflow checking. + let div_lo = div as $uX; + let div_hi = (div >> n) as $uX; + let (tmp_lo, carry) = carrying_mul(quo, div_lo); + let (tmp_hi, overflow) = carrying_mul_add(quo, div_hi, carry); + let tmp = (tmp_lo as $uD) | ((tmp_hi as $uD) << n); + if (overflow != 0) || (duo < tmp) { + return ( + (quo - 1) as $uD, + // Both the addition and subtraction can overflow, but when combined end up + // as a correct positive number. + duo.wrapping_add(div).wrapping_sub(tmp), + ); + } else { + return (quo as $uD, duo - tmp); + } + } + + // Undersubtracting long division algorithm. + // Instead of clearing a minimum of 1 bit from `duo` per iteration via binary long + // division, `n_h - 1` bits are cleared per iteration with this algorithm. It is a more + // complicated version of regular long division. Most integer division algorithms tend + // to guess a part of the quotient, and may have a larger quotient than the true + // quotient (which when multiplied by `div` will "oversubtract" the original dividend). + // They then check if the quotient was in fact too large and then have to correct it. + // This long division algorithm has been carefully constructed to always underguess the + // quotient by slim margins. This allows different subalgorithms to be blindly jumped to + // without needing an extra correction step. + // + // The only problem is that this subalgorithm will not work for many ranges of `duo` and + // `div`. Fortunately, the short division, two possibility algorithm, and other simple + // cases happen to exactly fill these gaps. + // + // For an example, consider the division of 76543210 by 213 and assume that `n_h` is + // equal to two decimal digits (note: we are working with base 10 here for readability). + // The first `sig_n_h` part of the divisor (21) is taken and is incremented by 1 to + // prevent oversubtraction. We also record the number of extra places not a part of + // the `sig_n` or `sig_n_h` parts. + // + // sig_n_h == 2 digits, sig_n == 4 digits + // + // vvvv <- `duo_sig_n` + // 76543210 + // ^^^^ <- extra places in duo, `duo_extra == 4` + // + // vv <- `div_sig_n_h` + // 213 + // ^ <- extra places in div, `div_extra == 1` + // + // The difference in extra places, `duo_extra - div_extra == extra_shl == 3`, is used + // for shifting partial sums in the long division. + // + // In the first step, the first `sig_n` part of duo (7654) is divided by + // `div_sig_n_h_add_1` (22), which results in a partial quotient of 347. This is + // multiplied by the whole divisor to make 73911, which is shifted left by `extra_shl` + // and subtracted from duo. The partial quotient is also shifted left by `extra_shl` to + // be added to `quo`. 
+ // + // 347 + // ________ + // |76543210 + // -73911 + // 2632210 + // + // Variables dependent on duo have to be updated: + // + // vvvv <- `duo_sig_n == 2632` + // 2632210 + // ^^^ <- `duo_extra == 3` + // + // `extra_shl == 2` + // + // Two more steps are taken after this and then duo fits into `n` bits, and then a final + // normal long division step is made. The partial quotients are all progressively added + // to each other in the actual algorithm, but here I have left them all in a tower that + // can be added together to produce the quotient, 359357. + // + // 14 + // 443 + // 119 + // 347 + // ________ + // |76543210 + // -73911 + // 2632210 + // -25347 + // 97510 + // -94359 + // 3151 + // -2982 + // 169 <- the remainder + + let mut duo = duo; + let mut quo: $uD = 0; + + // The number of lesser significant bits not a part of `div_sig_n_h` + let div_extra = (n + $n_h) - div_lz; + + // The most significant `n_h` bits of div + let div_sig_n_h = (div >> div_extra) as $uH; + + // This needs to be a `$uX` in case of overflow from the increment + let div_sig_n_h_add1 = (div_sig_n_h as $uX) + 1; + + // `{2^n, 2^(div_sb + n_h)} <= duo < 2^n_d` + // `2^n_h <= div < {2^(duo_sb - n_h), 2^n}` + loop { + // The number of lesser significant bits not a part of `duo_sig_n` + let duo_extra = n - duo_lz; + + // The most significant `n` bits of `duo` + let duo_sig_n = (duo >> duo_extra) as $uX; + + // the two possibility algorithm requires that the difference between msbs is less + // than `n_h`, so the comparison is `<=` here. + if div_extra <= duo_extra { + // Undersubtracting long division step + let quo_part = $half_division(duo_sig_n, div_sig_n_h_add1).0 as $uD; + let extra_shl = duo_extra - div_extra; + + // Addition to the quotient. + quo += (quo_part << extra_shl); + + // Subtraction from `duo`. At least `n_h - 1` bits are cleared from `duo` here. + duo -= (div.wrapping_mul(quo_part) << extra_shl); + } else { + // Two possibility algorithm + let shift = n - duo_lz; + let duo_sig_n = (duo >> shift) as $uX; + let div_sig_n = (div >> shift) as $uX; + let quo_part = $half_division(duo_sig_n, div_sig_n).0; + let div_lo = div as $uX; + let div_hi = (div >> n) as $uX; + + let (tmp_lo, carry) = carrying_mul(quo_part, div_lo); + // The undersubtracting long division algorithm has already run once, so + // overflow beyond `$uD` bits is not possible here + let (tmp_hi, _) = carrying_mul_add(quo_part, div_hi, carry); + let tmp = (tmp_lo as $uD) | ((tmp_hi as $uD) << n); + + if duo < tmp { + return ( + quo + ((quo_part - 1) as $uD), + duo.wrapping_add(div).wrapping_sub(tmp), + ); + } else { + return (quo + (quo_part as $uD), duo - tmp); + } + } + + duo_lz = duo.leading_zeros(); + + if div_lz <= duo_lz { + // quotient can have 0 or 1 added to it + if div <= duo { + return (quo + 1, duo - div); + } else { + return (quo, duo); + } + } + + // This can only happen if `div_sd < n` (because of previous "quo = 0 or 1" + // branches), but it is not worth it to unroll further. 
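+ // (At this point `div_lz > duo_lz`, so once `duo` fits in `n` bits, `div` does
+ // too, and a single half division finishes the job.)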
+ if n <= duo_lz { + // simple division and addition + let tmp = $half_division(duo as $uX, div as $uX); + return (quo + (tmp.0 as $uD), tmp.1 as $uD); + } + } + } + }; +} diff --git a/vendor/compiler_builtins/src/int/udiv.rs b/vendor/compiler_builtins/src/int/udiv.rs new file mode 100644 index 000000000..fb09f87d8 --- /dev/null +++ b/vendor/compiler_builtins/src/int/udiv.rs @@ -0,0 +1,106 @@ +#[cfg(not(feature = "public-test-deps"))] +pub(crate) use int::specialized_div_rem::*; + +#[cfg(feature = "public-test-deps")] +pub use int::specialized_div_rem::*; + +intrinsics! { + #[maybe_use_optimized_c_shim] + #[arm_aeabi_alias = __aeabi_uidiv] + /// Returns `n / d` + pub extern "C" fn __udivsi3(n: u32, d: u32) -> u32 { + u32_div_rem(n, d).0 + } + + #[maybe_use_optimized_c_shim] + /// Returns `n % d` + pub extern "C" fn __umodsi3(n: u32, d: u32) -> u32 { + u32_div_rem(n, d).1 + } + + #[avr_skip] + #[maybe_use_optimized_c_shim] + /// Returns `n / d` and sets `*rem = n % d` + pub extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 { + let quo_rem = u32_div_rem(n, d); + if let Some(rem) = rem { + *rem = quo_rem.1; + } + quo_rem.0 + } + + #[avr_skip] + #[maybe_use_optimized_c_shim] + /// Returns `n / d` + pub extern "C" fn __udivdi3(n: u64, d: u64) -> u64 { + u64_div_rem(n, d).0 + } + + #[avr_skip] + #[maybe_use_optimized_c_shim] + /// Returns `n % d` + pub extern "C" fn __umoddi3(n: u64, d: u64) -> u64 { + u64_div_rem(n, d).1 + } + + #[avr_skip] + #[maybe_use_optimized_c_shim] + /// Returns `n / d` and sets `*rem = n % d` + pub extern "C" fn __udivmoddi4(n: u64, d: u64, rem: Option<&mut u64>) -> u64 { + let quo_rem = u64_div_rem(n, d); + if let Some(rem) = rem { + *rem = quo_rem.1; + } + quo_rem.0 + } + + // Note: we use block configuration and not `if cfg!(...)`, because we need to entirely disable + // the existence of `u128_div_rem` to get 32-bit SPARC to compile, see `u128_divide_sparc` docs. 
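+ // (With `if cfg!(...)`, both arms would still have to compile on every target, so
+ // `u128_div_rem` and its `(u128, u128)` return type would still be instantiated on
+ // 32-bit SPARC; `#[cfg]` blocks remove the reference entirely.)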
+ + #[avr_skip] + #[win64_128bit_abi_hack] + /// Returns `n / d` + pub extern "C" fn __udivti3(n: u128, d: u128) -> u128 { + #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { + u128_div_rem(n, d).0 + } + #[cfg(any(target_arch = "sparc", target_arch = "sparc64"))] { + u128_divide_sparc(n, d, &mut 0) + } + } + + #[avr_skip] + #[win64_128bit_abi_hack] + /// Returns `n % d` + pub extern "C" fn __umodti3(n: u128, d: u128) -> u128 { + #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { + u128_div_rem(n, d).1 + } + #[cfg(any(target_arch = "sparc", target_arch = "sparc64"))] { + let mut rem = 0; + u128_divide_sparc(n, d, &mut rem); + rem + } + } + + #[avr_skip] + #[win64_128bit_abi_hack] + /// Returns `n / d` and sets `*rem = n % d` + pub extern "C" fn __udivmodti4(n: u128, d: u128, rem: Option<&mut u128>) -> u128 { + #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { + let quo_rem = u128_div_rem(n, d); + if let Some(rem) = rem { + *rem = quo_rem.1; + } + quo_rem.0 + } + #[cfg(any(target_arch = "sparc", target_arch = "sparc64"))] { + let mut tmp = 0; + let quo = u128_divide_sparc(n, d, &mut tmp); + if let Some(rem) = rem { + *rem = tmp; + } + quo + } + } +} diff --git a/vendor/compiler_builtins/src/lib.rs b/vendor/compiler_builtins/src/lib.rs new file mode 100644 index 000000000..009923d27 --- /dev/null +++ b/vendor/compiler_builtins/src/lib.rs @@ -0,0 +1,72 @@ +#![cfg_attr(feature = "compiler-builtins", compiler_builtins)] +#![cfg_attr(not(feature = "no-asm"), feature(asm))] +#![feature(abi_unadjusted)] +#![cfg_attr(not(feature = "no-asm"), feature(global_asm))] +#![feature(cfg_target_has_atomic)] +#![feature(compiler_builtins)] +#![feature(core_ffi_c)] +#![feature(core_intrinsics)] +#![feature(lang_items)] +#![feature(linkage)] +#![feature(naked_functions)] +#![feature(repr_simd)] +#![no_builtins] +#![no_std] +#![allow(unused_features)] +// We use `u128` in a whole bunch of places which we currently agree with the +// compiler on ABIs and such, so we should be "good enough" for now and changes +// to the `u128` ABI will be reflected here. +#![allow(improper_ctypes, improper_ctypes_definitions)] +// `mem::swap` cannot be used because it may generate references to memcpy in unoptimized code. +#![allow(clippy::manual_swap)] +// Support compiling on both stage0 and stage1 which may differ in supported stable features. +#![allow(stable_features)] + +// We disable #[no_mangle] for tests so that we can verify the test results +// against the native compiler-rt implementations of the builtins. + +// NOTE cfg(all(feature = "c", ..)) indicate that compiler-rt provides an arch optimized +// implementation of that intrinsic and we'll prefer to use that + +// NOTE(aapcs, aeabi, arm) ARM targets use intrinsics named __aeabi_* instead of the intrinsics +// that follow "x86 naming convention" (e.g. addsf3). Those aeabi intrinsics must adhere to the +// AAPCS calling convention (`extern "aapcs"`) because that's how LLVM will call them. 
+ +#[cfg(test)] +extern crate core; + +#[macro_use] +mod macros; + +pub mod float; +pub mod int; + +#[cfg(any( + all(target_family = "wasm", target_os = "unknown"), + all(target_arch = "x86_64", target_os = "uefi"), + all(target_arch = "arm", target_os = "none"), + all(target_vendor = "fortanix", target_env = "sgx") +))] +pub mod math; +pub mod mem; + +#[cfg(target_arch = "arm")] +pub mod arm; + +#[cfg(all( + kernel_user_helpers, + any(target_os = "linux", target_os = "android"), + target_arch = "arm" +))] +pub mod arm_linux; + +#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +pub mod riscv; + +#[cfg(target_arch = "x86")] +pub mod x86; + +#[cfg(target_arch = "x86_64")] +pub mod x86_64; + +pub mod probestack; diff --git a/vendor/compiler_builtins/src/macros.rs b/vendor/compiler_builtins/src/macros.rs new file mode 100644 index 000000000..518a18d4d --- /dev/null +++ b/vendor/compiler_builtins/src/macros.rs @@ -0,0 +1,448 @@ +//! Macros shared throughout the compiler-builtins implementation + +/// Changes the visibility to `pub` if feature "public-test-deps" is set +#[cfg(not(feature = "public-test-deps"))] +macro_rules! public_test_dep { + ($(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*) => { + $(#[$($meta)*])* pub(crate) $ident $($tokens)* + }; +} + +/// Changes the visibility to `pub` if feature "public-test-deps" is set +#[cfg(feature = "public-test-deps")] +macro_rules! public_test_dep { + {$(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*} => { + $(#[$($meta)*])* pub $ident $($tokens)* + }; +} + +/// The "main macro" used for defining intrinsics. +/// +/// The compiler-builtins library is super platform-specific with tons of crazy +/// little tweaks for various platforms. As a result it *could* involve a lot of +/// #[cfg] and macro soup, but the intention is that this macro alleviates a lot +/// of that complexity. Ideally this macro has all the weird ABI things +/// platforms need and elsewhere in this library it just looks like normal Rust +/// code. +/// +/// This macro is structured to be invoked with a bunch of functions that looks +/// like: +/// +/// intrinsics! { +/// pub extern "C" fn foo(a: i32) -> u32 { +/// // ... +/// } +/// +/// #[nonstandard_attribute] +/// pub extern "C" fn bar(a: i32) -> u32 { +/// // ... +/// } +/// } +/// +/// Each function is defined in a manner that looks like a normal Rust function. +/// The macro then accepts a few nonstandard attributes that can decorate +/// various functions. Each of the attributes is documented below with what it +/// can do, and each of them slightly tweaks how further expansion happens. +/// +/// A quick overview of attributes supported right now are: +/// +/// * `maybe_use_optimized_c_shim` - indicates that the Rust implementation is +/// ignored if an optimized C version was compiled. +/// * `aapcs_on_arm` - forces the ABI of the function to be `"aapcs"` on ARM and +/// the specified ABI everywhere else. +/// * `unadjusted_on_win64` - like `aapcs_on_arm` this switches to the +/// `"unadjusted"` abi on Win64 and the specified abi elsewhere. +/// * `win64_128bit_abi_hack` - this attribute is used for 128-bit integer +/// intrinsics where the ABI is slightly tweaked on Windows platforms, but +/// it's a normal ABI elsewhere for returning a 128 bit integer. +/// * `arm_aeabi_alias` - handles the "aliasing" of various intrinsics on ARM +/// their otherwise typical names to other prefixed ones. +/// +macro_rules! 
intrinsics { + () => (); + + // Support cfg_attr: + ( + #[cfg_attr($e:meta, $($attr:tt)*)] + $(#[$($attrs:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + $($rest:tt)* + ) => ( + #[cfg($e)] + intrinsics! { + #[$($attr)*] + $(#[$($attrs)*])* + pub extern $abi fn $name($($argname: $ty),*) $(-> $ret)? { + $($body)* + } + } + + #[cfg(not($e))] + intrinsics! { + $(#[$($attrs)*])* + pub extern $abi fn $name($($argname: $ty),*) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // Right now there's a bunch of architecture-optimized intrinsics in the + // stock compiler-rt implementation. Not all of these have been ported over + // to Rust yet so when the `c` feature of this crate is enabled we fall back + // to the architecture-specific versions which should be more optimized. The + // purpose of this macro is to easily allow specifying this. + // + // The `#[maybe_use_optimized_c_shim]` attribute indicates that this + // intrinsic may have an optimized C version. In these situations the build + // script, if the C code is enabled and compiled, will emit a cfg directive + // to get passed to rustc for our compilation. If that cfg is set we skip + // the Rust implementation, but if the attribute is not enabled then we + // compile in the Rust implementation. + ( + #[maybe_use_optimized_c_shim] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg($name = "optimized-c")] + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + extern $abi { + fn $name($($argname: $ty),*) $(-> $ret)?; + } + unsafe { + $name($($argname),*) + } + } + + #[cfg(not($name = "optimized-c"))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // We recognize the `#[aapcs_on_arm]` attribute here and generate the + // same intrinsic but force it to have the `"aapcs"` calling convention on + // ARM and `"C"` elsewhere. + ( + #[aapcs_on_arm] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(target_arch = "arm")] + intrinsics! { + $(#[$($attr)*])* + pub extern "aapcs" fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + #[cfg(not(target_arch = "arm"))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // Like aapcs above we recognize an attribute for the "unadjusted" abi on + // win64 for some methods. + ( + #[unadjusted_on_win64] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(all(windows, target_pointer_width = "64"))] + intrinsics! { + $(#[$($attr)*])* + pub extern "unadjusted" fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + #[cfg(not(all(windows, target_pointer_width = "64")))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // Some intrinsics on win64 which return a 128-bit integer have an.. unusual + // calling convention. That's managed here with this "abi hack" which alters + // the generated symbol's ABI. 
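+    // (Concretely, the exported symbol returns the `#[repr(simd)]` `U64x2` wrapper
+    // defined in the `win64_128bit_abi_hack` module at the bottom of this file.)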
+ // + // This will still define a function in this crate with the given name and + // signature, but the actual symbol for the intrinsic may have a slightly + // different ABI on win64. + ( + #[win64_128bit_abi_hack] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(all(windows, target_arch = "x86_64"))] + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + #[cfg(all(windows, target_arch = "x86_64"))] + pub mod $name { + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub extern $abi fn $name( $($argname: $ty),* ) + -> ::macros::win64_128bit_abi_hack::U64x2 + { + let e: $($ret)? = super::$name($($argname),*); + ::macros::win64_128bit_abi_hack::U64x2::from(e) + } + } + + #[cfg(not(all(windows, target_arch = "x86_64")))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // A bunch of intrinsics on ARM are aliased in the standard compiler-rt + // build under `__aeabi_*` aliases, and LLVM will call these instead of the + // original function. The aliasing here is used to generate these symbols in + // the object file. + ( + #[arm_aeabi_alias = $alias:ident] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(target_arch = "arm")] + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + #[cfg(target_arch = "arm")] + pub mod $name { + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + super::$name($($argname),*) + } + } + + #[cfg(target_arch = "arm")] + pub mod $alias { + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub extern "aapcs" fn $alias( $($argname: $ty),* ) $(-> $ret)? { + super::$name($($argname),*) + } + } + + #[cfg(not(target_arch = "arm"))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // C mem* functions are only generated when the "mem" feature is enabled. + ( + #[mem_builtin] + $(#[$($attr:tt)*])* + pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + $(#[$($attr)*])* + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + #[cfg(feature = "mem")] + pub mod $name { + $(#[$($attr)*])* + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + super::$name($($argname),*) + } + } + + intrinsics!($($rest)*); + ); + + // Naked functions are special: we can't generate wrappers for them since + // they use a custom calling convention. + ( + #[naked] + $(#[$($attr:tt)*])* + pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + pub mod $name { + #[naked] + $(#[$($attr)*])* + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // For division and modulo, AVR uses a custom calling convention¹ that does + // not match our definitions here. 
Ideally we would just use hand-written + // naked functions, but that's quite a lot of code to port² - so for the + // time being we are just ignoring the problematic functions, letting + // avr-gcc (which is required to compile to AVR anyway) link them from + // libgcc. + // + // ¹ https://gcc.gnu.org/wiki/avr-gcc (see "Exceptions to the Calling + // Convention") + // ² https://github.com/gcc-mirror/gcc/blob/31048012db98f5ec9c2ba537bfd850374bdd771f/libgcc/config/avr/lib1funcs.S + ( + #[avr_skip] + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + #[cfg(not(target_arch = "avr"))] + intrinsics! { + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + } + + intrinsics!($($rest)*); + ); + + // This is the final catch-all rule. At this point we generate an + // intrinsic with a conditional `#[no_mangle]` directive to avoid + // interfering with duplicate symbols and whatnot during testing. + // + // The implementation is placed in a separate module, to take advantage + // of the fact that rustc partitions functions into code generation + // units based on module they are defined in. As a result we will have + // a separate object file for each intrinsic. For further details see + // corresponding PR in rustc https://github.com/rust-lang/rust/pull/70846 + // + // After the intrinsic is defined we just continue with the rest of the + // input we were given. + ( + $(#[$($attr:tt)*])* + pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + $(#[$($attr)*])* + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + pub mod $name { + $(#[$($attr)*])* + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + super::$name($($argname),*) + } + } + + intrinsics!($($rest)*); + ); + + // Same as the above for unsafe functions. + ( + $(#[$($attr:tt)*])* + pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { + $($body:tt)* + } + + $($rest:tt)* + ) => ( + $(#[$($attr)*])* + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + $($body)* + } + + pub mod $name { + $(#[$($attr)*])* + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] + pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { + super::$name($($argname),*) + } + } + + intrinsics!($($rest)*); + ); +} + +// Hack for LLVM expectations for ABI on windows. This is used by the +// `#[win64_128bit_abi_hack]` attribute recognized above +#[cfg(all(windows, target_pointer_width = "64"))] +pub mod win64_128bit_abi_hack { + #[repr(simd)] + pub struct U64x2(u64, u64); + + impl From for U64x2 { + fn from(i: i128) -> U64x2 { + use int::DInt; + let j = i as u128; + U64x2(j.lo(), j.hi()) + } + } + + impl From for U64x2 { + fn from(i: u128) -> U64x2 { + use int::DInt; + U64x2(i.lo(), i.hi()) + } + } +} diff --git a/vendor/compiler_builtins/src/math.rs b/vendor/compiler_builtins/src/math.rs new file mode 100644 index 000000000..fa59753f8 --- /dev/null +++ b/vendor/compiler_builtins/src/math.rs @@ -0,0 +1,117 @@ +#[allow(dead_code)] +#[path = "../libm/src/math/mod.rs"] +mod libm; + +macro_rules! no_mangle { + ($(fn $fun:ident($($iid:ident : $ity:ty),+) -> $oty:ty;)+) => { + intrinsics! 
{ + $( + pub extern "C" fn $fun($($iid: $ity),+) -> $oty { + self::libm::$fun($($iid),+) + } + )+ + } + } +} + +#[cfg(any( + all( + target_family = "wasm", + target_os = "unknown", + not(target_env = "wasi") + ), + all(target_arch = "x86_64", target_os = "uefi"), + all(target_arch = "xtensa", target_os = "none"), + all(target_vendor = "fortanix", target_env = "sgx") +))] +no_mangle! { + fn acos(x: f64) -> f64; + fn asin(x: f64) -> f64; + fn cbrt(x: f64) -> f64; + fn expm1(x: f64) -> f64; + fn hypot(x: f64, y: f64) -> f64; + fn tan(x: f64) -> f64; + fn cos(x: f64) -> f64; + fn expf(x: f32) -> f32; + fn log2(x: f64) -> f64; + fn log2f(x: f32) -> f32; + fn log10(x: f64) -> f64; + fn log10f(x: f32) -> f32; + fn log(x: f64) -> f64; + fn logf(x: f32) -> f32; + fn fmin(x: f64, y: f64) -> f64; + fn fminf(x: f32, y: f32) -> f32; + fn fmax(x: f64, y: f64) -> f64; + fn fmaxf(x: f32, y: f32) -> f32; + fn round(x: f64) -> f64; + fn roundf(x: f32) -> f32; + fn sin(x: f64) -> f64; + fn pow(x: f64, y: f64) -> f64; + fn powf(x: f32, y: f32) -> f32; + fn fmod(x: f64, y: f64) -> f64; + fn fmodf(x: f32, y: f32) -> f32; + fn acosf(n: f32) -> f32; + fn atan2f(a: f32, b: f32) -> f32; + fn atanf(n: f32) -> f32; + fn coshf(n: f32) -> f32; + fn expm1f(n: f32) -> f32; + fn fdim(a: f64, b: f64) -> f64; + fn fdimf(a: f32, b: f32) -> f32; + fn log1pf(n: f32) -> f32; + fn sinhf(n: f32) -> f32; + fn tanhf(n: f32) -> f32; + fn ldexp(f: f64, n: i32) -> f64; + fn ldexpf(f: f32, n: i32) -> f32; +} + +#[cfg(any( + all( + target_family = "wasm", + target_os = "unknown", + not(target_env = "wasi") + ), + all(target_arch = "xtensa", target_os = "none"), + all(target_vendor = "fortanix", target_env = "sgx") +))] +no_mangle! { + fn atan(x: f64) -> f64; + fn atan2(x: f64, y: f64) -> f64; + fn cosh(x: f64) -> f64; + fn log1p(x: f64) -> f64; + fn sinh(x: f64) -> f64; + fn tanh(x: f64) -> f64; + fn cosf(x: f32) -> f32; + fn exp(x: f64) -> f64; + fn sinf(x: f32) -> f32; + fn exp2(x: f64) -> f64; + fn exp2f(x: f32) -> f32; + fn fma(x: f64, y: f64, z: f64) -> f64; + fn fmaf(x: f32, y: f32, z: f32) -> f32; + fn asinf(n: f32) -> f32; + fn cbrtf(n: f32) -> f32; + fn hypotf(x: f32, y: f32) -> f32; + fn tanf(n: f32) -> f32; +} + +#[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] +no_mangle! { + fn ceil(x: f64) -> f64; + fn ceilf(x: f32) -> f32; + fn floor(x: f64) -> f64; + fn floorf(x: f32) -> f32; + fn trunc(x: f64) -> f64; + fn truncf(x: f32) -> f32; +} + +// only for the thumb*-none-eabi* targets +#[cfg(all(target_arch = "arm", target_os = "none"))] +no_mangle! { + fn fmin(x: f64, y: f64) -> f64; + fn fminf(x: f32, y: f32) -> f32; + fn fmax(x: f64, y: f64) -> f64; + fn fmaxf(x: f32, y: f32) -> f32; + // `f64 % f64` + fn fmod(x: f64, y: f64) -> f64; + // `f32 % f32` + fn fmodf(x: f32, y: f32) -> f32; +} diff --git a/vendor/compiler_builtins/src/mem/impls.rs b/vendor/compiler_builtins/src/mem/impls.rs new file mode 100644 index 000000000..815132425 --- /dev/null +++ b/vendor/compiler_builtins/src/mem/impls.rs @@ -0,0 +1,267 @@ +use core::intrinsics::likely; + +const WORD_SIZE: usize = core::mem::size_of::(); +const WORD_MASK: usize = WORD_SIZE - 1; + +// If the number of bytes involved exceed this threshold we will opt in word-wise copy. +// The value here selected is max(2 * WORD_SIZE, 16): +// * We need at least 2 * WORD_SIZE bytes to guarantee that at least 1 word will be copied through +// word-wise copy. +// * The word-wise copy logic needs to perform some checks so it has some small overhead. 
+// ensures that even on 32-bit platforms we have copied at least 8 bytes through +// word-wise copy so the saving of word-wise copy outweights the fixed overhead. +const WORD_COPY_THRESHOLD: usize = if 2 * WORD_SIZE > 16 { + 2 * WORD_SIZE +} else { + 16 +}; + +#[cfg(feature = "mem-unaligned")] +unsafe fn read_usize_unaligned(x: *const usize) -> usize { + // Do not use `core::ptr::read_unaligned` here, since it calls `copy_nonoverlapping` which + // is translated to memcpy in LLVM. + let x_read = (x as *const [u8; core::mem::size_of::()]).read(); + core::mem::transmute(x_read) +} + +#[inline(always)] +pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) { + #[inline(always)] + unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) { + let dest_end = dest.add(n); + while dest < dest_end { + *dest = *src; + dest = dest.add(1); + src = src.add(1); + } + } + + #[inline(always)] + unsafe fn copy_forward_aligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let mut src_usize = src as *mut usize; + let dest_end = dest.add(n) as *mut usize; + + while dest_usize < dest_end { + *dest_usize = *src_usize; + dest_usize = dest_usize.add(1); + src_usize = src_usize.add(1); + } + } + + #[cfg(not(feature = "mem-unaligned"))] + #[inline(always)] + unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let dest_end = dest.add(n) as *mut usize; + + // Calculate the misalignment offset and shift needed to reassemble value. + let offset = src as usize & WORD_MASK; + let shift = offset * 8; + + // Realign src + let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize; + // This will read (but won't use) bytes out of bound. + // cfg needed because not all targets will have atomic loads that can be lowered + // (e.g. BPF, MSP430), or provided by an external library (e.g. 
RV32I) + #[cfg(target_has_atomic_load_store = "ptr")] + let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned); + #[cfg(not(target_has_atomic_load_store = "ptr"))] + let mut prev_word = core::ptr::read_volatile(src_aligned); + + while dest_usize < dest_end { + src_aligned = src_aligned.add(1); + let cur_word = *src_aligned; + #[cfg(target_endian = "little")] + let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift); + #[cfg(target_endian = "big")] + let resembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift); + prev_word = cur_word; + + *dest_usize = resembled; + dest_usize = dest_usize.add(1); + } + } + + #[cfg(feature = "mem-unaligned")] + #[inline(always)] + unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let mut src_usize = src as *mut usize; + let dest_end = dest.add(n) as *mut usize; + + while dest_usize < dest_end { + *dest_usize = read_usize_unaligned(src_usize); + dest_usize = dest_usize.add(1); + src_usize = src_usize.add(1); + } + } + + if n >= WORD_COPY_THRESHOLD { + // Align dest + // Because of n >= 2 * WORD_SIZE, dst_misalignment < n + let dest_misalignment = (dest as usize).wrapping_neg() & WORD_MASK; + copy_forward_bytes(dest, src, dest_misalignment); + dest = dest.add(dest_misalignment); + src = src.add(dest_misalignment); + n -= dest_misalignment; + + let n_words = n & !WORD_MASK; + let src_misalignment = src as usize & WORD_MASK; + if likely(src_misalignment == 0) { + copy_forward_aligned_words(dest, src, n_words); + } else { + copy_forward_misaligned_words(dest, src, n_words); + } + dest = dest.add(n_words); + src = src.add(n_words); + n -= n_words; + } + copy_forward_bytes(dest, src, n); +} + +#[inline(always)] +pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { + // The following backward copy helper functions uses the pointers past the end + // as their inputs instead of pointers to the start! + #[inline(always)] + unsafe fn copy_backward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) { + let dest_start = dest.sub(n); + while dest_start < dest { + dest = dest.sub(1); + src = src.sub(1); + *dest = *src; + } + } + + #[inline(always)] + unsafe fn copy_backward_aligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let mut src_usize = src as *mut usize; + let dest_start = dest.sub(n) as *mut usize; + + while dest_start < dest_usize { + dest_usize = dest_usize.sub(1); + src_usize = src_usize.sub(1); + *dest_usize = *src_usize; + } + } + + #[cfg(not(feature = "mem-unaligned"))] + #[inline(always)] + unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let dest_start = dest.sub(n) as *mut usize; + + // Calculate the misalignment offset and shift needed to reassemble value. + let offset = src as usize & WORD_MASK; + let shift = offset * 8; + + // Realign src_aligned + let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize; + // This will read (but won't use) bytes out of bound. + // cfg needed because not all targets will have atomic loads that can be lowered + // (e.g. BPF, MSP430), or provided by an external library (e.g. 
RV32I) + #[cfg(target_has_atomic_load_store = "ptr")] + let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned); + #[cfg(not(target_has_atomic_load_store = "ptr"))] + let mut prev_word = core::ptr::read_volatile(src_aligned); + + while dest_start < dest_usize { + src_aligned = src_aligned.sub(1); + let cur_word = *src_aligned; + #[cfg(target_endian = "little")] + let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift; + #[cfg(target_endian = "big")] + let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift; + prev_word = cur_word; + + dest_usize = dest_usize.sub(1); + *dest_usize = resembled; + } + } + + #[cfg(feature = "mem-unaligned")] + #[inline(always)] + unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { + let mut dest_usize = dest as *mut usize; + let mut src_usize = src as *mut usize; + let dest_start = dest.sub(n) as *mut usize; + + while dest_start < dest_usize { + dest_usize = dest_usize.sub(1); + src_usize = src_usize.sub(1); + *dest_usize = read_usize_unaligned(src_usize); + } + } + + let mut dest = dest.add(n); + let mut src = src.add(n); + + if n >= WORD_COPY_THRESHOLD { + // Align dest + // Because of n >= 2 * WORD_SIZE, dst_misalignment < n + let dest_misalignment = dest as usize & WORD_MASK; + copy_backward_bytes(dest, src, dest_misalignment); + dest = dest.sub(dest_misalignment); + src = src.sub(dest_misalignment); + n -= dest_misalignment; + + let n_words = n & !WORD_MASK; + let src_misalignment = src as usize & WORD_MASK; + if likely(src_misalignment == 0) { + copy_backward_aligned_words(dest, src, n_words); + } else { + copy_backward_misaligned_words(dest, src, n_words); + } + dest = dest.sub(n_words); + src = src.sub(n_words); + n -= n_words; + } + copy_backward_bytes(dest, src, n); +} + +#[inline(always)] +pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) { + #[inline(always)] + pub unsafe fn set_bytes_bytes(mut s: *mut u8, c: u8, n: usize) { + let end = s.add(n); + while s < end { + *s = c; + s = s.add(1); + } + } + + #[inline(always)] + pub unsafe fn set_bytes_words(s: *mut u8, c: u8, n: usize) { + let mut broadcast = c as usize; + let mut bits = 8; + while bits < WORD_SIZE * 8 { + broadcast |= broadcast << bits; + bits *= 2; + } + + let mut s_usize = s as *mut usize; + let end = s.add(n) as *mut usize; + + while s_usize < end { + *s_usize = broadcast; + s_usize = s_usize.add(1); + } + } + + if likely(n >= WORD_COPY_THRESHOLD) { + // Align s + // Because of n >= 2 * WORD_SIZE, dst_misalignment < n + let misalignment = (s as usize).wrapping_neg() & WORD_MASK; + set_bytes_bytes(s, c, misalignment); + s = s.add(misalignment); + n -= misalignment; + + let n_words = n & !WORD_MASK; + set_bytes_words(s, c, n_words); + s = s.add(n_words); + n -= n_words; + } + set_bytes_bytes(s, c, n); +} diff --git a/vendor/compiler_builtins/src/mem/mod.rs b/vendor/compiler_builtins/src/mem/mod.rs new file mode 100644 index 000000000..a55113861 --- /dev/null +++ b/vendor/compiler_builtins/src/mem/mod.rs @@ -0,0 +1,211 @@ +// Trying to satisfy clippy here is hopeless +#![allow(clippy::style)] + +#[allow(warnings)] +#[cfg(target_pointer_width = "16")] +type c_int = i16; +#[allow(warnings)] +#[cfg(not(target_pointer_width = "16"))] +type c_int = i32; + +use core::intrinsics::{atomic_load_unordered, atomic_store_unordered, exact_div}; +use core::mem; +use core::ops::{BitOr, Shl}; + +// memcpy/memmove/memset have optimized implementations on some architectures +#[cfg_attr( + all(not(feature = 
"no-asm"), target_arch = "x86_64"), + path = "x86_64.rs" +)] +mod impls; + +intrinsics! { + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { + impls::copy_forward(dest, src, n); + dest + } + + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { + let delta = (dest as usize).wrapping_sub(src as usize); + if delta >= n { + // We can copy forwards because either dest is far enough ahead of src, + // or src is ahead of dest (and delta overflowed). + impls::copy_forward(dest, src, n); + } else { + impls::copy_backward(dest, src, n); + } + dest + } + + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn memset(s: *mut u8, c: crate::mem::c_int, n: usize) -> *mut u8 { + impls::set_bytes(s, c as u8, n); + s + } + + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { + let mut i = 0; + while i < n { + let a = *s1.add(i); + let b = *s2.add(i); + if a != b { + return a as i32 - b as i32; + } + i += 1; + } + 0 + } + + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { + memcmp(s1, s2, n) + } + + #[mem_builtin] + #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] + pub unsafe extern "C" fn strlen(s: *const core::ffi::c_char) -> usize { + let mut n = 0; + let mut s = s; + while *s != 0 { + n += 1; + s = s.offset(1); + } + n + } +} + +// `bytes` must be a multiple of `mem::size_of::()` +#[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))] +fn memcpy_element_unordered_atomic(dest: *mut T, src: *const T, bytes: usize) { + unsafe { + let n = exact_div(bytes, mem::size_of::()); + let mut i = 0; + while i < n { + atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); + i += 1; + } + } +} + +// `bytes` must be a multiple of `mem::size_of::()` +#[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))] +fn memmove_element_unordered_atomic(dest: *mut T, src: *const T, bytes: usize) { + unsafe { + let n = exact_div(bytes, mem::size_of::()); + if src < dest as *const T { + // copy from end + let mut i = n; + while i != 0 { + i -= 1; + atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); + } + } else { + // copy from beginning + let mut i = 0; + while i < n { + atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); + i += 1; + } + } + } +} + +// `T` must be a primitive integer type, and `bytes` must be a multiple of `mem::size_of::()` +#[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))] +fn memset_element_unordered_atomic(s: *mut T, c: u8, bytes: usize) +where + T: Copy + From + Shl + BitOr, +{ + unsafe { + let n = exact_div(bytes, mem::size_of::()); + + // Construct a value of type `T` consisting of repeated `c` + // bytes, to let us ensure we write each `T` atomically. 
+ let mut x = T::from(c); + let mut i = 1; + while i < mem::size_of::() { + x = x << 8 | T::from(c); + i += 1; + } + + // Write it to `s` + let mut i = 0; + while i < n { + atomic_store_unordered(s.add(i), x); + i += 1; + } + } +} + +intrinsics! { + #[cfg(target_has_atomic_load_store = "8")] + pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () { + memcpy_element_unordered_atomic(dest, src, bytes); + } + #[cfg(target_has_atomic_load_store = "16")] + pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () { + memcpy_element_unordered_atomic(dest, src, bytes); + } + #[cfg(target_has_atomic_load_store = "32")] + pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () { + memcpy_element_unordered_atomic(dest, src, bytes); + } + #[cfg(target_has_atomic_load_store = "64")] + pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () { + memcpy_element_unordered_atomic(dest, src, bytes); + } + #[cfg(target_has_atomic_load_store = "128")] + pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () { + memcpy_element_unordered_atomic(dest, src, bytes); + } + + #[cfg(target_has_atomic_load_store = "8")] + pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () { + memmove_element_unordered_atomic(dest, src, bytes); + } + #[cfg(target_has_atomic_load_store = "16")] + pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () { + memmove_element_unordered_atomic(dest, src, bytes); + } + #[cfg(target_has_atomic_load_store = "32")] + pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () { + memmove_element_unordered_atomic(dest, src, bytes); + } + #[cfg(target_has_atomic_load_store = "64")] + pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () { + memmove_element_unordered_atomic(dest, src, bytes); + } + #[cfg(target_has_atomic_load_store = "128")] + pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () { + memmove_element_unordered_atomic(dest, src, bytes); + } + + #[cfg(target_has_atomic_load_store = "8")] + pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_1(s: *mut u8, c: u8, bytes: usize) -> () { + memset_element_unordered_atomic(s, c, bytes); + } + #[cfg(target_has_atomic_load_store = "16")] + pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_2(s: *mut u16, c: u8, bytes: usize) -> () { + memset_element_unordered_atomic(s, c, bytes); + } + #[cfg(target_has_atomic_load_store = "32")] + pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_4(s: *mut u32, c: u8, bytes: usize) -> () { + memset_element_unordered_atomic(s, c, bytes); + } + #[cfg(target_has_atomic_load_store = "64")] + pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_8(s: *mut u64, c: u8, bytes: usize) -> () { + memset_element_unordered_atomic(s, c, bytes); + } + #[cfg(target_has_atomic_load_store = "128")] + pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_16(s: *mut u128, c: u8, bytes: usize) -> () { + memset_element_unordered_atomic(s, c, 
bytes); + } +} diff --git a/vendor/compiler_builtins/src/mem/x86_64.rs b/vendor/compiler_builtins/src/mem/x86_64.rs new file mode 100644 index 000000000..a7ab6f605 --- /dev/null +++ b/vendor/compiler_builtins/src/mem/x86_64.rs @@ -0,0 +1,100 @@ +// On most modern Intel and AMD processors, "rep movsq" and "rep stosq" have +// been enhanced to perform better than an simple qword loop, making them ideal +// for implementing memcpy/memset. Note that "rep cmps" has received no such +// enhancement, so it is not used to implement memcmp. +// +// On certain recent Intel processors, "rep movsb" and "rep stosb" have been +// further enhanced to automatically select the best microarchitectural +// implementation based on length and alignment. See the following features from +// the "Intel® 64 and IA-32 Architectures Optimization Reference Manual": +// - ERMSB - Enhanced REP MOVSB and STOSB (Ivy Bridge and later) +// - FSRM - Fast Short REP MOV (Ice Lake and later) +// - Fast Zero-Length MOVSB (On no current hardware) +// - Fast Short STOSB (On no current hardware) +// +// To simplify things, we switch to using the byte-based variants if the "ermsb" +// feature is present at compile-time. We don't bother detecting other features. +// Note that ERMSB does not enhance the backwards (DF=1) "rep movsb". + +#[inline(always)] +#[cfg(target_feature = "ermsb")] +pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. + core::arch::asm!( + "repe movsb (%rsi), (%rdi)", + inout("rcx") count => _, + inout("rdi") dest => _, + inout("rsi") src => _, + options(att_syntax, nostack, preserves_flags) + ); +} + +#[inline(always)] +#[cfg(not(target_feature = "ermsb"))] +pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { + let qword_count = count >> 3; + let byte_count = count & 0b111; + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. + core::arch::asm!( + "repe movsq (%rsi), (%rdi)", + "mov {byte_count:e}, %ecx", + "repe movsb (%rsi), (%rdi)", + byte_count = in(reg) byte_count, + inout("rcx") qword_count => _, + inout("rdi") dest => _, + inout("rsi") src => _, + options(att_syntax, nostack, preserves_flags) + ); +} + +#[inline(always)] +pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { + let qword_count = count >> 3; + let byte_count = count & 0b111; + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. + core::arch::asm!( + "std", + "repe movsq (%rsi), (%rdi)", + "movl {byte_count:e}, %ecx", + "addq $7, %rdi", + "addq $7, %rsi", + "repe movsb (%rsi), (%rdi)", + "cld", + byte_count = in(reg) byte_count, + inout("rcx") qword_count => _, + inout("rdi") dest.add(count).wrapping_sub(8) => _, + inout("rsi") src.add(count).wrapping_sub(8) => _, + options(att_syntax, nostack) + ); +} + +#[inline(always)] +#[cfg(target_feature = "ermsb")] +pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. + core::arch::asm!( + "repe stosb %al, (%rdi)", + inout("rcx") count => _, + inout("rdi") dest => _, + inout("al") c => _, + options(att_syntax, nostack, preserves_flags) + ) +} + +#[inline(always)] +#[cfg(not(target_feature = "ermsb"))] +pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { + let qword_count = count >> 3; + let byte_count = count & 0b111; + // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. 
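+    // `rax` is bound to `c` broadcast into all eight bytes (`c * 0x0101_0101_0101_0101`);
+    // the bulk is stored as qwords, then the remaining `byte_count` (0..=7) bytes one at a time.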
+ core::arch::asm!( + "repe stosq %rax, (%rdi)", + "mov {byte_count:e}, %ecx", + "repe stosb %al, (%rdi)", + byte_count = in(reg) byte_count, + inout("rcx") qword_count => _, + inout("rdi") dest => _, + in("rax") (c as u64) * 0x0101010101010101, + options(att_syntax, nostack, preserves_flags) + ); +} diff --git a/vendor/compiler_builtins/src/probestack.rs b/vendor/compiler_builtins/src/probestack.rs new file mode 100644 index 000000000..0c30384db --- /dev/null +++ b/vendor/compiler_builtins/src/probestack.rs @@ -0,0 +1,350 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! This module defines the `__rust_probestack` intrinsic which is used in the +//! implementation of "stack probes" on certain platforms. +//! +//! The purpose of a stack probe is to provide a static guarantee that if a +//! thread has a guard page then a stack overflow is guaranteed to hit that +//! guard page. If a function did not have a stack probe then there's a risk of +//! having a stack frame *larger* than the guard page, so a function call could +//! skip over the guard page entirely and then later hit maybe the heap or +//! another thread, possibly leading to security vulnerabilities such as [The +//! Stack Clash], for example. +//! +//! [The Stack Clash]: https://blog.qualys.com/securitylabs/2017/06/19/the-stack-clash +//! +//! The `__rust_probestack` is called in the prologue of functions whose stack +//! size is larger than the guard page, for example larger than 4096 bytes on +//! x86. This function is then responsible for "touching" all pages relevant to +//! the stack to ensure that that if any of them are the guard page we'll hit +//! them guaranteed. +//! +//! The precise ABI for how this function operates is defined by LLVM. There's +//! no real documentation as to what this is, so you'd basically need to read +//! the LLVM source code for reference. Often though the test cases can be +//! illuminating as to the ABI that's generated, or just looking at the output +//! of `llc`. +//! +//! Note that `#[naked]` is typically used here for the stack probe because the +//! ABI corresponds to no actual ABI. +//! +//! Finally it's worth noting that at the time of this writing LLVM only has +//! support for stack probes on x86 and x86_64. There's no support for stack +//! probes on any other architecture like ARM or PowerPC64. LLVM I'm sure would +//! be more than welcome to accept such a change! + +#![cfg(not(feature = "mangled-names"))] +// Windows already has builtins to do this. +#![cfg(not(windows))] +// All these builtins require assembly +#![cfg(not(feature = "no-asm"))] +// We only define stack probing for these architectures today. +#![cfg(any(target_arch = "x86_64", target_arch = "x86"))] + +extern "C" { + pub fn __rust_probestack(); +} + +// A wrapper for our implementation of __rust_probestack, which allows us to +// keep the assembly inline while controlling all CFI directives in the assembly +// emitted for the function. +// +// This is the ELF version. +#[cfg(not(any(target_vendor = "apple", target_os = "uefi")))] +macro_rules! 
define_rust_probestack { + ($body: expr) => { + concat!( + " + .pushsection .text.__rust_probestack + .globl __rust_probestack + .type __rust_probestack, @function + .hidden __rust_probestack + __rust_probestack: + ", + $body, + " + .size __rust_probestack, . - __rust_probestack + .popsection + " + ) + }; +} + +#[cfg(all(target_os = "uefi", target_arch = "x86_64"))] +macro_rules! define_rust_probestack { + ($body: expr) => { + concat!( + " + .globl __rust_probestack + __rust_probestack: + ", + $body + ) + }; +} + +// Same as above, but for Mach-O. Note that the triple underscore +// is deliberate +#[cfg(target_vendor = "apple")] +macro_rules! define_rust_probestack { + ($body: expr) => { + concat!( + " + .globl ___rust_probestack + ___rust_probestack: + ", + $body + ) + }; +} + +// In UEFI x86 arch, triple underscore is deliberate. +#[cfg(all(target_os = "uefi", target_arch = "x86"))] +macro_rules! define_rust_probestack { + ($body: expr) => { + concat!( + " + .globl ___rust_probestack + ___rust_probestack: + ", + $body + ) + }; +} + +// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, +// ensuring that if any pages are unmapped we'll make a page fault. +// +// The ABI here is that the stack frame size is located in `%rax`. Upon +// return we're not supposed to modify `%rsp` or `%rax`. +// +// Any changes to this function should be replicated to the SGX version below. +#[cfg(all( + target_arch = "x86_64", + not(all(target_env = "sgx", target_vendor = "fortanix")) +))] +core::arch::global_asm!( + define_rust_probestack!( + " + .cfi_startproc + pushq %rbp + .cfi_adjust_cfa_offset 8 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + + mov %rax,%r11 // duplicate %rax as we're clobbering %r11 + + // Main loop, taken in one page increments. We're decrementing rsp by + // a page each time until there's less than a page remaining. We're + // guaranteed that this function isn't called unless there's more than a + // page needed. + // + // Note that we're also testing against `8(%rsp)` to account for the 8 + // bytes pushed on the stack orginally with our return address. Using + // `8(%rsp)` simulates us testing the stack pointer in the caller's + // context. + + // It's usually called when %rax >= 0x1000, but that's not always true. + // Dynamic stack allocation, which is needed to implement unsized + // rvalues, triggers stackprobe even if %rax < 0x1000. + // Thus we have to check %r11 first to avoid segfault. + cmp $0x1000,%r11 + jna 3f +2: + sub $0x1000,%rsp + test %rsp,8(%rsp) + sub $0x1000,%r11 + cmp $0x1000,%r11 + ja 2b + +3: + // Finish up the last remaining stack space requested, getting the last + // bits out of r11 + sub %r11,%rsp + test %rsp,8(%rsp) + + // Restore the stack pointer to what it previously was when entering + // this function. The caller will readjust the stack pointer after we + // return. + add %rax,%rsp + + leave + .cfi_def_cfa_register %rsp + .cfi_adjust_cfa_offset -8 + ret + .cfi_endproc + " + ), + options(att_syntax) +); + +// This function is the same as above, except that some instructions are +// [manually patched for LVI]. 
+// +// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions +#[cfg(all( + target_arch = "x86_64", + all(target_env = "sgx", target_vendor = "fortanix") +))] +core::arch::global_asm!( + define_rust_probestack!( + " + .cfi_startproc + pushq %rbp + .cfi_adjust_cfa_offset 8 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + + mov %rax,%r11 // duplicate %rax as we're clobbering %r11 + + // Main loop, taken in one page increments. We're decrementing rsp by + // a page each time until there's less than a page remaining. We're + // guaranteed that this function isn't called unless there's more than a + // page needed. + // + // Note that we're also testing against `8(%rsp)` to account for the 8 + // bytes pushed on the stack orginally with our return address. Using + // `8(%rsp)` simulates us testing the stack pointer in the caller's + // context. + + // It's usually called when %rax >= 0x1000, but that's not always true. + // Dynamic stack allocation, which is needed to implement unsized + // rvalues, triggers stackprobe even if %rax < 0x1000. + // Thus we have to check %r11 first to avoid segfault. + cmp $0x1000,%r11 + jna 3f +2: + sub $0x1000,%rsp + test %rsp,8(%rsp) + sub $0x1000,%r11 + cmp $0x1000,%r11 + ja 2b + +3: + // Finish up the last remaining stack space requested, getting the last + // bits out of r11 + sub %r11,%rsp + test %rsp,8(%rsp) + + // Restore the stack pointer to what it previously was when entering + // this function. The caller will readjust the stack pointer after we + // return. + add %rax,%rsp + + leave + .cfi_def_cfa_register %rsp + .cfi_adjust_cfa_offset -8 + pop %r11 + lfence + jmp *%r11 + .cfi_endproc + " + ), + options(att_syntax) +); + +#[cfg(all(target_arch = "x86", not(target_os = "uefi")))] +// This is the same as x86_64 above, only translated for 32-bit sizes. Note +// that on Unix we're expected to restore everything as it was, this +// function basically can't tamper with anything. +// +// The ABI here is the same as x86_64, except everything is 32-bits large. +core::arch::global_asm!( + define_rust_probestack!( + " + .cfi_startproc + push %ebp + .cfi_adjust_cfa_offset 4 + .cfi_offset %ebp, -8 + mov %esp, %ebp + .cfi_def_cfa_register %ebp + push %ecx + mov %eax,%ecx + + cmp $0x1000,%ecx + jna 3f +2: + sub $0x1000,%esp + test %esp,8(%esp) + sub $0x1000,%ecx + cmp $0x1000,%ecx + ja 2b + +3: + sub %ecx,%esp + test %esp,8(%esp) + + add %eax,%esp + pop %ecx + leave + .cfi_def_cfa_register %esp + .cfi_adjust_cfa_offset -4 + ret + .cfi_endproc + " + ), + options(att_syntax) +); + +#[cfg(all(target_arch = "x86", target_os = "uefi"))] +// UEFI target is windows like target. LLVM will do _chkstk things like windows. +// probestack function will also do things like _chkstk in MSVC. +// So we need to sub %ax %sp in probestack when arch is x86. +// +// REF: Rust commit(74e80468347) +// rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805 +// Comments in LLVM: +// MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. +// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp +// themselves. 
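+// Hence the 32-bit asm below ends with an extra `sub %eax, %esp` before `ret`,
+// unlike the non-UEFI x86 version above.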
+core::arch::global_asm!( + define_rust_probestack!( + " + .cfi_startproc + push %ebp + .cfi_adjust_cfa_offset 4 + .cfi_offset %ebp, -8 + mov %esp, %ebp + .cfi_def_cfa_register %ebp + push %ecx + push %edx + mov %eax,%ecx + + cmp $0x1000,%ecx + jna 3f +2: + sub $0x1000,%esp + test %esp,8(%esp) + sub $0x1000,%ecx + cmp $0x1000,%ecx + ja 2b + +3: + sub %ecx,%esp + test %esp,8(%esp) + mov 4(%ebp),%edx + mov %edx, 12(%esp) + add %eax,%esp + pop %edx + pop %ecx + leave + + sub %eax, %esp + .cfi_def_cfa_register %esp + .cfi_adjust_cfa_offset -4 + ret + .cfi_endproc + " + ), + options(att_syntax) +); diff --git a/vendor/compiler_builtins/src/riscv.rs b/vendor/compiler_builtins/src/riscv.rs new file mode 100644 index 000000000..ee78b9dba --- /dev/null +++ b/vendor/compiler_builtins/src/riscv.rs @@ -0,0 +1,34 @@ +intrinsics! { + // Implementation from gcc + // https://raw.githubusercontent.com/gcc-mirror/gcc/master/libgcc/config/epiphany/mulsi3.c + pub extern "C" fn __mulsi3(a: u32, b: u32) -> u32 { + let (mut a, mut b) = (a, b); + let mut r = 0; + + while a > 0 { + if a & 1 > 0 { + r += b; + } + a >>= 1; + b <<= 1; + } + + r + } + + #[cfg(not(target_feature = "m"))] + pub extern "C" fn __muldi3(a: u64, b: u64) -> u64 { + let (mut a, mut b) = (a, b); + let mut r = 0; + + while a > 0 { + if a & 1 > 0 { + r += b; + } + a >>= 1; + b <<= 1; + } + + r + } +} diff --git a/vendor/compiler_builtins/src/x86.rs b/vendor/compiler_builtins/src/x86.rs new file mode 100644 index 000000000..fd1f32e3a --- /dev/null +++ b/vendor/compiler_builtins/src/x86.rs @@ -0,0 +1,85 @@ +#![allow(unused_imports)] + +use core::intrinsics; + +// NOTE These functions are implemented using assembly because they using a custom +// calling convention which can't be implemented using a normal Rust function + +// NOTE These functions are never mangled as they are not tested against compiler-rt +// and mangling ___chkstk would break the `jmp ___chkstk` instruction in __alloca + +intrinsics! 
{ + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn ___chkstk_ms() { + core::arch::asm!( + "push %ecx", + "push %eax", + "cmp $0x1000,%eax", + "lea 12(%esp),%ecx", + "jb 1f", + "2:", + "sub $0x1000,%ecx", + "test %ecx,(%ecx)", + "sub $0x1000,%eax", + "cmp $0x1000,%eax", + "ja 2b", + "1:", + "sub %eax,%ecx", + "test %ecx,(%ecx)", + "pop %eax", + "pop %ecx", + "ret", + options(noreturn, att_syntax) + ); + } + + // FIXME: __alloca should be an alias to __chkstk + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn __alloca() { + core::arch::asm!( + "jmp ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable" + options(noreturn, att_syntax) + ); + } + + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn ___chkstk() { + core::arch::asm!( + "push %ecx", + "cmp $0x1000,%eax", + "lea 8(%esp),%ecx", // esp before calling this routine -> ecx + "jb 1f", + "2:", + "sub $0x1000,%ecx", + "test %ecx,(%ecx)", + "sub $0x1000,%eax", + "cmp $0x1000,%eax", + "ja 2b", + "1:", + "sub %eax,%ecx", + "test %ecx,(%ecx)", + "lea 4(%esp),%eax", // load pointer to the return address into eax + "mov %ecx,%esp", // install the new top of stack pointer into esp + "mov -4(%eax),%ecx", // restore ecx + "push (%eax)", // push return address onto the stack + "sub %esp,%eax", // restore the original value in eax + "ret", + options(noreturn, att_syntax) + ); + } +} diff --git a/vendor/compiler_builtins/src/x86_64.rs b/vendor/compiler_builtins/src/x86_64.rs new file mode 100644 index 000000000..393eeddd8 --- /dev/null +++ b/vendor/compiler_builtins/src/x86_64.rs @@ -0,0 +1,94 @@ +#![allow(unused_imports)] + +use core::intrinsics; + +// NOTE These functions are implemented using assembly because they using a custom +// calling convention which can't be implemented using a normal Rust function + +// NOTE These functions are never mangled as they are not tested against compiler-rt +// and mangling ___chkstk would break the `jmp ___chkstk` instruction in __alloca + +intrinsics! 
{ + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn ___chkstk_ms() { + core::arch::asm!( + "push %rcx", + "push %rax", + "cmp $0x1000,%rax", + "lea 24(%rsp),%rcx", + "jb 1f", + "2:", + "sub $0x1000,%rcx", + "test %rcx,(%rcx)", + "sub $0x1000,%rax", + "cmp $0x1000,%rax", + "ja 2b", + "1:", + "sub %rax,%rcx", + "test %rcx,(%rcx)", + "pop %rax", + "pop %rcx", + "ret", + options(noreturn, att_syntax) + ); + } + + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn __alloca() { + core::arch::asm!( + "mov %rcx,%rax", // x64 _alloca is a normal function with parameter in rcx + "jmp ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable" + options(noreturn, att_syntax) + ); + } + + #[naked] + #[cfg(all( + windows, + target_env = "gnu", + not(feature = "no-asm") + ))] + pub unsafe extern "C" fn ___chkstk() { + core::arch::asm!( + "push %rcx", + "cmp $0x1000,%rax", + "lea 16(%rsp),%rcx", // rsp before calling this routine -> rcx + "jb 1f", + "2:", + "sub $0x1000,%rcx", + "test %rcx,(%rcx)", + "sub $0x1000,%rax", + "cmp $0x1000,%rax", + "ja 2b", + "1:", + "sub %rax,%rcx", + "test %rcx,(%rcx)", + "lea 8(%rsp),%rax", // load pointer to the return address into rax + "mov %rcx,%rsp", // install the new top of stack pointer into rsp + "mov -8(%rax),%rcx", // restore rcx + "push (%rax)", // push return address onto the stack + "sub %rsp,%rax", // restore the original value in rax + "ret", + options(noreturn, att_syntax) + ); + } +} + +// HACK(https://github.com/rust-lang/rust/issues/62785): x86_64-unknown-uefi needs special LLVM +// support unless we emit the _fltused +mod _fltused { + #[no_mangle] + #[used] + #[cfg(target_os = "uefi")] + static _fltused: i32 = 0; +} -- cgit v1.2.3