Diffstat (limited to 'third_party/rust/rust_decimal/src/ops')
-rw-r--r-- | third_party/rust/rust_decimal/src/ops/add.rs    | 382
-rw-r--r-- | third_party/rust/rust_decimal/src/ops/array.rs  | 381
-rw-r--r-- | third_party/rust/rust_decimal/src/ops/cmp.rs    | 101
-rw-r--r-- | third_party/rust/rust_decimal/src/ops/common.rs | 455
-rw-r--r-- | third_party/rust/rust_decimal/src/ops/div.rs    | 658
-rw-r--r-- | third_party/rust/rust_decimal/src/ops/legacy.rs | 843
-rw-r--r-- | third_party/rust/rust_decimal/src/ops/mul.rs    | 168
-rw-r--r-- | third_party/rust/rust_decimal/src/ops/rem.rs    | 285
8 files changed, 3273 insertions, 0 deletions
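The modules added below implement Decimal's internal arithmetic (addition/subtraction, comparison, division, multiplication, remainder, plus the legacy code paths). For orientation only, here is a minimal usage sketch of how these internal ops surface through the crate's public arithmetic; it assumes rust_decimal's usual `Decimal::new` constructor and `checked_*` methods and is not part of the vendored diff itself:

```rust
use rust_decimal::Decimal;

fn main() {
    // add.rs / legacy.rs back the Add impl; operands are rescaled to a
    // common scale before the mantissas are added.
    let a = Decimal::new(125, 2); // 1.25
    let b = Decimal::new(3, 1);   // 0.3
    assert_eq!((a + b).to_string(), "1.55");

    // div.rs reports CalculationResult::DivByZero internally, which the
    // public checked_div exposes as None instead of panicking.
    assert_eq!(a.checked_div(Decimal::ZERO), None);

    // mul.rs signals CalculationResult::Overflow when the 96-bit mantissa
    // is exceeded; checked_mul maps that to None as well.
    assert_eq!(Decimal::MAX.checked_mul(Decimal::new(2, 0)), None);
}
```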
diff --git a/third_party/rust/rust_decimal/src/ops/add.rs b/third_party/rust/rust_decimal/src/ops/add.rs new file mode 100644 index 0000000000..52ba675f23 --- /dev/null +++ b/third_party/rust/rust_decimal/src/ops/add.rs @@ -0,0 +1,382 @@ +use crate::constants::{MAX_I32_SCALE, POWERS_10, SCALE_MASK, SCALE_SHIFT, SIGN_MASK, U32_MASK, U32_MAX}; +use crate::decimal::{CalculationResult, Decimal}; +use crate::ops::common::{Buf24, Dec64}; + +pub(crate) fn add_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult { + add_sub_internal(d1, d2, false) +} + +pub(crate) fn sub_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult { + add_sub_internal(d1, d2, true) +} + +#[inline] +fn add_sub_internal(d1: &Decimal, d2: &Decimal, subtract: bool) -> CalculationResult { + if d1.is_zero() { + // 0 - x or 0 + x + let mut result = *d2; + if subtract && !d2.is_zero() { + result.set_sign_negative(d2.is_sign_positive()); + } + return CalculationResult::Ok(result); + } + if d2.is_zero() { + // x - 0 or x + 0 + return CalculationResult::Ok(*d1); + } + + // Work out whether we need to rescale and/or if it's a subtract still given the signs of the + // numbers. + let flags = d1.flags() ^ d2.flags(); + let subtract = subtract ^ ((flags & SIGN_MASK) != 0); + let rescale = (flags & SCALE_MASK) > 0; + + // We optimize towards using 32 bit logic as much as possible. It's noticeably faster at + // scale, even on 64 bit machines + if d1.mid() | d1.hi() == 0 && d2.mid() | d2.hi() == 0 { + // We'll try to rescale, however we may end up with 64 bit (or more) numbers + // If we do, we'll choose a different flow than fast_add + if rescale { + // This is less optimized if we scale to a 64 bit integer. We can add some further logic + // here later on. + let rescale_factor = ((d2.flags() & SCALE_MASK) as i32 - (d1.flags() & SCALE_MASK) as i32) >> SCALE_SHIFT; + if rescale_factor < 0 { + // We try to rescale the rhs + if let Some(rescaled) = rescale32(d2.lo(), -rescale_factor) { + return fast_add(d1.lo(), rescaled, d1.flags(), subtract); + } + } else { + // We try to rescale the lhs + if let Some(rescaled) = rescale32(d1.lo(), rescale_factor) { + return fast_add( + rescaled, + d2.lo(), + (d2.flags() & SCALE_MASK) | (d1.flags() & SIGN_MASK), + subtract, + ); + } + } + } else { + return fast_add(d1.lo(), d2.lo(), d1.flags(), subtract); + } + } + + // Continue on with the slower 64 bit method + let d1 = Dec64::new(d1); + let d2 = Dec64::new(d2); + + // If we're not the same scale then make sure we're there first before starting addition + if rescale { + let rescale_factor = d2.scale as i32 - d1.scale as i32; + if rescale_factor < 0 { + let negative = subtract ^ d1.negative; + let scale = d1.scale; + unaligned_add(d2, d1, negative, scale, -rescale_factor, subtract) + } else { + let negative = d1.negative; + let scale = d2.scale; + unaligned_add(d1, d2, negative, scale, rescale_factor, subtract) + } + } else { + let neg = d1.negative; + let scale = d1.scale; + aligned_add(d1, d2, neg, scale, subtract) + } +} + +#[inline(always)] +fn rescale32(num: u32, rescale_factor: i32) -> Option<u32> { + if rescale_factor > MAX_I32_SCALE { + return None; + } + num.checked_mul(POWERS_10[rescale_factor as usize]) +} + +fn fast_add(lo1: u32, lo2: u32, flags: u32, subtract: bool) -> CalculationResult { + if subtract { + // Sub can't overflow because we're ensuring the bigger number always subtracts the smaller number + if lo1 < lo2 { + return CalculationResult::Ok(Decimal::from_parts_raw(lo2 - lo1, 0, 0, flags ^ SIGN_MASK)); + } + return 
CalculationResult::Ok(Decimal::from_parts_raw(lo1 - lo2, 0, 0, flags)); + } + // Add can overflow however, so we check for that explicitly + let lo = lo1.wrapping_add(lo2); + let mid = if lo < lo1 { 1 } else { 0 }; + CalculationResult::Ok(Decimal::from_parts_raw(lo, mid, 0, flags)) +} + +fn aligned_add(lhs: Dec64, rhs: Dec64, negative: bool, scale: u32, subtract: bool) -> CalculationResult { + if subtract { + // Signs differ, so subtract + let mut result = Dec64 { + negative, + scale, + low64: lhs.low64.wrapping_sub(rhs.low64), + hi: lhs.hi.wrapping_sub(rhs.hi), + }; + + // Check for carry + if result.low64 > lhs.low64 { + result.hi = result.hi.wrapping_sub(1); + if result.hi >= lhs.hi { + flip_sign(&mut result); + } + } else if result.hi > lhs.hi { + flip_sign(&mut result); + } + CalculationResult::Ok(result.to_decimal()) + } else { + // Signs are the same, so add + let mut result = Dec64 { + negative, + scale, + low64: lhs.low64.wrapping_add(rhs.low64), + hi: lhs.hi.wrapping_add(rhs.hi), + }; + + // Check for carry + if result.low64 < lhs.low64 { + result.hi = result.hi.wrapping_add(1); + if result.hi <= lhs.hi { + if result.scale == 0 { + return CalculationResult::Overflow; + } + reduce_scale(&mut result); + } + } else if result.hi < lhs.hi { + if result.scale == 0 { + return CalculationResult::Overflow; + } + reduce_scale(&mut result); + } + CalculationResult::Ok(result.to_decimal()) + } +} + +fn flip_sign(result: &mut Dec64) { + // Bitwise not the high portion + result.hi = !result.hi; + let low64 = ((result.low64 as i64).wrapping_neg()) as u64; + if low64 == 0 { + result.hi += 1; + } + result.low64 = low64; + result.negative = !result.negative; +} + +fn reduce_scale(result: &mut Dec64) { + let mut low64 = result.low64; + let mut hi = result.hi; + + let mut num = (hi as u64) + (1u64 << 32); + hi = (num / 10u64) as u32; + num = ((num - (hi as u64) * 10u64) << 32) + (low64 >> 32); + let mut div = (num / 10) as u32; + num = ((num - (div as u64) * 10u64) << 32) + (low64 & U32_MASK); + low64 = (div as u64) << 32; + div = (num / 10u64) as u32; + low64 = low64.wrapping_add(div as u64); + let remainder = (num as u32).wrapping_sub(div.wrapping_mul(10)); + + // Finally, round. This is optimizing slightly toward non-rounded numbers + if remainder >= 5 && (remainder > 5 || (low64 & 1) > 0) { + low64 = low64.wrapping_add(1); + if low64 == 0 { + hi += 1; + } + } + + result.low64 = low64; + result.hi = hi; + result.scale -= 1; +} + +// Assumption going into this function is that the LHS is the larger number and will "absorb" the +// smaller number. +fn unaligned_add( + lhs: Dec64, + rhs: Dec64, + negative: bool, + scale: u32, + rescale_factor: i32, + subtract: bool, +) -> CalculationResult { + let mut lhs = lhs; + let mut low64 = lhs.low64; + let mut high = lhs.hi; + let mut rescale_factor = rescale_factor; + + // First off, we see if we can get away with scaling small amounts (or none at all) + if high == 0 { + if low64 <= U32_MAX { + // We know it's not zero, so we start scaling. 
+ // Start with reducing the scale down for the low portion + while low64 <= U32_MAX { + if rescale_factor <= MAX_I32_SCALE { + low64 *= POWERS_10[rescale_factor as usize] as u64; + lhs.low64 = low64; + return aligned_add(lhs, rhs, negative, scale, subtract); + } + rescale_factor -= MAX_I32_SCALE; + low64 *= POWERS_10[9] as u64; + } + } + + // Reduce the scale for the high portion + while high == 0 { + let power = if rescale_factor <= MAX_I32_SCALE { + POWERS_10[rescale_factor as usize] as u64 + } else { + POWERS_10[9] as u64 + }; + + let tmp_low = (low64 & U32_MASK) * power; + let tmp_hi = (low64 >> 32) * power + (tmp_low >> 32); + low64 = (tmp_low & U32_MASK) + (tmp_hi << 32); + high = (tmp_hi >> 32) as u32; + rescale_factor -= MAX_I32_SCALE; + if rescale_factor <= 0 { + lhs.low64 = low64; + lhs.hi = high; + return aligned_add(lhs, rhs, negative, scale, subtract); + } + } + } + + // See if we can get away with keeping it in the 96 bits. Otherwise, we need a buffer + let mut tmp64: u64; + loop { + let power = if rescale_factor <= MAX_I32_SCALE { + POWERS_10[rescale_factor as usize] as u64 + } else { + POWERS_10[9] as u64 + }; + + let tmp_low = (low64 & U32_MASK) * power; + tmp64 = (low64 >> 32) * power + (tmp_low >> 32); + low64 = (tmp_low & U32_MASK) + (tmp64 << 32); + tmp64 >>= 32; + tmp64 += (high as u64) * power; + + rescale_factor -= MAX_I32_SCALE; + + if tmp64 > U32_MAX { + break; + } else { + high = tmp64 as u32; + if rescale_factor <= 0 { + lhs.low64 = low64; + lhs.hi = high; + return aligned_add(lhs, rhs, negative, scale, subtract); + } + } + } + + let mut buffer = Buf24::zero(); + buffer.set_low64(low64); + buffer.set_mid64(tmp64); + + let mut upper_word = buffer.upper_word(); + while rescale_factor > 0 { + let power = if rescale_factor <= MAX_I32_SCALE { + POWERS_10[rescale_factor as usize] as u64 + } else { + POWERS_10[9] as u64 + }; + tmp64 = 0; + for (index, part) in buffer.data.iter_mut().enumerate() { + tmp64 = tmp64.wrapping_add((*part as u64) * power); + *part = tmp64 as u32; + tmp64 >>= 32; + if index + 1 > upper_word { + break; + } + } + + if tmp64 & U32_MASK > 0 { + // Extend the result + upper_word += 1; + buffer.data[upper_word] = tmp64 as u32; + } + + rescale_factor -= MAX_I32_SCALE; + } + + // Do the add + tmp64 = buffer.low64(); + low64 = rhs.low64; + let tmp_hi = buffer.data[2]; + high = rhs.hi; + + if subtract { + low64 = tmp64.wrapping_sub(low64); + high = tmp_hi.wrapping_sub(high); + + // Check for carry + let carry = if low64 > tmp64 { + high = high.wrapping_sub(1); + high >= tmp_hi + } else { + high > tmp_hi + }; + + if carry { + for part in buffer.data.iter_mut().skip(3) { + *part = part.wrapping_sub(1); + if *part > 0 { + break; + } + } + + if buffer.data[upper_word] == 0 && upper_word < 3 { + return CalculationResult::Ok(Decimal::from_parts( + low64 as u32, + (low64 >> 32) as u32, + high, + negative, + scale, + )); + } + } + } else { + low64 = low64.wrapping_add(tmp64); + high = high.wrapping_add(tmp_hi); + + // Check for carry + let carry = if low64 < tmp64 { + high = high.wrapping_add(1); + high <= tmp_hi + } else { + high < tmp_hi + }; + + if carry { + for (index, part) in buffer.data.iter_mut().enumerate().skip(3) { + if upper_word < index { + *part = 1; + upper_word = index; + break; + } + *part = part.wrapping_add(1); + if *part > 0 { + break; + } + } + } + } + + buffer.set_low64(low64); + buffer.data[2] = high; + if let Some(scale) = buffer.rescale(upper_word, scale) { + CalculationResult::Ok(Decimal::from_parts( + buffer.data[0], + 
buffer.data[1], + buffer.data[2], + negative, + scale, + )) + } else { + CalculationResult::Overflow + } +} diff --git a/third_party/rust/rust_decimal/src/ops/array.rs b/third_party/rust/rust_decimal/src/ops/array.rs new file mode 100644 index 0000000000..2ab58b4dfb --- /dev/null +++ b/third_party/rust/rust_decimal/src/ops/array.rs @@ -0,0 +1,381 @@ +use crate::constants::{MAX_PRECISION_U32, POWERS_10, U32_MASK}; + +/// Rescales the given decimal to new scale. +/// e.g. with 1.23 and new scale 3 rescale the value to 1.230 +#[inline(always)] +pub(crate) fn rescale_internal(value: &mut [u32; 3], value_scale: &mut u32, new_scale: u32) { + if *value_scale == new_scale { + // Nothing to do + return; + } + + if is_all_zero(value) { + *value_scale = new_scale.min(MAX_PRECISION_U32); + return; + } + + if *value_scale > new_scale { + let mut diff = value_scale.wrapping_sub(new_scale); + // Scaling further isn't possible since we got an overflow + // In this case we need to reduce the accuracy of the "side to keep" + + // Now do the necessary rounding + let mut remainder = 0; + while let Some(diff_minus_one) = diff.checked_sub(1) { + if is_all_zero(value) { + *value_scale = new_scale; + return; + } + + diff = diff_minus_one; + + // Any remainder is discarded if diff > 0 still (i.e. lost precision) + remainder = div_by_u32(value, 10); + } + if remainder >= 5 { + for part in value.iter_mut() { + let digit = u64::from(*part) + 1u64; + remainder = if digit > U32_MASK { 1 } else { 0 }; + *part = (digit & U32_MASK) as u32; + if remainder == 0 { + break; + } + } + } + *value_scale = new_scale; + } else { + let mut diff = new_scale.wrapping_sub(*value_scale); + let mut working = [value[0], value[1], value[2]]; + while let Some(diff_minus_one) = diff.checked_sub(1) { + if mul_by_10(&mut working) == 0 { + value.copy_from_slice(&working); + diff = diff_minus_one; + } else { + break; + } + } + *value_scale = new_scale.wrapping_sub(diff); + } +} + +#[cfg(feature = "legacy-ops")] +pub(crate) fn add_by_internal(value: &mut [u32], by: &[u32]) -> u32 { + let mut carry: u64 = 0; + let vl = value.len(); + let bl = by.len(); + if vl >= bl { + let mut sum: u64; + for i in 0..bl { + sum = u64::from(value[i]) + u64::from(by[i]) + carry; + value[i] = (sum & U32_MASK) as u32; + carry = sum >> 32; + } + if vl > bl && carry > 0 { + for i in value.iter_mut().skip(bl) { + sum = u64::from(*i) + carry; + *i = (sum & U32_MASK) as u32; + carry = sum >> 32; + if carry == 0 { + break; + } + } + } + } else if vl + 1 == bl { + // Overflow, by default, is anything in the high portion of by + let mut sum: u64; + for i in 0..vl { + sum = u64::from(value[i]) + u64::from(by[i]) + carry; + value[i] = (sum & U32_MASK) as u32; + carry = sum >> 32; + } + if by[vl] > 0 { + carry += u64::from(by[vl]); + } + } else { + panic!("Internal error: add using incompatible length arrays. 
{} <- {}", vl, bl); + } + carry as u32 +} + +pub(crate) fn add_by_internal_flattened(value: &mut [u32; 3], by: u32) -> u32 { + manage_add_by_internal(by, value) +} + +#[inline] +pub(crate) fn add_one_internal(value: &mut [u32; 3]) -> u32 { + manage_add_by_internal(1, value) +} + +// `u64 as u32` are safe because of widening and 32bits shifts +#[inline] +pub(crate) fn manage_add_by_internal<const N: usize>(initial_carry: u32, value: &mut [u32; N]) -> u32 { + let mut carry = u64::from(initial_carry); + let mut iter = 0..value.len(); + let mut sum = 0; + + let mut sum_fn = |local_carry: &mut u64, idx| { + sum = u64::from(value[idx]).wrapping_add(*local_carry); + value[idx] = (sum & U32_MASK) as u32; + *local_carry = sum.wrapping_shr(32); + }; + + if let Some(idx) = iter.next() { + sum_fn(&mut carry, idx); + } + + for idx in iter { + if carry > 0 { + sum_fn(&mut carry, idx); + } + } + + carry as u32 +} + +pub(crate) fn sub_by_internal(value: &mut [u32], by: &[u32]) -> u32 { + // The way this works is similar to long subtraction + // Let's assume we're working with bytes for simplicity in an example: + // 257 - 8 = 249 + // 0000_0001 0000_0001 - 0000_0000 0000_1000 = 0000_0000 1111_1001 + // We start by doing the first byte... + // Overflow = 0 + // Left = 0000_0001 (1) + // Right = 0000_1000 (8) + // Firstly, we make sure the left and right are scaled up to twice the size + // Left = 0000_0000 0000_0001 + // Right = 0000_0000 0000_1000 + // We then subtract right from left + // Result = Left - Right = 1111_1111 1111_1001 + // We subtract the overflow, which in this case is 0. + // Because left < right (1 < 8) we invert the high part. + // Lo = 1111_1001 + // Hi = 1111_1111 -> 0000_0001 + // Lo is the field, hi is the overflow. + // We do the same for the second byte... + // Overflow = 1 + // Left = 0000_0001 + // Right = 0000_0000 + // Result = Left - Right = 0000_0000 0000_0001 + // We subtract the overflow... + // Result = 0000_0000 0000_0001 - 1 = 0 + // And we invert the high, just because (invert 0 = 0). 
+ // So our result is: + // 0000_0000 1111_1001 + let mut overflow = 0; + let vl = value.len(); + let bl = by.len(); + for i in 0..vl { + if i >= bl { + break; + } + let (lo, hi) = sub_part(value[i], by[i], overflow); + value[i] = lo; + overflow = hi; + } + overflow +} + +fn sub_part(left: u32, right: u32, overflow: u32) -> (u32, u32) { + let part = 0x1_0000_0000u64 + u64::from(left) - (u64::from(right) + u64::from(overflow)); + let lo = part as u32; + let hi = 1 - ((part >> 32) as u32); + (lo, hi) +} + +// Returns overflow +#[inline] +pub(crate) fn mul_by_10(bits: &mut [u32; 3]) -> u32 { + let mut overflow = 0u64; + for b in bits.iter_mut() { + let result = u64::from(*b) * 10u64 + overflow; + let hi = (result >> 32) & U32_MASK; + let lo = (result & U32_MASK) as u32; + *b = lo; + overflow = hi; + } + + overflow as u32 +} + +// Returns overflow +pub(crate) fn mul_by_u32(bits: &mut [u32], m: u32) -> u32 { + let mut overflow = 0; + for b in bits.iter_mut() { + let (lo, hi) = mul_part(*b, m, overflow); + *b = lo; + overflow = hi; + } + overflow +} + +pub(crate) fn mul_part(left: u32, right: u32, high: u32) -> (u32, u32) { + let result = u64::from(left) * u64::from(right) + u64::from(high); + let hi = ((result >> 32) & U32_MASK) as u32; + let lo = (result & U32_MASK) as u32; + (lo, hi) +} + +// Returns remainder +pub(crate) fn div_by_u32<const N: usize>(bits: &mut [u32; N], divisor: u32) -> u32 { + if divisor == 0 { + // Divide by zero + panic!("Internal error: divide by zero"); + } else if divisor == 1 { + // dividend remains unchanged + 0 + } else { + let mut remainder = 0u32; + let divisor = u64::from(divisor); + for part in bits.iter_mut().rev() { + let temp = (u64::from(remainder) << 32) + u64::from(*part); + remainder = (temp % divisor) as u32; + *part = (temp / divisor) as u32; + } + + remainder + } +} + +pub(crate) fn div_by_1x(bits: &mut [u32; 3], power: usize) -> u32 { + let mut remainder = 0u32; + let divisor = POWERS_10[power] as u64; + let temp = ((remainder as u64) << 32) + (bits[2] as u64); + remainder = (temp % divisor) as u32; + bits[2] = (temp / divisor) as u32; + let temp = ((remainder as u64) << 32) + (bits[1] as u64); + remainder = (temp % divisor) as u32; + bits[1] = (temp / divisor) as u32; + let temp = ((remainder as u64) << 32) + (bits[0] as u64); + remainder = (temp % divisor) as u32; + bits[0] = (temp / divisor) as u32; + remainder +} + +#[inline] +pub(crate) fn shl1_internal(bits: &mut [u32], carry: u32) -> u32 { + let mut carry = carry; + for part in bits.iter_mut() { + let b = *part >> 31; + *part = (*part << 1) | carry; + carry = b; + } + carry +} + +#[inline] +pub(crate) fn cmp_internal(left: &[u32; 3], right: &[u32; 3]) -> core::cmp::Ordering { + let left_hi: u32 = left[2]; + let right_hi: u32 = right[2]; + let left_lo: u64 = u64::from(left[1]) << 32 | u64::from(left[0]); + let right_lo: u64 = u64::from(right[1]) << 32 | u64::from(right[0]); + if left_hi < right_hi || (left_hi <= right_hi && left_lo < right_lo) { + core::cmp::Ordering::Less + } else if left_hi == right_hi && left_lo == right_lo { + core::cmp::Ordering::Equal + } else { + core::cmp::Ordering::Greater + } +} + +#[inline] +pub(crate) fn is_all_zero<const N: usize>(bits: &[u32; N]) -> bool { + bits.iter().all(|b| *b == 0) +} + +#[cfg(test)] +mod test { + // Tests on private methods. + // + // All public tests should go under `tests/`. 
+ + use super::*; + use crate::prelude::*; + + #[test] + fn it_can_rescale_internal() { + fn extract(value: &str) -> ([u32; 3], u32) { + let v = Decimal::from_str(value).unwrap(); + (v.mantissa_array3(), v.scale()) + } + + let tests = &[ + ("1", 0, "1", 0), + ("1", 1, "1.0", 1), + ("1", 5, "1.00000", 5), + ("1", 10, "1.0000000000", 10), + ("1", 20, "1.00000000000000000000", 20), + ( + "0.6386554621848739495798319328", + 27, + "0.638655462184873949579831933", + 27, + ), + ( + "843.65000000", // Scale 8 + 25, + "843.6500000000000000000000000", + 25, + ), + ( + "843.65000000", // Scale 8 + 30, + "843.6500000000000000000000000", + 25, // Only fits 25 + ), + ("0", 130, "0.000000000000000000000000000000", 28), + ]; + + for &(value_raw, new_scale, expected_value, expected_scale) in tests { + let (expected_value, _) = extract(expected_value); + let (mut value, mut value_scale) = extract(value_raw); + rescale_internal(&mut value, &mut value_scale, new_scale); + assert_eq!(value, expected_value); + assert_eq!( + value_scale, expected_scale, + "value: {}, requested scale: {}", + value_raw, new_scale + ); + } + } + + #[test] + fn test_shl1_internal() { + struct TestCase { + // One thing to be cautious of is that the structure of a number here for shifting left is + // the reverse of how you may conceive this mentally. i.e. a[2] contains the higher order + // bits: a[2] a[1] a[0] + given: [u32; 3], + given_carry: u32, + expected: [u32; 3], + expected_carry: u32, + } + let tests = [ + TestCase { + given: [1, 0, 0], + given_carry: 0, + expected: [2, 0, 0], + expected_carry: 0, + }, + TestCase { + given: [1, 0, 2147483648], + given_carry: 1, + expected: [3, 0, 0], + expected_carry: 1, + }, + ]; + for case in &tests { + let mut test = [case.given[0], case.given[1], case.given[2]]; + let carry = shl1_internal(&mut test, case.given_carry); + assert_eq!( + test, case.expected, + "Bits: {:?} << 1 | {}", + case.given, case.given_carry + ); + assert_eq!( + carry, case.expected_carry, + "Carry: {:?} << 1 | {}", + case.given, case.given_carry + ) + } + } +} diff --git a/third_party/rust/rust_decimal/src/ops/cmp.rs b/third_party/rust/rust_decimal/src/ops/cmp.rs new file mode 100644 index 0000000000..636085bff5 --- /dev/null +++ b/third_party/rust/rust_decimal/src/ops/cmp.rs @@ -0,0 +1,101 @@ +use crate::constants::{MAX_I32_SCALE, POWERS_10, U32_MASK, U32_MAX}; +use crate::decimal::Decimal; +use crate::ops::common::Dec64; + +use core::cmp::Ordering; + +pub(crate) fn cmp_impl(d1: &Decimal, d2: &Decimal) -> Ordering { + if d2.is_zero() { + return if d1.is_zero() { + return Ordering::Equal; + } else if d1.is_sign_negative() { + Ordering::Less + } else { + Ordering::Greater + }; + } + if d1.is_zero() { + return if d2.is_sign_negative() { + Ordering::Greater + } else { + Ordering::Less + }; + } + // If the sign is different, then it's an easy answer + if d1.is_sign_negative() != d2.is_sign_negative() { + return if d1.is_sign_negative() { + Ordering::Less + } else { + Ordering::Greater + }; + } + + // Otherwise, do a deep comparison + let d1 = Dec64::new(d1); + let d2 = Dec64::new(d2); + // We know both signs are the same here so flip it here. + // Negative is handled differently. i.e. 
0.5 > 0.01 however -0.5 < -0.01 + if d1.negative { + cmp_internal(&d2, &d1) + } else { + cmp_internal(&d1, &d2) + } +} + +pub(in crate::ops) fn cmp_internal(d1: &Dec64, d2: &Dec64) -> Ordering { + // This function ignores sign + let mut d1_low = d1.low64; + let mut d1_high = d1.hi; + let mut d2_low = d2.low64; + let mut d2_high = d2.hi; + + // If the scale factors aren't equal then + if d1.scale != d2.scale { + let mut diff = d2.scale as i32 - d1.scale as i32; + if diff < 0 { + diff = -diff; + if !rescale(&mut d2_low, &mut d2_high, diff as u32) { + return Ordering::Less; + } + } else if !rescale(&mut d1_low, &mut d1_high, diff as u32) { + return Ordering::Greater; + } + } + + // They're the same scale, do a standard bitwise comparison + let hi_order = d1_high.cmp(&d2_high); + if hi_order != Ordering::Equal { + return hi_order; + } + d1_low.cmp(&d2_low) +} + +fn rescale(low64: &mut u64, high: &mut u32, diff: u32) -> bool { + let mut diff = diff as i32; + // We need to modify d1 by 10^diff to get it to the same scale as d2 + loop { + let power = if diff >= MAX_I32_SCALE { + POWERS_10[9] + } else { + POWERS_10[diff as usize] + } as u64; + let tmp_lo_32 = (*low64 & U32_MASK) * power; + let mut tmp = (*low64 >> 32) * power + (tmp_lo_32 >> 32); + *low64 = (tmp_lo_32 & U32_MASK) + (tmp << 32); + tmp >>= 32; + tmp = tmp.wrapping_add((*high as u64) * power); + // Indicates > 96 bits + if tmp > U32_MAX { + return false; + } + *high = tmp as u32; + + // Keep scaling if there is more to go + diff -= MAX_I32_SCALE; + if diff <= 0 { + break; + } + } + + true +} diff --git a/third_party/rust/rust_decimal/src/ops/common.rs b/third_party/rust/rust_decimal/src/ops/common.rs new file mode 100644 index 0000000000..c29362d824 --- /dev/null +++ b/third_party/rust/rust_decimal/src/ops/common.rs @@ -0,0 +1,455 @@ +use crate::constants::{MAX_I32_SCALE, MAX_PRECISION_I32, POWERS_10}; +use crate::Decimal; + +#[derive(Debug)] +pub struct Buf12 { + pub data: [u32; 3], +} + +impl Buf12 { + pub(super) const fn from_dec64(value: &Dec64) -> Self { + Buf12 { + data: [value.low64 as u32, (value.low64 >> 32) as u32, value.hi], + } + } + + pub(super) const fn from_decimal(value: &Decimal) -> Self { + Buf12 { + data: value.mantissa_array3(), + } + } + + #[inline(always)] + pub const fn lo(&self) -> u32 { + self.data[0] + } + #[inline(always)] + pub const fn mid(&self) -> u32 { + self.data[1] + } + #[inline(always)] + pub const fn hi(&self) -> u32 { + self.data[2] + } + #[inline(always)] + pub fn set_lo(&mut self, value: u32) { + self.data[0] = value; + } + #[inline(always)] + pub fn set_mid(&mut self, value: u32) { + self.data[1] = value; + } + #[inline(always)] + pub fn set_hi(&mut self, value: u32) { + self.data[2] = value; + } + + #[inline(always)] + pub const fn low64(&self) -> u64 { + ((self.data[1] as u64) << 32) | (self.data[0] as u64) + } + + #[inline(always)] + pub fn set_low64(&mut self, value: u64) { + self.data[1] = (value >> 32) as u32; + self.data[0] = value as u32; + } + + #[inline(always)] + pub const fn high64(&self) -> u64 { + ((self.data[2] as u64) << 32) | (self.data[1] as u64) + } + + #[inline(always)] + pub fn set_high64(&mut self, value: u64) { + self.data[2] = (value >> 32) as u32; + self.data[1] = value as u32; + } + + // Determine the maximum value of x that ensures that the quotient when scaled up by 10^x + // still fits in 96 bits. Ultimately, we want to make scale positive - if we can't then + // we're going to overflow. 
Because x is ultimately used to lookup inside the POWERS array, it + // must be a valid value 0 <= x <= 9 + pub fn find_scale(&self, scale: i32) -> Option<usize> { + const OVERFLOW_MAX_9_HI: u32 = 4; + const OVERFLOW_MAX_8_HI: u32 = 42; + const OVERFLOW_MAX_7_HI: u32 = 429; + const OVERFLOW_MAX_6_HI: u32 = 4294; + const OVERFLOW_MAX_5_HI: u32 = 42949; + const OVERFLOW_MAX_4_HI: u32 = 429496; + const OVERFLOW_MAX_3_HI: u32 = 4294967; + const OVERFLOW_MAX_2_HI: u32 = 42949672; + const OVERFLOW_MAX_1_HI: u32 = 429496729; + const OVERFLOW_MAX_9_LOW64: u64 = 5441186219426131129; + + let hi = self.data[2]; + let low64 = self.low64(); + let mut x = 0usize; + + // Quick check to stop us from trying to scale any more. + // + if hi > OVERFLOW_MAX_1_HI { + // If it's less than 0, which it probably is - overflow. We can't do anything. + if scale < 0 { + return None; + } + return Some(x); + } + + if scale > MAX_PRECISION_I32 - 9 { + // We can't scale by 10^9 without exceeding the max scale factor. + // Instead, we'll try to scale by the most that we can and see if that works. + // This is safe to do due to the check above. e.g. scale > 19 in the above, so it will + // evaluate to 9 or less below. + x = (MAX_PRECISION_I32 - scale) as usize; + if hi < POWER_OVERFLOW_VALUES[x - 1].data[2] { + if x as i32 + scale < 0 { + // We still overflow + return None; + } + return Some(x); + } + } else if hi < OVERFLOW_MAX_9_HI || hi == OVERFLOW_MAX_9_HI && low64 <= OVERFLOW_MAX_9_LOW64 { + return Some(9); + } + + // Do a binary search to find a power to scale by that is less than 9 + x = if hi > OVERFLOW_MAX_5_HI { + if hi > OVERFLOW_MAX_3_HI { + if hi > OVERFLOW_MAX_2_HI { + 1 + } else { + 2 + } + } else if hi > OVERFLOW_MAX_4_HI { + 3 + } else { + 4 + } + } else if hi > OVERFLOW_MAX_7_HI { + if hi > OVERFLOW_MAX_6_HI { + 5 + } else { + 6 + } + } else if hi > OVERFLOW_MAX_8_HI { + 7 + } else { + 8 + }; + + // Double check what we've found won't overflow. Otherwise, we go one below. + if hi == POWER_OVERFLOW_VALUES[x - 1].data[2] && low64 > POWER_OVERFLOW_VALUES[x - 1].low64() { + x -= 1; + } + + // Confirm we've actually resolved things + if x as i32 + scale < 0 { + None + } else { + Some(x) + } + } +} + +// This is a table of the largest values that will not overflow when multiplied +// by a given power as represented by the index. 
+static POWER_OVERFLOW_VALUES: [Buf12; 8] = [ + Buf12 { + data: [2576980377, 2576980377, 429496729], + }, + Buf12 { + data: [687194767, 4123168604, 42949672], + }, + Buf12 { + data: [2645699854, 1271310319, 4294967], + }, + Buf12 { + data: [694066715, 3133608139, 429496], + }, + Buf12 { + data: [2216890319, 2890341191, 42949], + }, + Buf12 { + data: [2369172679, 4154504685, 4294], + }, + Buf12 { + data: [4102387834, 2133437386, 429], + }, + Buf12 { + data: [410238783, 4078814305, 42], + }, +]; + +pub(super) struct Dec64 { + pub negative: bool, + pub scale: u32, + pub hi: u32, + pub low64: u64, +} + +impl Dec64 { + pub(super) const fn new(d: &Decimal) -> Dec64 { + let m = d.mantissa_array3(); + if m[1] == 0 { + Dec64 { + negative: d.is_sign_negative(), + scale: d.scale(), + hi: m[2], + low64: m[0] as u64, + } + } else { + Dec64 { + negative: d.is_sign_negative(), + scale: d.scale(), + hi: m[2], + low64: ((m[1] as u64) << 32) | (m[0] as u64), + } + } + } + + #[inline(always)] + pub(super) const fn lo(&self) -> u32 { + self.low64 as u32 + } + #[inline(always)] + pub(super) const fn mid(&self) -> u32 { + (self.low64 >> 32) as u32 + } + + #[inline(always)] + pub(super) const fn high64(&self) -> u64 { + (self.low64 >> 32) | ((self.hi as u64) << 32) + } + + pub(super) const fn to_decimal(&self) -> Decimal { + Decimal::from_parts( + self.low64 as u32, + (self.low64 >> 32) as u32, + self.hi, + self.negative, + self.scale, + ) + } +} + +pub struct Buf16 { + pub data: [u32; 4], +} + +impl Buf16 { + pub const fn zero() -> Self { + Buf16 { data: [0, 0, 0, 0] } + } + + pub const fn low64(&self) -> u64 { + ((self.data[1] as u64) << 32) | (self.data[0] as u64) + } + + pub fn set_low64(&mut self, value: u64) { + self.data[1] = (value >> 32) as u32; + self.data[0] = value as u32; + } + + pub const fn mid64(&self) -> u64 { + ((self.data[2] as u64) << 32) | (self.data[1] as u64) + } + + pub fn set_mid64(&mut self, value: u64) { + self.data[2] = (value >> 32) as u32; + self.data[1] = value as u32; + } + + pub const fn high64(&self) -> u64 { + ((self.data[3] as u64) << 32) | (self.data[2] as u64) + } + + pub fn set_high64(&mut self, value: u64) { + self.data[3] = (value >> 32) as u32; + self.data[2] = value as u32; + } +} + +#[derive(Debug)] +pub struct Buf24 { + pub data: [u32; 6], +} + +impl Buf24 { + pub const fn zero() -> Self { + Buf24 { + data: [0, 0, 0, 0, 0, 0], + } + } + + pub const fn low64(&self) -> u64 { + ((self.data[1] as u64) << 32) | (self.data[0] as u64) + } + + pub fn set_low64(&mut self, value: u64) { + self.data[1] = (value >> 32) as u32; + self.data[0] = value as u32; + } + + #[allow(dead_code)] + pub const fn mid64(&self) -> u64 { + ((self.data[3] as u64) << 32) | (self.data[2] as u64) + } + + pub fn set_mid64(&mut self, value: u64) { + self.data[3] = (value >> 32) as u32; + self.data[2] = value as u32; + } + + #[allow(dead_code)] + pub const fn high64(&self) -> u64 { + ((self.data[5] as u64) << 32) | (self.data[4] as u64) + } + + pub fn set_high64(&mut self, value: u64) { + self.data[5] = (value >> 32) as u32; + self.data[4] = value as u32; + } + + pub const fn upper_word(&self) -> usize { + if self.data[5] > 0 { + return 5; + } + if self.data[4] > 0 { + return 4; + } + if self.data[3] > 0 { + return 3; + } + if self.data[2] > 0 { + return 2; + } + if self.data[1] > 0 { + return 1; + } + 0 + } + + // Attempt to rescale the number into 96 bits. If successful, the scale is returned wrapped + // in an Option. If it failed due to overflow, we return None. 
+ // * `upper` - Index of last non-zero value in self. + // * `scale` - Current scale factor for this value. + pub fn rescale(&mut self, upper: usize, scale: u32) -> Option<u32> { + let mut scale = scale as i32; + let mut upper = upper; + + // Determine a rescale target to start with + let mut rescale_target = 0i32; + if upper > 2 { + rescale_target = upper as i32 * 32 - 64 - 1; + rescale_target -= self.data[upper].leading_zeros() as i32; + rescale_target = ((rescale_target * 77) >> 8) + 1; + if rescale_target > scale { + return None; + } + } + + // Make sure we scale enough to bring it into a valid range + if rescale_target < scale - MAX_PRECISION_I32 { + rescale_target = scale - MAX_PRECISION_I32; + } + + if rescale_target > 0 { + // We're going to keep reducing by powers of 10. So, start by reducing the scale by + // that amount. + scale -= rescale_target; + let mut sticky = 0; + let mut remainder = 0; + loop { + sticky |= remainder; + let mut power = if rescale_target > 8 { + POWERS_10[9] + } else { + POWERS_10[rescale_target as usize] + }; + + let high = self.data[upper]; + let high_quotient = high / power; + remainder = high - high_quotient * power; + + for item in self.data.iter_mut().rev().skip(6 - upper) { + let num = (*item as u64).wrapping_add((remainder as u64) << 32); + *item = (num / power as u64) as u32; + remainder = (num as u32).wrapping_sub(item.wrapping_mul(power)); + } + + self.data[upper] = high_quotient; + + // If the high quotient was zero then decrease the upper bound + if high_quotient == 0 && upper > 0 { + upper -= 1; + } + if rescale_target > MAX_I32_SCALE { + // Scale some more + rescale_target -= MAX_I32_SCALE; + continue; + } + + // If we fit into 96 bits then we've scaled enough. Otherwise, scale once more. + if upper > 2 { + if scale == 0 { + return None; + } + // Equivalent to scaling down by 10 + rescale_target = 1; + scale -= 1; + continue; + } + + // Round the final result. + power >>= 1; + let carried = if power <= remainder { + // If we're less than half then we're fine. Otherwise, we round if odd or if the + // sticky bit is set. + if power < remainder || ((self.data[0] & 1) | sticky) != 0 { + // Round up + self.data[0] = self.data[0].wrapping_add(1); + // Check if we carried + self.data[0] == 0 + } else { + false + } + } else { + false + }; + + // If we carried then propagate through the portions + if carried { + let mut pos = 0; + for (index, value) in self.data.iter_mut().enumerate().skip(1) { + pos = index; + *value = value.wrapping_add(1); + if *value != 0 { + break; + } + } + + // If we ended up rounding over the 96 bits then we'll try to rescale down (again) + if pos > 2 { + // Nothing to scale down from will cause overflow + if scale == 0 { + return None; + } + + // Loop back around using scale of 10. + // Reset the sticky bit and remainder before looping. 
+ upper = pos; + sticky = 0; + remainder = 0; + rescale_target = 1; + scale -= 1; + continue; + } + } + break; + } + } + + Some(scale as u32) + } +} diff --git a/third_party/rust/rust_decimal/src/ops/div.rs b/third_party/rust/rust_decimal/src/ops/div.rs new file mode 100644 index 0000000000..3b5ec577b2 --- /dev/null +++ b/third_party/rust/rust_decimal/src/ops/div.rs @@ -0,0 +1,658 @@ +use crate::constants::{MAX_PRECISION_I32, POWERS_10}; +use crate::decimal::{CalculationResult, Decimal}; +use crate::ops::common::{Buf12, Buf16, Dec64}; + +use core::cmp::Ordering; +use core::ops::BitXor; + +impl Buf12 { + // Returns true if successful, else false for an overflow + fn add32(&mut self, value: u32) -> Result<(), DivError> { + let value = value as u64; + let new = self.low64().wrapping_add(value); + self.set_low64(new); + if new < value { + self.data[2] = self.data[2].wrapping_add(1); + if self.data[2] == 0 { + return Err(DivError::Overflow); + } + } + Ok(()) + } + + // Divide a Decimal union by a 32 bit divisor. + // Self is overwritten with the quotient. + // Return value is a 32 bit remainder. + fn div32(&mut self, divisor: u32) -> u32 { + let divisor64 = divisor as u64; + // See if we can get by using a simple u64 division + if self.data[2] != 0 { + let mut temp = self.high64(); + let q64 = temp / divisor64; + self.set_high64(q64); + + // Calculate the "remainder" + temp = ((temp - q64 * divisor64) << 32) | (self.data[0] as u64); + if temp == 0 { + return 0; + } + let q32 = (temp / divisor64) as u32; + self.data[0] = q32; + ((temp as u32).wrapping_sub(q32.wrapping_mul(divisor))) as u32 + } else { + // Super easy divisor + let low64 = self.low64(); + if low64 == 0 { + // Nothing to do + return 0; + } + // Do the calc + let quotient = low64 / divisor64; + self.set_low64(quotient); + // Remainder is the leftover that wasn't used + (low64.wrapping_sub(quotient.wrapping_mul(divisor64))) as u32 + } + } + + // Divide the number by a power constant + // Returns true if division was successful + fn div32_const(&mut self, pow: u32) -> bool { + let pow64 = pow as u64; + let high64 = self.high64(); + let lo = self.data[0] as u64; + let div64: u64 = high64 / pow64; + let div = ((((high64 - div64 * pow64) << 32) + lo) / pow64) as u32; + if self.data[0] == div.wrapping_mul(pow) { + self.set_high64(div64); + self.data[0] = div; + true + } else { + false + } + } +} + +impl Buf16 { + // Does a partial divide with a 64 bit divisor. The divisor in this case must be 64 bits + // otherwise various assumptions fail (e.g. 32 bit quotient). + // To assist, the upper 64 bits must be greater than the divisor for this to succeed. + // Consequently, it will return the quotient as a 32 bit number and overwrite self with the + // 64 bit remainder. + pub(super) fn partial_divide_64(&mut self, divisor: u64) -> u32 { + // We make this assertion here, however below we pivot based on the data + debug_assert!(divisor > self.mid64()); + + // If we have an empty high bit, then divisor must be greater than the dividend due to + // the assumption that the divisor REQUIRES 64 bits. + if self.data[2] == 0 { + let low64 = self.low64(); + if low64 < divisor { + // We can't divide at at all so result is 0. The dividend remains untouched since + // the full amount is the remainder. + return 0; + } + + let quotient = low64 / divisor; + self.set_low64(low64 - (quotient * divisor)); + return quotient as u32; + } + + // Do a simple check to see if the hi portion of the dividend is greater than the hi + // portion of the divisor. 
+ let divisor_hi32 = (divisor >> 32) as u32; + if self.data[2] >= divisor_hi32 { + // We know that the divisor goes into this at MOST u32::max times. + // So we kick things off, with that assumption + let mut low64 = self.low64(); + low64 = low64.wrapping_sub(divisor << 32).wrapping_add(divisor); + let mut quotient = u32::MAX; + + // If we went negative then keep adding it back in + loop { + if low64 < divisor { + break; + } + quotient = quotient.wrapping_sub(1); + low64 = low64.wrapping_add(divisor); + } + self.set_low64(low64); + return quotient; + } + + let mid64 = self.mid64(); + let divisor_hi32_64 = divisor_hi32 as u64; + if mid64 < divisor_hi32_64 as u64 { + // similar situation as above where we've got nothing left to divide + return 0; + } + + let mut quotient = mid64 / divisor_hi32_64; + let mut remainder = self.data[0] as u64 | ((mid64 - quotient * divisor_hi32_64) << 32); + + // Do quotient * lo divisor + let product = quotient * (divisor & 0xFFFF_FFFF); + remainder = remainder.wrapping_sub(product); + + // Check if we've gone negative. If so, add it back + if remainder > product.bitxor(u64::MAX) { + loop { + quotient = quotient.wrapping_sub(1); + remainder = remainder.wrapping_add(divisor); + if remainder < divisor { + break; + } + } + } + + self.set_low64(remainder); + quotient as u32 + } + + // Does a partial divide with a 96 bit divisor. The divisor in this case must require 96 bits + // otherwise various assumptions fail (e.g. 32 bit quotient). + pub(super) fn partial_divide_96(&mut self, divisor: &Buf12) -> u32 { + let dividend = self.high64(); + let divisor_hi = divisor.data[2]; + if dividend < divisor_hi as u64 { + // Dividend is too small - entire number is remainder + return 0; + } + + let mut quo = (dividend / divisor_hi as u64) as u32; + let mut remainder = (dividend as u32).wrapping_sub(quo.wrapping_mul(divisor_hi)); + + // Compute full remainder + let mut prod1 = quo as u64 * divisor.data[0] as u64; + let mut prod2 = quo as u64 * divisor.data[1] as u64; + prod2 += prod1 >> 32; + prod1 = (prod1 & 0xFFFF_FFFF) | (prod2 << 32); + prod2 >>= 32; + + let mut num = self.low64(); + num = num.wrapping_sub(prod1); + remainder = remainder.wrapping_sub(prod2 as u32); + + // If there are carries make sure they are propagated + if num > prod1.bitxor(u64::MAX) { + remainder = remainder.wrapping_sub(1); + if remainder < (prod2 as u32).bitxor(u32::MAX) { + self.set_low64(num); + self.data[2] = remainder; + return quo; + } + } else if remainder <= (prod2 as u32).bitxor(u32::MAX) { + self.set_low64(num); + self.data[2] = remainder; + return quo; + } + + // Remainder went negative, add divisor back until it's positive + prod1 = divisor.low64(); + loop { + quo = quo.wrapping_sub(1); + num = num.wrapping_add(prod1); + remainder = remainder.wrapping_add(divisor_hi); + + if num < prod1 { + // Detected carry. 
+ let tmp = remainder; + remainder = remainder.wrapping_add(1); + if tmp < divisor_hi { + break; + } + } + if remainder < divisor_hi { + break; // detected carry + } + } + + self.set_low64(num); + self.data[2] = remainder; + quo + } +} + +enum DivError { + Overflow, +} + +pub(crate) fn div_impl(dividend: &Decimal, divisor: &Decimal) -> CalculationResult { + if divisor.is_zero() { + return CalculationResult::DivByZero; + } + if dividend.is_zero() { + return CalculationResult::Ok(Decimal::ZERO); + } + let dividend = Dec64::new(dividend); + let divisor = Dec64::new(divisor); + + // Pre calculate the scale and the sign + let mut scale = (dividend.scale as i32) - (divisor.scale as i32); + let sign_negative = dividend.negative ^ divisor.negative; + + // Set up some variables for modification throughout + let mut require_unscale = false; + let mut quotient = Buf12::from_dec64(÷nd); + let divisor = Buf12::from_dec64(&divisor); + + // Branch depending on the complexity of the divisor + if divisor.data[2] | divisor.data[1] == 0 { + // We have a simple(r) divisor (32 bit) + let divisor32 = divisor.data[0]; + + // Remainder can only be 32 bits since the divisor is 32 bits. + let mut remainder = quotient.div32(divisor32); + let mut power_scale = 0; + + // Figure out how to apply the remainder (i.e. we may have performed something like 10/3 or 8/5) + loop { + // Remainder is 0 so we have a simple situation + if remainder == 0 { + // If the scale is positive then we're actually done + if scale >= 0 { + break; + } + power_scale = 9usize.min((-scale) as usize); + } else { + // We may need to normalize later, so set the flag appropriately + require_unscale = true; + + // We have a remainder so we effectively want to try to adjust the quotient and add + // the remainder into the quotient. We do this below, however first of all we want + // to try to avoid overflowing so we do that check first. + let will_overflow = if scale == MAX_PRECISION_I32 { + true + } else { + // Figure out how much we can scale by + if let Some(s) = quotient.find_scale(scale) { + power_scale = s; + } else { + return CalculationResult::Overflow; + } + // If it comes back as 0 (i.e. 10^0 = 1) then we're going to overflow since + // we're doing nothing. + power_scale == 0 + }; + if will_overflow { + // No more scaling can be done, but remainder is non-zero so we round if necessary. + let tmp = remainder << 1; + let round = if tmp < remainder { + // We round if we wrapped around + true + } else if tmp >= divisor32 { + // If we're greater than the divisor (i.e. underflow) + // or if there is a lo bit set, we round + tmp > divisor32 || (quotient.data[0] & 0x1) > 0 + } else { + false + }; + + // If we need to round, try to do so. 
+ if round { + if let Ok(new_scale) = round_up(&mut quotient, scale) { + scale = new_scale; + } else { + // Overflowed + return CalculationResult::Overflow; + } + } + break; + } + } + + // Do some scaling + let power = POWERS_10[power_scale]; + scale += power_scale as i32; + // Increase the quotient by the power that was looked up + let overflow = increase_scale(&mut quotient, power as u64); + if overflow > 0 { + return CalculationResult::Overflow; + } + + let remainder_scaled = (remainder as u64) * (power as u64); + let remainder_quotient = (remainder_scaled / (divisor32 as u64)) as u32; + remainder = (remainder_scaled - remainder_quotient as u64 * divisor32 as u64) as u32; + if let Err(DivError::Overflow) = quotient.add32(remainder_quotient) { + if let Ok(adj) = unscale_from_overflow(&mut quotient, scale, remainder != 0) { + scale = adj; + } else { + // Still overflowing + return CalculationResult::Overflow; + } + break; + } + } + } else { + // We have a divisor greater than 32 bits. Both of these share some quick calculation wins + // so we'll do those before branching into separate logic. + // The win we can do is shifting the bits to the left as much as possible. We do this to both + // the dividend and the divisor to ensure the quotient is not changed. + // As a simple contrived example: if we have 4 / 2 then we could bit shift all the way to the + // left meaning that the lo portion would have nothing inside of it. Of course, shifting these + // left one has the same result (8/4) etc. + // The advantage is that we may be able to write off lower portions of the number making things + // easier. + let mut power_scale = if divisor.data[2] == 0 { + divisor.data[1].leading_zeros() + } else { + divisor.data[2].leading_zeros() + } as usize; + let mut remainder = Buf16::zero(); + remainder.set_low64(quotient.low64() << power_scale); + let tmp_high = ((quotient.data[1] as u64) + ((quotient.data[2] as u64) << 32)) >> (32 - power_scale); + remainder.set_high64(tmp_high); + + // Work out the divisor after it's shifted + let divisor64 = divisor.low64() << power_scale; + // Check if the divisor is 64 bit or the full 96 bits + if divisor.data[2] == 0 { + // It's 64 bits + quotient.data[2] = 0; + + // Calc mid/lo by shifting accordingly + let rem_lo = remainder.data[0]; + remainder.data[0] = remainder.data[1]; + remainder.data[1] = remainder.data[2]; + remainder.data[2] = remainder.data[3]; + quotient.data[1] = remainder.partial_divide_64(divisor64); + + remainder.data[2] = remainder.data[1]; + remainder.data[1] = remainder.data[0]; + remainder.data[0] = rem_lo; + quotient.data[0] = remainder.partial_divide_64(divisor64); + + loop { + let rem_low64 = remainder.low64(); + if rem_low64 == 0 { + // If the scale is positive then we're actually done + if scale >= 0 { + break; + } + power_scale = 9usize.min((-scale) as usize); + } else { + // We may need to normalize later, so set the flag appropriately + require_unscale = true; + + // We have a remainder so we effectively want to try to adjust the quotient and add + // the remainder into the quotient. We do this below, however first of all we want + // to try to avoid overflowing so we do that check first. + let will_overflow = if scale == MAX_PRECISION_I32 { + true + } else { + // Figure out how much we can scale by + if let Some(s) = quotient.find_scale(scale) { + power_scale = s; + } else { + return CalculationResult::Overflow; + } + // If it comes back as 0 (i.e. 10^0 = 1) then we're going to overflow since + // we're doing nothing. 
+ power_scale == 0 + }; + if will_overflow { + // No more scaling can be done, but remainder is non-zero so we round if necessary. + let mut tmp = remainder.low64(); + let round = if (tmp as i64) < 0 { + // We round if we wrapped around + true + } else { + tmp <<= 1; + if tmp > divisor64 { + true + } else { + tmp == divisor64 && quotient.data[0] & 0x1 != 0 + } + }; + + // If we need to round, try to do so. + if round { + if let Ok(new_scale) = round_up(&mut quotient, scale) { + scale = new_scale; + } else { + // Overflowed + return CalculationResult::Overflow; + } + } + break; + } + } + + // Do some scaling + let power = POWERS_10[power_scale]; + scale += power_scale as i32; + + // Increase the quotient by the power that was looked up + let overflow = increase_scale(&mut quotient, power as u64); + if overflow > 0 { + return CalculationResult::Overflow; + } + increase_scale64(&mut remainder, power as u64); + + let tmp = remainder.partial_divide_64(divisor64); + if let Err(DivError::Overflow) = quotient.add32(tmp) { + if let Ok(adj) = unscale_from_overflow(&mut quotient, scale, remainder.low64() != 0) { + scale = adj; + } else { + // Still overflowing + return CalculationResult::Overflow; + } + break; + } + } + } else { + // It's 96 bits + // Start by finishing the shift left + let divisor_mid = divisor.data[1]; + let divisor_hi = divisor.data[2]; + let mut divisor = divisor; + divisor.set_low64(divisor64); + divisor.data[2] = ((divisor_mid as u64 + ((divisor_hi as u64) << 32)) >> (32 - power_scale)) as u32; + + let quo = remainder.partial_divide_96(&divisor); + quotient.set_low64(quo as u64); + quotient.data[2] = 0; + + loop { + let mut rem_low64 = remainder.low64(); + if rem_low64 == 0 && remainder.data[2] == 0 { + // If the scale is positive then we're actually done + if scale >= 0 { + break; + } + power_scale = 9usize.min((-scale) as usize); + } else { + // We may need to normalize later, so set the flag appropriately + require_unscale = true; + + // We have a remainder so we effectively want to try to adjust the quotient and add + // the remainder into the quotient. We do this below, however first of all we want + // to try to avoid overflowing so we do that check first. + let will_overflow = if scale == MAX_PRECISION_I32 { + true + } else { + // Figure out how much we can scale by + if let Some(s) = quotient.find_scale(scale) { + power_scale = s; + } else { + return CalculationResult::Overflow; + } + // If it comes back as 0 (i.e. 10^0 = 1) then we're going to overflow since + // we're doing nothing. + power_scale == 0 + }; + if will_overflow { + // No more scaling can be done, but remainder is non-zero so we round if necessary. + let round = if (remainder.data[2] as i32) < 0 { + // We round if we wrapped around + true + } else { + let tmp = remainder.data[1] >> 31; + rem_low64 <<= 1; + remainder.set_low64(rem_low64); + remainder.data[2] = (&remainder.data[2] << 1) + tmp; + + match remainder.data[2].cmp(&divisor.data[2]) { + Ordering::Less => false, + Ordering::Equal => { + let divisor_low64 = divisor.low64(); + if rem_low64 > divisor_low64 { + true + } else { + rem_low64 == divisor_low64 && (quotient.data[0] & 1) != 0 + } + } + Ordering::Greater => true, + } + }; + + // If we need to round, try to do so. 
+ if round { + if let Ok(new_scale) = round_up(&mut quotient, scale) { + scale = new_scale; + } else { + // Overflowed + return CalculationResult::Overflow; + } + } + break; + } + } + + // Do some scaling + let power = POWERS_10[power_scale]; + scale += power_scale as i32; + + // Increase the quotient by the power that was looked up + let overflow = increase_scale(&mut quotient, power as u64); + if overflow > 0 { + return CalculationResult::Overflow; + } + let mut tmp_remainder = Buf12 { + data: [remainder.data[0], remainder.data[1], remainder.data[2]], + }; + let overflow = increase_scale(&mut tmp_remainder, power as u64); + remainder.data[0] = tmp_remainder.data[0]; + remainder.data[1] = tmp_remainder.data[1]; + remainder.data[2] = tmp_remainder.data[2]; + remainder.data[3] = overflow; + + let tmp = remainder.partial_divide_96(&divisor); + if let Err(DivError::Overflow) = quotient.add32(tmp) { + if let Ok(adj) = + unscale_from_overflow(&mut quotient, scale, (remainder.low64() | remainder.high64()) != 0) + { + scale = adj; + } else { + // Still overflowing + return CalculationResult::Overflow; + } + break; + } + } + } + } + if require_unscale { + scale = unscale(&mut quotient, scale); + } + CalculationResult::Ok(Decimal::from_parts( + quotient.data[0], + quotient.data[1], + quotient.data[2], + sign_negative, + scale as u32, + )) +} + +// Multiply num by power (multiple of 10). Power must be 32 bits. +// Returns the overflow, if any +fn increase_scale(num: &mut Buf12, power: u64) -> u32 { + let mut tmp = (num.data[0] as u64) * power; + num.data[0] = tmp as u32; + tmp >>= 32; + tmp += (num.data[1] as u64) * power; + num.data[1] = tmp as u32; + tmp >>= 32; + tmp += (num.data[2] as u64) * power; + num.data[2] = tmp as u32; + (tmp >> 32) as u32 +} + +// Multiply num by power (multiple of 10). Power must be 32 bits. +fn increase_scale64(num: &mut Buf16, power: u64) { + let mut tmp = (num.data[0] as u64) * power; + num.data[0] = tmp as u32; + tmp >>= 32; + tmp += (num.data[1] as u64) * power; + num.set_mid64(tmp) +} + +// Adjust the number to deal with an overflow. This function follows being scaled up (i.e. multiplied +// by 10, so this effectively tries to reverse that by dividing by 10 then feeding in the high bit +// to undo the overflow and rounding instead. +// Returns the updated scale. +fn unscale_from_overflow(num: &mut Buf12, scale: i32, sticky: bool) -> Result<i32, DivError> { + let scale = scale - 1; + if scale < 0 { + return Err(DivError::Overflow); + } + + // This function is called when the hi portion has "overflowed" upon adding one and has wrapped + // back around to 0. Consequently, we need to "feed" that back in, but also rescaling down + // to reverse out the overflow. 
+ const HIGH_BIT: u64 = 0x1_0000_0000; + num.data[2] = (HIGH_BIT / 10) as u32; + + // Calc the mid + let mut tmp = ((HIGH_BIT % 10) << 32) + (num.data[1] as u64); + let mut val = (tmp / 10) as u32; + num.data[1] = val; + + // Calc the lo using a similar method + tmp = ((tmp - (val as u64) * 10) << 32) + (num.data[0] as u64); + val = (tmp / 10) as u32; + num.data[0] = val; + + // Work out the remainder, and round if we have one (since it doesn't fit) + let remainder = (tmp - (val as u64) * 10) as u32; + if remainder > 5 || (remainder == 5 && (sticky || num.data[0] & 0x1 > 0)) { + let _ = num.add32(1); + } + Ok(scale) +} + +#[inline] +fn round_up(num: &mut Buf12, scale: i32) -> Result<i32, DivError> { + let low64 = num.low64().wrapping_add(1); + num.set_low64(low64); + if low64 != 0 { + return Ok(scale); + } + let hi = num.data[2].wrapping_add(1); + num.data[2] = hi; + if hi != 0 { + return Ok(scale); + } + unscale_from_overflow(num, scale, true) +} + +fn unscale(num: &mut Buf12, scale: i32) -> i32 { + // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10 we can extract. + // We use this as a quick test on whether to try a given power. + let mut scale = scale; + while num.data[0] == 0 && scale >= 8 && num.div32_const(100000000) { + scale -= 8; + } + + if (num.data[0] & 0xF) == 0 && scale >= 4 && num.div32_const(10000) { + scale -= 4; + } + + if (num.data[0] & 0x3) == 0 && scale >= 2 && num.div32_const(100) { + scale -= 2; + } + + if (num.data[0] & 0x1) == 0 && scale >= 1 && num.div32_const(10) { + scale -= 1; + } + scale +} diff --git a/third_party/rust/rust_decimal/src/ops/legacy.rs b/third_party/rust/rust_decimal/src/ops/legacy.rs new file mode 100644 index 0000000000..49f39814f8 --- /dev/null +++ b/third_party/rust/rust_decimal/src/ops/legacy.rs @@ -0,0 +1,843 @@ +use crate::{ + constants::{MAX_PRECISION_U32, POWERS_10, U32_MASK}, + decimal::{CalculationResult, Decimal}, + ops::array::{ + add_by_internal, cmp_internal, div_by_u32, is_all_zero, mul_by_u32, mul_part, rescale_internal, shl1_internal, + }, +}; + +use core::cmp::Ordering; +use num_traits::Zero; + +pub(crate) fn add_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult { + // Convert to the same scale + let mut my = d1.mantissa_array3(); + let mut my_scale = d1.scale(); + let mut ot = d2.mantissa_array3(); + let mut other_scale = d2.scale(); + rescale_to_maximum_scale(&mut my, &mut my_scale, &mut ot, &mut other_scale); + let mut final_scale = my_scale.max(other_scale); + + // Add the items together + let my_negative = d1.is_sign_negative(); + let other_negative = d2.is_sign_negative(); + let mut negative = false; + let carry; + if !(my_negative ^ other_negative) { + negative = my_negative; + carry = add_by_internal3(&mut my, &ot); + } else { + let cmp = cmp_internal(&my, &ot); + // -x + y + // if x > y then it's negative (i.e. -2 + 1) + match cmp { + Ordering::Less => { + negative = other_negative; + sub_by_internal3(&mut ot, &my); + my[0] = ot[0]; + my[1] = ot[1]; + my[2] = ot[2]; + } + Ordering::Greater => { + negative = my_negative; + sub_by_internal3(&mut my, &ot); + } + Ordering::Equal => { + // -2 + 2 + my[0] = 0; + my[1] = 0; + my[2] = 0; + } + } + carry = 0; + } + + // If we have a carry we underflowed. 
+ // We need to lose some significant digits (if possible) + if carry > 0 { + if final_scale == 0 { + return CalculationResult::Overflow; + } + + // Copy it over to a temp array for modification + let mut temp = [my[0], my[1], my[2], carry]; + while final_scale > 0 && temp[3] != 0 { + div_by_u32(&mut temp, 10); + final_scale -= 1; + } + + // If we still have a carry bit then we overflowed + if temp[3] > 0 { + return CalculationResult::Overflow; + } + + // Copy it back - we're done + my[0] = temp[0]; + my[1] = temp[1]; + my[2] = temp[2]; + } + + CalculationResult::Ok(Decimal::from_parts(my[0], my[1], my[2], negative, final_scale)) +} + +pub(crate) fn sub_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult { + add_impl(d1, &(-*d2)) +} + +pub(crate) fn div_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult { + if d2.is_zero() { + return CalculationResult::DivByZero; + } + if d1.is_zero() { + return CalculationResult::Ok(Decimal::zero()); + } + + let dividend = d1.mantissa_array3(); + let divisor = d2.mantissa_array3(); + let mut quotient = [0u32, 0u32, 0u32]; + let mut quotient_scale: i32 = d1.scale() as i32 - d2.scale() as i32; + + // We supply an extra overflow word for each of the dividend and the remainder + let mut working_quotient = [dividend[0], dividend[1], dividend[2], 0u32]; + let mut working_remainder = [0u32, 0u32, 0u32, 0u32]; + let mut working_scale = quotient_scale; + let mut remainder_scale = quotient_scale; + let mut underflow; + + loop { + div_internal(&mut working_quotient, &mut working_remainder, &divisor); + underflow = add_with_scale_internal( + &mut quotient, + &mut quotient_scale, + &mut working_quotient, + &mut working_scale, + ); + + // Multiply the remainder by 10 + let mut overflow = 0; + for part in working_remainder.iter_mut() { + let (lo, hi) = mul_part(*part, 10, overflow); + *part = lo; + overflow = hi; + } + // Copy temp remainder into the temp quotient section + working_quotient.copy_from_slice(&working_remainder); + + remainder_scale += 1; + working_scale = remainder_scale; + + if underflow || is_all_zero(&working_remainder) { + break; + } + } + + // If we have a really big number try to adjust the scale to 0 + while quotient_scale < 0 { + copy_array_diff_lengths(&mut working_quotient, "ient); + working_quotient[3] = 0; + working_remainder.iter_mut().for_each(|x| *x = 0); + + // Mul 10 + let mut overflow = 0; + for part in &mut working_quotient { + let (lo, hi) = mul_part(*part, 10, overflow); + *part = lo; + overflow = hi; + } + for part in &mut working_remainder { + let (lo, hi) = mul_part(*part, 10, overflow); + *part = lo; + overflow = hi; + } + if working_quotient[3] == 0 && is_all_zero(&working_remainder) { + quotient_scale += 1; + quotient[0] = working_quotient[0]; + quotient[1] = working_quotient[1]; + quotient[2] = working_quotient[2]; + } else { + // Overflow + return CalculationResult::Overflow; + } + } + + if quotient_scale > 255 { + quotient[0] = 0; + quotient[1] = 0; + quotient[2] = 0; + quotient_scale = 0; + } + + let mut quotient_negative = d1.is_sign_negative() ^ d2.is_sign_negative(); + + // Check for underflow + let mut final_scale: u32 = quotient_scale as u32; + if final_scale > MAX_PRECISION_U32 { + let mut remainder = 0; + + // Division underflowed. We must remove some significant digits over using + // an invalid scale. 
+ while final_scale > MAX_PRECISION_U32 && !is_all_zero(&quotient) {
+ remainder = div_by_u32(&mut quotient, 10);
+ final_scale -= 1;
+ }
+ if final_scale > MAX_PRECISION_U32 {
+ // Result underflowed so set to zero
+ final_scale = 0;
+ quotient_negative = false;
+ } else if remainder >= 5 {
+ for part in &mut quotient {
+ if remainder == 0 {
+ break;
+ }
+ let digit: u64 = u64::from(*part) + 1;
+ remainder = if digit > 0xFFFF_FFFF { 1 } else { 0 };
+ *part = (digit & 0xFFFF_FFFF) as u32;
+ }
+ }
+ }
+
+ CalculationResult::Ok(Decimal::from_parts(
+ quotient[0],
+ quotient[1],
+ quotient[2],
+ quotient_negative,
+ final_scale,
+ ))
+}
+
+pub(crate) fn mul_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult {
+ // Early exit if either is zero
+ if d1.is_zero() || d2.is_zero() {
+ return CalculationResult::Ok(Decimal::zero());
+ }
+
+ // The result is only negative if we have mismatched signs
+ let negative = d1.is_sign_negative() ^ d2.is_sign_negative();
+
+ // We get the scale of the result by adding the scales of the operands. This may be too big,
+ // however we'll correct that later
+ let mut final_scale = d1.scale() + d2.scale();
+
+ // First of all, if ONLY the lo parts of both numbers are filled
+ // then we can simply do a standard 64 bit calculation. It's a minor
+ // optimization, however it prevents the need for long form multiplication
+ let my = d1.mantissa_array3();
+ let ot = d2.mantissa_array3();
+ if my[1] == 0 && my[2] == 0 && ot[1] == 0 && ot[2] == 0 {
+ // Simple 64 bit multiplication
+ let mut u64_result = u64_to_array(u64::from(my[0]) * u64::from(ot[0]));
+
+ // If we're above max precision then this is a very small number
+ if final_scale > MAX_PRECISION_U32 {
+ final_scale -= MAX_PRECISION_U32;
+
+ // If the remaining scale is above 19 then this will equate to zero.
+ // This is because the max value in 64 bits is 1.84E19
+ if final_scale > 19 {
+ return CalculationResult::Ok(Decimal::zero());
+ }
+
+ let mut rem_lo = 0;
+ let mut power;
+ if final_scale > 9 {
+ // Since 10^10 doesn't fit into u32, we divide by 10^10/4
+ // and multiply the next divisor by 4.
+ rem_lo = div_by_u32(&mut u64_result, 2_500_000_000);
+ power = POWERS_10[final_scale as usize - 10] << 2;
+ } else {
+ power = POWERS_10[final_scale as usize];
+ }
+
+ // The divisor fits in 32 bits
+ let rem_hi = div_by_u32(&mut u64_result, power);
+
+ // Round the result. Since the divisor is a power of 10
+ // we check to see if the remainder is >= 1/2 divisor
+ power >>= 1;
+ if rem_hi >= power && (rem_hi > power || (rem_lo | (u64_result[0] & 0x1)) != 0) {
+ u64_result[0] += 1;
+ }
+
+ final_scale = MAX_PRECISION_U32;
+ }
+ return CalculationResult::Ok(Decimal::from_parts(
+ u64_result[0],
+ u64_result[1],
+ 0,
+ negative,
+ final_scale,
+ ));
+ }
+
+ // We're using some of the high bits, so we essentially perform
+ // long form multiplication. We compute the 9 partial products
+ // into a 192 bit result array.
+ //
+ // [my-h][my-m][my-l]
+ // x [ot-h][ot-m][ot-l]
+ // --------------------------------------
+ // 1. [r-hi][r-lo] my-l * ot-l [0, 0]
+ // 2. [r-hi][r-lo] my-l * ot-m [0, 1]
+ // 3. [r-hi][r-lo] my-m * ot-l [1, 0]
+ // 4. [r-hi][r-lo] my-m * ot-m [1, 1]
+ // 5. [r-hi][r-lo] my-l * ot-h [0, 2]
+ // 6. [r-hi][r-lo] my-h * ot-l [2, 0]
+ // 7. [r-hi][r-lo] my-m * ot-h [1, 2]
+ // 8. [r-hi][r-lo] my-h * ot-m [2, 1]
+ // 9. [r-hi][r-lo] my-h * ot-h [2, 2]
+ let mut product = [0u32, 0u32, 0u32, 0u32, 0u32, 0u32];
+
+ // We can perform a minor short circuit here.
If the + // high portions are both 0 then we can skip portions 5-9 + let to = if my[2] == 0 && ot[2] == 0 { 2 } else { 3 }; + + for (my_index, my_item) in my.iter().enumerate().take(to) { + for (ot_index, ot_item) in ot.iter().enumerate().take(to) { + let (mut rlo, mut rhi) = mul_part(*my_item, *ot_item, 0); + + // Get the index for the lo portion of the product + for prod in product.iter_mut().skip(my_index + ot_index) { + let (res, overflow) = add_part(rlo, *prod); + *prod = res; + + // If we have something in rhi from before then promote that + if rhi > 0 { + // If we overflowed in the last add, add that with rhi + if overflow > 0 { + let (nlo, nhi) = add_part(rhi, overflow); + rlo = nlo; + rhi = nhi; + } else { + rlo = rhi; + rhi = 0; + } + } else if overflow > 0 { + rlo = overflow; + rhi = 0; + } else { + break; + } + + // If nothing to do next round then break out + if rlo == 0 { + break; + } + } + } + } + + // If our result has used up the high portion of the product + // then we either have an overflow or an underflow situation + // Overflow will occur if we can't scale it back, whereas underflow + // with kick in rounding + let mut remainder = 0; + while final_scale > 0 && (product[3] != 0 || product[4] != 0 || product[5] != 0) { + remainder = div_by_u32(&mut product, 10u32); + final_scale -= 1; + } + + // Round up the carry if we need to + if remainder >= 5 { + for part in product.iter_mut() { + if remainder == 0 { + break; + } + let digit: u64 = u64::from(*part) + 1; + remainder = if digit > 0xFFFF_FFFF { 1 } else { 0 }; + *part = (digit & 0xFFFF_FFFF) as u32; + } + } + + // If we're still above max precision then we'll try again to + // reduce precision - we may be dealing with a limit of "0" + if final_scale > MAX_PRECISION_U32 { + // We're in an underflow situation + // The easiest way to remove precision is to divide off the result + while final_scale > MAX_PRECISION_U32 && !is_all_zero(&product) { + div_by_u32(&mut product, 10); + final_scale -= 1; + } + // If we're still at limit then we can't represent any + // significant decimal digits and will return an integer only + // Can also be invoked while representing 0. + if final_scale > MAX_PRECISION_U32 { + final_scale = 0; + } + } else if !(product[3] == 0 && product[4] == 0 && product[5] == 0) { + // We're in an overflow situation - we're within our precision bounds + // but still have bits in overflow + return CalculationResult::Overflow; + } + + CalculationResult::Ok(Decimal::from_parts( + product[0], + product[1], + product[2], + negative, + final_scale, + )) +} + +pub(crate) fn rem_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult { + if d2.is_zero() { + return CalculationResult::DivByZero; + } + if d1.is_zero() { + return CalculationResult::Ok(Decimal::zero()); + } + + // Rescale so comparable + let initial_scale = d1.scale(); + let mut quotient = d1.mantissa_array3(); + let mut quotient_scale = initial_scale; + let mut divisor = d2.mantissa_array3(); + let mut divisor_scale = d2.scale(); + rescale_to_maximum_scale(&mut quotient, &mut quotient_scale, &mut divisor, &mut divisor_scale); + + // Working is the remainder + the quotient + // We use an aligned array since we'll be using it a lot. + let mut working_quotient = [quotient[0], quotient[1], quotient[2], 0u32]; + let mut working_remainder = [0u32, 0u32, 0u32, 0u32]; + div_internal(&mut working_quotient, &mut working_remainder, &divisor); + + // Round if necessary. 
This is for semantic correctness, but could feasibly be removed for + // performance improvements. + if quotient_scale > initial_scale { + let mut working = [ + working_remainder[0], + working_remainder[1], + working_remainder[2], + working_remainder[3], + ]; + while quotient_scale > initial_scale { + if div_by_u32(&mut working, 10) > 0 { + break; + } + quotient_scale -= 1; + working_remainder.copy_from_slice(&working); + } + } + + CalculationResult::Ok(Decimal::from_parts( + working_remainder[0], + working_remainder[1], + working_remainder[2], + d1.is_sign_negative(), + quotient_scale, + )) +} + +pub(crate) fn cmp_impl(d1: &Decimal, d2: &Decimal) -> Ordering { + // Quick exit if major differences + if d1.is_zero() && d2.is_zero() { + return Ordering::Equal; + } + let self_negative = d1.is_sign_negative(); + let other_negative = d2.is_sign_negative(); + if self_negative && !other_negative { + return Ordering::Less; + } else if !self_negative && other_negative { + return Ordering::Greater; + } + + // If we have 1.23 and 1.2345 then we have + // 123 scale 2 and 12345 scale 4 + // We need to convert the first to + // 12300 scale 4 so we can compare equally + let left: &Decimal; + let right: &Decimal; + if self_negative && other_negative { + // Both are negative, so reverse cmp + left = d2; + right = d1; + } else { + left = d1; + right = d2; + } + let mut left_scale = left.scale(); + let mut right_scale = right.scale(); + let mut left_raw = left.mantissa_array3(); + let mut right_raw = right.mantissa_array3(); + + if left_scale == right_scale { + // Fast path for same scale + if left_raw[2] != right_raw[2] { + return left_raw[2].cmp(&right_raw[2]); + } + if left_raw[1] != right_raw[1] { + return left_raw[1].cmp(&right_raw[1]); + } + return left_raw[0].cmp(&right_raw[0]); + } + + // Rescale and compare + rescale_to_maximum_scale(&mut left_raw, &mut left_scale, &mut right_raw, &mut right_scale); + cmp_internal(&left_raw, &right_raw) +} + +#[inline] +fn add_part(left: u32, right: u32) -> (u32, u32) { + let added = u64::from(left) + u64::from(right); + ((added & U32_MASK) as u32, (added >> 32 & U32_MASK) as u32) +} + +#[inline(always)] +fn sub_by_internal3(value: &mut [u32; 3], by: &[u32; 3]) { + let mut overflow = 0; + let vl = value.len(); + for i in 0..vl { + let part = (0x1_0000_0000u64 + u64::from(value[i])) - (u64::from(by[i]) + overflow); + value[i] = part as u32; + overflow = 1 - (part >> 32); + } +} + +fn div_internal(quotient: &mut [u32; 4], remainder: &mut [u32; 4], divisor: &[u32; 3]) { + // There are a couple of ways to do division on binary numbers: + // 1. Using long division + // 2. Using the complement method + // ref: http://paulmason.me/dividing-binary-numbers-part-2/ + // The complement method basically keeps trying to subtract the + // divisor until it can't anymore and placing the rest in remainder. + let mut complement = [ + divisor[0] ^ 0xFFFF_FFFF, + divisor[1] ^ 0xFFFF_FFFF, + divisor[2] ^ 0xFFFF_FFFF, + 0xFFFF_FFFF, + ]; + + // Add one onto the complement + add_one_internal4(&mut complement); + + // Make sure the remainder is 0 + remainder.iter_mut().for_each(|x| *x = 0); + + // If we have nothing in our hi+ block then shift over till we do + let mut blocks_to_process = 0; + while blocks_to_process < 4 && quotient[3] == 0 { + // memcpy would be useful here + quotient[3] = quotient[2]; + quotient[2] = quotient[1]; + quotient[1] = quotient[0]; + quotient[0] = 0; + + // Increment the counter + blocks_to_process += 1; + } + + // Let's try and do the addition... 
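+ // Editorial sketch (not part of the upstream comments): each pass of the loop below shifts one
+ // bit of the dividend into the remainder and then attempts a trial subtraction of the divisor
+ // by adding its two's complement. As an 8 bit analogy, 13 - 5 is computed as 13 + (!5 + 1)
+ // = 13 + 251 = 264; the wrapped value 8 is the difference, and the overflow past the top bit
+ // signals that the subtraction did not go negative. In the 128 bit buffers used here the same
+ // test is whether the high bit of working[3] is clear, in which case the remainder is updated
+ // and a 1 is shifted into the quotient.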
+ let mut block = blocks_to_process << 5; + let mut working = [0u32, 0u32, 0u32, 0u32]; + while block < 128 { + // << 1 for quotient AND remainder. Moving the carry from the quotient to the bottom of the + // remainder. + let carry = shl1_internal(quotient, 0); + shl1_internal(remainder, carry); + + // Copy the remainder of working into sub + working.copy_from_slice(remainder); + + // Add the remainder with the complement + add_by_internal(&mut working, &complement); + + // Check for the significant bit - move over to the quotient + // as necessary + if (working[3] & 0x8000_0000) == 0 { + remainder.copy_from_slice(&working); + quotient[0] |= 1; + } + + // Increment our pointer + block += 1; + } +} + +#[inline] +fn copy_array_diff_lengths(into: &mut [u32], from: &[u32]) { + for i in 0..into.len() { + if i >= from.len() { + break; + } + into[i] = from[i]; + } +} + +#[inline] +fn add_one_internal4(value: &mut [u32; 4]) -> u32 { + let mut carry: u64 = 1; // Start with one, since adding one + let mut sum: u64; + for i in value.iter_mut() { + sum = (*i as u64) + carry; + *i = (sum & U32_MASK) as u32; + carry = sum >> 32; + } + + carry as u32 +} + +#[inline] +fn add_by_internal3(value: &mut [u32; 3], by: &[u32; 3]) -> u32 { + let mut carry: u32 = 0; + let bl = by.len(); + for i in 0..bl { + let res1 = value[i].overflowing_add(by[i]); + let res2 = res1.0.overflowing_add(carry); + value[i] = res2.0; + carry = (res1.1 | res2.1) as u32; + } + carry +} + +#[inline] +const fn u64_to_array(value: u64) -> [u32; 2] { + [(value & U32_MASK) as u32, (value >> 32 & U32_MASK) as u32] +} + +fn add_with_scale_internal( + quotient: &mut [u32; 3], + quotient_scale: &mut i32, + working_quotient: &mut [u32; 4], + working_scale: &mut i32, +) -> bool { + // Add quotient and the working (i.e. quotient = quotient + working) + if is_all_zero(quotient) { + // Quotient is zero so we can just copy the working quotient in directly + // First, make sure they are both 96 bit. 
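+ // Illustrative editorial example: each division by 10 below drops the least significant
+ // decimal digit and decrements the scale, so the represented value is preserved apart from
+ // the truncated digit, e.g. a mantissa of 123_456 at scale 5 (1.23456) becomes 12_345 at
+ // scale 4 (1.2345). The loop stops once the overflow word is clear and the value fits in 96 bits.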
+ while working_quotient[3] != 0 { + div_by_u32(working_quotient, 10); + *working_scale -= 1; + } + copy_array_diff_lengths(quotient, working_quotient); + *quotient_scale = *working_scale; + return false; + } + + if is_all_zero(working_quotient) { + return false; + } + + // We have ensured that our working is not zero so we should do the addition + + // If our two quotients are different then + // try to scale down the one with the bigger scale + let mut temp3 = [0u32, 0u32, 0u32]; + let mut temp4 = [0u32, 0u32, 0u32, 0u32]; + if *quotient_scale != *working_scale { + // TODO: Remove necessity for temp (without performance impact) + fn div_by_10<const N: usize>(target: &mut [u32], temp: &mut [u32; N], scale: &mut i32, target_scale: i32) { + // Copy to the temp array + temp.copy_from_slice(target); + // divide by 10 until target scale is reached + while *scale > target_scale { + let remainder = div_by_u32(temp, 10); + if remainder == 0 { + *scale -= 1; + target.copy_from_slice(temp); + } else { + break; + } + } + } + + if *quotient_scale < *working_scale { + div_by_10(working_quotient, &mut temp4, working_scale, *quotient_scale); + } else { + div_by_10(quotient, &mut temp3, quotient_scale, *working_scale); + } + } + + // If our two quotients are still different then + // try to scale up the smaller scale + if *quotient_scale != *working_scale { + // TODO: Remove necessity for temp (without performance impact) + fn mul_by_10(target: &mut [u32], temp: &mut [u32], scale: &mut i32, target_scale: i32) { + temp.copy_from_slice(target); + let mut overflow = 0; + // Multiply by 10 until target scale reached or overflow + while *scale < target_scale && overflow == 0 { + overflow = mul_by_u32(temp, 10); + if overflow == 0 { + // Still no overflow + *scale += 1; + target.copy_from_slice(temp); + } + } + } + + if *quotient_scale > *working_scale { + mul_by_10(working_quotient, &mut temp4, working_scale, *quotient_scale); + } else { + mul_by_10(quotient, &mut temp3, quotient_scale, *working_scale); + } + } + + // If our two quotients are still different then + // try to scale down the one with the bigger scale + // (ultimately losing significant digits) + if *quotient_scale != *working_scale { + // TODO: Remove necessity for temp (without performance impact) + fn div_by_10_lossy<const N: usize>( + target: &mut [u32], + temp: &mut [u32; N], + scale: &mut i32, + target_scale: i32, + ) { + temp.copy_from_slice(target); + // divide by 10 until target scale is reached + while *scale > target_scale { + div_by_u32(temp, 10); + *scale -= 1; + target.copy_from_slice(temp); + } + } + if *quotient_scale < *working_scale { + div_by_10_lossy(working_quotient, &mut temp4, working_scale, *quotient_scale); + } else { + div_by_10_lossy(quotient, &mut temp3, quotient_scale, *working_scale); + } + } + + // If quotient or working are zero we have an underflow condition + if is_all_zero(quotient) || is_all_zero(working_quotient) { + // Underflow + return true; + } else { + // Both numbers have the same scale and can be added. 
+ // We just need to know whether we can fit them in + let mut underflow = false; + let mut temp = [0u32, 0u32, 0u32]; + while !underflow { + temp.copy_from_slice(quotient); + + // Add the working quotient + let overflow = add_by_internal(&mut temp, working_quotient); + if overflow == 0 { + // addition was successful + quotient.copy_from_slice(&temp); + break; + } else { + // addition overflowed - remove significant digits and try again + div_by_u32(quotient, 10); + *quotient_scale -= 1; + div_by_u32(working_quotient, 10); + *working_scale -= 1; + // Check for underflow + underflow = is_all_zero(quotient) || is_all_zero(working_quotient); + } + } + if underflow { + return true; + } + } + false +} + +/// Rescales the given decimals to equivalent scales. +/// It will firstly try to scale both the left and the right side to +/// the maximum scale of left/right. If it is unable to do that it +/// will try to reduce the accuracy of the other argument. +/// e.g. with 1.23 and 2.345 it'll rescale the first arg to 1.230 +#[inline(always)] +fn rescale_to_maximum_scale(left: &mut [u32; 3], left_scale: &mut u32, right: &mut [u32; 3], right_scale: &mut u32) { + if left_scale == right_scale { + // Nothing to do + return; + } + + if is_all_zero(left) { + *left_scale = *right_scale; + return; + } else if is_all_zero(right) { + *right_scale = *left_scale; + return; + } + + if left_scale > right_scale { + rescale_internal(right, right_scale, *left_scale); + if right_scale != left_scale { + rescale_internal(left, left_scale, *right_scale); + } + } else { + rescale_internal(left, left_scale, *right_scale); + if right_scale != left_scale { + rescale_internal(right, right_scale, *left_scale); + } + } +} + +#[cfg(test)] +mod test { + // Tests on private methods. + // + // All public tests should go under `tests/`. 
+ + use super::*; + use crate::prelude::*; + + #[test] + fn it_can_rescale_to_maximum_scale() { + fn extract(value: &str) -> ([u32; 3], u32) { + let v = Decimal::from_str(value).unwrap(); + (v.mantissa_array3(), v.scale()) + } + + let tests = &[ + ("1", "1", "1", "1"), + ("1", "1.0", "1.0", "1.0"), + ("1", "1.00000", "1.00000", "1.00000"), + ("1", "1.0000000000", "1.0000000000", "1.0000000000"), + ( + "1", + "1.00000000000000000000", + "1.00000000000000000000", + "1.00000000000000000000", + ), + ("1.1", "1.1", "1.1", "1.1"), + ("1.1", "1.10000", "1.10000", "1.10000"), + ("1.1", "1.1000000000", "1.1000000000", "1.1000000000"), + ( + "1.1", + "1.10000000000000000000", + "1.10000000000000000000", + "1.10000000000000000000", + ), + ( + "0.6386554621848739495798319328", + "11.815126050420168067226890757", + "0.638655462184873949579831933", + "11.815126050420168067226890757", + ), + ( + "0.0872727272727272727272727272", // Scale 28 + "843.65000000", // Scale 8 + "0.0872727272727272727272727", // 25 + "843.6500000000000000000000000", // 25 + ), + ]; + + for &(left_raw, right_raw, expected_left, expected_right) in tests { + // Left = the value to rescale + // Right = the new scale we're scaling to + // Expected = the expected left value after rescale + let (expected_left, expected_lscale) = extract(expected_left); + let (expected_right, expected_rscale) = extract(expected_right); + + let (mut left, mut left_scale) = extract(left_raw); + let (mut right, mut right_scale) = extract(right_raw); + rescale_to_maximum_scale(&mut left, &mut left_scale, &mut right, &mut right_scale); + assert_eq!(left, expected_left); + assert_eq!(left_scale, expected_lscale); + assert_eq!(right, expected_right); + assert_eq!(right_scale, expected_rscale); + + // Also test the transitive case + let (mut left, mut left_scale) = extract(left_raw); + let (mut right, mut right_scale) = extract(right_raw); + rescale_to_maximum_scale(&mut right, &mut right_scale, &mut left, &mut left_scale); + assert_eq!(left, expected_left); + assert_eq!(left_scale, expected_lscale); + assert_eq!(right, expected_right); + assert_eq!(right_scale, expected_rscale); + } + } +} diff --git a/third_party/rust/rust_decimal/src/ops/mul.rs b/third_party/rust/rust_decimal/src/ops/mul.rs new file mode 100644 index 0000000000..b36729599d --- /dev/null +++ b/third_party/rust/rust_decimal/src/ops/mul.rs @@ -0,0 +1,168 @@ +use crate::constants::{BIG_POWERS_10, MAX_I64_SCALE, MAX_PRECISION_U32, U32_MAX}; +use crate::decimal::{CalculationResult, Decimal}; +use crate::ops::common::Buf24; + +pub(crate) fn mul_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult { + if d1.is_zero() || d2.is_zero() { + // We should think about this - does zero need to maintain precision? This treats it like + // an absolute which I think is ok, especially since we have is_zero() functions etc. + return CalculationResult::Ok(Decimal::ZERO); + } + + let mut scale = d1.scale() + d2.scale(); + let negative = d1.is_sign_negative() ^ d2.is_sign_negative(); + let mut product = Buf24::zero(); + + // See if we can optimize this calculation depending on whether the hi bits are set + if d1.hi() | d1.mid() == 0 { + if d2.hi() | d2.mid() == 0 { + // We're multiplying two 32 bit integers, so we can take some liberties to optimize this. + let mut low64 = d1.lo() as u64 * d2.lo() as u64; + if scale > MAX_PRECISION_U32 { + // We've exceeded maximum scale so we need to start reducing the precision (aka + // rounding) until we have something that fits. 
+ // If we're too big then we effectively round to zero. + if scale > MAX_PRECISION_U32 + MAX_I64_SCALE { + return CalculationResult::Ok(Decimal::ZERO); + } + + scale -= MAX_PRECISION_U32 + 1; + let mut power = BIG_POWERS_10[scale as usize]; + + let tmp = low64 / power; + let remainder = low64 - tmp * power; + low64 = tmp; + + // Round the result. Since the divisor was a power of 10, it's always even. + power >>= 1; + if remainder >= power && (remainder > power || (low64 as u32 & 1) > 0) { + low64 += 1; + } + + scale = MAX_PRECISION_U32; + } + + // Early exit + return CalculationResult::Ok(Decimal::from_parts( + low64 as u32, + (low64 >> 32) as u32, + 0, + negative, + scale, + )); + } + + // We know that the left hand side is just 32 bits but the right hand side is either + // 64 or 96 bits. + mul_by_32bit_lhs(d1.lo() as u64, d2, &mut product); + } else if d2.mid() | d2.hi() == 0 { + // We know that the right hand side is just 32 bits. + mul_by_32bit_lhs(d2.lo() as u64, d1, &mut product); + } else { + // We know we're not dealing with simple 32 bit operands on either side. + // We compute and accumulate the 9 partial products using long multiplication + + // 1: ll * rl + let mut tmp = d1.lo() as u64 * d2.lo() as u64; + product.data[0] = tmp as u32; + + // 2: ll * rm + let mut tmp2 = (d1.lo() as u64 * d2.mid() as u64).wrapping_add(tmp >> 32); + + // 3: lm * rl + tmp = d1.mid() as u64 * d2.lo() as u64; + tmp = tmp.wrapping_add(tmp2); + product.data[1] = tmp as u32; + + // Detect if carry happened from the wrapping add + if tmp < tmp2 { + tmp2 = (tmp >> 32) | (1u64 << 32); + } else { + tmp2 = tmp >> 32; + } + + // 4: lm * rm + tmp = (d1.mid() as u64 * d2.mid() as u64) + tmp2; + + // If the high bit isn't set then we can stop here. Otherwise, we need to continue calculating + // using the high bits. + if (d1.hi() | d2.hi()) > 0 { + // 5. ll * rh + tmp2 = d1.lo() as u64 * d2.hi() as u64; + tmp = tmp.wrapping_add(tmp2); + // Detect if we carried + let mut tmp3 = if tmp < tmp2 { 1 } else { 0 }; + + // 6. lh * rl + tmp2 = d1.hi() as u64 * d2.lo() as u64; + tmp = tmp.wrapping_add(tmp2); + product.data[2] = tmp as u32; + // Detect if we carried + if tmp < tmp2 { + tmp3 += 1; + } + tmp2 = (tmp3 << 32) | (tmp >> 32); + + // 7. lm * rh + tmp = d1.mid() as u64 * d2.hi() as u64; + tmp = tmp.wrapping_add(tmp2); + // Check for carry + tmp3 = if tmp < tmp2 { 1 } else { 0 }; + + // 8. lh * rm + tmp2 = d1.hi() as u64 * d2.mid() as u64; + tmp = tmp.wrapping_add(tmp2); + product.data[3] = tmp as u32; + // Check for carry + if tmp < tmp2 { + tmp3 += 1; + } + tmp = (tmp3 << 32) | (tmp >> 32); + + // 9. lh * rh + product.set_high64(d1.hi() as u64 * d2.hi() as u64 + tmp); + } else { + product.set_mid64(tmp); + } + } + + // We may want to "rescale". This is the case if the mantissa is > 96 bits or if the scale + // exceeds the maximum precision. 
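+ // Editorial note (an inference, not from the upstream comments): `upper_word` appears to be
+ // the index of the most significant non-zero 32 bit word of the 192 bit product, so a value
+ // greater than 2 means the raw result no longer fits in a 96 bit mantissa. `rescale` is then
+ // expected to divide the product down by powers of 10, reducing the scale, and to signal
+ // overflow when the scale cannot absorb the excess.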
+ let upper_word = product.upper_word(); + if upper_word > 2 || scale > MAX_PRECISION_U32 { + scale = if let Some(new_scale) = product.rescale(upper_word, scale) { + new_scale + } else { + return CalculationResult::Overflow; + } + } + + CalculationResult::Ok(Decimal::from_parts( + product.data[0], + product.data[1], + product.data[2], + negative, + scale, + )) +} + +#[inline(always)] +fn mul_by_32bit_lhs(d1: u64, d2: &Decimal, product: &mut Buf24) { + let mut tmp = d1 * d2.lo() as u64; + product.data[0] = tmp as u32; + tmp = (d1 * d2.mid() as u64).wrapping_add(tmp >> 32); + product.data[1] = tmp as u32; + tmp >>= 32; + + // If we're multiplying by a 96 bit integer then continue the calculation + if d2.hi() > 0 { + tmp = tmp.wrapping_add(d1 * d2.hi() as u64); + if tmp > U32_MAX { + product.set_mid64(tmp); + } else { + product.data[2] = tmp as u32; + } + } else { + product.data[2] = tmp as u32; + } +} diff --git a/third_party/rust/rust_decimal/src/ops/rem.rs b/third_party/rust/rust_decimal/src/ops/rem.rs new file mode 100644 index 0000000000..a79334e04b --- /dev/null +++ b/third_party/rust/rust_decimal/src/ops/rem.rs @@ -0,0 +1,285 @@ +use crate::constants::{MAX_I32_SCALE, MAX_PRECISION_I32, POWERS_10}; +use crate::decimal::{CalculationResult, Decimal}; +use crate::ops::common::{Buf12, Buf16, Buf24, Dec64}; + +pub(crate) fn rem_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult { + if d2.is_zero() { + return CalculationResult::DivByZero; + } + if d1.is_zero() { + return CalculationResult::Ok(Decimal::ZERO); + } + + // We handle the structs a bit different here. Firstly, we ignore both the sign/scale of d2. + // This is because during a remainder operation we do not care about the sign of the divisor + // and only concern ourselves with that of the dividend. + let mut d1 = Dec64::new(d1); + let d2_scale = d2.scale(); + let mut d2 = Buf12::from_decimal(d2); + + let cmp = crate::ops::cmp::cmp_internal( + &d1, + &Dec64 { + negative: d1.negative, + scale: d2_scale, + hi: d2.hi(), + low64: d2.low64(), + }, + ); + match cmp { + core::cmp::Ordering::Equal => { + // Same numbers meaning that remainder is zero + return CalculationResult::Ok(Decimal::ZERO); + } + core::cmp::Ordering::Less => { + // d1 < d2, e.g. 1/2. This means that the result is the value of d1 + return CalculationResult::Ok(d1.to_decimal()); + } + core::cmp::Ordering::Greater => {} + } + + // At this point we know that the dividend > divisor and that they are both non-zero. 
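+ // Illustrative editorial example: for 12.345 % 1.2 the dividend has scale 3 and the divisor
+ // scale 1, so scale = 2 below and the divisor mantissa 12 is multiplied by 10^2 to 1200,
+ // putting both operands on a common scale of 3 before the remainder (345, i.e. 0.345) is taken.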
+ let mut scale = d1.scale as i32 - d2_scale as i32; + if scale > 0 { + // Scale up the divisor + loop { + let power = if scale >= MAX_I32_SCALE { + POWERS_10[9] + } else { + POWERS_10[scale as usize] + } as u64; + + let mut tmp = d2.lo() as u64 * power; + d2.set_lo(tmp as u32); + tmp >>= 32; + tmp = tmp.wrapping_add((d2.mid() as u64 + ((d2.hi() as u64) << 32)) * power); + d2.set_mid(tmp as u32); + d2.set_hi((tmp >> 32) as u32); + + // Keep scaling if there is more to go + scale -= MAX_I32_SCALE; + if scale <= 0 { + break; + } + } + scale = 0; + } + + loop { + // If the dividend is smaller than the divisor then try to scale that up first + if scale < 0 { + let mut quotient = Buf12 { + data: [d1.lo(), d1.mid(), d1.hi], + }; + loop { + // Figure out how much we can scale by + let power_scale; + if let Some(u) = quotient.find_scale(MAX_PRECISION_I32 + scale) { + if u >= POWERS_10.len() { + power_scale = 9; + } else { + power_scale = u; + } + } else { + return CalculationResult::Overflow; + }; + if power_scale == 0 { + break; + } + let power = POWERS_10[power_scale] as u64; + scale += power_scale as i32; + + let mut tmp = quotient.data[0] as u64 * power; + quotient.data[0] = tmp as u32; + tmp >>= 32; + quotient.set_high64(tmp.wrapping_add(quotient.high64().wrapping_mul(power))); + if power_scale != 9 { + break; + } + if scale >= 0 { + break; + } + } + d1.low64 = quotient.low64(); + d1.hi = quotient.data[2]; + d1.scale = d2_scale; + } + + // if the high portion is empty then return the modulus of the bottom portion + if d1.hi == 0 { + d1.low64 %= d2.low64(); + return CalculationResult::Ok(d1.to_decimal()); + } else if (d2.mid() | d2.hi()) == 0 { + let mut tmp = d1.high64(); + tmp = ((tmp % d2.lo() as u64) << 32) | (d1.lo() as u64); + d1.low64 = tmp % d2.lo() as u64; + d1.hi = 0; + } else { + // Divisor is > 32 bits + return rem_full(&d1, &d2, scale); + } + + if scale >= 0 { + break; + } + } + + CalculationResult::Ok(d1.to_decimal()) +} + +fn rem_full(d1: &Dec64, d2: &Buf12, scale: i32) -> CalculationResult { + let mut scale = scale; + + // First normalize the divisor + let shift = if d2.hi() == 0 { + d2.mid().leading_zeros() + } else { + d2.hi().leading_zeros() + }; + + let mut buffer = Buf24::zero(); + let mut overflow = 0u32; + buffer.set_low64(d1.low64 << shift); + buffer.set_mid64(((d1.mid() as u64).wrapping_add((d1.hi as u64) << 32)) >> (32 - shift)); + let mut upper = 3; // We start at 3 due to bit shifting + + while scale < 0 { + let power = if -scale >= MAX_I32_SCALE { + POWERS_10[9] + } else { + POWERS_10[-scale as usize] + } as u64; + let mut tmp64 = buffer.data[0] as u64 * power; + buffer.data[0] = tmp64 as u32; + + for (index, part) in buffer.data.iter_mut().enumerate().skip(1) { + if index > upper { + break; + } + tmp64 >>= 32; + tmp64 = tmp64.wrapping_add((*part as u64).wrapping_mul(power)); + *part = tmp64 as u32; + } + // If we have overflow then also process that + if upper == 6 { + tmp64 >>= 32; + tmp64 = tmp64.wrapping_add((overflow as u64).wrapping_mul(power)); + overflow = tmp64 as u32; + } + + // Make sure the high bit is not set + if tmp64 > 0x7FFF_FFFF { + upper += 1; + if upper > 5 { + overflow = (tmp64 >> 32) as u32; + } else { + buffer.data[upper] = (tmp64 >> 32) as u32; + } + } + scale += MAX_I32_SCALE; + } + + // TODO: Optimize slice logic + + let mut tmp = Buf16::zero(); + let divisor = d2.low64() << shift; + if d2.hi() == 0 { + // Do some division + if upper == 6 { + upper -= 1; + + tmp.data = [buffer.data[4], buffer.data[5], overflow, 0]; + 
tmp.partial_divide_64(divisor); + buffer.data[4] = tmp.data[0]; + buffer.data[5] = tmp.data[1]; + } + if upper == 5 { + upper -= 1; + tmp.data = [buffer.data[3], buffer.data[4], buffer.data[5], 0]; + tmp.partial_divide_64(divisor); + buffer.data[3] = tmp.data[0]; + buffer.data[4] = tmp.data[1]; + buffer.data[5] = tmp.data[2]; + } + if upper == 4 { + tmp.data = [buffer.data[2], buffer.data[3], buffer.data[4], 0]; + tmp.partial_divide_64(divisor); + buffer.data[2] = tmp.data[0]; + buffer.data[3] = tmp.data[1]; + buffer.data[4] = tmp.data[2]; + } + + tmp.data = [buffer.data[1], buffer.data[2], buffer.data[3], 0]; + tmp.partial_divide_64(divisor); + buffer.data[1] = tmp.data[0]; + buffer.data[2] = tmp.data[1]; + buffer.data[3] = tmp.data[2]; + + tmp.data = [buffer.data[0], buffer.data[1], buffer.data[2], 0]; + tmp.partial_divide_64(divisor); + buffer.data[0] = tmp.data[0]; + buffer.data[1] = tmp.data[1]; + buffer.data[2] = tmp.data[2]; + + let low64 = buffer.low64() >> shift; + CalculationResult::Ok(Decimal::from_parts( + low64 as u32, + (low64 >> 32) as u32, + 0, + d1.negative, + d1.scale, + )) + } else { + let divisor_low64 = divisor; + let divisor = Buf12 { + data: [ + divisor_low64 as u32, + (divisor_low64 >> 32) as u32, + (((d2.mid() as u64) + ((d2.hi() as u64) << 32)) >> (32 - shift)) as u32, + ], + }; + + // Do some division + if upper == 6 { + upper -= 1; + tmp.data = [buffer.data[3], buffer.data[4], buffer.data[5], overflow]; + tmp.partial_divide_96(&divisor); + buffer.data[3] = tmp.data[0]; + buffer.data[4] = tmp.data[1]; + buffer.data[5] = tmp.data[2]; + } + if upper == 5 { + upper -= 1; + tmp.data = [buffer.data[2], buffer.data[3], buffer.data[4], buffer.data[5]]; + tmp.partial_divide_96(&divisor); + buffer.data[2] = tmp.data[0]; + buffer.data[3] = tmp.data[1]; + buffer.data[4] = tmp.data[2]; + buffer.data[5] = tmp.data[3]; + } + if upper == 4 { + tmp.data = [buffer.data[1], buffer.data[2], buffer.data[3], buffer.data[4]]; + tmp.partial_divide_96(&divisor); + buffer.data[1] = tmp.data[0]; + buffer.data[2] = tmp.data[1]; + buffer.data[3] = tmp.data[2]; + buffer.data[4] = tmp.data[3]; + } + + tmp.data = [buffer.data[0], buffer.data[1], buffer.data[2], buffer.data[3]]; + tmp.partial_divide_96(&divisor); + buffer.data[0] = tmp.data[0]; + buffer.data[1] = tmp.data[1]; + buffer.data[2] = tmp.data[2]; + buffer.data[3] = tmp.data[3]; + + let low64 = (buffer.low64() >> shift) + ((buffer.data[2] as u64) << (32 - shift) << 32); + CalculationResult::Ok(Decimal::from_parts( + low64 as u32, + (low64 >> 32) as u32, + buffer.data[2] >> shift, + d1.negative, + d1.scale, + )) + } +} |