Diffstat (limited to 'third_party/rust/rust_decimal/src/ops')
-rw-r--r--  third_party/rust/rust_decimal/src/ops/add.rs     382
-rw-r--r--  third_party/rust/rust_decimal/src/ops/array.rs   381
-rw-r--r--  third_party/rust/rust_decimal/src/ops/cmp.rs     101
-rw-r--r--  third_party/rust/rust_decimal/src/ops/common.rs  455
-rw-r--r--  third_party/rust/rust_decimal/src/ops/div.rs     658
-rw-r--r--  third_party/rust/rust_decimal/src/ops/legacy.rs  843
-rw-r--r--  third_party/rust/rust_decimal/src/ops/mul.rs     168
-rw-r--r--  third_party/rust/rust_decimal/src/ops/rem.rs     285
8 files changed, 3273 insertions, 0 deletions
diff --git a/third_party/rust/rust_decimal/src/ops/add.rs b/third_party/rust/rust_decimal/src/ops/add.rs
new file mode 100644
index 0000000000..52ba675f23
--- /dev/null
+++ b/third_party/rust/rust_decimal/src/ops/add.rs
@@ -0,0 +1,382 @@
+use crate::constants::{MAX_I32_SCALE, POWERS_10, SCALE_MASK, SCALE_SHIFT, SIGN_MASK, U32_MASK, U32_MAX};
+use crate::decimal::{CalculationResult, Decimal};
+use crate::ops::common::{Buf24, Dec64};
+
+pub(crate) fn add_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult {
+ add_sub_internal(d1, d2, false)
+}
+
+pub(crate) fn sub_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult {
+ add_sub_internal(d1, d2, true)
+}
+
+#[inline]
+fn add_sub_internal(d1: &Decimal, d2: &Decimal, subtract: bool) -> CalculationResult {
+ if d1.is_zero() {
+ // 0 - x or 0 + x
+ let mut result = *d2;
+ if subtract && !d2.is_zero() {
+ result.set_sign_negative(d2.is_sign_positive());
+ }
+ return CalculationResult::Ok(result);
+ }
+ if d2.is_zero() {
+ // x - 0 or x + 0
+ return CalculationResult::Ok(*d1);
+ }
+
+ // Work out whether we need to rescale and/or if it's a subtract still given the signs of the
+ // numbers.
+ let flags = d1.flags() ^ d2.flags();
+ let subtract = subtract ^ ((flags & SIGN_MASK) != 0);
+ let rescale = (flags & SCALE_MASK) > 0;
+
+ // We optimize towards using 32 bit logic as much as possible. It's noticeably faster at
+ // scale, even on 64 bit machines
+ if d1.mid() | d1.hi() == 0 && d2.mid() | d2.hi() == 0 {
+ // We'll try to rescale, however we may end up with 64 bit (or more) numbers
+ // If we do, we'll choose a different flow than fast_add
+ if rescale {
+ // This is less optimized if we scale to a 64 bit integer. We can add some further logic
+ // here later on.
+ let rescale_factor = ((d2.flags() & SCALE_MASK) as i32 - (d1.flags() & SCALE_MASK) as i32) >> SCALE_SHIFT;
+ if rescale_factor < 0 {
+ // We try to rescale the rhs
+ if let Some(rescaled) = rescale32(d2.lo(), -rescale_factor) {
+ return fast_add(d1.lo(), rescaled, d1.flags(), subtract);
+ }
+ } else {
+ // We try to rescale the lhs
+ if let Some(rescaled) = rescale32(d1.lo(), rescale_factor) {
+ return fast_add(
+ rescaled,
+ d2.lo(),
+ (d2.flags() & SCALE_MASK) | (d1.flags() & SIGN_MASK),
+ subtract,
+ );
+ }
+ }
+ } else {
+ return fast_add(d1.lo(), d2.lo(), d1.flags(), subtract);
+ }
+ }
+
+ // Continue on with the slower 64 bit method
+ let d1 = Dec64::new(d1);
+ let d2 = Dec64::new(d2);
+
+ // If we're not the same scale then make sure we're there first before starting addition
+ if rescale {
+ let rescale_factor = d2.scale as i32 - d1.scale as i32;
+ if rescale_factor < 0 {
+ let negative = subtract ^ d1.negative;
+ let scale = d1.scale;
+ unaligned_add(d2, d1, negative, scale, -rescale_factor, subtract)
+ } else {
+ let negative = d1.negative;
+ let scale = d2.scale;
+ unaligned_add(d1, d2, negative, scale, rescale_factor, subtract)
+ }
+ } else {
+ let neg = d1.negative;
+ let scale = d1.scale;
+ aligned_add(d1, d2, neg, scale, subtract)
+ }
+}
+
+#[inline(always)]
+fn rescale32(num: u32, rescale_factor: i32) -> Option<u32> {
+ if rescale_factor > MAX_I32_SCALE {
+ return None;
+ }
+ num.checked_mul(POWERS_10[rescale_factor as usize])
+}
+
+fn fast_add(lo1: u32, lo2: u32, flags: u32, subtract: bool) -> CalculationResult {
+ if subtract {
+ // Sub can't overflow because we're ensuring the bigger number always subtracts the smaller number
+ if lo1 < lo2 {
+ return CalculationResult::Ok(Decimal::from_parts_raw(lo2 - lo1, 0, 0, flags ^ SIGN_MASK));
+ }
+ return CalculationResult::Ok(Decimal::from_parts_raw(lo1 - lo2, 0, 0, flags));
+ }
+ // Add can overflow however, so we check for that explicitly
+ let lo = lo1.wrapping_add(lo2);
+ let mid = if lo < lo1 { 1 } else { 0 };
+ CalculationResult::Ok(Decimal::from_parts_raw(lo, mid, 0, flags))
+}
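The carry handling in `fast_add` is the usual wrapping-add trick: the 32 bit sum is smaller than an operand exactly when the addition wrapped, so one unit is carried into the next word. A minimal standalone sketch in plain Rust (the helper name is illustrative, not part of the crate):

fn add_with_carry(lo1: u32, lo2: u32) -> (u32, u32) {
    let lo = lo1.wrapping_add(lo2);
    let carry = if lo < lo1 { 1 } else { 0 };
    (lo, carry)
}

fn main() {
    assert_eq!(add_with_carry(1, 2), (3, 0));
    assert_eq!(add_with_carry(u32::MAX, 1), (0, 1)); // wrapped, so a carry is produced
}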
+
+fn aligned_add(lhs: Dec64, rhs: Dec64, negative: bool, scale: u32, subtract: bool) -> CalculationResult {
+ if subtract {
+ // Signs differ, so subtract
+ let mut result = Dec64 {
+ negative,
+ scale,
+ low64: lhs.low64.wrapping_sub(rhs.low64),
+ hi: lhs.hi.wrapping_sub(rhs.hi),
+ };
+
+ // Check for carry
+ if result.low64 > lhs.low64 {
+ result.hi = result.hi.wrapping_sub(1);
+ if result.hi >= lhs.hi {
+ flip_sign(&mut result);
+ }
+ } else if result.hi > lhs.hi {
+ flip_sign(&mut result);
+ }
+ CalculationResult::Ok(result.to_decimal())
+ } else {
+ // Signs are the same, so add
+ let mut result = Dec64 {
+ negative,
+ scale,
+ low64: lhs.low64.wrapping_add(rhs.low64),
+ hi: lhs.hi.wrapping_add(rhs.hi),
+ };
+
+ // Check for carry
+ if result.low64 < lhs.low64 {
+ result.hi = result.hi.wrapping_add(1);
+ if result.hi <= lhs.hi {
+ if result.scale == 0 {
+ return CalculationResult::Overflow;
+ }
+ reduce_scale(&mut result);
+ }
+ } else if result.hi < lhs.hi {
+ if result.scale == 0 {
+ return CalculationResult::Overflow;
+ }
+ reduce_scale(&mut result);
+ }
+ CalculationResult::Ok(result.to_decimal())
+ }
+}
+
+fn flip_sign(result: &mut Dec64) {
+ // Bitwise not the high portion
+ result.hi = !result.hi;
+ let low64 = ((result.low64 as i64).wrapping_neg()) as u64;
+ if low64 == 0 {
+ result.hi += 1;
+ }
+ result.low64 = low64;
+ result.negative = !result.negative;
+}
+
+fn reduce_scale(result: &mut Dec64) {
+ let mut low64 = result.low64;
+ let mut hi = result.hi;
+
+ let mut num = (hi as u64) + (1u64 << 32);
+ hi = (num / 10u64) as u32;
+ num = ((num - (hi as u64) * 10u64) << 32) + (low64 >> 32);
+ let mut div = (num / 10) as u32;
+ num = ((num - (div as u64) * 10u64) << 32) + (low64 & U32_MASK);
+ low64 = (div as u64) << 32;
+ div = (num / 10u64) as u32;
+ low64 = low64.wrapping_add(div as u64);
+ let remainder = (num as u32).wrapping_sub(div.wrapping_mul(10));
+
+ // Finally, round. This is optimizing slightly toward non-rounded numbers
+ if remainder >= 5 && (remainder > 5 || (low64 & 1) > 0) {
+ low64 = low64.wrapping_add(1);
+ if low64 == 0 {
+ hi += 1;
+ }
+ }
+
+ result.low64 = low64;
+ result.hi = hi;
+ result.scale -= 1;
+}
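The rounding at the end of `reduce_scale` is round half to even on the dropped digit: a remainder of exactly 5 only rounds up when the kept value is odd. A standalone sketch of the same rule over a plain u64 (hypothetical helper name):

fn div10_half_even(n: u64) -> u64 {
    let (q, r) = (n / 10, n % 10);
    // Mirrors `remainder >= 5 && (remainder > 5 || (low64 & 1) > 0)` above.
    if r > 5 || (r == 5 && (q & 1) == 1) {
        q + 1
    } else {
        q
    }
}

fn main() {
    assert_eq!(div10_half_even(124), 12); // below half: truncate
    assert_eq!(div10_half_even(125), 12); // exactly half, even quotient: unchanged
    assert_eq!(div10_half_even(135), 14); // exactly half, odd quotient: rounds up
}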
+
+// Assumption going into this function is that the LHS is the larger number and will "absorb" the
+// smaller number.
+fn unaligned_add(
+ lhs: Dec64,
+ rhs: Dec64,
+ negative: bool,
+ scale: u32,
+ rescale_factor: i32,
+ subtract: bool,
+) -> CalculationResult {
+ let mut lhs = lhs;
+ let mut low64 = lhs.low64;
+ let mut high = lhs.hi;
+ let mut rescale_factor = rescale_factor;
+
+ // First off, we see if we can get away with scaling small amounts (or none at all)
+ if high == 0 {
+ if low64 <= U32_MAX {
+ // We know it's not zero, so we start scaling.
+ // Start with reducing the scale down for the low portion
+ while low64 <= U32_MAX {
+ if rescale_factor <= MAX_I32_SCALE {
+ low64 *= POWERS_10[rescale_factor as usize] as u64;
+ lhs.low64 = low64;
+ return aligned_add(lhs, rhs, negative, scale, subtract);
+ }
+ rescale_factor -= MAX_I32_SCALE;
+ low64 *= POWERS_10[9] as u64;
+ }
+ }
+
+ // Reduce the scale for the high portion
+ while high == 0 {
+ let power = if rescale_factor <= MAX_I32_SCALE {
+ POWERS_10[rescale_factor as usize] as u64
+ } else {
+ POWERS_10[9] as u64
+ };
+
+ let tmp_low = (low64 & U32_MASK) * power;
+ let tmp_hi = (low64 >> 32) * power + (tmp_low >> 32);
+ low64 = (tmp_low & U32_MASK) + (tmp_hi << 32);
+ high = (tmp_hi >> 32) as u32;
+ rescale_factor -= MAX_I32_SCALE;
+ if rescale_factor <= 0 {
+ lhs.low64 = low64;
+ lhs.hi = high;
+ return aligned_add(lhs, rhs, negative, scale, subtract);
+ }
+ }
+ }
+
+ // See if we can get away with keeping it in the 96 bits. Otherwise, we need a buffer
+ let mut tmp64: u64;
+ loop {
+ let power = if rescale_factor <= MAX_I32_SCALE {
+ POWERS_10[rescale_factor as usize] as u64
+ } else {
+ POWERS_10[9] as u64
+ };
+
+ let tmp_low = (low64 & U32_MASK) * power;
+ tmp64 = (low64 >> 32) * power + (tmp_low >> 32);
+ low64 = (tmp_low & U32_MASK) + (tmp64 << 32);
+ tmp64 >>= 32;
+ tmp64 += (high as u64) * power;
+
+ rescale_factor -= MAX_I32_SCALE;
+
+ if tmp64 > U32_MAX {
+ break;
+ } else {
+ high = tmp64 as u32;
+ if rescale_factor <= 0 {
+ lhs.low64 = low64;
+ lhs.hi = high;
+ return aligned_add(lhs, rhs, negative, scale, subtract);
+ }
+ }
+ }
+
+ let mut buffer = Buf24::zero();
+ buffer.set_low64(low64);
+ buffer.set_mid64(tmp64);
+
+ let mut upper_word = buffer.upper_word();
+ while rescale_factor > 0 {
+ let power = if rescale_factor <= MAX_I32_SCALE {
+ POWERS_10[rescale_factor as usize] as u64
+ } else {
+ POWERS_10[9] as u64
+ };
+ tmp64 = 0;
+ for (index, part) in buffer.data.iter_mut().enumerate() {
+ tmp64 = tmp64.wrapping_add((*part as u64) * power);
+ *part = tmp64 as u32;
+ tmp64 >>= 32;
+ if index + 1 > upper_word {
+ break;
+ }
+ }
+
+ if tmp64 & U32_MASK > 0 {
+ // Extend the result
+ upper_word += 1;
+ buffer.data[upper_word] = tmp64 as u32;
+ }
+
+ rescale_factor -= MAX_I32_SCALE;
+ }
+
+ // Do the add
+ tmp64 = buffer.low64();
+ low64 = rhs.low64;
+ let tmp_hi = buffer.data[2];
+ high = rhs.hi;
+
+ if subtract {
+ low64 = tmp64.wrapping_sub(low64);
+ high = tmp_hi.wrapping_sub(high);
+
+ // Check for carry
+ let carry = if low64 > tmp64 {
+ high = high.wrapping_sub(1);
+ high >= tmp_hi
+ } else {
+ high > tmp_hi
+ };
+
+ if carry {
+ for part in buffer.data.iter_mut().skip(3) {
+ *part = part.wrapping_sub(1);
+ if *part > 0 {
+ break;
+ }
+ }
+
+ if buffer.data[upper_word] == 0 && upper_word < 3 {
+ return CalculationResult::Ok(Decimal::from_parts(
+ low64 as u32,
+ (low64 >> 32) as u32,
+ high,
+ negative,
+ scale,
+ ));
+ }
+ }
+ } else {
+ low64 = low64.wrapping_add(tmp64);
+ high = high.wrapping_add(tmp_hi);
+
+ // Check for carry
+ let carry = if low64 < tmp64 {
+ high = high.wrapping_add(1);
+ high <= tmp_hi
+ } else {
+ high < tmp_hi
+ };
+
+ if carry {
+ for (index, part) in buffer.data.iter_mut().enumerate().skip(3) {
+ if upper_word < index {
+ *part = 1;
+ upper_word = index;
+ break;
+ }
+ *part = part.wrapping_add(1);
+ if *part > 0 {
+ break;
+ }
+ }
+ }
+ }
+
+ buffer.set_low64(low64);
+ buffer.data[2] = high;
+ if let Some(scale) = buffer.rescale(upper_word, scale) {
+ CalculationResult::Ok(Decimal::from_parts(
+ buffer.data[0],
+ buffer.data[1],
+ buffer.data[2],
+ negative,
+ scale,
+ ))
+ } else {
+ CalculationResult::Overflow
+ }
+}
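Seen from the public API (assuming the crate root re-exports `Decimal` with its `FromStr`, `Add` and `Display` implementations, as released rust_decimal versions do), the rescale-then-add behaviour implemented above amounts to:

use core::str::FromStr;
use rust_decimal::Decimal;

fn main() {
    let a = Decimal::from_str("1.10").unwrap(); // mantissa 110, scale 2
    let b = Decimal::from_str("2.005").unwrap(); // mantissa 2005, scale 3
    // The scale 2 operand is rescaled to scale 3 before the mantissas are added.
    assert_eq!((a + b).to_string(), "3.105");
}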
diff --git a/third_party/rust/rust_decimal/src/ops/array.rs b/third_party/rust/rust_decimal/src/ops/array.rs
new file mode 100644
index 0000000000..2ab58b4dfb
--- /dev/null
+++ b/third_party/rust/rust_decimal/src/ops/array.rs
@@ -0,0 +1,381 @@
+use crate::constants::{MAX_PRECISION_U32, POWERS_10, U32_MASK};
+
+/// Rescales the given decimal to new scale.
+/// e.g. rescaling 1.23 to a new scale of 3 yields 1.230
+#[inline(always)]
+pub(crate) fn rescale_internal(value: &mut [u32; 3], value_scale: &mut u32, new_scale: u32) {
+ if *value_scale == new_scale {
+ // Nothing to do
+ return;
+ }
+
+ if is_all_zero(value) {
+ *value_scale = new_scale.min(MAX_PRECISION_U32);
+ return;
+ }
+
+ if *value_scale > new_scale {
+ let mut diff = value_scale.wrapping_sub(new_scale);
+ // Scaling further isn't possible since we got an overflow
+ // In this case we need to reduce the accuracy of the "side to keep"
+
+ // Now do the necessary rounding
+ let mut remainder = 0;
+ while let Some(diff_minus_one) = diff.checked_sub(1) {
+ if is_all_zero(value) {
+ *value_scale = new_scale;
+ return;
+ }
+
+ diff = diff_minus_one;
+
+ // Any remainder is discarded if diff > 0 still (i.e. lost precision)
+ remainder = div_by_u32(value, 10);
+ }
+ if remainder >= 5 {
+ for part in value.iter_mut() {
+ let digit = u64::from(*part) + 1u64;
+ remainder = if digit > U32_MASK { 1 } else { 0 };
+ *part = (digit & U32_MASK) as u32;
+ if remainder == 0 {
+ break;
+ }
+ }
+ }
+ *value_scale = new_scale;
+ } else {
+ let mut diff = new_scale.wrapping_sub(*value_scale);
+ let mut working = [value[0], value[1], value[2]];
+ while let Some(diff_minus_one) = diff.checked_sub(1) {
+ if mul_by_10(&mut working) == 0 {
+ value.copy_from_slice(&working);
+ diff = diff_minus_one;
+ } else {
+ break;
+ }
+ }
+ *value_scale = new_scale.wrapping_sub(diff);
+ }
+}
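In plain integer terms, `rescale_internal` multiplies the mantissa by a power of ten to raise the scale and divides digit by digit to lower it, rounding on the last digit dropped. A sketch over u128 instead of the `[u32; 3]` limbs (names are illustrative):

fn rescale(mantissa: u128, scale: u32, new_scale: u32) -> u128 {
    if new_scale >= scale {
        mantissa * 10u128.pow(new_scale - scale)
    } else {
        let mut q = mantissa;
        let mut last_digit = 0;
        for _ in 0..(scale - new_scale) {
            last_digit = q % 10;
            q /= 10;
        }
        // Only the final dropped digit decides the rounding, as in the loop above.
        if last_digit >= 5 {
            q + 1
        } else {
            q
        }
    }
}

fn main() {
    assert_eq!(rescale(123, 2, 3), 1230); // 1.23 -> 1.230
    assert_eq!(rescale(1235, 3, 2), 124); // 1.235 -> 1.24
}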
+
+#[cfg(feature = "legacy-ops")]
+pub(crate) fn add_by_internal(value: &mut [u32], by: &[u32]) -> u32 {
+ let mut carry: u64 = 0;
+ let vl = value.len();
+ let bl = by.len();
+ if vl >= bl {
+ let mut sum: u64;
+ for i in 0..bl {
+ sum = u64::from(value[i]) + u64::from(by[i]) + carry;
+ value[i] = (sum & U32_MASK) as u32;
+ carry = sum >> 32;
+ }
+ if vl > bl && carry > 0 {
+ for i in value.iter_mut().skip(bl) {
+ sum = u64::from(*i) + carry;
+ *i = (sum & U32_MASK) as u32;
+ carry = sum >> 32;
+ if carry == 0 {
+ break;
+ }
+ }
+ }
+ } else if vl + 1 == bl {
+ // Overflow, by default, is anything in the high portion of by
+ let mut sum: u64;
+ for i in 0..vl {
+ sum = u64::from(value[i]) + u64::from(by[i]) + carry;
+ value[i] = (sum & U32_MASK) as u32;
+ carry = sum >> 32;
+ }
+ if by[vl] > 0 {
+ carry += u64::from(by[vl]);
+ }
+ } else {
+ panic!("Internal error: add using incompatible length arrays. {} <- {}", vl, bl);
+ }
+ carry as u32
+}
+
+pub(crate) fn add_by_internal_flattened(value: &mut [u32; 3], by: u32) -> u32 {
+ manage_add_by_internal(by, value)
+}
+
+#[inline]
+pub(crate) fn add_one_internal(value: &mut [u32; 3]) -> u32 {
+ manage_add_by_internal(1, value)
+}
+
+// The `u64 as u32` casts are safe because of the widening and 32-bit shifts
+#[inline]
+pub(crate) fn manage_add_by_internal<const N: usize>(initial_carry: u32, value: &mut [u32; N]) -> u32 {
+ let mut carry = u64::from(initial_carry);
+ let mut iter = 0..value.len();
+ let mut sum = 0;
+
+ let mut sum_fn = |local_carry: &mut u64, idx| {
+ sum = u64::from(value[idx]).wrapping_add(*local_carry);
+ value[idx] = (sum & U32_MASK) as u32;
+ *local_carry = sum.wrapping_shr(32);
+ };
+
+ if let Some(idx) = iter.next() {
+ sum_fn(&mut carry, idx);
+ }
+
+ for idx in iter {
+ if carry > 0 {
+ sum_fn(&mut carry, idx);
+ }
+ }
+
+ carry as u32
+}
+
+pub(crate) fn sub_by_internal(value: &mut [u32], by: &[u32]) -> u32 {
+ // The way this works is similar to long subtraction
+ // Let's assume we're working with bytes for simplicity in an example:
+ // 257 - 8 = 249
+ // 0000_0001 0000_0001 - 0000_0000 0000_1000 = 0000_0000 1111_1001
+ // We start by doing the first byte...
+ // Overflow = 0
+ // Left = 0000_0001 (1)
+ // Right = 0000_1000 (8)
+ // Firstly, we make sure the left and right are scaled up to twice the size
+ // Left = 0000_0000 0000_0001
+ // Right = 0000_0000 0000_1000
+ // We then subtract right from left
+ // Result = Left - Right = 1111_1111 1111_1001
+ // We subtract the overflow, which in this case is 0.
+ // Because left < right (1 < 8) we invert the high part.
+ // Lo = 1111_1001
+ // Hi = 1111_1111 -> 0000_0001
+ // Lo is the field, hi is the overflow.
+ // We do the same for the second byte...
+ // Overflow = 1
+ // Left = 0000_0001
+ // Right = 0000_0000
+ // Result = Left - Right = 0000_0000 0000_0001
+ // We subtract the overflow...
+ // Result = 0000_0000 0000_0001 - 1 = 0
+ // And we invert the high, just because (invert 0 = 0).
+ // So our result is:
+ // 0000_0000 1111_1001
+ let mut overflow = 0;
+ let vl = value.len();
+ let bl = by.len();
+ for i in 0..vl {
+ if i >= bl {
+ break;
+ }
+ let (lo, hi) = sub_part(value[i], by[i], overflow);
+ value[i] = lo;
+ overflow = hi;
+ }
+ overflow
+}
+
+fn sub_part(left: u32, right: u32, overflow: u32) -> (u32, u32) {
+ let part = 0x1_0000_0000u64 + u64::from(left) - (u64::from(right) + u64::from(overflow));
+ let lo = part as u32;
+ let hi = 1 - ((part >> 32) as u32);
+ (lo, hi)
+}
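The walkthrough in the comment above maps directly onto `sub_part`. The same borrow trick at byte width (u8 parts, so it reproduces the 257 - 8 example exactly) runs as:

fn sub_part_u8(left: u8, right: u8, borrow: u8) -> (u8, u8) {
    // 0x100 plays the role of the 0x1_0000_0000 constant in sub_part.
    let part = 0x100u16 + u16::from(left) - (u16::from(right) + u16::from(borrow));
    (part as u8, 1 - (part >> 8) as u8)
}

fn main() {
    // 257 - 8 = 249, one byte at a time, low byte first.
    let (lo, borrow) = sub_part_u8(1, 8, 0);
    assert_eq!((lo, borrow), (0b1111_1001, 1)); // 249, with a borrow out
    let (hi, borrow) = sub_part_u8(1, 0, borrow);
    assert_eq!((hi, borrow), (0, 0));
}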
+
+// Returns overflow
+#[inline]
+pub(crate) fn mul_by_10(bits: &mut [u32; 3]) -> u32 {
+ let mut overflow = 0u64;
+ for b in bits.iter_mut() {
+ let result = u64::from(*b) * 10u64 + overflow;
+ let hi = (result >> 32) & U32_MASK;
+ let lo = (result & U32_MASK) as u32;
+ *b = lo;
+ overflow = hi;
+ }
+
+ overflow as u32
+}
+
+// Returns overflow
+pub(crate) fn mul_by_u32(bits: &mut [u32], m: u32) -> u32 {
+ let mut overflow = 0;
+ for b in bits.iter_mut() {
+ let (lo, hi) = mul_part(*b, m, overflow);
+ *b = lo;
+ overflow = hi;
+ }
+ overflow
+}
+
+pub(crate) fn mul_part(left: u32, right: u32, high: u32) -> (u32, u32) {
+ let result = u64::from(left) * u64::from(right) + u64::from(high);
+ let hi = ((result >> 32) & U32_MASK) as u32;
+ let lo = (result & U32_MASK) as u32;
+ (lo, hi)
+}
+
+// Returns remainder
+pub(crate) fn div_by_u32<const N: usize>(bits: &mut [u32; N], divisor: u32) -> u32 {
+ if divisor == 0 {
+ // Divide by zero
+ panic!("Internal error: divide by zero");
+ } else if divisor == 1 {
+ // dividend remains unchanged
+ 0
+ } else {
+ let mut remainder = 0u32;
+ let divisor = u64::from(divisor);
+ for part in bits.iter_mut().rev() {
+ let temp = (u64::from(remainder) << 32) + u64::from(*part);
+ remainder = (temp % divisor) as u32;
+ *part = (temp / divisor) as u32;
+ }
+
+ remainder
+ }
+}
+
+pub(crate) fn div_by_1x(bits: &mut [u32; 3], power: usize) -> u32 {
+ let mut remainder = 0u32;
+ let divisor = POWERS_10[power] as u64;
+ let temp = ((remainder as u64) << 32) + (bits[2] as u64);
+ remainder = (temp % divisor) as u32;
+ bits[2] = (temp / divisor) as u32;
+ let temp = ((remainder as u64) << 32) + (bits[1] as u64);
+ remainder = (temp % divisor) as u32;
+ bits[1] = (temp / divisor) as u32;
+ let temp = ((remainder as u64) << 32) + (bits[0] as u64);
+ remainder = (temp % divisor) as u32;
+ bits[0] = (temp / divisor) as u32;
+ remainder
+}
+
+#[inline]
+pub(crate) fn shl1_internal(bits: &mut [u32], carry: u32) -> u32 {
+ let mut carry = carry;
+ for part in bits.iter_mut() {
+ let b = *part >> 31;
+ *part = (*part << 1) | carry;
+ carry = b;
+ }
+ carry
+}
+
+#[inline]
+pub(crate) fn cmp_internal(left: &[u32; 3], right: &[u32; 3]) -> core::cmp::Ordering {
+ let left_hi: u32 = left[2];
+ let right_hi: u32 = right[2];
+ let left_lo: u64 = u64::from(left[1]) << 32 | u64::from(left[0]);
+ let right_lo: u64 = u64::from(right[1]) << 32 | u64::from(right[0]);
+ if left_hi < right_hi || (left_hi <= right_hi && left_lo < right_lo) {
+ core::cmp::Ordering::Less
+ } else if left_hi == right_hi && left_lo == right_lo {
+ core::cmp::Ordering::Equal
+ } else {
+ core::cmp::Ordering::Greater
+ }
+}
+
+#[inline]
+pub(crate) fn is_all_zero<const N: usize>(bits: &[u32; N]) -> bool {
+ bits.iter().all(|b| *b == 0)
+}
+
+#[cfg(test)]
+mod test {
+ // Tests on private methods.
+ //
+ // All public tests should go under `tests/`.
+
+ use super::*;
+ use crate::prelude::*;
+
+ #[test]
+ fn it_can_rescale_internal() {
+ fn extract(value: &str) -> ([u32; 3], u32) {
+ let v = Decimal::from_str(value).unwrap();
+ (v.mantissa_array3(), v.scale())
+ }
+
+ let tests = &[
+ ("1", 0, "1", 0),
+ ("1", 1, "1.0", 1),
+ ("1", 5, "1.00000", 5),
+ ("1", 10, "1.0000000000", 10),
+ ("1", 20, "1.00000000000000000000", 20),
+ (
+ "0.6386554621848739495798319328",
+ 27,
+ "0.638655462184873949579831933",
+ 27,
+ ),
+ (
+ "843.65000000", // Scale 8
+ 25,
+ "843.6500000000000000000000000",
+ 25,
+ ),
+ (
+ "843.65000000", // Scale 8
+ 30,
+ "843.6500000000000000000000000",
+ 25, // Only fits 25
+ ),
+ ("0", 130, "0.000000000000000000000000000000", 28),
+ ];
+
+ for &(value_raw, new_scale, expected_value, expected_scale) in tests {
+ let (expected_value, _) = extract(expected_value);
+ let (mut value, mut value_scale) = extract(value_raw);
+ rescale_internal(&mut value, &mut value_scale, new_scale);
+ assert_eq!(value, expected_value);
+ assert_eq!(
+ value_scale, expected_scale,
+ "value: {}, requested scale: {}",
+ value_raw, new_scale
+ );
+ }
+ }
+
+ #[test]
+ fn test_shl1_internal() {
+ struct TestCase {
+ // One thing to be cautious of is that the structure of a number here for shifting left is
+ // the reverse of how you may conceive this mentally. i.e. a[2] contains the higher order
+ // bits: a[2] a[1] a[0]
+ given: [u32; 3],
+ given_carry: u32,
+ expected: [u32; 3],
+ expected_carry: u32,
+ }
+ let tests = [
+ TestCase {
+ given: [1, 0, 0],
+ given_carry: 0,
+ expected: [2, 0, 0],
+ expected_carry: 0,
+ },
+ TestCase {
+ given: [1, 0, 2147483648],
+ given_carry: 1,
+ expected: [3, 0, 0],
+ expected_carry: 1,
+ },
+ ];
+ for case in &tests {
+ let mut test = [case.given[0], case.given[1], case.given[2]];
+ let carry = shl1_internal(&mut test, case.given_carry);
+ assert_eq!(
+ test, case.expected,
+ "Bits: {:?} << 1 | {}",
+ case.given, case.given_carry
+ );
+ assert_eq!(
+ carry, case.expected_carry,
+ "Carry: {:?} << 1 | {}",
+ case.given, case.given_carry
+ )
+ }
+ }
+}
diff --git a/third_party/rust/rust_decimal/src/ops/cmp.rs b/third_party/rust/rust_decimal/src/ops/cmp.rs
new file mode 100644
index 0000000000..636085bff5
--- /dev/null
+++ b/third_party/rust/rust_decimal/src/ops/cmp.rs
@@ -0,0 +1,101 @@
+use crate::constants::{MAX_I32_SCALE, POWERS_10, U32_MASK, U32_MAX};
+use crate::decimal::Decimal;
+use crate::ops::common::Dec64;
+
+use core::cmp::Ordering;
+
+pub(crate) fn cmp_impl(d1: &Decimal, d2: &Decimal) -> Ordering {
+ if d2.is_zero() {
+ return if d1.is_zero() {
+ Ordering::Equal
+ } else if d1.is_sign_negative() {
+ Ordering::Less
+ } else {
+ Ordering::Greater
+ };
+ }
+ if d1.is_zero() {
+ return if d2.is_sign_negative() {
+ Ordering::Greater
+ } else {
+ Ordering::Less
+ };
+ }
+ // If the sign is different, then it's an easy answer
+ if d1.is_sign_negative() != d2.is_sign_negative() {
+ return if d1.is_sign_negative() {
+ Ordering::Less
+ } else {
+ Ordering::Greater
+ };
+ }
+
+ // Otherwise, do a deep comparison
+ let d1 = Dec64::new(d1);
+ let d2 = Dec64::new(d2);
+ // We know both signs are the same here so flip it here.
+ // Negative is handled differently. i.e. 0.5 > 0.01 however -0.5 < -0.01
+ if d1.negative {
+ cmp_internal(&d2, &d1)
+ } else {
+ cmp_internal(&d1, &d2)
+ }
+}
+
+pub(in crate::ops) fn cmp_internal(d1: &Dec64, d2: &Dec64) -> Ordering {
+ // This function ignores sign
+ let mut d1_low = d1.low64;
+ let mut d1_high = d1.hi;
+ let mut d2_low = d2.low64;
+ let mut d2_high = d2.hi;
+
+ // If the scale factors aren't equal then
+ if d1.scale != d2.scale {
+ let mut diff = d2.scale as i32 - d1.scale as i32;
+ if diff < 0 {
+ diff = -diff;
+ if !rescale(&mut d2_low, &mut d2_high, diff as u32) {
+ return Ordering::Less;
+ }
+ } else if !rescale(&mut d1_low, &mut d1_high, diff as u32) {
+ return Ordering::Greater;
+ }
+ }
+
+ // They're the same scale, do a standard bitwise comparison
+ let hi_order = d1_high.cmp(&d2_high);
+ if hi_order != Ordering::Equal {
+ return hi_order;
+ }
+ d1_low.cmp(&d2_low)
+}
+
+fn rescale(low64: &mut u64, high: &mut u32, diff: u32) -> bool {
+ let mut diff = diff as i32;
+ // We need to modify d1 by 10^diff to get it to the same scale as d2
+ loop {
+ let power = if diff >= MAX_I32_SCALE {
+ POWERS_10[9]
+ } else {
+ POWERS_10[diff as usize]
+ } as u64;
+ let tmp_lo_32 = (*low64 & U32_MASK) * power;
+ let mut tmp = (*low64 >> 32) * power + (tmp_lo_32 >> 32);
+ *low64 = (tmp_lo_32 & U32_MASK) + (tmp << 32);
+ tmp >>= 32;
+ tmp = tmp.wrapping_add((*high as u64) * power);
+ // Indicates > 96 bits
+ if tmp > U32_MAX {
+ return false;
+ }
+ *high = tmp as u32;
+
+ // Keep scaling if there is more to go
+ diff -= MAX_I32_SCALE;
+ if diff <= 0 {
+ break;
+ }
+ }
+
+ true
+}
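Conceptually the comparison rescales the operand with the smaller scale up to the larger scale and compares mantissas; when `rescale` above reports an overflow past 96 bits, that operand is necessarily the larger magnitude and the ordering is settled immediately. A u128 sketch of the aligned comparison (not the limb code):

use core::cmp::Ordering;

fn cmp_magnitudes(m1: u128, scale1: u32, m2: u128, scale2: u32) -> Ordering {
    let target = scale1.max(scale2);
    let a = m1 * 10u128.pow(target - scale1);
    let b = m2 * 10u128.pow(target - scale2);
    a.cmp(&b)
}

fn main() {
    // 1.2 vs 1.15: compare 120 against 115 at scale 2.
    assert_eq!(cmp_magnitudes(12, 1, 115, 2), Ordering::Greater);
}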
diff --git a/third_party/rust/rust_decimal/src/ops/common.rs b/third_party/rust/rust_decimal/src/ops/common.rs
new file mode 100644
index 0000000000..c29362d824
--- /dev/null
+++ b/third_party/rust/rust_decimal/src/ops/common.rs
@@ -0,0 +1,455 @@
+use crate::constants::{MAX_I32_SCALE, MAX_PRECISION_I32, POWERS_10};
+use crate::Decimal;
+
+#[derive(Debug)]
+pub struct Buf12 {
+ pub data: [u32; 3],
+}
+
+impl Buf12 {
+ pub(super) const fn from_dec64(value: &Dec64) -> Self {
+ Buf12 {
+ data: [value.low64 as u32, (value.low64 >> 32) as u32, value.hi],
+ }
+ }
+
+ pub(super) const fn from_decimal(value: &Decimal) -> Self {
+ Buf12 {
+ data: value.mantissa_array3(),
+ }
+ }
+
+ #[inline(always)]
+ pub const fn lo(&self) -> u32 {
+ self.data[0]
+ }
+ #[inline(always)]
+ pub const fn mid(&self) -> u32 {
+ self.data[1]
+ }
+ #[inline(always)]
+ pub const fn hi(&self) -> u32 {
+ self.data[2]
+ }
+ #[inline(always)]
+ pub fn set_lo(&mut self, value: u32) {
+ self.data[0] = value;
+ }
+ #[inline(always)]
+ pub fn set_mid(&mut self, value: u32) {
+ self.data[1] = value;
+ }
+ #[inline(always)]
+ pub fn set_hi(&mut self, value: u32) {
+ self.data[2] = value;
+ }
+
+ #[inline(always)]
+ pub const fn low64(&self) -> u64 {
+ ((self.data[1] as u64) << 32) | (self.data[0] as u64)
+ }
+
+ #[inline(always)]
+ pub fn set_low64(&mut self, value: u64) {
+ self.data[1] = (value >> 32) as u32;
+ self.data[0] = value as u32;
+ }
+
+ #[inline(always)]
+ pub const fn high64(&self) -> u64 {
+ ((self.data[2] as u64) << 32) | (self.data[1] as u64)
+ }
+
+ #[inline(always)]
+ pub fn set_high64(&mut self, value: u64) {
+ self.data[2] = (value >> 32) as u32;
+ self.data[1] = value as u32;
+ }
+
+ // Determine the maximum value of x that ensures that the quotient when scaled up by 10^x
+ // still fits in 96 bits. Ultimately, we want to make scale positive - if we can't then
+ // we're going to overflow. Because x is ultimately used to lookup inside the POWERS array, it
+ // must be a valid value 0 <= x <= 9
+ pub fn find_scale(&self, scale: i32) -> Option<usize> {
+ const OVERFLOW_MAX_9_HI: u32 = 4;
+ const OVERFLOW_MAX_8_HI: u32 = 42;
+ const OVERFLOW_MAX_7_HI: u32 = 429;
+ const OVERFLOW_MAX_6_HI: u32 = 4294;
+ const OVERFLOW_MAX_5_HI: u32 = 42949;
+ const OVERFLOW_MAX_4_HI: u32 = 429496;
+ const OVERFLOW_MAX_3_HI: u32 = 4294967;
+ const OVERFLOW_MAX_2_HI: u32 = 42949672;
+ const OVERFLOW_MAX_1_HI: u32 = 429496729;
+ const OVERFLOW_MAX_9_LOW64: u64 = 5441186219426131129;
+
+ let hi = self.data[2];
+ let low64 = self.low64();
+ let mut x = 0usize;
+
+ // Quick check to stop us from trying to scale any more.
+ //
+ if hi > OVERFLOW_MAX_1_HI {
+ // If it's less than 0, which it probably is - overflow. We can't do anything.
+ if scale < 0 {
+ return None;
+ }
+ return Some(x);
+ }
+
+ if scale > MAX_PRECISION_I32 - 9 {
+ // We can't scale by 10^9 without exceeding the max scale factor.
+ // Instead, we'll try to scale by the most that we can and see if that works.
+ // This is safe to do due to the check above. e.g. scale > 19 in the above, so it will
+ // evaluate to 9 or less below.
+ x = (MAX_PRECISION_I32 - scale) as usize;
+ if hi < POWER_OVERFLOW_VALUES[x - 1].data[2] {
+ if x as i32 + scale < 0 {
+ // We still overflow
+ return None;
+ }
+ return Some(x);
+ }
+ } else if hi < OVERFLOW_MAX_9_HI || hi == OVERFLOW_MAX_9_HI && low64 <= OVERFLOW_MAX_9_LOW64 {
+ return Some(9);
+ }
+
+ // Do a binary search to find a power to scale by that is less than 9
+ x = if hi > OVERFLOW_MAX_5_HI {
+ if hi > OVERFLOW_MAX_3_HI {
+ if hi > OVERFLOW_MAX_2_HI {
+ 1
+ } else {
+ 2
+ }
+ } else if hi > OVERFLOW_MAX_4_HI {
+ 3
+ } else {
+ 4
+ }
+ } else if hi > OVERFLOW_MAX_7_HI {
+ if hi > OVERFLOW_MAX_6_HI {
+ 5
+ } else {
+ 6
+ }
+ } else if hi > OVERFLOW_MAX_8_HI {
+ 7
+ } else {
+ 8
+ };
+
+ // Double check what we've found won't overflow. Otherwise, we go one below.
+ if hi == POWER_OVERFLOW_VALUES[x - 1].data[2] && low64 > POWER_OVERFLOW_VALUES[x - 1].low64() {
+ x -= 1;
+ }
+
+ // Confirm we've actually resolved things
+ if x as i32 + scale < 0 {
+ None
+ } else {
+ Some(x)
+ }
+ }
+}
+
+// This is a table of the largest values that will not overflow when multiplied
+// by a given power as represented by the index.
+static POWER_OVERFLOW_VALUES: [Buf12; 8] = [
+ Buf12 {
+ data: [2576980377, 2576980377, 429496729],
+ },
+ Buf12 {
+ data: [687194767, 4123168604, 42949672],
+ },
+ Buf12 {
+ data: [2645699854, 1271310319, 4294967],
+ },
+ Buf12 {
+ data: [694066715, 3133608139, 429496],
+ },
+ Buf12 {
+ data: [2216890319, 2890341191, 42949],
+ },
+ Buf12 {
+ data: [2369172679, 4154504685, 4294],
+ },
+ Buf12 {
+ data: [4102387834, 2133437386, 429],
+ },
+ Buf12 {
+ data: [410238783, 4078814305, 42],
+ },
+];
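A quick u128 check of what the table holds: entry `i` is floor((2^96 - 1) / 10^(i + 1)), the largest 96 bit mantissa that can still be multiplied by 10^(i + 1) without leaving 96 bits. Standalone sketch, duplicating the constants above:

fn main() {
    let table: [[u32; 3]; 8] = [
        [2576980377, 2576980377, 429496729],
        [687194767, 4123168604, 42949672],
        [2645699854, 1271310319, 4294967],
        [694066715, 3133608139, 429496],
        [2216890319, 2890341191, 42949],
        [2369172679, 4154504685, 4294],
        [4102387834, 2133437386, 429],
        [410238783, 4078814305, 42],
    ];
    let max96 = (1u128 << 96) - 1;
    for (i, limbs) in table.iter().enumerate() {
        let value = ((limbs[2] as u128) << 64) | ((limbs[1] as u128) << 32) | limbs[0] as u128;
        assert_eq!(value, max96 / 10u128.pow(i as u32 + 1));
    }
}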
+
+pub(super) struct Dec64 {
+ pub negative: bool,
+ pub scale: u32,
+ pub hi: u32,
+ pub low64: u64,
+}
+
+impl Dec64 {
+ pub(super) const fn new(d: &Decimal) -> Dec64 {
+ let m = d.mantissa_array3();
+ if m[1] == 0 {
+ Dec64 {
+ negative: d.is_sign_negative(),
+ scale: d.scale(),
+ hi: m[2],
+ low64: m[0] as u64,
+ }
+ } else {
+ Dec64 {
+ negative: d.is_sign_negative(),
+ scale: d.scale(),
+ hi: m[2],
+ low64: ((m[1] as u64) << 32) | (m[0] as u64),
+ }
+ }
+ }
+
+ #[inline(always)]
+ pub(super) const fn lo(&self) -> u32 {
+ self.low64 as u32
+ }
+ #[inline(always)]
+ pub(super) const fn mid(&self) -> u32 {
+ (self.low64 >> 32) as u32
+ }
+
+ #[inline(always)]
+ pub(super) const fn high64(&self) -> u64 {
+ (self.low64 >> 32) | ((self.hi as u64) << 32)
+ }
+
+ pub(super) const fn to_decimal(&self) -> Decimal {
+ Decimal::from_parts(
+ self.low64 as u32,
+ (self.low64 >> 32) as u32,
+ self.hi,
+ self.negative,
+ self.scale,
+ )
+ }
+}
+
+pub struct Buf16 {
+ pub data: [u32; 4],
+}
+
+impl Buf16 {
+ pub const fn zero() -> Self {
+ Buf16 { data: [0, 0, 0, 0] }
+ }
+
+ pub const fn low64(&self) -> u64 {
+ ((self.data[1] as u64) << 32) | (self.data[0] as u64)
+ }
+
+ pub fn set_low64(&mut self, value: u64) {
+ self.data[1] = (value >> 32) as u32;
+ self.data[0] = value as u32;
+ }
+
+ pub const fn mid64(&self) -> u64 {
+ ((self.data[2] as u64) << 32) | (self.data[1] as u64)
+ }
+
+ pub fn set_mid64(&mut self, value: u64) {
+ self.data[2] = (value >> 32) as u32;
+ self.data[1] = value as u32;
+ }
+
+ pub const fn high64(&self) -> u64 {
+ ((self.data[3] as u64) << 32) | (self.data[2] as u64)
+ }
+
+ pub fn set_high64(&mut self, value: u64) {
+ self.data[3] = (value >> 32) as u32;
+ self.data[2] = value as u32;
+ }
+}
+
+#[derive(Debug)]
+pub struct Buf24 {
+ pub data: [u32; 6],
+}
+
+impl Buf24 {
+ pub const fn zero() -> Self {
+ Buf24 {
+ data: [0, 0, 0, 0, 0, 0],
+ }
+ }
+
+ pub const fn low64(&self) -> u64 {
+ ((self.data[1] as u64) << 32) | (self.data[0] as u64)
+ }
+
+ pub fn set_low64(&mut self, value: u64) {
+ self.data[1] = (value >> 32) as u32;
+ self.data[0] = value as u32;
+ }
+
+ #[allow(dead_code)]
+ pub const fn mid64(&self) -> u64 {
+ ((self.data[3] as u64) << 32) | (self.data[2] as u64)
+ }
+
+ pub fn set_mid64(&mut self, value: u64) {
+ self.data[3] = (value >> 32) as u32;
+ self.data[2] = value as u32;
+ }
+
+ #[allow(dead_code)]
+ pub const fn high64(&self) -> u64 {
+ ((self.data[5] as u64) << 32) | (self.data[4] as u64)
+ }
+
+ pub fn set_high64(&mut self, value: u64) {
+ self.data[5] = (value >> 32) as u32;
+ self.data[4] = value as u32;
+ }
+
+ pub const fn upper_word(&self) -> usize {
+ if self.data[5] > 0 {
+ return 5;
+ }
+ if self.data[4] > 0 {
+ return 4;
+ }
+ if self.data[3] > 0 {
+ return 3;
+ }
+ if self.data[2] > 0 {
+ return 2;
+ }
+ if self.data[1] > 0 {
+ return 1;
+ }
+ 0
+ }
+
+ // Attempt to rescale the number into 96 bits. If successful, the scale is returned wrapped
+ // in an Option. If it failed due to overflow, we return None.
+ // * `upper` - Index of last non-zero value in self.
+ // * `scale` - Current scale factor for this value.
+ pub fn rescale(&mut self, upper: usize, scale: u32) -> Option<u32> {
+ let mut scale = scale as i32;
+ let mut upper = upper;
+
+ // Determine a rescale target to start with
+ let mut rescale_target = 0i32;
+ if upper > 2 {
+ rescale_target = upper as i32 * 32 - 64 - 1;
+ rescale_target -= self.data[upper].leading_zeros() as i32;
+ rescale_target = ((rescale_target * 77) >> 8) + 1;
+ if rescale_target > scale {
+ return None;
+ }
+ }
+
+ // Make sure we scale enough to bring it into a valid range
+ if rescale_target < scale - MAX_PRECISION_I32 {
+ rescale_target = scale - MAX_PRECISION_I32;
+ }
+
+ if rescale_target > 0 {
+ // We're going to keep reducing by powers of 10. So, start by reducing the scale by
+ // that amount.
+ scale -= rescale_target;
+ let mut sticky = 0;
+ let mut remainder = 0;
+ loop {
+ sticky |= remainder;
+ let mut power = if rescale_target > 8 {
+ POWERS_10[9]
+ } else {
+ POWERS_10[rescale_target as usize]
+ };
+
+ let high = self.data[upper];
+ let high_quotient = high / power;
+ remainder = high - high_quotient * power;
+
+ for item in self.data.iter_mut().rev().skip(6 - upper) {
+ let num = (*item as u64).wrapping_add((remainder as u64) << 32);
+ *item = (num / power as u64) as u32;
+ remainder = (num as u32).wrapping_sub(item.wrapping_mul(power));
+ }
+
+ self.data[upper] = high_quotient;
+
+ // If the high quotient was zero then decrease the upper bound
+ if high_quotient == 0 && upper > 0 {
+ upper -= 1;
+ }
+ if rescale_target > MAX_I32_SCALE {
+ // Scale some more
+ rescale_target -= MAX_I32_SCALE;
+ continue;
+ }
+
+ // If we fit into 96 bits then we've scaled enough. Otherwise, scale once more.
+ if upper > 2 {
+ if scale == 0 {
+ return None;
+ }
+ // Equivalent to scaling down by 10
+ rescale_target = 1;
+ scale -= 1;
+ continue;
+ }
+
+ // Round the final result.
+ power >>= 1;
+ let carried = if power <= remainder {
+ // If we're less than half then we're fine. Otherwise, we round if odd or if the
+ // sticky bit is set.
+ if power < remainder || ((self.data[0] & 1) | sticky) != 0 {
+ // Round up
+ self.data[0] = self.data[0].wrapping_add(1);
+ // Check if we carried
+ self.data[0] == 0
+ } else {
+ false
+ }
+ } else {
+ false
+ };
+
+ // If we carried then propagate through the portions
+ if carried {
+ let mut pos = 0;
+ for (index, value) in self.data.iter_mut().enumerate().skip(1) {
+ pos = index;
+ *value = value.wrapping_add(1);
+ if *value != 0 {
+ break;
+ }
+ }
+
+ // If we ended up rounding over the 96 bits then we'll try to rescale down (again)
+ if pos > 2 {
+ // Nothing to scale down from will cause overflow
+ if scale == 0 {
+ return None;
+ }
+
+ // Loop back around using scale of 10.
+ // Reset the sticky bit and remainder before looping.
+ upper = pos;
+ sticky = 0;
+ remainder = 0;
+ rescale_target = 1;
+ scale -= 1;
+ continue;
+ }
+ }
+ break;
+ }
+ }
+
+ Some(scale as u32)
+ }
+}
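The `(x * 77) >> 8` step inside `rescale` estimates how many decimal digits a count of excess bits represents: 77/256 (about 0.3008) is a slight under-approximation of log10(2) (about 0.30103). A standalone check that the estimate never overshoots and lags by at most one digit over the range that matters here:

fn main() {
    for bits in 1u32..=96 {
        let approx = (bits * 77) >> 8;
        let exact = (f64::from(bits) * 2f64.log10()) as u32;
        assert!(approx <= exact && exact - approx <= 1);
    }
}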
diff --git a/third_party/rust/rust_decimal/src/ops/div.rs b/third_party/rust/rust_decimal/src/ops/div.rs
new file mode 100644
index 0000000000..3b5ec577b2
--- /dev/null
+++ b/third_party/rust/rust_decimal/src/ops/div.rs
@@ -0,0 +1,658 @@
+use crate::constants::{MAX_PRECISION_I32, POWERS_10};
+use crate::decimal::{CalculationResult, Decimal};
+use crate::ops::common::{Buf12, Buf16, Dec64};
+
+use core::cmp::Ordering;
+use core::ops::BitXor;
+
+impl Buf12 {
+ // Returns Ok if successful, or Err(DivError::Overflow) if the addition overflowed 96 bits
+ fn add32(&mut self, value: u32) -> Result<(), DivError> {
+ let value = value as u64;
+ let new = self.low64().wrapping_add(value);
+ self.set_low64(new);
+ if new < value {
+ self.data[2] = self.data[2].wrapping_add(1);
+ if self.data[2] == 0 {
+ return Err(DivError::Overflow);
+ }
+ }
+ Ok(())
+ }
+
+ // Divide a Decimal union by a 32 bit divisor.
+ // Self is overwritten with the quotient.
+ // Return value is a 32 bit remainder.
+ fn div32(&mut self, divisor: u32) -> u32 {
+ let divisor64 = divisor as u64;
+ // See if we can get by using a simple u64 division
+ if self.data[2] != 0 {
+ let mut temp = self.high64();
+ let q64 = temp / divisor64;
+ self.set_high64(q64);
+
+ // Calculate the "remainder"
+ temp = ((temp - q64 * divisor64) << 32) | (self.data[0] as u64);
+ if temp == 0 {
+ return 0;
+ }
+ let q32 = (temp / divisor64) as u32;
+ self.data[0] = q32;
+ ((temp as u32).wrapping_sub(q32.wrapping_mul(divisor))) as u32
+ } else {
+ // Super easy divisor
+ let low64 = self.low64();
+ if low64 == 0 {
+ // Nothing to do
+ return 0;
+ }
+ // Do the calc
+ let quotient = low64 / divisor64;
+ self.set_low64(quotient);
+ // Remainder is the leftover that wasn't used
+ (low64.wrapping_sub(quotient.wrapping_mul(divisor64))) as u32
+ }
+ }
+
+ // Divide the number by a power constant
+ // Returns true if division was successful
+ fn div32_const(&mut self, pow: u32) -> bool {
+ let pow64 = pow as u64;
+ let high64 = self.high64();
+ let lo = self.data[0] as u64;
+ let div64: u64 = high64 / pow64;
+ let div = ((((high64 - div64 * pow64) << 32) + lo) / pow64) as u32;
+ if self.data[0] == div.wrapping_mul(pow) {
+ self.set_high64(div64);
+ self.data[0] = div;
+ true
+ } else {
+ false
+ }
+ }
+}
+
+impl Buf16 {
+ // Does a partial divide with a 64 bit divisor. The divisor in this case must be 64 bits
+ // otherwise various assumptions fail (e.g. 32 bit quotient).
+ // To assist, the upper 64 bits must be greater than the divisor for this to succeed.
+ // Consequently, it will return the quotient as a 32 bit number and overwrite self with the
+ // 64 bit remainder.
+ pub(super) fn partial_divide_64(&mut self, divisor: u64) -> u32 {
+ // We make this assertion here, however below we pivot based on the data
+ debug_assert!(divisor > self.mid64());
+
+ // If we have an empty high bit, then divisor must be greater than the dividend due to
+ // the assumption that the divisor REQUIRES 64 bits.
+ if self.data[2] == 0 {
+ let low64 = self.low64();
+ if low64 < divisor {
+ // We can't divide at all so the result is 0. The dividend remains untouched since
+ // the full amount is the remainder.
+ return 0;
+ }
+
+ let quotient = low64 / divisor;
+ self.set_low64(low64 - (quotient * divisor));
+ return quotient as u32;
+ }
+
+ // Do a simple check to see if the hi portion of the dividend is greater than the hi
+ // portion of the divisor.
+ let divisor_hi32 = (divisor >> 32) as u32;
+ if self.data[2] >= divisor_hi32 {
+ // We know that the divisor goes into this at MOST u32::max times.
+ // So we kick things off, with that assumption
+ let mut low64 = self.low64();
+ low64 = low64.wrapping_sub(divisor << 32).wrapping_add(divisor);
+ let mut quotient = u32::MAX;
+
+ // If we went negative then keep adding it back in
+ loop {
+ if low64 < divisor {
+ break;
+ }
+ quotient = quotient.wrapping_sub(1);
+ low64 = low64.wrapping_add(divisor);
+ }
+ self.set_low64(low64);
+ return quotient;
+ }
+
+ let mid64 = self.mid64();
+ let divisor_hi32_64 = divisor_hi32 as u64;
+ if mid64 < divisor_hi32_64 as u64 {
+ // similar situation as above where we've got nothing left to divide
+ return 0;
+ }
+
+ let mut quotient = mid64 / divisor_hi32_64;
+ let mut remainder = self.data[0] as u64 | ((mid64 - quotient * divisor_hi32_64) << 32);
+
+ // Do quotient * lo divisor
+ let product = quotient * (divisor & 0xFFFF_FFFF);
+ remainder = remainder.wrapping_sub(product);
+
+ // Check if we've gone negative. If so, add it back
+ if remainder > product.bitxor(u64::MAX) {
+ loop {
+ quotient = quotient.wrapping_sub(1);
+ remainder = remainder.wrapping_add(divisor);
+ if remainder < divisor {
+ break;
+ }
+ }
+ }
+
+ self.set_low64(remainder);
+ quotient as u32
+ }
+
+ // Does a partial divide with a 96 bit divisor. The divisor in this case must require 96 bits
+ // otherwise various assumptions fail (e.g. 32 bit quotient).
+ pub(super) fn partial_divide_96(&mut self, divisor: &Buf12) -> u32 {
+ let dividend = self.high64();
+ let divisor_hi = divisor.data[2];
+ if dividend < divisor_hi as u64 {
+ // Dividend is too small - entire number is remainder
+ return 0;
+ }
+
+ let mut quo = (dividend / divisor_hi as u64) as u32;
+ let mut remainder = (dividend as u32).wrapping_sub(quo.wrapping_mul(divisor_hi));
+
+ // Compute full remainder
+ let mut prod1 = quo as u64 * divisor.data[0] as u64;
+ let mut prod2 = quo as u64 * divisor.data[1] as u64;
+ prod2 += prod1 >> 32;
+ prod1 = (prod1 & 0xFFFF_FFFF) | (prod2 << 32);
+ prod2 >>= 32;
+
+ let mut num = self.low64();
+ num = num.wrapping_sub(prod1);
+ remainder = remainder.wrapping_sub(prod2 as u32);
+
+ // If there are carries make sure they are propagated
+ if num > prod1.bitxor(u64::MAX) {
+ remainder = remainder.wrapping_sub(1);
+ if remainder < (prod2 as u32).bitxor(u32::MAX) {
+ self.set_low64(num);
+ self.data[2] = remainder;
+ return quo;
+ }
+ } else if remainder <= (prod2 as u32).bitxor(u32::MAX) {
+ self.set_low64(num);
+ self.data[2] = remainder;
+ return quo;
+ }
+
+ // Remainder went negative, add divisor back until it's positive
+ prod1 = divisor.low64();
+ loop {
+ quo = quo.wrapping_sub(1);
+ num = num.wrapping_add(prod1);
+ remainder = remainder.wrapping_add(divisor_hi);
+
+ if num < prod1 {
+ // Detected carry.
+ let tmp = remainder;
+ remainder = remainder.wrapping_add(1);
+ if tmp < divisor_hi {
+ break;
+ }
+ }
+ if remainder < divisor_hi {
+ break; // detected carry
+ }
+ }
+
+ self.set_low64(num);
+ self.data[2] = remainder;
+ quo
+ }
+}
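Both partial divide routines follow the same basic estimate then correct pattern: guess the quotient using only the top word of the divisor (which can only over-estimate, because the divisor is effectively truncated downward), then walk the guess back while the full product overshoots. A u128 sketch of that pattern with hypothetical names:

fn div_estimate_correct(dividend: u128, divisor: u64) -> (u128, u128) {
    assert!(divisor >> 32 != 0, "sketch assumes a divisor that needs the full 64 bits");
    // Estimate against only the divisor's high 32 bits: never an under-estimate.
    let mut q = (dividend >> 32) / u128::from(divisor >> 32);
    // Correct downward while the full product overshoots the dividend.
    while q * u128::from(divisor) > dividend {
        q -= 1;
    }
    (q, dividend - q * u128::from(divisor))
}

fn main() {
    let dividend: u128 = 0x1234_5678_9ABC_DEF0_1122_3344;
    let divisor: u64 = 0xFFFF_FFFF_0000_0001;
    let (q, r) = div_estimate_correct(dividend, divisor);
    assert_eq!(q, dividend / u128::from(divisor));
    assert_eq!(r, dividend % u128::from(divisor));
}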
+
+enum DivError {
+ Overflow,
+}
+
+pub(crate) fn div_impl(dividend: &Decimal, divisor: &Decimal) -> CalculationResult {
+ if divisor.is_zero() {
+ return CalculationResult::DivByZero;
+ }
+ if dividend.is_zero() {
+ return CalculationResult::Ok(Decimal::ZERO);
+ }
+ let dividend = Dec64::new(dividend);
+ let divisor = Dec64::new(divisor);
+
+ // Pre calculate the scale and the sign
+ let mut scale = (dividend.scale as i32) - (divisor.scale as i32);
+ let sign_negative = dividend.negative ^ divisor.negative;
+
+ // Set up some variables for modification throughout
+ let mut require_unscale = false;
+ let mut quotient = Buf12::from_dec64(&dividend);
+ let divisor = Buf12::from_dec64(&divisor);
+
+ // Branch depending on the complexity of the divisor
+ if divisor.data[2] | divisor.data[1] == 0 {
+ // We have a simple(r) divisor (32 bit)
+ let divisor32 = divisor.data[0];
+
+ // Remainder can only be 32 bits since the divisor is 32 bits.
+ let mut remainder = quotient.div32(divisor32);
+ let mut power_scale = 0;
+
+ // Figure out how to apply the remainder (i.e. we may have performed something like 10/3 or 8/5)
+ loop {
+ // Remainder is 0 so we have a simple situation
+ if remainder == 0 {
+ // If the scale is positive then we're actually done
+ if scale >= 0 {
+ break;
+ }
+ power_scale = 9usize.min((-scale) as usize);
+ } else {
+ // We may need to normalize later, so set the flag appropriately
+ require_unscale = true;
+
+ // We have a remainder so we effectively want to try to adjust the quotient and add
+ // the remainder into the quotient. We do this below, however first of all we want
+ // to try to avoid overflowing so we do that check first.
+ let will_overflow = if scale == MAX_PRECISION_I32 {
+ true
+ } else {
+ // Figure out how much we can scale by
+ if let Some(s) = quotient.find_scale(scale) {
+ power_scale = s;
+ } else {
+ return CalculationResult::Overflow;
+ }
+ // If it comes back as 0 (i.e. 10^0 = 1) then we're going to overflow since
+ // we're doing nothing.
+ power_scale == 0
+ };
+ if will_overflow {
+ // No more scaling can be done, but remainder is non-zero so we round if necessary.
+ let tmp = remainder << 1;
+ let round = if tmp < remainder {
+ // We round if we wrapped around
+ true
+ } else if tmp >= divisor32 {
+ // If we're greater than the divisor (i.e. underflow)
+ // or if there is a lo bit set, we round
+ tmp > divisor32 || (quotient.data[0] & 0x1) > 0
+ } else {
+ false
+ };
+
+ // If we need to round, try to do so.
+ if round {
+ if let Ok(new_scale) = round_up(&mut quotient, scale) {
+ scale = new_scale;
+ } else {
+ // Overflowed
+ return CalculationResult::Overflow;
+ }
+ }
+ break;
+ }
+ }
+
+ // Do some scaling
+ let power = POWERS_10[power_scale];
+ scale += power_scale as i32;
+ // Increase the quotient by the power that was looked up
+ let overflow = increase_scale(&mut quotient, power as u64);
+ if overflow > 0 {
+ return CalculationResult::Overflow;
+ }
+
+ let remainder_scaled = (remainder as u64) * (power as u64);
+ let remainder_quotient = (remainder_scaled / (divisor32 as u64)) as u32;
+ remainder = (remainder_scaled - remainder_quotient as u64 * divisor32 as u64) as u32;
+ if let Err(DivError::Overflow) = quotient.add32(remainder_quotient) {
+ if let Ok(adj) = unscale_from_overflow(&mut quotient, scale, remainder != 0) {
+ scale = adj;
+ } else {
+ // Still overflowing
+ return CalculationResult::Overflow;
+ }
+ break;
+ }
+ }
+ } else {
+ // We have a divisor greater than 32 bits. Both of these share some quick calculation wins
+ // so we'll do those before branching into separate logic.
+ // The win we can do is shifting the bits to the left as much as possible. We do this to both
+ // the dividend and the divisor to ensure the quotient is not changed.
+ // As a simple contrived example: if we have 4 / 2 then we could bit shift all the way to the
+ // left meaning that the lo portion would have nothing inside of it. Of course, shifting these
+ // left one has the same result (8/4) etc.
+ // The advantage is that we may be able to write off lower portions of the number making things
+ // easier.
+ let mut power_scale = if divisor.data[2] == 0 {
+ divisor.data[1].leading_zeros()
+ } else {
+ divisor.data[2].leading_zeros()
+ } as usize;
+ let mut remainder = Buf16::zero();
+ remainder.set_low64(quotient.low64() << power_scale);
+ let tmp_high = ((quotient.data[1] as u64) + ((quotient.data[2] as u64) << 32)) >> (32 - power_scale);
+ remainder.set_high64(tmp_high);
+
+ // Work out the divisor after it's shifted
+ let divisor64 = divisor.low64() << power_scale;
+ // Check if the divisor is 64 bit or the full 96 bits
+ if divisor.data[2] == 0 {
+ // It's 64 bits
+ quotient.data[2] = 0;
+
+ // Calc mid/lo by shifting accordingly
+ let rem_lo = remainder.data[0];
+ remainder.data[0] = remainder.data[1];
+ remainder.data[1] = remainder.data[2];
+ remainder.data[2] = remainder.data[3];
+ quotient.data[1] = remainder.partial_divide_64(divisor64);
+
+ remainder.data[2] = remainder.data[1];
+ remainder.data[1] = remainder.data[0];
+ remainder.data[0] = rem_lo;
+ quotient.data[0] = remainder.partial_divide_64(divisor64);
+
+ loop {
+ let rem_low64 = remainder.low64();
+ if rem_low64 == 0 {
+ // If the scale is positive then we're actually done
+ if scale >= 0 {
+ break;
+ }
+ power_scale = 9usize.min((-scale) as usize);
+ } else {
+ // We may need to normalize later, so set the flag appropriately
+ require_unscale = true;
+
+ // We have a remainder so we effectively want to try to adjust the quotient and add
+ // the remainder into the quotient. We do this below, however first of all we want
+ // to try to avoid overflowing so we do that check first.
+ let will_overflow = if scale == MAX_PRECISION_I32 {
+ true
+ } else {
+ // Figure out how much we can scale by
+ if let Some(s) = quotient.find_scale(scale) {
+ power_scale = s;
+ } else {
+ return CalculationResult::Overflow;
+ }
+ // If it comes back as 0 (i.e. 10^0 = 1) then we're going to overflow since
+ // we're doing nothing.
+ power_scale == 0
+ };
+ if will_overflow {
+ // No more scaling can be done, but remainder is non-zero so we round if necessary.
+ let mut tmp = remainder.low64();
+ let round = if (tmp as i64) < 0 {
+ // We round if we wrapped around
+ true
+ } else {
+ tmp <<= 1;
+ if tmp > divisor64 {
+ true
+ } else {
+ tmp == divisor64 && quotient.data[0] & 0x1 != 0
+ }
+ };
+
+ // If we need to round, try to do so.
+ if round {
+ if let Ok(new_scale) = round_up(&mut quotient, scale) {
+ scale = new_scale;
+ } else {
+ // Overflowed
+ return CalculationResult::Overflow;
+ }
+ }
+ break;
+ }
+ }
+
+ // Do some scaling
+ let power = POWERS_10[power_scale];
+ scale += power_scale as i32;
+
+ // Increase the quotient by the power that was looked up
+ let overflow = increase_scale(&mut quotient, power as u64);
+ if overflow > 0 {
+ return CalculationResult::Overflow;
+ }
+ increase_scale64(&mut remainder, power as u64);
+
+ let tmp = remainder.partial_divide_64(divisor64);
+ if let Err(DivError::Overflow) = quotient.add32(tmp) {
+ if let Ok(adj) = unscale_from_overflow(&mut quotient, scale, remainder.low64() != 0) {
+ scale = adj;
+ } else {
+ // Still overflowing
+ return CalculationResult::Overflow;
+ }
+ break;
+ }
+ }
+ } else {
+ // It's 96 bits
+ // Start by finishing the shift left
+ let divisor_mid = divisor.data[1];
+ let divisor_hi = divisor.data[2];
+ let mut divisor = divisor;
+ divisor.set_low64(divisor64);
+ divisor.data[2] = ((divisor_mid as u64 + ((divisor_hi as u64) << 32)) >> (32 - power_scale)) as u32;
+
+ let quo = remainder.partial_divide_96(&divisor);
+ quotient.set_low64(quo as u64);
+ quotient.data[2] = 0;
+
+ loop {
+ let mut rem_low64 = remainder.low64();
+ if rem_low64 == 0 && remainder.data[2] == 0 {
+ // If the scale is positive then we're actually done
+ if scale >= 0 {
+ break;
+ }
+ power_scale = 9usize.min((-scale) as usize);
+ } else {
+ // We may need to normalize later, so set the flag appropriately
+ require_unscale = true;
+
+ // We have a remainder so we effectively want to try to adjust the quotient and add
+ // the remainder into the quotient. We do this below, however first of all we want
+ // to try to avoid overflowing so we do that check first.
+ let will_overflow = if scale == MAX_PRECISION_I32 {
+ true
+ } else {
+ // Figure out how much we can scale by
+ if let Some(s) = quotient.find_scale(scale) {
+ power_scale = s;
+ } else {
+ return CalculationResult::Overflow;
+ }
+ // If it comes back as 0 (i.e. 10^0 = 1) then we're going to overflow since
+ // we're doing nothing.
+ power_scale == 0
+ };
+ if will_overflow {
+ // No more scaling can be done, but remainder is non-zero so we round if necessary.
+ let round = if (remainder.data[2] as i32) < 0 {
+ // We round if we wrapped around
+ true
+ } else {
+ let tmp = remainder.data[1] >> 31;
+ rem_low64 <<= 1;
+ remainder.set_low64(rem_low64);
+ remainder.data[2] = (remainder.data[2] << 1) + tmp;
+
+ match remainder.data[2].cmp(&divisor.data[2]) {
+ Ordering::Less => false,
+ Ordering::Equal => {
+ let divisor_low64 = divisor.low64();
+ if rem_low64 > divisor_low64 {
+ true
+ } else {
+ rem_low64 == divisor_low64 && (quotient.data[0] & 1) != 0
+ }
+ }
+ Ordering::Greater => true,
+ }
+ };
+
+ // If we need to round, try to do so.
+ if round {
+ if let Ok(new_scale) = round_up(&mut quotient, scale) {
+ scale = new_scale;
+ } else {
+ // Overflowed
+ return CalculationResult::Overflow;
+ }
+ }
+ break;
+ }
+ }
+
+ // Do some scaling
+ let power = POWERS_10[power_scale];
+ scale += power_scale as i32;
+
+ // Increase the quotient by the power that was looked up
+ let overflow = increase_scale(&mut quotient, power as u64);
+ if overflow > 0 {
+ return CalculationResult::Overflow;
+ }
+ let mut tmp_remainder = Buf12 {
+ data: [remainder.data[0], remainder.data[1], remainder.data[2]],
+ };
+ let overflow = increase_scale(&mut tmp_remainder, power as u64);
+ remainder.data[0] = tmp_remainder.data[0];
+ remainder.data[1] = tmp_remainder.data[1];
+ remainder.data[2] = tmp_remainder.data[2];
+ remainder.data[3] = overflow;
+
+ let tmp = remainder.partial_divide_96(&divisor);
+ if let Err(DivError::Overflow) = quotient.add32(tmp) {
+ if let Ok(adj) =
+ unscale_from_overflow(&mut quotient, scale, (remainder.low64() | remainder.high64()) != 0)
+ {
+ scale = adj;
+ } else {
+ // Still overflowing
+ return CalculationResult::Overflow;
+ }
+ break;
+ }
+ }
+ }
+ }
+ if require_unscale {
+ scale = unscale(&mut quotient, scale);
+ }
+ CalculationResult::Ok(Decimal::from_parts(
+ quotient.data[0],
+ quotient.data[1],
+ quotient.data[2],
+ sign_negative,
+ scale as u32,
+ ))
+}
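The left shift described in the comments above ("4 / 2 could just as well be 8 / 4") is plain quotient-preserving normalization: shifting dividend and divisor by the same amount leaves the quotient unchanged while putting the divisor's top bit where the partial divides expect it. A quick standalone check, widening to u128 so the shifted dividend cannot be truncated:

fn main() {
    let (dividend, divisor) = (1_000_003u64, 97u64);
    let shift = divisor.leading_zeros();
    let wide = u128::from(dividend) << shift;
    assert_eq!((wide / (u128::from(divisor) << shift)) as u64, dividend / divisor);
}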
+
+// Multiply num by power (multiple of 10). Power must be 32 bits.
+// Returns the overflow, if any
+fn increase_scale(num: &mut Buf12, power: u64) -> u32 {
+ let mut tmp = (num.data[0] as u64) * power;
+ num.data[0] = tmp as u32;
+ tmp >>= 32;
+ tmp += (num.data[1] as u64) * power;
+ num.data[1] = tmp as u32;
+ tmp >>= 32;
+ tmp += (num.data[2] as u64) * power;
+ num.data[2] = tmp as u32;
+ (tmp >> 32) as u32
+}
+
+// Multiply num by power (multiple of 10). Power must be 32 bits.
+fn increase_scale64(num: &mut Buf16, power: u64) {
+ let mut tmp = (num.data[0] as u64) * power;
+ num.data[0] = tmp as u32;
+ tmp >>= 32;
+ tmp += (num.data[1] as u64) * power;
+ num.set_mid64(tmp)
+}
+
+// Adjust the number to deal with an overflow. This function is called after the number has been
+// scaled up (i.e. multiplied by 10), so it effectively reverses that by dividing by 10, feeding the
+// overflowed high bit back in and rounding the result.
+// Returns the updated scale.
+fn unscale_from_overflow(num: &mut Buf12, scale: i32, sticky: bool) -> Result<i32, DivError> {
+ let scale = scale - 1;
+ if scale < 0 {
+ return Err(DivError::Overflow);
+ }
+
+ // This function is called when the hi portion has "overflowed" upon adding one and has wrapped
+ // back around to 0. Consequently, we need to "feed" that back in, but also rescaling down
+ // to reverse out the overflow.
+ const HIGH_BIT: u64 = 0x1_0000_0000;
+ num.data[2] = (HIGH_BIT / 10) as u32;
+
+ // Calc the mid
+ let mut tmp = ((HIGH_BIT % 10) << 32) + (num.data[1] as u64);
+ let mut val = (tmp / 10) as u32;
+ num.data[1] = val;
+
+ // Calc the lo using a similar method
+ tmp = ((tmp - (val as u64) * 10) << 32) + (num.data[0] as u64);
+ val = (tmp / 10) as u32;
+ num.data[0] = val;
+
+ // Work out the remainder, and round if we have one (since it doesn't fit)
+ let remainder = (tmp - (val as u64) * 10) as u32;
+ if remainder > 5 || (remainder == 5 && (sticky || num.data[0] & 0x1 > 0)) {
+ let _ = num.add32(1);
+ }
+ Ok(scale)
+}
+
+#[inline]
+fn round_up(num: &mut Buf12, scale: i32) -> Result<i32, DivError> {
+ let low64 = num.low64().wrapping_add(1);
+ num.set_low64(low64);
+ if low64 != 0 {
+ return Ok(scale);
+ }
+ let hi = num.data[2].wrapping_add(1);
+ num.data[2] = hi;
+ if hi != 0 {
+ return Ok(scale);
+ }
+ unscale_from_overflow(num, scale, true)
+}
+
+fn unscale(num: &mut Buf12, scale: i32) -> i32 {
+ // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10 we can extract.
+ // We use this as a quick test on whether to try a given power.
+ let mut scale = scale;
+ while num.data[0] == 0 && scale >= 8 && num.div32_const(100000000) {
+ scale -= 8;
+ }
+
+ if (num.data[0] & 0xF) == 0 && scale >= 4 && num.div32_const(10000) {
+ scale -= 4;
+ }
+
+ if (num.data[0] & 0x3) == 0 && scale >= 2 && num.div32_const(100) {
+ scale -= 2;
+ }
+
+ if (num.data[0] & 0x1) == 0 && scale >= 1 && num.div32_const(10) {
+ scale -= 1;
+ }
+ scale
+}
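The bit tests in `unscale` lean on 10^k = 2^k * 5^k: a mantissa divisible by 10^k must have its low k bits clear, so checking those bits is a cheap filter before paying for the real division. A small standalone illustration:

fn main() {
    for n in [1_000u32, 12_300, 4_096, 999] {
        if n % 100 == 0 {
            // Divisible by 10^2 implies divisible by 2^2: the low two bits are clear.
            assert_eq!(n & 0x3, 0);
        }
    }
}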
diff --git a/third_party/rust/rust_decimal/src/ops/legacy.rs b/third_party/rust/rust_decimal/src/ops/legacy.rs
new file mode 100644
index 0000000000..49f39814f8
--- /dev/null
+++ b/third_party/rust/rust_decimal/src/ops/legacy.rs
@@ -0,0 +1,843 @@
+use crate::{
+ constants::{MAX_PRECISION_U32, POWERS_10, U32_MASK},
+ decimal::{CalculationResult, Decimal},
+ ops::array::{
+ add_by_internal, cmp_internal, div_by_u32, is_all_zero, mul_by_u32, mul_part, rescale_internal, shl1_internal,
+ },
+};
+
+use core::cmp::Ordering;
+use num_traits::Zero;
+
+pub(crate) fn add_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult {
+ // Convert to the same scale
+ let mut my = d1.mantissa_array3();
+ let mut my_scale = d1.scale();
+ let mut ot = d2.mantissa_array3();
+ let mut other_scale = d2.scale();
+ rescale_to_maximum_scale(&mut my, &mut my_scale, &mut ot, &mut other_scale);
+ let mut final_scale = my_scale.max(other_scale);
+
+ // Add the items together
+ let my_negative = d1.is_sign_negative();
+ let other_negative = d2.is_sign_negative();
+ let mut negative = false;
+ let carry;
+ if !(my_negative ^ other_negative) {
+ negative = my_negative;
+ carry = add_by_internal3(&mut my, &ot);
+ } else {
+ let cmp = cmp_internal(&my, &ot);
+ // -x + y
+ // if x > y then it's negative (i.e. -2 + 1)
+ match cmp {
+ Ordering::Less => {
+ negative = other_negative;
+ sub_by_internal3(&mut ot, &my);
+ my[0] = ot[0];
+ my[1] = ot[1];
+ my[2] = ot[2];
+ }
+ Ordering::Greater => {
+ negative = my_negative;
+ sub_by_internal3(&mut my, &ot);
+ }
+ Ordering::Equal => {
+ // -2 + 2
+ my[0] = 0;
+ my[1] = 0;
+ my[2] = 0;
+ }
+ }
+ carry = 0;
+ }
+
+ // If we have a carry then the result no longer fits in 96 bits.
+ // We need to lose some significant digits (if possible)
+ if carry > 0 {
+ if final_scale == 0 {
+ return CalculationResult::Overflow;
+ }
+
+ // Copy it over to a temp array for modification
+ let mut temp = [my[0], my[1], my[2], carry];
+ while final_scale > 0 && temp[3] != 0 {
+ div_by_u32(&mut temp, 10);
+ final_scale -= 1;
+ }
+
+ // If we still have a carry bit then we overflowed
+ if temp[3] > 0 {
+ return CalculationResult::Overflow;
+ }
+
+ // Copy it back - we're done
+ my[0] = temp[0];
+ my[1] = temp[1];
+ my[2] = temp[2];
+ }
+
+ CalculationResult::Ok(Decimal::from_parts(my[0], my[1], my[2], negative, final_scale))
+}
+
+pub(crate) fn sub_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult {
+ add_impl(d1, &(-*d2))
+}
+
+pub(crate) fn div_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult {
+ if d2.is_zero() {
+ return CalculationResult::DivByZero;
+ }
+ if d1.is_zero() {
+ return CalculationResult::Ok(Decimal::zero());
+ }
+
+ let dividend = d1.mantissa_array3();
+ let divisor = d2.mantissa_array3();
+ let mut quotient = [0u32, 0u32, 0u32];
+ let mut quotient_scale: i32 = d1.scale() as i32 - d2.scale() as i32;
+
+ // We supply an extra overflow word for each of the dividend and the remainder
+ let mut working_quotient = [dividend[0], dividend[1], dividend[2], 0u32];
+ let mut working_remainder = [0u32, 0u32, 0u32, 0u32];
+ let mut working_scale = quotient_scale;
+ let mut remainder_scale = quotient_scale;
+ let mut underflow;
+
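+ // e.g. for 1 / 8 the first pass produces quotient 0 with remainder 1; the
+ // remainder is then multiplied by 10 and divided again, accumulating the
+ // digits 1, 2 and 5 (i.e. 0.125) over the next iterations until the
+ // remainder reaches zero and the loop exits.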
+ loop {
+ div_internal(&mut working_quotient, &mut working_remainder, &divisor);
+ underflow = add_with_scale_internal(
+ &mut quotient,
+ &mut quotient_scale,
+ &mut working_quotient,
+ &mut working_scale,
+ );
+
+ // Multiply the remainder by 10
+ let mut overflow = 0;
+ for part in working_remainder.iter_mut() {
+ let (lo, hi) = mul_part(*part, 10, overflow);
+ *part = lo;
+ overflow = hi;
+ }
+ // Copy temp remainder into the temp quotient section
+ working_quotient.copy_from_slice(&working_remainder);
+
+ remainder_scale += 1;
+ working_scale = remainder_scale;
+
+ if underflow || is_all_zero(&working_remainder) {
+ break;
+ }
+ }
+
+ // If we have a really big number try to adjust the scale to 0
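+ // (A negative scale arises when the divisor has a larger scale than the
+ // dividend, e.g. 10 / 0.5 starts with a quotient scale of -1; each pass of
+ // this loop multiplies the quotient by 10 to bring the scale back towards 0,
+ // overflowing if the larger value cannot be represented.)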
+ while quotient_scale < 0 {
+ copy_array_diff_lengths(&mut working_quotient, &quotient);
+ working_quotient[3] = 0;
+ working_remainder.iter_mut().for_each(|x| *x = 0);
+
+ // Mul 10
+ let mut overflow = 0;
+ for part in &mut working_quotient {
+ let (lo, hi) = mul_part(*part, 10, overflow);
+ *part = lo;
+ overflow = hi;
+ }
+ for part in &mut working_remainder {
+ let (lo, hi) = mul_part(*part, 10, overflow);
+ *part = lo;
+ overflow = hi;
+ }
+ if working_quotient[3] == 0 && is_all_zero(&working_remainder) {
+ quotient_scale += 1;
+ quotient[0] = working_quotient[0];
+ quotient[1] = working_quotient[1];
+ quotient[2] = working_quotient[2];
+ } else {
+ // Overflow
+ return CalculationResult::Overflow;
+ }
+ }
+
+ if quotient_scale > 255 {
+ quotient[0] = 0;
+ quotient[1] = 0;
+ quotient[2] = 0;
+ quotient_scale = 0;
+ }
+
+ let mut quotient_negative = d1.is_sign_negative() ^ d2.is_sign_negative();
+
+ // Check for underflow
+ let mut final_scale: u32 = quotient_scale as u32;
+ if final_scale > MAX_PRECISION_U32 {
+ let mut remainder = 0;
+
+ // Division underflowed. We must remove some significant digits rather than
+ // use an invalid scale.
+ while final_scale > MAX_PRECISION_U32 && !is_all_zero(&quotient) {
+ remainder = div_by_u32(&mut quotient, 10);
+ final_scale -= 1;
+ }
+ if final_scale > MAX_PRECISION_U32 {
+ // Result underflowed so set to zero
+ final_scale = 0;
+ quotient_negative = false;
+ } else if remainder >= 5 {
+ for part in &mut quotient {
+ if remainder == 0 {
+ break;
+ }
+ let digit: u64 = u64::from(*part) + 1;
+ remainder = if digit > 0xFFFF_FFFF { 1 } else { 0 };
+ *part = (digit & 0xFFFF_FFFF) as u32;
+ }
+ }
+ }
+
+ CalculationResult::Ok(Decimal::from_parts(
+ quotient[0],
+ quotient[1],
+ quotient[2],
+ quotient_negative,
+ final_scale,
+ ))
+}
+
+pub(crate) fn mul_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult {
+ // Early exit if either is zero
+ if d1.is_zero() || d2.is_zero() {
+ return CalculationResult::Ok(Decimal::zero());
+ }
+
+ // We are only resulting in a negative if we have mismatched signs
+ let negative = d1.is_sign_negative() ^ d2.is_sign_negative();
+
+ // We get the scale of the result by adding the scales of the operands. This may be
+ // too big, however we'll correct it later if necessary.
+ let mut final_scale = d1.scale() + d2.scale();
+
+ // First of all, if ONLY the lo parts of both numbers are filled
+ // then we can simply do a standard 64 bit calculation. It's a minor
+ // optimization, however it prevents the need for long form multiplication.
+ let my = d1.mantissa_array3();
+ let ot = d2.mantissa_array3();
+ if my[1] == 0 && my[2] == 0 && ot[1] == 0 && ot[2] == 0 {
+ // Simple 64 bit multiplication
+ let mut u64_result = u64_to_array(u64::from(my[0]) * u64::from(ot[0]));
+
+ // If we're above max precision then this is a very small number
+ if final_scale > MAX_PRECISION_U32 {
+ final_scale -= MAX_PRECISION_U32;
+
+ // If the remaining scale is above 19 then the result will equate to zero.
+ // This is because the max value in 64 bits is approximately 1.84E19.
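+ // e.g. multiplying two operands that both have scale 28 leaves final_scale at
+ // 56 - 28 = 28 here; dividing any 64 bit value by 10^28 yields zero, so the
+ // result collapses to zero.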
+ if final_scale > 19 {
+ return CalculationResult::Ok(Decimal::zero());
+ }
+
+ let mut rem_lo = 0;
+ let mut power;
+ if final_scale > 9 {
+ // Since 10^10 doesn't fit into u32, we divide by 10^10/4
+ // and multiply the next divisor by 4.
+ rem_lo = div_by_u32(&mut u64_result, 2_500_000_000);
+ power = POWERS_10[final_scale as usize - 10] << 2;
+ } else {
+ power = POWERS_10[final_scale as usize];
+ }
+
+ // Divide fits in 32 bits
+ let rem_hi = div_by_u32(&mut u64_result, power);
+
+ // Round the result. Since the divisor is a power of 10
+ // we check to see if the remainder is >= 1/2 divisor
+ power >>= 1;
+ if rem_hi >= power && (rem_hi > power || (rem_lo | (u64_result[0] & 0x1)) != 0) {
+ u64_result[0] += 1;
+ }
+
+ final_scale = MAX_PRECISION_U32;
+ }
+ return CalculationResult::Ok(Decimal::from_parts(
+ u64_result[0],
+ u64_result[1],
+ 0,
+ negative,
+ final_scale,
+ ));
+ }
+
+ // We're using some of the high bits, so we essentially perform
+ // long form multiplication. We compute the 9 partial products
+ // into a 192 bit result array.
+ //
+ // [my-h][my-m][my-l]
+ // x [ot-h][ot-m][ot-l]
+ // --------------------------------------
+ // 1. [r-hi][r-lo] my-l * ot-l [0, 0]
+ // 2. [r-hi][r-lo] my-l * ot-m [0, 1]
+ // 3. [r-hi][r-lo] my-m * ot-l [1, 0]
+ // 4. [r-hi][r-lo] my-m * ot-m [1, 1]
+ // 5. [r-hi][r-lo] my-l * ot-h [0, 2]
+ // 6. [r-hi][r-lo] my-h * ot-l [2, 0]
+ // 7. [r-hi][r-lo] my-m * ot-h [1, 2]
+ // 8. [r-hi][r-lo] my-h * ot-m [2, 1]
+ // 9. [r-hi][r-lo] my-h * ot-h [2, 2]
+ let mut product = [0u32, 0u32, 0u32, 0u32, 0u32, 0u32];
+
+ // We can perform a minor short circuit here. If the
+ // high portions are both 0 then we can skip portions 5-9
+ let to = if my[2] == 0 && ot[2] == 0 { 2 } else { 3 };
+
+ for (my_index, my_item) in my.iter().enumerate().take(to) {
+ for (ot_index, ot_item) in ot.iter().enumerate().take(to) {
+ let (mut rlo, mut rhi) = mul_part(*my_item, *ot_item, 0);
+
+ // Get the index for the lo portion of the product
+ for prod in product.iter_mut().skip(my_index + ot_index) {
+ let (res, overflow) = add_part(rlo, *prod);
+ *prod = res;
+
+ // If we have something in rhi from before then promote that
+ if rhi > 0 {
+ // If we overflowed in the last add, add that with rhi
+ if overflow > 0 {
+ let (nlo, nhi) = add_part(rhi, overflow);
+ rlo = nlo;
+ rhi = nhi;
+ } else {
+ rlo = rhi;
+ rhi = 0;
+ }
+ } else if overflow > 0 {
+ rlo = overflow;
+ rhi = 0;
+ } else {
+ break;
+ }
+
+ // If nothing to do next round then break out
+ if rlo == 0 {
+ break;
+ }
+ }
+ }
+ }
+
+ // If our result has used up the high portion of the product
+ // then we either have an overflow or an underflow situation.
+ // Overflow will occur if we can't scale it back, whereas underflow
+ // will kick in rounding.
+ let mut remainder = 0;
+ while final_scale > 0 && (product[3] != 0 || product[4] != 0 || product[5] != 0) {
+ remainder = div_by_u32(&mut product, 10u32);
+ final_scale -= 1;
+ }
+
+ // Round up the carry if we need to
+ if remainder >= 5 {
+ for part in product.iter_mut() {
+ if remainder == 0 {
+ break;
+ }
+ let digit: u64 = u64::from(*part) + 1;
+ remainder = if digit > 0xFFFF_FFFF { 1 } else { 0 };
+ *part = (digit & 0xFFFF_FFFF) as u32;
+ }
+ }
+
+ // If we're still above max precision then we'll try again to
+ // reduce precision - we may be dealing with a limit of "0"
+ if final_scale > MAX_PRECISION_U32 {
+ // We're in an underflow situation
+ // The easiest way to remove precision is to divide off the result
+ while final_scale > MAX_PRECISION_U32 && !is_all_zero(&product) {
+ div_by_u32(&mut product, 10);
+ final_scale -= 1;
+ }
+ // If we're still at the limit then we can't represent any
+ // significant decimal digits and will return an integer only.
+ // This can also be invoked while representing 0.
+ if final_scale > MAX_PRECISION_U32 {
+ final_scale = 0;
+ }
+ } else if !(product[3] == 0 && product[4] == 0 && product[5] == 0) {
+ // We're in an overflow situation - we're within our precision bounds
+ // but still have bits in overflow
+ return CalculationResult::Overflow;
+ }
+
+ CalculationResult::Ok(Decimal::from_parts(
+ product[0],
+ product[1],
+ product[2],
+ negative,
+ final_scale,
+ ))
+}
+
+pub(crate) fn rem_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult {
+ if d2.is_zero() {
+ return CalculationResult::DivByZero;
+ }
+ if d1.is_zero() {
+ return CalculationResult::Ok(Decimal::zero());
+ }
+
+ // Rescale so comparable
+ let initial_scale = d1.scale();
+ let mut quotient = d1.mantissa_array3();
+ let mut quotient_scale = initial_scale;
+ let mut divisor = d2.mantissa_array3();
+ let mut divisor_scale = d2.scale();
+ rescale_to_maximum_scale(&mut quotient, &mut quotient_scale, &mut divisor, &mut divisor_scale);
+
+ // Working is the remainder + the quotient
+ // We use an aligned array since we'll be using it a lot.
+ let mut working_quotient = [quotient[0], quotient[1], quotient[2], 0u32];
+ let mut working_remainder = [0u32, 0u32, 0u32, 0u32];
+ div_internal(&mut working_quotient, &mut working_remainder, &divisor);
+
+ // Round if necessary. This is for semantic correctness, but could feasibly be removed for
+ // performance improvements.
+ if quotient_scale > initial_scale {
+ let mut working = [
+ working_remainder[0],
+ working_remainder[1],
+ working_remainder[2],
+ working_remainder[3],
+ ];
+ while quotient_scale > initial_scale {
+ if div_by_u32(&mut working, 10) > 0 {
+ break;
+ }
+ quotient_scale -= 1;
+ working_remainder.copy_from_slice(&working);
+ }
+ }
+
+ CalculationResult::Ok(Decimal::from_parts(
+ working_remainder[0],
+ working_remainder[1],
+ working_remainder[2],
+ d1.is_sign_negative(),
+ quotient_scale,
+ ))
+}
+
+pub(crate) fn cmp_impl(d1: &Decimal, d2: &Decimal) -> Ordering {
+ // Quick exit if major differences
+ if d1.is_zero() && d2.is_zero() {
+ return Ordering::Equal;
+ }
+ let self_negative = d1.is_sign_negative();
+ let other_negative = d2.is_sign_negative();
+ if self_negative && !other_negative {
+ return Ordering::Less;
+ } else if !self_negative && other_negative {
+ return Ordering::Greater;
+ }
+
+ // If we have 1.23 and 1.2345 then we have
+ // 123 scale 2 and 12345 scale 4
+ // We need to convert the first to
+ // 12300 scale 4 so we can compare equally
+ let left: &Decimal;
+ let right: &Decimal;
+ if self_negative && other_negative {
+ // Both are negative, so reverse cmp
+ left = d2;
+ right = d1;
+ } else {
+ left = d1;
+ right = d2;
+ }
+ let mut left_scale = left.scale();
+ let mut right_scale = right.scale();
+ let mut left_raw = left.mantissa_array3();
+ let mut right_raw = right.mantissa_array3();
+
+ if left_scale == right_scale {
+ // Fast path for same scale
+ if left_raw[2] != right_raw[2] {
+ return left_raw[2].cmp(&right_raw[2]);
+ }
+ if left_raw[1] != right_raw[1] {
+ return left_raw[1].cmp(&right_raw[1]);
+ }
+ return left_raw[0].cmp(&right_raw[0]);
+ }
+
+ // Rescale and compare
+ rescale_to_maximum_scale(&mut left_raw, &mut left_scale, &mut right_raw, &mut right_scale);
+ cmp_internal(&left_raw, &right_raw)
+}
+
+#[inline]
+fn add_part(left: u32, right: u32) -> (u32, u32) {
+ let added = u64::from(left) + u64::from(right);
+ ((added & U32_MASK) as u32, (added >> 32 & U32_MASK) as u32)
+}
+
+#[inline(always)]
+fn sub_by_internal3(value: &mut [u32; 3], by: &[u32; 3]) {
+ let mut overflow = 0;
+ let vl = value.len();
+ for i in 0..vl {
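+ // Adding 2^32 up front means the subtraction can never underflow; if bit 32
+ // of the result is still set then no borrow was needed, otherwise we carry a
+ // borrow of 1 into the next limb.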
+ let part = (0x1_0000_0000u64 + u64::from(value[i])) - (u64::from(by[i]) + overflow);
+ value[i] = part as u32;
+ overflow = 1 - (part >> 32);
+ }
+}
+
+fn div_internal(quotient: &mut [u32; 4], remainder: &mut [u32; 4], divisor: &[u32; 3]) {
+ // There are a couple of ways to do division on binary numbers:
+ // 1. Using long division
+ // 2. Using the complement method
+ // ref: http://paulmason.me/dividing-binary-numbers-part-2/
+ // The complement method basically keeps trying to subtract the
+ // divisor until it can't anymore, placing the rest in the remainder.
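+ // As a small sketch of the idea: in 4 bit arithmetic, subtracting 3 is the same
+ // as adding its two's complement 13 and discarding the carry, so the shift-and-add
+ // loop below is effectively a bit-by-bit trial subtraction of the divisor from the
+ // running remainder.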
+ let mut complement = [
+ divisor[0] ^ 0xFFFF_FFFF,
+ divisor[1] ^ 0xFFFF_FFFF,
+ divisor[2] ^ 0xFFFF_FFFF,
+ 0xFFFF_FFFF,
+ ];
+
+ // Add one onto the complement
+ add_one_internal4(&mut complement);
+
+ // Make sure the remainder is 0
+ remainder.iter_mut().for_each(|x| *x = 0);
+
+ // If we have nothing in our hi+ block then shift over till we do
+ let mut blocks_to_process = 0;
+ while blocks_to_process < 4 && quotient[3] == 0 {
+ // memcpy would be useful here
+ quotient[3] = quotient[2];
+ quotient[2] = quotient[1];
+ quotient[1] = quotient[0];
+ quotient[0] = 0;
+
+ // Increment the counter
+ blocks_to_process += 1;
+ }
+
+ // Let's try and do the addition...
+ let mut block = blocks_to_process << 5;
+ let mut working = [0u32, 0u32, 0u32, 0u32];
+ while block < 128 {
+ // << 1 for quotient AND remainder. Moving the carry from the quotient to the bottom of the
+ // remainder.
+ let carry = shl1_internal(quotient, 0);
+ shl1_internal(remainder, carry);
+
+ // Copy the remainder into the working buffer for the subtraction
+ working.copy_from_slice(remainder);
+
+ // Add the complement to the working remainder (i.e. subtract the divisor)
+ add_by_internal(&mut working, &complement);
+
+ // Check for the significant bit - move over to the quotient
+ // as necessary
+ if (working[3] & 0x8000_0000) == 0 {
+ remainder.copy_from_slice(&working);
+ quotient[0] |= 1;
+ }
+
+ // Increment our pointer
+ block += 1;
+ }
+}
+
+#[inline]
+fn copy_array_diff_lengths(into: &mut [u32], from: &[u32]) {
+ for i in 0..into.len() {
+ if i >= from.len() {
+ break;
+ }
+ into[i] = from[i];
+ }
+}
+
+#[inline]
+fn add_one_internal4(value: &mut [u32; 4]) -> u32 {
+ let mut carry: u64 = 1; // Start with one, since adding one
+ let mut sum: u64;
+ for i in value.iter_mut() {
+ sum = (*i as u64) + carry;
+ *i = (sum & U32_MASK) as u32;
+ carry = sum >> 32;
+ }
+
+ carry as u32
+}
+
+#[inline]
+fn add_by_internal3(value: &mut [u32; 3], by: &[u32; 3]) -> u32 {
+ let mut carry: u32 = 0;
+ let bl = by.len();
+ for i in 0..bl {
+ let res1 = value[i].overflowing_add(by[i]);
+ let res2 = res1.0.overflowing_add(carry);
+ value[i] = res2.0;
+ carry = (res1.1 | res2.1) as u32;
+ }
+ carry
+}
+
+#[inline]
+const fn u64_to_array(value: u64) -> [u32; 2] {
+ [(value & U32_MASK) as u32, (value >> 32 & U32_MASK) as u32]
+}
+
+fn add_with_scale_internal(
+ quotient: &mut [u32; 3],
+ quotient_scale: &mut i32,
+ working_quotient: &mut [u32; 4],
+ working_scale: &mut i32,
+) -> bool {
+ // Add quotient and the working (i.e. quotient = quotient + working)
+ if is_all_zero(quotient) {
+ // Quotient is zero so we can just copy the working quotient in directly
+ // First, make sure they are both 96 bit.
+ while working_quotient[3] != 0 {
+ div_by_u32(working_quotient, 10);
+ *working_scale -= 1;
+ }
+ copy_array_diff_lengths(quotient, working_quotient);
+ *quotient_scale = *working_scale;
+ return false;
+ }
+
+ if is_all_zero(working_quotient) {
+ return false;
+ }
+
+ // We have ensured that our working is not zero so we should do the addition
+
+ // If the scales of the two quotients are different then
+ // try to scale down the one with the bigger scale
+ let mut temp3 = [0u32, 0u32, 0u32];
+ let mut temp4 = [0u32, 0u32, 0u32, 0u32];
+ if *quotient_scale != *working_scale {
+ // TODO: Remove necessity for temp (without performance impact)
+ fn div_by_10<const N: usize>(target: &mut [u32], temp: &mut [u32; N], scale: &mut i32, target_scale: i32) {
+ // Copy to the temp array
+ temp.copy_from_slice(target);
+ // divide by 10 until target scale is reached
+ while *scale > target_scale {
+ let remainder = div_by_u32(temp, 10);
+ if remainder == 0 {
+ *scale -= 1;
+ target.copy_from_slice(temp);
+ } else {
+ break;
+ }
+ }
+ }
+
+ if *quotient_scale < *working_scale {
+ div_by_10(working_quotient, &mut temp4, working_scale, *quotient_scale);
+ } else {
+ div_by_10(quotient, &mut temp3, quotient_scale, *working_scale);
+ }
+ }
+
+ // If the scales are still different then
+ // try to scale up the one with the smaller scale
+ if *quotient_scale != *working_scale {
+ // TODO: Remove necessity for temp (without performance impact)
+ fn mul_by_10(target: &mut [u32], temp: &mut [u32], scale: &mut i32, target_scale: i32) {
+ temp.copy_from_slice(target);
+ let mut overflow = 0;
+ // Multiply by 10 until target scale reached or overflow
+ while *scale < target_scale && overflow == 0 {
+ overflow = mul_by_u32(temp, 10);
+ if overflow == 0 {
+ // Still no overflow
+ *scale += 1;
+ target.copy_from_slice(temp);
+ }
+ }
+ }
+
+ if *quotient_scale > *working_scale {
+ mul_by_10(working_quotient, &mut temp4, working_scale, *quotient_scale);
+ } else {
+ mul_by_10(quotient, &mut temp3, quotient_scale, *working_scale);
+ }
+ }
+
+ // If the scales are still different then
+ // try to scale down the one with the bigger scale
+ // (ultimately losing significant digits)
+ if *quotient_scale != *working_scale {
+ // TODO: Remove necessity for temp (without performance impact)
+ fn div_by_10_lossy<const N: usize>(
+ target: &mut [u32],
+ temp: &mut [u32; N],
+ scale: &mut i32,
+ target_scale: i32,
+ ) {
+ temp.copy_from_slice(target);
+ // divide by 10 until target scale is reached
+ while *scale > target_scale {
+ div_by_u32(temp, 10);
+ *scale -= 1;
+ target.copy_from_slice(temp);
+ }
+ }
+ if *quotient_scale < *working_scale {
+ div_by_10_lossy(working_quotient, &mut temp4, working_scale, *quotient_scale);
+ } else {
+ div_by_10_lossy(quotient, &mut temp3, quotient_scale, *working_scale);
+ }
+ }
+
+ // If quotient or working are zero we have an underflow condition
+ if is_all_zero(quotient) || is_all_zero(working_quotient) {
+ // Underflow
+ return true;
+ } else {
+ // Both numbers have the same scale and can be added.
+ // We just need to know whether we can fit them in
+ let mut underflow = false;
+ let mut temp = [0u32, 0u32, 0u32];
+ while !underflow {
+ temp.copy_from_slice(quotient);
+
+ // Add the working quotient
+ let overflow = add_by_internal(&mut temp, working_quotient);
+ if overflow == 0 {
+ // addition was successful
+ quotient.copy_from_slice(&temp);
+ break;
+ } else {
+ // addition overflowed - remove significant digits and try again
+ div_by_u32(quotient, 10);
+ *quotient_scale -= 1;
+ div_by_u32(working_quotient, 10);
+ *working_scale -= 1;
+ // Check for underflow
+ underflow = is_all_zero(quotient) || is_all_zero(working_quotient);
+ }
+ }
+ if underflow {
+ return true;
+ }
+ }
+ false
+}
+
+/// Rescales the given decimals to equivalent scales.
+/// It first tries to scale the side with the smaller scale up to the
+/// maximum scale of left/right. If it is unable to do that it will
+/// instead reduce the accuracy of the other argument.
+/// e.g. with 1.23 and 2.345 it'll rescale the first arg to 1.230
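+/// If scaling up would overflow the 96 bit mantissa, accuracy is reduced instead,
+/// e.g. 0.0872727272727272727272727272 (scale 28) and 843.65000000 (scale 8)
+/// both end up at scale 25.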
+#[inline(always)]
+fn rescale_to_maximum_scale(left: &mut [u32; 3], left_scale: &mut u32, right: &mut [u32; 3], right_scale: &mut u32) {
+ if left_scale == right_scale {
+ // Nothing to do
+ return;
+ }
+
+ if is_all_zero(left) {
+ *left_scale = *right_scale;
+ return;
+ } else if is_all_zero(right) {
+ *right_scale = *left_scale;
+ return;
+ }
+
+ if left_scale > right_scale {
+ rescale_internal(right, right_scale, *left_scale);
+ if right_scale != left_scale {
+ rescale_internal(left, left_scale, *right_scale);
+ }
+ } else {
+ rescale_internal(left, left_scale, *right_scale);
+ if right_scale != left_scale {
+ rescale_internal(right, right_scale, *left_scale);
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ // Tests on private methods.
+ //
+ // All public tests should go under `tests/`.
+
+ use super::*;
+ use crate::prelude::*;
+
+ #[test]
+ fn it_can_rescale_to_maximum_scale() {
+ fn extract(value: &str) -> ([u32; 3], u32) {
+ let v = Decimal::from_str(value).unwrap();
+ (v.mantissa_array3(), v.scale())
+ }
+
+ let tests = &[
+ ("1", "1", "1", "1"),
+ ("1", "1.0", "1.0", "1.0"),
+ ("1", "1.00000", "1.00000", "1.00000"),
+ ("1", "1.0000000000", "1.0000000000", "1.0000000000"),
+ (
+ "1",
+ "1.00000000000000000000",
+ "1.00000000000000000000",
+ "1.00000000000000000000",
+ ),
+ ("1.1", "1.1", "1.1", "1.1"),
+ ("1.1", "1.10000", "1.10000", "1.10000"),
+ ("1.1", "1.1000000000", "1.1000000000", "1.1000000000"),
+ (
+ "1.1",
+ "1.10000000000000000000",
+ "1.10000000000000000000",
+ "1.10000000000000000000",
+ ),
+ (
+ "0.6386554621848739495798319328",
+ "11.815126050420168067226890757",
+ "0.638655462184873949579831933",
+ "11.815126050420168067226890757",
+ ),
+ (
+ "0.0872727272727272727272727272", // Scale 28
+ "843.65000000", // Scale 8
+ "0.0872727272727272727272727", // 25
+ "843.6500000000000000000000000", // 25
+ ),
+ ];
+
+ for &(left_raw, right_raw, expected_left, expected_right) in tests {
+ // Left/Right = the values to rescale towards each other
+ // Expected = the expected values (and scales) after rescaling
+ let (expected_left, expected_lscale) = extract(expected_left);
+ let (expected_right, expected_rscale) = extract(expected_right);
+
+ let (mut left, mut left_scale) = extract(left_raw);
+ let (mut right, mut right_scale) = extract(right_raw);
+ rescale_to_maximum_scale(&mut left, &mut left_scale, &mut right, &mut right_scale);
+ assert_eq!(left, expected_left);
+ assert_eq!(left_scale, expected_lscale);
+ assert_eq!(right, expected_right);
+ assert_eq!(right_scale, expected_rscale);
+
+ // Also test the transitive case
+ let (mut left, mut left_scale) = extract(left_raw);
+ let (mut right, mut right_scale) = extract(right_raw);
+ rescale_to_maximum_scale(&mut right, &mut right_scale, &mut left, &mut left_scale);
+ assert_eq!(left, expected_left);
+ assert_eq!(left_scale, expected_lscale);
+ assert_eq!(right, expected_right);
+ assert_eq!(right_scale, expected_rscale);
+ }
+ }
+}
diff --git a/third_party/rust/rust_decimal/src/ops/mul.rs b/third_party/rust/rust_decimal/src/ops/mul.rs
new file mode 100644
index 0000000000..b36729599d
--- /dev/null
+++ b/third_party/rust/rust_decimal/src/ops/mul.rs
@@ -0,0 +1,168 @@
+use crate::constants::{BIG_POWERS_10, MAX_I64_SCALE, MAX_PRECISION_U32, U32_MAX};
+use crate::decimal::{CalculationResult, Decimal};
+use crate::ops::common::Buf24;
+
+pub(crate) fn mul_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult {
+ if d1.is_zero() || d2.is_zero() {
+ // We should think about this - does zero need to maintain precision? This treats it like
+ // an absolute which I think is ok, especially since we have is_zero() functions etc.
+ return CalculationResult::Ok(Decimal::ZERO);
+ }
+
+ let mut scale = d1.scale() + d2.scale();
+ let negative = d1.is_sign_negative() ^ d2.is_sign_negative();
+ let mut product = Buf24::zero();
+
+ // See if we can optimize this calculation depending on whether the hi bits are set
+ if d1.hi() | d1.mid() == 0 {
+ if d2.hi() | d2.mid() == 0 {
+ // We're multiplying two 32 bit integers, so we can take some liberties to optimize this.
+ let mut low64 = d1.lo() as u64 * d2.lo() as u64;
+ if scale > MAX_PRECISION_U32 {
+ // We've exceeded maximum scale so we need to start reducing the precision (aka
+ // rounding) until we have something that fits.
+ // If we're too big then we effectively round to zero.
+ if scale > MAX_PRECISION_U32 + MAX_I64_SCALE {
+ return CalculationResult::Ok(Decimal::ZERO);
+ }
+
+ scale -= MAX_PRECISION_U32 + 1;
+ let mut power = BIG_POWERS_10[scale as usize];
+
+ let tmp = low64 / power;
+ let remainder = low64 - tmp * power;
+ low64 = tmp;
+
+ // Round the result. Since the divisor was a power of 10, it's always even.
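+ // e.g. if power were 10: low64 = 25 gives tmp = 2, remainder = 5 and the even
+ // quotient 2 is kept, whereas low64 = 35 gives the odd quotient 3, which rounds
+ // up to 4 (round half to even).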
+ power >>= 1;
+ if remainder >= power && (remainder > power || (low64 as u32 & 1) > 0) {
+ low64 += 1;
+ }
+
+ scale = MAX_PRECISION_U32;
+ }
+
+ // Early exit
+ return CalculationResult::Ok(Decimal::from_parts(
+ low64 as u32,
+ (low64 >> 32) as u32,
+ 0,
+ negative,
+ scale,
+ ));
+ }
+
+ // We know that the left hand side is just 32 bits but the right hand side is either
+ // 64 or 96 bits.
+ mul_by_32bit_lhs(d1.lo() as u64, d2, &mut product);
+ } else if d2.mid() | d2.hi() == 0 {
+ // We know that the right hand side is just 32 bits.
+ mul_by_32bit_lhs(d2.lo() as u64, d1, &mut product);
+ } else {
+ // We know we're not dealing with simple 32 bit operands on either side.
+ // We compute and accumulate the 9 partial products using long multiplication
+
+ // 1: ll * rl
+ let mut tmp = d1.lo() as u64 * d2.lo() as u64;
+ product.data[0] = tmp as u32;
+
+ // 2: ll * rm
+ let mut tmp2 = (d1.lo() as u64 * d2.mid() as u64).wrapping_add(tmp >> 32);
+
+ // 3: lm * rl
+ tmp = d1.mid() as u64 * d2.lo() as u64;
+ tmp = tmp.wrapping_add(tmp2);
+ product.data[1] = tmp as u32;
+
+ // Detect if carry happened from the wrapping add
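+ // (with wrapping arithmetic, a sum that is smaller than one of its operands
+ // means the addition wrapped past 2^64)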
+ if tmp < tmp2 {
+ tmp2 = (tmp >> 32) | (1u64 << 32);
+ } else {
+ tmp2 = tmp >> 32;
+ }
+
+ // 4: lm * rm
+ tmp = (d1.mid() as u64 * d2.mid() as u64) + tmp2;
+
+ // If neither high word is set then we can stop here. Otherwise, we need to continue
+ // calculating using the high words.
+ if (d1.hi() | d2.hi()) > 0 {
+ // 5. ll * rh
+ tmp2 = d1.lo() as u64 * d2.hi() as u64;
+ tmp = tmp.wrapping_add(tmp2);
+ // Detect if we carried
+ let mut tmp3 = if tmp < tmp2 { 1 } else { 0 };
+
+ // 6. lh * rl
+ tmp2 = d1.hi() as u64 * d2.lo() as u64;
+ tmp = tmp.wrapping_add(tmp2);
+ product.data[2] = tmp as u32;
+ // Detect if we carried
+ if tmp < tmp2 {
+ tmp3 += 1;
+ }
+ tmp2 = (tmp3 << 32) | (tmp >> 32);
+
+ // 7. lm * rh
+ tmp = d1.mid() as u64 * d2.hi() as u64;
+ tmp = tmp.wrapping_add(tmp2);
+ // Check for carry
+ tmp3 = if tmp < tmp2 { 1 } else { 0 };
+
+ // 8. lh * rm
+ tmp2 = d1.hi() as u64 * d2.mid() as u64;
+ tmp = tmp.wrapping_add(tmp2);
+ product.data[3] = tmp as u32;
+ // Check for carry
+ if tmp < tmp2 {
+ tmp3 += 1;
+ }
+ tmp = (tmp3 << 32) | (tmp >> 32);
+
+ // 9. lh * rh
+ product.set_high64(d1.hi() as u64 * d2.hi() as u64 + tmp);
+ } else {
+ product.set_mid64(tmp);
+ }
+ }
+
+ // We may want to "rescale". This is the case if the mantissa is > 96 bits or if the scale
+ // exceeds the maximum precision.
+ let upper_word = product.upper_word();
+ if upper_word > 2 || scale > MAX_PRECISION_U32 {
+ scale = if let Some(new_scale) = product.rescale(upper_word, scale) {
+ new_scale
+ } else {
+ return CalculationResult::Overflow;
+ }
+ }
+
+ CalculationResult::Ok(Decimal::from_parts(
+ product.data[0],
+ product.data[1],
+ product.data[2],
+ negative,
+ scale,
+ ))
+}
+
+#[inline(always)]
+fn mul_by_32bit_lhs(d1: u64, d2: &Decimal, product: &mut Buf24) {
+ let mut tmp = d1 * d2.lo() as u64;
+ product.data[0] = tmp as u32;
+ tmp = (d1 * d2.mid() as u64).wrapping_add(tmp >> 32);
+ product.data[1] = tmp as u32;
+ tmp >>= 32;
+
+ // If we're multiplying by a 96 bit integer then continue the calculation
+ if d2.hi() > 0 {
+ tmp = tmp.wrapping_add(d1 * d2.hi() as u64);
+ if tmp > U32_MAX {
+ product.set_mid64(tmp);
+ } else {
+ product.data[2] = tmp as u32;
+ }
+ } else {
+ product.data[2] = tmp as u32;
+ }
+}
diff --git a/third_party/rust/rust_decimal/src/ops/rem.rs b/third_party/rust/rust_decimal/src/ops/rem.rs
new file mode 100644
index 0000000000..a79334e04b
--- /dev/null
+++ b/third_party/rust/rust_decimal/src/ops/rem.rs
@@ -0,0 +1,285 @@
+use crate::constants::{MAX_I32_SCALE, MAX_PRECISION_I32, POWERS_10};
+use crate::decimal::{CalculationResult, Decimal};
+use crate::ops::common::{Buf12, Buf16, Buf24, Dec64};
+
+pub(crate) fn rem_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult {
+ if d2.is_zero() {
+ return CalculationResult::DivByZero;
+ }
+ if d1.is_zero() {
+ return CalculationResult::Ok(Decimal::ZERO);
+ }
+
+ // We handle the structs a bit differently here. Firstly, we ignore both the sign and
+ // the scale of d2. This is because during a remainder operation we do not care about
+ // the sign of the divisor and only concern ourselves with that of the dividend.
+ let mut d1 = Dec64::new(d1);
+ let d2_scale = d2.scale();
+ let mut d2 = Buf12::from_decimal(d2);
+
+ let cmp = crate::ops::cmp::cmp_internal(
+ &d1,
+ &Dec64 {
+ negative: d1.negative,
+ scale: d2_scale,
+ hi: d2.hi(),
+ low64: d2.low64(),
+ },
+ );
+ match cmp {
+ core::cmp::Ordering::Equal => {
+ // Same numbers meaning that remainder is zero
+ return CalculationResult::Ok(Decimal::ZERO);
+ }
+ core::cmp::Ordering::Less => {
+ // d1 < d2, e.g. 1 % 2. This means that the result is the value of d1
+ return CalculationResult::Ok(d1.to_decimal());
+ }
+ core::cmp::Ordering::Greater => {}
+ }
+
+ // At this point we know that the dividend > divisor and that they are both non-zero.
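+ // e.g. for 12.50 % 3 the mantissas are 1250 (scale 2) and 3 (scale 0); the divisor
+ // is scaled up by 10^2 to 300 so that the remainder 1250 % 300 = 50 can be taken
+ // directly later on, giving 0.50.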
+ let mut scale = d1.scale as i32 - d2_scale as i32;
+ if scale > 0 {
+ // Scale up the divisor
+ loop {
+ let power = if scale >= MAX_I32_SCALE {
+ POWERS_10[9]
+ } else {
+ POWERS_10[scale as usize]
+ } as u64;
+
+ let mut tmp = d2.lo() as u64 * power;
+ d2.set_lo(tmp as u32);
+ tmp >>= 32;
+ tmp = tmp.wrapping_add((d2.mid() as u64 + ((d2.hi() as u64) << 32)) * power);
+ d2.set_mid(tmp as u32);
+ d2.set_hi((tmp >> 32) as u32);
+
+ // Keep scaling if there is more to go
+ scale -= MAX_I32_SCALE;
+ if scale <= 0 {
+ break;
+ }
+ }
+ scale = 0;
+ }
+
+ loop {
+ // If the dividend is smaller than the divisor then try to scale that up first
+ if scale < 0 {
+ let mut quotient = Buf12 {
+ data: [d1.lo(), d1.mid(), d1.hi],
+ };
+ loop {
+ // Figure out how much we can scale by
+ let power_scale;
+ if let Some(u) = quotient.find_scale(MAX_PRECISION_I32 + scale) {
+ if u >= POWERS_10.len() {
+ power_scale = 9;
+ } else {
+ power_scale = u;
+ }
+ } else {
+ return CalculationResult::Overflow;
+ };
+ if power_scale == 0 {
+ break;
+ }
+ let power = POWERS_10[power_scale] as u64;
+ scale += power_scale as i32;
+
+ let mut tmp = quotient.data[0] as u64 * power;
+ quotient.data[0] = tmp as u32;
+ tmp >>= 32;
+ quotient.set_high64(tmp.wrapping_add(quotient.high64().wrapping_mul(power)));
+ if power_scale != 9 {
+ break;
+ }
+ if scale >= 0 {
+ break;
+ }
+ }
+ d1.low64 = quotient.low64();
+ d1.hi = quotient.data[2];
+ d1.scale = d2_scale;
+ }
+
+ // if the high portion is empty then return the modulus of the bottom portion
+ if d1.hi == 0 {
+ d1.low64 %= d2.low64();
+ return CalculationResult::Ok(d1.to_decimal());
+ } else if (d2.mid() | d2.hi()) == 0 {
+ let mut tmp = d1.high64();
+ tmp = ((tmp % d2.lo() as u64) << 32) | (d1.lo() as u64);
+ d1.low64 = tmp % d2.lo() as u64;
+ d1.hi = 0;
+ } else {
+ // Divisor is > 32 bits
+ return rem_full(&d1, &d2, scale);
+ }
+
+ if scale >= 0 {
+ break;
+ }
+ }
+
+ CalculationResult::Ok(d1.to_decimal())
+}
+
+fn rem_full(d1: &Dec64, d2: &Buf12, scale: i32) -> CalculationResult {
+ let mut scale = scale;
+
+ // First normalize the divisor
+ let shift = if d2.hi() == 0 {
+ d2.mid().leading_zeros()
+ } else {
+ d2.hi().leading_zeros()
+ };
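+ // Both the dividend and the divisor are shifted left by the same amount so that
+ // the divisor's most significant word has its top bit set before the partial
+ // divisions below; the remainder is shifted back down by `shift` when the result
+ // is assembled.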
+
+ let mut buffer = Buf24::zero();
+ let mut overflow = 0u32;
+ buffer.set_low64(d1.low64 << shift);
+ buffer.set_mid64(((d1.mid() as u64).wrapping_add((d1.hi as u64) << 32)) >> (32 - shift));
+ let mut upper = 3; // We start at 3 due to bit shifting
+
+ while scale < 0 {
+ let power = if -scale >= MAX_I32_SCALE {
+ POWERS_10[9]
+ } else {
+ POWERS_10[-scale as usize]
+ } as u64;
+ let mut tmp64 = buffer.data[0] as u64 * power;
+ buffer.data[0] = tmp64 as u32;
+
+ for (index, part) in buffer.data.iter_mut().enumerate().skip(1) {
+ if index > upper {
+ break;
+ }
+ tmp64 >>= 32;
+ tmp64 = tmp64.wrapping_add((*part as u64).wrapping_mul(power));
+ *part = tmp64 as u32;
+ }
+ // If we have overflow then also process that
+ if upper == 6 {
+ tmp64 >>= 32;
+ tmp64 = tmp64.wrapping_add((overflow as u64).wrapping_mul(power));
+ overflow = tmp64 as u32;
+ }
+
+ // Make sure the high bit is not set
+ if tmp64 > 0x7FFF_FFFF {
+ upper += 1;
+ if upper > 5 {
+ overflow = (tmp64 >> 32) as u32;
+ } else {
+ buffer.data[upper] = (tmp64 >> 32) as u32;
+ }
+ }
+ scale += MAX_I32_SCALE;
+ }
+
+ // TODO: Optimize slice logic
+
+ let mut tmp = Buf16::zero();
+ let divisor = d2.low64() << shift;
+ if d2.hi() == 0 {
+ // Do some division
+ if upper == 6 {
+ upper -= 1;
+
+ tmp.data = [buffer.data[4], buffer.data[5], overflow, 0];
+ tmp.partial_divide_64(divisor);
+ buffer.data[4] = tmp.data[0];
+ buffer.data[5] = tmp.data[1];
+ }
+ if upper == 5 {
+ upper -= 1;
+ tmp.data = [buffer.data[3], buffer.data[4], buffer.data[5], 0];
+ tmp.partial_divide_64(divisor);
+ buffer.data[3] = tmp.data[0];
+ buffer.data[4] = tmp.data[1];
+ buffer.data[5] = tmp.data[2];
+ }
+ if upper == 4 {
+ tmp.data = [buffer.data[2], buffer.data[3], buffer.data[4], 0];
+ tmp.partial_divide_64(divisor);
+ buffer.data[2] = tmp.data[0];
+ buffer.data[3] = tmp.data[1];
+ buffer.data[4] = tmp.data[2];
+ }
+
+ tmp.data = [buffer.data[1], buffer.data[2], buffer.data[3], 0];
+ tmp.partial_divide_64(divisor);
+ buffer.data[1] = tmp.data[0];
+ buffer.data[2] = tmp.data[1];
+ buffer.data[3] = tmp.data[2];
+
+ tmp.data = [buffer.data[0], buffer.data[1], buffer.data[2], 0];
+ tmp.partial_divide_64(divisor);
+ buffer.data[0] = tmp.data[0];
+ buffer.data[1] = tmp.data[1];
+ buffer.data[2] = tmp.data[2];
+
+ let low64 = buffer.low64() >> shift;
+ CalculationResult::Ok(Decimal::from_parts(
+ low64 as u32,
+ (low64 >> 32) as u32,
+ 0,
+ d1.negative,
+ d1.scale,
+ ))
+ } else {
+ let divisor_low64 = divisor;
+ let divisor = Buf12 {
+ data: [
+ divisor_low64 as u32,
+ (divisor_low64 >> 32) as u32,
+ (((d2.mid() as u64) + ((d2.hi() as u64) << 32)) >> (32 - shift)) as u32,
+ ],
+ };
+
+ // Do some division
+ if upper == 6 {
+ upper -= 1;
+ tmp.data = [buffer.data[3], buffer.data[4], buffer.data[5], overflow];
+ tmp.partial_divide_96(&divisor);
+ buffer.data[3] = tmp.data[0];
+ buffer.data[4] = tmp.data[1];
+ buffer.data[5] = tmp.data[2];
+ }
+ if upper == 5 {
+ upper -= 1;
+ tmp.data = [buffer.data[2], buffer.data[3], buffer.data[4], buffer.data[5]];
+ tmp.partial_divide_96(&divisor);
+ buffer.data[2] = tmp.data[0];
+ buffer.data[3] = tmp.data[1];
+ buffer.data[4] = tmp.data[2];
+ buffer.data[5] = tmp.data[3];
+ }
+ if upper == 4 {
+ tmp.data = [buffer.data[1], buffer.data[2], buffer.data[3], buffer.data[4]];
+ tmp.partial_divide_96(&divisor);
+ buffer.data[1] = tmp.data[0];
+ buffer.data[2] = tmp.data[1];
+ buffer.data[3] = tmp.data[2];
+ buffer.data[4] = tmp.data[3];
+ }
+
+ tmp.data = [buffer.data[0], buffer.data[1], buffer.data[2], buffer.data[3]];
+ tmp.partial_divide_96(&divisor);
+ buffer.data[0] = tmp.data[0];
+ buffer.data[1] = tmp.data[1];
+ buffer.data[2] = tmp.data[2];
+ buffer.data[3] = tmp.data[3];
+
+ let low64 = (buffer.low64() >> shift) + ((buffer.data[2] as u64) << (32 - shift) << 32);
+ CalculationResult::Ok(Decimal::from_parts(
+ low64 as u32,
+ (low64 >> 32) as u32,
+ buffer.data[2] >> shift,
+ d1.negative,
+ d1.scale,
+ ))
+ }
+}