Diffstat (limited to 'third_party/rust/rust_decimal/src/ops')
-rw-r--r-- | third_party/rust/rust_decimal/src/ops/add.rs    | 382
-rw-r--r-- | third_party/rust/rust_decimal/src/ops/array.rs  | 381
-rw-r--r-- | third_party/rust/rust_decimal/src/ops/cmp.rs    | 101
-rw-r--r-- | third_party/rust/rust_decimal/src/ops/common.rs | 455
-rw-r--r-- | third_party/rust/rust_decimal/src/ops/div.rs    | 658
-rw-r--r-- | third_party/rust/rust_decimal/src/ops/legacy.rs | 843
-rw-r--r-- | third_party/rust/rust_decimal/src/ops/mul.rs    | 168
-rw-r--r-- | third_party/rust/rust_decimal/src/ops/rem.rs    | 285
8 files changed, 3273 insertions, 0 deletions
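The modules added below implement Decimal's internal arithmetic (addition/subtraction, comparison, division, multiplication, remainder, plus the legacy code paths). For orientation only, here is a minimal usage sketch of how these internal ops surface through the crate's public arithmetic; it assumes rust_decimal's usual `Decimal::new` constructor and `checked_*` methods and is not part of the vendored diff itself:

```rust
use rust_decimal::Decimal;

fn main() {
    // add.rs / legacy.rs back the Add impl; operands are rescaled to a
    // common scale before the mantissas are added.
    let a = Decimal::new(125, 2); // 1.25
    let b = Decimal::new(3, 1);   // 0.3
    assert_eq!((a + b).to_string(), "1.55");

    // div.rs reports CalculationResult::DivByZero internally, which the
    // public checked_div exposes as None instead of panicking.
    assert_eq!(a.checked_div(Decimal::ZERO), None);

    // mul.rs signals CalculationResult::Overflow when the 96-bit mantissa
    // is exceeded; checked_mul maps that to None as well.
    assert_eq!(Decimal::MAX.checked_mul(Decimal::new(2, 0)), None);
}
```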
diff --git a/third_party/rust/rust_decimal/src/ops/add.rs b/third_party/rust/rust_decimal/src/ops/add.rs new file mode 100644 index 0000000000..52ba675f23 --- /dev/null +++ b/third_party/rust/rust_decimal/src/ops/add.rs @@ -0,0 +1,382 @@ +use crate::constants::{MAX_I32_SCALE, POWERS_10, SCALE_MASK, SCALE_SHIFT, SIGN_MASK, U32_MASK, U32_MAX}; +use crate::decimal::{CalculationResult, Decimal}; +use crate::ops::common::{Buf24, Dec64}; + +pub(crate) fn add_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult { + add_sub_internal(d1, d2, false) +} + +pub(crate) fn sub_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult { + add_sub_internal(d1, d2, true) +} + +#[inline] +fn add_sub_internal(d1: &Decimal, d2: &Decimal, subtract: bool) -> CalculationResult { + if d1.is_zero() { + // 0 - x or 0 + x + let mut result = *d2; + if subtract && !d2.is_zero() { + result.set_sign_negative(d2.is_sign_positive()); + } + return CalculationResult::Ok(result); + } + if d2.is_zero() { + // x - 0 or x + 0 + return CalculationResult::Ok(*d1); + } + + // Work out whether we need to rescale and/or if it's a subtract still given the signs of the + // numbers. + let flags = d1.flags() ^ d2.flags(); + let subtract = subtract ^ ((flags & SIGN_MASK) != 0); + let rescale = (flags & SCALE_MASK) > 0; + + // We optimize towards using 32 bit logic as much as possible. It's noticeably faster at + // scale, even on 64 bit machines + if d1.mid() | d1.hi() == 0 && d2.mid() | d2.hi() == 0 { + // We'll try to rescale, however we may end up with 64 bit (or more) numbers + // If we do, we'll choose a different flow than fast_add + if rescale { + // This is less optimized if we scale to a 64 bit integer. We can add some further logic + // here later on. + let rescale_factor = ((d2.flags() & SCALE_MASK) as i32 - (d1.flags() & SCALE_MASK) as i32) >> SCALE_SHIFT; + if rescale_factor < 0 { + // We try to rescale the rhs + if let Some(rescaled) = rescale32(d2.lo(), -rescale_factor) { + return fast_add(d1.lo(), rescaled, d1.flags(), subtract); + } + } else { + // We try to rescale the lhs + if let Some(rescaled) = rescale32(d1.lo(), rescale_factor) { + return fast_add( + rescaled, + d2.lo(), + (d2.flags() & SCALE_MASK) | (d1.flags() & SIGN_MASK), + subtract, + ); + } + } + } else { + return fast_add(d1.lo(), d2.lo(), d1.flags(), subtract); + } + } + + // Continue on with the slower 64 bit method + let d1 = Dec64::new(d1); + let d2 = Dec64::new(d2); + + // If we're not the same scale then make sure we're there first before starting addition + if rescale { + let rescale_factor = d2.scale as i32 - d1.scale as i32; + if rescale_factor < 0 { + let negative = subtract ^ d1.negative; + let scale = d1.scale; + unaligned_add(d2, d1, negative, scale, -rescale_factor, subtract) + } else { + let negative = d1.negative; + let scale = d2.scale; + unaligned_add(d1, d2, negative, scale, rescale_factor, subtract) + } + } else { + let neg = d1.negative; + let scale = d1.scale; + aligned_add(d1, d2, neg, scale, subtract) + } +} + +#[inline(always)] +fn rescale32(num: u32, rescale_factor: i32) -> Option<u32> { + if rescale_factor > MAX_I32_SCALE { + return None; + } + num.checked_mul(POWERS_10[rescale_factor as usize]) +} + +fn fast_add(lo1: u32, lo2: u32, flags: u32, subtract: bool) -> CalculationResult { + if subtract { + // Sub can't overflow because we're ensuring the bigger number always subtracts the smaller number + if lo1 < lo2 { + return CalculationResult::Ok(Decimal::from_parts_raw(lo2 - lo1, 0, 0, flags ^ SIGN_MASK)); + } + return 
CalculationResult::Ok(Decimal::from_parts_raw(lo1 - lo2, 0, 0, flags)); + } + // Add can overflow however, so we check for that explicitly + let lo = lo1.wrapping_add(lo2); + let mid = if lo < lo1 { 1 } else { 0 }; + CalculationResult::Ok(Decimal::from_parts_raw(lo, mid, 0, flags)) +} + +fn aligned_add(lhs: Dec64, rhs: Dec64, negative: bool, scale: u32, subtract: bool) -> CalculationResult { + if subtract { + // Signs differ, so subtract + let mut result = Dec64 { + negative, + scale, + low64: lhs.low64.wrapping_sub(rhs.low64), + hi: lhs.hi.wrapping_sub(rhs.hi), + }; + + // Check for carry + if result.low64 > lhs.low64 { + result.hi = result.hi.wrapping_sub(1); + if result.hi >= lhs.hi { + flip_sign(&mut result); + } + } else if result.hi > lhs.hi { + flip_sign(&mut result); + } + CalculationResult::Ok(result.to_decimal()) + } else { + // Signs are the same, so add + let mut result = Dec64 { + negative, + scale, + low64: lhs.low64.wrapping_add(rhs.low64), + hi: lhs.hi.wrapping_add(rhs.hi), + }; + + // Check for carry + if result.low64 < lhs.low64 { + result.hi = result.hi.wrapping_add(1); + if result.hi <= lhs.hi { + if result.scale == 0 { + return CalculationResult::Overflow; + } + reduce_scale(&mut result); + } + } else if result.hi < lhs.hi { + if result.scale == 0 { + return CalculationResult::Overflow; + } + reduce_scale(&mut result); + } + CalculationResult::Ok(result.to_decimal()) + } +} + +fn flip_sign(result: &mut Dec64) { + // Bitwise not the high portion + result.hi = !result.hi; + let low64 = ((result.low64 as i64).wrapping_neg()) as u64; + if low64 == 0 { + result.hi += 1; + } + result.low64 = low64; + result.negative = !result.negative; +} + +fn reduce_scale(result: &mut Dec64) { + let mut low64 = result.low64; + let mut hi = result.hi; + + let mut num = (hi as u64) + (1u64 << 32); + hi = (num / 10u64) as u32; + num = ((num - (hi as u64) * 10u64) << 32) + (low64 >> 32); + let mut div = (num / 10) as u32; + num = ((num - (div as u64) * 10u64) << 32) + (low64 & U32_MASK); + low64 = (div as u64) << 32; + div = (num / 10u64) as u32; + low64 = low64.wrapping_add(div as u64); + let remainder = (num as u32).wrapping_sub(div.wrapping_mul(10)); + + // Finally, round. This is optimizing slightly toward non-rounded numbers + if remainder >= 5 && (remainder > 5 || (low64 & 1) > 0) { + low64 = low64.wrapping_add(1); + if low64 == 0 { + hi += 1; + } + } + + result.low64 = low64; + result.hi = hi; + result.scale -= 1; +} + +// Assumption going into this function is that the LHS is the larger number and will "absorb" the +// smaller number. +fn unaligned_add( + lhs: Dec64, + rhs: Dec64, + negative: bool, + scale: u32, + rescale_factor: i32, + subtract: bool, +) -> CalculationResult { + let mut lhs = lhs; + let mut low64 = lhs.low64; + let mut high = lhs.hi; + let mut rescale_factor = rescale_factor; + + // First off, we see if we can get away with scaling small amounts (or none at all) + if high == 0 { + if low64 <= U32_MAX { + // We know it's not zero, so we start scaling. 
+ // Start with reducing the scale down for the low portion + while low64 <= U32_MAX { + if rescale_factor <= MAX_I32_SCALE { + low64 *= POWERS_10[rescale_factor as usize] as u64; + lhs.low64 = low64; + return aligned_add(lhs, rhs, negative, scale, subtract); + } + rescale_factor -= MAX_I32_SCALE; + low64 *= POWERS_10[9] as u64; + } + } + + // Reduce the scale for the high portion + while high == 0 { + let power = if rescale_factor <= MAX_I32_SCALE { + POWERS_10[rescale_factor as usize] as u64 + } else { + POWERS_10[9] as u64 + }; + + let tmp_low = (low64 & U32_MASK) * power; + let tmp_hi = (low64 >> 32) * power + (tmp_low >> 32); + low64 = (tmp_low & U32_MASK) + (tmp_hi << 32); + high = (tmp_hi >> 32) as u32; + rescale_factor -= MAX_I32_SCALE; + if rescale_factor <= 0 { + lhs.low64 = low64; + lhs.hi = high; + return aligned_add(lhs, rhs, negative, scale, subtract); + } + } + } + + // See if we can get away with keeping it in the 96 bits. Otherwise, we need a buffer + let mut tmp64: u64; + loop { + let power = if rescale_factor <= MAX_I32_SCALE { + POWERS_10[rescale_factor as usize] as u64 + } else { + POWERS_10[9] as u64 + }; + + let tmp_low = (low64 & U32_MASK) * power; + tmp64 = (low64 >> 32) * power + (tmp_low >> 32); + low64 = (tmp_low & U32_MASK) + (tmp64 << 32); + tmp64 >>= 32; + tmp64 += (high as u64) * power; + + rescale_factor -= MAX_I32_SCALE; + + if tmp64 > U32_MAX { + break; + } else { + high = tmp64 as u32; + if rescale_factor <= 0 { + lhs.low64 = low64; + lhs.hi = high; + return aligned_add(lhs, rhs, negative, scale, subtract); + } + } + } + + let mut buffer = Buf24::zero(); + buffer.set_low64(low64); + buffer.set_mid64(tmp64); + + let mut upper_word = buffer.upper_word(); + while rescale_factor > 0 { + let power = if rescale_factor <= MAX_I32_SCALE { + POWERS_10[rescale_factor as usize] as u64 + } else { + POWERS_10[9] as u64 + }; + tmp64 = 0; + for (index, part) in buffer.data.iter_mut().enumerate() { + tmp64 = tmp64.wrapping_add((*part as u64) * power); + *part = tmp64 as u32; + tmp64 >>= 32; + if index + 1 > upper_word { + break; + } + } + + if tmp64 & U32_MASK > 0 { + // Extend the result + upper_word += 1; + buffer.data[upper_word] = tmp64 as u32; + } + + rescale_factor -= MAX_I32_SCALE; + } + + // Do the add + tmp64 = buffer.low64(); + low64 = rhs.low64; + let tmp_hi = buffer.data[2]; + high = rhs.hi; + + if subtract { + low64 = tmp64.wrapping_sub(low64); + high = tmp_hi.wrapping_sub(high); + + // Check for carry + let carry = if low64 > tmp64 { + high = high.wrapping_sub(1); + high >= tmp_hi + } else { + high > tmp_hi + }; + + if carry { + for part in buffer.data.iter_mut().skip(3) { + *part = part.wrapping_sub(1); + if *part > 0 { + break; + } + } + + if buffer.data[upper_word] == 0 && upper_word < 3 { + return CalculationResult::Ok(Decimal::from_parts( + low64 as u32, + (low64 >> 32) as u32, + high, + negative, + scale, + )); + } + } + } else { + low64 = low64.wrapping_add(tmp64); + high = high.wrapping_add(tmp_hi); + + // Check for carry + let carry = if low64 < tmp64 { + high = high.wrapping_add(1); + high <= tmp_hi + } else { + high < tmp_hi + }; + + if carry { + for (index, part) in buffer.data.iter_mut().enumerate().skip(3) { + if upper_word < index { + *part = 1; + upper_word = index; + break; + } + *part = part.wrapping_add(1); + if *part > 0 { + break; + } + } + } + } + + buffer.set_low64(low64); + buffer.data[2] = high; + if let Some(scale) = buffer.rescale(upper_word, scale) { + CalculationResult::Ok(Decimal::from_parts( + buffer.data[0], + 
buffer.data[1], + buffer.data[2], + negative, + scale, + )) + } else { + CalculationResult::Overflow + } +} diff --git a/third_party/rust/rust_decimal/src/ops/array.rs b/third_party/rust/rust_decimal/src/ops/array.rs new file mode 100644 index 0000000000..2ab58b4dfb --- /dev/null +++ b/third_party/rust/rust_decimal/src/ops/array.rs @@ -0,0 +1,381 @@ +use crate::constants::{MAX_PRECISION_U32, POWERS_10, U32_MASK}; + +/// Rescales the given decimal to new scale. +/// e.g. with 1.23 and new scale 3 rescale the value to 1.230 +#[inline(always)] +pub(crate) fn rescale_internal(value: &mut [u32; 3], value_scale: &mut u32, new_scale: u32) { + if *value_scale == new_scale { + // Nothing to do + return; + } + + if is_all_zero(value) { + *value_scale = new_scale.min(MAX_PRECISION_U32); + return; + } + + if *value_scale > new_scale { + let mut diff = value_scale.wrapping_sub(new_scale); + // Scaling further isn't possible since we got an overflow + // In this case we need to reduce the accuracy of the "side to keep" + + // Now do the necessary rounding + let mut remainder = 0; + while let Some(diff_minus_one) = diff.checked_sub(1) { + if is_all_zero(value) { + *value_scale = new_scale; + return; + } + + diff = diff_minus_one; + + // Any remainder is discarded if diff > 0 still (i.e. lost precision) + remainder = div_by_u32(value, 10); + } + if remainder >= 5 { + for part in value.iter_mut() { + let digit = u64::from(*part) + 1u64; + remainder = if digit > U32_MASK { 1 } else { 0 }; + *part = (digit & U32_MASK) as u32; + if remainder == 0 { + break; + } + } + } + *value_scale = new_scale; + } else { + let mut diff = new_scale.wrapping_sub(*value_scale); + let mut working = [value[0], value[1], value[2]]; + while let Some(diff_minus_one) = diff.checked_sub(1) { + if mul_by_10(&mut working) == 0 { + value.copy_from_slice(&working); + diff = diff_minus_one; + } else { + break; + } + } + *value_scale = new_scale.wrapping_sub(diff); + } +} + +#[cfg(feature = "legacy-ops")] +pub(crate) fn add_by_internal(value: &mut [u32], by: &[u32]) -> u32 { + let mut carry: u64 = 0; + let vl = value.len(); + let bl = by.len(); + if vl >= bl { + let mut sum: u64; + for i in 0..bl { + sum = u64::from(value[i]) + u64::from(by[i]) + carry; + value[i] = (sum & U32_MASK) as u32; + carry = sum >> 32; + } + if vl > bl && carry > 0 { + for i in value.iter_mut().skip(bl) { + sum = u64::from(*i) + carry; + *i = (sum & U32_MASK) as u32; + carry = sum >> 32; + if carry == 0 { + break; + } + } + } + } else if vl + 1 == bl { + // Overflow, by default, is anything in the high portion of by + let mut sum: u64; + for i in 0..vl { + sum = u64::from(value[i]) + u64::from(by[i]) + carry; + value[i] = (sum & U32_MASK) as u32; + carry = sum >> 32; + } + if by[vl] > 0 { + carry += u64::from(by[vl]); + } + } else { + panic!("Internal error: add using incompatible length arrays. 
{} <- {}", vl, bl); + } + carry as u32 +} + +pub(crate) fn add_by_internal_flattened(value: &mut [u32; 3], by: u32) -> u32 { + manage_add_by_internal(by, value) +} + +#[inline] +pub(crate) fn add_one_internal(value: &mut [u32; 3]) -> u32 { + manage_add_by_internal(1, value) +} + +// `u64 as u32` are safe because of widening and 32bits shifts +#[inline] +pub(crate) fn manage_add_by_internal<const N: usize>(initial_carry: u32, value: &mut [u32; N]) -> u32 { + let mut carry = u64::from(initial_carry); + let mut iter = 0..value.len(); + let mut sum = 0; + + let mut sum_fn = |local_carry: &mut u64, idx| { + sum = u64::from(value[idx]).wrapping_add(*local_carry); + value[idx] = (sum & U32_MASK) as u32; + *local_carry = sum.wrapping_shr(32); + }; + + if let Some(idx) = iter.next() { + sum_fn(&mut carry, idx); + } + + for idx in iter { + if carry > 0 { + sum_fn(&mut carry, idx); + } + } + + carry as u32 +} + +pub(crate) fn sub_by_internal(value: &mut [u32], by: &[u32]) -> u32 { + // The way this works is similar to long subtraction + // Let's assume we're working with bytes for simplicity in an example: + // 257 - 8 = 249 + // 0000_0001 0000_0001 - 0000_0000 0000_1000 = 0000_0000 1111_1001 + // We start by doing the first byte... + // Overflow = 0 + // Left = 0000_0001 (1) + // Right = 0000_1000 (8) + // Firstly, we make sure the left and right are scaled up to twice the size + // Left = 0000_0000 0000_0001 + // Right = 0000_0000 0000_1000 + // We then subtract right from left + // Result = Left - Right = 1111_1111 1111_1001 + // We subtract the overflow, which in this case is 0. + // Because left < right (1 < 8) we invert the high part. + // Lo = 1111_1001 + // Hi = 1111_1111 -> 0000_0001 + // Lo is the field, hi is the overflow. + // We do the same for the second byte... + // Overflow = 1 + // Left = 0000_0001 + // Right = 0000_0000 + // Result = Left - Right = 0000_0000 0000_0001 + // We subtract the overflow... + // Result = 0000_0000 0000_0001 - 1 = 0 + // And we invert the high, just because (invert 0 = 0). 
+ // So our result is: + // 0000_0000 1111_1001 + let mut overflow = 0; + let vl = value.len(); + let bl = by.len(); + for i in 0..vl { + if i >= bl { + break; + } + let (lo, hi) = sub_part(value[i], by[i], overflow); + value[i] = lo; + overflow = hi; + } + overflow +} + +fn sub_part(left: u32, right: u32, overflow: u32) -> (u32, u32) { + let part = 0x1_0000_0000u64 + u64::from(left) - (u64::from(right) + u64::from(overflow)); + let lo = part as u32; + let hi = 1 - ((part >> 32) as u32); + (lo, hi) +} + +// Returns overflow +#[inline] +pub(crate) fn mul_by_10(bits: &mut [u32; 3]) -> u32 { + let mut overflow = 0u64; + for b in bits.iter_mut() { + let result = u64::from(*b) * 10u64 + overflow; + let hi = (result >> 32) & U32_MASK; + let lo = (result & U32_MASK) as u32; + *b = lo; + overflow = hi; + } + + overflow as u32 +} + +// Returns overflow +pub(crate) fn mul_by_u32(bits: &mut [u32], m: u32) -> u32 { + let mut overflow = 0; + for b in bits.iter_mut() { + let (lo, hi) = mul_part(*b, m, overflow); + *b = lo; + overflow = hi; + } + overflow +} + +pub(crate) fn mul_part(left: u32, right: u32, high: u32) -> (u32, u32) { + let result = u64::from(left) * u64::from(right) + u64::from(high); + let hi = ((result >> 32) & U32_MASK) as u32; + let lo = (result & U32_MASK) as u32; + (lo, hi) +} + +// Returns remainder +pub(crate) fn div_by_u32<const N: usize>(bits: &mut [u32; N], divisor: u32) -> u32 { + if divisor == 0 { + // Divide by zero + panic!("Internal error: divide by zero"); + } else if divisor == 1 { + // dividend remains unchanged + 0 + } else { + let mut remainder = 0u32; + let divisor = u64::from(divisor); + for part in bits.iter_mut().rev() { + let temp = (u64::from(remainder) << 32) + u64::from(*part); + remainder = (temp % divisor) as u32; + *part = (temp / divisor) as u32; + } + + remainder + } +} + +pub(crate) fn div_by_1x(bits: &mut [u32; 3], power: usize) -> u32 { + let mut remainder = 0u32; + let divisor = POWERS_10[power] as u64; + let temp = ((remainder as u64) << 32) + (bits[2] as u64); + remainder = (temp % divisor) as u32; + bits[2] = (temp / divisor) as u32; + let temp = ((remainder as u64) << 32) + (bits[1] as u64); + remainder = (temp % divisor) as u32; + bits[1] = (temp / divisor) as u32; + let temp = ((remainder as u64) << 32) + (bits[0] as u64); + remainder = (temp % divisor) as u32; + bits[0] = (temp / divisor) as u32; + remainder +} + +#[inline] +pub(crate) fn shl1_internal(bits: &mut [u32], carry: u32) -> u32 { + let mut carry = carry; + for part in bits.iter_mut() { + let b = *part >> 31; + *part = (*part << 1) | carry; + carry = b; + } + carry +} + +#[inline] +pub(crate) fn cmp_internal(left: &[u32; 3], right: &[u32; 3]) -> core::cmp::Ordering { + let left_hi: u32 = left[2]; + let right_hi: u32 = right[2]; + let left_lo: u64 = u64::from(left[1]) << 32 | u64::from(left[0]); + let right_lo: u64 = u64::from(right[1]) << 32 | u64::from(right[0]); + if left_hi < right_hi || (left_hi <= right_hi && left_lo < right_lo) { + core::cmp::Ordering::Less + } else if left_hi == right_hi && left_lo == right_lo { + core::cmp::Ordering::Equal + } else { + core::cmp::Ordering::Greater + } +} + +#[inline] +pub(crate) fn is_all_zero<const N: usize>(bits: &[u32; N]) -> bool { + bits.iter().all(|b| *b == 0) +} + +#[cfg(test)] +mod test { + // Tests on private methods. + // + // All public tests should go under `tests/`. 
+ + use super::*; + use crate::prelude::*; + + #[test] + fn it_can_rescale_internal() { + fn extract(value: &str) -> ([u32; 3], u32) { + let v = Decimal::from_str(value).unwrap(); + (v.mantissa_array3(), v.scale()) + } + + let tests = &[ + ("1", 0, "1", 0), + ("1", 1, "1.0", 1), + ("1", 5, "1.00000", 5), + ("1", 10, "1.0000000000", 10), + ("1", 20, "1.00000000000000000000", 20), + ( + "0.6386554621848739495798319328", + 27, + "0.638655462184873949579831933", + 27, + ), + ( + "843.65000000", // Scale 8 + 25, + "843.6500000000000000000000000", + 25, + ), + ( + "843.65000000", // Scale 8 + 30, + "843.6500000000000000000000000", + 25, // Only fits 25 + ), + ("0", 130, "0.000000000000000000000000000000", 28), + ]; + + for &(value_raw, new_scale, expected_value, expected_scale) in tests { + let (expected_value, _) = extract(expected_value); + let (mut value, mut value_scale) = extract(value_raw); + rescale_internal(&mut value, &mut value_scale, new_scale); + assert_eq!(value, expected_value); + assert_eq!( + value_scale, expected_scale, + "value: {}, requested scale: {}", + value_raw, new_scale + ); + } + } + + #[test] + fn test_shl1_internal() { + struct TestCase { + // One thing to be cautious of is that the structure of a number here for shifting left is + // the reverse of how you may conceive this mentally. i.e. a[2] contains the higher order + // bits: a[2] a[1] a[0] + given: [u32; 3], + given_carry: u32, + expected: [u32; 3], + expected_carry: u32, + } + let tests = [ + TestCase { + given: [1, 0, 0], + given_carry: 0, + expected: [2, 0, 0], + expected_carry: 0, + }, + TestCase { + given: [1, 0, 2147483648], + given_carry: 1, + expected: [3, 0, 0], + expected_carry: 1, + }, + ]; + for case in &tests { + let mut test = [case.given[0], case.given[1], case.given[2]]; + let carry = shl1_internal(&mut test, case.given_carry); + assert_eq!( + test, case.expected, + "Bits: {:?} << 1 | {}", + case.given, case.given_carry + ); + assert_eq!( + carry, case.expected_carry, + "Carry: {:?} << 1 | {}", + case.given, case.given_carry + ) + } + } +} diff --git a/third_party/rust/rust_decimal/src/ops/cmp.rs b/third_party/rust/rust_decimal/src/ops/cmp.rs new file mode 100644 index 0000000000..636085bff5 --- /dev/null +++ b/third_party/rust/rust_decimal/src/ops/cmp.rs @@ -0,0 +1,101 @@ +use crate::constants::{MAX_I32_SCALE, POWERS_10, U32_MASK, U32_MAX}; +use crate::decimal::Decimal; +use crate::ops::common::Dec64; + +use core::cmp::Ordering; + +pub(crate) fn cmp_impl(d1: &Decimal, d2: &Decimal) -> Ordering { + if d2.is_zero() { + return if d1.is_zero() { + return Ordering::Equal; + } else if d1.is_sign_negative() { + Ordering::Less + } else { + Ordering::Greater + }; + } + if d1.is_zero() { + return if d2.is_sign_negative() { + Ordering::Greater + } else { + Ordering::Less + }; + } + // If the sign is different, then it's an easy answer + if d1.is_sign_negative() != d2.is_sign_negative() { + return if d1.is_sign_negative() { + Ordering::Less + } else { + Ordering::Greater + }; + } + + // Otherwise, do a deep comparison + let d1 = Dec64::new(d1); + let d2 = Dec64::new(d2); + // We know both signs are the same here so flip it here. + // Negative is handled differently. i.e. 
0.5 > 0.01 however -0.5 < -0.01 + if d1.negative { + cmp_internal(&d2, &d1) + } else { + cmp_internal(&d1, &d2) + } +} + +pub(in crate::ops) fn cmp_internal(d1: &Dec64, d2: &Dec64) -> Ordering { + // This function ignores sign + let mut d1_low = d1.low64; + let mut d1_high = d1.hi; + let mut d2_low = d2.low64; + let mut d2_high = d2.hi; + + // If the scale factors aren't equal then + if d1.scale != d2.scale { + let mut diff = d2.scale as i32 - d1.scale as i32; + if diff < 0 { + diff = -diff; + if !rescale(&mut d2_low, &mut d2_high, diff as u32) { + return Ordering::Less; + } + } else if !rescale(&mut d1_low, &mut d1_high, diff as u32) { + return Ordering::Greater; + } + } + + // They're the same scale, do a standard bitwise comparison + let hi_order = d1_high.cmp(&d2_high); + if hi_order != Ordering::Equal { + return hi_order; + } + d1_low.cmp(&d2_low) +} + +fn rescale(low64: &mut u64, high: &mut u32, diff: u32) -> bool { + let mut diff = diff as i32; + // We need to modify d1 by 10^diff to get it to the same scale as d2 + loop { + let power = if diff >= MAX_I32_SCALE { + POWERS_10[9] + } else { + POWERS_10[diff as usize] + } as u64; + let tmp_lo_32 = (*low64 & U32_MASK) * power; + let mut tmp = (*low64 >> 32) * power + (tmp_lo_32 >> 32); + *low64 = (tmp_lo_32 & U32_MASK) + (tmp << 32); + tmp >>= 32; + tmp = tmp.wrapping_add((*high as u64) * power); + // Indicates > 96 bits + if tmp > U32_MAX { + return false; + } + *high = tmp as u32; + + // Keep scaling if there is more to go + diff -= MAX_I32_SCALE; + if diff <= 0 { + break; + } + } + + true +} diff --git a/third_party/rust/rust_decimal/src/ops/common.rs b/third_party/rust/rust_decimal/src/ops/common.rs new file mode 100644 index 0000000000..c29362d824 --- /dev/null +++ b/third_party/rust/rust_decimal/src/ops/common.rs @@ -0,0 +1,455 @@ +use crate::constants::{MAX_I32_SCALE, MAX_PRECISION_I32, POWERS_10}; +use crate::Decimal; + +#[derive(Debug)] +pub struct Buf12 { + pub data: [u32; 3], +} + +impl Buf12 { + pub(super) const fn from_dec64(value: &Dec64) -> Self { + Buf12 { + data: [value.low64 as u32, (value.low64 >> 32) as u32, value.hi], + } + } + + pub(super) const fn from_decimal(value: &Decimal) -> Self { + Buf12 { + data: value.mantissa_array3(), + } + } + + #[inline(always)] + pub const fn lo(&self) -> u32 { + self.data[0] + } + #[inline(always)] + pub const fn mid(&self) -> u32 { + self.data[1] + } + #[inline(always)] + pub const fn hi(&self) -> u32 { + self.data[2] + } + #[inline(always)] + pub fn set_lo(&mut self, value: u32) { + self.data[0] = value; + } + #[inline(always)] + pub fn set_mid(&mut self, value: u32) { + self.data[1] = value; + } + #[inline(always)] + pub fn set_hi(&mut self, value: u32) { + self.data[2] = value; + } + + #[inline(always)] + pub const fn low64(&self) -> u64 { + ((self.data[1] as u64) << 32) | (self.data[0] as u64) + } + + #[inline(always)] + pub fn set_low64(&mut self, value: u64) { + self.data[1] = (value >> 32) as u32; + self.data[0] = value as u32; + } + + #[inline(always)] + pub const fn high64(&self) -> u64 { + ((self.data[2] as u64) << 32) | (self.data[1] as u64) + } + + #[inline(always)] + pub fn set_high64(&mut self, value: u64) { + self.data[2] = (value >> 32) as u32; + self.data[1] = value as u32; + } + + // Determine the maximum value of x that ensures that the quotient when scaled up by 10^x + // still fits in 96 bits. Ultimately, we want to make scale positive - if we can't then + // we're going to overflow. 
Because x is ultimately used to lookup inside the POWERS array, it + // must be a valid value 0 <= x <= 9 + pub fn find_scale(&self, scale: i32) -> Option<usize> { + const OVERFLOW_MAX_9_HI: u32 = 4; + const OVERFLOW_MAX_8_HI: u32 = 42; + const OVERFLOW_MAX_7_HI: u32 = 429; + const OVERFLOW_MAX_6_HI: u32 = 4294; + const OVERFLOW_MAX_5_HI: u32 = 42949; + const OVERFLOW_MAX_4_HI: u32 = 429496; + const OVERFLOW_MAX_3_HI: u32 = 4294967; + const OVERFLOW_MAX_2_HI: u32 = 42949672; + const OVERFLOW_MAX_1_HI: u32 = 429496729; + const OVERFLOW_MAX_9_LOW64: u64 = 5441186219426131129; + + let hi = self.data[2]; + let low64 = self.low64(); + let mut x = 0usize; + + // Quick check to stop us from trying to scale any more. + // + if hi > OVERFLOW_MAX_1_HI { + // If it's less than 0, which it probably is - overflow. We can't do anything. + if scale < 0 { + return None; + } + return Some(x); + } + + if scale > MAX_PRECISION_I32 - 9 { + // We can't scale by 10^9 without exceeding the max scale factor. + // Instead, we'll try to scale by the most that we can and see if that works. + // This is safe to do due to the check above. e.g. scale > 19 in the above, so it will + // evaluate to 9 or less below. + x = (MAX_PRECISION_I32 - scale) as usize; + if hi < POWER_OVERFLOW_VALUES[x - 1].data[2] { + if x as i32 + scale < 0 { + // We still overflow + return None; + } + return Some(x); + } + } else if hi < OVERFLOW_MAX_9_HI || hi == OVERFLOW_MAX_9_HI && low64 <= OVERFLOW_MAX_9_LOW64 { + return Some(9); + } + + // Do a binary search to find a power to scale by that is less than 9 + x = if hi > OVERFLOW_MAX_5_HI { + if hi > OVERFLOW_MAX_3_HI { + if hi > OVERFLOW_MAX_2_HI { + 1 + } else { + 2 + } + } else if hi > OVERFLOW_MAX_4_HI { + 3 + } else { + 4 + } + } else if hi > OVERFLOW_MAX_7_HI { + if hi > OVERFLOW_MAX_6_HI { + 5 + } else { + 6 + } + } else if hi > OVERFLOW_MAX_8_HI { + 7 + } else { + 8 + }; + + // Double check what we've found won't overflow. Otherwise, we go one below. + if hi == POWER_OVERFLOW_VALUES[x - 1].data[2] && low64 > POWER_OVERFLOW_VALUES[x - 1].low64() { + x -= 1; + } + + // Confirm we've actually resolved things + if x as i32 + scale < 0 { + None + } else { + Some(x) + } + } +} + +// This is a table of the largest values that will not overflow when multiplied +// by a given power as represented by the index. 
+static POWER_OVERFLOW_VALUES: [Buf12; 8] = [ + Buf12 { + data: [2576980377, 2576980377, 429496729], + }, + Buf12 { + data: [687194767, 4123168604, 42949672], + }, + Buf12 { + data: [2645699854, 1271310319, 4294967], + }, + Buf12 { + data: [694066715, 3133608139, 429496], + }, + Buf12 { + data: [2216890319, 2890341191, 42949], + }, + Buf12 { + data: [2369172679, 4154504685, 4294], + }, + Buf12 { + data: [4102387834, 2133437386, 429], + }, + Buf12 { + data: [410238783, 4078814305, 42], + }, +]; + +pub(super) struct Dec64 { + pub negative: bool, + pub scale: u32, + pub hi: u32, + pub low64: u64, +} + +impl Dec64 { + pub(super) const fn new(d: &Decimal) -> Dec64 { + let m = d.mantissa_array3(); + if m[1] == 0 { + Dec64 { + negative: d.is_sign_negative(), + scale: d.scale(), + hi: m[2], + low64: m[0] as u64, + } + } else { + Dec64 { + negative: d.is_sign_negative(), + scale: d.scale(), + hi: m[2], + low64: ((m[1] as u64) << 32) | (m[0] as u64), + } + } + } + + #[inline(always)] + pub(super) const fn lo(&self) -> u32 { + self.low64 as u32 + } + #[inline(always)] + pub(super) const fn mid(&self) -> u32 { + (self.low64 >> 32) as u32 + } + + #[inline(always)] + pub(super) const fn high64(&self) -> u64 { + (self.low64 >> 32) | ((self.hi as u64) << 32) + } + + pub(super) const fn to_decimal(&self) -> Decimal { + Decimal::from_parts( + self.low64 as u32, + (self.low64 >> 32) as u32, + self.hi, + self.negative, + self.scale, + ) + } +} + +pub struct Buf16 { + pub data: [u32; 4], +} + +impl Buf16 { + pub const fn zero() -> Self { + Buf16 { data: [0, 0, 0, 0] } + } + + pub const fn low64(&self) -> u64 { + ((self.data[1] as u64) << 32) | (self.data[0] as u64) + } + + pub fn set_low64(&mut self, value: u64) { + self.data[1] = (value >> 32) as u32; + self.data[0] = value as u32; + } + + pub const fn mid64(&self) -> u64 { + ((self.data[2] as u64) << 32) | (self.data[1] as u64) + } + + pub fn set_mid64(&mut self, value: u64) { + self.data[2] = (value >> 32) as u32; + self.data[1] = value as u32; + } + + pub const fn high64(&self) -> u64 { + ((self.data[3] as u64) << 32) | (self.data[2] as u64) + } + + pub fn set_high64(&mut self, value: u64) { + self.data[3] = (value >> 32) as u32; + self.data[2] = value as u32; + } +} + +#[derive(Debug)] +pub struct Buf24 { + pub data: [u32; 6], +} + +impl Buf24 { + pub const fn zero() -> Self { + Buf24 { + data: [0, 0, 0, 0, 0, 0], + } + } + + pub const fn low64(&self) -> u64 { + ((self.data[1] as u64) << 32) | (self.data[0] as u64) + } + + pub fn set_low64(&mut self, value: u64) { + self.data[1] = (value >> 32) as u32; + self.data[0] = value as u32; + } + + #[allow(dead_code)] + pub const fn mid64(&self) -> u64 { + ((self.data[3] as u64) << 32) | (self.data[2] as u64) + } + + pub fn set_mid64(&mut self, value: u64) { + self.data[3] = (value >> 32) as u32; + self.data[2] = value as u32; + } + + #[allow(dead_code)] + pub const fn high64(&self) -> u64 { + ((self.data[5] as u64) << 32) | (self.data[4] as u64) + } + + pub fn set_high64(&mut self, value: u64) { + self.data[5] = (value >> 32) as u32; + self.data[4] = value as u32; + } + + pub const fn upper_word(&self) -> usize { + if self.data[5] > 0 { + return 5; + } + if self.data[4] > 0 { + return 4; + } + if self.data[3] > 0 { + return 3; + } + if self.data[2] > 0 { + return 2; + } + if self.data[1] > 0 { + return 1; + } + 0 + } + + // Attempt to rescale the number into 96 bits. If successful, the scale is returned wrapped + // in an Option. If it failed due to overflow, we return None. 
+ // * `upper` - Index of last non-zero value in self. + // * `scale` - Current scale factor for this value. + pub fn rescale(&mut self, upper: usize, scale: u32) -> Option<u32> { + let mut scale = scale as i32; + let mut upper = upper; + + // Determine a rescale target to start with + let mut rescale_target = 0i32; + if upper > 2 { + rescale_target = upper as i32 * 32 - 64 - 1; + rescale_target -= self.data[upper].leading_zeros() as i32; + rescale_target = ((rescale_target * 77) >> 8) + 1; + if rescale_target > scale { + return None; + } + } + + // Make sure we scale enough to bring it into a valid range + if rescale_target < scale - MAX_PRECISION_I32 { + rescale_target = scale - MAX_PRECISION_I32; + } + + if rescale_target > 0 { + // We're going to keep reducing by powers of 10. So, start by reducing the scale by + // that amount. + scale -= rescale_target; + let mut sticky = 0; + let mut remainder = 0; + loop { + sticky |= remainder; + let mut power = if rescale_target > 8 { + POWERS_10[9] + } else { + POWERS_10[rescale_target as usize] + }; + + let high = self.data[upper]; + let high_quotient = high / power; + remainder = high - high_quotient * power; + + for item in self.data.iter_mut().rev().skip(6 - upper) { + let num = (*item as u64).wrapping_add((remainder as u64) << 32); + *item = (num / power as u64) as u32; + remainder = (num as u32).wrapping_sub(item.wrapping_mul(power)); + } + + self.data[upper] = high_quotient; + + // If the high quotient was zero then decrease the upper bound + if high_quotient == 0 && upper > 0 { + upper -= 1; + } + if rescale_target > MAX_I32_SCALE { + // Scale some more + rescale_target -= MAX_I32_SCALE; + continue; + } + + // If we fit into 96 bits then we've scaled enough. Otherwise, scale once more. + if upper > 2 { + if scale == 0 { + return None; + } + // Equivalent to scaling down by 10 + rescale_target = 1; + scale -= 1; + continue; + } + + // Round the final result. + power >>= 1; + let carried = if power <= remainder { + // If we're less than half then we're fine. Otherwise, we round if odd or if the + // sticky bit is set. + if power < remainder || ((self.data[0] & 1) | sticky) != 0 { + // Round up + self.data[0] = self.data[0].wrapping_add(1); + // Check if we carried + self.data[0] == 0 + } else { + false + } + } else { + false + }; + + // If we carried then propagate through the portions + if carried { + let mut pos = 0; + for (index, value) in self.data.iter_mut().enumerate().skip(1) { + pos = index; + *value = value.wrapping_add(1); + if *value != 0 { + break; + } + } + + // If we ended up rounding over the 96 bits then we'll try to rescale down (again) + if pos > 2 { + // Nothing to scale down from will cause overflow + if scale == 0 { + return None; + } + + // Loop back around using scale of 10. + // Reset the sticky bit and remainder before looping. 
+ upper = pos; + sticky = 0; + remainder = 0; + rescale_target = 1; + scale -= 1; + continue; + } + } + break; + } + } + + Some(scale as u32) + } +} diff --git a/third_party/rust/rust_decimal/src/ops/div.rs b/third_party/rust/rust_decimal/src/ops/div.rs new file mode 100644 index 0000000000..3b5ec577b2 --- /dev/null +++ b/third_party/rust/rust_decimal/src/ops/div.rs @@ -0,0 +1,658 @@ +use crate::constants::{MAX_PRECISION_I32, POWERS_10}; +use crate::decimal::{CalculationResult, Decimal}; +use crate::ops::common::{Buf12, Buf16, Dec64}; + +use core::cmp::Ordering; +use core::ops::BitXor; + +impl Buf12 { + // Returns true if successful, else false for an overflow + fn add32(&mut self, value: u32) -> Result<(), DivError> { + let value = value as u64; + let new = self.low64().wrapping_add(value); + self.set_low64(new); + if new < value { + self.data[2] = self.data[2].wrapping_add(1); + if self.data[2] == 0 { + return Err(DivError::Overflow); + } + } + Ok(()) + } + + // Divide a Decimal union by a 32 bit divisor. + // Self is overwritten with the quotient. + // Return value is a 32 bit remainder. + fn div32(&mut self, divisor: u32) -> u32 { + let divisor64 = divisor as u64; + // See if we can get by using a simple u64 division + if self.data[2] != 0 { + let mut temp = self.high64(); + let q64 = temp / divisor64; + self.set_high64(q64); + + // Calculate the "remainder" + temp = ((temp - q64 * divisor64) << 32) | (self.data[0] as u64); + if temp == 0 { + return 0; + } + let q32 = (temp / divisor64) as u32; + self.data[0] = q32; + ((temp as u32).wrapping_sub(q32.wrapping_mul(divisor))) as u32 + } else { + // Super easy divisor + let low64 = self.low64(); + if low64 == 0 { + // Nothing to do + return 0; + } + // Do the calc + let quotient = low64 / divisor64; + self.set_low64(quotient); + // Remainder is the leftover that wasn't used + (low64.wrapping_sub(quotient.wrapping_mul(divisor64))) as u32 + } + } + + // Divide the number by a power constant + // Returns true if division was successful + fn div32_const(&mut self, pow: u32) -> bool { + let pow64 = pow as u64; + let high64 = self.high64(); + let lo = self.data[0] as u64; + let div64: u64 = high64 / pow64; + let div = ((((high64 - div64 * pow64) << 32) + lo) / pow64) as u32; + if self.data[0] == div.wrapping_mul(pow) { + self.set_high64(div64); + self.data[0] = div; + true + } else { + false + } + } +} + +impl Buf16 { + // Does a partial divide with a 64 bit divisor. The divisor in this case must be 64 bits + // otherwise various assumptions fail (e.g. 32 bit quotient). + // To assist, the upper 64 bits must be greater than the divisor for this to succeed. + // Consequently, it will return the quotient as a 32 bit number and overwrite self with the + // 64 bit remainder. + pub(super) fn partial_divide_64(&mut self, divisor: u64) -> u32 { + // We make this assertion here, however below we pivot based on the data + debug_assert!(divisor > self.mid64()); + + // If we have an empty high bit, then divisor must be greater than the dividend due to + // the assumption that the divisor REQUIRES 64 bits. + if self.data[2] == 0 { + let low64 = self.low64(); + if low64 < divisor { + // We can't divide at at all so result is 0. The dividend remains untouched since + // the full amount is the remainder. + return 0; + } + + let quotient = low64 / divisor; + self.set_low64(low64 - (quotient * divisor)); + return quotient as u32; + } + + // Do a simple check to see if the hi portion of the dividend is greater than the hi + // portion of the divisor. 
+ let divisor_hi32 = (divisor >> 32) as u32; + if self.data[2] >= divisor_hi32 { + // We know that the divisor goes into this at MOST u32::max times. + // So we kick things off, with that assumption + let mut low64 = self.low64(); + low64 = low64.wrapping_sub(divisor << 32).wrapping_add(divisor); + let mut quotient = u32::MAX; + + // If we went negative then keep adding it back in + loop { + if low64 < divisor { + break; + } + quotient = quotient.wrapping_sub(1); + low64 = low64.wrapping_add(divisor); + } + self.set_low64(low64); + return quotient; + } + + let mid64 = self.mid64(); + let divisor_hi32_64 = divisor_hi32 as u64; + if mid64 < divisor_hi32_64 as u64 { + // similar situation as above where we've got nothing left to divide + return 0; + } + + let mut quotient = mid64 / divisor_hi32_64; + let mut remainder = self.data[0] as u64 | ((mid64 - quotient * divisor_hi32_64) << 32); + + // Do quotient * lo divisor + let product = quotient * (divisor & 0xFFFF_FFFF); + remainder = remainder.wrapping_sub(product); + + // Check if we've gone negative. If so, add it back + if remainder > product.bitxor(u64::MAX) { + loop { + quotient = quotient.wrapping_sub(1); + remainder = remainder.wrapping_add(divisor); + if remainder < divisor { + break; + } + } + } + + self.set_low64(remainder); + quotient as u32 + } + + // Does a partial divide with a 96 bit divisor. The divisor in this case must require 96 bits + // otherwise various assumptions fail (e.g. 32 bit quotient). + pub(super) fn partial_divide_96(&mut self, divisor: &Buf12) -> u32 { + let dividend = self.high64(); + let divisor_hi = divisor.data[2]; + if dividend < divisor_hi as u64 { + // Dividend is too small - entire number is remainder + return 0; + } + + let mut quo = (dividend / divisor_hi as u64) as u32; + let mut remainder = (dividend as u32).wrapping_sub(quo.wrapping_mul(divisor_hi)); + + // Compute full remainder + let mut prod1 = quo as u64 * divisor.data[0] as u64; + let mut prod2 = quo as u64 * divisor.data[1] as u64; + prod2 += prod1 >> 32; + prod1 = (prod1 & 0xFFFF_FFFF) | (prod2 << 32); + prod2 >>= 32; + + let mut num = self.low64(); + num = num.wrapping_sub(prod1); + remainder = remainder.wrapping_sub(prod2 as u32); + + // If there are carries make sure they are propagated + if num > prod1.bitxor(u64::MAX) { + remainder = remainder.wrapping_sub(1); + if remainder < (prod2 as u32).bitxor(u32::MAX) { + self.set_low64(num); + self.data[2] = remainder; + return quo; + } + } else if remainder <= (prod2 as u32).bitxor(u32::MAX) { + self.set_low64(num); + self.data[2] = remainder; + return quo; + } + + // Remainder went negative, add divisor back until it's positive + prod1 = divisor.low64(); + loop { + quo = quo.wrapping_sub(1); + num = num.wrapping_add(prod1); + remainder = remainder.wrapping_add(divisor_hi); + + if num < prod1 { + // Detected carry. 
+ let tmp = remainder; + remainder = remainder.wrapping_add(1); + if tmp < divisor_hi { + break; + } + } + if remainder < divisor_hi { + break; // detected carry + } + } + + self.set_low64(num); + self.data[2] = remainder; + quo + } +} + +enum DivError { + Overflow, +} + +pub(crate) fn div_impl(dividend: &Decimal, divisor: &Decimal) -> CalculationResult { + if divisor.is_zero() { + return CalculationResult::DivByZero; + } + if dividend.is_zero() { + return CalculationResult::Ok(Decimal::ZERO); + } + let dividend = Dec64::new(dividend); + let divisor = Dec64::new(divisor); + + // Pre calculate the scale and the sign + let mut scale = (dividend.scale as i32) - (divisor.scale as i32); + let sign_negative = dividend.negative ^ divisor.negative; + + // Set up some variables for modification throughout + let mut require_unscale = false; + let mut quotient = Buf12::from_dec64(÷nd); + let divisor = Buf12::from_dec64(&divisor); + + // Branch depending on the complexity of the divisor + if divisor.data[2] | divisor.data[1] == 0 { + // We have a simple(r) divisor (32 bit) + let divisor32 = divisor.data[0]; + + // Remainder can only be 32 bits since the divisor is 32 bits. + let mut remainder = quotient.div32(divisor32); + let mut power_scale = 0; + + // Figure out how to apply the remainder (i.e. we may have performed something like 10/3 or 8/5) + loop { + // Remainder is 0 so we have a simple situation + if remainder == 0 { + // If the scale is positive then we're actually done + if scale >= 0 { + break; + } + power_scale = 9usize.min((-scale) as usize); + } else { + // We may need to normalize later, so set the flag appropriately + require_unscale = true; + + // We have a remainder so we effectively want to try to adjust the quotient and add + // the remainder into the quotient. We do this below, however first of all we want + // to try to avoid overflowing so we do that check first. + let will_overflow = if scale == MAX_PRECISION_I32 { + true + } else { + // Figure out how much we can scale by + if let Some(s) = quotient.find_scale(scale) { + power_scale = s; + } else { + return CalculationResult::Overflow; + } + // If it comes back as 0 (i.e. 10^0 = 1) then we're going to overflow since + // we're doing nothing. + power_scale == 0 + }; + if will_overflow { + // No more scaling can be done, but remainder is non-zero so we round if necessary. + let tmp = remainder << 1; + let round = if tmp < remainder { + // We round if we wrapped around + true + } else if tmp >= divisor32 { + // If we're greater than the divisor (i.e. underflow) + // or if there is a lo bit set, we round + tmp > divisor32 || (quotient.data[0] & 0x1) > 0 + } else { + false + }; + + // If we need to round, try to do so. 
+ if round { + if let Ok(new_scale) = round_up(&mut quotient, scale) { + scale = new_scale; + } else { + // Overflowed + return CalculationResult::Overflow; + } + } + break; + } + } + + // Do some scaling + let power = POWERS_10[power_scale]; + scale += power_scale as i32; + // Increase the quotient by the power that was looked up + let overflow = increase_scale(&mut quotient, power as u64); + if overflow > 0 { + return CalculationResult::Overflow; + } + + let remainder_scaled = (remainder as u64) * (power as u64); + let remainder_quotient = (remainder_scaled / (divisor32 as u64)) as u32; + remainder = (remainder_scaled - remainder_quotient as u64 * divisor32 as u64) as u32; + if let Err(DivError::Overflow) = quotient.add32(remainder_quotient) { + if let Ok(adj) = unscale_from_overflow(&mut quotient, scale, remainder != 0) { + scale = adj; + } else { + // Still overflowing + return CalculationResult::Overflow; + } + break; + } + } + } else { + // We have a divisor greater than 32 bits. Both of these share some quick calculation wins + // so we'll do those before branching into separate logic. + // The win we can do is shifting the bits to the left as much as possible. We do this to both + // the dividend and the divisor to ensure the quotient is not changed. + // As a simple contrived example: if we have 4 / 2 then we could bit shift all the way to the + // left meaning that the lo portion would have nothing inside of it. Of course, shifting these + // left one has the same result (8/4) etc. + // The advantage is that we may be able to write off lower portions of the number making things + // easier. + let mut power_scale = if divisor.data[2] == 0 { + divisor.data[1].leading_zeros() + } else { + divisor.data[2].leading_zeros() + } as usize; + let mut remainder = Buf16::zero(); + remainder.set_low64(quotient.low64() << power_scale); + let tmp_high = ((quotient.data[1] as u64) + ((quotient.data[2] as u64) << 32)) >> (32 - power_scale); + remainder.set_high64(tmp_high); + + // Work out the divisor after it's shifted + let divisor64 = divisor.low64() << power_scale; + // Check if the divisor is 64 bit or the full 96 bits + if divisor.data[2] == 0 { + // It's 64 bits + quotient.data[2] = 0; + + // Calc mid/lo by shifting accordingly + let rem_lo = remainder.data[0]; + remainder.data[0] = remainder.data[1]; + remainder.data[1] = remainder.data[2]; + remainder.data[2] = remainder.data[3]; + quotient.data[1] = remainder.partial_divide_64(divisor64); + + remainder.data[2] = remainder.data[1]; + remainder.data[1] = remainder.data[0]; + remainder.data[0] = rem_lo; + quotient.data[0] = remainder.partial_divide_64(divisor64); + + loop { + let rem_low64 = remainder.low64(); + if rem_low64 == 0 { + // If the scale is positive then we're actually done + if scale >= 0 { + break; + } + power_scale = 9usize.min((-scale) as usize); + } else { + // We may need to normalize later, so set the flag appropriately + require_unscale = true; + + // We have a remainder so we effectively want to try to adjust the quotient and add + // the remainder into the quotient. We do this below, however first of all we want + // to try to avoid overflowing so we do that check first. + let will_overflow = if scale == MAX_PRECISION_I32 { + true + } else { + // Figure out how much we can scale by + if let Some(s) = quotient.find_scale(scale) { + power_scale = s; + } else { + return CalculationResult::Overflow; + } + // If it comes back as 0 (i.e. 10^0 = 1) then we're going to overflow since + // we're doing nothing. 
+ power_scale == 0 + }; + if will_overflow { + // No more scaling can be done, but remainder is non-zero so we round if necessary. + let mut tmp = remainder.low64(); + let round = if (tmp as i64) < 0 { + // We round if we wrapped around + true + } else { + tmp <<= 1; + if tmp > divisor64 { + true + } else { + tmp == divisor64 && quotient.data[0] & 0x1 != 0 + } + }; + + // If we need to round, try to do so. + if round { + if let Ok(new_scale) = round_up(&mut quotient, scale) { + scale = new_scale; + } else { + // Overflowed + return CalculationResult::Overflow; + } + } + break; + } + } + + // Do some scaling + let power = POWERS_10[power_scale]; + scale += power_scale as i32; + + // Increase the quotient by the power that was looked up + let overflow = increase_scale(&mut quotient, power as u64); + if overflow > 0 { + return CalculationResult::Overflow; + } + increase_scale64(&mut remainder, power as u64); + + let tmp = remainder.partial_divide_64(divisor64); + if let Err(DivError::Overflow) = quotient.add32(tmp) { + if let Ok(adj) = unscale_from_overflow(&mut quotient, scale, remainder.low64() != 0) { + scale = adj; + } else { + // Still overflowing + return CalculationResult::Overflow; + } + break; + } + } + } else { + // It's 96 bits + // Start by finishing the shift left + let divisor_mid = divisor.data[1]; + let divisor_hi = divisor.data[2]; + let mut divisor = divisor; + divisor.set_low64(divisor64); + divisor.data[2] = ((divisor_mid as u64 + ((divisor_hi as u64) << 32)) >> (32 - power_scale)) as u32; + + let quo = remainder.partial_divide_96(&divisor); + quotient.set_low64(quo as u64); + quotient.data[2] = 0; + + loop { + let mut rem_low64 = remainder.low64(); + if rem_low64 == 0 && remainder.data[2] == 0 { + // If the scale is positive then we're actually done + if scale >= 0 { + break; + } + power_scale = 9usize.min((-scale) as usize); + } else { + // We may need to normalize later, so set the flag appropriately + require_unscale = true; + + // We have a remainder so we effectively want to try to adjust the quotient and add + // the remainder into the quotient. We do this below, however first of all we want + // to try to avoid overflowing so we do that check first. + let will_overflow = if scale == MAX_PRECISION_I32 { + true + } else { + // Figure out how much we can scale by + if let Some(s) = quotient.find_scale(scale) { + power_scale = s; + } else { + return CalculationResult::Overflow; + } + // If it comes back as 0 (i.e. 10^0 = 1) then we're going to overflow since + // we're doing nothing. + power_scale == 0 + }; + if will_overflow { + // No more scaling can be done, but remainder is non-zero so we round if necessary. + let round = if (remainder.data[2] as i32) < 0 { + // We round if we wrapped around + true + } else { + let tmp = remainder.data[1] >> 31; + rem_low64 <<= 1; + remainder.set_low64(rem_low64); + remainder.data[2] = (&remainder.data[2] << 1) + tmp; + + match remainder.data[2].cmp(&divisor.data[2]) { + Ordering::Less => false, + Ordering::Equal => { + let divisor_low64 = divisor.low64(); + if rem_low64 > divisor_low64 { + true + } else { + rem_low64 == divisor_low64 && (quotient.data[0] & 1) != 0 + } + } + Ordering::Greater => true, + } + }; + + // If we need to round, try to do so. 
+ if round { + if let Ok(new_scale) = round_up(&mut quotient, scale) { + scale = new_scale; + } else { + // Overflowed + return CalculationResult::Overflow; + } + } + break; + } + } + + // Do some scaling + let power = POWERS_10[power_scale]; + scale += power_scale as i32; + + // Increase the quotient by the power that was looked up + let overflow = increase_scale(&mut quotient, power as u64); + if overflow > 0 { + return CalculationResult::Overflow; + } + let mut tmp_remainder = Buf12 { + data: [remainder.data[0], remainder.data[1], remainder.data[2]], + }; + let overflow = increase_scale(&mut tmp_remainder, power as u64); + remainder.data[0] = tmp_remainder.data[0]; + remainder.data[1] = tmp_remainder.data[1]; + remainder.data[2] = tmp_remainder.data[2]; + remainder.data[3] = overflow; + + let tmp = remainder.partial_divide_96(&divisor); + if let Err(DivError::Overflow) = quotient.add32(tmp) { + if let Ok(adj) = + unscale_from_overflow(&mut quotient, scale, (remainder.low64() | remainder.high64()) != 0) + { + scale = adj; + } else { + // Still overflowing + return CalculationResult::Overflow; + } + break; + } + } + } + } + if require_unscale { + scale = unscale(&mut quotient, scale); + } + CalculationResult::Ok(Decimal::from_parts( + quotient.data[0], + quotient.data[1], + quotient.data[2], + sign_negative, + scale as u32, + )) +} + +// Multiply num by power (multiple of 10). Power must be 32 bits. +// Returns the overflow, if any +fn increase_scale(num: &mut Buf12, power: u64) -> u32 { + let mut tmp = (num.data[0] as u64) * power; + num.data[0] = tmp as u32; + tmp >>= 32; + tmp += (num.data[1] as u64) * power; + num.data[1] = tmp as u32; + tmp >>= 32; + tmp += (num.data[2] as u64) * power; + num.data[2] = tmp as u32; + (tmp >> 32) as u32 +} + +// Multiply num by power (multiple of 10). Power must be 32 bits. +fn increase_scale64(num: &mut Buf16, power: u64) { + let mut tmp = (num.data[0] as u64) * power; + num.data[0] = tmp as u32; + tmp >>= 32; + tmp += (num.data[1] as u64) * power; + num.set_mid64(tmp) +} + +// Adjust the number to deal with an overflow. This function follows being scaled up (i.e. multiplied +// by 10, so this effectively tries to reverse that by dividing by 10 then feeding in the high bit +// to undo the overflow and rounding instead. +// Returns the updated scale. +fn unscale_from_overflow(num: &mut Buf12, scale: i32, sticky: bool) -> Result<i32, DivError> { + let scale = scale - 1; + if scale < 0 { + return Err(DivError::Overflow); + } + + // This function is called when the hi portion has "overflowed" upon adding one and has wrapped + // back around to 0. Consequently, we need to "feed" that back in, but also rescaling down + // to reverse out the overflow. 
+ const HIGH_BIT: u64 = 0x1_0000_0000; + num.data[2] = (HIGH_BIT / 10) as u32; + + // Calc the mid + let mut tmp = ((HIGH_BIT % 10) << 32) + (num.data[1] as u64); + let mut val = (tmp / 10) as u32; + num.data[1] = val; + + // Calc the lo using a similar method + tmp = ((tmp - (val as u64) * 10) << 32) + (num.data[0] as u64); + val = (tmp / 10) as u32; + num.data[0] = val; + + // Work out the remainder, and round if we have one (since it doesn't fit) + let remainder = (tmp - (val as u64) * 10) as u32; + if remainder > 5 || (remainder == 5 && (sticky || num.data[0] & 0x1 > 0)) { + let _ = num.add32(1); + } + Ok(scale) +} + +#[inline] +fn round_up(num: &mut Buf12, scale: i32) -> Result<i32, DivError> { + let low64 = num.low64().wrapping_add(1); + num.set_low64(low64); + if low64 != 0 { + return Ok(scale); + } + let hi = num.data[2].wrapping_add(1); + num.data[2] = hi; + if hi != 0 { + return Ok(scale); + } + unscale_from_overflow(num, scale, true) +} + +fn unscale(num: &mut Buf12, scale: i32) -> i32 { + // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10 we can extract. + // We use this as a quick test on whether to try a given power. + let mut scale = scale; + while num.data[0] == 0 && scale >= 8 && num.div32_const(100000000) { + scale -= 8; + } + + if (num.data[0] & 0xF) == 0 && scale >= 4 && num.div32_const(10000) { + scale -= 4; + } + + if (num.data[0] & 0x3) == 0 && scale >= 2 && num.div32_const(100) { + scale -= 2; + } + + if (num.data[0] & 0x1) == 0 && scale >= 1 && num.div32_const(10) { + scale -= 1; + } + scale +} diff --git a/third_party/rust/rust_decimal/src/ops/legacy.rs b/third_party/rust/rust_decimal/src/ops/legacy.rs new file mode 100644 index 0000000000..49f39814f8 --- /dev/null +++ b/third_party/rust/rust_decimal/src/ops/legacy.rs @@ -0,0 +1,843 @@ +use crate::{ + constants::{MAX_PRECISION_U32, POWERS_10, U32_MASK}, + decimal::{CalculationResult, Decimal}, + ops::array::{ + add_by_internal, cmp_internal, div_by_u32, is_all_zero, mul_by_u32, mul_part, rescale_internal, shl1_internal, + }, +}; + +use core::cmp::Ordering; +use num_traits::Zero; + +pub(crate) fn add_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult { + // Convert to the same scale + let mut my = d1.mantissa_array3(); + let mut my_scale = d1.scale(); + let mut ot = d2.mantissa_array3(); + let mut other_scale = d2.scale(); + rescale_to_maximum_scale(&mut my, &mut my_scale, &mut ot, &mut other_scale); + let mut final_scale = my_scale.max(other_scale); + + // Add the items together + let my_negative = d1.is_sign_negative(); + let other_negative = d2.is_sign_negative(); + let mut negative = false; + let carry; + if !(my_negative ^ other_negative) { + negative = my_negative; + carry = add_by_internal3(&mut my, &ot); + } else { + let cmp = cmp_internal(&my, &ot); + // -x + y + // if x > y then it's negative (i.e. -2 + 1) + match cmp { + Ordering::Less => { + negative = other_negative; + sub_by_internal3(&mut ot, &my); + my[0] = ot[0]; + my[1] = ot[1]; + my[2] = ot[2]; + } + Ordering::Greater => { + negative = my_negative; + sub_by_internal3(&mut my, &ot); + } + Ordering::Equal => { + // -2 + 2 + my[0] = 0; + my[1] = 0; + my[2] = 0; + } + } + carry = 0; + } + + // If we have a carry we underflowed. 
+ // We need to lose some significant digits (if possible) + if carry > 0 { + if final_scale == 0 { + return CalculationResult::Overflow; + } + + // Copy it over to a temp array for modification + let mut temp = [my[0], my[1], my[2], carry]; + while final_scale > 0 && temp[3] != 0 { + div_by_u32(&mut temp, 10); + final_scale -= 1; + } + + // If we still have a carry bit then we overflowed + if temp[3] > 0 { + return CalculationResult::Overflow; + } + + // Copy it back - we're done + my[0] = temp[0]; + my[1] = temp[1]; + my[2] = temp[2]; + } + + CalculationResult::Ok(Decimal::from_parts(my[0], my[1], my[2], negative, final_scale)) +} + +pub(crate) fn sub_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult { + add_impl(d1, &(-*d2)) +} + +pub(crate) fn div_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult { + if d2.is_zero() { + return CalculationResult::DivByZero; + } + if d1.is_zero() { + return CalculationResult::Ok(Decimal::zero()); + } + + let dividend = d1.mantissa_array3(); + let divisor = d2.mantissa_array3(); + let mut quotient = [0u32, 0u32, 0u32]; + let mut quotient_scale: i32 = d1.scale() as i32 - d2.scale() as i32; + + // We supply an extra overflow word for each of the dividend and the remainder + let mut working_quotient = [dividend[0], dividend[1], dividend[2], 0u32]; + let mut working_remainder = [0u32, 0u32, 0u32, 0u32]; + let mut working_scale = quotient_scale; + let mut remainder_scale = quotient_scale; + let mut underflow; + + loop { + div_internal(&mut working_quotient, &mut working_remainder, &divisor); + underflow = add_with_scale_internal( + &mut quotient, + &mut quotient_scale, + &mut working_quotient, + &mut working_scale, + ); + + // Multiply the remainder by 10 + let mut overflow = 0; + for part in working_remainder.iter_mut() { + let (lo, hi) = mul_part(*part, 10, overflow); + *part = lo; + overflow = hi; + } + // Copy temp remainder into the temp quotient section + working_quotient.copy_from_slice(&working_remainder); + + remainder_scale += 1; + working_scale = remainder_scale; + + if underflow || is_all_zero(&working_remainder) { + break; + } + } + + // If we have a really big number try to adjust the scale to 0 + while quotient_scale < 0 { + copy_array_diff_lengths(&mut working_quotient, "ient); + working_quotient[3] = 0; + working_remainder.iter_mut().for_each(|x| *x = 0); + + // Mul 10 + let mut overflow = 0; + for part in &mut working_quotient { + let (lo, hi) = mul_part(*part, 10, overflow); + *part = lo; + overflow = hi; + } + for part in &mut working_remainder { + let (lo, hi) = mul_part(*part, 10, overflow); + *part = lo; + overflow = hi; + } + if working_quotient[3] == 0 && is_all_zero(&working_remainder) { + quotient_scale += 1; + quotient[0] = working_quotient[0]; + quotient[1] = working_quotient[1]; + quotient[2] = working_quotient[2]; + } else { + // Overflow + return CalculationResult::Overflow; + } + } + + if quotient_scale > 255 { + quotient[0] = 0; + quotient[1] = 0; + quotient[2] = 0; + quotient_scale = 0; + } + + let mut quotient_negative = d1.is_sign_negative() ^ d2.is_sign_negative(); + + // Check for underflow + let mut final_scale: u32 = quotient_scale as u32; + if final_scale > MAX_PRECISION_U32 { + let mut remainder = 0; + + // Division underflowed. We must remove some significant digits over using + // an invalid scale. 
+ while final_scale > MAX_PRECISION_U32 && !is_all_zero(&quotient) {
+ remainder = div_by_u32(&mut quotient, 10);
+ final_scale -= 1;
+ }
+ if final_scale > MAX_PRECISION_U32 {
+ // Result underflowed so set to zero
+ final_scale = 0;
+ quotient_negative = false;
+ } else if remainder >= 5 {
+ for part in &mut quotient {
+ if remainder == 0 {
+ break;
+ }
+ let digit: u64 = u64::from(*part) + 1;
+ remainder = if digit > 0xFFFF_FFFF { 1 } else { 0 };
+ *part = (digit & 0xFFFF_FFFF) as u32;
+ }
+ }
+ }
+
+ CalculationResult::Ok(Decimal::from_parts(
+ quotient[0],
+ quotient[1],
+ quotient[2],
+ quotient_negative,
+ final_scale,
+ ))
+}
+
+pub(crate) fn mul_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult {
+ // Early exit if either is zero
+ if d1.is_zero() || d2.is_zero() {
+ return CalculationResult::Ok(Decimal::zero());
+ }
+
+ // The result is only negative if we have mismatched signs
+ let negative = d1.is_sign_negative() ^ d2.is_sign_negative();
+
+ // We get the scale of the result by adding the scales of the operands. This may be too big,
+ // however we'll correct that later
+ let mut final_scale = d1.scale() + d2.scale();
+
+ // First of all, if ONLY the lo parts of both numbers are filled
+ // then we can simply do a standard 64 bit calculation. It's a minor
+ // optimization, however it prevents the need for long form multiplication
+ let my = d1.mantissa_array3();
+ let ot = d2.mantissa_array3();
+ if my[1] == 0 && my[2] == 0 && ot[1] == 0 && ot[2] == 0 {
+ // Simple 64 bit multiplication
+ let mut u64_result = u64_to_array(u64::from(my[0]) * u64::from(ot[0]));
+
+ // If we're above max precision then this is a very small number
+ if final_scale > MAX_PRECISION_U32 {
+ final_scale -= MAX_PRECISION_U32;
+
+ // If the remaining scale is above 19 then this will equate to zero.
+ // This is because the max value in 64 bits is 1.84E19
+ if final_scale > 19 {
+ return CalculationResult::Ok(Decimal::zero());
+ }
+
+ let mut rem_lo = 0;
+ let mut power;
+ if final_scale > 9 {
+ // Since 10^10 doesn't fit into u32, we divide by 10^10/4
+ // and multiply the next divisor by 4.
+ rem_lo = div_by_u32(&mut u64_result, 2_500_000_000);
+ power = POWERS_10[final_scale as usize - 10] << 2;
+ } else {
+ power = POWERS_10[final_scale as usize];
+ }
+
+ // The divisor fits in 32 bits
+ let rem_hi = div_by_u32(&mut u64_result, power);
+
+ // Round the result. Since the divisor is a power of 10
+ // we check to see if the remainder is >= 1/2 divisor
+ power >>= 1;
+ if rem_hi >= power && (rem_hi > power || (rem_lo | (u64_result[0] & 0x1)) != 0) {
+ u64_result[0] += 1;
+ }
+
+ final_scale = MAX_PRECISION_U32;
+ }
+ return CalculationResult::Ok(Decimal::from_parts(
+ u64_result[0],
+ u64_result[1],
+ 0,
+ negative,
+ final_scale,
+ ));
+ }
+
+ // We're using some of the high bits, so we essentially perform
+ // long form multiplication. We compute the 9 partial products
+ // into a 192 bit result array.
+ //
+ // [my-h][my-m][my-l]
+ // x [ot-h][ot-m][ot-l]
+ // --------------------------------------
+ // 1. [r-hi][r-lo] my-l * ot-l [0, 0]
+ // 2. [r-hi][r-lo] my-l * ot-m [0, 1]
+ // 3. [r-hi][r-lo] my-m * ot-l [1, 0]
+ // 4. [r-hi][r-lo] my-m * ot-m [1, 1]
+ // 5. [r-hi][r-lo] my-l * ot-h [0, 2]
+ // 6. [r-hi][r-lo] my-h * ot-l [2, 0]
+ // 7. [r-hi][r-lo] my-m * ot-h [1, 2]
+ // 8. [r-hi][r-lo] my-h * ot-m [2, 1]
+ // 9. [r-hi][r-lo] my-h * ot-h [2, 2]
+ let mut product = [0u32, 0u32, 0u32, 0u32, 0u32, 0u32];
+
+ // We can perform a minor short circuit here.
If the + // high portions are both 0 then we can skip portions 5-9 + let to = if my[2] == 0 && ot[2] == 0 { 2 } else { 3 }; + + for (my_index, my_item) in my.iter().enumerate().take(to) { + for (ot_index, ot_item) in ot.iter().enumerate().take(to) { + let (mut rlo, mut rhi) = mul_part(*my_item, *ot_item, 0); + + // Get the index for the lo portion of the product + for prod in product.iter_mut().skip(my_index + ot_index) { + let (res, overflow) = add_part(rlo, *prod); + *prod = res; + + // If we have something in rhi from before then promote that + if rhi > 0 { + // If we overflowed in the last add, add that with rhi + if overflow > 0 { + let (nlo, nhi) = add_part(rhi, overflow); + rlo = nlo; + rhi = nhi; + } else { + rlo = rhi; + rhi = 0; + } + } else if overflow > 0 { + rlo = overflow; + rhi = 0; + } else { + break; + } + + // If nothing to do next round then break out + if rlo == 0 { + break; + } + } + } + } + + // If our result has used up the high portion of the product + // then we either have an overflow or an underflow situation + // Overflow will occur if we can't scale it back, whereas underflow + // with kick in rounding + let mut remainder = 0; + while final_scale > 0 && (product[3] != 0 || product[4] != 0 || product[5] != 0) { + remainder = div_by_u32(&mut product, 10u32); + final_scale -= 1; + } + + // Round up the carry if we need to + if remainder >= 5 { + for part in product.iter_mut() { + if remainder == 0 { + break; + } + let digit: u64 = u64::from(*part) + 1; + remainder = if digit > 0xFFFF_FFFF { 1 } else { 0 }; + *part = (digit & 0xFFFF_FFFF) as u32; + } + } + + // If we're still above max precision then we'll try again to + // reduce precision - we may be dealing with a limit of "0" + if final_scale > MAX_PRECISION_U32 { + // We're in an underflow situation + // The easiest way to remove precision is to divide off the result + while final_scale > MAX_PRECISION_U32 && !is_all_zero(&product) { + div_by_u32(&mut product, 10); + final_scale -= 1; + } + // If we're still at limit then we can't represent any + // significant decimal digits and will return an integer only + // Can also be invoked while representing 0. + if final_scale > MAX_PRECISION_U32 { + final_scale = 0; + } + } else if !(product[3] == 0 && product[4] == 0 && product[5] == 0) { + // We're in an overflow situation - we're within our precision bounds + // but still have bits in overflow + return CalculationResult::Overflow; + } + + CalculationResult::Ok(Decimal::from_parts( + product[0], + product[1], + product[2], + negative, + final_scale, + )) +} + +pub(crate) fn rem_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult { + if d2.is_zero() { + return CalculationResult::DivByZero; + } + if d1.is_zero() { + return CalculationResult::Ok(Decimal::zero()); + } + + // Rescale so comparable + let initial_scale = d1.scale(); + let mut quotient = d1.mantissa_array3(); + let mut quotient_scale = initial_scale; + let mut divisor = d2.mantissa_array3(); + let mut divisor_scale = d2.scale(); + rescale_to_maximum_scale(&mut quotient, &mut quotient_scale, &mut divisor, &mut divisor_scale); + + // Working is the remainder + the quotient + // We use an aligned array since we'll be using it a lot. + let mut working_quotient = [quotient[0], quotient[1], quotient[2], 0u32]; + let mut working_remainder = [0u32, 0u32, 0u32, 0u32]; + div_internal(&mut working_quotient, &mut working_remainder, &divisor); + + // Round if necessary. 
This is for semantic correctness, but could feasibly be removed for + // performance improvements. + if quotient_scale > initial_scale { + let mut working = [ + working_remainder[0], + working_remainder[1], + working_remainder[2], + working_remainder[3], + ]; + while quotient_scale > initial_scale { + if div_by_u32(&mut working, 10) > 0 { + break; + } + quotient_scale -= 1; + working_remainder.copy_from_slice(&working); + } + } + + CalculationResult::Ok(Decimal::from_parts( + working_remainder[0], + working_remainder[1], + working_remainder[2], + d1.is_sign_negative(), + quotient_scale, + )) +} + +pub(crate) fn cmp_impl(d1: &Decimal, d2: &Decimal) -> Ordering { + // Quick exit if major differences + if d1.is_zero() && d2.is_zero() { + return Ordering::Equal; + } + let self_negative = d1.is_sign_negative(); + let other_negative = d2.is_sign_negative(); + if self_negative && !other_negative { + return Ordering::Less; + } else if !self_negative && other_negative { + return Ordering::Greater; + } + + // If we have 1.23 and 1.2345 then we have + // 123 scale 2 and 12345 scale 4 + // We need to convert the first to + // 12300 scale 4 so we can compare equally + let left: &Decimal; + let right: &Decimal; + if self_negative && other_negative { + // Both are negative, so reverse cmp + left = d2; + right = d1; + } else { + left = d1; + right = d2; + } + let mut left_scale = left.scale(); + let mut right_scale = right.scale(); + let mut left_raw = left.mantissa_array3(); + let mut right_raw = right.mantissa_array3(); + + if left_scale == right_scale { + // Fast path for same scale + if left_raw[2] != right_raw[2] { + return left_raw[2].cmp(&right_raw[2]); + } + if left_raw[1] != right_raw[1] { + return left_raw[1].cmp(&right_raw[1]); + } + return left_raw[0].cmp(&right_raw[0]); + } + + // Rescale and compare + rescale_to_maximum_scale(&mut left_raw, &mut left_scale, &mut right_raw, &mut right_scale); + cmp_internal(&left_raw, &right_raw) +} + +#[inline] +fn add_part(left: u32, right: u32) -> (u32, u32) { + let added = u64::from(left) + u64::from(right); + ((added & U32_MASK) as u32, (added >> 32 & U32_MASK) as u32) +} + +#[inline(always)] +fn sub_by_internal3(value: &mut [u32; 3], by: &[u32; 3]) { + let mut overflow = 0; + let vl = value.len(); + for i in 0..vl { + let part = (0x1_0000_0000u64 + u64::from(value[i])) - (u64::from(by[i]) + overflow); + value[i] = part as u32; + overflow = 1 - (part >> 32); + } +} + +fn div_internal(quotient: &mut [u32; 4], remainder: &mut [u32; 4], divisor: &[u32; 3]) { + // There are a couple of ways to do division on binary numbers: + // 1. Using long division + // 2. Using the complement method + // ref: http://paulmason.me/dividing-binary-numbers-part-2/ + // The complement method basically keeps trying to subtract the + // divisor until it can't anymore and placing the rest in remainder. + let mut complement = [ + divisor[0] ^ 0xFFFF_FFFF, + divisor[1] ^ 0xFFFF_FFFF, + divisor[2] ^ 0xFFFF_FFFF, + 0xFFFF_FFFF, + ]; + + // Add one onto the complement + add_one_internal4(&mut complement); + + // Make sure the remainder is 0 + remainder.iter_mut().for_each(|x| *x = 0); + + // If we have nothing in our hi+ block then shift over till we do + let mut blocks_to_process = 0; + while blocks_to_process < 4 && quotient[3] == 0 { + // memcpy would be useful here + quotient[3] = quotient[2]; + quotient[2] = quotient[1]; + quotient[1] = quotient[0]; + quotient[0] = 0; + + // Increment the counter + blocks_to_process += 1; + } + + // Let's try and do the addition... 
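+ // Editorial sketch (not part of the upstream comments): each pass of the loop below shifts one
+ // bit of the dividend into the remainder and then attempts a trial subtraction of the divisor
+ // by adding its two's complement. As an 8 bit analogy, 13 - 5 is computed as 13 + (!5 + 1)
+ // = 13 + 251 = 264; the wrapped value 8 is the difference, and the overflow past the top bit
+ // signals that the subtraction did not go negative. In the 128 bit buffers used here the same
+ // test is whether the high bit of working[3] is clear, in which case the remainder is updated
+ // and a 1 is shifted into the quotient.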
+ let mut block = blocks_to_process << 5; + let mut working = [0u32, 0u32, 0u32, 0u32]; + while block < 128 { + // << 1 for quotient AND remainder. Moving the carry from the quotient to the bottom of the + // remainder. + let carry = shl1_internal(quotient, 0); + shl1_internal(remainder, carry); + + // Copy the remainder of working into sub + working.copy_from_slice(remainder); + + // Add the remainder with the complement + add_by_internal(&mut working, &complement); + + // Check for the significant bit - move over to the quotient + // as necessary + if (working[3] & 0x8000_0000) == 0 { + remainder.copy_from_slice(&working); + quotient[0] |= 1; + } + + // Increment our pointer + block += 1; + } +} + +#[inline] +fn copy_array_diff_lengths(into: &mut [u32], from: &[u32]) { + for i in 0..into.len() { + if i >= from.len() { + break; + } + into[i] = from[i]; + } +} + +#[inline] +fn add_one_internal4(value: &mut [u32; 4]) -> u32 { + let mut carry: u64 = 1; // Start with one, since adding one + let mut sum: u64; + for i in value.iter_mut() { + sum = (*i as u64) + carry; + *i = (sum & U32_MASK) as u32; + carry = sum >> 32; + } + + carry as u32 +} + +#[inline] +fn add_by_internal3(value: &mut [u32; 3], by: &[u32; 3]) -> u32 { + let mut carry: u32 = 0; + let bl = by.len(); + for i in 0..bl { + let res1 = value[i].overflowing_add(by[i]); + let res2 = res1.0.overflowing_add(carry); + value[i] = res2.0; + carry = (res1.1 | res2.1) as u32; + } + carry +} + +#[inline] +const fn u64_to_array(value: u64) -> [u32; 2] { + [(value & U32_MASK) as u32, (value >> 32 & U32_MASK) as u32] +} + +fn add_with_scale_internal( + quotient: &mut [u32; 3], + quotient_scale: &mut i32, + working_quotient: &mut [u32; 4], + working_scale: &mut i32, +) -> bool { + // Add quotient and the working (i.e. quotient = quotient + working) + if is_all_zero(quotient) { + // Quotient is zero so we can just copy the working quotient in directly + // First, make sure they are both 96 bit. 
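+ // Illustrative editorial example: each division by 10 below drops the least significant
+ // decimal digit and decrements the scale, so the represented value is preserved apart from
+ // the truncated digit, e.g. a mantissa of 123_456 at scale 5 (1.23456) becomes 12_345 at
+ // scale 4 (1.2345). The loop stops once the overflow word is clear and the value fits in 96 bits.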
+ while working_quotient[3] != 0 { + div_by_u32(working_quotient, 10); + *working_scale -= 1; + } + copy_array_diff_lengths(quotient, working_quotient); + *quotient_scale = *working_scale; + return false; + } + + if is_all_zero(working_quotient) { + return false; + } + + // We have ensured that our working is not zero so we should do the addition + + // If our two quotients are different then + // try to scale down the one with the bigger scale + let mut temp3 = [0u32, 0u32, 0u32]; + let mut temp4 = [0u32, 0u32, 0u32, 0u32]; + if *quotient_scale != *working_scale { + // TODO: Remove necessity for temp (without performance impact) + fn div_by_10<const N: usize>(target: &mut [u32], temp: &mut [u32; N], scale: &mut i32, target_scale: i32) { + // Copy to the temp array + temp.copy_from_slice(target); + // divide by 10 until target scale is reached + while *scale > target_scale { + let remainder = div_by_u32(temp, 10); + if remainder == 0 { + *scale -= 1; + target.copy_from_slice(temp); + } else { + break; + } + } + } + + if *quotient_scale < *working_scale { + div_by_10(working_quotient, &mut temp4, working_scale, *quotient_scale); + } else { + div_by_10(quotient, &mut temp3, quotient_scale, *working_scale); + } + } + + // If our two quotients are still different then + // try to scale up the smaller scale + if *quotient_scale != *working_scale { + // TODO: Remove necessity for temp (without performance impact) + fn mul_by_10(target: &mut [u32], temp: &mut [u32], scale: &mut i32, target_scale: i32) { + temp.copy_from_slice(target); + let mut overflow = 0; + // Multiply by 10 until target scale reached or overflow + while *scale < target_scale && overflow == 0 { + overflow = mul_by_u32(temp, 10); + if overflow == 0 { + // Still no overflow + *scale += 1; + target.copy_from_slice(temp); + } + } + } + + if *quotient_scale > *working_scale { + mul_by_10(working_quotient, &mut temp4, working_scale, *quotient_scale); + } else { + mul_by_10(quotient, &mut temp3, quotient_scale, *working_scale); + } + } + + // If our two quotients are still different then + // try to scale down the one with the bigger scale + // (ultimately losing significant digits) + if *quotient_scale != *working_scale { + // TODO: Remove necessity for temp (without performance impact) + fn div_by_10_lossy<const N: usize>( + target: &mut [u32], + temp: &mut [u32; N], + scale: &mut i32, + target_scale: i32, + ) { + temp.copy_from_slice(target); + // divide by 10 until target scale is reached + while *scale > target_scale { + div_by_u32(temp, 10); + *scale -= 1; + target.copy_from_slice(temp); + } + } + if *quotient_scale < *working_scale { + div_by_10_lossy(working_quotient, &mut temp4, working_scale, *quotient_scale); + } else { + div_by_10_lossy(quotient, &mut temp3, quotient_scale, *working_scale); + } + } + + // If quotient or working are zero we have an underflow condition + if is_all_zero(quotient) || is_all_zero(working_quotient) { + // Underflow + return true; + } else { + // Both numbers have the same scale and can be added. 
+ // We just need to know whether we can fit them in + let mut underflow = false; + let mut temp = [0u32, 0u32, 0u32]; + while !underflow { + temp.copy_from_slice(quotient); + + // Add the working quotient + let overflow = add_by_internal(&mut temp, working_quotient); + if overflow == 0 { + // addition was successful + quotient.copy_from_slice(&temp); + break; + } else { + // addition overflowed - remove significant digits and try again + div_by_u32(quotient, 10); + *quotient_scale -= 1; + div_by_u32(working_quotient, 10); + *working_scale -= 1; + // Check for underflow + underflow = is_all_zero(quotient) || is_all_zero(working_quotient); + } + } + if underflow { + return true; + } + } + false +} + +/// Rescales the given decimals to equivalent scales. +/// It will firstly try to scale both the left and the right side to +/// the maximum scale of left/right. If it is unable to do that it +/// will try to reduce the accuracy of the other argument. +/// e.g. with 1.23 and 2.345 it'll rescale the first arg to 1.230 +#[inline(always)] +fn rescale_to_maximum_scale(left: &mut [u32; 3], left_scale: &mut u32, right: &mut [u32; 3], right_scale: &mut u32) { + if left_scale == right_scale { + // Nothing to do + return; + } + + if is_all_zero(left) { + *left_scale = *right_scale; + return; + } else if is_all_zero(right) { + *right_scale = *left_scale; + return; + } + + if left_scale > right_scale { + rescale_internal(right, right_scale, *left_scale); + if right_scale != left_scale { + rescale_internal(left, left_scale, *right_scale); + } + } else { + rescale_internal(left, left_scale, *right_scale); + if right_scale != left_scale { + rescale_internal(right, right_scale, *left_scale); + } + } +} + +#[cfg(test)] +mod test { + // Tests on private methods. + // + // All public tests should go under `tests/`. 
+ + use super::*; + use crate::prelude::*; + + #[test] + fn it_can_rescale_to_maximum_scale() { + fn extract(value: &str) -> ([u32; 3], u32) { + let v = Decimal::from_str(value).unwrap(); + (v.mantissa_array3(), v.scale()) + } + + let tests = &[ + ("1", "1", "1", "1"), + ("1", "1.0", "1.0", "1.0"), + ("1", "1.00000", "1.00000", "1.00000"), + ("1", "1.0000000000", "1.0000000000", "1.0000000000"), + ( + "1", + "1.00000000000000000000", + "1.00000000000000000000", + "1.00000000000000000000", + ), + ("1.1", "1.1", "1.1", "1.1"), + ("1.1", "1.10000", "1.10000", "1.10000"), + ("1.1", "1.1000000000", "1.1000000000", "1.1000000000"), + ( + "1.1", + "1.10000000000000000000", + "1.10000000000000000000", + "1.10000000000000000000", + ), + ( + "0.6386554621848739495798319328", + "11.815126050420168067226890757", + "0.638655462184873949579831933", + "11.815126050420168067226890757", + ), + ( + "0.0872727272727272727272727272", // Scale 28 + "843.65000000", // Scale 8 + "0.0872727272727272727272727", // 25 + "843.6500000000000000000000000", // 25 + ), + ]; + + for &(left_raw, right_raw, expected_left, expected_right) in tests { + // Left = the value to rescale + // Right = the new scale we're scaling to + // Expected = the expected left value after rescale + let (expected_left, expected_lscale) = extract(expected_left); + let (expected_right, expected_rscale) = extract(expected_right); + + let (mut left, mut left_scale) = extract(left_raw); + let (mut right, mut right_scale) = extract(right_raw); + rescale_to_maximum_scale(&mut left, &mut left_scale, &mut right, &mut right_scale); + assert_eq!(left, expected_left); + assert_eq!(left_scale, expected_lscale); + assert_eq!(right, expected_right); + assert_eq!(right_scale, expected_rscale); + + // Also test the transitive case + let (mut left, mut left_scale) = extract(left_raw); + let (mut right, mut right_scale) = extract(right_raw); + rescale_to_maximum_scale(&mut right, &mut right_scale, &mut left, &mut left_scale); + assert_eq!(left, expected_left); + assert_eq!(left_scale, expected_lscale); + assert_eq!(right, expected_right); + assert_eq!(right_scale, expected_rscale); + } + } +} diff --git a/third_party/rust/rust_decimal/src/ops/mul.rs b/third_party/rust/rust_decimal/src/ops/mul.rs new file mode 100644 index 0000000000..b36729599d --- /dev/null +++ b/third_party/rust/rust_decimal/src/ops/mul.rs @@ -0,0 +1,168 @@ +use crate::constants::{BIG_POWERS_10, MAX_I64_SCALE, MAX_PRECISION_U32, U32_MAX}; +use crate::decimal::{CalculationResult, Decimal}; +use crate::ops::common::Buf24; + +pub(crate) fn mul_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult { + if d1.is_zero() || d2.is_zero() { + // We should think about this - does zero need to maintain precision? This treats it like + // an absolute which I think is ok, especially since we have is_zero() functions etc. + return CalculationResult::Ok(Decimal::ZERO); + } + + let mut scale = d1.scale() + d2.scale(); + let negative = d1.is_sign_negative() ^ d2.is_sign_negative(); + let mut product = Buf24::zero(); + + // See if we can optimize this calculation depending on whether the hi bits are set + if d1.hi() | d1.mid() == 0 { + if d2.hi() | d2.mid() == 0 { + // We're multiplying two 32 bit integers, so we can take some liberties to optimize this. + let mut low64 = d1.lo() as u64 * d2.lo() as u64; + if scale > MAX_PRECISION_U32 { + // We've exceeded maximum scale so we need to start reducing the precision (aka + // rounding) until we have something that fits. 
+ // If we're too big then we effectively round to zero. + if scale > MAX_PRECISION_U32 + MAX_I64_SCALE { + return CalculationResult::Ok(Decimal::ZERO); + } + + scale -= MAX_PRECISION_U32 + 1; + let mut power = BIG_POWERS_10[scale as usize]; + + let tmp = low64 / power; + let remainder = low64 - tmp * power; + low64 = tmp; + + // Round the result. Since the divisor was a power of 10, it's always even. + power >>= 1; + if remainder >= power && (remainder > power || (low64 as u32 & 1) > 0) { + low64 += 1; + } + + scale = MAX_PRECISION_U32; + } + + // Early exit + return CalculationResult::Ok(Decimal::from_parts( + low64 as u32, + (low64 >> 32) as u32, + 0, + negative, + scale, + )); + } + + // We know that the left hand side is just 32 bits but the right hand side is either + // 64 or 96 bits. + mul_by_32bit_lhs(d1.lo() as u64, d2, &mut product); + } else if d2.mid() | d2.hi() == 0 { + // We know that the right hand side is just 32 bits. + mul_by_32bit_lhs(d2.lo() as u64, d1, &mut product); + } else { + // We know we're not dealing with simple 32 bit operands on either side. + // We compute and accumulate the 9 partial products using long multiplication + + // 1: ll * rl + let mut tmp = d1.lo() as u64 * d2.lo() as u64; + product.data[0] = tmp as u32; + + // 2: ll * rm + let mut tmp2 = (d1.lo() as u64 * d2.mid() as u64).wrapping_add(tmp >> 32); + + // 3: lm * rl + tmp = d1.mid() as u64 * d2.lo() as u64; + tmp = tmp.wrapping_add(tmp2); + product.data[1] = tmp as u32; + + // Detect if carry happened from the wrapping add + if tmp < tmp2 { + tmp2 = (tmp >> 32) | (1u64 << 32); + } else { + tmp2 = tmp >> 32; + } + + // 4: lm * rm + tmp = (d1.mid() as u64 * d2.mid() as u64) + tmp2; + + // If the high bit isn't set then we can stop here. Otherwise, we need to continue calculating + // using the high bits. + if (d1.hi() | d2.hi()) > 0 { + // 5. ll * rh + tmp2 = d1.lo() as u64 * d2.hi() as u64; + tmp = tmp.wrapping_add(tmp2); + // Detect if we carried + let mut tmp3 = if tmp < tmp2 { 1 } else { 0 }; + + // 6. lh * rl + tmp2 = d1.hi() as u64 * d2.lo() as u64; + tmp = tmp.wrapping_add(tmp2); + product.data[2] = tmp as u32; + // Detect if we carried + if tmp < tmp2 { + tmp3 += 1; + } + tmp2 = (tmp3 << 32) | (tmp >> 32); + + // 7. lm * rh + tmp = d1.mid() as u64 * d2.hi() as u64; + tmp = tmp.wrapping_add(tmp2); + // Check for carry + tmp3 = if tmp < tmp2 { 1 } else { 0 }; + + // 8. lh * rm + tmp2 = d1.hi() as u64 * d2.mid() as u64; + tmp = tmp.wrapping_add(tmp2); + product.data[3] = tmp as u32; + // Check for carry + if tmp < tmp2 { + tmp3 += 1; + } + tmp = (tmp3 << 32) | (tmp >> 32); + + // 9. lh * rh + product.set_high64(d1.hi() as u64 * d2.hi() as u64 + tmp); + } else { + product.set_mid64(tmp); + } + } + + // We may want to "rescale". This is the case if the mantissa is > 96 bits or if the scale + // exceeds the maximum precision. 
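+ // Editorial note (an inference, not from the upstream comments): `upper_word` appears to be
+ // the index of the most significant non-zero 32 bit word of the 192 bit product, so a value
+ // greater than 2 means the raw result no longer fits in a 96 bit mantissa. `rescale` is then
+ // expected to divide the product down by powers of 10, reducing the scale, and to signal
+ // overflow when the scale cannot absorb the excess.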
+ let upper_word = product.upper_word(); + if upper_word > 2 || scale > MAX_PRECISION_U32 { + scale = if let Some(new_scale) = product.rescale(upper_word, scale) { + new_scale + } else { + return CalculationResult::Overflow; + } + } + + CalculationResult::Ok(Decimal::from_parts( + product.data[0], + product.data[1], + product.data[2], + negative, + scale, + )) +} + +#[inline(always)] +fn mul_by_32bit_lhs(d1: u64, d2: &Decimal, product: &mut Buf24) { + let mut tmp = d1 * d2.lo() as u64; + product.data[0] = tmp as u32; + tmp = (d1 * d2.mid() as u64).wrapping_add(tmp >> 32); + product.data[1] = tmp as u32; + tmp >>= 32; + + // If we're multiplying by a 96 bit integer then continue the calculation + if d2.hi() > 0 { + tmp = tmp.wrapping_add(d1 * d2.hi() as u64); + if tmp > U32_MAX { + product.set_mid64(tmp); + } else { + product.data[2] = tmp as u32; + } + } else { + product.data[2] = tmp as u32; + } +} diff --git a/third_party/rust/rust_decimal/src/ops/rem.rs b/third_party/rust/rust_decimal/src/ops/rem.rs new file mode 100644 index 0000000000..a79334e04b --- /dev/null +++ b/third_party/rust/rust_decimal/src/ops/rem.rs @@ -0,0 +1,285 @@ +use crate::constants::{MAX_I32_SCALE, MAX_PRECISION_I32, POWERS_10}; +use crate::decimal::{CalculationResult, Decimal}; +use crate::ops::common::{Buf12, Buf16, Buf24, Dec64}; + +pub(crate) fn rem_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult { + if d2.is_zero() { + return CalculationResult::DivByZero; + } + if d1.is_zero() { + return CalculationResult::Ok(Decimal::ZERO); + } + + // We handle the structs a bit different here. Firstly, we ignore both the sign/scale of d2. + // This is because during a remainder operation we do not care about the sign of the divisor + // and only concern ourselves with that of the dividend. + let mut d1 = Dec64::new(d1); + let d2_scale = d2.scale(); + let mut d2 = Buf12::from_decimal(d2); + + let cmp = crate::ops::cmp::cmp_internal( + &d1, + &Dec64 { + negative: d1.negative, + scale: d2_scale, + hi: d2.hi(), + low64: d2.low64(), + }, + ); + match cmp { + core::cmp::Ordering::Equal => { + // Same numbers meaning that remainder is zero + return CalculationResult::Ok(Decimal::ZERO); + } + core::cmp::Ordering::Less => { + // d1 < d2, e.g. 1/2. This means that the result is the value of d1 + return CalculationResult::Ok(d1.to_decimal()); + } + core::cmp::Ordering::Greater => {} + } + + // At this point we know that the dividend > divisor and that they are both non-zero. 
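+ // Illustrative editorial example: for 12.345 % 1.2 the dividend has scale 3 and the divisor
+ // scale 1, so scale = 2 below and the divisor mantissa 12 is multiplied by 10^2 to 1200,
+ // putting both operands on a common scale of 3 before the remainder (345, i.e. 0.345) is taken.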
+ let mut scale = d1.scale as i32 - d2_scale as i32; + if scale > 0 { + // Scale up the divisor + loop { + let power = if scale >= MAX_I32_SCALE { + POWERS_10[9] + } else { + POWERS_10[scale as usize] + } as u64; + + let mut tmp = d2.lo() as u64 * power; + d2.set_lo(tmp as u32); + tmp >>= 32; + tmp = tmp.wrapping_add((d2.mid() as u64 + ((d2.hi() as u64) << 32)) * power); + d2.set_mid(tmp as u32); + d2.set_hi((tmp >> 32) as u32); + + // Keep scaling if there is more to go + scale -= MAX_I32_SCALE; + if scale <= 0 { + break; + } + } + scale = 0; + } + + loop { + // If the dividend is smaller than the divisor then try to scale that up first + if scale < 0 { + let mut quotient = Buf12 { + data: [d1.lo(), d1.mid(), d1.hi], + }; + loop { + // Figure out how much we can scale by + let power_scale; + if let Some(u) = quotient.find_scale(MAX_PRECISION_I32 + scale) { + if u >= POWERS_10.len() { + power_scale = 9; + } else { + power_scale = u; + } + } else { + return CalculationResult::Overflow; + }; + if power_scale == 0 { + break; + } + let power = POWERS_10[power_scale] as u64; + scale += power_scale as i32; + + let mut tmp = quotient.data[0] as u64 * power; + quotient.data[0] = tmp as u32; + tmp >>= 32; + quotient.set_high64(tmp.wrapping_add(quotient.high64().wrapping_mul(power))); + if power_scale != 9 { + break; + } + if scale >= 0 { + break; + } + } + d1.low64 = quotient.low64(); + d1.hi = quotient.data[2]; + d1.scale = d2_scale; + } + + // if the high portion is empty then return the modulus of the bottom portion + if d1.hi == 0 { + d1.low64 %= d2.low64(); + return CalculationResult::Ok(d1.to_decimal()); + } else if (d2.mid() | d2.hi()) == 0 { + let mut tmp = d1.high64(); + tmp = ((tmp % d2.lo() as u64) << 32) | (d1.lo() as u64); + d1.low64 = tmp % d2.lo() as u64; + d1.hi = 0; + } else { + // Divisor is > 32 bits + return rem_full(&d1, &d2, scale); + } + + if scale >= 0 { + break; + } + } + + CalculationResult::Ok(d1.to_decimal()) +} + +fn rem_full(d1: &Dec64, d2: &Buf12, scale: i32) -> CalculationResult { + let mut scale = scale; + + // First normalize the divisor + let shift = if d2.hi() == 0 { + d2.mid().leading_zeros() + } else { + d2.hi().leading_zeros() + }; + + let mut buffer = Buf24::zero(); + let mut overflow = 0u32; + buffer.set_low64(d1.low64 << shift); + buffer.set_mid64(((d1.mid() as u64).wrapping_add((d1.hi as u64) << 32)) >> (32 - shift)); + let mut upper = 3; // We start at 3 due to bit shifting + + while scale < 0 { + let power = if -scale >= MAX_I32_SCALE { + POWERS_10[9] + } else { + POWERS_10[-scale as usize] + } as u64; + let mut tmp64 = buffer.data[0] as u64 * power; + buffer.data[0] = tmp64 as u32; + + for (index, part) in buffer.data.iter_mut().enumerate().skip(1) { + if index > upper { + break; + } + tmp64 >>= 32; + tmp64 = tmp64.wrapping_add((*part as u64).wrapping_mul(power)); + *part = tmp64 as u32; + } + // If we have overflow then also process that + if upper == 6 { + tmp64 >>= 32; + tmp64 = tmp64.wrapping_add((overflow as u64).wrapping_mul(power)); + overflow = tmp64 as u32; + } + + // Make sure the high bit is not set + if tmp64 > 0x7FFF_FFFF { + upper += 1; + if upper > 5 { + overflow = (tmp64 >> 32) as u32; + } else { + buffer.data[upper] = (tmp64 >> 32) as u32; + } + } + scale += MAX_I32_SCALE; + } + + // TODO: Optimize slice logic + + let mut tmp = Buf16::zero(); + let divisor = d2.low64() << shift; + if d2.hi() == 0 { + // Do some division + if upper == 6 { + upper -= 1; + + tmp.data = [buffer.data[4], buffer.data[5], overflow, 0]; + 
tmp.partial_divide_64(divisor); + buffer.data[4] = tmp.data[0]; + buffer.data[5] = tmp.data[1]; + } + if upper == 5 { + upper -= 1; + tmp.data = [buffer.data[3], buffer.data[4], buffer.data[5], 0]; + tmp.partial_divide_64(divisor); + buffer.data[3] = tmp.data[0]; + buffer.data[4] = tmp.data[1]; + buffer.data[5] = tmp.data[2]; + } + if upper == 4 { + tmp.data = [buffer.data[2], buffer.data[3], buffer.data[4], 0]; + tmp.partial_divide_64(divisor); + buffer.data[2] = tmp.data[0]; + buffer.data[3] = tmp.data[1]; + buffer.data[4] = tmp.data[2]; + } + + tmp.data = [buffer.data[1], buffer.data[2], buffer.data[3], 0]; + tmp.partial_divide_64(divisor); + buffer.data[1] = tmp.data[0]; + buffer.data[2] = tmp.data[1]; + buffer.data[3] = tmp.data[2]; + + tmp.data = [buffer.data[0], buffer.data[1], buffer.data[2], 0]; + tmp.partial_divide_64(divisor); + buffer.data[0] = tmp.data[0]; + buffer.data[1] = tmp.data[1]; + buffer.data[2] = tmp.data[2]; + + let low64 = buffer.low64() >> shift; + CalculationResult::Ok(Decimal::from_parts( + low64 as u32, + (low64 >> 32) as u32, + 0, + d1.negative, + d1.scale, + )) + } else { + let divisor_low64 = divisor; + let divisor = Buf12 { + data: [ + divisor_low64 as u32, + (divisor_low64 >> 32) as u32, + (((d2.mid() as u64) + ((d2.hi() as u64) << 32)) >> (32 - shift)) as u32, + ], + }; + + // Do some division + if upper == 6 { + upper -= 1; + tmp.data = [buffer.data[3], buffer.data[4], buffer.data[5], overflow]; + tmp.partial_divide_96(&divisor); + buffer.data[3] = tmp.data[0]; + buffer.data[4] = tmp.data[1]; + buffer.data[5] = tmp.data[2]; + } + if upper == 5 { + upper -= 1; + tmp.data = [buffer.data[2], buffer.data[3], buffer.data[4], buffer.data[5]]; + tmp.partial_divide_96(&divisor); + buffer.data[2] = tmp.data[0]; + buffer.data[3] = tmp.data[1]; + buffer.data[4] = tmp.data[2]; + buffer.data[5] = tmp.data[3]; + } + if upper == 4 { + tmp.data = [buffer.data[1], buffer.data[2], buffer.data[3], buffer.data[4]]; + tmp.partial_divide_96(&divisor); + buffer.data[1] = tmp.data[0]; + buffer.data[2] = tmp.data[1]; + buffer.data[3] = tmp.data[2]; + buffer.data[4] = tmp.data[3]; + } + + tmp.data = [buffer.data[0], buffer.data[1], buffer.data[2], buffer.data[3]]; + tmp.partial_divide_96(&divisor); + buffer.data[0] = tmp.data[0]; + buffer.data[1] = tmp.data[1]; + buffer.data[2] = tmp.data[2]; + buffer.data[3] = tmp.data[3]; + + let low64 = (buffer.low64() >> shift) + ((buffer.data[2] as u64) << (32 - shift) << 32); + CalculationResult::Ok(Decimal::from_parts( + low64 as u32, + (low64 >> 32) as u32, + buffer.data[2] >> shift, + d1.negative, + d1.scale, + )) + } +} |