1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
|
use crate::constants::{MAX_PRECISION_I32, POWERS_10};
use crate::decimal::{CalculationResult, Decimal};
use crate::ops::common::{Buf12, Buf16, Dec64};
use core::cmp::Ordering;
use core::ops::BitXor;
impl Buf12 {
// Returns true if successful, else false for an overflow
fn add32(&mut self, value: u32) -> Result<(), DivError> {
let value = value as u64;
let new = self.low64().wrapping_add(value);
self.set_low64(new);
if new < value {
self.data[2] = self.data[2].wrapping_add(1);
if self.data[2] == 0 {
return Err(DivError::Overflow);
}
}
Ok(())
}
// Divide a Decimal union by a 32 bit divisor.
// Self is overwritten with the quotient.
// Return value is a 32 bit remainder.
fn div32(&mut self, divisor: u32) -> u32 {
let divisor64 = divisor as u64;
// See if we can get by using a simple u64 division
if self.data[2] != 0 {
let mut temp = self.high64();
let q64 = temp / divisor64;
self.set_high64(q64);
// Calculate the "remainder"
temp = ((temp - q64 * divisor64) << 32) | (self.data[0] as u64);
if temp == 0 {
return 0;
}
let q32 = (temp / divisor64) as u32;
self.data[0] = q32;
((temp as u32).wrapping_sub(q32.wrapping_mul(divisor))) as u32
} else {
// Super easy divisor
let low64 = self.low64();
if low64 == 0 {
// Nothing to do
return 0;
}
// Do the calc
let quotient = low64 / divisor64;
self.set_low64(quotient);
// Remainder is the leftover that wasn't used
(low64.wrapping_sub(quotient.wrapping_mul(divisor64))) as u32
}
}
// Divide the number by a power constant
// Returns true if division was successful
fn div32_const(&mut self, pow: u32) -> bool {
let pow64 = pow as u64;
let high64 = self.high64();
let lo = self.data[0] as u64;
let div64: u64 = high64 / pow64;
let div = ((((high64 - div64 * pow64) << 32) + lo) / pow64) as u32;
if self.data[0] == div.wrapping_mul(pow) {
self.set_high64(div64);
self.data[0] = div;
true
} else {
false
}
}
}
impl Buf16 {
// Does a partial divide with a 64 bit divisor. The divisor in this case must be 64 bits
// otherwise various assumptions fail (e.g. 32 bit quotient).
// To assist, the upper 64 bits must be greater than the divisor for this to succeed.
// Consequently, it will return the quotient as a 32 bit number and overwrite self with the
// 64 bit remainder.
pub(super) fn partial_divide_64(&mut self, divisor: u64) -> u32 {
// We make this assertion here, however below we pivot based on the data
debug_assert!(divisor > self.mid64());
// If we have an empty high bit, then divisor must be greater than the dividend due to
// the assumption that the divisor REQUIRES 64 bits.
if self.data[2] == 0 {
let low64 = self.low64();
if low64 < divisor {
// We can't divide at at all so result is 0. The dividend remains untouched since
// the full amount is the remainder.
return 0;
}
let quotient = low64 / divisor;
self.set_low64(low64 - (quotient * divisor));
return quotient as u32;
}
// Do a simple check to see if the hi portion of the dividend is greater than the hi
// portion of the divisor.
let divisor_hi32 = (divisor >> 32) as u32;
if self.data[2] >= divisor_hi32 {
// We know that the divisor goes into this at MOST u32::max times.
// So we kick things off, with that assumption
let mut low64 = self.low64();
low64 = low64.wrapping_sub(divisor << 32).wrapping_add(divisor);
let mut quotient = u32::MAX;
// If we went negative then keep adding it back in
loop {
if low64 < divisor {
break;
}
quotient = quotient.wrapping_sub(1);
low64 = low64.wrapping_add(divisor);
}
self.set_low64(low64);
return quotient;
}
let mid64 = self.mid64();
let divisor_hi32_64 = divisor_hi32 as u64;
if mid64 < divisor_hi32_64 as u64 {
// similar situation as above where we've got nothing left to divide
return 0;
}
let mut quotient = mid64 / divisor_hi32_64;
let mut remainder = self.data[0] as u64 | ((mid64 - quotient * divisor_hi32_64) << 32);
// Do quotient * lo divisor
let product = quotient * (divisor & 0xFFFF_FFFF);
remainder = remainder.wrapping_sub(product);
// Check if we've gone negative. If so, add it back
if remainder > product.bitxor(u64::MAX) {
loop {
quotient = quotient.wrapping_sub(1);
remainder = remainder.wrapping_add(divisor);
if remainder < divisor {
break;
}
}
}
self.set_low64(remainder);
quotient as u32
}
// Does a partial divide with a 96 bit divisor. The divisor in this case must require 96 bits
// otherwise various assumptions fail (e.g. 32 bit quotient).
pub(super) fn partial_divide_96(&mut self, divisor: &Buf12) -> u32 {
let dividend = self.high64();
let divisor_hi = divisor.data[2];
if dividend < divisor_hi as u64 {
// Dividend is too small - entire number is remainder
return 0;
}
let mut quo = (dividend / divisor_hi as u64) as u32;
let mut remainder = (dividend as u32).wrapping_sub(quo.wrapping_mul(divisor_hi));
// Compute full remainder
let mut prod1 = quo as u64 * divisor.data[0] as u64;
let mut prod2 = quo as u64 * divisor.data[1] as u64;
prod2 += prod1 >> 32;
prod1 = (prod1 & 0xFFFF_FFFF) | (prod2 << 32);
prod2 >>= 32;
let mut num = self.low64();
num = num.wrapping_sub(prod1);
remainder = remainder.wrapping_sub(prod2 as u32);
// If there are carries make sure they are propagated
if num > prod1.bitxor(u64::MAX) {
remainder = remainder.wrapping_sub(1);
if remainder < (prod2 as u32).bitxor(u32::MAX) {
self.set_low64(num);
self.data[2] = remainder;
return quo;
}
} else if remainder <= (prod2 as u32).bitxor(u32::MAX) {
self.set_low64(num);
self.data[2] = remainder;
return quo;
}
// Remainder went negative, add divisor back until it's positive
prod1 = divisor.low64();
loop {
quo = quo.wrapping_sub(1);
num = num.wrapping_add(prod1);
remainder = remainder.wrapping_add(divisor_hi);
if num < prod1 {
// Detected carry.
let tmp = remainder;
remainder = remainder.wrapping_add(1);
if tmp < divisor_hi {
break;
}
}
if remainder < divisor_hi {
break; // detected carry
}
}
self.set_low64(num);
self.data[2] = remainder;
quo
}
}
enum DivError {
Overflow,
}
pub(crate) fn div_impl(dividend: &Decimal, divisor: &Decimal) -> CalculationResult {
if divisor.is_zero() {
return CalculationResult::DivByZero;
}
if dividend.is_zero() {
return CalculationResult::Ok(Decimal::ZERO);
}
let dividend = Dec64::new(dividend);
let divisor = Dec64::new(divisor);
// Pre calculate the scale and the sign
let mut scale = (dividend.scale as i32) - (divisor.scale as i32);
let sign_negative = dividend.negative ^ divisor.negative;
// Set up some variables for modification throughout
let mut require_unscale = false;
let mut quotient = Buf12::from_dec64(÷nd);
let divisor = Buf12::from_dec64(&divisor);
// Branch depending on the complexity of the divisor
if divisor.data[2] | divisor.data[1] == 0 {
// We have a simple(r) divisor (32 bit)
let divisor32 = divisor.data[0];
// Remainder can only be 32 bits since the divisor is 32 bits.
let mut remainder = quotient.div32(divisor32);
let mut power_scale = 0;
// Figure out how to apply the remainder (i.e. we may have performed something like 10/3 or 8/5)
loop {
// Remainder is 0 so we have a simple situation
if remainder == 0 {
// If the scale is positive then we're actually done
if scale >= 0 {
break;
}
power_scale = 9usize.min((-scale) as usize);
} else {
// We may need to normalize later, so set the flag appropriately
require_unscale = true;
// We have a remainder so we effectively want to try to adjust the quotient and add
// the remainder into the quotient. We do this below, however first of all we want
// to try to avoid overflowing so we do that check first.
let will_overflow = if scale == MAX_PRECISION_I32 {
true
} else {
// Figure out how much we can scale by
if let Some(s) = quotient.find_scale(scale) {
power_scale = s;
} else {
return CalculationResult::Overflow;
}
// If it comes back as 0 (i.e. 10^0 = 1) then we're going to overflow since
// we're doing nothing.
power_scale == 0
};
if will_overflow {
// No more scaling can be done, but remainder is non-zero so we round if necessary.
let tmp = remainder << 1;
let round = if tmp < remainder {
// We round if we wrapped around
true
} else if tmp >= divisor32 {
// If we're greater than the divisor (i.e. underflow)
// or if there is a lo bit set, we round
tmp > divisor32 || (quotient.data[0] & 0x1) > 0
} else {
false
};
// If we need to round, try to do so.
if round {
if let Ok(new_scale) = round_up(&mut quotient, scale) {
scale = new_scale;
} else {
// Overflowed
return CalculationResult::Overflow;
}
}
break;
}
}
// Do some scaling
let power = POWERS_10[power_scale];
scale += power_scale as i32;
// Increase the quotient by the power that was looked up
let overflow = increase_scale(&mut quotient, power as u64);
if overflow > 0 {
return CalculationResult::Overflow;
}
let remainder_scaled = (remainder as u64) * (power as u64);
let remainder_quotient = (remainder_scaled / (divisor32 as u64)) as u32;
remainder = (remainder_scaled - remainder_quotient as u64 * divisor32 as u64) as u32;
if let Err(DivError::Overflow) = quotient.add32(remainder_quotient) {
if let Ok(adj) = unscale_from_overflow(&mut quotient, scale, remainder != 0) {
scale = adj;
} else {
// Still overflowing
return CalculationResult::Overflow;
}
break;
}
}
} else {
// We have a divisor greater than 32 bits. Both of these share some quick calculation wins
// so we'll do those before branching into separate logic.
// The win we can do is shifting the bits to the left as much as possible. We do this to both
// the dividend and the divisor to ensure the quotient is not changed.
// As a simple contrived example: if we have 4 / 2 then we could bit shift all the way to the
// left meaning that the lo portion would have nothing inside of it. Of course, shifting these
// left one has the same result (8/4) etc.
// The advantage is that we may be able to write off lower portions of the number making things
// easier.
let mut power_scale = if divisor.data[2] == 0 {
divisor.data[1].leading_zeros()
} else {
divisor.data[2].leading_zeros()
} as usize;
let mut remainder = Buf16::zero();
remainder.set_low64(quotient.low64() << power_scale);
let tmp_high = ((quotient.data[1] as u64) + ((quotient.data[2] as u64) << 32)) >> (32 - power_scale);
remainder.set_high64(tmp_high);
// Work out the divisor after it's shifted
let divisor64 = divisor.low64() << power_scale;
// Check if the divisor is 64 bit or the full 96 bits
if divisor.data[2] == 0 {
// It's 64 bits
quotient.data[2] = 0;
// Calc mid/lo by shifting accordingly
let rem_lo = remainder.data[0];
remainder.data[0] = remainder.data[1];
remainder.data[1] = remainder.data[2];
remainder.data[2] = remainder.data[3];
quotient.data[1] = remainder.partial_divide_64(divisor64);
remainder.data[2] = remainder.data[1];
remainder.data[1] = remainder.data[0];
remainder.data[0] = rem_lo;
quotient.data[0] = remainder.partial_divide_64(divisor64);
loop {
let rem_low64 = remainder.low64();
if rem_low64 == 0 {
// If the scale is positive then we're actually done
if scale >= 0 {
break;
}
power_scale = 9usize.min((-scale) as usize);
} else {
// We may need to normalize later, so set the flag appropriately
require_unscale = true;
// We have a remainder so we effectively want to try to adjust the quotient and add
// the remainder into the quotient. We do this below, however first of all we want
// to try to avoid overflowing so we do that check first.
let will_overflow = if scale == MAX_PRECISION_I32 {
true
} else {
// Figure out how much we can scale by
if let Some(s) = quotient.find_scale(scale) {
power_scale = s;
} else {
return CalculationResult::Overflow;
}
// If it comes back as 0 (i.e. 10^0 = 1) then we're going to overflow since
// we're doing nothing.
power_scale == 0
};
if will_overflow {
// No more scaling can be done, but remainder is non-zero so we round if necessary.
let mut tmp = remainder.low64();
let round = if (tmp as i64) < 0 {
// We round if we wrapped around
true
} else {
tmp <<= 1;
if tmp > divisor64 {
true
} else {
tmp == divisor64 && quotient.data[0] & 0x1 != 0
}
};
// If we need to round, try to do so.
if round {
if let Ok(new_scale) = round_up(&mut quotient, scale) {
scale = new_scale;
} else {
// Overflowed
return CalculationResult::Overflow;
}
}
break;
}
}
// Do some scaling
let power = POWERS_10[power_scale];
scale += power_scale as i32;
// Increase the quotient by the power that was looked up
let overflow = increase_scale(&mut quotient, power as u64);
if overflow > 0 {
return CalculationResult::Overflow;
}
increase_scale64(&mut remainder, power as u64);
let tmp = remainder.partial_divide_64(divisor64);
if let Err(DivError::Overflow) = quotient.add32(tmp) {
if let Ok(adj) = unscale_from_overflow(&mut quotient, scale, remainder.low64() != 0) {
scale = adj;
} else {
// Still overflowing
return CalculationResult::Overflow;
}
break;
}
}
} else {
// It's 96 bits
// Start by finishing the shift left
let divisor_mid = divisor.data[1];
let divisor_hi = divisor.data[2];
let mut divisor = divisor;
divisor.set_low64(divisor64);
divisor.data[2] = ((divisor_mid as u64 + ((divisor_hi as u64) << 32)) >> (32 - power_scale)) as u32;
let quo = remainder.partial_divide_96(&divisor);
quotient.set_low64(quo as u64);
quotient.data[2] = 0;
loop {
let mut rem_low64 = remainder.low64();
if rem_low64 == 0 && remainder.data[2] == 0 {
// If the scale is positive then we're actually done
if scale >= 0 {
break;
}
power_scale = 9usize.min((-scale) as usize);
} else {
// We may need to normalize later, so set the flag appropriately
require_unscale = true;
// We have a remainder so we effectively want to try to adjust the quotient and add
// the remainder into the quotient. We do this below, however first of all we want
// to try to avoid overflowing so we do that check first.
let will_overflow = if scale == MAX_PRECISION_I32 {
true
} else {
// Figure out how much we can scale by
if let Some(s) = quotient.find_scale(scale) {
power_scale = s;
} else {
return CalculationResult::Overflow;
}
// If it comes back as 0 (i.e. 10^0 = 1) then we're going to overflow since
// we're doing nothing.
power_scale == 0
};
if will_overflow {
// No more scaling can be done, but remainder is non-zero so we round if necessary.
let round = if (remainder.data[2] as i32) < 0 {
// We round if we wrapped around
true
} else {
let tmp = remainder.data[1] >> 31;
rem_low64 <<= 1;
remainder.set_low64(rem_low64);
remainder.data[2] = (&remainder.data[2] << 1) + tmp;
match remainder.data[2].cmp(&divisor.data[2]) {
Ordering::Less => false,
Ordering::Equal => {
let divisor_low64 = divisor.low64();
if rem_low64 > divisor_low64 {
true
} else {
rem_low64 == divisor_low64 && (quotient.data[0] & 1) != 0
}
}
Ordering::Greater => true,
}
};
// If we need to round, try to do so.
if round {
if let Ok(new_scale) = round_up(&mut quotient, scale) {
scale = new_scale;
} else {
// Overflowed
return CalculationResult::Overflow;
}
}
break;
}
}
// Do some scaling
let power = POWERS_10[power_scale];
scale += power_scale as i32;
// Increase the quotient by the power that was looked up
let overflow = increase_scale(&mut quotient, power as u64);
if overflow > 0 {
return CalculationResult::Overflow;
}
let mut tmp_remainder = Buf12 {
data: [remainder.data[0], remainder.data[1], remainder.data[2]],
};
let overflow = increase_scale(&mut tmp_remainder, power as u64);
remainder.data[0] = tmp_remainder.data[0];
remainder.data[1] = tmp_remainder.data[1];
remainder.data[2] = tmp_remainder.data[2];
remainder.data[3] = overflow;
let tmp = remainder.partial_divide_96(&divisor);
if let Err(DivError::Overflow) = quotient.add32(tmp) {
if let Ok(adj) =
unscale_from_overflow(&mut quotient, scale, (remainder.low64() | remainder.high64()) != 0)
{
scale = adj;
} else {
// Still overflowing
return CalculationResult::Overflow;
}
break;
}
}
}
}
if require_unscale {
scale = unscale(&mut quotient, scale);
}
CalculationResult::Ok(Decimal::from_parts(
quotient.data[0],
quotient.data[1],
quotient.data[2],
sign_negative,
scale as u32,
))
}
// Multiply num by power (multiple of 10). Power must be 32 bits.
// Returns the overflow, if any
fn increase_scale(num: &mut Buf12, power: u64) -> u32 {
let mut tmp = (num.data[0] as u64) * power;
num.data[0] = tmp as u32;
tmp >>= 32;
tmp += (num.data[1] as u64) * power;
num.data[1] = tmp as u32;
tmp >>= 32;
tmp += (num.data[2] as u64) * power;
num.data[2] = tmp as u32;
(tmp >> 32) as u32
}
// Multiply num by power (multiple of 10). Power must be 32 bits.
fn increase_scale64(num: &mut Buf16, power: u64) {
let mut tmp = (num.data[0] as u64) * power;
num.data[0] = tmp as u32;
tmp >>= 32;
tmp += (num.data[1] as u64) * power;
num.set_mid64(tmp)
}
// Adjust the number to deal with an overflow. This function follows being scaled up (i.e. multiplied
// by 10, so this effectively tries to reverse that by dividing by 10 then feeding in the high bit
// to undo the overflow and rounding instead.
// Returns the updated scale.
fn unscale_from_overflow(num: &mut Buf12, scale: i32, sticky: bool) -> Result<i32, DivError> {
let scale = scale - 1;
if scale < 0 {
return Err(DivError::Overflow);
}
// This function is called when the hi portion has "overflowed" upon adding one and has wrapped
// back around to 0. Consequently, we need to "feed" that back in, but also rescaling down
// to reverse out the overflow.
const HIGH_BIT: u64 = 0x1_0000_0000;
num.data[2] = (HIGH_BIT / 10) as u32;
// Calc the mid
let mut tmp = ((HIGH_BIT % 10) << 32) + (num.data[1] as u64);
let mut val = (tmp / 10) as u32;
num.data[1] = val;
// Calc the lo using a similar method
tmp = ((tmp - (val as u64) * 10) << 32) + (num.data[0] as u64);
val = (tmp / 10) as u32;
num.data[0] = val;
// Work out the remainder, and round if we have one (since it doesn't fit)
let remainder = (tmp - (val as u64) * 10) as u32;
if remainder > 5 || (remainder == 5 && (sticky || num.data[0] & 0x1 > 0)) {
let _ = num.add32(1);
}
Ok(scale)
}
#[inline]
fn round_up(num: &mut Buf12, scale: i32) -> Result<i32, DivError> {
let low64 = num.low64().wrapping_add(1);
num.set_low64(low64);
if low64 != 0 {
return Ok(scale);
}
let hi = num.data[2].wrapping_add(1);
num.data[2] = hi;
if hi != 0 {
return Ok(scale);
}
unscale_from_overflow(num, scale, true)
}
fn unscale(num: &mut Buf12, scale: i32) -> i32 {
// Since 10 = 2 * 5, there must be a factor of 2 for every power of 10 we can extract.
// We use this as a quick test on whether to try a given power.
let mut scale = scale;
while num.data[0] == 0 && scale >= 8 && num.div32_const(100000000) {
scale -= 8;
}
if (num.data[0] & 0xF) == 0 && scale >= 4 && num.div32_const(10000) {
scale -= 4;
}
if (num.data[0] & 0x3) == 0 && scale >= 2 && num.div32_const(100) {
scale -= 2;
}
if (num.data[0] & 0x1) == 0 && scale >= 1 && num.div32_const(10) {
scale -= 1;
}
scale
}
|