// third_party/rust/rust_decimal/src/ops/add.rs
use crate::constants::{MAX_I32_SCALE, POWERS_10, SCALE_MASK, SCALE_SHIFT, SIGN_MASK, U32_MASK, U32_MAX};
use crate::decimal::{CalculationResult, Decimal};
use crate::ops::common::{Buf24, Dec64};

pub(crate) fn add_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult {
    add_sub_internal(d1, d2, false)
}

pub(crate) fn sub_impl(d1: &Decimal, d2: &Decimal) -> CalculationResult {
    add_sub_internal(d1, d2, true)
}

#[inline]
fn add_sub_internal(d1: &Decimal, d2: &Decimal, subtract: bool) -> CalculationResult {
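    // Strategy: try a purely 32-bit fast path when both operands fit in their low words, fall
    // back to 64/96-bit arithmetic via Dec64, and only spill into the wider Buf24 buffer when
    // rescaling pushes an intermediate value past 96 bits.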
    if d1.is_zero() {
        // 0 - x or 0 + x
        let mut result = *d2;
        if subtract && !d2.is_zero() {
            result.set_sign_negative(d2.is_sign_positive());
        }
        return CalculationResult::Ok(result);
    }
    if d2.is_zero() {
        // x - 0 or x + 0
        return CalculationResult::Ok(*d1);
    }

    // Work out whether we need to rescale and whether, given the signs of the numbers, this is
    // still effectively a subtraction.
    let flags = d1.flags() ^ d2.flags();
    let subtract = subtract ^ ((flags & SIGN_MASK) != 0);
    let rescale = (flags & SCALE_MASK) > 0;

    // We optimize towards using 32 bit logic as much as possible. It's noticeably faster at
    // scale, even on 64 bit machines
    if d1.mid() | d1.hi() == 0 && d2.mid() | d2.hi() == 0 {
        // We'll try to rescale; however, we may end up with 64 bit (or wider) numbers.
        // If we do, we'll choose a different flow than fast_add.
        if rescale {
            // This is less optimized if we scale to a 64 bit integer. We can add some further logic
            // here later on.
            let rescale_factor = ((d2.flags() & SCALE_MASK) as i32 - (d1.flags() & SCALE_MASK) as i32) >> SCALE_SHIFT;
            if rescale_factor < 0 {
                // We try to rescale the rhs
                if let Some(rescaled) = rescale32(d2.lo(), -rescale_factor) {
                    return fast_add(d1.lo(), rescaled, d1.flags(), subtract);
                }
            } else {
                // We try to rescale the lhs
                if let Some(rescaled) = rescale32(d1.lo(), rescale_factor) {
                    return fast_add(
                        rescaled,
                        d2.lo(),
                        (d2.flags() & SCALE_MASK) | (d1.flags() & SIGN_MASK),
                        subtract,
                    );
                }
            }
        } else {
            return fast_add(d1.lo(), d2.lo(), d1.flags(), subtract);
        }
    }

    // Continue on with the slower 64 bit method
    let d1 = Dec64::new(d1);
    let d2 = Dec64::new(d2);

    // If the scales differ, align them before starting the addition
    if rescale {
        let rescale_factor = d2.scale as i32 - d1.scale as i32;
        if rescale_factor < 0 {
            let negative = subtract ^ d1.negative;
            let scale = d1.scale;
            unaligned_add(d2, d1, negative, scale, -rescale_factor, subtract)
        } else {
            let negative = d1.negative;
            let scale = d2.scale;
            unaligned_add(d1, d2, negative, scale, rescale_factor, subtract)
        }
    } else {
        let neg = d1.negative;
        let scale = d1.scale;
        aligned_add(d1, d2, neg, scale, subtract)
    }
}

#[inline(always)]
fn rescale32(num: u32, rescale_factor: i32) -> Option<u32> {
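    // For example (POWERS_10[n] being 10^n): rescale32(123, 2) == Some(12_300), while
    // rescale32(u32::MAX, 1) would overflow the u32 and so returns None.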
    if rescale_factor > MAX_I32_SCALE {
        return None;
    }
    num.checked_mul(POWERS_10[rescale_factor as usize])
}

fn fast_add(lo1: u32, lo2: u32, flags: u32, subtract: bool) -> CalculationResult {
    if subtract {
        // Subtraction can't underflow because we always subtract the smaller number from the bigger one
        if lo1 < lo2 {
            return CalculationResult::Ok(Decimal::from_parts_raw(lo2 - lo1, 0, 0, flags ^ SIGN_MASK));
        }
        return CalculationResult::Ok(Decimal::from_parts_raw(lo1 - lo2, 0, 0, flags));
    }
    // Addition, however, can overflow, so we check for that explicitly and carry into the mid word
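    // (e.g. lo1 == u32::MAX and lo2 == 1 wraps lo around to 0, putting the carry in mid).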
    let lo = lo1.wrapping_add(lo2);
    let mid = if lo < lo1 { 1 } else { 0 };
    CalculationResult::Ok(Decimal::from_parts_raw(lo, mid, 0, flags))
}

fn aligned_add(lhs: Dec64, rhs: Dec64, negative: bool, scale: u32, subtract: bool) -> CalculationResult {
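    // Both operands share a scale here, so this is plain 96-bit arithmetic: a borrow in the
    // subtract branch means the sign must flip, while a carry out of the add branch forces a
    // divide-by-ten via reduce_scale (or an overflow when no scale is left to give).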
    if subtract {
        // Signs differ, so subtract
        let mut result = Dec64 {
            negative,
            scale,
            low64: lhs.low64.wrapping_sub(rhs.low64),
            hi: lhs.hi.wrapping_sub(rhs.hi),
        };

        // Check for a borrow
        if result.low64 > lhs.low64 {
            result.hi = result.hi.wrapping_sub(1);
            if result.hi >= lhs.hi {
                flip_sign(&mut result);
            }
        } else if result.hi > lhs.hi {
            flip_sign(&mut result);
        }
        CalculationResult::Ok(result.to_decimal())
    } else {
        // Signs are the same, so add
        let mut result = Dec64 {
            negative,
            scale,
            low64: lhs.low64.wrapping_add(rhs.low64),
            hi: lhs.hi.wrapping_add(rhs.hi),
        };

        // Check for carry
        if result.low64 < lhs.low64 {
            result.hi = result.hi.wrapping_add(1);
            if result.hi <= lhs.hi {
                if result.scale == 0 {
                    return CalculationResult::Overflow;
                }
                reduce_scale(&mut result);
            }
        } else if result.hi < lhs.hi {
            if result.scale == 0 {
                return CalculationResult::Overflow;
            }
            reduce_scale(&mut result);
        }
        CalculationResult::Ok(result.to_decimal())
    }
}

fn flip_sign(result: &mut Dec64) {
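    // The wrapping subtraction left us with the two's complement of the true 96-bit magnitude,
    // so negate it (!hi, -low64, carrying into hi when low64 wraps to zero) and flip the sign.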
    // Bitwise not the high portion
    result.hi = !result.hi;
    let low64 = ((result.low64 as i64).wrapping_neg()) as u64;
    if low64 == 0 {
        result.hi += 1;
    }
    result.low64 = low64;
    result.negative = !result.negative;
}

fn reduce_scale(result: &mut Dec64) {
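    // Called after an addition carried out of bit 95: the true high limb is hi plus the carry
    // (the `1 << 32` below). Divide the value by 10 one 32-bit limb at a time, round the
    // dropped digit half to even, and give back one decimal place of scale.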
    let mut low64 = result.low64;
    let mut hi = result.hi;

    let mut num = (hi as u64) + (1u64 << 32);
    hi = (num / 10u64) as u32;
    num = ((num - (hi as u64) * 10u64) << 32) + (low64 >> 32);
    let mut div = (num / 10) as u32;
    num = ((num - (div as u64) * 10u64) << 32) + (low64 & U32_MASK);
    low64 = (div as u64) << 32;
    div = (num / 10u64) as u32;
    low64 = low64.wrapping_add(div as u64);
    let remainder = (num as u32).wrapping_sub(div.wrapping_mul(10));

    // Finally, round half to even. The branch ordering slightly favors the common non-rounding case
    if remainder >= 5 && (remainder > 5 || (low64 & 1) > 0) {
        low64 = low64.wrapping_add(1);
        if low64 == 0 {
            hi += 1;
        }
    }

    result.low64 = low64;
    result.hi = hi;
    result.scale -= 1;
}

// The assumption going into this function is that the LHS is the larger number and will "absorb"
// the smaller number.
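// The rescaling below is staged: stay in 64 bits while the value fits, then in 96 bits, and only
// fall back to the wider Buf24 buffer once the rescaled value no longer fits in 96 bits.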
fn unaligned_add(
    lhs: Dec64,
    rhs: Dec64,
    negative: bool,
    scale: u32,
    rescale_factor: i32,
    subtract: bool,
) -> CalculationResult {
    let mut lhs = lhs;
    let mut low64 = lhs.low64;
    let mut high = lhs.hi;
    let mut rescale_factor = rescale_factor;

    // First off, we see if we can get away with scaling small amounts (or none at all)
    if high == 0 {
        if low64 <= U32_MAX {
            // We know it's not zero, so we start scaling.
            // Start by working the rescale factor off while the value still fits in the low portion
            while low64 <= U32_MAX {
                if rescale_factor <= MAX_I32_SCALE {
                    low64 *= POWERS_10[rescale_factor as usize] as u64;
                    lhs.low64 = low64;
                    return aligned_add(lhs, rhs, negative, scale, subtract);
                }
                rescale_factor -= MAX_I32_SCALE;
                low64 *= POWERS_10[9] as u64;
            }
        }

        // Keep working the rescale factor off, now letting the value spill into the high portion
        while high == 0 {
            let power = if rescale_factor <= MAX_I32_SCALE {
                POWERS_10[rescale_factor as usize] as u64
            } else {
                POWERS_10[9] as u64
            };

            let tmp_low = (low64 & U32_MASK) * power;
            let tmp_hi = (low64 >> 32) * power + (tmp_low >> 32);
            low64 = (tmp_low & U32_MASK) + (tmp_hi << 32);
            high = (tmp_hi >> 32) as u32;
            rescale_factor -= MAX_I32_SCALE;
            if rescale_factor <= 0 {
                lhs.low64 = low64;
                lhs.hi = high;
                return aligned_add(lhs, rhs, negative, scale, subtract);
            }
        }
    }

    // See if we can get away with keeping it within 96 bits. Otherwise, we need a buffer
    let mut tmp64: u64;
    loop {
        let power = if rescale_factor <= MAX_I32_SCALE {
            POWERS_10[rescale_factor as usize] as u64
        } else {
            POWERS_10[9] as u64
        };

        let tmp_low = (low64 & U32_MASK) * power;
        tmp64 = (low64 >> 32) * power + (tmp_low >> 32);
        low64 = (tmp_low & U32_MASK) + (tmp64 << 32);
        tmp64 >>= 32;
        tmp64 += (high as u64) * power;

        rescale_factor -= MAX_I32_SCALE;

        if tmp64 > U32_MAX {
            break;
        } else {
            high = tmp64 as u32;
            if rescale_factor <= 0 {
                lhs.low64 = low64;
                lhs.hi = high;
                return aligned_add(lhs, rhs, negative, scale, subtract);
            }
        }
    }

    let mut buffer = Buf24::zero();
    buffer.set_low64(low64);
    buffer.set_mid64(tmp64);

    let mut upper_word = buffer.upper_word();
    while rescale_factor > 0 {
        let power = if rescale_factor <= MAX_I32_SCALE {
            POWERS_10[rescale_factor as usize] as u64
        } else {
            POWERS_10[9] as u64
        };
        tmp64 = 0;
        for (index, part) in buffer.data.iter_mut().enumerate() {
            tmp64 = tmp64.wrapping_add((*part as u64) * power);
            *part = tmp64 as u32;
            tmp64 >>= 32;
            if index + 1 > upper_word {
                break;
            }
        }

        if tmp64 & U32_MASK > 0 {
            // Extend the result
            upper_word += 1;
            buffer.data[upper_word] = tmp64 as u32;
        }

        rescale_factor -= MAX_I32_SCALE;
    }

    // Do the add
    tmp64 = buffer.low64();
    low64 = rhs.low64;
    let tmp_hi = buffer.data[2];
    high = rhs.hi;

    if subtract {
        low64 = tmp64.wrapping_sub(low64);
        high = tmp_hi.wrapping_sub(high);

        // Check for a borrow
        let carry = if low64 > tmp64 {
            high = high.wrapping_sub(1);
            high >= tmp_hi
        } else {
            high > tmp_hi
        };

        if carry {
            for part in buffer.data.iter_mut().skip(3) {
                *part = part.wrapping_sub(1);
                if *part > 0 {
                    break;
                }
            }

            if buffer.data[upper_word] == 0 && upper_word < 3 {
                return CalculationResult::Ok(Decimal::from_parts(
                    low64 as u32,
                    (low64 >> 32) as u32,
                    high,
                    negative,
                    scale,
                ));
            }
        }
    } else {
        low64 = low64.wrapping_add(tmp64);
        high = high.wrapping_add(tmp_hi);

        // Check for carry
        let carry = if low64 < tmp64 {
            high = high.wrapping_add(1);
            high <= tmp_hi
        } else {
            high < tmp_hi
        };

        if carry {
            for (index, part) in buffer.data.iter_mut().enumerate().skip(3) {
                if upper_word < index {
                    *part = 1;
                    upper_word = index;
                    break;
                }
                *part = part.wrapping_add(1);
                if *part > 0 {
                    break;
                }
            }
        }
    }

    buffer.set_low64(low64);
    buffer.data[2] = high;
    if let Some(scale) = buffer.rescale(upper_word, scale) {
        CalculationResult::Ok(Decimal::from_parts(
            buffer.data[0],
            buffer.data[1],
            buffer.data[2],
            negative,
            scale,
        ))
    } else {
        CalculationResult::Overflow
    }
}
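
// Illustrative sketch only (not part of the upstream file): two minimal tests showing how the
// entry points above are expected to behave. They assume the public `Decimal::new(i64, u32)`
// constructor and the crate-internal `CalculationResult::Ok` variant; the crate's real coverage
// for these ops lives in its own test suite.
#[cfg(test)]
mod add_sketch_tests {
    use super::*;
    use crate::decimal::{CalculationResult, Decimal};

    #[test]
    fn adds_values_with_different_scales() {
        // 1.25 + 0.375 = 1.625 exercises the 32-bit rescale path in `add_sub_internal`.
        let a = Decimal::new(125, 2);
        let b = Decimal::new(375, 3);
        match add_impl(&a, &b) {
            CalculationResult::Ok(result) => assert_eq!(result, Decimal::new(1625, 3)),
            _ => panic!("expected an in-range result"),
        }
    }

    #[test]
    fn subtracting_a_larger_value_flips_the_sign() {
        // 1 - 3 = -2 exercises the sign flip in `fast_add`'s subtract branch.
        let a = Decimal::new(1, 0);
        let b = Decimal::new(3, 0);
        match sub_impl(&a, &b) {
            CalculationResult::Ok(result) => assert_eq!(result, Decimal::new(-2, 0)),
            _ => panic!("expected an in-range result"),
        }
    }
}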