diff options
Diffstat (limited to '')
-rw-r--r-- | src/arrow/cpp/src/gandiva/precompiled/decimal_ops.cc | 723 |
1 files changed, 723 insertions, 0 deletions
diff --git a/src/arrow/cpp/src/gandiva/precompiled/decimal_ops.cc b/src/arrow/cpp/src/gandiva/precompiled/decimal_ops.cc new file mode 100644 index 000000000..61cac6062 --- /dev/null +++ b/src/arrow/cpp/src/gandiva/precompiled/decimal_ops.cc @@ -0,0 +1,723 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Algorithms adapted from Apache Impala + +#include "gandiva/precompiled/decimal_ops.h" + +#include <algorithm> +#include <cmath> +#include <limits> + +#include "arrow/util/logging.h" +#include "gandiva/decimal_type_util.h" +#include "gandiva/decimal_xlarge.h" +#include "gandiva/gdv_function_stubs.h" + +// Several operations (multiply, divide, mod, ..) require converting to 256-bit, and we +// use the boost library for doing 256-bit operations. To avoid references to boost from +// the precompiled-to-ir code (this causes issues with symbol resolution at runtime), we +// use a wrapper exported from the CPP code. The wrapper functions are named gdv_xlarge_xx + +namespace gandiva { +namespace decimalops { + +using arrow::BasicDecimal128; + +static BasicDecimal128 CheckAndIncreaseScale(const BasicDecimal128& in, int32_t delta) { + return (delta <= 0) ? in : in.IncreaseScaleBy(delta); +} + +static BasicDecimal128 CheckAndReduceScale(const BasicDecimal128& in, int32_t delta) { + return (delta <= 0) ? in : in.ReduceScaleBy(delta); +} + +/// Adjust x and y to the same scale, and add them. +static BasicDecimal128 AddFastPath(const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, int32_t out_scale) { + auto higher_scale = std::max(x.scale(), y.scale()); + + auto x_scaled = CheckAndIncreaseScale(x.value(), higher_scale - x.scale()); + auto y_scaled = CheckAndIncreaseScale(y.value(), higher_scale - y.scale()); + return x_scaled + y_scaled; +} + +/// Add x and y, caller has ensured there can be no overflow. +static BasicDecimal128 AddNoOverflow(const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, int32_t out_scale) { + auto higher_scale = std::max(x.scale(), y.scale()); + auto sum = AddFastPath(x, y, out_scale); + return CheckAndReduceScale(sum, higher_scale - out_scale); +} + +/// Both x_value and y_value must be >= 0 +static BasicDecimal128 AddLargePositive(const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, + int32_t out_scale) { + DCHECK_GE(x.value(), 0); + DCHECK_GE(y.value(), 0); + + // separate out whole/fractions. + BasicDecimal128 x_left, x_right, y_left, y_right; + x.value().GetWholeAndFraction(x.scale(), &x_left, &x_right); + y.value().GetWholeAndFraction(y.scale(), &y_left, &y_right); + + // Adjust fractional parts to higher scale. + auto higher_scale = std::max(x.scale(), y.scale()); + auto x_right_scaled = CheckAndIncreaseScale(x_right, higher_scale - x.scale()); + auto y_right_scaled = CheckAndIncreaseScale(y_right, higher_scale - y.scale()); + + BasicDecimal128 right; + BasicDecimal128 carry_to_left; + auto multiplier = BasicDecimal128::GetScaleMultiplier(higher_scale); + if (x_right_scaled >= multiplier - y_right_scaled) { + right = x_right_scaled - (multiplier - y_right_scaled); + carry_to_left = 1; + } else { + right = x_right_scaled + y_right_scaled; + carry_to_left = 0; + } + right = CheckAndReduceScale(right, higher_scale - out_scale); + + auto left = x_left + y_left + carry_to_left; + return (left * BasicDecimal128::GetScaleMultiplier(out_scale)) + right; +} + +/// x_value and y_value cannot be 0, and one must be positive and the other negative. +static BasicDecimal128 AddLargeNegative(const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, + int32_t out_scale) { + DCHECK_NE(x.value(), 0); + DCHECK_NE(y.value(), 0); + DCHECK((x.value() < 0 && y.value() > 0) || (x.value() > 0 && y.value() < 0)); + + // separate out whole/fractions. + BasicDecimal128 x_left, x_right, y_left, y_right; + x.value().GetWholeAndFraction(x.scale(), &x_left, &x_right); + y.value().GetWholeAndFraction(y.scale(), &y_left, &y_right); + + // Adjust fractional parts to higher scale. + auto higher_scale = std::max(x.scale(), y.scale()); + x_right = CheckAndIncreaseScale(x_right, higher_scale - x.scale()); + y_right = CheckAndIncreaseScale(y_right, higher_scale - y.scale()); + + // Overflow not possible because one is +ve and the other is -ve. + auto left = x_left + y_left; + auto right = x_right + y_right; + + // If the whole and fractional parts have different signs, then we need to make the + // fractional part have the same sign as the whole part. If either left or right is + // zero, then nothing needs to be done. + if (left < 0 && right > 0) { + left += 1; + right -= BasicDecimal128::GetScaleMultiplier(higher_scale); + } else if (left > 0 && right < 0) { + left -= 1; + right += BasicDecimal128::GetScaleMultiplier(higher_scale); + } + right = CheckAndReduceScale(right, higher_scale - out_scale); + return (left * BasicDecimal128::GetScaleMultiplier(out_scale)) + right; +} + +static BasicDecimal128 AddLarge(const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, int32_t out_scale) { + if (x.value() >= 0 && y.value() >= 0) { + // both positive or 0 + return AddLargePositive(x, y, out_scale); + } else if (x.value() <= 0 && y.value() <= 0) { + // both negative or 0 + BasicDecimalScalar128 x_neg(-x.value(), x.precision(), x.scale()); + BasicDecimalScalar128 y_neg(-y.value(), y.precision(), y.scale()); + return -AddLargePositive(x_neg, y_neg, out_scale); + } else { + // one positive and the other negative + return AddLargeNegative(x, y, out_scale); + } +} + +// Suppose we have a number that requires x bits to be represented and we scale it up by +// 10^scale_by. Let's say now y bits are required to represent it. This function returns +// the maximum possible y - x for a given 'scale_by'. +inline int32_t MaxBitsRequiredIncreaseAfterScaling(int32_t scale_by) { + // We rely on the following formula: + // bits_required(x * 10^y) <= bits_required(x) + floor(log2(10^y)) + 1 + // We precompute floor(log2(10^x)) + 1 for x = 0, 1, 2...75, 76 + DCHECK_GE(scale_by, 0); + DCHECK_LE(scale_by, 76); + static const int32_t floor_log2_plus_one[] = { + 0, 4, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37, 40, 44, 47, 50, + 54, 57, 60, 64, 67, 70, 74, 77, 80, 84, 87, 90, 94, 97, 100, 103, + 107, 110, 113, 117, 120, 123, 127, 130, 133, 137, 140, 143, 147, 150, 153, 157, + 160, 163, 167, 170, 173, 177, 180, 183, 187, 190, 193, 196, 200, 203, 206, 210, + 213, 216, 220, 223, 226, 230, 233, 236, 240, 243, 246, 250, 253}; + return floor_log2_plus_one[scale_by]; +} + +// If we have a number with 'num_lz' leading zeros, and we scale it up by 10^scale_by, +// this function returns the minimum number of leading zeros the result can have. +inline int32_t MinLeadingZerosAfterScaling(int32_t num_lz, int32_t scale_by) { + DCHECK_GE(scale_by, 0); + DCHECK_LE(scale_by, 76); + int32_t result = num_lz - MaxBitsRequiredIncreaseAfterScaling(scale_by); + return result; +} + +// Returns the maximum possible number of bits required to represent num * 10^scale_by. +inline int32_t MaxBitsRequiredAfterScaling(const BasicDecimalScalar128& num, + int32_t scale_by) { + auto value = num.value(); + auto value_abs = value.Abs(); + + int32_t num_occupied = 128 - value_abs.CountLeadingBinaryZeros(); + DCHECK_GE(scale_by, 0); + DCHECK_LE(scale_by, 76); + return num_occupied + MaxBitsRequiredIncreaseAfterScaling(scale_by); +} + +// Returns the minimum number of leading zero x or y would have after one of them gets +// scaled up to match the scale of the other one. +inline int32_t MinLeadingZeros(const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y) { + auto x_value = x.value(); + auto x_value_abs = x_value.Abs(); + + auto y_value = y.value(); + auto y_value_abs = y_value.Abs(); + + int32_t x_lz = x_value_abs.CountLeadingBinaryZeros(); + int32_t y_lz = y_value_abs.CountLeadingBinaryZeros(); + if (x.scale() < y.scale()) { + x_lz = MinLeadingZerosAfterScaling(x_lz, y.scale() - x.scale()); + } else if (x.scale() > y.scale()) { + y_lz = MinLeadingZerosAfterScaling(y_lz, x.scale() - y.scale()); + } + return std::min(x_lz, y_lz); +} + +BasicDecimal128 Add(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y, + int32_t out_precision, int32_t out_scale) { + if (out_precision < DecimalTypeUtil::kMaxPrecision) { + // fast-path add + return AddFastPath(x, y, out_scale); + } else { + int32_t min_lz = MinLeadingZeros(x, y); + if (min_lz >= 3) { + // If both numbers have at least MIN_LZ leading zeros, we can add them directly + // without the risk of overflow. + // We want the result to have at least 2 leading zeros, which ensures that it fits + // into the maximum decimal because 2^126 - 1 < 10^38 - 1. If both x and y have at + // least 3 leading zeros, then we are guaranteed that the result will have at lest 2 + // leading zeros. + return AddNoOverflow(x, y, out_scale); + } else { + // slower-version : add whole/fraction parts separately, and then, combine. + return AddLarge(x, y, out_scale); + } + } +} + +BasicDecimal128 Subtract(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y, + int32_t out_precision, int32_t out_scale) { + return Add(x, {-y.value(), y.precision(), y.scale()}, out_precision, out_scale); +} + +// Multiply when the out_precision is 38, and there is no trimming of the scale i.e +// the intermediate value is the same as the final value. +static BasicDecimal128 MultiplyMaxPrecisionNoScaleDown(const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, + int32_t out_scale, + bool* overflow) { + DCHECK_EQ(x.scale() + y.scale(), out_scale); + + BasicDecimal128 result; + auto x_abs = BasicDecimal128::Abs(x.value()); + auto y_abs = BasicDecimal128::Abs(y.value()); + + if (x_abs > BasicDecimal128::GetMaxValue() / y_abs) { + *overflow = true; + } else { + // We've verified that the result will fit into 128 bits. + *overflow = false; + result = x.value() * y.value(); + } + return result; +} + +// Multiply when the out_precision is 38, and there is trimming of the scale i.e +// the intermediate value could be larger than the final value. +static BasicDecimal128 MultiplyMaxPrecisionAndScaleDown(const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, + int32_t out_scale, + bool* overflow) { + auto delta_scale = x.scale() + y.scale() - out_scale; + DCHECK_GT(delta_scale, 0); + + *overflow = false; + BasicDecimal128 result; + auto x_abs = BasicDecimal128::Abs(x.value()); + auto y_abs = BasicDecimal128::Abs(y.value()); + + // It's possible that the intermediate value does not fit in 128-bits, but the + // final value will (after scaling down). + bool needs_int256 = false; + int32_t total_leading_zeros = + x_abs.CountLeadingBinaryZeros() + y_abs.CountLeadingBinaryZeros(); + // This check is quick, but conservative. In some cases it will indicate that + // converting to 256 bits is necessary, when it's not actually the case. + needs_int256 = total_leading_zeros <= 128; + if (ARROW_PREDICT_FALSE(needs_int256)) { + int64_t result_high; + uint64_t result_low; + + // This requires converting to 256-bit, and we use the boost library for that. To + // avoid references to boost from the precompiled-to-ir code (this causes issues + // with symbol resolution at runtime), we use a wrapper exported from the CPP code. + gdv_xlarge_multiply_and_scale_down(x.value().high_bits(), x.value().low_bits(), + y.value().high_bits(), y.value().low_bits(), + delta_scale, &result_high, &result_low, overflow); + result = BasicDecimal128(result_high, result_low); + } else { + if (ARROW_PREDICT_TRUE(delta_scale <= 38)) { + // The largest value that result can have here is (2^64 - 1) * (2^63 - 1), which is + // greater than BasicDecimal128::kMaxValue. + result = x.value() * y.value(); + // Since delta_scale is greater than zero, result can now be at most + // ((2^64 - 1) * (2^63 - 1)) / 10, which is less than BasicDecimal128::kMaxValue, so + // there cannot be any overflow. + result = result.ReduceScaleBy(delta_scale); + } else { + // We are multiplying decimal(38, 38) by decimal(38, 38). The result should be a + // decimal(38, 37), so delta scale = 38 + 38 - 37 = 39. Since we are not in the + // 256 bit intermediate value case and we are scaling down by 39, then we are + // guaranteed that the result is 0 (even if we try to round). The largest possible + // intermediate result is 38 "9"s. If we scale down by 39, the leftmost 9 is now + // two digits to the right of the rightmost "visible" one. The reason why we have + // to handle this case separately is because a scale multiplier with a delta_scale + // 39 does not fit into 128 bit. + DCHECK_EQ(delta_scale, 39); + result = 0; + } + } + return result; +} + +// Multiply when the out_precision is 38. +static BasicDecimal128 MultiplyMaxPrecision(const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, + int32_t out_scale, bool* overflow) { + auto delta_scale = x.scale() + y.scale() - out_scale; + DCHECK_GE(delta_scale, 0); + if (delta_scale == 0) { + return MultiplyMaxPrecisionNoScaleDown(x, y, out_scale, overflow); + } else { + return MultiplyMaxPrecisionAndScaleDown(x, y, out_scale, overflow); + } +} + +BasicDecimal128 Multiply(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y, + int32_t out_precision, int32_t out_scale, bool* overflow) { + BasicDecimal128 result; + *overflow = false; + if (out_precision < DecimalTypeUtil::kMaxPrecision) { + // fast-path multiply + result = x.value() * y.value(); + DCHECK_EQ(x.scale() + y.scale(), out_scale); + DCHECK_LE(BasicDecimal128::Abs(result), BasicDecimal128::GetMaxValue()); + } else if (x.value() == 0 || y.value() == 0) { + // Handle this separately to avoid divide-by-zero errors. + result = BasicDecimal128(0, 0); + } else { + result = MultiplyMaxPrecision(x, y, out_scale, overflow); + } + DCHECK(*overflow || BasicDecimal128::Abs(result) <= BasicDecimal128::GetMaxValue()); + return result; +} + +BasicDecimal128 Divide(int64_t context, const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, int32_t out_precision, + int32_t out_scale, bool* overflow) { + if (y.value() == 0) { + char const* err_msg = "divide by zero error"; + gdv_fn_context_set_error_msg(context, err_msg); + return 0; + } + + // scale up to the output scale, and do an integer division. + int32_t delta_scale = out_scale + y.scale() - x.scale(); + DCHECK_GE(delta_scale, 0); + + BasicDecimal128 result; + auto num_bits_required_after_scaling = MaxBitsRequiredAfterScaling(x, delta_scale); + if (num_bits_required_after_scaling <= 127) { + // fast-path. The dividend fits in 128-bit after scaling too. + *overflow = false; + + // do the division. + auto x_scaled = CheckAndIncreaseScale(x.value(), delta_scale); + BasicDecimal128 remainder; + auto status = x_scaled.Divide(y.value(), &result, &remainder); + DCHECK_EQ(status, arrow::DecimalStatus::kSuccess); + + // round-up + if (BasicDecimal128::Abs(2 * remainder) >= BasicDecimal128::Abs(y.value())) { + result += (x.value().Sign() ^ y.value().Sign()) + 1; + } + } else { + // convert to 256-bit and do the divide. + *overflow = delta_scale > 38 && num_bits_required_after_scaling > 255; + if (!*overflow) { + int64_t result_high; + uint64_t result_low; + + gdv_xlarge_scale_up_and_divide(x.value().high_bits(), x.value().low_bits(), + y.value().high_bits(), y.value().low_bits(), + delta_scale, &result_high, &result_low, overflow); + result = BasicDecimal128(result_high, result_low); + } + } + return result; +} + +BasicDecimal128 Mod(int64_t context, const BasicDecimalScalar128& x, + const BasicDecimalScalar128& y, int32_t out_precision, + int32_t out_scale, bool* overflow) { + if (y.value() == 0) { + char const* err_msg = "divide by zero error"; + gdv_fn_context_set_error_msg(context, err_msg); + return 0; + } + + // Adsjust x and y to the same scale (higher one), and then, do a integer mod. + *overflow = false; + BasicDecimal128 result; + int32_t min_lz = MinLeadingZeros(x, y); + if (min_lz >= 2) { + auto higher_scale = std::max(x.scale(), y.scale()); + auto x_scaled = CheckAndIncreaseScale(x.value(), higher_scale - x.scale()); + auto y_scaled = CheckAndIncreaseScale(y.value(), higher_scale - y.scale()); + result = x_scaled % y_scaled; + DCHECK_LE(BasicDecimal128::Abs(result), BasicDecimal128::GetMaxValue()); + } else { + int64_t result_high; + uint64_t result_low; + + gdv_xlarge_mod(x.value().high_bits(), x.value().low_bits(), x.scale(), + y.value().high_bits(), y.value().low_bits(), y.scale(), &result_high, + &result_low); + result = BasicDecimal128(result_high, result_low); + } + DCHECK(BasicDecimal128::Abs(result) <= BasicDecimal128::Abs(x.value()) || + BasicDecimal128::Abs(result) <= BasicDecimal128::Abs(y.value())); + return result; +} + +int32_t CompareSameScale(const BasicDecimal128& x, const BasicDecimal128& y) { + if (x == y) { + return 0; + } else if (x < y) { + return -1; + } else { + return 1; + } +} + +int32_t Compare(const BasicDecimalScalar128& x, const BasicDecimalScalar128& y) { + int32_t delta_scale = x.scale() - y.scale(); + + // fast-path : both are of the same scale. + if (delta_scale == 0) { + return CompareSameScale(x.value(), y.value()); + } + + // Check if we'll need more than 256-bits after adjusting the scale. + bool need256 = + (delta_scale < 0 && x.precision() - delta_scale > DecimalTypeUtil::kMaxPrecision) || + (y.precision() + delta_scale > DecimalTypeUtil::kMaxPrecision); + if (need256) { + return gdv_xlarge_compare(x.value().high_bits(), x.value().low_bits(), x.scale(), + y.value().high_bits(), y.value().low_bits(), y.scale()); + } else { + BasicDecimal128 x_scaled; + BasicDecimal128 y_scaled; + + if (delta_scale < 0) { + x_scaled = x.value().IncreaseScaleBy(-delta_scale); + y_scaled = y.value(); + } else { + x_scaled = x.value(); + y_scaled = y.value().IncreaseScaleBy(delta_scale); + } + return CompareSameScale(x_scaled, y_scaled); + } +} + +#define DECIMAL_OVERFLOW_IF(condition, overflow) \ + do { \ + if (*overflow || (condition)) { \ + *overflow = true; \ + return 0; \ + } \ + } while (0) + +static BasicDecimal128 GetMaxValue(int32_t precision) { + return BasicDecimal128::GetScaleMultiplier(precision) - 1; +} + +// Compute the double scale multipliers once. +static std::array<double, DecimalTypeUtil::kMaxPrecision + 1> kDoubleScaleMultipliers = + ([]() -> std::array<double, DecimalTypeUtil::kMaxPrecision + 1> { + std::array<double, DecimalTypeUtil::kMaxPrecision + 1> values; + values[0] = 1.0; + for (int32_t idx = 1; idx <= DecimalTypeUtil::kMaxPrecision; idx++) { + values[idx] = values[idx - 1] * 10; + } + return values; + })(); + +BasicDecimal128 FromDouble(double in, int32_t precision, int32_t scale, bool* overflow) { + // Multiply decimal with the scale + auto unscaled = in * kDoubleScaleMultipliers[scale]; + DECIMAL_OVERFLOW_IF(std::isnan(unscaled), overflow); + + unscaled = std::round(unscaled); + + // convert scaled double to int128 + int32_t sign = unscaled < 0 ? -1 : 1; + auto unscaled_abs = std::abs(unscaled); + + // overflow if > 2^127 - 1 + DECIMAL_OVERFLOW_IF(unscaled_abs > std::ldexp(static_cast<double>(1), 127) - 1, + overflow); + + uint64_t high_bits = static_cast<uint64_t>(std::ldexp(unscaled_abs, -64)); + uint64_t low_bits = static_cast<uint64_t>( + unscaled_abs - std::ldexp(static_cast<double>(high_bits), 64)); + + auto result = BasicDecimal128(static_cast<int64_t>(high_bits), low_bits); + + // overflow if > max value based on precision + DECIMAL_OVERFLOW_IF(result > GetMaxValue(precision), overflow); + return result * sign; +} + +double ToDouble(const BasicDecimalScalar128& in, bool* overflow) { + // convert int128 to double + int64_t sign = in.value().Sign(); + auto value_abs = BasicDecimal128::Abs(in.value()); + double unscaled = static_cast<double>(value_abs.low_bits()) + + std::ldexp(static_cast<double>(value_abs.high_bits()), 64); + + // scale double. + return (unscaled * sign) / kDoubleScaleMultipliers[in.scale()]; +} + +BasicDecimal128 FromInt64(int64_t in, int32_t precision, int32_t scale, bool* overflow) { + // check if multiplying by scale will cause an overflow. + DECIMAL_OVERFLOW_IF(std::abs(in) > GetMaxValue(precision - scale), overflow); + return in * BasicDecimal128::GetScaleMultiplier(scale); +} + +// Helper function to modify the scale and/or precision of a decimal value. +static BasicDecimal128 ModifyScaleAndPrecision(const BasicDecimalScalar128& x, + int32_t out_precision, int32_t out_scale, + bool* overflow) { + int32_t delta_scale = out_scale - x.scale(); + if (delta_scale >= 0) { + // check if multiplying by delta_scale will cause an overflow. + DECIMAL_OVERFLOW_IF( + BasicDecimal128::Abs(x.value()) > GetMaxValue(out_precision - delta_scale), + overflow); + return x.value().IncreaseScaleBy(delta_scale); + } else { + // Do not do any rounding, that is handled by the caller. + auto result = x.value().ReduceScaleBy(-delta_scale, false); + DECIMAL_OVERFLOW_IF(BasicDecimal128::Abs(result) > GetMaxValue(out_precision), + overflow); + return result; + } +} + +enum RoundType { + kRoundTypeCeil, // +1 if +ve and trailing value is > 0, else no rounding. + kRoundTypeFloor, // -1 if -ve and trailing value is < 0, else no rounding. + kRoundTypeTrunc, // no rounding, truncate the trailing digits. + kRoundTypeHalfRoundUp, // if +ve and trailing value is >= half of base, +1. + // else if -ve and trailing value is >= half of base, -1. +}; + +// Compute the rounding delta for the givven rounding type. +static int32_t ComputeRoundingDelta(const BasicDecimal128& x, int32_t x_scale, + int32_t out_scale, RoundType type) { + if (type == kRoundTypeTrunc || // no rounding for this type. + out_scale >= x_scale) { // no digits dropped, so no rounding. + return 0; + } + + int32_t result = 0; + switch (type) { + case kRoundTypeHalfRoundUp: { + auto base = BasicDecimal128::GetScaleMultiplier(x_scale - out_scale); + auto trailing = x % base; + if (trailing == 0) { + result = 0; + } else if (trailing.Abs() < base / 2) { + result = 0; + } else { + result = (x < 0) ? -1 : 1; + } + break; + } + + case kRoundTypeCeil: + if (x < 0) { + // no rounding for -ve + result = 0; + } else { + auto base = BasicDecimal128::GetScaleMultiplier(x_scale - out_scale); + auto trailing = x % base; + result = (trailing == 0) ? 0 : 1; + } + break; + + case kRoundTypeFloor: + if (x > 0) { + // no rounding for +ve + result = 0; + } else { + auto base = BasicDecimal128::GetScaleMultiplier(x_scale - out_scale); + auto trailing = x % base; + result = (trailing == 0) ? 0 : -1; + } + break; + + case kRoundTypeTrunc: + break; + } + return result; +} + +// Modify the scale and round. +static BasicDecimal128 RoundWithPositiveScale(const BasicDecimalScalar128& x, + int32_t out_precision, int32_t out_scale, + RoundType round_type, bool* overflow) { + DCHECK_GE(out_scale, 0); + + auto scaled = ModifyScaleAndPrecision(x, out_precision, out_scale, overflow); + if (*overflow) { + return 0; + } + + auto delta = ComputeRoundingDelta(x.value(), x.scale(), out_scale, round_type); + if (delta == 0) { + return scaled; + } + + // If there is a rounding delta, the output scale must be less than the input scale. + // That means at least one digit is dropped after the decimal. The delta add can add + // utmost one digit before the decimal. So, overflow will occur only if the output + // precision has changed. + DCHECK_GT(x.scale(), out_scale); + auto result = scaled + delta; + DECIMAL_OVERFLOW_IF(out_precision < x.precision() && + BasicDecimal128::Abs(result) > GetMaxValue(out_precision), + overflow); + return result; +} + +// Modify scale to drop all digits to the right of the decimal and round. +// Then, zero out 'rounding_scale' number of digits to the left of the decimal point. +static BasicDecimal128 RoundWithNegativeScale(const BasicDecimalScalar128& x, + int32_t out_precision, + int32_t rounding_scale, + RoundType round_type, bool* overflow) { + DCHECK_LT(rounding_scale, 0); + + // get rid of the fractional part. + auto scaled = ModifyScaleAndPrecision(x, out_precision, 0, overflow); + auto rounding_delta = ComputeRoundingDelta(scaled, 0, -rounding_scale, round_type); + + auto base = BasicDecimal128::GetScaleMultiplier(-rounding_scale); + auto delta = rounding_delta * base - (scaled % base); + DECIMAL_OVERFLOW_IF(BasicDecimal128::Abs(scaled) > + GetMaxValue(out_precision) - BasicDecimal128::Abs(delta), + overflow); + return scaled + delta; +} + +BasicDecimal128 Round(const BasicDecimalScalar128& x, int32_t out_precision, + int32_t out_scale, int32_t rounding_scale, bool* overflow) { + // no-op if target scale is same as arg scale + if (x.scale() == out_scale && rounding_scale >= 0) { + return x.value(); + } + + if (rounding_scale < 0) { + return RoundWithNegativeScale(x, out_precision, rounding_scale, + RoundType::kRoundTypeHalfRoundUp, overflow); + } else { + return RoundWithPositiveScale(x, out_precision, rounding_scale, + RoundType::kRoundTypeHalfRoundUp, overflow); + } +} + +BasicDecimal128 Truncate(const BasicDecimalScalar128& x, int32_t out_precision, + int32_t out_scale, int32_t rounding_scale, bool* overflow) { + // no-op if target scale is same as arg scale + if (x.scale() == out_scale && rounding_scale >= 0) { + return x.value(); + } + + if (rounding_scale < 0) { + return RoundWithNegativeScale(x, out_precision, rounding_scale, + RoundType::kRoundTypeTrunc, overflow); + } else { + return RoundWithPositiveScale(x, out_precision, rounding_scale, + RoundType::kRoundTypeTrunc, overflow); + } +} + +BasicDecimal128 Ceil(const BasicDecimalScalar128& x, bool* overflow) { + return RoundWithPositiveScale(x, x.precision(), 0, RoundType::kRoundTypeCeil, overflow); +} + +BasicDecimal128 Floor(const BasicDecimalScalar128& x, bool* overflow) { + return RoundWithPositiveScale(x, x.precision(), 0, RoundType::kRoundTypeFloor, + overflow); +} + +BasicDecimal128 Convert(const BasicDecimalScalar128& x, int32_t out_precision, + int32_t out_scale, bool* overflow) { + DCHECK_GE(out_scale, 0); + DCHECK_LE(out_scale, DecimalTypeUtil::kMaxScale); + DCHECK_GT(out_precision, 0); + DCHECK_LE(out_precision, DecimalTypeUtil::kMaxScale); + + return RoundWithPositiveScale(x, out_precision, out_scale, + RoundType::kRoundTypeHalfRoundUp, overflow); +} + +int64_t ToInt64(const BasicDecimalScalar128& in, bool* overflow) { + auto rounded = RoundWithPositiveScale(in, in.precision(), 0 /*scale*/, + RoundType::kRoundTypeHalfRoundUp, overflow); + DECIMAL_OVERFLOW_IF((rounded > std::numeric_limits<int64_t>::max()) || + (rounded < std::numeric_limits<int64_t>::min()), + overflow); + return static_cast<int64_t>(rounded.low_bits()); +} + +} // namespace decimalops +} // namespace gandiva |