diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /mfbt/FloatingPoint.h | |
parent | Initial commit. (diff) | |
download | thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip |
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'mfbt/FloatingPoint.h')
-rw-r--r-- | mfbt/FloatingPoint.h | 606 |
1 files changed, 606 insertions, 0 deletions
diff --git a/mfbt/FloatingPoint.h b/mfbt/FloatingPoint.h new file mode 100644 index 0000000000..f4ae36257b --- /dev/null +++ b/mfbt/FloatingPoint.h @@ -0,0 +1,606 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Various predicates and operations on IEEE-754 floating point types. */ + +#ifndef mozilla_FloatingPoint_h +#define mozilla_FloatingPoint_h + +#include "mozilla/Assertions.h" +#include "mozilla/Attributes.h" +#include "mozilla/Casting.h" +#include "mozilla/MathAlgorithms.h" +#include "mozilla/MemoryChecking.h" +#include "mozilla/Types.h" + +#include <algorithm> +#include <climits> +#include <limits> +#include <stdint.h> + +namespace mozilla { + +/* + * It's reasonable to ask why we have this header at all. Don't isnan, + * copysign, the built-in comparison operators, and the like solve these + * problems? Unfortunately, they don't. We've found that various compilers + * (MSVC, MSVC when compiling with PGO, and GCC on OS X, at least) miscompile + * the standard methods in various situations, so we can't use them. Some of + * these compilers even have problems compiling seemingly reasonable bitwise + * algorithms! But with some care we've found algorithms that seem to not + * trigger those compiler bugs. + * + * For the aforementioned reasons, be very wary of making changes to any of + * these algorithms. If you must make changes, keep a careful eye out for + * compiler bustage, particularly PGO-specific bustage. + */ + +namespace detail { + +/* + * These implementations assume float/double are 32/64-bit single/double + * format number types compatible with the IEEE-754 standard. C++ doesn't + * require this, but we required it in implementations of these algorithms that + * preceded this header, so we shouldn't break anything to continue doing so. + */ +template <typename T> +struct FloatingPointTrait; + +template <> +struct FloatingPointTrait<float> { + protected: + using Bits = uint32_t; + + static constexpr unsigned kExponentWidth = 8; + static constexpr unsigned kSignificandWidth = 23; +}; + +template <> +struct FloatingPointTrait<double> { + protected: + using Bits = uint64_t; + + static constexpr unsigned kExponentWidth = 11; + static constexpr unsigned kSignificandWidth = 52; +}; + +} // namespace detail + +/* + * This struct contains details regarding the encoding of floating-point + * numbers that can be useful for direct bit manipulation. As of now, the + * template parameter has to be float or double. + * + * The nested typedef |Bits| is the unsigned integral type with the same size + * as T: uint32_t for float and uint64_t for double (static assertions + * double-check these assumptions). + * + * kExponentBias is the offset that is subtracted from the exponent when + * computing the value, i.e. one plus the opposite of the mininum possible + * exponent. + * kExponentShift is the shift that one needs to apply to retrieve the + * exponent component of the value. + * + * kSignBit contains a bits mask. Bit-and-ing with this mask will result in + * obtaining the sign bit. + * kExponentBits contains the mask needed for obtaining the exponent bits and + * kSignificandBits contains the mask needed for obtaining the significand + * bits. + * + * Full details of how floating point number formats are encoded are beyond + * the scope of this comment. For more information, see + * http://en.wikipedia.org/wiki/IEEE_floating_point + * http://en.wikipedia.org/wiki/Floating_point#IEEE_754:_floating_point_in_modern_computers + */ +template <typename T> +struct FloatingPoint final : private detail::FloatingPointTrait<T> { + private: + using Base = detail::FloatingPointTrait<T>; + + public: + /** + * An unsigned integral type suitable for accessing the bitwise representation + * of T. + */ + using Bits = typename Base::Bits; + + static_assert(sizeof(T) == sizeof(Bits), "Bits must be same size as T"); + + /** The bit-width of the exponent component of T. */ + using Base::kExponentWidth; + + /** The bit-width of the significand component of T. */ + using Base::kSignificandWidth; + + static_assert(1 + kExponentWidth + kSignificandWidth == CHAR_BIT * sizeof(T), + "sign bit plus bit widths should sum to overall bit width"); + + /** + * The exponent field in an IEEE-754 floating point number consists of bits + * encoding an unsigned number. The *actual* represented exponent (for all + * values finite and not denormal) is that value, minus a bias |kExponentBias| + * so that a useful range of numbers is represented. + */ + static constexpr unsigned kExponentBias = (1U << (kExponentWidth - 1)) - 1; + + /** + * The amount by which the bits of the exponent-field in an IEEE-754 floating + * point number are shifted from the LSB of the floating point type. + */ + static constexpr unsigned kExponentShift = kSignificandWidth; + + /** The sign bit in the floating point representation. */ + static constexpr Bits kSignBit = static_cast<Bits>(1) + << (CHAR_BIT * sizeof(Bits) - 1); + + /** The exponent bits in the floating point representation. */ + static constexpr Bits kExponentBits = + ((static_cast<Bits>(1) << kExponentWidth) - 1) << kSignificandWidth; + + /** The significand bits in the floating point representation. */ + static constexpr Bits kSignificandBits = + (static_cast<Bits>(1) << kSignificandWidth) - 1; + + static_assert((kSignBit & kExponentBits) == 0, + "sign bit shouldn't overlap exponent bits"); + static_assert((kSignBit & kSignificandBits) == 0, + "sign bit shouldn't overlap significand bits"); + static_assert((kExponentBits & kSignificandBits) == 0, + "exponent bits shouldn't overlap significand bits"); + + static_assert((kSignBit | kExponentBits | kSignificandBits) == ~Bits(0), + "all bits accounted for"); +}; + +/** + * Determines whether a float/double is negative or -0. It is an error + * to call this method on a float/double which is NaN. + */ +template <typename T> +static MOZ_ALWAYS_INLINE bool IsNegative(T aValue) { + MOZ_ASSERT(!std::isnan(aValue), "NaN does not have a sign"); + return std::signbit(aValue); +} + +/** Determines whether a float/double represents -0. */ +template <typename T> +static MOZ_ALWAYS_INLINE bool IsNegativeZero(T aValue) { + /* Only the sign bit is set if the value is -0. */ + typedef FloatingPoint<T> Traits; + typedef typename Traits::Bits Bits; + Bits bits = BitwiseCast<Bits>(aValue); + return bits == Traits::kSignBit; +} + +/** Determines wether a float/double represents +0. */ +template <typename T> +static MOZ_ALWAYS_INLINE bool IsPositiveZero(T aValue) { + /* All bits are zero if the value is +0. */ + typedef FloatingPoint<T> Traits; + typedef typename Traits::Bits Bits; + Bits bits = BitwiseCast<Bits>(aValue); + return bits == 0; +} + +/** + * Returns 0 if a float/double is NaN or infinite; + * otherwise, the float/double is returned. + */ +template <typename T> +static MOZ_ALWAYS_INLINE T ToZeroIfNonfinite(T aValue) { + return std::isfinite(aValue) ? aValue : 0; +} + +/** + * Returns the exponent portion of the float/double. + * + * Zero is not special-cased, so ExponentComponent(0.0) is + * -int_fast16_t(Traits::kExponentBias). + */ +template <typename T> +static MOZ_ALWAYS_INLINE int_fast16_t ExponentComponent(T aValue) { + /* + * The exponent component of a float/double is an unsigned number, biased + * from its actual value. Subtract the bias to retrieve the actual exponent. + */ + typedef FloatingPoint<T> Traits; + typedef typename Traits::Bits Bits; + Bits bits = BitwiseCast<Bits>(aValue); + return int_fast16_t((bits & Traits::kExponentBits) >> + Traits::kExponentShift) - + int_fast16_t(Traits::kExponentBias); +} + +/** Returns +Infinity. */ +template <typename T> +static MOZ_ALWAYS_INLINE T PositiveInfinity() { + /* + * Positive infinity has all exponent bits set, sign bit set to 0, and no + * significand. + */ + typedef FloatingPoint<T> Traits; + return BitwiseCast<T>(Traits::kExponentBits); +} + +/** Returns -Infinity. */ +template <typename T> +static MOZ_ALWAYS_INLINE T NegativeInfinity() { + /* + * Negative infinity has all exponent bits set, sign bit set to 1, and no + * significand. + */ + typedef FloatingPoint<T> Traits; + return BitwiseCast<T>(Traits::kSignBit | Traits::kExponentBits); +} + +/** + * Computes the bit pattern for an infinity with the specified sign bit. + */ +template <typename T, int SignBit> +struct InfinityBits { + using Traits = FloatingPoint<T>; + + static_assert(SignBit == 0 || SignBit == 1, "bad sign bit"); + static constexpr typename Traits::Bits value = + (SignBit * Traits::kSignBit) | Traits::kExponentBits; +}; + +/** + * Computes the bit pattern for a NaN with the specified sign bit and + * significand bits. + */ +template <typename T, int SignBit, typename FloatingPoint<T>::Bits Significand> +struct SpecificNaNBits { + using Traits = FloatingPoint<T>; + + static_assert(SignBit == 0 || SignBit == 1, "bad sign bit"); + static_assert((Significand & ~Traits::kSignificandBits) == 0, + "significand must only have significand bits set"); + static_assert(Significand & Traits::kSignificandBits, + "significand must be nonzero"); + + static constexpr typename Traits::Bits value = + (SignBit * Traits::kSignBit) | Traits::kExponentBits | Significand; +}; + +/** + * Constructs a NaN value with the specified sign bit and significand bits. + * + * There is also a variant that returns the value directly. In most cases, the + * two variants should be identical. However, in the specific case of x86 + * chips, the behavior differs: returning floating-point values directly is done + * through the x87 stack, and x87 loads and stores turn signaling NaNs into + * quiet NaNs... silently. Returning floating-point values via outparam, + * however, is done entirely within the SSE registers when SSE2 floating-point + * is enabled in the compiler, which has semantics-preserving behavior you would + * expect. + * + * If preserving the distinction between signaling NaNs and quiet NaNs is + * important to you, you should use the outparam version. In all other cases, + * you should use the direct return version. + */ +template <typename T> +static MOZ_ALWAYS_INLINE void SpecificNaN( + int signbit, typename FloatingPoint<T>::Bits significand, T* result) { + typedef FloatingPoint<T> Traits; + MOZ_ASSERT(signbit == 0 || signbit == 1); + MOZ_ASSERT((significand & ~Traits::kSignificandBits) == 0); + MOZ_ASSERT(significand & Traits::kSignificandBits); + + BitwiseCast<T>( + (signbit ? Traits::kSignBit : 0) | Traits::kExponentBits | significand, + result); + MOZ_ASSERT(std::isnan(*result)); +} + +template <typename T> +static MOZ_ALWAYS_INLINE T +SpecificNaN(int signbit, typename FloatingPoint<T>::Bits significand) { + T t; + SpecificNaN(signbit, significand, &t); + return t; +} + +/** Computes the smallest non-zero positive float/double value. */ +template <typename T> +static MOZ_ALWAYS_INLINE T MinNumberValue() { + typedef FloatingPoint<T> Traits; + typedef typename Traits::Bits Bits; + return BitwiseCast<T>(Bits(1)); +} + +namespace detail { + +template <typename Float, typename SignedInteger> +inline bool NumberEqualsSignedInteger(Float aValue, SignedInteger* aInteger) { + static_assert(std::is_same_v<Float, float> || std::is_same_v<Float, double>, + "Float must be an IEEE-754 floating point type"); + static_assert(std::is_signed_v<SignedInteger>, + "this algorithm only works for signed types: a different one " + "will be required for unsigned types"); + static_assert(sizeof(SignedInteger) >= sizeof(int), + "this function *might* require some finessing for signed types " + "subject to integral promotion before it can be used on them"); + + MOZ_MAKE_MEM_UNDEFINED(aInteger, sizeof(*aInteger)); + + // NaNs and infinities are not integers. + if (!std::isfinite(aValue)) { + return false; + } + + // Otherwise do direct comparisons against the minimum/maximum |SignedInteger| + // values that can be encoded in |Float|. + + constexpr SignedInteger MaxIntValue = + std::numeric_limits<SignedInteger>::max(); // e.g. INT32_MAX + constexpr SignedInteger MinValue = + std::numeric_limits<SignedInteger>::min(); // e.g. INT32_MIN + + static_assert(IsPowerOfTwo(Abs(MinValue)), + "MinValue should be is a small power of two, thus exactly " + "representable in float/double both"); + + constexpr unsigned SignedIntegerWidth = CHAR_BIT * sizeof(SignedInteger); + constexpr unsigned ExponentShift = FloatingPoint<Float>::kExponentShift; + + // Careful! |MaxIntValue| may not be the maximum |SignedInteger| value that + // can be encoded in |Float|. Its |SignedIntegerWidth - 1| bits of precision + // may exceed |Float|'s |ExponentShift + 1| bits of precision. If necessary, + // compute the maximum |SignedInteger| that fits in |Float| from IEEE-754 + // first principles. (|MinValue| doesn't have this problem because as a + // [relatively] small power of two it's always representable in |Float|.) + + // Per C++11 [expr.const]p2, unevaluated subexpressions of logical AND/OR and + // conditional expressions *may* contain non-constant expressions, without + // making the enclosing expression not constexpr. MSVC implements this -- but + // it sometimes warns about undefined behavior in unevaluated subexpressions. + // This bites us if we initialize |MaxValue| the obvious way including an + // |uint64_t(1) << (SignedIntegerWidth - 2 - ExponentShift)| subexpression. + // Pull that shift-amount out and give it a not-too-huge value when it's in an + // unevaluated subexpression. 🙄 + constexpr unsigned PrecisionExceededShiftAmount = + ExponentShift > SignedIntegerWidth - 1 + ? 0 + : SignedIntegerWidth - 2 - ExponentShift; + + constexpr SignedInteger MaxValue = + ExponentShift > SignedIntegerWidth - 1 + ? MaxIntValue + : SignedInteger((uint64_t(1) << (SignedIntegerWidth - 1)) - + (uint64_t(1) << PrecisionExceededShiftAmount)); + + if (static_cast<Float>(MinValue) <= aValue && + aValue <= static_cast<Float>(MaxValue)) { + auto possible = static_cast<SignedInteger>(aValue); + if (static_cast<Float>(possible) == aValue) { + *aInteger = possible; + return true; + } + } + + return false; +} + +template <typename Float, typename SignedInteger> +inline bool NumberIsSignedInteger(Float aValue, SignedInteger* aInteger) { + static_assert(std::is_same_v<Float, float> || std::is_same_v<Float, double>, + "Float must be an IEEE-754 floating point type"); + static_assert(std::is_signed_v<SignedInteger>, + "this algorithm only works for signed types: a different one " + "will be required for unsigned types"); + static_assert(sizeof(SignedInteger) >= sizeof(int), + "this function *might* require some finessing for signed types " + "subject to integral promotion before it can be used on them"); + + MOZ_MAKE_MEM_UNDEFINED(aInteger, sizeof(*aInteger)); + + if (IsNegativeZero(aValue)) { + return false; + } + + return NumberEqualsSignedInteger(aValue, aInteger); +} + +} // namespace detail + +/** + * If |aValue| is identical to some |int32_t| value, set |*aInt32| to that value + * and return true. Otherwise return false, leaving |*aInt32| in an + * indeterminate state. + * + * This method returns false for negative zero. If you want to consider -0 to + * be 0, use NumberEqualsInt32 below. + */ +template <typename T> +static MOZ_ALWAYS_INLINE bool NumberIsInt32(T aValue, int32_t* aInt32) { + return detail::NumberIsSignedInteger(aValue, aInt32); +} + +/** + * If |aValue| is identical to some |int64_t| value, set |*aInt64| to that value + * and return true. Otherwise return false, leaving |*aInt64| in an + * indeterminate state. + * + * This method returns false for negative zero. If you want to consider -0 to + * be 0, use NumberEqualsInt64 below. + */ +template <typename T> +static MOZ_ALWAYS_INLINE bool NumberIsInt64(T aValue, int64_t* aInt64) { + return detail::NumberIsSignedInteger(aValue, aInt64); +} + +/** + * If |aValue| is equal to some int32_t value (where -0 and +0 are considered + * equal), set |*aInt32| to that value and return true. Otherwise return false, + * leaving |*aInt32| in an indeterminate state. + * + * |NumberEqualsInt32(-0.0, ...)| will return true. To test whether a value can + * be losslessly converted to |int32_t| and back, use NumberIsInt32 above. + */ +template <typename T> +static MOZ_ALWAYS_INLINE bool NumberEqualsInt32(T aValue, int32_t* aInt32) { + return detail::NumberEqualsSignedInteger(aValue, aInt32); +} + +/** + * If |aValue| is equal to some int64_t value (where -0 and +0 are considered + * equal), set |*aInt64| to that value and return true. Otherwise return false, + * leaving |*aInt64| in an indeterminate state. + * + * |NumberEqualsInt64(-0.0, ...)| will return true. To test whether a value can + * be losslessly converted to |int64_t| and back, use NumberIsInt64 above. + */ +template <typename T> +static MOZ_ALWAYS_INLINE bool NumberEqualsInt64(T aValue, int64_t* aInt64) { + return detail::NumberEqualsSignedInteger(aValue, aInt64); +} + +/** + * Computes a NaN value. Do not use this method if you depend upon a particular + * NaN value being returned. + */ +template <typename T> +static MOZ_ALWAYS_INLINE T UnspecifiedNaN() { + /* + * If we can use any quiet NaN, we might as well use the all-ones NaN, + * since it's cheap to materialize on common platforms (such as x64, where + * this value can be represented in a 32-bit signed immediate field, allowing + * it to be stored to memory in a single instruction). + */ + typedef FloatingPoint<T> Traits; + return SpecificNaN<T>(1, Traits::kSignificandBits); +} + +/** + * Compare two doubles for equality, *without* equating -0 to +0, and equating + * any NaN value to any other NaN value. (The normal equality operators equate + * -0 with +0, and they equate NaN to no other value.) + */ +template <typename T> +static inline bool NumbersAreIdentical(T aValue1, T aValue2) { + using Bits = typename FloatingPoint<T>::Bits; + if (std::isnan(aValue1)) { + return std::isnan(aValue2); + } + return BitwiseCast<Bits>(aValue1) == BitwiseCast<Bits>(aValue2); +} + +/** + * Compare two floating point values for bit-wise equality. + */ +template <typename T> +static inline bool NumbersAreBitwiseIdentical(T aValue1, T aValue2) { + using Bits = typename FloatingPoint<T>::Bits; + return BitwiseCast<Bits>(aValue1) == BitwiseCast<Bits>(aValue2); +} + +/** + * Return true iff |aValue| and |aValue2| are equal (ignoring sign if both are + * zero) or both NaN. + */ +template <typename T> +static inline bool EqualOrBothNaN(T aValue1, T aValue2) { + if (std::isnan(aValue1)) { + return std::isnan(aValue2); + } + return aValue1 == aValue2; +} + +/** + * Return NaN if either |aValue1| or |aValue2| is NaN, or the minimum of + * |aValue1| and |aValue2| otherwise. + */ +template <typename T> +static inline T NaNSafeMin(T aValue1, T aValue2) { + if (std::isnan(aValue1) || std::isnan(aValue2)) { + return UnspecifiedNaN<T>(); + } + return std::min(aValue1, aValue2); +} + +/** + * Return NaN if either |aValue1| or |aValue2| is NaN, or the maximum of + * |aValue1| and |aValue2| otherwise. + */ +template <typename T> +static inline T NaNSafeMax(T aValue1, T aValue2) { + if (std::isnan(aValue1) || std::isnan(aValue2)) { + return UnspecifiedNaN<T>(); + } + return std::max(aValue1, aValue2); +} + +namespace detail { + +template <typename T> +struct FuzzyEqualsEpsilon; + +template <> +struct FuzzyEqualsEpsilon<float> { + // A number near 1e-5 that is exactly representable in a float. + static float value() { return 1.0f / (1 << 17); } +}; + +template <> +struct FuzzyEqualsEpsilon<double> { + // A number near 1e-12 that is exactly representable in a double. + static double value() { return 1.0 / (1LL << 40); } +}; + +} // namespace detail + +/** + * Compare two floating point values for equality, modulo rounding error. That + * is, the two values are considered equal if they are both not NaN and if they + * are less than or equal to aEpsilon apart. The default value of aEpsilon is + * near 1e-5. + * + * For most scenarios you will want to use FuzzyEqualsMultiplicative instead, + * as it is more reasonable over the entire range of floating point numbers. + * This additive version should only be used if you know the range of the + * numbers you are dealing with is bounded and stays around the same order of + * magnitude. + */ +template <typename T> +static MOZ_ALWAYS_INLINE bool FuzzyEqualsAdditive( + T aValue1, T aValue2, T aEpsilon = detail::FuzzyEqualsEpsilon<T>::value()) { + static_assert(std::is_floating_point_v<T>, "floating point type required"); + return Abs(aValue1 - aValue2) <= aEpsilon; +} + +/** + * Compare two floating point values for equality, allowing for rounding error + * relative to the magnitude of the values. That is, the two values are + * considered equal if they are both not NaN and they are less than or equal to + * some aEpsilon apart, where the aEpsilon is scaled by the smaller of the two + * argument values. + * + * In most cases you will want to use this rather than FuzzyEqualsAdditive, as + * this function effectively masks out differences in the bottom few bits of + * the floating point numbers being compared, regardless of what order of + * magnitude those numbers are at. + */ +template <typename T> +static MOZ_ALWAYS_INLINE bool FuzzyEqualsMultiplicative( + T aValue1, T aValue2, T aEpsilon = detail::FuzzyEqualsEpsilon<T>::value()) { + static_assert(std::is_floating_point_v<T>, "floating point type required"); + // can't use std::min because of bug 965340 + T smaller = Abs(aValue1) < Abs(aValue2) ? Abs(aValue1) : Abs(aValue2); + return Abs(aValue1 - aValue2) <= aEpsilon * smaller; +} + +/** + * Returns true if |aValue| can be losslessly represented as an IEEE-754 single + * precision number, false otherwise. All NaN values are considered + * representable (even though the bit patterns of double precision NaNs can't + * all be exactly represented in single precision). + */ +[[nodiscard]] extern MFBT_API bool IsFloat32Representable(double aValue); + +} /* namespace mozilla */ + +#endif /* mozilla_FloatingPoint_h */ |