/*--------------------------------------------------------------------------- * * Ryu floating-point output for double precision. * * Portions Copyright (c) 2018-2020, PostgreSQL Global Development Group * * IDENTIFICATION * src/common/d2s_intrinsics.h * * This is a modification of code taken from github.com/ulfjack/ryu under the * terms of the Boost license (not the Apache license). The original copyright * notice follows: * * Copyright 2018 Ulf Adams * * The contents of this file may be used under the terms of the Apache * License, Version 2.0. * * (See accompanying file LICENSE-Apache or copy at * http://www.apache.org/licenses/LICENSE-2.0) * * Alternatively, the contents of this file may be used under the terms of the * Boost Software License, Version 1.0. * * (See accompanying file LICENSE-Boost or copy at * https://www.boost.org/LICENSE_1_0.txt) * * Unless required by applicable law or agreed to in writing, this software is * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. * *--------------------------------------------------------------------------- */ #ifndef RYU_D2S_INTRINSICS_H #define RYU_D2S_INTRINSICS_H #if defined(HAS_64_BIT_INTRINSICS) #include static inline uint64 umul128(const uint64 a, const uint64 b, uint64 *const productHi) { return _umul128(a, b, productHi); } static inline uint64 shiftright128(const uint64 lo, const uint64 hi, const uint32 dist) { /* * For the __shiftright128 intrinsic, the shift value is always modulo 64. * In the current implementation of the double-precision version of Ryu, * the shift value is always < 64. (In the case RYU_OPTIMIZE_SIZE == 0, * the shift value is in the range [49, 58]. Otherwise in the range [2, * 59].) Check this here in case a future change requires larger shift * values. In this case this function needs to be adjusted. */ Assert(dist < 64); return __shiftright128(lo, hi, (unsigned char) dist); } #else /* defined(HAS_64_BIT_INTRINSICS) */ static inline uint64 umul128(const uint64 a, const uint64 b, uint64 *const productHi) { /* * The casts here help MSVC to avoid calls to the __allmul library * function. */ const uint32 aLo = (uint32) a; const uint32 aHi = (uint32) (a >> 32); const uint32 bLo = (uint32) b; const uint32 bHi = (uint32) (b >> 32); const uint64 b00 = (uint64) aLo * bLo; const uint64 b01 = (uint64) aLo * bHi; const uint64 b10 = (uint64) aHi * bLo; const uint64 b11 = (uint64) aHi * bHi; const uint32 b00Lo = (uint32) b00; const uint32 b00Hi = (uint32) (b00 >> 32); const uint64 mid1 = b10 + b00Hi; const uint32 mid1Lo = (uint32) (mid1); const uint32 mid1Hi = (uint32) (mid1 >> 32); const uint64 mid2 = b01 + mid1Lo; const uint32 mid2Lo = (uint32) (mid2); const uint32 mid2Hi = (uint32) (mid2 >> 32); const uint64 pHi = b11 + mid1Hi + mid2Hi; const uint64 pLo = ((uint64) mid2Lo << 32) + b00Lo; *productHi = pHi; return pLo; } static inline uint64 shiftright128(const uint64 lo, const uint64 hi, const uint32 dist) { /* We don't need to handle the case dist >= 64 here (see above). */ Assert(dist < 64); #if !defined(RYU_32_BIT_PLATFORM) Assert(dist > 0); return (hi << (64 - dist)) | (lo >> dist); #else /* Avoid a 64-bit shift by taking advantage of the range of shift values. */ Assert(dist >= 32); return (hi << (64 - dist)) | ((uint32) (lo >> 32) >> (dist - 32)); #endif } #endif /* // defined(HAS_64_BIT_INTRINSICS) */ #ifdef RYU_32_BIT_PLATFORM /* Returns the high 64 bits of the 128-bit product of a and b. */ static inline uint64 umulh(const uint64 a, const uint64 b) { /* * Reuse the umul128 implementation. Optimizers will likely eliminate the * instructions used to compute the low part of the product. */ uint64 hi; umul128(a, b, &hi); return hi; } /*---- * On 32-bit platforms, compilers typically generate calls to library * functions for 64-bit divisions, even if the divisor is a constant. * * E.g.: * https://bugs.llvm.org/show_bug.cgi?id=37932 * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=17958 * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37443 * * The functions here perform division-by-constant using multiplications * in the same way as 64-bit compilers would do. * * NB: * The multipliers and shift values are the ones generated by clang x64 * for expressions like x/5, x/10, etc. *---- */ static inline uint64 div5(const uint64 x) { return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 2; } static inline uint64 div10(const uint64 x) { return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 3; } static inline uint64 div100(const uint64 x) { return umulh(x >> 2, UINT64CONST(0x28F5C28F5C28F5C3)) >> 2; } static inline uint64 div1e8(const uint64 x) { return umulh(x, UINT64CONST(0xABCC77118461CEFD)) >> 26; } #else /* RYU_32_BIT_PLATFORM */ static inline uint64 div5(const uint64 x) { return x / 5; } static inline uint64 div10(const uint64 x) { return x / 10; } static inline uint64 div100(const uint64 x) { return x / 100; } static inline uint64 div1e8(const uint64 x) { return x / 100000000; } #endif /* RYU_32_BIT_PLATFORM */ #endif /* RYU_D2S_INTRINSICS_H */