diff options
Diffstat (limited to 'third_party/xsimd/include/xsimd/arch/xsimd_scalar.hpp')
-rw-r--r-- | third_party/xsimd/include/xsimd/arch/xsimd_scalar.hpp | 1043 |
1 files changed, 1043 insertions, 0 deletions
diff --git a/third_party/xsimd/include/xsimd/arch/xsimd_scalar.hpp b/third_party/xsimd/include/xsimd/arch/xsimd_scalar.hpp new file mode 100644 index 0000000000..d5116cbd71 --- /dev/null +++ b/third_party/xsimd/include/xsimd/arch/xsimd_scalar.hpp @@ -0,0 +1,1043 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_SCALAR_HPP +#define XSIMD_SCALAR_HPP + +#include <cassert> +#include <cmath> +#include <complex> +#include <cstdint> +#include <cstring> +#include <limits> +#include <type_traits> + +#ifdef XSIMD_ENABLE_XTL_COMPLEX +#include "xtl/xcomplex.hpp" +#endif + +namespace xsimd +{ + template <class T, class A> + class batch; + template <class T, class A> + class batch_bool; + + using std::abs; + + using std::acos; + using std::acosh; + using std::arg; + using std::asin; + using std::asinh; + using std::atan; + using std::atan2; + using std::atanh; + using std::cbrt; + using std::ceil; + using std::conj; + using std::copysign; + using std::cos; + using std::cosh; + using std::erf; + using std::erfc; + using std::exp; + using std::exp2; + using std::expm1; + using std::fabs; + using std::fdim; + using std::floor; + using std::fmax; + using std::fmin; + using std::fmod; + using std::hypot; + using std::ldexp; + using std::lgamma; + using std::log; + using std::log10; + using std::log1p; + using std::log2; + using std::modf; + using std::nearbyint; + using std::nextafter; + using std::norm; + using std::polar; + using std::proj; + using std::remainder; + using std::rint; + using std::round; + using std::sin; + using std::sinh; + using std::sqrt; + using std::tan; + using std::tanh; + using std::tgamma; + using std::trunc; + +#ifndef _WIN32 + using std::isfinite; + using std::isinf; + using std::isnan; +#else + + // Windows defines catch all templates + template <class T> + inline typename std::enable_if<std::is_floating_point<T>::value, bool>::type + isfinite(T var) noexcept + { + return std::isfinite(var); + } + + template <class T> + inline typename std::enable_if<std::is_integral<T>::value, bool>::type + isfinite(T var) noexcept + { + return isfinite(double(var)); + } + + template <class T> + inline typename std::enable_if<std::is_floating_point<T>::value, bool>::type + isinf(T var) noexcept + { + return std::isinf(var); + } + + template <class T> + inline typename std::enable_if<std::is_integral<T>::value, bool>::type + isinf(T var) noexcept + { + return isinf(double(var)); + } + + template <class T> + inline typename std::enable_if<std::is_floating_point<T>::value, bool>::type + isnan(T var) noexcept + { + return std::isnan(var); + } + + template <class T> + inline typename std::enable_if<std::is_integral<T>::value, bool>::type + isnan(T var) noexcept + { + return isnan(double(var)); + } +#endif + + template <class T, class Tp> + inline auto add(T const& x, Tp const& y) noexcept -> decltype(x + y) + { + return x + y; + } + + template <class T> + inline typename std::enable_if<std::is_integral<T>::value, T>::type + bitwise_and(T x, T y) noexcept + { + return x & y; + } + + inline float bitwise_and(float x, float y) noexcept + { + uint32_t ix, iy; + std::memcpy((void*)&ix, (void*)&x, sizeof(float)); + std::memcpy((void*)&iy, (void*)&y, sizeof(float)); + uint32_t ir = bitwise_and(ix, iy); + float r; + std::memcpy((void*)&r, (void*)&ir, sizeof(float)); + return r; + } + + inline double bitwise_and(double x, double y) noexcept + { + uint64_t ix, iy; + std::memcpy((void*)&ix, (void*)&x, sizeof(double)); + std::memcpy((void*)&iy, (void*)&y, sizeof(double)); + uint64_t ir = bitwise_and(ix, iy); + double r; + std::memcpy((void*)&r, (void*)&ir, sizeof(double)); + return r; + } + + template <class T> + inline typename std::enable_if<std::is_integral<T>::value, T>::type + bitwise_andnot(T x, T y) noexcept + { + return x & ~y; + } + + inline float bitwise_andnot(float x, float y) noexcept + { + uint32_t ix, iy; + std::memcpy((void*)&ix, (void*)&x, sizeof(float)); + std::memcpy((void*)&iy, (void*)&y, sizeof(float)); + uint32_t ir = bitwise_andnot(ix, iy); + float r; + std::memcpy((void*)&r, (void*)&ir, sizeof(float)); + return r; + } + + inline double bitwise_andnot(double x, double y) noexcept + { + uint64_t ix, iy; + std::memcpy((void*)&ix, (void*)&x, sizeof(double)); + std::memcpy((void*)&iy, (void*)&y, sizeof(double)); + uint64_t ir = bitwise_andnot(ix, iy); + double r; + std::memcpy((void*)&r, (void*)&ir, sizeof(double)); + return r; + } + + template <class T> + inline typename std::enable_if<std::is_integral<T>::value, T>::type + bitwise_not(T x) noexcept + { + return ~x; + } + + inline float bitwise_not(float x) noexcept + { + uint32_t ix; + std::memcpy((void*)&ix, (void*)&x, sizeof(float)); + uint32_t ir = bitwise_not(ix); + float r; + std::memcpy((void*)&r, (void*)&ir, sizeof(float)); + return r; + } + + inline double bitwise_not(double x) noexcept + { + uint64_t ix; + std::memcpy((void*)&ix, (void*)&x, sizeof(double)); + uint64_t ir = bitwise_not(ix); + double r; + std::memcpy((void*)&r, (void*)&ir, sizeof(double)); + return r; + } + + template <class T> + inline typename std::enable_if<std::is_integral<T>::value, T>::type + bitwise_or(T x, T y) noexcept + { + return x | y; + } + + inline float bitwise_or(float x, float y) noexcept + { + uint32_t ix, iy; + std::memcpy((void*)&ix, (void*)&x, sizeof(float)); + std::memcpy((void*)&iy, (void*)&y, sizeof(float)); + uint32_t ir = bitwise_or(ix, iy); + float r; + std::memcpy((void*)&r, (void*)&ir, sizeof(float)); + return r; + } + + inline double bitwise_or(double x, double y) noexcept + { + uint64_t ix, iy; + std::memcpy((void*)&ix, (void*)&x, sizeof(double)); + std::memcpy((void*)&iy, (void*)&y, sizeof(double)); + uint64_t ir = bitwise_or(ix, iy); + double r; + std::memcpy((void*)&r, (void*)&ir, sizeof(double)); + return r; + } + + template <class T> + inline typename std::enable_if<std::is_integral<T>::value, T>::type + bitwise_xor(T x, T y) noexcept + { + return x ^ y; + } + + inline float bitwise_xor(float x, float y) noexcept + { + uint32_t ix, iy; + std::memcpy((void*)&ix, (void*)&x, sizeof(float)); + std::memcpy((void*)&iy, (void*)&y, sizeof(float)); + uint32_t ir = bitwise_xor(ix, iy); + float r; + std::memcpy((void*)&r, (void*)&ir, sizeof(float)); + return r; + } + + inline double bitwise_xor(double x, double y) noexcept + { + uint64_t ix, iy; + std::memcpy((void*)&ix, (void*)&x, sizeof(double)); + std::memcpy((void*)&iy, (void*)&y, sizeof(double)); + uint64_t ir = bitwise_xor(ix, iy); + double r; + std::memcpy((void*)&r, (void*)&ir, sizeof(double)); + return r; + } + + template <class T, class Tp> + inline auto div(T const& x, Tp const& y) noexcept -> decltype(x / y) + { + return x / y; + } + + template <class T, class Tp> + inline auto mod(T const& x, Tp const& y) noexcept -> decltype(x % y) + { + return x % y; + } + + template <class T, class Tp> + inline auto mul(T const& x, Tp const& y) noexcept -> decltype(x * y) + { + return x * y; + } + + template <class T> + inline auto neg(T const& x) noexcept -> decltype(-x) + { + return -x; + } + + template <class T> + inline auto pos(T const& x) noexcept -> decltype(+x) + { + return +x; + } + + inline float reciprocal(float const& x) noexcept + { + return 1.f / x; + } + + inline double reciprocal(double const& x) noexcept + { + return 1. / x; + } + +#ifdef XSIMD_ENABLE_NUMPY_COMPLEX + template <class T> + inline bool isnan(std::complex<T> var) noexcept + { + return std::isnan(std::real(var)) || std::isnan(std::imag(var)); + } + + template <class T> + inline bool isinf(std::complex<T> var) noexcept + { + return std::isinf(std::real(var)) || std::isinf(std::imag(var)); + } +#endif + +#ifdef XSIMD_ENABLE_XTL_COMPLEX + using xtl::abs; + using xtl::acos; + using xtl::acosh; + using xtl::asin; + using xtl::asinh; + using xtl::atan; + using xtl::atanh; + using xtl::cos; + using xtl::cosh; + using xtl::exp; + using xtl::log; + using xtl::log10; + using xtl::norm; + using xtl::pow; + using xtl::proj; + using xtl::sin; + using xtl::sinh; + using xtl::sqrt; + using xtl::tan; + using xtl::tanh; +#endif + + template <typename T, class = typename std::enable_if<std::is_scalar<T>::value>::type> + inline T clip(const T& val, const T& low, const T& hi) noexcept + { + assert(low <= hi && "ordered clipping bounds"); + return low > val ? low : (hi < val ? hi : val); + } + + template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type> + inline bool is_flint(const T& x) noexcept + { + return std::isnan(x - x) ? false : (x - std::trunc(x)) == T(0); + } + + template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type> + inline bool is_even(const T& x) noexcept + { + return is_flint(x * T(0.5)); + } + + template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type> + inline bool is_odd(const T& x) noexcept + { + return is_even(x - 1.); + } + + inline int32_t nearbyint_as_int(float var) noexcept + { + return static_cast<int32_t>(std::nearbyint(var)); + } + + inline int64_t nearbyint_as_int(double var) noexcept + { + return static_cast<int64_t>(std::nearbyint(var)); + } + + template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type> + inline bool eq(const T& x0, const T& x1) noexcept + { + return x0 == x1; + } + + template <class T> + inline bool eq(const std::complex<T>& x0, const std::complex<T>& x1) noexcept + { + return x0 == x1; + } + + template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type> + inline bool ge(const T& x0, const T& x1) noexcept + { + return x0 >= x1; + } + + template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type> + inline bool gt(const T& x0, const T& x1) noexcept + { + return x0 > x1; + } + + template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type> + inline bool le(const T& x0, const T& x1) noexcept + { + return x0 <= x1; + } + + template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type> + inline bool lt(const T& x0, const T& x1) noexcept + { + return x0 < x1; + } + + template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type> + inline bool neq(const T& x0, const T& x1) noexcept + { + return x0 != x1; + } + + template <class T> + inline bool neq(const std::complex<T>& x0, const std::complex<T>& x1) noexcept + { + return !(x0 == x1); + } + +#if defined(__APPLE__) + inline float exp10(const float& x) noexcept + { + return __exp10f(x); + } + inline double exp10(const double& x) noexcept + { + return __exp10(x); + } +#elif defined(__GLIBC__) + inline float exp10(const float& x) noexcept + { + return ::exp10f(x); + } + inline double exp10(const double& x) noexcept + { + return ::exp10(x); + } +#elif defined(_WIN32) + template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type> + inline T exp10(const T& x) noexcept + { + // Very inefficient but other implementations give incorrect results + // on Windows + return std::pow(T(10), x); + } +#else + inline float exp10(const float& x) noexcept + { + return std::exp(0x1.26bb1cp+1f * x); + } + inline double exp10(const double& x) noexcept + { + return std::exp(0x1.26bb1bbb55516p+1 * x); + } +#endif + + template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type> + inline auto rsqrt(const T& x) noexcept -> decltype(std::sqrt(x)) + { + using float_type = decltype(std::sqrt(x)); + return static_cast<float_type>(1) / std::sqrt(x); + } + + namespace detail + { + template <class C> + inline C expm1_complex_scalar_impl(const C& val) noexcept + { + using T = typename C::value_type; + T isin = std::sin(val.imag()); + T rem1 = std::expm1(val.real()); + T re = rem1 + T(1.); + T si = std::sin(val.imag() * T(0.5)); + return std::complex<T>(rem1 - T(2.) * re * si * si, re * isin); + } + } + + template <class T> + inline std::complex<T> expm1(const std::complex<T>& val) noexcept + { + return detail::expm1_complex_scalar_impl(val); + } + +#ifdef XSIMD_ENABLE_XTL_COMPLEX + template <class T, bool i3ec> + inline xtl::xcomplex<T, T, i3ec> expm1(const xtl::xcomplex<T, T, i3ec>& val) noexcept + { + return detail::expm1_complex_scalar_impl(val); + } +#endif + + namespace detail + { + template <class C> + inline C log1p_complex_scalar_impl(const C& val) noexcept + { + using T = typename C::value_type; + C u = C(1.) + val; + return u == C(1.) ? val : (u.real() <= T(0.) ? log(u) : log(u) * val / (u - C(1.))); + } + } + + template <class T> + inline std::complex<T> log1p(const std::complex<T>& val) noexcept + { + return detail::log1p_complex_scalar_impl(val); + } + + template <class T> + inline std::complex<T> log2(const std::complex<T>& val) noexcept + { + return log(val) / std::log(T(2)); + } + + template <typename T, class = typename std::enable_if<std::is_scalar<T>::value>::type> + inline T sadd(const T& lhs, const T& rhs) noexcept + { + if (std::numeric_limits<T>::is_signed) + { + if ((lhs > 0) && (rhs > std::numeric_limits<T>::max() - lhs)) + { + return std::numeric_limits<T>::max(); + } + else if ((lhs < 0) && (rhs < std::numeric_limits<T>::lowest() - lhs)) + { + return std::numeric_limits<T>::lowest(); + } + else + { + return lhs + rhs; + } + } + else + { + if (rhs > std::numeric_limits<T>::max() - lhs) + { + return std::numeric_limits<T>::max(); + } + else + { + return lhs + rhs; + } + } + } + + template <typename T, class = typename std::enable_if<std::is_scalar<T>::value>::type> + inline T ssub(const T& lhs, const T& rhs) noexcept + { + if (std::numeric_limits<T>::is_signed) + { + return sadd(lhs, (T)-rhs); + } + else + { + if (lhs < rhs) + { + return std::numeric_limits<T>::lowest(); + } + else + { + return lhs - rhs; + } + } + } + + namespace detail + { + template <class T> + struct value_type_or_type_helper + { + using type = T; + }; + template <class T, class A> + struct value_type_or_type_helper<batch<T, A>> + { + using type = T; + }; + + template <class T> + using value_type_or_type = typename value_type_or_type_helper<T>::type; + + template <class T0, class T1> + inline typename std::enable_if<std::is_integral<T1>::value, T0>::type + ipow(const T0& x, const T1& n) noexcept + { + static_assert(std::is_integral<T1>::value, "second argument must be an integer"); + T0 a = x; + T1 b = n; + bool const recip = b < 0; + T0 r(static_cast<value_type_or_type<T0>>(1)); + while (1) + { + if (b & 1) + { + r *= a; + } + b /= 2; + if (b == 0) + { + break; + } + a *= a; + } + return recip ? static_cast<T0>(1) / r : r; + } + } + + template <class T0, class T1> + inline typename std::enable_if<std::is_integral<T1>::value, T0>::type + pow(const T0& x, const T1& n) noexcept + { + return detail::ipow(x, n); + } + + template <class T0, class T1> + inline auto + pow(const T0& t0, const T1& t1) noexcept + -> typename std::enable_if<std::is_scalar<T0>::value && std::is_floating_point<T1>::value, decltype(std::pow(t0, t1))>::type + { + return std::pow(t0, t1); + } + + template <class T0, class T1> + inline typename std::enable_if<std::is_integral<T1>::value, std::complex<T0>>::type + pow(const std::complex<T0>& t0, const T1& t1) noexcept + { + return detail::ipow(t0, t1); + } + + template <class T0, class T1> + inline typename std::enable_if<!std::is_integral<T1>::value, std::complex<T0>>::type + pow(const std::complex<T0>& t0, const T1& t1) noexcept + { + return std::pow(t0, t1); + } + + template <class T0, class T1> + inline auto + pow(const T0& t0, const std::complex<T1>& t1) noexcept + -> typename std::enable_if<std::is_scalar<T0>::value, decltype(std::pow(t0, t1))>::type + { + return std::pow(t0, t1); + } + + template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type> + inline bool bitofsign(T const& x) noexcept + { + return x < T(0); + } + + template <class T> + inline auto signbit(T const& v) noexcept -> decltype(bitofsign(v)) + { + return bitofsign(v); + } + + inline double sign(bool const& v) noexcept + { + return v; + } + + template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type> + inline T sign(const T& v) noexcept + { + return v < T(0) ? T(-1.) : v == T(0) ? T(0.) + : T(1.); + } + + namespace detail + { + template <class C> + inline C sign_complex_scalar_impl(const C& v) noexcept + { + using value_type = typename C::value_type; + if (v.real()) + { + return C(sign(v.real()), value_type(0)); + } + else + { + return C(sign(v.imag()), value_type(0)); + } + } + } + + template <class T> + inline std::complex<T> sign(const std::complex<T>& v) noexcept + { + return detail::sign_complex_scalar_impl(v); + } + +#ifdef XSIMD_ENABLE_XTL_COMPLEX + template <class T, bool i3ec> + inline xtl::xcomplex<T, T, i3ec> sign(const xtl::xcomplex<T, T, i3ec>& v) noexcept + { + return detail::sign_complex_scalar_impl(v); + } +#endif + + inline double signnz(bool const&) noexcept + { + return 1; + } + + template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type> + inline T signnz(const T& v) noexcept + { + return v < T(0) ? T(-1.) : T(1.); + } + + template <class T, class Tp> + inline auto sub(T const& x, Tp const& y) noexcept -> decltype(x - y) + { + return x - y; + } + +#ifdef XSIMD_ENABLE_XTL_COMPLEX + template <class T, bool i3ec> + inline xtl::xcomplex<T, T, i3ec> log2(const xtl::xcomplex<T, T, i3ec>& val) noexcept + { + return log(val) / log(T(2)); + } +#endif + +#ifdef XSIMD_ENABLE_XTL_COMPLEX + template <class T, bool i3ec> + inline xtl::xcomplex<T, T, i3ec> log1p(const xtl::xcomplex<T, T, i3ec>& val) noexcept + { + return detail::log1p_complex_scalar_impl(val); + } +#endif + + template <class T0, class T1> + inline auto min(T0 const& self, T1 const& other) noexcept + -> typename std::enable_if<std::is_scalar<T0>::value && std::is_scalar<T1>::value, + typename std::decay<decltype(self > other ? other : self)>::type>::type + { + return self > other ? other : self; + } + + // numpy defines minimum operator on complex using lexical comparison + template <class T0, class T1> + inline std::complex<typename std::common_type<T0, T1>::type> + min(std::complex<T0> const& self, std::complex<T1> const& other) noexcept + { + return (self.real() < other.real()) ? (self) : (self.real() == other.real() ? (self.imag() < other.imag() ? self : other) : other); + } + + template <class T0, class T1> + inline auto max(T0 const& self, T1 const& other) noexcept + -> typename std::enable_if<std::is_scalar<T0>::value && std::is_scalar<T1>::value, + typename std::decay<decltype(self > other ? other : self)>::type>::type + { + return self < other ? other : self; + } + + // numpy defines maximum operator on complex using lexical comparison + template <class T0, class T1> + inline std::complex<typename std::common_type<T0, T1>::type> + max(std::complex<T0> const& self, std::complex<T1> const& other) noexcept + { + return (self.real() > other.real()) ? (self) : (self.real() == other.real() ? (self.imag() > other.imag() ? self : other) : other); + } + + template <class T> + inline typename std::enable_if<std::is_integral<T>::value, T>::type fma(const T& a, const T& b, const T& c) noexcept + { + return a * b + c; + } + + template <class T> + inline typename std::enable_if<std::is_floating_point<T>::value, T>::type fma(const T& a, const T& b, const T& c) noexcept + { + return std::fma(a, b, c); + } + + template <class T> + inline typename std::enable_if<std::is_scalar<T>::value, T>::type fms(const T& a, const T& b, const T& c) noexcept + { + return a * b - c; + } + + namespace detail + { + template <class C> + inline C fma_complex_scalar_impl(const C& a, const C& b, const C& c) noexcept + { + return { fms(a.real(), b.real(), fms(a.imag(), b.imag(), c.real())), + fma(a.real(), b.imag(), fma(a.imag(), b.real(), c.imag())) }; + } + } + + template <class T> + inline std::complex<T> fma(const std::complex<T>& a, const std::complex<T>& b, const std::complex<T>& c) noexcept + { + return detail::fma_complex_scalar_impl(a, b, c); + } + +#ifdef XSIMD_ENABLE_XTL_COMPLEX + template <class T, bool i3ec> + inline xtl::xcomplex<T, T, i3ec> fma(const xtl::xcomplex<T, T, i3ec>& a, const xtl::xcomplex<T, T, i3ec>& b, const xtl::xcomplex<T, T, i3ec>& c) noexcept + { + return detail::fma_complex_scalar_impl(a, b, c); + } +#endif + + namespace detail + { + template <class C> + inline C fms_complex_scalar_impl(const C& a, const C& b, const C& c) noexcept + { + return { fms(a.real(), b.real(), fma(a.imag(), b.imag(), c.real())), + fma(a.real(), b.imag(), fms(a.imag(), b.real(), c.imag())) }; + } + } + + template <class T> + inline std::complex<T> fms(const std::complex<T>& a, const std::complex<T>& b, const std::complex<T>& c) noexcept + { + return detail::fms_complex_scalar_impl(a, b, c); + } + +#ifdef XSIMD_ENABLE_XTL_COMPLEX + template <class T, bool i3ec> + inline xtl::xcomplex<T, T, i3ec> fms(const xtl::xcomplex<T, T, i3ec>& a, const xtl::xcomplex<T, T, i3ec>& b, const xtl::xcomplex<T, T, i3ec>& c) noexcept + { + return detail::fms_complex_scalar_impl(a, b, c); + } +#endif + + template <class T> + inline typename std::enable_if<std::is_integral<T>::value, T>::type fnma(const T& a, const T& b, const T& c) noexcept + { + return -(a * b) + c; + } + + template <class T> + inline typename std::enable_if<std::is_floating_point<T>::value, T>::type fnma(const T& a, const T& b, const T& c) noexcept + { + return std::fma(-a, b, c); + } + + namespace detail + { + template <class C> + inline C fnma_complex_scalar_impl(const C& a, const C& b, const C& c) noexcept + { + return { fms(a.imag(), b.imag(), fms(a.real(), b.real(), c.real())), + -fma(a.real(), b.imag(), fms(a.imag(), b.real(), c.imag())) }; + } + } + + template <class T> + inline std::complex<T> fnma(const std::complex<T>& a, const std::complex<T>& b, const std::complex<T>& c) noexcept + { + return detail::fnma_complex_scalar_impl(a, b, c); + } + +#ifdef XSIMD_ENABLE_XTL_COMPLEX + template <class T, bool i3ec> + inline xtl::xcomplex<T, T, i3ec> fnma(const xtl::xcomplex<T, T, i3ec>& a, const xtl::xcomplex<T, T, i3ec>& b, const xtl::xcomplex<T, T, i3ec>& c) noexcept + { + return detail::fnma_complex_scalar_impl(a, b, c); + } +#endif + + template <class T> + inline typename std::enable_if<std::is_integral<T>::value, T>::type fnms(const T& a, const T& b, const T& c) noexcept + { + return -(a * b) - c; + } + + template <class T> + inline typename std::enable_if<std::is_floating_point<T>::value, T>::type fnms(const T& a, const T& b, const T& c) noexcept + { + return -std::fma(a, b, c); + } + + namespace detail + { + template <class C> + inline C fnms_complex_scalar_impl(const C& a, const C& b, const C& c) noexcept + { + return { fms(a.imag(), b.imag(), fma(a.real(), b.real(), c.real())), + -fma(a.real(), b.imag(), fma(a.imag(), b.real(), c.imag())) }; + } + } + + template <class T> + inline std::complex<T> fnms(const std::complex<T>& a, const std::complex<T>& b, const std::complex<T>& c) noexcept + { + return detail::fnms_complex_scalar_impl(a, b, c); + } + +#ifdef XSIMD_ENABLE_XTL_COMPLEX + template <class T, bool i3ec> + inline xtl::xcomplex<T, T, i3ec> fnms(const xtl::xcomplex<T, T, i3ec>& a, const xtl::xcomplex<T, T, i3ec>& b, const xtl::xcomplex<T, T, i3ec>& c) noexcept + { + return detail::fnms_complex_scalar_impl(a, b, c); + } +#endif + + namespace detail + { +#define XSIMD_HASSINCOS_TRAIT(func) \ + template <class S> \ + struct has##func \ + { \ + template <class T> \ + static auto get(T* ptr) -> decltype(func(std::declval<T>(), std::declval<T*>(), std::declval<T*>()), std::true_type {}); \ + static std::false_type get(...); \ + static constexpr bool value = decltype(get((S*)nullptr))::value; \ + } + +#define XSIMD_HASSINCOS(func, T) has##func<T>::value + + XSIMD_HASSINCOS_TRAIT(sincos); + XSIMD_HASSINCOS_TRAIT(sincosf); + XSIMD_HASSINCOS_TRAIT(__sincos); + XSIMD_HASSINCOS_TRAIT(__sincosf); + + struct generic_sincosf + { + template <class T> + typename std::enable_if<XSIMD_HASSINCOS(sincosf, T), void>::type + operator()(float val, T& s, T& c) + { + sincosf(val, &s, &c); + } + + template <class T> + typename std::enable_if<!XSIMD_HASSINCOS(sincosf, T) && XSIMD_HASSINCOS(__sincosf, T), void>::type + operator()(float val, T& s, T& c) + { + __sincosf(val, &s, &c); + } + + template <class T> + typename std::enable_if<!XSIMD_HASSINCOS(sincosf, T) && !XSIMD_HASSINCOS(__sincosf, T), void>::type + operator()(float val, T& s, T& c) + { + s = std::sin(val); + c = std::cos(val); + } + }; + + struct generic_sincos + { + template <class T> + typename std::enable_if<XSIMD_HASSINCOS(sincos, T), void>::type + operator()(double val, T& s, T& c) + { + sincos(val, &s, &c); + } + + template <class T> + typename std::enable_if<!XSIMD_HASSINCOS(sincos, T) && XSIMD_HASSINCOS(__sincos, T), void>::type + operator()(double val, T& s, T& c) + { + __sincos(val, &s, &c); + } + + template <class T> + typename std::enable_if<!XSIMD_HASSINCOS(sincos, T) && !XSIMD_HASSINCOS(__sincos, T), void>::type + operator()(double val, T& s, T& c) + { + s = std::sin(val); + c = std::cos(val); + } + }; + +#undef XSIMD_HASSINCOS_TRAIT +#undef XSIMD_HASSINCOS + } + + inline std::pair<float, float> sincos(float val) noexcept + { + float s, c; + detail::generic_sincosf {}(val, s, c); + return std::make_pair(s, c); + } + + inline std::pair<double, double> sincos(double val) noexcept + { + double s, c; + detail::generic_sincos {}(val, s, c); + return std::make_pair(s, c); + } + + template <class T> + inline std::pair<std::complex<T>, std::complex<T>> + sincos(const std::complex<T>& val) noexcept + { + return std::make_pair(std::sin(val), std::cos(val)); + } + +#ifdef XSIMD_ENABLE_XTL_COMPLEX + template <class T> + inline std::pair<xtl::xcomplex<T>, xtl::xcomplex<T>> sincos(const xtl::xcomplex<T>& val) noexcept + { + return std::make_pair(sin(val), cos(val)); + } +#endif + + template <class T, class _ = typename std::enable_if<std::is_floating_point<T>::value, void>::type> + inline T frexp(T const& val, int& exp) noexcept + { + return std::frexp(val, &exp); + } + + template <class T> + inline T select(bool cond, T const& true_br, T const& false_br) noexcept + { + return cond ? true_br : false_br; + } + +} + +#endif |