summaryrefslogtreecommitdiffstats
path: root/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_trigo.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_trigo.hpp')
-rw-r--r-- third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_trigo.hpp 969
1 files changed, 969 insertions, 0 deletions
diff --git a/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_trigo.hpp b/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_trigo.hpp
new file mode 100644
index 0000000000..2568a7253f
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_trigo.hpp
@@ -0,0 +1,969 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_GENERIC_TRIGO_HPP
+#define XSIMD_GENERIC_TRIGO_HPP
+
+#include "./xsimd_generic_details.hpp"
+
+#include <array>
+
+namespace xsimd
+{
+
+ namespace kernel
+ {
+ /* origin: boost/simd/arch/common/detail/simd/trig_base.hpp */
+ /*
+ * ====================================================
+ * copyright 2016 NumScale SAS
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
+ * ====================================================
+ */
+
+ using namespace types;
+
+ // acos
+ // Generic real acos kernel, built on top of asin.
+ // Lanes with |self| > 0.5 go through asin(sqrt((1 - |self|) / 2)) and are
+ // doubled; the remaining lanes use pio2 - asin(self).
+ template <class A, class T>
+ inline batch<T, A> acos(batch<T, A> const& self, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<T, A>;
+     const batch_type magnitude = abs(self);
+     const auto far_from_zero = magnitude > batch_type(0.5);
+     const batch_type reduced = sqrt(fma(batch_type(-0.5), magnitude, batch_type(0.5)));
+     const batch_type base = asin(select(far_from_zero, reduced, self));
+     const batch_type doubled = select(far_from_zero, base + base, base);
+     // Inputs below -0.5 are mirrored around pi.
+     const batch_type folded = select(self < batch_type(-0.5), constants::pi<batch_type>() - doubled, doubled);
+     return select(far_from_zero, folded, constants::pio2<batch_type>() - folded);
+ }
+ // Complex acos: (pio2 - Re(asin(z)), -Im(asin(z))).
+ template <class A, class T>
+ inline batch<std::complex<T>, A> acos(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
+ {
+     using complex_batch = batch<std::complex<T>, A>;
+     using real_batch = typename complex_batch::real_batch;
+     const complex_batch arcsine = asin(z);
+     const real_batch re = constants::pio2<real_batch>() - arcsine.real();
+     const real_batch im = -arcsine.imag();
+     return { re, im };
+ }
+
+ // acosh
+ /* origin: boost/simd/arch/common/simd/function/acosh.hpp */
+ /*
+ * ====================================================
+ * copyright 2016 NumScale SAS
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
+ * ====================================================
+ */
+ // Generic real acosh kernel, expressed through log1p.
+ // When self - 1 exceeds oneotwoeps the input itself is passed to log1p and
+ // log_2 is added afterwards; otherwise the argument is
+ // (self - 1) + sqrt(2 * (self - 1) + (self - 1)^2).
+ template <class A, class T>
+ inline batch<T, A> acosh(batch<T, A> const& self, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<T, A>;
+     const batch_type excess = self - batch_type(1.);
+     const auto is_large = excess > constants::oneotwoeps<batch_type>();
+     const batch_type regular_arg = excess + sqrt(excess + excess + excess * excess);
+     const batch_type log_term = log1p(select(is_large, self, regular_arg));
+     return select(is_large, log_term + constants::log_2<batch_type>(), log_term);
+ }
+ // Complex acosh: acos(z) with its parts swapped as (-imag, real),
+ // i.e. acos(z) multiplied by i.
+ template <class A, class T>
+ inline batch<std::complex<T>, A> acosh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<std::complex<T>, A>;
+     const batch_type arccos = acos(z);
+     return batch_type(-arccos.imag(), arccos.real());
+ }
+
+ // asin
+ // Generic single-precision asin kernel.
+ // Works on |self| and restores the sign at the end by XOR-ing bitofsign(self).
+ // Lanes with |self| > 0.5 evaluate the polynomial at (1 - |self|) / 2 and
+ // return pio2 - 2 * approx; the other lanes evaluate it at |self|^2.
+ template <class A>
+ inline batch<float, A> asin(batch<float, A> const& self, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<float, A>;
+     const batch_type magnitude = abs(self);
+     const batch_type sign = bitofsign(self);
+     const auto upper_half = magnitude > batch_type(0.5);
+     const batch_type poly_arg = select(upper_half, batch_type(0.5) * (batch_type(1.) - magnitude), magnitude * magnitude);
+     const batch_type base = select(upper_half, sqrt(poly_arg), magnitude);
+     // Coefficients are IEEE-754 single-precision bit patterns.
+     const batch_type poly = detail::horner<batch_type,
+                                            0x3e2aaae4,
+                                            0x3d9980f6,
+                                            0x3d3a3ec7,
+                                            0x3cc617e3,
+                                            0x3d2cb352>(poly_arg);
+     const batch_type approx = fma(poly, poly_arg * base, base);
+     const batch_type unsigned_result = select(upper_half, constants::pio2<batch_type>() - (approx + approx), approx);
+     return unsigned_result ^ sign;
+ }
+ // Generic double-precision asin kernel.
+ // Works on x = |self|; the sign is restored by XOR-ing bitofsign(self).
+ // Three regimes are merged in the final select:
+ //  - x < sqrteps : x is returned unchanged,
+ //  - x > ct1     : zz1, a rational approximation in (1 - x),
+ //  - otherwise   : zz2, a rational approximation in self * self.
+ // Lanes with x > 1 yield NaN (the sign XOR is not applied to those lanes).
+ template <class A>
+ inline batch<double, A> asin(batch<double, A> const& self, requires_arch<generic>) noexcept
+ {
+ using batch_type = batch<double, A>;
+ batch_type x = abs(self);
+ auto small_cond = x < constants::sqrteps<batch_type>();
+ // 0x3fe4000000000000 is the bit pattern of 0.625.
+ batch_type ct1 = batch_type(bit_cast<double>(int64_t(0x3fe4000000000000)));
+ // Branch for x close to 1: rational function of zz1 = 1 - x.
+ batch_type zz1 = batch_type(1.) - x;
+ batch_type vp = zz1 * detail::horner<batch_type, 0x403c896240f3081dull, 0xc03991aaac01ab68ull, 0x401bdff5baf33e6aull, 0xbfe2079259f9290full, 0x3f684fc3988e9f08ull>(zz1) / detail::horner1<batch_type, 0x40756709b0b644beull, 0xc077fe08959063eeull, 0x40626219af6a7f42ull, 0xc035f2a2b6bf5d8cull>(zz1);
+ zz1 = sqrt(zz1 + zz1);
+ // pio4 is added in two steps around the pio_2lo correction term.
+ batch_type z = constants::pio4<batch_type>() - zz1;
+ zz1 = fms(zz1, vp, constants::pio_2lo<batch_type>());
+ z = z - zz1;
+ zz1 = z + constants::pio4<batch_type>();
+ // Branch for the remaining range: x + x * R(self^2).
+ batch_type zz2 = self * self;
+ z = zz2 * detail::horner<batch_type, 0xc020656c06ceafd5ull, 0x40339007da779259ull, 0xc0304331de27907bull, 0x4015c74b178a2dd9ull, 0xbfe34341333e5c16ull, 0x3f716b9b0bd48ad3ull>(zz2) / detail::horner1<batch_type, 0xc04898220a3607acull, 0x4061705684ffbf9dull, 0xc06265bb6d3576d7ull, 0x40519fc025fe9054ull, 0xc02d7b590b5e0eabull>(zz2);
+ zz2 = fma(x, z, x);
+ return select(x > batch_type(1.), constants::nan<batch_type>(),
+ select(small_cond, x,
+ select(x > ct1, zz1, zz2))
+ ^ bitofsign(self));
+ }
+ // Complex asin.
+ // General lanes use -i * log(i*z + sqrt(1 - z*z)).
+ // Lanes whose imaginary part equals 0 use the real asin, except that
+ // lanes with |Re z| > 1 are set to (pio2, 0).
+ template <class A, class T>
+ inline batch<std::complex<T>, A> asin(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<std::complex<T>, A>;
+     using real_batch = typename batch_type::real_batch;
+     const real_batch re = z.real();
+     const real_batch im = z.imag();
+
+     // i*z and 1 - z*z, written out on the real and imaginary parts.
+     const batch_type rotated(-im, re);
+     const batch_type one_minus_square(real_batch(1.) - (re - im) * (re + im), -2 * re * im);
+     const batch_type logarithm = log(rotated + sqrt(one_minus_square));
+     // Multiplication by -i: (a, b) -> (b, -a).
+     const batch_type general(logarithm.imag(), -logarithm.real());
+
+     const batch_type clamped(constants::pio2<real_batch>(), real_batch(0.));
+     const batch_type real_only(asin(re), real_batch(0.));
+     const batch_type on_real_axis = select(fabs(re) > real_batch(1.), clamped, real_only);
+     return select(im == real_batch(0.), on_real_axis, general);
+ }
+
+ // asinh
+ /* origin: boost/simd/arch/common/simd/function/asinh.hpp */
+ /*
+ * ====================================================
+ * copyright 2016 NumScale SAS
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
+ * ====================================================
+ */
+ namespace detail
+ {
+ // Integral average: shared bits plus half of the differing bits, which
+ // avoids forming the sum x1 + x2.
+ template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+ inline batch<T, A>
+ average(const batch<T, A>& x1, const batch<T, A>& x2) noexcept
+ {
+     const batch<T, A> shared_bits = x1 & x2;
+     const batch<T, A> half_of_differing = (x1 ^ x2) >> 1;
+     return shared_bits + half_of_differing;
+ }
+
+ // Floating-point average: x1 * 0.5 + x2 * 0.5, with the sum fused through fma.
+ template <class A, class T>
+ inline batch<T, A>
+ averagef(const batch<T, A>& x1, const batch<T, A>& x2) noexcept
+ {
+     using batch_type = batch<T, A>;
+     const batch_type half_of_x2 = x2 * batch_type(0.5);
+     return fma(x1, batch_type(0.5), half_of_x2);
+ }
+ // float overload: forwards to the floating-point implementation.
+ template <class A>
+ inline batch<float, A> average(batch<float, A> const& x1, batch<float, A> const& x2) noexcept
+ {
+     return averagef<A, float>(x1, x2);
+ }
+ // double overload: forwards to the floating-point implementation.
+ template <class A>
+ inline batch<double, A> average(batch<double, A> const& x1, batch<double, A> const& x2) noexcept
+ {
+     return averagef<A, double>(x1, x2);
+ }
+ }
+ // Generic single-precision asinh kernel.
+ // Lanes with |self| < 0.5 use an odd polynomial in |self|; the others use
+ // log(...) + log_2. The sign is restored by XOR-ing bitofsign(self).
+ // Unless XSIMD_NO_NANS is defined, NaN inputs are mapped to NaN explicitly.
+ template <class A>
+ inline batch<float, A> asinh(batch<float, A> const& self, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<float, A>;
+     const batch_type magnitude = abs(self);
+     const auto below_half = magnitude < batch_type(0.5);
+     const batch_type squared = magnitude * magnitude;
+     const batch_type sign = bitofsign(self);
+     batch_type small_branch(0.);
+     if (any(below_half))
+     {
+         small_branch = detail::horner<batch_type,
+                                       0x3f800000,
+                                       0xbe2aa9ad,
+                                       0x3d9949b1,
+                                       0xbd2ee581,
+                                       0x3ca4d6e6>(squared)
+             * magnitude;
+         // Every lane is small: the logarithmic branch can be skipped entirely.
+         if (all(below_half))
+         {
+             return small_branch ^ sign;
+         }
+     }
+     // Above oneosqrteps the argument itself is used in place of the
+     // average of |self| and hypot(1, |self|).
+     const batch_type log_arg = select(magnitude > constants::oneosqrteps<batch_type>(), magnitude, detail::average(magnitude, hypot(batch_type(1.), magnitude)));
+     const batch_type merged = select(below_half, small_branch, log(log_arg) + constants::log_2<batch_type>()) ^ sign;
+#ifndef XSIMD_NO_NANS
+     return select(isnan(self), constants::nan<batch_type>(), merged);
+#else
+     return merged;
+#endif
+ }
+ // Generic double-precision asinh kernel, expressed through log1p.
+ // Lanes above oneosqrteps use log1p(|self| - 1) + log_2; the others use
+ // log1p(|self| + |self|^2 / (1 + hypot(1, |self|))).
+ // Unless XSIMD_NO_INFINITIES is defined, infinite lanes are passed to log1p
+ // unchanged. The sign is restored by XOR-ing bitofsign(self).
+ template <class A>
+ inline batch<double, A> asinh(batch<double, A> const& self, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<double, A>;
+     const batch_type magnitude = abs(self);
+     const auto is_large = magnitude > constants::oneosqrteps<batch_type>();
+     batch_type log_arg = select(is_large, magnitude - batch_type(1.), magnitude + magnitude * magnitude / (batch_type(1.) + hypot(batch_type(1.), magnitude)));
+#ifndef XSIMD_NO_INFINITIES
+     log_arg = select(magnitude == constants::infinity<batch_type>(), magnitude, log_arg);
+#endif
+     const batch_type log_term = log1p(log_arg);
+     const batch_type unsigned_result = select(is_large, log_term + constants::log_2<batch_type>(), log_term);
+     return bitofsign(self) ^ unsigned_result;
+ }
+ // Complex asinh via asin: asinh(z) = -i * asin(i * z).
+ template <class A, class T>
+ inline batch<std::complex<T>, A> asinh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<std::complex<T>, A>;
+     // i * z has parts (-imag, real).
+     const batch_type rotated_input(-z.imag(), z.real());
+     const batch_type arcsine = asin(rotated_input);
+     // Multiplying by -i maps (a, b) to (b, -a).
+     return batch_type(arcsine.imag(), -arcsine.real());
+ }
+
+ // atan
+ namespace detail
+ {
+ // Single-precision atan kernel.
+ // x    : argument; the callers in this file pass an absolute value.
+ // recx : the callers in this file pass batch_type(1.) / x.
+ // The input is split into three ranges, each with an offset yy and a
+ // reduced argument xx:
+ //  - below the flag2 threshold     : yy = 0,    xx = x
+ //  - flag2 (threshold <= x < tan3pio8) : yy = pio4, xx = (x - 1) / (x + 1)
+ //  - !flag1 (x >= tan3pio8)        : yy = pio2, xx = -recx
+ // The result is yy + xx + xx * z * P(z) with z = xx * xx, plus the matching
+ // pio_4lo / pio_2lo correction.
+ template <class A>
+ static inline batch<float, A> kernel_atan(const batch<float, A>& x, const batch<float, A>& recx) noexcept
+ {
+ using batch_type = batch<float, A>;
+ const auto flag1 = x < constants::tan3pio8<batch_type>();
+ // 0x3ed413cd is the bit pattern of approximately 0.41421; the double
+ // overload uses constants::tanpio8 at the same place.
+ const auto flag2 = (x >= batch_type(bit_cast<float>((uint32_t)0x3ed413cd))) && flag1;
+ batch_type yy = select(flag1, batch_type(0.), constants::pio2<batch_type>());
+ yy = select(flag2, constants::pio4<batch_type>(), yy);
+ batch_type xx = select(flag1, x, -recx);
+ xx = select(flag2, (x - batch_type(1.)) / (x + batch_type(1.)), xx);
+ const batch_type z = xx * xx;
+ // Polynomial coefficients are IEEE-754 single-precision bit patterns.
+ batch_type z1 = detail::horner<batch_type,
+ 0xbeaaaa2aul,
+ 0x3e4c925ful,
+ 0xbe0e1b85ul,
+ 0x3da4f0d1ul>(z);
+ z1 = fma(xx, z1 * z, xx);
+ // Low-order parts of the offsets are added separately from yy.
+ z1 = select(flag2, z1 + constants::pio_4lo<batch_type>(), z1);
+ z1 = select(!flag1, z1 + constants::pio_2lo<batch_type>(), z1);
+ return yy + z1;
+ }
+ // Double-precision atan kernel.
+ // x    : argument; the callers in this file pass an absolute value.
+ // recx : the callers in this file pass batch_type(1.) / x.
+ // Range selection:
+ //  - x < tanpio8             : offset 0,    reduced argument x
+ //  - tanpio8 <= x < tan3pio8 : offset pio4, reduced argument (x - 1) / (x + 1)
+ //  - x >= tan3pio8           : offset pio2, reduced argument -recx
+ // The reduced argument goes through a rational approximation
+ // (horner / horner1) in its square.
+ template <class A>
+ static inline batch<double, A> kernel_atan(const batch<double, A>& x, const batch<double, A>& recx) noexcept
+ {
+ using batch_type = batch<double, A>;
+ const auto flag1 = x < constants::tan3pio8<batch_type>();
+ const auto flag2 = (x >= constants::tanpio8<batch_type>()) && flag1;
+ batch_type yy = select(flag1, batch_type(0.), constants::pio2<batch_type>());
+ yy = select(flag2, constants::pio4<batch_type>(), yy);
+ batch_type xx = select(flag1, x, -recx);
+ xx = select(flag2, (x - batch_type(1.)) / (x + batch_type(1.)), xx);
+ batch_type z = xx * xx;
+ // Coefficients are IEEE-754 double-precision bit patterns.
+ z *= detail::horner<batch_type,
+ 0xc0503669fd28ec8eull,
+ 0xc05eb8bf2d05ba25ull,
+ 0xc052c08c36880273ull,
+ 0xc03028545b6b807aull,
+ 0xbfec007fa1f72594ull>(z)
+ / detail::horner1<batch_type,
+ 0x4068519efbbd62ecull,
+ 0x407e563f13b049eaull,
+ 0x407b0e18d2e2be3bull,
+ 0x4064a0dd43b8fa25ull,
+ 0x4038dbc45b14603cull>(z);
+ z = fma(xx, z, xx);
+ // Low-order parts of the offsets are added separately from yy.
+ z = select(flag2, z + constants::pio_4lo<batch_type>(), z);
+ z = z + select(flag1, batch_type(0.), constants::pio_2lo<batch_type>());
+ return yy + z;
+ }
+ }
+ // Generic real atan: runs kernel_atan on |self| and its reciprocal, then
+ // restores the sign by XOR-ing bitofsign(self).
+ template <class A, class T>
+ inline batch<T, A> atan(batch<T, A> const& self, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<T, A>;
+     const batch_type magnitude = abs(self);
+     const batch_type reciprocal = batch_type(1.) / magnitude;
+     const batch_type unsigned_result = detail::kernel_atan(magnitude, reciprocal);
+     return unsigned_result ^ bitofsign(self);
+ }
+ // Complex atan for z = x + i*y:
+ //   real part = 0.5 * atan2(2x, 1 - x^2 - y^2)
+ //   imag part = 0.25 * log((x^2 + (y + 1)^2) / (x^2 + (y - 1)^2))
+ // The lane z == (0, 1) is set to (0, +infinity) explicitly.
+ template <class A, class T>
+ inline batch<std::complex<T>, A> atan(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<std::complex<T>, A>;
+     using real_batch = typename batch_type::real_batch;
+     const real_batch re = z.real();
+     const real_batch im = z.imag();
+     const real_batch re_squared = re * re;
+     const real_batch one(1.);
+     const real_batch atan2_denominator = one - re_squared - (im * im);
+     const real_batch real_part = 0.5 * atan2(2. * re, atan2_denominator);
+     real_batch upper = im + one;
+     upper = re_squared + upper * upper;
+     real_batch lower = im - one;
+     lower = re_squared + lower * lower;
+     const batch_type pole(real_batch(0.), constants::infinity<real_batch>());
+     const batch_type regular(real_part, 0.25 * log(upper / lower));
+     return select((re == real_batch(0.)) && (im == real_batch(1.)), pole, regular);
+ }
+
+ // atanh
+ /* origin: boost/simd/arch/common/simd/function/atanh.hpp */
+ /*
+ * ====================================================
+ * copyright 2016 NumScale SAS
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
+ * ====================================================
+ */
+ // Generic real atanh kernel: 0.5 * log1p(arg) on |self|, with the sign
+ // restored by XOR-ing bitofsign(self).
+ // With t = 2|self|: lanes with |self| < 0.5 use arg = t + t * |self| / (1 - |self|),
+ // the other lanes use arg = t / (1 - |self|).
+ template <class A, class T>
+ inline batch<T, A> atanh(batch<T, A> const& self, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<T, A>;
+     const batch_type magnitude = abs(self);
+     const batch_type doubled = magnitude + magnitude;
+     const batch_type complement = batch_type(1.) - magnitude;
+     const auto below_half = magnitude < batch_type(0.5);
+     const batch_type quotient = select(below_half, magnitude, doubled) / complement;
+     const batch_type log_arg = select(below_half, fma(doubled, quotient, doubled), quotient);
+     return bitofsign(self) ^ (batch_type(0.5) * log1p(log_arg));
+ }
+ // Complex atanh via atan: atanh(z) = -i * atan(i * z).
+ template <class A, class T>
+ inline batch<std::complex<T>, A> atanh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<std::complex<T>, A>;
+     // i * z has parts (-imag, real).
+     const batch_type rotated_input(-z.imag(), z.real());
+     const batch_type arctangent = atan(rotated_input);
+     // Multiplying by -i maps (a, b) to (b, -a).
+     return batch_type(arctangent.imag(), -arctangent.real());
+ }
+
+ // atan2
+ // Generic atan2(self, other).
+ // kernel_atan is evaluated on |self / other|; lanes where other is not
+ // strictly positive are reflected to pi - angle, and the result is
+ // multiplied by signnz(self).
+ template <class A, class T>
+ inline batch<T, A> atan2(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<T, A>;
+     const batch_type ratio = abs(self / other);
+     const batch_type angle = detail::kernel_atan(ratio, batch_type(1.) / ratio);
+     const batch_type unsigned_result = select(other > batch_type(0.), angle, constants::pi<batch_type>() - angle);
+     return unsigned_result * signnz(self);
+ }
+
+ // cos
+ namespace detail
+ {
+ // Generic overload: keeps the two lowest bits of every lane (x & 3).
+ template <class T, class A>
+ inline batch<T, A> quadrant(const batch<T, A>& x) noexcept
+ {
+     const batch<T, A> low_two_bits(3);
+     return x & low_two_bits;
+ }
+
+ // float overload: converts to integers, applies the generic overload and
+ // converts the result back to float.
+ template <class A>
+ inline batch<float, A> quadrant(const batch<float, A>& x) noexcept
+ {
+     const auto as_integers = to_int(x);
+     return to_float(quadrant(as_integers));
+ }
+
+ // double overload: computed in floating point as 4 * frac(x / 4).
+ template <class A>
+ inline batch<double, A> quadrant(const batch<double, A>& x) noexcept
+ {
+     using batch_type = batch<double, A>;
+     const batch_type quarter = x * batch_type(0.25);
+     const batch_type fractional = quarter - floor(quarter);
+     return fractional * batch_type(4.);
+ }
+ /* origin: boost/simd/arch/common/detail/simd/f_trig_evaluation.hpp */
+ /*
+ * ====================================================
+ * copyright 2016 NumScale SAS
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
+ * ====================================================
+ */
+
+ // Single-precision cosine polynomial.
+ // z is the squared reduced argument (callers pass xr * xr).
+ // Returns 1 - z / 2 + z^2 * P(z).
+ template <class A>
+ inline batch<float, A> cos_eval(const batch<float, A>& z) noexcept
+ {
+     using batch_type = batch<float, A>;
+     const batch_type poly = detail::horner<batch_type,
+                                            0x3d2aaaa5,
+                                            0xbab60619,
+                                            0x37ccf5ce>(z);
+     const batch_type tail = fma(z, batch_type(-0.5), poly * z * z);
+     return batch_type(1.) + tail;
+ }
+
+ // Single-precision sine polynomial.
+ // x is the reduced argument and z its square (callers pass xr * xr, xr).
+ // Returns x + x * z * P(z).
+ template <class A>
+ inline batch<float, A> sin_eval(const batch<float, A>& z, const batch<float, A>& x) noexcept
+ {
+     using batch_type = batch<float, A>;
+     const batch_type poly = detail::horner<batch_type,
+                                            0xbe2aaaa2,
+                                            0x3c08839d,
+                                            0xb94ca1f9>(z);
+     const batch_type scaled = poly * z;
+     return fma(scaled, x, x);
+ }
+
+ // Single-precision polynomial shared by tan_eval and cot_eval.
+ // Returns z + z^3 * P(z^2).
+ template <class A>
+ static inline batch<float, A> base_tancot_eval(const batch<float, A>& z) noexcept
+ {
+     using batch_type = batch<float, A>;
+     const batch_type squared = z * z;
+     const batch_type poly = detail::horner<batch_type,
+                                            0x3eaaaa6f,
+                                            0x3e0896dd,
+                                            0x3d5ac5c9,
+                                            0x3cc821b5,
+                                            0x3b4c779c,
+                                            0x3c19c53b>(squared);
+     const batch_type cubed = squared * z;
+     return fma(poly, cubed, z);
+ }
+
+ // Single-precision tangent: lanes where test holds receive the base
+ // polynomial value y, the others receive -1 / y.
+ template <class A, class BB>
+ static inline batch<float, A> tan_eval(const batch<float, A>& z, const BB& test) noexcept
+ {
+     using batch_type = batch<float, A>;
+     const batch_type base = base_tancot_eval(z);
+     const batch_type negated_inverse = -batch_type(1.) / base;
+     return select(test, base, negated_inverse);
+ }
+
+ // Single-precision cotangent: lanes where test holds receive 1 / y, the
+ // others receive -y, with y the base polynomial value.
+ template <class A, class BB>
+ static inline batch<float, A> cot_eval(const batch<float, A>& z, const BB& test) noexcept
+ {
+     using batch_type = batch<float, A>;
+     const batch_type base = base_tancot_eval(z);
+     const batch_type inverse = batch_type(1.) / base;
+     return select(test, inverse, -base);
+ }
+
+ /* origin: boost/simd/arch/common/detail/simd/d_trig_evaluation.hpp */
+ /*
+ * ====================================================
+ * copyright 2016 NumScale SAS
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
+ * ====================================================
+ */
+ // Double-precision cosine polynomial.
+ // z is the squared reduced argument (callers pass xr * xr).
+ // Returns 1 - z * P(z); coefficients are IEEE-754 double bit patterns.
+ template <class A>
+ static inline batch<double, A> cos_eval(const batch<double, A>& z) noexcept
+ {
+     using batch_type = batch<double, A>;
+     const batch_type poly = detail::horner<batch_type,
+                                            0x3fe0000000000000ull,
+                                            0xbfa5555555555551ull,
+                                            0x3f56c16c16c15d47ull,
+                                            0xbefa01a019ddbcd9ull,
+                                            0x3e927e4f8e06d9a5ull,
+                                            0xbe21eea7c1e514d4ull,
+                                            0x3da8ff831ad9b219ull>(z);
+     const batch_type correction = poly * z;
+     return batch_type(1.) - correction;
+ }
+
+ // Double-precision sine polynomial.
+ // x is the reduced argument and z its square (callers pass xr * xr, xr).
+ // Returns x + x * z * P(z); coefficients are IEEE-754 double bit patterns.
+ template <class A>
+ static inline batch<double, A> sin_eval(const batch<double, A>& z, const batch<double, A>& x) noexcept
+ {
+     using batch_type = batch<double, A>;
+     const batch_type poly = detail::horner<batch_type,
+                                            0xbfc5555555555548ull,
+                                            0x3f8111111110f7d0ull,
+                                            0xbf2a01a019bfdf03ull,
+                                            0x3ec71de3567d4896ull,
+                                            0xbe5ae5e5a9291691ull,
+                                            0x3de5d8fd1fcf0ec1ull>(z);
+     const batch_type scaled = poly * z;
+     return fma(scaled, x, x);
+ }
+
+ // Double-precision rational approximation shared by tan_eval and cot_eval.
+ // Returns z + z * z^2 * N(z^2) / D(z^2).
+ template <class A>
+ static inline batch<double, A> base_tancot_eval(const batch<double, A>& z) noexcept
+ {
+     using batch_type = batch<double, A>;
+     const batch_type squared = z * z;
+     const batch_type numerator = detail::horner<batch_type,
+                                                 0xc1711fead3299176ull,
+                                                 0x413199eca5fc9dddull,
+                                                 0xc0c992d8d24f3f38ull>(squared);
+     const batch_type denominator = detail::horner1<batch_type,
+                                                    0xc189afe03cbe5a31ull,
+                                                    0x4177d98fc2ead8efull,
+                                                    0xc13427bc582abc96ull,
+                                                    0x40cab8a5eeb36572ull>(squared);
+     const batch_type correction = squared * (numerator / denominator);
+     return fma(z, correction, z);
+ }
+
+ // Double-precision tangent: lanes where test holds receive the base value
+ // y, the others receive -1 / y.
+ template <class A, class BB>
+ static inline batch<double, A> tan_eval(const batch<double, A>& z, const BB& test) noexcept
+ {
+     using batch_type = batch<double, A>;
+     const batch_type base = base_tancot_eval(z);
+     const batch_type negated_inverse = -batch_type(1.) / base;
+     return select(test, base, negated_inverse);
+ }
+
+ // Double-precision cotangent: lanes where test holds receive 1 / y, the
+ // others receive -y, with y the base value.
+ template <class A, class BB>
+ static inline batch<double, A> cot_eval(const batch<double, A>& z, const BB& test) noexcept
+ {
+     using batch_type = batch<double, A>;
+     const batch_type base = base_tancot_eval(z);
+     const batch_type inverse = batch_type(1.) / base;
+     return select(test, inverse, -base);
+ }
+ /* origin: boost/simd/arch/common/detail/simd/trig_reduction.hpp */
+ /*
+ * ====================================================
+ * copyright 2016 NumScale SAS
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
+ * ====================================================
+ */
+
+ // Tag type used as the default Tag argument of trigo_reducer and detail::sin.
+ struct trigo_radian_tag {};
+ // Tag type selecting the trigo_reducer specialization that scales the
+ // reduced argument by pi.
+ struct trigo_pi_tag {};
+
+ // Range reduction used by the radian trigonometric kernels.
+ //
+ // reduce(x, xr) stores the reduced argument in xr and returns the quadrant
+ // index as a batch of floating-point values. The callers in this file
+ // (cos, sin, sincos, tan) pass abs(self) as x.
+ //
+ // The reduction strategy is chosen for the whole batch with all(...), so
+ // the lane with the largest magnitude decides which path every lane takes.
+ template <class B, class Tag = trigo_radian_tag>
+ struct trigo_reducer
+ {
+     static inline B reduce(const B& x, B& xr) noexcept
+     {
+         if (all(x <= constants::pio4<B>()))
+         {
+             // Every lane is already within the pio4 bound: no reduction.
+             xr = x;
+             return B(0.);
+         }
+         else if (all(x <= constants::pio2<B>()))
+         {
+             // Lanes above pio4 get pio2 subtracted in three pieces
+             // (pio2_1, pio2_2, pio2_3) and are reported as quadrant 1.
+             auto test = x > constants::pio4<B>();
+             xr = x - constants::pio2_1<B>();
+             xr -= constants::pio2_2<B>();
+             xr -= constants::pio2_3<B>();
+             xr = select(test, xr, x);
+             return select(test, B(1.), B(0.));
+         }
+         else if (all(x <= constants::twentypi<B>()))
+         {
+             // xi is the nearest integer to x * twoopi; xi times the
+             // three-piece pio2 is removed from x.
+             B xi = nearbyint(x * constants::twoopi<B>());
+             xr = fnma(xi, constants::pio2_1<B>(), x);
+             xr -= xi * constants::pio2_2<B>();
+             xr -= xi * constants::pio2_3<B>();
+             return quadrant(xi);
+         }
+         else if (all(x <= constants::mediumpi<B>()))
+         {
+             // Staged subtraction with error compensation. Only the tail
+             // term of the last stage is consumed, so the tails of the
+             // earlier stages (which were computed and then overwritten
+             // without being read) are not evaluated.
+             B fn = nearbyint(x * constants::twoopi<B>());
+             B r = x - fn * constants::pio2_1<B>();
+             B t = r;
+             B w = fn * constants::pio2_2<B>();
+             r = t - w;
+             t = r;
+             w = fn * constants::pio2_3<B>();
+             r = t - w;
+             w = fn * constants::pio2_3t<B>() - ((t - r) - w);
+             xr = r - w;
+             return quadrant(fn);
+         }
+         else
+         {
+             // Fallback: reduce each lane with the scalar
+             // __ieee754_rem_pio2 routine.
+             static constexpr std::size_t size = B::size;
+             using value_type = typename B::value_type;
+             alignas(B) std::array<value_type, size> tmp;
+             alignas(B) std::array<value_type, size> txr;
+             alignas(B) std::array<value_type, size> args;
+             x.store_aligned(args.data());
+
+             for (std::size_t i = 0; i < size; ++i)
+             {
+                 double arg = args[i];
+                 if (arg == std::numeric_limits<value_type>::infinity())
+                 {
+                     // Infinite lane: quadrant 0 and a NaN reduced argument.
+                     tmp[i] = 0.;
+                     txr[i] = std::numeric_limits<value_type>::quiet_NaN();
+                 }
+                 else
+                 {
+                     double y[2];
+                     std::int32_t n = ::xsimd::detail::__ieee754_rem_pio2(arg, y);
+                     tmp[i] = value_type(n & 3);
+                     txr[i] = value_type(y[0]);
+                 }
+             }
+             xr = B::load_aligned(txr.data());
+             return B::load_aligned(tmp.data());
+         }
+     }
+ };
+
+ // Reduction selected by trigo_pi_tag.
+ // xi is the nearest integer to 2 * x; xr receives (x - xi / 2) * pi and the
+ // quadrant of xi is returned.
+ template <class B>
+ struct trigo_reducer<B, trigo_pi_tag>
+ {
+     static inline B reduce(const B& x, B& xr) noexcept
+     {
+         const B nearest_half_step = nearbyint(x * B(2.));
+         const B remainder = x - nearest_half_step * B(0.5);
+         xr = remainder * constants::pi<B>();
+         return quadrant(nearest_half_step);
+     }
+ };
+
+ }
+ // Generic real cosine.
+ // |self| is range-reduced, then the quadrant index n selects between the
+ // sine and cosine polynomials and decides the sign of the result.
+ template <class A, class T>
+ inline batch<T, A> cos(batch<T, A> const& self, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<T, A>;
+     const batch_type zero(0.);
+     batch_type reduced = constants::nan<batch_type>();
+     const batch_type quadrant_index = detail::trigo_reducer<batch_type>::reduce(abs(self), reduced);
+     // upper_half is 1 for quadrants 2 and 3; odd_quadrant is n - 2 * upper_half.
+     const auto upper_half = select(quadrant_index >= batch_type(2.), batch_type(1.), zero);
+     const auto odd_quadrant = fma(batch_type(-2.), upper_half, quadrant_index);
+     const auto sign_flip = select((odd_quadrant ^ upper_half) != zero, constants::signmask<batch_type>(), zero);
+     const batch_type squared = reduced * reduced;
+     const batch_type sine = detail::sin_eval(squared, reduced);
+     const batch_type cosine = detail::cos_eval(squared);
+     const batch_type magnitude = select(odd_quadrant != zero, sine, cosine);
+     return magnitude ^ sign_flip;
+ }
+
+ // Complex cosine: (cos(x) * cosh(y), -sin(x) * sinh(y)) for z = x + i*y.
+ template <class A, class T>
+ inline batch<std::complex<T>, A> cos(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
+ {
+     const auto real_part = cos(z.real()) * cosh(z.imag());
+     const auto imag_part = -sin(z.real()) * sinh(z.imag());
+     return { real_part, imag_part };
+ }
+
+ // cosh
+
+ /* origin: boost/simd/arch/common/simd/function/cosh.hpp */
+ /*
+ * ====================================================
+ * copyright 2016 NumScale SAS
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
+ * ====================================================
+ */
+
+ // Generic real cosh, computed on |self|.
+ // Lanes above maxlog - log_2 evaluate exp(|self| / 2) and return
+ // (0.5 * e) * e; the other lanes return the average of exp(|self|) and its
+ // reciprocal.
+ template <class A, class T>
+ inline batch<T, A> cosh(batch<T, A> const& self, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<T, A>;
+     const batch_type magnitude = abs(self);
+     const auto near_overflow = magnitude > (constants::maxlog<batch_type>() - constants::log_2<batch_type>());
+     const batch_type scale = select(near_overflow, batch_type(0.5), batch_type(1.));
+     const batch_type exponential = exp(magnitude * scale);
+     const batch_type half_exponential = batch_type(0.5) * exponential;
+     return select(near_overflow, half_exponential * exponential, detail::average(exponential, batch_type(1.) / exponential));
+ }
+ // Complex cosh: (cosh(x) * cos(y), sinh(x) * sin(y)) for z = x + i*y.
+ template <class A, class T>
+ inline batch<std::complex<T>, A> cosh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
+ {
+     const auto re = z.real();
+     const auto im = z.imag();
+     const auto real_part = cosh(re) * cos(im);
+     const auto imag_part = sinh(re) * sin(im);
+     return { real_part, imag_part };
+ }
+
+ // sin
+ namespace detail
+ {
+ // Sine implementation parameterised on the reduction tag.
+ // |self| is range-reduced with trigo_reducer<batch_type, Tag>; the quadrant
+ // index selects between the sine and cosine polynomials, and the final
+ // sign combines bitofsign(self) with the quadrant-derived sign.
+ template <class A, class T, class Tag = trigo_radian_tag>
+ inline batch<T, A> sin(batch<T, A> const& self, Tag = Tag()) noexcept
+ {
+     using batch_type = batch<T, A>;
+     const batch_type zero(0.);
+     batch_type reduced = constants::nan<batch_type>();
+     const batch_type quadrant_index = detail::trigo_reducer<batch_type, Tag>::reduce(abs(self), reduced);
+     // upper_half is 1 for quadrants 2 and 3; odd_quadrant is n - 2 * upper_half.
+     const auto upper_half = select(quadrant_index >= batch_type(2.), batch_type(1.), zero);
+     const auto odd_quadrant = fma(batch_type(-2.), upper_half, quadrant_index);
+     const auto sign_flip = bitofsign(self) ^ select(upper_half != zero, constants::signmask<batch_type>(), zero);
+     const batch_type squared = reduced * reduced;
+     const batch_type sine = detail::sin_eval(squared, reduced);
+     const batch_type cosine = detail::cos_eval(squared);
+     const batch_type magnitude = select(odd_quadrant == zero, sine, cosine);
+     return magnitude ^ sign_flip;
+ }
+ }
+
+ // Generic real sine: detail::sin with the radian reduction tag.
+ template <class A, class T>
+ inline batch<T, A> sin(batch<T, A> const& self, requires_arch<generic>) noexcept
+ {
+     return detail::sin(self, detail::trigo_radian_tag());
+ }
+
+ // Complex sine: (sin(x) * cosh(y), cos(x) * sinh(y)) for z = x + i*y.
+ template <class A, class T>
+ inline batch<std::complex<T>, A> sin(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
+ {
+     const auto real_part = sin(z.real()) * cosh(z.imag());
+     const auto imag_part = cos(z.real()) * sinh(z.imag());
+     return { real_part, imag_part };
+ }
+
+ // sincos
+ // Generic real sincos: returns the pair (sin(self), cos(self)).
+ // The range reduction and both polynomials are evaluated once and shared
+ // between the two results; only the selection and the sign differ.
+ template <class A, class T>
+ inline std::pair<batch<T, A>, batch<T, A>> sincos(batch<T, A> const& self, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<T, A>;
+     const batch_type zero(0.);
+     batch_type reduced = constants::nan<batch_type>();
+     const batch_type quadrant_index = detail::trigo_reducer<batch_type>::reduce(abs(self), reduced);
+     // upper_half is 1 for quadrants 2 and 3; odd_quadrant is n - 2 * upper_half.
+     const auto upper_half = select(quadrant_index >= batch_type(2.), batch_type(1.), zero);
+     const auto odd_quadrant = fma(batch_type(-2.), upper_half, quadrant_index);
+     const batch_type squared = reduced * reduced;
+     const batch_type sine_poly = detail::sin_eval(squared, reduced);
+     const batch_type cosine_poly = detail::cos_eval(squared);
+
+     // Sine: sign of the input combined with the quadrant sign.
+     const auto sine_sign = bitofsign(self) ^ select(upper_half != zero, constants::signmask<batch_type>(), zero);
+     const batch_type sine_magnitude = select(odd_quadrant == zero, sine_poly, cosine_poly);
+
+     // Cosine: the sign depends on the quadrant only.
+     const auto cosine_sign = select((odd_quadrant ^ upper_half) != zero, constants::signmask<batch_type>(), zero);
+     const batch_type cosine_magnitude = select(odd_quadrant != zero, sine_poly, cosine_poly);
+
+     return std::make_pair(sine_magnitude ^ sine_sign, cosine_magnitude ^ cosine_sign);
+ }
+
+ // Complex sincos: returns the pair (sin(z), cos(z)), sharing the four real
+ // evaluations cos(x), sin(x), cosh(y), sinh(y) for z = x + i*y.
+ template <class A, class T>
+ inline std::pair<batch<std::complex<T>, A>, batch<std::complex<T>, A>>
+ sincos(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<std::complex<T>, A>;
+     using real_batch = typename batch_type::real_batch;
+     const real_batch cos_re = cos(z.real());
+     const real_batch sin_re = sin(z.real());
+     const real_batch cosh_im = cosh(z.imag());
+     const real_batch sinh_im = sinh(z.imag());
+     const batch_type sine(sin_re * cosh_im, cos_re * sinh_im);
+     const batch_type cosine(cos_re * cosh_im, -sin_re * sinh_im);
+     return std::make_pair(sine, cosine);
+ }
+
+ // sinh
+ namespace detail
+ {
+ /* origin: boost/simd/arch/common/detail/generic/sinh_kernel.hpp */
+ /*
+ * ====================================================
+ * copyright 2016 NumScale SAS
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
+ * ====================================================
+ */
+ // Single-precision sinh polynomial: self * P(self^2).
+ // The public sinh in this file uses it for lanes with |a| < 1.
+ template <class A>
+ inline batch<float, A> sinh_kernel(batch<float, A> const& self) noexcept
+ {
+     using batch_type = batch<float, A>;
+     const batch_type squared = self * self;
+     const batch_type poly = detail::horner<batch_type,
+                                            0x3f800000, // 1.0f
+                                            0x3e2aaacc, // 1.66667160211E-1f
+                                            0x3c087bbe, // 8.33028376239E-3f
+                                            0x39559e2f // 2.03721912945E-4f
+                                            >(squared);
+     return poly * self;
+ }
+
+ // Double-precision sinh kernel: self + self * (N(s) / D(s)) * s with
+ // s = self * self, where N is evaluated with horner and D with horner1.
+ // The public sinh in this file uses it for lanes with |a| < 1.
+ template <class A>
+ inline batch<double, A> sinh_kernel(batch<double, A> const& self) noexcept
+ {
+ using batch_type = batch<double, A>;
+ batch_type sqrself = self * self;
+ // Coefficients are IEEE-754 double bit patterns; decimal values alongside.
+ return fma(self, (detail::horner<batch_type,
+ 0xc115782bdbf6ab05ull, // -3.51754964808151394800E5
+ 0xc0c694b8c71d6182ull, // -1.15614435765005216044E4,
+ 0xc064773a398ff4feull, // -1.63725857525983828727E2,
+ 0xbfe9435fe8bb3cd6ull // -7.89474443963537015605E-1
+ >(sqrself)
+ / detail::horner1<batch_type,
+ 0xc1401a20e4f90044ull, // -2.11052978884890840399E6
+ 0x40e1a7ba7ed72245ull, // 3.61578279834431989373E4,
+ 0xc0715b6096e96484ull // -2.77711081420602794433E2,
+ >(sqrself))
+ * sqrself,
+ self);
+ }
+ }
+ /* origin: boost/simd/arch/common/simd/function/sinh.hpp */
+ /*
+ * ====================================================
+ * copyright 2016 NumScale SAS
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
+ * ====================================================
+ */
+ // Generic real sinh, computed on |a| with the sign restored by XOR-ing
+ // bitofsign(a).
+ //  - |a| < 1              : sinh_kernel
+ //  - |a| > maxlog - log_2 : (0.5 * e) * e with e = exp(|a| / 2)
+ //  - otherwise            : 0.5 * e - 0.5 / e with e = exp(|a|)
+ template <class A, class T>
+ inline batch<T, A> sinh(batch<T, A> const& a, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<T, A>;
+     const batch_type half(0.5);
+     const batch_type magnitude = abs(a);
+     const auto below_one = magnitude < batch_type(1.);
+     const batch_type sign = bitofsign(a);
+     batch_type kernel_branch(0.);
+     if (any(below_one))
+     {
+         kernel_branch = detail::sinh_kernel(magnitude);
+         // Every lane is below one: the exponential branch can be skipped.
+         if (all(below_one))
+         {
+             return kernel_branch ^ sign;
+         }
+     }
+     const auto near_overflow = magnitude > (constants::maxlog<batch_type>() - constants::log_2<batch_type>());
+     const batch_type scale = select(near_overflow, half, batch_type(1.));
+     const batch_type exponential = exp(magnitude * scale);
+     const batch_type half_exponential = half * exponential;
+     const batch_type exp_branch = select(near_overflow, half_exponential * exponential, half_exponential - half / exponential);
+     return select(below_one, kernel_branch, exp_branch) ^ sign;
+ }
+ // Complex sinh: (sinh(x) * cos(y), cosh(x) * sin(y)) for z = x + i*y.
+ template <class A, class T>
+ inline batch<std::complex<T>, A> sinh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
+ {
+     const auto re = z.real();
+     const auto im = z.imag();
+     const auto real_part = sinh(re) * cos(im);
+     const auto imag_part = cosh(re) * sin(im);
+     return { real_part, imag_part };
+ }
+
+ // tan
+ // Generic real tangent.
+ // |self| is range-reduced; lanes whose quadrant index is even (after
+ // removing the 2-offset) use the base polynomial, the others use -1 / y.
+ // The sign is restored by XOR-ing bitofsign(self).
+ template <class A, class T>
+ inline batch<T, A> tan(batch<T, A> const& self, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<T, A>;
+     batch_type reduced = constants::nan<batch_type>();
+     const batch_type quadrant_index = detail::trigo_reducer<batch_type>::reduce(abs(self), reduced);
+     const auto upper_half = select(quadrant_index >= batch_type(2.), batch_type(1.), batch_type(0.));
+     const auto odd_quadrant = fma(batch_type(-2.), upper_half, quadrant_index);
+     const auto use_direct = (odd_quadrant == batch_type(0.));
+     const batch_type unsigned_result = detail::tan_eval(reduced, use_direct);
+     return unsigned_result ^ bitofsign(self);
+ }
+ // Complex tangent for z = x + i*y, with d = cos(2x) + cosh(2y):
+ //   result = (sin(2x) / d, sinh(2y) / d)
+ // Lanes where sinh(2y) is infinite get an imaginary part of 1, and lanes
+ // where d == 0 are set to (infinity, infinity).
+ template <class A, class T>
+ inline batch<std::complex<T>, A> tan(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<std::complex<T>, A>;
+     using real_batch = typename batch_type::real_batch;
+     const real_batch denominator = cos(2 * z.real()) + cosh(2 * z.imag());
+     const real_batch real_part = sin(2 * z.real()) / denominator;
+     const real_batch sinh_twice_im = sinh(2 * z.imag());
+     const batch_type saturated(real_part, real_batch(1.));
+     const batch_type regular(real_part, sinh_twice_im / denominator);
+     const batch_type finite_result = select(isinf(sinh_twice_im), saturated, regular);
+     const batch_type pole(constants::infinity<real_batch>(), constants::infinity<real_batch>());
+     return select(denominator == real_batch(0.), pole, finite_result);
+ }
+
+ // tanh
+ namespace detail
+ {
+ /* origin: boost/simd/arch/common/detail/generic/tanh_kernel.hpp */
+ /*
+ * ====================================================
+ * copyright 2016 NumScale SAS
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
+ * ====================================================
+ */
+ // Primary template, declared only. The specializations for
+ // batch<float, A> and batch<double, A> provide static tanh() and cotanh().
+ template <class B>
+ struct tanh_kernel;
+
+ // Single-precision tanh kernel.
+ // The public tanh in this file uses it for lanes with |self| < 5/8.
+ template <class A>
+ struct tanh_kernel<batch<float, A>>
+ {
+     using batch_type = batch<float, A>;
+
+     // x + x * x^2 * P(x^2).
+     static inline batch_type tanh(const batch_type& x) noexcept
+     {
+         const batch_type squared = x * x;
+         const batch_type poly = detail::horner<batch_type,
+                                                0xbeaaaa99, // -3.33332819422E-1F
+                                                0x3e088393, // +1.33314422036E-1F
+                                                0xbd5c1e2d, // -5.37397155531E-2F
+                                                0x3ca9134e, // +2.06390887954E-2F
+                                                0xbbbaf0ea // -5.70498872745E-3F
+                                                >(squared);
+         return fma(poly * squared, x, x);
+     }
+
+     // Reciprocal of tanh(x).
+     static inline batch_type cotanh(const batch_type& x) noexcept
+     {
+         const batch_type direct = tanh(x);
+         return batch_type(1.) / direct;
+     }
+ };
+
+ // Double-precision tanh kernel built on the rational function p / q.
+ // The public tanh in this file uses it for lanes with |self| < 5/8.
+ template <class A>
+ struct tanh_kernel<batch<double, A>>
+ {
+     using batch_type = batch<double, A>;
+
+     // Numerator polynomial, evaluated with horner.
+     static inline batch_type p(const batch_type& x) noexcept
+     {
+         return detail::horner<batch_type,
+                               0xc0993ac030580563, // -1.61468768441708447952E3
+                               0xc058d26a0e26682d, // -9.92877231001918586564E1,
+                               0xbfeedc5baafd6f4b // -9.64399179425052238628E-1
+                               >(x);
+     }
+
+     // Denominator polynomial, evaluated with horner1.
+     static inline batch_type q(const batch_type& x) noexcept
+     {
+         return detail::horner1<batch_type,
+                                0x40b2ec102442040c, // 4.84406305325125486048E3
+                                0x40a176fa0e5535fa, // 2.23548839060100448583E3,
+                                0x405c33f28a581B86 // 1.12811678491632931402E2,
+                                >(x);
+     }
+
+     // x + x * (x^2 * p(x^2) / q(x^2)).
+     static inline batch_type tanh(const batch_type& x) noexcept
+     {
+         const batch_type squared = x * x;
+         const batch_type ratio = squared * p(squared) / q(squared);
+         return fma(ratio, x, x);
+     }
+
+     // q(x^2) / (x * (p(x^2) * x^2 + q(x^2))).
+     static inline batch_type cotanh(const batch_type& x) noexcept
+     {
+         const batch_type squared = x * x;
+         const batch_type denominator_poly = q(squared);
+         return denominator_poly / (x * fma(p(squared), squared, denominator_poly));
+     }
+ };
+
+ }
+ /* origin: boost/simd/arch/common/simd/function/tanh.hpp */
+ /*
+ * ====================================================
+ * copyright 2016 NumScale SAS
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
+ * ====================================================
+ */
+ // Generic real tanh, computed on |self| with the sign restored by XOR-ing
+ // bitofsign(self).
+ //  - |self| < 5/8 : tanh_kernel
+ //  - otherwise    : 1 - 2 / (1 + exp(2 * |self|))
+ template <class A, class T>
+ inline batch<T, A> tanh(batch<T, A> const& self, requires_arch<generic>) noexcept
+ {
+     using batch_type = batch<T, A>;
+     const batch_type one(1.);
+     const batch_type magnitude = abs(self);
+     const auto in_kernel_range = magnitude < (batch_type(5.) / batch_type(8.));
+     const batch_type sign = bitofsign(self);
+     batch_type kernel_branch = one;
+     if (any(in_kernel_range))
+     {
+         kernel_branch = detail::tanh_kernel<batch_type>::tanh(magnitude);
+         // Every lane is in the kernel range: the exponential can be skipped.
+         if (all(in_kernel_range))
+         {
+             return kernel_branch ^ sign;
+         }
+     }
+     const batch_type exp_branch = fma(batch_type(-2.), one / (one + exp(magnitude + magnitude)), one);
+     return select(in_kernel_range, kernel_branch, exp_branch) ^ sign;
+ }
+ // Complex tanh for z = x + i*y, with d = cosh(2x) + cos(2y):
+ //   result = (sinh(2x) / d, sin(2y) / d)
+ template <class A, class T>
+ inline batch<std::complex<T>, A> tanh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
+ {
+     using real_batch = typename batch<std::complex<T>, A>::real_batch;
+     const auto re = z.real();
+     const auto im = z.imag();
+     const real_batch two(2);
+     const auto denominator = cosh(two * re) + cos(two * im);
+     const auto real_part = sinh(two * re) / denominator;
+     const auto imag_part = sin(two * im) / denominator;
+     return { real_part, imag_part };
+ }
+
+ }
+
+}
+
+#endif