summaryrefslogtreecommitdiffstats
path: root/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp')
-rw-r--r--third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp241
1 files changed, 241 insertions, 0 deletions
diff --git a/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp b/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp
new file mode 100644
index 0000000000..c72e416c6e
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp
@@ -0,0 +1,241 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_GENERIC_ARITHMETIC_HPP
+#define XSIMD_GENERIC_ARITHMETIC_HPP
+
+#include <complex>
+#include <limits>
+#include <type_traits>
+
+#include "./xsimd_generic_details.hpp"
+
+namespace xsimd
+{
+
+ namespace kernel
+ {
+
+ using namespace types;
+
+ // bitwise_lshift
+ // Generic kernel: builds a scalar `value << count` functor and passes it,
+ // together with both batches, to detail::apply (presumably evaluated lane by
+ // lane -- see detail::apply for the exact contract).
+ template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
+ inline batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
+ {
+     auto shift_left = [](T value, T count) noexcept
+     { return value << count; };
+     return detail::apply(shift_left, self, other);
+ }
+
+ // bitwise_rshift
+ // Generic kernel: builds a scalar `value >> count` functor and passes it,
+ // together with both batches, to detail::apply (presumably evaluated lane by
+ // lane -- see detail::apply for the exact contract).
+ template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
+ inline batch<T, A> bitwise_rshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
+ {
+     auto shift_right = [](T value, T count) noexcept
+     { return value >> count; };
+     return detail::apply(shift_right, self, other);
+ }
+
+ // decr
+ // Returns `self` with the scalar value T(1) subtracted from it.
+ template <class A, class T>
+ inline batch<T, A> decr(batch<T, A> const& self, requires_arch<generic>) noexcept
+ {
+     const T one(1);
+     return self - one;
+ }
+
+ // decr_if
+ // Conditional decrement: computes decr(self) and lets select() choose between
+ // the decremented batch and the untouched `self` according to `mask`.
+ template <class A, class T, class Mask>
+ inline batch<T, A> decr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept
+ {
+     const auto decremented = decr(self);
+     return select(mask, decremented, self);
+ }
+
+ // div
+ // Generic division for integral element types (enforced by the enable_if
+ // below): a scalar `numerator / denominator` functor is handed to
+ // detail::apply together with both batches.
+ template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
+ inline batch<T, A> div(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
+ {
+     auto divide = [](T numerator, T denominator) noexcept -> T
+     { return numerator / denominator; };
+     return detail::apply(divide, self, other);
+ }
+
+ // fma
+ // Generic multiply-add written as a separate multiply and add: x * y + z.
+ template <class A, class T>
+ inline batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept
+ {
+     const auto product = x * y;
+     return product + z;
+ }
+
+ // fma (complex): x * y + z assembled from real-valued fma/fms calls.
+ //   real part: x.re * y.re - (x.im * y.im - z.re)
+ //   imag part: x.re * y.im + (x.im * y.re + z.im)
+ template <class A, class T>
+ inline batch<std::complex<T>, A> fma(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
+ {
+     const auto x_re = x.real();
+     const auto x_im = x.imag();
+     const auto y_re = y.real();
+     const auto y_im = y.imag();
+     const auto inner_re = fms(x_im, y_im, z.real());
+     const auto inner_im = fma(x_im, y_re, z.imag());
+     return { fms(x_re, y_re, inner_re), fma(x_re, y_im, inner_im) };
+ }
+
+ // fms
+ // Generic multiply-subtract written as a separate multiply and subtract: x * y - z.
+ template <class A, class T>
+ inline batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept
+ {
+     const auto product = x * y;
+     return product - z;
+ }
+
+ // fms (complex): x * y - z assembled from real-valued fma/fms calls.
+ //   real part: x.re * y.re - (x.im * y.im + z.re)
+ //   imag part: x.re * y.im + (x.im * y.re - z.im)
+ template <class A, class T>
+ inline batch<std::complex<T>, A> fms(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
+ {
+     const auto x_re = x.real();
+     const auto x_im = x.imag();
+     const auto y_re = y.real();
+     const auto y_im = y.imag();
+     const auto inner_re = fma(x_im, y_im, z.real());
+     const auto inner_im = fms(x_im, y_re, z.imag());
+     return { fms(x_re, y_re, inner_re), fma(x_re, y_im, inner_im) };
+ }
+
+ // fnma
+ // Generic negated multiply-add: negate x, multiply by y, then add z,
+ // i.e. (-x) * y + z.
+ template <class A, class T>
+ inline batch<T, A> fnma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept
+ {
+     const auto negated_x = -x;
+     return negated_x * y + z;
+ }
+
+ // fnma (complex): -(x * y) + z assembled from real-valued fma/fms calls.
+ //   real part: -(x.re * y.re - (x.im * y.im + z.re))
+ //   imag part: -(x.re * y.im + (x.im * y.re - z.im))
+ template <class A, class T>
+ inline batch<std::complex<T>, A> fnma(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
+ {
+     const auto x_re = x.real();
+     const auto x_im = x.imag();
+     const auto y_re = y.real();
+     const auto y_im = y.imag();
+     const auto inner_re = fma(x_im, y_im, z.real());
+     const auto inner_im = fms(x_im, y_re, z.imag());
+     return { -fms(x_re, y_re, inner_re), -fma(x_re, y_im, inner_im) };
+ }
+
+ // fnms
+ // Generic negated multiply-subtract: negate x, multiply by y, then subtract z,
+ // i.e. (-x) * y - z.
+ template <class A, class T>
+ inline batch<T, A> fnms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept
+ {
+     const auto negated_x = -x;
+     return negated_x * y - z;
+ }
+
+ // fnms (complex): -(x * y) - z assembled from real-valued fma/fms calls.
+ //   real part: -(x.re * y.re - (x.im * y.im - z.re))
+ //   imag part: -(x.re * y.im + (x.im * y.re + z.im))
+ template <class A, class T>
+ inline batch<std::complex<T>, A> fnms(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
+ {
+     const auto x_re = x.real();
+     const auto x_im = x.imag();
+     const auto y_re = y.real();
+     const auto y_im = y.imag();
+     const auto inner_re = fms(x_im, y_im, z.real());
+     const auto inner_im = fma(x_im, y_re, z.imag());
+     return { -fms(x_re, y_re, inner_re), -fma(x_re, y_im, inner_im) };
+ }
+
+ // hadd
+ // Horizontal add: stores the batch into an aligned stack array and sums its
+ // elements one after another, starting from zero.
+ template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
+ inline T hadd(batch<T, A> const& self, requires_arch<generic>) noexcept
+ {
+     // store_aligned is used, hence the explicit alignas on the scratch array.
+     alignas(A::alignment()) T lanes[batch<T, A>::size];
+     self.store_aligned(lanes);
+
+     T total = 0;
+     for (T const& lane : lanes)
+     {
+         total += lane;
+     }
+     return total;
+ }
+
+ // incr
+ // Returns `self` with the scalar value T(1) added to it.
+ template <class A, class T>
+ inline batch<T, A> incr(batch<T, A> const& self, requires_arch<generic>) noexcept
+ {
+     const T one(1);
+     return self + one;
+ }
+
+ // incr_if
+ // Conditional increment: computes incr(self) and lets select() choose between
+ // the incremented batch and the untouched `self` according to `mask`.
+ template <class A, class T, class Mask>
+ inline batch<T, A> incr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept
+ {
+     const auto incremented = incr(self);
+     return select(mask, incremented, self);
+ }
+
+ // mul
+ // Generic multiplication: a scalar `lhs * rhs` functor (result converted back
+ // to T) is handed to detail::apply together with both batches.
+ template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
+ inline batch<T, A> mul(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
+ {
+     auto multiply = [](T lhs, T rhs) noexcept -> T
+     { return lhs * rhs; };
+     return detail::apply(multiply, self, other);
+ }
+
+ // rotl
+ // Rotate-left built from two shifts: (self << other) | (self >> (bits - other)).
+ // `other` is the rotation amount (scalar or batch, whatever the batch shift
+ // operators accept).
+ template <class A, class T, class STy>
+ inline batch<T, A> rotl(batch<T, A> const& self, STy other, requires_arch<generic>) noexcept
+ {
+     // numeric_limits<T>::digits does not count the sign bit, so for signed T it
+     // is one less than the storage width; add it back to get the real width.
+     constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
+     // NOTE(review): for signed T the right shift is presumably arithmetic, so
+     // lanes holding negative values may still not rotate cleanly -- confirm.
+     return (self << other) | (self >> (bits - other));
+ }
+
+ // rotr
+ // Rotate-right built from two shifts: (self >> other) | (self << (bits - other)).
+ // `other` is the rotation amount (scalar or batch, whatever the batch shift
+ // operators accept).
+ template <class A, class T, class STy>
+ inline batch<T, A> rotr(batch<T, A> const& self, STy other, requires_arch<generic>) noexcept
+ {
+     // numeric_limits<T>::digits does not count the sign bit, so for signed T it
+     // is one less than the storage width; add it back to get the real width.
+     constexpr auto bits = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;
+     // NOTE(review): for signed T the right shift is presumably arithmetic, so
+     // lanes holding negative values may still not rotate cleanly -- confirm.
+     return (self >> other) | (self << (bits - other));
+ }
+
+ // sadd
+ // float overload: there is no saturated arithmetic on floating point numbers,
+ // so this simply forwards to add().
+ template <class A>
+ inline batch<float, A> sadd(batch<float, A> const& self, batch<float, A> const& other, requires_arch<generic>) noexcept
+ {
+     const auto sum = add(self, other);
+     return sum;
+ }
+ // sadd (integral): saturated addition done by clamping one operand before
+ // adding, so that the final sum stays within the limits of T.
+ template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
+ inline batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
+ {
+     if (!std::is_signed<T>::value)
+     {
+         // Unsigned: add no more than the headroom left between self and max.
+         const auto headroom = std::numeric_limits<T>::max() - self;
+         return self + min(headroom, other);
+     }
+
+     // Signed: shifting by (width - 1) yields the per-lane mask used to pick
+     // the branch for negative `other` (presumably all ones in those lanes).
+     auto sign_mask = (other >> (8 * sizeof(T) - 1));
+     // other >= 0: cap self at max - other so the sum cannot exceed max.
+     auto capped_from_above = min(std::numeric_limits<T>::max() - other, self);
+     // other < 0: raise self to min - other so the sum cannot go below min.
+     auto capped_from_below = max(std::numeric_limits<T>::min() - other, self);
+     return other + select(batch_bool<T, A>(sign_mask.data), capped_from_below, capped_from_above);
+ }
+ // sadd (double): there is no saturated arithmetic on floating point numbers,
+ // so this simply forwards to add().
+ template <class A>
+ inline batch<double, A> sadd(batch<double, A> const& self, batch<double, A> const& other, requires_arch<generic>) noexcept
+ {
+     const auto sum = add(self, other);
+     return sum;
+ }
+
+ // ssub
+ // float overload: there is no saturated arithmetic on floating point numbers,
+ // so this simply forwards to sub().
+ template <class A>
+ inline batch<float, A> ssub(batch<float, A> const& self, batch<float, A> const& other, requires_arch<generic>) noexcept
+ {
+     const auto difference = sub(self, other);
+     return difference;
+ }
+ // ssub (integral): saturated subtraction done by clamping `self` before
+ // subtracting, so that the final difference stays within the limits of T.
+ template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
+ inline batch<T, A> ssub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
+ {
+     if (std::is_signed<T>::value)
+     {
+         // Deliberately not sadd(self, -other): negating the minimum value wraps
+         // back to the minimum, so e.g. ssub(0, min) would saturate to min
+         // instead of max. Clamp self directly instead.
+         // Same sign-mask idiom as the integral sadd: selects the branch for
+         // lanes where `other` is negative.
+         auto mask = (other >> (8 * sizeof(T) - 1));
+         // other >= 0: raise self to min + other so the difference cannot go below min.
+         auto other_pos_branch = max(std::numeric_limits<T>::min() + other, self);
+         // other < 0: cap self at max + other so the difference cannot exceed max.
+         auto other_neg_branch = min(std::numeric_limits<T>::max() + other, self);
+         return select(batch_bool<T, A>(mask.data), other_neg_branch, other_pos_branch) - other;
+     }
+     else
+     {
+         // Unsigned: never subtract more than self itself, flooring the result at 0.
+         const auto diff = min(self, other);
+         return self - diff;
+     }
+ }
+ // ssub (double): there is no saturated arithmetic on floating point numbers,
+ // so this simply forwards to sub().
+ template <class A>
+ inline batch<double, A> ssub(batch<double, A> const& self, batch<double, A> const& other, requires_arch<generic>) noexcept
+ {
+     const auto difference = sub(self, other);
+     return difference;
+ }
+
+ }
+
+}
+
+#endif