diff options
Diffstat (limited to 'third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp')
-rw-r--r-- | third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp | 241 |
1 files changed, 241 insertions, 0 deletions
diff --git a/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp b/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp new file mode 100644 index 0000000000..c72e416c6e --- /dev/null +++ b/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_arithmetic.hpp @@ -0,0 +1,241 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_GENERIC_ARITHMETIC_HPP +#define XSIMD_GENERIC_ARITHMETIC_HPP + +#include <complex> +#include <limits> +#include <type_traits> + +#include "./xsimd_generic_details.hpp" + +namespace xsimd +{ + + namespace kernel + { + + using namespace types; + + // bitwise_lshift + template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/> + inline batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept + { + return detail::apply([](T x, T y) noexcept + { return x << y; }, + self, other); + } + + // bitwise_rshift + template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/> + inline batch<T, A> bitwise_rshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept + { + return detail::apply([](T x, T y) noexcept + { return x >> y; }, + self, other); + } + + // decr + template <class A, class T> + inline batch<T, A> decr(batch<T, A> const& self, requires_arch<generic>) noexcept + { + return self - T(1); + } + + // decr_if + template <class A, class T, class Mask> + inline batch<T, A> decr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept + { + return select(mask, decr(self), self); + } + + // div + template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type> + inline batch<T, A> div(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept + { + return detail::apply([](T x, T y) noexcept -> T + { return x / y; }, + self, other); + } + + // fma + template <class A, class T> + inline batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept + { + return x * y + z; + } + + template <class A, class T> + inline batch<std::complex<T>, A> fma(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept + { + auto res_r = fms(x.real(), y.real(), fms(x.imag(), y.imag(), z.real())); + auto res_i = fma(x.real(), y.imag(), fma(x.imag(), y.real(), z.imag())); + return { res_r, res_i }; + } + + // fms + template <class A, class T> + inline batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept + { + return x * y - z; + } + + template <class A, class T> + inline batch<std::complex<T>, A> fms(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept + { + auto res_r = fms(x.real(), y.real(), fma(x.imag(), y.imag(), z.real())); + auto res_i = fma(x.real(), y.imag(), fms(x.imag(), y.real(), z.imag())); + return { res_r, res_i }; + } + + // fnma + template <class A, class T> + inline batch<T, A> fnma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept + { + return -x * y + z; + } + + template <class A, class T> + inline batch<std::complex<T>, A> fnma(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept + { + auto res_r = -fms(x.real(), y.real(), fma(x.imag(), y.imag(), z.real())); + auto res_i = -fma(x.real(), y.imag(), fms(x.imag(), y.real(), z.imag())); + return { res_r, res_i }; + } + + // fnms + template <class A, class T> + inline batch<T, A> fnms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept + { + return -x * y - z; + } + + template <class A, class T> + inline batch<std::complex<T>, A> fnms(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept + { + auto res_r = -fms(x.real(), y.real(), fms(x.imag(), y.imag(), z.real())); + auto res_i = -fma(x.real(), y.imag(), fma(x.imag(), y.real(), z.imag())); + return { res_r, res_i }; + } + + // hadd + template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/> + inline T hadd(batch<T, A> const& self, requires_arch<generic>) noexcept + { + alignas(A::alignment()) T buffer[batch<T, A>::size]; + self.store_aligned(buffer); + T res = 0; + for (T val : buffer) + { + res += val; + } + return res; + } + + // incr + template <class A, class T> + inline batch<T, A> incr(batch<T, A> const& self, requires_arch<generic>) noexcept + { + return self + T(1); + } + + // incr_if + template <class A, class T, class Mask> + inline batch<T, A> incr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept + { + return select(mask, incr(self), self); + } + + // mul + template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/> + inline batch<T, A> mul(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept + { + return detail::apply([](T x, T y) noexcept -> T + { return x * y; }, + self, other); + } + + // rotl + template <class A, class T, class STy> + inline batch<T, A> rotl(batch<T, A> const& self, STy other, requires_arch<generic>) noexcept + { + constexpr auto N = std::numeric_limits<T>::digits; + return (self << other) | (self >> (N - other)); + } + + // rotr + template <class A, class T, class STy> + inline batch<T, A> rotr(batch<T, A> const& self, STy other, requires_arch<generic>) noexcept + { + constexpr auto N = std::numeric_limits<T>::digits; + return (self >> other) | (self << (N - other)); + } + + // sadd + template <class A> + inline batch<float, A> sadd(batch<float, A> const& self, batch<float, A> const& other, requires_arch<generic>) noexcept + { + return add(self, other); // no saturated arithmetic on floating point numbers + } + template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/> + inline batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept + { + if (std::is_signed<T>::value) + { + auto mask = (other >> (8 * sizeof(T) - 1)); + auto self_pos_branch = min(std::numeric_limits<T>::max() - other, self); + auto self_neg_branch = max(std::numeric_limits<T>::min() - other, self); + return other + select(batch_bool<T, A>(mask.data), self_neg_branch, self_pos_branch); + } + else + { + const auto diffmax = std::numeric_limits<T>::max() - self; + const auto mindiff = min(diffmax, other); + return self + mindiff; + } + } + template <class A> + inline batch<double, A> sadd(batch<double, A> const& self, batch<double, A> const& other, requires_arch<generic>) noexcept + { + return add(self, other); // no saturated arithmetic on floating point numbers + } + + // ssub + template <class A> + inline batch<float, A> ssub(batch<float, A> const& self, batch<float, A> const& other, requires_arch<generic>) noexcept + { + return sub(self, other); // no saturated arithmetic on floating point numbers + } + template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/> + inline batch<T, A> ssub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept + { + if (std::is_signed<T>::value) + { + return sadd(self, -other); + } + else + { + const auto diff = min(self, other); + return self - diff; + } + } + template <class A> + inline batch<double, A> ssub(batch<double, A> const& self, batch<double, A> const& other, requires_arch<generic>) noexcept + { + return sub(self, other); // no saturated arithmetic on floating point numbers + } + + } + +} + +#endif |