Diffstat (limited to 'third_party/xsimd/include/xsimd/types')
36 files changed, 7381 insertions, 0 deletions
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_all_registers.hpp b/third_party/xsimd/include/xsimd/types/xsimd_all_registers.hpp
new file mode 100644
index 0000000000..4350ca0a28
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_all_registers.hpp
@@ -0,0 +1,46 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
+ * Martin Renou                                                             *
+ * Copyright (c) QuantStack                                                 *
+ * Copyright (c) Serge Guelton                                              *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#include "xsimd_fma3_sse_register.hpp"
+#include "xsimd_fma4_register.hpp"
+#include "xsimd_sse2_register.hpp"
+#include "xsimd_sse3_register.hpp"
+#include "xsimd_sse4_1_register.hpp"
+#include "xsimd_sse4_2_register.hpp"
+
+#include "xsimd_avx2_register.hpp"
+#include "xsimd_avx_register.hpp"
+#include "xsimd_avxvnni_register.hpp"
+#include "xsimd_fma3_avx2_register.hpp"
+#include "xsimd_fma3_avx_register.hpp"
+
+#include "xsimd_avx512vnni_avx512bw_register.hpp"
+#include "xsimd_avx512vnni_avx512vbmi_register.hpp"
+
+#include "xsimd_avx512ifma_register.hpp"
+#include "xsimd_avx512vbmi_register.hpp"
+
+#include "xsimd_avx512er_register.hpp"
+#include "xsimd_avx512pf_register.hpp"
+
+#include "xsimd_avx512bw_register.hpp"
+#include "xsimd_avx512cd_register.hpp"
+#include "xsimd_avx512dq_register.hpp"
+#include "xsimd_avx512f_register.hpp"
+
+#include "xsimd_neon64_register.hpp"
+#include "xsimd_neon_register.hpp"
+
+#include "xsimd_sve_register.hpp"
+
+#include "xsimd_rvv_register.hpp"
+
+#include "xsimd_wasm_register.hpp"
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_api.hpp b/third_party/xsimd/include/xsimd/types/xsimd_api.hpp
new file mode 100644
index 0000000000..0420f0a09d
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_api.hpp
@@ -0,0 +1,2599 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
+ * Martin Renou                                                             *
+ * Copyright (c) QuantStack                                                 *
+ * Copyright (c) Serge Guelton                                              *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_API_HPP
+#define XSIMD_API_HPP
+
+#include <complex>
+#include <cstddef>
+#include <limits>
+#include <ostream>
+
+#include "../arch/xsimd_isa.hpp"
+#include "../types/xsimd_batch.hpp"
+#include "../types/xsimd_traits.hpp"
+
+namespace xsimd
+{
+    /**
+     * high level free functions
+     *
+     * @defgroup batch_arithmetic Arithmetic operators
+     * @defgroup batch_constant Constant batches
+     * @defgroup batch_data_transfer Memory operators
+     * @defgroup batch_math Basic math operators
+     * @defgroup batch_math_extra Extra math operators
+     * @defgroup batch_fp Floating point manipulation
+     * @defgroup batch_rounding Rounding operators
+     * @defgroup batch_conversion Conversion operators
+     * @defgroup batch_complex_op Complex operators
+     * @defgroup batch_logical Logical operators
+     * @defgroup batch_bitwise Bitwise operators
+     * @defgroup batch_reducers Reducers
+     * @defgroup batch_miscellaneous Miscellaneous
+     * @defgroup batch_trigo Trigonometry
+     *
+     * @defgroup batch_bool_logical Boolean logical operators
+     * @defgroup batch_bool_reducers Boolean reducers
+     */
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the absolute values of each scalar in the batch \c x.
+     * @param x batch of integer or floating point values.
+     * @return the absolute values of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> abs(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::abs<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_complex
+     *
+     * Computes the absolute values of each complex in the batch \c z.
+     * @param z batch of complex values.
+     * @return the absolute values of \c z.
+     */
+    template <class T, class A>
+    inline batch<T, A> abs(batch<std::complex<T>, A> const& z) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::abs<A>(z, A {});
+    }
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Computes the sum of the batches \c x and \c y.
+     * @param x batch or scalar involved in the addition.
+     * @param y batch or scalar involved in the addition.
+     * @return the sum of \c x and \c y
+     */
+    template <class T, class A>
+    inline auto add(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x + y)
+    {
+        detail::static_check_supported_config<T, A>();
+        return x + y;
+    }
+
+    /**
+     * @ingroup batch_trigo
+     *
+     * Computes the arc cosine of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the arc cosine of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> acos(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::acos<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_trigo
+     *
+     * Computes the inverse hyperbolic cosine of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the inverse hyperbolic cosine of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> acosh(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::acosh<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_complex
+     *
+     * Computes the argument of the batch \c z.
+     * @param z batch of complex or real values.
+     * @return the argument of \c z.
+     */
+    template <class T, class A>
+    inline real_batch_type_t<batch<T, A>> arg(batch<T, A> const& z) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::arg<A>(z, A {});
+    }
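+
+    /*
+     * Editor's usage sketch: comment blocks of this form are illustrations
+     * added during editing and are not part of the upstream xsimd header.
+     * This one exercises the arithmetic helpers above, assuming a 4-lane
+     * float batch on the default architecture:
+     *
+     * \code{.cpp}
+     * xsimd::batch<float> a(-1.5f); // broadcasts -1.5f to every lane
+     * xsimd::batch<float> b(2.0f);
+     * auto s = xsimd::add(a, b);    // same as a + b: {0.5, 0.5, 0.5, 0.5}
+     * auto m = xsimd::abs(a);       // {1.5, 1.5, 1.5, 1.5}
+     * \endcode
+     */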
+
+    /**
+     * @ingroup batch_trigo
+     *
+     * Computes the arc sine of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the arc sine of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> asin(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::asin<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_trigo
+     *
+     * Computes the inverse hyperbolic sine of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the inverse hyperbolic sine of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> asinh(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::asinh<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_trigo
+     *
+     * Computes the arc tangent of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the arc tangent of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> atan(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::atan<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_trigo
+     *
+     * Computes the arc tangent of the batch \c x/y, using the signs of the
+     * arguments to determine the correct quadrant.
+     * @param x batch of floating point values.
+     * @param y batch of floating point values.
+     * @return the arc tangent of \c x/y.
+     */
+    template <class T, class A>
+    inline batch<T, A> atan2(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::atan2<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_trigo
+     *
+     * Computes the inverse hyperbolic tangent of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the inverse hyperbolic tangent of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> atanh(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::atanh<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_conversion
+     *
+     * Perform a static_cast from \c T_in to \c T_out on \c x.
+     * @param x batch_bool of \c T_in
+     * @return \c x cast to \c T_out
+     */
+    template <class T_out, class T_in, class A>
+    inline batch_bool<T_out, A> batch_bool_cast(batch_bool<T_in, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T_out, A>();
+        detail::static_check_supported_config<T_in, A>();
+        static_assert(batch_bool<T_out, A>::size == batch_bool<T_in, A>::size, "Casting between incompatible batch_bool types.");
+        return kernel::batch_bool_cast<A>(x, batch_bool<T_out, A> {}, A {});
+    }
+
+    /**
+     * @ingroup batch_conversion
+     *
+     * Perform a static_cast from \c T_in to \c T_out on \c x.
+     * @param x batch of \c T_in
+     * @return \c x cast to \c T_out
+     */
+    template <class T_out, class T_in, class A>
+    inline batch<T_out, A> batch_cast(batch<T_in, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T_out, A>();
+        detail::static_check_supported_config<T_in, A>();
+        return kernel::batch_cast<A>(x, batch<T_out, A> {}, A {});
+    }
+
+    /**
+     * @ingroup batch_miscellaneous
+     *
+     * Computes the sign bit of \c x
+     * @param x batch of scalars
+     * @return the sign bit of \c x
+     */
+    template <class T, class A>
+    inline batch<T, A> bitofsign(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::bitofsign<A>(x, A {});
+    }
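+
+    /*
+     * Editor's usage sketch (illustrative, not upstream code): atan2 uses
+     * the signs of both arguments to resolve the quadrant, which a plain
+     * atan of the quotient cannot do:
+     *
+     * \code{.cpp}
+     * xsimd::batch<float> num(-1.0f), den(-1.0f);
+     * auto a = xsimd::atan2(num, den); // -3*pi/4 per lane
+     * auto b = xsimd::atan(num / den); // pi/4 per lane
+     * \endcode
+     */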
+
+    /**
+     * @ingroup batch_bitwise
+     *
+     * Computes the bitwise and of the batches \c x and \c y.
+     * @param x batch involved in the operation.
+     * @param y batch involved in the operation.
+     * @return the result of the bitwise and.
+     */
+    template <class T, class A>
+    inline auto bitwise_and(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x & y)
+    {
+        detail::static_check_supported_config<T, A>();
+        return x & y;
+    }
+
+    /**
+     * @ingroup batch_bitwise
+     *
+     * Computes the bitwise and of the batches \c x and \c y.
+     * @param x batch involved in the operation.
+     * @param y batch involved in the operation.
+     * @return the result of the bitwise and.
+     */
+    template <class T, class A>
+    inline auto bitwise_and(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x & y)
+    {
+        detail::static_check_supported_config<T, A>();
+        return x & y;
+    }
+
+    /**
+     * @ingroup batch_bitwise
+     *
+     * Computes the bitwise and not of batches \c x and \c y.
+     * @param x batch involved in the operation.
+     * @param y batch involved in the operation.
+     * @return the result of the bitwise and not.
+     */
+    template <class T, class A>
+    inline batch<T, A> bitwise_andnot(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::bitwise_andnot<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_bool_logical
+     *
+     * Computes the bitwise and not of batches \c x and \c y.
+     * @param x batch involved in the operation.
+     * @param y batch involved in the operation.
+     * @return the result of the bitwise and not.
+     */
+    template <class T, class A>
+    inline batch_bool<T, A> bitwise_andnot(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::bitwise_andnot<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_conversion
+     *
+     * Perform a reinterpret_cast from \c T_in to \c T_out on \c x.
+     * @param x batch of \c T_in
+     * @return \c x reinterpreted as \c T_out
+     */
+    template <class T_out, class T_in, class A>
+    inline batch<T_out, A> bitwise_cast(batch<T_in, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T_in, A>();
+        detail::static_check_supported_config<T_out, A>();
+        return kernel::bitwise_cast<A>(x, batch<T_out, A> {}, A {});
+    }
+
+    /**
+     * @ingroup batch_bitwise
+     *
+     * Perform a bitwise shift to the left.
+     * @param x batch of \c T
+     * @param shift scalar amount to shift
+     * @return shifted \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> bitwise_lshift(batch<T, A> const& x, int shift) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::bitwise_lshift<A>(x, shift, A {});
+    }
+    template <class T, class A>
+    inline batch<T, A> bitwise_lshift(batch<T, A> const& x, batch<T, A> const& shift) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::bitwise_lshift<A>(x, shift, A {});
+    }
+
+    /**
+     * @ingroup batch_bitwise
+     *
+     * Computes the bitwise not of batch \c x.
+     * @param x batch involved in the operation.
+     * @return the result of the bitwise not.
+     */
+    template <class T, class A>
+    inline batch<T, A> bitwise_not(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::bitwise_not<A>(x, A {});
+    }
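+
+    /*
+     * Editor's usage sketch (illustrative, not upstream code): bitwise_cast
+     * reinterprets the lane bits, while batch_cast above converts values:
+     *
+     * \code{.cpp}
+     * xsimd::batch<float> x(1.0f);
+     * auto bits = xsimd::bitwise_cast<uint32_t>(x); // 0x3f800000 per lane
+     * auto ints = xsimd::batch_cast<int32_t>(x);    // 1 per lane
+     * \endcode
+     */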
+
+    /**
+     * @ingroup batch_bitwise
+     *
+     * Computes the bitwise not of batch \c x.
+     * @param x batch involved in the operation.
+     * @return the result of the bitwise not.
+     */
+    template <class T, class A>
+    inline batch_bool<T, A> bitwise_not(batch_bool<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::bitwise_not<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_bitwise
+     *
+     * Computes the bitwise or of the batches \c x and \c y.
+     * @param x scalar or batch of scalars
+     * @param y scalar or batch of scalars
+     * @return the result of the bitwise or.
+     */
+    template <class T, class A>
+    inline auto bitwise_or(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x | y)
+    {
+        detail::static_check_supported_config<T, A>();
+        return x | y;
+    }
+
+    /**
+     * @ingroup batch_bitwise
+     *
+     * Computes the bitwise or of the batches \c x and \c y.
+     * @param x scalar or batch of scalars
+     * @param y scalar or batch of scalars
+     * @return the result of the bitwise or.
+     */
+    template <class T, class A>
+    inline auto bitwise_or(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x | y)
+    {
+        detail::static_check_supported_config<T, A>();
+        return x | y;
+    }
+
+    /**
+     * @ingroup batch_bitwise
+     *
+     * Perform a bitwise shift to the right.
+     * @param x batch of \c T
+     * @param shift scalar amount to shift
+     * @return shifted \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> bitwise_rshift(batch<T, A> const& x, int shift) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::bitwise_rshift<A>(x, shift, A {});
+    }
+    template <class T, class A>
+    inline batch<T, A> bitwise_rshift(batch<T, A> const& x, batch<T, A> const& shift) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::bitwise_rshift<A>(x, shift, A {});
+    }
+
+    /**
+     * @ingroup batch_bitwise
+     *
+     * Computes the bitwise xor of the batches \c x and \c y.
+     * @param x scalar or batch of scalars
+     * @param y scalar or batch of scalars
+     * @return the result of the bitwise xor.
+     */
+    template <class T, class A>
+    inline auto bitwise_xor(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x ^ y)
+    {
+        detail::static_check_supported_config<T, A>();
+        return x ^ y;
+    }
+
+    /**
+     * @ingroup batch_bitwise
+     *
+     * Computes the bitwise xor of the batches \c x and \c y.
+     * @param x scalar or batch of scalars
+     * @param y scalar or batch of scalars
+     * @return the result of the bitwise xor.
+     */
+    template <class T, class A>
+    inline auto bitwise_xor(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x ^ y)
+    {
+        detail::static_check_supported_config<T, A>();
+        return x ^ y;
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Creates a batch from the single value \c v.
+     * @param v the value used to initialize the batch
+     * @return a new batch instance
+     */
+    template <class T, class A = default_arch>
+    inline batch<T, A> broadcast(T v) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return batch<T, A>::broadcast(v);
+    }
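+
+    /*
+     * Editor's usage sketch (illustrative, not upstream code): broadcasting
+     * a scalar and shifting each lane:
+     *
+     * \code{.cpp}
+     * auto v = xsimd::broadcast<int32_t>(5); // 5 in every lane
+     * auto l = xsimd::bitwise_lshift(v, 1);  // 10 in every lane
+     * auto r = xsimd::bitwise_rshift(v, 2);  // 1 in every lane
+     * \endcode
+     */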
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Creates a batch from the single value \c v and
+     * the specified batch value type \c To.
+     * @param v the value used to initialize the batch
+     * @return a new batch instance
+     */
+    template <class To, class A = default_arch, class From>
+    inline simd_return_type<From, To, A> broadcast_as(From v) noexcept
+    {
+        detail::static_check_supported_config<From, A>();
+        using batch_value_type = typename simd_return_type<From, To, A>::value_type;
+        using value_type = typename std::conditional<std::is_same<From, bool>::value,
+                                                     bool,
+                                                     batch_value_type>::type;
+        return simd_return_type<From, To, A>(value_type(v));
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the cubic root of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the cubic root of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> cbrt(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::cbrt<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_rounding
+     *
+     * Computes the batch of smallest integer values not less than
+     * scalars in \c x.
+     * @param x batch of floating point values.
+     * @return the batch of smallest integer values not less than \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> ceil(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::ceil<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Clips the values of the batch \c x between those of the batches \c lo and \c hi.
+     * @param x batch of scalar values.
+     * @param lo batch of scalar values.
+     * @param hi batch of scalar values.
+     * @return the result of the clipping.
+     */
+    template <class T, class A>
+    inline batch<T, A> clip(batch<T, A> const& x, batch<T, A> const& lo, batch<T, A> const& hi) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::clip(x, lo, hi, A {});
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Picks elements from \c x selected by \c mask, and appends them to the
+     * resulting vector, zeroing the remaining slots.
+     */
+    template <class T, class A>
+    inline batch<T, A> compress(batch<T, A> const& x, batch_bool<T, A> const& mask) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::compress<A>(x, mask, A {});
+    }
+
+    /**
+     * @ingroup batch_complex
+     *
+     * Computes the conjugate of the batch \c z.
+     * @param z batch of complex values.
+     * @return the conjugate of \c z.
+     */
+    template <class A, class T>
+    inline complex_batch_type_t<batch<T, A>> conj(batch<T, A> const& z) noexcept
+    {
+        return kernel::conj(z, A {});
+    }
+
+    /**
+     * @ingroup batch_miscellaneous
+     *
+     * Computes a value whose absolute value matches
+     * that of \c x, but whose sign bit matches that of \c y.
+     * @param x batch of scalars
+     * @param y batch of scalars
+     * @return batch whose absolute value matches that of \c x, but whose sign bit
+     * matches that of \c y.
+     */
+    template <class T, class A>
+    inline batch<T, A> copysign(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::copysign<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_trigo
+     *
+     * Computes the cosine of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the cosine of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> cos(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::cos<A>(x, A {});
+    }
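+
+    /*
+     * Editor's usage sketch (illustrative, not upstream code; assumes a
+     * 4-lane batch): compress packs the lanes selected by the mask to the
+     * front and zero-fills the rest:
+     *
+     * \code{.cpp}
+     * xsimd::batch<float> x { 1.f, 2.f, 3.f, 4.f };
+     * xsimd::batch_bool<float> m { true, false, true, false };
+     * auto packed = xsimd::compress(x, m); // {1, 3, 0, 0}
+     * \endcode
+     */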
+
+    /**
+     * @ingroup batch_trigo
+     *
+     * Computes the hyperbolic cosine of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the hyperbolic cosine of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> cosh(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::cosh<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Subtracts 1 from the batch \c x.
+     * @param x batch involved in the decrement.
+     * @return \c x minus 1.
+     */
+    template <class T, class A>
+    inline batch<T, A> decr(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::decr<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Subtracts 1 from the batch \c x for each element where \c mask is true.
+     * @param x batch involved in the decrement.
+     * @param mask whether to perform the decrement or not. Can be a \c
+     * batch_bool or a \c batch_bool_constant.
+     * @return \c x minus 1 when \c mask is true, \c x otherwise.
+     */
+    template <class T, class A, class Mask>
+    inline batch<T, A> decr_if(batch<T, A> const& x, Mask const& mask) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::decr_if<A>(x, mask, A {});
+    }
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Computes the division of the batch \c x by the batch \c y.
+     * @param x scalar or batch of scalars
+     * @param y scalar or batch of scalars
+     * @return the result of the division.
+     */
+    template <class T, class A>
+    inline auto div(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x / y)
+    {
+        detail::static_check_supported_config<T, A>();
+        return x / y;
+    }
+
+    /**
+     * @ingroup batch_logical
+     *
+     * Element-wise equality comparison of batches \c x and \c y.
+     * @param x batch of scalars
+     * @param y batch of scalars
+     * @return a boolean batch.
+     */
+    template <class T, class A>
+    inline auto eq(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x == y)
+    {
+        detail::static_check_supported_config<T, A>();
+        return x == y;
+    }
+
+    /**
+     * @ingroup batch_logical
+     *
+     * Element-wise equality comparison of batches of boolean values \c x and \c y.
+     * @param x batch of booleans involved in the comparison.
+     * @param y batch of booleans involved in the comparison.
+     * @return a boolean batch.
+     */
+    template <class T, class A>
+    inline auto eq(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x == y)
+    {
+        detail::static_check_supported_config<T, A>();
+        return x == y;
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the natural exponential of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the natural exponential of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> exp(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::exp<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the base 10 exponential of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the base 10 exponential of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> exp10(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::exp10<A>(x, A {});
+    }
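+
+    /*
+     * Editor's usage sketch (illustrative, not upstream code; assumes a
+     * 4-lane batch): a comparison mask drives a conditional decrement:
+     *
+     * \code{.cpp}
+     * xsimd::batch<int32_t> x { 3, 7, 3, 9 };
+     * auto big = x > xsimd::batch<int32_t>(5);
+     * auto y = xsimd::decr_if(x, big); // {3, 6, 3, 8}
+     * \endcode
+     */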
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the base 2 exponential of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the base 2 exponential of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> exp2(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::exp2<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Load contiguous elements from \c x and place them in slots selected by \c
+     * mask, zeroing the other slots.
+     */
+    template <class T, class A>
+    inline batch<T, A> expand(batch<T, A> const& x, batch_bool<T, A> const& mask) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::expand<A>(x, mask, A {});
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the natural exponential of the batch \c x, minus one.
+     * @param x batch of floating point values.
+     * @return the natural exponential of \c x, minus one.
+     */
+    template <class T, class A>
+    inline batch<T, A> expm1(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::expm1<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_math_extra
+     *
+     * Computes the error function of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the error function of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> erf(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::erf<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_math_extra
+     *
+     * Computes the complementary error function of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the complementary error function of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> erfc(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::erfc<A>(x, A {});
+    }
+
+    /**
+     * Extract vector from a pair of vectors.
+     * Extracts the lowest vector elements from the second source \c x
+     * and the highest vector elements from the first source \c y,
+     * and concatenates the results into the return value.
+     * @param x batch of integer or floating point values.
+     * @param y batch of integer or floating point values.
+     * @param i integer specifying the lowest vector element to extract from the first source register
+     * @return the concatenated result.
+     */
+    template <class T, class A>
+    inline batch<T, A> extract_pair(batch<T, A> const& x, batch<T, A> const& y, std::size_t i) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::extract_pair<A>(x, y, i, A {});
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the absolute values of each scalar in the batch \c x.
+     * @param x batch of floating point values.
+     * @return the absolute values of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> fabs(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::abs<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the positive difference between \c x and \c y, that is,
+     * <tt>max(0, x-y)</tt>.
+     * @param x batch of floating point values.
+     * @param y batch of floating point values.
+     * @return the positive difference.
+     */
+    template <class T, class A>
+    inline batch<T, A> fdim(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::fdim<A>(x, y, A {});
+    }
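+
+    /*
+     * Editor's usage sketch (illustrative, not upstream code; assumes a
+     * 4-lane batch and the element order described above): extract_pair
+     * reads one full batch out of the concatenation of its two sources:
+     *
+     * \code{.cpp}
+     * xsimd::batch<int32_t> x { 0, 1, 2, 3 };
+     * xsimd::batch<int32_t> y { 4, 5, 6, 7 };
+     * auto z = xsimd::extract_pair(x, y, 1); // {5, 6, 7, 0}
+     * \endcode
+     */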
+
+    /**
+     * @ingroup batch_rounding
+     *
+     * Computes the batch of largest integer values not greater than
+     * scalars in \c x.
+     * @param x batch of floating point values.
+     * @return the batch of largest integer values not greater than \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> floor(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::floor<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Computes <tt>(x*y) + z</tt> in a single instruction when possible.
+     * @param x a batch of integer or floating point values.
+     * @param y a batch of integer or floating point values.
+     * @param z a batch of integer or floating point values.
+     * @return the result of the fused multiply-add operation.
+     */
+    template <class T, class A>
+    inline batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::fma<A>(x, y, z, A {});
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the larger values of the batches \c x and \c y.
+     * @param x a batch of integer or floating point values.
+     * @param y a batch of integer or floating point values.
+     * @return a batch of the larger values.
+     */
+    template <class T, class A>
+    inline batch<T, A> fmax(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::max<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the smaller values of the batches \c x and \c y.
+     * @param x a batch of integer or floating point values.
+     * @param y a batch of integer or floating point values.
+     * @return a batch of the smaller values.
+     */
+    template <class T, class A>
+    inline batch<T, A> fmin(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::min<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the modulo of the batch \c x by the batch \c y.
+     * @param x batch involved in the modulo.
+     * @param y batch involved in the modulo.
+     * @return the result of the modulo.
+     */
+    template <class T, class A>
+    inline batch<T, A> fmod(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::fmod<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Computes <tt>(x*y) - z</tt> in a single instruction when possible.
+     * @param x a batch of integer or floating point values.
+     * @param y a batch of integer or floating point values.
+     * @param z a batch of integer or floating point values.
+     * @return the result of the fused multiply-sub operation.
+     */
+    template <class T, class A>
+    inline batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::fms<A>(x, y, z, A {});
+    }
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Computes <tt>-(x*y) + z</tt> in a single instruction when possible.
+     * @param x a batch of integer or floating point values.
+     * @param y a batch of integer or floating point values.
+     * @param z a batch of integer or floating point values.
+     * @return the result of the fused negated multiply-add operation.
+     */
+    template <class T, class A>
+    inline batch<T, A> fnma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::fnma<A>(x, y, z, A {});
+    }
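+
+    /*
+     * Editor's usage sketch (illustrative, not upstream code): the fused
+     * operations above differ only in the signs they apply:
+     *
+     * \code{.cpp}
+     * xsimd::batch<float> x(2.f), y(3.f), z(1.f);
+     * auto a = xsimd::fma(x, y, z);  // (x * y) + z  ->  7 per lane
+     * auto b = xsimd::fms(x, y, z);  // (x * y) - z  ->  5 per lane
+     * auto c = xsimd::fnma(x, y, z); // -(x * y) + z -> -5 per lane
+     * \endcode
+     */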
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Computes <tt>-(x*y) - z</tt> in a single instruction when possible.
+     * @param x a batch of integer or floating point values.
+     * @param y a batch of integer or floating point values.
+     * @param z a batch of integer or floating point values.
+     * @return the result of the fused negated multiply-sub operation.
+     */
+    template <class T, class A>
+    inline batch<T, A> fnms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::fnms<A>(x, y, z, A {});
+    }
+
+    /**
+     * @ingroup batch_fp
+     *
+     * Splits the number \c x into a normalized fraction and an exponent which is stored in \c y.
+     * @param x a batch of floating point values.
+     * @param y a batch of integer values receiving the exponents.
+     * @return the normalized fraction of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> frexp(const batch<T, A>& x, batch<as_integer_t<T>, A>& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::frexp<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_logical
+     *
+     * Element-wise greater or equal comparison of batches \c x and \c y.
+     * @param x batch involved in the comparison.
+     * @param y batch involved in the comparison.
+     * @return a boolean batch.
+     */
+    template <class T, class A>
+    inline batch_bool<T, A> ge(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return x >= y;
+    }
+
+    /**
+     * @ingroup batch_logical
+     *
+     * Element-wise greater than comparison of batches \c x and \c y.
+     * @param x batch involved in the comparison.
+     * @param y batch involved in the comparison.
+     * @return a boolean batch.
+     */
+    template <class T, class A>
+    inline batch_bool<T, A> gt(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return x > y;
+    }
+
+    /**
+     * @ingroup batch_reducers
+     *
+     * Parallel horizontal addition: adds the scalars of each batch
+     * in the array pointed to by \c row and stores them in the returned
+     * batch.
+     * @param row an array of \c N batches
+     * @return the result of the reduction.
+     */
+    template <class T, class A>
+    inline batch<T, A> haddp(batch<T, A> const* row) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::haddp<A>(row, A {});
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the square root of the sum of the squares of the batches
+     * \c x and \c y.
+     * @param x batch of floating point values.
+     * @param y batch of floating point values.
+     * @return the square root of the sum of the squares of \c x and \c y.
+     */
+    template <class T, class A>
+    inline batch<T, A> hypot(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::hypot<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_complex
+     *
+     * Computes the imaginary part of the batch \c x.
+     * @param x batch of complex or real values.
+     * @return the imaginary part of \c x.
+     */
+    template <class T, class A>
+    inline real_batch_type_t<batch<T, A>> imag(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::imag<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Adds 1 to the batch \c x.
+     * @param x batch involved in the increment.
+     * @return the sum of \c x and 1.
+     */
+    template <class T, class A>
+    inline batch<T, A> incr(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::incr<A>(x, A {});
+    }
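+
+    /*
+     * Editor's usage sketch (illustrative, not upstream code): haddp turns
+     * an array of batch<float>::size batches into one batch whose i-th lane
+     * holds the sum of all lanes of row[i]:
+     *
+     * \code{.cpp}
+     * xsimd::batch<float> row[xsimd::batch<float>::size];
+     * // ... fill each row[i] ...
+     * auto sums = xsimd::haddp(row); // sums.get(i) == horizontal sum of row[i]
+     * \endcode
+     */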
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Adds 1 to the batch \c x for each element where \c mask is true.
+     * @param x batch involved in the increment.
+     * @param mask whether to perform the increment or not. Can be a \c
+     * batch_bool or a \c batch_bool_constant.
+     * @return the sum of \c x and 1 when \c mask is true.
+     */
+    template <class T, class A, class Mask>
+    inline batch<T, A> incr_if(batch<T, A> const& x, Mask const& mask) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::incr_if<A>(x, mask, A {});
+    }
+
+    /**
+     * @ingroup batch_constant
+     *
+     * Return a batch of scalars representing positive infinity
+     * @return a batch of positive infinity
+     */
+    template <class B>
+    inline B infinity()
+    {
+        using T = typename B::value_type;
+        using A = typename B::arch_type;
+        detail::static_check_supported_config<T, A>();
+        return B(std::numeric_limits<T>::infinity());
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Create a new batch equivalent to \c x but with element \c val set at position \c pos
+     * @param x batch
+     * @param val value to set
+     * @param pos index of the updated slot
+     * @return copy of \c x with position \c pos set to \c val
+     */
+    template <class T, class A, size_t I>
+    inline batch<T, A> insert(batch<T, A> const& x, T val, index<I> pos) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::insert<A>(x, val, pos, A {});
+    }
+
+    /**
+     * @ingroup batch_logical
+     *
+     * Determines if the scalars in the given batch \c x represent an even integer value
+     * @param x batch of floating point values.
+     * @return a batch of booleans.
+     */
+    template <class T, class A>
+    inline batch_bool<T, A> is_even(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::is_even<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_logical
+     *
+     * Determines if the floating-point scalars in the given batch \c x represent an integer value
+     * @param x batch of floating point values.
+     * @return a batch of booleans.
+     */
+    template <class T, class A>
+    inline batch_bool<T, A> is_flint(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::is_flint<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_logical
+     *
+     * Determines if the scalars in the given batch \c x represent an odd integer value
+     * @param x batch of floating point values.
+     * @return a batch of booleans.
+     */
+    template <class T, class A>
+    inline batch_bool<T, A> is_odd(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::is_odd<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_logical
+     *
+     * Determines if the scalars in the given batch \c x are inf values.
+     * @param x batch of floating point values.
+     * @return a batch of booleans.
+     */
+    template <class T, class A>
+    inline typename batch<T, A>::batch_bool_type isinf(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::isinf<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_logical
+     *
+     * Determines if the scalars in the given batch \c x are finite values.
+     * @param x batch of floating point values.
+     * @return a batch of booleans.
+     */
+    template <class T, class A>
+    inline typename batch<T, A>::batch_bool_type isfinite(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::isfinite<A>(x, A {});
+    }
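+
+    /*
+     * Editor's usage sketch (illustrative, not upstream code): insert
+     * updates a single lane through a compile-time index:
+     *
+     * \code{.cpp}
+     * xsimd::batch<float> x(0.f);
+     * auto y = xsimd::insert(x, 1.f, xsimd::index<2> {}); // lane 2 becomes 1
+     * \endcode
+     */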
+
+    /**
+     * @ingroup batch_logical
+     *
+     * Determines if the scalars in the given batch \c x are NaN values.
+     * @param x batch of floating point values.
+     * @return a batch of booleans.
+     */
+    template <class T, class A>
+    inline typename batch<T, A>::batch_bool_type isnan(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::isnan<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_math_extra
+     *
+     * Computes the multiplication of the floating point number \c x by 2 raised to the power \c y.
+     * @param x batch of floating point values.
+     * @param y batch of integer values.
+     * @return a batch of floating point values.
+     */
+    template <class T, class A>
+    inline batch<T, A> ldexp(const batch<T, A>& x, const batch<as_integer_t<T>, A>& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::ldexp<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_logical
+     *
+     * Element-wise lesser or equal to comparison of batches \c x and \c y.
+     * @param x batch involved in the comparison.
+     * @param y batch involved in the comparison.
+     * @return a boolean batch.
+     */
+    template <class T, class A>
+    inline batch_bool<T, A> le(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return x <= y;
+    }
+
+    /**
+     * @ingroup batch_math_extra
+     *
+     * Computes the natural logarithm of the gamma function of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the natural logarithm of the gamma function of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> lgamma(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::lgamma<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Creates a batch from the buffer \c ptr and the specified
+     * batch value type \c To. The memory needs to be aligned.
+     * @param ptr the memory buffer to read
+     * @return a new batch instance
+     */
+    template <class To, class A = default_arch, class From>
+    inline simd_return_type<From, To, A> load_as(From const* ptr, aligned_mode) noexcept
+    {
+        using batch_value_type = typename simd_return_type<From, To, A>::value_type;
+        detail::static_check_supported_config<From, A>();
+        detail::static_check_supported_config<To, A>();
+        return kernel::load_aligned<A>(ptr, kernel::convert<batch_value_type> {}, A {});
+    }
+
+    template <class To, class A = default_arch>
+    inline simd_return_type<bool, To, A> load_as(bool const* ptr, aligned_mode) noexcept
+    {
+        detail::static_check_supported_config<To, A>();
+        return simd_return_type<bool, To, A>::load_aligned(ptr);
+    }
+
+    template <class To, class A = default_arch, class From>
+    inline simd_return_type<std::complex<From>, To, A> load_as(std::complex<From> const* ptr, aligned_mode) noexcept
+    {
+        detail::static_check_supported_config<To, A>();
+        using batch_value_type = typename simd_return_type<std::complex<From>, To, A>::value_type;
+        return kernel::load_complex_aligned<A>(ptr, kernel::convert<batch_value_type> {}, A {});
+    }
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+    template <class To, class A = default_arch, class From, bool i3ec>
+    inline simd_return_type<xtl::xcomplex<From, From, i3ec>, To, A> load_as(xtl::xcomplex<From, From, i3ec> const* ptr, aligned_mode) noexcept
+    {
+        detail::static_check_supported_config<To, A>();
+        detail::static_check_supported_config<From, A>();
+        return load_as<To>(reinterpret_cast<std::complex<From> const*>(ptr), aligned_mode());
+    }
+#endif
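+
+    /*
+     * Editor's usage sketch (illustrative, not upstream code): an aligned
+     * load that also widens float data to a batch of double:
+     *
+     * \code{.cpp}
+     * alignas(xsimd::default_arch::alignment())
+     *     float buf[xsimd::batch<double>::size] = { 1.f, 2.f };
+     * auto d = xsimd::load_as<double>(buf, xsimd::aligned_mode {});
+     * \endcode
+     */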
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Creates a batch from the buffer \c ptr and the specified
+     * batch value type \c To. The memory does not need to be aligned.
+     * @param ptr the memory buffer to read
+     * @return a new batch instance
+     */
+    template <class To, class A = default_arch, class From>
+    inline simd_return_type<From, To, A> load_as(From const* ptr, unaligned_mode) noexcept
+    {
+        using batch_value_type = typename simd_return_type<From, To, A>::value_type;
+        detail::static_check_supported_config<To, A>();
+        detail::static_check_supported_config<From, A>();
+        return kernel::load_unaligned<A>(ptr, kernel::convert<batch_value_type> {}, A {});
+    }
+
+    template <class To, class A = default_arch>
+    inline simd_return_type<bool, To, A> load_as(bool const* ptr, unaligned_mode) noexcept
+    {
+        return simd_return_type<bool, To, A>::load_unaligned(ptr);
+    }
+
+    template <class To, class A = default_arch, class From>
+    inline simd_return_type<std::complex<From>, To, A> load_as(std::complex<From> const* ptr, unaligned_mode) noexcept
+    {
+        detail::static_check_supported_config<To, A>();
+        detail::static_check_supported_config<From, A>();
+        using batch_value_type = typename simd_return_type<std::complex<From>, To, A>::value_type;
+        return kernel::load_complex_unaligned<A>(ptr, kernel::convert<batch_value_type> {}, A {});
+    }
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+    template <class To, class A = default_arch, class From, bool i3ec>
+    inline simd_return_type<xtl::xcomplex<From, From, i3ec>, To, A> load_as(xtl::xcomplex<From, From, i3ec> const* ptr, unaligned_mode) noexcept
+    {
+        detail::static_check_supported_config<To, A>();
+        detail::static_check_supported_config<From, A>();
+        return load_as<To>(reinterpret_cast<std::complex<From> const*>(ptr), unaligned_mode());
+    }
+#endif
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Creates a batch from the buffer \c ptr. The
+     * memory needs to be aligned.
+     * @param ptr the memory buffer to read
+     * @return a new batch instance
+     */
+    template <class A = default_arch, class From>
+    inline batch<From, A> load(From const* ptr, aligned_mode = {}) noexcept
+    {
+        detail::static_check_supported_config<From, A>();
+        return load_as<From, A>(ptr, aligned_mode {});
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Creates a batch from the buffer \c ptr. The
+     * memory does not need to be aligned.
+     * @param ptr the memory buffer to read
+     * @return a new batch instance
+     */
+    template <class A = default_arch, class From>
+    inline batch<From, A> load(From const* ptr, unaligned_mode) noexcept
+    {
+        detail::static_check_supported_config<From, A>();
+        return load_as<From, A>(ptr, unaligned_mode {});
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Creates a batch from the buffer \c ptr. The
+     * memory needs to be aligned.
+     * @param ptr the memory buffer to read
+     * @return a new batch instance
+     */
+    template <class A = default_arch, class From>
+    inline batch<From, A> load_aligned(From const* ptr) noexcept
+    {
+        detail::static_check_supported_config<From, A>();
+        return load_as<From, A>(ptr, aligned_mode {});
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Creates a batch from the buffer \c ptr. The
+     * memory does not need to be aligned.
+     * @param ptr the memory buffer to read
+     * @return a new batch instance
+     */
+    template <class A = default_arch, class From>
+    inline batch<From, A> load_unaligned(From const* ptr) noexcept
+    {
+        detail::static_check_supported_config<From, A>();
+        return load_as<From, A>(ptr, unaligned_mode {});
+    }
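+
+    /*
+     * Editor's usage sketch (illustrative, not upstream code): the
+     * tag-dispatched load and the named variants are interchangeable:
+     *
+     * \code{.cpp}
+     * float buf[2 * xsimd::batch<float>::size] = {};
+     * auto a = xsimd::load_unaligned(buf + 1);
+     * auto b = xsimd::load(buf + 1, xsimd::unaligned_mode {}); // same load
+     * \endcode
+     */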
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the natural logarithm of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the natural logarithm of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> log(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::log<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_math
+     * Computes the base 2 logarithm of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the base 2 logarithm of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> log2(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::log2<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_math
+     * Computes the base 10 logarithm of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the base 10 logarithm of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> log10(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::log10<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_math
+     * Computes the natural logarithm of one plus the batch \c x.
+     * @param x batch of floating point values.
+     * @return the natural logarithm of one plus \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> log1p(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::log1p<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_logical
+     *
+     * Element-wise lesser than comparison of batches \c x and \c y.
+     * @param x batch involved in the comparison.
+     * @param y batch involved in the comparison.
+     * @return a boolean batch.
+     */
+    template <class T, class A>
+    inline batch_bool<T, A> lt(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return x < y;
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the larger values of the batches \c x and \c y.
+     * @param x a batch of integer or floating point values.
+     * @param y a batch of integer or floating point values.
+     * @return a batch of the larger values.
+     */
+    template <class T, class A>
+    inline batch<T, A> max(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::max<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the smaller values of the batches \c x and \c y.
+     * @param x a batch of integer or floating point values.
+     * @param y a batch of integer or floating point values.
+     * @return a batch of the smaller values.
+     */
+    template <class T, class A>
+    inline batch<T, A> min(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::min<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_constant
+     *
+     * Return a batch of scalars representing negative infinity
+     * @return a batch of negative infinity
+     */
+    template <class B>
+    inline B minusinfinity() noexcept
+    {
+        using T = typename B::value_type;
+        using A = typename B::arch_type;
+        detail::static_check_supported_config<T, A>();
+        return B(-std::numeric_limits<T>::infinity());
+    }
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Computes the integer modulo of the batch \c x by the batch \c y.
+     * @param x batch involved in the modulo.
+     * @param y batch involved in the modulo.
+     * @return the result of the modulo.
+     */
+    template <class T, class A>
+    inline auto mod(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x % y)
+    {
+        detail::static_check_supported_config<T, A>();
+        return x % y;
+    }
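+
+    /*
+     * Editor's usage sketch (illustrative, not upstream code): mod is the
+     * integer counterpart of fmod above:
+     *
+     * \code{.cpp}
+     * xsimd::batch<int32_t> n(7), d(3);
+     * auto r = xsimd::mod(n, d); // 1 per lane
+     * \endcode
+     */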
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Computes the product of the batches \c x and \c y.
+     * @param x batch involved in the product.
+     * @param y batch involved in the product.
+     * @return the result of the product.
+     */
+    template <class T, class A>
+    inline auto mul(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x * y)
+    {
+        detail::static_check_supported_config<T, A>();
+        return x * y;
+    }
+
+    /**
+     * @ingroup batch_rounding
+     *
+     * Rounds the scalars in \c x to integer values (in floating point format), using
+     * the current rounding mode.
+     * @param x batch of floating point values.
+     * @return the batch of nearest integer values.
+     */
+    template <class T, class A>
+    inline batch<T, A> nearbyint(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::nearbyint<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_rounding
+     *
+     * Rounds the scalars in \c x to integer values (in integer format) using
+     * the current rounding mode.
+     * @param x batch of floating point values.
+     * @return the batch of nearest integer values.
+     *
+     * @warning For very large values the conversion to int silently overflows.
+     */
+    template <class T, class A>
+    inline batch<as_integer_t<T>, A>
+    nearbyint_as_int(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::nearbyint_as_int(x, A {});
+    }
+
+    /**
+     * @ingroup batch_logical
+     *
+     * Element-wise inequality comparison of batches \c x and \c y.
+     * @param x batch involved in the comparison.
+     * @param y batch involved in the comparison.
+     * @return a boolean batch.
+     */
+    template <class T, class A>
+    inline auto neq(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x != y)
+    {
+        detail::static_check_supported_config<T, A>();
+        return x != y;
+    }
+
+    /**
+     * @ingroup batch_logical
+     *
+     * Element-wise inequality comparison of batches of boolean values \c x and \c y.
+     * @param x batch of booleans involved in the comparison.
+     * @param y batch of booleans involved in the comparison.
+     * @return a boolean batch.
+     */
+    template <class T, class A>
+    inline auto neq(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x != y)
+    {
+        detail::static_check_supported_config<T, A>();
+        return x != y;
+    }
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Computes the opposite of the batch \c x.
+     * @param x batch involved in the operation.
+     * @return the opposite of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> neg(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return -x;
+    }
+
+    /**
+     * @ingroup batch_math_extra
+     *
+     * Computes the next representable floating-point
+     * value following \c x in the direction of \c y.
+     * @param x batch of floating point values.
+     * @param y batch of floating point values.
+     * @return the next representable value after \c x in the direction of \c y.
+     */
+    template <class T, class A>
+    inline batch<T, A> nextafter(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::nextafter<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_complex
+     *
+     * Computes the norm of the batch \c x.
+     * @param x batch of complex or real values.
+     * @return the norm of \c x.
+     */
+    template <class T, class A>
+    inline real_batch_type_t<batch<T, A>> norm(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::norm(x, A {});
+    }
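+
+    /*
+     * Editor's usage sketch (illustrative, not upstream code): rounding
+     * straight into an integer batch; note the overflow caveat above:
+     *
+     * \code{.cpp}
+     * xsimd::batch<float> x(2.5f);
+     * auto i = xsimd::nearbyint_as_int(x); // batch<int32_t>, ties-to-even: 2
+     * \endcode
+     */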
+
+    /**
+     * @ingroup batch_math
+     *
+     * Returns a complex batch with magnitude \c r and phase angle \c theta.
+     * @param r The magnitude of the desired complex result.
+     * @param theta The phase angle of the desired complex result.
+     * @return \c r exp(i * \c theta).
+     */
+    template <class T, class A>
+    inline complex_batch_type_t<batch<T, A>> polar(batch<T, A> const& r, batch<T, A> const& theta = batch<T, A> {}) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::polar<A>(r, theta, A {});
+    }
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * No-op on \c x.
+     * @param x batch involved in the operation.
+     * @return \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> pos(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return +x;
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the value of the batch \c x raised to the power
+     * \c y.
+     * @param x batch of floating point values.
+     * @param y batch of floating point values.
+     * @return \c x raised to the power \c y.
+     */
+    template <class T, class A>
+    inline batch<T, A> pow(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::pow<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the value of the batch \c x raised to the power
+     * \c y.
+     * @param x batch of scalar values.
+     * @param y integral scalar exponent.
+     * @return \c x raised to the power \c y.
+     */
+    template <class T, class ITy, class A, class = typename std::enable_if<std::is_integral<ITy>::value, void>::type>
+    inline batch<T, A> pow(batch<T, A> const& x, ITy y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::ipow<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_complex
+     *
+     * Computes the projection of the batch \c z.
+     * @param z batch of complex or real values.
+     * @return the projection of \c z.
+     */
+    template <class T, class A>
+    inline complex_batch_type_t<batch<T, A>> proj(batch<T, A> const& z) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::proj(z, A {});
+    }
+
+    /**
+     * @ingroup batch_complex
+     *
+     * Computes the real part of the batch \c z.
+     * @param z batch of complex or real values.
+     * @return the real part of \c z.
+     */
+    template <class T, class A>
+    inline real_batch_type_t<batch<T, A>> real(batch<T, A> const& z) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::real<A>(z, A {});
+    }
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Computes the approximate reciprocal of the batch \c x.
+     * The maximum relative error for this approximation is
+     * less than 1.5*2^-12.
+     * @param x batch of floating point numbers.
+     * @return the reciprocal.
+     */
+    template <class T, class A, class = typename std::enable_if<std::is_floating_point<T>::value, void>::type>
+    inline batch<T, A> reciprocal(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::reciprocal(x, A {});
+    }
+
+    /**
+     * @ingroup batch_reducers
+     *
+     * Generic reducer using only batch operations
+     * @param f reducing function, accepting `batch ()(batch, batch)`
+     * @param x batch involved in the reduction
+     * @return the result of the reduction, as a scalar.
+     */
+    template <class T, class A, class F>
+    inline T reduce(F&& f, batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::detail::reduce(std::forward<F>(f), x, std::integral_constant<unsigned, batch<T, A>::size>());
+    }
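+
+    /*
+     * Editor's usage sketch (illustrative, not upstream code): reduce builds
+     * a full reduction from a batch-level combiner:
+     *
+     * \code{.cpp}
+     * xsimd::batch<float> x(2.0f);
+     * float product = xsimd::reduce(
+     *     [](xsimd::batch<float> a, xsimd::batch<float> b) { return a * b; },
+     *     x); // product of all lanes
+     * \endcode
+     */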
+
+    /**
+     * @ingroup batch_reducers
+     *
+     * Adds all the scalars of the batch \c x.
+     * @param x batch involved in the reduction
+     * @return the result of the reduction.
+     */
+    template <class T, class A>
+    inline T reduce_add(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::reduce_add<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_reducers
+     *
+     * Max of all the scalars of the batch \c x.
+     * @param x batch involved in the reduction
+     * @return the result of the reduction.
+     */
+    template <class T, class A>
+    inline T reduce_max(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::reduce_max<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_reducers
+     *
+     * Min of all the scalars of the batch \c x.
+     * @param x batch involved in the reduction
+     * @return the result of the reduction.
+     */
+    template <class T, class A>
+    inline T reduce_min(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::reduce_min<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the remainder of dividing \c x by \c y
+     * @param x batch of scalar values
+     * @param y batch of scalar values
+     * @return the remainder of the division.
+     */
+    template <class T, class A>
+    inline batch<T, A> remainder(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::remainder<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_rounding
+     *
+     * Rounds the scalars in \c x to integer values (in floating point format), using
+     * the current rounding mode.
+     * @param x batch of floating point values.
+     * @return the batch of rounded values.
+     */
+    template <class T, class A>
+    inline batch<T, A> rint(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return nearbyint(x);
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Slide the whole batch to the left by \c N bytes, and reintroduce the
+     * slid-out elements from the right. This is different from
+     * \c rotl that rotates each batch element to the left.
+     *
+     * @tparam N Amount of bytes to rotate to the left.
+     * @param x batch of integer values.
+     * @return rotated batch.
+     */
+    template <size_t N, class T, class A>
+    inline batch<T, A> rotate_left(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::rotate_left<N, A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Slide the whole batch to the right by \c N bytes, and reintroduce the
+     * slid-out elements from the left. This is different from
+     * \c rotr that rotates each batch element to the right.
+     *
+     * @tparam N Amount of bytes to rotate to the right.
+     * @param x batch of integer values.
+     * @return rotated batch.
+     */
+    template <size_t N, class T, class A>
+    inline batch<T, A> rotate_right(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::rotate_right<N, A>(x, A {});
+    }
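+
+    /*
+     * Editor's usage sketch (illustrative, not upstream code; assumes 4
+     * uint32_t lanes): rotate_left moves whole bytes around the batch,
+     * while rotl below rotates the bits inside each lane:
+     *
+     * \code{.cpp}
+     * xsimd::batch<uint32_t> x { 1, 2, 3, 4 };
+     * auto lanes = xsimd::rotate_left<4>(x); // rotate the batch by 4 bytes
+     * auto bits = xsimd::rotl(x, 1);         // {2, 4, 6, 8}
+     * \endcode
+     */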
+     */
+    template <class T, class A>
+    inline batch<T, A> rotl(batch<T, A> const& x, int shift) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::rotl<A>(x, shift, A {});
+    }
+    template <class T, class A>
+    inline batch<T, A> rotl(batch<T, A> const& x, batch<T, A> const& shift) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::rotl<A>(x, shift, A {});
+    }
+
+    /**
+     * @ingroup batch_bitwise
+     *
+     * Performs a bitwise shift to the right, reintroducing the shifted-out bits
+     * to the left.
+     * @param x batch to rotate
+     * @param shift scalar amount to shift
+     * @return rotated \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> rotr(batch<T, A> const& x, int shift) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::rotr<A>(x, shift, A {});
+    }
+    template <class T, class A>
+    inline batch<T, A> rotr(batch<T, A> const& x, batch<T, A> const& shift) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::rotr<A>(x, shift, A {});
+    }
+
+    /**
+     * @ingroup batch_rounding
+     *
+     * Computes the batch of nearest integer values to scalars in \c x (in
+     * floating point format), rounding halfway cases away from zero, regardless
+     * of the current rounding mode.
+     * @param x batch of floating point values.
+     * @return the batch of nearest integer values.
+     */
+    template <class T, class A>
+    inline batch<T, A> round(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::round<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes an estimate of the inverse square root of the batch \c x.
+     *
+     * @warning Unlike most xsimd functions, this does not return the same result as the
+     * equivalent scalar operation, trading accuracy for speed.
+     *
+     * @param x batch of floating point values.
+     * @return the inverse square root of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> rsqrt(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::rsqrt<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Computes the saturated sum of the batch \c x and the batch \c y.
+     * @param x batch involved in the saturated addition.
+     * @param y batch involved in the saturated addition.
+     * @return the result of the saturated addition.
+     */
+    template <class T, class A>
+    inline batch<T, A> sadd(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::sadd<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_miscellaneous
+     *
+     * Ternary operator for batches: selects values from the batches \c true_br or \c false_br
+     * depending on the boolean values in the batch \c cond. Equivalent to
+     * \code{.cpp}
+     * for(std::size_t i = 0; i < N; ++i)
+     *     res[i] = cond[i] ? true_br[i] : false_br[i];
+     * \endcode
+     * @param cond batch condition.
+     * @param true_br batch values for truthy condition.
+     * @param false_br batch values for falsy condition.
+     * @return the result of the selection.
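+     * A minimal usage sketch (illustrative):
+     * \code{.cpp}
+     * xsimd::batch<float> a(1.f), b(2.f);
+     * auto m = xsimd::select(a > b, a, b); // element-wise max of a and b
+     * \endcode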
+     */
+    template <class T, class A>
+    inline batch<T, A> select(batch_bool<T, A> const& cond, batch<T, A> const& true_br, batch<T, A> const& false_br) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::select<A>(cond, true_br, false_br, A {});
+    }
+
+    /**
+     * @ingroup batch_miscellaneous
+     *
+     * Ternary operator for batches: selects values from the batches \c true_br or \c false_br
+     * depending on the boolean values in the batch \c cond. Equivalent to
+     * \code{.cpp}
+     * for(std::size_t i = 0; i < N; ++i)
+     *     res[i] = cond[i] ? true_br[i] : false_br[i];
+     * \endcode
+     * @param cond batch condition.
+     * @param true_br batch values for truthy condition.
+     * @param false_br batch values for falsy condition.
+     * @return the result of the selection.
+     */
+    template <class T, class A>
+    inline batch<std::complex<T>, A> select(batch_bool<T, A> const& cond, batch<std::complex<T>, A> const& true_br, batch<std::complex<T>, A> const& false_br) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::select<A>(cond, true_br, false_br, A {});
+    }
+
+    /**
+     * @ingroup batch_miscellaneous
+     *
+     * Ternary operator for batches: selects values from the batches \c true_br or \c false_br
+     * depending on the boolean values in the constant batch \c cond. Equivalent to
+     * \code{.cpp}
+     * for(std::size_t i = 0; i < N; ++i)
+     *     res[i] = cond[i] ? true_br[i] : false_br[i];
+     * \endcode
+     * @param cond constant batch condition.
+     * @param true_br batch values for truthy condition.
+     * @param false_br batch values for falsy condition.
+     * @return the result of the selection.
+     */
+    template <class T, class A, bool... Values>
+    inline batch<T, A> select(batch_bool_constant<batch<T, A>, Values...> const& cond, batch<T, A> const& true_br, batch<T, A> const& false_br) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::select<A>(cond, true_br, false_br, A {});
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Combines elements from \c x and \c y according to selector \c mask.
+     * @param x batch
+     * @param y batch
+     * @param mask constant batch mask of integer elements of the same size as
+     * the elements of \c x and \c y. Each element of the mask indexes into the
+     * vector formed by the concatenation of \c x and \c y. For instance
+     * \code{.cpp}
+     * batch_constant<batch<uint32_t, sse2>, 0, 4, 3, 7>
+     * \endcode
+     * picks \c x[0], \c y[0], \c x[3], \c y[3].
+     *
+     * @return combined batch
+     */
+    template <class T, class A, class Vt, Vt... Values>
+    inline typename std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type
+    shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<batch<Vt, A>, Values...> mask) noexcept
+    {
+        static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
+        detail::static_check_supported_config<T, A>();
+        return kernel::shuffle<A>(x, y, mask, A {});
+    }
+
+    /**
+     * @ingroup batch_miscellaneous
+     *
+     * Computes the sign of \c x.
+     * @param x batch
+     * @return -1 for each negative element, 0 for each zero element and +1 for
+     * each positive element
+     */
+    template <class T, class A>
+    inline batch<T, A> sign(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::sign<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_miscellaneous
+     *
+     * Computes the sign of \c x, assuming \c x contains no zero.
+     * @param x batch
+     * @return -1 for each negative element, +1 for each positive element, and
+     * -1 or +1 for each zero element, depending on its sign bit
+     */
+    template <class T, class A>
+    inline batch<T, A> signnz(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::signnz<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_trigo
+     *
+     * Computes the sine of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the sine of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> sin(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::sin<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_trigo
+     *
+     * Computes the sine and the cosine of the batch \c x. This method is faster
+     * than calling sine and cosine independently.
+     * @param x batch of floating point values.
+     * @return a pair containing the sine then the cosine of batch \c x
+     */
+    template <class T, class A>
+    inline std::pair<batch<T, A>, batch<T, A>> sincos(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::sincos<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_trigo
+     *
+     * Computes the hyperbolic sine of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the hyperbolic sine of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> sinh(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::sinh<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Slides the whole batch to the left by \c N bytes. This is different from
+     * \c bitwise_lshift, which shifts each batch element to the left.
+     *
+     * @tparam N Amount of bytes to slide to the left.
+     * @param x batch of integer values.
+     * @return the slid batch.
+     */
+    template <size_t N, class T, class A>
+    inline batch<T, A> slide_left(batch<T, A> const& x) noexcept
+    {
+        static_assert(std::is_integral<T>::value, "can only slide batch of integers");
+        detail::static_check_supported_config<T, A>();
+        return kernel::slide_left<N, A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Slides the whole batch to the right by \c N bytes. This is different from
+     * \c bitwise_rshift, which shifts each batch element to the right.
+     *
+     * @tparam N Amount of bytes to slide to the right.
+     * @param x batch of integer values.
+     * @return the slid batch.
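+     * A minimal usage sketch (illustrative; assumes a 4-element batch of
+     * uint32_t, i.e. a 16-byte register, with an SSE-style lane layout):
+     * \code{.cpp}
+     * xsimd::batch<uint32_t> v { 1u, 2u, 3u, 4u };
+     * auto r = xsimd::slide_right<4>(v); // {2u, 3u, 4u, 0u}
+     * \endcode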
+     */
+    template <size_t N, class T, class A>
+    inline batch<T, A> slide_right(batch<T, A> const& x) noexcept
+    {
+        static_assert(std::is_integral<T>::value, "can only slide batch of integers");
+        detail::static_check_supported_config<T, A>();
+        return kernel::slide_right<N, A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_math
+     *
+     * Computes the square root of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the square root of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> sqrt(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::sqrt<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Computes the saturated difference of the batch \c x and the batch \c y.
+     * @param x batch involved in the saturated difference.
+     * @param y batch involved in the saturated difference.
+     * @return the result of the saturated difference.
+     */
+    template <class T, class A>
+    inline batch<T, A> ssub(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::ssub<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Copies the content of batch \c src to the buffer \c dst. The
+     * memory needs to be aligned.
+     * @param dst the memory buffer to write to
+     * @param src the batch to copy
+     */
+    template <class To, class A = default_arch, class From>
+    inline void store_as(To* dst, batch<From, A> const& src, aligned_mode) noexcept
+    {
+        kernel::store_aligned(dst, src, A {});
+    }
+
+    template <class A = default_arch, class From>
+    inline void store_as(bool* dst, batch_bool<From, A> const& src, aligned_mode) noexcept
+    {
+        kernel::store(src, dst, A {});
+    }
+
+    template <class To, class A = default_arch, class From>
+    inline void store_as(std::complex<To>* dst, batch<std::complex<From>, A> const& src, aligned_mode) noexcept
+    {
+        kernel::store_complex_aligned(dst, src, A {});
+    }
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+    template <class To, class A = default_arch, class From, bool i3ec>
+    inline void store_as(xtl::xcomplex<To, To, i3ec>* dst, batch<std::complex<From>, A> const& src, aligned_mode) noexcept
+    {
+        store_as(reinterpret_cast<std::complex<To>*>(dst), src, aligned_mode());
+    }
+#endif
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Copies the content of batch \c src to the buffer \c dst. The
+     * memory does not need to be aligned.
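+     * A minimal usage sketch (illustrative; `buf` may live at any address):
+     * \code{.cpp}
+     * xsimd::batch<float> v(1.5f);
+     * float buf[xsimd::batch<float>::size];
+     * xsimd::store_as(buf, v, xsimd::unaligned_mode {});
+     * \endcode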
+     * @param dst the memory buffer to write to
+     * @param src the batch to copy
+     */
+    template <class To, class A = default_arch, class From>
+    inline void store_as(To* dst, batch<From, A> const& src, unaligned_mode) noexcept
+    {
+        kernel::store_unaligned(dst, src, A {});
+    }
+
+    template <class A = default_arch, class From>
+    inline void store_as(bool* dst, batch_bool<From, A> const& src, unaligned_mode) noexcept
+    {
+        kernel::store(src, dst, A {});
+    }
+
+    template <class To, class A = default_arch, class From>
+    inline void store_as(std::complex<To>* dst, batch<std::complex<From>, A> const& src, unaligned_mode) noexcept
+    {
+        kernel::store_complex_unaligned(dst, src, A {});
+    }
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+    template <class To, class A = default_arch, class From, bool i3ec>
+    inline void store_as(xtl::xcomplex<To, To, i3ec>* dst, batch<std::complex<From>, A> const& src, unaligned_mode) noexcept
+    {
+        store_as(reinterpret_cast<std::complex<To>*>(dst), src, unaligned_mode());
+    }
+#endif
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Copies the content of batch \c val to the buffer \c mem. The
+     * memory needs to be aligned.
+     * @param mem the memory buffer to write to
+     * @param val the batch to copy from
+     */
+    template <class A, class T>
+    inline void store(T* mem, batch<T, A> const& val, aligned_mode = {}) noexcept
+    {
+        store_as<T, A>(mem, val, aligned_mode {});
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Copies the content of batch \c val to the buffer \c mem. The
+     * memory does not need to be aligned.
+     * @param mem the memory buffer to write to
+     * @param val the batch to copy from
+     */
+    template <class A, class T>
+    inline void store(T* mem, batch<T, A> const& val, unaligned_mode) noexcept
+    {
+        store_as<T, A>(mem, val, unaligned_mode {});
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Copies the content of batch \c val to the buffer \c mem. The
+     * memory needs to be aligned.
+     * @param mem the memory buffer to write to
+     * @param val the batch to copy from
+     */
+    template <class A, class T>
+    inline void store_aligned(T* mem, batch<T, A> const& val) noexcept
+    {
+        store_as<T, A>(mem, val, aligned_mode {});
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Copies the content of batch \c val to the buffer \c mem. The
+     * memory does not need to be aligned.
+     * @param mem the memory buffer to write to
+     * @param val the batch to copy from
+     */
+    template <class A, class T>
+    inline void store_unaligned(T* mem, batch<T, A> const& val) noexcept
+    {
+        store_as<T, A>(mem, val, unaligned_mode {});
+    }
+
+    /**
+     * @ingroup batch_arithmetic
+     *
+     * Computes the difference between \c x and \c y.
+     * @param x scalar or batch of scalars
+     * @param y scalar or batch of scalars
+     * @return the difference between \c x and \c y
+     */
+    template <class T, class A>
+    inline auto sub(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x - y)
+    {
+        detail::static_check_supported_config<T, A>();
+        return x - y;
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Rearranges elements from \c x according to constant mask \c mask.
+     * @param x batch
+     * @param mask constant batch mask of integer elements of the same size as
+     * the elements of \c x
+     * @return swizzled batch
+     */
+    template <class T, class A, class Vt, Vt... Values>
+    inline typename std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type
+    swizzle(batch<T, A> const& x, batch_constant<batch<Vt, A>, Values...> mask) noexcept
+    {
+        static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
+        detail::static_check_supported_config<T, A>();
+        return kernel::swizzle<A>(x, mask, A {});
+    }
+    template <class T, class A, class Vt, Vt... Values>
+    inline batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& x, batch_constant<batch<Vt, A>, Values...> mask) noexcept
+    {
+        static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
+        detail::static_check_supported_config<T, A>();
+        return kernel::swizzle<A>(x, mask, A {});
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Rearranges elements from \c x according to mask \c mask.
+     * @param x batch
+     * @param mask batch mask of integer elements of the same size as
+     * the elements of \c x
+     * @return swizzled batch
+     */
+    template <class T, class A, class Vt>
+    inline typename std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type
+    swizzle(batch<T, A> const& x, batch<Vt, A> mask) noexcept
+    {
+        static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
+        detail::static_check_supported_config<T, A>();
+        return kernel::swizzle<A>(x, mask, A {});
+    }
+
+    template <class T, class A, class Vt>
+    inline batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& x, batch<Vt, A> mask) noexcept
+    {
+        static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
+        detail::static_check_supported_config<T, A>();
+        return kernel::swizzle<A>(x, mask, A {});
+    }
+
+    /**
+     * @ingroup batch_trigo
+     *
+     * Computes the tangent of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the tangent of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> tan(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::tan<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_trigo
+     *
+     * Computes the hyperbolic tangent of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the hyperbolic tangent of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> tanh(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::tanh<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_math_extra
+     *
+     * Computes the gamma function of the batch \c x.
+     * @param x batch of floating point values.
+     * @return the gamma function of \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> tgamma(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::tgamma<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_conversion
+     *
+     * Performs a conversion from \c i to a value of a floating point type of the same size as \c T.
+     * This is equivalent to \c batch_cast<as_float_t<T>>(i).
+     * @param i batch of integers.
+     * @return \c i converted to a value of a floating point type of the same size as \c T
+     */
+    template <class T, class A>
+    inline batch<as_float_t<T>, A> to_float(batch<T, A> const& i) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return batch_cast<as_float_t<T>>(i);
+    }
+
+    /**
+     * @ingroup batch_conversion
+     *
+     * Performs a conversion from \c x to a value of an integer type of the same size as \c T.
+     * This is equivalent to \c batch_cast<as_integer_t<T>>(x).
+     * @param x batch.
+     * @return \c x converted to a value of an integer type of the same size as \c T
+     */
+    template <class T, class A>
+    inline batch<as_integer_t<T>, A> to_int(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return batch_cast<as_integer_t<T>>(x);
+    }
+
+    /**
+     * @ingroup batch_rounding
+     *
+     * Computes the batch of nearest integer values not greater in magnitude
+     * than scalars in \c x.
+     * @param x batch of floating point values.
+     * @return the batch of nearest integer values not greater in magnitude than \c x.
+     */
+    template <class T, class A>
+    inline batch<T, A> trunc(batch<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::trunc<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Unpacks and interleaves data from the HIGH half of batches \c x and \c y,
+     * and stores the result in the returned batch.
+     * @param x a batch of integer or floating point values.
+     * @param y a batch of integer or floating point values.
+     * @return a batch of the interleaved high halves.
+     */
+    template <class T, class A>
+    inline batch<T, A> zip_hi(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::zip_hi<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_data_transfer
+     *
+     * Unpacks and interleaves data from the LOW half of batches \c x and \c y,
+     * and stores the result in the returned batch.
+     * @param x a batch of integer or floating point values.
+     * @param y a batch of integer or floating point values.
+     * @return a batch of the interleaved low halves.
+     */
+    template <class T, class A>
+    inline batch<T, A> zip_lo(batch<T, A> const& x, batch<T, A> const& y) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::zip_lo<A>(x, y, A {});
+    }
+
+    /**
+     * @ingroup batch_conversion
+     *
+     * Casts a \c batch_bool of \c T into a \c batch of the same type, using the
+     * following rule: if an element of \c self is true, it maps to -1 in the
+     * returned integral batch, otherwise it maps to 0.
+     *
+     * @param self batch_bool of \c T
+     * @return \c self cast to a \c batch of \c T
+     */
+    template <class T, class A, typename std::enable_if<std::is_integral<T>::value, int>::type = 3>
+    inline batch<T, A> bitwise_cast(batch_bool<T, A> const& self) noexcept
+    {
+        T z(0);
+        detail::static_check_supported_config<T, A>();
+        return select(self, batch<T, A>(T(~z)), batch<T, A>(z));
+    }
+
+    template <class T, class A, typename std::enable_if<std::is_floating_point<T>::value, int>::type = 3>
+    inline batch<T, A> bitwise_cast(batch_bool<T, A> const& self) noexcept
+    {
+        T z0(0), z1(0);
+        using int_type = as_unsigned_integer_t<T>;
+        int_type value(~int_type(0));
+        std::memcpy(&z1, &value, sizeof(int_type));
+        detail::static_check_supported_config<T, A>();
+        return select(self, batch<T, A>(z1), batch<T, A>(z0));
+    }
+
+    /**
+     * @ingroup batch_bool_reducers
+     *
+     * Returns true if all the boolean values in the batch are true,
+     * false otherwise.
+     * @param x the batch to reduce.
+     * @return a boolean scalar.
+     */
+    template <class T, class A>
+    inline bool all(batch_bool<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::all<A>(x, A {});
+    }
+
+    /**
+     * @ingroup batch_bool_reducers
+     *
+     * Return true if any of the boolean values in the batch is true,
+     * false otherwise.
+     * @param x the batch to reduce.
+ * @return a boolean scalar. + */ + template <class T, class A> + inline bool any(batch_bool<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::any<A>(x, A {}); + } + + /** + * @ingroup batch_bool_reducers + * + * Return true if none of the boolean values in the batch is true, + * false otherwise. + * @param x the batch to reduce. + * @return a boolean scalar. + */ + template <class T, class A> + inline bool none(batch_bool<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return !xsimd::any(x); + } + + /** + * @ingroup batch_miscellaneous + * + * Dump the content of batch \c x to stream \c o + * @param o the stream where the batch is dumped + * @param x batch to dump. + * @return a reference to \c o + */ + template <class T, class A> + inline std::ostream& operator<<(std::ostream& o, batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + constexpr auto size = batch<T, A>::size; + alignas(A::alignment()) T buffer[size]; + x.store_aligned(&buffer[0]); + o << '('; + for (std::size_t i = 0; i < size - 1; ++i) + o << buffer[i] << ", "; + return o << buffer[size - 1] << ')'; + } + + /** + * @ingroup batch_miscellaneous + * + * Dump the content of batch \c x to stream \c o + * @param o the stream where the batch is dumped + * @param x batch to dump. + * @return a reference to \c o + */ + template <class T, class A> + inline std::ostream& operator<<(std::ostream& o, batch_bool<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + constexpr auto size = batch_bool<T, A>::size; + alignas(A::alignment()) bool buffer[size]; + x.store_aligned(&buffer[0]); + o << '('; + for (std::size_t i = 0; i < size - 1; ++i) + o << buffer[i] << ", "; + return o << buffer[size - 1] << ')'; + } +} + +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx2_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx2_register.hpp new file mode 100644 index 0000000000..cd10383e2b --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_avx2_register.hpp @@ -0,0 +1,40 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. 
* + ****************************************************************************/ + +#ifndef XSIMD_AVX2_REGISTER_HPP +#define XSIMD_AVX2_REGISTER_HPP + +#include "./xsimd_avx_register.hpp" + +namespace xsimd +{ + /** + * @ingroup architectures + * + * AVX2 instructions + */ + struct avx2 : avx + { + static constexpr bool supported() noexcept { return XSIMD_WITH_AVX2; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(2, 2, 0); } + static constexpr char const* name() noexcept { return "avx2"; } + }; + +#if XSIMD_WITH_AVX2 + namespace types + { + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx2, avx); + } +#endif +} + +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp new file mode 100644 index 0000000000..15c19832ae --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp @@ -0,0 +1,48 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_AVX512BW_REGISTER_HPP +#define XSIMD_AVX512BW_REGISTER_HPP + +#include "./xsimd_avx512dq_register.hpp" + +namespace xsimd +{ + + /** + * @ingroup architectures + * + * AVX512BW instructions + */ + struct avx512bw : avx512dq + { + static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512BW; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(3, 4, 0); } + static constexpr char const* name() noexcept { return "avx512bw"; } + }; + +#if XSIMD_WITH_AVX512BW + + namespace types + { + template <class T> + struct get_bool_simd_register<T, avx512bw> + { + using type = simd_avx512_bool_register<T>; + }; + + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512bw, avx512dq); + + } +#endif +} +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp new file mode 100644 index 0000000000..29efca368c --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp @@ -0,0 +1,48 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. 
* + ****************************************************************************/ + +#ifndef XSIMD_AVX512CD_REGISTER_HPP +#define XSIMD_AVX512CD_REGISTER_HPP + +#include "./xsimd_avx512f_register.hpp" + +namespace xsimd +{ + + /** + * @ingroup architectures + * + * AVX512CD instructions + */ + struct avx512cd : avx512f + { + static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512CD; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(3, 2, 0); } + static constexpr char const* name() noexcept { return "avx512cd"; } + }; + +#if XSIMD_WITH_AVX512CD + + namespace types + { + template <class T> + struct get_bool_simd_register<T, avx512cd> + { + using type = simd_avx512_bool_register<T>; + }; + + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512cd, avx512f); + + } +#endif +} +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp new file mode 100644 index 0000000000..25a255ec15 --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp @@ -0,0 +1,48 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_AVX512DQ_REGISTER_HPP +#define XSIMD_AVX512DQ_REGISTER_HPP + +#include "./xsimd_avx512cd_register.hpp" + +namespace xsimd +{ + + /** + * @ingroup architectures + * + * AVX512DQ instructions + */ + struct avx512dq : avx512cd + { + static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512DQ; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(3, 3, 0); } + static constexpr char const* name() noexcept { return "avx512dq"; } + }; + +#if XSIMD_WITH_AVX512DQ + + namespace types + { + template <class T> + struct get_bool_simd_register<T, avx512dq> + { + using type = simd_avx512_bool_register<T>; + }; + + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512dq, avx512cd); + + } +#endif +} +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp new file mode 100644 index 0000000000..a99157cf37 --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp @@ -0,0 +1,48 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. 
* + ****************************************************************************/ + +#ifndef XSIMD_AVX512ER_REGISTER_HPP +#define XSIMD_AVX512ER_REGISTER_HPP + +#include "./xsimd_avx512dq_register.hpp" + +namespace xsimd +{ + + /** + * @ingroup architectures + * + * AVX512ER instructions + */ + struct avx512er : avx512cd + { + static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512ER; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(3, 3, 1); } + static constexpr char const* name() noexcept { return "avx512er"; } + }; + +#if XSIMD_WITH_AVX512ER + + namespace types + { + template <class T> + struct get_bool_simd_register<T, avx512er> + { + using type = simd_avx512_bool_register<T>; + }; + + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512er, avx512cd); + + } +#endif +} +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp new file mode 100644 index 0000000000..c1f80a122d --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp @@ -0,0 +1,74 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_AVX512F_REGISTER_HPP +#define XSIMD_AVX512F_REGISTER_HPP + +#include "./xsimd_generic_arch.hpp" + +namespace xsimd +{ + + /** + * @ingroup architectures + * + * AVX512F instructions + */ + struct avx512f : generic + { + static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512F; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(3, 1, 0); } + static constexpr std::size_t alignment() noexcept { return 64; } + static constexpr bool requires_alignment() noexcept { return true; } + static constexpr char const* name() noexcept { return "avx512f"; } + }; + +#if XSIMD_WITH_AVX512F + + namespace types + { + template <class T> + struct simd_avx512_bool_register + { + using register_type = typename std::conditional< + (sizeof(T) < 4), std::conditional<(sizeof(T) == 1), __mmask64, __mmask32>, + std::conditional<(sizeof(T) == 4), __mmask16, __mmask8>>::type::type; + register_type data; + simd_avx512_bool_register() = default; + simd_avx512_bool_register(register_type r) { data = r; } + operator register_type() const noexcept { return data; } + }; + template <class T> + struct get_bool_simd_register<T, avx512f> + { + using type = simd_avx512_bool_register<T>; + }; + + XSIMD_DECLARE_SIMD_REGISTER(signed char, avx512f, __m512i); + XSIMD_DECLARE_SIMD_REGISTER(unsigned char, avx512f, __m512i); + XSIMD_DECLARE_SIMD_REGISTER(char, avx512f, __m512i); + XSIMD_DECLARE_SIMD_REGISTER(unsigned short, avx512f, __m512i); + XSIMD_DECLARE_SIMD_REGISTER(short, avx512f, __m512i); + XSIMD_DECLARE_SIMD_REGISTER(unsigned int, avx512f, __m512i); + XSIMD_DECLARE_SIMD_REGISTER(int, avx512f, __m512i); + XSIMD_DECLARE_SIMD_REGISTER(unsigned long int, avx512f, __m512i); + XSIMD_DECLARE_SIMD_REGISTER(long int, avx512f, __m512i); + XSIMD_DECLARE_SIMD_REGISTER(unsigned long long int, avx512f, __m512i); + 
XSIMD_DECLARE_SIMD_REGISTER(long long int, avx512f, __m512i); + XSIMD_DECLARE_SIMD_REGISTER(float, avx512f, __m512); + XSIMD_DECLARE_SIMD_REGISTER(double, avx512f, __m512d); + + } +#endif +} + +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp new file mode 100644 index 0000000000..ba76ea147b --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp @@ -0,0 +1,48 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_AVX512IFMA_REGISTER_HPP +#define XSIMD_AVX512IFMA_REGISTER_HPP + +#include "./xsimd_avx512bw_register.hpp" + +namespace xsimd +{ + + /** + * @ingroup architectures + * + * AVX512IFMA instructions + */ + struct avx512ifma : avx512bw + { + static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512IFMA; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(3, 5, 0); } + static constexpr char const* name() noexcept { return "avx512ifma"; } + }; + +#if XSIMD_WITH_AVX512IFMA + + namespace types + { + template <class T> + struct get_bool_simd_register<T, avx512ifma> + { + using type = simd_avx512_bool_register<T>; + }; + + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512ifma, avx512bw); + + } +#endif +} +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp new file mode 100644 index 0000000000..38a10f0227 --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp @@ -0,0 +1,48 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. 
*
+ ****************************************************************************/
+
+#ifndef XSIMD_AVX512PF_REGISTER_HPP
+#define XSIMD_AVX512PF_REGISTER_HPP
+
+#include "./xsimd_avx512er_register.hpp"
+
+namespace xsimd
+{
+
+    /**
+     * @ingroup architectures
+     *
+     * AVX512PF instructions
+     */
+    struct avx512pf : avx512er
+    {
+        static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512PF; }
+        static constexpr bool available() noexcept { return true; }
+        static constexpr unsigned version() noexcept { return generic::version(3, 4, 1); }
+        static constexpr char const* name() noexcept { return "avx512pf"; }
+    };
+
+#if XSIMD_WITH_AVX512PF
+
+    namespace types
+    {
+        template <class T>
+        struct get_bool_simd_register<T, avx512pf>
+        {
+            using type = simd_avx512_bool_register<T>;
+        };
+
+        XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512pf, avx512er);
+
+    }
+#endif
+}
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp
new file mode 100644
index 0000000000..19ff744d72
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp
@@ -0,0 +1,48 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
+ * Martin Renou                                                             *
+ * Copyright (c) QuantStack                                                 *
+ * Copyright (c) Serge Guelton                                              *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_AVX512VBMI_REGISTER_HPP
+#define XSIMD_AVX512VBMI_REGISTER_HPP
+
+#include "./xsimd_avx512ifma_register.hpp"
+
+namespace xsimd
+{
+
+    /**
+     * @ingroup architectures
+     *
+     * AVX512VBMI instructions
+     */
+    struct avx512vbmi : avx512ifma
+    {
+        static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VBMI; }
+        static constexpr bool available() noexcept { return true; }
+        static constexpr unsigned version() noexcept { return generic::version(3, 6, 0); }
+        static constexpr char const* name() noexcept { return "avx512vbmi"; }
+    };
+
+#if XSIMD_WITH_AVX512VBMI
+
+    namespace types
+    {
+        template <class T>
+        struct get_bool_simd_register<T, avx512vbmi>
+        {
+            using type = simd_avx512_bool_register<T>;
+        };
+
+        XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vbmi, avx512ifma);
+
+    }
+#endif
+}
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp
new file mode 100644
index 0000000000..85edbdf230
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp
@@ -0,0 +1,51 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
+ * Martin Renou                                                             *
+ * Copyright (c) QuantStack                                                 *
+ * Copyright (c) Serge Guelton                                              *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software.
* + ****************************************************************************/ + +#ifndef XSIMD_AVX512VNNI_AVX512BW_REGISTER_HPP +#define XSIMD_AVX512VNNI_AVX512BW_REGISTER_HPP + +#include "./xsimd_avx512bw_register.hpp" + +namespace xsimd +{ + template <typename arch> + struct avx512vnni; + + /** + * @ingroup architectures + * + * AVX512VNNI instructions + */ + template <> + struct avx512vnni<avx512bw> : avx512bw + { + static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VNNI_AVX512BW; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(3, 4, 1); } + static constexpr char const* name() noexcept { return "avx512vnni+avx512bw"; } + }; + +#if XSIMD_WITH_AVX512VNNI_AVX512BW + + namespace types + { + template <class T> + struct get_bool_simd_register<T, avx512vnni<avx512bw>> + { + using type = simd_avx512_bool_register<T>; + }; + + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vnni<avx512bw>, avx512bw); + + } +#endif +} +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi_register.hpp new file mode 100644 index 0000000000..232b19a5cb --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi_register.hpp @@ -0,0 +1,51 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_AVX512VNNI_AVX512VBMI_REGISTER_HPP +#define XSIMD_AVX512VNNI_AVX512VBMI_REGISTER_HPP + +#include "./xsimd_avx512vbmi_register.hpp" + +namespace xsimd +{ + template <typename arch> + struct avx512vnni; + + /** + * @ingroup architectures + * + * AVX512VNNI instructions + */ + template <> + struct avx512vnni<avx512vbmi> : avx512vbmi + { + static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VNNI_AVX512VBMI; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(3, 6, 1); } + static constexpr char const* name() noexcept { return "avx512vnni+avx512vbmi"; } + }; + +#if XSIMD_WITH_AVX512VNNI_AVX512VBMI + + namespace types + { + template <class T> + struct get_bool_simd_register<T, avx512vnni<avx512vbmi>> + { + using type = simd_avx512_bool_register<T>; + }; + + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vnni<avx512vbmi>, avx512vbmi); + + } +#endif +} +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_register.hpp new file mode 100644 index 0000000000..c276fb0079 --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_register.hpp @@ -0,0 +1,48 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. 
* + ****************************************************************************/ + +#ifndef XSIMD_AVX512VNNI_REGISTER_HPP +#define XSIMD_AVX512VNNI_REGISTER_HPP + +#include "./xsimd_avx512vbmi_register.hpp" + +namespace xsimd +{ + + /** + * @ingroup architectures + * + * AVX512VNNI instructions + */ + struct avx512vnni : avx512vbmi + { + static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VNNI; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(3, 7, 0); } + static constexpr char const* name() noexcept { return "avx512vnni"; } + }; + +#if XSIMD_WITH_AVX512VNNI + + namespace types + { + template <class T> + struct get_bool_simd_register<T, avx512vnni> + { + using type = simd_avx512_bool_register<T>; + }; + + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vnni, avx512vbmi); + + } +#endif +} +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx_register.hpp new file mode 100644 index 0000000000..6b1951f964 --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_avx_register.hpp @@ -0,0 +1,61 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_AVX_REGISTER_HPP +#define XSIMD_AVX_REGISTER_HPP + +#include "./xsimd_generic_arch.hpp" + +namespace xsimd +{ + + /** + * @ingroup architectures + * + * AVX instructions + */ + struct avx : generic + { + static constexpr bool supported() noexcept { return XSIMD_WITH_AVX; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(2, 1, 0); } + static constexpr std::size_t alignment() noexcept { return 32; } + static constexpr bool requires_alignment() noexcept { return true; } + static constexpr char const* name() noexcept { return "avx"; } + }; +} + +#if XSIMD_WITH_AVX + +#include <immintrin.h> + +namespace xsimd +{ + namespace types + { + + XSIMD_DECLARE_SIMD_REGISTER(signed char, avx, __m256i); + XSIMD_DECLARE_SIMD_REGISTER(unsigned char, avx, __m256i); + XSIMD_DECLARE_SIMD_REGISTER(char, avx, __m256i); + XSIMD_DECLARE_SIMD_REGISTER(unsigned short, avx, __m256i); + XSIMD_DECLARE_SIMD_REGISTER(short, avx, __m256i); + XSIMD_DECLARE_SIMD_REGISTER(unsigned int, avx, __m256i); + XSIMD_DECLARE_SIMD_REGISTER(int, avx, __m256i); + XSIMD_DECLARE_SIMD_REGISTER(unsigned long int, avx, __m256i); + XSIMD_DECLARE_SIMD_REGISTER(long int, avx, __m256i); + XSIMD_DECLARE_SIMD_REGISTER(unsigned long long int, avx, __m256i); + XSIMD_DECLARE_SIMD_REGISTER(long long int, avx, __m256i); + XSIMD_DECLARE_SIMD_REGISTER(float, avx, __m256); + XSIMD_DECLARE_SIMD_REGISTER(double, avx, __m256d); + } +} +#endif +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp new file mode 100644 index 0000000000..f68fe16bad --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp @@ -0,0 +1,40 @@ +/*************************************************************************** + * Copyright 
(c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_AVXVNNI_REGISTER_HPP +#define XSIMD_AVXVNNI_REGISTER_HPP + +#include "./xsimd_avx2_register.hpp" + +namespace xsimd +{ + /** + * @ingroup architectures + * + * AVXVNNI instructions + */ + struct avxvnni : avx2 + { + static constexpr bool supported() noexcept { return XSIMD_WITH_AVXVNNI; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(2, 3, 0); } + static constexpr char const* name() noexcept { return "avxvnni"; } + }; + +#if XSIMD_WITH_AVXVNNI + namespace types + { + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avxvnni, avx2); + } +#endif +} + +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_batch.hpp b/third_party/xsimd/include/xsimd/types/xsimd_batch.hpp new file mode 100644 index 0000000000..b4989fc88d --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_batch.hpp @@ -0,0 +1,1492 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_BATCH_HPP +#define XSIMD_BATCH_HPP + +#include <cassert> +#include <complex> + +#include "../config/xsimd_arch.hpp" +#include "../memory/xsimd_alignment.hpp" +#include "./xsimd_utils.hpp" + +namespace xsimd +{ + template <class T, class A = default_arch> + class batch; + + namespace types + { + template <class T, class A> + struct integral_only_operators + { + inline batch<T, A>& operator%=(batch<T, A> const& other) noexcept; + inline batch<T, A>& operator>>=(int32_t other) noexcept; + inline batch<T, A>& operator>>=(batch<T, A> const& other) noexcept; + inline batch<T, A>& operator<<=(int32_t other) noexcept; + inline batch<T, A>& operator<<=(batch<T, A> const& other) noexcept; + + /** Shorthand for xsimd::mod() */ + friend inline batch<T, A> operator%(batch<T, A> const& self, batch<T, A> const& other) noexcept + { + return batch<T, A>(self) %= other; + } + + /** Shorthand for xsimd::bitwise_rshift() */ + friend inline batch<T, A> operator>>(batch<T, A> const& self, batch<T, A> const& other) noexcept + { + return batch<T, A>(self) >>= other; + } + + /** Shorthand for xsimd::bitwise_lshift() */ + friend inline batch<T, A> operator<<(batch<T, A> const& self, batch<T, A> const& other) noexcept + { + return batch<T, A>(self) <<= other; + } + + /** Shorthand for xsimd::bitwise_rshift() */ + friend inline batch<T, A> operator>>(batch<T, A> const& self, int32_t other) noexcept + { + return batch<T, A>(self) >>= other; + } + + /** Shorthand for xsimd::bitwise_lshift() */ + friend inline batch<T, A> operator<<(batch<T, A> const& self, int32_t other) noexcept + { + return batch<T, A>(self) <<= other; + } + }; + template <class A> + struct integral_only_operators<float, A> + { + }; + template <class A> + struct integral_only_operators<double, A> + { + }; + 
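+        // The empty float and double specializations above deliberately strip
+        // the modulo and shift operators from floating point batches.
+        // Illustrative sketch (assumes 32-bit integer and float batches):
+        //     xsimd::batch<int32_t> i(7);
+        //     auto r = i % xsimd::batch<int32_t>(3); // well-formed
+        //     xsimd::batch<float> f(7.f);            // f % f would not compile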
+    }
+
+    namespace details
+    {
+        // These functions are forward-declared here so that they can be used by friend functions
+        // with batch<T, A>. Their implementations must appear only after the
+        // kernel implementations have been included.
+        template <class T, class A>
+        inline batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other) noexcept;
+
+        template <class T, class A>
+        inline batch_bool<T, A> neq(batch<T, A> const& self, batch<T, A> const& other) noexcept;
+
+        template <class T, class A>
+        inline batch_bool<T, A> ge(batch<T, A> const& self, batch<T, A> const& other) noexcept;
+
+        template <class T, class A>
+        inline batch_bool<T, A> le(batch<T, A> const& self, batch<T, A> const& other) noexcept;
+
+        template <class T, class A>
+        inline batch_bool<T, A> gt(batch<T, A> const& self, batch<T, A> const& other) noexcept;
+
+        template <class T, class A>
+        inline batch_bool<T, A> lt(batch<T, A> const& self, batch<T, A> const& other) noexcept;
+    }
+
+    /**
+     * @brief batch of integer or floating point values.
+     *
+     * Abstract representation of a SIMD register for floating point or integral
+     * values.
+     *
+     * @tparam T the type of the underlying values.
+     * @tparam A the architecture this batch is tied to.
+     **/
+    template <class T, class A>
+    class batch : public types::simd_register<T, A>, public types::integral_only_operators<T, A>
+    {
+        static_assert(!std::is_same<T, bool>::value, "use xsimd::batch_bool<T, A> instead of xsimd::batch<bool, A>");
+
+    public:
+        static constexpr std::size_t size = sizeof(types::simd_register<T, A>) / sizeof(T); ///< Number of scalar elements in this batch.
+
+        using value_type = T; ///< Type of the scalar elements within this batch.
+        using arch_type = A; ///< SIMD Architecture abstracted by this batch.
+        using register_type = typename types::simd_register<T, A>::register_type; ///< SIMD register type abstracted by this batch.
+        using batch_bool_type = batch_bool<T, A>; ///< Associated batch type used to represent logical operations on this batch.
+
+        // constructors
+        inline batch() = default; ///< Create a batch initialized with undefined values.
+        inline batch(T val) noexcept;
+        template <class... Ts>
+        inline batch(T val0, T val1, Ts... vals) noexcept;
+        inline explicit batch(batch_bool_type const& b) noexcept;
+        inline batch(register_type reg) noexcept;
+
+        template <class U>
+        XSIMD_NO_DISCARD static inline batch broadcast(U val) noexcept;
+
+        // memory operators
+        template <class U>
+        inline void store_aligned(U* mem) const noexcept;
+        template <class U>
+        inline void store_unaligned(U* mem) const noexcept;
+        template <class U>
+        inline void store(U* mem, aligned_mode) const noexcept;
+        template <class U>
+        inline void store(U* mem, unaligned_mode) const noexcept;
+
+        template <class U>
+        XSIMD_NO_DISCARD static inline batch load_aligned(U const* mem) noexcept;
+        template <class U>
+        XSIMD_NO_DISCARD static inline batch load_unaligned(U const* mem) noexcept;
+        template <class U>
+        XSIMD_NO_DISCARD static inline batch load(U const* mem, aligned_mode) noexcept;
+        template <class U>
+        XSIMD_NO_DISCARD static inline batch load(U const* mem, unaligned_mode) noexcept;
+
+        template <class U, class V>
+        XSIMD_NO_DISCARD static inline batch gather(U const* src, batch<V, arch_type> const& index) noexcept;
+        template <class U, class V>
+        inline void scatter(U* dst, batch<V, arch_type> const& index) const noexcept;
+
+        inline T get(std::size_t i) const noexcept;
+
+        // comparison operators.
Defined as friend to enable automatic + // conversion of parameters from scalar to batch, at the cost of using a + // proxy implementation from details::. + friend inline batch_bool<T, A> operator==(batch const& self, batch const& other) noexcept + { + return details::eq<T, A>(self, other); + } + friend inline batch_bool<T, A> operator!=(batch const& self, batch const& other) noexcept + { + return details::neq<T, A>(self, other); + } + friend inline batch_bool<T, A> operator>=(batch const& self, batch const& other) noexcept + { + return details::ge<T, A>(self, other); + } + friend inline batch_bool<T, A> operator<=(batch const& self, batch const& other) noexcept + { + return details::le<T, A>(self, other); + } + friend inline batch_bool<T, A> operator>(batch const& self, batch const& other) noexcept + { + return details::gt<T, A>(self, other); + } + friend inline batch_bool<T, A> operator<(batch const& self, batch const& other) noexcept + { + return details::lt<T, A>(self, other); + } + + // Update operators + inline batch& operator+=(batch const& other) noexcept; + inline batch& operator-=(batch const& other) noexcept; + inline batch& operator*=(batch const& other) noexcept; + inline batch& operator/=(batch const& other) noexcept; + inline batch& operator&=(batch const& other) noexcept; + inline batch& operator|=(batch const& other) noexcept; + inline batch& operator^=(batch const& other) noexcept; + + // incr/decr operators + inline batch& operator++() noexcept; + inline batch& operator--() noexcept; + inline batch operator++(int) noexcept; + inline batch operator--(int) noexcept; + + // unary operators + inline batch_bool_type operator!() const noexcept; + inline batch operator~() const noexcept; + inline batch operator-() const noexcept; + inline batch operator+() const noexcept; + + // arithmetic operators. They are defined as friend to enable automatic + // conversion of parameters from scalar to batch. Inline implementation + // is required to avoid warnings. 
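+        // Illustrative sketch of what these friend operators enable: a scalar
+        // operand is implicitly converted to a batch, so mixed expressions work:
+        //     xsimd::batch<float> x(1.f);
+        //     auto y = x + 2.f; // 2.f is broadcast to a batch, then added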
+
+        /** Shorthand for xsimd::add() */
+        friend inline batch operator+(batch const& self, batch const& other) noexcept
+        {
+            return batch(self) += other;
+        }
+
+        /** Shorthand for xsimd::sub() */
+        friend inline batch operator-(batch const& self, batch const& other) noexcept
+        {
+            return batch(self) -= other;
+        }
+
+        /** Shorthand for xsimd::mul() */
+        friend inline batch operator*(batch const& self, batch const& other) noexcept
+        {
+            return batch(self) *= other;
+        }
+
+        /** Shorthand for xsimd::div() */
+        friend inline batch operator/(batch const& self, batch const& other) noexcept
+        {
+            return batch(self) /= other;
+        }
+
+        /** Shorthand for xsimd::bitwise_and() */
+        friend inline batch operator&(batch const& self, batch const& other) noexcept
+        {
+            return batch(self) &= other;
+        }
+
+        /** Shorthand for xsimd::bitwise_or() */
+        friend inline batch operator|(batch const& self, batch const& other) noexcept
+        {
+            return batch(self) |= other;
+        }
+
+        /** Shorthand for xsimd::bitwise_xor() */
+        friend inline batch operator^(batch const& self, batch const& other) noexcept
+        {
+            return batch(self) ^= other;
+        }
+
+        /** Shorthand for xsimd::logical_and() */
+        friend inline batch operator&&(batch const& self, batch const& other) noexcept
+        {
+            return batch(self).logical_and(other);
+        }
+
+        /** Shorthand for xsimd::logical_or() */
+        friend inline batch operator||(batch const& self, batch const& other) noexcept
+        {
+            return batch(self).logical_or(other);
+        }
+
+    private:
+        inline batch logical_and(batch const& other) const noexcept;
+        inline batch logical_or(batch const& other) const noexcept;
+    };
+
+    template <class T, class A>
+    constexpr std::size_t batch<T, A>::size;
+
+    /**
+     * @brief batch of predicates over scalar or complex values.
+     *
+     * Abstract representation of a predicate over a SIMD register for scalar or
+     * complex values.
+     *
+     * @tparam T the type of the predicated values.
+     * @tparam A the architecture this batch is tied to.
+     **/
+    template <class T, class A = default_arch>
+    class batch_bool : public types::get_bool_simd_register_t<T, A>
+    {
+        using base_type = types::get_bool_simd_register_t<T, A>;
+
+    public:
+        static constexpr std::size_t size = sizeof(types::simd_register<T, A>) / sizeof(T); ///< Number of scalar elements in this batch.
+
+        using value_type = bool; ///< Type of the scalar elements within this batch.
+        using arch_type = A; ///< SIMD Architecture abstracted by this batch.
+        using register_type = typename base_type::register_type; ///< SIMD register type abstracted by this batch.
+        using batch_type = batch<T, A>; ///< Associated batch type this batch represents logical operations for.
+
+        // constructors
+        inline batch_bool() = default; ///< Create a batch initialized with undefined values.
+        inline batch_bool(bool val) noexcept;
+        inline batch_bool(register_type reg) noexcept;
+        template <class... Ts>
+        inline batch_bool(bool val0, bool val1, Ts... vals) noexcept;
vals) noexcept; + + template <class Tp> + inline batch_bool(Tp const*) = delete; + + // memory operators + inline void store_aligned(bool* mem) const noexcept; + inline void store_unaligned(bool* mem) const noexcept; + XSIMD_NO_DISCARD static inline batch_bool load_aligned(bool const* mem) noexcept; + XSIMD_NO_DISCARD static inline batch_bool load_unaligned(bool const* mem) noexcept; + + inline bool get(std::size_t i) const noexcept; + + // mask operations + inline uint64_t mask() const noexcept; + inline static batch_bool from_mask(uint64_t mask) noexcept; + + // comparison operators + inline batch_bool operator==(batch_bool const& other) const noexcept; + inline batch_bool operator!=(batch_bool const& other) const noexcept; + + // logical operators + inline batch_bool operator~() const noexcept; + inline batch_bool operator!() const noexcept; + inline batch_bool operator&(batch_bool const& other) const noexcept; + inline batch_bool operator|(batch_bool const& other) const noexcept; + inline batch_bool operator^(batch_bool const& other) const noexcept; + inline batch_bool operator&&(batch_bool const& other) const noexcept; + inline batch_bool operator||(batch_bool const& other) const noexcept; + + // update operators + inline batch_bool& operator&=(batch_bool const& other) noexcept { return (*this) = (*this) & other; } + inline batch_bool& operator|=(batch_bool const& other) noexcept { return (*this) = (*this) | other; } + inline batch_bool& operator^=(batch_bool const& other) noexcept { return (*this) = (*this) ^ other; } + + private: + template <class U, class... V, size_t I, size_t... Is> + static inline register_type make_register(detail::index_sequence<I, Is...>, U u, V... v) noexcept; + + template <class... V> + static inline register_type make_register(detail::index_sequence<>, V... v) noexcept; + }; + + template <class T, class A> + constexpr std::size_t batch_bool<T, A>::size; + + /** + * @brief batch of complex values. + * + * Abstract representation of an SIMD register for complex values. + * + * @tparam T the type of the underlying values. + * @tparam A the architecture this batch is tied too. + **/ + template <class T, class A> + class batch<std::complex<T>, A> + { + public: + using value_type = std::complex<T>; ///< Type of the complex elements within this batch. + using real_batch = batch<T, A>; ///< Type of the scalar elements within this batch. + using arch_type = A; ///< SIMD Architecture abstracted by this batch. + using batch_bool_type = batch_bool<T, A>; ///< Associated batch type used to represented logical operations on this batch. + + static constexpr std::size_t size = real_batch::size; ///< Number of complex elements in this batch. + + // constructors + inline batch() = default; ///< Create a batch initialized with undefined values. + inline batch(value_type const& val) noexcept; + inline batch(real_batch const& real, real_batch const& imag) noexcept; + + inline batch(real_batch const& real) noexcept; + inline batch(T val) noexcept; + template <class... Ts> + inline batch(value_type val0, value_type val1, Ts... 
vals) noexcept; + inline explicit batch(batch_bool_type const& b) noexcept; + + template <class U> + XSIMD_NO_DISCARD static inline batch broadcast(U val) noexcept; + + // memory operators + XSIMD_NO_DISCARD static inline batch load_aligned(const T* real_src, const T* imag_src = nullptr) noexcept; + XSIMD_NO_DISCARD static inline batch load_unaligned(const T* real_src, const T* imag_src = nullptr) noexcept; + inline void store_aligned(T* real_dst, T* imag_dst) const noexcept; + inline void store_unaligned(T* real_dst, T* imag_dst) const noexcept; + + XSIMD_NO_DISCARD static inline batch load_aligned(const value_type* src) noexcept; + XSIMD_NO_DISCARD static inline batch load_unaligned(const value_type* src) noexcept; + inline void store_aligned(value_type* dst) const noexcept; + inline void store_unaligned(value_type* dst) const noexcept; + + template <class U> + XSIMD_NO_DISCARD static inline batch load(U const* mem, aligned_mode) noexcept; + template <class U> + XSIMD_NO_DISCARD static inline batch load(U const* mem, unaligned_mode) noexcept; + template <class U> + inline void store(U* mem, aligned_mode) const noexcept; + template <class U> + inline void store(U* mem, unaligned_mode) const noexcept; + + inline real_batch real() const noexcept; + inline real_batch imag() const noexcept; + + inline value_type get(std::size_t i) const noexcept; + +#ifdef XSIMD_ENABLE_XTL_COMPLEX + // xtl-related methods + template <bool i3ec> + inline batch(xtl::xcomplex<T, T, i3ec> const& val) noexcept; + template <bool i3ec, class... Ts> + inline batch(xtl::xcomplex<T, T, i3ec> val0, xtl::xcomplex<T, T, i3ec> val1, Ts... vals) noexcept; + + template <bool i3ec> + XSIMD_NO_DISCARD static inline batch load_aligned(const xtl::xcomplex<T, T, i3ec>* src) noexcept; + template <bool i3ec> + XSIMD_NO_DISCARD static inline batch load_unaligned(const xtl::xcomplex<T, T, i3ec>* src) noexcept; + template <bool i3ec> + inline void store_aligned(xtl::xcomplex<T, T, i3ec>* dst) const noexcept; + template <bool i3ec> + inline void store_unaligned(xtl::xcomplex<T, T, i3ec>* dst) const noexcept; +#endif + + // comparison operators + inline batch_bool<T, A> operator==(batch const& other) const noexcept; + inline batch_bool<T, A> operator!=(batch const& other) const noexcept; + + // Update operators + inline batch& operator+=(batch const& other) noexcept; + inline batch& operator-=(batch const& other) noexcept; + inline batch& operator*=(batch const& other) noexcept; + inline batch& operator/=(batch const& other) noexcept; + + // incr/decr operators + inline batch& operator++() noexcept; + inline batch& operator--() noexcept; + inline batch operator++(int) noexcept; + inline batch operator--(int) noexcept; + + // unary operators + inline batch_bool_type operator!() const noexcept; + inline batch operator~() const noexcept; + inline batch operator-() const noexcept; + inline batch operator+() const noexcept; + + // arithmetic operators. 
They are defined as friend to enable automatic
+        // conversion of parameters from scalar to batch
+
+        /** Shorthand for xsimd::add() */
+        friend inline batch operator+(batch const& self, batch const& other) noexcept
+        {
+            return batch(self) += other;
+        }
+
+        /** Shorthand for xsimd::sub() */
+        friend inline batch operator-(batch const& self, batch const& other) noexcept
+        {
+            return batch(self) -= other;
+        }
+
+        /** Shorthand for xsimd::mul() */
+        friend inline batch operator*(batch const& self, batch const& other) noexcept
+        {
+            return batch(self) *= other;
+        }
+
+        /** Shorthand for xsimd::div() */
+        friend inline batch operator/(batch const& self, batch const& other) noexcept
+        {
+            return batch(self) /= other;
+        }
+
+    private:
+        real_batch m_real;
+        real_batch m_imag;
+    };
+
+    template <class T, class A>
+    constexpr std::size_t batch<std::complex<T>, A>::size;
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+    template <typename T, bool i3ec, typename A>
+    struct batch<xtl::xcomplex<T, T, i3ec>, A>
+    {
+        static_assert(std::is_same<T, void>::value,
+                      "Please use batch<std::complex<T>, A> initialized from xtl::xcomplex instead");
+    };
+#endif
+}
+
+#include "../arch/xsimd_isa.hpp"
+#include "./xsimd_batch_constant.hpp"
+#include "./xsimd_traits.hpp"
+
+namespace xsimd
+{
+
+    /**
+     * Create a batch with all elements initialized to \c val.
+     */
+    template <class T, class A>
+    inline batch<T, A>::batch(T val) noexcept
+        : types::simd_register<T, A>(kernel::broadcast<A>(val, A {}))
+    {
+        detail::static_check_supported_config<T, A>();
+    }
+
+    /**
+     * Create a batch with elements initialized from \c val0, \c val1, \c vals...
+     * There must be exactly \c size elements in total.
+     */
+    template <class T, class A>
+    template <class... Ts>
+    inline batch<T, A>::batch(T val0, T val1, Ts... vals) noexcept
+        : batch(kernel::set<A>(batch {}, A {}, val0, val1, static_cast<T>(vals)...))
+    {
+        detail::static_check_supported_config<T, A>();
+        static_assert(sizeof...(Ts) + 2 == size, "The constructor requires as many arguments as batch elements.");
+    }
+
+    /**
+     * Converts a \c batch_bool to a \c batch where each element is
+     * set to 1 (resp. 0) if the corresponding element is `true`
+     * (resp. `false`).
+     */
+    template <class T, class A>
+    inline batch<T, A>::batch(batch_bool<T, A> const& b) noexcept
+        : batch(kernel::from_bool(b, A {}))
+    {
+    }
+
+    /**
+     * Wraps a compatible native SIMD register as a \c batch. This is generally not needed but
+     * becomes handy when doing architecture-specific operations.
+     */
+    template <class T, class A>
+    inline batch<T, A>::batch(register_type reg) noexcept
+        : types::simd_register<T, A>({ reg })
+    {
+        detail::static_check_supported_config<T, A>();
+    }
+
+    /**
+     * Equivalent to batch::batch(T val).
+     */
+    template <class T, class A>
+    template <class U>
+    XSIMD_NO_DISCARD inline batch<T, A> batch<T, A>::broadcast(U val) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return batch(static_cast<T>(val));
+    }
+
+    /**************************
+     * batch memory operators *
+     **************************/
+
+    /**
+     * Copy content of this batch to the buffer \c mem. The
+     * memory needs to be aligned.
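+     *
+     * A minimal usage sketch (illustrative only; it assumes an SSE2 build,
+     * where a float batch holds four lanes):
+     * @code
+     * alignas(16) float buf[4];
+     * xsimd::batch<float, xsimd::sse2> v(1.f, 2.f, 3.f, 4.f);
+     * v.store_aligned(buf); // buf now holds {1, 2, 3, 4}
+     * @endcode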
+ */ + template <class T, class A> + template <class U> + inline void batch<T, A>::store_aligned(U* mem) const noexcept + { + detail::static_check_supported_config<T, A>(); + assert(((reinterpret_cast<uintptr_t>(mem) % A::alignment()) == 0) + && "store location is not properly aligned"); + kernel::store_aligned<A>(mem, *this, A {}); + } + + /** + * Copy content of this batch to the buffer \c mem. The + * memory does not need to be aligned. + */ + template <class T, class A> + template <class U> + inline void batch<T, A>::store_unaligned(U* mem) const noexcept + { + detail::static_check_supported_config<T, A>(); + kernel::store_unaligned<A>(mem, *this, A {}); + } + + /** + * Equivalent to batch::store_aligned() + */ + template <class T, class A> + template <class U> + inline void batch<T, A>::store(U* mem, aligned_mode) const noexcept + { + detail::static_check_supported_config<T, A>(); + return store_aligned(mem); + } + + /** + * Equivalent to batch::store_unaligned() + */ + template <class T, class A> + template <class U> + inline void batch<T, A>::store(U* mem, unaligned_mode) const noexcept + { + detail::static_check_supported_config<T, A>(); + return store_unaligned(mem); + } + + /** + * Loading from aligned memory. May involve a conversion if \c U is different + * from \c T. + */ + template <class T, class A> + template <class U> + inline batch<T, A> batch<T, A>::load_aligned(U const* mem) noexcept + { + assert(((reinterpret_cast<uintptr_t>(mem) % A::alignment()) == 0) + && "loaded pointer is not properly aligned"); + detail::static_check_supported_config<T, A>(); + return kernel::load_aligned<A>(mem, kernel::convert<T> {}, A {}); + } + + /** + * Loading from unaligned memory. May involve a conversion if \c U is different + * from \c T. + */ + template <class T, class A> + template <class U> + inline batch<T, A> batch<T, A>::load_unaligned(U const* mem) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::load_unaligned<A>(mem, kernel::convert<T> {}, A {}); + } + + /** + * Equivalent to batch::load_aligned() + */ + template <class T, class A> + template <class U> + inline batch<T, A> batch<T, A>::load(U const* mem, aligned_mode) noexcept + { + detail::static_check_supported_config<T, A>(); + return load_aligned(mem); + } + + /** + * Equivalent to batch::load_unaligned() + */ + template <class T, class A> + template <class U> + inline batch<T, A> batch<T, A>::load(U const* mem, unaligned_mode) noexcept + { + detail::static_check_supported_config<T, A>(); + return load_unaligned(mem); + } + + /** + * Create a new batch gathering elements starting at address \c src and + * offset by each element in \c index. + * If \c T is not of the same size as \c U, a \c static_cast is performed + * at element gather time. + */ + template <class T, class A> + template <typename U, typename V> + inline batch<T, A> batch<T, A>::gather(U const* src, batch<V, A> const& index) noexcept + { + detail::static_check_supported_config<T, A>(); + static_assert(std::is_convertible<T, U>::value, "Can't convert from src to this batch's type!"); + return kernel::gather(batch {}, src, index, A {}); + } + + /** + * Scatter elements from this batch into addresses starting at \c dst + * and offset by each element in \c index. + * If \c T is not of the same size as \c U, a \c static_cast is performed + * at element scatter time. 
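+     *
+     * A sketch of the intent (hypothetical 4-wide int batch on SSE2; all
+     * indices are assumed to be in range):
+     * @code
+     * int dst[8] = {};
+     * xsimd::batch<int, xsimd::sse2> v(10, 11, 12, 13);
+     * xsimd::batch<int, xsimd::sse2> idx(0, 2, 4, 6);
+     * v.scatter(dst, idx); // dst == {10, 0, 11, 0, 12, 0, 13, 0}
+     * @endcode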
+     */
+    template <class T, class A>
+    template <class U, class V>
+    inline void batch<T, A>::scatter(U* dst, batch<V, A> const& index) const noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        static_assert(std::is_convertible<T, U>::value, "Can't convert from this batch's type to dst!");
+        kernel::scatter<A>(*this, dst, index, A {});
+    }
+
+    /**
+     * Retrieve the \c i th scalar element in this batch.
+     *
+     * @warning This is very inefficient and should only be used for debugging purposes.
+     */
+    template <class T, class A>
+    inline T batch<T, A>::get(std::size_t i) const noexcept
+    {
+        return kernel::get(*this, i, A {});
+    }
+
+    /******************************
+     * batch comparison operators *
+     ******************************/
+    namespace details
+    {
+        /**
+         * Shorthand for xsimd::eq()
+         */
+        template <class T, class A>
+        inline batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other) noexcept
+        {
+            detail::static_check_supported_config<T, A>();
+            return kernel::eq<A>(self, other, A {});
+        }
+
+        /**
+         * Shorthand for xsimd::neq()
+         */
+        template <class T, class A>
+        inline batch_bool<T, A> neq(batch<T, A> const& self, batch<T, A> const& other) noexcept
+        {
+            detail::static_check_supported_config<T, A>();
+            return kernel::neq<A>(self, other, A {});
+        }
+
+        /**
+         * Shorthand for xsimd::ge()
+         */
+        template <class T, class A>
+        inline batch_bool<T, A> ge(batch<T, A> const& self, batch<T, A> const& other) noexcept
+        {
+            detail::static_check_supported_config<T, A>();
+            return kernel::ge<A>(self, other, A {});
+        }
+
+        /**
+         * Shorthand for xsimd::le()
+         */
+        template <class T, class A>
+        inline batch_bool<T, A> le(batch<T, A> const& self, batch<T, A> const& other) noexcept
+        {
+            detail::static_check_supported_config<T, A>();
+            return kernel::le<A>(self, other, A {});
+        }
+
+        /**
+         * Shorthand for xsimd::gt()
+         */
+        template <class T, class A>
+        inline batch_bool<T, A> gt(batch<T, A> const& self, batch<T, A> const& other) noexcept
+        {
+            detail::static_check_supported_config<T, A>();
+            return kernel::gt<A>(self, other, A {});
+        }
+
+        /**
+         * Shorthand for xsimd::lt()
+         */
+        template <class T, class A>
+        inline batch_bool<T, A> lt(batch<T, A> const& self, batch<T, A> const& other) noexcept
+        {
+            detail::static_check_supported_config<T, A>();
+            return kernel::lt<A>(self, other, A {});
+        }
+    }
+
+    /**************************
+     * batch update operators *
+     **************************/
+
+    template <class T, class A>
+    inline batch<T, A>& batch<T, A>::operator+=(batch<T, A> const& other) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return *this = kernel::add<A>(*this, other, A {});
+    }
+
+    template <class T, class A>
+    inline batch<T, A>& batch<T, A>::operator-=(batch<T, A> const& other) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return *this = kernel::sub<A>(*this, other, A {});
+    }
+
+    template <class T, class A>
+    inline batch<T, A>& batch<T, A>::operator*=(batch<T, A> const& other) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return *this = kernel::mul<A>(*this, other, A {});
+    }
+
+    template <class T, class A>
+    inline batch<T, A>& batch<T, A>::operator/=(batch<T, A> const& other) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return *this = kernel::div<A>(*this, other, A {});
+    }
+
+    template <class T, class A>
+    inline batch<T, A>& types::integral_only_operators<T, A>::operator%=(batch<T, A> const& other) noexcept
+    {
+        ::xsimd::detail::static_check_supported_config<T, A>();
+        return
*static_cast<batch<T, A>*>(this) = kernel::mod<A>(*static_cast<batch<T, A>*>(this), other, A {}); + } + + template <class T, class A> + inline batch<T, A>& batch<T, A>::operator&=(batch<T, A> const& other) noexcept + { + detail::static_check_supported_config<T, A>(); + return *this = kernel::bitwise_and<A>(*this, other, A {}); + } + + template <class T, class A> + inline batch<T, A>& batch<T, A>::operator|=(batch<T, A> const& other) noexcept + { + detail::static_check_supported_config<T, A>(); + return *this = kernel::bitwise_or<A>(*this, other, A {}); + } + + template <class T, class A> + inline batch<T, A>& batch<T, A>::operator^=(batch<T, A> const& other) noexcept + { + detail::static_check_supported_config<T, A>(); + return *this = kernel::bitwise_xor<A>(*this, other, A {}); + } + + template <class T, class A> + inline batch<T, A>& kernel::integral_only_operators<T, A>::operator>>=(batch<T, A> const& other) noexcept + { + ::xsimd::detail::static_check_supported_config<T, A>(); + return *static_cast<batch<T, A>*>(this) = kernel::bitwise_rshift<A>(*static_cast<batch<T, A>*>(this), other, A {}); + } + + template <class T, class A> + inline batch<T, A>& kernel::integral_only_operators<T, A>::operator<<=(batch<T, A> const& other) noexcept + { + ::xsimd::detail::static_check_supported_config<T, A>(); + return *static_cast<batch<T, A>*>(this) = kernel::bitwise_lshift<A>(*static_cast<batch<T, A>*>(this), other, A {}); + } + + template <class T, class A> + inline batch<T, A>& kernel::integral_only_operators<T, A>::operator>>=(int32_t other) noexcept + { + ::xsimd::detail::static_check_supported_config<T, A>(); + return *static_cast<batch<T, A>*>(this) = kernel::bitwise_rshift<A>(*static_cast<batch<T, A>*>(this), other, A {}); + } + + template <class T, class A> + inline batch<T, A>& kernel::integral_only_operators<T, A>::operator<<=(int32_t other) noexcept + { + ::xsimd::detail::static_check_supported_config<T, A>(); + return *static_cast<batch<T, A>*>(this) = kernel::bitwise_lshift<A>(*static_cast<batch<T, A>*>(this), other, A {}); + } + + /***************************** + * batch incr/decr operators * + *****************************/ + + template <class T, class A> + inline batch<T, A>& batch<T, A>::operator++() noexcept + { + detail::static_check_supported_config<T, A>(); + return operator+=(1); + } + + template <class T, class A> + inline batch<T, A>& batch<T, A>::operator--() noexcept + { + detail::static_check_supported_config<T, A>(); + return operator-=(1); + } + + template <class T, class A> + inline batch<T, A> batch<T, A>::operator++(int) noexcept + { + detail::static_check_supported_config<T, A>(); + batch<T, A> copy(*this); + operator+=(1); + return copy; + } + + template <class T, class A> + inline batch<T, A> batch<T, A>::operator--(int) noexcept + { + detail::static_check_supported_config<T, A>(); + batch copy(*this); + operator-=(1); + return copy; + } + + /************************* + * batch unary operators * + *************************/ + + template <class T, class A> + inline batch_bool<T, A> batch<T, A>::operator!() const noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::eq<A>(*this, batch(0), A {}); + } + + template <class T, class A> + inline batch<T, A> batch<T, A>::operator~() const noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::bitwise_not<A>(*this, A {}); + } + + template <class T, class A> + inline batch<T, A> batch<T, A>::operator-() const noexcept + { + detail::static_check_supported_config<T, A>(); + 
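// Unary minus delegates to the architecture-specific negation kernel.
+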
return kernel::neg<A>(*this, A {});
+    }
+
+    template <class T, class A>
+    inline batch<T, A> batch<T, A>::operator+() const noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return *this;
+    }
+
+    /************************
+     * batch private method *
+     ************************/
+
+    template <class T, class A>
+    inline batch<T, A> batch<T, A>::logical_and(batch<T, A> const& other) const noexcept
+    {
+        return kernel::logical_and<A>(*this, other, A());
+    }
+
+    template <class T, class A>
+    inline batch<T, A> batch<T, A>::logical_or(batch<T, A> const& other) const noexcept
+    {
+        return kernel::logical_or<A>(*this, other, A());
+    }
+
+    /***************************
+     * batch_bool constructors *
+     ***************************/
+
+    template <class T, class A>
+    inline batch_bool<T, A>::batch_bool(register_type reg) noexcept
+        : types::get_bool_simd_register_t<T, A>({ reg })
+    {
+    }
+
+    template <class T, class A>
+    template <class... Ts>
+    inline batch_bool<T, A>::batch_bool(bool val0, bool val1, Ts... vals) noexcept
+        : batch_bool(kernel::set<A>(batch_bool {}, A {}, val0, val1, static_cast<bool>(vals)...))
+    {
+        static_assert(sizeof...(Ts) + 2 == size, "The constructor requires as many arguments as batch elements.");
+    }
+
+    /*******************************
+     * batch_bool memory operators *
+     *******************************/
+
+    template <class T, class A>
+    inline void batch_bool<T, A>::store_aligned(bool* mem) const noexcept
+    {
+        kernel::store(*this, mem, A {});
+    }
+
+    template <class T, class A>
+    inline void batch_bool<T, A>::store_unaligned(bool* mem) const noexcept
+    {
+        store_aligned(mem);
+    }
+
+    template <class T, class A>
+    inline batch_bool<T, A> batch_bool<T, A>::load_aligned(bool const* mem) noexcept
+    {
+        batch_type ref(0);
+        alignas(A::alignment()) T buffer[size];
+        for (std::size_t i = 0; i < size; ++i)
+            buffer[i] = mem[i] ? 1 : 0;
+        return ref != batch_type::load_aligned(&buffer[0]);
+    }
+
+    template <class T, class A>
+    inline batch_bool<T, A> batch_bool<T, A>::load_unaligned(bool const* mem) noexcept
+    {
+        return load_aligned(mem);
+    }
+
+    /**
+     * Extract a scalar mask representation from this @c batch_bool.
+     *
+     * @return bit mask
+     */
+    template <class T, class A>
+    inline uint64_t batch_bool<T, A>::mask() const noexcept
+    {
+        return kernel::mask(*this, A {});
+    }
+
+    /**
+     * Build a @c batch_bool from the scalar bit mask representation \c mask.
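+     *
+     * A sketch of the intended round trip (assuming a 4-wide batch):
+     * @code
+     * auto b = xsimd::batch_bool<float, xsimd::sse2>::from_mask(0b0101ul);
+     * // b.get(0) == true, b.get(1) == false, b.get(2) == true, b.get(3) == false
+     * // and b.mask() == 0b0101ul again
+     * @endcode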
+     *
+     * @return the reconstructed @c batch_bool
+     */
+    template <class T, class A>
+    inline batch_bool<T, A> batch_bool<T, A>::from_mask(uint64_t mask) noexcept
+    {
+        return kernel::from_mask(batch_bool<T, A>(), mask, A {});
+    }
+
+    template <class T, class A>
+    inline bool batch_bool<T, A>::get(std::size_t i) const noexcept
+    {
+        return kernel::get(*this, i, A {});
+    }
+
+    /***********************************
+     * batch_bool comparison operators *
+     ***********************************/
+
+    template <class T, class A>
+    inline batch_bool<T, A> batch_bool<T, A>::operator==(batch_bool<T, A> const& other) const noexcept
+    {
+        return kernel::eq<A>(*this, other, A {}).data;
+    }
+
+    template <class T, class A>
+    inline batch_bool<T, A> batch_bool<T, A>::operator!=(batch_bool<T, A> const& other) const noexcept
+    {
+        return kernel::neq<A>(*this, other, A {}).data;
+    }
+
+    /********************************
+     * batch_bool logical operators *
+     ********************************/
+
+    template <class T, class A>
+    inline batch_bool<T, A> batch_bool<T, A>::operator~() const noexcept
+    {
+        return kernel::bitwise_not<A>(*this, A {}).data;
+    }
+
+    template <class T, class A>
+    inline batch_bool<T, A> batch_bool<T, A>::operator!() const noexcept
+    {
+        return operator==(batch_bool(false));
+    }
+
+    template <class T, class A>
+    inline batch_bool<T, A> batch_bool<T, A>::operator&(batch_bool<T, A> const& other) const noexcept
+    {
+        return kernel::bitwise_and<A>(*this, other, A {}).data;
+    }
+
+    template <class T, class A>
+    inline batch_bool<T, A> batch_bool<T, A>::operator|(batch_bool<T, A> const& other) const noexcept
+    {
+        return kernel::bitwise_or<A>(*this, other, A {}).data;
+    }
+
+    template <class T, class A>
+    inline batch_bool<T, A> batch_bool<T, A>::operator^(batch_bool<T, A> const& other) const noexcept
+    {
+        return kernel::bitwise_xor<A>(*this, other, A {}).data;
+    }
+
+    template <class T, class A>
+    inline batch_bool<T, A> batch_bool<T, A>::operator&&(batch_bool const& other) const noexcept
+    {
+        return operator&(other);
+    }
+
+    template <class T, class A>
+    inline batch_bool<T, A> batch_bool<T, A>::operator||(batch_bool const& other) const noexcept
+    {
+        return operator|(other);
+    }
+
+    /******************************
+     * batch_bool private methods *
+     ******************************/
+
+    template <class T, class A>
+    inline batch_bool<T, A>::batch_bool(bool val) noexcept
+        : base_type { make_register(detail::make_index_sequence<size - 1>(), val) }
+    {
+    }
+
+    template <class T, class A>
+    template <class U, class... V, size_t I, size_t... Is>
+    inline auto batch_bool<T, A>::make_register(detail::index_sequence<I, Is...>, U u, V... v) noexcept -> register_type
+    {
+        return make_register(detail::index_sequence<Is...>(), u, u, v...);
+    }
+
+    template <class T, class A>
+    template <class... V>
+    inline auto batch_bool<T, A>::make_register(detail::index_sequence<>, V...
v) noexcept -> register_type + { + return kernel::set<A>(batch_bool<T, A>(), A {}, v...).data; + } + + /******************************* + * batch<complex> constructors * + *******************************/ + + template <class T, class A> + inline batch<std::complex<T>, A>::batch(value_type const& val) noexcept + : m_real(val.real()) + , m_imag(val.imag()) + { + } + + template <class T, class A> + inline batch<std::complex<T>, A>::batch(real_batch const& real, real_batch const& imag) noexcept + : m_real(real) + , m_imag(imag) + { + } + + template <class T, class A> + inline batch<std::complex<T>, A>::batch(real_batch const& real) noexcept + : m_real(real) + , m_imag(0) + { + } + + template <class T, class A> + inline batch<std::complex<T>, A>::batch(T val) noexcept + : m_real(val) + , m_imag(0) + { + } + + template <class T, class A> + template <class... Ts> + inline batch<std::complex<T>, A>::batch(value_type val0, value_type val1, Ts... vals) noexcept + : batch(kernel::set<A>(batch {}, A {}, val0, val1, static_cast<value_type>(vals)...)) + { + static_assert(sizeof...(Ts) + 2 == size, "as many arguments as batch elements"); + } + + template <class T, class A> + inline batch<std::complex<T>, A>::batch(batch_bool_type const& b) noexcept + : m_real(b) + , m_imag(0) + { + } + + template <class T, class A> + template <class U> + XSIMD_NO_DISCARD inline batch<std::complex<T>, A> batch<std::complex<T>, A>::broadcast(U val) noexcept + { + return batch(static_cast<std::complex<T>>(val)); + } + + /*********************************** + * batch<complex> memory operators * + ***********************************/ + + template <class T, class A> + inline batch<std::complex<T>, A> batch<std::complex<T>, A>::load_aligned(const T* real_src, const T* imag_src) noexcept + { + return { batch<T, A>::load_aligned(real_src), imag_src ? batch<T, A>::load_aligned(imag_src) : batch<T, A>(0) }; + } + template <class T, class A> + inline batch<std::complex<T>, A> batch<std::complex<T>, A>::load_unaligned(const T* real_src, const T* imag_src) noexcept + { + return { batch<T, A>::load_unaligned(real_src), imag_src ? 
batch<T, A>::load_unaligned(imag_src) : batch<T, A>(0) }; + } + + template <class T, class A> + inline batch<std::complex<T>, A> batch<std::complex<T>, A>::load_aligned(const value_type* src) noexcept + { + assert(((reinterpret_cast<uintptr_t>(src) % A::alignment()) == 0) + && "loaded pointer is not properly aligned"); + return kernel::load_complex_aligned<A>(src, kernel::convert<value_type> {}, A {}); + } + + template <class T, class A> + inline batch<std::complex<T>, A> batch<std::complex<T>, A>::load_unaligned(const value_type* src) noexcept + { + return kernel::load_complex_unaligned<A>(src, kernel::convert<value_type> {}, A {}); + } + + template <class T, class A> + inline void batch<std::complex<T>, A>::store_aligned(value_type* dst) const noexcept + { + assert(((reinterpret_cast<uintptr_t>(dst) % A::alignment()) == 0) + && "store location is not properly aligned"); + return kernel::store_complex_aligned(dst, *this, A {}); + } + + template <class T, class A> + inline void batch<std::complex<T>, A>::store_unaligned(value_type* dst) const noexcept + { + return kernel::store_complex_unaligned(dst, *this, A {}); + } + + template <class T, class A> + inline void batch<std::complex<T>, A>::store_aligned(T* real_dst, T* imag_dst) const noexcept + { + m_real.store_aligned(real_dst); + m_imag.store_aligned(imag_dst); + } + + template <class T, class A> + inline void batch<std::complex<T>, A>::store_unaligned(T* real_dst, T* imag_dst) const noexcept + { + m_real.store_unaligned(real_dst); + m_imag.store_unaligned(imag_dst); + } + + template <class T, class A> + template <class U> + inline batch<std::complex<T>, A> batch<std::complex<T>, A>::load(U const* mem, aligned_mode) noexcept + { + return load_aligned(mem); + } + + template <class T, class A> + template <class U> + inline batch<std::complex<T>, A> batch<std::complex<T>, A>::load(U const* mem, unaligned_mode) noexcept + { + return load_unaligned(mem); + } + + template <class T, class A> + template <class U> + inline void batch<std::complex<T>, A>::store(U* mem, aligned_mode) const noexcept + { + return store_aligned(mem); + } + + template <class T, class A> + template <class U> + inline void batch<std::complex<T>, A>::store(U* mem, unaligned_mode) const noexcept + { + return store_unaligned(mem); + } + + template <class T, class A> + inline auto batch<std::complex<T>, A>::real() const noexcept -> real_batch + { + return m_real; + } + + template <class T, class A> + inline auto batch<std::complex<T>, A>::imag() const noexcept -> real_batch + { + return m_imag; + } + + template <class T, class A> + inline auto batch<std::complex<T>, A>::get(std::size_t i) const noexcept -> value_type + { + return kernel::get(*this, i, A {}); + } + + /************************************** + * batch<complex> xtl-related methods * + **************************************/ + +#ifdef XSIMD_ENABLE_XTL_COMPLEX + + template <class T, class A> + template <bool i3ec> + inline batch<std::complex<T>, A>::batch(xtl::xcomplex<T, T, i3ec> const& val) noexcept + : m_real(val.real()) + , m_imag(val.imag()) + { + } + + template <class T, class A> + template <bool i3ec, class... Ts> + inline batch<std::complex<T>, A>::batch(xtl::xcomplex<T, T, i3ec> val0, xtl::xcomplex<T, T, i3ec> val1, Ts... 
vals) noexcept
+        : batch(kernel::set<A>(batch {}, A {}, val0, val1, static_cast<xtl::xcomplex<T, T, i3ec>>(vals)...))
+    {
+        static_assert(sizeof...(Ts) + 2 == size, "as many arguments as batch elements");
+    }
+
+    // The memory layout of an xcomplex and a std::complex is the same when
+    // xcomplex stores values and not references. Unfortunately, relying on
+    // this breaks strict aliasing...
+
+    template <class T, class A>
+    template <bool i3ec>
+    inline batch<std::complex<T>, A> batch<std::complex<T>, A>::load_aligned(const xtl::xcomplex<T, T, i3ec>* src) noexcept
+    {
+        return load_aligned(reinterpret_cast<std::complex<T> const*>(src));
+    }
+
+    template <class T, class A>
+    template <bool i3ec>
+    inline batch<std::complex<T>, A> batch<std::complex<T>, A>::load_unaligned(const xtl::xcomplex<T, T, i3ec>* src) noexcept
+    {
+        return load_unaligned(reinterpret_cast<std::complex<T> const*>(src));
+    }
+
+    template <class T, class A>
+    template <bool i3ec>
+    inline void batch<std::complex<T>, A>::store_aligned(xtl::xcomplex<T, T, i3ec>* dst) const noexcept
+    {
+        store_aligned(reinterpret_cast<std::complex<T>*>(dst));
+    }
+
+    template <class T, class A>
+    template <bool i3ec>
+    inline void batch<std::complex<T>, A>::store_unaligned(xtl::xcomplex<T, T, i3ec>* dst) const noexcept
+    {
+        store_unaligned(reinterpret_cast<std::complex<T>*>(dst));
+    }
+
+#endif
+
+    /***************************************
+     * batch<complex> comparison operators *
+     ***************************************/
+
+    template <class T, class A>
+    inline batch_bool<T, A> batch<std::complex<T>, A>::operator==(batch const& other) const noexcept
+    {
+        return m_real == other.m_real && m_imag == other.m_imag;
+    }
+
+    template <class T, class A>
+    inline batch_bool<T, A> batch<std::complex<T>, A>::operator!=(batch const& other) const noexcept
+    {
+        return m_real != other.m_real || m_imag != other.m_imag;
+    }
+
+    /***********************************
+     * batch<complex> update operators *
+     ***********************************/
+
+    template <class T, class A>
+    inline batch<std::complex<T>, A>& batch<std::complex<T>, A>::operator+=(batch const& other) noexcept
+    {
+        m_real += other.m_real;
+        m_imag += other.m_imag;
+        return *this;
+    }
+
+    template <class T, class A>
+    inline batch<std::complex<T>, A>& batch<std::complex<T>, A>::operator-=(batch const& other) noexcept
+    {
+        m_real -= other.m_real;
+        m_imag -= other.m_imag;
+        return *this;
+    }
+
+    template <class T, class A>
+    inline batch<std::complex<T>, A>& batch<std::complex<T>, A>::operator*=(batch const& other) noexcept
+    {
+        real_batch new_real = real() * other.real() - imag() * other.imag();
+        real_batch new_imag = real() * other.imag() + imag() * other.real();
+        m_real = new_real;
+        m_imag = new_imag;
+        return *this;
+    }
+
+    template <class T, class A>
+    inline batch<std::complex<T>, A>& batch<std::complex<T>, A>::operator/=(batch const& other) noexcept
+    {
+        real_batch a = real();
+        real_batch b = imag();
+        real_batch c = other.real();
+        real_batch d = other.imag();
+        real_batch e = c * c + d * d;
+        m_real = (c * a + d * b) / e;
+        m_imag = (c * b - d * a) / e;
+        return *this;
+    }
+
+    /**************************************
+     * batch<complex> incr/decr operators *
+     **************************************/
+
+    template <class T, class A>
+    inline batch<std::complex<T>, A>& batch<std::complex<T>, A>::operator++() noexcept
+    {
+        return operator+=(1);
+    }
+
+    template <class T, class A>
+    inline batch<std::complex<T>, A>& batch<std::complex<T>, A>::operator--() noexcept
+    {
+        return operator-=(1);
+    }
+
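+    // Note: in these increment/decrement operators, the scalar 1 converts to
+    // batch(std::complex<T>(1)), i.e. 1 + 0i, so only the real part of each
+    // element changes.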
+    template <class T, class A>
+    inline batch<std::complex<T>, A> batch<std::complex<T>, A>::operator++(int) noexcept
+    {
+        batch copy(*this);
+        operator+=(1);
+        return copy;
+    }
+
+    template <class T, class A>
+    inline batch<std::complex<T>, A> batch<std::complex<T>, A>::operator--(int) noexcept
+    {
+        batch copy(*this);
+        operator-=(1);
+        return copy;
+    }
+
+    /**********************************
+     * batch<complex> unary operators *
+     **********************************/
+
+    template <class T, class A>
+    inline batch_bool<T, A> batch<std::complex<T>, A>::operator!() const noexcept
+    {
+        return operator==(batch(0));
+    }
+
+    template <class T, class A>
+    inline batch<std::complex<T>, A> batch<std::complex<T>, A>::operator~() const noexcept
+    {
+        return { ~m_real, ~m_imag };
+    }
+
+    template <class T, class A>
+    inline batch<std::complex<T>, A> batch<std::complex<T>, A>::operator-() const noexcept
+    {
+        return { -m_real, -m_imag };
+    }
+
+    template <class T, class A>
+    inline batch<std::complex<T>, A> batch<std::complex<T>, A>::operator+() const noexcept
+    {
+        return { +m_real, +m_imag };
+    }
+
+    /**********************************
+     * size type aliases
+     **********************************/
+
+    namespace details
+    {
+        template <typename T, std::size_t N, class ArchList>
+        struct sized_batch;
+
+        template <typename T, std::size_t N>
+        struct sized_batch<T, N, xsimd::arch_list<>>
+        {
+            using type = void;
+        };
+
+        template <typename T, class Arch, bool BatchExists = xsimd::types::has_simd_register<T, Arch>::value>
+        struct batch_trait;
+
+        template <typename T, class Arch>
+        struct batch_trait<T, Arch, true>
+        {
+            using type = xsimd::batch<T, Arch>;
+            static constexpr std::size_t size = xsimd::batch<T, Arch>::size;
+        };
+
+        template <typename T, class Arch>
+        struct batch_trait<T, Arch, false>
+        {
+            using type = void;
+            static constexpr std::size_t size = 0;
+        };
+
+        template <typename T, std::size_t N, class Arch, class... Archs>
+        struct sized_batch<T, N, xsimd::arch_list<Arch, Archs...>>
+        {
+            using type = typename std::conditional<
+                batch_trait<T, Arch>::size == N,
+                typename batch_trait<T, Arch>::type,
+                typename sized_batch<T, N, xsimd::arch_list<Archs...>>::type>::type;
+        };
+    }
+
+    /**
+     * @brief type utility to select a batch of a given type and size
+     *
+     * If one of the available architectures has a native vector type of the
+     * given type and size, sets the @p type member to the appropriate batch
+     * type. Otherwise sets it to @p void.
+     *
+     * @tparam T the type of the underlying values.
+     * @tparam N the number of elements of that type in the batch.
+     **/
+    template <typename T, std::size_t N>
+    struct make_sized_batch
+    {
+        using type = typename details::sized_batch<T, N, supported_architectures>::type;
+    };
+
+    template <typename T, std::size_t N>
+    using make_sized_batch_t = typename make_sized_batch<T, N>::type;
+}
+
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_batch_constant.hpp b/third_party/xsimd/include/xsimd/types/xsimd_batch_constant.hpp
new file mode 100644
index 0000000000..0de9c8ad42
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_batch_constant.hpp
@@ -0,0 +1,288 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
+ * Martin Renou                                                             *
+ * Copyright (c) QuantStack                                                 *
+ * Copyright (c) Serge Guelton                                              *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_BATCH_CONSTANT_HPP
+#define XSIMD_BATCH_CONSTANT_HPP
+
+#include "./xsimd_batch.hpp"
+#include "./xsimd_utils.hpp"
+
+namespace xsimd
+{
+    /**
+     * @brief batch of boolean constants
+     *
+     * Abstract representation of a batch of boolean constants.
+     *
+     * @tparam batch_type the type of the associated batch values.
+     * @tparam Values boolean constants represented by this batch
+     **/
+    template <class batch_type, bool... Values>
+    struct batch_bool_constant
+    {
+
+    public:
+        static constexpr std::size_t size = sizeof...(Values);
+        using arch_type = typename batch_type::arch_type;
+        using value_type = bool;
+        static_assert(sizeof...(Values) == batch_type::size, "consistent batch size");
+
+        constexpr operator batch_bool<typename batch_type::value_type, arch_type>() const noexcept { return { Values... }; }
+
+        constexpr bool get(size_t i) const noexcept
+        {
+            return std::array<value_type, size> { { Values... } }[i];
+        }
+
+        static constexpr int mask() noexcept
+        {
+            return mask_helper(0, static_cast<int>(Values)...);
+        }
+
+    private:
+        static constexpr int mask_helper(int acc) noexcept { return acc; }
+
+        template <class... Tys>
+        static constexpr int mask_helper(int acc, int mask, Tys... masks) noexcept
+        {
+            return mask_helper(acc | mask, (masks << 1)...);
+        }
+
+        struct logical_or
+        {
+            constexpr bool operator()(bool x, bool y) const { return x || y; }
+        };
+        struct logical_and
+        {
+            constexpr bool operator()(bool x, bool y) const { return x && y; }
+        };
+        struct logical_xor
+        {
+            constexpr bool operator()(bool x, bool y) const { return x ^ y; }
+        };
+
+        template <class F, class SelfPack, class OtherPack, size_t... Indices>
+        static constexpr batch_bool_constant<batch_type, F()(std::tuple_element<Indices, SelfPack>::type::value, std::tuple_element<Indices, OtherPack>::type::value)...>
+        apply(detail::index_sequence<Indices...>)
+        {
+            return {};
+        }
+
+        template <class F, bool... OtherValues>
+        static constexpr auto apply(batch_bool_constant<batch_type, Values...>, batch_bool_constant<batch_type, OtherValues...>)
+            -> decltype(apply<F, std::tuple<std::integral_constant<bool, Values>...>, std::tuple<std::integral_constant<bool, OtherValues>...>>(detail::make_index_sequence<sizeof...(Values)>()))
+        {
+            static_assert(sizeof...(Values) == sizeof...(OtherValues), "compatible constant batches");
+            return apply<F, std::tuple<std::integral_constant<bool, Values>...>, std::tuple<std::integral_constant<bool, OtherValues>...>>(detail::make_index_sequence<sizeof...(Values)>());
+        }
+
+    public:
+#define MAKE_BINARY_OP(OP, NAME)                                                            \
+    template <bool... OtherValues>                                                          \
+    constexpr auto operator OP(batch_bool_constant<batch_type, OtherValues...> other) const \
+        -> decltype(apply<NAME>(*this, other))                                              \
+    {                                                                                       \
+        return apply<NAME>(*this, other);                                                   \
+    }
+
+        MAKE_BINARY_OP(|, logical_or)
+        MAKE_BINARY_OP(||, logical_or)
+        MAKE_BINARY_OP(&, logical_and)
+        MAKE_BINARY_OP(&&, logical_and)
+        MAKE_BINARY_OP(^, logical_xor)
+
+#undef MAKE_BINARY_OP
+
+        constexpr batch_bool_constant<batch_type, !Values...> operator!() const
+        {
+            return {};
+        }
+
+        constexpr batch_bool_constant<batch_type, !Values...> operator~() const
+        {
+            return {};
+        }
+    };
+
+    /**
+     * @brief batch of integral constants
+     *
+     * Abstract representation of a batch of integral constants.
+     *
+     * @tparam batch_type the type of the associated batch values.
+ * @tparam Values constants represented by this batch + **/ + template <class batch_type, typename batch_type::value_type... Values> + struct batch_constant + { + static constexpr std::size_t size = sizeof...(Values); + using arch_type = typename batch_type::arch_type; + using value_type = typename batch_type::value_type; + static_assert(sizeof...(Values) == batch_type::size, "consistent batch size"); + + /** + * @brief Generate a batch of @p batch_type from this @p batch_constant + */ + inline operator batch_type() const noexcept { return { Values... }; } + + /** + * @brief Get the @p i th element of this @p batch_constant + */ + constexpr value_type get(size_t i) const noexcept + { + return get(i, std::array<value_type, size> { Values... }); + } + + private: + constexpr value_type get(size_t i, std::array<value_type, size> const& values) const noexcept + { + return values[i]; + } + + struct arithmetic_add + { + constexpr value_type operator()(value_type x, value_type y) const { return x + y; } + }; + struct arithmetic_sub + { + constexpr value_type operator()(value_type x, value_type y) const { return x - y; } + }; + struct arithmetic_mul + { + constexpr value_type operator()(value_type x, value_type y) const { return x * y; } + }; + struct arithmetic_div + { + constexpr value_type operator()(value_type x, value_type y) const { return x / y; } + }; + struct arithmetic_mod + { + constexpr value_type operator()(value_type x, value_type y) const { return x % y; } + }; + struct binary_and + { + constexpr value_type operator()(value_type x, value_type y) const { return x & y; } + }; + struct binary_or + { + constexpr value_type operator()(value_type x, value_type y) const { return x | y; } + }; + struct binary_xor + { + constexpr value_type operator()(value_type x, value_type y) const { return x ^ y; } + }; + + template <class F, class SelfPack, class OtherPack, size_t... Indices> + static constexpr batch_constant<batch_type, F()(std::tuple_element<Indices, SelfPack>::type::value, std::tuple_element<Indices, OtherPack>::type::value)...> + apply(detail::index_sequence<Indices...>) + { + return {}; + } + + template <class F, value_type... OtherValues> + static constexpr auto apply(batch_constant<batch_type, Values...>, batch_constant<batch_type, OtherValues...>) + -> decltype(apply<F, std::tuple<std::integral_constant<value_type, Values>...>, std::tuple<std::integral_constant<value_type, OtherValues>...>>(detail::make_index_sequence<sizeof...(Values)>())) + { + static_assert(sizeof...(Values) == sizeof...(OtherValues), "compatible constant batches"); + return apply<F, std::tuple<std::integral_constant<value_type, Values>...>, std::tuple<std::integral_constant<value_type, OtherValues>...>>(detail::make_index_sequence<sizeof...(Values)>()); + } + + public: +#define MAKE_BINARY_OP(OP, NAME) \ + template <value_type... 
OtherValues> \ + constexpr auto operator OP(batch_constant<batch_type, OtherValues...> other) const \ + -> decltype(apply<NAME>(*this, other)) \ + { \ + return apply<NAME>(*this, other); \ + } + + MAKE_BINARY_OP(+, arithmetic_add) + MAKE_BINARY_OP(-, arithmetic_sub) + MAKE_BINARY_OP(*, arithmetic_mul) + MAKE_BINARY_OP(/, arithmetic_div) + MAKE_BINARY_OP(%, arithmetic_mod) + MAKE_BINARY_OP(&, binary_and) + MAKE_BINARY_OP(|, binary_or) + MAKE_BINARY_OP(^, binary_xor) + +#undef MAKE_BINARY_OP + + constexpr batch_constant<batch_type, (value_type)-Values...> operator-() const + { + return {}; + } + + constexpr batch_constant<batch_type, (value_type) + Values...> operator+() const + { + return {}; + } + + constexpr batch_constant<batch_type, (value_type)~Values...> operator~() const + { + return {}; + } + }; + + namespace detail + { + template <class batch_type, class G, std::size_t... Is> + inline constexpr auto make_batch_constant(detail::index_sequence<Is...>) noexcept + -> batch_constant<batch_type, (typename batch_type::value_type)G::get(Is, sizeof...(Is))...> + { + return {}; + } + template <class batch_type, class G, std::size_t... Is> + inline constexpr auto make_batch_bool_constant(detail::index_sequence<Is...>) noexcept + -> batch_bool_constant<batch_type, G::get(Is, sizeof...(Is))...> + { + return {}; + } + + } // namespace detail + + /** + * @brief Build a @c batch_constant out of a generator function + * + * @tparam batch_type type of the (non-constant) batch to build + * @tparam G type used to generate that batch. That type must have a static + * member @c get that's used to generate the batch constant. Conversely, the + * generated batch_constant has value `{G::get(0, batch_size), ... , G::get(batch_size - 1, batch_size)}` + * + * The following generator produces a batch of `(n - 1, 0, 1, ... n-2)` + * + * @code + * struct Rot + * { + * static constexpr unsigned get(unsigned i, unsigned n) + * { + * return (i + n - 1) % n; + * } + * }; + * @endcode + */ + template <class batch_type, class G> + inline constexpr auto make_batch_constant() noexcept -> decltype(detail::make_batch_constant<batch_type, G>(detail::make_index_sequence<batch_type::size>())) + { + return detail::make_batch_constant<batch_type, G>(detail::make_index_sequence<batch_type::size>()); + } + + template <class batch_type, class G> + inline constexpr auto make_batch_bool_constant() noexcept + -> decltype(detail::make_batch_bool_constant<batch_type, G>( + detail::make_index_sequence<batch_type::size>())) + { + return detail::make_batch_bool_constant<batch_type, G>( + detail::make_index_sequence<batch_type::size>()); + } + +} // namespace xsimd + +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp new file mode 100644 index 0000000000..b9a5995414 --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp @@ -0,0 +1,46 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. 
* + ****************************************************************************/ + +#ifndef XSIMD_FMA3_AVX2_REGISTER_HPP +#define XSIMD_FMA3_AVX2_REGISTER_HPP + +#include "./xsimd_avx2_register.hpp" + +namespace xsimd +{ + template <typename arch> + struct fma3; + + /** + * @ingroup architectures + * + * AVX2 + FMA instructions + */ + template <> + struct fma3<avx2> : avx2 + { + static constexpr bool supported() noexcept { return XSIMD_WITH_FMA3_AVX2; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(2, 2, 1); } + static constexpr char const* name() noexcept { return "fma3+avx2"; } + }; + +#if XSIMD_WITH_FMA3_AVX2 + namespace types + { + + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(fma3<avx2>, avx2); + + } +#endif + +} +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp new file mode 100644 index 0000000000..ae10598f2c --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp @@ -0,0 +1,46 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_FMA3_AVX_REGISTER_HPP +#define XSIMD_FMA3_AVX_REGISTER_HPP + +#include "./xsimd_avx_register.hpp" + +namespace xsimd +{ + template <typename arch> + struct fma3; + + /** + * @ingroup architectures + * + * AVX + FMA instructions + */ + template <> + struct fma3<avx> : avx + { + static constexpr bool supported() noexcept { return XSIMD_WITH_FMA3_AVX; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(2, 1, 1); } + static constexpr char const* name() noexcept { return "fma3+avx"; } + }; + +#if XSIMD_WITH_FMA3_AVX + namespace types + { + + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(fma3<avx>, avx); + + } +#endif + +} +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp new file mode 100644 index 0000000000..a267490d66 --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp @@ -0,0 +1,46 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. 
* + ****************************************************************************/ + +#ifndef XSIMD_FMA3_SSE_REGISTER_HPP +#define XSIMD_FMA3_SSE_REGISTER_HPP + +#include "./xsimd_sse4_2_register.hpp" + +namespace xsimd +{ + template <typename arch> + struct fma3; + + /** + * @ingroup architectures + * + * SSE4.2 + FMA instructions + */ + template <> + struct fma3<sse4_2> : sse4_2 + { + static constexpr bool supported() noexcept { return XSIMD_WITH_FMA3_SSE; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(1, 4, 3); } + static constexpr char const* name() noexcept { return "fma3+sse4.2"; } + }; + +#if XSIMD_WITH_FMA3_SSE + namespace types + { + + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(fma3<sse4_2>, sse4_2); + + } +#endif + +} +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_fma4_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_fma4_register.hpp new file mode 100644 index 0000000000..3684bbb401 --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_fma4_register.hpp @@ -0,0 +1,42 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_FMA4_REGISTER_HPP +#define XSIMD_FMA4_REGISTER_HPP + +#include "./xsimd_sse4_2_register.hpp" + +namespace xsimd +{ + /** + * @ingroup architectures + * + * SSE4.2 + FMA4 instructions + */ + struct fma4 : sse4_2 + { + static constexpr bool supported() noexcept { return XSIMD_WITH_FMA4; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(1, 4, 4); } + static constexpr char const* name() noexcept { return "fma4"; } + }; + +#if XSIMD_WITH_FMA4 + namespace types + { + + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(fma4, sse4_2); + + } +#endif + +} +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_generic_arch.hpp b/third_party/xsimd/include/xsimd/types/xsimd_generic_arch.hpp new file mode 100644 index 0000000000..f4a2ca6aad --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_generic_arch.hpp @@ -0,0 +1,52 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_GENERIC_ARCH_HPP +#define XSIMD_GENERIC_ARCH_HPP + +#include "../config/xsimd_config.hpp" + +/** + * @defgroup architectures Architecture description + * */ +namespace xsimd +{ + /** + * @ingroup architectures + * + * Base class for all architectures. + */ + struct generic + { + /// Whether this architecture is supported at compile-time. + static constexpr bool supported() noexcept { return true; } + /// Whether this architecture is available at run-time. 
+        static constexpr bool available() noexcept { return true; }
+        /// If this architecture supports aligned memory accesses, the required
+        /// alignment.
+        static constexpr std::size_t alignment() noexcept { return 0; }
+        /// Whether this architecture requires aligned memory access.
+        static constexpr bool requires_alignment() noexcept { return false; }
+        /// Unique identifier for this architecture.
+        static constexpr unsigned version() noexcept { return generic::version(0, 0, 0); }
+        /// Name of the architecture.
+        static constexpr char const* name() noexcept { return "generic"; }
+
+    protected:
+        static constexpr unsigned version(unsigned major, unsigned minor, unsigned patch, unsigned multiplier = 100u) noexcept { return major * multiplier * multiplier + minor * multiplier + patch; }
+    };
+
+    struct unsupported
+    {
+    };
+}
+
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_neon64_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_neon64_register.hpp
new file mode 100644
index 0000000000..3aa8973b63
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_neon64_register.hpp
@@ -0,0 +1,52 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
+ * Martin Renou                                                             *
+ * Copyright (c) QuantStack                                                 *
+ * Copyright (c) Serge Guelton                                              *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_NEON64_REGISTER_HPP
+#define XSIMD_NEON64_REGISTER_HPP
+
+#include "xsimd_neon_register.hpp"
+
+namespace xsimd
+{
+    /**
+     * @ingroup architectures
+     *
+     * NEON instructions for arm64
+     */
+    struct neon64 : neon
+    {
+        static constexpr bool supported() noexcept { return XSIMD_WITH_NEON64; }
+        static constexpr bool available() noexcept { return true; }
+        static constexpr bool requires_alignment() noexcept { return true; }
+        static constexpr std::size_t alignment() noexcept { return 16; }
+        static constexpr unsigned version() noexcept { return generic::version(8, 1, 0); }
+        static constexpr char const* name() noexcept { return "arm64+neon"; }
+    };
+
+#if XSIMD_WITH_NEON64
+
+    namespace types
+    {
+        XSIMD_DECLARE_SIMD_REGISTER_ALIAS(neon64, neon);
+        XSIMD_DECLARE_SIMD_REGISTER(double, neon64, float64x2_t);
+
+        template <class T>
+        struct get_bool_simd_register<T, neon64>
+            : detail::neon_bool_simd_register<T, neon64>
+        {
+        };
+    }
+
+#endif
+
+}
+
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_neon_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_neon_register.hpp
new file mode 100644
index 0000000000..0ef4b381d3
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_neon_register.hpp
@@ -0,0 +1,155 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
+ * Martin Renou                                                             *
+ * Copyright (c) QuantStack                                                 *
+ * Copyright (c) Serge Guelton                                              *
+ *                                                                          *
+ * Distributed under the terms of the BSD 3-Clause License.                 *
+ *                                                                          *
+ * The full license is in the file LICENSE, distributed with this software.
* + ****************************************************************************/ + +#ifndef XSIMD_NEON_REGISTER_HPP +#define XSIMD_NEON_REGISTER_HPP + +#include "xsimd_generic_arch.hpp" +#include "xsimd_register.hpp" + +#if XSIMD_WITH_NEON +#include <arm_neon.h> +#endif + +namespace xsimd +{ + /** + * @ingroup architectures + * + * NEON instructions for arm32 + */ + struct neon : generic + { + static constexpr bool supported() noexcept { return XSIMD_WITH_NEON; } + static constexpr bool available() noexcept { return true; } + static constexpr bool requires_alignment() noexcept { return true; } + static constexpr std::size_t alignment() noexcept { return 16; } + static constexpr unsigned version() noexcept { return generic::version(7, 0, 0); } + static constexpr char const* name() noexcept { return "arm32+neon"; } + }; + +#if XSIMD_WITH_NEON + namespace types + { + namespace detail + { + template <size_t S> + struct neon_vector_type_impl; + + template <> + struct neon_vector_type_impl<8> + { + using signed_type = int8x16_t; + using unsigned_type = uint8x16_t; + }; + + template <> + struct neon_vector_type_impl<16> + { + using signed_type = int16x8_t; + using unsigned_type = uint16x8_t; + }; + + template <> + struct neon_vector_type_impl<32> + { + using signed_type = int32x4_t; + using unsigned_type = uint32x4_t; + }; + + template <> + struct neon_vector_type_impl<64> + { + using signed_type = int64x2_t; + using unsigned_type = uint64x2_t; + }; + + template <class T> + using signed_neon_vector_type = typename neon_vector_type_impl<8 * sizeof(T)>::signed_type; + + template <class T> + using unsigned_neon_vector_type = typename neon_vector_type_impl<8 * sizeof(T)>::unsigned_type; + + template <class T> + using neon_vector_type = typename std::conditional<std::is_signed<T>::value, + signed_neon_vector_type<T>, + unsigned_neon_vector_type<T>>::type; + + using char_neon_vector_type = typename std::conditional<std::is_signed<char>::value, + signed_neon_vector_type<char>, + unsigned_neon_vector_type<char>>::type; + } + + XSIMD_DECLARE_SIMD_REGISTER(signed char, neon, detail::neon_vector_type<signed char>); + XSIMD_DECLARE_SIMD_REGISTER(unsigned char, neon, detail::neon_vector_type<unsigned char>); + XSIMD_DECLARE_SIMD_REGISTER(char, neon, detail::char_neon_vector_type); + XSIMD_DECLARE_SIMD_REGISTER(short, neon, detail::neon_vector_type<short>); + XSIMD_DECLARE_SIMD_REGISTER(unsigned short, neon, detail::neon_vector_type<unsigned short>); + XSIMD_DECLARE_SIMD_REGISTER(int, neon, detail::neon_vector_type<int>); + XSIMD_DECLARE_SIMD_REGISTER(unsigned int, neon, detail::neon_vector_type<unsigned int>); + XSIMD_DECLARE_SIMD_REGISTER(long int, neon, detail::neon_vector_type<long int>); + XSIMD_DECLARE_SIMD_REGISTER(unsigned long int, neon, detail::neon_vector_type<unsigned long int>); + XSIMD_DECLARE_SIMD_REGISTER(long long int, neon, detail::neon_vector_type<long long int>); + XSIMD_DECLARE_SIMD_REGISTER(unsigned long long int, neon, detail::neon_vector_type<unsigned long long int>); + XSIMD_DECLARE_SIMD_REGISTER(float, neon, float32x4_t); + XSIMD_DECLARE_INVALID_SIMD_REGISTER(double, neon); + + namespace detail + { + template <size_t S> + struct get_unsigned_type; + + template <> + struct get_unsigned_type<1> + { + using type = uint8_t; + }; + + template <> + struct get_unsigned_type<2> + { + using type = uint16_t; + }; + + template <> + struct get_unsigned_type<4> + { + using type = uint32_t; + }; + + template <> + struct get_unsigned_type<8> + { + using type = uint64_t; + }; + + template <size_t S> 
+ using get_unsigned_type_t = typename get_unsigned_type<S>::type; + + template <class T, class A> + struct neon_bool_simd_register + { + using type = simd_register<get_unsigned_type_t<sizeof(T)>, A>; + }; + } + + template <class T> + struct get_bool_simd_register<T, neon> + : detail::neon_bool_simd_register<T, neon> + { + }; + + } +#endif + +} + +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_register.hpp new file mode 100644 index 0000000000..4fe4f3f13f --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_register.hpp @@ -0,0 +1,94 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_REGISTER_HPP +#define XSIMD_REGISTER_HPP + +#include <type_traits> + +namespace xsimd +{ + namespace types + { + template <class T, class A> + struct has_simd_register : std::false_type + { + }; + + template <class T, class Arch> + struct simd_register + { + struct register_type + { + }; + }; + +#define XSIMD_DECLARE_SIMD_REGISTER(SCALAR_TYPE, ISA, VECTOR_TYPE) \ + template <> \ + struct simd_register<SCALAR_TYPE, ISA> \ + { \ + using register_type = VECTOR_TYPE; \ + register_type data; \ + inline operator register_type() const noexcept \ + { \ + return data; \ + } \ + }; \ + template <> \ + struct has_simd_register<SCALAR_TYPE, ISA> : std::true_type \ + { \ + } + +#define XSIMD_DECLARE_INVALID_SIMD_REGISTER(SCALAR_TYPE, ISA) \ + template <> \ + struct has_simd_register<SCALAR_TYPE, ISA> : std::false_type \ + { \ + } + +#define XSIMD_DECLARE_SIMD_REGISTER_ALIAS(ISA, ISA_BASE) \ + template <class T> \ + struct simd_register<T, ISA> : simd_register<T, ISA_BASE> \ + { \ + using register_type = typename simd_register<T, ISA_BASE>::register_type; \ + simd_register(register_type reg) noexcept \ + : simd_register<T, ISA_BASE> { reg } \ + { \ + } \ + simd_register() = default; \ + }; \ + template <class T> \ + struct has_simd_register<T, ISA> : has_simd_register<T, ISA_BASE> \ + { \ + } + + template <class T, class Arch> + struct get_bool_simd_register + { + using type = simd_register<T, Arch>; + }; + + template <class T, class Arch> + using get_bool_simd_register_t = typename get_bool_simd_register<T, Arch>::type; + } + + namespace kernel + { + template <class A> + // makes requires_arch equal to A const&, using type_traits functions + using requires_arch = typename std::add_lvalue_reference<typename std::add_const<A>::type>::type; + template <class T> + struct convert + { + }; + } +} + +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_rvv_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_rvv_register.hpp new file mode 100644 index 0000000000..bdc0ef3b87 --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_rvv_register.hpp @@ -0,0 +1,419 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * Copyright (c) Yibo Cai * + * * + * Distributed under the terms of the BSD 3-Clause License. 
* + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_RVV_REGISTER_HPP +#define XSIMD_RVV_REGISTER_HPP + +#include "xsimd_generic_arch.hpp" +#include "xsimd_register.hpp" + +#if XSIMD_WITH_RVV +#include <riscv_vector.h> +#endif + +namespace xsimd +{ + namespace detail + { + /** + * @ingroup architectures + * + * RVV instructions (fixed vector size) for riscv + */ + template <size_t Width> + struct rvv : xsimd::generic + { + static constexpr size_t width = Width; + static constexpr bool supported() noexcept { return Width == XSIMD_RVV_BITS; } + static constexpr bool available() noexcept { return true; } + static constexpr bool requires_alignment() noexcept { return true; } + static constexpr std::size_t alignment() noexcept { return 16; } + static constexpr unsigned version() noexcept { return generic::version(1, 0, 0, /*multiplier=*/1000); } + static constexpr char const* name() noexcept { return "riscv+rvv"; } + }; + } + +#if XSIMD_WITH_RVV + + using rvv = detail::rvv<__riscv_v_fixed_vlen>; + +#define XSIMD_RVV_JOINT_(a, b, c) a##b##c +#define XSIMD_RVV_JOINT(a, b, c) XSIMD_RVV_JOINT_(a, b, c) +#define XSIMD_RVV_JOINT5(a, b, c, d, e) XSIMD_RVV_JOINT(XSIMD_RVV_JOINT(a, b, c), d, e) + +#define XSIMD_RVV_TYPE_i(S, V) XSIMD_RVV_JOINT5(vint, S, m, V, _t) +#define XSIMD_RVV_TYPE_u(S, V) XSIMD_RVV_JOINT5(vuint, S, m, V, _t) +#define XSIMD_RVV_TYPE_f(S, V) XSIMD_RVV_JOINT5(vfloat, S, m, V, _t) +#define XSIMD_RVV_TYPE(T, S, V) XSIMD_RVV_JOINT(XSIMD_RVV_TYPE, _, T)(S, V) + + namespace types + { + namespace detail + { + static constexpr size_t rvv_width_mf8 = XSIMD_RVV_BITS / 8; + static constexpr size_t rvv_width_mf4 = XSIMD_RVV_BITS / 4; + static constexpr size_t rvv_width_mf2 = XSIMD_RVV_BITS / 2; + static constexpr size_t rvv_width_m1 = XSIMD_RVV_BITS; + static constexpr size_t rvv_width_m2 = XSIMD_RVV_BITS * 2; + static constexpr size_t rvv_width_m4 = XSIMD_RVV_BITS * 4; + static constexpr size_t rvv_width_m8 = XSIMD_RVV_BITS * 8; + + // rvv_type_info is a utility class to convert scalar type and + // bitwidth into rvv register types. + // + // * `type` is the unadorned vector type. + // * `fixed_type` is the same type, but with the storage attribute + // applied. + // * `byte_type` is the type which is the same size in unsigned + // bytes, used as an intermediate step for bit-cast operations, + // because only a subset of __riscv_vreinterpret() intrinsics + // exist -- but always enough to get us to bytes and back. 
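+            // As an illustrative (hypothetical) expansion of the macro that
+            // follows, XSIMD_RVV_MAKE_TYPE(float, f, 32, 1) produces roughly:
+            //
+            //     template <>
+            //     struct rvv_type_info<float, rvv_width_m1 * 1>
+            //     {
+            //         using type = vfloat32m1_t;
+            //         using byte_type = vuint8m1_t;
+            //         using fixed_type = vfloat32m1_t __attribute__((riscv_rvv_vector_bits(width)));
+            //         // as_bytes() hops f32 -> u32 -> u8 through __riscv_vreinterpret_*.
+            //     };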
+ // + template <class T, size_t Width> + struct rvv_type_info; +#define XSIMD_RVV_MAKE_TYPE(scalar, t, s, vmul) \ + template <> \ + struct rvv_type_info<scalar, rvv_width_m1 * vmul> \ + { \ + static constexpr size_t width = rvv_width_m1 * vmul; \ + using type = XSIMD_RVV_TYPE(t, s, vmul); \ + using byte_type = XSIMD_RVV_TYPE(u, 8, vmul); \ + using fixed_type = type __attribute__((riscv_rvv_vector_bits(width))); \ + template <class U> \ + static inline type bitcast(U x) noexcept \ + { \ + const auto words = XSIMD_RVV_JOINT5(__riscv_vreinterpret_, u, s, m, vmul)(x); \ + return XSIMD_RVV_JOINT5(__riscv_vreinterpret_, t, s, m, vmul)(words); \ + } \ + template <> \ + inline type bitcast<type>(type x) noexcept { return x; } \ + static inline byte_type as_bytes(type x) noexcept \ + { \ + const auto words = XSIMD_RVV_JOINT5(__riscv_vreinterpret_, u, s, m, vmul)(x); \ + return XSIMD_RVV_JOINT5(__riscv_vreinterpret_, u, 8, m, vmul)(words); \ + } \ + }; + +#define XSIMD_RVV_MAKE_TYPES(vmul) \ + XSIMD_RVV_MAKE_TYPE(int8_t, i, 8, vmul) \ + XSIMD_RVV_MAKE_TYPE(uint8_t, u, 8, vmul) \ + XSIMD_RVV_MAKE_TYPE(int16_t, i, 16, vmul) \ + XSIMD_RVV_MAKE_TYPE(uint16_t, u, 16, vmul) \ + XSIMD_RVV_MAKE_TYPE(int32_t, i, 32, vmul) \ + XSIMD_RVV_MAKE_TYPE(uint32_t, u, 32, vmul) \ + XSIMD_RVV_MAKE_TYPE(int64_t, i, 64, vmul) \ + XSIMD_RVV_MAKE_TYPE(uint64_t, u, 64, vmul) \ + XSIMD_RVV_MAKE_TYPE(float, f, 32, vmul) \ + XSIMD_RVV_MAKE_TYPE(double, f, 64, vmul) + + XSIMD_RVV_MAKE_TYPES(8) + XSIMD_RVV_MAKE_TYPES(4) + XSIMD_RVV_MAKE_TYPES(2) + XSIMD_RVV_MAKE_TYPES(1) +#undef XSIMD_RVV_TYPE +#undef XSIMD_RVV_TYPE_f +#undef XSIMD_RVV_TYPE_u +#undef XSIMD_RVV_TYPE_i +#undef XSIMD_RVV_MAKE_TYPES +#undef XSIMD_RVV_MAKE_TYPE + + // rvv_blob is storage-type abstraction for a vector register. + template <class T, size_t Width> + struct rvv_blob : public rvv_type_info<T, Width> + { + using super = rvv_type_info<T, Width>; + using typename super::fixed_type; + using typename super::type; + + fixed_type value; + type get() const { return value; } + void set(type v) { value = v; } + }; + // + // But sometimes we want our storage type to be less than a whole + // register, while presenting as a whole register to the outside + // world. This is because some partial-register types are not + // defined, but they can (mostly) be emulated using shorter vl on a + // full-width register for arithmetic, and cast back to a partial + // byte register for storage. 
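+            //
+            // Concretely (an illustrative case, not an exhaustive list): a
+            // batch of uint64_t occupying half a register (rvv_width_mf2) has
+            // no vuint64mf2_t intrinsic type, so rvv_semiblob below stores a
+            // vuint8mf2_t byte blob, widens it to a full vuint8m1_t with
+            // __riscv_vlmul_ext_v_u8mf2_u8m1 for arithmetic, and truncates it
+            // back with __riscv_vlmul_trunc_v_u8m1_u8mf2 when storing.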
+ // + template <class T, size_t divisor> + struct rvv_semiblob : public rvv_type_info<T, rvv_width_m1> + { + using super = rvv_type_info<T, rvv_width_m1>; + static constexpr size_t width = rvv_width_m1 / divisor; + using typename super::type; + template <size_t div> + struct semitype; + template <> + struct semitype<2> + { + using type = vuint8mf2_t __attribute__((riscv_rvv_vector_bits(rvv_width_mf2))); + }; + template <> + struct semitype<4> + { + using type = vuint8mf4_t __attribute__((riscv_rvv_vector_bits(rvv_width_mf4))); + }; + template <> + struct semitype<8> + { + using type = vuint8mf8_t __attribute__((riscv_rvv_vector_bits(rvv_width_mf8))); + }; + using fixed_type = typename semitype<divisor>::type; + using super::as_bytes; + using super::bitcast; + + fixed_type value; + template <size_t div> + vuint8m1_t get_bytes() const; + template <> + vuint8m1_t get_bytes<2>() const { return __riscv_vlmul_ext_v_u8mf2_u8m1(value); } + template <> + vuint8m1_t get_bytes<4>() const { return __riscv_vlmul_ext_v_u8mf4_u8m1(value); } + template <> + vuint8m1_t get_bytes<8>() const { return __riscv_vlmul_ext_v_u8mf8_u8m1(value); } + type get() const noexcept + { + vuint8m1_t bytes = get_bytes<divisor>(); + return bitcast(bytes); + } + template <size_t div> + void set_bytes(vuint8m1_t); + template <> + void set_bytes<2>(vuint8m1_t v) { value = __riscv_vlmul_trunc_v_u8m1_u8mf2(v); } + template <> + void set_bytes<4>(vuint8m1_t v) { value = __riscv_vlmul_trunc_v_u8m1_u8mf4(v); } + template <> + void set_bytes<8>(vuint8m1_t v) { value = __riscv_vlmul_trunc_v_u8m1_u8mf8(v); } + void set(type v) + { + vuint8m1_t bytes = as_bytes(v); + set_bytes<divisor>(bytes); + } + }; + template <class T> + struct rvv_blob<T, rvv_width_mf2> : rvv_semiblob<T, 2> + { + }; + template <class T> + struct rvv_blob<T, rvv_width_mf4> : rvv_semiblob<T, 4> + { + }; + template <class T> + struct rvv_blob<T, rvv_width_mf8> : rvv_semiblob<T, 8> + { + }; + + // It's difficult dealing with both char and whichever *int8_t type + // is compatible with char, so just avoid it altogether. + // + using rvv_char_t = typename std::conditional<std::is_signed<char>::value, int8_t, uint8_t>::type; + template <class T> + using rvv_fix_char_t = typename std::conditional< + std::is_same<char, typename std::decay<T>::type>::value, + rvv_char_t, T>::type; + + // An explicit constructor isn't really explicit enough to allow + // implicit bit-casting operations between incompatible types, so + // we add this vacuous flag argument when we're serious: + // + enum rvv_bitcast_flag + { + XSIMD_RVV_BITCAST + }; + + // the general-purpose vector register type, usable within + // templates, and supporting arithmetic on partial registers for + // which there is no intrinsic type (by casting via a full register + // type). 
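+            //
+            // For instance (illustrative only, assuming XSIMD_RVV_BITS == 256):
+            // rvv_reg<float, 256> below wraps a vfloat32m1_t-backed blob and
+            // exposes vl == 256 / (sizeof(float) * 8) == 8 lanes, while its
+            // (byte_type, rvv_bitcast_flag) constructor re-types another
+            // register's raw bytes without modifying them.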
+ // + template <class T, size_t Width> + struct rvv_reg + { + static constexpr size_t width = Width; + static constexpr size_t vl = Width / (sizeof(T) * 8); + using blob_type = rvv_blob<T, Width>; + using register_type = typename blob_type::type; + using byte_type = typename blob_type::byte_type; + blob_type value; + rvv_reg() noexcept = default; + rvv_reg(register_type x) noexcept { value.set(x); } + explicit rvv_reg(byte_type v, rvv_bitcast_flag) { value.set(value.bitcast(v)); } + template <class U> + explicit rvv_reg(rvv_reg<U, Width> v, rvv_bitcast_flag) + : rvv_reg(v.get_bytes(), XSIMD_RVV_BITCAST) + { + } + byte_type get_bytes() const noexcept + { + return blob_type::as_bytes(value.get()); + } + operator register_type() const noexcept { return value.get(); } + }; + template <class T, size_t Width = XSIMD_RVV_BITS> + using rvv_reg_t = typename std::conditional<!std::is_void<T>::value, rvv_reg<rvv_fix_char_t<T>, Width>, void>::type; + + // And some more of the same stuff for bool types, which have + // similar problems and similar workarounds. + // + template <size_t> + struct rvv_bool_info; +#define XSIMD_RVV_MAKE_BOOL_TYPE(i) \ + template <> \ + struct rvv_bool_info<i> \ + { \ + using type = XSIMD_RVV_JOINT(vbool, i, _t); \ + template <class T> \ + static inline type bitcast(T value) noexcept \ + { \ + return XSIMD_RVV_JOINT(__riscv_vreinterpret_b, i, )(value); \ + } \ + /*template <> static inline type bitcast(type value) noexcept { return value; }*/ \ + }; + XSIMD_RVV_MAKE_BOOL_TYPE(1); + XSIMD_RVV_MAKE_BOOL_TYPE(2); + XSIMD_RVV_MAKE_BOOL_TYPE(4); + XSIMD_RVV_MAKE_BOOL_TYPE(8); + XSIMD_RVV_MAKE_BOOL_TYPE(16); + XSIMD_RVV_MAKE_BOOL_TYPE(32); + XSIMD_RVV_MAKE_BOOL_TYPE(64); +#undef XSIMD_RVV_MAKE_BOOL_TYPE +#undef XSIMD_RVV_JOINT5 +#undef XSIMD_RVV_JOINT +#undef XSIMD_RVV_JOINT_ + + template <class T, size_t Width> + struct rvv_bool + { + using bool_info = rvv_bool_info<rvv_width_m1 * sizeof(T) * 8 / Width>; + using storage_type = vuint8m1_t __attribute__((riscv_rvv_vector_bits(rvv_width_m1))); + using type = typename bool_info::type; + storage_type value; + rvv_bool() = default; + rvv_bool(type v) noexcept + : value(__riscv_vreinterpret_u8m1(v)) + { + } + template <class U, typename std::enable_if<sizeof(T) == sizeof(U), int>::type = 0> + rvv_bool(rvv_bool<U, Width> v) + : value(v.value) + { + } + explicit rvv_bool(uint8_t mask) noexcept + : value(__riscv_vmv_v_x_u8m1(mask, rvv_width_m1 / 8)) + { + } + explicit rvv_bool(uint64_t mask) noexcept + : value(__riscv_vreinterpret_v_u64m1_u8m1(__riscv_vmv_v_x_u64m1(mask, rvv_width_m1 / 64))) + { + } + operator type() const noexcept { return bool_info::bitcast(value); } + }; + + template <class T, size_t Width = XSIMD_RVV_BITS> + using rvv_bool_t = typename std::enable_if < !std::is_void<T>::value, + rvv_bool<rvv_fix_char_t<T>, Width<rvv_width_m1 ? 
rvv_width_m1 : Width>>::type; + + template <size_t S> + struct rvv_vector_type_impl; + + template <> + struct rvv_vector_type_impl<8> + { + using signed_type = rvv_reg_t<int8_t>; + using unsigned_type = rvv_reg_t<uint8_t>; + using floating_point_type = void; + }; + + template <> + struct rvv_vector_type_impl<16> + { + using signed_type = rvv_reg_t<int16_t>; + using unsigned_type = rvv_reg_t<uint16_t>; + using floating_point_type = rvv_reg_t<_Float16>; + }; + + template <> + struct rvv_vector_type_impl<32> + { + using signed_type = rvv_reg_t<int32_t>; + using unsigned_type = rvv_reg_t<uint32_t>; + using floating_point_type = rvv_reg_t<float>; + }; + + template <> + struct rvv_vector_type_impl<64> + { + using signed_type = rvv_reg_t<int64_t>; + using unsigned_type = rvv_reg_t<uint64_t>; + using floating_point_type = rvv_reg_t<double>; + }; + + template <class T> + using signed_int_rvv_vector_type = typename rvv_vector_type_impl<8 * sizeof(T)>::signed_type; + + template <class T> + using unsigned_int_rvv_vector_type = typename rvv_vector_type_impl<8 * sizeof(T)>::unsigned_type; + + template <class T> + using floating_point_rvv_vector_type = typename rvv_vector_type_impl<8 * sizeof(T)>::floating_point_type; + + template <class T> + using signed_int_or_floating_point_rvv_vector_type = typename std::conditional<std::is_floating_point<T>::value, + floating_point_rvv_vector_type<T>, + signed_int_rvv_vector_type<T>>::type; + + template <class T> + using rvv_vector_type = typename std::conditional<std::is_signed<T>::value, + signed_int_or_floating_point_rvv_vector_type<T>, + unsigned_int_rvv_vector_type<T>>::type; + } // namespace detail + + XSIMD_DECLARE_SIMD_REGISTER(bool, rvv, detail::rvv_vector_type<unsigned char>); + XSIMD_DECLARE_SIMD_REGISTER(signed char, rvv, detail::rvv_vector_type<signed char>); + XSIMD_DECLARE_SIMD_REGISTER(unsigned char, rvv, detail::rvv_vector_type<unsigned char>); + XSIMD_DECLARE_SIMD_REGISTER(char, rvv, detail::rvv_vector_type<char>); + XSIMD_DECLARE_SIMD_REGISTER(short, rvv, detail::rvv_vector_type<short>); + XSIMD_DECLARE_SIMD_REGISTER(unsigned short, rvv, detail::rvv_vector_type<unsigned short>); + XSIMD_DECLARE_SIMD_REGISTER(int, rvv, detail::rvv_vector_type<int>); + XSIMD_DECLARE_SIMD_REGISTER(unsigned int, rvv, detail::rvv_vector_type<unsigned int>); + XSIMD_DECLARE_SIMD_REGISTER(long int, rvv, detail::rvv_vector_type<long int>); + XSIMD_DECLARE_SIMD_REGISTER(unsigned long int, rvv, detail::rvv_vector_type<unsigned long int>); + XSIMD_DECLARE_SIMD_REGISTER(long long int, rvv, detail::rvv_vector_type<long long int>); + XSIMD_DECLARE_SIMD_REGISTER(unsigned long long int, rvv, detail::rvv_vector_type<unsigned long long int>); + XSIMD_DECLARE_SIMD_REGISTER(float, rvv, detail::rvv_vector_type<float>); + XSIMD_DECLARE_SIMD_REGISTER(double, rvv, detail::rvv_vector_type<double>); + + namespace detail + { + template <class T> + struct rvv_bool_simd_register + { + using register_type = rvv_bool_t<T>; + register_type data; + operator register_type() const noexcept { return data; } + }; + } // namespace detail + + template <class T> + struct get_bool_simd_register<T, rvv> + { + using type = detail::rvv_bool_simd_register<T>; + }; + } // namespace types +#else + using rvv = detail::rvv<0xFFFFFFFF>; +#endif +} // namespace xsimd + +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_sse2_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_sse2_register.hpp new file mode 100644 index 0000000000..a9dc8960b6 --- /dev/null +++ 
b/third_party/xsimd/include/xsimd/types/xsimd_sse2_register.hpp @@ -0,0 +1,60 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_SSE2_REGISTER_HPP +#define XSIMD_SSE2_REGISTER_HPP + +#include "./xsimd_generic_arch.hpp" +#include "./xsimd_register.hpp" + +#if XSIMD_WITH_SSE2 +#include <emmintrin.h> +#include <xmmintrin.h> +#endif + +namespace xsimd +{ + /** + * @ingroup architectures + * + * SSE2 instructions + */ + struct sse2 : generic + { + static constexpr bool supported() noexcept { return XSIMD_WITH_SSE2; } + static constexpr bool available() noexcept { return true; } + static constexpr bool requires_alignment() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(1, 2, 0); } + static constexpr std::size_t alignment() noexcept { return 16; } + static constexpr char const* name() noexcept { return "sse2"; } + }; + +#if XSIMD_WITH_SSE2 + namespace types + { + XSIMD_DECLARE_SIMD_REGISTER(signed char, sse2, __m128i); + XSIMD_DECLARE_SIMD_REGISTER(unsigned char, sse2, __m128i); + XSIMD_DECLARE_SIMD_REGISTER(char, sse2, __m128i); + XSIMD_DECLARE_SIMD_REGISTER(unsigned short, sse2, __m128i); + XSIMD_DECLARE_SIMD_REGISTER(short, sse2, __m128i); + XSIMD_DECLARE_SIMD_REGISTER(unsigned int, sse2, __m128i); + XSIMD_DECLARE_SIMD_REGISTER(int, sse2, __m128i); + XSIMD_DECLARE_SIMD_REGISTER(unsigned long int, sse2, __m128i); + XSIMD_DECLARE_SIMD_REGISTER(long int, sse2, __m128i); + XSIMD_DECLARE_SIMD_REGISTER(unsigned long long int, sse2, __m128i); + XSIMD_DECLARE_SIMD_REGISTER(long long int, sse2, __m128i); + XSIMD_DECLARE_SIMD_REGISTER(float, sse2, __m128); + XSIMD_DECLARE_SIMD_REGISTER(double, sse2, __m128d); + } +#endif +} + +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_sse3_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_sse3_register.hpp new file mode 100644 index 0000000000..1a7708a896 --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_sse3_register.hpp @@ -0,0 +1,45 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. 
* + ****************************************************************************/ + +#ifndef XSIMD_SSE3_REGISTER_HPP +#define XSIMD_SSE3_REGISTER_HPP + +#include "./xsimd_sse2_register.hpp" + +#if XSIMD_WITH_SSE3 +#include <pmmintrin.h> +#endif + +namespace xsimd +{ + /** + * @ingroup architectures + * + * SSE3 instructions + */ + struct sse3 : sse2 + { + static constexpr bool supported() noexcept { return XSIMD_WITH_SSE3; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(1, 3, 0); } + static constexpr char const* name() noexcept { return "sse3"; } + }; + +#if XSIMD_WITH_SSE3 + namespace types + { + + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(sse3, sse2); + } +#endif +} + +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp new file mode 100644 index 0000000000..d906712d56 --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp @@ -0,0 +1,44 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_SSE4_1_REGISTER_HPP +#define XSIMD_SSE4_1_REGISTER_HPP + +#include "./xsimd_ssse3_register.hpp" + +#if XSIMD_WITH_SSE4_1 +#include <smmintrin.h> +#endif + +namespace xsimd +{ + /** + * @ingroup architectures + * + * SSE4.1 instructions + */ + struct sse4_1 : ssse3 + { + static constexpr bool supported() noexcept { return XSIMD_WITH_SSE4_1; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(1, 4, 1); } + static constexpr char const* name() noexcept { return "sse4.1"; } + }; + +#if XSIMD_WITH_SSE4_1 + namespace types + { + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(sse4_1, ssse3); + } +#endif +} + +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp new file mode 100644 index 0000000000..b3446c9091 --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp @@ -0,0 +1,44 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. 
* + ****************************************************************************/ + +#ifndef XSIMD_SSE4_2_REGISTER_HPP +#define XSIMD_SSE4_2_REGISTER_HPP + +#include "./xsimd_sse4_1_register.hpp" + +#if XSIMD_WITH_SSE4_2 +#include <nmmintrin.h> +#endif + +namespace xsimd +{ + /** + * @ingroup architectures + * + * SSE4.2 instructions + */ + struct sse4_2 : sse4_1 + { + static constexpr bool supported() noexcept { return XSIMD_WITH_SSE4_2; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(1, 4, 2); } + static constexpr char const* name() noexcept { return "sse4.2"; } + }; + +#if XSIMD_WITH_SSE4_2 + namespace types + { + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(sse4_2, sse4_1); + } +#endif +} + +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp new file mode 100644 index 0000000000..50ffac1e06 --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp @@ -0,0 +1,44 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_SSSE3_REGISTER_HPP +#define XSIMD_SSSE3_REGISTER_HPP + +#include "./xsimd_sse3_register.hpp" + +#if XSIMD_WITH_SSSE3 +#include <tmmintrin.h> +#endif + +namespace xsimd +{ + /** + * @ingroup architectures + * + * SSSE3 instructions + */ + struct ssse3 : sse3 + { + static constexpr bool supported() noexcept { return XSIMD_WITH_SSSE3; } + static constexpr bool available() noexcept { return true; } + static constexpr unsigned version() noexcept { return generic::version(1, 3, 1); } + static constexpr char const* name() noexcept { return "ssse3"; } + }; + +#if XSIMD_WITH_SSSE3 + namespace types + { + XSIMD_DECLARE_SIMD_REGISTER_ALIAS(ssse3, sse3); + } +#endif +} + +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_sve_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_sve_register.hpp new file mode 100644 index 0000000000..4f75c607e8 --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_sve_register.hpp @@ -0,0 +1,157 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * Copyright (c) Yibo Cai * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. 
* + ****************************************************************************/ + +#ifndef XSIMD_SVE_REGISTER_HPP +#define XSIMD_SVE_REGISTER_HPP + +#include "xsimd_generic_arch.hpp" +#include "xsimd_register.hpp" + +#if XSIMD_WITH_SVE +#include <arm_sve.h> +#endif + +namespace xsimd +{ + namespace detail + { + /** + * @ingroup architectures + * + * SVE instructions (fixed vector size) for arm64 + */ + template <size_t Width> + struct sve : xsimd::generic + { + static constexpr bool supported() noexcept { return Width == XSIMD_SVE_BITS; } + static constexpr bool available() noexcept { return true; } + static constexpr bool requires_alignment() noexcept { return true; } + static constexpr std::size_t alignment() noexcept { return 16; } + static constexpr unsigned version() noexcept { return generic::version(9, Width / 32, 0); } + static constexpr char const* name() noexcept { return "arm64+sve"; } + }; + } + +#if XSIMD_WITH_SVE + + using sve = detail::sve<__ARM_FEATURE_SVE_BITS>; + + namespace types + { + namespace detail + { +// define fixed size alias per SVE sizeless type +#define SVE_TO_FIXED_SIZE(ty) ty __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS))) + using sve_int8_t = SVE_TO_FIXED_SIZE(svint8_t); + using sve_uint8_t = SVE_TO_FIXED_SIZE(svuint8_t); + using sve_int16_t = SVE_TO_FIXED_SIZE(svint16_t); + using sve_uint16_t = SVE_TO_FIXED_SIZE(svuint16_t); + using sve_int32_t = SVE_TO_FIXED_SIZE(svint32_t); + using sve_uint32_t = SVE_TO_FIXED_SIZE(svuint32_t); + using sve_int64_t = SVE_TO_FIXED_SIZE(svint64_t); + using sve_uint64_t = SVE_TO_FIXED_SIZE(svuint64_t); + using sve_float32_t = SVE_TO_FIXED_SIZE(svfloat32_t); + using sve_float64_t = SVE_TO_FIXED_SIZE(svfloat64_t); + using sve_bool_t = SVE_TO_FIXED_SIZE(svbool_t); +#undef SVE_TO_FIXED_SIZE + + template <size_t S> + struct sve_vector_type_impl; + + template <> + struct sve_vector_type_impl<8> + { + using signed_type = sve_int8_t; + using unsigned_type = sve_uint8_t; + using floating_point_type = void; + }; + + template <> + struct sve_vector_type_impl<16> + { + using signed_type = sve_int16_t; + using unsigned_type = sve_uint16_t; + using floating_point_type = void; + }; + + template <> + struct sve_vector_type_impl<32> + { + using signed_type = sve_int32_t; + using unsigned_type = sve_uint32_t; + using floating_point_type = sve_float32_t; + }; + + template <> + struct sve_vector_type_impl<64> + { + using signed_type = sve_int64_t; + using unsigned_type = sve_uint64_t; + using floating_point_type = sve_float64_t; + }; + + template <class T> + using signed_int_sve_vector_type = typename sve_vector_type_impl<8 * sizeof(T)>::signed_type; + + template <class T> + using unsigned_int_sve_vector_type = typename sve_vector_type_impl<8 * sizeof(T)>::unsigned_type; + + template <class T> + using floating_point_sve_vector_type = typename sve_vector_type_impl<8 * sizeof(T)>::floating_point_type; + + template <class T> + using signed_int_or_floating_point_sve_vector_type = typename std::conditional<std::is_floating_point<T>::value, + floating_point_sve_vector_type<T>, + signed_int_sve_vector_type<T>>::type; + + template <class T> + using sve_vector_type = typename std::conditional<std::is_signed<T>::value, + signed_int_or_floating_point_sve_vector_type<T>, + unsigned_int_sve_vector_type<T>>::type; + } // namespace detail + + XSIMD_DECLARE_SIMD_REGISTER(signed char, sve, detail::sve_vector_type<signed char>); + XSIMD_DECLARE_SIMD_REGISTER(unsigned char, sve, detail::sve_vector_type<unsigned char>); + 
XSIMD_DECLARE_SIMD_REGISTER(char, sve, detail::sve_vector_type<char>); + XSIMD_DECLARE_SIMD_REGISTER(short, sve, detail::sve_vector_type<short>); + XSIMD_DECLARE_SIMD_REGISTER(unsigned short, sve, detail::sve_vector_type<unsigned short>); + XSIMD_DECLARE_SIMD_REGISTER(int, sve, detail::sve_vector_type<int>); + XSIMD_DECLARE_SIMD_REGISTER(unsigned int, sve, detail::sve_vector_type<unsigned int>); + XSIMD_DECLARE_SIMD_REGISTER(long int, sve, detail::sve_vector_type<long int>); + XSIMD_DECLARE_SIMD_REGISTER(unsigned long int, sve, detail::sve_vector_type<unsigned long int>); + XSIMD_DECLARE_SIMD_REGISTER(long long int, sve, detail::sve_vector_type<long long int>); + XSIMD_DECLARE_SIMD_REGISTER(unsigned long long int, sve, detail::sve_vector_type<unsigned long long int>); + XSIMD_DECLARE_SIMD_REGISTER(float, sve, detail::sve_vector_type<float>); + XSIMD_DECLARE_SIMD_REGISTER(double, sve, detail::sve_vector_type<double>); + + namespace detail + { + struct sve_bool_simd_register + { + using register_type = sve_bool_t; + register_type data; + operator register_type() const noexcept { return data; } + }; + } // namespace detail + + template <class T> + struct get_bool_simd_register<T, sve> + { + using type = detail::sve_bool_simd_register; + }; + } // namespace types +#else + using sve = detail::sve<0xFFFFFFFF>; +#endif +} // namespace xsimd + +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_traits.hpp b/third_party/xsimd/include/xsimd/types/xsimd_traits.hpp new file mode 100644 index 0000000000..f848aab1f7 --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_traits.hpp @@ -0,0 +1,319 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. 
* + ****************************************************************************/ + +#ifndef XSIMD_TRAITS_HPP +#define XSIMD_TRAITS_HPP + +#include <type_traits> + +#include "xsimd_batch.hpp" + +/** + * high level type traits + * + * @defgroup batch_traits Type traits + * + **/ + +namespace xsimd +{ + + /************************************** + * simd_traits and revert_simd_traits * + **************************************/ + + template <class T, class A = default_arch> + struct has_simd_register : types::has_simd_register<T, A> + { + }; + + namespace detail + { + template <class T, bool> + struct simd_traits_impl; + + template <class T> + struct simd_traits_impl<T, false> + { + using type = T; + using bool_type = bool; + static constexpr size_t size = 1; + }; + + template <class T> + constexpr size_t simd_traits_impl<T, false>::size; + + template <class T> + struct simd_traits_impl<T, true> + { + using type = batch<T>; + using bool_type = typename type::batch_bool_type; + static constexpr size_t size = type::size; + }; + + template <class T> + constexpr size_t simd_traits_impl<T, true>::size; + + template <class T, class A> + struct static_check_supported_config_emitter + { + + static_assert(A::supported(), + "usage of batch type with unsupported architecture"); + static_assert(!A::supported() || xsimd::has_simd_register<T, A>::value, + "usage of batch type with unsupported type"); + }; + + template <class T, class A> + struct static_check_supported_config_emitter<std::complex<T>, A> : static_check_supported_config_emitter<T, A> + { + }; + +#ifdef XSIMD_ENABLE_XTL_COMPLEX + template <class T, class A, bool i3ec> + struct static_check_supported_config_emitter<xtl::xcomplex<T, T, i3ec>, A> : static_check_supported_config_emitter<T, A> + { + }; +#endif + + // consistency checker + template <class T, class A> + inline void static_check_supported_config() + { + (void)static_check_supported_config_emitter<T, A>(); + } + } + + template <class T> + struct simd_traits : detail::simd_traits_impl<T, xsimd::has_simd_register<T>::value> + { + }; + + template <class T> + struct simd_traits<std::complex<T>> + : detail::simd_traits_impl<std::complex<T>, xsimd::has_simd_register<T>::value> + { + }; + +#ifdef XSIMD_ENABLE_XTL_COMPLEX + template <class T, bool i3ec> + struct simd_traits<xtl::xcomplex<T, T, i3ec>> + : detail::simd_traits_impl<std::complex<T>, xsimd::has_simd_register<T>::value> + { + }; +#endif + + template <class T> + struct revert_simd_traits + { + using type = T; + static constexpr size_t size = simd_traits<type>::size; + }; + + template <class T> + constexpr size_t revert_simd_traits<T>::size; + + template <class T> + struct revert_simd_traits<batch<T>> + { + using type = T; + static constexpr size_t size = batch<T>::size; + }; + + template <class T> + constexpr size_t revert_simd_traits<batch<T>>::size; + + template <class T> + using simd_type = typename simd_traits<T>::type; + + template <class T> + using simd_bool_type = typename simd_traits<T>::bool_type; + + template <class T> + using revert_simd_type = typename revert_simd_traits<T>::type; + + /******************** + * simd_return_type * + ********************/ + + namespace detail + { + template <class T1, class T2> + struct simd_condition + { + static constexpr bool value = (std::is_same<T1, T2>::value && !std::is_same<T1, bool>::value) || (std::is_same<T1, bool>::value && !std::is_same<T2, bool>::value) || std::is_same<T1, float>::value || std::is_same<T1, double>::value || std::is_same<T1, int8_t>::value || std::is_same<T1, 
uint8_t>::value || std::is_same<T1, int16_t>::value || std::is_same<T1, uint16_t>::value || std::is_same<T1, int32_t>::value || std::is_same<T1, uint32_t>::value || std::is_same<T1, int64_t>::value || std::is_same<T1, uint64_t>::value || std::is_same<T1, char>::value || detail::is_complex<T1>::value;
+        };
+
+        template <class T1, class T2, class A>
+        struct simd_return_type_impl
+            : std::enable_if<simd_condition<T1, T2>::value, batch<T2, A>>
+        {
+        };
+
+        template <class T2, class A>
+        struct simd_return_type_impl<bool, T2, A>
+            : std::enable_if<simd_condition<bool, T2>::value, batch_bool<T2, A>>
+        {
+        };
+
+        template <class T2, class A>
+        struct simd_return_type_impl<bool, std::complex<T2>, A>
+            : std::enable_if<simd_condition<bool, T2>::value, batch_bool<T2, A>>
+        {
+        };
+
+        template <class T1, class T2, class A>
+        struct simd_return_type_impl<std::complex<T1>, T2, A>
+            : std::enable_if<simd_condition<T1, T2>::value, batch<std::complex<T2>, A>>
+        {
+        };
+
+        template <class T1, class T2, class A>
+        struct simd_return_type_impl<std::complex<T1>, std::complex<T2>, A>
+            : std::enable_if<simd_condition<T1, T2>::value, batch<std::complex<T2>, A>>
+        {
+        };
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+        template <class T1, class T2, bool I3EC, class A>
+        struct simd_return_type_impl<xtl::xcomplex<T1, T1, I3EC>, T2, A>
+            : std::enable_if<simd_condition<T1, T2>::value, batch<std::complex<T2>, A>>
+        {
+        };
+
+        template <class T1, class T2, bool I3EC, class A>
+        struct simd_return_type_impl<xtl::xcomplex<T1, T1, I3EC>, std::complex<T2>, A>
+            : std::enable_if<simd_condition<T1, T2>::value, batch<std::complex<T2>, A>>
+        {
+        };
+
+        template <class T1, class T2, bool I3EC, class A>
+        struct simd_return_type_impl<xtl::xcomplex<T1, T1, I3EC>, xtl::xcomplex<T2, T2, I3EC>, A>
+            : std::enable_if<simd_condition<T1, T2>::value, batch<std::complex<T2>, A>>
+        {
+        };
+
+        template <class T1, class T2, bool I3EC, class A>
+        struct simd_return_type_impl<std::complex<T1>, xtl::xcomplex<T2, T2, I3EC>, A>
+            : std::enable_if<simd_condition<T1, T2>::value, batch<std::complex<T2>, A>>
+        {
+        };
+#endif
+    }
+
+    template <class T1, class T2, class A = default_arch>
+    using simd_return_type = typename detail::simd_return_type_impl<T1, T2, A>::type;
+
+    /**
+     * @ingroup batch_traits
+     *
+     * type trait that inherits from @c std::true_type for @c batch<...> types and from
+     * @c std::false_type otherwise.
+     *
+     * @tparam T type to analyze.
+     */
+    template <class T>
+    struct is_batch;
+
+    template <class T>
+    struct is_batch : std::false_type
+    {
+    };
+
+    template <class T, class A>
+    struct is_batch<batch<T, A>> : std::true_type
+    {
+    };
+
+    /**
+     * @ingroup batch_traits
+     *
+     * type trait that inherits from @c std::true_type for @c batch_bool<...> types and from
+     * @c std::false_type otherwise.
+     *
+     * @tparam T type to analyze.
+     */
+
+    template <class T>
+    struct is_batch_bool : std::false_type
+    {
+    };
+
+    template <class T, class A>
+    struct is_batch_bool<batch_bool<T, A>> : std::true_type
+    {
+    };
+
+    /**
+     * @ingroup batch_traits
+     *
+     * type trait that inherits from @c std::true_type for @c batch<std::complex<...>>
+     * types and from @c std::false_type otherwise.
+     *
+     * @tparam T type to analyze.
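+     *
+     * A minimal illustration (hypothetical usage, assuming a supported
+     * default architecture):
+     * @code
+     * static_assert(xsimd::is_batch_complex<xsimd::batch<std::complex<float>>>::value, "");
+     * static_assert(!xsimd::is_batch_complex<xsimd::batch<float>>::value, "");
+     * @endcode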
+ */
+
+    template <class T>
+    struct is_batch_complex : std::false_type
+    {
+    };
+
+    template <class T, class A>
+    struct is_batch_complex<batch<std::complex<T>, A>> : std::true_type
+    {
+    };
+
+    /**
+     * @ingroup batch_traits
+     *
+     * type trait whose @c type field is set to @c T::value_type if @c
+     * is_batch<T>::value and to @c T otherwise.
+     *
+     * @tparam T type to analyze.
+     */
+    template <class T>
+    struct scalar_type
+    {
+        using type = T;
+    };
+    template <class T, class A>
+    struct scalar_type<batch<T, A>>
+    {
+        using type = T;
+    };
+
+    template <class T>
+    using scalar_type_t = typename scalar_type<T>::type;
+
+    /**
+     * @ingroup batch_traits
+     *
+     * type trait whose @c type field is set to the @c batch_bool_type of @c T
+     * if @c is_batch<T>::value and to @c bool otherwise.
+     *
+     * @tparam T type to analyze.
+     */
+    template <class T>
+    struct mask_type
+    {
+        using type = bool;
+    };
+    template <class T, class A>
+    struct mask_type<batch<T, A>>
+    {
+        using type = typename batch<T, A>::batch_bool_type;
+    };
+
+    template <class T>
+    using mask_type_t = typename mask_type<T>::type;
+}
+
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_utils.hpp b/third_party/xsimd/include/xsimd/types/xsimd_utils.hpp
new file mode 100644
index 0000000000..aa890f2410
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_utils.hpp
@@ -0,0 +1,530 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and        *
+ * Martin Renou                                                            *
+ * Copyright (c) QuantStack                                                *
+ * Copyright (c) Serge Guelton                                             *
+ *                                                                         *
+ * Distributed under the terms of the BSD 3-Clause License.                *
+ *                                                                         *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_UTILS_HPP
+#define XSIMD_UTILS_HPP
+
+#include <complex>
+#include <cstdint>
+#include <cstring>
+#include <tuple>
+#include <type_traits>
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+#include "xtl/xcomplex.hpp"
+#endif
+
+namespace xsimd
+{
+
+    template <class T, class A>
+    class batch;
+
+    template <class T, class A>
+    class batch_bool;
+
+    /**************
+     * index      *
+     **************/
+
+    template <size_t I>
+    using index = std::integral_constant<size_t, I>;
+
+    /**************
+     * as_integer *
+     **************/
+
+    template <class T>
+    struct as_integer : std::make_signed<T>
+    {
+    };
+
+    template <>
+    struct as_integer<float>
+    {
+        using type = int32_t;
+    };
+
+    template <>
+    struct as_integer<double>
+    {
+        using type = int64_t;
+    };
+
+    template <class T, class A>
+    struct as_integer<batch<T, A>>
+    {
+        using type = batch<typename as_integer<T>::type, A>;
+    };
+
+    template <class B>
+    using as_integer_t = typename as_integer<B>::type;
+
+    /***********************
+     * as_unsigned_integer *
+     ***********************/
+
+    template <class T>
+    struct as_unsigned_integer : std::make_unsigned<T>
+    {
+    };
+
+    template <>
+    struct as_unsigned_integer<float>
+    {
+        using type = uint32_t;
+    };
+
+    template <>
+    struct as_unsigned_integer<double>
+    {
+        using type = uint64_t;
+    };
+
+    template <class T, class A>
+    struct as_unsigned_integer<batch<T, A>>
+    {
+        using type = batch<typename as_unsigned_integer<T>::type, A>;
+    };
+
+    template <class T>
+    using as_unsigned_integer_t = typename as_unsigned_integer<T>::type;
+
+    /*********************
+     * as_signed_integer *
+     *********************/
+
+    template <class T>
+    struct as_signed_integer : std::make_signed<T>
+    {
+    };
+
+    template
<class T> + using as_signed_integer_t = typename as_signed_integer<T>::type; + + /****************** + * flip_sign_type * + ******************/ + + namespace detail + { + template <class T, bool is_signed> + struct flipped_sign_type_impl : std::make_signed<T> + { + }; + + template <class T> + struct flipped_sign_type_impl<T, true> : std::make_unsigned<T> + { + }; + } + + template <class T> + struct flipped_sign_type + : detail::flipped_sign_type_impl<T, std::is_signed<T>::value> + { + }; + + template <class T> + using flipped_sign_type_t = typename flipped_sign_type<T>::type; + + /*********** + * as_float * + ************/ + + template <class T> + struct as_float; + + template <> + struct as_float<int32_t> + { + using type = float; + }; + + template <> + struct as_float<int64_t> + { + using type = double; + }; + + template <class T, class A> + struct as_float<batch<T, A>> + { + using type = batch<typename as_float<T>::type, A>; + }; + + template <class T> + using as_float_t = typename as_float<T>::type; + + /************** + * as_logical * + **************/ + + template <class T> + struct as_logical; + + template <class T, class A> + struct as_logical<batch<T, A>> + { + using type = batch_bool<T, A>; + }; + + template <class T> + using as_logical_t = typename as_logical<T>::type; + + /******************** + * bit_cast * + ********************/ + + template <class To, class From> + inline To bit_cast(From val) noexcept + { + static_assert(sizeof(From) == sizeof(To), "casting between compatible layout"); + // FIXME: Some old version of GCC don't support that trait + // static_assert(std::is_trivially_copyable<From>::value, "input type is trivially copyable"); + // static_assert(std::is_trivially_copyable<To>::value, "output type is trivially copyable"); + To res; + std::memcpy(&res, &val, sizeof(val)); + return res; + } + + namespace kernel + { + namespace detail + { + /************************************** + * enabling / disabling metafunctions * + **************************************/ + + template <class T> + using enable_integral_t = typename std::enable_if<std::is_integral<T>::value, int>::type; + + template <class T, size_t S> + using enable_sized_signed_t = typename std::enable_if<std::is_integral<T>::value && std::is_signed<T>::value && sizeof(T) == S, int>::type; + + template <class T, size_t S> + using enable_sized_unsigned_t = typename std::enable_if<std::is_integral<T>::value && !std::is_signed<T>::value && sizeof(T) == S, int>::type; + + template <class T, size_t S> + using enable_sized_integral_t = typename std::enable_if<std::is_integral<T>::value && sizeof(T) == S, int>::type; + + template <class T, size_t S> + using enable_sized_t = typename std::enable_if<sizeof(T) == S, int>::type; + + template <class T, size_t S> + using enable_max_sized_integral_t = typename std::enable_if<std::is_integral<T>::value && sizeof(T) <= S, int>::type; + + /******************************** + * Matching & mismatching sizes * + ********************************/ + + template <class T, class U, class B = int> + using sizes_match_t = typename std::enable_if<sizeof(T) == sizeof(U), B>::type; + + template <class T, class U, class B = int> + using sizes_mismatch_t = typename std::enable_if<sizeof(T) != sizeof(U), B>::type; + + template <class T, class U, class B = int> + using stride_match_t = typename std::enable_if<!std::is_same<T, U>::value && sizeof(T) == sizeof(U), B>::type; + } // namespace detail + } // namespace kernel + + /***************************************** + * Backport of 
index_sequence from c++14 * + *****************************************/ + + // TODO: Remove this once we drop C++11 support + namespace detail + { + template <typename T> + struct identity + { + using type = T; + }; + +#ifdef __cpp_lib_integer_sequence + using std::index_sequence; + using std::integer_sequence; + using std::make_index_sequence; + using std::make_integer_sequence; + + using std::index_sequence_for; +#else + template <typename T, T... Is> + struct integer_sequence + { + using value_type = T; + static constexpr std::size_t size() noexcept { return sizeof...(Is); } + }; + + template <typename Lhs, typename Rhs> + struct make_integer_sequence_concat; + + template <typename T, T... Lhs, T... Rhs> + struct make_integer_sequence_concat<integer_sequence<T, Lhs...>, + integer_sequence<T, Rhs...>> + : identity<integer_sequence<T, Lhs..., (sizeof...(Lhs) + Rhs)...>> + { + }; + + template <typename T> + struct make_integer_sequence_impl; + + template <typename T> + struct make_integer_sequence_impl<std::integral_constant<T, (T)0>> : identity<integer_sequence<T>> + { + }; + + template <typename T> + struct make_integer_sequence_impl<std::integral_constant<T, (T)1>> : identity<integer_sequence<T, 0>> + { + }; + + template <typename T, T N> + struct make_integer_sequence_impl<std::integral_constant<T, N>> + : make_integer_sequence_concat<typename make_integer_sequence_impl<std::integral_constant<T, N / 2>>::type, + typename make_integer_sequence_impl<std::integral_constant<T, N - (N / 2)>>::type> + { + }; + + template <typename T, T N> + using make_integer_sequence = typename make_integer_sequence_impl<std::integral_constant<T, N>>::type; + + template <std::size_t... Is> + using index_sequence = integer_sequence<std::size_t, Is...>; + + template <std::size_t N> + using make_index_sequence = make_integer_sequence<std::size_t, N>; + + template <typename... Ts> + using index_sequence_for = make_index_sequence<sizeof...(Ts)>; + +#endif + + template <int... Is> + using int_sequence = integer_sequence<int, Is...>; + + template <int N> + using make_int_sequence = make_integer_sequence<int, N>; + + template <typename... Ts> + using int_sequence_for = make_int_sequence<(int)sizeof...(Ts)>; + + // Type-casted index sequence. + template <class P, size_t... Is> + inline P indexes_from(index_sequence<Is...>) noexcept + { + return { static_cast<typename P::value_type>(Is)... }; + } + + template <class P> + inline P make_sequence_as_batch() noexcept + { + return indexes_from<P>(make_index_sequence<P::size>()); + } + } + + /*********************************** + * Backport of std::get from C++14 * + ***********************************/ + + namespace detail + { + template <class T, class... Types, size_t I, size_t... Is> + inline const T& get_impl(const std::tuple<Types...>& t, std::is_same<T, T>, index_sequence<I, Is...>) noexcept + { + return std::get<I>(t); + } + + template <class T, class U, class... Types, size_t I, size_t... Is> + inline const T& get_impl(const std::tuple<Types...>& t, std::is_same<T, U>, index_sequence<I, Is...>) noexcept + { + using tuple_elem = typename std::tuple_element<I + 1, std::tuple<Types...>>::type; + return get_impl<T>(t, std::is_same<T, tuple_elem>(), index_sequence<Is...>()); + } + + template <class T, class... 
Types> + inline const T& get(const std::tuple<Types...>& t) noexcept + { + using tuple_elem = typename std::tuple_element<0, std::tuple<Types...>>::type; + return get_impl<T>(t, std::is_same<T, tuple_elem>(), make_index_sequence<sizeof...(Types)>()); + } + } + + /********************************* + * Backport of void_t from C++17 * + *********************************/ + + namespace detail + { + template <class... T> + struct make_void + { + using type = void; + }; + + template <class... T> + using void_t = typename make_void<T...>::type; + } + + /************************************************** + * Equivalent of void_t but with size_t parameter * + **************************************************/ + + namespace detail + { + template <std::size_t> + struct check_size + { + using type = void; + }; + + template <std::size_t S> + using check_size_t = typename check_size<S>::type; + } + + /***************************************** + * Supplementary std::array constructors * + *****************************************/ + + namespace detail + { + // std::array constructor from scalar value ("broadcast") + template <typename T, std::size_t... Is> + inline constexpr std::array<T, sizeof...(Is)> + array_from_scalar_impl(const T& scalar, index_sequence<Is...>) noexcept + { + // You can safely ignore this silly ternary, the "scalar" is all + // that matters. The rest is just a dirty workaround... + return std::array<T, sizeof...(Is)> { (Is + 1) ? scalar : T()... }; + } + + template <typename T, std::size_t N> + inline constexpr std::array<T, N> + array_from_scalar(const T& scalar) noexcept + { + return array_from_scalar_impl(scalar, make_index_sequence<N>()); + } + + // std::array constructor from C-style pointer (handled as an array) + template <typename T, std::size_t... Is> + inline constexpr std::array<T, sizeof...(Is)> + array_from_pointer_impl(const T* c_array, index_sequence<Is...>) noexcept + { + return std::array<T, sizeof...(Is)> { c_array[Is]... }; + } + + template <typename T, std::size_t N> + inline constexpr std::array<T, N> + array_from_pointer(const T* c_array) noexcept + { + return array_from_pointer_impl(c_array, make_index_sequence<N>()); + } + } + + /************************ + * is_array_initializer * + ************************/ + + namespace detail + { + template <bool...> + struct bool_pack; + + template <bool... bs> + using all_true = std::is_same< + bool_pack<bs..., true>, bool_pack<true, bs...>>; + + template <typename T, typename... Args> + using is_all_convertible = all_true<std::is_convertible<Args, T>::value...>; + + template <typename T, std::size_t N, typename... Args> + using is_array_initializer = std::enable_if< + (sizeof...(Args) == N) && is_all_convertible<T, Args...>::value>; + + // Check that a variadic argument pack is a list of N values of type T, + // as usable for instantiating a value of type std::array<T, N>. + template <typename T, std::size_t N, typename... Args> + using is_array_initializer_t = typename is_array_initializer<T, N, Args...>::type; + } + + /************** + * is_complex * + **************/ + + // This is used in both xsimd_complex_base.hpp and xsimd_traits.hpp + // However xsimd_traits.hpp indirectly includes xsimd_complex_base.hpp + // so we cannot define is_complex in xsimd_traits.hpp. Besides, if + // no file defining batches is included, we still need this definition + // in xsimd_traits.hpp, so let's define it here. 
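+    //
+    // For example (purely illustrative): detail::is_complex<std::complex<double>>::value
+    // is true, while detail::is_complex<double>::value is false; xsimd_traits.hpp
+    // relies on exactly this to route complex scalars to batch<std::complex<T>>.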
+ + namespace detail + { + template <class T> + struct is_complex : std::false_type + { + }; + + template <class T> + struct is_complex<std::complex<T>> : std::true_type + { + }; + +#ifdef XSIMD_ENABLE_XTL_COMPLEX + template <class T, bool i3ec> + struct is_complex<xtl::xcomplex<T, T, i3ec>> : std::true_type + { + }; +#endif + } + + /******************* + * real_batch_type * + *******************/ + + template <class B> + struct real_batch_type + { + using type = B; + }; + + template <class T, class A> + struct real_batch_type<batch<std::complex<T>, A>> + { + using type = batch<T, A>; + }; + + template <class B> + using real_batch_type_t = typename real_batch_type<B>::type; + + /********************** + * complex_batch_type * + **********************/ + + template <class B> + struct complex_batch_type + { + using real_value_type = typename B::value_type; + using arch_type = typename B::arch_type; + using type = batch<std::complex<real_value_type>, arch_type>; + }; + + template <class T, class A> + struct complex_batch_type<batch<std::complex<T>, A>> + { + using type = batch<std::complex<T>, A>; + }; + + template <class B> + using complex_batch_type_t = typename complex_batch_type<B>::type; +} + +#endif diff --git a/third_party/xsimd/include/xsimd/types/xsimd_wasm_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_wasm_register.hpp new file mode 100644 index 0000000000..237db95c6e --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_wasm_register.hpp @@ -0,0 +1,60 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * Copyright (c) Anutosh Bhat * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. 
 *
+ ****************************************************************************/
+
+#ifndef XSIMD_WASM_REGISTER_HPP
+#define XSIMD_WASM_REGISTER_HPP
+
+#include "xsimd_generic_arch.hpp"
+#include "xsimd_register.hpp"
+
+#if XSIMD_WITH_WASM
+#include <wasm_simd128.h>
+#endif
+
+namespace xsimd
+{
+    /**
+     * @ingroup architectures
+     *
+     * WASM instructions
+     */
+    struct wasm : generic
+    {
+        static constexpr bool supported() noexcept { return XSIMD_WITH_WASM; }
+        static constexpr bool available() noexcept { return true; }
+        static constexpr bool requires_alignment() noexcept { return true; }
+        static constexpr unsigned version() noexcept { return generic::version(10, 0, 0); }
+        static constexpr std::size_t alignment() noexcept { return 16; }
+        static constexpr char const* name() noexcept { return "wasm"; }
+    };
+
+#if XSIMD_WITH_WASM
+    namespace types
+    {
+        XSIMD_DECLARE_SIMD_REGISTER(signed char, wasm, v128_t);
+        XSIMD_DECLARE_SIMD_REGISTER(unsigned char, wasm, v128_t);
+        XSIMD_DECLARE_SIMD_REGISTER(char, wasm, v128_t);
+        XSIMD_DECLARE_SIMD_REGISTER(unsigned short, wasm, v128_t);
+        XSIMD_DECLARE_SIMD_REGISTER(short, wasm, v128_t);
+        XSIMD_DECLARE_SIMD_REGISTER(unsigned int, wasm, v128_t);
+        XSIMD_DECLARE_SIMD_REGISTER(int, wasm, v128_t);
+        XSIMD_DECLARE_SIMD_REGISTER(unsigned long int, wasm, v128_t);
+        XSIMD_DECLARE_SIMD_REGISTER(long int, wasm, v128_t);
+        XSIMD_DECLARE_SIMD_REGISTER(unsigned long long int, wasm, v128_t);
+        XSIMD_DECLARE_SIMD_REGISTER(long long int, wasm, v128_t);
+        XSIMD_DECLARE_SIMD_REGISTER(float, wasm, v128_t);
+        XSIMD_DECLARE_SIMD_REGISTER(double, wasm, v128_t);
+    }
+#endif
+}
+
+#endif
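
Note on the version() scheme used by every architecture in this diff: each
version() funnels into generic::version(major, minor, patch, multiplier = 100u),
which packs the three numbers into a single comparable identifier. A minimal
standalone sketch of that encoding (arch_version is a name invented here for
illustration; the expected values are taken from the headers above):

    #include <cassert>

    // Mirrors generic::version(): major * multiplier^2 + minor * multiplier + patch.
    constexpr unsigned arch_version(unsigned major, unsigned minor, unsigned patch,
                                    unsigned multiplier = 100u) noexcept
    {
        return major * multiplier * multiplier + minor * multiplier + patch;
    }

    int main()
    {
        assert(arch_version(1, 2, 0) == 10200);          // sse2
        assert(arch_version(1, 4, 2) == 10402);          // sse4_2
        assert(arch_version(8, 1, 0) == 80100);          // neon64
        assert(arch_version(1, 0, 0, 1000u) == 1000000); // rvv, multiplier = 1000
        return 0;
    }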