diff options
Diffstat (limited to 'third_party/xsimd/include/xsimd/types/xsimd_api.hpp')
-rw-r--r-- | third_party/xsimd/include/xsimd/types/xsimd_api.hpp | 2599 |
1 file changed, 2599 insertions, 0 deletions
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_api.hpp b/third_party/xsimd/include/xsimd/types/xsimd_api.hpp new file mode 100644 index 0000000000..0420f0a09d --- /dev/null +++ b/third_party/xsimd/include/xsimd/types/xsimd_api.hpp @@ -0,0 +1,2599 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_API_HPP +#define XSIMD_API_HPP + +#include <complex> +#include <cstddef> +#include <limits> +#include <ostream> + +#include "../arch/xsimd_isa.hpp" +#include "../types/xsimd_batch.hpp" +#include "../types/xsimd_traits.hpp" + +namespace xsimd +{ + /** + * high level free functions + * + * @defgroup batch_arithmetic Arithmetic operators + * @defgroup batch_constant Constant batches + * @defgroup batch_data_transfer Memory operators + * @defgroup batch_math Basic math operators + * @defgroup batch_math_extra Extra math operators + * @defgroup batch_fp Floating point manipulation + * @defgroup batch_rounding Rounding operators + * @defgroup batch_conversion Conversion operators + * @defgroup batch_complex_op Complex operators + * @defgroup batch_logical Logical operators + * @defgroup batch_bitwise Bitwise operators + * @defgroup batch_reducers Reducers + * @defgroup batch_miscellaneous Miscellaneous + * @defgroup batch_trigo Trigonometry + * + * @defgroup batch_bool_logical Boolean logical operators + * @defgroup batch_bool_reducers Boolean reducers + */ + + /** + * @ingroup batch_math + * + * Computes the absolute values of each scalar in the batch \c x. + * @param x batch of integer or floating point values. 
+ * @return the absolute values of \c x. + */ + template <class T, class A> + inline batch<T, A> abs(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::abs<A>(x, A {}); + } + + /** + * @ingroup batch_complex + * + * Computes the absolute values of each complex in the batch \c z. + * @param z batch of complex values. + * @return the absolute values of \c z. + */ + template <class T, class A> + inline batch<T, A> abs(batch<std::complex<T>, A> const& z) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::abs<A>(z, A {}); + } + + /** + * @ingroup batch_arithmetic + * + * Computes the sum of the batches \c x and \c y. + * @param x batch or scalar involved in the addition. + * @param y batch or scalar involved in the addition. + * @return the sum of \c x and \c y + */ + template <class T, class A> + inline auto add(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x + y) + { + detail::static_check_supported_config<T, A>(); + return x + y; + } + + /** + * @ingroup batch_trigo + * + * Computes the arc cosine of the batch \c x. + * @param x batch of floating point values. + * @return the arc cosine of \c x. + */ + template <class T, class A> + inline batch<T, A> acos(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::acos<A>(x, A {}); + } + + /** + * @ingroup batch_trigo + * + * Computes the inverse hyperbolic cosine of the batch \c x. + * @param x batch of floating point values. + * @return the inverse hyperbolic cosine of \c x. + */ + template <class T, class A> + inline batch<T, A> acosh(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::acosh<A>(x, A {}); + } + + /** + * @ingroup batch_complex + * + * Computes the argument of the batch \c z. + * @param z batch of complex or real values. + * @return the argument of \c z. 
+ */ + template <class T, class A> + inline real_batch_type_t<batch<T, A>> arg(batch<T, A> const& z) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::arg<A>(z, A {}); + } + + /** + * @ingroup batch_trigo + * + * Computes the arc sine of the batch \c x. + * @param x batch of floating point values. + * @return the arc sine of \c x. + */ + template <class T, class A> + inline batch<T, A> asin(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::asin<A>(x, A {}); + } + + /** + * @ingroup batch_trigo + * + * Computes the inverse hyperbolic sine of the batch \c x. + * @param x batch of floating point values. + * @return the inverse hyperbolic sine of \c x. + */ + template <class T, class A> + inline batch<T, A> asinh(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::asinh<A>(x, A {}); + } + + /** + * @ingroup batch_trigo + * + * Computes the arc tangent of the batch \c x. + * @param x batch of floating point values. + * @return the arc tangent of \c x. + */ + template <class T, class A> + inline batch<T, A> atan(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::atan<A>(x, A {}); + } + + /** + * @ingroup batch_trigo + * + * Computes the arc tangent of the batch \c x/y, using the signs of the + * arguments to determine the correct quadrant. + * @param x batch of floating point values. + * @param y batch of floating point values. + * @return the arc tangent of \c x/y. + */ + template <class T, class A> + inline batch<T, A> atan2(batch<T, A> const& x, batch<T, A> const& y) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::atan2<A>(x, y, A {}); + } + + /** + * @ingroup batch_trigo + * + * Computes the inverse hyperbolic tangent of the batch \c x. + * @param x batch of floating point values. + * @return the inverse hyperbolic tangent of \c x. 
+ */ + template <class T, class A> + inline batch<T, A> atanh(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::atanh<A>(x, A {}); + } + + /** + * @ingroup batch_conversion + * + * Perform a static_cast from \c T_in to \c T_out on \c x. + * @param x batch_bool of \c T_in + * @return \c x cast to \c T_out + */ + template <class T_out, class T_in, class A> + inline batch_bool<T_out, A> batch_bool_cast(batch_bool<T_in, A> const& x) noexcept + { + detail::static_check_supported_config<T_out, A>(); + detail::static_check_supported_config<T_in, A>(); + static_assert(batch_bool<T_out, A>::size == batch_bool<T_in, A>::size, "Casting between incompatibles batch_bool types."); + return kernel::batch_bool_cast<A>(x, batch_bool<T_out, A> {}, A {}); + } + + /** + * @ingroup batch_conversion + * + * Perform a static_cast from \c T_in to \c T_out on \c x. + * @param x batch of \c T_in + * @return \c x cast to \c T_out + */ + template <class T_out, class T_in, class A> + inline batch<T_out, A> batch_cast(batch<T_in, A> const& x) noexcept + { + detail::static_check_supported_config<T_out, A>(); + detail::static_check_supported_config<T_in, A>(); + return kernel::batch_cast<A>(x, batch<T_out, A> {}, A {}); + } + + /** + * @ingroup batch_miscellaneous + * + * Computes the bit of sign of \c x + * @param x batch of scalar + * @return bit of sign of \c x + */ + template <class T, class A> + inline batch<T, A> bitofsign(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::bitofsign<A>(x, A {}); + } + + /** + * @ingroup batch_bitwise + * + * Computes the bitwise and of the batches \c x and \c y. + * @param x batch involved in the operation. + * @param y batch involved in the operation. + * @return the result of the bitwise and. 
+ */ + template <class T, class A> + inline auto bitwise_and(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x & y) + { + detail::static_check_supported_config<T, A>(); + return x & y; + } + + /** + * @ingroup batch_bitwise + * + * Computes the bitwise and of the batches \c x and \c y. + * @param x batch involved in the operation. + * @param y batch involved in the operation. + * @return the result of the bitwise and. + */ + template <class T, class A> + inline auto bitwise_and(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x & y) + { + detail::static_check_supported_config<T, A>(); + return x & y; + } + + /** + * @ingroup batch_bitwise + * + * Computes the bitwise and not of batches \c x and \c y. + * @param x batch involved in the operation. + * @param y batch involved in the operation. + * @return the result of the bitwise and not. + */ + template <class T, class A> + inline batch<T, A> bitwise_andnot(batch<T, A> const& x, batch<T, A> const& y) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::bitwise_andnot<A>(x, y, A {}); + } + + /** + * @ingroup batch_bool_logical + * + * Computes the bitwise and not of batches \c x and \c y. + * @param x batch involved in the operation. + * @param y batch involved in the operation. + * @return the result of the bitwise and not. + */ + template <class T, class A> + inline batch_bool<T, A> bitwise_andnot(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::bitwise_andnot<A>(x, y, A {}); + } + + /** + * @ingroup batch_conversion + * + * Perform a reinterpret_cast from \c T_in to \c T_out on \c x. 
+ * @param x batch of \c T_in + * @return \c x reinterpreted as \c T_out + */ + template <class T_out, class T_in, class A> + inline batch<T_out, A> bitwise_cast(batch<T_in, A> const& x) noexcept + { + detail::static_check_supported_config<T_in, A>(); + detail::static_check_supported_config<T_out, A>(); + return kernel::bitwise_cast<A>(x, batch<T_out, A> {}, A {}); + } + + /** + * @ingroup batch_bitwise + * + * Perform a bitwise shift to the left + * @param x batch of \c T_in + * @param shift scalar amount to shift + * @return shifted \c x. + */ + template <class T, class A> + inline batch<T, A> bitwise_lshift(batch<T, A> const& x, int shift) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::bitwise_lshift<A>(x, shift, A {}); + } + template <class T, class A> + inline batch<T, A> bitwise_lshift(batch<T, A> const& x, batch<T, A> const& shift) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::bitwise_lshift<A>(x, shift, A {}); + } + + /** + * @ingroup batch_bitwise + * + * Computes the bitwise not of batch \c x. + * @param x batch involved in the operation. + * @return the result of the bitwise not. + */ + template <class T, class A> + inline batch<T, A> bitwise_not(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::bitwise_not<A>(x, A {}); + } + + /** + * @ingroup batch_bitwise + * + * Computes the bitwise not of batch \c x. + * @param x batch involved in the operation. + * @return the result of the bitwise not. + */ + template <class T, class A> + inline batch_bool<T, A> bitwise_not(batch_bool<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::bitwise_not<A>(x, A {}); + } + + /** + * @ingroup batch_bitwise + * + * Computes the bitwise or of the batches \c x and \c y. + * @param x scalar or batch of scalars + * @param y scalar or batch of scalars + * @return the result of the bitwise or. 
+ */ + template <class T, class A> + inline auto bitwise_or(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x | y) + { + detail::static_check_supported_config<T, A>(); + return x | y; + } + + /** + * @ingroup batch_bitwise + * + * Computes the bitwise or of the batches \c x and \c y. + * @param x scalar or batch of scalars + * @param y scalar or batch of scalars + * @return the result of the bitwise or. + */ + template <class T, class A> + inline auto bitwise_or(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x | y) + { + detail::static_check_supported_config<T, A>(); + return x | y; + } + + /** + * @ingroup batch_bitwise + * + * Perform a bitwise shift to the right + * @param x batch of \c T_in + * @param shift scalar amount to shift + * @return shifted \c x. + */ + template <class T, class A> + inline batch<T, A> bitwise_rshift(batch<T, A> const& x, int shift) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::bitwise_rshift<A>(x, shift, A {}); + } + template <class T, class A> + inline batch<T, A> bitwise_rshift(batch<T, A> const& x, batch<T, A> const& shift) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::bitwise_rshift<A>(x, shift, A {}); + } + + /** + * @ingroup batch_bitwise + * + * Computes the bitwise xor of the batches \c x and \c y. + * @param x scalar or batch of scalars + * @param y scalar or batch of scalars + * @return the result of the bitwise xor. + */ + template <class T, class A> + inline auto bitwise_xor(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x ^ y) + { + detail::static_check_supported_config<T, A>(); + return x ^ y; + } + + /** + * @ingroup batch_bitwise + * + * Computes the bitwise xor of the batches \c x and \c y. + * @param x scalar or batch of scalars + * @param y scalar or batch of scalars + * @return the result of the bitwise xor. 
+ */ + template <class T, class A> + inline auto bitwise_xor(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x ^ y) + { + detail::static_check_supported_config<T, A>(); + return x ^ y; + } + + /** + * @ingroup batch_data_transfer + * + * Creates a batch from the single value \c v. + * @param v the value used to initialize the batch + * @return a new batch instance + */ + template <class T, class A = default_arch> + inline batch<T, A> broadcast(T v) noexcept + { + detail::static_check_supported_config<T, A>(); + return batch<T, A>::broadcast(v); + } + + /** + * @ingroup batch_data_transfer + * + * Creates a batch from the single value \c v and + * the specified batch value type \c To. + * @param v the value used to initialize the batch + * @return a new batch instance + */ + template <class To, class A = default_arch, class From> + inline simd_return_type<From, To, A> broadcast_as(From v) noexcept + { + detail::static_check_supported_config<From, A>(); + using batch_value_type = typename simd_return_type<From, To, A>::value_type; + using value_type = typename std::conditional<std::is_same<From, bool>::value, + bool, + batch_value_type>::type; + return simd_return_type<From, To, A>(value_type(v)); + } + + /** + * @ingroup batch_math + * + * Computes the cubic root of the batch \c x. + * @param x batch of floating point values. + * @return the cubic root of \c x. + */ + template <class T, class A> + inline batch<T, A> cbrt(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::cbrt<A>(x, A {}); + } + + /** + * @ingroup batch_rounding + * + * Computes the batch of smallest integer values not less than + * scalars in \c x. + * @param x batch of floating point values. + * @return the batch of smallest integer values not less than \c x. 
+ */ + template <class T, class A> + inline batch<T, A> ceil(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::ceil<A>(x, A {}); + } + + /** + * @ingroup batch_math + * + * Clips the values of the batch \c x between those of the batches \c lo and \c hi. + * @param x batch of scalar values. + * @param lo batch of scalar values. + * @param hi batch of scalar values. + * @return the result of the clipping. + */ + template <class T, class A> + inline batch<T, A> clip(batch<T, A> const& x, batch<T, A> const& lo, batch<T, A> const& hi) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::clip(x, lo, hi, A {}); + } + + /** + * @ingroup batch_data_transfer + * + * Pick elements from \c x selected by \c mask, and append them to the + * resulting vector, zeroing the remaining slots + */ + template <class T, class A> + inline batch<T, A> compress(batch<T, A> const& x, batch_bool<T, A> const& mask) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::compress<A>(x, mask, A {}); + } + + /** + * @ingroup batch_complex + * + * Computes the conjugate of the batch \c z. + * @param z batch of complex values. + * @return the conjugate of \c z. + */ + template <class A, class T> + inline complex_batch_type_t<batch<T, A>> conj(batch<T, A> const& z) noexcept + { + return kernel::conj(z, A {}); + } + + /** + * @ingroup batch_miscellaneous + * + * Computes a value whose absolute value matches + * that of \c x, but whose sign bit matches that of \c y. + * @param x batch of scalars + * @param y batch of scalars + * @return batch whose absolute value matches that of \c x, but whose sign bit + * matches that of \c y. 
+ */ + template <class T, class A> + inline batch<T, A> copysign(batch<T, A> const& x, batch<T, A> const& y) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::copysign<A>(x, y, A {}); + } + + /** + * @ingroup batch_trigo + * + * Computes the cosine of the batch \c x. + * @param x batch of floating point values. + * @return the cosine of \c x. + */ + template <class T, class A> + inline batch<T, A> cos(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::cos<A>(x, A {}); + } + + /** + * @ingroup batch_trigo + * + * computes the hyperbolic cosine of the batch \c x. + * @param x batch of floating point values. + * @return the hyperbolic cosine of \c x. + */ + template <class T, class A> + inline batch<T, A> cosh(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::cosh<A>(x, A {}); + } + + /** + * @ingroup batch_arithmetic + * + * Subtract 1 to batch \c x. + * @param x batch involved in the decrement. + * @return the subtraction of \c x and 1. + */ + template <class T, class A> + inline batch<T, A> decr(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::decr<A>(x, A {}); + } + + /** + * @ingroup batch_arithmetic + * + * Subtract 1 to batch \c x for each element where \c mask is true. + * @param x batch involved in the increment. + * @param mask whether to perform the increment or not. Can be a \c + * batch_bool or a \c batch_bool_constant. + * @return the subtraction of \c x and 1 when \c mask is true. + */ + template <class T, class A, class Mask> + inline batch<T, A> decr_if(batch<T, A> const& x, Mask const& mask) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::decr_if<A>(x, mask, A {}); + } + + /** + * @ingroup batch_arithmetic + * + * Computes the division of the batch \c x by the batch \c y. 
+ * @param x scalar or batch of scalars + * @param y scalar or batch of scalars + * @return the result of the division. + */ + template <class T, class A> + inline auto div(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x / y) + { + detail::static_check_supported_config<T, A>(); + return x / y; + } + + /** + * @ingroup batch_logical + * + * Element-wise equality comparison of batches \c x and \c y. + * @param x batch of scalars + * @param y batch of scalars + * @return a boolean batch. + */ + template <class T, class A> + inline auto eq(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x == y) + { + detail::static_check_supported_config<T, A>(); + return x == y; + } + + /** + * @ingroup batch_logical + * + * Element-wise equality comparison of batches of boolean values \c x and \c y. + * @param x batch of booleans involved in the comparison. + * @param y batch of booleans involved in the comparison. + * @return a boolean batch. + */ + template <class T, class A> + inline auto eq(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x == y) + { + detail::static_check_supported_config<T, A>(); + return x == y; + } + + /** + * @ingroup batch_math + * + * Computes the natural exponential of the batch \c x. + * @param x batch of floating point values. + * @return the natural exponential of \c x. + */ + template <class T, class A> + inline batch<T, A> exp(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::exp<A>(x, A {}); + } + + /** + * @ingroup batch_math + * + * Computes the base 10 exponential of the batch \c x. + * @param x batch of floating point values. + * @return the base 10 exponential of \c x. 
+ */ + template <class T, class A> + inline batch<T, A> exp10(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::exp10<A>(x, A {}); + } + + /** + * @ingroup batch_math + * + * Computes the base 2 exponential of the batch \c x. + * @param x batch of floating point values. + * @return the base 2 exponential of \c x. + */ + template <class T, class A> + inline batch<T, A> exp2(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::exp2<A>(x, A {}); + } + + /** + * @ingroup batch_data_transfer + * + * Load contiguous elements from \c x and place them in slots selected by \c + * mask, zeroing the other slots + */ + template <class T, class A> + inline batch<T, A> expand(batch<T, A> const& x, batch_bool<T, A> const& mask) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::expand<A>(x, mask, A {}); + } + + /** + * @ingroup batch_math + * + * Computes the natural exponential of the batch \c x, minus one. + * @param x batch of floating point values. + * @return the natural exponential of \c x, minus one. + */ + template <class T, class A> + inline batch<T, A> expm1(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::expm1<A>(x, A {}); + } + + /** + * @ingroup batch_math_extra + * + * Computes the error function of the batch \c x. + * @param x batch of floating point values. + * @return the error function of \c x. + */ + template <class T, class A> + inline batch<T, A> erf(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::erf<A>(x, A {}); + } + + /** + * @ingroup batch_math_extra + * + * Computes the complementary error function of the batch \c x. + * @param x batch of floating point values. + * @return the error function of \c x. 
+ */ + template <class T, class A> + inline batch<T, A> erfc(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::erfc<A>(x, A {}); + } + + /** + * Extract vector from pair of vectors + * extracts the lowest vector elements from the second source \c x + * and the highest vector elements from the first source \c y + * Concatenates the results into the return value. + * @param x batch of integer or floating point values. + * @param y batch of integer or floating point values. + * @param i integer specifying the lowest vector element to extract from the first source register + * @return the batch of concatenated elements. + */ + template <class T, class A> + inline batch<T, A> extract_pair(batch<T, A> const& x, batch<T, A> const& y, std::size_t i) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::extract_pair<A>(x, y, i, A {}); + } + + /** + * @ingroup batch_math + * + * Computes the absolute values of each scalar in the batch \c x. + * @param x batch of floating point values. + * @return the absolute values of \c x. + */ + template <class T, class A> + inline batch<T, A> fabs(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::abs<A>(x, A {}); + } + + /** + * @ingroup batch_math + * + * Computes the positive difference between \c x and \c y, that is, + * <tt>max(0, x-y)</tt>. + * @param x batch of floating point values. + * @param y batch of floating point values. + * @return the positive difference. + */ + template <class T, class A> + inline batch<T, A> fdim(batch<T, A> const& x, batch<T, A> const& y) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::fdim<A>(x, y, A {}); + } + + /** + * @ingroup batch_rounding + * + * Computes the batch of largest integer values not greater than + * scalars in \c x. + * @param x batch of floating point values. + * @return the batch of largest integer values not greater than \c x. 
+ */ + template <class T, class A> + inline batch<T, A> floor(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::floor<A>(x, A {}); + } + + /** + * @ingroup batch_arithmetic + * + * Computes <tt>(x*y) + z</tt> in a single instruction when possible. + * @param x a batch of integer or floating point values. + * @param y a batch of integer or floating point values. + * @param z a batch of integer or floating point values. + * @return the result of the fused multiply-add operation. + */ + template <class T, class A> + inline batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::fma<A>(x, y, z, A {}); + } + + /** + * @ingroup batch_math + * + * Computes the larger values of the batches \c x and \c y. + * @param x a batch of integer or floating point values. + * @param y a batch of integer or floating point values. + * @return a batch of the larger values. + */ + template <class T, class A> + inline batch<T, A> fmax(batch<T, A> const& x, batch<T, A> const& y) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::max<A>(x, y, A {}); + } + + /** + * @ingroup batch_math + * + * Computes the smaller values of the batches \c x and \c y. + * @param x a batch of integer or floating point values. + * @param y a batch of integer or floating point values. + * @return a batch of the smaller values. + */ + template <class T, class A> + inline batch<T, A> fmin(batch<T, A> const& x, batch<T, A> const& y) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::min<A>(x, y, A {}); + } + + /** + * @ingroup batch_math + * + * Computes the modulo of the batch \c x by the batch \c y. + * @param x batch involved in the modulo. + * @param y batch involved in the modulo. + * @return the result of the modulo. 
+ */ + template <class T, class A> + inline batch<T, A> fmod(batch<T, A> const& x, batch<T, A> const& y) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::fmod<A>(x, y, A {}); + } + + /** + * @ingroup batch_arithmetic + * + * Computes <tt>(x*y) - z</tt> in a single instruction when possible. + * @param x a batch of integer or floating point values. + * @param y a batch of integer or floating point values. + * @param z a batch of integer or floating point values. + * @return the result of the fused multiply-sub operation. + */ + template <class T, class A> + inline batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::fms<A>(x, y, z, A {}); + } + + /** + * @ingroup batch_arithmetic + * + * Computes <tt>-(x*y) + z</tt> in a single instruction when possible. + * @param x a batch of integer or floating point values. + * @param y a batch of integer or floating point values. + * @param z a batch of integer or floating point values. + * @return the result of the fused negated multiply-add operation. + */ + template <class T, class A> + inline batch<T, A> fnma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::fnma<A>(x, y, z, A {}); + } + + /** + * @ingroup batch_arithmetic + * + * Computes <tt>-(x*y) - z</tt> in a single instruction when possible. + * @param x a batch of integer or floating point values. + * @param y a batch of integer or floating point values. + * @param z a batch of integer or floating point values. + * @return the result of the fused negated multiply-sub operation. 
+ */ + template <class T, class A> + inline batch<T, A> fnms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::fnms<A>(x, y, z, A {}); + } + + /** + * @ingroup batch_fp + * + * Splits the number \c x into a normalized fraction and an exponent which is stored in \c y + * @param x a batch of integer or floating point values. + * @param y the batch of integer values receiving the exponents. + * @return the normalized fraction of x + */ + template <class T, class A> + inline batch<T, A> frexp(const batch<T, A>& x, batch<as_integer_t<T>, A>& y) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::frexp<A>(x, y, A {}); + } + + /** + * @ingroup batch_logical + * + * Element-wise greater or equal comparison of batches \c x and \c y. + * @tparam X the actual type of batch. + * @param x batch involved in the comparison. + * @param y batch involved in the comparison. + * @return a boolean batch. + */ + template <class T, class A> + inline batch_bool<T, A> ge(batch<T, A> const& x, batch<T, A> const& y) noexcept + { + detail::static_check_supported_config<T, A>(); + return x >= y; + } + + /** + * @ingroup batch_logical + * + * Element-wise greater than comparison of batches \c x and \c y. + * @tparam X the actual type of batch. + * @param x batch involved in the comparison. + * @param y batch involved in the comparison. + * @return a boolean batch. + */ + template <class T, class A> + inline batch_bool<T, A> gt(batch<T, A> const& x, batch<T, A> const& y) noexcept + { + detail::static_check_supported_config<T, A>(); + return x > y; + } + + /** + * @ingroup batch_reducers + * + * Parallel horizontal addition: adds the scalars of each batch + * in the array pointed by \c row and store them in a returned + * batch. + * @param row an array of \c N batches + * @return the result of the reduction. 
+ */ + template <class T, class A> + inline batch<T, A> haddp(batch<T, A> const* row) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::haddp<A>(row, A {}); + } + + /** + * @ingroup batch_math + * + * Computes the square root of the sum of the squares of the batches + * \c x, and \c y. + * @param x batch of floating point values. + * @param y batch of floating point values. + * @return the square root of the sum of the squares of \c x and \c y. + */ + template <class T, class A> + inline batch<T, A> hypot(batch<T, A> const& x, batch<T, A> const& y) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::hypot<A>(x, y, A {}); + } + + /** + * @ingroup batch_complex + * + * Computes the imaginary part of the batch \c x. + * @param x batch of complex or real values. + * @return the imaginary part of \c x. + */ + template <class T, class A> + inline real_batch_type_t<batch<T, A>> imag(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::imag<A>(x, A {}); + } + + /** + * @ingroup batch_arithmetic + * + * Add 1 to batch \c x. + * @param x batch involved in the increment. + * @return the sum of \c x and 1. + */ + template <class T, class A> + inline batch<T, A> incr(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::incr<A>(x, A {}); + } + + /** + * @ingroup batch_arithmetic + * + * Add 1 to batch \c x for each element where \c mask is true. + * @param x batch involved in the increment. + * @param mask whether to perform the increment or not. Can be a \c + * batch_bool or a \c batch_bool_constant. + * @return the sum of \c x and 1 when \c mask is true. 
+ */ + template <class T, class A, class Mask> + inline batch<T, A> incr_if(batch<T, A> const& x, Mask const& mask) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::incr_if<A>(x, mask, A {}); + } + + /** + * @ingroup batch_constant + * + * Return a batch of scalars representing positive infinity + * @return a batch of positive infinity + */ + template <class B> + inline B infinity() + { + using T = typename B::value_type; + using A = typename B::arch_type; + detail::static_check_supported_config<T, A>(); + return B(std::numeric_limits<T>::infinity()); + } + + /** + * @ingroup batch_data_transfer + * + * Create a new batch equivalent to \c x but with element \c val set at position \c pos + * @param x batch + * @param val value to set + * @param pos index of the updated slot + * @return copy of \c x with position \c pos set to \c val + */ + template <class T, class A, size_t I> + inline batch<T, A> insert(batch<T, A> const& x, T val, index<I> pos) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::insert<A>(x, val, pos, A {}); + } + + /** + * @ingroup batch_logical + * + * Determines if the scalars in the given batch \c x represent an even integer value + * @param x batch of floating point values. + * @return a batch of booleans. + */ + template <class T, class A> + inline batch_bool<T, A> is_even(batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::is_even<A>(x, A {}); + } + + /** + * @ingroup batch_logical + * + * Determines if the floating-point scalars in the given batch \c x represent integer value + * @param x batch of floating point values. + * @return a batch of booleans. 
     */
    template <class T, class A>
    inline batch_bool<T, A> is_flint(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::is_flint<A>(x, A {});
    }

    /**
     * @ingroup batch_logical
     *
     * Determines if the scalars in the given batch \c x represent an odd integer value
     * @param x batch of floating point values.
     * @return a batch of booleans.
     */
    template <class T, class A>
    inline batch_bool<T, A> is_odd(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::is_odd<A>(x, A {});
    }

    /**
     * @ingroup batch_logical
     *
     * Determines if the scalars in the given batch \c x are inf values.
     * @param x batch of floating point values.
     * @return a batch of booleans.
     */
    template <class T, class A>
    inline typename batch<T, A>::batch_bool_type isinf(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::isinf<A>(x, A {});
    }

    /**
     * @ingroup batch_logical
     *
     * Determines if the scalars in the given batch \c x are finite values.
     * @param x batch of floating point values.
     * @return a batch of booleans.
     */
    template <class T, class A>
    inline typename batch<T, A>::batch_bool_type isfinite(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::isfinite<A>(x, A {});
    }

    /**
     * @ingroup batch_logical
     *
     * Determines if the scalars in the given batch \c x are NaN values.
     * @param x batch of floating point values.
     * @return a batch of booleans.
     */
    template <class T, class A>
    inline typename batch<T, A>::batch_bool_type isnan(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::isnan<A>(x, A {});
    }

    /**
     * @ingroup batch_math_extra
     *
     * Computes the multiplication of the floating point number \c x by 2 raised to the power \c y.
     * @param x batch of floating point values.
     * @param y batch of integer values.
     * @return a batch of floating point values.
     */
    template <class T, class A>
    inline batch<T, A> ldexp(const batch<T, A>& x, const batch<as_integer_t<T>, A>& y) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::ldexp<A>(x, y, A {});
    }

    /**
     * @ingroup batch_logical
     *
     * Element-wise lesser or equal to comparison of batches \c x and \c y.
     * @param x batch involved in the comparison.
     * @param y batch involved in the comparison.
     * @return a boolean batch.
     */
    template <class T, class A>
    inline batch_bool<T, A> le(batch<T, A> const& x, batch<T, A> const& y) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return x <= y;
    }

    /**
     * @ingroup batch_math_extra
     *
     * Computes the natural logarithm of the gamma function of the batch \c x.
     * @param x batch of floating point values.
     * @return the natural logarithm of the gamma function of \c x.
     */
    template <class T, class A>
    inline batch<T, A> lgamma(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::lgamma<A>(x, A {});
    }

    /**
     * @ingroup batch_data_transfer
     *
     * Creates a batch from the buffer \c ptr and the specified
     * batch value type \c To. The memory needs to be aligned.
     * @param ptr the memory buffer to read
     * @return a new batch instance
     */
    template <class To, class A = default_arch, class From>
    inline simd_return_type<From, To, A> load_as(From const* ptr, aligned_mode) noexcept
    {
        using batch_value_type = typename simd_return_type<From, To, A>::value_type;
        detail::static_check_supported_config<From, A>();
        detail::static_check_supported_config<To, A>();
        return kernel::load_aligned<A>(ptr, kernel::convert<batch_value_type> {}, A {});
    }

    template <class To, class A = default_arch>
    inline simd_return_type<bool, To, A> load_as(bool const* ptr, aligned_mode) noexcept
    {
        detail::static_check_supported_config<To, A>();
        return simd_return_type<bool, To, A>::load_aligned(ptr);
    }

    template <class To, class A = default_arch, class From>
    inline simd_return_type<std::complex<From>, To, A> load_as(std::complex<From> const* ptr, aligned_mode) noexcept
    {
        detail::static_check_supported_config<To, A>();
        using batch_value_type = typename simd_return_type<std::complex<From>, To, A>::value_type;
        return kernel::load_complex_aligned<A>(ptr, kernel::convert<batch_value_type> {}, A {});
    }

#ifdef XSIMD_ENABLE_XTL_COMPLEX
    template <class To, class A = default_arch, class From, bool i3ec>
    inline simd_return_type<xtl::xcomplex<From, From, i3ec>, To, A> load_as(xtl::xcomplex<From, From, i3ec> const* ptr, aligned_mode) noexcept
    {
        detail::static_check_supported_config<To, A>();
        detail::static_check_supported_config<From, A>();
        return load_as<To>(reinterpret_cast<std::complex<From> const*>(ptr), aligned_mode());
    }
#endif

    /**
     * @ingroup batch_data_transfer
     *
     * Creates a batch from the buffer \c ptr and the specified
     * batch value type \c To. The memory does not need to be aligned.
+ * @param ptr the memory buffer to read + * @return a new batch instance + */ + template <class To, class A = default_arch, class From> + inline simd_return_type<From, To, A> load_as(From const* ptr, unaligned_mode) noexcept + { + using batch_value_type = typename simd_return_type<From, To, A>::value_type; + detail::static_check_supported_config<To, A>(); + detail::static_check_supported_config<From, A>(); + return kernel::load_unaligned<A>(ptr, kernel::convert<batch_value_type> {}, A {}); + } + + template <class To, class A = default_arch> + inline simd_return_type<bool, To, A> load_as(bool const* ptr, unaligned_mode) noexcept + { + return simd_return_type<bool, To, A>::load_unaligned(ptr); + } + + template <class To, class A = default_arch, class From> + inline simd_return_type<std::complex<From>, To, A> load_as(std::complex<From> const* ptr, unaligned_mode) noexcept + { + detail::static_check_supported_config<To, A>(); + detail::static_check_supported_config<From, A>(); + using batch_value_type = typename simd_return_type<std::complex<From>, To, A>::value_type; + return kernel::load_complex_unaligned<A>(ptr, kernel::convert<batch_value_type> {}, A {}); + } + +#ifdef XSIMD_ENABLE_XTL_COMPLEX + template <class To, class A = default_arch, class From, bool i3ec> + inline simd_return_type<xtl::xcomplex<From, From, i3ec>, To, A> load_as(xtl::xcomplex<From, From, i3ec> const* ptr, unaligned_mode) noexcept + { + detail::static_check_supported_config<To, A>(); + detail::static_check_supported_config<From, A>(); + return load_as<To>(reinterpret_cast<std::complex<From> const*>(ptr), unaligned_mode()); + } +#endif + + /** + * @ingroup batch_data_transfer + * + * Creates a batch from the buffer \c ptr. The + * memory needs to be aligned. 
     * @param ptr the memory buffer to read
     * @return a new batch instance
     */
    template <class A = default_arch, class From>
    inline batch<From, A> load(From const* ptr, aligned_mode = {}) noexcept
    {
        detail::static_check_supported_config<From, A>();
        return load_as<From, A>(ptr, aligned_mode {});
    }

    /**
     * @ingroup batch_data_transfer
     *
     * Creates a batch from the buffer \c ptr. The
     * memory does not need to be aligned.
     * @param ptr the memory buffer to read
     * @return a new batch instance
     */
    template <class A = default_arch, class From>
    inline batch<From, A> load(From const* ptr, unaligned_mode) noexcept
    {
        detail::static_check_supported_config<From, A>();
        return load_as<From, A>(ptr, unaligned_mode {});
    }

    /**
     * @ingroup batch_data_transfer
     *
     * Creates a batch from the buffer \c ptr. The
     * memory needs to be aligned.
     * @param ptr the memory buffer to read
     * @return a new batch instance
     */
    template <class A = default_arch, class From>
    inline batch<From, A> load_aligned(From const* ptr) noexcept
    {
        detail::static_check_supported_config<From, A>();
        return load_as<From, A>(ptr, aligned_mode {});
    }

    /**
     * @ingroup batch_data_transfer
     *
     * Creates a batch from the buffer \c ptr. The
     * memory does not need to be aligned.
     * @param ptr the memory buffer to read
     * @return a new batch instance
     */
    template <class A = default_arch, class From>
    inline batch<From, A> load_unaligned(From const* ptr) noexcept
    {
        detail::static_check_supported_config<From, A>();
        return load_as<From, A>(ptr, unaligned_mode {});
    }

    /**
     * @ingroup batch_math
     *
     * Computes the natural logarithm of the batch \c x.
     * @param x batch of floating point values.
     * @return the natural logarithm of \c x.
     */
    template <class T, class A>
    inline batch<T, A> log(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::log<A>(x, A {});
    }

    /**
     * @ingroup batch_math
     *
     * Computes the base 2 logarithm of the batch \c x.
     * @param x batch of floating point values.
     * @return the base 2 logarithm of \c x.
     */
    template <class T, class A>
    inline batch<T, A> log2(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::log2<A>(x, A {});
    }

    /**
     * @ingroup batch_math
     *
     * Computes the base 10 logarithm of the batch \c x.
     * @param x batch of floating point values.
     * @return the base 10 logarithm of \c x.
     */
    template <class T, class A>
    inline batch<T, A> log10(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::log10<A>(x, A {});
    }

    /**
     * @ingroup batch_math
     *
     * Computes the natural logarithm of one plus the batch \c x.
     * @param x batch of floating point values.
     * @return the natural logarithm of one plus \c x.
     */
    template <class T, class A>
    inline batch<T, A> log1p(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::log1p<A>(x, A {});
    }

    /**
     * @ingroup batch_logical
     *
     * Element-wise lesser than comparison of batches \c x and \c y.
     * @param x batch involved in the comparison.
     * @param y batch involved in the comparison.
     * @return a boolean batch.
     */
    template <class T, class A>
    inline batch_bool<T, A> lt(batch<T, A> const& x, batch<T, A> const& y) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return x < y;
    }

    /**
     * @ingroup batch_math
     *
     * Computes the larger values of the batches \c x and \c y.
     * @param x a batch of integer or floating point values.
     * @param y a batch of integer or floating point values.
     * @return a batch of the larger values.
     */
    template <class T, class A>
    inline batch<T, A> max(batch<T, A> const& x, batch<T, A> const& y) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::max<A>(x, y, A {});
    }

    /**
     * @ingroup batch_math
     *
     * Computes the smaller values of the batches \c x and \c y.
     * @param x a batch of integer or floating point values.
     * @param y a batch of integer or floating point values.
     * @return a batch of the smaller values.
     */
    template <class T, class A>
    inline batch<T, A> min(batch<T, A> const& x, batch<T, A> const& y) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::min<A>(x, y, A {});
    }

    /**
     * @ingroup batch_constant
     *
     * Return a batch of scalars representing negative infinity
     * @return a batch of negative infinity
     */
    template <class B>
    inline B minusinfinity() noexcept
    {
        using T = typename B::value_type;
        using A = typename B::arch_type;
        detail::static_check_supported_config<T, A>();
        return B(-std::numeric_limits<T>::infinity());
    }

    /**
     * @ingroup batch_arithmetic
     *
     * Computes the integer modulo of the batch \c x by the batch \c y.
     * @param x batch involved in the modulo.
     * @param y batch involved in the modulo.
     * @return the result of the modulo.
     */
    template <class T, class A>
    inline auto mod(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x % y)
    {
        detail::static_check_supported_config<T, A>();
        return x % y;
    }

    /**
     * @ingroup batch_arithmetic
     *
     * Computes the product of the batches \c x and \c y.
     * @tparam X the actual type of batch.
     * @param x batch involved in the product.
     * @param y batch involved in the product.
     * @return the result of the product.
     */
    template <class T, class A>
    inline auto mul(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x * y)
    {
        detail::static_check_supported_config<T, A>();
        return x * y;
    }

    /**
     * @ingroup batch_rounding
     *
     * Rounds the scalars in \c x to integer values (in floating point format), using
     * the current rounding mode.
     * @param x batch of floating point values.
     * @return the batch of nearest integer values.
     */
    template <class T, class A>
    inline batch<T, A> nearbyint(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::nearbyint<A>(x, A {});
    }

    /**
     * @ingroup batch_rounding
     *
     * Rounds the scalars in \c x to integer values (in integer format) using
     * the current rounding mode.
     * @param x batch of floating point values.
     * @return the batch of nearest integer values.
     *
     * @warning For very large values the conversion to int silently overflows.
     */
    template <class T, class A>
    inline batch<as_integer_t<T>, A>
    nearbyint_as_int(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::nearbyint_as_int(x, A {});
    }

    /**
     * @ingroup batch_logical
     *
     * Element-wise inequality comparison of batches \c x and \c y.
     * @param x batch involved in the comparison.
     * @param y batch involved in the comparison.
     * @return a boolean batch.
     */
    template <class T, class A>
    inline auto neq(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x != y)
    {
        detail::static_check_supported_config<T, A>();
        return x != y;
    }

    /**
     * @ingroup batch_logical
     *
     * Element-wise inequality comparison of batches of boolean values \c x and \c y.
     * @param x batch of booleans involved in the comparison.
     * @param y batch of booleans involved in the comparison.
     * @return a boolean batch.
     */
    template <class T, class A>
    inline auto neq(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x != y)
    {
        detail::static_check_supported_config<T, A>();
        return x != y;
    }

    /**
     * @ingroup batch_arithmetic
     *
     * Computes the opposite of the batch \c x.
     * @param x batch involved in the operation.
     * @return the opposite of \c x.
     */
    template <class T, class A>
    inline batch<T, A> neg(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return -x;
    }

    /**
     * @ingroup batch_math_extra
     *
     * Computes the next representable floating-point
     * value following x in the direction of y
     * @param x batch of floating point values.
     * @param y batch of floating point values.
     * @return the next representable value after \c x in the direction of \c y.
     */
    template <class T, class A>
    inline batch<T, A> nextafter(batch<T, A> const& x, batch<T, A> const& y) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::nextafter<A>(x, y, A {});
    }

    /**
     * @ingroup batch_complex_op
     *
     * Computes the norm of the batch \c x.
     * @param x batch of complex or real values.
     * @return the norm of \c x.
     */
    template <class T, class A>
    inline real_batch_type_t<batch<T, A>> norm(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::norm(x, A {});
    }

    /**
     * @ingroup batch_math
     *
     * Returns a complex batch with magnitude \c r and phase angle \c theta.
     * @param r The magnitude of the desired complex result.
     * @param theta The phase angle of the desired complex result.
     * @return \c r exp(i * \c theta).
     */
    template <class T, class A>
    inline complex_batch_type_t<batch<T, A>> polar(batch<T, A> const& r, batch<T, A> const& theta = batch<T, A> {}) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::polar<A>(r, theta, A {});
    }

    /**
     * @ingroup batch_arithmetic
     *
     * No-op on \c x.
     * @param x batch involved in the operation.
     * @return \c x.
     */
    template <class T, class A>
    inline batch<T, A> pos(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return +x;
    }

    /**
     * @ingroup batch_math
     *
     * Computes the value of the batch \c x raised to the power
     * \c y.
     * @param x batch of floating point values.
     * @param y batch of floating point values.
     * @return \c x raised to the power \c y.
     */
    template <class T, class A>
    inline batch<T, A> pow(batch<T, A> const& x, batch<T, A> const& y) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::pow<A>(x, y, A {});
    }

    /**
     * @ingroup batch_math
     *
     * Computes the value of the batch \c x raised to the power
     * \c y.
     * @param x batch of scalar values.
     * @param y integral scalar exponent.
     * @return \c x raised to the power \c y.
     */
    template <class T, class ITy, class A, class = typename std::enable_if<std::is_integral<ITy>::value, void>::type>
    inline batch<T, A> pow(batch<T, A> const& x, ITy y) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::ipow<A>(x, y, A {});
    }

    /**
     * @ingroup batch_complex_op
     *
     * Computes the projection of the batch \c z.
     * @param z batch of complex or real values.
     * @return the projection of \c z.
     */
    template <class T, class A>
    inline complex_batch_type_t<batch<T, A>> proj(batch<T, A> const& z) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::proj(z, A {});
    }

    /**
     * @ingroup batch_complex_op
     *
     * Computes the real part of the batch \c z.
     * @param z batch of complex or real values.
     * @return the real part of \c z.
     */
    template <class T, class A>
    inline real_batch_type_t<batch<T, A>> real(batch<T, A> const& z) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::real<A>(z, A {});
    }

    /**
     * @ingroup batch_arithmetic
     *
     * Computes the approximate reciprocal of the batch \c x.
     * The maximum relative error for this approximation is
     * less than 1.5*2^-12.
     * @param x batch of floating point numbers.
     * @return the reciprocal.
     */
    template <class T, class A, class = typename std::enable_if<std::is_floating_point<T>::value, void>::type>
    inline batch<T, A> reciprocal(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::reciprocal(x, A {});
    }

    /**
     * @ingroup batch_reducers
     *
     * Generic reducer using only batch operations
     * @param f reducing function, accepting `batch ()(batch, batch)`
     * @param x batch involved in the reduction
     * @return the result of the reduction, as a scalar.
     */
    template <class T, class A, class F>
    inline T reduce(F&& f, batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::detail::reduce(std::forward<F>(f), x, std::integral_constant<unsigned, batch<T, A>::size>());
    }

    /**
     * @ingroup batch_reducers
     *
     * Adds all the scalars of the batch \c x.
     * @param x batch involved in the reduction
     * @return the result of the reduction.
     */
    template <class T, class A>
    inline T reduce_add(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::reduce_add<A>(x, A {});
    }

    /**
     * @ingroup batch_reducers
     *
     * Max of all the scalars of the batch \c x.
     * @param x batch involved in the reduction
     * @return the result of the reduction.
     */
    template <class T, class A>
    inline T reduce_max(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::reduce_max<A>(x, A {});
    }

    /**
     * @ingroup batch_reducers
     *
     * Min of all the scalars of the batch \c x.
     * @param x batch involved in the reduction
     * @return the result of the reduction.
     */
    template <class T, class A>
    inline T reduce_min(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::reduce_min<A>(x, A {});
    }

    /**
     * @ingroup batch_math
     *
     * Computes the remainder of dividing \c x by \c y
     * @param x batch of scalar values
     * @param y batch of scalar values
     * @return the remainder of the division of \c x by \c y.
     */
    template <class T, class A>
    inline batch<T, A> remainder(batch<T, A> const& x, batch<T, A> const& y) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::remainder<A>(x, y, A {});
    }

    /**
     * @ingroup batch_rounding
     *
     * Rounds the scalars in \c x to integer values (in floating point format), using
     * the current rounding mode.
     * @param x batch of floating point values.
     * @return the batch of rounded values.
     */
    template <class T, class A>
    inline batch<T, A> rint(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return nearbyint(x);
    }

    /**
     * @ingroup batch_data_transfer
     *
     * Slide the whole batch to the left by \c n bytes, and reintroduce the
     * slid out elements from the right. This is different from
     * \c rotl that rotates each batch element to the left.
     *
     * @tparam N Amount of bytes to rotate to the left.
     * @param x batch of integer values.
     * @return rotated batch.
     */
    template <size_t N, class T, class A>
    inline batch<T, A> rotate_left(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::rotate_left<N, A>(x, A {});
    }

    /**
     * @ingroup batch_data_transfer
     *
     * Slide the whole batch to the right by \c n bytes, and reintroduce the
     * slid out elements from the left. This is different from
     * \c rotr that rotates each batch element to the right.
     *
     * @tparam N Amount of bytes to rotate to the right.
     * @param x batch of integer values.
     * @return rotated batch.
     */
    template <size_t N, class T, class A>
    inline batch<T, A> rotate_right(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::rotate_right<N, A>(x, A {});
    }

    /**
     * @ingroup batch_bitwise
     *
     * Perform a bitwise shift to the left, reintroducing the shifted out bits
     * to the right
     * @param x batch to rotate
     * @param shift scalar amount to shift
     * @return rotated \c x.
     */
    template <class T, class A>
    inline batch<T, A> rotl(batch<T, A> const& x, int shift) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::rotl<A>(x, shift, A {});
    }
    // Overload: per-element shift amounts.
    template <class T, class A>
    inline batch<T, A> rotl(batch<T, A> const& x, batch<T, A> const& shift) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::rotl<A>(x, shift, A {});
    }

    /**
     * @ingroup batch_bitwise
     *
     * Perform a bitwise shift to the right, reintroducing the shifted out bits
     * to the left.
     * @param x batch to rotate
     * @param shift scalar amount to shift
     * @return rotated \c x.
     */
    template <class T, class A>
    inline batch<T, A> rotr(batch<T, A> const& x, int shift) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::rotr<A>(x, shift, A {});
    }
    // Overload: per-element shift amounts.
    template <class T, class A>
    inline batch<T, A> rotr(batch<T, A> const& x, batch<T, A> const& shift) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::rotr<A>(x, shift, A {});
    }

    /**
     * @ingroup batch_rounding
     *
     * Computes the batch of nearest integer values to scalars in \c x (in
     * floating point format), rounding halfway cases away from zero, regardless
     * of the current rounding mode.
     * @param x batch of floating point values.
     * @return the batch of nearest integer values.
     */
    template <class T, class A>
    inline batch<T, A> round(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::round<A>(x, A {});
    }

    /**
     * @ingroup batch_math
     *
     * Computes an estimate of the inverse square root of the batch \c x.
     *
     * @warning Unlike most xsimd function, this does not return the same result as the
     * equivalent scalar operation, trading accuracy for speed.
     *
     * @param x batch of floating point values.
     * @return the inverse square root of \c x.
     */
    template <class T, class A>
    inline batch<T, A> rsqrt(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::rsqrt<A>(x, A {});
    }

    /**
     * @ingroup batch_arithmetic
     *
     * Computes the saturate sum of the batch \c x and the batch \c y.
     *
     * @tparam X the actual type of batch.
     * @param x batch involved in the saturated addition.
     * @param y batch involved in the saturated addition.
     * @return the result of the saturated addition.
     */
    template <class T, class A>
    inline batch<T, A> sadd(batch<T, A> const& x, batch<T, A> const& y) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::sadd<A>(x, y, A {});
    }

    /**
     * @ingroup batch_miscellaneous
     *
     * Ternary operator for batches: selects values from the batches \c true_br or \c false_br
     * depending on the boolean values in the constant batch \c cond. Equivalent to
     * \code{.cpp}
     * for(std::size_t i = 0; i < N; ++i)
     *     res[i] = cond[i] ? true_br[i] : false_br[i];
     * \endcode
     * @param cond batch condition.
     * @param true_br batch values for truthy condition.
     * @param false_br batch value for falsy condition.
     * @return the result of the selection.
     */
    template <class T, class A>
    inline batch<T, A> select(batch_bool<T, A> const& cond, batch<T, A> const& true_br, batch<T, A> const& false_br) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::select<A>(cond, true_br, false_br, A {});
    }

    /**
     * @ingroup batch_miscellaneous
     *
     * Ternary operator for batches: selects values from the batches \c true_br or \c false_br
     * depending on the boolean values in the constant batch \c cond. Equivalent to
     * \code{.cpp}
     * for(std::size_t i = 0; i < N; ++i)
     *     res[i] = cond[i] ? true_br[i] : false_br[i];
     * \endcode
     * @param cond batch condition.
     * @param true_br batch values for truthy condition.
     * @param false_br batch value for falsy condition.
     * @return the result of the selection.
     */
    template <class T, class A>
    inline batch<std::complex<T>, A> select(batch_bool<T, A> const& cond, batch<std::complex<T>, A> const& true_br, batch<std::complex<T>, A> const& false_br) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::select<A>(cond, true_br, false_br, A {});
    }

    /**
     * @ingroup batch_miscellaneous
     *
     * Ternary operator for batches: selects values from the batches \c true_br or \c false_br
     * depending on the boolean values in the constant batch \c cond. Equivalent to
     * \code{.cpp}
     * for(std::size_t i = 0; i < N; ++i)
     *     res[i] = cond[i] ? true_br[i] : false_br[i];
     * \endcode
     * @param cond constant batch condition.
     * @param true_br batch values for truthy condition.
     * @param false_br batch value for falsy condition.
     * @return the result of the selection.
     */
    template <class T, class A, bool... Values>
    inline batch<T, A> select(batch_bool_constant<batch<T, A>, Values...> const& cond, batch<T, A> const& true_br, batch<T, A> const& false_br) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::select<A>(cond, true_br, false_br, A {});
    }

    /**
     * @ingroup batch_data_transfer
     *
     * Combine elements from \c x and \c y according to selector \c mask
     * @param x batch
     * @param y batch
     * @param mask constant batch mask of integer elements of the same size as
     * element of \c x and \c y. Each element of the mask index the vector that
     * would be formed by the concatenation of \c x and \c y. For instance
     * \code{.cpp}
     * batch_constant<batch<uint32_t, sse2>, 0, 4, 3, 7>
     * \endcode
     * Picks \c x[0], \c y[0], \c x[3], \c y[3]
     *
     * @return combined batch
     */
    template <class T, class A, class Vt, Vt... Values>
    inline typename std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type
    shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<batch<Vt, A>, Values...> mask) noexcept
    {
        static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
        detail::static_check_supported_config<T, A>();
        return kernel::shuffle<A>(x, y, mask, A {});
    }

    /**
     * @ingroup batch_miscellaneous
     *
     * Computes the sign of \c x
     * @param x batch
     * @return -1 for each negative element, -1 or +1 for each null element and +1 for each positive element
     */
    template <class T, class A>
    inline batch<T, A> sign(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::sign<A>(x, A {});
    }

    /**
     * @ingroup batch_miscellaneous
     *
     * Computes the sign of \c x, assuming x doesn't have any zero
     * @param x batch
     * @return -1 for each negative element, -1 or +1 for each null element and +1 for each positive element
     */
    template <class T, class A>
    inline batch<T, A> signnz(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::signnz<A>(x, A {});
    }

    /**
     * @ingroup batch_trigo
     *
     * Computes the sine of the batch \c x.
     * @param x batch of floating point values.
     * @return the sine of \c x.
     */
    template <class T, class A>
    inline batch<T, A> sin(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::sin<A>(x, A {});
    }

    /**
     * @ingroup batch_trigo
     *
     * Computes the sine and the cosine of the batch \c x. This method is faster
     * than calling sine and cosine independently.
     * @param x batch of floating point values.
     * @return a pair containing the sine then the cosine of batch \c x
     */
    template <class T, class A>
    inline std::pair<batch<T, A>, batch<T, A>> sincos(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::sincos<A>(x, A {});
    }

    /**
     * @ingroup batch_trigo
     *
     * Computes the hyperbolic sine of the batch \c x.
     * @param x batch of floating point values.
     * @return the hyperbolic sine of \c x.
     */
    template <class T, class A>
    inline batch<T, A> sinh(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::sinh<A>(x, A {});
    }

    /**
     * @ingroup batch_data_transfer
     *
     * Slide the whole batch to the left by \c N bytes. This is different from
     * \c bitwise_lshift that shifts each batch element to the left.
     *
     * @tparam N Amount of bytes to slide to the left.
     * @param x batch of integer values.
     * @return slid batch.
     */
    template <size_t N, class T, class A>
    inline batch<T, A> slide_left(batch<T, A> const& x) noexcept
    {
        static_assert(std::is_integral<T>::value, "can only slide batch of integers");
        detail::static_check_supported_config<T, A>();
        return kernel::slide_left<N, A>(x, A {});
    }

    /**
     * @ingroup batch_data_transfer
     *
     * Slide the whole batch to the right by \c N bytes. This is different from
     * \c bitwise_rshift that shifts each batch element to the right.
     *
     * @tparam N Amount of bytes to slide to the right.
     * @param x batch of integer values.
     * @return slid batch.
     */
    template <size_t N, class T, class A>
    inline batch<T, A> slide_right(batch<T, A> const& x) noexcept
    {
        static_assert(std::is_integral<T>::value, "can only slide batch of integers");
        detail::static_check_supported_config<T, A>();
        return kernel::slide_right<N, A>(x, A {});
    }

    /**
     * @ingroup batch_math
     *
     * Computes the square root of the batch \c x.
     * @param x batch of floating point values.
     * @return the square root of \c x.
     */
    template <class T, class A>
    inline batch<T, A> sqrt(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::sqrt<A>(x, A {});
    }

    /**
     * @ingroup batch_arithmetic
     *
     * Computes the saturate difference of the batch \c x and the batch \c y.
     * @tparam X the actual type of batch.
     * @param x batch involved in the saturated difference.
     * @param y batch involved in the saturated difference.
     * @return the result of the saturated difference.
     */
    template <class T, class A>
    inline batch<T, A> ssub(batch<T, A> const& x, batch<T, A> const& y) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::ssub<A>(x, y, A {});
    }

    /**
     * @ingroup batch_data_transfer
     *
     * Copy content of batch \c src to the buffer \c dst. The
     * memory needs to be aligned.
     * @param dst the memory buffer to write to
     * @param src the batch to copy
     */
    template <class To, class A = default_arch, class From>
    inline void store_as(To* dst, batch<From, A> const& src, aligned_mode) noexcept
    {
        kernel::store_aligned(dst, src, A {});
    }

    template <class A = default_arch, class From>
    inline void store_as(bool* dst, batch_bool<From, A> const& src, aligned_mode) noexcept
    {
        kernel::store(src, dst, A {});
    }

    template <class To, class A = default_arch, class From>
    inline void store_as(std::complex<To>* dst, batch<std::complex<From>, A> const& src, aligned_mode) noexcept
    {
        kernel::store_complex_aligned(dst, src, A {});
    }

#ifdef XSIMD_ENABLE_XTL_COMPLEX
    template <class To, class A = default_arch, class From, bool i3ec>
    inline void store_as(xtl::xcomplex<To, To, i3ec>* dst, batch<std::complex<From>, A> const& src, aligned_mode) noexcept
    {
        store_as(reinterpret_cast<std::complex<To>*>(dst), src, aligned_mode());
    }
#endif

    /**
     * @ingroup batch_data_transfer
     *
     * Copy content of batch \c src to the buffer \c dst.
The + * memory does not need to be aligned. + * @param dst the memory buffer to write to + * @param src the batch to copy + */ + template <class To, class A = default_arch, class From> + inline void store_as(To* dst, batch<From, A> const& src, unaligned_mode) noexcept + { + kernel::store_unaligned(dst, src, A {}); + } + + template <class A = default_arch, class From> + inline void store_as(bool* dst, batch_bool<From, A> const& src, unaligned_mode) noexcept + { + kernel::store(src, dst, A {}); + } + + template <class To, class A = default_arch, class From> + inline void store_as(std::complex<To>* dst, batch<std::complex<From>, A> const& src, unaligned_mode) noexcept + { + kernel::store_complex_unaligned(dst, src, A {}); + } + +#ifdef XSIMD_ENABLE_XTL_COMPLEX + template <class To, class A = default_arch, class From, bool i3ec> + inline void store_as(xtl::xcomplex<To, To, i3ec>* dst, batch<std::complex<From>, A> const& src, unaligned_mode) noexcept + { + store_as(reinterpret_cast<std::complex<To>*>(dst), src, unaligned_mode()); + } +#endif + + /** + * @ingroup batch_data_transfer + * + * Copy content of batch \c val to the buffer \c mem. The + * memory does not need to be aligned. + * @param mem the memory buffer to write to + * @param val the batch to copy from + */ + template <class A, class T> + inline void store(T* mem, batch<T, A> const& val, aligned_mode = {}) noexcept + { + store_as<T, A>(mem, val, aligned_mode {}); + } + + /** + * @ingroup batch_data_transfer + * + * Copy content of batch \c val to the buffer \c mem. The + * memory does not need to be aligned. + * @param mem the memory buffer to write to + * @param val the batch to copy from + */ + template <class A, class T> + inline void store(T* mem, batch<T, A> const& val, unaligned_mode) noexcept + { + store_as<T, A>(mem, val, unaligned_mode {}); + } + + /** + * @ingroup batch_data_transfer + * + * Copy content of batch \c val to the buffer \c mem. The + * memory needs to be aligned. 
     * @param mem the memory buffer to write to
     * @param val the batch to copy from
     */
    template <class A, class T>
    inline void store_aligned(T* mem, batch<T, A> const& val) noexcept
    {
        store_as<T, A>(mem, val, aligned_mode {});
    }

    /**
     * @ingroup batch_data_transfer
     *
     * Copy content of batch \c val to the buffer \c mem. The
     * memory does not need to be aligned.
     * @param mem the memory buffer to write to
     * @param val the batch to copy from
     */
    template <class A, class T>
    inline void store_unaligned(T* mem, batch<T, A> const& val) noexcept
    {
        store_as<T, A>(mem, val, unaligned_mode {});
    }

    /**
     * @ingroup batch_arithmetic
     *
     * Computes the difference between \c x and \c y
     * @tparam T the value type of the batches.
     * @param x scalar or batch of scalars
     * @param y scalar or batch of scalars
     * @return the difference between \c x and \c y
     */
    template <class T, class A>
    inline auto sub(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x - y)
    {
        detail::static_check_supported_config<T, A>();
        return x - y;
    }

    /**
     * @ingroup batch_data_transfer
     *
     * Rearrange elements from \c x according to constant mask \c mask
     * @param x batch
     * @param mask constant batch mask of integer elements of the same size as
     * element of \c x
     * @return swizzled batch
     */
    template <class T, class A, class Vt, Vt... Values>
    inline typename std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type
    swizzle(batch<T, A> const& x, batch_constant<batch<Vt, A>, Values...> mask) noexcept
    {
        static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
        detail::static_check_supported_config<T, A>();
        return kernel::swizzle<A>(x, mask, A {});
    }
    // Overload of swizzle (constant mask) for batches of complex values.
    template <class T, class A, class Vt, Vt... Values>
    inline batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& x, batch_constant<batch<Vt, A>, Values...> mask) noexcept
    {
        static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
        detail::static_check_supported_config<T, A>();
        return kernel::swizzle<A>(x, mask, A {});
    }

    /**
     * @ingroup batch_data_transfer
     *
     * Rearrange elements from \c x according to mask \c mask
     * @param x batch
     * @param mask batch mask of integer elements of the same size as
     * element of \c x
     * @return swizzled batch
     */
    template <class T, class A, class Vt>
    inline typename std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type
    swizzle(batch<T, A> const& x, batch<Vt, A> mask) noexcept
    {
        static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
        detail::static_check_supported_config<T, A>();
        return kernel::swizzle<A>(x, mask, A {});
    }

    // Overload of swizzle (dynamic mask) for batches of complex values.
    template <class T, class A, class Vt>
    inline batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& x, batch<Vt, A> mask) noexcept
    {
        static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
        detail::static_check_supported_config<T, A>();
        return kernel::swizzle<A>(x, mask, A {});
    }

    /**
     * @ingroup batch_trigo
     *
     * Computes the tangent of the batch \c x.
     * @param x batch of floating point values.
     * @return the tangent of \c x.
     */
    template <class T, class A>
    inline batch<T, A> tan(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::tan<A>(x, A {});
    }

    /**
     * @ingroup batch_trigo
     *
     * Computes the hyperbolic tangent of the batch \c x.
     * @param x batch of floating point values.
     * @return the hyperbolic tangent of \c x.
     */
    template <class T, class A>
    inline batch<T, A> tanh(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::tanh<A>(x, A {});
    }

    /**
     * @ingroup batch_math_extra
     *
     * Computes the gamma function of the batch \c x.
     * @param x batch of floating point values.
     * @return the gamma function of \c x.
     */
    template <class T, class A>
    inline batch<T, A> tgamma(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::tgamma<A>(x, A {});
    }

    /**
     * @ingroup batch_conversion
     *
     * Perform a conversion from \c i to a value of a floating point type of the same size as \c T.
     * This is equivalent to \c batch_cast<as_float_t<T>>(i)
     * @param i batch of integers.
     * @return \c i converted to a value of a floating point type of the same size as \c T
     */
    template <class T, class A>
    inline batch<as_float_t<T>, A> to_float(batch<T, A> const& i) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return batch_cast<as_float_t<T>>(i);
    }

    /**
     * @ingroup batch_conversion
     *
     * Perform a conversion from \c x to a value of an integer type of the same size as \c T
     * This is equivalent to \c batch_cast<as_integer_t<T>>(x)
     * @param x batch.
     * @return \c x converted to a value of an integer type of the same size as \c T
     */
    template <class T, class A>
    inline batch<as_integer_t<T>, A> to_int(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return batch_cast<as_integer_t<T>>(x);
    }

    /**
     * @ingroup batch_rounding
     *
     * Computes the batch of nearest integer values not greater in magnitude
     * than scalars in \c x.
     * @param x batch of floating point values.
     * @return the batch of nearest integer values not greater in magnitude than \c x.
     */
    template <class T, class A>
    inline batch<T, A> trunc(batch<T, A> const& x) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::trunc<A>(x, A {});
    }

    /**
     * @ingroup batch_data_transfer
     *
     * Unpack and interleave data from the HIGH half of batches \c x and \c y.
     * Store the results in the return value.
     * @param x a batch of integer or floating point or double precision values.
     * @param y a batch of integer or floating point or double precision values.
     * @return a batch of the high part of shuffled values.
     */
    template <class T, class A>
    inline batch<T, A> zip_hi(batch<T, A> const& x, batch<T, A> const& y) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::zip_hi<A>(x, y, A {});
    }

    /**
     * @ingroup batch_data_transfer
     *
     * Unpack and interleave data from the LOW half of batches \c x and \c y.
     * Store the results in the return value.
     * @param x a batch of integer or floating point or double precision values.
     * @param y a batch of integer or floating point or double precision values.
     * @return a batch of the low part of shuffled values.
     */
    template <class T, class A>
    inline batch<T, A> zip_lo(batch<T, A> const& x, batch<T, A> const& y) noexcept
    {
        detail::static_check_supported_config<T, A>();
        return kernel::zip_lo<A>(x, y, A {});
    }

    /**
     * @ingroup batch_conversion
     *
     * Cast a \c batch_bool of \c T into a \c batch of the same type using the
     * following rule: if an element of \c self is true, it maps to -1 in the
     * returned integral batch, otherwise it maps to 0.
+ * + * @param self batch_bool of \c T + * @return \c self cast to a \c batch of \c T + */ + template <class T, class A, typename std::enable_if<std::is_integral<T>::value, int>::type = 3> + inline batch<T, A> bitwise_cast(batch_bool<T, A> const& self) noexcept + { + T z(0); + detail::static_check_supported_config<T, A>(); + return select(self, batch<T, A>(T(~z)), batch<T, A>(z)); + } + + template <class T, class A, typename std::enable_if<std::is_floating_point<T>::value, int>::type = 3> + inline batch<T, A> bitwise_cast(batch_bool<T, A> const& self) noexcept + { + T z0(0), z1(0); + using int_type = as_unsigned_integer_t<T>; + int_type value(~int_type(0)); + std::memcpy(&z1, &value, sizeof(int_type)); + detail::static_check_supported_config<T, A>(); + return select(self, batch<T, A>(z1), batch<T, A>(z0)); + } + + /** + * @ingroup batch_bool_reducers + * + * Returns true if all the boolean values in the batch are true, + * false otherwise. + * @param x the batch to reduce. + * @return a boolean scalar. + */ + template <class T, class A> + inline bool all(batch_bool<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::all<A>(x, A {}); + } + + /** + * @ingroup batch_bool_reducers + * + * Return true if any of the boolean values in the batch is true, + * false otherwise. + * @param x the batch to reduce. + * @return a boolean scalar. + */ + template <class T, class A> + inline bool any(batch_bool<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return kernel::any<A>(x, A {}); + } + + /** + * @ingroup batch_bool_reducers + * + * Return true if none of the boolean values in the batch is true, + * false otherwise. + * @param x the batch to reduce. + * @return a boolean scalar. 
+ */ + template <class T, class A> + inline bool none(batch_bool<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + return !xsimd::any(x); + } + + /** + * @ingroup batch_miscellaneous + * + * Dump the content of batch \c x to stream \c o + * @param o the stream where the batch is dumped + * @param x batch to dump. + * @return a reference to \c o + */ + template <class T, class A> + inline std::ostream& operator<<(std::ostream& o, batch<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + constexpr auto size = batch<T, A>::size; + alignas(A::alignment()) T buffer[size]; + x.store_aligned(&buffer[0]); + o << '('; + for (std::size_t i = 0; i < size - 1; ++i) + o << buffer[i] << ", "; + return o << buffer[size - 1] << ')'; + } + + /** + * @ingroup batch_miscellaneous + * + * Dump the content of batch \c x to stream \c o + * @param o the stream where the batch is dumped + * @param x batch to dump. + * @return a reference to \c o + */ + template <class T, class A> + inline std::ostream& operator<<(std::ostream& o, batch_bool<T, A> const& x) noexcept + { + detail::static_check_supported_config<T, A>(); + constexpr auto size = batch_bool<T, A>::size; + alignas(A::alignment()) bool buffer[size]; + x.store_aligned(&buffer[0]); + o << '('; + for (std::size_t i = 0; i < size - 1; ++i) + o << buffer[i] << ", "; + return o << buffer[size - 1] << ')'; + } +} + +#endif |