Diffstat (limited to 'third_party/xsimd/include/xsimd/types')
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_all_registers.hpp | 46
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_api.hpp | 2599
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_avx2_register.hpp | 40
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp | 48
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp | 48
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp | 48
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp | 48
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp | 74
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp | 48
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp | 48
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp | 48
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp | 51
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi_register.hpp | 51
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_register.hpp | 48
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_avx_register.hpp | 61
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp | 40
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_batch.hpp | 1492
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_batch_constant.hpp | 288
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp | 46
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp | 46
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp | 46
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_fma4_register.hpp | 42
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_generic_arch.hpp | 52
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_neon64_register.hpp | 52
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_neon_register.hpp | 155
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_register.hpp | 94
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_rvv_register.hpp | 419
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_sse2_register.hpp | 60
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_sse3_register.hpp | 45
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp | 44
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp | 44
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp | 44
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_sve_register.hpp | 157
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_traits.hpp | 319
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_utils.hpp | 530
-rw-r--r--  third_party/xsimd/include/xsimd/types/xsimd_wasm_register.hpp | 60
36 files changed, 7381 insertions, 0 deletions
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_all_registers.hpp b/third_party/xsimd/include/xsimd/types/xsimd_all_registers.hpp
new file mode 100644
index 0000000000..4350ca0a28
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_all_registers.hpp
@@ -0,0 +1,46 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#include "xsimd_fma3_sse_register.hpp"
+#include "xsimd_fma4_register.hpp"
+#include "xsimd_sse2_register.hpp"
+#include "xsimd_sse3_register.hpp"
+#include "xsimd_sse4_1_register.hpp"
+#include "xsimd_sse4_2_register.hpp"
+
+#include "xsimd_avx2_register.hpp"
+#include "xsimd_avx_register.hpp"
+#include "xsimd_avxvnni_register.hpp"
+#include "xsimd_fma3_avx2_register.hpp"
+#include "xsimd_fma3_avx_register.hpp"
+
+#include "xsimd_avx512vnni_avx512bw_register.hpp"
+#include "xsimd_avx512vnni_avx512vbmi_register.hpp"
+
+#include "xsimd_avx512ifma_register.hpp"
+#include "xsimd_avx512vbmi_register.hpp"
+
+#include "xsimd_avx512er_register.hpp"
+#include "xsimd_avx512pf_register.hpp"
+
+#include "xsimd_avx512bw_register.hpp"
+#include "xsimd_avx512cd_register.hpp"
+#include "xsimd_avx512dq_register.hpp"
+#include "xsimd_avx512f_register.hpp"
+
+#include "xsimd_neon64_register.hpp"
+#include "xsimd_neon_register.hpp"
+
+#include "xsimd_sve_register.hpp"
+
+#include "xsimd_rvv_register.hpp"
+
+#include "xsimd_wasm_register.hpp"
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_api.hpp b/third_party/xsimd/include/xsimd/types/xsimd_api.hpp
new file mode 100644
index 0000000000..0420f0a09d
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_api.hpp
@@ -0,0 +1,2599 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_API_HPP
+#define XSIMD_API_HPP
+
+#include <complex>
+#include <cstddef>
+#include <limits>
+#include <ostream>
+
+#include "../arch/xsimd_isa.hpp"
+#include "../types/xsimd_batch.hpp"
+#include "../types/xsimd_traits.hpp"
+
+namespace xsimd
+{
+ /**
+ * high level free functions
+ *
+ * @defgroup batch_arithmetic Arithmetic operators
+ * @defgroup batch_constant Constant batches
+ * @defgroup batch_data_transfer Memory operators
+ * @defgroup batch_math Basic math operators
+ * @defgroup batch_math_extra Extra math operators
+ * @defgroup batch_fp Floating point manipulation
+ * @defgroup batch_rounding Rounding operators
+ * @defgroup batch_conversion Conversion operators
+ * @defgroup batch_complex_op Complex operators
+ * @defgroup batch_logical Logical operators
+ * @defgroup batch_bitwise Bitwise operators
+ * @defgroup batch_reducers Reducers
+ * @defgroup batch_miscellaneous Miscellaneous
+ * @defgroup batch_trigo Trigonometry
+ *
+ * @defgroup batch_bool_logical Boolean logical operators
+ * @defgroup batch_bool_reducers Boolean reducers
+ */
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the absolute values of each scalar in the batch \c x.
+ * @param x batch of integer or floating point values.
+ * @return the absolute values of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> abs(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::abs<A>(x, A {});
+ }
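A minimal usage sketch for abs (illustrative only; it assumes xsimd/xsimd.hpp is included and that the default architecture exposes four float lanes, e.g. SSE2 or NEON — the values are made up):

    xsimd::batch<float> v(-1.0f, 2.0f, -3.0f, 4.0f); // one value per lane (4-lane assumption)
    xsimd::batch<float> a = xsimd::abs(v);           // a == {1, 2, 3, 4}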
+
+ /**
+ * @ingroup batch_complex
+ *
+ * Computes the absolute values of each complex in the batch \c z.
+ * @param z batch of complex values.
+ * @return the absolute values of \c z.
+ */
+ template <class T, class A>
+ inline batch<T, A> abs(batch<std::complex<T>, A> const& z) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::abs<A>(z, A {});
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+ * Computes the sum of the batches \c x and \c y.
+ * @param x batch or scalar involved in the addition.
+ * @param y batch or scalar involved in the addition.
+ * @return the sum of \c x and \c y
+ */
+ template <class T, class A>
+ inline auto add(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x + y)
+ {
+ detail::static_check_supported_config<T, A>();
+ return x + y;
+ }
+
+ /**
+ * @ingroup batch_trigo
+ *
+ * Computes the arc cosine of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the arc cosine of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> acos(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::acos<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_trigo
+ *
+ * Computes the inverse hyperbolic cosine of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the inverse hyperbolic cosine of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> acosh(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::acosh<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_complex
+ *
+ * Computes the argument of the batch \c z.
+ * @param z batch of complex or real values.
+ * @return the argument of \c z.
+ */
+ template <class T, class A>
+ inline real_batch_type_t<batch<T, A>> arg(batch<T, A> const& z) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::arg<A>(z, A {});
+ }
+
+ /**
+ * @ingroup batch_trigo
+ *
+ * Computes the arc sine of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the arc sine of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> asin(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::asin<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_trigo
+ *
+ * Computes the inverse hyperbolic sine of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the inverse hyperbolic sine of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> asinh(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::asinh<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_trigo
+ *
+ * Computes the arc tangent of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the arc tangent of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> atan(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::atan<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_trigo
+ *
+ * Computes the arc tangent of the batch \c x/y, using the signs of the
+ * arguments to determine the correct quadrant.
+ * @param x batch of floating point values.
+ * @param y batch of floating point values.
+ * @return the arc tangent of \c x/y.
+ */
+ template <class T, class A>
+ inline batch<T, A> atan2(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::atan2<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_trigo
+ *
+ * Computes the inverse hyperbolic tangent of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the inverse hyperbolic tangent of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> atanh(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::atanh<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_conversion
+ *
+ * Perform a static_cast from \c T_in to \c T_out on \c x.
+ * @param x batch_bool of \c T_in
+ * @return \c x cast to \c T_out
+ */
+ template <class T_out, class T_in, class A>
+ inline batch_bool<T_out, A> batch_bool_cast(batch_bool<T_in, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T_out, A>();
+ detail::static_check_supported_config<T_in, A>();
+ static_assert(batch_bool<T_out, A>::size == batch_bool<T_in, A>::size, "Casting between incompatible batch_bool types.");
+ return kernel::batch_bool_cast<A>(x, batch_bool<T_out, A> {}, A {});
+ }
+
+ /**
+ * @ingroup batch_conversion
+ *
+ * Perform a static_cast from \c T_in to \c T_out on \c x.
+ * @param x batch of \c T_in
+ * @return \c x cast to \c T_out
+ */
+ template <class T_out, class T_in, class A>
+ inline batch<T_out, A> batch_cast(batch<T_in, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T_out, A>();
+ detail::static_check_supported_config<T_in, A>();
+ return kernel::batch_cast<A>(x, batch<T_out, A> {}, A {});
+ }
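A sketch of batch_cast under the same 4-lane assumption as the earlier sketch (int32_t and float batches then have matching sizes); purely illustrative:

    xsimd::batch<int32_t> i(1, 2, 3, 4);
    xsimd::batch<float> f = xsimd::batch_cast<float>(i); // per-lane static_cast: {1.0f, 2.0f, 3.0f, 4.0f}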
+
+ /**
+ * @ingroup batch_miscellaneous
+ *
+ * Computes the sign bit of \c x
+ * @param x batch of scalars
+ * @return the sign bit of \c x
+ */
+ template <class T, class A>
+ inline batch<T, A> bitofsign(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::bitofsign<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_bitwise
+ *
+ * Computes the bitwise and of the batches \c x and \c y.
+ * @param x batch involved in the operation.
+ * @param y batch involved in the operation.
+ * @return the result of the bitwise and.
+ */
+ template <class T, class A>
+ inline auto bitwise_and(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x & y)
+ {
+ detail::static_check_supported_config<T, A>();
+ return x & y;
+ }
+
+ /**
+ * @ingroup batch_bitwise
+ *
+ * Computes the bitwise and of the batches \c x and \c y.
+ * @param x batch involved in the operation.
+ * @param y batch involved in the operation.
+ * @return the result of the bitwise and.
+ */
+ template <class T, class A>
+ inline auto bitwise_and(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x & y)
+ {
+ detail::static_check_supported_config<T, A>();
+ return x & y;
+ }
+
+ /**
+ * @ingroup batch_bitwise
+ *
+ * Computes the bitwise and not of batches \c x and \c y.
+ * @param x batch involved in the operation.
+ * @param y batch involved in the operation.
+ * @return the result of the bitwise and not.
+ */
+ template <class T, class A>
+ inline batch<T, A> bitwise_andnot(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::bitwise_andnot<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_bool_logical
+ *
+ * Computes the bitwise and not of batches \c x and \c y.
+ * @param x batch involved in the operation.
+ * @param y batch involved in the operation.
+ * @return the result of the bitwise and not.
+ */
+ template <class T, class A>
+ inline batch_bool<T, A> bitwise_andnot(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::bitwise_andnot<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_conversion
+ *
+ * Perform a reinterpret_cast from \c T_in to \c T_out on \c x.
+ * @param x batch of \c T_in
+ * @return \c x reinterpreted as \c T_out
+ */
+ template <class T_out, class T_in, class A>
+ inline batch<T_out, A> bitwise_cast(batch<T_in, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T_in, A>();
+ detail::static_check_supported_config<T_out, A>();
+ return kernel::bitwise_cast<A>(x, batch<T_out, A> {}, A {});
+ }
+
+ /**
+ * @ingroup batch_bitwise
+ *
+ * Perform a bitwise shift to the left
+ * @param x batch of \c T_in
+ * @param shift scalar amount to shift
+ * @return shifted \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> bitwise_lshift(batch<T, A> const& x, int shift) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::bitwise_lshift<A>(x, shift, A {});
+ }
+ template <class T, class A>
+ inline batch<T, A> bitwise_lshift(batch<T, A> const& x, batch<T, A> const& shift) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::bitwise_lshift<A>(x, shift, A {});
+ }
+
+ /**
+ * @ingroup batch_bitwise
+ *
+ * Computes the bitwise not of batch \c x.
+ * @param x batch involved in the operation.
+ * @return the result of the bitwise not.
+ */
+ template <class T, class A>
+ inline batch<T, A> bitwise_not(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::bitwise_not<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_bitwise
+ *
+ * Computes the bitwise not of batch \c x.
+ * @param x batch involved in the operation.
+ * @return the result of the bitwise not.
+ */
+ template <class T, class A>
+ inline batch_bool<T, A> bitwise_not(batch_bool<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::bitwise_not<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_bitwise
+ *
+ * Computes the bitwise or of the batches \c x and \c y.
+ * @param x scalar or batch of scalars
+ * @param y scalar or batch of scalars
+ * @return the result of the bitwise or.
+ */
+ template <class T, class A>
+ inline auto bitwise_or(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x | y)
+ {
+ detail::static_check_supported_config<T, A>();
+ return x | y;
+ }
+
+ /**
+ * @ingroup batch_bitwise
+ *
+ * Computes the bitwise or of the batches \c x and \c y.
+ * @param x scalar or batch of scalars
+ * @param y scalar or batch of scalars
+ * @return the result of the bitwise or.
+ */
+ template <class T, class A>
+ inline auto bitwise_or(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x | y)
+ {
+ detail::static_check_supported_config<T, A>();
+ return x | y;
+ }
+
+ /**
+ * @ingroup batch_bitwise
+ *
+ * Perform a bitwise shift to the right
+ * @param x batch of \c T_in
+ * @param shift scalar amount to shift
+ * @return shifted \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> bitwise_rshift(batch<T, A> const& x, int shift) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::bitwise_rshift<A>(x, shift, A {});
+ }
+ template <class T, class A>
+ inline batch<T, A> bitwise_rshift(batch<T, A> const& x, batch<T, A> const& shift) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::bitwise_rshift<A>(x, shift, A {});
+ }
+
+ /**
+ * @ingroup batch_bitwise
+ *
+ * Computes the bitwise xor of the batches \c x and \c y.
+ * @param x scalar or batch of scalars
+ * @param y scalar or batch of scalars
+ * @return the result of the bitwise xor.
+ */
+ template <class T, class A>
+ inline auto bitwise_xor(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x ^ y)
+ {
+ detail::static_check_supported_config<T, A>();
+ return x ^ y;
+ }
+
+ /**
+ * @ingroup batch_bitwise
+ *
+ * Computes the bitwise xor of the batches \c x and \c y.
+ * @param x scalar or batch of scalars
+ * @param y scalar or batch of scalars
+ * @return the result of the bitwise xor.
+ */
+ template <class T, class A>
+ inline auto bitwise_xor(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x ^ y)
+ {
+ detail::static_check_supported_config<T, A>();
+ return x ^ y;
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Creates a batch from the single value \c v.
+ * @param v the value used to initialize the batch
+ * @return a new batch instance
+ */
+ template <class T, class A = default_arch>
+ inline batch<T, A> broadcast(T v) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return batch<T, A>::broadcast(v);
+ }
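A one-line sketch of broadcast (illustrative; the deduced batch width depends on the selected architecture):

    auto ones = xsimd::broadcast(1.5f); // batch<float> with every lane equal to 1.5f
    // equivalent to constructing xsimd::batch<float>(1.5f) directly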
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Creates a batch from the single value \c v and
+ * the specified batch value type \c To.
+ * @param v the value used to initialize the batch
+ * @return a new batch instance
+ */
+ template <class To, class A = default_arch, class From>
+ inline simd_return_type<From, To, A> broadcast_as(From v) noexcept
+ {
+ detail::static_check_supported_config<From, A>();
+ using batch_value_type = typename simd_return_type<From, To, A>::value_type;
+ using value_type = typename std::conditional<std::is_same<From, bool>::value,
+ bool,
+ batch_value_type>::type;
+ return simd_return_type<From, To, A>(value_type(v));
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the cubic root of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the cubic root of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> cbrt(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::cbrt<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_rounding
+ *
+ * Computes the batch of smallest integer values not less than
+ * scalars in \c x.
+ * @param x batch of floating point values.
+ * @return the batch of smallest integer values not less than \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> ceil(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::ceil<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Clips the values of the batch \c x between those of the batches \c lo and \c hi.
+ * @param x batch of scalar values.
+ * @param lo batch of scalar values.
+ * @param hi batch of scalar values.
+ * @return the result of the clipping.
+ */
+ template <class T, class A>
+ inline batch<T, A> clip(batch<T, A> const& x, batch<T, A> const& lo, batch<T, A> const& hi) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::clip(x, lo, hi, A {});
+ }
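A sketch of clip under the 4-lane float assumption (values illustrative):

    xsimd::batch<float> x(-2.0f, 0.5f, 3.0f, 9.0f);
    xsimd::batch<float> lo(0.0f), hi(4.0f);         // broadcast bounds
    xsimd::batch<float> y = xsimd::clip(x, lo, hi); // {0, 0.5, 3, 4}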
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Pick elements from \c x selected by \c mask, and append them to the
+ * resulting vector, zeroing the remaining slots
+ */
+ template <class T, class A>
+ inline batch<T, A> compress(batch<T, A> const& x, batch_bool<T, A> const& mask) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::compress<A>(x, mask, A {});
+ }
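A sketch of compress under the 4-lane float assumption; the mask keeps positive lanes and the selected values are packed to the front:

    xsimd::batch<float> v(1.0f, -2.0f, 3.0f, -4.0f);
    auto positive = v > xsimd::batch<float>(0.0f); // {true, false, true, false}
    auto packed   = xsimd::compress(v, positive);  // {1, 3, 0, 0}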
+
+ /**
+ * @ingroup batch_complex
+ *
+ * Computes the conjugate of the batch \c z.
+ * @param z batch of complex values.
+ * @return the conjugate of \c z.
+ */
+ template <class A, class T>
+ inline complex_batch_type_t<batch<T, A>> conj(batch<T, A> const& z) noexcept
+ {
+ return kernel::conj(z, A {});
+ }
+
+ /**
+ * @ingroup batch_miscellaneous
+ *
+ * Computes a value whose absolute value matches
+ * that of \c x, but whose sign bit matches that of \c y.
+ * @param x batch of scalars
+ * @param y batch of scalars
+ * @return batch whose absolute value matches that of \c x, but whose sign bit
+ * matches that of \c y.
+ */
+ template <class T, class A>
+ inline batch<T, A> copysign(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::copysign<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_trigo
+ *
+ * Computes the cosine of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the cosine of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> cos(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::cos<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_trigo
+ *
+ * computes the hyperbolic cosine of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the hyperbolic cosine of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> cosh(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::cosh<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+ * Subtract 1 from batch \c x.
+ * @param x batch involved in the decrement.
+ * @return the subtraction of \c x and 1.
+ */
+ template <class T, class A>
+ inline batch<T, A> decr(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::decr<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+ * Subtract 1 from batch \c x for each element where \c mask is true.
+ * @param x batch involved in the decrement.
+ * @param mask whether to perform the increment or not. Can be a \c
+ * batch_bool or a \c batch_bool_constant.
+ * @return the subtraction of \c x and 1 when \c mask is true.
+ */
+ template <class T, class A, class Mask>
+ inline batch<T, A> decr_if(batch<T, A> const& x, Mask const& mask) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::decr_if<A>(x, mask, A {});
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+ * Computes the division of the batch \c x by the batch \c y.
+ * @param x scalar or batch of scalars
+ * @param y scalar or batch of scalars
+ * @return the result of the division.
+ */
+ template <class T, class A>
+ inline auto div(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x / y)
+ {
+ detail::static_check_supported_config<T, A>();
+ return x / y;
+ }
+
+ /**
+ * @ingroup batch_logical
+ *
+ * Element-wise equality comparison of batches \c x and \c y.
+ * @param x batch of scalars
+ * @param y batch of scalars
+ * @return a boolean batch.
+ */
+ template <class T, class A>
+ inline auto eq(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x == y)
+ {
+ detail::static_check_supported_config<T, A>();
+ return x == y;
+ }
+
+ /**
+ * @ingroup batch_logical
+ *
+ * Element-wise equality comparison of batches of boolean values \c x and \c y.
+ * @param x batch of booleans involved in the comparison.
+ * @param y batch of booleans involved in the comparison.
+ * @return a boolean batch.
+ */
+ template <class T, class A>
+ inline auto eq(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x == y)
+ {
+ detail::static_check_supported_config<T, A>();
+ return x == y;
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the natural exponential of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the natural exponential of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> exp(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::exp<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the base 10 exponential of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the base 10 exponential of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> exp10(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::exp10<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the base 2 exponential of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the base 2 exponential of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> exp2(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::exp2<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Load contiguous elements from \c x and place them in slots selected by \c
+ * mask, zeroing the other slots
+ */
+ template <class T, class A>
+ inline batch<T, A> expand(batch<T, A> const& x, batch_bool<T, A> const& mask) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::expand<A>(x, mask, A {});
+ }
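A sketch of expand, the counterpart of compress, under the same 4-lane assumption:

    xsimd::batch<float> src(1.0f, 2.0f, 3.0f, 4.0f);
    xsimd::batch_bool<float> mask(true, false, true, false);
    auto spread = xsimd::expand(src, mask); // {1, 0, 2, 0}: contiguous inputs land in the selected slots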
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the natural exponential of the batch \c x, minus one.
+ * @param x batch of floating point values.
+ * @return the natural exponential of \c x, minus one.
+ */
+ template <class T, class A>
+ inline batch<T, A> expm1(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::expm1<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_math_extra
+ *
+ * Computes the error function of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the error function of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> erf(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::erf<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_math_extra
+ *
+ * Computes the complementary error function of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the complementary error function of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> erfc(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::erfc<A>(x, A {});
+ }
+
+ /**
+ * Extracts a vector from a pair of vectors: takes the lowest vector elements
+ * from the second source \c x and the highest vector elements from the first
+ * source \c y, and concatenates the results into the return value.
+ * @param x batch of integer or floating point values.
+ * @param y batch of integer or floating point values.
+ * @param i integer specifying the lowest vector element to extract from the first source register
+ * @return the concatenated batch.
+ */
+ template <class T, class A>
+ inline batch<T, A> extract_pair(batch<T, A> const& x, batch<T, A> const& y, std::size_t i) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::extract_pair<A>(x, y, i, A {});
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the absolute values of each scalar in the batch \c x.
+ * @param x batch of floating point values.
+ * @return the absolute values of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> fabs(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::abs<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the positive difference between \c x and \c y, that is,
+ * <tt>max(0, x-y)</tt>.
+ * @param x batch of floating point values.
+ * @param y batch of floating point values.
+ * @return the positive difference.
+ */
+ template <class T, class A>
+ inline batch<T, A> fdim(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::fdim<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_rounding
+ *
+ * Computes the batch of largest integer values not greater than
+ * scalars in \c x.
+ * @param x batch of floating point values.
+ * @return the batch of largest integer values not greater than \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> floor(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::floor<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+ * Computes <tt>(x*y) + z</tt> in a single instruction when possible.
+ * @param x a batch of integer or floating point values.
+ * @param y a batch of integer or floating point values.
+ * @param z a batch of integer or floating point values.
+ * @return the result of the fused multiply-add operation.
+ */
+ template <class T, class A>
+ inline batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::fma<A>(x, y, z, A {});
+ }
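A sketch of fma under the 4-lane float assumption (values illustrative):

    xsimd::batch<float> a(1.0f, 2.0f, 3.0f, 4.0f);
    xsimd::batch<float> b(10.0f), c(0.5f);       // broadcast constants
    xsimd::batch<float> r = xsimd::fma(a, b, c); // a * b + c == {10.5, 20.5, 30.5, 40.5}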
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the larger values of the batches \c x and \c y.
+ * @param x a batch of integer or floating point values.
+ * @param y a batch of integer or floating point values.
+ * @return a batch of the larger values.
+ */
+ template <class T, class A>
+ inline batch<T, A> fmax(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::max<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the smaller values of the batches \c x and \c y.
+ * @param x a batch of integer or floating point values.
+ * @param y a batch of integer or floating point values.
+ * @return a batch of the smaller values.
+ */
+ template <class T, class A>
+ inline batch<T, A> fmin(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::min<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the modulo of the batch \c x by the batch \c y.
+ * @param x batch involved in the modulo.
+ * @param y batch involved in the modulo.
+ * @return the result of the modulo.
+ */
+ template <class T, class A>
+ inline batch<T, A> fmod(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::fmod<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+ * Computes <tt>(x*y) - z</tt> in a single instruction when possible.
+ * @param x a batch of integer or floating point values.
+ * @param y a batch of integer or floating point values.
+ * @param z a batch of integer or floating point values.
+ * @return the result of the fused multiply-sub operation.
+ */
+ template <class T, class A>
+ inline batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::fms<A>(x, y, z, A {});
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+ * Computes <tt>-(x*y) + z</tt> in a single instruction when possible.
+ * @param x a batch of integer or floating point values.
+ * @param y a batch of integer or floating point values.
+ * @param z a batch of integer or floating point values.
+ * @return the result of the fused negated multiply-add operation.
+ */
+ template <class T, class A>
+ inline batch<T, A> fnma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::fnma<A>(x, y, z, A {});
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+ * Computes <tt>-(x*y) - z</tt> in a single instruction when possible.
+ * @param x a batch of integer or floating point values.
+ * @param y a batch of integer or floating point values.
+ * @param z a batch of integer or floating point values.
+ * @return the result of the fused negated multiply-sub operation.
+ */
+ template <class T, class A>
+ inline batch<T, A> fnms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::fnms<A>(x, y, z, A {});
+ }
+
+ /**
+ * @ingroup batch_fp
+ *
+ * Splits the number \c x into a normalized fraction and an exponent, the exponent being stored in \c y.
+ * @param x a batch of floating point values.
+ * @param y a batch of integer values receiving the exponents.
+ * @return the normalized fraction of \c x
+ */
+ template <class T, class A>
+ inline batch<T, A> frexp(const batch<T, A>& x, batch<as_integer_t<T>, A>& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::frexp<A>(x, y, A {});
+ }
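A sketch of frexp under the 4-lane float assumption; each value is decomposed as m * 2^e with m in [0.5, 1):

    xsimd::batch<float> x(8.0f, 1.0f, 0.5f, 12.0f);
    xsimd::batch<int32_t> e;                    // receives the exponents
    xsimd::batch<float> m = xsimd::frexp(x, e); // m == {0.5, 0.5, 0.5, 0.75}, e == {4, 1, 0, 4}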
+
+ /**
+ * @ingroup batch_logical
+ *
+ * Element-wise greater or equal comparison of batches \c x and \c y.
+ * @tparam X the actual type of batch.
+ * @param x batch involved in the comparison.
+ * @param y batch involved in the comparison.
+ * @return a boolean batch.
+ */
+ template <class T, class A>
+ inline batch_bool<T, A> ge(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return x >= y;
+ }
+
+ /**
+ * @ingroup batch_logical
+ *
+ * Element-wise greater than comparison of batches \c x and \c y.
+ * @tparam X the actual type of batch.
+ * @param x batch involved in the comparison.
+ * @param y batch involved in the comparison.
+ * @return a boolean batch.
+ */
+ template <class T, class A>
+ inline batch_bool<T, A> gt(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return x > y;
+ }
+
+ /**
+ * @ingroup batch_reducers
+ *
+ * Parallel horizontal addition: adds the scalars of each batch
+ * in the array pointed to by \c row and stores them in the returned
+ * batch.
+ * @param row an array of \c N batches
+ * @return the result of the reduction.
+ */
+ template <class T, class A>
+ inline batch<T, A> haddp(batch<T, A> const* row) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::haddp<A>(row, A {});
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the square root of the sum of the squares of the batches
+ * \c x, and \c y.
+ * @param x batch of floating point values.
+ * @param y batch of floating point values.
+ * @return the square root of the sum of the squares of \c x and \c y.
+ */
+ template <class T, class A>
+ inline batch<T, A> hypot(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::hypot<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_complex
+ *
+ * Computes the imaginary part of the batch \c x.
+ * @param x batch of complex or real values.
+ * @return the imaginary part of \c x.
+ */
+ template <class T, class A>
+ inline real_batch_type_t<batch<T, A>> imag(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::imag<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+ * Add 1 to batch \c x.
+ * @param x batch involved in the increment.
+ * @return the sum of \c x and 1.
+ */
+ template <class T, class A>
+ inline batch<T, A> incr(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::incr<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+ * Add 1 to batch \c x for each element where \c mask is true.
+ * @param x batch involved in the increment.
+ * @param mask whether to perform the increment or not. Can be a \c
+ * batch_bool or a \c batch_bool_constant.
+ * @return the sum of \c x and 1 when \c mask is true.
+ */
+ template <class T, class A, class Mask>
+ inline batch<T, A> incr_if(batch<T, A> const& x, Mask const& mask) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::incr_if<A>(x, mask, A {});
+ }
+
+ /**
+ * @ingroup batch_constant
+ *
+ * Return a batch of scalars representing positive infinity
+ * @return a batch of positive infinity
+ */
+ template <class B>
+ inline B infinity()
+ {
+ using T = typename B::value_type;
+ using A = typename B::arch_type;
+ detail::static_check_supported_config<T, A>();
+ return B(std::numeric_limits<T>::infinity());
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Create a new batch equivalent to \c x but with element \c val set at position \c pos
+ * @param x batch
+ * @param val value to set
+ * @param pos index of the updated slot
+ * @return copy of \c x with position \c pos set to \c val
+ */
+ template <class T, class A, size_t I>
+ inline batch<T, A> insert(batch<T, A> const& x, T val, index<I> pos) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::insert<A>(x, val, pos, A {});
+ }
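A sketch of insert under the 4-lane float assumption; the position is a compile-time index:

    xsimd::batch<float> v(1.0f, 2.0f, 3.0f, 4.0f);
    auto w = xsimd::insert(v, 42.0f, xsimd::index<2>()); // {1, 2, 42, 4}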
+
+ /**
+ * @ingroup batch_logical
+ *
+ * Determines if the scalars in the given batch \c x represent an even integer value
+ * @param x batch of floating point values.
+ * @return a batch of booleans.
+ */
+ template <class T, class A>
+ inline batch_bool<T, A> is_even(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::is_even<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_logical
+ *
+ * Determines if the floating-point scalars in the given batch \c x represent an integer value
+ * @param x batch of floating point values.
+ * @return a batch of booleans.
+ */
+ template <class T, class A>
+ inline batch_bool<T, A> is_flint(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::is_flint<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_logical
+ *
+ * Determines if the scalars in the given batch \c x represent an odd integer value
+ * @param x batch of floating point values.
+ * @return a batch of booleans.
+ */
+ template <class T, class A>
+ inline batch_bool<T, A> is_odd(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::is_odd<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_logical
+ *
+ * Determines if the scalars in the given batch \c x are inf values.
+ * @param x batch of floating point values.
+ * @return a batch of booleans.
+ */
+ template <class T, class A>
+ inline typename batch<T, A>::batch_bool_type isinf(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::isinf<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_logical
+ *
+ * Determines if the scalars in the given batch \c x are finite values.
+ * @param x batch of floating point values.
+ * @return a batch of booleans.
+ */
+ template <class T, class A>
+ inline typename batch<T, A>::batch_bool_type isfinite(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::isfinite<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_logical
+ *
+ * Determines if the scalars in the given batch \c x are NaN values.
+ * @param x batch of floating point values.
+ * @return a batch of booleans.
+ */
+ template <class T, class A>
+ inline typename batch<T, A>::batch_bool_type isnan(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::isnan<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_math_extra
+ *
+ * Computes the multiplication of the floating point number \c x by 2 raised to the power \c y.
+ * @param x batch of floating point values.
+ * @param y batch of integer values.
+ * @return a batch of floating point values.
+ */
+ template <class T, class A>
+ inline batch<T, A> ldexp(const batch<T, A>& x, const batch<as_integer_t<T>, A>& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::ldexp<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_logical
+ *
+ * Element-wise lesser or equal to comparison of batches \c x and \c y.
+ * @param x batch involved in the comparison.
+ * @param y batch involved in the comparison.
+ * @return a boolean batch.
+ */
+ template <class T, class A>
+ inline batch_bool<T, A> le(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return x <= y;
+ }
+
+ /**
+ * @ingroup batch_math_extra
+ *
+ * Computes the natural logarithm of the gamma function of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the natural logarithm of the gamma function of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> lgamma(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::lgamma<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Creates a batch from the buffer \c ptr and the specified
+ * batch value type \c To. The memory needs to be aligned.
+ * @param ptr the memory buffer to read
+ * @return a new batch instance
+ */
+ template <class To, class A = default_arch, class From>
+ inline simd_return_type<From, To, A> load_as(From const* ptr, aligned_mode) noexcept
+ {
+ using batch_value_type = typename simd_return_type<From, To, A>::value_type;
+ detail::static_check_supported_config<From, A>();
+ detail::static_check_supported_config<To, A>();
+ return kernel::load_aligned<A>(ptr, kernel::convert<batch_value_type> {}, A {});
+ }
+
+ template <class To, class A = default_arch>
+ inline simd_return_type<bool, To, A> load_as(bool const* ptr, aligned_mode) noexcept
+ {
+ detail::static_check_supported_config<To, A>();
+ return simd_return_type<bool, To, A>::load_aligned(ptr);
+ }
+
+ template <class To, class A = default_arch, class From>
+ inline simd_return_type<std::complex<From>, To, A> load_as(std::complex<From> const* ptr, aligned_mode) noexcept
+ {
+ detail::static_check_supported_config<To, A>();
+ using batch_value_type = typename simd_return_type<std::complex<From>, To, A>::value_type;
+ return kernel::load_complex_aligned<A>(ptr, kernel::convert<batch_value_type> {}, A {});
+ }
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+ template <class To, class A = default_arch, class From, bool i3ec>
+ inline simd_return_type<xtl::xcomplex<From, From, i3ec>, To, A> load_as(xtl::xcomplex<From, From, i3ec> const* ptr, aligned_mode) noexcept
+ {
+ detail::static_check_supported_config<To, A>();
+ detail::static_check_supported_config<From, A>();
+ return load_as<To>(reinterpret_cast<std::complex<From> const*>(ptr), aligned_mode());
+ }
+#endif
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Creates a batch from the buffer \c ptr and the specified
+ * batch value type \c To. The memory does not need to be aligned.
+ * @param ptr the memory buffer to read
+ * @return a new batch instance
+ */
+ template <class To, class A = default_arch, class From>
+ inline simd_return_type<From, To, A> load_as(From const* ptr, unaligned_mode) noexcept
+ {
+ using batch_value_type = typename simd_return_type<From, To, A>::value_type;
+ detail::static_check_supported_config<To, A>();
+ detail::static_check_supported_config<From, A>();
+ return kernel::load_unaligned<A>(ptr, kernel::convert<batch_value_type> {}, A {});
+ }
+
+ template <class To, class A = default_arch>
+ inline simd_return_type<bool, To, A> load_as(bool const* ptr, unaligned_mode) noexcept
+ {
+ return simd_return_type<bool, To, A>::load_unaligned(ptr);
+ }
+
+ template <class To, class A = default_arch, class From>
+ inline simd_return_type<std::complex<From>, To, A> load_as(std::complex<From> const* ptr, unaligned_mode) noexcept
+ {
+ detail::static_check_supported_config<To, A>();
+ detail::static_check_supported_config<From, A>();
+ using batch_value_type = typename simd_return_type<std::complex<From>, To, A>::value_type;
+ return kernel::load_complex_unaligned<A>(ptr, kernel::convert<batch_value_type> {}, A {});
+ }
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+ template <class To, class A = default_arch, class From, bool i3ec>
+ inline simd_return_type<xtl::xcomplex<From, From, i3ec>, To, A> load_as(xtl::xcomplex<From, From, i3ec> const* ptr, unaligned_mode) noexcept
+ {
+ detail::static_check_supported_config<To, A>();
+ detail::static_check_supported_config<From, A>();
+ return load_as<To>(reinterpret_cast<std::complex<From> const*>(ptr), unaligned_mode());
+ }
+#endif
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Creates a batch from the buffer \c ptr. The
+ * memory needs to be aligned.
+ * @param ptr the memory buffer to read
+ * @return a new batch instance
+ */
+ template <class A = default_arch, class From>
+ inline batch<From, A> load(From const* ptr, aligned_mode = {}) noexcept
+ {
+ detail::static_check_supported_config<From, A>();
+ return load_as<From, A>(ptr, aligned_mode {});
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Creates a batch from the buffer \c ptr. The
+ * memory does not need to be aligned.
+ * @param ptr the memory buffer to read
+ * @return a new batch instance
+ */
+ template <class A = default_arch, class From>
+ inline batch<From, A> load(From const* ptr, unaligned_mode) noexcept
+ {
+ detail::static_check_supported_config<From, A>();
+ return load_as<From, A>(ptr, unaligned_mode {});
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Creates a batch from the buffer \c ptr. The
+ * memory needs to be aligned.
+ * @param ptr the memory buffer to read
+ * @return a new batch instance
+ */
+ template <class A = default_arch, class From>
+ inline batch<From, A> load_aligned(From const* ptr) noexcept
+ {
+ detail::static_check_supported_config<From, A>();
+ return load_as<From, A>(ptr, aligned_mode {});
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Creates a batch from the buffer \c ptr. The
+ * memory does not need to be aligned.
+ * @param ptr the memory buffer to read
+ * @return a new batch instance
+ */
+ template <class A = default_arch, class From>
+ inline batch<From, A> load_unaligned(From const* ptr) noexcept
+ {
+ detail::static_check_supported_config<From, A>();
+ return load_as<From, A>(ptr, unaligned_mode {});
+ }
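A sketch of the load/store round trip under the 4-lane float assumption; the buffer is an ordinary (possibly unaligned) array:

    float buf[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
    auto b = xsimd::load_unaligned(buf); // batch<float> read from unaligned memory
    b += b;                              // lane-wise doubling
    b.store_unaligned(buf);              // buf now holds {2, 4, 6, 8}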
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the natural logarithm of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the natural logarithm of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> log(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::log<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_math
+ * Computes the base 2 logarithm of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the base 2 logarithm of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> log2(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::log2<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_math
+ * Computes the base 10 logarithm of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the base 10 logarithm of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> log10(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::log10<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_math
+ * Computes the natural logarithm of one plus the batch \c x.
+ * @param x batch of floating point values.
+ * @return the natural logarithm of one plus \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> log1p(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::log1p<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_logical
+ *
+ * Element-wise lesser than comparison of batches \c x and \c y.
+ * @param x batch involved in the comparison.
+ * @param y batch involved in the comparison.
+ * @return a boolean batch.
+ */
+ template <class T, class A>
+ inline batch_bool<T, A> lt(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return x < y;
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the larger values of the batches \c x and \c y.
+ * @param x a batch of integer or floating point values.
+ * @param y a batch of integer or floating point values.
+ * @return a batch of the larger values.
+ */
+ template <class T, class A>
+ inline batch<T, A> max(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::max<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the smaller values of the batches \c x and \c y.
+ * @param x a batch of integer or floating point values.
+ * @param y a batch of integer or floating point values.
+ * @return a batch of the smaller values.
+ */
+ template <class T, class A>
+ inline batch<T, A> min(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::min<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_constant
+ *
+ * Return a batch of scalars representing negative infinity
+ * @return a batch of negative infinity
+ */
+ template <class B>
+ inline B minusinfinity() noexcept
+ {
+ using T = typename B::value_type;
+ using A = typename B::arch_type;
+ detail::static_check_supported_config<T, A>();
+ return B(-std::numeric_limits<T>::infinity());
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+ * Computes the integer modulo of the batch \c x by the batch \c y.
+ * @param x batch involved in the modulo.
+ * @param y batch involved in the modulo.
+ * @return the result of the modulo.
+ */
+ template <class T, class A>
+ inline auto mod(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x % y)
+ {
+ detail::static_check_supported_config<T, A>();
+ return x % y;
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+ * Computes the product of the batches \c x and \c y.
+ * @tparam X the actual type of batch.
+ * @param x batch involved in the product.
+ * @param y batch involved in the product.
+ * @return the result of the product.
+ */
+ template <class T, class A>
+ inline auto mul(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x * y)
+ {
+ detail::static_check_supported_config<T, A>();
+ return x * y;
+ }
+
+ /**
+ * @ingroup batch_rounding
+ *
+ * Rounds the scalars in \c x to integer values (in floating point format), using
+ * the current rounding mode.
+ * @param x batch of floating point values.
+ * @return the batch of nearest integer values.
+ */
+ template <class T, class A>
+ inline batch<T, A> nearbyint(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::nearbyint<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_rounding
+ *
+ * Rounds the scalars in \c x to integer values (in integer format) using
+ * the current rounding mode.
+ * @param x batch of floating point values.
+ * @return the batch of nearest integer values.
+ *
+ * @warning For very large values the conversion to int silently overflows.
+ */
+ template <class T, class A>
+ inline batch<as_integer_t<T>, A>
+ nearbyint_as_int(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::nearbyint_as_int(x, A {});
+ }
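A sketch of nearbyint_as_int under the 4-lane float assumption; the results assume the default round-to-nearest-even mode:

    xsimd::batch<float> x(1.4f, 2.5f, -2.5f, 3.7f);
    auto i = xsimd::nearbyint_as_int(x); // batch<int32_t>{1, 2, -2, 4}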
+
+ /**
+ * @ingroup batch_logical
+ *
+ * Element-wise inequality comparison of batches \c x and \c y.
+ * @param x batch involved in the comparison.
+ * @param y batch involved in the comparison.
+ * @return a boolean batch.
+ */
+ template <class T, class A>
+ inline auto neq(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x != y)
+ {
+ detail::static_check_supported_config<T, A>();
+ return x != y;
+ }
+
+ /**
+ * @ingroup batch_logical
+ *
+ * Element-wise inequality comparison of batches of boolean values \c x and \c y.
+ * @param x batch of booleans involved in the comparison.
+ * @param y batch of booleans involved in the comparison.
+ * @return a boolean batch.
+ */
+ template <class T, class A>
+ inline auto neq(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x != y)
+ {
+ detail::static_check_supported_config<T, A>();
+ return x != y;
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+ * Computes the opposite of the batch \c x.
+ * @param x batch involved in the operation.
+ * @return the opposite of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> neg(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return -x;
+ }
+
+ /**
+ * @ingroup batch_math_extra
+ *
+ * Computes the next representable floating-point
+ * value following x in the direction of y
+ * @param x batch of floating point values.
+ * @param y batch of floating point values.
+ * @return the next representable value after \c x in the direction of \c y.
+ */
+ template <class T, class A>
+ inline batch<T, A> nextafter(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::nextafter<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_complex
+ *
+ * Computes the norm of the batch \c x.
+ * @param x batch of complex or real values.
+ * @return the norm of \c x.
+ */
+ template <class T, class A>
+ inline real_batch_type_t<batch<T, A>> norm(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::norm(x, A {});
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Returns a complex batch with magnitude \c r and phase angle \c theta.
+ * @param r The magnitude of the desired complex result.
+ * @param theta The phase angle of the desired complex result.
+ * @return \c r exp(i * \c theta).
+ */
+ template <class T, class A>
+ inline complex_batch_type_t<batch<T, A>> polar(batch<T, A> const& r, batch<T, A> const& theta = batch<T, A> {}) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::polar<A>(r, theta, A {});
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+ * No-op on \c x.
+ * @param x batch involved in the operation.
+ * @return \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> pos(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return +x;
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the value of the batch \c x raised to the power
+ * \c y.
+ * @param x batch of floating point values.
+ * @param y batch of floating point values.
+ * @return \c x raised to the power \c y.
+ */
+ template <class T, class A>
+ inline batch<T, A> pow(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::pow<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the value of the batch \c x raised to the power
+ * \c y.
+ * @param x batch of scalar values.
+ * @param y scalar integral exponent.
+ * @return \c x raised to the power \c y.
+ */
+ template <class T, class ITy, class A, class = typename std::enable_if<std::is_integral<ITy>::value, void>::type>
+ inline batch<T, A> pow(batch<T, A> const& x, ITy y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::ipow<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_complex
+ *
+ * Computes the projection of the batch \c z.
+ * @param z batch of complex or real values.
+ * @return the projection of \c z.
+ */
+ template <class T, class A>
+ inline complex_batch_type_t<batch<T, A>> proj(batch<T, A> const& z) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::proj(z, A {});
+ }
+
+ /**
+ * @ingroup batch_complex
+ *
+ * Computes the real part of the batch \c z.
+ * @param z batch of complex or real values.
+ * @return the real part of \c z.
+ */
+ template <class T, class A>
+ inline real_batch_type_t<batch<T, A>> real(batch<T, A> const& z) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::real<A>(z, A {});
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+ * Computes the approximate reciprocal of the batch \c x.
+ * The maximum relative error for this approximation is
+ * less than 1.5*2^-12.
+ * @param x batch of floating point numbers.
+ * @return the reciprocal.
+ */
+ template <class T, class A, class = typename std::enable_if<std::is_floating_point<T>::value, void>::type>
+ inline batch<T, A> reciprocal(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::reciprocal(x, A {});
+ }
+
+ /**
+ * @ingroup batch_reducers
+ *
+ * Generic reducer using only batch operations
+ * @param f reducing function, accepting `batch ()(batch, batch)`
+ * @param x batch involved in the reduction
+ * @return the result of the reduction, as a scalar.
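+     *
+     * A minimal usage sketch, assuming \c float batches are supported; any
+     * binary batch functor can be used:
+     * \code{.cpp}
+     * // lane-wise maximum as the reducing function; yields 1.5f for a broadcast batch
+     * float r = xsimd::reduce(
+     *     [](xsimd::batch<float> a, xsimd::batch<float> b) { return xsimd::max(a, b); },
+     *     xsimd::batch<float>(1.5f));
+     * \endcode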
+ */
+ template <class T, class A, class F>
+ inline T reduce(F&& f, batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::detail::reduce(std::forward<F>(f), x, std::integral_constant<unsigned, batch<T, A>::size>());
+ }
+
+ /**
+ * @ingroup batch_reducers
+ *
+ * Adds all the scalars of the batch \c x.
+ * @param x batch involved in the reduction
+ * @return the result of the reduction.
+ */
+ template <class T, class A>
+ inline T reduce_add(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::reduce_add<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_reducers
+ *
+ * Max of all the scalars of the batch \c x.
+ * @param x batch involved in the reduction
+ * @return the result of the reduction.
+ */
+ template <class T, class A>
+ inline T reduce_max(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::reduce_max<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_reducers
+ *
+ * Min of all the scalars of the batch \c x.
+ * @param x batch involved in the reduction
+ * @return the result of the reduction.
+ */
+ template <class T, class A>
+ inline T reduce_min(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::reduce_min<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the remainder of dividing \c x by \c y
+ * @param x batch of scalar values
+ * @param y batch of scalar values
+     * @return the remainder of the division.
+ */
+ template <class T, class A>
+ inline batch<T, A> remainder(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::remainder<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_rounding
+ *
+ * Rounds the scalars in \c x to integer values (in floating point format), using
+ * the current rounding mode.
+ * @param x batch of floating point values.
+ * @return the batch of rounded values.
+ */
+ template <class T, class A>
+ inline batch<T, A> rint(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return nearbyint(x);
+ }
+
+ /**
+     * @ingroup batch_data_transfer
+     *
+     * Slide the whole batch to the left by \c N bytes, and reintroduce the
+     * slid out elements from the right. This is different from
+     * \c rotl that rotates each batch element to the left.
+     *
+     * @tparam N Amount of bytes to rotate to the left.
+ * @param x batch of integer values.
+ * @return rotated batch.
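+     *
+     * A hedged sketch, assuming 32-bit unsigned lanes:
+     * \code{.cpp}
+     * // rotate the whole register by sizeof(uint32_t) bytes: every 32-bit lane
+     * // moves by one position and the lane pushed out wraps around
+     * xsimd::batch<uint32_t> v(42u);
+     * auto rotated = xsimd::rotate_left<sizeof(uint32_t)>(v);
+     * \endcode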
+ */
+ template <size_t N, class T, class A>
+ inline batch<T, A> rotate_left(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::rotate_left<N, A>(x, A {});
+ }
+
+ /**
+     * @ingroup batch_data_transfer
+     *
+     * Slide the whole batch to the right by \c N bytes, and reintroduce the
+     * slid out elements from the left. This is different from
+     * \c rotr that rotates each batch element to the right.
+     *
+     * @tparam N Amount of bytes to rotate to the right.
+ * @param x batch of integer values.
+ * @return rotated batch.
+ */
+ template <size_t N, class T, class A>
+ inline batch<T, A> rotate_right(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::rotate_right<N, A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_bitwise
+ *
+     * Performs a bitwise rotation to the left, reintroducing the shifted-out
+     * bits on the right.
+ * @param x batch to rotate
+ * @param shift scalar amount to shift
+ * @return rotated \c x.
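+     *
+     * A minimal sketch, assuming 32-bit unsigned lanes:
+     * \code{.cpp}
+     * // every lane holds 0x80000001u; rotating left by one bit gives 0x00000003u
+     * auto r = xsimd::rotl(xsimd::batch<uint32_t>(0x80000001u), 1);
+     * \endcode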
+ */
+ template <class T, class A>
+ inline batch<T, A> rotl(batch<T, A> const& x, int shift) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::rotl<A>(x, shift, A {});
+ }
+ template <class T, class A>
+ inline batch<T, A> rotl(batch<T, A> const& x, batch<T, A> const& shift) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::rotl<A>(x, shift, A {});
+ }
+
+ /**
+ * @ingroup batch_bitwise
+ *
+     * Performs a bitwise rotation to the right, reintroducing the shifted-out
+     * bits on the left.
+ * @param x batch to rotate
+ * @param shift scalar amount to shift
+ * @return rotated \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> rotr(batch<T, A> const& x, int shift) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::rotr<A>(x, shift, A {});
+ }
+ template <class T, class A>
+ inline batch<T, A> rotr(batch<T, A> const& x, batch<T, A> const& shift) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::rotr<A>(x, shift, A {});
+ }
+
+ /**
+ * @ingroup batch_rounding
+ *
+ * Computes the batch of nearest integer values to scalars in \c x (in
+ * floating point format), rounding halfway cases away from zero, regardless
+ * of the current rounding mode.
+     * @param x batch of floating point values.
+ * @return the batch of nearest integer values.
+ */
+ template <class T, class A>
+ inline batch<T, A> round(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::round<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes an estimate of the inverse square root of the batch \c x.
+ *
+     * @warning Unlike most xsimd functions, this does not return the same result as the
+ * equivalent scalar operation, trading accuracy for speed.
+ *
+ * @param x batch of floating point values.
+ * @return the inverse square root of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> rsqrt(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::rsqrt<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+     * Computes the saturated sum of the batch \c x and the batch \c y.
+     *
+ * @param x batch involved in the saturated addition.
+ * @param y batch involved in the saturated addition.
+ * @return the result of the saturated addition.
+ */
+ template <class T, class A>
+ inline batch<T, A> sadd(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::sadd<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_miscellaneous
+ *
+ * Ternary operator for batches: selects values from the batches \c true_br or \c false_br
+ * depending on the boolean values in the constant batch \c cond. Equivalent to
+ * \code{.cpp}
+ * for(std::size_t i = 0; i < N; ++i)
+ * res[i] = cond[i] ? true_br[i] : false_br[i];
+ * \endcode
+ * @param cond batch condition.
+ * @param true_br batch values for truthy condition.
+ * @param false_br batch value for falsy condition.
+ * @return the result of the selection.
+ */
+ template <class T, class A>
+ inline batch<T, A> select(batch_bool<T, A> const& cond, batch<T, A> const& true_br, batch<T, A> const& false_br) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::select<A>(cond, true_br, false_br, A {});
+ }
+
+ /**
+ * @ingroup batch_miscellaneous
+ *
+ * Ternary operator for batches: selects values from the batches \c true_br or \c false_br
+ * depending on the boolean values in the constant batch \c cond. Equivalent to
+ * \code{.cpp}
+ * for(std::size_t i = 0; i < N; ++i)
+ * res[i] = cond[i] ? true_br[i] : false_br[i];
+ * \endcode
+ * @param cond batch condition.
+ * @param true_br batch values for truthy condition.
+ * @param false_br batch value for falsy condition.
+ * @return the result of the selection.
+ */
+ template <class T, class A>
+ inline batch<std::complex<T>, A> select(batch_bool<T, A> const& cond, batch<std::complex<T>, A> const& true_br, batch<std::complex<T>, A> const& false_br) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::select<A>(cond, true_br, false_br, A {});
+ }
+
+ /**
+ * @ingroup batch_miscellaneous
+ *
+ * Ternary operator for batches: selects values from the batches \c true_br or \c false_br
+ * depending on the boolean values in the constant batch \c cond. Equivalent to
+ * \code{.cpp}
+ * for(std::size_t i = 0; i < N; ++i)
+ * res[i] = cond[i] ? true_br[i] : false_br[i];
+ * \endcode
+ * @param cond constant batch condition.
+ * @param true_br batch values for truthy condition.
+ * @param false_br batch value for falsy condition.
+ * @return the result of the selection.
+ */
+ template <class T, class A, bool... Values>
+ inline batch<T, A> select(batch_bool_constant<batch<T, A>, Values...> const& cond, batch<T, A> const& true_br, batch<T, A> const& false_br) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::select<A>(cond, true_br, false_br, A {});
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Combine elements from \c x and \c y according to selector \c mask
+ * @param x batch
+ * @param y batch
+ * @param mask constant batch mask of integer elements of the same size as
+     * the elements of \c x and \c y. Each element of the mask indexes the vector
+     * formed by the concatenation of \c x and \c y. For instance
+ * \code{.cpp}
+ * batch_constant<batch<uint32_t, sse2>, 0, 4, 3, 7>
+ * \endcode
+ * Picks \c x[0], \c y[0], \c x[3], \c y[3]
+ *
+ * @return combined batch
+ */
+ template <class T, class A, class Vt, Vt... Values>
+ inline typename std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type
+ shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<batch<Vt, A>, Values...> mask) noexcept
+ {
+ static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
+ detail::static_check_supported_config<T, A>();
+ return kernel::shuffle<A>(x, y, mask, A {});
+ }
+
+ /**
+ * @ingroup batch_miscellaneous
+ *
+ * Computes the sign of \c x
+ * @param x batch
+     * @return -1 for each negative element, 0 for each null element and +1 for each positive element.
+ */
+ template <class T, class A>
+ inline batch<T, A> sign(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::sign<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_miscellaneous
+ *
+     * Computes the sign of \c x, assuming \c x contains no zero element.
+     * @param x batch
+     * @return -1 for each negative element, -1 or +1 for each null element and +1 for each positive element.
+ */
+ template <class T, class A>
+ inline batch<T, A> signnz(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::signnz<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_trigo
+ *
+ * Computes the sine of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the sine of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> sin(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::sin<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_trigo
+ *
+ * Computes the sine and the cosine of the batch \c x. This method is faster
+ * than calling sine and cosine independently.
+ * @param x batch of floating point values.
+ * @return a pair containing the sine then the cosine of batch \c x
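+     *
+     * A minimal sketch, assuming \c float batches are supported:
+     * \code{.cpp}
+     * auto sc = xsimd::sincos(xsimd::batch<float>(0.f));
+     * // sc.first holds sin(0) == 0.f and sc.second holds cos(0) == 1.f in every lane
+     * \endcode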
+ */
+ template <class T, class A>
+ inline std::pair<batch<T, A>, batch<T, A>> sincos(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::sincos<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_trigo
+ *
+ * Computes the hyperbolic sine of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the hyperbolic sine of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> sinh(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::sinh<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+     * Slide the whole batch to the left by \c N bytes. This is different from
+ * \c bitwise_lshift that shifts each batch element to the left.
+ *
+ * @tparam N Amount of bytes to slide to the left.
+ * @param x batch of integer values.
+     * @return slid batch.
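+     *
+     * A hedged sketch, assuming 32-bit unsigned lanes; the \c N bytes shifted
+     * out are discarded and zeros are shifted in:
+     * \code{.cpp}
+     * xsimd::batch<uint32_t> v(7u);
+     * auto shifted = xsimd::slide_left<sizeof(uint32_t)>(v); // move by one 32-bit lane
+     * \endcode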
+ */
+ template <size_t N, class T, class A>
+ inline batch<T, A> slide_left(batch<T, A> const& x) noexcept
+ {
+ static_assert(std::is_integral<T>::value, "can only slide batch of integers");
+ detail::static_check_supported_config<T, A>();
+ return kernel::slide_left<N, A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Slide the whole batch to the right by \c N bytes. This is different from
+ * \c bitwise_rshift that shifts each batch element to the right.
+ *
+ * @tparam N Amount of bytes to slide to the right.
+ * @param x batch of integer values.
+     * @return slid batch.
+ */
+ template <size_t N, class T, class A>
+ inline batch<T, A> slide_right(batch<T, A> const& x) noexcept
+ {
+ static_assert(std::is_integral<T>::value, "can only slide batch of integers");
+ detail::static_check_supported_config<T, A>();
+ return kernel::slide_right<N, A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_math
+ *
+ * Computes the square root of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the square root of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> sqrt(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::sqrt<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+     * Computes the saturated difference of the batch \c x and the batch \c y.
+ * @param x batch involved in the saturated difference.
+ * @param y batch involved in the saturated difference.
+ * @return the result of the saturated difference.
+ */
+ template <class T, class A>
+ inline batch<T, A> ssub(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::ssub<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Copy content of batch \c src to the buffer \c dst. The
+ * memory needs to be aligned.
+ * @param dst the memory buffer to write to
+ * @param src the batch to copy
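+     *
+     * A minimal sketch, assuming the default architecture and a \c float batch;
+     * the destination honours the architecture's alignment:
+     * \code{.cpp}
+     * alignas(xsimd::default_arch::alignment()) float out[xsimd::batch<float>::size];
+     * xsimd::store_as(out, xsimd::batch<float>(1.f), xsimd::aligned_mode {});
+     * \endcode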
+ */
+ template <class To, class A = default_arch, class From>
+ inline void store_as(To* dst, batch<From, A> const& src, aligned_mode) noexcept
+ {
+ kernel::store_aligned(dst, src, A {});
+ }
+
+ template <class A = default_arch, class From>
+ inline void store_as(bool* dst, batch_bool<From, A> const& src, aligned_mode) noexcept
+ {
+ kernel::store(src, dst, A {});
+ }
+
+ template <class To, class A = default_arch, class From>
+ inline void store_as(std::complex<To>* dst, batch<std::complex<From>, A> const& src, aligned_mode) noexcept
+ {
+ kernel::store_complex_aligned(dst, src, A {});
+ }
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+ template <class To, class A = default_arch, class From, bool i3ec>
+ inline void store_as(xtl::xcomplex<To, To, i3ec>* dst, batch<std::complex<From>, A> const& src, aligned_mode) noexcept
+ {
+ store_as(reinterpret_cast<std::complex<To>*>(dst), src, aligned_mode());
+ }
+#endif
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Copy content of batch \c src to the buffer \c dst. The
+ * memory does not need to be aligned.
+ * @param dst the memory buffer to write to
+ * @param src the batch to copy
+ */
+ template <class To, class A = default_arch, class From>
+ inline void store_as(To* dst, batch<From, A> const& src, unaligned_mode) noexcept
+ {
+ kernel::store_unaligned(dst, src, A {});
+ }
+
+ template <class A = default_arch, class From>
+ inline void store_as(bool* dst, batch_bool<From, A> const& src, unaligned_mode) noexcept
+ {
+ kernel::store(src, dst, A {});
+ }
+
+ template <class To, class A = default_arch, class From>
+ inline void store_as(std::complex<To>* dst, batch<std::complex<From>, A> const& src, unaligned_mode) noexcept
+ {
+ kernel::store_complex_unaligned(dst, src, A {});
+ }
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+ template <class To, class A = default_arch, class From, bool i3ec>
+ inline void store_as(xtl::xcomplex<To, To, i3ec>* dst, batch<std::complex<From>, A> const& src, unaligned_mode) noexcept
+ {
+ store_as(reinterpret_cast<std::complex<To>*>(dst), src, unaligned_mode());
+ }
+#endif
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+     * Copy content of batch \c val to the buffer \c mem. The
+     * memory needs to be aligned.
+ * @param mem the memory buffer to write to
+ * @param val the batch to copy from
+ */
+ template <class A, class T>
+ inline void store(T* mem, batch<T, A> const& val, aligned_mode = {}) noexcept
+ {
+ store_as<T, A>(mem, val, aligned_mode {});
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Copy content of batch \c val to the buffer \c mem. The
+ * memory does not need to be aligned.
+ * @param mem the memory buffer to write to
+ * @param val the batch to copy from
+ */
+ template <class A, class T>
+ inline void store(T* mem, batch<T, A> const& val, unaligned_mode) noexcept
+ {
+ store_as<T, A>(mem, val, unaligned_mode {});
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Copy content of batch \c val to the buffer \c mem. The
+ * memory needs to be aligned.
+ * @param mem the memory buffer to write to
+ * @param val the batch to copy from
+ */
+ template <class A, class T>
+ inline void store_aligned(T* mem, batch<T, A> const& val) noexcept
+ {
+ store_as<T, A>(mem, val, aligned_mode {});
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Copy content of batch \c val to the buffer \c mem. The
+ * memory does not need to be aligned.
+ * @param mem the memory buffer to write to
+ * @param val the batch to copy
+ */
+ template <class A, class T>
+ inline void store_unaligned(T* mem, batch<T, A> const& val) noexcept
+ {
+ store_as<T, A>(mem, val, unaligned_mode {});
+ }
+
+ /**
+ * @ingroup batch_arithmetic
+ *
+     * Computes the difference between \c x and \c y.
+ * @param x scalar or batch of scalars
+ * @param y scalar or batch of scalars
+ * @return the difference between \c x and \c y
+ */
+ template <class T, class A>
+ inline auto sub(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x - y)
+ {
+ detail::static_check_supported_config<T, A>();
+ return x - y;
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Rearrange elements from \c x according to constant mask \c mask
+ * @param x batch
+ * @param mask constant batch mask of integer elements of the same size as
+ * element of \c x
+ * @return swizzled batch
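+     *
+     * For instance, with a four-lane \c uint32_t batch (e.g. on sse2), the mask
+     * \code{.cpp}
+     * batch_constant<batch<uint32_t, sse2>, 3, 2, 1, 0>
+     * \endcode
+     * reverses the order of the lanes of \c x.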
+ */
+ template <class T, class A, class Vt, Vt... Values>
+ inline typename std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type
+ swizzle(batch<T, A> const& x, batch_constant<batch<Vt, A>, Values...> mask) noexcept
+ {
+ static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
+ detail::static_check_supported_config<T, A>();
+ return kernel::swizzle<A>(x, mask, A {});
+ }
+ template <class T, class A, class Vt, Vt... Values>
+ inline batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& x, batch_constant<batch<Vt, A>, Values...> mask) noexcept
+ {
+ static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
+ detail::static_check_supported_config<T, A>();
+ return kernel::swizzle<A>(x, mask, A {});
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Rearrange elements from \c x according to mask \c mask
+ * @param x batch
+ * @param mask batch mask of integer elements of the same size as
+ * element of \c x
+ * @return swizzled batch
+ */
+ template <class T, class A, class Vt>
+ inline typename std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type
+ swizzle(batch<T, A> const& x, batch<Vt, A> mask) noexcept
+ {
+ static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
+ detail::static_check_supported_config<T, A>();
+ return kernel::swizzle<A>(x, mask, A {});
+ }
+
+ template <class T, class A, class Vt>
+ inline batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& x, batch<Vt, A> mask) noexcept
+ {
+ static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
+ detail::static_check_supported_config<T, A>();
+ return kernel::swizzle<A>(x, mask, A {});
+ }
+
+ /**
+ * @ingroup batch_trigo
+ *
+ * Computes the tangent of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the tangent of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> tan(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::tan<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_trigo
+ *
+ * Computes the hyperbolic tangent of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the hyperbolic tangent of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> tanh(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::tanh<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_math_extra
+ *
+ * Computes the gamma function of the batch \c x.
+ * @param x batch of floating point values.
+ * @return the gamma function of \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> tgamma(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::tgamma<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_conversion
+ *
+     * Perform a conversion from \c i to a value of a floating point type of the same size as \c T.
+     * This is equivalent to \c batch_cast<as_float_t<T>>(i)
+     * @param i batch of integers.
+     * @return \c i converted to a value of a floating point type of the same size as \c T
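+     *
+     * A minimal sketch, assuming 32-bit integer lanes:
+     * \code{.cpp}
+     * auto f = xsimd::to_float(xsimd::batch<int32_t>(3)); // batch<float>, every lane holds 3.f
+     * \endcode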
+ */
+ template <class T, class A>
+ inline batch<as_float_t<T>, A> to_float(batch<T, A> const& i) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return batch_cast<as_float_t<T>>(i);
+ }
+
+ /**
+ * @ingroup batch_conversion
+ *
+ * Perform a conversion from \c x to a value of an integer type of the same size as \c T
+ * This is equivalent to \c batch_cast<as_integer_t<T>>(x)
+ * @param x batch.
+ * @return \c x converted to a value of an integer type of the same size as \c T
+ */
+ template <class T, class A>
+ inline batch<as_integer_t<T>, A> to_int(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return batch_cast<as_integer_t<T>>(x);
+ }
+
+ /**
+ * @ingroup batch_rounding
+ *
+ * Computes the batch of nearest integer values not greater in magnitude
+ * than scalars in \c x.
+ * @param x batch of floating point values.
+ * @return the batch of nearest integer values not greater in magnitude than \c x.
+ */
+ template <class T, class A>
+ inline batch<T, A> trunc(batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::trunc<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Unpack and interleave data from the HIGH half of batches \c x and \c y.
+     * Store the result in the return value.
+ * @param x a batch of integer or floating point or double precision values.
+ * @param y a batch of integer or floating point or double precision values.
+ * @return a batch of the high part of shuffled values.
+ */
+ template <class T, class A>
+ inline batch<T, A> zip_hi(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::zip_hi<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_data_transfer
+ *
+ * Unpack and interleave data from the LOW half of batches \c x and \c y.
+     * Store the result in the return value.
+ * @param x a batch of integer or floating point or double precision values.
+ * @param y a batch of integer or floating point or double precision values.
+ * @return a batch of the low part of shuffled values.
+ */
+ template <class T, class A>
+ inline batch<T, A> zip_lo(batch<T, A> const& x, batch<T, A> const& y) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::zip_lo<A>(x, y, A {});
+ }
+
+ /**
+ * @ingroup batch_conversion
+ *
+ * Cast a \c batch_bool of \c T into a \c batch of the same type using the
+ * following rule: if an element of \c self is true, it maps to -1 in the
+ * returned integral batch, otherwise it maps to 0.
+ *
+ * @param self batch_bool of \c T
+ * @return \c self cast to a \c batch of \c T
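+     *
+     * A minimal sketch, assuming 32-bit integer lanes:
+     * \code{.cpp}
+     * // true lanes map to -1 (all bits set), false lanes map to 0
+     * auto m = xsimd::bitwise_cast(xsimd::batch_bool<int32_t>(true));
+     * \endcode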
+ */
+ template <class T, class A, typename std::enable_if<std::is_integral<T>::value, int>::type = 3>
+ inline batch<T, A> bitwise_cast(batch_bool<T, A> const& self) noexcept
+ {
+ T z(0);
+ detail::static_check_supported_config<T, A>();
+ return select(self, batch<T, A>(T(~z)), batch<T, A>(z));
+ }
+
+ template <class T, class A, typename std::enable_if<std::is_floating_point<T>::value, int>::type = 3>
+ inline batch<T, A> bitwise_cast(batch_bool<T, A> const& self) noexcept
+ {
+ T z0(0), z1(0);
+ using int_type = as_unsigned_integer_t<T>;
+ int_type value(~int_type(0));
+ std::memcpy(&z1, &value, sizeof(int_type));
+ detail::static_check_supported_config<T, A>();
+ return select(self, batch<T, A>(z1), batch<T, A>(z0));
+ }
+
+ /**
+ * @ingroup batch_bool_reducers
+ *
+ * Returns true if all the boolean values in the batch are true,
+ * false otherwise.
+ * @param x the batch to reduce.
+ * @return a boolean scalar.
+ */
+ template <class T, class A>
+ inline bool all(batch_bool<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::all<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_bool_reducers
+ *
+ * Return true if any of the boolean values in the batch is true,
+ * false otherwise.
+ * @param x the batch to reduce.
+ * @return a boolean scalar.
+ */
+ template <class T, class A>
+ inline bool any(batch_bool<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::any<A>(x, A {});
+ }
+
+ /**
+ * @ingroup batch_bool_reducers
+ *
+ * Return true if none of the boolean values in the batch is true,
+ * false otherwise.
+ * @param x the batch to reduce.
+ * @return a boolean scalar.
+ */
+ template <class T, class A>
+ inline bool none(batch_bool<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return !xsimd::any(x);
+ }
+
+ /**
+ * @ingroup batch_miscellaneous
+ *
+ * Dump the content of batch \c x to stream \c o
+ * @param o the stream where the batch is dumped
+ * @param x batch to dump.
+ * @return a reference to \c o
+ */
+ template <class T, class A>
+ inline std::ostream& operator<<(std::ostream& o, batch<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ constexpr auto size = batch<T, A>::size;
+ alignas(A::alignment()) T buffer[size];
+ x.store_aligned(&buffer[0]);
+ o << '(';
+ for (std::size_t i = 0; i < size - 1; ++i)
+ o << buffer[i] << ", ";
+ return o << buffer[size - 1] << ')';
+ }
+
+ /**
+ * @ingroup batch_miscellaneous
+ *
+ * Dump the content of batch \c x to stream \c o
+ * @param o the stream where the batch is dumped
+ * @param x batch to dump.
+ * @return a reference to \c o
+ */
+ template <class T, class A>
+ inline std::ostream& operator<<(std::ostream& o, batch_bool<T, A> const& x) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ constexpr auto size = batch_bool<T, A>::size;
+ alignas(A::alignment()) bool buffer[size];
+ x.store_aligned(&buffer[0]);
+ o << '(';
+ for (std::size_t i = 0; i < size - 1; ++i)
+ o << buffer[i] << ", ";
+ return o << buffer[size - 1] << ')';
+ }
+}
+
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx2_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx2_register.hpp
new file mode 100644
index 0000000000..cd10383e2b
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_avx2_register.hpp
@@ -0,0 +1,40 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_AVX2_REGISTER_HPP
+#define XSIMD_AVX2_REGISTER_HPP
+
+#include "./xsimd_avx_register.hpp"
+
+namespace xsimd
+{
+ /**
+ * @ingroup architectures
+ *
+ * AVX2 instructions
+ */
+ struct avx2 : avx
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_AVX2; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(2, 2, 0); }
+ static constexpr char const* name() noexcept { return "avx2"; }
+ };
+
+#if XSIMD_WITH_AVX2
+ namespace types
+ {
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx2, avx);
+ }
+#endif
+}
+
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp
new file mode 100644
index 0000000000..15c19832ae
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp
@@ -0,0 +1,48 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_AVX512BW_REGISTER_HPP
+#define XSIMD_AVX512BW_REGISTER_HPP
+
+#include "./xsimd_avx512dq_register.hpp"
+
+namespace xsimd
+{
+
+ /**
+ * @ingroup architectures
+ *
+ * AVX512BW instructions
+ */
+ struct avx512bw : avx512dq
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512BW; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(3, 4, 0); }
+ static constexpr char const* name() noexcept { return "avx512bw"; }
+ };
+
+#if XSIMD_WITH_AVX512BW
+
+ namespace types
+ {
+ template <class T>
+ struct get_bool_simd_register<T, avx512bw>
+ {
+ using type = simd_avx512_bool_register<T>;
+ };
+
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512bw, avx512dq);
+
+ }
+#endif
+}
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp
new file mode 100644
index 0000000000..29efca368c
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp
@@ -0,0 +1,48 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_AVX512CD_REGISTER_HPP
+#define XSIMD_AVX512CD_REGISTER_HPP
+
+#include "./xsimd_avx512f_register.hpp"
+
+namespace xsimd
+{
+
+ /**
+ * @ingroup architectures
+ *
+ * AVX512CD instructions
+ */
+ struct avx512cd : avx512f
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512CD; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(3, 2, 0); }
+ static constexpr char const* name() noexcept { return "avx512cd"; }
+ };
+
+#if XSIMD_WITH_AVX512CD
+
+ namespace types
+ {
+ template <class T>
+ struct get_bool_simd_register<T, avx512cd>
+ {
+ using type = simd_avx512_bool_register<T>;
+ };
+
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512cd, avx512f);
+
+ }
+#endif
+}
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp
new file mode 100644
index 0000000000..25a255ec15
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp
@@ -0,0 +1,48 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_AVX512DQ_REGISTER_HPP
+#define XSIMD_AVX512DQ_REGISTER_HPP
+
+#include "./xsimd_avx512cd_register.hpp"
+
+namespace xsimd
+{
+
+ /**
+ * @ingroup architectures
+ *
+ * AVX512DQ instructions
+ */
+ struct avx512dq : avx512cd
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512DQ; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(3, 3, 0); }
+ static constexpr char const* name() noexcept { return "avx512dq"; }
+ };
+
+#if XSIMD_WITH_AVX512DQ
+
+ namespace types
+ {
+ template <class T>
+ struct get_bool_simd_register<T, avx512dq>
+ {
+ using type = simd_avx512_bool_register<T>;
+ };
+
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512dq, avx512cd);
+
+ }
+#endif
+}
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp
new file mode 100644
index 0000000000..a99157cf37
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp
@@ -0,0 +1,48 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_AVX512ER_REGISTER_HPP
+#define XSIMD_AVX512ER_REGISTER_HPP
+
+#include "./xsimd_avx512dq_register.hpp"
+
+namespace xsimd
+{
+
+ /**
+ * @ingroup architectures
+ *
+ * AVX512ER instructions
+ */
+ struct avx512er : avx512cd
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512ER; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(3, 3, 1); }
+ static constexpr char const* name() noexcept { return "avx512er"; }
+ };
+
+#if XSIMD_WITH_AVX512ER
+
+ namespace types
+ {
+ template <class T>
+ struct get_bool_simd_register<T, avx512er>
+ {
+ using type = simd_avx512_bool_register<T>;
+ };
+
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512er, avx512cd);
+
+ }
+#endif
+}
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp
new file mode 100644
index 0000000000..c1f80a122d
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp
@@ -0,0 +1,74 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_AVX512F_REGISTER_HPP
+#define XSIMD_AVX512F_REGISTER_HPP
+
+#include "./xsimd_generic_arch.hpp"
+
+namespace xsimd
+{
+
+ /**
+ * @ingroup architectures
+ *
+ * AVX512F instructions
+ */
+ struct avx512f : generic
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512F; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(3, 1, 0); }
+ static constexpr std::size_t alignment() noexcept { return 64; }
+ static constexpr bool requires_alignment() noexcept { return true; }
+ static constexpr char const* name() noexcept { return "avx512f"; }
+ };
+
+#if XSIMD_WITH_AVX512F
+
+ namespace types
+ {
+ template <class T>
+ struct simd_avx512_bool_register
+ {
+ using register_type = typename std::conditional<
+ (sizeof(T) < 4), std::conditional<(sizeof(T) == 1), __mmask64, __mmask32>,
+ std::conditional<(sizeof(T) == 4), __mmask16, __mmask8>>::type::type;
+ register_type data;
+ simd_avx512_bool_register() = default;
+ simd_avx512_bool_register(register_type r) { data = r; }
+ operator register_type() const noexcept { return data; }
+ };
+ template <class T>
+ struct get_bool_simd_register<T, avx512f>
+ {
+ using type = simd_avx512_bool_register<T>;
+ };
+
+ XSIMD_DECLARE_SIMD_REGISTER(signed char, avx512f, __m512i);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned char, avx512f, __m512i);
+ XSIMD_DECLARE_SIMD_REGISTER(char, avx512f, __m512i);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned short, avx512f, __m512i);
+ XSIMD_DECLARE_SIMD_REGISTER(short, avx512f, __m512i);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned int, avx512f, __m512i);
+ XSIMD_DECLARE_SIMD_REGISTER(int, avx512f, __m512i);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned long int, avx512f, __m512i);
+ XSIMD_DECLARE_SIMD_REGISTER(long int, avx512f, __m512i);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned long long int, avx512f, __m512i);
+ XSIMD_DECLARE_SIMD_REGISTER(long long int, avx512f, __m512i);
+ XSIMD_DECLARE_SIMD_REGISTER(float, avx512f, __m512);
+ XSIMD_DECLARE_SIMD_REGISTER(double, avx512f, __m512d);
+
+ }
+#endif
+}
+
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp
new file mode 100644
index 0000000000..ba76ea147b
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp
@@ -0,0 +1,48 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_AVX512IFMA_REGISTER_HPP
+#define XSIMD_AVX512IFMA_REGISTER_HPP
+
+#include "./xsimd_avx512bw_register.hpp"
+
+namespace xsimd
+{
+
+ /**
+ * @ingroup architectures
+ *
+ * AVX512IFMA instructions
+ */
+ struct avx512ifma : avx512bw
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512IFMA; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(3, 5, 0); }
+ static constexpr char const* name() noexcept { return "avx512ifma"; }
+ };
+
+#if XSIMD_WITH_AVX512IFMA
+
+ namespace types
+ {
+ template <class T>
+ struct get_bool_simd_register<T, avx512ifma>
+ {
+ using type = simd_avx512_bool_register<T>;
+ };
+
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512ifma, avx512bw);
+
+ }
+#endif
+}
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp
new file mode 100644
index 0000000000..38a10f0227
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp
@@ -0,0 +1,48 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_AVX512PF_REGISTER_HPP
+#define XSIMD_AVX512PF_REGISTER_HPP
+
+#include "./xsimd_avx512er_register.hpp"
+
+namespace xsimd
+{
+
+ /**
+ * @ingroup architectures
+ *
+     * AVX512PF instructions
+ */
+ struct avx512pf : avx512er
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512PF; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(3, 4, 1); }
+ static constexpr char const* name() noexcept { return "avx512pf"; }
+ };
+
+#if XSIMD_WITH_AVX512PF
+
+ namespace types
+ {
+ template <class T>
+ struct get_bool_simd_register<T, avx512pf>
+ {
+ using type = simd_avx512_bool_register<T>;
+ };
+
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512pf, avx512er);
+
+ }
+#endif
+}
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp
new file mode 100644
index 0000000000..19ff744d72
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp
@@ -0,0 +1,48 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_AVX512VBMI_REGISTER_HPP
+#define XSIMD_AVX512VBMI_REGISTER_HPP
+
+#include "./xsimd_avx512ifma_register.hpp"
+
+namespace xsimd
+{
+
+ /**
+ * @ingroup architectures
+ *
+ * AVX512VBMI instructions
+ */
+ struct avx512vbmi : avx512ifma
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VBMI; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(3, 6, 0); }
+ static constexpr char const* name() noexcept { return "avx512vbmi"; }
+ };
+
+#if XSIMD_WITH_AVX512VBMI
+
+ namespace types
+ {
+ template <class T>
+ struct get_bool_simd_register<T, avx512vbmi>
+ {
+ using type = simd_avx512_bool_register<T>;
+ };
+
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vbmi, avx512ifma);
+
+ }
+#endif
+}
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp
new file mode 100644
index 0000000000..85edbdf230
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp
@@ -0,0 +1,51 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_AVX512VNNI_AVX512BW_REGISTER_HPP
+#define XSIMD_AVX512VNNI_AVX512BW_REGISTER_HPP
+
+#include "./xsimd_avx512bw_register.hpp"
+
+namespace xsimd
+{
+ template <typename arch>
+ struct avx512vnni;
+
+ /**
+ * @ingroup architectures
+ *
+ * AVX512VNNI instructions
+ */
+ template <>
+ struct avx512vnni<avx512bw> : avx512bw
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VNNI_AVX512BW; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(3, 4, 1); }
+ static constexpr char const* name() noexcept { return "avx512vnni+avx512bw"; }
+ };
+
+#if XSIMD_WITH_AVX512VNNI_AVX512BW
+
+ namespace types
+ {
+ template <class T>
+ struct get_bool_simd_register<T, avx512vnni<avx512bw>>
+ {
+ using type = simd_avx512_bool_register<T>;
+ };
+
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vnni<avx512bw>, avx512bw);
+
+ }
+#endif
+}
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi_register.hpp
new file mode 100644
index 0000000000..232b19a5cb
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi_register.hpp
@@ -0,0 +1,51 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_AVX512VNNI_AVX512VBMI_REGISTER_HPP
+#define XSIMD_AVX512VNNI_AVX512VBMI_REGISTER_HPP
+
+#include "./xsimd_avx512vbmi_register.hpp"
+
+namespace xsimd
+{
+ template <typename arch>
+ struct avx512vnni;
+
+ /**
+ * @ingroup architectures
+ *
+ * AVX512VNNI instructions
+ */
+ template <>
+ struct avx512vnni<avx512vbmi> : avx512vbmi
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VNNI_AVX512VBMI; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(3, 6, 1); }
+ static constexpr char const* name() noexcept { return "avx512vnni+avx512vbmi"; }
+ };
+
+#if XSIMD_WITH_AVX512VNNI_AVX512VBMI
+
+ namespace types
+ {
+ template <class T>
+ struct get_bool_simd_register<T, avx512vnni<avx512vbmi>>
+ {
+ using type = simd_avx512_bool_register<T>;
+ };
+
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vnni<avx512vbmi>, avx512vbmi);
+
+ }
+#endif
+}
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_register.hpp
new file mode 100644
index 0000000000..c276fb0079
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_avx512vnni_register.hpp
@@ -0,0 +1,48 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_AVX512VNNI_REGISTER_HPP
+#define XSIMD_AVX512VNNI_REGISTER_HPP
+
+#include "./xsimd_avx512vbmi_register.hpp"
+
+namespace xsimd
+{
+
+ /**
+ * @ingroup architectures
+ *
+ * AVX512VNNI instructions
+ */
+ struct avx512vnni : avx512vbmi
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VNNI; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(3, 7, 0); }
+ static constexpr char const* name() noexcept { return "avx512vnni"; }
+ };
+
+#if XSIMD_WITH_AVX512VNNI
+
+ namespace types
+ {
+ template <class T>
+ struct get_bool_simd_register<T, avx512vnni>
+ {
+ using type = simd_avx512_bool_register<T>;
+ };
+
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avx512vnni, avx512vbmi);
+
+ }
+#endif
+}
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avx_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avx_register.hpp
new file mode 100644
index 0000000000..6b1951f964
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_avx_register.hpp
@@ -0,0 +1,61 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_AVX_REGISTER_HPP
+#define XSIMD_AVX_REGISTER_HPP
+
+#include "./xsimd_generic_arch.hpp"
+
+namespace xsimd
+{
+
+ /**
+ * @ingroup architectures
+ *
+ * AVX instructions
+ */
+ struct avx : generic
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_AVX; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(2, 1, 0); }
+ static constexpr std::size_t alignment() noexcept { return 32; }
+ static constexpr bool requires_alignment() noexcept { return true; }
+ static constexpr char const* name() noexcept { return "avx"; }
+ };
+}
+
+#if XSIMD_WITH_AVX
+
+#include <immintrin.h>
+
+namespace xsimd
+{
+ namespace types
+ {
+
+ XSIMD_DECLARE_SIMD_REGISTER(signed char, avx, __m256i);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned char, avx, __m256i);
+ XSIMD_DECLARE_SIMD_REGISTER(char, avx, __m256i);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned short, avx, __m256i);
+ XSIMD_DECLARE_SIMD_REGISTER(short, avx, __m256i);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned int, avx, __m256i);
+ XSIMD_DECLARE_SIMD_REGISTER(int, avx, __m256i);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned long int, avx, __m256i);
+ XSIMD_DECLARE_SIMD_REGISTER(long int, avx, __m256i);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned long long int, avx, __m256i);
+ XSIMD_DECLARE_SIMD_REGISTER(long long int, avx, __m256i);
+ XSIMD_DECLARE_SIMD_REGISTER(float, avx, __m256);
+ XSIMD_DECLARE_SIMD_REGISTER(double, avx, __m256d);
+ }
+}
+#endif
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp
new file mode 100644
index 0000000000..f68fe16bad
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp
@@ -0,0 +1,40 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_AVXVNNI_REGISTER_HPP
+#define XSIMD_AVXVNNI_REGISTER_HPP
+
+#include "./xsimd_avx2_register.hpp"
+
+namespace xsimd
+{
+ /**
+ * @ingroup architectures
+ *
+ * AVXVNNI instructions
+ */
+ struct avxvnni : avx2
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_AVXVNNI; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(2, 3, 0); }
+ static constexpr char const* name() noexcept { return "avxvnni"; }
+ };
+
+#if XSIMD_WITH_AVXVNNI
+ namespace types
+ {
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(avxvnni, avx2);
+ }
+#endif
+}
+
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_batch.hpp b/third_party/xsimd/include/xsimd/types/xsimd_batch.hpp
new file mode 100644
index 0000000000..b4989fc88d
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_batch.hpp
@@ -0,0 +1,1492 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_BATCH_HPP
+#define XSIMD_BATCH_HPP
+
+#include <cassert>
+#include <complex>
+
+#include "../config/xsimd_arch.hpp"
+#include "../memory/xsimd_alignment.hpp"
+#include "./xsimd_utils.hpp"
+
+namespace xsimd
+{
+ template <class T, class A = default_arch>
+ class batch;
+
+ namespace types
+ {
+ template <class T, class A>
+ struct integral_only_operators
+ {
+ inline batch<T, A>& operator%=(batch<T, A> const& other) noexcept;
+ inline batch<T, A>& operator>>=(int32_t other) noexcept;
+ inline batch<T, A>& operator>>=(batch<T, A> const& other) noexcept;
+ inline batch<T, A>& operator<<=(int32_t other) noexcept;
+ inline batch<T, A>& operator<<=(batch<T, A> const& other) noexcept;
+
+ /** Shorthand for xsimd::mod() */
+ friend inline batch<T, A> operator%(batch<T, A> const& self, batch<T, A> const& other) noexcept
+ {
+ return batch<T, A>(self) %= other;
+ }
+
+ /** Shorthand for xsimd::bitwise_rshift() */
+ friend inline batch<T, A> operator>>(batch<T, A> const& self, batch<T, A> const& other) noexcept
+ {
+ return batch<T, A>(self) >>= other;
+ }
+
+ /** Shorthand for xsimd::bitwise_lshift() */
+ friend inline batch<T, A> operator<<(batch<T, A> const& self, batch<T, A> const& other) noexcept
+ {
+ return batch<T, A>(self) <<= other;
+ }
+
+ /** Shorthand for xsimd::bitwise_rshift() */
+ friend inline batch<T, A> operator>>(batch<T, A> const& self, int32_t other) noexcept
+ {
+ return batch<T, A>(self) >>= other;
+ }
+
+ /** Shorthand for xsimd::bitwise_lshift() */
+ friend inline batch<T, A> operator<<(batch<T, A> const& self, int32_t other) noexcept
+ {
+ return batch<T, A>(self) <<= other;
+ }
+ };
+ template <class A>
+ struct integral_only_operators<float, A>
+ {
+ };
+ template <class A>
+ struct integral_only_operators<double, A>
+ {
+ };
+
+ }
+
+ namespace details
+ {
+        // These functions are forward declared here so that they can be used by friend functions
+        // of batch<T, A>. Their implementation must appear only after the
+        // kernel implementations have been included.
+ template <class T, class A>
+ inline batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other) noexcept;
+
+ template <class T, class A>
+ inline batch_bool<T, A> neq(batch<T, A> const& self, batch<T, A> const& other) noexcept;
+
+ template <class T, class A>
+ inline batch_bool<T, A> ge(batch<T, A> const& self, batch<T, A> const& other) noexcept;
+
+ template <class T, class A>
+ inline batch_bool<T, A> le(batch<T, A> const& self, batch<T, A> const& other) noexcept;
+
+ template <class T, class A>
+ inline batch_bool<T, A> gt(batch<T, A> const& self, batch<T, A> const& other) noexcept;
+
+ template <class T, class A>
+ inline batch_bool<T, A> lt(batch<T, A> const& self, batch<T, A> const& other) noexcept;
+ }
+
+ /**
+ * @brief batch of integer or floating point values.
+ *
+     * Abstract representation of a SIMD register of floating point or integral
+     * values.
+     *
+     * @tparam T the type of the underlying values.
+     * @tparam A the architecture this batch is tied to.
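+     *
+     * A minimal usage sketch, assuming \c float is supported by the target
+     * architecture:
+     * \code{.cpp}
+     * xsimd::batch<float> a(1.5f);   // broadcast 1.5f into every lane
+     * auto b = a + 2.f;              // scalar operands are implicitly broadcast
+     * float out[xsimd::batch<float>::size];
+     * b.store_unaligned(out);        // every element of out now holds 3.5f
+     * \endcode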
+ **/
+ template <class T, class A>
+ class batch : public types::simd_register<T, A>, public types::integral_only_operators<T, A>
+ {
+ static_assert(!std::is_same<T, bool>::value, "use xsimd::batch_bool<T, A> instead of xsimd::batch<bool, A>");
+
+ public:
+ static constexpr std::size_t size = sizeof(types::simd_register<T, A>) / sizeof(T); ///< Number of scalar elements in this batch.
+
+ using value_type = T; ///< Type of the scalar elements within this batch.
+ using arch_type = A; ///< SIMD Architecture abstracted by this batch.
+ using register_type = typename types::simd_register<T, A>::register_type; ///< SIMD register type abstracted by this batch.
+ using batch_bool_type = batch_bool<T, A>; ///< Associated batch type used to represented logical operations on this batch.
+
+ // constructors
+ inline batch() = default; ///< Create a batch initialized with undefined values.
+ inline batch(T val) noexcept;
+ template <class... Ts>
+ inline batch(T val0, T val1, Ts... vals) noexcept;
+ inline explicit batch(batch_bool_type const& b) noexcept;
+ inline batch(register_type reg) noexcept;
+
+ template <class U>
+ XSIMD_NO_DISCARD static inline batch broadcast(U val) noexcept;
+
+ // memory operators
+ template <class U>
+ inline void store_aligned(U* mem) const noexcept;
+ template <class U>
+ inline void store_unaligned(U* mem) const noexcept;
+ template <class U>
+ inline void store(U* mem, aligned_mode) const noexcept;
+ template <class U>
+ inline void store(U* mem, unaligned_mode) const noexcept;
+
+ template <class U>
+ XSIMD_NO_DISCARD static inline batch load_aligned(U const* mem) noexcept;
+ template <class U>
+ XSIMD_NO_DISCARD static inline batch load_unaligned(U const* mem) noexcept;
+ template <class U>
+ XSIMD_NO_DISCARD static inline batch load(U const* mem, aligned_mode) noexcept;
+ template <class U>
+ XSIMD_NO_DISCARD static inline batch load(U const* mem, unaligned_mode) noexcept;
+
+ template <class U, class V>
+ XSIMD_NO_DISCARD static inline batch gather(U const* src, batch<V, arch_type> const& index) noexcept;
+ template <class U, class V>
+ inline void scatter(U* dst, batch<V, arch_type> const& index) const noexcept;
+
+ inline T get(std::size_t i) const noexcept;
+
+ // comparison operators. Defined as friends to enable automatic
+ // conversion of parameters from scalar to batch, at the cost of using
+ // proxy implementations from the details:: namespace.
+ friend inline batch_bool<T, A> operator==(batch const& self, batch const& other) noexcept
+ {
+ return details::eq<T, A>(self, other);
+ }
+ friend inline batch_bool<T, A> operator!=(batch const& self, batch const& other) noexcept
+ {
+ return details::neq<T, A>(self, other);
+ }
+ friend inline batch_bool<T, A> operator>=(batch const& self, batch const& other) noexcept
+ {
+ return details::ge<T, A>(self, other);
+ }
+ friend inline batch_bool<T, A> operator<=(batch const& self, batch const& other) noexcept
+ {
+ return details::le<T, A>(self, other);
+ }
+ friend inline batch_bool<T, A> operator>(batch const& self, batch const& other) noexcept
+ {
+ return details::gt<T, A>(self, other);
+ }
+ friend inline batch_bool<T, A> operator<(batch const& self, batch const& other) noexcept
+ {
+ return details::lt<T, A>(self, other);
+ }
+
+ // Update operators
+ inline batch& operator+=(batch const& other) noexcept;
+ inline batch& operator-=(batch const& other) noexcept;
+ inline batch& operator*=(batch const& other) noexcept;
+ inline batch& operator/=(batch const& other) noexcept;
+ inline batch& operator&=(batch const& other) noexcept;
+ inline batch& operator|=(batch const& other) noexcept;
+ inline batch& operator^=(batch const& other) noexcept;
+
+ // incr/decr operators
+ inline batch& operator++() noexcept;
+ inline batch& operator--() noexcept;
+ inline batch operator++(int) noexcept;
+ inline batch operator--(int) noexcept;
+
+ // unary operators
+ inline batch_bool_type operator!() const noexcept;
+ inline batch operator~() const noexcept;
+ inline batch operator-() const noexcept;
+ inline batch operator+() const noexcept;
+
+ // arithmetic operators. They are defined as friends to enable automatic
+ // conversion of parameters from scalar to batch. Inline implementations
+ // are required to avoid warnings.
+
+ /** Shorthand for xsimd::add() */
+ friend inline batch operator+(batch const& self, batch const& other) noexcept
+ {
+ return batch(self) += other;
+ }
+
+ /** Shorthand for xsimd::sub() */
+ friend inline batch operator-(batch const& self, batch const& other) noexcept
+ {
+ return batch(self) -= other;
+ }
+
+ /** Shorthand for xsimd::mul() */
+ friend inline batch operator*(batch const& self, batch const& other) noexcept
+ {
+ return batch(self) *= other;
+ }
+
+ /** Shorthand for xsimd::div() */
+ friend inline batch operator/(batch const& self, batch const& other) noexcept
+ {
+ return batch(self) /= other;
+ }
+
+ /** Shorthand for xsimd::bitwise_and() */
+ friend inline batch operator&(batch const& self, batch const& other) noexcept
+ {
+ return batch(self) &= other;
+ }
+
+ /** Shorthand for xsimd::bitwise_or() */
+ friend inline batch operator|(batch const& self, batch const& other) noexcept
+ {
+ return batch(self) |= other;
+ }
+
+ /** Shorthand for xsimd::bitwise_xor() */
+ friend inline batch operator^(batch const& self, batch const& other) noexcept
+ {
+ return batch(self) ^= other;
+ }
+
+ /** Shorthand for xsimd::logical_and() */
+ friend inline batch operator&&(batch const& self, batch const& other) noexcept
+ {
+ return batch(self).logical_and(other);
+ }
+
+ /** Shorthand for xsimd::logical_or() */
+ friend inline batch operator||(batch const& self, batch const& other) noexcept
+ {
+ return batch(self).logical_or(other);
+ }
+
+ private:
+ inline batch logical_and(batch const& other) const noexcept;
+ inline batch logical_or(batch const& other) const noexcept;
+ };
+
+ template <class T, class A>
+ constexpr std::size_t batch<T, A>::size;
+
+ /**
+ * @brief batch of predicates over scalar or complex values.
+ *
+ * Abstract representation of a predicate over an SIMD register for scalar or
+ * complex values.
+ *
+ * @tparam T the type of the predicated values.
+ * @tparam A the architecture this batch is tied to.
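+ *
+ * A minimal sketch (illustrative only; the mask value depends on the batch
+ * size of the target architecture):
+ * @code
+ * xsimd::batch<float> a(1.f), b(2.f);
+ * auto m = a < b;              // batch_bool<float> with every element set
+ * uint64_t bits = m.mask();    // scalar bit mask, one bit per element
+ * auto m2 = decltype(m)::from_mask(bits);
+ * @endcode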
+ **/
+ template <class T, class A = default_arch>
+ class batch_bool : public types::get_bool_simd_register_t<T, A>
+ {
+ using base_type = types::get_bool_simd_register_t<T, A>;
+
+ public:
+ static constexpr std::size_t size = sizeof(types::simd_register<T, A>) / sizeof(T); ///< Number of scalar elements in this batch.
+
+ using value_type = bool; ///< Type of the scalar elements within this batch.
+ using arch_type = A; ///< SIMD Architecture abstracted by this batch.
+ using register_type = typename base_type::register_type; ///< SIMD register type abstracted by this batch.
+ using batch_type = batch<T, A>; ///< Associated batch type this batch represents logical operations for.
+
+ // constructors
+ inline batch_bool() = default; ///< Create a batch initialized with undefined values.
+ inline batch_bool(bool val) noexcept;
+ inline batch_bool(register_type reg) noexcept;
+ template <class... Ts>
+ inline batch_bool(bool val0, bool val1, Ts... vals) noexcept;
+
+ template <class Tp>
+ inline batch_bool(Tp const*) = delete;
+
+ // memory operators
+ inline void store_aligned(bool* mem) const noexcept;
+ inline void store_unaligned(bool* mem) const noexcept;
+ XSIMD_NO_DISCARD static inline batch_bool load_aligned(bool const* mem) noexcept;
+ XSIMD_NO_DISCARD static inline batch_bool load_unaligned(bool const* mem) noexcept;
+
+ inline bool get(std::size_t i) const noexcept;
+
+ // mask operations
+ inline uint64_t mask() const noexcept;
+ inline static batch_bool from_mask(uint64_t mask) noexcept;
+
+ // comparison operators
+ inline batch_bool operator==(batch_bool const& other) const noexcept;
+ inline batch_bool operator!=(batch_bool const& other) const noexcept;
+
+ // logical operators
+ inline batch_bool operator~() const noexcept;
+ inline batch_bool operator!() const noexcept;
+ inline batch_bool operator&(batch_bool const& other) const noexcept;
+ inline batch_bool operator|(batch_bool const& other) const noexcept;
+ inline batch_bool operator^(batch_bool const& other) const noexcept;
+ inline batch_bool operator&&(batch_bool const& other) const noexcept;
+ inline batch_bool operator||(batch_bool const& other) const noexcept;
+
+ // update operators
+ inline batch_bool& operator&=(batch_bool const& other) noexcept { return (*this) = (*this) & other; }
+ inline batch_bool& operator|=(batch_bool const& other) noexcept { return (*this) = (*this) | other; }
+ inline batch_bool& operator^=(batch_bool const& other) noexcept { return (*this) = (*this) ^ other; }
+
+ private:
+ template <class U, class... V, size_t I, size_t... Is>
+ static inline register_type make_register(detail::index_sequence<I, Is...>, U u, V... v) noexcept;
+
+ template <class... V>
+ static inline register_type make_register(detail::index_sequence<>, V... v) noexcept;
+ };
+
+ template <class T, class A>
+ constexpr std::size_t batch_bool<T, A>::size;
+
+ /**
+ * @brief batch of complex values.
+ *
+ * Abstract representation of an SIMD register for complex values.
+ *
+ * @tparam T the type of the underlying values.
+ * @tparam A the architecture this batch is tied to.
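+ *
+ * A minimal sketch (illustrative only; assumes the default architecture):
+ * @code
+ * using cb_type = xsimd::batch<std::complex<float>>;
+ * float re[cb_type::size] = {};
+ * float im[cb_type::size] = {};
+ * cb_type z = cb_type::load_unaligned(re, im);
+ * auto norm2 = z.real() * z.real() + z.imag() * z.imag();
+ * @endcode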
+ **/
+ template <class T, class A>
+ class batch<std::complex<T>, A>
+ {
+ public:
+ using value_type = std::complex<T>; ///< Type of the complex elements within this batch.
+ using real_batch = batch<T, A>; ///< Type of the batches holding the real and imaginary parts.
+ using arch_type = A; ///< SIMD Architecture abstracted by this batch.
+ using batch_bool_type = batch_bool<T, A>; ///< Associated batch type used to represent logical operations on this batch.
+
+ static constexpr std::size_t size = real_batch::size; ///< Number of complex elements in this batch.
+
+ // constructors
+ inline batch() = default; ///< Create a batch initialized with undefined values.
+ inline batch(value_type const& val) noexcept;
+ inline batch(real_batch const& real, real_batch const& imag) noexcept;
+
+ inline batch(real_batch const& real) noexcept;
+ inline batch(T val) noexcept;
+ template <class... Ts>
+ inline batch(value_type val0, value_type val1, Ts... vals) noexcept;
+ inline explicit batch(batch_bool_type const& b) noexcept;
+
+ template <class U>
+ XSIMD_NO_DISCARD static inline batch broadcast(U val) noexcept;
+
+ // memory operators
+ XSIMD_NO_DISCARD static inline batch load_aligned(const T* real_src, const T* imag_src = nullptr) noexcept;
+ XSIMD_NO_DISCARD static inline batch load_unaligned(const T* real_src, const T* imag_src = nullptr) noexcept;
+ inline void store_aligned(T* real_dst, T* imag_dst) const noexcept;
+ inline void store_unaligned(T* real_dst, T* imag_dst) const noexcept;
+
+ XSIMD_NO_DISCARD static inline batch load_aligned(const value_type* src) noexcept;
+ XSIMD_NO_DISCARD static inline batch load_unaligned(const value_type* src) noexcept;
+ inline void store_aligned(value_type* dst) const noexcept;
+ inline void store_unaligned(value_type* dst) const noexcept;
+
+ template <class U>
+ XSIMD_NO_DISCARD static inline batch load(U const* mem, aligned_mode) noexcept;
+ template <class U>
+ XSIMD_NO_DISCARD static inline batch load(U const* mem, unaligned_mode) noexcept;
+ template <class U>
+ inline void store(U* mem, aligned_mode) const noexcept;
+ template <class U>
+ inline void store(U* mem, unaligned_mode) const noexcept;
+
+ inline real_batch real() const noexcept;
+ inline real_batch imag() const noexcept;
+
+ inline value_type get(std::size_t i) const noexcept;
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+ // xtl-related methods
+ template <bool i3ec>
+ inline batch(xtl::xcomplex<T, T, i3ec> const& val) noexcept;
+ template <bool i3ec, class... Ts>
+ inline batch(xtl::xcomplex<T, T, i3ec> val0, xtl::xcomplex<T, T, i3ec> val1, Ts... vals) noexcept;
+
+ template <bool i3ec>
+ XSIMD_NO_DISCARD static inline batch load_aligned(const xtl::xcomplex<T, T, i3ec>* src) noexcept;
+ template <bool i3ec>
+ XSIMD_NO_DISCARD static inline batch load_unaligned(const xtl::xcomplex<T, T, i3ec>* src) noexcept;
+ template <bool i3ec>
+ inline void store_aligned(xtl::xcomplex<T, T, i3ec>* dst) const noexcept;
+ template <bool i3ec>
+ inline void store_unaligned(xtl::xcomplex<T, T, i3ec>* dst) const noexcept;
+#endif
+
+ // comparison operators
+ inline batch_bool<T, A> operator==(batch const& other) const noexcept;
+ inline batch_bool<T, A> operator!=(batch const& other) const noexcept;
+
+ // Update operators
+ inline batch& operator+=(batch const& other) noexcept;
+ inline batch& operator-=(batch const& other) noexcept;
+ inline batch& operator*=(batch const& other) noexcept;
+ inline batch& operator/=(batch const& other) noexcept;
+
+ // incr/decr operators
+ inline batch& operator++() noexcept;
+ inline batch& operator--() noexcept;
+ inline batch operator++(int) noexcept;
+ inline batch operator--(int) noexcept;
+
+ // unary operators
+ inline batch_bool_type operator!() const noexcept;
+ inline batch operator~() const noexcept;
+ inline batch operator-() const noexcept;
+ inline batch operator+() const noexcept;
+
+ // arithmetic operators. They are defined as friends to enable automatic
+ // conversion of parameters from scalar to batch
+
+ /** Shorthand for xsimd::add() */
+ friend inline batch operator+(batch const& self, batch const& other) noexcept
+ {
+ return batch(self) += other;
+ }
+
+ /** Shorthand for xsimd::sub() */
+ friend inline batch operator-(batch const& self, batch const& other) noexcept
+ {
+ return batch(self) -= other;
+ }
+
+ /** Shorthand for xsimd::mul() */
+ friend inline batch operator*(batch const& self, batch const& other) noexcept
+ {
+ return batch(self) *= other;
+ }
+
+ /** Shorthand for xsimd::div() */
+ friend inline batch operator/(batch const& self, batch const& other) noexcept
+ {
+ return batch(self) /= other;
+ }
+
+ private:
+ real_batch m_real;
+ real_batch m_imag;
+ };
+
+ template <class T, class A>
+ constexpr std::size_t batch<std::complex<T>, A>::size;
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+ template <typename T, bool i3ec, typename A>
+ struct batch<xtl::xcomplex<T, T, i3ec>, A>
+ {
+ static_assert(std::is_same<T, void>::value,
+ "Please use batch<std::complex<T>, A> initialized from xtl::xcomplex instead");
+ };
+#endif
+}
+
+#include "../arch/xsimd_isa.hpp"
+#include "./xsimd_batch_constant.hpp"
+#include "./xsimd_traits.hpp"
+
+namespace xsimd
+{
+
+ /**
+ * Create a batch with all elements initialized to \c val.
+ */
+ template <class T, class A>
+ inline batch<T, A>::batch(T val) noexcept
+ : types::simd_register<T, A>(kernel::broadcast<A>(val, A {}))
+ {
+ detail::static_check_supported_config<T, A>();
+ }
+
+ /**
+ * Create a batch with elements initialized from \c val0, \c val1, \c vals...
+ * There must be exactly \c size elements in total.
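+ *
+ * For instance (illustrative only; valid when the batch holds exactly four
+ * elements, e.g. with the sse2 architecture enabled):
+ * @code
+ * xsimd::batch<int32_t, xsimd::sse2> v(1, 2, 3, 4);
+ * @endcode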
+ */
+ template <class T, class A>
+ template <class... Ts>
+ inline batch<T, A>::batch(T val0, T val1, Ts... vals) noexcept
+ : batch(kernel::set<A>(batch {}, A {}, val0, val1, static_cast<T>(vals)...))
+ {
+ detail::static_check_supported_config<T, A>();
+ static_assert(sizeof...(Ts) + 2 == size, "The constructor requires as many arguments as batch elements.");
+ }
+
+ /**
+ * Converts a \c batch_bool to a \c batch where each element is
+ * set to 1 (resp. 0) if the corresponding element is `true`
+ * (resp. `false`).
+ */
+ template <class T, class A>
+ inline batch<T, A>::batch(batch_bool<T, A> const& b) noexcept
+ : batch(kernel::from_bool(b, A {}))
+ {
+ }
+
+ /**
+ * Wraps a compatible native SIMD register as a \c batch. This is generally not needed but
+ * becomes handy when doing architecture-specific operations.
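+ *
+ * A sketch (illustrative only; assumes SSE2 is enabled and the SSE intrinsics
+ * are available):
+ * @code
+ * __m128 r = _mm_set1_ps(1.f);
+ * xsimd::batch<float, xsimd::sse2> v(r);
+ * @endcode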
+ */
+ template <class T, class A>
+ inline batch<T, A>::batch(register_type reg) noexcept
+ : types::simd_register<T, A>({ reg })
+ {
+ detail::static_check_supported_config<T, A>();
+ }
+
+ /**
+ * Equivalent to batch::batch(T val).
+ */
+ template <class T, class A>
+ template <class U>
+ XSIMD_NO_DISCARD inline batch<T, A> batch<T, A>::broadcast(U val) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return batch(static_cast<T>(val));
+ }
+
+ /**************************
+ * batch memory operators *
+ **************************/
+
+ /**
+ * Copy content of this batch to the buffer \c mem. The
+ * memory needs to be aligned.
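+ *
+ * A sketch (illustrative only; the buffer is aligned with alignas here, any
+ * suitably aligned allocation works):
+ * @code
+ * alignas(xsimd::default_arch::alignment()) float buf[xsimd::batch<float>::size];
+ * xsimd::batch<float>(1.5f).store_aligned(buf);
+ * @endcode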
+ */
+ template <class T, class A>
+ template <class U>
+ inline void batch<T, A>::store_aligned(U* mem) const noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ assert(((reinterpret_cast<uintptr_t>(mem) % A::alignment()) == 0)
+ && "store location is not properly aligned");
+ kernel::store_aligned<A>(mem, *this, A {});
+ }
+
+ /**
+ * Copy content of this batch to the buffer \c mem. The
+ * memory does not need to be aligned.
+ */
+ template <class T, class A>
+ template <class U>
+ inline void batch<T, A>::store_unaligned(U* mem) const noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ kernel::store_unaligned<A>(mem, *this, A {});
+ }
+
+ /**
+ * Equivalent to batch::store_aligned()
+ */
+ template <class T, class A>
+ template <class U>
+ inline void batch<T, A>::store(U* mem, aligned_mode) const noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return store_aligned(mem);
+ }
+
+ /**
+ * Equivalent to batch::store_unaligned()
+ */
+ template <class T, class A>
+ template <class U>
+ inline void batch<T, A>::store(U* mem, unaligned_mode) const noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return store_unaligned(mem);
+ }
+
+ /**
+ * Loading from aligned memory. May involve a conversion if \c U is different
+ * from \c T.
+ */
+ template <class T, class A>
+ template <class U>
+ inline batch<T, A> batch<T, A>::load_aligned(U const* mem) noexcept
+ {
+ assert(((reinterpret_cast<uintptr_t>(mem) % A::alignment()) == 0)
+ && "loaded pointer is not properly aligned");
+ detail::static_check_supported_config<T, A>();
+ return kernel::load_aligned<A>(mem, kernel::convert<T> {}, A {});
+ }
+
+ /**
+ * Loading from unaligned memory. May involve a conversion if \c U is different
+ * from \c T.
+ */
+ template <class T, class A>
+ template <class U>
+ inline batch<T, A> batch<T, A>::load_unaligned(U const* mem) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::load_unaligned<A>(mem, kernel::convert<T> {}, A {});
+ }
+
+ /**
+ * Equivalent to batch::load_aligned()
+ */
+ template <class T, class A>
+ template <class U>
+ inline batch<T, A> batch<T, A>::load(U const* mem, aligned_mode) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return load_aligned(mem);
+ }
+
+ /**
+ * Equivalent to batch::load_unaligned()
+ */
+ template <class T, class A>
+ template <class U>
+ inline batch<T, A> batch<T, A>::load(U const* mem, unaligned_mode) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return load_unaligned(mem);
+ }
+
+ /**
+ * Create a new batch gathering elements starting at address \c src and
+ * offset by each element in \c index.
+ * If \c T is not of the same size as \c U, a \c static_cast is performed
+ * at element gather time.
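+ *
+ * A sketch (illustrative only; assumes batch<int32_t> has the same element
+ * count as batch<float> on the target architecture):
+ * @code
+ * float data[64] = {};
+ * xsimd::batch<int32_t> idx(3);   // every lane reads data[3]
+ * auto g = xsimd::batch<float>::gather(data, idx);
+ * @endcode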
+ */
+ template <class T, class A>
+ template <typename U, typename V>
+ inline batch<T, A> batch<T, A>::gather(U const* src, batch<V, A> const& index) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ static_assert(std::is_convertible<T, U>::value, "Can't convert from src to this batch's type!");
+ return kernel::gather(batch {}, src, index, A {});
+ }
+
+ /**
+ * Scatter elements from this batch into addresses starting at \c dst
+ * and offset by each element in \c index.
+ * If \c T is not of the same size as \c U, a \c static_cast is performed
+ * at element scatter time.
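+ *
+ * A sketch (illustrative only, mirroring the gather example above):
+ * @code
+ * float dst[64] = {};
+ * xsimd::batch<int32_t> idx(3);
+ * xsimd::batch<float> v(1.5f);
+ * v.scatter(dst, idx);   // every lane writes 1.5f to dst[3]
+ * @endcode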
+ */
+ template <class T, class A>
+ template <class U, class V>
+ inline void batch<T, A>::scatter(U* dst, batch<V, A> const& index) const noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ static_assert(std::is_convertible<T, U>::value, "Can't convert from this batch's type to dst!");
+ kernel::scatter<A>(*this, dst, index, A {});
+ }
+
+ /**
+ * Retrieve the \c i th scalar element in this batch.
+ *
+ * \warning This is very inefficient and should only be used for debugging purposes.
+ */
+ template <class T, class A>
+ inline T batch<T, A>::get(std::size_t i) const noexcept
+ {
+ return kernel::get(*this, i, A {});
+ }
+
+ /******************************
+ * batch comparison operators *
+ ******************************/
+ namespace details
+ {
+ /**
+ * Shorthand for xsimd::eq()
+ */
+ template <class T, class A>
+ inline batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::eq<A>(self, other, A {});
+ }
+
+ /**
+ * Shorthand for xsimd::neq()
+ */
+ template <class T, class A>
+ inline batch_bool<T, A> neq(batch<T, A> const& self, batch<T, A> const& other) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::neq<A>(self, other, A {});
+ }
+
+ /**
+ * Shorthand for xsimd::ge()
+ */
+ template <class T, class A>
+ inline batch_bool<T, A> ge(batch<T, A> const& self, batch<T, A> const& other) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::ge<A>(self, other, A {});
+ }
+
+ /**
+ * Shorthand for xsimd::le()
+ */
+ template <class T, class A>
+ inline batch_bool<T, A> le(batch<T, A> const& self, batch<T, A> const& other) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::le<A>(self, other, A {});
+ }
+
+ /**
+ * Shorthand for xsimd::gt()
+ */
+ template <class T, class A>
+ inline batch_bool<T, A> gt(batch<T, A> const& self, batch<T, A> const& other) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::gt<A>(self, other, A {});
+ }
+
+ /**
+ * Shorthand for xsimd::lt()
+ */
+ template <class T, class A>
+ inline batch_bool<T, A> lt(batch<T, A> const& self, batch<T, A> const& other) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::lt<A>(self, other, A {});
+ }
+ }
+
+ /**************************
+ * batch update operators *
+ **************************/
+
+ template <class T, class A>
+ inline batch<T, A>& batch<T, A>::operator+=(batch<T, A> const& other) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return *this = kernel::add<A>(*this, other, A {});
+ }
+
+ template <class T, class A>
+ inline batch<T, A>& batch<T, A>::operator-=(batch<T, A> const& other) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return *this = kernel::sub<A>(*this, other, A {});
+ }
+
+ template <class T, class A>
+ inline batch<T, A>& batch<T, A>::operator*=(batch<T, A> const& other) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return *this = kernel::mul<A>(*this, other, A {});
+ }
+
+ template <class T, class A>
+ inline batch<T, A>& batch<T, A>::operator/=(batch<T, A> const& other) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return *this = kernel::div<A>(*this, other, A {});
+ }
+
+ template <class T, class A>
+ inline batch<T, A>& types::integral_only_operators<T, A>::operator%=(batch<T, A> const& other) noexcept
+ {
+ ::xsimd::detail::static_check_supported_config<T, A>();
+ return *static_cast<batch<T, A>*>(this) = kernel::mod<A>(*static_cast<batch<T, A>*>(this), other, A {});
+ }
+
+ template <class T, class A>
+ inline batch<T, A>& batch<T, A>::operator&=(batch<T, A> const& other) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return *this = kernel::bitwise_and<A>(*this, other, A {});
+ }
+
+ template <class T, class A>
+ inline batch<T, A>& batch<T, A>::operator|=(batch<T, A> const& other) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return *this = kernel::bitwise_or<A>(*this, other, A {});
+ }
+
+ template <class T, class A>
+ inline batch<T, A>& batch<T, A>::operator^=(batch<T, A> const& other) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return *this = kernel::bitwise_xor<A>(*this, other, A {});
+ }
+
+ template <class T, class A>
+ inline batch<T, A>& kernel::integral_only_operators<T, A>::operator>>=(batch<T, A> const& other) noexcept
+ {
+ ::xsimd::detail::static_check_supported_config<T, A>();
+ return *static_cast<batch<T, A>*>(this) = kernel::bitwise_rshift<A>(*static_cast<batch<T, A>*>(this), other, A {});
+ }
+
+ template <class T, class A>
+ inline batch<T, A>& kernel::integral_only_operators<T, A>::operator<<=(batch<T, A> const& other) noexcept
+ {
+ ::xsimd::detail::static_check_supported_config<T, A>();
+ return *static_cast<batch<T, A>*>(this) = kernel::bitwise_lshift<A>(*static_cast<batch<T, A>*>(this), other, A {});
+ }
+
+ template <class T, class A>
+ inline batch<T, A>& kernel::integral_only_operators<T, A>::operator>>=(int32_t other) noexcept
+ {
+ ::xsimd::detail::static_check_supported_config<T, A>();
+ return *static_cast<batch<T, A>*>(this) = kernel::bitwise_rshift<A>(*static_cast<batch<T, A>*>(this), other, A {});
+ }
+
+ template <class T, class A>
+ inline batch<T, A>& kernel::integral_only_operators<T, A>::operator<<=(int32_t other) noexcept
+ {
+ ::xsimd::detail::static_check_supported_config<T, A>();
+ return *static_cast<batch<T, A>*>(this) = kernel::bitwise_lshift<A>(*static_cast<batch<T, A>*>(this), other, A {});
+ }
+
+ /*****************************
+ * batch incr/decr operators *
+ *****************************/
+
+ template <class T, class A>
+ inline batch<T, A>& batch<T, A>::operator++() noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return operator+=(1);
+ }
+
+ template <class T, class A>
+ inline batch<T, A>& batch<T, A>::operator--() noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return operator-=(1);
+ }
+
+ template <class T, class A>
+ inline batch<T, A> batch<T, A>::operator++(int) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ batch<T, A> copy(*this);
+ operator+=(1);
+ return copy;
+ }
+
+ template <class T, class A>
+ inline batch<T, A> batch<T, A>::operator--(int) noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ batch copy(*this);
+ operator-=(1);
+ return copy;
+ }
+
+ /*************************
+ * batch unary operators *
+ *************************/
+
+ template <class T, class A>
+ inline batch_bool<T, A> batch<T, A>::operator!() const noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::eq<A>(*this, batch(0), A {});
+ }
+
+ template <class T, class A>
+ inline batch<T, A> batch<T, A>::operator~() const noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::bitwise_not<A>(*this, A {});
+ }
+
+ template <class T, class A>
+ inline batch<T, A> batch<T, A>::operator-() const noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return kernel::neg<A>(*this, A {});
+ }
+
+ template <class T, class A>
+ inline batch<T, A> batch<T, A>::operator+() const noexcept
+ {
+ detail::static_check_supported_config<T, A>();
+ return *this;
+ }
+
+ /************************
+ * batch private method *
+ ************************/
+
+ template <class T, class A>
+ inline batch<T, A> batch<T, A>::logical_and(batch<T, A> const& other) const noexcept
+ {
+ return kernel::logical_and<A>(*this, other, A());
+ }
+
+ template <class T, class A>
+ inline batch<T, A> batch<T, A>::logical_or(batch<T, A> const& other) const noexcept
+ {
+ return kernel::logical_or<A>(*this, other, A());
+ }
+
+ /***************************
+ * batch_bool constructors *
+ ***************************/
+
+ template <class T, class A>
+ inline batch_bool<T, A>::batch_bool(register_type reg) noexcept
+ : types::get_bool_simd_register_t<T, A>({ reg })
+ {
+ }
+
+ template <class T, class A>
+ template <class... Ts>
+ inline batch_bool<T, A>::batch_bool(bool val0, bool val1, Ts... vals) noexcept
+ : batch_bool(kernel::set<A>(batch_bool {}, A {}, val0, val1, static_cast<bool>(vals)...))
+ {
+ static_assert(sizeof...(Ts) + 2 == size, "The constructor requires as many arguments as batch elements.");
+ }
+
+ /*******************************
+ * batch_bool memory operators *
+ *******************************/
+
+ template <class T, class A>
+ inline void batch_bool<T, A>::store_aligned(bool* mem) const noexcept
+ {
+ kernel::store(*this, mem, A {});
+ }
+
+ template <class T, class A>
+ inline void batch_bool<T, A>::store_unaligned(bool* mem) const noexcept
+ {
+ store_aligned(mem);
+ }
+
+ template <class T, class A>
+ inline batch_bool<T, A> batch_bool<T, A>::load_aligned(bool const* mem) noexcept
+ {
+ batch_type ref(0);
+ alignas(A::alignment()) T buffer[size];
+ for (std::size_t i = 0; i < size; ++i)
+ buffer[i] = mem[i] ? 1 : 0;
+ return ref != batch_type::load_aligned(&buffer[0]);
+ }
+
+ template <class T, class A>
+ inline batch_bool<T, A> batch_bool<T, A>::load_unaligned(bool const* mem) noexcept
+ {
+ return load_aligned(mem);
+ }
+
+ /**
+ * Extract a scalar mask representation from this @c batch_bool.
+ *
+ * @return bit mask
+ */
+ template <class T, class A>
+ inline uint64_t batch_bool<T, A>::mask() const noexcept
+ {
+ return kernel::mask(*this, A {});
+ }
+
+ /**
+ * Build a @c batch_bool from a bit mask representation, the inverse of mask().
+ *
+ * @param mask bit mask, one bit per element
+ * @return the resulting @c batch_bool
+ */
+ template <class T, class A>
+ inline batch_bool<T, A> batch_bool<T, A>::from_mask(uint64_t mask) noexcept
+ {
+ return kernel::from_mask(batch_bool<T, A>(), mask, A {});
+ }
+
+ template <class T, class A>
+ inline bool batch_bool<T, A>::get(std::size_t i) const noexcept
+ {
+ return kernel::get(*this, i, A {});
+ }
+
+ /***********************************
+ * batch_bool comparison operators *
+ ***********************************/
+
+ template <class T, class A>
+ inline batch_bool<T, A> batch_bool<T, A>::operator==(batch_bool<T, A> const& other) const noexcept
+ {
+ return kernel::eq<A>(*this, other, A {}).data;
+ }
+
+ template <class T, class A>
+ inline batch_bool<T, A> batch_bool<T, A>::operator!=(batch_bool<T, A> const& other) const noexcept
+ {
+ return kernel::neq<A>(*this, other, A {}).data;
+ }
+
+ /********************************
+ * batch_bool logical operators *
+ ********************************/
+
+ template <class T, class A>
+ inline batch_bool<T, A> batch_bool<T, A>::operator~() const noexcept
+ {
+ return kernel::bitwise_not<A>(*this, A {}).data;
+ }
+
+ template <class T, class A>
+ inline batch_bool<T, A> batch_bool<T, A>::operator!() const noexcept
+ {
+ return operator==(batch_bool(false));
+ }
+
+ template <class T, class A>
+ inline batch_bool<T, A> batch_bool<T, A>::operator&(batch_bool<T, A> const& other) const noexcept
+ {
+ return kernel::bitwise_and<A>(*this, other, A {}).data;
+ }
+
+ template <class T, class A>
+ inline batch_bool<T, A> batch_bool<T, A>::operator|(batch_bool<T, A> const& other) const noexcept
+ {
+ return kernel::bitwise_or<A>(*this, other, A {}).data;
+ }
+
+ template <class T, class A>
+ inline batch_bool<T, A> batch_bool<T, A>::operator^(batch_bool<T, A> const& other) const noexcept
+ {
+ return kernel::bitwise_xor<A>(*this, other, A {}).data;
+ }
+
+ template <class T, class A>
+ inline batch_bool<T, A> batch_bool<T, A>::operator&&(batch_bool const& other) const noexcept
+ {
+ return operator&(other);
+ }
+
+ template <class T, class A>
+ inline batch_bool<T, A> batch_bool<T, A>::operator||(batch_bool const& other) const noexcept
+ {
+ return operator|(other);
+ }
+
+ /******************************
+ * batch_bool private methods *
+ ******************************/
+
+ template <class T, class A>
+ inline batch_bool<T, A>::batch_bool(bool val) noexcept
+ : base_type { make_register(detail::make_index_sequence<size - 1>(), val) }
+ {
+ }
+
+ template <class T, class A>
+ template <class U, class... V, size_t I, size_t... Is>
+ inline auto batch_bool<T, A>::make_register(detail::index_sequence<I, Is...>, U u, V... v) noexcept -> register_type
+ {
+ return make_register(detail::index_sequence<Is...>(), u, u, v...);
+ }
+
+ template <class T, class A>
+ template <class... V>
+ inline auto batch_bool<T, A>::make_register(detail::index_sequence<>, V... v) noexcept -> register_type
+ {
+ return kernel::set<A>(batch_bool<T, A>(), A {}, v...).data;
+ }
+
+ /*******************************
+ * batch<complex> constructors *
+ *******************************/
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A>::batch(value_type const& val) noexcept
+ : m_real(val.real())
+ , m_imag(val.imag())
+ {
+ }
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A>::batch(real_batch const& real, real_batch const& imag) noexcept
+ : m_real(real)
+ , m_imag(imag)
+ {
+ }
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A>::batch(real_batch const& real) noexcept
+ : m_real(real)
+ , m_imag(0)
+ {
+ }
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A>::batch(T val) noexcept
+ : m_real(val)
+ , m_imag(0)
+ {
+ }
+
+ template <class T, class A>
+ template <class... Ts>
+ inline batch<std::complex<T>, A>::batch(value_type val0, value_type val1, Ts... vals) noexcept
+ : batch(kernel::set<A>(batch {}, A {}, val0, val1, static_cast<value_type>(vals)...))
+ {
+ static_assert(sizeof...(Ts) + 2 == size, "as many arguments as batch elements");
+ }
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A>::batch(batch_bool_type const& b) noexcept
+ : m_real(b)
+ , m_imag(0)
+ {
+ }
+
+ template <class T, class A>
+ template <class U>
+ XSIMD_NO_DISCARD inline batch<std::complex<T>, A> batch<std::complex<T>, A>::broadcast(U val) noexcept
+ {
+ return batch(static_cast<std::complex<T>>(val));
+ }
+
+ /***********************************
+ * batch<complex> memory operators *
+ ***********************************/
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A> batch<std::complex<T>, A>::load_aligned(const T* real_src, const T* imag_src) noexcept
+ {
+ return { batch<T, A>::load_aligned(real_src), imag_src ? batch<T, A>::load_aligned(imag_src) : batch<T, A>(0) };
+ }
+ template <class T, class A>
+ inline batch<std::complex<T>, A> batch<std::complex<T>, A>::load_unaligned(const T* real_src, const T* imag_src) noexcept
+ {
+ return { batch<T, A>::load_unaligned(real_src), imag_src ? batch<T, A>::load_unaligned(imag_src) : batch<T, A>(0) };
+ }
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A> batch<std::complex<T>, A>::load_aligned(const value_type* src) noexcept
+ {
+ assert(((reinterpret_cast<uintptr_t>(src) % A::alignment()) == 0)
+ && "loaded pointer is not properly aligned");
+ return kernel::load_complex_aligned<A>(src, kernel::convert<value_type> {}, A {});
+ }
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A> batch<std::complex<T>, A>::load_unaligned(const value_type* src) noexcept
+ {
+ return kernel::load_complex_unaligned<A>(src, kernel::convert<value_type> {}, A {});
+ }
+
+ template <class T, class A>
+ inline void batch<std::complex<T>, A>::store_aligned(value_type* dst) const noexcept
+ {
+ assert(((reinterpret_cast<uintptr_t>(dst) % A::alignment()) == 0)
+ && "store location is not properly aligned");
+ return kernel::store_complex_aligned(dst, *this, A {});
+ }
+
+ template <class T, class A>
+ inline void batch<std::complex<T>, A>::store_unaligned(value_type* dst) const noexcept
+ {
+ return kernel::store_complex_unaligned(dst, *this, A {});
+ }
+
+ template <class T, class A>
+ inline void batch<std::complex<T>, A>::store_aligned(T* real_dst, T* imag_dst) const noexcept
+ {
+ m_real.store_aligned(real_dst);
+ m_imag.store_aligned(imag_dst);
+ }
+
+ template <class T, class A>
+ inline void batch<std::complex<T>, A>::store_unaligned(T* real_dst, T* imag_dst) const noexcept
+ {
+ m_real.store_unaligned(real_dst);
+ m_imag.store_unaligned(imag_dst);
+ }
+
+ template <class T, class A>
+ template <class U>
+ inline batch<std::complex<T>, A> batch<std::complex<T>, A>::load(U const* mem, aligned_mode) noexcept
+ {
+ return load_aligned(mem);
+ }
+
+ template <class T, class A>
+ template <class U>
+ inline batch<std::complex<T>, A> batch<std::complex<T>, A>::load(U const* mem, unaligned_mode) noexcept
+ {
+ return load_unaligned(mem);
+ }
+
+ template <class T, class A>
+ template <class U>
+ inline void batch<std::complex<T>, A>::store(U* mem, aligned_mode) const noexcept
+ {
+ return store_aligned(mem);
+ }
+
+ template <class T, class A>
+ template <class U>
+ inline void batch<std::complex<T>, A>::store(U* mem, unaligned_mode) const noexcept
+ {
+ return store_unaligned(mem);
+ }
+
+ template <class T, class A>
+ inline auto batch<std::complex<T>, A>::real() const noexcept -> real_batch
+ {
+ return m_real;
+ }
+
+ template <class T, class A>
+ inline auto batch<std::complex<T>, A>::imag() const noexcept -> real_batch
+ {
+ return m_imag;
+ }
+
+ template <class T, class A>
+ inline auto batch<std::complex<T>, A>::get(std::size_t i) const noexcept -> value_type
+ {
+ return kernel::get(*this, i, A {});
+ }
+
+ /**************************************
+ * batch<complex> xtl-related methods *
+ **************************************/
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+
+ template <class T, class A>
+ template <bool i3ec>
+ inline batch<std::complex<T>, A>::batch(xtl::xcomplex<T, T, i3ec> const& val) noexcept
+ : m_real(val.real())
+ , m_imag(val.imag())
+ {
+ }
+
+ template <class T, class A>
+ template <bool i3ec, class... Ts>
+ inline batch<std::complex<T>, A>::batch(xtl::xcomplex<T, T, i3ec> val0, xtl::xcomplex<T, T, i3ec> val1, Ts... vals) noexcept
+ : batch(kernel::set<A>(batch {}, A {}, val0, val1, static_cast<xtl::xcomplex<T, T, i3ec>>(vals)...))
+ {
+ static_assert(sizeof...(Ts) + 2 == size, "as many arguments as batch elements");
+ }
+
+ // The memory layouts of xcomplex and std::complex are the same when xcomplex
+ // stores values and not references. Unfortunately, reinterpreting one as the
+ // other breaks strict aliasing...
+
+ template <class T, class A>
+ template <bool i3ec>
+ inline batch<std::complex<T>, A> batch<std::complex<T>, A>::load_aligned(const xtl::xcomplex<T, T, i3ec>* src) noexcept
+ {
+ return load_aligned(reinterpret_cast<std::complex<T> const*>(src));
+ }
+
+ template <class T, class A>
+ template <bool i3ec>
+ inline batch<std::complex<T>, A> batch<std::complex<T>, A>::load_unaligned(const xtl::xcomplex<T, T, i3ec>* src) noexcept
+ {
+ return load_unaligned(reinterpret_cast<std::complex<T> const*>(src));
+ }
+
+ template <class T, class A>
+ template <bool i3ec>
+ inline void batch<std::complex<T>, A>::store_aligned(xtl::xcomplex<T, T, i3ec>* dst) const noexcept
+ {
+ store_aligned(reinterpret_cast<std::complex<T>*>(dst));
+ }
+
+ template <class T, class A>
+ template <bool i3ec>
+ inline void batch<std::complex<T>, A>::store_unaligned(xtl::xcomplex<T, T, i3ec>* dst) const noexcept
+ {
+ store_unaligned(reinterpret_cast<std::complex<T>*>(dst));
+ }
+
+#endif
+
+ /***************************************
+ * batch<complex> comparison operators *
+ ***************************************/
+
+ template <class T, class A>
+ inline batch_bool<T, A> batch<std::complex<T>, A>::operator==(batch const& other) const noexcept
+ {
+ return m_real == other.m_real && m_imag == other.m_imag;
+ }
+
+ template <class T, class A>
+ inline batch_bool<T, A> batch<std::complex<T>, A>::operator!=(batch const& other) const noexcept
+ {
+ return m_real != other.m_real || m_imag != other.m_imag;
+ }
+
+ /***********************************
+ * batch<complex> update operators *
+ ***********************************/
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A>& batch<std::complex<T>, A>::operator+=(batch const& other) noexcept
+ {
+ m_real += other.m_real;
+ m_imag += other.m_imag;
+ return *this;
+ }
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A>& batch<std::complex<T>, A>::operator-=(batch const& other) noexcept
+ {
+ m_real -= other.m_real;
+ m_imag -= other.m_imag;
+ return *this;
+ }
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A>& batch<std::complex<T>, A>::operator*=(batch const& other) noexcept
+ {
+ real_batch new_real = real() * other.real() - imag() * other.imag();
+ real_batch new_imag = real() * other.imag() + imag() * other.real();
+ m_real = new_real;
+ m_imag = new_imag;
+ return *this;
+ }
+
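+ // operator/= below uses the textbook formula
+ //   (a + ib) / (c + id) = ((a*c + b*d) + i(b*c - a*d)) / (c*c + d*d)
+ // with a, b taken from *this and c, d from other.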
+ template <class T, class A>
+ inline batch<std::complex<T>, A>& batch<std::complex<T>, A>::operator/=(batch const& other) noexcept
+ {
+ real_batch a = real();
+ real_batch b = imag();
+ real_batch c = other.real();
+ real_batch d = other.imag();
+ real_batch e = c * c + d * d;
+ m_real = (c * a + d * b) / e;
+ m_imag = (c * b - d * a) / e;
+ return *this;
+ }
+
+ /**************************************
+ * batch<complex> incr/decr operators *
+ **************************************/
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A>& batch<std::complex<T>, A>::operator++() noexcept
+ {
+ return operator+=(1);
+ }
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A>& batch<std::complex<T>, A>::operator--() noexcept
+ {
+ return operator-=(1);
+ }
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A> batch<std::complex<T>, A>::operator++(int) noexcept
+ {
+ batch copy(*this);
+ operator+=(1);
+ return copy;
+ }
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A> batch<std::complex<T>, A>::operator--(int) noexcept
+ {
+ batch copy(*this);
+ operator-=(1);
+ return copy;
+ }
+
+ /**********************************
+ * batch<complex> unary operators *
+ **********************************/
+
+ template <class T, class A>
+ inline batch_bool<T, A> batch<std::complex<T>, A>::operator!() const noexcept
+ {
+ return operator==(batch(0));
+ }
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A> batch<std::complex<T>, A>::operator~() const noexcept
+ {
+ return { ~m_real, ~m_imag };
+ }
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A> batch<std::complex<T>, A>::operator-() const noexcept
+ {
+ return { -m_real, -m_imag };
+ }
+
+ template <class T, class A>
+ inline batch<std::complex<T>, A> batch<std::complex<T>, A>::operator+() const noexcept
+ {
+ return { +m_real, +m_imag };
+ }
+
+ /**********************************
+ * size type aliases
+ **********************************/
+
+ namespace details
+ {
+ template <typename T, std::size_t N, class ArchList>
+ struct sized_batch;
+
+ template <typename T, std::size_t N>
+ struct sized_batch<T, N, xsimd::arch_list<>>
+ {
+ using type = void;
+ };
+
+ template <typename T, class Arch, bool BatchExists = xsimd::types::has_simd_register<T, Arch>::value>
+ struct batch_trait;
+
+ template <typename T, class Arch>
+ struct batch_trait<T, Arch, true>
+ {
+ using type = xsimd::batch<T, Arch>;
+ static constexpr std::size_t size = xsimd::batch<T, Arch>::size;
+ };
+
+ template <typename T, class Arch>
+ struct batch_trait<T, Arch, false>
+ {
+ using type = void;
+ static constexpr std::size_t size = 0;
+ };
+
+ template <typename T, std::size_t N, class Arch, class... Archs>
+ struct sized_batch<T, N, xsimd::arch_list<Arch, Archs...>>
+ {
+ using type = typename std::conditional<
+ batch_trait<T, Arch>::size == N,
+ typename batch_trait<T, Arch>::type,
+ typename sized_batch<T, N, xsimd::arch_list<Archs...>>::type>::type;
+ };
+ }
+
+ /**
+ * @brief type utility to select a batch of given type and size
+ *
+ * If one of the available architectures has a native vector type of the
+ * given type and size, sets the @p type member to the appropriate batch
+ * type. Otherwise sets it to @p void.
+ *
+ * @tparam T the type of the underlying values.
+ * @tparam N the number of elements of that type in the batch.
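+ *
+ * A sketch (illustrative only):
+ * @code
+ * // a 4-element float batch if any enabled architecture provides one
+ * // (e.g. sse2 or neon), void otherwise
+ * using f32x4 = xsimd::make_sized_batch_t<float, 4>;
+ * @endcode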
+ **/
+ template <typename T, std::size_t N>
+ struct make_sized_batch
+ {
+ using type = typename details::sized_batch<T, N, supported_architectures>::type;
+ };
+
+ template <typename T, std::size_t N>
+ using make_sized_batch_t = typename make_sized_batch<T, N>::type;
+}
+
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_batch_constant.hpp b/third_party/xsimd/include/xsimd/types/xsimd_batch_constant.hpp
new file mode 100644
index 0000000000..0de9c8ad42
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_batch_constant.hpp
@@ -0,0 +1,288 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_BATCH_CONSTANT_HPP
+#define XSIMD_BATCH_CONSTANT_HPP
+
+#include "./xsimd_batch.hpp"
+#include "./xsimd_utils.hpp"
+
+namespace xsimd
+{
+ /**
+ * @brief batch of boolean constant
+ *
+ * Abstract representation of a batch of boolean constants.
+ *
+ * @tparam batch_type the type of the associated batch values.
+ * @tparam Values boolean constants represented by this batch
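+ *
+ * A sketch (illustrative only; @c EvenLanes is a hypothetical generator with a
+ * constexpr static @c get member, see make_batch_bool_constant below):
+ * @code
+ * struct EvenLanes
+ * {
+ *     static constexpr bool get(unsigned i, unsigned) { return i % 2 == 0; }
+ * };
+ * auto even = xsimd::make_batch_bool_constant<xsimd::batch<float>, EvenLanes>();
+ * @endcode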
+ **/
+ template <class batch_type, bool... Values>
+ struct batch_bool_constant
+ {
+
+ public:
+ static constexpr std::size_t size = sizeof...(Values);
+ using arch_type = typename batch_type::arch_type;
+ using value_type = bool;
+ static_assert(sizeof...(Values) == batch_type::size, "consistent batch size");
+
+ constexpr operator batch_bool<typename batch_type::value_type, arch_type>() const noexcept { return { Values... }; }
+
+ constexpr bool get(size_t i) const noexcept
+ {
+ return std::array<value_type, size> { { Values... } }[i];
+ }
+
+ static constexpr int mask() noexcept
+ {
+ return mask_helper(0, static_cast<int>(Values)...);
+ }
+
+ private:
+ static constexpr int mask_helper(int acc) noexcept { return acc; }
+
+ template <class... Tys>
+ static constexpr int mask_helper(int acc, int mask, Tys... masks) noexcept
+ {
+ return mask_helper(acc | mask, (masks << 1)...);
+ }
+
+ struct logical_or
+ {
+ constexpr bool operator()(bool x, bool y) const { return x || y; }
+ };
+ struct logical_and
+ {
+ constexpr bool operator()(bool x, bool y) const { return x && y; }
+ };
+ struct logical_xor
+ {
+ constexpr bool operator()(bool x, bool y) const { return x ^ y; }
+ };
+
+ template <class F, class SelfPack, class OtherPack, size_t... Indices>
+ static constexpr batch_bool_constant<batch_type, F()(std::tuple_element<Indices, SelfPack>::type::value, std::tuple_element<Indices, OtherPack>::type::value)...>
+ apply(detail::index_sequence<Indices...>)
+ {
+ return {};
+ }
+
+ template <class F, bool... OtherValues>
+ static constexpr auto apply(batch_bool_constant<batch_type, Values...>, batch_bool_constant<batch_type, OtherValues...>)
+ -> decltype(apply<F, std::tuple<std::integral_constant<bool, Values>...>, std::tuple<std::integral_constant<bool, OtherValues>...>>(detail::make_index_sequence<sizeof...(Values)>()))
+ {
+ static_assert(sizeof...(Values) == sizeof...(OtherValues), "compatible constant batches");
+ return apply<F, std::tuple<std::integral_constant<bool, Values>...>, std::tuple<std::integral_constant<bool, OtherValues>...>>(detail::make_index_sequence<sizeof...(Values)>());
+ }
+
+ public:
+#define MAKE_BINARY_OP(OP, NAME) \
+ template <bool... OtherValues> \
+ constexpr auto operator OP(batch_bool_constant<batch_type, OtherValues...> other) const \
+ -> decltype(apply<NAME>(*this, other)) \
+ { \
+ return apply<NAME>(*this, other); \
+ }
+
+ MAKE_BINARY_OP(|, logical_or)
+ MAKE_BINARY_OP(||, logical_or)
+ MAKE_BINARY_OP(&, logical_and)
+ MAKE_BINARY_OP(&&, logical_and)
+ MAKE_BINARY_OP(^, logical_xor)
+
+#undef MAKE_BINARY_OP
+
+ constexpr batch_bool_constant<batch_type, !Values...> operator!() const
+ {
+ return {};
+ }
+
+ constexpr batch_bool_constant<batch_type, !Values...> operator~() const
+ {
+ return {};
+ }
+ };
+
+ /**
+ * @brief batch of integral constants
+ *
+ * Abstract representation of a batch of integral constants.
+ *
+ * @tparam batch_type the type of the associated batch values.
+ * @tparam Values constants represented by this batch
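+ *
+ * A sketch (illustrative only; valid when the associated batch holds exactly
+ * four elements, e.g. batch<uint32_t, xsimd::sse2>):
+ * @code
+ * using lane_ids = xsimd::batch_constant<xsimd::batch<uint32_t, xsimd::sse2>, 0, 1, 2, 3>;
+ * lane_ids c;
+ * xsimd::batch<uint32_t, xsimd::sse2> b = c;   // materialize as a regular batch
+ * @endcode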
+ **/
+ template <class batch_type, typename batch_type::value_type... Values>
+ struct batch_constant
+ {
+ static constexpr std::size_t size = sizeof...(Values);
+ using arch_type = typename batch_type::arch_type;
+ using value_type = typename batch_type::value_type;
+ static_assert(sizeof...(Values) == batch_type::size, "consistent batch size");
+
+ /**
+ * @brief Generate a batch of @p batch_type from this @p batch_constant
+ */
+ inline operator batch_type() const noexcept { return { Values... }; }
+
+ /**
+ * @brief Get the @p i th element of this @p batch_constant
+ */
+ constexpr value_type get(size_t i) const noexcept
+ {
+ return get(i, std::array<value_type, size> { Values... });
+ }
+
+ private:
+ constexpr value_type get(size_t i, std::array<value_type, size> const& values) const noexcept
+ {
+ return values[i];
+ }
+
+ struct arithmetic_add
+ {
+ constexpr value_type operator()(value_type x, value_type y) const { return x + y; }
+ };
+ struct arithmetic_sub
+ {
+ constexpr value_type operator()(value_type x, value_type y) const { return x - y; }
+ };
+ struct arithmetic_mul
+ {
+ constexpr value_type operator()(value_type x, value_type y) const { return x * y; }
+ };
+ struct arithmetic_div
+ {
+ constexpr value_type operator()(value_type x, value_type y) const { return x / y; }
+ };
+ struct arithmetic_mod
+ {
+ constexpr value_type operator()(value_type x, value_type y) const { return x % y; }
+ };
+ struct binary_and
+ {
+ constexpr value_type operator()(value_type x, value_type y) const { return x & y; }
+ };
+ struct binary_or
+ {
+ constexpr value_type operator()(value_type x, value_type y) const { return x | y; }
+ };
+ struct binary_xor
+ {
+ constexpr value_type operator()(value_type x, value_type y) const { return x ^ y; }
+ };
+
+ template <class F, class SelfPack, class OtherPack, size_t... Indices>
+ static constexpr batch_constant<batch_type, F()(std::tuple_element<Indices, SelfPack>::type::value, std::tuple_element<Indices, OtherPack>::type::value)...>
+ apply(detail::index_sequence<Indices...>)
+ {
+ return {};
+ }
+
+ template <class F, value_type... OtherValues>
+ static constexpr auto apply(batch_constant<batch_type, Values...>, batch_constant<batch_type, OtherValues...>)
+ -> decltype(apply<F, std::tuple<std::integral_constant<value_type, Values>...>, std::tuple<std::integral_constant<value_type, OtherValues>...>>(detail::make_index_sequence<sizeof...(Values)>()))
+ {
+ static_assert(sizeof...(Values) == sizeof...(OtherValues), "compatible constant batches");
+ return apply<F, std::tuple<std::integral_constant<value_type, Values>...>, std::tuple<std::integral_constant<value_type, OtherValues>...>>(detail::make_index_sequence<sizeof...(Values)>());
+ }
+
+ public:
+#define MAKE_BINARY_OP(OP, NAME) \
+ template <value_type... OtherValues> \
+ constexpr auto operator OP(batch_constant<batch_type, OtherValues...> other) const \
+ -> decltype(apply<NAME>(*this, other)) \
+ { \
+ return apply<NAME>(*this, other); \
+ }
+
+ MAKE_BINARY_OP(+, arithmetic_add)
+ MAKE_BINARY_OP(-, arithmetic_sub)
+ MAKE_BINARY_OP(*, arithmetic_mul)
+ MAKE_BINARY_OP(/, arithmetic_div)
+ MAKE_BINARY_OP(%, arithmetic_mod)
+ MAKE_BINARY_OP(&, binary_and)
+ MAKE_BINARY_OP(|, binary_or)
+ MAKE_BINARY_OP(^, binary_xor)
+
+#undef MAKE_BINARY_OP
+
+ constexpr batch_constant<batch_type, (value_type)-Values...> operator-() const
+ {
+ return {};
+ }
+
+ constexpr batch_constant<batch_type, (value_type) + Values...> operator+() const
+ {
+ return {};
+ }
+
+ constexpr batch_constant<batch_type, (value_type)~Values...> operator~() const
+ {
+ return {};
+ }
+ };
+
+ namespace detail
+ {
+ template <class batch_type, class G, std::size_t... Is>
+ inline constexpr auto make_batch_constant(detail::index_sequence<Is...>) noexcept
+ -> batch_constant<batch_type, (typename batch_type::value_type)G::get(Is, sizeof...(Is))...>
+ {
+ return {};
+ }
+ template <class batch_type, class G, std::size_t... Is>
+ inline constexpr auto make_batch_bool_constant(detail::index_sequence<Is...>) noexcept
+ -> batch_bool_constant<batch_type, G::get(Is, sizeof...(Is))...>
+ {
+ return {};
+ }
+
+ } // namespace detail
+
+ /**
+ * @brief Build a @c batch_constant out of a generator function
+ *
+ * @tparam batch_type type of the (non-constant) batch to build
+ * @tparam G type used to generate that batch. That type must have a static
+ * member @c get that's used to generate the batch constant. Concretely, the
+ * generated batch_constant has values `{G::get(0, batch_size), ... , G::get(batch_size - 1, batch_size)}`
+ *
+ * The following generator produces a batch of `(n - 1, 0, 1, ... n-2)`
+ *
+ * @code
+ * struct Rot
+ * {
+ * static constexpr unsigned get(unsigned i, unsigned n)
+ * {
+ * return (i + n - 1) % n;
+ * }
+ * };
+ * @endcode
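+ *
+ * The corresponding constant is then built as follows (illustrative only):
+ * @code
+ * auto rot = xsimd::make_batch_constant<xsimd::batch<uint32_t>, Rot>();
+ * @endcode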
+ */
+ template <class batch_type, class G>
+ inline constexpr auto make_batch_constant() noexcept -> decltype(detail::make_batch_constant<batch_type, G>(detail::make_index_sequence<batch_type::size>()))
+ {
+ return detail::make_batch_constant<batch_type, G>(detail::make_index_sequence<batch_type::size>());
+ }
+
+ template <class batch_type, class G>
+ inline constexpr auto make_batch_bool_constant() noexcept
+ -> decltype(detail::make_batch_bool_constant<batch_type, G>(
+ detail::make_index_sequence<batch_type::size>()))
+ {
+ return detail::make_batch_bool_constant<batch_type, G>(
+ detail::make_index_sequence<batch_type::size>());
+ }
+
+} // namespace xsimd
+
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp
new file mode 100644
index 0000000000..b9a5995414
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp
@@ -0,0 +1,46 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_FMA3_AVX2_REGISTER_HPP
+#define XSIMD_FMA3_AVX2_REGISTER_HPP
+
+#include "./xsimd_avx2_register.hpp"
+
+namespace xsimd
+{
+ template <typename arch>
+ struct fma3;
+
+ /**
+ * @ingroup architectures
+ *
+ * AVX2 + FMA instructions
+ */
+ template <>
+ struct fma3<avx2> : avx2
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_FMA3_AVX2; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(2, 2, 1); }
+ static constexpr char const* name() noexcept { return "fma3+avx2"; }
+ };
+
+#if XSIMD_WITH_FMA3_AVX2
+ namespace types
+ {
+
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(fma3<avx2>, avx2);
+
+ }
+#endif
+
+}
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp
new file mode 100644
index 0000000000..ae10598f2c
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp
@@ -0,0 +1,46 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_FMA3_AVX_REGISTER_HPP
+#define XSIMD_FMA3_AVX_REGISTER_HPP
+
+#include "./xsimd_avx_register.hpp"
+
+namespace xsimd
+{
+ template <typename arch>
+ struct fma3;
+
+ /**
+ * @ingroup architectures
+ *
+ * AVX + FMA instructions
+ */
+ template <>
+ struct fma3<avx> : avx
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_FMA3_AVX; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(2, 1, 1); }
+ static constexpr char const* name() noexcept { return "fma3+avx"; }
+ };
+
+#if XSIMD_WITH_FMA3_AVX
+ namespace types
+ {
+
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(fma3<avx>, avx);
+
+ }
+#endif
+
+}
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp
new file mode 100644
index 0000000000..a267490d66
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp
@@ -0,0 +1,46 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_FMA3_SSE_REGISTER_HPP
+#define XSIMD_FMA3_SSE_REGISTER_HPP
+
+#include "./xsimd_sse4_2_register.hpp"
+
+namespace xsimd
+{
+ template <typename arch>
+ struct fma3;
+
+ /**
+ * @ingroup architectures
+ *
+ * SSE4.2 + FMA instructions
+ */
+ template <>
+ struct fma3<sse4_2> : sse4_2
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_FMA3_SSE; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(1, 4, 3); }
+ static constexpr char const* name() noexcept { return "fma3+sse4.2"; }
+ };
+
+#if XSIMD_WITH_FMA3_SSE
+ namespace types
+ {
+
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(fma3<sse4_2>, sse4_2);
+
+ }
+#endif
+
+}
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_fma4_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_fma4_register.hpp
new file mode 100644
index 0000000000..3684bbb401
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_fma4_register.hpp
@@ -0,0 +1,42 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_FMA4_REGISTER_HPP
+#define XSIMD_FMA4_REGISTER_HPP
+
+#include "./xsimd_sse4_2_register.hpp"
+
+namespace xsimd
+{
+ /**
+ * @ingroup architectures
+ *
+ * SSE4.2 + FMA4 instructions
+ */
+ struct fma4 : sse4_2
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_FMA4; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(1, 4, 4); }
+ static constexpr char const* name() noexcept { return "fma4"; }
+ };
+
+#if XSIMD_WITH_FMA4
+ namespace types
+ {
+
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(fma4, sse4_2);
+
+ }
+#endif
+
+}
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_generic_arch.hpp b/third_party/xsimd/include/xsimd/types/xsimd_generic_arch.hpp
new file mode 100644
index 0000000000..f4a2ca6aad
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_generic_arch.hpp
@@ -0,0 +1,52 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_GENERIC_ARCH_HPP
+#define XSIMD_GENERIC_ARCH_HPP
+
+#include "../config/xsimd_config.hpp"
+
+/**
+ * @defgroup architectures Architecture description
+ * */
+namespace xsimd
+{
+ /**
+ * @ingroup architectures
+ *
+ * Base class for all architectures.
+ */
+ struct generic
+ {
+ /// Whether this architecture is supported at compile-time.
+ static constexpr bool supported() noexcept { return true; }
+ /// Whether this architecture is available at run-time.
+ static constexpr bool available() noexcept { return true; }
+ /// If this architecture supports aligned memory accesses, the required
+ /// alignment.
+ static constexpr std::size_t alignment() noexcept { return 0; }
+ /// Whether this architecture requires aligned memory access.
+ static constexpr bool requires_alignment() noexcept { return false; }
+ /// Unique identifier for this architecture.
+ static constexpr unsigned version() noexcept { return generic::version(0, 0, 0); }
+ /// Name of the architecture.
+ static constexpr char const* name() noexcept { return "generic"; }
+
+ protected:
+ static constexpr unsigned version(unsigned major, unsigned minor, unsigned patch, unsigned multiplier = 100u) noexcept { return major * multiplier * multiplier + minor * multiplier + patch; }
+ };
+
+ struct unsupported
+ {
+ };
+}
+
+#endif
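The protected version() helper above packs a (major, minor, patch) triple into a single ordering key. Below is a minimal standalone sketch of the same arithmetic; pack_version is a hypothetical free function, not part of xsimd, and it assumes the default multiplier of 100.

    constexpr unsigned pack_version(unsigned major, unsigned minor, unsigned patch,
                                    unsigned multiplier = 100u) noexcept
    {
        // Same packing rule as generic::version: base-100 digits, most significant first.
        return major * multiplier * multiplier + minor * multiplier + patch;
    }
    static_assert(pack_version(1, 4, 2) == 10402, "an SSE4.2-style triple packs to 10402");
    static_assert(pack_version(2, 1, 1) > pack_version(1, 4, 2),
                  "later generations compare greater, as long as minor and patch stay below 100");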
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_neon64_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_neon64_register.hpp
new file mode 100644
index 0000000000..3aa8973b63
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_neon64_register.hpp
@@ -0,0 +1,52 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_NEON64_REGISTER_HPP
+#define XSIMD_NEON64_REGISTER_HPP
+
+#include "xsimd_neon_register.hpp"
+
+namespace xsimd
+{
+ /**
+ * @ingroup architectures
+ *
+ * NEON instructions for arm64
+ */
+ struct neon64 : neon
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_NEON64; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr bool requires_alignment() noexcept { return true; }
+ static constexpr std::size_t alignment() noexcept { return 16; }
+ static constexpr unsigned version() noexcept { return generic::version(8, 1, 0); }
+ static constexpr char const* name() noexcept { return "arm64+neon"; }
+ };
+
+#if XSIMD_WITH_NEON64
+
+ namespace types
+ {
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(neon64, neon);
+ XSIMD_DECLARE_SIMD_REGISTER(double, neon64, float64x2_t);
+
+ template <class T>
+ struct get_bool_simd_register<T, neon64>
+ : detail::neon_bool_simd_register<T, neon64>
+ {
+ };
+ }
+
+#endif
+
+}
+
+#endif
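A quick check of what the declarations above imply: on arm64 the double register becomes available, whereas 32-bit NEON declares it invalid (see xsimd_neon_register.hpp below). Sketch only; it assumes a toolchain where XSIMD_WITH_NEON64 is set.

    #include "xsimd_neon64_register.hpp"

    #if XSIMD_WITH_NEON64
    static_assert(xsimd::types::has_simd_register<double, xsimd::neon64>::value,
                  "double is backed by float64x2_t on arm64");
    #endif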
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_neon_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_neon_register.hpp
new file mode 100644
index 0000000000..0ef4b381d3
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_neon_register.hpp
@@ -0,0 +1,155 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_NEON_REGISTER_HPP
+#define XSIMD_NEON_REGISTER_HPP
+
+#include "xsimd_generic_arch.hpp"
+#include "xsimd_register.hpp"
+
+#if XSIMD_WITH_NEON
+#include <arm_neon.h>
+#endif
+
+namespace xsimd
+{
+ /**
+ * @ingroup architectures
+ *
+ * NEON instructions for arm32
+ */
+ struct neon : generic
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_NEON; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr bool requires_alignment() noexcept { return true; }
+ static constexpr std::size_t alignment() noexcept { return 16; }
+ static constexpr unsigned version() noexcept { return generic::version(7, 0, 0); }
+ static constexpr char const* name() noexcept { return "arm32+neon"; }
+ };
+
+#if XSIMD_WITH_NEON
+ namespace types
+ {
+ namespace detail
+ {
+ template <size_t S>
+ struct neon_vector_type_impl;
+
+ template <>
+ struct neon_vector_type_impl<8>
+ {
+ using signed_type = int8x16_t;
+ using unsigned_type = uint8x16_t;
+ };
+
+ template <>
+ struct neon_vector_type_impl<16>
+ {
+ using signed_type = int16x8_t;
+ using unsigned_type = uint16x8_t;
+ };
+
+ template <>
+ struct neon_vector_type_impl<32>
+ {
+ using signed_type = int32x4_t;
+ using unsigned_type = uint32x4_t;
+ };
+
+ template <>
+ struct neon_vector_type_impl<64>
+ {
+ using signed_type = int64x2_t;
+ using unsigned_type = uint64x2_t;
+ };
+
+ template <class T>
+ using signed_neon_vector_type = typename neon_vector_type_impl<8 * sizeof(T)>::signed_type;
+
+ template <class T>
+ using unsigned_neon_vector_type = typename neon_vector_type_impl<8 * sizeof(T)>::unsigned_type;
+
+ template <class T>
+ using neon_vector_type = typename std::conditional<std::is_signed<T>::value,
+ signed_neon_vector_type<T>,
+ unsigned_neon_vector_type<T>>::type;
+
+ using char_neon_vector_type = typename std::conditional<std::is_signed<char>::value,
+ signed_neon_vector_type<char>,
+ unsigned_neon_vector_type<char>>::type;
+ }
+
+ XSIMD_DECLARE_SIMD_REGISTER(signed char, neon, detail::neon_vector_type<signed char>);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned char, neon, detail::neon_vector_type<unsigned char>);
+ XSIMD_DECLARE_SIMD_REGISTER(char, neon, detail::char_neon_vector_type);
+ XSIMD_DECLARE_SIMD_REGISTER(short, neon, detail::neon_vector_type<short>);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned short, neon, detail::neon_vector_type<unsigned short>);
+ XSIMD_DECLARE_SIMD_REGISTER(int, neon, detail::neon_vector_type<int>);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned int, neon, detail::neon_vector_type<unsigned int>);
+ XSIMD_DECLARE_SIMD_REGISTER(long int, neon, detail::neon_vector_type<long int>);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned long int, neon, detail::neon_vector_type<unsigned long int>);
+ XSIMD_DECLARE_SIMD_REGISTER(long long int, neon, detail::neon_vector_type<long long int>);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned long long int, neon, detail::neon_vector_type<unsigned long long int>);
+ XSIMD_DECLARE_SIMD_REGISTER(float, neon, float32x4_t);
+ XSIMD_DECLARE_INVALID_SIMD_REGISTER(double, neon);
+
+ namespace detail
+ {
+ template <size_t S>
+ struct get_unsigned_type;
+
+ template <>
+ struct get_unsigned_type<1>
+ {
+ using type = uint8_t;
+ };
+
+ template <>
+ struct get_unsigned_type<2>
+ {
+ using type = uint16_t;
+ };
+
+ template <>
+ struct get_unsigned_type<4>
+ {
+ using type = uint32_t;
+ };
+
+ template <>
+ struct get_unsigned_type<8>
+ {
+ using type = uint64_t;
+ };
+
+ template <size_t S>
+ using get_unsigned_type_t = typename get_unsigned_type<S>::type;
+
+ template <class T, class A>
+ struct neon_bool_simd_register
+ {
+ using type = simd_register<get_unsigned_type_t<sizeof(T)>, A>;
+ };
+ }
+
+ template <class T>
+ struct get_bool_simd_register<T, neon>
+ : detail::neon_bool_simd_register<T, neon>
+ {
+ };
+
+ }
+#endif
+
+}
+
+#endif
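The neon_vector_type machinery above maps each scalar type to the matching 128-bit intrinsic type. A minimal sketch of the resulting mappings, assuming XSIMD_WITH_NEON is set:

    #include <cstdint>
    #include <type_traits>
    #include "xsimd_neon_register.hpp"

    #if XSIMD_WITH_NEON
    static_assert(std::is_same<xsimd::types::detail::neon_vector_type<int32_t>, int32x4_t>::value,
                  "signed 32-bit scalars map to int32x4_t");
    static_assert(std::is_same<xsimd::types::detail::neon_vector_type<uint16_t>, uint16x8_t>::value,
                  "unsigned 16-bit scalars map to uint16x8_t");
    #endif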
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_register.hpp
new file mode 100644
index 0000000000..4fe4f3f13f
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_register.hpp
@@ -0,0 +1,94 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_REGISTER_HPP
+#define XSIMD_REGISTER_HPP
+
+#include <type_traits>
+
+namespace xsimd
+{
+ namespace types
+ {
+ template <class T, class A>
+ struct has_simd_register : std::false_type
+ {
+ };
+
+ template <class T, class Arch>
+ struct simd_register
+ {
+ struct register_type
+ {
+ };
+ };
+
+#define XSIMD_DECLARE_SIMD_REGISTER(SCALAR_TYPE, ISA, VECTOR_TYPE) \
+ template <> \
+ struct simd_register<SCALAR_TYPE, ISA> \
+ { \
+ using register_type = VECTOR_TYPE; \
+ register_type data; \
+ inline operator register_type() const noexcept \
+ { \
+ return data; \
+ } \
+ }; \
+ template <> \
+ struct has_simd_register<SCALAR_TYPE, ISA> : std::true_type \
+ { \
+ }
+
+#define XSIMD_DECLARE_INVALID_SIMD_REGISTER(SCALAR_TYPE, ISA) \
+ template <> \
+ struct has_simd_register<SCALAR_TYPE, ISA> : std::false_type \
+ { \
+ }
+
+#define XSIMD_DECLARE_SIMD_REGISTER_ALIAS(ISA, ISA_BASE) \
+ template <class T> \
+ struct simd_register<T, ISA> : simd_register<T, ISA_BASE> \
+ { \
+ using register_type = typename simd_register<T, ISA_BASE>::register_type; \
+ simd_register(register_type reg) noexcept \
+ : simd_register<T, ISA_BASE> { reg } \
+ { \
+ } \
+ simd_register() = default; \
+ }; \
+ template <class T> \
+ struct has_simd_register<T, ISA> : has_simd_register<T, ISA_BASE> \
+ { \
+ }
+
+ template <class T, class Arch>
+ struct get_bool_simd_register
+ {
+ using type = simd_register<T, Arch>;
+ };
+
+ template <class T, class Arch>
+ using get_bool_simd_register_t = typename get_bool_simd_register<T, Arch>::type;
+ }
+
+ namespace kernel
+ {
+ template <class A>
+ // requires_arch<A> is an alias for A const&, built with <type_traits> helpers
+ using requires_arch = typename std::add_lvalue_reference<typename std::add_const<A>::type>::type;
+ template <class T>
+ struct convert
+ {
+ };
+ }
+}
+
+#endif
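For reference, this is roughly what XSIMD_DECLARE_SIMD_REGISTER produces when instantiated inside namespace xsimd::types; my_arch and my_vec_t are hypothetical placeholder names used only for the illustration, not types defined anywhere in xsimd.

    // XSIMD_DECLARE_SIMD_REGISTER(float, my_arch, my_vec_t) expands, roughly, to:
    template <>
    struct simd_register<float, my_arch>
    {
        using register_type = my_vec_t;
        register_type data; // the wrapped native register
        inline operator register_type() const noexcept { return data; }
    };
    template <>
    struct has_simd_register<float, my_arch> : std::true_type // and flips the trait to "present"
    {
    };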
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_rvv_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_rvv_register.hpp
new file mode 100644
index 0000000000..bdc0ef3b87
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_rvv_register.hpp
@@ -0,0 +1,419 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * Copyright (c) Yibo Cai *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_RVV_REGISTER_HPP
+#define XSIMD_RVV_REGISTER_HPP
+
+#include "xsimd_generic_arch.hpp"
+#include "xsimd_register.hpp"
+
+#if XSIMD_WITH_RVV
+#include <riscv_vector.h>
+#endif
+
+namespace xsimd
+{
+ namespace detail
+ {
+ /**
+ * @ingroup architectures
+ *
+ * RVV instructions (fixed vector size) for riscv
+ */
+ template <size_t Width>
+ struct rvv : xsimd::generic
+ {
+ static constexpr size_t width = Width;
+ static constexpr bool supported() noexcept { return Width == XSIMD_RVV_BITS; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr bool requires_alignment() noexcept { return true; }
+ static constexpr std::size_t alignment() noexcept { return 16; }
+ static constexpr unsigned version() noexcept { return generic::version(1, 0, 0, /*multiplier=*/1000); }
+ static constexpr char const* name() noexcept { return "riscv+rvv"; }
+ };
+ }
+
+#if XSIMD_WITH_RVV
+
+ using rvv = detail::rvv<__riscv_v_fixed_vlen>;
+
+#define XSIMD_RVV_JOINT_(a, b, c) a##b##c
+#define XSIMD_RVV_JOINT(a, b, c) XSIMD_RVV_JOINT_(a, b, c)
+#define XSIMD_RVV_JOINT5(a, b, c, d, e) XSIMD_RVV_JOINT(XSIMD_RVV_JOINT(a, b, c), d, e)
+
+#define XSIMD_RVV_TYPE_i(S, V) XSIMD_RVV_JOINT5(vint, S, m, V, _t)
+#define XSIMD_RVV_TYPE_u(S, V) XSIMD_RVV_JOINT5(vuint, S, m, V, _t)
+#define XSIMD_RVV_TYPE_f(S, V) XSIMD_RVV_JOINT5(vfloat, S, m, V, _t)
+#define XSIMD_RVV_TYPE(T, S, V) XSIMD_RVV_JOINT(XSIMD_RVV_TYPE, _, T)(S, V)
+
+ namespace types
+ {
+ namespace detail
+ {
+ static constexpr size_t rvv_width_mf8 = XSIMD_RVV_BITS / 8;
+ static constexpr size_t rvv_width_mf4 = XSIMD_RVV_BITS / 4;
+ static constexpr size_t rvv_width_mf2 = XSIMD_RVV_BITS / 2;
+ static constexpr size_t rvv_width_m1 = XSIMD_RVV_BITS;
+ static constexpr size_t rvv_width_m2 = XSIMD_RVV_BITS * 2;
+ static constexpr size_t rvv_width_m4 = XSIMD_RVV_BITS * 4;
+ static constexpr size_t rvv_width_m8 = XSIMD_RVV_BITS * 8;
+
+ // rvv_type_info is a utility class to convert a scalar type and
+ // bit width into RVV register types.
+ //
+ // * `type` is the unadorned vector type.
+ // * `fixed_type` is the same type, but with the storage attribute
+ // applied.
+ // * `byte_type` is the type which is the same size in unsigned
+ // bytes, used as an intermediate step for bit-cast operations,
+ // because only a subset of __riscv_vreinterpret() intrinsics
+ // exist -- but always enough to get us to bytes and back.
+ //
+ template <class T, size_t Width>
+ struct rvv_type_info;
+#define XSIMD_RVV_MAKE_TYPE(scalar, t, s, vmul) \
+ template <> \
+ struct rvv_type_info<scalar, rvv_width_m1 * vmul> \
+ { \
+ static constexpr size_t width = rvv_width_m1 * vmul; \
+ using type = XSIMD_RVV_TYPE(t, s, vmul); \
+ using byte_type = XSIMD_RVV_TYPE(u, 8, vmul); \
+ using fixed_type = type __attribute__((riscv_rvv_vector_bits(width))); \
+ template <class U> \
+ static inline type bitcast(U x) noexcept \
+ { \
+ const auto words = XSIMD_RVV_JOINT5(__riscv_vreinterpret_, u, s, m, vmul)(x); \
+ return XSIMD_RVV_JOINT5(__riscv_vreinterpret_, t, s, m, vmul)(words); \
+ } \
+ template <> \
+ inline type bitcast<type>(type x) noexcept { return x; } \
+ static inline byte_type as_bytes(type x) noexcept \
+ { \
+ const auto words = XSIMD_RVV_JOINT5(__riscv_vreinterpret_, u, s, m, vmul)(x); \
+ return XSIMD_RVV_JOINT5(__riscv_vreinterpret_, u, 8, m, vmul)(words); \
+ } \
+ };
+
+#define XSIMD_RVV_MAKE_TYPES(vmul) \
+ XSIMD_RVV_MAKE_TYPE(int8_t, i, 8, vmul) \
+ XSIMD_RVV_MAKE_TYPE(uint8_t, u, 8, vmul) \
+ XSIMD_RVV_MAKE_TYPE(int16_t, i, 16, vmul) \
+ XSIMD_RVV_MAKE_TYPE(uint16_t, u, 16, vmul) \
+ XSIMD_RVV_MAKE_TYPE(int32_t, i, 32, vmul) \
+ XSIMD_RVV_MAKE_TYPE(uint32_t, u, 32, vmul) \
+ XSIMD_RVV_MAKE_TYPE(int64_t, i, 64, vmul) \
+ XSIMD_RVV_MAKE_TYPE(uint64_t, u, 64, vmul) \
+ XSIMD_RVV_MAKE_TYPE(float, f, 32, vmul) \
+ XSIMD_RVV_MAKE_TYPE(double, f, 64, vmul)
+
+ XSIMD_RVV_MAKE_TYPES(8)
+ XSIMD_RVV_MAKE_TYPES(4)
+ XSIMD_RVV_MAKE_TYPES(2)
+ XSIMD_RVV_MAKE_TYPES(1)
+#undef XSIMD_RVV_TYPE
+#undef XSIMD_RVV_TYPE_f
+#undef XSIMD_RVV_TYPE_u
+#undef XSIMD_RVV_TYPE_i
+#undef XSIMD_RVV_MAKE_TYPES
+#undef XSIMD_RVV_MAKE_TYPE
+
+ // rvv_blob is a storage-type abstraction for a vector register.
+ template <class T, size_t Width>
+ struct rvv_blob : public rvv_type_info<T, Width>
+ {
+ using super = rvv_type_info<T, Width>;
+ using typename super::fixed_type;
+ using typename super::type;
+
+ fixed_type value;
+ type get() const { return value; }
+ void set(type v) { value = v; }
+ };
+ //
+ // Sometimes, however, we want the storage type to be smaller than a whole
+ // register while still presenting as a whole register to the outside
+ // world. This is because some partial-register types are not
+ // defined, but they can (mostly) be emulated by using a shorter vl on a
+ // full-width register for arithmetic, and casting back to a partial
+ // byte register for storage.
+ //
+ template <class T, size_t divisor>
+ struct rvv_semiblob : public rvv_type_info<T, rvv_width_m1>
+ {
+ using super = rvv_type_info<T, rvv_width_m1>;
+ static constexpr size_t width = rvv_width_m1 / divisor;
+ using typename super::type;
+ template <size_t div>
+ struct semitype;
+ template <>
+ struct semitype<2>
+ {
+ using type = vuint8mf2_t __attribute__((riscv_rvv_vector_bits(rvv_width_mf2)));
+ };
+ template <>
+ struct semitype<4>
+ {
+ using type = vuint8mf4_t __attribute__((riscv_rvv_vector_bits(rvv_width_mf4)));
+ };
+ template <>
+ struct semitype<8>
+ {
+ using type = vuint8mf8_t __attribute__((riscv_rvv_vector_bits(rvv_width_mf8)));
+ };
+ using fixed_type = typename semitype<divisor>::type;
+ using super::as_bytes;
+ using super::bitcast;
+
+ fixed_type value;
+ template <size_t div>
+ vuint8m1_t get_bytes() const;
+ template <>
+ vuint8m1_t get_bytes<2>() const { return __riscv_vlmul_ext_v_u8mf2_u8m1(value); }
+ template <>
+ vuint8m1_t get_bytes<4>() const { return __riscv_vlmul_ext_v_u8mf4_u8m1(value); }
+ template <>
+ vuint8m1_t get_bytes<8>() const { return __riscv_vlmul_ext_v_u8mf8_u8m1(value); }
+ type get() const noexcept
+ {
+ vuint8m1_t bytes = get_bytes<divisor>();
+ return bitcast(bytes);
+ }
+ template <size_t div>
+ void set_bytes(vuint8m1_t);
+ template <>
+ void set_bytes<2>(vuint8m1_t v) { value = __riscv_vlmul_trunc_v_u8m1_u8mf2(v); }
+ template <>
+ void set_bytes<4>(vuint8m1_t v) { value = __riscv_vlmul_trunc_v_u8m1_u8mf4(v); }
+ template <>
+ void set_bytes<8>(vuint8m1_t v) { value = __riscv_vlmul_trunc_v_u8m1_u8mf8(v); }
+ void set(type v)
+ {
+ vuint8m1_t bytes = as_bytes(v);
+ set_bytes<divisor>(bytes);
+ }
+ };
+ template <class T>
+ struct rvv_blob<T, rvv_width_mf2> : rvv_semiblob<T, 2>
+ {
+ };
+ template <class T>
+ struct rvv_blob<T, rvv_width_mf4> : rvv_semiblob<T, 4>
+ {
+ };
+ template <class T>
+ struct rvv_blob<T, rvv_width_mf8> : rvv_semiblob<T, 8>
+ {
+ };
+
+ // It's difficult dealing with both char and whichever *int8_t type
+ // is compatible with char, so just avoid it altogether.
+ //
+ using rvv_char_t = typename std::conditional<std::is_signed<char>::value, int8_t, uint8_t>::type;
+ template <class T>
+ using rvv_fix_char_t = typename std::conditional<
+ std::is_same<char, typename std::decay<T>::type>::value,
+ rvv_char_t, T>::type;
+
+ // An explicit constructor is not, by itself, enough to rule out
+ // accidental bit-casting between incompatible types, so we add this
+ // vacuous flag argument to make such casts a deliberate opt-in:
+ //
+ enum rvv_bitcast_flag
+ {
+ XSIMD_RVV_BITCAST
+ };
+
+ // the general-purpose vector register type, usable within
+ // templates, and supporting arithmetic on partial registers for
+ // which there is no intrinsic type (by casting via a full register
+ // type).
+ //
+ template <class T, size_t Width>
+ struct rvv_reg
+ {
+ static constexpr size_t width = Width;
+ static constexpr size_t vl = Width / (sizeof(T) * 8);
+ using blob_type = rvv_blob<T, Width>;
+ using register_type = typename blob_type::type;
+ using byte_type = typename blob_type::byte_type;
+ blob_type value;
+ rvv_reg() noexcept = default;
+ rvv_reg(register_type x) noexcept { value.set(x); }
+ explicit rvv_reg(byte_type v, rvv_bitcast_flag) { value.set(value.bitcast(v)); }
+ template <class U>
+ explicit rvv_reg(rvv_reg<U, Width> v, rvv_bitcast_flag)
+ : rvv_reg(v.get_bytes(), XSIMD_RVV_BITCAST)
+ {
+ }
+ byte_type get_bytes() const noexcept
+ {
+ return blob_type::as_bytes(value.get());
+ }
+ operator register_type() const noexcept { return value.get(); }
+ };
+ template <class T, size_t Width = XSIMD_RVV_BITS>
+ using rvv_reg_t = typename std::conditional<!std::is_void<T>::value, rvv_reg<rvv_fix_char_t<T>, Width>, void>::type;
+
+ // And some more of the same stuff for bool types, which have
+ // similar problems and similar workarounds.
+ //
+ template <size_t>
+ struct rvv_bool_info;
+#define XSIMD_RVV_MAKE_BOOL_TYPE(i) \
+ template <> \
+ struct rvv_bool_info<i> \
+ { \
+ using type = XSIMD_RVV_JOINT(vbool, i, _t); \
+ template <class T> \
+ static inline type bitcast(T value) noexcept \
+ { \
+ return XSIMD_RVV_JOINT(__riscv_vreinterpret_b, i, )(value); \
+ } \
+ /*template <> static inline type bitcast(type value) noexcept { return value; }*/ \
+ };
+ XSIMD_RVV_MAKE_BOOL_TYPE(1);
+ XSIMD_RVV_MAKE_BOOL_TYPE(2);
+ XSIMD_RVV_MAKE_BOOL_TYPE(4);
+ XSIMD_RVV_MAKE_BOOL_TYPE(8);
+ XSIMD_RVV_MAKE_BOOL_TYPE(16);
+ XSIMD_RVV_MAKE_BOOL_TYPE(32);
+ XSIMD_RVV_MAKE_BOOL_TYPE(64);
+#undef XSIMD_RVV_MAKE_BOOL_TYPE
+#undef XSIMD_RVV_JOINT5
+#undef XSIMD_RVV_JOINT
+#undef XSIMD_RVV_JOINT_
+
+ template <class T, size_t Width>
+ struct rvv_bool
+ {
+ using bool_info = rvv_bool_info<rvv_width_m1 * sizeof(T) * 8 / Width>;
+ using storage_type = vuint8m1_t __attribute__((riscv_rvv_vector_bits(rvv_width_m1)));
+ using type = typename bool_info::type;
+ storage_type value;
+ rvv_bool() = default;
+ rvv_bool(type v) noexcept
+ : value(__riscv_vreinterpret_u8m1(v))
+ {
+ }
+ template <class U, typename std::enable_if<sizeof(T) == sizeof(U), int>::type = 0>
+ rvv_bool(rvv_bool<U, Width> v)
+ : value(v.value)
+ {
+ }
+ explicit rvv_bool(uint8_t mask) noexcept
+ : value(__riscv_vmv_v_x_u8m1(mask, rvv_width_m1 / 8))
+ {
+ }
+ explicit rvv_bool(uint64_t mask) noexcept
+ : value(__riscv_vreinterpret_v_u64m1_u8m1(__riscv_vmv_v_x_u64m1(mask, rvv_width_m1 / 64)))
+ {
+ }
+ operator type() const noexcept { return bool_info::bitcast(value); }
+ };
+
+ template <class T, size_t Width = XSIMD_RVV_BITS>
+ using rvv_bool_t = typename std::enable_if < !std::is_void<T>::value,
+ rvv_bool<rvv_fix_char_t<T>, Width<rvv_width_m1 ? rvv_width_m1 : Width>>::type;
+
+ template <size_t S>
+ struct rvv_vector_type_impl;
+
+ template <>
+ struct rvv_vector_type_impl<8>
+ {
+ using signed_type = rvv_reg_t<int8_t>;
+ using unsigned_type = rvv_reg_t<uint8_t>;
+ using floating_point_type = void;
+ };
+
+ template <>
+ struct rvv_vector_type_impl<16>
+ {
+ using signed_type = rvv_reg_t<int16_t>;
+ using unsigned_type = rvv_reg_t<uint16_t>;
+ using floating_point_type = rvv_reg_t<_Float16>;
+ };
+
+ template <>
+ struct rvv_vector_type_impl<32>
+ {
+ using signed_type = rvv_reg_t<int32_t>;
+ using unsigned_type = rvv_reg_t<uint32_t>;
+ using floating_point_type = rvv_reg_t<float>;
+ };
+
+ template <>
+ struct rvv_vector_type_impl<64>
+ {
+ using signed_type = rvv_reg_t<int64_t>;
+ using unsigned_type = rvv_reg_t<uint64_t>;
+ using floating_point_type = rvv_reg_t<double>;
+ };
+
+ template <class T>
+ using signed_int_rvv_vector_type = typename rvv_vector_type_impl<8 * sizeof(T)>::signed_type;
+
+ template <class T>
+ using unsigned_int_rvv_vector_type = typename rvv_vector_type_impl<8 * sizeof(T)>::unsigned_type;
+
+ template <class T>
+ using floating_point_rvv_vector_type = typename rvv_vector_type_impl<8 * sizeof(T)>::floating_point_type;
+
+ template <class T>
+ using signed_int_or_floating_point_rvv_vector_type = typename std::conditional<std::is_floating_point<T>::value,
+ floating_point_rvv_vector_type<T>,
+ signed_int_rvv_vector_type<T>>::type;
+
+ template <class T>
+ using rvv_vector_type = typename std::conditional<std::is_signed<T>::value,
+ signed_int_or_floating_point_rvv_vector_type<T>,
+ unsigned_int_rvv_vector_type<T>>::type;
+ } // namespace detail
+
+ XSIMD_DECLARE_SIMD_REGISTER(bool, rvv, detail::rvv_vector_type<unsigned char>);
+ XSIMD_DECLARE_SIMD_REGISTER(signed char, rvv, detail::rvv_vector_type<signed char>);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned char, rvv, detail::rvv_vector_type<unsigned char>);
+ XSIMD_DECLARE_SIMD_REGISTER(char, rvv, detail::rvv_vector_type<char>);
+ XSIMD_DECLARE_SIMD_REGISTER(short, rvv, detail::rvv_vector_type<short>);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned short, rvv, detail::rvv_vector_type<unsigned short>);
+ XSIMD_DECLARE_SIMD_REGISTER(int, rvv, detail::rvv_vector_type<int>);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned int, rvv, detail::rvv_vector_type<unsigned int>);
+ XSIMD_DECLARE_SIMD_REGISTER(long int, rvv, detail::rvv_vector_type<long int>);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned long int, rvv, detail::rvv_vector_type<unsigned long int>);
+ XSIMD_DECLARE_SIMD_REGISTER(long long int, rvv, detail::rvv_vector_type<long long int>);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned long long int, rvv, detail::rvv_vector_type<unsigned long long int>);
+ XSIMD_DECLARE_SIMD_REGISTER(float, rvv, detail::rvv_vector_type<float>);
+ XSIMD_DECLARE_SIMD_REGISTER(double, rvv, detail::rvv_vector_type<double>);
+
+ namespace detail
+ {
+ template <class T>
+ struct rvv_bool_simd_register
+ {
+ using register_type = rvv_bool_t<T>;
+ register_type data;
+ operator register_type() const noexcept { return data; }
+ };
+ } // namespace detail
+
+ template <class T>
+ struct get_bool_simd_register<T, rvv>
+ {
+ using type = detail::rvv_bool_simd_register<T>;
+ };
+ } // namespace types
+#else
+ using rvv = detail::rvv<0xFFFFFFFF>;
+#endif
+} // namespace xsimd
+
+#endif
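One consequence of the fixed-width register model above: the lane count of an rvv_reg_t follows directly from XSIMD_RVV_BITS. A sketch, assuming a compiler configured for a fixed RVV vector length (e.g. -mrvv-vector-bits) so that XSIMD_WITH_RVV is set:

    #include <cstdint>
    #include "xsimd_rvv_register.hpp"

    #if XSIMD_WITH_RVV
    static_assert(xsimd::types::detail::rvv_reg_t<int32_t>::vl == XSIMD_RVV_BITS / 32,
                  "one lane per 32 bits of register width");
    static_assert(xsimd::types::detail::rvv_reg_t<uint8_t>::vl == XSIMD_RVV_BITS / 8,
                  "one lane per 8 bits of register width");
    #endif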
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_sse2_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_sse2_register.hpp
new file mode 100644
index 0000000000..a9dc8960b6
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_sse2_register.hpp
@@ -0,0 +1,60 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_SSE2_REGISTER_HPP
+#define XSIMD_SSE2_REGISTER_HPP
+
+#include "./xsimd_generic_arch.hpp"
+#include "./xsimd_register.hpp"
+
+#if XSIMD_WITH_SSE2
+#include <emmintrin.h>
+#include <xmmintrin.h>
+#endif
+
+namespace xsimd
+{
+ /**
+ * @ingroup architectures
+ *
+ * SSE2 instructions
+ */
+ struct sse2 : generic
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_SSE2; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr bool requires_alignment() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(1, 2, 0); }
+ static constexpr std::size_t alignment() noexcept { return 16; }
+ static constexpr char const* name() noexcept { return "sse2"; }
+ };
+
+#if XSIMD_WITH_SSE2
+ namespace types
+ {
+ XSIMD_DECLARE_SIMD_REGISTER(signed char, sse2, __m128i);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned char, sse2, __m128i);
+ XSIMD_DECLARE_SIMD_REGISTER(char, sse2, __m128i);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned short, sse2, __m128i);
+ XSIMD_DECLARE_SIMD_REGISTER(short, sse2, __m128i);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned int, sse2, __m128i);
+ XSIMD_DECLARE_SIMD_REGISTER(int, sse2, __m128i);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned long int, sse2, __m128i);
+ XSIMD_DECLARE_SIMD_REGISTER(long int, sse2, __m128i);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned long long int, sse2, __m128i);
+ XSIMD_DECLARE_SIMD_REGISTER(long long int, sse2, __m128i);
+ XSIMD_DECLARE_SIMD_REGISTER(float, sse2, __m128);
+ XSIMD_DECLARE_SIMD_REGISTER(double, sse2, __m128d);
+ }
+#endif
+}
+
+#endif
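The declarations above fix the native storage behind each scalar type on SSE2. A minimal sketch of what they resolve to, assuming XSIMD_WITH_SSE2 is set:

    #include <type_traits>
    #include "xsimd_sse2_register.hpp"

    #if XSIMD_WITH_SSE2
    static_assert(std::is_same<xsimd::types::simd_register<float, xsimd::sse2>::register_type, __m128>::value,
                  "float lanes live in __m128");
    static_assert(std::is_same<xsimd::types::simd_register<int, xsimd::sse2>::register_type, __m128i>::value,
                  "all integer widths share __m128i");
    #endif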
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_sse3_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_sse3_register.hpp
new file mode 100644
index 0000000000..1a7708a896
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_sse3_register.hpp
@@ -0,0 +1,45 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_SSE3_REGISTER_HPP
+#define XSIMD_SSE3_REGISTER_HPP
+
+#include "./xsimd_sse2_register.hpp"
+
+#if XSIMD_WITH_SSE3
+#include <pmmintrin.h>
+#endif
+
+namespace xsimd
+{
+ /**
+ * @ingroup architectures
+ *
+ * SSE3 instructions
+ */
+ struct sse3 : sse2
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_SSE3; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(1, 3, 0); }
+ static constexpr char const* name() noexcept { return "sse3"; }
+ };
+
+#if XSIMD_WITH_SSE3
+ namespace types
+ {
+
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(sse3, sse2);
+ }
+#endif
+}
+
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp
new file mode 100644
index 0000000000..d906712d56
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp
@@ -0,0 +1,44 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_SSE4_1_REGISTER_HPP
+#define XSIMD_SSE4_1_REGISTER_HPP
+
+#include "./xsimd_ssse3_register.hpp"
+
+#if XSIMD_WITH_SSE4_1
+#include <smmintrin.h>
+#endif
+
+namespace xsimd
+{
+ /**
+ * @ingroup architectures
+ *
+ * SSE4.1 instructions
+ */
+ struct sse4_1 : ssse3
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_SSE4_1; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(1, 4, 1); }
+ static constexpr char const* name() noexcept { return "sse4.1"; }
+ };
+
+#if XSIMD_WITH_SSE4_1
+ namespace types
+ {
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(sse4_1, ssse3);
+ }
+#endif
+}
+
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp
new file mode 100644
index 0000000000..b3446c9091
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp
@@ -0,0 +1,44 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_SSE4_2_REGISTER_HPP
+#define XSIMD_SSE4_2_REGISTER_HPP
+
+#include "./xsimd_sse4_1_register.hpp"
+
+#if XSIMD_WITH_SSE4_2
+#include <nmmintrin.h>
+#endif
+
+namespace xsimd
+{
+ /**
+ * @ingroup architectures
+ *
+ * SSE4.2 instructions
+ */
+ struct sse4_2 : sse4_1
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_SSE4_2; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(1, 4, 2); }
+ static constexpr char const* name() noexcept { return "sse4.2"; }
+ };
+
+#if XSIMD_WITH_SSE4_2
+ namespace types
+ {
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(sse4_2, sse4_1);
+ }
+#endif
+}
+
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp
new file mode 100644
index 0000000000..50ffac1e06
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp
@@ -0,0 +1,44 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_SSSE3_REGISTER_HPP
+#define XSIMD_SSSE3_REGISTER_HPP
+
+#include "./xsimd_sse3_register.hpp"
+
+#if XSIMD_WITH_SSSE3
+#include <tmmintrin.h>
+#endif
+
+namespace xsimd
+{
+ /**
+ * @ingroup architectures
+ *
+ * SSSE3 instructions
+ */
+ struct ssse3 : sse3
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_SSSE3; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(1, 3, 1); }
+ static constexpr char const* name() noexcept { return "ssse3"; }
+ };
+
+#if XSIMD_WITH_SSSE3
+ namespace types
+ {
+ XSIMD_DECLARE_SIMD_REGISTER_ALIAS(ssse3, sse3);
+ }
+#endif
+}
+
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_sve_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_sve_register.hpp
new file mode 100644
index 0000000000..4f75c607e8
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_sve_register.hpp
@@ -0,0 +1,157 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * Copyright (c) Yibo Cai *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_SVE_REGISTER_HPP
+#define XSIMD_SVE_REGISTER_HPP
+
+#include "xsimd_generic_arch.hpp"
+#include "xsimd_register.hpp"
+
+#if XSIMD_WITH_SVE
+#include <arm_sve.h>
+#endif
+
+namespace xsimd
+{
+ namespace detail
+ {
+ /**
+ * @ingroup architectures
+ *
+ * SVE instructions (fixed vector size) for arm64
+ */
+ template <size_t Width>
+ struct sve : xsimd::generic
+ {
+ static constexpr bool supported() noexcept { return Width == XSIMD_SVE_BITS; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr bool requires_alignment() noexcept { return true; }
+ static constexpr std::size_t alignment() noexcept { return 16; }
+ static constexpr unsigned version() noexcept { return generic::version(9, Width / 32, 0); }
+ static constexpr char const* name() noexcept { return "arm64+sve"; }
+ };
+ }
+
+#if XSIMD_WITH_SVE
+
+ using sve = detail::sve<__ARM_FEATURE_SVE_BITS>;
+
+ namespace types
+ {
+ namespace detail
+ {
+// define a fixed-size alias for each SVE sizeless type
+#define SVE_TO_FIXED_SIZE(ty) ty __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)))
+ using sve_int8_t = SVE_TO_FIXED_SIZE(svint8_t);
+ using sve_uint8_t = SVE_TO_FIXED_SIZE(svuint8_t);
+ using sve_int16_t = SVE_TO_FIXED_SIZE(svint16_t);
+ using sve_uint16_t = SVE_TO_FIXED_SIZE(svuint16_t);
+ using sve_int32_t = SVE_TO_FIXED_SIZE(svint32_t);
+ using sve_uint32_t = SVE_TO_FIXED_SIZE(svuint32_t);
+ using sve_int64_t = SVE_TO_FIXED_SIZE(svint64_t);
+ using sve_uint64_t = SVE_TO_FIXED_SIZE(svuint64_t);
+ using sve_float32_t = SVE_TO_FIXED_SIZE(svfloat32_t);
+ using sve_float64_t = SVE_TO_FIXED_SIZE(svfloat64_t);
+ using sve_bool_t = SVE_TO_FIXED_SIZE(svbool_t);
+#undef SVE_TO_FIXED_SIZE
+
+ template <size_t S>
+ struct sve_vector_type_impl;
+
+ template <>
+ struct sve_vector_type_impl<8>
+ {
+ using signed_type = sve_int8_t;
+ using unsigned_type = sve_uint8_t;
+ using floating_point_type = void;
+ };
+
+ template <>
+ struct sve_vector_type_impl<16>
+ {
+ using signed_type = sve_int16_t;
+ using unsigned_type = sve_uint16_t;
+ using floating_point_type = void;
+ };
+
+ template <>
+ struct sve_vector_type_impl<32>
+ {
+ using signed_type = sve_int32_t;
+ using unsigned_type = sve_uint32_t;
+ using floating_point_type = sve_float32_t;
+ };
+
+ template <>
+ struct sve_vector_type_impl<64>
+ {
+ using signed_type = sve_int64_t;
+ using unsigned_type = sve_uint64_t;
+ using floating_point_type = sve_float64_t;
+ };
+
+ template <class T>
+ using signed_int_sve_vector_type = typename sve_vector_type_impl<8 * sizeof(T)>::signed_type;
+
+ template <class T>
+ using unsigned_int_sve_vector_type = typename sve_vector_type_impl<8 * sizeof(T)>::unsigned_type;
+
+ template <class T>
+ using floating_point_sve_vector_type = typename sve_vector_type_impl<8 * sizeof(T)>::floating_point_type;
+
+ template <class T>
+ using signed_int_or_floating_point_sve_vector_type = typename std::conditional<std::is_floating_point<T>::value,
+ floating_point_sve_vector_type<T>,
+ signed_int_sve_vector_type<T>>::type;
+
+ template <class T>
+ using sve_vector_type = typename std::conditional<std::is_signed<T>::value,
+ signed_int_or_floating_point_sve_vector_type<T>,
+ unsigned_int_sve_vector_type<T>>::type;
+ } // namespace detail
+
+ XSIMD_DECLARE_SIMD_REGISTER(signed char, sve, detail::sve_vector_type<signed char>);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned char, sve, detail::sve_vector_type<unsigned char>);
+ XSIMD_DECLARE_SIMD_REGISTER(char, sve, detail::sve_vector_type<char>);
+ XSIMD_DECLARE_SIMD_REGISTER(short, sve, detail::sve_vector_type<short>);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned short, sve, detail::sve_vector_type<unsigned short>);
+ XSIMD_DECLARE_SIMD_REGISTER(int, sve, detail::sve_vector_type<int>);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned int, sve, detail::sve_vector_type<unsigned int>);
+ XSIMD_DECLARE_SIMD_REGISTER(long int, sve, detail::sve_vector_type<long int>);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned long int, sve, detail::sve_vector_type<unsigned long int>);
+ XSIMD_DECLARE_SIMD_REGISTER(long long int, sve, detail::sve_vector_type<long long int>);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned long long int, sve, detail::sve_vector_type<unsigned long long int>);
+ XSIMD_DECLARE_SIMD_REGISTER(float, sve, detail::sve_vector_type<float>);
+ XSIMD_DECLARE_SIMD_REGISTER(double, sve, detail::sve_vector_type<double>);
+
+ namespace detail
+ {
+ struct sve_bool_simd_register
+ {
+ using register_type = sve_bool_t;
+ register_type data;
+ operator register_type() const noexcept { return data; }
+ };
+ } // namespace detail
+
+ template <class T>
+ struct get_bool_simd_register<T, sve>
+ {
+ using type = detail::sve_bool_simd_register;
+ };
+ } // namespace types
+#else
+ using sve = detail::sve<0xFFFFFFFF>;
+#endif
+} // namespace xsimd
+
+#endif
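A small check of the fixed-size mapping above; the sketch assumes a compiler invoked with -msve-vector-bits so that XSIMD_WITH_SVE and __ARM_FEATURE_SVE_BITS are set.

    #include <type_traits>
    #include "xsimd_sve_register.hpp"

    #if XSIMD_WITH_SVE
    static_assert(std::is_same<xsimd::types::detail::sve_vector_type<float>,
                               xsimd::types::detail::sve_float32_t>::value,
                  "float maps to the fixed-size svfloat32_t alias");
    static_assert(std::is_same<xsimd::types::detail::sve_vector_type<unsigned short>,
                               xsimd::types::detail::sve_uint16_t>::value,
                  "unsigned short maps to the fixed-size svuint16_t alias");
    #endif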
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_traits.hpp b/third_party/xsimd/include/xsimd/types/xsimd_traits.hpp
new file mode 100644
index 0000000000..f848aab1f7
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_traits.hpp
@@ -0,0 +1,319 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_TRAITS_HPP
+#define XSIMD_TRAITS_HPP
+
+#include <type_traits>
+
+#include "xsimd_batch.hpp"
+
+/**
+ * High-level type traits.
+ *
+ * @defgroup batch_traits Type traits
+ *
+ **/
+
+namespace xsimd
+{
+
+ /**************************************
+ * simd_traits and revert_simd_traits *
+ **************************************/
+
+ template <class T, class A = default_arch>
+ struct has_simd_register : types::has_simd_register<T, A>
+ {
+ };
+
+ namespace detail
+ {
+ template <class T, bool>
+ struct simd_traits_impl;
+
+ template <class T>
+ struct simd_traits_impl<T, false>
+ {
+ using type = T;
+ using bool_type = bool;
+ static constexpr size_t size = 1;
+ };
+
+ template <class T>
+ constexpr size_t simd_traits_impl<T, false>::size;
+
+ template <class T>
+ struct simd_traits_impl<T, true>
+ {
+ using type = batch<T>;
+ using bool_type = typename type::batch_bool_type;
+ static constexpr size_t size = type::size;
+ };
+
+ template <class T>
+ constexpr size_t simd_traits_impl<T, true>::size;
+
+ template <class T, class A>
+ struct static_check_supported_config_emitter
+ {
+
+ static_assert(A::supported(),
+ "usage of batch type with unsupported architecture");
+ static_assert(!A::supported() || xsimd::has_simd_register<T, A>::value,
+ "usage of batch type with unsupported type");
+ };
+
+ template <class T, class A>
+ struct static_check_supported_config_emitter<std::complex<T>, A> : static_check_supported_config_emitter<T, A>
+ {
+ };
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+ template <class T, class A, bool i3ec>
+ struct static_check_supported_config_emitter<xtl::xcomplex<T, T, i3ec>, A> : static_check_supported_config_emitter<T, A>
+ {
+ };
+#endif
+
+ // consistency checker
+ template <class T, class A>
+ inline void static_check_supported_config()
+ {
+ (void)static_check_supported_config_emitter<T, A>();
+ }
+ }
+
+ template <class T>
+ struct simd_traits : detail::simd_traits_impl<T, xsimd::has_simd_register<T>::value>
+ {
+ };
+
+ template <class T>
+ struct simd_traits<std::complex<T>>
+ : detail::simd_traits_impl<std::complex<T>, xsimd::has_simd_register<T>::value>
+ {
+ };
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+ template <class T, bool i3ec>
+ struct simd_traits<xtl::xcomplex<T, T, i3ec>>
+ : detail::simd_traits_impl<std::complex<T>, xsimd::has_simd_register<T>::value>
+ {
+ };
+#endif
+
+ template <class T>
+ struct revert_simd_traits
+ {
+ using type = T;
+ static constexpr size_t size = simd_traits<type>::size;
+ };
+
+ template <class T>
+ constexpr size_t revert_simd_traits<T>::size;
+
+ template <class T>
+ struct revert_simd_traits<batch<T>>
+ {
+ using type = T;
+ static constexpr size_t size = batch<T>::size;
+ };
+
+ template <class T>
+ constexpr size_t revert_simd_traits<batch<T>>::size;
+
+ template <class T>
+ using simd_type = typename simd_traits<T>::type;
+
+ template <class T>
+ using simd_bool_type = typename simd_traits<T>::bool_type;
+
+ template <class T>
+ using revert_simd_type = typename revert_simd_traits<T>::type;
+
+ /********************
+ * simd_return_type *
+ ********************/
+
+ namespace detail
+ {
+ template <class T1, class T2>
+ struct simd_condition
+ {
+ static constexpr bool value = (std::is_same<T1, T2>::value && !std::is_same<T1, bool>::value) || (std::is_same<T1, bool>::value && !std::is_same<T2, bool>::value) || std::is_same<T1, float>::value || std::is_same<T1, double>::value || std::is_same<T1, int8_t>::value || std::is_same<T1, uint8_t>::value || std::is_same<T1, int16_t>::value || std::is_same<T1, uint16_t>::value || std::is_same<T1, int32_t>::value || std::is_same<T1, uint32_t>::value || std::is_same<T1, int64_t>::value || std::is_same<T1, uint64_t>::value || std::is_same<T1, char>::value || detail::is_complex<T1>::value;
+ };
+
+ template <class T1, class T2, class A>
+ struct simd_return_type_impl
+ : std::enable_if<simd_condition<T1, T2>::value, batch<T2, A>>
+ {
+ };
+
+ template <class T2, class A>
+ struct simd_return_type_impl<bool, T2, A>
+ : std::enable_if<simd_condition<bool, T2>::value, batch_bool<T2, A>>
+ {
+ };
+
+ template <class T2, class A>
+ struct simd_return_type_impl<bool, std::complex<T2>, A>
+ : std::enable_if<simd_condition<bool, T2>::value, batch_bool<T2, A>>
+ {
+ };
+
+ template <class T1, class T2, class A>
+ struct simd_return_type_impl<std::complex<T1>, T2, A>
+ : std::enable_if<simd_condition<T1, T2>::value, batch<std::complex<T2>, A>>
+ {
+ };
+
+ template <class T1, class T2, class A>
+ struct simd_return_type_impl<std::complex<T1>, std::complex<T2>, A>
+ : std::enable_if<simd_condition<T1, T2>::value, batch<std::complex<T2>, A>>
+ {
+ };
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+ template <class T1, class T2, bool I3EC, class A>
+ struct simd_return_type_impl<xtl::xcomplex<T1, T1, I3EC>, T2, A>
+ : std::enable_if<simd_condition<T1, T2>::value, batch<std::complex<T2>, A>>
+ {
+ };
+
+ template <class T1, class T2, bool I3EC, class A>
+ struct simd_return_type_impl<xtl::xcomplex<T1, T1, I3EC>, std::complex<T2>, A>
+ : std::enable_if<simd_condition<T1, T2>::value, batch<std::complex<T2>, A>>
+ {
+ };
+
+ template <class T1, class T2, bool I3EC, class A>
+ struct simd_return_type_impl<xtl::xcomplex<T1, T1, I3EC>, xtl::xcomplex<T2, T2, I3EC>, A>
+ : std::enable_if<simd_condition<T1, T2>::value, batch<std::complex<T2>, A>>
+ {
+ };
+
+ template <class T1, class T2, bool I3EC, class A>
+ struct simd_return_type_impl<std::complex<T1>, xtl::xcomplex<T2, T2, I3EC>, A>
+ : std::enable_if<simd_condition<T1, T2>::value, batch<std::complex<T2>, A>>
+ {
+ };
+#endif
+ }
+
+ template <class T1, class T2, class A = default_arch>
+ using simd_return_type = typename detail::simd_return_type_impl<T1, T2, A>::type;
+
+ /**
+ * @ingroup batch_traits
+ *
+ * type trait that inherits from @c std::true_type for @c batch<...> types and from
+ * @c std::false_type otherwise.
+ *
+ * @tparam T type to analyze.
+ */
+ template <class T>
+ struct is_batch;
+
+ template <class T>
+ struct is_batch : std::false_type
+ {
+ };
+
+ template <class T, class A>
+ struct is_batch<batch<T, A>> : std::true_type
+ {
+ };
+
+ /**
+ * @ingroup batch_traits
+ *
+ * type trait that inherits from @c std::true_type for @c batch_bool<...> types and from
+ * @c std::false_type otherwise.
+ *
+ * @tparam T type to analyze.
+ */
+
+ template <class T>
+ struct is_batch_bool : std::false_type
+ {
+ };
+
+ template <class T, class A>
+ struct is_batch_bool<batch_bool<T, A>> : std::true_type
+ {
+ };
+
+ /**
+ * @ingroup batch_traits
+ *
+ * type trait that inherits from @c std::true_type for @c batch<std::complex<...>>
+ * types and from @c std::false_type otherwise.
+ *
+ * @tparam T type to analyze.
+ */
+
+ template <class T>
+ struct is_batch_complex : std::false_type
+ {
+ };
+
+ template <class T, class A>
+ struct is_batch_complex<batch<std::complex<T>, A>> : std::true_type
+ {
+ };
+
+ /**
+ * @ingroup batch_traits
+ *
+ * type trait whose @c type field is set to @c T::value_type if @c
+ * is_batch<T>::value and to @c T otherwise.
+ *
+ * @tparam T type to analyze.
+ */
+ template <class T>
+ struct scalar_type
+ {
+ using type = T;
+ };
+ template <class T, class A>
+ struct scalar_type<batch<T, A>>
+ {
+ using type = T;
+ };
+
+ template <class T>
+ using scalar_type_t = typename scalar_type<T>::type;
+
+ /**
+ * @ingroup batch_traits
+ *
+ * type trait whose @c type field is set to @c T::batch_bool_type if @c
+ * is_batch<T>::value and to @c bool otherwise.
+ *
+ * @tparam T type to analyze.
+ */
+ template <class T>
+ struct mask_type
+ {
+ using type = bool;
+ };
+ template <class T, class A>
+ struct mask_type<batch<T, A>>
+ {
+ using type = typename batch<T, A>::batch_bool_type;
+ };
+
+ template <class T>
+ using mask_type_t = typename mask_type<T>::type;
+}
+
+#endif
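A few illustrative uses of the traits defined above; this is a sketch that assumes the default architecture provides a float register (so that simd_type<float> resolves to batch<float>), and it only exercises the traits shown in this header.

    #include <type_traits>
    #include "xsimd_traits.hpp"

    static_assert(xsimd::is_batch<xsimd::batch<float>>::value, "batch<float> is a batch");
    static_assert(!xsimd::is_batch<float>::value, "a plain scalar is not a batch");
    static_assert(std::is_same<xsimd::scalar_type_t<xsimd::batch<float>>, float>::value,
                  "scalar_type_t strips the batch wrapper");
    static_assert(std::is_same<xsimd::mask_type_t<float>, bool>::value,
                  "scalars use plain bool as their mask type");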
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_utils.hpp b/third_party/xsimd/include/xsimd/types/xsimd_utils.hpp
new file mode 100644
index 0000000000..aa890f2410
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_utils.hpp
@@ -0,0 +1,530 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_UTILS_HPP
+#define XSIMD_UTILS_HPP
+
+#include <complex>
+#include <cstdint>
+#include <cstring>
+#include <tuple>
+#include <type_traits>
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+#include "xtl/xcomplex.hpp"
+#endif
+
+namespace xsimd
+{
+
+ template <class T, class A>
+ class batch;
+
+ template <class T, class A>
+ class batch_bool;
+
+ /**************
+ * index *
+ **************/
+
+ template <size_t I>
+ using index = std::integral_constant<size_t, I>;
+
+ /**************
+ * as_integer *
+ **************/
+
+ template <class T>
+ struct as_integer : std::make_signed<T>
+ {
+ };
+
+ template <>
+ struct as_integer<float>
+ {
+ using type = int32_t;
+ };
+
+ template <>
+ struct as_integer<double>
+ {
+ using type = int64_t;
+ };
+
+ template <class T, class A>
+ struct as_integer<batch<T, A>>
+ {
+ using type = batch<typename as_integer<T>::type, A>;
+ };
+
+ template <class B>
+ using as_integer_t = typename as_integer<B>::type;
+
+ /***********************
+ * as_unsigned_integer *
+ ***********************/
+
+ template <class T>
+ struct as_unsigned_integer : std::make_unsigned<T>
+ {
+ };
+
+ template <>
+ struct as_unsigned_integer<float>
+ {
+ using type = uint32_t;
+ };
+
+ template <>
+ struct as_unsigned_integer<double>
+ {
+ using type = uint64_t;
+ };
+
+ template <class T, class A>
+ struct as_unsigned_integer<batch<T, A>>
+ {
+ using type = batch<typename as_unsigned_integer<T>::type, A>;
+ };
+
+ template <class T>
+ using as_unsigned_integer_t = typename as_unsigned_integer<T>::type;
+
+ /*********************
+ * as_signed_integer *
+ *********************/
+
+ template <class T>
+ struct as_signed_integer : std::make_signed<T>
+ {
+ };
+
+ template <class T>
+ using as_signed_integer_t = typename as_signed_integer<T>::type;
+
+ /******************
+ * flip_sign_type *
+ ******************/
+
+ namespace detail
+ {
+ template <class T, bool is_signed>
+ struct flipped_sign_type_impl : std::make_signed<T>
+ {
+ };
+
+ template <class T>
+ struct flipped_sign_type_impl<T, true> : std::make_unsigned<T>
+ {
+ };
+ }
+
+ template <class T>
+ struct flipped_sign_type
+ : detail::flipped_sign_type_impl<T, std::is_signed<T>::value>
+ {
+ };
+
+ template <class T>
+ using flipped_sign_type_t = typename flipped_sign_type<T>::type;
+
+ /************
+ * as_float *
+ ************/
+
+ template <class T>
+ struct as_float;
+
+ template <>
+ struct as_float<int32_t>
+ {
+ using type = float;
+ };
+
+ template <>
+ struct as_float<int64_t>
+ {
+ using type = double;
+ };
+
+ template <class T, class A>
+ struct as_float<batch<T, A>>
+ {
+ using type = batch<typename as_float<T>::type, A>;
+ };
+
+ template <class T>
+ using as_float_t = typename as_float<T>::type;
+
+ /**************
+ * as_logical *
+ **************/
+
+ template <class T>
+ struct as_logical;
+
+ template <class T, class A>
+ struct as_logical<batch<T, A>>
+ {
+ using type = batch_bool<T, A>;
+ };
+
+ template <class T>
+ using as_logical_t = typename as_logical<T>::type;
+
+ /********************
+ * bit_cast *
+ ********************/
+
+ template <class To, class From>
+ inline To bit_cast(From val) noexcept
+ {
+ static_assert(sizeof(From) == sizeof(To), "casting between compatible layout");
+ // FIXME: Some old version of GCC don't support that trait
+ // static_assert(std::is_trivially_copyable<From>::value, "input type is trivially copyable");
+ // static_assert(std::is_trivially_copyable<To>::value, "output type is trivially copyable");
+ To res;
+ std::memcpy(&res, &val, sizeof(val));
+ return res;
+ }
+
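bit_cast above gives a strict-aliasing-safe way to reinterpret an object's representation. A minimal usage sketch; float_bits is a hypothetical helper written for illustration, not part of xsimd.

    #include <cstdint>

    inline std::uint32_t float_bits(float x) noexcept
    {
        // Same size on both ends, so the memcpy inside xsimd::bit_cast is well defined.
        return xsimd::bit_cast<std::uint32_t>(x);
    }
    // Round-tripping restores the exact bit pattern:
    //   xsimd::bit_cast<float>(float_bits(1.5f)) == 1.5f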
+ namespace kernel
+ {
+ namespace detail
+ {
+ /**************************************
+ * enabling / disabling metafunctions *
+ **************************************/
+
+ template <class T>
+ using enable_integral_t = typename std::enable_if<std::is_integral<T>::value, int>::type;
+
+ template <class T, size_t S>
+ using enable_sized_signed_t = typename std::enable_if<std::is_integral<T>::value && std::is_signed<T>::value && sizeof(T) == S, int>::type;
+
+ template <class T, size_t S>
+ using enable_sized_unsigned_t = typename std::enable_if<std::is_integral<T>::value && !std::is_signed<T>::value && sizeof(T) == S, int>::type;
+
+ template <class T, size_t S>
+ using enable_sized_integral_t = typename std::enable_if<std::is_integral<T>::value && sizeof(T) == S, int>::type;
+
+ template <class T, size_t S>
+ using enable_sized_t = typename std::enable_if<sizeof(T) == S, int>::type;
+
+ template <class T, size_t S>
+ using enable_max_sized_integral_t = typename std::enable_if<std::is_integral<T>::value && sizeof(T) <= S, int>::type;
+
+ /********************************
+ * Matching & mismatching sizes *
+ ********************************/
+
+ template <class T, class U, class B = int>
+ using sizes_match_t = typename std::enable_if<sizeof(T) == sizeof(U), B>::type;
+
+ template <class T, class U, class B = int>
+ using sizes_mismatch_t = typename std::enable_if<sizeof(T) != sizeof(U), B>::type;
+
+ template <class T, class U, class B = int>
+ using stride_match_t = typename std::enable_if<!std::is_same<T, U>::value && sizeof(T) == sizeof(U), B>::type;
+ } // namespace detail
+ } // namespace kernel
+
+ /*****************************************
+ * Backport of index_sequence from c++14 *
+ *****************************************/
+
+ // TODO: Remove this once we drop C++11 support
+ namespace detail
+ {
+ template <typename T>
+ struct identity
+ {
+ using type = T;
+ };
+
+#ifdef __cpp_lib_integer_sequence
+ using std::index_sequence;
+ using std::integer_sequence;
+ using std::make_index_sequence;
+ using std::make_integer_sequence;
+
+ using std::index_sequence_for;
+#else
+ template <typename T, T... Is>
+ struct integer_sequence
+ {
+ using value_type = T;
+ static constexpr std::size_t size() noexcept { return sizeof...(Is); }
+ };
+
+ template <typename Lhs, typename Rhs>
+ struct make_integer_sequence_concat;
+
+ template <typename T, T... Lhs, T... Rhs>
+ struct make_integer_sequence_concat<integer_sequence<T, Lhs...>,
+ integer_sequence<T, Rhs...>>
+ : identity<integer_sequence<T, Lhs..., (sizeof...(Lhs) + Rhs)...>>
+ {
+ };
+
+ template <typename T>
+ struct make_integer_sequence_impl;
+
+ template <typename T>
+ struct make_integer_sequence_impl<std::integral_constant<T, (T)0>> : identity<integer_sequence<T>>
+ {
+ };
+
+ template <typename T>
+ struct make_integer_sequence_impl<std::integral_constant<T, (T)1>> : identity<integer_sequence<T, 0>>
+ {
+ };
+
+ template <typename T, T N>
+ struct make_integer_sequence_impl<std::integral_constant<T, N>>
+ : make_integer_sequence_concat<typename make_integer_sequence_impl<std::integral_constant<T, N / 2>>::type,
+ typename make_integer_sequence_impl<std::integral_constant<T, N - (N / 2)>>::type>
+ {
+ };
+
+ template <typename T, T N>
+ using make_integer_sequence = typename make_integer_sequence_impl<std::integral_constant<T, N>>::type;
+
+ template <std::size_t... Is>
+ using index_sequence = integer_sequence<std::size_t, Is...>;
+
+ template <std::size_t N>
+ using make_index_sequence = make_integer_sequence<std::size_t, N>;
+
+ template <typename... Ts>
+ using index_sequence_for = make_index_sequence<sizeof...(Ts)>;
+
+#endif
+
+ template <int... Is>
+ using int_sequence = integer_sequence<int, Is...>;
+
+ template <int N>
+ using make_int_sequence = make_integer_sequence<int, N>;
+
+ template <typename... Ts>
+ using int_sequence_for = make_int_sequence<(int)sizeof...(Ts)>;
+
+ // Type-casted index sequence.
+ template <class P, size_t... Is>
+ inline P indexes_from(index_sequence<Is...>) noexcept
+ {
+ return { static_cast<typename P::value_type>(Is)... };
+ }
+
+ template <class P>
+ inline P make_sequence_as_batch() noexcept
+ {
+ return indexes_from<P>(make_index_sequence<P::size>());
+ }
+ }
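+
+    // Exposition-only check (not from upstream xsimd): whichever branch above is
+    // taken, make_index_sequence<N> names the sequence 0, 1, ..., N-1, and
+    // make_sequence_as_batch<P>() materializes those indices as a P value.
+    static_assert(std::is_same<detail::make_index_sequence<3>, detail::index_sequence<0, 1, 2>>::value,
+                  "make_index_sequence<3> is the sequence 0, 1, 2");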
+
+ /***********************************
+ * Backport of std::get from C++14 *
+ ***********************************/
+
+ namespace detail
+ {
+ template <class T, class... Types, size_t I, size_t... Is>
+ inline const T& get_impl(const std::tuple<Types...>& t, std::is_same<T, T>, index_sequence<I, Is...>) noexcept
+ {
+ return std::get<I>(t);
+ }
+
+ template <class T, class U, class... Types, size_t I, size_t... Is>
+ inline const T& get_impl(const std::tuple<Types...>& t, std::is_same<T, U>, index_sequence<I, Is...>) noexcept
+ {
+ using tuple_elem = typename std::tuple_element<I + 1, std::tuple<Types...>>::type;
+ return get_impl<T>(t, std::is_same<T, tuple_elem>(), index_sequence<Is...>());
+ }
+
+ template <class T, class... Types>
+ inline const T& get(const std::tuple<Types...>& t) noexcept
+ {
+ using tuple_elem = typename std::tuple_element<0, std::tuple<Types...>>::type;
+ return get_impl<T>(t, std::is_same<T, tuple_elem>(), make_index_sequence<sizeof...(Types)>());
+ }
+ }
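+
+    // Usage note (added for exposition): detail::get<T>(t) returns the first tuple
+    // element whose type is exactly T, walking the element list left to right,
+    // e.g. detail::get<double>(std::make_tuple(1, 2.5, 'c')) yields 2.5.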
+
+ /*********************************
+ * Backport of void_t from C++17 *
+ *********************************/
+
+ namespace detail
+ {
+ template <class... T>
+ struct make_void
+ {
+ using type = void;
+ };
+
+ template <class... T>
+ using void_t = typename make_void<T...>::type;
+ }
+
+ /**************************************************
+ * Equivalent of void_t but with size_t parameter *
+ **************************************************/
+
+ namespace detail
+ {
+ template <std::size_t>
+ struct check_size
+ {
+ using type = void;
+ };
+
+ template <std::size_t S>
+ using check_size_t = typename check_size<S>::type;
+ }
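+
+    // Usage note (added for exposition): void_t and check_size_t drive the usual
+    // detection idiom; a partial specialization constrained by
+    // void_t<decltype(expr)> (or by check_size_t<sizeof(T)>) is selected only
+    // when the expression (or the size computation) is well-formed.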
+
+ /*****************************************
+ * Supplementary std::array constructors *
+ *****************************************/
+
+ namespace detail
+ {
+ // std::array constructor from scalar value ("broadcast")
+ template <typename T, std::size_t... Is>
+ inline constexpr std::array<T, sizeof...(Is)>
+ array_from_scalar_impl(const T& scalar, index_sequence<Is...>) noexcept
+ {
+            // The ternary `(Is + 1) ? scalar : T()` always evaluates to `scalar`; it is
+            // only there so that the parameter pack `Is` appears in the expression and
+            // can be expanded, repeating `scalar` sizeof...(Is) times.
+ return std::array<T, sizeof...(Is)> { (Is + 1) ? scalar : T()... };
+ }
+
+ template <typename T, std::size_t N>
+ inline constexpr std::array<T, N>
+ array_from_scalar(const T& scalar) noexcept
+ {
+ return array_from_scalar_impl(scalar, make_index_sequence<N>());
+ }
+
+ // std::array constructor from C-style pointer (handled as an array)
+ template <typename T, std::size_t... Is>
+ inline constexpr std::array<T, sizeof...(Is)>
+ array_from_pointer_impl(const T* c_array, index_sequence<Is...>) noexcept
+ {
+ return std::array<T, sizeof...(Is)> { c_array[Is]... };
+ }
+
+ template <typename T, std::size_t N>
+ inline constexpr std::array<T, N>
+ array_from_pointer(const T* c_array) noexcept
+ {
+ return array_from_pointer_impl(c_array, make_index_sequence<N>());
+ }
+ }
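+
+    // Usage note (added for exposition): array_from_scalar<int, 4>(7) produces the
+    // std::array<int, 4> {7, 7, 7, 7}, and array_from_pointer<float, 4>(p) copies
+    // p[0] .. p[3] into a std::array<float, 4>; both are declared constexpr.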
+
+ /************************
+ * is_array_initializer *
+ ************************/
+
+ namespace detail
+ {
+ template <bool...>
+ struct bool_pack;
+
+ template <bool... bs>
+ using all_true = std::is_same<
+ bool_pack<bs..., true>, bool_pack<true, bs...>>;
+
+ template <typename T, typename... Args>
+ using is_all_convertible = all_true<std::is_convertible<Args, T>::value...>;
+
+ template <typename T, std::size_t N, typename... Args>
+ using is_array_initializer = std::enable_if<
+ (sizeof...(Args) == N) && is_all_convertible<T, Args...>::value>;
+
+ // Check that a variadic argument pack is a list of N values of type T,
+ // as usable for instantiating a value of type std::array<T, N>.
+ template <typename T, std::size_t N, typename... Args>
+ using is_array_initializer_t = typename is_array_initializer<T, N, Args...>::type;
+ }
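+
+    // Exposition-only check (not from upstream xsimd): is_array_initializer_t is
+    // meant as a defaulted template parameter on std::array-style variadic
+    // constructors; it names void only when the pack holds exactly N arguments,
+    // each convertible to T.
+    static_assert(detail::is_all_convertible<double, int, float, char>::value,
+                  "int, float and char all convert to double");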
+
+ /**************
+ * is_complex *
+ **************/
+
+    // is_complex is used in both xsimd_complex_base.hpp and xsimd_traits.hpp.
+    // However, xsimd_traits.hpp indirectly includes xsimd_complex_base.hpp,
+    // so we cannot define is_complex in xsimd_traits.hpp. Besides, even if
+    // no file defining batches is included, xsimd_traits.hpp still needs this
+    // definition, so it lives here.
+
+ namespace detail
+ {
+ template <class T>
+ struct is_complex : std::false_type
+ {
+ };
+
+ template <class T>
+ struct is_complex<std::complex<T>> : std::true_type
+ {
+ };
+
+#ifdef XSIMD_ENABLE_XTL_COMPLEX
+ template <class T, bool i3ec>
+ struct is_complex<xtl::xcomplex<T, T, i3ec>> : std::true_type
+ {
+ };
+#endif
+ }
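+
+    // Exposition-only check (not from upstream xsimd): only the std::complex (and,
+    // when enabled, xtl::xcomplex) specializations report true.
+    static_assert(detail::is_complex<std::complex<double>>::value && !detail::is_complex<double>::value,
+                  "is_complex distinguishes complex from real scalars");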
+
+ /*******************
+ * real_batch_type *
+ *******************/
+
+ template <class B>
+ struct real_batch_type
+ {
+ using type = B;
+ };
+
+ template <class T, class A>
+ struct real_batch_type<batch<std::complex<T>, A>>
+ {
+ using type = batch<T, A>;
+ };
+
+ template <class B>
+ using real_batch_type_t = typename real_batch_type<B>::type;
+
+ /**********************
+ * complex_batch_type *
+ **********************/
+
+ template <class B>
+ struct complex_batch_type
+ {
+ using real_value_type = typename B::value_type;
+ using arch_type = typename B::arch_type;
+ using type = batch<std::complex<real_value_type>, arch_type>;
+ };
+
+ template <class T, class A>
+ struct complex_batch_type<batch<std::complex<T>, A>>
+ {
+ using type = batch<std::complex<T>, A>;
+ };
+
+ template <class B>
+ using complex_batch_type_t = typename complex_batch_type<B>::type;
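+
+    // Usage note (added for exposition): real_batch_type_t<batch<std::complex<T>, A>>
+    // is batch<T, A> and is the identity on any other type, while
+    // complex_batch_type_t<batch<T, A>> is batch<std::complex<T>, A> and is the
+    // identity on batches that are already complex.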
+}
+
+#endif
diff --git a/third_party/xsimd/include/xsimd/types/xsimd_wasm_register.hpp b/third_party/xsimd/include/xsimd/types/xsimd_wasm_register.hpp
new file mode 100644
index 0000000000..237db95c6e
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/types/xsimd_wasm_register.hpp
@@ -0,0 +1,60 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * Copyright (c) Anutosh Bhat *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_WASM_REGISTER_HPP
+#define XSIMD_WASM_REGISTER_HPP
+
+#include "xsimd_generic_arch.hpp"
+#include "xsimd_register.hpp"
+
+#if XSIMD_WITH_WASM
+#include <wasm_simd128.h>
+#endif
+
+namespace xsimd
+{
+ /**
+ * @ingroup architectures
+ *
+ * WASM instructions
+ */
+ struct wasm : generic
+ {
+ static constexpr bool supported() noexcept { return XSIMD_WITH_WASM; }
+ static constexpr bool available() noexcept { return true; }
+ static constexpr bool requires_alignment() noexcept { return true; }
+ static constexpr unsigned version() noexcept { return generic::version(10, 0, 0); }
+ static constexpr std::size_t alignment() noexcept { return 16; }
+ static constexpr char const* name() noexcept { return "wasm"; }
+ };
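+
+    // Usage note (added for exposition): this descriptor is what batch<T, xsimd::wasm>
+    // refers to; when XSIMD_WITH_WASM is set, the declarations below back every such
+    // batch with a single 128-bit v128_t register, and alignment() reports the
+    // 16-byte alignment those loads and stores expect.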
+
+#if XSIMD_WITH_WASM
+ namespace types
+ {
+ XSIMD_DECLARE_SIMD_REGISTER(signed char, wasm, v128_t);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned char, wasm, v128_t);
+ XSIMD_DECLARE_SIMD_REGISTER(char, wasm, v128_t);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned short, wasm, v128_t);
+ XSIMD_DECLARE_SIMD_REGISTER(short, wasm, v128_t);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned int, wasm, v128_t);
+ XSIMD_DECLARE_SIMD_REGISTER(int, wasm, v128_t);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned long int, wasm, v128_t);
+ XSIMD_DECLARE_SIMD_REGISTER(long int, wasm, v128_t);
+ XSIMD_DECLARE_SIMD_REGISTER(unsigned long long int, wasm, v128_t);
+ XSIMD_DECLARE_SIMD_REGISTER(long long int, wasm, v128_t);
+ XSIMD_DECLARE_SIMD_REGISTER(float, wasm, v128_t);
+ XSIMD_DECLARE_SIMD_REGISTER(double, wasm, v128_t);
+ }
+#endif
+}
+
+#endif