diff options
Diffstat (limited to 'third_party/xsimd/include/xsimd/arch/xsimd_neon64.hpp')
-rw-r--r-- | third_party/xsimd/include/xsimd/arch/xsimd_neon64.hpp | 85 |
1 file changed, 84 insertions, 1 deletion
xsimd neon64 kernels: pass the caller's arch tag A (instead of a hard-coded
neon64) when broadcast() delegates to the neon implementation, and add
zip_lo / zip_hi overloads for 8-, 16- and 32-bit signed/unsigned integer
batches and for float batches, built on the vzip1q_* / vzip2q_* intrinsics.

NOTE(review): the flattened copy of this patch had lost all line breaks and
leading whitespace. The line structure below is rebuilt from the @@ hunk
counts; the 8/12-space indentation and the exact position of blank context
lines are assumed -- confirm against the upstream file, or apply with
whitespace-tolerant matching (e.g. `patch -l`).

diff --git a/third_party/xsimd/include/xsimd/arch/xsimd_neon64.hpp b/third_party/xsimd/include/xsimd/arch/xsimd_neon64.hpp
index bc982c7ce6..77538d1c2d 100644
--- a/third_party/xsimd/include/xsimd/arch/xsimd_neon64.hpp
+++ b/third_party/xsimd/include/xsimd/arch/xsimd_neon64.hpp
@@ -92,7 +92,7 @@ namespace xsimd
         template <class A, class T>
         inline batch<T, A> broadcast(T val, requires_arch<neon64>) noexcept
         {
-            return broadcast<neon64>(val, neon {});
+            return broadcast<A>(val, neon {});
         }
 
         template <class A>
@@ -952,6 +952,41 @@ namespace xsimd
         /**********
          * zip_lo *
          **********/
+        template <class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0>
+        inline batch<T, A> zip_lo(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon64>) noexcept
+        {
+            return vzip1q_u8(lhs, rhs);
+        }
+
+        template <class A, class T, detail::enable_sized_signed_t<T, 1> = 0>
+        inline batch<T, A> zip_lo(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon64>) noexcept
+        {
+            return vzip1q_s8(lhs, rhs);
+        }
+
+        template <class A, class T, detail::enable_sized_unsigned_t<T, 2> = 0>
+        inline batch<T, A> zip_lo(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon64>) noexcept
+        {
+            return vzip1q_u16(lhs, rhs);
+        }
+
+        template <class A, class T, detail::enable_sized_signed_t<T, 2> = 0>
+        inline batch<T, A> zip_lo(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon64>) noexcept
+        {
+            return vzip1q_s16(lhs, rhs);
+        }
+
+        template <class A, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
+        inline batch<T, A> zip_lo(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon64>) noexcept
+        {
+            return vzip1q_u32(lhs, rhs);
+        }
+
+        template <class A, class T, detail::enable_sized_signed_t<T, 4> = 0>
+        inline batch<T, A> zip_lo(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon64>) noexcept
+        {
+            return vzip1q_s32(lhs, rhs);
+        }
 
         template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
         inline batch<T, A> zip_lo(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon64>) noexcept
@@ -966,6 +1001,12 @@ namespace xsimd
         }
 
         template <class A>
+        inline batch<float, A> zip_lo(batch<float, A> const& lhs, batch<float, A> const& rhs, requires_arch<neon64>) noexcept
+        {
+            return vzip1q_f32(lhs, rhs);
+        }
+
+        template <class A>
         inline batch<double, A> zip_lo(batch<double, A> const& lhs, batch<double, A> const& rhs, requires_arch<neon64>) noexcept
         {
             return vzip1q_f64(lhs, rhs);
@@ -975,6 +1016,42 @@ namespace xsimd
          * zip_hi *
          **********/
 
+        template <class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0>
+        inline batch<T, A> zip_hi(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon64>) noexcept
+        {
+            return vzip2q_u8(lhs, rhs);
+        }
+
+        template <class A, class T, detail::enable_sized_signed_t<T, 1> = 0>
+        inline batch<T, A> zip_hi(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon64>) noexcept
+        {
+            return vzip2q_s8(lhs, rhs);
+        }
+
+        template <class A, class T, detail::enable_sized_unsigned_t<T, 2> = 0>
+        inline batch<T, A> zip_hi(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon64>) noexcept
+        {
+            return vzip2q_u16(lhs, rhs);
+        }
+
+        template <class A, class T, detail::enable_sized_signed_t<T, 2> = 0>
+        inline batch<T, A> zip_hi(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon64>) noexcept
+        {
+            return vzip2q_s16(lhs, rhs);
+        }
+
+        template <class A, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
+        inline batch<T, A> zip_hi(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon64>) noexcept
+        {
+            return vzip2q_u32(lhs, rhs);
+        }
+
+        template <class A, class T, detail::enable_sized_signed_t<T, 4> = 0>
+        inline batch<T, A> zip_hi(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon64>) noexcept
+        {
+            return vzip2q_s32(lhs, rhs);
+        }
+
         template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
         inline batch<T, A> zip_hi(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon64>) noexcept
         {
@@ -988,6 +1065,12 @@ namespace xsimd
         }
 
         template <class A>
+        inline batch<float, A> zip_hi(batch<float, A> const& lhs, batch<float, A> const& rhs, requires_arch<neon64>) noexcept
+        {
+            return vzip2q_f32(lhs, rhs);
+        }
+
+        template <class A>
         inline batch<double, A> zip_hi(batch<double, A> const& lhs, batch<double, A> const& rhs, requires_arch<neon64>) noexcept
         {
             return vzip2q_f64(lhs, rhs);