Diffstat (limited to 'third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_memory.hpp')
-rw-r--r--   third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_memory.hpp   397
1 file changed, 397 insertions(+), 0 deletions(-)
diff --git a/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_memory.hpp b/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_memory.hpp
new file mode 100644
index 0000000000..bb40ddffc6
--- /dev/null
+++ b/third_party/xsimd/include/xsimd/arch/generic/xsimd_generic_memory.hpp
@@ -0,0 +1,397 @@
+/***************************************************************************
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
+ * Martin Renou *
+ * Copyright (c) QuantStack *
+ * Copyright (c) Serge Guelton *
+ * *
+ * Distributed under the terms of the BSD 3-Clause License. *
+ * *
+ * The full license is in the file LICENSE, distributed with this software. *
+ ****************************************************************************/
+
+#ifndef XSIMD_GENERIC_MEMORY_HPP
+#define XSIMD_GENERIC_MEMORY_HPP
+
+#include <algorithm>
+#include <cassert>
+#include <complex>
+#include <stdexcept>
+
+#include "../../types/xsimd_batch_constant.hpp"
+#include "./xsimd_generic_details.hpp"
+
+namespace xsimd
+{
+ template <class batch_type, typename batch_type::value_type... Values>
+ struct batch_constant;
+
+ namespace kernel
+ {
+
+ using namespace types;
+
+ // extract_pair
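+ // Concatenates `other` (low lanes) and `self` (high lanes) and extracts the
+ // `size` contiguous lanes starting at lane `i` of that concatenation. For
+ // example, with size == 4 and i == 1 the result lanes are
+ // { other[1], other[2], other[3], self[0] }.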
+ template <class A, class T>
+ inline batch<T, A> extract_pair(batch<T, A> const& self, batch<T, A> const& other, std::size_t i, requires_arch<generic>) noexcept
+ {
+ constexpr std::size_t size = batch<T, A>::size;
+ assert(i < size && "index in bounds");
+
+ alignas(A::alignment()) T self_buffer[size];
+ self.store_aligned(self_buffer);
+
+ alignas(A::alignment()) T other_buffer[size];
+ other.store_aligned(other_buffer);
+
+ alignas(A::alignment()) T concat_buffer[size];
+
+ // low part: lanes i .. size-1 of other
+ for (std::size_t j = 0; j < (size - i); ++j)
+ {
+ concat_buffer[j] = other_buffer[i + j];
+ }
+ // high part: lanes 0 .. i-1 of self
+ for (std::size_t j = 0; j < i; ++j)
+ {
+ concat_buffer[(size - i) + j] = self_buffer[j];
+ }
+ return batch<T, A>::load_aligned(concat_buffer);
+ }
+
+ // gather
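+ // Generic gather: lane k of the result is static_cast<T>(src[index.get(k)]).
+ // The detail helpers below recurse lane by lane (terminating at lane 0) and
+ // assemble the result with insert().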
+ namespace detail
+ {
+ template <size_t N, typename T, typename A, typename U, typename V, typename std::enable_if<N == 0, int>::type = 0>
+ inline batch<T, A> gather(U const* src, batch<V, A> const& index,
+ ::xsimd::index<N> I) noexcept
+ {
+ return insert(batch<T, A> {}, static_cast<T>(src[index.get(I)]), I);
+ }
+
+ template <size_t N, typename T, typename A, typename U, typename V, typename std::enable_if<N != 0, int>::type = 0>
+ inline batch<T, A>
+ gather(U const* src, batch<V, A> const& index, ::xsimd::index<N> I) noexcept
+ {
+ static_assert(N <= batch<V, A>::size, "Incorrect value in recursion!");
+
+ const auto test = gather<N - 1, T, A>(src, index, {});
+ return insert(test, static_cast<T>(src[index.get(I)]), I);
+ }
+ } // namespace detail
+
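+ // Gather with runtime indexes, identical source and destination types.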
+ template <typename T, typename A, typename V>
+ inline batch<T, A>
+ gather(batch<T, A> const&, T const* src, batch<V, A> const& index,
+ kernel::requires_arch<generic>) noexcept
+ {
+ static_assert(batch<T, A>::size == batch<V, A>::size,
+ "Index and destination sizes must match");
+
+ return detail::gather<batch<V, A>::size - 1, T, A>(src, index, {});
+ }
+
+ // Gather with runtime indexes and mismatched strides.
+ template <typename T, typename A, typename U, typename V>
+ inline detail::sizes_mismatch_t<T, U, batch<T, A>>
+ gather(batch<T, A> const&, U const* src, batch<V, A> const& index,
+ kernel::requires_arch<generic>) noexcept
+ {
+ static_assert(batch<T, A>::size == batch<V, A>::size,
+ "Index and destination sizes must match");
+
+ return detail::gather<batch<V, A>::size - 1, T, A>(src, index, {});
+ }
+
+ // Gather with runtime indexes and matching strides.
+ template <typename T, typename A, typename U, typename V>
+ inline detail::stride_match_t<T, U, batch<T, A>>
+ gather(batch<T, A> const&, U const* src, batch<V, A> const& index,
+ kernel::requires_arch<generic>) noexcept
+ {
+ static_assert(batch<T, A>::size == batch<V, A>::size,
+ "Index and destination sizes must match");
+
+ return batch_cast<T>(kernel::gather(batch<U, A> {}, src, index, A {}));
+ }
+
+ // insert
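+ // Generic insert: returns `self` with lane I replaced by `val`, implemented
+ // as a select between `self` and a broadcast of `val` under a compile-time
+ // mask that is false only at lane I.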
+ template <class A, class T, size_t I>
+ inline batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<generic>) noexcept
+ {
+ struct index_mask
+ {
+ static constexpr bool get(size_t index, size_t /* size*/)
+ {
+ return index != I;
+ }
+ };
+ batch<T, A> tmp(val);
+ return select(make_batch_bool_constant<batch<T, A>, index_mask>(), self, tmp);
+ }
+
+ // get
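+ // Generic get: spills the batch to an aligned scratch buffer and reads the
+ // requested lane back, for both compile-time (::xsimd::index<I>) and
+ // runtime (std::size_t) indices.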
+ template <class A, size_t I, class T>
+ inline T get(batch<T, A> const& self, ::xsimd::index<I>, requires_arch<generic>) noexcept
+ {
+ alignas(A::alignment()) T buffer[batch<T, A>::size];
+ self.store_aligned(&buffer[0]);
+ return buffer[I];
+ }
+
+ template <class A, size_t I, class T>
+ inline T get(batch_bool<T, A> const& self, ::xsimd::index<I>, requires_arch<generic>) noexcept
+ {
+ alignas(A::alignment()) T buffer[batch_bool<T, A>::size];
+ self.store_aligned(&buffer[0]);
+ return buffer[I];
+ }
+
+ template <class A, size_t I, class T>
+ inline auto get(batch<std::complex<T>, A> const& self, ::xsimd::index<I>, requires_arch<generic>) noexcept -> typename batch<std::complex<T>, A>::value_type
+ {
+ alignas(A::alignment()) T buffer[batch<std::complex<T>, A>::size];
+ self.store_aligned(&buffer[0]);
+ return buffer[I];
+ }
+
+ template <class A, class T>
+ inline T get(batch<T, A> const& self, std::size_t i, requires_arch<generic>) noexcept
+ {
+ alignas(A::alignment()) T buffer[batch<T, A>::size];
+ self.store_aligned(&buffer[0]);
+ return buffer[i];
+ }
+
+ template <class A, class T>
+ inline T get(batch_bool<T, A> const& self, std::size_t i, requires_arch<generic>) noexcept
+ {
+ alignas(A::alignment()) bool buffer[batch_bool<T, A>::size];
+ self.store_aligned(&buffer[0]);
+ return buffer[i];
+ }
+
+ template <class A, class T>
+ inline auto get(batch<std::complex<T>, A> const& self, std::size_t i, requires_arch<generic>) noexcept -> typename batch<std::complex<T>, A>::value_type
+ {
+ using T2 = typename batch<std::complex<T>, A>::value_type;
+ alignas(A::alignment()) T2 buffer[batch<std::complex<T>, A>::size];
+ self.store_aligned(&buffer[0]);
+ return buffer[i];
+ }
+
+ // load_aligned
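+ // Converting aligned load: the fast path loads as T_in and converts in
+ // registers via fast_cast; the slow path converts element by element through
+ // an aligned scratch buffer.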
+ namespace detail
+ {
+ template <class A, class T_in, class T_out>
+ inline batch<T_out, A> load_aligned(T_in const* mem, convert<T_out>, requires_arch<generic>, with_fast_conversion) noexcept
+ {
+ using batch_type_in = batch<T_in, A>;
+ using batch_type_out = batch<T_out, A>;
+ return fast_cast(batch_type_in::load_aligned(mem), batch_type_out(), A {});
+ }
+ template <class A, class T_in, class T_out>
+ inline batch<T_out, A> load_aligned(T_in const* mem, convert<T_out>, requires_arch<generic>, with_slow_conversion) noexcept
+ {
+ static_assert(!std::is_same<T_in, T_out>::value, "there should be a direct load for this type combination");
+ using batch_type_out = batch<T_out, A>;
+ alignas(A::alignment()) T_out buffer[batch_type_out::size];
+ std::copy(mem, mem + batch_type_out::size, std::begin(buffer));
+ return batch_type_out::load_aligned(buffer);
+ }
+ }
+ template <class A, class T_in, class T_out>
+ inline batch<T_out, A> load_aligned(T_in const* mem, convert<T_out> cvt, requires_arch<generic>) noexcept
+ {
+ return detail::load_aligned<A>(mem, cvt, A {}, detail::conversion_type<A, T_in, T_out> {});
+ }
+
+ // load_unaligned
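+ // Converting unaligned load: mirrors load_aligned; the slow path forwards to
+ // the aligned slow path, whose element-wise copy tolerates an unaligned
+ // source.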
+ namespace detail
+ {
+ template <class A, class T_in, class T_out>
+ inline batch<T_out, A> load_unaligned(T_in const* mem, convert<T_out>, requires_arch<generic>, with_fast_conversion) noexcept
+ {
+ using batch_type_in = batch<T_in, A>;
+ using batch_type_out = batch<T_out, A>;
+ return fast_cast(batch_type_in::load_unaligned(mem), batch_type_out(), A {});
+ }
+
+ template <class A, class T_in, class T_out>
+ inline batch<T_out, A> load_unaligned(T_in const* mem, convert<T_out> cvt, requires_arch<generic>, with_slow_conversion) noexcept
+ {
+ static_assert(!std::is_same<T_in, T_out>::value, "there should be a direct load for this type combination");
+ return load_aligned<A>(mem, cvt, generic {}, with_slow_conversion {});
+ }
+ }
+ template <class A, class T_in, class T_out>
+ inline batch<T_out, A> load_unaligned(T_in const* mem, convert<T_out> cvt, requires_arch<generic>) noexcept
+ {
+ return detail::load_unaligned<A>(mem, cvt, generic {}, detail::conversion_type<A, T_in, T_out> {});
+ }
+
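+ // scatter
+ // Generic scatter: stores lane k of `src` to dst[index.get(k)]. The detail
+ // helpers recurse lane by lane; the public overloads cover identical value
+ // types, value types with mismatched strides, and distinct value types with
+ // matching strides (which go through a batch_cast first).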
+ namespace detail
+ {
+ // Scatter with runtime indexes.
+ template <size_t N, typename T, typename A, typename U, typename V, typename std::enable_if<N == 0, int>::type = 0>
+ inline void scatter(batch<T, A> const& src, U* dst,
+ batch<V, A> const& index,
+ ::xsimd::index<N> I) noexcept
+ {
+ dst[index.get(I)] = static_cast<U>(src.get(I));
+ }
+
+ template <size_t N, typename T, typename A, typename U, typename V, typename std::enable_if<N != 0, int>::type = 0>
+ inline void
+ scatter(batch<T, A> const& src, U* dst, batch<V, A> const& index,
+ ::xsimd::index<N> I) noexcept
+ {
+ static_assert(N <= batch<V, A>::size, "Incorrect value in recursion!");
+
+ kernel::detail::scatter<N - 1, T, A, U, V>(
+ src, dst, index, {});
+ dst[index.get(I)] = static_cast<U>(src.get(I));
+ }
+ } // namespace detail
+
+ template <typename A, typename T, typename V>
+ inline void
+ scatter(batch<T, A> const& src, T* dst,
+ batch<V, A> const& index,
+ kernel::requires_arch<generic>) noexcept
+ {
+ static_assert(batch<T, A>::size == batch<V, A>::size,
+ "Source and index sizes must match");
+ kernel::detail::scatter<batch<V, A>::size - 1, T, A, T, V>(
+ src, dst, index, {});
+ }
+
+ template <typename A, typename T, typename U, typename V>
+ inline detail::sizes_mismatch_t<T, U, void>
+ scatter(batch<T, A> const& src, U* dst,
+ batch<V, A> const& index,
+ kernel::requires_arch<generic>) noexcept
+ {
+ static_assert(batch<T, A>::size == batch<V, A>::size,
+ "Source and index sizes must match");
+ kernel::detail::scatter<batch<V, A>::size - 1, T, A, U, V>(
+ src, dst, index, {});
+ }
+
+ template <typename A, typename T, typename U, typename V>
+ inline detail::stride_match_t<T, U, void>
+ scatter(batch<T, A> const& src, U* dst,
+ batch<V, A> const& index,
+ kernel::requires_arch<generic>) noexcept
+ {
+ static_assert(batch<T, A>::size == batch<V, A>::size,
+ "Source and index sizes must match");
+ const auto tmp = batch_cast<U>(src);
+ kernel::scatter<A>(tmp, dst, index, A {});
+ }
+
+ // store
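+ // Stores a batch_bool to a plain bool array: the mask is first converted to
+ // an arithmetic batch<T, A>, spilled to an aligned scratch buffer, then
+ // narrowed to bool element by element.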
+ template <class T, class A>
+ inline void store(batch_bool<T, A> const& self, bool* mem, requires_arch<generic>) noexcept
+ {
+ using batch_type = batch<T, A>;
+ constexpr auto size = batch_bool<T, A>::size;
+ alignas(A::alignment()) T buffer[size];
+ kernel::store_aligned<A>(&buffer[0], batch_type(self), A {});
+ for (std::size_t i = 0; i < size; ++i)
+ mem[i] = bool(buffer[i]);
+ }
+
+ // store_aligned
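+ // Converting aligned store: spills `self` to an aligned T_in scratch buffer,
+ // then converts element by element while copying to the destination.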
+ template <class A, class T_in, class T_out>
+ inline void store_aligned(T_out* mem, batch<T_in, A> const& self, requires_arch<generic>) noexcept
+ {
+ static_assert(!std::is_same<T_in, T_out>::value, "there should be a direct store for this type combination");
+ alignas(A::alignment()) T_in buffer[batch<T_in, A>::size];
+ store_aligned(&buffer[0], self);
+ std::copy(std::begin(buffer), std::end(buffer), mem);
+ }
+
+ // store_unaligned
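+ // Converting unaligned store: forwards to the converting aligned store, whose
+ // final element-wise copy does not require an aligned destination.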
+ template <class A, class T_in, class T_out>
+ inline void store_unaligned(T_out* mem, batch<T_in, A> const& self, requires_arch<generic>) noexcept
+ {
+ static_assert(!std::is_same<T_in, T_out>::value, "there should be a direct store for this type combination");
+ return store_aligned<A>(mem, self, generic {});
+ }
+
+ // swizzle
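+ // Swizzle of a complex batch: applies the same index mask independently to
+ // the real and imaginary parts.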
+ template <class A, class T, class ITy, ITy... Vs>
+ inline batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& self, batch_constant<batch<ITy, A>, Vs...> mask, requires_arch<generic>) noexcept
+ {
+ return { swizzle(self.real(), mask), swizzle(self.imag(), mask) };
+ }
+
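+ // Fallback stubs for the complex (de)interleaving helpers; the static_assert
+ // fires at compile time if an architecture provides no specialized
+ // load_complex / complex_high / complex_low implementation.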
+ namespace detail
+ {
+ template <class A, class T>
+ inline batch<std::complex<T>, A> load_complex(batch<T, A> const& /*hi*/, batch<T, A> const& /*lo*/, requires_arch<generic>) noexcept
+ {
+ static_assert(std::is_same<T, void>::value, "load_complex not implemented for the required architecture");
+ }
+
+ template <class A, class T>
+ inline batch<T, A> complex_high(batch<std::complex<T>, A> const& /*src*/, requires_arch<generic>) noexcept
+ {
+ static_assert(std::is_same<T, void>::value, "complex_high not implemented for the required architecture");
+ }
+
+ template <class A, class T>
+ inline batch<T, A> complex_low(batch<std::complex<T>, A> const& /*src*/, requires_arch<generic>) noexcept
+ {
+ static_assert(std::is_same<T, void>::value, "complex_low not implemented for the required architecture");
+ }
+ }
+
+ // load_complex_aligned
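+ // Loads the interleaved (real, imag) scalars of `mem` as two consecutive
+ // real batches and lets the architecture-specific detail::load_complex
+ // deinterleave them.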
+ template <class A, class T_out, class T_in>
+ inline batch<std::complex<T_out>, A> load_complex_aligned(std::complex<T_in> const* mem, convert<std::complex<T_out>>, requires_arch<generic>) noexcept
+ {
+ using real_batch = batch<T_out, A>;
+ T_in const* buffer = reinterpret_cast<T_in const*>(mem);
+ real_batch hi = real_batch::load_aligned(buffer),
+ lo = real_batch::load_aligned(buffer + real_batch::size);
+ return detail::load_complex(hi, lo, A {});
+ }
+
+ // load_complex_unaligned
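+ // Unaligned variant of load_complex_aligned.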
+ template <class A, class T_out, class T_in>
+ inline batch<std::complex<T_out>, A> load_complex_unaligned(std::complex<T_in> const* mem, convert<std::complex<T_out>>, requires_arch<generic>) noexcept
+ {
+ using real_batch = batch<T_out, A>;
+ T_in const* buffer = reinterpret_cast<T_in const*>(mem);
+ real_batch hi = real_batch::load_unaligned(buffer),
+ lo = real_batch::load_unaligned(buffer + real_batch::size);
+ return detail::load_complex(hi, lo, A {});
+ }
+
+ // store_complex_aligned
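+ // Re-interleaves the real and imaginary parts via the architecture-specific
+ // complex_high / complex_low helpers and stores them as two consecutive real
+ // batches over the destination std::complex array.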
+ template <class A, class T_out, class T_in>
+ inline void store_complex_aligned(std::complex<T_out>* dst, batch<std::complex<T_in>, A> const& src, requires_arch<generic>) noexcept
+ {
+ using real_batch = batch<T_in, A>;
+ real_batch hi = detail::complex_high(src, A {});
+ real_batch lo = detail::complex_low(src, A {});
+ T_out* buffer = reinterpret_cast<T_out*>(dst);
+ lo.store_aligned(buffer);
+ hi.store_aligned(buffer + real_batch::size);
+ }
+
+ // store_complex_unaligned
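+ // Unaligned variant of store_complex_aligned.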
+ template <class A, class T_out, class T_in>
+ inline void store_complex_unaligned(std::complex<T_out>* dst, batch<std::complex<T_in>, A> const& src, requires_arch<generic>) noexcept
+ {
+ using real_batch = batch<T_in, A>;
+ real_batch hi = detail::complex_high(src, A {});
+ real_batch lo = detail::complex_low(src, A {});
+ T_out* buffer = reinterpret_cast<T_out*>(dst);
+ lo.store_unaligned(buffer);
+ hi.store_unaligned(buffer + real_batch::size);
+ }
+
+ }
+
+}
+
+#endif