/*************************************************************************** * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * * Martin Renou * * Copyright (c) QuantStack * * Copyright (c) Serge Guelton * * * * Distributed under the terms of the BSD 3-Clause License. * * * * The full license is in the file LICENSE, distributed with this software. * ****************************************************************************/ #ifndef XSIMD_SSE3_HPP #define XSIMD_SSE3_HPP #include "../types/xsimd_sse3_register.hpp" #include namespace xsimd { namespace kernel { using namespace types; // haddp template inline batch haddp(batch const* row, requires_arch) noexcept { return _mm_hadd_ps(_mm_hadd_ps(row[0], row[1]), _mm_hadd_ps(row[2], row[3])); } template inline batch haddp(batch const* row, requires_arch) noexcept { return _mm_hadd_pd(row[0], row[1]); } // load_unaligned template ::value, void>::type> inline batch load_unaligned(T const* mem, convert, requires_arch) noexcept { return _mm_lddqu_si128((__m128i const*)mem); } // reduce_add template inline float reduce_add(batch const& self, requires_arch) noexcept { __m128 tmp0 = _mm_hadd_ps(self, self); __m128 tmp1 = _mm_hadd_ps(tmp0, tmp0); return _mm_cvtss_f32(tmp1); } template inline double reduce_add(batch const& self, requires_arch) noexcept { __m128d tmp0 = _mm_hadd_pd(self, self); return _mm_cvtsd_f64(tmp0); } } } #endif