// Tests for the narrowing kernels kernels::downcast32to8,
// kernels::downcast32to16 and kernels::downcast16to8.
//
// Every kernel_downcast*_test() follows the same steps:
//   1. Return immediately when `kCPU < CPUType_` (presumably: the detected CPU
//      does not support the instruction set under test - confirm in test.h).
//   2. Define LENGTH as sizeof(vi) divided by the size of the OUTPUT element
//      (int8_t for 32to8 and 16to8, int16_t for 32to16), and allocate `input`
//      and `output` with LENGTH elements each.
//   3. Fill `input` with consecutive values starting at -LENGTH / 2 (std::iota).
//   4. Call the kernel on the leading vectors of `input` - indices 0..3 for
//      32to8, indices 0..1 for 32to16 and 16to8 - and store the returned vector
//      at the start of `output`.
//   5. CHECK that each output element equals the matching input element cast to
//      the output type.
//
// Each test template is explicitly instantiated and registered through
// KERNEL_TEST_CASE: always for SSE2, and for AVX2 / AVX512BW only when
// INTGEMM_COMPILER_SUPPORTS_AVX2 / INTGEMM_COMPILER_SUPPORTS_AVX512BW is defined.
//
// NOTE(review): the inputs are small consecutive integers around zero, so the
// expected values presumably never leave the range of the narrower type; if so,
// overflow/saturation behaviour of the kernels is not exercised here - confirm.
//
// NOTE(review): in this copy of the file the text that would sit between angle
// brackets looks missing (`#include #include`, `template void`, `vector_t;`,
// `AlignedVector input`, `static_cast(`, `as()`, and the arguments of the
// explicit instantiations / calls), and the file is collapsed onto two lines.
// Verify against the repository version before relying on the exact tokens.
#include "../test.h" #include "../../intgemm/aligned.h" #include "../../intgemm/kernels.h" #include #include namespace intgemm { template void kernel_downcast32to8_test() { if (kCPU < CPUType_) return; using vi = vector_t; constexpr int LENGTH = sizeof(vi) / sizeof(int8_t); AlignedVector input(LENGTH); AlignedVector output(LENGTH); std::iota(input.begin(), input.end(), static_cast(-LENGTH / 2)); *output.template as() = kernels::downcast32to8( input.template as()[0], input.template as()[1], input.template as()[2], input.template as()[3]); for (std::size_t i = 0; i < output.size(); ++i) CHECK(output[i] == int8_t(input[i])); } template INTGEMM_SSE2 void kernel_downcast32to8_test(); KERNEL_TEST_CASE("downcast32to8 SSE2") { return kernel_downcast32to8_test(); } #ifdef INTGEMM_COMPILER_SUPPORTS_AVX2 template INTGEMM_AVX2 void kernel_downcast32to8_test(); KERNEL_TEST_CASE("downcast32to8 AVX2") { return kernel_downcast32to8_test(); } #endif #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW template INTGEMM_AVX512BW void kernel_downcast32to8_test(); KERNEL_TEST_CASE("downcast32to8 AVX512BW") { return kernel_downcast32to8_test(); } #endif template void kernel_downcast32to16_test() { if (kCPU < CPUType_) return; using vi = vector_t; constexpr int LENGTH = sizeof(vi) / sizeof(int16_t); AlignedVector input(LENGTH); AlignedVector output(LENGTH); std::iota(input.begin(), input.end(), static_cast(-LENGTH / 2)); *output.template as() = kernels::downcast32to16( input.template as()[0], input.template as()[1]); for (std::size_t i = 0; i < output.size(); ++i) CHECK(output[i] == int16_t(input[i])); } template INTGEMM_SSE2 void kernel_downcast32to16_test(); KERNEL_TEST_CASE("downcast32to16 SSE2") { return kernel_downcast32to16_test(); } #ifdef INTGEMM_COMPILER_SUPPORTS_AVX2 template INTGEMM_AVX2 void kernel_downcast32to16_test(); KERNEL_TEST_CASE("downcast32to16 AVX2") { return kernel_downcast32to16_test(); } #endif #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW template INTGEMM_AVX512BW void 
kernel_downcast32to16_test(); KERNEL_TEST_CASE("downcast32to16 AVX512BW") { return kernel_downcast32to16_test(); } #endif template void kernel_downcast16to8_test() { if (kCPU < CPUType_) return; using vi = vector_t; constexpr int LENGTH = sizeof(vi) / sizeof(int8_t); AlignedVector input(LENGTH); AlignedVector output(LENGTH); std::iota(input.begin(), input.end(), static_cast(-LENGTH / 2)); *output.template as() = kernels::downcast16to8( input.template as()[0], input.template as()[1]); for (std::size_t i = 0; i < output.size(); ++i) CHECK(output[i] == int8_t(input[i])); } template INTGEMM_SSE2 void kernel_downcast16to8_test(); KERNEL_TEST_CASE("downcast16to8 SSE2") { return kernel_downcast16to8_test(); } #ifdef INTGEMM_COMPILER_SUPPORTS_AVX2 template INTGEMM_AVX2 void kernel_downcast16to8_test(); KERNEL_TEST_CASE("downcast16to8 AVX2") { return kernel_downcast16to8_test(); } #endif #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW template INTGEMM_AVX512BW void kernel_downcast16to8_test(); KERNEL_TEST_CASE("downcast16to8 AVX512BW") { return kernel_downcast16to8_test(); } #endif }