#include "test.h"
#include "../intgemm/aligned.h"
#include "../intgemm/avx2_gemm.h"
#include "../intgemm/avx512_gemm.h"
#include "../intgemm/sse2_gemm.h"
#include "../intgemm/ssse3_gemm.h"
#include "../intgemm/stats.h"

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <random>

// NOTE(review): the text under review had lost everything written between
// angle brackets (system header names and template arguments). Template
// arguments that follow from usage are restored below; the backend/kernel
// names passed at the TEST_CASE call sites, the `Index` cast type and the
// system header names follow the project's naming convention and must be
// confirmed against the kernel headers.

namespace intgemm {
namespace {

// Reference 16-bit quantizer: output[i] = round(input[i] * quant_mult),
// saturated to [-32768, 32767]. Processes exactly `size` elements.
void QuantizeRef(const float *input, int16_t *output, float quant_mult, std::size_t size) {
  for (std::size_t i = 0; i < size; ++i) {
    float value = roundf(input[i] * quant_mult);
    value = std::max(-32768.0f, value);
    value = std::min(32767.0f, value);
    // float should be exact in this range.
    output[i] = static_cast<int16_t>(value);
  }
}

// Reference 8-bit quantizer: output[i] = round(input[i] * quant_mult),
// saturated to the symmetric range [-127, 127] (so -128 is never produced).
void QuantizeRef(const float *input, int8_t *output, float quant_mult, std::size_t size) {
  for (std::size_t i = 0; i < size; ++i) {
    float value = roundf(input[i] * quant_mult);
    value = std::max(-127.0f, value);
    value = std::min(127.0f, value);
    output[i] = static_cast<int8_t>(value);
  }
}

// Reference mean / standard deviation over every element of `vals`.
//   num_items: divisor used for both averages (callers pass the element count).
//   absolute:  when true the mean is taken over |x| instead of x; the sum of
//              squares is the same either way.
// The deviation is computed as sqrt(E[x^2] - mean^2).
MeanStd VectorMeanStd(AlignedVector<float>& vals, int num_items, bool absolute) {
  float normal_sums = 0;
  float squares_sum = 0;
  if (absolute) {
    std::for_each(vals.begin(), vals.end(), [&] (float n) {normal_sums+=std::abs(n);});
  } else {
    std::for_each(vals.begin(), vals.end(), [&] (float n) {normal_sums+=n;});
  }
  std::for_each(vals.begin(), vals.end(), [&] (float n) {squares_sum+=n*n;});

  MeanStd ret;
  ret.mean = normal_sums/num_items;
  ret.stddev = std::sqrt((squares_sum/num_items) - (ret.mean*ret.mean));
  return ret;
}

// Fills `num_items` floats drawn uniformly from [-1, 1) with a
// default-seeded (hence repeatable) generator, then checks that `Backend`
// agrees with the reference VectorMeanStd to within a small tolerance.
template <MeanStd (*Backend) (const float *, const float *, bool)>
void testVectorMeanStd(int num_items, bool absolute=false) {
  std::mt19937 gen;
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  AlignedVector<float> inputVec(num_items);

  for (auto&& it : inputVec) {
    it = dist(gen);
  }

  MeanStd reference = VectorMeanStd(inputVec, num_items, absolute);
  MeanStd fast = Backend(inputVec.begin(), inputVec.end(), absolute);

  float meanDifference = std::fabs(reference.mean - fast.mean);
  float stdDifference = std::fabs(reference.stddev - fast.stddev);
  float eps = 0.00002f; //Accumulating horizontal sums can lead to errors.

  CHECK_MESSAGE(meanDifference <= eps, "Items: " << num_items << " Absolute: " << absolute << " Reference mean: " << reference.mean << " actual: " << fast.mean);
  // This check compares standard deviations, so label the value accordingly.
  CHECK_MESSAGE(stdDifference <= eps, "Items: " << num_items << " Absolute: " << absolute << " Reference stddev: " << reference.stddev << " actual: " << fast.stddev);
}

// Runs testVectorMeanStd for every size exercised by the QuantizeStd test
// cases, first with signed values and then with absolute values.
template <MeanStd (*Backend) (const float *, const float *, bool)>
void testVectorMeanStdSizes() {
  const int kSizes[] = {64, 256, 2048, 65536, 81920, 120832};
  for (int num_items : kSizes) {
    testVectorMeanStd<Backend>(num_items);
    testVectorMeanStd<Backend>(num_items, true);
  }
}

// Returns true when `test` is an unacceptable quantization of `from` given
// the reference result `ref`. Equal values are fine; otherwise the only
// tolerated disagreement is a tie, i.e. both candidates sit about 0.5 away
// from the unrounded value and merely rounded in different directions.
template <class I> bool IsOff(float from, I ref, I test) {
  if (ref == test) return false;
  // More than one apart in either direction can never be a rounding tie.
  // (The previous `&&` made this early-out unreachable.)
  if (ref - test > 1 || test - ref > 1) return true;
  float off_test = std::fabs(static_cast<float>(test) - from);
  float off_ref = std::fabs(static_cast<float>(ref) - from);
  // Allow 0.5 to round either way.
  if (off_test > 0.49 && off_test < 0.51 && off_ref > 0.49 && off_ref < 0.51) return false;
  return true;
}

// Quantizes the first `size` floats of `input_unaligned` with both the
// reference implementation and Backend::Quantize and compares the results
// element by element. Every mismatch is reported through UNSCOPED_INFO.
// Returns true when no element is off.
template <class Backend> bool Test(const float *input_unaligned, float quant_mult, std::size_t size) {
  using Integer = typename Backend::Integer;
  bool success = true;
  // Copy into an AlignedVector before handing the data to the backend.
  AlignedVector<float> input(size);
  std::memcpy(input.begin(), input_unaligned, sizeof(float) * size);

  AlignedVector<Integer> ref(size);
  AlignedVector<Integer> test(size);
  QuantizeRef(input.begin(), ref.begin(), quant_mult, static_cast<Index>(size));
  Backend::Quantize(input.begin(), test.begin(), quant_mult, static_cast<Index>(size));
  for (std::size_t i = 0; i < size; ++i) {
    if (IsOff(input[i] * quant_mult, ref[i], test[i])) {
      // Cast to int so 8-bit results print as numbers rather than characters.
      UNSCOPED_INFO("Error at " << i << " from " << input[i] << '*' << quant_mult << '=' << (input[i]*quant_mult) << " ref = " << static_cast<int>(ref[i]) << " test = " << static_cast<int>(test[i]));
      success = false;
    }
  }
  return success;
}

// Exercises Test<Backend> on prefixes of two fixed 33-element inputs, with
// prefix lengths 0, grow, 2*grow, ... up to 33. `grow` must be non-zero or
// the loop never terminates.
template <class Backend> void TestMany(std::size_t grow) {
  float input[33] = {
    0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 13.f,
    14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, 25.f,
    26.f, 27.f, 28.f, 29.f, 30.f, 31.f, 32.f};
  // Values around the int8/int16 saturation limits plus fractional values
  // near rounding boundaries.
  float corners[33] = {
    -32769.f, -32768.f, -32767.f, -129.f, -128.f, -127.f, -1.f, 0.f, 1.f,
    126.f, 127.f, 128.f, 129.f, 32766.f, 32768.f, 32769.f, -1.9f, -1.5f, -1.1f,
    -1.f, -0.9f, -0.5f, -0.1f, 0.0f, 0.1f, 0.5f, 0.9f, 1.0f, 1.1f, 1.5f, 1.9f,
    16056.8f, 2.5f};
  for (std::size_t len = 0; len <= 33; len += grow) {
    CHECK(Test<Backend>(input, 1.0f, len));
    CHECK(Test<Backend>(input, 32.0f, len));
    CHECK(Test<Backend>(corners, 1.0f, len));
    CHECK(Test<Backend>(corners, -1.0f, len));
    CHECK(Test<Backend>(corners, -0.49f, len));
  }
}

// Each test case returns early when the running CPU lacks the instruction
// set being tested.

TEST_CASE ("Quantize SSE2", "[quantize]") {
  if (kCPU < CPUType::SSE2) return;
  TestMany<SSE2::Kernels16>(8);
}

TEST_CASE ("Quantize SSSE3", "[quantize]") {
  if (kCPU < CPUType::SSSE3) return;
  TestMany<SSSE3::Kernels8>(1);
}

#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
TEST_CASE ("Quantize AVX2", "[quantize]") {
  if (kCPU < CPUType::AVX2) return;
  TestMany<AVX2::Kernels8>(1);
  TestMany<AVX2::Kernels16>(16);
}
#endif

#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE ("Quantize AVX512", "[quantize]") {
  if (kCPU < CPUType::AVX512BW) return;
  TestMany<AVX512BW::Kernels8>(1);
  TestMany<AVX512BW::Kernels16>(16);
}
#endif

TEST_CASE("QuantizeStd SSSE3", "[VectorMeanStd]") {
  if (kCPU < CPUType::SSSE3) return;
  testVectorMeanStdSizes<SSE2::VectorMeanStd>();
}

#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
TEST_CASE("QuantizeStd AVX2", "[VectorMeanStd]") {
  if (kCPU < CPUType::AVX2) return;
  testVectorMeanStdSizes<AVX2::VectorMeanStd>();
}
#endif

#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
TEST_CASE("QuantizeStd AVX512BW", "[VectorMeanStd]") {
  if (kCPU < CPUType::AVX512BW) return;
  testVectorMeanStdSizes<AVX512BW::VectorMeanStd>();
}
#endif

} // namespace
} // namespace intgemm