From 43a97878ce14b72f0981164f87f2e35e14151312 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 11:22:09 +0200 Subject: Adding upstream version 110.0.1. Signed-off-by: Daniel Baumann --- .../intgemm/benchmarks/benchmark_quantizer.cc | 74 ++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 third_party/intgemm/benchmarks/benchmark_quantizer.cc (limited to 'third_party/intgemm/benchmarks/benchmark_quantizer.cc') diff --git a/third_party/intgemm/benchmarks/benchmark_quantizer.cc b/third_party/intgemm/benchmarks/benchmark_quantizer.cc new file mode 100644 index 0000000000..5235b1ea0d --- /dev/null +++ b/third_party/intgemm/benchmarks/benchmark_quantizer.cc @@ -0,0 +1,74 @@ +#include "../intgemm/intgemm.h" +#include "../intgemm/aligned.h" +#include "../intgemm/ssse3_gemm.h" +#include "../intgemm/avx2_gemm.h" +#include "../intgemm/avx512_gemm.h" + +#include +#include +#include +#include +#include + +namespace { + +float MaxAbsoluteBaseline(const float *begin, const float *end) { + auto res = std::minmax_element(begin, end); + return std::max(std::fabs(*res.first), std::fabs(*res.second)); +} + +void BenchmarkMaxAbsolute() { + std::mt19937 gen; + std::uniform_real_distribution dist(0.f, 1.f); + gen.seed(45678); + + intgemm::AlignedVector v(4096 * 4096); + for (auto& it : v) { + it = dist(gen); + } + + // Hopefully these don't get optimized out... + MaxAbsoluteBaseline(v.begin(), v.end()); + auto start = std::chrono::steady_clock::now(); + MaxAbsoluteBaseline(v.begin(), v.end()); + double baseline = std::chrono::duration(std::chrono::steady_clock::now() - start).count(); + intgemm::MaxAbsolute(v.begin(), v.end()); + start = std::chrono::steady_clock::now(); + intgemm::MaxAbsolute(v.begin(), v.end()); + double optimized = std::chrono::duration(std::chrono::steady_clock::now() - start).count(); + std::cout << "MaxAbsolute baseline = " << baseline << " optimized = " << optimized << " speedup = " << (optimized / baseline) << '\n'; +} + +template void QuantizerBench(const float *in, int8_t *out, intgemm::Index count) { + if (intgemm::kCPU < Backend::kUses) return; + Backend::Quantize(in, out, 1.0, count); + const std::size_t kTries = 60; + auto start = std::chrono::steady_clock::now(); + for (std::size_t t = 0; t < kTries; ++t) { + Backend::Quantize(in, out, 1.0, count); + } + auto end = std::chrono::steady_clock::now(); + double took = std::chrono::duration(end - start).count() / kTries; + std::cout << std::setw(9) << count << ' ' << std::fixed << std::setw(9) << std::setprecision(7) << took << ' ' << Backend::kName << std::endl; +} +} // namespace + +int main() { + BenchmarkMaxAbsolute(); + for (std::size_t count = 1; count < (1ULL<<30); count *= 2) { + intgemm::AlignedVector in(count); + intgemm::AlignedVector out(count); + std::mt19937 gen; + std::uniform_real_distribution dist(-129.0, 129.0); + for (float &element : in) { + element = dist(gen); + } + QuantizerBench(in.begin(), out.begin(), static_cast(count)); +#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2 + QuantizerBench(in.begin(), out.begin(), static_cast(count)); +#endif +#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW + QuantizerBench(in.begin(), out.begin(), static_cast(count)); +#endif + } +} -- cgit v1.2.3