author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 09:22:09 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 09:22:09 +0000
commit     43a97878ce14b72f0981164f87f2e35e14151312
tree       620249daf56c0258faa40cbdcf9cfba06de2a846
parent     Initial commit.
Adding upstream version 110.0.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/intgemm/example.cc')
-rw-r--r--  third_party/intgemm/example.cc  |  79
1 file changed, 79 insertions(+), 0 deletions(-)
diff --git a/third_party/intgemm/example.cc b/third_party/intgemm/example.cc
new file mode 100644
index 0000000000..5f558d0fcd
--- /dev/null
+++ b/third_party/intgemm/example.cc
@@ -0,0 +1,79 @@
+#include "intgemm/intgemm.h"
+// This is just for AlignedVector, which helps manage 64-byte aligned memory.
+// Feel free to manage memory yourself.
+#include "intgemm/aligned.h"
+#include "intgemm/callbacks.h"
+
+#include <cassert>
+#include <cmath>
+#include <random>
+
+int main() {
+ using intgemm::Index;
+ const Index A_rows = 1;
+ // The shared dimension: A's columns and B's rows.
+ const Index width = 64;
+ const Index B_cols = 8;
+
+ // This is a simple vector class that allocates memory aligned to 64 bytes.
+ // You don't have to use it; just use aligned_alloc and friends directly.
+ using intgemm::AlignedVector;
+ AlignedVector<float> A(A_rows * width);
+ AlignedVector<float> B(width * B_cols);
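+ // If you prefer to manage memory yourself, a minimal sketch (hypothetical,
+ // not used below) with C++17 std::aligned_alloc would be:
+ //   float *A_raw = static_cast<float*>(std::aligned_alloc(64, A_rows * width * sizeof(float)));
+ //   /* ... use A_raw ... */
+ //   std::free(A_raw);
+ // Note std::aligned_alloc requires the size to be a multiple of the
+ // alignment, which holds here (1 * 64 * 4 bytes = 256).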
+
+ // Fill with random values in range [-2, 2].
+ std::mt19937 gen;
+ std::uniform_real_distribution<float> dist(-2.f, 2.f);
+ gen.seed(1);
+ for (auto& it : A) {
+ it = dist(gen);
+ }
+ for (auto& it : B) {
+ it = dist(gen);
+ }
+
+ // Compute the top left corner of C as a sanity check.
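+ // B is row-major, so column 0 of row w lives at B[w * B_cols].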
+ float top_left_reference = 0.0f;
+ for (Index w = 0; w < width; ++w) {
+ top_left_reference += A[w] * B[w * B_cols];
+ }
+
+ // 16-bit multiplication.
+ {
+ // For 16-bit, Jacob Devlin recommends 1024 so as not to overflow in 32-bit accumulation.
+ float quant_mult = 1024.0f;
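+ // Sanity math for this example: inputs lie in [-2, 2], so quantized values
+ // are at most 2 * 1024 = 2048 in magnitude, and summing width = 64 products
+ // stays within int32 (64 * 2048 * 2048 = 2^28 < 2^31).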
+ AlignedVector<int16_t> A_prepared(A.size());
+ AlignedVector<int16_t> B_prepared(B.size());
+ // Quantize A.
+ intgemm::Int16::PrepareA(A.begin(), A_prepared.begin(), quant_mult, A_rows, width);
+ // Quantize and reshape B.
+ // Typically you will do this once when parameters are loaded, not every time.
+ intgemm::Int16::PrepareB(B.begin(), B_prepared.begin(), quant_mult, width, B_cols);
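+ // (The prepared B is laid out in a CPU-dependent format expected by the
+ // multiply kernel, so cache and reuse it rather than re-preparing.)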
+
+ AlignedVector<float> C(A_rows * B_cols);
+ // Do the actual multiply.
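+ // Each accumulated product carries a factor of quant_mult * quant_mult,
+ // hence the 1 / quant_mult^2 unquantization multiplier in the callback.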
+ intgemm::Int16::Multiply(A_prepared.begin(), B_prepared.begin(), A_rows, width, B_cols, intgemm::callbacks::UnquantizeAndWrite(1.0f / (quant_mult * quant_mult), C.begin()));
+ // Sanity check. C will be row major.
+ assert(std::fabs(C[0] - top_left_reference) < 0.05f);
+ }
+
+ // 8-bit multiplication.
+ {
+ // For 8-bit, a good quantization multiplier is 127 / (largest absolute value).
+ float quant_mult = 127.0f / 2.0f;
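+ // Here the largest absolute value is 2 by construction of the [-2, 2]
+ // distribution, so 127 / 2 uses (nearly) the full int8 range.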
+ AlignedVector<int8_t> A_prepared(A.size());
+ AlignedVector<int8_t> B_prepared(B.size());
+ // Quantize A.
+ intgemm::Int8::PrepareA(A.begin(), A_prepared.begin(), quant_mult, A_rows, width);
+ // Quantize and reshape B.
+ // Typically you will do this once when parameters are loaded, not every time.
+ intgemm::Int8::PrepareB(B.begin(), B_prepared.begin(), quant_mult, width, B_cols);
+
+ AlignedVector<float> C(A_rows * B_cols);
+ // Do the actual multiply.
+ intgemm::Int8::Multiply(A_prepared.begin(), B_prepared.begin(), A_rows, width, B_cols, intgemm::callbacks::UnquantizeAndWrite(1.0f / (quant_mult * quant_mult), C.begin()));
+ // Sanity check. C will be row major.
+ assert(std::fabs(C[0] - top_left_reference) < 0.05f);
+ }
+ return 0;
+}