From 43a97878ce14b72f0981164f87f2e35e14151312 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 11:22:09 +0200 Subject: Adding upstream version 110.0.1. Signed-off-by: Daniel Baumann --- js/src/intgemm/IntegerGemmIntrinsic.cpp | 409 ++++++++++++++++++++++++++++++ js/src/intgemm/IntegerGemmIntrinsic.h | 358 ++++++++++++++++++++++++++ js/src/intgemm/README_MOZILLA | 18 ++ js/src/intgemm/enable_intel_extensions.py | 24 ++ js/src/intgemm/moz.build | 33 +++ js/src/intgemm/moz.yaml | 47 ++++ 6 files changed, 889 insertions(+) create mode 100644 js/src/intgemm/IntegerGemmIntrinsic.cpp create mode 100644 js/src/intgemm/IntegerGemmIntrinsic.h create mode 100644 js/src/intgemm/README_MOZILLA create mode 100644 js/src/intgemm/enable_intel_extensions.py create mode 100644 js/src/intgemm/moz.build create mode 100644 js/src/intgemm/moz.yaml (limited to 'js/src/intgemm') diff --git a/js/src/intgemm/IntegerGemmIntrinsic.cpp b/js/src/intgemm/IntegerGemmIntrinsic.cpp new file mode 100644 index 0000000000..adc074dbce --- /dev/null +++ b/js/src/intgemm/IntegerGemmIntrinsic.cpp @@ -0,0 +1,409 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ */ + +#include "intgemm/IntegerGemmIntrinsic.h" + +#include "mozilla/CheckedInt.h" +#include "mozilla/IntegerPrintfMacros.h" + +#include + +#include "js/ErrorReport.h" +#include "js/HeapAPI.h" +#include "vm/ArrayBufferObject.h" +#include "wasm/WasmBuiltins.h" +#include "wasm/WasmInstance.h" +#include "wasm/WasmLog.h" + +struct JSContext; + +static constexpr uint32_t ARRAY_ALIGNMENT = 64; +static constexpr uint32_t ROWS_A_MULTIPLIER = 1; +static constexpr uint32_t COLUMNS_A_MULTIPLIER = 64; +static constexpr uint32_t ROWS_B_MULTIPLIER = COLUMNS_A_MULTIPLIER; +static constexpr uint32_t COLUMNS_B_MULTIPLIER = 8; +static constexpr uint32_t SELECTED_COLUMNS_B_MULTIPLIER = 8; + +void ReportGemmError(JSContext* cx, const unsigned errorNumber) { + JS_ReportErrorNumberASCII(cx, js::GetErrorMessage, nullptr, errorNumber); +} + +size_t GetWasmRawBufferLength(const uint8_t* memBase) { + const js::WasmArrayRawBuffer* rawBuf = + js::WasmArrayRawBuffer::fromDataPtr(memBase); + return rawBuf->byteLength(); +} + +bool CheckMatrixDimension(JSContext* cx, uint32_t size, + uint32_t sizeMultiplier) { + // A valid size is a positive integral multiple of Multiplier + if ((size == 0) || (size % sizeMultiplier != 0)) { + js::wasm::Log( + cx, "Invalid dimension value:%" PRIu32 " (should be a multiple of %u)", + size, sizeMultiplier); + return false; + } + return true; +} + +bool CheckMatrixBound(JSContext* cx, uint32_t input, uint64_t inputSize, + size_t wasmBufferSize) { + mozilla::CheckedUint64 inputUpperLimit(inputSize); + inputUpperLimit += input; + + // Bound check fails if size overflows or it spans outside the wasm memory + if (!inputUpperLimit.isValid() || + (inputUpperLimit.value() >= (uint64_t)wasmBufferSize)) { + js::wasm::Log(cx, "Memory out of wasm bounds for matrix:%" PRIu32, input); + return false; + } + return true; +} + +bool CheckMatrixBoundAndAlignment(JSContext* cx, uint32_t input, + uint64_t inputSize, size_t wasmBufferSize) { + // Alignment check: It is sufficient to 
check alignment for the offset rather + // than for the actual pointer within wasm memory (as long as following assert + // is satisfied) + static_assert(js::gc::PageSize >= ARRAY_ALIGNMENT, + "PageSize should be bigger than Alignment"); + if (input % ARRAY_ALIGNMENT != 0) { + js::wasm::Log( + cx, "Unaligned access for matrix:%" PRIu32 " (should be %u aligned)", + input, ARRAY_ALIGNMENT); + return false; + } + + // Check Bound + return CheckMatrixBound(cx, input, inputSize, wasmBufferSize); +} + +int32_t js::intgemm::IntrI8PrepareB(wasm::Instance* instance, + uint32_t inputMatrixB, float scale, + float zeroPoint, uint32_t rowsB, + uint32_t colsB, uint32_t outputMatrixB, + uint8_t* memBase) { + MOZ_ASSERT(wasm::SASigIntrI8PrepareB.failureMode == + wasm::FailureMode::FailOnNegI32); + JSContext* cx = instance->cx(); + + // Size checks for matricies + if (!CheckMatrixDimension(cx, rowsB, ROWS_B_MULTIPLIER) || + !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER)) { + wasm::Log(cx, "%s: rowsB:%" PRIu32 " colsB:%" PRIu32, __FUNCTION__, rowsB, + colsB); + ReportGemmError(cx, JSMSG_WASM_UNREACHABLE); + return -1; + } + + // Memory Bound and Alignment checks for matricies + uint64_t sizeB = (uint64_t)rowsB * (uint64_t)colsB; + size_t wasmBufferSize = GetWasmRawBufferLength(memBase); + if (!CheckMatrixBoundAndAlignment(cx, inputMatrixB, sizeB, wasmBufferSize) || + !CheckMatrixBoundAndAlignment(cx, outputMatrixB, sizeB, wasmBufferSize)) { + wasm::Log(cx, + "%s: inputB:%x rowsB:%" PRIu32 " colsB:%" PRIu32 + " outputB:%x sizeB:%" PRIu64 " wasmBufferSize:%zu", + __FUNCTION__, inputMatrixB, rowsB, colsB, outputMatrixB, sizeB, + wasmBufferSize); + ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS); + return -1; + } + + // Actual call to the 3rd party library (intgemm) for PrepareB + uint8_t* inputMatrixBPtr = &memBase[inputMatrixB]; + uint8_t* outputMatrixBPtr = &memBase[outputMatrixB]; + ::intgemm::Int8::PrepareB((const float*)inputMatrixBPtr, + (int8_t*)outputMatrixBPtr, + 
(float)scale, // Quant Mult + rowsB, colsB); + return 0; +} + +int32_t js::intgemm::IntrI8PrepareBFromTransposed( + wasm::Instance* instance, uint32_t inputMatrixBTransposed, float scale, + float zeroPoint, uint32_t rowsB, uint32_t colsB, uint32_t outputMatrixB, + uint8_t* memBase) { + MOZ_ASSERT(wasm::SASigIntrI8PrepareBFromTransposed.failureMode == + wasm::FailureMode::FailOnNegI32); + JSContext* cx = instance->cx(); + + // Size checks for matricies + if (!CheckMatrixDimension(cx, rowsB, ROWS_B_MULTIPLIER) || + !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER)) { + wasm::Log(cx, "%s: rowsB:%" PRIu32 " colsB:%" PRIu32, __FUNCTION__, rowsB, + colsB); + ReportGemmError(cx, JSMSG_WASM_UNREACHABLE); + return -1; + } + + // Memory Bound checks for all matricies + uint64_t sizeB = (uint64_t)rowsB * (uint64_t)colsB; + size_t wasmBufferSize = GetWasmRawBufferLength(memBase); + if (!CheckMatrixBoundAndAlignment(cx, inputMatrixBTransposed, sizeB, + wasmBufferSize) || + !CheckMatrixBoundAndAlignment(cx, outputMatrixB, sizeB, wasmBufferSize)) { + wasm::Log(cx, + "%s: inputBT:%x rowsB:%" PRIu32 " colsB:%" PRIu32 + " outputB:%x sizeB:%" PRIu64 " wasmBufferSize:%zu", + __FUNCTION__, inputMatrixBTransposed, rowsB, colsB, outputMatrixB, + sizeB, wasmBufferSize); + ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS); + return -1; + } + + // Actual call to the 3rd party library (intgemm) for PrepareBTransposed + uint8_t* inputMatrixBTransposedPtr = &memBase[inputMatrixBTransposed]; + uint8_t* outputMatrixBPtr = &memBase[outputMatrixB]; + ::intgemm::Int8::PrepareBTransposed((const float*)inputMatrixBTransposedPtr, + (int8_t*)outputMatrixBPtr, + (float)scale, // Quant Mult + rowsB, colsB); + return 0; +} + +int32_t js::intgemm::IntrI8PrepareBFromQuantizedTransposed( + wasm::Instance* instance, uint32_t inputMatrixBQuantizedTransposed, + uint32_t rowsB, uint32_t colsB, uint32_t outputMatrixB, uint8_t* memBase) { + MOZ_ASSERT(wasm::SASigIntrI8PrepareBFromQuantizedTransposed.failureMode 
== + wasm::FailureMode::FailOnNegI32); + JSContext* cx = instance->cx(); + + // Size checks for matricies + if (!CheckMatrixDimension(cx, rowsB, ROWS_B_MULTIPLIER) || + !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER)) { + wasm::Log(cx, "%s: rowsB:%" PRIu32 " colsB:%" PRIu32, __FUNCTION__, rowsB, + colsB); + ReportGemmError(cx, JSMSG_WASM_UNREACHABLE); + return -1; + } + + // Memory Bound checks for all matricies + uint64_t sizeB = (uint64_t)rowsB * (uint64_t)colsB; + size_t wasmBufferSize = GetWasmRawBufferLength(memBase); + if (!CheckMatrixBoundAndAlignment(cx, inputMatrixBQuantizedTransposed, sizeB, + wasmBufferSize) || + !CheckMatrixBoundAndAlignment(cx, outputMatrixB, sizeB, wasmBufferSize)) { + wasm::Log(cx, + "%s: inputBQT:%x rowsB:%" PRIu32 " colsB:%" PRIu32 + " outputB:%x sizeA:%" PRIu64 " wasmBufferSize:%zu", + __FUNCTION__, inputMatrixBQuantizedTransposed, rowsB, colsB, + outputMatrixB, sizeB, wasmBufferSize); + ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS); + return -1; + } + + // Actual call to the 3rd party library (intgemm) + uint8_t* inputMatrixBQuantizedTransposedPtr = + &memBase[inputMatrixBQuantizedTransposed]; + uint8_t* outputMatrixBPtr = &memBase[outputMatrixB]; + ::intgemm::Int8::PrepareBQuantizedTransposed( + (const int8_t*)inputMatrixBQuantizedTransposedPtr, + (int8_t*)outputMatrixBPtr, rowsB, colsB); + return 0; +} + +int32_t js::intgemm::IntrI8PrepareA(wasm::Instance* instance, + uint32_t inputMatrixA, float scale, + float zeroPoint, uint32_t rowsA, + uint32_t colsA, uint32_t outputMatrixA, + uint8_t* memBase) { + MOZ_ASSERT(wasm::SASigIntrI8PrepareA.failureMode == + wasm::FailureMode::FailOnNegI32); + JSContext* cx = instance->cx(); + + // Size checks for matricies + if (!CheckMatrixDimension(cx, rowsA, ROWS_A_MULTIPLIER) || + !CheckMatrixDimension(cx, colsA, COLUMNS_A_MULTIPLIER)) { + wasm::Log(cx, "%s: rowsA:%" PRIu32 " colsA:%" PRIu32, __FUNCTION__, rowsA, + colsA); + ReportGemmError(cx, JSMSG_WASM_UNREACHABLE); + return -1; + } 
+ + // Memory Bound checks for all matricies + uint64_t sizeA = (uint64_t)rowsA * (uint64_t)colsA; + size_t wasmBufferSize = GetWasmRawBufferLength(memBase); + if (!CheckMatrixBoundAndAlignment(cx, inputMatrixA, sizeA, wasmBufferSize) || + !CheckMatrixBoundAndAlignment(cx, outputMatrixA, sizeA, wasmBufferSize)) { + wasm::Log(cx, + "%s: inputA:%x rowsA:%" PRIu32 " colsA:%" PRIu32 + " outputA:%x sizeA:%" PRIu64 " wasmBufferSize:%zu", + __FUNCTION__, inputMatrixA, rowsA, colsA, outputMatrixA, sizeA, + wasmBufferSize); + ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS); + return -1; + } + + // Actual call to the 3rd party library (intgemm) + uint8_t* inputMatrixAPtr = &memBase[inputMatrixA]; + uint8_t* outputMatrixAPtr = &memBase[outputMatrixA]; + ::intgemm::Int8Shift::PrepareA((const float*)inputMatrixAPtr, + (int8_t*)outputMatrixAPtr, scale, rowsA, + colsA); + return 0; +} + +int32_t js::intgemm::IntrI8PrepareBias( + wasm::Instance* instance, uint32_t inputMatrixBPrepared, float scaleA, + float zeroPointA, float scaleB, float zeroPointB, uint32_t rowsB, + uint32_t colsB, uint32_t inputBias, uint32_t output, uint8_t* memBase) { + MOZ_ASSERT(wasm::SASigIntrI8PrepareBias.failureMode == + wasm::FailureMode::FailOnNegI32); + JSContext* cx = instance->cx(); + + // Size checks for matricies + if (!CheckMatrixDimension(cx, rowsB, ROWS_B_MULTIPLIER) || + !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER)) { + wasm::Log(cx, "%s: rowsB:%" PRIu32 " colsB:%" PRIu32, __FUNCTION__, rowsB, + colsB); + ReportGemmError(cx, JSMSG_WASM_UNREACHABLE); + return -1; + } + + // Memory Bound checks for all matricies + uint64_t sizeB = (uint64_t)rowsB * (uint64_t)colsB; + uint64_t sizeBias = colsB; + size_t wasmBufferSize = GetWasmRawBufferLength(memBase); + if (!CheckMatrixBoundAndAlignment(cx, inputMatrixBPrepared, sizeB, + wasmBufferSize) || + !CheckMatrixBound(cx, inputBias, sizeBias, wasmBufferSize) || + !CheckMatrixBound(cx, output, sizeBias, wasmBufferSize)) { + wasm::Log(cx, + "%s: 
preparedB:%x rowsB:%" PRIu32 " colsB:%" PRIu32 + " inputBias:%x outputBias:%x sizeB:%" PRIu64 + " wasmBufferSize:%zu", + __FUNCTION__, inputMatrixBPrepared, rowsB, colsB, inputBias, + output, sizeB, wasmBufferSize); + ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS); + return -1; + } + + // Actual call to the 3rd party library (intgemm) + uint8_t* inputMatrixBPreparedPtr = &memBase[inputMatrixBPrepared]; + uint8_t* inputBiasPtr = &memBase[inputBias]; + uint8_t* outputPtr = &memBase[output]; + float unquantFactor = + (-1) * ((127.0f / scaleA) * (127.0f / scaleB)) / (127.0f); + ::intgemm::Int8Shift::PrepareBias( + (const int8_t*)inputMatrixBPreparedPtr, rowsB, colsB, + ::intgemm::callbacks::UnquantizeAndAddBiasAndWrite( + unquantFactor, (const float*)inputBiasPtr, (float*)outputPtr)); + return 0; +} + +int32_t js::intgemm::IntrI8MultiplyAndAddBias( + wasm::Instance* instance, uint32_t inputMatrixAPrepared, float scaleA, + float zeroPointA, uint32_t inputMatrixBPrepared, float scaleB, + float zeroPointB, uint32_t inputBiasPrepared, float unquantMultiplier, + uint32_t rowsA, uint32_t width, uint32_t colsB, uint32_t output, + uint8_t* memBase) { + MOZ_ASSERT(wasm::SASigIntrI8MultiplyAndAddBias.failureMode == + wasm::FailureMode::FailOnNegI32); + JSContext* cx = instance->cx(); + + // Size checks for matricies + if (!CheckMatrixDimension(cx, rowsA, ROWS_A_MULTIPLIER) || + !CheckMatrixDimension(cx, width, COLUMNS_A_MULTIPLIER) || + !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER)) { + wasm::Log(cx, "%s: rowsA:%" PRIu32 " width:%" PRIu32 " colsB:%" PRIu32, + __FUNCTION__, rowsA, width, colsB); + ReportGemmError(cx, JSMSG_WASM_UNREACHABLE); + return -1; + } + + // Memory Bound checks for all matricies + uint64_t sizeA = (uint64_t)rowsA * (uint64_t)width; + uint64_t sizeB = (uint64_t)width * (uint64_t)colsB; + uint64_t sizeBias = (uint64_t)colsB; + uint64_t sizeOutput = (uint64_t)rowsA * (uint64_t)colsB; + size_t wasmBufferSize = GetWasmRawBufferLength(memBase); + if 
(!CheckMatrixBoundAndAlignment(cx, inputMatrixAPrepared, sizeA, + wasmBufferSize) || + !CheckMatrixBoundAndAlignment(cx, inputMatrixBPrepared, sizeB, + wasmBufferSize) || + !CheckMatrixBound(cx, inputBiasPrepared, sizeBias, wasmBufferSize) || + !CheckMatrixBound(cx, output, sizeOutput, wasmBufferSize)) { + wasm::Log(cx, + "%s: preparedA:%x preparedB:%x preparedBias:%x rowsA:%" PRIu32 + " width:%" PRIu32 " colsB:%" PRIu32 + " output:%x sizeA:%" PRIu64 " sizeB:%" PRIu64 + " sizeBias:%" PRIu64 " sizeOutput:%" PRIu64, + __FUNCTION__, inputMatrixAPrepared, inputMatrixBPrepared, + inputBiasPrepared, rowsA, width, colsB, output, sizeA, sizeB, + sizeBias, sizeOutput); + ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS); + return -1; + } + + // Actual call to the 3rd party library (intgemm) + uint8_t* inputMatrixAPreparedPtr = &memBase[inputMatrixAPrepared]; + uint8_t* inputMatrixBPreparedPtr = &memBase[inputMatrixBPrepared]; + uint8_t* inputBiasPreparedPtr = &memBase[inputBiasPrepared]; + uint8_t* outputPtr = &memBase[output]; + float unquantFactor = unquantMultiplier / (scaleA * scaleB); + ::intgemm::Int8Shift::Multiply( + (const int8_t*)inputMatrixAPreparedPtr, + (const int8_t*)inputMatrixBPreparedPtr, rowsA, width, colsB, + ::intgemm::callbacks::UnquantizeAndAddBiasAndWrite( + unquantFactor, (const float*)inputBiasPreparedPtr, + (float*)outputPtr)); + return 0; +} + +int32_t js::intgemm::IntrI8SelectColumnsOfB(wasm::Instance* instance, + uint32_t inputMatrixBPrepared, + uint32_t rowsB, uint32_t colsB, + uint32_t colIndexList, + uint32_t sizeColIndexList, + uint32_t output, uint8_t* memBase) { + MOZ_ASSERT(wasm::SASigIntrI8SelectColumnsOfB.failureMode == + wasm::FailureMode::FailOnNegI32); + JSContext* cx = instance->cx(); + + // Size checks for matricies + if (!CheckMatrixDimension(cx, rowsB, ROWS_B_MULTIPLIER) || + !CheckMatrixDimension(cx, colsB, COLUMNS_B_MULTIPLIER) || + !CheckMatrixDimension(cx, sizeColIndexList, + SELECTED_COLUMNS_B_MULTIPLIER)) { + wasm::Log(cx, + 
"%s: rowsB:%" PRIu32 " colsB:%" PRIu32 + " sizeColIndexList:%" PRIu32, + __FUNCTION__, rowsB, colsB, sizeColIndexList); + ReportGemmError(cx, JSMSG_WASM_UNREACHABLE); + return -1; + } + + // Memory Bound checks for all matricies + uint64_t sizeB = (uint64_t)rowsB * (uint64_t)colsB; + uint64_t sizeOutput = (uint64_t)rowsB * (uint64_t)sizeColIndexList; + size_t wasmBufferSize = GetWasmRawBufferLength(memBase); + if (!CheckMatrixBoundAndAlignment(cx, inputMatrixBPrepared, sizeB, + wasmBufferSize) || + !CheckMatrixBound(cx, colIndexList, sizeColIndexList, wasmBufferSize) || + !CheckMatrixBound(cx, output, sizeOutput, wasmBufferSize)) { + wasm::Log(cx, + "%s: preparedB:%x rowsB:%" PRIu32 " colsB:%" PRIu32 + " colList:%x sizeColList:%" PRIu32 " output:%x sizeB:%" PRIu64 + " sizeOutput:%" PRIu64, + __FUNCTION__, inputMatrixBPrepared, rowsB, colsB, colIndexList, + sizeColIndexList, output, sizeB, sizeOutput); + ReportGemmError(cx, JSMSG_WASM_OUT_OF_BOUNDS); + return -1; + } + + // Actual call to the 3rd party library (intgemm) + uint8_t* inputMatrixBPreparedPtr = &memBase[inputMatrixBPrepared]; + uint8_t* colIndexListPtr = &memBase[colIndexList]; + uint8_t* outputPtr = &memBase[output]; + ::intgemm::Int8::SelectColumnsB( + (const int8_t*)inputMatrixBPreparedPtr, (int8_t*)outputPtr, rowsB, + (const uint32_t*)colIndexListPtr, + (const uint32_t*)colIndexListPtr + sizeColIndexList); + return 0; +} diff --git a/js/src/intgemm/IntegerGemmIntrinsic.h b/js/src/intgemm/IntegerGemmIntrinsic.h new file mode 100644 index 0000000000..2cbb70853f --- /dev/null +++ b/js/src/intgemm/IntegerGemmIntrinsic.h @@ -0,0 +1,358 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ */ + +#ifndef intgemm_IntegerGemmIntrinsic_h +#define intgemm_IntegerGemmIntrinsic_h + +#include + +namespace js { +namespace wasm { +class Instance; +} + +namespace intgemm { + +/* Interface for integer matrix multiplication followed by addition of bias. + * + * C = A * B + Bias + * + * Input matrix A: + * - A 2-D matrix that typically represents activations as floating point + * values + * - no. of rows should be a positive integer + * - no. of columns should be a positive integeral multiple of 64 + * - is represented as array (contiguous memory locations) in row-major format + * + * Input matrix B: + * - A 2-D matrix that typically represents fixed model parameters as + * floating point values + * - no. of rows should be: + * -- equal to no. of columns of Input matrix A + * -- a positive integeral multiple of 64 + * - no. of columns should be a positive integeral multiple of 8 + * - is represented as array (contiguous memory locations) in row-major format + * + * Please note that it is also possible to pass Input matrix B in 2 more forms: + * - One that is already a quantized and transposed version of Input matrix B + * - Other that is already a transposed version of Input matrix B + * + * Input Bias: + * - is an array (contiguous memory locations) that represents bias + * - size of the array should be equal to the no. of columns of Input matrix B + * + * Output matrix C: + * - is a 2-D matrix that represents the result (= A * B + Bias) + * - no. of rows = no. of rows of Input matrix A + * - no. of columns = no. of columns of Input matrix B (in + * untransposed form) + * - is represented as array (contiguous memory locations) in row-major format + * + * Please note that most of the functions in this interface might have + * architecture specific implementations. + * + * Conventions followed for the interface: + * - Unless explicitly mentioned, Input matrix B refers to an unquantized + * (i.e. float values) and non-transposed version + * - no. 
of rows of Input matrix A = `rowsA` + * - no. of columns of Input matrix A (`colsA`) = no. of rows of Input matrix B + * (`rowsB`) = `width` + * - no. of columns of Input matrix B = `colsB` + */ + +/* Prepare B for the Matrix Multiply function from Input matrix B. + * + * Quantization is performed on the input. + * The final prepared B is in CPU-dependent format and can be used as an input + * to matrix multiply function (`int8_multiply_and_add_bias`). + * + * Please note that this interface might have architecture specific + * implementation. + * + * @param[in] inputMatrixB An array representing the Input matrix B in + * row-major format. + * Size of the array = `rowsB` * `colsB`. + * Shape of the matrix: (`rowsB`, `colsB`) + * @param[in] scale The scaling factor (for quantization) + * @param[in] zeroPoint The zero point (for quantization) + * @param[in] rowsB No. of rows of Input matrix B. It should be + * a positive integer and a multiple of 64. + * @param[in] colsB No. of columns of Input matrix B. It should + * be a positive integer and a multiple of 8. + * @param[out] outputMatrixB An array representing the prepared B matrix. + * Size of the array = `rowsB` * `colsB`. + * + * This function implements the intrinsic: + * int8_prepare_b(inputMatrixB: i32, scale: f32, zeroPoint: f32, rowsB: i32, + * colsB: i32, outputMatrixB: i32) which implements the function: + * int8_prepare_b(const float* inputMatrixB, float scale, float zeroPoint, + * uint32_t rowsB, uint32_t colsB, int8_t* outputMatrixB) + */ +int32_t IntrI8PrepareB(wasm::Instance* instance, uint32_t inputMatrixB, + float scale, float zeroPoint, uint32_t rowsB, + uint32_t colsB, uint32_t outputMatrixB, + uint8_t* memBase); + +/* Prepare B for the Matrix Multiply function from transposed version of Input + * matrix B. + * + * Quantization is performed on floating values of input. 
+ * The final prepared B is in CPU-dependent format and can be used as an input + * to matrix multiply function (`int8_multiply_and_add_bias`). + * + * Please note that this interface might have architecture specific + * implementation. + * + * @param[in] inputMatrixBTransposed An array representing transposed version + * of Input matrix B. + * It is in column-major format. + * Size of the array = `rowsB` * `colsB`. + * Shape of the matrix: (`colsB`, `rowsB`) + * @param[in] scale The scaling factor (for quantization) + * @param[in] zeroPoint The zero point (for quantization) + * @param[in] rowsB No. of rows of Input matrix B. It should + * be a positive integer and a multiple of + * 64. + * @param[in] colsB No. of columns of Input matrix B. It + * should be a positive integer and a + * multiple of 8. + * @param[out] outputMatrixB An array representing the prepared B + * matrix. Size of array = `rowsB`*`colsB` + * + * This function implements the intrinsic: + * int8_prepare_b_from_transposed(inputMatrixBTransposed: i32, scale: f32, + * zeroPoint: f32, rowsB: i32, colsB: i32, outputMatrixB: i32) which implements + * the function: int8_prepare_b_from_transposed(const float* + * inputMatrixBTransposed, float scale, float zeroPoint, uint32_t rowsB, + * uint32_t colsB, int8_t* outputMatrixB) + */ +int32_t IntrI8PrepareBFromTransposed(wasm::Instance* instance, + uint32_t inputMatrixBTransposed, + float scale, float zeroPoint, + uint32_t rowsB, uint32_t colsB, + uint32_t outputMatrixB, uint8_t* memBase); + +/* Prepare B for the Matrix Multiply function from a quantized and transposed + * version of Input matrix B which is also in a CPU-independent format. + * + * The final prepared B is in CPU-dependent format and can be used as an input + * to matrix multiply function (`int8_multiply_and_add_bias`). + * + * This function is useful while using the quantized models that are stored in a + * CPU-independent format on the disk. 
+ * + * @param[in] inputMatrixBQuantizedTransposed An array representing the + * quantized and transposed + * version of Input matrix B. + * It is in column-major format. + * Size of array = + * `rowsB`*`colsB` + * Shape of the matrix: + * (`colsB`,`rowsB`) + * @param[in] rowsB No. of rows of Input matrix B. + * Should be a positive integer + * and a multiple of 64. + * @param[in] colsB No. of columns of Input matrix + * B. Should be a positive + * integer and a multiple of 8 + * @param[out] outputMatrixB An array representing the + * prepared B matrix. + * Size: `rowsB` * `colsB`. + * + * This function implements the intrinsic: + * int8_prepare_b_from_quantized_transposed(inputMatrixBQuantizedTransposed: + * i32, rowsB: i32, colsB: i32, outputMatrixB: i32) which implements the + * function: int8_prepare_b_from_quantized_transposed(const int8_t* + * inputMatrixBQuantizedTransposed, uint32_t rowsB, uint32_t colsB, int8_t* + * outputMatrixB) + */ +int32_t IntrI8PrepareBFromQuantizedTransposed( + wasm::Instance* instance, uint32_t inputMatrixBQuantizedTransposed, + uint32_t rowsB, uint32_t colsB, uint32_t outputMatrixB, uint8_t* memBase); + +/* Prepare A for the Matrix Multiply function from Input matrix A. + * + * It performs quantization on floating values of input. + * The final prepared A might be architecture dependent. e.g. On some + * architectures like x86, it might be unsigned (achieved by adding 127 to + * quantized values) while on others like Arm, it might be signed. The final + * prepared A can be used as an input to matrix multiply function + * (`int8_multiply_and_add_bias`). + * + * Please note that this interface might have architecture specific + * implementation. + * + * @param[in] inputMatrixA An array representing the Input matrix A in + * row-major format. + * Size of the array = `rowsA` * `colsA`. 
+ * Shape of the matrix: (`rowsA`, `colsA`) + * @param[in] scale The scaling factor (for quantization) + * @param[in] zeroPoint The zero point (for quantization) + * @param[in] rowsA No. of rows of Input matrix A. It should be a + * positive integer. + * @param[in] colsA No. of columns of Input matrix A. It should be a + * positive integer and a multiple of 64. + * @param[out] outputMatrixA An array representing the prepared A matrix. + * Size of the array = `rowsA` * `colsA`. + * + * This function implements the intrinsic: + * int8_prepare_a(inputMatrixA: i32, scale: f32, zeroPoint: f32, rowsA: i32, + * colsA: i32, outputMatrixA: i32) which implements the function: + * int8_prepare_a(const float* inputMatrixA, float scale, float zeroPoint, + * uint32_t rowsA, uint32_t colsA, int8_t* outputMatrixA) + */ +int32_t IntrI8PrepareA(wasm::Instance* instance, uint32_t inputMatrixA, + float scale, float zeroPoint, uint32_t rowsA, + uint32_t colsA, uint32_t outputMatrixA, + uint8_t* memBase); + +/* Prepares bias for the Matrix Multiply function. + * + * It uses the prepared B (which must be obtained by using any of the + * int8_prepare_b* functions) and a bias input to prepare the final bias. + * + * The final bias can be used as an input to matrix multiply function + * (`int8_multiply_and_add_bias`). + * + * @param[in] inputMatrixBPrepared An array representing the prepared B + * matrix. Size of array = `rowsB`*`colsB`. + * @param[in] scaleA The scaling factor (for quantization) of A + * @param[in] zeroPointA The zero point (for quantization) of A + * @param[in] scaleB The scaling factor (for quantization) of B + * @param[in] zeroPointB The zero point (for quantization) of B + * @param[in] rowsB No. of rows of Input matrix B (unquantized + * & non-transposed). It should be a positive + * integer and a multiple of 64. + * @param[in] colsB No. of columns of Input matrix B + * (unquantized & non-transposed). It should + * be a positive integer and a multiple of 8. 
+ * @param[in] inputBias An array representing the input bias. Size + * of array = `colsB` + * @param[out] output An array representing the final prepared + * bias. Size of the array = `colsB` + * + * This function implements the intrinsic: + * int8_prepare_bias(inputMatrixBPrepared: i32, scaleA: f32, zeroPointA: f32, + * scaleB: f32, zeroPointB: f32, rowsB: i32, colsB: i32, inputBias: i32, output: + * i32) which implements the function: int8_prepare_bias(const int8_t* + * inputMatrixBPrepared, float scaleA, float zeroPointA, float scaleB, float + * zeroPointB, uint32_t rowsB, uint32_t colsB, const float* inputBias, float* + * output) + */ +int32_t IntrI8PrepareBias(wasm::Instance* instance, + uint32_t inputMatrixBPrepared, float scaleA, + float zeroPointA, float scaleB, float zeroPointB, + uint32_t rowsB, uint32_t colsB, uint32_t inputBias, + uint32_t output, uint8_t* memBase); + +/* Perform multiplication of 2 matrices followed by adding a bias. + * + * i.e Output = inputMatrixAPrepared * inputMatrixBPrepared + inputBiasPrepared + * + * The inputs inputMatrixAPrepared, inputMatrixBPrepared and inputBiasPrepared + * of this function must be obtained by using `int8_prepare_A`, one of the + * `int8_prepare_b*` and `int8_prepare_bias` functions respectively. + * + * Please note that this interface might have architecture specific + * implementation. + * + * @param[in] inputMatrixAPrepared An array representing the prepared A + * matrix. This must be obtained by using + * `int8_prepare_A` function. Size of the + * array = `rowsA` * `width`. + * @param[in] scaleA The scaling factor (quantization) of A + * @param[in] zeroPointA The zero point (for quantization) of A + * @param[in] inputMatrixBPrepared An array representing the prepared B + * matrix. This must be obtained by using + * one of `int8_prepare_b*` functions. + * Size of the array = `width` * `colsB`. 
+ * @param[in] scaleB The scaling factor (quantization) of B + * @param[in] zeroPointB The zero point (for quantization) of B + * @param[in] inputBiasPrepared An array representing the prepared bias. + * This must be obtained by using + * `int8_prepare_bias` function. + * Size of the array = `colsB` + * @param[in] unquantMultiplier A value that will be multiplied to the + * final unquantization factor that is + * prepared from `scaleA` and `scaleB`. + * @param[in] rowsA No. of rows of Input matrix A. It should + * be a positive integer. + * @param[in] width No. of columns of Input matrix A (same as + * no. of columns of Input matrix B). It + * should be a positive integer and a + * multiple of 64. + * @param[in] colsB No. of columns of Input matrix B. Should + * be a multiple of 8. + * @param[out] output An array representing the result matrix + * in row-major format. + * Size of the array = `rowsA` * `colsB`. + * + * This function implements the intrinsic: + * int8_multiply_and_add_bias(inputMatrixAPrepared: i32, scaleA: f32, + * zeroPointA: f32, inputMatrixBPrepared: i32, scaleB: f32, zeroPointB: f32, + * inputBiasPrepared: i32, unquantMultiplier: f32, + * rowsA: i32, width: i32, colsB: i32, output: i32) + * which implements the function: + * int8_multiply_and_add_bias(const int8_t* inputMatrixAPrepared, float + * scaleA, float zeroPointA, const int8_t* inputMatrixBPrepared, float scaleB, + * float zeroPointB, const float* inputBiasPrepared, float unquantMultiplier, + * uint32_t rowsA, uint32_t width, uint32_t colsB, float* + * output) + */ +int32_t IntrI8MultiplyAndAddBias(wasm::Instance* instance, + uint32_t inputMatrixAPrepared, float scaleA, + float zeroPointA, + uint32_t inputMatrixBPrepared, float scaleB, + float zeroPointB, uint32_t inputBiasPrepared, + float unquantMultiplier, uint32_t rowsA, + uint32_t width, uint32_t colsB, + uint32_t output, uint8_t* memBase); + +/* Select a subset of columns of prepared B. 
+ * + * Indices of the columns to be selected are specified by an array. + * + * @param[in] inputMatrixBPrepared An array representing the prepared B + * matrix. This must be obtained by using + * one of the `int8_prepare_b*` functions. + * Size of the array = `rowsB` * `colsB`. + * @param[in] rowsB No. of rows of Input matrix B. It should + * be a positive integer and a multiple + * of 64. + * @param[in] colsB No. of columns of Input matrix B. It + * should be a positive integer and a + * multiple of 8. + * @param[in] colIndexList An array of column indices to be selected + * from prepared B. All indices of the array + * should be valid + * i.e. 0 <= colIndexList[N] < colsB + * where N = 0, 1 ....(`sizeColIndexList`-1) + * @param[in] sizeColIndexList Size of the `colIndexList` array. It + * should be a positive integer and a + * multiple of 8. + * @param[out] output An array representing the selected columns + * of prepared B. + * Size = `rowsB` * `sizeColIndexList`. + * + * This function implements the intrinsic: + * int8_select_columns_of_b(inputMatrixBPrepared: i32, rowsB: i32, colsB: i32, + * colIndexList: i32, sizeColIndexList: i32, output: i32) which implements the + * function: int8_select_columns_of_b(const int8_t* inputMatrixBPrepared, + * uint32_t rowsB, uint32_t colsB, const uint32_t* colIndexList, const uint32_t + * sizeColIndexList, int8_t* output) + */ +int32_t IntrI8SelectColumnsOfB(wasm::Instance* instance, + uint32_t inputMatrixBPrepared, uint32_t rowsB, + uint32_t colsB, uint32_t colIndexList, + uint32_t sizeColIndexList, uint32_t output, + uint8_t* memBase); + +} // namespace intgemm +} // namespace js + +#endif // intgemm_IntegerGemmIntrinsic_h diff --git a/js/src/intgemm/README_MOZILLA b/js/src/intgemm/README_MOZILLA new file mode 100644 index 0000000000..8d4c2a2093 --- /dev/null +++ b/js/src/intgemm/README_MOZILLA @@ -0,0 +1,18 @@ +This directory contains build files for the intgemm reference implementation. 
+The actual library source is in $TOPSRCDIR/third_party/intgemm/ + +Any patches or additional configuration to be applied to the +upstream source should be kept in $TOPSRCDIR/third_party/intgemm/. + +To update the library source and build config files, execute + + ./mach vendor js/src/intgemm/moz.yaml + +To update to a specific upstream git tag or commit, use + + ./mach vendor js/src/intgemm/moz.yaml -r <commit> + +The upstream git repository is https://github.com/kpu/intgemm + +To view the information about the current version, check the +'origin' section of moz.yaml. \ No newline at end of file diff --git a/js/src/intgemm/enable_intel_extensions.py b/js/src/intgemm/enable_intel_extensions.py new file mode 100644 index 0000000000..6259a1a1f7 --- /dev/null +++ b/js/src/intgemm/enable_intel_extensions.py @@ -0,0 +1,24 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +def main(output, intgemm_config): +    with open(intgemm_config, "r") as f: +        config = f.read() + +    # Enable intel AVX2 hardware extension specific code to allow using AVX2 at run time +    # if target cpu supports it +    config = config.replace( +        "#cmakedefine INTGEMM_COMPILER_SUPPORTS_AVX2", +        "#define INTGEMM_COMPILER_SUPPORTS_AVX2", +    ) + +    # Disable more advanced intel hardware extensions for now because base-toolchain compiler +    # versions aren't able to compile them +    config = config.replace("#cmakedefine", "#undef") + +    output.write(config) +    output.close() + +    return 0 diff --git a/js/src/intgemm/moz.build b/js/src/intgemm/moz.build new file mode 100644 index 0000000000..378f8c5e72 --- /dev/null +++ b/js/src/intgemm/moz.build @@ -0,0 +1,33 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +include("../js-config.mozbuild") +include("../js-cxxflags.mozbuild") + +FINAL_LIBRARY = "js" + +with Files("*"): + BUG_COMPONENT = ("Core", "JavaScript: WebAssembly") + +LOCAL_INCLUDES += [ + "!..", + "..", + "/third_party/intgemm/intgemm", +] + +SOURCES += [ + "/third_party/intgemm/intgemm/intgemm.cc", + "IntegerGemmIntrinsic.cpp", +] + +GeneratedFile( + "intgemm/intgemm_config.h", + script="enable_intel_extensions.py", + inputs=["/third_party/intgemm/intgemm/intgemm_config.h.in"], +) + +# We allow warnings for third-party code that can be updated from upstream. +AllowCompilerWarnings() diff --git a/js/src/intgemm/moz.yaml b/js/src/intgemm/moz.yaml new file mode 100644 index 0000000000..714090a573 --- /dev/null +++ b/js/src/intgemm/moz.yaml @@ -0,0 +1,47 @@ +# Version of this schema +schema: 1 + +bugzilla: + # Bugzilla product and component for this directory and subdirectories + product: Core + component: "JavaScript: WebAssembly" + +# Document the source of externally hosted code +origin: + + # Short name of the package/library + name: intgemm + + description: integer matrix multiplication + + # Full URL for the package's homepage/etc + # Usually different from repository url + url: https://github.com/kpu/intgemm + + # Human-readable identifier for this version/release + # Generally "version NNN", "tag SSS", "bookmark SSS" + release: commit fc3a614351ce6e667197307d97f45db5265c96af (2022-02-09T14:56:05Z). 
+ + # Revision to pull in + # Must be a long or short commit SHA (long preferred) + revision: fc3a614351ce6e667197307d97f45db5265c96af + + # The package's license, where possible using the mnemonic from + # https://spdx.org/licenses/ + # Multiple licenses can be specified (as a YAML list) + # A "LICENSE" file must exist containing the full license text + license: MIT + +vendoring: + url: https://github.com/kpu/intgemm + source-hosting: github + vendor-directory: third_party/intgemm + + exclude: + - build/.gitattributes + - build/.gitignore + + update-actions: + - action: delete-path + path: '{yaml_dir}/config' + -- cgit v1.2.3