/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */

#ifndef intgemm_IntegerGemmIntrinsic_h
#define intgemm_IntegerGemmIntrinsic_h

#include <stdint.h>

namespace js {
namespace wasm {
class Instance;
}

namespace intgemm {

/* Interface for integer matrix multiplication followed by addition of bias.
 *
 * C = A * B + Bias
 *
 * Input matrix A:
 * - A 2-D matrix that typically represents activations as floating point
 *   values
 * - no. of rows should be a positive integer
 * - no. of columns should be a positive integral multiple of 64
 * - is represented as array (contiguous memory locations) in row-major format
 *
 * Input matrix B:
 * - A 2-D matrix that typically represents fixed model parameters as
 *   floating point values
 * - no. of rows should be:
 *   -- equal to no. of columns of Input matrix A
 *   -- a positive integral multiple of 64
 * - no. of columns should be a positive integral multiple of 8
 * - is represented as array (contiguous memory locations) in row-major format
 *
 * Please note that it is also possible to pass Input matrix B in 2 more forms:
 * - One that is already a quantized and transposed version of Input matrix B
 * - Other that is already a transposed version of Input matrix B
 *
 * Input Bias:
 * - is an array (contiguous memory locations) that represents bias
 * - size of the array should be equal to the no. of columns of Input matrix B
 *
 * Output matrix C:
 * - is a 2-D matrix that represents the result (= A * B + Bias)
 * - no. of rows = no. of rows of Input matrix A
 * - no. of columns = no.
of columns of Input matrix B (in untransposed form)
 * - is represented as array (contiguous memory locations) in row-major format
 *
 * Please note that most of the functions in this interface might have
 * architecture specific implementations.
 *
 * Conventions followed for the interface:
 * - Unless explicitly mentioned, Input matrix B refers to an unquantized
 *   (i.e. float values) and non-transposed version
 * - no. of rows of Input matrix A = `rowsA`
 * - no. of columns of Input matrix A (`colsA`) = no. of rows of Input matrix B
 *   (`rowsB`) = `width`
 * - no. of columns of Input matrix B = `colsB`
 */

/* Prepare B for the Matrix Multiply function from Input matrix B.
 *
 * Quantization is performed on the input.
 * The final prepared B is in CPU-dependent format and can be used as an input
 * to matrix multiply function (`int8_multiply_and_add_bias`).
 *
 * Please note that this interface might have architecture specific
 * implementation.
 *
 * All matrix arguments are passed as byte offsets into the wasm instance's
 * linear memory (`memBase`).
 *
 * @param[in] inputMatrixB   An array representing the Input matrix B in
 *                           row-major format.
 *                           Size of the array = `rowsB` * `colsB`.
 *                           Shape of the matrix: (`rowsB`, `colsB`)
 * @param[in] scale          The scaling factor (for quantization)
 * @param[in] zeroPoint      The zero point (for quantization)
 * @param[in] rowsB          No. of rows of Input matrix B. It should be
 *                           a positive integer and a multiple of 64.
 * @param[in] colsB          No. of columns of Input matrix B. It should
 *                           be a positive integer and a multiple of 8.
 * @param[out] outputMatrixB An array representing the prepared B matrix.
 *                           Size of the array = `rowsB` * `colsB`.
 *
 * This function implements the intrinsic:
 *   int8_prepare_b(inputMatrixB: i32, scale: f32, zeroPoint: f32, rowsB: i32,
 *                  colsB: i32, outputMatrixB: i32)
 * which implements the function:
 *   int8_prepare_b(const float* inputMatrixB, float scale, float zeroPoint,
 *                  uint32_t rowsB, uint32_t colsB, int8_t* outputMatrixB)
 */
int32_t IntrI8PrepareB(wasm::Instance* instance, uint32_t inputMatrixB,
                       float scale, float zeroPoint, uint32_t rowsB,
                       uint32_t colsB, uint32_t outputMatrixB,
                       uint8_t* memBase);

/* Prepare B for the Matrix Multiply function from transposed version of Input
 * matrix B.
 *
 * Quantization is performed on floating values of input.
 * The final prepared B is in CPU-dependent format and can be used as an input
 * to matrix multiply function (`int8_multiply_and_add_bias`).
 *
 * Please note that this interface might have architecture specific
 * implementation.
 *
 * @param[in] inputMatrixBTransposed An array representing transposed version
 *                                   of Input matrix B.
 *                                   It is in column-major format.
 *                                   Size of the array = `rowsB` * `colsB`.
 *                                   Shape of the matrix: (`colsB`, `rowsB`)
 * @param[in] scale                  The scaling factor (for quantization)
 * @param[in] zeroPoint              The zero point (for quantization)
 * @param[in] rowsB                  No. of rows of Input matrix B. It should
 *                                   be a positive integer and a multiple of
 *                                   64.
 * @param[in] colsB                  No. of columns of Input matrix B. It
 *                                   should be a positive integer and a
 *                                   multiple of 8.
 * @param[out] outputMatrixB         An array representing the prepared B
 *                                   matrix.
Size of array = `rowsB` * `colsB`
 *
 * This function implements the intrinsic:
 *   int8_prepare_b_from_transposed(inputMatrixBTransposed: i32, scale: f32,
 *                                  zeroPoint: f32, rowsB: i32, colsB: i32,
 *                                  outputMatrixB: i32)
 * which implements the function:
 *   int8_prepare_b_from_transposed(const float* inputMatrixBTransposed,
 *                                  float scale, float zeroPoint,
 *                                  uint32_t rowsB, uint32_t colsB,
 *                                  int8_t* outputMatrixB)
 */
int32_t IntrI8PrepareBFromTransposed(wasm::Instance* instance,
                                     uint32_t inputMatrixBTransposed,
                                     float scale, float zeroPoint,
                                     uint32_t rowsB, uint32_t colsB,
                                     uint32_t outputMatrixB, uint8_t* memBase);

/* Prepare B for the Matrix Multiply function from a quantized and transposed
 * version of Input matrix B which is also in a CPU-independent format.
 *
 * The final prepared B is in CPU-dependent format and can be used as an input
 * to matrix multiply function (`int8_multiply_and_add_bias`).
 *
 * This function is useful while using the quantized models that are stored in
 * a CPU-independent format on the disk.
 *
 * @param[in] inputMatrixBQuantizedTransposed An array representing the
 *                                            quantized and transposed
 *                                            version of Input matrix B.
 *                                            It is in column-major format.
 *                                            Size of array =
 *                                            `rowsB` * `colsB`
 *                                            Shape of the matrix:
 *                                            (`colsB`, `rowsB`)
 * @param[in] rowsB                           No. of rows of Input matrix B.
 *                                            Should be a positive integer
 *                                            and a multiple of 64.
 * @param[in] colsB                           No. of columns of Input matrix
 *                                            B. Should be a positive
 *                                            integer and a multiple of 8
 * @param[out] outputMatrixB                  An array representing the
 *                                            prepared B matrix.
 *                                            Size: `rowsB` * `colsB`.
*
 * This function implements the intrinsic:
 *   int8_prepare_b_from_quantized_transposed(
 *     inputMatrixBQuantizedTransposed: i32, rowsB: i32, colsB: i32,
 *     outputMatrixB: i32)
 * which implements the function:
 *   int8_prepare_b_from_quantized_transposed(
 *     const int8_t* inputMatrixBQuantizedTransposed, uint32_t rowsB,
 *     uint32_t colsB, int8_t* outputMatrixB)
 */
int32_t IntrI8PrepareBFromQuantizedTransposed(
    wasm::Instance* instance, uint32_t inputMatrixBQuantizedTransposed,
    uint32_t rowsB, uint32_t colsB, uint32_t outputMatrixB, uint8_t* memBase);

/* Prepare A for the Matrix Multiply function from Input matrix A.
 *
 * It performs quantization on floating values of input.
 * The final prepared A might be architecture dependent. e.g. On some
 * architectures like x86, it might be unsigned (achieved by adding 127 to
 * quantized values) while on others like Arm, it might be signed. The final
 * prepared A can be used as an input to matrix multiply function
 * (`int8_multiply_and_add_bias`).
 *
 * Please note that this interface might have architecture specific
 * implementation.
 *
 * @param[in] inputMatrixA   An array representing the Input matrix A in
 *                           row-major format.
 *                           Size of the array = `rowsA` * `colsA`.
 *                           Shape of the matrix: (`rowsA`, `colsA`)
 * @param[in] scale          The scaling factor (for quantization)
 * @param[in] zeroPoint      The zero point (for quantization)
 * @param[in] rowsA          No. of rows of Input matrix A. It should be a
 *                           positive integer.
 * @param[in] colsA          No. of columns of Input matrix A. It should be a
 *                           positive integer and a multiple of 64.
 * @param[out] outputMatrixA An array representing the prepared A matrix.
 *                           Size of the array = `rowsA` * `colsA`.
*
 * This function implements the intrinsic:
 *   int8_prepare_a(inputMatrixA: i32, scale: f32, zeroPoint: f32, rowsA: i32,
 *                  colsA: i32, outputMatrixA: i32)
 * which implements the function:
 *   int8_prepare_a(const float* inputMatrixA, float scale, float zeroPoint,
 *                  uint32_t rowsA, uint32_t colsA, int8_t* outputMatrixA)
 */
int32_t IntrI8PrepareA(wasm::Instance* instance, uint32_t inputMatrixA,
                       float scale, float zeroPoint, uint32_t rowsA,
                       uint32_t colsA, uint32_t outputMatrixA,
                       uint8_t* memBase);

/* Prepares bias for the Matrix Multiply function.
 *
 * It uses the prepared B (which must be obtained by using any of the
 * int8_prepare_b* functions) and a bias input to prepare the final bias.
 *
 * The final bias can be used as an input to matrix multiply function
 * (`int8_multiply_and_add_bias`).
 *
 * @param[in] inputMatrixBPrepared An array representing the prepared B
 *                                 matrix. Size of array = `rowsB` * `colsB`.
 * @param[in] scaleA               The scaling factor (for quantization) of A
 * @param[in] zeroPointA           The zero point (for quantization) of A
 * @param[in] scaleB               The scaling factor (for quantization) of B
 * @param[in] zeroPointB           The zero point (for quantization) of B
 * @param[in] rowsB                No. of rows of Input matrix B (unquantized
 *                                 & non-transposed). It should be a positive
 *                                 integer and a multiple of 64.
 * @param[in] colsB                No. of columns of Input matrix B
 *                                 (unquantized & non-transposed). It should
 *                                 be a positive integer and a multiple of 8.
 * @param[in] inputBias            An array representing the input bias. Size
 *                                 of array = `colsB`
 * @param[out] output              An array representing the final prepared
 *                                 bias.
Size of the array = `colsB`
 *
 * This function implements the intrinsic:
 *   int8_prepare_bias(inputMatrixBPrepared: i32, scaleA: f32, zeroPointA: f32,
 *                     scaleB: f32, zeroPointB: f32, rowsB: i32, colsB: i32,
 *                     inputBias: i32, output: i32)
 * which implements the function:
 *   int8_prepare_bias(const int8_t* inputMatrixBPrepared, float scaleA,
 *                     float zeroPointA, float scaleB, float zeroPointB,
 *                     uint32_t rowsB, uint32_t colsB, const float* inputBias,
 *                     float* output)
 */
int32_t IntrI8PrepareBias(wasm::Instance* instance,
                          uint32_t inputMatrixBPrepared, float scaleA,
                          float zeroPointA, float scaleB, float zeroPointB,
                          uint32_t rowsB, uint32_t colsB, uint32_t inputBias,
                          uint32_t output, uint8_t* memBase);

/* Perform multiplication of 2 matrices followed by adding a bias.
 *
 * i.e. Output = inputMatrixAPrepared * inputMatrixBPrepared +
 *      inputBiasPrepared
 *
 * The inputs inputMatrixAPrepared, inputMatrixBPrepared and inputBiasPrepared
 * of this function must be obtained by using `int8_prepare_a`, one of the
 * `int8_prepare_b*` and `int8_prepare_bias` functions respectively.
 *
 * Please note that this interface might have architecture specific
 * implementation.
 *
 * @param[in] inputMatrixAPrepared An array representing the prepared A
 *                                 matrix. This must be obtained by using
 *                                 `int8_prepare_a` function. Size of the
 *                                 array = `rowsA` * `width`.
 * @param[in] scaleA               The scaling factor (for quantization) of A
 * @param[in] zeroPointA           The zero point (for quantization) of A
 * @param[in] inputMatrixBPrepared An array representing the prepared B
 *                                 matrix. This must be obtained by using
 *                                 one of `int8_prepare_b*` functions.
 *                                 Size of the array = `width` * `colsB`.
 * @param[in] scaleB               The scaling factor (for quantization) of B
 * @param[in] zeroPointB           The zero point (for quantization) of B
 * @param[in] inputBiasPrepared    An array representing the prepared bias.
 *                                 This must be obtained by using
 *                                 `int8_prepare_bias` function.
* Size of the array = `colsB`
 * @param[in] unquantMultiplier    A value that will be multiplied to the
 *                                 final unquantization factor that is
 *                                 prepared from `scaleA` and `scaleB`.
 * @param[in] rowsA                No. of rows of Input matrix A. It should
 *                                 be a positive integer.
 * @param[in] width                No. of columns of Input matrix A (same as
 *                                 no. of rows of Input matrix B). It
 *                                 should be a positive integer and a
 *                                 multiple of 64.
 * @param[in] colsB                No. of columns of Input matrix B. Should
 *                                 be a multiple of 8.
 * @param[out] output              An array representing the result matrix
 *                                 in row-major format.
 *                                 Size of the array = `rowsA` * `colsB`.
 *
 * This function implements the intrinsic:
 *   int8_multiply_and_add_bias(inputMatrixAPrepared: i32, scaleA: f32,
 *                              zeroPointA: f32, inputMatrixBPrepared: i32,
 *                              scaleB: f32, zeroPointB: f32,
 *                              inputBiasPrepared: i32,
 *                              unquantMultiplier: f32, rowsA: i32,
 *                              width: i32, colsB: i32, output: i32)
 * which implements the function:
 *   int8_multiply_and_add_bias(const int8_t* inputMatrixAPrepared,
 *                              float scaleA, float zeroPointA,
 *                              const int8_t* inputMatrixBPrepared,
 *                              float scaleB, float zeroPointB,
 *                              const float* inputBiasPrepared,
 *                              float unquantMultiplier, uint32_t rowsA,
 *                              uint32_t width, uint32_t colsB, float* output)
 */
int32_t IntrI8MultiplyAndAddBias(wasm::Instance* instance,
                                 uint32_t inputMatrixAPrepared, float scaleA,
                                 float zeroPointA,
                                 uint32_t inputMatrixBPrepared, float scaleB,
                                 float zeroPointB, uint32_t inputBiasPrepared,
                                 float unquantMultiplier, uint32_t rowsA,
                                 uint32_t width, uint32_t colsB,
                                 uint32_t output, uint8_t* memBase);

/* Select a subset of columns of prepared B.
 *
 * Indices of the columns to be selected are specified by an array.
 *
 * @param[in] inputMatrixBPrepared An array representing the prepared B
 *                                 matrix. This must be obtained by using
 *                                 one of the `int8_prepare_b*` functions.
 *                                 Size of the array = `rowsB` * `colsB`.
 * @param[in] rowsB                No. of rows of Input matrix B. It should
 *                                 be a positive integer and a multiple
 *                                 of 64.
* @param[in] colsB                No. of columns of Input matrix B. It
 *                                 should be a positive integer and a
 *                                 multiple of 8.
 * @param[in] colIndexList         An array of column indices to be selected
 *                                 from prepared B. All indices of the array
 *                                 should be valid
 *                                 i.e. 0 <= colIndexList[N] < colsB
 *                                 where N = 0, 1 ....(`sizeColIndexList`-1)
 * @param[in] sizeColIndexList     Size of the `colIndexList` array. It
 *                                 should be a positive integer and a
 *                                 multiple of 8.
 * @param[out] output              An array representing the selected columns
 *                                 of prepared B.
 *                                 Size = `rowsB` * `sizeColIndexList`.
 *
 * This function implements the intrinsic:
 *   int8_select_columns_of_b(inputMatrixBPrepared: i32, rowsB: i32,
 *                            colsB: i32, colIndexList: i32,
 *                            sizeColIndexList: i32, output: i32)
 * which implements the function:
 *   int8_select_columns_of_b(const int8_t* inputMatrixBPrepared,
 *                            uint32_t rowsB, uint32_t colsB,
 *                            const uint32_t* colIndexList,
 *                            const uint32_t sizeColIndexList, int8_t* output)
 */
int32_t IntrI8SelectColumnsOfB(wasm::Instance* instance,
                               uint32_t inputMatrixBPrepared, uint32_t rowsB,
                               uint32_t colsB, uint32_t colIndexList,
                               uint32_t sizeColIndexList, uint32_t output,
                               uint8_t* memBase);

}  // namespace intgemm
}  // namespace js

#endif  // intgemm_IntegerGemmIntrinsic_h