js/src/intgemm/IntegerGemmIntrinsic.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

#ifndef intgemm_IntegerGemmIntrinsic_h
#define intgemm_IntegerGemmIntrinsic_h

#include <stdint.h>

namespace js {
namespace wasm {
class Instance;
}

namespace intgemm {

/* Interface for integer matrix multiplication followed by addition of bias.
 *
 * C = A * B + Bias
 *
 * Input matrix A:
 *  - A 2-D matrix that typically represents activations as floating point
 * values
 *  - no. of rows should be a positive integer
 *  - no. of columns should be a positive integeral multiple of 64
 *  - is represented as array (contiguous memory locations) in row-major format
 *
 * Input matrix B:
 *  - A 2-D matrix that typically represents fixed model parameters as
 * floating point values
 *  - no. of rows should be:
 *    -- equal to no. of columns of Input matrix A
 *    -- a positive integeral multiple of 64
 *  - no. of columns should be a positive integeral multiple of 8
 *  - is represented as array (contiguous memory locations) in row-major format
 *
 *  Please note that it is also possible to pass Input matrix B in 2 more forms:
 *   - One that is already a quantized and transposed version of Input matrix B
 *   - Other that is already a transposed version of Input matrix B
 *
 * Input Bias:
 *  - is an array (contiguous memory locations) that represents bias
 *  - size of the array should be equal to the no. of columns of Input matrix B
 *
 * Output matrix C:
 *  - is a 2-D matrix that represents the result (= A * B + Bias)
 *  - no. of rows = no. of rows of Input matrix A
 *  - no. of columns = no. of columns of Input matrix B (in
 * untransposed form)
 *  - is represented as array (contiguous memory locations) in row-major format
 *
 * Please note that most of the functions in this interface might have
 * architecture specific implementations.
 *
 * Conventions followed for the interface:
 *  - Unless explicitly mentioned, Input matrix B refers to an unquantized
 * (i.e. float values) and non-transposed version
 *  - no. of rows of Input matrix A = `rowsA`
 *  - no. of columns of Input matrix A (`colsA`) = no. of rows of Input matrix B
 * (`rowsB`) = `width`
 *  - no. of columns of Input matrix B = `colsB`
 */

/* Prepare B for the Matrix Multiply function from Input matrix B.
 *
 * Quantization is performed on the input.
 * The final prepared B is in CPU-dependent format and can be used as an input
 * to matrix multiply function (`int8_multiply_and_add_bias`).
 *
 * Please note that this interface might have architecture specific
 * implementation.
 *
 * @param[in]   inputMatrixB        An array representing the Input matrix B in
 *                                  row-major format.
 *                                  Size of the array = `rowsB` * `colsB`.
 *                                  Shape of the matrix: (`rowsB`, `colsB`)
 * @param[in]   scale               The scaling factor (for quantization)
 * @param[in]   zeroPoint           The zero point (for quantization)
 * @param[in]   rowsB               No. of rows of Input matrix B. It should be
 *                                  a positive integer and a multiple of 64.
 * @param[in]   colsB               No. of columns of Input matrix B. It should
 *                                  be a positive integer and a multiple of 8.
 * @param[out]  outputMatrixB       An array representing the prepared B matrix.
 *                                  Size of the array = `rowsB` * `colsB`.
 *
 * This function implements the intrinsic:
 *   int8_prepare_b(inputMatrixB: i32, scale: f32, zeroPoint: f32, rowsB: i32,
 * colsB: i32, outputMatrixB: i32) which implements the function:
 *   int8_prepare_b(const float* inputMatrixB, float scale, float zeroPoint,
 * uint32_t rowsB, uint32_t colsB, int8_t* outputMatrixB)
 */
int32_t IntrI8PrepareB(wasm::Instance* instance, uint32_t inputMatrixB,
                       float scale, float zeroPoint, uint32_t rowsB,
                       uint32_t colsB, uint32_t outputMatrixB,
                       uint8_t* memBase);

/* Prepare B for the Matrix Multiply function from transposed version of Input
 * matrix B.
 *
 * Quantization is performed on floating values of input.
 * The final prepared B is in CPU-dependent format and can be used as an input
 * to matrix multiply function (`int8_multiply_and_add_bias`).
 *
 * Please note that this interface might have architecture specific
 * implementation.
 *
 * @param[in]   inputMatrixBTransposed An array representing transposed version
 *                                     of Input matrix B.
 *                                     It is in column-major format.
 *                                     Size of the array = `rowsB` * `colsB`.
 *                                     Shape of the matrix: (`colsB`, `rowsB`)
 * @param[in]   scale                  The scaling factor (for quantization)
 * @param[in]   zeroPoint              The zero point (for quantization)
 * @param[in]   rowsB                  No. of rows of Input matrix B. It should
 *                                     be a positive integer and a multiple of
 *                                     64.
 * @param[in]   colsB                  No. of columns of Input matrix B. It
 *                                     should be a positive integer and a
 *                                     multiple of 8.
 * @param[out]  outputMatrixB          An array representing the prepared B
 *                                     matrix. Size of array = `rowsB`*`colsB`
 *
 * This function implements the intrinsic:
 *   int8_prepare_b_from_transposed(inputMatrixBTransposed: i32, scale: f32,
 * zeroPoint: f32, rowsB: i32, colsB: i32, outputMatrixB: i32) which implements
 * the function: int8_prepare_b_from_transposed(const float*
 * inputMatrixBTransposed, float scale, float zeroPoint, uint32_t rowsB,
 * uint32_t colsB, int8_t* outputMatrixB)
 */
int32_t IntrI8PrepareBFromTransposed(wasm::Instance* instance,
                                     uint32_t inputMatrixBTransposed,
                                     float scale, float zeroPoint,
                                     uint32_t rowsB, uint32_t colsB,
                                     uint32_t outputMatrixB, uint8_t* memBase);

/* Prepare B for the Matrix Multiply function from a quantized and transposed
 * version of Input matrix B which is also in a CPU-independent format.
 *
 * The final prepared B is in CPU-dependent format and can be used as an input
 * to matrix multiply function (`int8_multiply_and_add_bias`).
 *
 * This function is useful while using the quantized models that are stored in a
 * CPU-independent format on the disk.
 *
 * @param[in]   inputMatrixBQuantizedTransposed  An array representing the
 *                                               quantized and transposed
 *                                               version of Input matrix B.
 *                                               It is in column-major format.
 *                                               Size of array =
 *                                                 `rowsB`*`colsB`
 *                                               Shape of the matrix:
 *                                                 (`colsB`,`rowsB`)
 * @param[in]   rowsB                            No. of rows of Input matrix B.
 *                                               Should be a positive integer
 *                                               and a multiple of 64.
 * @param[in]   colsB                            No. of columns of Input matrix
 *                                               B. Should be a positive
 *                                               integer and a multiple of 8
 * @param[out]  outputMatrixB                    An array representing the
 *                                               prepared B matrix.
 *                                               Size: `rowsB` * `colsB`.
 *
 * This function implements the intrinsic:
 *   int8_prepare_b_from_quantized_transposed(inputMatrixBQuantizedTransposed:
 * i32, rowsB: i32, colsB: i32, outputMatrixB: i32) which implements the
 * function: int8_prepare_b_from_quantized_transposed(const int8_t*
 * inputMatrixBQuantizedTransposed, uint32_t rowsB, uint32_t colsB, int8_t*
 * outputMatrixB)
 */
int32_t IntrI8PrepareBFromQuantizedTransposed(
    wasm::Instance* instance, uint32_t inputMatrixBQuantizedTransposed,
    uint32_t rowsB, uint32_t colsB, uint32_t outputMatrixB, uint8_t* memBase);

/* Prepare A for the Matrix Multiply function from Input matrix A.
 *
 * It performs quantization on floating values of input.
 * The final prepared A might be architecture dependent. e.g. On some
 * architectures like x86, it might be unsigned (achieved by adding 127 to
 * quantized values) while on others like Arm, it might be signed. The final
 * prepared A can be used as an input to matrix multiply function
 * (`int8_multiply_and_add_bias`).
 *
 * Please note that this interface might have architecture specific
 * implementation.
 *
 * @param[in]   inputMatrixA   An array representing the Input matrix A in
 *                             row-major format.
 *                             Size of the array = `rowsA` * `colsA`.
 *                             Shape of the matrix: (`rowsA`, `colsA`)
 * @param[in]   scale          The scaling factor (for quantization)
 * @param[in]   zeroPoint      The zero point (for quantization)
 * @param[in]   rowsA          No. of rows of Input matrix A. It should be a
 *                             positive integer.
 * @param[in]   colsA          No. of columns of Input matrix A. It should be a
 *                             positive integer and a multiple of 64.
 * @param[out]  outputMatrixA  An array representing the prepared A matrix.
 *                             Size of the array = `rowsA` * `colsA`.
 *
 * This function implements the intrinsic:
 *   int8_prepare_a(inputMatrixA: i32, scale: f32, zeroPoint: f32, rowsA: i32,
 * colsA: i32, outputMatrixA: i32) which implements the function:
 *   int8_prepare_a(const float* inputMatrixA, float scale, float zeroPoint,
 * uint32_t rowsA, uint32_t colsA, int8_t* outputMatrixA)
 */
int32_t IntrI8PrepareA(wasm::Instance* instance, uint32_t inputMatrixA,
                       float scale, float zeroPoint, uint32_t rowsA,
                       uint32_t colsA, uint32_t outputMatrixA,
                       uint8_t* memBase);

/* Prepares bias for the Matrix Multiply function.
 *
 * It uses the prepared B (which must be obtained by using any of the
 * int8_prepare_b* functions) and a bias input to prepare the final bias.
 *
 * The final bias can be used as an input to matrix multiply function
 * (`int8_multiply_and_add_bias`).
 *
 * @param[in]   inputMatrixBPrepared An array representing the prepared B
 *                                   matrix. Size of array = `rowsB`*`colsB`.
 * @param[in]   scaleA               The scaling factor (for quantization) of A
 * @param[in]   zeroPointA           The zero point (for quantization) of A
 * @param[in]   scaleB               The scaling factor (for quantization) of B
 * @param[in]   zeroPointB           The zero point (for quantization) of B
 * @param[in]   rowsB                No. of rows of Input matrix B (unquantized
 *                                   & non-transposed). It should be a positive
 *                                   integer and a multiple of 64.
 * @param[in]   colsB                No. of columns of Input matrix B
 *                                   (unquantized & non-transposed). It should
 *                                   be a positive integer and a multiple of 8.
 * @param[in]   inputBias            An array representing the input bias. Size
 *                                   of array = `colsB`
 * @param[out]  output               An array representing the final prepared
 *                                   bias. Size of the array = `colsB`
 *
 * This function implements the intrinsic:
 *   int8_prepare_bias(inputMatrixBPrepared: i32, scaleA: f32, zeroPointA: f32,
 * scaleB: f32, zeroPointB: f32, rowsB: i32, colsB: i32, inputBias: i32, output:
 * i32) which implements the function: int8_prepare_bias(const int8_t*
 * inputMatrixBPrepared, float scaleA, float zeroPointA, float scaleB, float
 * zeroPointB, uint32_t rowsB, uint32_t colsB, const float* inputBias, float*
 * output)
 */
int32_t IntrI8PrepareBias(wasm::Instance* instance,
                          uint32_t inputMatrixBPrepared, float scaleA,
                          float zeroPointA, float scaleB, float zeroPointB,
                          uint32_t rowsB, uint32_t colsB, uint32_t inputBias,
                          uint32_t output, uint8_t* memBase);

/* Perform multiplication of 2 matrices followed by adding a bias.
 *
 * i.e Output = inputMatrixAPrepared * inputMatrixBPrepared + inputBiasPrepared
 *
 * The inputs inputMatrixAPrepared, inputMatrixBPrepared and inputBiasPrepared
 * of this function must be obtained by using `int8_prepare_A`, one of the
 * `int8_prepare_b*` and `int8_prepare_bias` functions respectively.
 *
 * Please note that this interface might have architecture specific
 * implementation.
 *
 * @param[in]   inputMatrixAPrepared   An array representing the prepared A
 *                                     matrix. This must be obtained by using
 *                                     `int8_prepare_A` function. Size of the
 *                                     array = `rowsA` * `width`.
 * @param[in]   scaleA                 The scaling factor (quantization) of A
 * @param[in]   zeroPointA             The zero point (for quantization) of A
 * @param[in]   inputMatrixBPrepared   An array representing the prepared B
 *                                     matrix. This must be obtained by using
 *                                     one of `int8_prepare_b*` functions.
 *                                     Size of the array = `width` * `colsB`.
 * @param[in]   scaleB                 The scaling factor (quantization) of B
 * @param[in]   zeroPointB             The zero point (for quantization) of B
 * @param[in]   inputBiasPrepared      An array representing the prepared bias.
 *                                     This must be obtained by using
 *                                     `int8_prepare_bias` function.
 *                                     Size of the array = `colsB`
 * @param[in]   unquantMultiplier      A value that will be multiplied to the
 *                                     final unquantization factor that is
 *                                     prepared from `scaleA` and `scaleB`.
 * @param[in]   rowsA                  No. of rows of Input matrix A. It should
 *                                     be a positive integer.
 * @param[in]   width                  No. of columns of Input matrix A (same as
 *                                     no. of columns of Input matrix B). It
 *                                     should be a positive integer and a
 *                                     multiple of 64.
 * @param[in]   colsB                  No. of columns of Input matrix B. Should
 *                                     be a multiple of 8.
 * @param[out]  output                 An array representing the result matrix
 *                                     in row-major format.
 *                                     Size of the array = `rowsA` * `colsB`.
 *
 * This function implements the intrinsic:
 *   int8_multiply_and_add_bias(inputMatrixAPrepared: i32, scaleA: f32,
 * zeroPointA: f32, inputMatrixBPrepared: i32, scaleB: f32, zeroPointB: f32,
 *                     inputBiasPrepared: i32, unquantMultiplier: f32,
 *                     rowsA: i32, width: i32, colsB: i32, output: i32)
 * which implements the function:
 *   int8_multiply_and_add_bias(const int8_t* inputMatrixAPrepared, float
 * scaleA, float zeroPointA, const int8_t* inputMatrixBPrepared, float scaleB,
 * float zeroPointB, const float* inputBiasPrepared, float unquantMultiplier,
 *                     uint32_t rowsA, uint32_t width, uint32_t colsB, float*
 * output)
 */
int32_t IntrI8MultiplyAndAddBias(wasm::Instance* instance,
                                 uint32_t inputMatrixAPrepared, float scaleA,
                                 float zeroPointA,
                                 uint32_t inputMatrixBPrepared, float scaleB,
                                 float zeroPointB, uint32_t inputBiasPrepared,
                                 float unquantMultiplier, uint32_t rowsA,
                                 uint32_t width, uint32_t colsB,
                                 uint32_t output, uint8_t* memBase);

/* Select a subset of columns of prepared B.
 *
 * Indices of the columns to be selected are specified by an array.
 *
 * @param[in]   inputMatrixBPrepared  An array representing the prepared B
 *                                    matrix. This must be obtained by using
 *                                    one of the `int8_prepare_b*` functions.
 *                                    Size of the array = `rowsB` * `colsB`.
 * @param[in]   rowsB                 No. of rows of Input matrix B. It should
 *                                    be a positive integer and a multiple
 *                                    of 64.
 * @param[in]   colsB                 No. of columns of Input matrix B. It
 *                                    should be a positive integer and a
 *                                    multiple of 8.
 * @param[in]   colIndexList          An array of column indices to be selected
 *                                    from prepared B. All indices of the array
 *                                    should be valid
 *                                    i.e. 0 <= colIndexList[N] < colsB
 *                                    where N = 0, 1 ....(`sizeColIndexList`-1)
 * @param[in]   sizeColIndexList      Size of the `colIndexList` array. It
 *                                    should be a positive integer and a
 *                                    multiple of 8.
 * @param[out]  output                An array representing the selected columns
 *                                    of prepared B.
 *                                    Size = `rowsB` * `sizeColIndexList`.
 *
 * This function implements the intrinsic:
 *   int8_select_columns_of_b(inputMatrixBPrepared: i32, rowsB: i32, colsB: i32,
 * colIndexList: i32, sizeColIndexList: i32, output: i32) which implements the
 * function: int8_select_columns_of_b(const int8_t* inputMatrixBPrepared,
 * uint32_t rowsB, uint32_t colsB, const uint32_t* colIndexList, const uint32_t
 * sizeColIndexList, int8_t* output)
 */
int32_t IntrI8SelectColumnsOfB(wasm::Instance* instance,
                               uint32_t inputMatrixBPrepared, uint32_t rowsB,
                               uint32_t colsB, uint32_t colIndexList,
                               uint32_t sizeColIndexList, uint32_t output,
                               uint8_t* memBase);

}  // namespace intgemm
}  // namespace js

#endif  // intgemm_IntegerGemmIntrinsic_h