src/isa-l/erasure_code/ppc64le/ec_base_vsx.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338

#ifndef _ERASURE_CODE_PPC64LE_H_
#define _ERASURE_CODE_PPC64LE_H_

#include "erasure_code.h"
#include <altivec.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Compiler-specific primitives used by the VSX kernels.
 *
 *   EC_vec_xl(off, ptr)        - load 16 bytes from (ptr + off) into a
 *                                vector unsigned char.
 *   EC_vec_permxor(va, vb, vc) - wrapper around the compiler's vpermxor
 *                                builtin.
 *
 * Selection order: IBM XL, GCC >= 8, GCC 7 (POWER9 or older target), then
 * any other compiler targeting POWER8. Anything else is rejected.
 *
 * On the older-compiler paths the selector operand of vpermxor is passed as
 * its bitwise complement (vec_nor(vc, vc)).
 * NOTE(review): presumably this compensates for how those compiler versions
 * expose vpermxor on little-endian targets - confirm before changing.
 *
 * The hand-written load helper is "static inline" so that every translation
 * unit including this header gets its own private copy; a plain "inline"
 * definition provides no external definition in C99/C11 and breaks the link
 * whenever the compiler decides not to inline the call.
 */
#if defined(__ibmxl__)
#define EC_vec_xl(a, b) vec_xl_be(a, b)
#define EC_vec_permxor(va, vb, vc) __vpermxor(va, vb, vc)
#elif defined __GNUC__ && __GNUC__ >= 8
#define EC_vec_xl(a, b) vec_xl_be(a, b)
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vc)
#elif defined __GNUC__ && __GNUC__ >= 7
#if defined _ARCH_PWR9
#define EC_vec_xl(a, b) vec_vsx_ld(a, b)
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
#else
/**
 * @brief Load 16 bytes from ptr + off using lxvd2x followed by xxswapd.
 *
 * @param off Byte offset added to ptr.
 * @param ptr Base address of the data to load.
 * @returns The loaded vector.
 */
static inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) {
	vector unsigned char vc;
	/*
	 * lxvd2x XT, RA, RB computes its address as (RA|0) + RB, i.e. r0 in
	 * the RA slot means the constant 0. The offset therefore uses the "b"
	 * constraint (any GPR except r0) and is widened to long so the full
	 * 64-bit register holds a sign-extended value. The trailing "m" input
	 * is not referenced by the template; it only tells the compiler which
	 * 16 bytes of memory the asm reads.
	 */
	__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0"
			     : "=wa" (vc)
			     : "b" ((long) off), "r" (ptr),
			       "m" (*(const unsigned char (*)[16]) (ptr + off)));
	return vc;
}
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
#endif
#else
#if defined _ARCH_PWR8
/**
 * @brief Load 16 bytes from ptr + off using lxvd2x followed by xxswapd.
 *
 * @param off Byte offset added to ptr.
 * @param ptr Base address of the data to load.
 * @returns The loaded vector.
 */
static inline vector unsigned char EC_vec_xl(int off, unsigned char *ptr) {
	vector unsigned char vc;
	/*
	 * lxvd2x XT, RA, RB computes its address as (RA|0) + RB, i.e. r0 in
	 * the RA slot means the constant 0. The offset therefore uses the "b"
	 * constraint (any GPR except r0) and is widened to long so the full
	 * 64-bit register holds a sign-extended value. The trailing "m" input
	 * is not referenced by the template; it only tells the compiler which
	 * 16 bytes of memory the asm reads.
	 */
	__asm__ __volatile__("lxvd2x %x0, %1, %2; xxswapd %x0, %x0"
			     : "=wa" (vc)
			     : "b" ((long) off), "r" (ptr),
			       "m" (*(const unsigned char (*)[16]) (ptr + off)));
	return vc;
}
#define EC_vec_permxor(va, vb, vc) __builtin_crypto_vpermxor(va, vb, vec_nor(vc, vc))
#else
#error "This code is only supported on ppc64le."
#endif
#endif

/**
 * @brief GF(2^8) vector multiply. VSX version.
 *
 * Does a GF(2^8) multiply across each byte of input source with expanded
 * constant and save to destination array. Can be used for erasure coding encode
 * and decode update when only one source is available at a time. Function
 * requires pre-calculation of a 32 byte constant array based on the input
 * coefficients.
 * @requires VSX
 *
 * @param len    Length of each vector in bytes.
 * @param gftbls Pointer to array of input tables generated from coding
 * 		 coefficients in ec_init_tables(). Must be of size 32.
 * @param src    Pointer to source input array.
 * @param dest   Pointer to destination data array.
 * @returns none
 */

void gf_vect_mul_vsx(int len, unsigned char *gftbls, unsigned char *src, unsigned char *dest);

/**
 * @brief GF(2^8) vector dot product. VSX version.
 *
 * Does a GF(2^8) dot product across each byte of the input array and a constant
 * set of coefficients to produce each byte of the output. Can be used for
 * erasure coding encode and decode. Function requires pre-calculation of a
 * 32*vlen byte constant array based on the input coefficients.
 * @requires VSX
 *
 * @param len    Length of each vector in bytes.
 * @param vlen   Number of vector sources.
 * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
 *               on the array of input coefficients.
 * @param src    Array of pointers to source inputs.
 * @param dest   Pointer to destination data array.
 * @returns none
 */

void gf_vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
			  unsigned char **src, unsigned char *dest);

/**
 * @brief GF(2^8) vector dot product with two outputs. VSX version.
 *
 * Vector dot product optimized to calculate two outputs at a time. Does two
 * GF(2^8) dot products across each byte of the input array and two constant
 * sets of coefficients to produce each byte of the outputs. Can be used for
 * erasure coding encode and decode. Function requires pre-calculation of a
 * 2*32*vlen byte constant array based on the two sets of input coefficients.
 * @requires VSX
 *
 * @param len    Length of each vector in bytes.
 * @param vlen   Number of vector sources.
 * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
 *               based on the array of input coefficients.
 * @param src    Array of pointers to source inputs.
 * @param dest   Array of pointers to destination data buffers.
 * @returns none
 */

void gf_2vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
			   unsigned char **src, unsigned char **dest);

/**
 * @brief GF(2^8) vector dot product with three outputs. VSX version.
 *
 * Vector dot product optimized to calculate three outputs at a time. Does three
 * GF(2^8) dot products across each byte of the input array and three constant
 * sets of coefficients to produce each byte of the outputs. Can be used for
 * erasure coding encode and decode. Function requires pre-calculation of a
 * 3*32*vlen byte constant array based on the three sets of input coefficients.
 * @requires VSX
 *
 * @param len    Length of each vector in bytes.
 * @param vlen   Number of vector sources.
 * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
 *               based on the array of input coefficients.
 * @param src    Array of pointers to source inputs.
 * @param dest   Array of pointers to destination data buffers.
 * @returns none
 */

void gf_3vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
			   unsigned char **src, unsigned char **dest);

/**
 * @brief GF(2^8) vector dot product with four outputs. VSX version.
 *
 * Vector dot product optimized to calculate four outputs at a time. Does four
 * GF(2^8) dot products across each byte of the input array and four constant
 * sets of coefficients to produce each byte of the outputs. Can be used for
 * erasure coding encode and decode. Function requires pre-calculation of a
 * 4*32*vlen byte constant array based on the four sets of input coefficients.
 * @requires VSX
 *
 * @param len    Length of each vector in bytes.
 * @param vlen   Number of vector sources.
 * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
 *               based on the array of input coefficients.
 * @param src    Array of pointers to source inputs.
 * @param dest   Array of pointers to destination data buffers.
 * @returns none
 */

void gf_4vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
			   unsigned char **src, unsigned char **dest);

/**
 * @brief GF(2^8) vector dot product with five outputs. VSX version.
 *
 * Vector dot product optimized to calculate five outputs at a time. Does five
 * GF(2^8) dot products across each byte of the input array and five constant
 * sets of coefficients to produce each byte of the outputs. Can be used for
 * erasure coding encode and decode. Function requires pre-calculation of a
 * 5*32*vlen byte constant array based on the five sets of input coefficients.
 * @requires VSX
 *
 * @param len    Length of each vector in bytes. Must be >= 16.
 * @param vlen   Number of vector sources.
 * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
 *               based on the array of input coefficients.
 * @param src    Array of pointers to source inputs.
 * @param dest   Array of pointers to destination data buffers.
 * @returns none
 */

void gf_5vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
			   unsigned char **src, unsigned char **dest);

/**
 * @brief GF(2^8) vector dot product with six outputs. VSX version.
 *
 * Vector dot product optimized to calculate six outputs at a time. Does six
 * GF(2^8) dot products across each byte of the input array and six constant
 * sets of coefficients to produce each byte of the outputs. Can be used for
 * erasure coding encode and decode. Function requires pre-calculation of a
 * 6*32*vlen byte constant array based on the six sets of input coefficients.
 * @requires VSX
 *
 * @param len    Length of each vector in bytes.
 * @param vlen   Number of vector sources.
 * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
 *               based on the array of input coefficients.
 * @param src    Array of pointers to source inputs.
 * @param dest   Array of pointers to destination data buffers.
 * @returns none
 */

void gf_6vect_dot_prod_vsx(int len, int vlen, unsigned char *gftbls,
			   unsigned char **src, unsigned char **dest);

/**
 * @brief GF(2^8) vector multiply accumulate. VSX version.
 *
 * Does a GF(2^8) multiply across each byte of input source with expanded
 * constant and add to destination array. Can be used for erasure coding encode
 * and decode update when only one source is available at a time. Function
 * requires pre-calculation of a 32*vec byte constant array based on the input
 * coefficients.
 * @requires VSX
 *
 * @param len    Length of each vector in bytes.
 * @param vec    The number of vector sources or rows in the generator matrix
 * 		 for coding.
 * @param vec_i  The vector index corresponding to the single input source.
 * @param gftbls Pointer to array of input tables generated from coding
 * 		 coefficients in ec_init_tables(). Must be of size 32*vec.
 * @param src    Pointer to source input array.
 * @param dest   Pointer to destination data array.
 * @returns none
 */

void gf_vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
		     unsigned char *dest);
/**
 * @brief GF(2^8) vector multiply with 2 accumulate. VSX version.
 *
 * Does a GF(2^8) multiply across each byte of input source with expanded
 * constants and add to destination arrays. Can be used for erasure coding
 * encode and decode update when only one source is available at a
 * time. Function requires pre-calculation of a 32*vec byte constant array based
 * on the input coefficients.
 * @requires VSX
 *
 * @param len    Length of each vector in bytes.
 * @param vec    The number of vector sources or rows in the generator matrix
 * 		 for coding.
 * @param vec_i  The vector index corresponding to the single input source.
 * @param gftbls Pointer to array of input tables generated from coding
 * 		 coefficients in ec_init_tables(). Must be of size 32*vec.
 * @param src    Pointer to source input array.
 * @param dest   Array of pointers to destination input/outputs.
 * @returns none
 */

void gf_2vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
		      unsigned char **dest);

/**
 * @brief GF(2^8) vector multiply with 3 accumulate. VSX version.
 *
 * Does a GF(2^8) multiply across each byte of input source with expanded
 * constants and add to destination arrays. Can be used for erasure coding
 * encode and decode update when only one source is available at a
 * time. Function requires pre-calculation of a 32*vec byte constant array based
 * on the input coefficients.
 * @requires VSX
 *
 * @param len    Length of each vector in bytes.
 * @param vec    The number of vector sources or rows in the generator matrix
 * 		 for coding.
 * @param vec_i  The vector index corresponding to the single input source.
 * @param gftbls Pointer to array of input tables generated from coding
 * 		 coefficients in ec_init_tables(). Must be of size 32*vec.
 * @param src    Pointer to source input array.
 * @param dest   Array of pointers to destination input/outputs.
 * @returns none
 */

void gf_3vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
		      unsigned char **dest);

/**
 * @brief GF(2^8) vector multiply with 4 accumulate. VSX version.
 *
 * Does a GF(2^8) multiply across each byte of input source with expanded
 * constants and add to destination arrays. Can be used for erasure coding
 * encode and decode update when only one source is available at a
 * time. Function requires pre-calculation of a 32*vec byte constant array based
 * on the input coefficients.
 * @requires VSX
 *
 * @param len    Length of each vector in bytes.
 * @param vec    The number of vector sources or rows in the generator matrix
 * 		 for coding.
 * @param vec_i  The vector index corresponding to the single input source.
 * @param gftbls Pointer to array of input tables generated from coding
 * 		 coefficients in ec_init_tables(). Must be of size 32*vec.
 * @param src    Pointer to source input array.
 * @param dest   Array of pointers to destination input/outputs.
 * @returns none
 */

void gf_4vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
		      unsigned char **dest);

/**
 * @brief GF(2^8) vector multiply with 5 accumulate. VSX version.
 *
 * Does a GF(2^8) multiply across each byte of input source with expanded
 * constants and add to destination arrays. Can be used for erasure coding
 * encode and decode update when only one source is available at a
 * time. Function requires pre-calculation of a 32*vec byte constant array based
 * on the input coefficients.
 * @requires VSX
 *
 * @param len    Length of each vector in bytes.
 * @param vec    The number of vector sources or rows in the generator matrix
 * 		 for coding.
 * @param vec_i  The vector index corresponding to the single input source.
 * @param gftbls Pointer to array of input tables generated from coding
 * 		 coefficients in ec_init_tables(). Must be of size 32*vec.
 * @param src    Pointer to source input array.
 * @param dest   Array of pointers to destination input/outputs.
 * @returns none
 */
void gf_5vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
		      unsigned char **dest);

/**
 * @brief GF(2^8) vector multiply with 6 accumulate. VSX version.
 *
 * Does a GF(2^8) multiply across each byte of input source with expanded
 * constants and add to destination arrays. Can be used for erasure coding
 * encode and decode update when only one source is available at a
 * time. Function requires pre-calculation of a 32*vec byte constant array based
 * on the input coefficients.
 * @requires VSX
 *
 * @param len    Length of each vector in bytes.
 * @param vec    The number of vector sources or rows in the generator matrix
 * 		 for coding.
 * @param vec_i  The vector index corresponding to the single input source.
 * @param gftbls Pointer to array of input tables generated from coding
 * 		 coefficients in ec_init_tables(). Must be of size 32*vec.
 * @param src    Pointer to source input array.
 * @param dest   Array of pointers to destination input/outputs.
 * @returns none
 */
void gf_6vect_mad_vsx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
		      unsigned char **dest);

#ifdef __cplusplus
}
#endif

#endif //_ERASURE_CODE_PPC64LE_H_